[Git][ghc/ghc][wip/andreask/elem_rule_fix] Rework treatment of `elem`. Add UTF8 GHC.CString functions.

Wed Apr 1 22:07:52 UTC 2020

Andreas Klebinger pushed to branch wip/andreask/elem_rule_fix at Glasgow Haskell Compiler / GHC


Commits:
d6c0da22 by Andreas Klebinger at 2020-04-02T00:07:37+02:00
Rework treatment of `elem`. Add UTF8 GHC.CString functions.

We optimize `elem` now in more ways than before.

A fusion RULE for elem was broken preventing it from firing.
Fixing this allows a call to elem on a known list to be translated
into a series of equality checks. These ultimately simply to a
single case expression.

It will also turn into a fold for dynamic lists when list fusion
applies.

This logic however does not work on string literals. For this reason
I added a rule to do the transformation into a case explicitly via a
built in rule. It will fire for any short string literals. (<=32 Bytes).

Longer calls to elem on string literals are rewritten to calls
to elem variants specialized to string literals.

This means we will rewrite:
    * c `elem` (unpackCString "Foo"#) => elemCString# c' "Foo"#
    * c `elem` (unpackCStringUtf8 "Bär"#) => elemCStringUtf8# c' "Bär"#

This means all calls to elem on string literals, as well as many
on other statically known lists should now be allocation free.

As a byproduct GHC.CString functionality is now available for Ascii
and UTF8 strings. The following missing UTF8 variants were
added: unpackAppendCStringUtf8#, unpackFoldrCStringUtf8#

They work just like their ascii counterparts. Which hopefully
makes proper UTF8 support easier for library authors.

A version of `elem` operating on known string literals was added:
* elemCString#     :: Char# -> Addr# -> Bool
* elemCStringUtf8# :: Char# -> Addr# -> Bool

- - - - -


12 changed files:

- compiler/GHC/Core/Make.hs
- compiler/GHC/Core/Op/ConstantFold.hs
- compiler/prelude/PrelNames.hs
- compiler/utils/Util.hs
- libraries/base/GHC/Base.hs
- libraries/base/GHC/List.hs
- + libraries/base/tests/perf/Makefile
- + libraries/base/tests/perf/T17752.hs
- + libraries/base/tests/perf/T17752.stdout
- + libraries/base/tests/perf/all.T
- libraries/ghc-prim/GHC/CString.hs
- libraries/ghc-prim/changelog.md


Changes:

=====================================
compiler/GHC/Core/Make.hs
=====================================
@@ -13,6 +13,7 @@ module GHC.Core.Make (
         sortQuantVars, castBottomExpr,
 
         -- * Constructing boxed literals
+        trueExpr, falseExpr,
         mkWordExpr, mkWordExprWord,
         mkIntExpr, mkIntExprInt,
         mkIntegerExpr, mkNaturalExpr,
@@ -248,6 +249,10 @@ castBottomExpr e res_ty
 ************************************************************************
 -}
 
+trueExpr, falseExpr :: CoreExpr
+trueExpr  = Var trueDataConId
+falseExpr = Var falseDataConId
+
 -- | Create a 'CoreExpr' which will evaluate to the given @Int@
 mkIntExpr :: Platform -> Integer -> CoreExpr        -- Result = I# i :: Int
 mkIntExpr platform i = mkCoreConApps intDataCon  [mkIntLit platform i]


=====================================
compiler/GHC/Core/Op/ConstantFold.hs
=====================================
@@ -34,7 +34,7 @@ import GHC.Core
 import GHC.Core.Make
 import GHC.Types.Id
 import GHC.Types.Literal
-import GHC.Core.SimpleOpt ( exprIsLiteral_maybe )
+import GHC.Core.SimpleOpt ( exprIsLiteral_maybe, exprIsLambda_maybe )
 import PrimOp             ( PrimOp(..), tagToEnumKey )
 import TysWiredIn
 import TysPrim
@@ -67,6 +67,8 @@ import Data.Int
 import Data.Ratio
 import Data.Word
 
+import Encoding (utf8DecodeByteString)
+
 {-
 Note [Constant folding]
 ~~~~~~~~~~~~~~~~~~~~~~~
@@ -1254,11 +1256,18 @@ builtinRules :: [CoreRule]
 builtinRules
   = [BuiltinRule { ru_name = fsLit "AppendLitString",
                    ru_fn = unpackCStringFoldrName,
-                   ru_nargs = 4, ru_try = match_append_lit },
+                   ru_nargs = 4, ru_try = match_append_lit_C },
+     BuiltinRule { ru_name = fsLit "AppendLitStringUtf8",
+                   ru_fn = unpackCStringFoldrUtf8Name,
+                   ru_nargs = 4, ru_try = match_append_lit_utf8 },
      BuiltinRule { ru_name = fsLit "EqString", ru_fn = eqStringName,
                    ru_nargs = 2, ru_try = match_eq_string },
      BuiltinRule { ru_name = fsLit "Inline", ru_fn = inlineIdName,
                    ru_nargs = 2, ru_try = \_ _ _ -> match_inline },
+
+     -- See Note [Compiling `elem` on Strings] in CString.hs
+     BuiltinRule { ru_name = fsLit "ElemLitString", ru_fn = elemName,
+                   ru_nargs = 4, ru_try = match_elem },
      BuiltinRule { ru_name = fsLit "MagicDict", ru_fn = idName magicDictId,
                    ru_nargs = 4, ru_try = \_ _ _ -> match_magicDict },
 
@@ -1430,11 +1439,20 @@ builtinNaturalRules =
 
 ---------------------------------------------------
 -- The rule is this:

---      unpackFoldrCString# "foo" c (unpackFoldrCString# "baz" c n)
---      =  unpackFoldrCString# "foobaz" c n
+--      unpackFoldrCString*# "foo" c (unpackFoldrCString*# "baz" c n)
+--      =  unpackFoldrCString*# "foobaz" c n
+
+-- CString version
+match_append_lit_C :: RuleFun
+match_append_lit_C = match_append_lit unpackCStringFoldrIdKey
+
+-- CStringUTF8 version
+match_append_lit_utf8 :: RuleFun
+match_append_lit_utf8 = match_append_lit unpackCStringFoldrUtf8IdKey
 
-match_append_lit :: RuleFun
-match_append_lit _ id_unf _
+{-# INLINE match_append_lit #-}
+match_append_lit :: Unique -> RuleFun
+match_append_lit foldVariant _ id_unf _
         [ Type ty1
         , lit1
         , c1
@@ -1447,7 +1465,7 @@ match_append_lit _ id_unf _
                         `App` lit2
                         `App` c2
                         `App` n) <- stripTicksTop tickishFloatable e2
-  , unpk `hasKey` unpackCStringFoldrIdKey
+  , unpk `hasKey` foldVariant
   , cheapEqExpr' tickishFloatable c1 c2
   , (c1Ticks, c1') <- stripTicksTop tickishFloatable c1
   , c2Ticks <- stripTicksTopT tickishFloatable c2
@@ -1460,23 +1478,62 @@ match_append_lit _ id_unf _
                     `App` mkTicks (c1Ticks ++ c2Ticks) c1'
                     `App` n
 
-match_append_lit _ _ _ _ = Nothing
+match_append_lit _ _ _ _ _ = Nothing
 
 ---------------------------------------------------
 -- The rule is this:
 --      eqString (unpackCString# (Lit s1)) (unpackCString# (Lit s2)) = s1==s2
+-- Also  matches unpackCStringUtf8#
 
 match_eq_string :: RuleFun
 match_eq_string _ id_unf _
         [Var unpk1 `App` lit1, Var unpk2 `App` lit2]
-  | unpk1 `hasKey` unpackCStringIdKey
-  , unpk2 `hasKey` unpackCStringIdKey
+  | unpk_key1 <- getUnique unpk1
+  , unpk_key2 <- getUnique unpk2
+  , unpk_key1 == unpk_key2
+  , unpk_key1 `elem` [unpackCStringUtf8IdKey, unpackCStringIdKey]
   , Just (LitString s1) <- exprIsLiteral_maybe id_unf lit1
   , Just (LitString s2) <- exprIsLiteral_maybe id_unf lit2
   = Just (if s1 == s2 then trueValBool else falseValBool)
 
 match_eq_string _ _ _ _ = Nothing
 
+---------------------------------------------------
+-- The rule is this:
+--      elem x (build ((unpackFoldrCString# "abc"#)))
+--      = case x of
+--          'a'# -> True
+--          'b'# -> True
+--          'c'# -> True
+--          _    -> False
+--
+-- Only matches short string (<20 bytes).
+-- Also  matches unpackCStringUtf8#
+-- See Note [Compiling `elem` on Strings] in CString.hs
+
+match_elem :: RuleFun
+match_elem _ id_unf _ [Type ty, _dict, x, xs]
+  | ty `eqType` charTy
+  , ( xsTicks
+    , Var build `App` _char_ty `App` unf_str_app)
+        <- stripTicksTop tickishFloatable xs
+  , Just (_arg, (Var unfold_str `App` _ty2 `App` lit1), lam_ticks)
+      <- exprIsLambda_maybe id_unf unf_str_app
+  , build `hasKey` buildIdKey
+  , getUnique unfold_str `elem` [unpackCStringFoldrIdKey, unpackCStringFoldrUtf8IdKey]
+  , Just (LitString s1) <- exprIsLiteral_maybe id_unf lit1
+  , BS.length s1 <= 32
+  = let elem_string = nubSort $ utf8DecodeByteString s1 :: String
+        x_id = mkWildValBinder charTy :: Id
+        x_prim_id = mkWildValBinder charPrimTy :: Id
+        switch = Case (Var x_prim_id) x_prim_id boolTy
+                    ((DEFAULT, [], falseExpr):foldr mkCharAlt [] elem_string)
+        mkCharAlt :: Char -> [CoreAlt] -> [CoreAlt]
+        mkCharAlt c alts = (LitAlt (mkLitChar c), [], trueExpr):alts
+    in  Just $ mkTicks (xsTicks ++ lam_ticks)
+             $ Case x x_id boolTy [(DataAlt charDataCon, [x_id], switch)]
+
+match_elem _ _ _ _ = Nothing
 
 ---------------------------------------------------
 -- The rule is this:


=====================================
compiler/prelude/PrelNames.hs
=====================================
@@ -340,8 +340,8 @@ basicKnownKeyNames
         groupWithName,
 
         -- Strings and lists
-        unpackCStringName,
-        unpackCStringFoldrName, unpackCStringUtf8Name,
+        unpackCStringName, unpackCStringUtf8Name,
+        unpackCStringFoldrName, unpackCStringFoldrUtf8Name,
 
         -- Overloaded lists
         isListClassName,
@@ -352,6 +352,7 @@ basicKnownKeyNames
         -- List operations
         concatName, filterName, mapName,
         zipName, foldrName, buildName, augmentName, appendName,
+        elemName,
 
         -- FFI primitive types that are not wired-in.
         stablePtrTyConName, ptrTyConName, funPtrTyConName,
@@ -677,9 +678,10 @@ ordClass_RDR            = nameRdrName ordClassName
 enumClass_RDR           = nameRdrName enumClassName
 monadClass_RDR          = nameRdrName monadClassName
 
-map_RDR, append_RDR :: RdrName
+map_RDR, append_RDR, elem_RDR :: RdrName
 map_RDR                 = nameRdrName mapName
 append_RDR              = nameRdrName appendName
+elem_RDR                = nameRdrName elemName
 
 foldr_RDR, build_RDR, returnM_RDR, bindM_RDR, failM_RDR
     :: RdrName
@@ -712,11 +714,12 @@ ioDataCon_RDR :: RdrName
 ioDataCon_RDR           = nameRdrName ioDataConName
 
 eqString_RDR, unpackCString_RDR, unpackCStringFoldr_RDR,
-    unpackCStringUtf8_RDR :: RdrName
+    unpackCStringFoldrUtf8_RDR, unpackCStringUtf8_RDR :: RdrName
 eqString_RDR            = nameRdrName eqStringName
 unpackCString_RDR       = nameRdrName unpackCStringName
 unpackCStringFoldr_RDR  = nameRdrName unpackCStringFoldrName
 unpackCStringUtf8_RDR   = nameRdrName unpackCStringUtf8Name
+unpackCStringFoldrUtf8_RDR  = nameRdrName unpackCStringFoldrUtf8Name
 
 newStablePtr_RDR :: RdrName
 newStablePtr_RDR        = nameRdrName newStablePtrName
@@ -1006,11 +1009,12 @@ modIntName = varQual gHC_CLASSES (fsLit "modInt#") modIntIdKey
 
 -- Base strings Strings
 unpackCStringName, unpackCStringFoldrName,
-    unpackCStringUtf8Name, eqStringName :: Name
+    unpackCStringUtf8Name, unpackCStringFoldrUtf8Name,eqStringName :: Name
 unpackCStringName       = varQual gHC_CSTRING (fsLit "unpackCString#") unpackCStringIdKey
 unpackCStringFoldrName  = varQual gHC_CSTRING (fsLit "unpackFoldrCString#") unpackCStringFoldrIdKey
 unpackCStringUtf8Name   = varQual gHC_CSTRING (fsLit "unpackCStringUtf8#") unpackCStringUtf8IdKey
 eqStringName            = varQual gHC_BASE (fsLit "eqString")  eqStringIdKey
+unpackCStringFoldrUtf8Name  = varQual gHC_CSTRING (fsLit "unpackFoldrCStringUtf8#") unpackCStringFoldrUtf8IdKey
 
 -- The 'inline' function
 inlineIdName :: Name
@@ -1083,7 +1087,7 @@ groupWithName = varQual gHC_EXTS (fsLit "groupWith") groupWithIdKey
 
 -- Random PrelBase functions
 fromStringName, otherwiseIdName, foldrName, buildName, augmentName,
-    mapName, appendName, assertName,
+    mapName, appendName, elemName, assertName,
     breakpointName, breakpointCondName,
     opaqueTyConName, dollarName :: Name
 dollarName        = varQual gHC_BASE (fsLit "$")          dollarIdKey
@@ -1097,6 +1101,7 @@ assertName        = varQual gHC_BASE (fsLit "assert")     assertIdKey
 breakpointName    = varQual gHC_BASE (fsLit "breakpoint") breakpointIdKey
 breakpointCondName= varQual gHC_BASE (fsLit "breakpointCond") breakpointCondIdKey
 opaqueTyConName   = tcQual  gHC_BASE (fsLit "Opaque")     opaqueTyConKey
+elemName          = varQual gHC_LIST (fsLit "elem")       elemIdKey
 fromStringName = varQual dATA_STRING (fsLit "fromString") fromStringClassOpKey
 
 -- PrelTup
@@ -2081,13 +2086,15 @@ unsafeReflDataConKey      = mkPreludeDataConUnique 114
 -}
 
 wildCardKey, absentErrorIdKey, augmentIdKey, appendIdKey,
+    elemIdKey,
     buildIdKey, errorIdKey, foldrIdKey, recSelErrorIdKey,
     seqIdKey, eqStringIdKey,
     noMethodBindingErrorIdKey, nonExhaustiveGuardsErrorIdKey,
     runtimeErrorIdKey, patErrorIdKey, voidPrimIdKey,
     realWorldPrimIdKey, recConErrorIdKey,
     unpackCStringUtf8IdKey, unpackCStringAppendIdKey,
-    unpackCStringFoldrIdKey, unpackCStringIdKey,
+    unpackCStringFoldrIdKey, unpackCStringFoldrUtf8IdKey,
+    unpackCStringIdKey,
     typeErrorIdKey, divIntIdKey, modIntIdKey,
     absentSumFieldErrorIdKey :: Unique
 
@@ -2095,27 +2102,29 @@ wildCardKey                   = mkPreludeMiscIdUnique  0  -- See Note [WildCard
 absentErrorIdKey              = mkPreludeMiscIdUnique  1
 augmentIdKey                  = mkPreludeMiscIdUnique  2
 appendIdKey                   = mkPreludeMiscIdUnique  3
-buildIdKey                    = mkPreludeMiscIdUnique  4
-errorIdKey                    = mkPreludeMiscIdUnique  5
-foldrIdKey                    = mkPreludeMiscIdUnique  6
-recSelErrorIdKey              = mkPreludeMiscIdUnique  7
-seqIdKey                      = mkPreludeMiscIdUnique  8
-absentSumFieldErrorIdKey      = mkPreludeMiscIdUnique  9
-eqStringIdKey                 = mkPreludeMiscIdUnique 10
-noMethodBindingErrorIdKey     = mkPreludeMiscIdUnique 11
-nonExhaustiveGuardsErrorIdKey = mkPreludeMiscIdUnique 12
-runtimeErrorIdKey             = mkPreludeMiscIdUnique 13
-patErrorIdKey                 = mkPreludeMiscIdUnique 14
-realWorldPrimIdKey            = mkPreludeMiscIdUnique 15
-recConErrorIdKey              = mkPreludeMiscIdUnique 16
-unpackCStringUtf8IdKey        = mkPreludeMiscIdUnique 17
-unpackCStringAppendIdKey      = mkPreludeMiscIdUnique 18
-unpackCStringFoldrIdKey       = mkPreludeMiscIdUnique 19
-unpackCStringIdKey            = mkPreludeMiscIdUnique 20
-voidPrimIdKey                 = mkPreludeMiscIdUnique 21
-typeErrorIdKey                = mkPreludeMiscIdUnique 22
-divIntIdKey                   = mkPreludeMiscIdUnique 23
-modIntIdKey                   = mkPreludeMiscIdUnique 24
+elemIdKey                     = mkPreludeMiscIdUnique  4
+buildIdKey                    = mkPreludeMiscIdUnique  5
+errorIdKey                    = mkPreludeMiscIdUnique  6
+foldrIdKey                    = mkPreludeMiscIdUnique  7
+recSelErrorIdKey              = mkPreludeMiscIdUnique  8
+seqIdKey                      = mkPreludeMiscIdUnique  9
+absentSumFieldErrorIdKey      = mkPreludeMiscIdUnique 10
+eqStringIdKey                 = mkPreludeMiscIdUnique 11
+noMethodBindingErrorIdKey     = mkPreludeMiscIdUnique 12
+nonExhaustiveGuardsErrorIdKey = mkPreludeMiscIdUnique 13
+runtimeErrorIdKey             = mkPreludeMiscIdUnique 14
+patErrorIdKey                 = mkPreludeMiscIdUnique 15
+realWorldPrimIdKey            = mkPreludeMiscIdUnique 16
+recConErrorIdKey              = mkPreludeMiscIdUnique 17
+unpackCStringUtf8IdKey        = mkPreludeMiscIdUnique 18
+unpackCStringAppendIdKey      = mkPreludeMiscIdUnique 19
+unpackCStringFoldrIdKey       = mkPreludeMiscIdUnique 20
+unpackCStringFoldrUtf8IdKey   = mkPreludeMiscIdUnique 21
+unpackCStringIdKey            = mkPreludeMiscIdUnique 22
+voidPrimIdKey                 = mkPreludeMiscIdUnique 23
+typeErrorIdKey                = mkPreludeMiscIdUnique 24
+divIntIdKey                   = mkPreludeMiscIdUnique 25
+modIntIdKey                   = mkPreludeMiscIdUnique 26
 
 concatIdKey, filterIdKey, zipIdKey,
     bindIOIdKey, returnIOIdKey, newStablePtrIdKey,


=====================================
compiler/utils/Util.hs
=====================================
@@ -637,7 +637,6 @@ ordNub xs
       | Set.member x s = go s xs
       | otherwise = x : go (Set.insert x s) xs
 
-
 {-
 ************************************************************************
 *                                                                      *


=====================================
libraries/base/GHC/Base.hs
=====================================
@@ -1618,7 +1618,11 @@ iShiftRL# :: Int# -> Int# -> Int#
 a `iShiftRL#` b | isTrue# (b >=# WORD_SIZE_IN_BITS#) = 0#
                 | otherwise                          = a `uncheckedIShiftRL#` b
 
+------------------------------------------------------------------------
 -- Rules for C strings (the functions themselves are now in GHC.CString)
+------------------------------------------------------------------------
+
+-- Ascii variants
 {-# RULES
 "unpack"       [~1] forall a   . unpackCString# a             = build (unpackFoldrCString# a)
 "unpack-list"  [1]  forall a   . unpackFoldrCString# a (:) [] = unpackCString# a
@@ -1628,3 +1632,14 @@ a `iShiftRL#` b | isTrue# (b >=# WORD_SIZE_IN_BITS#) = 0#
 --      unpackFoldr "foo" c (unpackFoldr "baz" c n)  =  unpackFoldr "foobaz" c n
 
   #-}
+
+-- Utf8 variants
+{-# RULES
+"unpackUtf8"       [~1] forall a   . unpackCStringUtf8# a             = build (unpackFoldrCStringUtf8# a)
+"unpack-listUtf8"  [1]  forall a   . unpackFoldrCStringUtf8# a (:) [] = unpackCStringUtf8# a
+"unpack-appendUtf8"     forall a n . unpackFoldrCStringUtf8# a (:) n  = unpackAppendCStringUtf8# a n
+
+-- There's a built-in rule (in PrelRules.hs) for
+--      unpackFoldrUtf8 "föö" c (unpackFoldrUtf8 "bäz" c n)  =  unpackFoldrUtf8 "fööbäz" c n
+
+  #-}


=====================================
libraries/base/GHC/List.hs
=====================================
@@ -1147,10 +1147,17 @@ elem x                  =  any (== x)
 #else
 elem _ []       = False
 elem x (y:ys)   = x==y || elem x ys
-{-# NOINLINE [1] elem #-}
+{-# NOINLINE elem #-}
 {-# RULES
-"elem/build"    forall x (g :: forall b . Eq a => (a -> b -> b) -> b -> b)
+"elem/build" forall x (g :: forall b . (a -> b -> b) -> b -> b)
    . elem x (build g) = g (\ y r -> (x == y) || r) False
+-- See Note [Compiling `elem` on Strings] in CString.hs
+"elem at CString/unpack" [0]   forall (x :: Char) (addr :: Addr#)
+   . elem x (unpackCString# addr)
+   = x `elemCString#` addr
+"elem at CStringUtf8/unpack" [0]   forall (x :: Char) (addr :: Addr#)
+   . elem x (unpackCStringUtf8# addr)
+   = x `elemCStringUtf8#` addr
  #-}
 #endif
 
@@ -1172,10 +1179,17 @@ notElem x               =  all (/= x)
 #else
 notElem _ []    =  True
 notElem x (y:ys)=  x /= y && notElem x ys
-{-# NOINLINE [1] notElem #-}
+{-# NOINLINE notElem #-}
 {-# RULES
-"notElem/build" forall x (g :: forall b . Eq a => (a -> b -> b) -> b -> b)
+"notElem/build" forall x (g :: forall b . (a -> b -> b) -> b -> b)
    . notElem x (build g) = g (\ y r -> (x /= y) && r) True
+-- See Note [Compiling `elem` on Strings] in CString.hs
+"notElem at CString/unpack" [0]   forall (x :: Char) (addr :: Addr#)
+   . notElem x (unpackCString# addr)
+   = not (x `elemCString#` addr)
+"notElem at CStringUtf8/unpack" [0]   forall (x :: Char) (addr :: Addr#)
+   . notElem x (unpackCStringUtf8# addr)
+   = not (x `elemCStringUtf8#` addr)
  #-}
 #endif
 


=====================================
libraries/base/tests/perf/Makefile
=====================================
@@ -0,0 +1,17 @@
+# This Makefile runs the tests using GHC's testsuite framework.  It
+# assumes the package is part of a GHC build tree with the testsuite
+# installed in ../../../testsuite.
+
+TOP=../../../../testsuite
+include $(TOP)/mk/boilerplate.mk
+include $(TOP)/mk/test.mk
+
+
+T17752:
+	'$(TEST_HC)' $(TEST_HC_OPTS) -O --make T17752 -rtsopts -ddump-simpl -ddump-to-file -dsuppress-uniques
+	# We only grep for specific patterns to minimize failures when eg core syntax changes
+	# We expect two unfolds using eqChar
+	echo $$(cat T17752.dump-simpl | grep unpackFoldrCString)
+	echo $$(cat T17752.dump-simpl | grep eqChar)
+	# And two pattern matches
+	echo $$(cat T17752.dump-simpl | grep "case x1" -A4 )


=====================================
libraries/base/tests/perf/T17752.hs
=====================================
@@ -0,0 +1,82 @@
+module T17752 where
+
+-- Should compile to unfoldCStr if the rules fire
+isElemLit    x = x `elem`    "theQuickShinyGHCJumpsOverTheRuleRecursion"
+isNotElemLit x = x `notElem` "_theQuickShinyGHCCompileJumpsOverTheRuleRecursion"
+
+-- Should compile to a pattern match if the rules fire
+isElemList    x = x `elem` ['a','b','c']
+isNotElemList x = x `elem` ['x','y','z']
+
+{-
+
+Should the grep tests fail make sure the core still behaves
+like the one below (or better!)
+
+-- RHS size: {terms: 1, types: 0, coercions: 0, joins: 0/0}
+isElemLit1 = "theQuickShinyGHCJumpsOverTheRuleRecursion"#
+
+-- RHS size: {terms: 20, types: 9, coercions: 0, joins: 0/0}
+isElemLit
+  = \ x ->
+      unpackFoldrCString#
+        isElemLit1
+        (\ y r ->
+           case x of { C# x1 ->
+           case y of { C# y1 ->
+           case eqChar# x1 y1 of {
+             __DEFAULT -> r;
+             1# -> True
+           }
+           }
+           })
+        False
+
+-- RHS size: {terms: 1, types: 0, coercions: 0, joins: 0/0}
+isNotElemLit1
+  = "_theQuickShinyGHCCompileJumpsOverTheRuleRecursion"#
+
+-- RHS size: {terms: 25, types: 10, coercions: 0, joins: 0/0}
+isNotElemLit
+  = \ x ->
+      case unpackFoldrCString#
+             isNotElemLit1
+             (\ y r ->
+                case x of { C# x1 ->
+                case y of { C# y1 ->
+                case eqChar# x1 y1 of {
+                  __DEFAULT -> r;
+                  1# -> True
+                }
+                }
+                })
+             False
+      of {
+        False -> True;
+        True -> False
+      }
+
+-- RHS size: {terms: 14, types: 4, coercions: 0, joins: 0/0}
+isElemList
+  = \ x ->
+      case x of { C# x1 ->
+      case x1 of {
+        __DEFAULT -> False;
+        'a'# -> True;
+        'b'# -> True;
+        'c'# -> True
+      }
+      }
+
+-- RHS size: {terms: 14, types: 4, coercions: 0, joins: 0/0}
+isNotElemList
+  = \ x ->
+      case x of { C# x1 ->
+      case x1 of {
+        __DEFAULT -> False;
+        'x'# -> True;
+        'y'# -> True;
+        'z'# -> True
+      }
+      }
+-}


=====================================
libraries/base/tests/perf/T17752.stdout
=====================================
@@ -0,0 +1,25 @@
+[1 of 1] Compiling T17752           ( T17752.hs, T17752.o )
+GHC.CString.unpackFoldrCString#
+ case GHC.CString.unpackFoldrCString#
+case GHC.Prim.eqChar# x1 y1 of {
+ case GHC.Prim.eqChar# x1 y1 of {
+case x1 of {
+ __DEFAULT -> GHC.Types.False;
+ 'a'# -> GHC.Types.True;
+ 'b'# -> GHC.Types.True;
+ 'c'# -> GHC.Types.True
+ -- case x1 of {
+ __DEFAULT -> GHC.Types.False;
+ 'a'# -> GHC.Types.True;
+ 'b'# -> GHC.Types.True;
+ 'c'# -> GHC.Types.True
+ -- case x1 of {
+ __DEFAULT -> GHC.Types.False;
+ 'x'# -> GHC.Types.True;
+ 'y'# -> GHC.Types.True;
+ 'z'# -> GHC.Types.True
+ -- case x1 of {
+ __DEFAULT -> GHC.Types.False;
+ 'x'# -> GHC.Types.True;
+ 'y'# -> GHC.Types.True;
+ 'z'# -> GHC.Types.True


=====================================
libraries/base/tests/perf/all.T
=====================================
@@ -0,0 +1,5 @@
+#--------------------------------------
+# Check specialization of elem via rules
+#--------------------------------------
+
+test('T17752', [only_ways(['normal'])] , makefile_test, ['T17752'])
\ No newline at end of file


=====================================
libraries/ghc-prim/GHC/CString.hs
=====================================
@@ -1,4 +1,5 @@
 {-# LANGUAGE MagicHash, NoImplicitPrelude, BangPatterns #-}
+{-# OPTIONS -fregs-graph #-} -- Needed for elemCString#, see #17823
 
 -----------------------------------------------------------------------------
 -- |
@@ -17,8 +18,16 @@
 -----------------------------------------------------------------------------
 
 module GHC.CString (
+        -- * Ascii variants
         unpackCString#, unpackAppendCString#, unpackFoldrCString#,
-        unpackCStringUtf8#, unpackNBytes#
+        elemCString#,
+
+        -- * Utf variants
+        unpackCStringUtf8#, unpackAppendCStringUtf8#, unpackFoldrCStringUtf8#,
+        elemCStringUtf8#,
+
+        -- * Other
+        unpackNBytes#,
     ) where
 
 import GHC.Types
@@ -71,8 +80,22 @@ Moreover, we want to make it CONLIKE, so that:
 All of this goes for unpackCStringUtf8# too.
 -}
 
-{- Note [unpackCString# iterating over addr]
-   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+{- Note [NOINLINE for unpackFoldrCString]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Usually the unpack-list rule turns unpackFoldrCString# into unpackCString#
+It also has a BuiltInRule in PrelRules.hs:
+     unpackFoldrCString# "foo" c (unpackFoldrCString# "baz" c n)
+       =  unpackFoldrCString# "foobaz" c n
+At one stage I had NOINLINE [0] on the grounds that, unlike
+unpackCString#, there *is* some point in inlining
+unpackFoldrCString#, because we get better code for the
+higher-order function call.  BUT there may be a lot of
+literal strings, and making a separate 'unpack' loop for
+each is highly gratuitous.  See nofib/real/anna/PrettyPrint.
+
+  Note [unpackCString# iterating over addr]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 When unpacking unpackCString# and friends repeatedly return a cons cell
 containing:
@@ -89,6 +112,64 @@ This works since these two expressions will read from the same address.
 
 This way we avoid the need for the thunks to close over both the start of
 the string and the current offset, saving a word for each character unpacked.
+
+This has the additional advantage the we can guarantee that  only the
+increment will happen in the loop.
+If we use the offset start off with the increment and an addition
+to get the real address. Which might not be optimized aways.
+
+  Note [Compiling `elem` on Strings]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The pattern f c = c `elem` "<someString>" is quite common in
+parsers and the like and can be quite performance critical.
+
+The fastest way to process this pattern is to transform it
+into a case. That is we transform:
+
+  x `elem` "xy"
+=>
+  case x of
+    'x' -> True
+    'y' -> True
+    _   -> False
+
+For this reason there is a BUILTIN rule "ElemLitString" which
+performs this rewrite early in the pipeline in case "xy" is built
+from unpacking a string literal of small size.
+
+For long strings however we avoid doing this translation as
+it would impact code size negatively.
+
+However we do not want to fall back to allocating a String and
+then calling elem onto it. So towards the end of the pipeline
+we use rewrite it into a more efficient form working of the
+unboxed string literal.
+
+Implementing this in terms of unpackFoldrCString# is not possible
+without allocating. As the folding function would have to capture
+the element we look for.
+
+Instead we rewrite to a spezialized version of elem, `elemCString#`
+which works over unboxed string literals.
+
+  Note [Inlining of elemCString#]
+  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+We inline both elemCString*# variants in the hope to cancel out the unboxing
+of the otherwise lazy character argument.
+
+For elemCString# we allow inlining the inner loop as it's tiny comming in
+at less than 40bytes on x64. This comes out to about the same size of the
+size overhead we pay for a non-tail call. Not to speak of eliminating the
+runtime overhead of the call.
+
+For elemCStringUtf8# this is different. The whole unicode logic makes it large
+enough to make inlining a bad choice. So we use the magic `noinline` to avoid
+the inner loop containing the utf8 logic to be inlined. Given the additional
+overhead of unicode decoding the call overhead is also less significant so it's
+not as big of a loss.
+
 -}
 
 unpackCString# :: Addr# -> [Char]
@@ -111,22 +192,9 @@ unpackAppendCString# addr rest
         -- See Note [unpackCString# iterating over addr]
         !ch = indexCharOffAddr# addr 0#
 
-unpackFoldrCString# :: Addr# -> (Char  -> a -> a) -> a -> a
-
--- Usually the unpack-list rule turns unpackFoldrCString# into unpackCString#
-
--- It also has a BuiltInRule in GHC.Core.Op.ConstantFold:
---      unpackFoldrCString# "foo" c (unpackFoldrCString# "baz" c n)
---        =  unpackFoldrCString# "foobaz" c n
-
+-- See Note [NOINLINE for unpackFoldrCString]
 {-# NOINLINE unpackFoldrCString# #-}
--- At one stage I had NOINLINE [0] on the grounds that, unlike
--- unpackCString#, there *is* some point in inlining
--- unpackFoldrCString#, because we get better code for the
--- higher-order function call.  BUT there may be a lot of
--- literal strings, and making a separate 'unpack' loop for
--- each is highly gratuitous.  See nofib/real/anna/PrettyPrint.
-
+unpackFoldrCString# :: Addr# -> (Char  -> a -> a) -> a -> a
 unpackFoldrCString# addr f z
   | isTrue# (ch `eqChar#` '\0'#) = z
   | True                         = C# ch `f` unpackFoldrCString# (addr `plusAddr#` 1#) f z
@@ -134,32 +202,101 @@ unpackFoldrCString# addr f z
     -- See Note [unpackCString# iterating over addr]
     !ch = indexCharOffAddr# addr 0#
 
+-- See Note [Compiling `elem` on Strings]
+-- See Note [Inlining of elemCString#]
+{-# INLINE elemCString# #-}
+elemCString# :: Char -> Addr# -> Bool
+elemCString# c base_addr =
+    let !ch = (indexCharOffAddr# base_addr 0#)
+    in  -- We check for end-of-string first to preserve laziness
+        -- of the Char argument.
+        case ch of
+            '\0'#   -> False
+            _       -> case c of
+                        C# c' -> check c' ch base_addr
+  where
+    -- Invariant: ch != '\NULL'
+    check :: Char# -> Char# -> Addr# -> Bool
+    check c_ub ch addr
+      | isTrue# (ch `eqChar#` c_ub ) = True
+      | True =
+          let !addr' = (addr `plusAddr#` 1#)
+          in case indexCharOffAddr# addr' 0# of
+              '\0'#   -> False
+              next_ch -> check c_ub next_ch addr'
+
 -- There's really no point in inlining this for the same reasons as
 -- unpackCString. See Note [Inlining unpackCString#] above for details.
 unpackCStringUtf8# :: Addr# -> [Char]
 {-# NOINLINE CONLIKE unpackCStringUtf8# #-}
 unpackCStringUtf8# addr
     | isTrue# (ch `eqChar#` '\0'#  ) = []
-    | isTrue# (ch `leChar#` '\x7F'#) = C# ch : unpackCStringUtf8# (addr `plusAddr#` 1#)
-    | isTrue# (ch `leChar#` '\xDF'#) =
-        let !c = C# (chr# (((ord# ch                                  -# 0xC0#) `uncheckedIShiftL#`  6#) +#
-                            (ord# (indexCharOffAddr# (addr `plusAddr#` 1#) 0#) -# 0x80#)))
-        in c : unpackCStringUtf8# (addr `plusAddr#` 2#)
-    | isTrue# (ch `leChar#` '\xEF'#) =
-        let !c = C# (chr# (((ord# ch                                             -# 0xE0#) `uncheckedIShiftL#` 12#) +#
-                           ((ord# (indexCharOffAddr# (addr `plusAddr#` 1#) 0#) -# 0x80#) `uncheckedIShiftL#`  6#) +#
-                            (ord# (indexCharOffAddr# (addr `plusAddr#` 2#) 0#) -# 0x80#)))
-        in c : unpackCStringUtf8# (addr `plusAddr#` 3#)
-    | True                           =
-        let !c = C# (chr# (((ord# ch                                  -# 0xF0#) `uncheckedIShiftL#` 18#) +#
-                           ((ord# (indexCharOffAddr# (addr `plusAddr#` 1#) 0#) -# 0x80#) `uncheckedIShiftL#` 12#) +#
-                           ((ord# (indexCharOffAddr# (addr `plusAddr#` 2#) 0#) -# 0x80#) `uncheckedIShiftL#`  6#) +#
-                            (ord# (indexCharOffAddr# (addr `plusAddr#` 3#) 0#) -# 0x80#)))
-        in c : unpackCStringUtf8# (addr `plusAddr#` 4#)
+    | True =
+        let !byte_count = getByteCount ch
+            !utf_ch = unpackUtf8Char# byte_count ch addr
+            !addr' = addr `plusBytes` byte_count
+        in  C# utf_ch : unpackCStringUtf8# addr'
+      where
+        -- See Note [unpackCString# iterating over addr]
+        !ch = indexCharOffAddr# addr 0#
+
+
+unpackAppendCStringUtf8# :: Addr# -> [Char] -> [Char]
+{-# NOINLINE unpackAppendCStringUtf8# #-}
+     -- See the NOINLINE note on unpackCString#
+unpackAppendCStringUtf8# addr rest
+    | isTrue# (ch `eqChar#` '\0'#) = rest
+    | True =
+        let !byte_count = getByteCount ch
+            !utf_ch = unpackUtf8Char# byte_count ch addr
+            !addr' = (addr `plusBytes` byte_count)
+        in  C# utf_ch : unpackAppendCStringUtf8# addr' rest
       where
         -- See Note [unpackCString# iterating over addr]
         !ch = indexCharOffAddr# addr 0#
 
+-- See Note [NOINLINE for unpackFoldrCString]
+{-# NOINLINE unpackFoldrCStringUtf8# #-}
+unpackFoldrCStringUtf8# :: Addr# -> (Char -> a -> a) -> a -> a
+unpackFoldrCStringUtf8# addr f z
+    | isTrue# (ch `eqChar#` '\0'#) = z
+    | True =
+        let !byte_count = getByteCount ch
+            !utf_ch = unpackUtf8Char# byte_count ch addr
+            !addr' = (addr `plusBytes` byte_count)
+        in C# utf_ch `f` unpackFoldrCStringUtf8# addr' f z
+  where
+    -- See Note [unpackCString# iterating over addr]
+    !ch = indexCharOffAddr# addr 0#
+
+-- See Note [Inlining of elemCString#]
+-- See Note [Compiling `elem` on Strings]
+{-# INLINE elemCStringUtf8# #-}
+elemCStringUtf8# :: Char -> Addr# -> Bool
+elemCStringUtf8# c base_addr =
+    let !ch = (indexCharOffAddr# base_addr 0#)
+    in  -- We check for end-of-string first to preserve laziness
+        -- of the Char argument.
+        case ch of
+            '\0'#   -> False
+            _       -> case c of
+                        C# c' -> elemCStringUtf8_check# c' ch base_addr
+
+-- Local to elemCStringUtf8#, defined at the top to avoid unfolding it
+-- into use sites.
+-- Invariant: ch != '\NULL'
+{-# NOINLINE elemCStringUtf8_check# #-}
+elemCStringUtf8_check# :: Char# -> Char# -> Addr# -> Bool
+elemCStringUtf8_check# c_ub ch addr =
+    let !byte_count = getByteCount ch
+        !utf_ch = unpackUtf8Char# byte_count ch addr
+        !addr' = (addr `plusBytes` byte_count)
+    in if (isTrue# (utf_ch `eqChar#` c_ub))
+        then True
+        else case (indexCharOffAddr# addr' 0#) of
+                '\0'# -> False
+                ch'   -> elemCStringUtf8_check# c_ub ch' addr'
+
 -- There's really no point in inlining this for the same reasons as
 -- unpackCString. See Note [Inlining unpackCString#] above for details.
 unpackNBytes# :: Addr# -> Int# -> [Char]
@@ -174,3 +311,61 @@ unpackNBytes#  addr len# = unpack [] (len# -# 1#)
          case indexCharOffAddr# addr i# of
             ch -> unpack (C# ch : acc) (i# -# 1#)
 
+
+
+------------------------------
+--- UTF8 decoding utilities
+------------------------------
+--
+-- These functions make explizit the logic that was originally
+-- part of unpackCStringUtf8. Since we want the same support for ascii
+-- and non-ascii a variety of functions needs the same logic. Instead
+-- of C&P'in the decoding logic all over we have it here once, and then
+-- force GHC to inline it.
+--
+-- All the overhead of the Bytes argument and calls goes away once all is
+-- said and done. And what remains is readable code in Haskell land and
+-- performant code in the resulting binary.
+
+data Bytes = One | Two | Three | Four
+
+{-# INLINE getByteCount #-}
+getByteCount :: Char# -> Bytes
+getByteCount ch
+    | isTrue# (ch `leChar#` '\x7F'#) = One
+    | isTrue# (ch `leChar#` '\xDF'#) = Two
+    | isTrue# (ch `leChar#` '\xEF'#) = Three
+    | True                           = Four
+
+{-# INLINE plusBytes #-}
+plusBytes :: Addr# -> Bytes -> Addr#
+plusBytes addr bytes =
+  case bytes of
+    One   -> addr `plusAddr#` 1#
+    Two   -> addr `plusAddr#` 2#
+    Three -> addr `plusAddr#` 3#
+    Four  -> addr `plusAddr#` 4#
+
+-- | Take the current address, read unicode char of the given size.
+-- We obviously want the number of bytes, but we have to read one
+-- byte to determine the number of bytes for the current codepoint
+-- so we might as well reuse it and avoid a read.
+--
+-- Side Note: We don't dare to decode all 4 possibilities at once.
+-- Reading past the end of the addr might trigger an exception.
+-- For this reason we really have to check the width first and only
+-- decode after.
+{-# INLINE unpackUtf8Char# #-}
+unpackUtf8Char# :: Bytes -> Char# -> Addr# -> Char#
+unpackUtf8Char# bytes ch addr =
+  case bytes of
+    One -> ch
+    Two ->   (chr# (((ord# ch                                           -# 0xC0#) `uncheckedIShiftL#`  6#) +#
+                     (ord# (indexCharOffAddr# (addr `plusAddr#` 1#) 0#) -# 0x80#)))
+    Three -> (chr# (((ord# ch                                           -# 0xE0#) `uncheckedIShiftL#` 12#) +#
+                    ((ord# (indexCharOffAddr# (addr `plusAddr#` 1#) 0#) -# 0x80#) `uncheckedIShiftL#`  6#) +#
+                     (ord# (indexCharOffAddr# (addr `plusAddr#` 2#) 0#) -# 0x80#)))
+    Four ->  (chr# (((ord# ch                                           -# 0xF0#) `uncheckedIShiftL#` 18#) +#
+                    ((ord# (indexCharOffAddr# (addr `plusAddr#` 1#) 0#) -# 0x80#) `uncheckedIShiftL#` 12#) +#
+                    ((ord# (indexCharOffAddr# (addr `plusAddr#` 2#) 0#) -# 0x80#) `uncheckedIShiftL#`  6#) +#
+                     (ord# (indexCharOffAddr# (addr `plusAddr#` 3#) 0#) -# 0x80#)))


=====================================
libraries/ghc-prim/changelog.md
=====================================
@@ -1,3 +1,17 @@
+## 0.6.2 (edit as necessary)
+
+- Shipped with GHC 8.12.1
+
+- Added to `GHC.CString`:
+
+        unpackAppendCStringUtf8# :: Addr# -> [Char] -> [Char]
+        unpackFoldrCStringUtf8# :: Addr# -> (Char -> a -> a) -> a -> a
+        elemCString# :: Char# -> Addr# -> Bool
+        elemCStringUtf8# :: Char# -> Addr# -> Bool
+
+  `elemCString*#` is a version of `elem` specialized for operations
+  over GHC String literals.
+
 ## 0.6.1 (edit as necessary)
 
 - Shipped with GHC 8.10.1



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/d6c0da2267233daddc6d2f96013ec1394bf588e4

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/d6c0da2267233daddc6d2f96013ec1394bf588e4
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20200401/c2c33d47/attachment-0001.html>