[Git][ghc/ghc][wip/andreask/elem_rule_fix] Rework treatment of `elem`. Add UTF8 GHC.CString functions.
Andreas Klebinger
gitlab at gitlab.haskell.org
Wed Apr 1 22:07:52 UTC 2020
Andreas Klebinger pushed to branch wip/andreask/elem_rule_fix at Glasgow Haskell Compiler / GHC
Commits:
d6c0da22 by Andreas Klebinger at 2020-04-02T00:07:37+02:00
Rework treatment of `elem`. Add UTF8 GHC.CString functions.
We optimize `elem` now in more ways than before.
A fusion RULE for elem was broken preventing it from firing.
Fixing this allows a call to elem on a known list to be translated
into a series of equality checks. These ultimately simply to a
single case expression.
It will also turn into a fold for dynamic lists when list fusion
applies.
This logic however does not work on string literals. For this reason
I added a rule to do the transformation into a case explicitly via a
built in rule. It will fire for any short string literals. (<=32 Bytes).
Longer calls to elem on string literals are rewritten to calls
to elem variants specialized to string literals.
This means we will rewrite:
* c `elem` (unpackCString "Foo"#) => elemCString# c' "Foo"#
* c `elem` (unpackCStringUtf8 "Bär"#) => elemCStringUtf8# c' "Bär"#
This means all calls to elem on string literals, as well as many
on other statically known lists should now be allocation free.
As a byproduct GHC.CString functionality is now available for Ascii
and UTF8 strings. The following missing UTF8 variants were
added: unpackAppendCStringUtf8#, unpackFoldrCStringUtf8#
They work just like their ascii counterparts. Which hopefully
makes proper UTF8 support easier for library authors.
A version of `elem` operating on known string literals was added:
* elemCString# :: Char# -> Addr# -> Bool
* elemCStringUtf8# :: Char# -> Addr# -> Bool
- - - - -
12 changed files:
- compiler/GHC/Core/Make.hs
- compiler/GHC/Core/Op/ConstantFold.hs
- compiler/prelude/PrelNames.hs
- compiler/utils/Util.hs
- libraries/base/GHC/Base.hs
- libraries/base/GHC/List.hs
- + libraries/base/tests/perf/Makefile
- + libraries/base/tests/perf/T17752.hs
- + libraries/base/tests/perf/T17752.stdout
- + libraries/base/tests/perf/all.T
- libraries/ghc-prim/GHC/CString.hs
- libraries/ghc-prim/changelog.md
Changes:
=====================================
compiler/GHC/Core/Make.hs
=====================================
@@ -13,6 +13,7 @@ module GHC.Core.Make (
sortQuantVars, castBottomExpr,
-- * Constructing boxed literals
+ trueExpr, falseExpr,
mkWordExpr, mkWordExprWord,
mkIntExpr, mkIntExprInt,
mkIntegerExpr, mkNaturalExpr,
@@ -248,6 +249,10 @@ castBottomExpr e res_ty
************************************************************************
-}
+trueExpr, falseExpr :: CoreExpr
+trueExpr = Var trueDataConId
+falseExpr = Var falseDataConId
+
-- | Create a 'CoreExpr' which will evaluate to the given @Int@
mkIntExpr :: Platform -> Integer -> CoreExpr -- Result = I# i :: Int
mkIntExpr platform i = mkCoreConApps intDataCon [mkIntLit platform i]
=====================================
compiler/GHC/Core/Op/ConstantFold.hs
=====================================
@@ -34,7 +34,7 @@ import GHC.Core
import GHC.Core.Make
import GHC.Types.Id
import GHC.Types.Literal
-import GHC.Core.SimpleOpt ( exprIsLiteral_maybe )
+import GHC.Core.SimpleOpt ( exprIsLiteral_maybe, exprIsLambda_maybe )
import PrimOp ( PrimOp(..), tagToEnumKey )
import TysWiredIn
import TysPrim
@@ -67,6 +67,8 @@ import Data.Int
import Data.Ratio
import Data.Word
+import Encoding (utf8DecodeByteString)
+
{-
Note [Constant folding]
~~~~~~~~~~~~~~~~~~~~~~~
@@ -1254,11 +1256,18 @@ builtinRules :: [CoreRule]
builtinRules
= [BuiltinRule { ru_name = fsLit "AppendLitString",
ru_fn = unpackCStringFoldrName,
- ru_nargs = 4, ru_try = match_append_lit },
+ ru_nargs = 4, ru_try = match_append_lit_C },
+ BuiltinRule { ru_name = fsLit "AppendLitStringUtf8",
+ ru_fn = unpackCStringFoldrUtf8Name,
+ ru_nargs = 4, ru_try = match_append_lit_utf8 },
BuiltinRule { ru_name = fsLit "EqString", ru_fn = eqStringName,
ru_nargs = 2, ru_try = match_eq_string },
BuiltinRule { ru_name = fsLit "Inline", ru_fn = inlineIdName,
ru_nargs = 2, ru_try = \_ _ _ -> match_inline },
+
+ -- See Note [Compiling `elem` on Strings] in CString.hs
+ BuiltinRule { ru_name = fsLit "ElemLitString", ru_fn = elemName,
+ ru_nargs = 4, ru_try = match_elem },
BuiltinRule { ru_name = fsLit "MagicDict", ru_fn = idName magicDictId,
ru_nargs = 4, ru_try = \_ _ _ -> match_magicDict },
@@ -1430,11 +1439,20 @@ builtinNaturalRules =
---------------------------------------------------
-- The rule is this:
--- unpackFoldrCString# "foo" c (unpackFoldrCString# "baz" c n)
--- = unpackFoldrCString# "foobaz" c n
+-- unpackFoldrCString*# "foo" c (unpackFoldrCString*# "baz" c n)
+-- = unpackFoldrCString*# "foobaz" c n
+
+-- CString version
+match_append_lit_C :: RuleFun
+match_append_lit_C = match_append_lit unpackCStringFoldrIdKey
+
+-- CStringUTF8 version
+match_append_lit_utf8 :: RuleFun
+match_append_lit_utf8 = match_append_lit unpackCStringFoldrUtf8IdKey
-match_append_lit :: RuleFun
-match_append_lit _ id_unf _
+{-# INLINE match_append_lit #-}
+match_append_lit :: Unique -> RuleFun
+match_append_lit foldVariant _ id_unf _
[ Type ty1
, lit1
, c1
@@ -1447,7 +1465,7 @@ match_append_lit _ id_unf _
`App` lit2
`App` c2
`App` n) <- stripTicksTop tickishFloatable e2
- , unpk `hasKey` unpackCStringFoldrIdKey
+ , unpk `hasKey` foldVariant
, cheapEqExpr' tickishFloatable c1 c2
, (c1Ticks, c1') <- stripTicksTop tickishFloatable c1
, c2Ticks <- stripTicksTopT tickishFloatable c2
@@ -1460,23 +1478,62 @@ match_append_lit _ id_unf _
`App` mkTicks (c1Ticks ++ c2Ticks) c1'
`App` n
-match_append_lit _ _ _ _ = Nothing
+match_append_lit _ _ _ _ _ = Nothing
---------------------------------------------------
-- The rule is this:
-- eqString (unpackCString# (Lit s1)) (unpackCString# (Lit s2)) = s1==s2
+-- Also matches unpackCStringUtf8#
match_eq_string :: RuleFun
match_eq_string _ id_unf _
[Var unpk1 `App` lit1, Var unpk2 `App` lit2]
- | unpk1 `hasKey` unpackCStringIdKey
- , unpk2 `hasKey` unpackCStringIdKey
+ | unpk_key1 <- getUnique unpk1
+ , unpk_key2 <- getUnique unpk2
+ , unpk_key1 == unpk_key2
+ , unpk_key1 `elem` [unpackCStringUtf8IdKey, unpackCStringIdKey]
, Just (LitString s1) <- exprIsLiteral_maybe id_unf lit1
, Just (LitString s2) <- exprIsLiteral_maybe id_unf lit2
= Just (if s1 == s2 then trueValBool else falseValBool)
match_eq_string _ _ _ _ = Nothing
+---------------------------------------------------
+-- The rule is this:
+-- elem x (build ((unpackFoldrCString# "abc"#)))
+-- = case x of
+-- 'a'# -> True
+-- 'b'# -> True
+-- 'c'# -> True
+-- _ -> False
+--
+-- Only matches short string (<20 bytes).
+-- Also matches unpackCStringUtf8#
+-- See Note [Compiling `elem` on Strings] in CString.hs
+
+match_elem :: RuleFun
+match_elem _ id_unf _ [Type ty, _dict, x, xs]
+ | ty `eqType` charTy
+ , ( xsTicks
+ , Var build `App` _char_ty `App` unf_str_app)
+ <- stripTicksTop tickishFloatable xs
+ , Just (_arg, (Var unfold_str `App` _ty2 `App` lit1), lam_ticks)
+ <- exprIsLambda_maybe id_unf unf_str_app
+ , build `hasKey` buildIdKey
+ , getUnique unfold_str `elem` [unpackCStringFoldrIdKey, unpackCStringFoldrUtf8IdKey]
+ , Just (LitString s1) <- exprIsLiteral_maybe id_unf lit1
+ , BS.length s1 <= 32
+ = let elem_string = nubSort $ utf8DecodeByteString s1 :: String
+ x_id = mkWildValBinder charTy :: Id
+ x_prim_id = mkWildValBinder charPrimTy :: Id
+ switch = Case (Var x_prim_id) x_prim_id boolTy
+ ((DEFAULT, [], falseExpr):foldr mkCharAlt [] elem_string)
+ mkCharAlt :: Char -> [CoreAlt] -> [CoreAlt]
+ mkCharAlt c alts = (LitAlt (mkLitChar c), [], trueExpr):alts
+ in Just $ mkTicks (xsTicks ++ lam_ticks)
+ $ Case x x_id boolTy [(DataAlt charDataCon, [x_id], switch)]
+
+match_elem _ _ _ _ = Nothing
---------------------------------------------------
-- The rule is this:
=====================================
compiler/prelude/PrelNames.hs
=====================================
@@ -340,8 +340,8 @@ basicKnownKeyNames
groupWithName,
-- Strings and lists
- unpackCStringName,
- unpackCStringFoldrName, unpackCStringUtf8Name,
+ unpackCStringName, unpackCStringUtf8Name,
+ unpackCStringFoldrName, unpackCStringFoldrUtf8Name,
-- Overloaded lists
isListClassName,
@@ -352,6 +352,7 @@ basicKnownKeyNames
-- List operations
concatName, filterName, mapName,
zipName, foldrName, buildName, augmentName, appendName,
+ elemName,
-- FFI primitive types that are not wired-in.
stablePtrTyConName, ptrTyConName, funPtrTyConName,
@@ -677,9 +678,10 @@ ordClass_RDR = nameRdrName ordClassName
enumClass_RDR = nameRdrName enumClassName
monadClass_RDR = nameRdrName monadClassName
-map_RDR, append_RDR :: RdrName
+map_RDR, append_RDR, elem_RDR :: RdrName
map_RDR = nameRdrName mapName
append_RDR = nameRdrName appendName
+elem_RDR = nameRdrName elemName
foldr_RDR, build_RDR, returnM_RDR, bindM_RDR, failM_RDR
:: RdrName
@@ -712,11 +714,12 @@ ioDataCon_RDR :: RdrName
ioDataCon_RDR = nameRdrName ioDataConName
eqString_RDR, unpackCString_RDR, unpackCStringFoldr_RDR,
- unpackCStringUtf8_RDR :: RdrName
+ unpackCStringFoldrUtf8_RDR, unpackCStringUtf8_RDR :: RdrName
eqString_RDR = nameRdrName eqStringName
unpackCString_RDR = nameRdrName unpackCStringName
unpackCStringFoldr_RDR = nameRdrName unpackCStringFoldrName
unpackCStringUtf8_RDR = nameRdrName unpackCStringUtf8Name
+unpackCStringFoldrUtf8_RDR = nameRdrName unpackCStringFoldrUtf8Name
newStablePtr_RDR :: RdrName
newStablePtr_RDR = nameRdrName newStablePtrName
@@ -1006,11 +1009,12 @@ modIntName = varQual gHC_CLASSES (fsLit "modInt#") modIntIdKey
-- Base strings Strings
unpackCStringName, unpackCStringFoldrName,
- unpackCStringUtf8Name, eqStringName :: Name
+ unpackCStringUtf8Name, unpackCStringFoldrUtf8Name,eqStringName :: Name
unpackCStringName = varQual gHC_CSTRING (fsLit "unpackCString#") unpackCStringIdKey
unpackCStringFoldrName = varQual gHC_CSTRING (fsLit "unpackFoldrCString#") unpackCStringFoldrIdKey
unpackCStringUtf8Name = varQual gHC_CSTRING (fsLit "unpackCStringUtf8#") unpackCStringUtf8IdKey
eqStringName = varQual gHC_BASE (fsLit "eqString") eqStringIdKey
+unpackCStringFoldrUtf8Name = varQual gHC_CSTRING (fsLit "unpackFoldrCStringUtf8#") unpackCStringFoldrUtf8IdKey
-- The 'inline' function
inlineIdName :: Name
@@ -1083,7 +1087,7 @@ groupWithName = varQual gHC_EXTS (fsLit "groupWith") groupWithIdKey
-- Random PrelBase functions
fromStringName, otherwiseIdName, foldrName, buildName, augmentName,
- mapName, appendName, assertName,
+ mapName, appendName, elemName, assertName,
breakpointName, breakpointCondName,
opaqueTyConName, dollarName :: Name
dollarName = varQual gHC_BASE (fsLit "$") dollarIdKey
@@ -1097,6 +1101,7 @@ assertName = varQual gHC_BASE (fsLit "assert") assertIdKey
breakpointName = varQual gHC_BASE (fsLit "breakpoint") breakpointIdKey
breakpointCondName= varQual gHC_BASE (fsLit "breakpointCond") breakpointCondIdKey
opaqueTyConName = tcQual gHC_BASE (fsLit "Opaque") opaqueTyConKey
+elemName = varQual gHC_LIST (fsLit "elem") elemIdKey
fromStringName = varQual dATA_STRING (fsLit "fromString") fromStringClassOpKey
-- PrelTup
@@ -2081,13 +2086,15 @@ unsafeReflDataConKey = mkPreludeDataConUnique 114
-}
wildCardKey, absentErrorIdKey, augmentIdKey, appendIdKey,
+ elemIdKey,
buildIdKey, errorIdKey, foldrIdKey, recSelErrorIdKey,
seqIdKey, eqStringIdKey,
noMethodBindingErrorIdKey, nonExhaustiveGuardsErrorIdKey,
runtimeErrorIdKey, patErrorIdKey, voidPrimIdKey,
realWorldPrimIdKey, recConErrorIdKey,
unpackCStringUtf8IdKey, unpackCStringAppendIdKey,
- unpackCStringFoldrIdKey, unpackCStringIdKey,
+ unpackCStringFoldrIdKey, unpackCStringFoldrUtf8IdKey,
+ unpackCStringIdKey,
typeErrorIdKey, divIntIdKey, modIntIdKey,
absentSumFieldErrorIdKey :: Unique
@@ -2095,27 +2102,29 @@ wildCardKey = mkPreludeMiscIdUnique 0 -- See Note [WildCard
absentErrorIdKey = mkPreludeMiscIdUnique 1
augmentIdKey = mkPreludeMiscIdUnique 2
appendIdKey = mkPreludeMiscIdUnique 3
-buildIdKey = mkPreludeMiscIdUnique 4
-errorIdKey = mkPreludeMiscIdUnique 5
-foldrIdKey = mkPreludeMiscIdUnique 6
-recSelErrorIdKey = mkPreludeMiscIdUnique 7
-seqIdKey = mkPreludeMiscIdUnique 8
-absentSumFieldErrorIdKey = mkPreludeMiscIdUnique 9
-eqStringIdKey = mkPreludeMiscIdUnique 10
-noMethodBindingErrorIdKey = mkPreludeMiscIdUnique 11
-nonExhaustiveGuardsErrorIdKey = mkPreludeMiscIdUnique 12
-runtimeErrorIdKey = mkPreludeMiscIdUnique 13
-patErrorIdKey = mkPreludeMiscIdUnique 14
-realWorldPrimIdKey = mkPreludeMiscIdUnique 15
-recConErrorIdKey = mkPreludeMiscIdUnique 16
-unpackCStringUtf8IdKey = mkPreludeMiscIdUnique 17
-unpackCStringAppendIdKey = mkPreludeMiscIdUnique 18
-unpackCStringFoldrIdKey = mkPreludeMiscIdUnique 19
-unpackCStringIdKey = mkPreludeMiscIdUnique 20
-voidPrimIdKey = mkPreludeMiscIdUnique 21
-typeErrorIdKey = mkPreludeMiscIdUnique 22
-divIntIdKey = mkPreludeMiscIdUnique 23
-modIntIdKey = mkPreludeMiscIdUnique 24
+elemIdKey = mkPreludeMiscIdUnique 4
+buildIdKey = mkPreludeMiscIdUnique 5
+errorIdKey = mkPreludeMiscIdUnique 6
+foldrIdKey = mkPreludeMiscIdUnique 7
+recSelErrorIdKey = mkPreludeMiscIdUnique 8
+seqIdKey = mkPreludeMiscIdUnique 9
+absentSumFieldErrorIdKey = mkPreludeMiscIdUnique 10
+eqStringIdKey = mkPreludeMiscIdUnique 11
+noMethodBindingErrorIdKey = mkPreludeMiscIdUnique 12
+nonExhaustiveGuardsErrorIdKey = mkPreludeMiscIdUnique 13
+runtimeErrorIdKey = mkPreludeMiscIdUnique 14
+patErrorIdKey = mkPreludeMiscIdUnique 15
+realWorldPrimIdKey = mkPreludeMiscIdUnique 16
+recConErrorIdKey = mkPreludeMiscIdUnique 17
+unpackCStringUtf8IdKey = mkPreludeMiscIdUnique 18
+unpackCStringAppendIdKey = mkPreludeMiscIdUnique 19
+unpackCStringFoldrIdKey = mkPreludeMiscIdUnique 20
+unpackCStringFoldrUtf8IdKey = mkPreludeMiscIdUnique 21
+unpackCStringIdKey = mkPreludeMiscIdUnique 22
+voidPrimIdKey = mkPreludeMiscIdUnique 23
+typeErrorIdKey = mkPreludeMiscIdUnique 24
+divIntIdKey = mkPreludeMiscIdUnique 25
+modIntIdKey = mkPreludeMiscIdUnique 26
concatIdKey, filterIdKey, zipIdKey,
bindIOIdKey, returnIOIdKey, newStablePtrIdKey,
=====================================
compiler/utils/Util.hs
=====================================
@@ -637,7 +637,6 @@ ordNub xs
| Set.member x s = go s xs
| otherwise = x : go (Set.insert x s) xs
-
{-
************************************************************************
* *
=====================================
libraries/base/GHC/Base.hs
=====================================
@@ -1618,7 +1618,11 @@ iShiftRL# :: Int# -> Int# -> Int#
a `iShiftRL#` b | isTrue# (b >=# WORD_SIZE_IN_BITS#) = 0#
| otherwise = a `uncheckedIShiftRL#` b
+------------------------------------------------------------------------
-- Rules for C strings (the functions themselves are now in GHC.CString)
+------------------------------------------------------------------------
+
+-- Ascii variants
{-# RULES
"unpack" [~1] forall a . unpackCString# a = build (unpackFoldrCString# a)
"unpack-list" [1] forall a . unpackFoldrCString# a (:) [] = unpackCString# a
@@ -1628,3 +1632,14 @@ a `iShiftRL#` b | isTrue# (b >=# WORD_SIZE_IN_BITS#) = 0#
-- unpackFoldr "foo" c (unpackFoldr "baz" c n) = unpackFoldr "foobaz" c n
#-}
+
+-- Utf8 variants
+{-# RULES
+"unpackUtf8" [~1] forall a . unpackCStringUtf8# a = build (unpackFoldrCStringUtf8# a)
+"unpack-listUtf8" [1] forall a . unpackFoldrCStringUtf8# a (:) [] = unpackCStringUtf8# a
+"unpack-appendUtf8" forall a n . unpackFoldrCStringUtf8# a (:) n = unpackAppendCStringUtf8# a n
+
+-- There's a built-in rule (in PrelRules.hs) for
+-- unpackFoldrUtf8 "föö" c (unpackFoldrUtf8 "bäz" c n) = unpackFoldrUtf8 "fööbäz" c n
+
+ #-}
=====================================
libraries/base/GHC/List.hs
=====================================
@@ -1147,10 +1147,17 @@ elem x = any (== x)
#else
elem _ [] = False
elem x (y:ys) = x==y || elem x ys
-{-# NOINLINE [1] elem #-}
+{-# NOINLINE elem #-}
{-# RULES
-"elem/build" forall x (g :: forall b . Eq a => (a -> b -> b) -> b -> b)
+"elem/build" forall x (g :: forall b . (a -> b -> b) -> b -> b)
. elem x (build g) = g (\ y r -> (x == y) || r) False
+-- See Note [Compiling `elem` on Strings] in CString.hs
+"elem at CString/unpack" [0] forall (x :: Char) (addr :: Addr#)
+ . elem x (unpackCString# addr)
+ = x `elemCString#` addr
+"elem at CStringUtf8/unpack" [0] forall (x :: Char) (addr :: Addr#)
+ . elem x (unpackCStringUtf8# addr)
+ = x `elemCStringUtf8#` addr
#-}
#endif
@@ -1172,10 +1179,17 @@ notElem x = all (/= x)
#else
notElem _ [] = True
notElem x (y:ys)= x /= y && notElem x ys
-{-# NOINLINE [1] notElem #-}
+{-# NOINLINE notElem #-}
{-# RULES
-"notElem/build" forall x (g :: forall b . Eq a => (a -> b -> b) -> b -> b)
+"notElem/build" forall x (g :: forall b . (a -> b -> b) -> b -> b)
. notElem x (build g) = g (\ y r -> (x /= y) && r) True
+-- See Note [Compiling `elem` on Strings] in CString.hs
+"notElem at CString/unpack" [0] forall (x :: Char) (addr :: Addr#)
+ . notElem x (unpackCString# addr)
+ = not (x `elemCString#` addr)
+"notElem at CStringUtf8/unpack" [0] forall (x :: Char) (addr :: Addr#)
+ . notElem x (unpackCStringUtf8# addr)
+ = not (x `elemCStringUtf8#` addr)
#-}
#endif
=====================================
libraries/base/tests/perf/Makefile
=====================================
@@ -0,0 +1,17 @@
+# This Makefile runs the tests using GHC's testsuite framework. It
+# assumes the package is part of a GHC build tree with the testsuite
+# installed in ../../../testsuite.
+
+TOP=../../../../testsuite
+include $(TOP)/mk/boilerplate.mk
+include $(TOP)/mk/test.mk
+
+
+T17752:
+ '$(TEST_HC)' $(TEST_HC_OPTS) -O --make T17752 -rtsopts -ddump-simpl -ddump-to-file -dsuppress-uniques
+ # We only grep for specific patterns to minimize failures when eg core syntax changes
+ # We expect two unfolds using eqChar
+ echo $$(cat T17752.dump-simpl | grep unpackFoldrCString)
+ echo $$(cat T17752.dump-simpl | grep eqChar)
+ # And two pattern matches
+ echo $$(cat T17752.dump-simpl | grep "case x1" -A4 )
=====================================
libraries/base/tests/perf/T17752.hs
=====================================
@@ -0,0 +1,82 @@
+module T17752 where
+
+-- Should compile to unfoldCStr if the rules fire
+isElemLit x = x `elem` "theQuickShinyGHCJumpsOverTheRuleRecursion"
+isNotElemLit x = x `notElem` "_theQuickShinyGHCCompileJumpsOverTheRuleRecursion"
+
+-- Should compile to a pattern match if the rules fire
+isElemList x = x `elem` ['a','b','c']
+isNotElemList x = x `elem` ['x','y','z']
+
+{-
+
+Should the grep tests fail make sure the core still behaves
+like the one below (or better!)
+
+-- RHS size: {terms: 1, types: 0, coercions: 0, joins: 0/0}
+isElemLit1 = "theQuickShinyGHCJumpsOverTheRuleRecursion"#
+
+-- RHS size: {terms: 20, types: 9, coercions: 0, joins: 0/0}
+isElemLit
+ = \ x ->
+ unpackFoldrCString#
+ isElemLit1
+ (\ y r ->
+ case x of { C# x1 ->
+ case y of { C# y1 ->
+ case eqChar# x1 y1 of {
+ __DEFAULT -> r;
+ 1# -> True
+ }
+ }
+ })
+ False
+
+-- RHS size: {terms: 1, types: 0, coercions: 0, joins: 0/0}
+isNotElemLit1
+ = "_theQuickShinyGHCCompileJumpsOverTheRuleRecursion"#
+
+-- RHS size: {terms: 25, types: 10, coercions: 0, joins: 0/0}
+isNotElemLit
+ = \ x ->
+ case unpackFoldrCString#
+ isNotElemLit1
+ (\ y r ->
+ case x of { C# x1 ->
+ case y of { C# y1 ->
+ case eqChar# x1 y1 of {
+ __DEFAULT -> r;
+ 1# -> True
+ }
+ }
+ })
+ False
+ of {
+ False -> True;
+ True -> False
+ }
+
+-- RHS size: {terms: 14, types: 4, coercions: 0, joins: 0/0}
+isElemList
+ = \ x ->
+ case x of { C# x1 ->
+ case x1 of {
+ __DEFAULT -> False;
+ 'a'# -> True;
+ 'b'# -> True;
+ 'c'# -> True
+ }
+ }
+
+-- RHS size: {terms: 14, types: 4, coercions: 0, joins: 0/0}
+isNotElemList
+ = \ x ->
+ case x of { C# x1 ->
+ case x1 of {
+ __DEFAULT -> False;
+ 'x'# -> True;
+ 'y'# -> True;
+ 'z'# -> True
+ }
+ }
+-}
=====================================
libraries/base/tests/perf/T17752.stdout
=====================================
@@ -0,0 +1,25 @@
+[1 of 1] Compiling T17752 ( T17752.hs, T17752.o )
+GHC.CString.unpackFoldrCString#
+ case GHC.CString.unpackFoldrCString#
+case GHC.Prim.eqChar# x1 y1 of {
+ case GHC.Prim.eqChar# x1 y1 of {
+case x1 of {
+ __DEFAULT -> GHC.Types.False;
+ 'a'# -> GHC.Types.True;
+ 'b'# -> GHC.Types.True;
+ 'c'# -> GHC.Types.True
+ -- case x1 of {
+ __DEFAULT -> GHC.Types.False;
+ 'a'# -> GHC.Types.True;
+ 'b'# -> GHC.Types.True;
+ 'c'# -> GHC.Types.True
+ -- case x1 of {
+ __DEFAULT -> GHC.Types.False;
+ 'x'# -> GHC.Types.True;
+ 'y'# -> GHC.Types.True;
+ 'z'# -> GHC.Types.True
+ -- case x1 of {
+ __DEFAULT -> GHC.Types.False;
+ 'x'# -> GHC.Types.True;
+ 'y'# -> GHC.Types.True;
+ 'z'# -> GHC.Types.True
=====================================
libraries/base/tests/perf/all.T
=====================================
@@ -0,0 +1,5 @@
+#--------------------------------------
+# Check specialization of elem via rules
+#--------------------------------------
+
+test('T17752', [only_ways(['normal'])] , makefile_test, ['T17752'])
\ No newline at end of file
=====================================
libraries/ghc-prim/GHC/CString.hs
=====================================
@@ -1,4 +1,5 @@
{-# LANGUAGE MagicHash, NoImplicitPrelude, BangPatterns #-}
+{-# OPTIONS -fregs-graph #-} -- Needed for elemCString#, see #17823
-----------------------------------------------------------------------------
-- |
@@ -17,8 +18,16 @@
-----------------------------------------------------------------------------
module GHC.CString (
+ -- * Ascii variants
unpackCString#, unpackAppendCString#, unpackFoldrCString#,
- unpackCStringUtf8#, unpackNBytes#
+ elemCString#,
+
+ -- * Utf variants
+ unpackCStringUtf8#, unpackAppendCStringUtf8#, unpackFoldrCStringUtf8#,
+ elemCStringUtf8#,
+
+ -- * Other
+ unpackNBytes#,
) where
import GHC.Types
@@ -71,8 +80,22 @@ Moreover, we want to make it CONLIKE, so that:
All of this goes for unpackCStringUtf8# too.
-}
-{- Note [unpackCString# iterating over addr]
- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+{- Note [NOINLINE for unpackFoldrCString]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Usually the unpack-list rule turns unpackFoldrCString# into unpackCString#
+It also has a BuiltInRule in PrelRules.hs:
+ unpackFoldrCString# "foo" c (unpackFoldrCString# "baz" c n)
+ = unpackFoldrCString# "foobaz" c n
+At one stage I had NOINLINE [0] on the grounds that, unlike
+unpackCString#, there *is* some point in inlining
+unpackFoldrCString#, because we get better code for the
+higher-order function call. BUT there may be a lot of
+literal strings, and making a separate 'unpack' loop for
+each is highly gratuitous. See nofib/real/anna/PrettyPrint.
+
+ Note [unpackCString# iterating over addr]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
When unpacking unpackCString# and friends repeatedly return a cons cell
containing:
@@ -89,6 +112,64 @@ This works since these two expressions will read from the same address.
This way we avoid the need for the thunks to close over both the start of
the string and the current offset, saving a word for each character unpacked.
+
+This has the additional advantage the we can guarantee that only the
+increment will happen in the loop.
+If we use the offset start off with the increment and an addition
+to get the real address. Which might not be optimized aways.
+
+ Note [Compiling `elem` on Strings]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The pattern f c = c `elem` "<someString>" is quite common in
+parsers and the like and can be quite performance critical.
+
+The fastest way to process this pattern is to transform it
+into a case. That is we transform:
+
+ x `elem` "xy"
+=>
+ case x of
+ 'x' -> True
+ 'y' -> True
+ _ -> False
+
+For this reason there is a BUILTIN rule "ElemLitString" which
+performs this rewrite early in the pipeline in case "xy" is built
+from unpacking a string literal of small size.
+
+For long strings however we avoid doing this translation as
+it would impact code size negatively.
+
+However we do not want to fall back to allocating a String and
+then calling elem onto it. So towards the end of the pipeline
+we use rewrite it into a more efficient form working of the
+unboxed string literal.
+
+Implementing this in terms of unpackFoldrCString# is not possible
+without allocating. As the folding function would have to capture
+the element we look for.
+
+Instead we rewrite to a spezialized version of elem, `elemCString#`
+which works over unboxed string literals.
+
+ Note [Inlining of elemCString#]
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+We inline both elemCString*# variants in the hope to cancel out the unboxing
+of the otherwise lazy character argument.
+
+For elemCString# we allow inlining the inner loop as it's tiny comming in
+at less than 40bytes on x64. This comes out to about the same size of the
+size overhead we pay for a non-tail call. Not to speak of eliminating the
+runtime overhead of the call.
+
+For elemCStringUtf8# this is different. The whole unicode logic makes it large
+enough to make inlining a bad choice. So we use the magic `noinline` to avoid
+the inner loop containing the utf8 logic to be inlined. Given the additional
+overhead of unicode decoding the call overhead is also less significant so it's
+not as big of a loss.
+
-}
unpackCString# :: Addr# -> [Char]
@@ -111,22 +192,9 @@ unpackAppendCString# addr rest
-- See Note [unpackCString# iterating over addr]
!ch = indexCharOffAddr# addr 0#
-unpackFoldrCString# :: Addr# -> (Char -> a -> a) -> a -> a
-
--- Usually the unpack-list rule turns unpackFoldrCString# into unpackCString#
-
--- It also has a BuiltInRule in GHC.Core.Op.ConstantFold:
--- unpackFoldrCString# "foo" c (unpackFoldrCString# "baz" c n)
--- = unpackFoldrCString# "foobaz" c n
-
+-- See Note [NOINLINE for unpackFoldrCString]
{-# NOINLINE unpackFoldrCString# #-}
--- At one stage I had NOINLINE [0] on the grounds that, unlike
--- unpackCString#, there *is* some point in inlining
--- unpackFoldrCString#, because we get better code for the
--- higher-order function call. BUT there may be a lot of
--- literal strings, and making a separate 'unpack' loop for
--- each is highly gratuitous. See nofib/real/anna/PrettyPrint.
-
+unpackFoldrCString# :: Addr# -> (Char -> a -> a) -> a -> a
unpackFoldrCString# addr f z
| isTrue# (ch `eqChar#` '\0'#) = z
| True = C# ch `f` unpackFoldrCString# (addr `plusAddr#` 1#) f z
@@ -134,32 +202,101 @@ unpackFoldrCString# addr f z
-- See Note [unpackCString# iterating over addr]
!ch = indexCharOffAddr# addr 0#
+-- See Note [Compiling `elem` on Strings]
+-- See Note [Inlining of elemCString#]
+{-# INLINE elemCString# #-}
+elemCString# :: Char -> Addr# -> Bool
+elemCString# c base_addr =
+ let !ch = (indexCharOffAddr# base_addr 0#)
+ in -- We check for end-of-string first to preserve laziness
+ -- of the Char argument.
+ case ch of
+ '\0'# -> False
+ _ -> case c of
+ C# c' -> check c' ch base_addr
+ where
+ -- Invariant: ch != '\NULL'
+ check :: Char# -> Char# -> Addr# -> Bool
+ check c_ub ch addr
+ | isTrue# (ch `eqChar#` c_ub ) = True
+ | True =
+ let !addr' = (addr `plusAddr#` 1#)
+ in case indexCharOffAddr# addr' 0# of
+ '\0'# -> False
+ next_ch -> check c_ub next_ch addr'
+
-- There's really no point in inlining this for the same reasons as
-- unpackCString. See Note [Inlining unpackCString#] above for details.
unpackCStringUtf8# :: Addr# -> [Char]
{-# NOINLINE CONLIKE unpackCStringUtf8# #-}
unpackCStringUtf8# addr
| isTrue# (ch `eqChar#` '\0'# ) = []
- | isTrue# (ch `leChar#` '\x7F'#) = C# ch : unpackCStringUtf8# (addr `plusAddr#` 1#)
- | isTrue# (ch `leChar#` '\xDF'#) =
- let !c = C# (chr# (((ord# ch -# 0xC0#) `uncheckedIShiftL#` 6#) +#
- (ord# (indexCharOffAddr# (addr `plusAddr#` 1#) 0#) -# 0x80#)))
- in c : unpackCStringUtf8# (addr `plusAddr#` 2#)
- | isTrue# (ch `leChar#` '\xEF'#) =
- let !c = C# (chr# (((ord# ch -# 0xE0#) `uncheckedIShiftL#` 12#) +#
- ((ord# (indexCharOffAddr# (addr `plusAddr#` 1#) 0#) -# 0x80#) `uncheckedIShiftL#` 6#) +#
- (ord# (indexCharOffAddr# (addr `plusAddr#` 2#) 0#) -# 0x80#)))
- in c : unpackCStringUtf8# (addr `plusAddr#` 3#)
- | True =
- let !c = C# (chr# (((ord# ch -# 0xF0#) `uncheckedIShiftL#` 18#) +#
- ((ord# (indexCharOffAddr# (addr `plusAddr#` 1#) 0#) -# 0x80#) `uncheckedIShiftL#` 12#) +#
- ((ord# (indexCharOffAddr# (addr `plusAddr#` 2#) 0#) -# 0x80#) `uncheckedIShiftL#` 6#) +#
- (ord# (indexCharOffAddr# (addr `plusAddr#` 3#) 0#) -# 0x80#)))
- in c : unpackCStringUtf8# (addr `plusAddr#` 4#)
+ | True =
+ let !byte_count = getByteCount ch
+ !utf_ch = unpackUtf8Char# byte_count ch addr
+ !addr' = addr `plusBytes` byte_count
+ in C# utf_ch : unpackCStringUtf8# addr'
+ where
+ -- See Note [unpackCString# iterating over addr]
+ !ch = indexCharOffAddr# addr 0#
+
+
+unpackAppendCStringUtf8# :: Addr# -> [Char] -> [Char]
+{-# NOINLINE unpackAppendCStringUtf8# #-}
+ -- See the NOINLINE note on unpackCString#
+unpackAppendCStringUtf8# addr rest
+ | isTrue# (ch `eqChar#` '\0'#) = rest
+ | True =
+ let !byte_count = getByteCount ch
+ !utf_ch = unpackUtf8Char# byte_count ch addr
+ !addr' = (addr `plusBytes` byte_count)
+ in C# utf_ch : unpackAppendCStringUtf8# addr' rest
where
-- See Note [unpackCString# iterating over addr]
!ch = indexCharOffAddr# addr 0#
+-- See Note [NOINLINE for unpackFoldrCString]
+{-# NOINLINE unpackFoldrCStringUtf8# #-}
+unpackFoldrCStringUtf8# :: Addr# -> (Char -> a -> a) -> a -> a
+unpackFoldrCStringUtf8# addr f z
+ | isTrue# (ch `eqChar#` '\0'#) = z
+ | True =
+ let !byte_count = getByteCount ch
+ !utf_ch = unpackUtf8Char# byte_count ch addr
+ !addr' = (addr `plusBytes` byte_count)
+ in C# utf_ch `f` unpackFoldrCStringUtf8# addr' f z
+ where
+ -- See Note [unpackCString# iterating over addr]
+ !ch = indexCharOffAddr# addr 0#
+
+-- See Note [Inlining of elemCString#]
+-- See Note [Compiling `elem` on Strings]
+{-# INLINE elemCStringUtf8# #-}
+elemCStringUtf8# :: Char -> Addr# -> Bool
+elemCStringUtf8# c base_addr =
+ let !ch = (indexCharOffAddr# base_addr 0#)
+ in -- We check for end-of-string first to preserve laziness
+ -- of the Char argument.
+ case ch of
+ '\0'# -> False
+ _ -> case c of
+ C# c' -> elemCStringUtf8_check# c' ch base_addr
+
+-- Local to elemCStringUtf8#, defined at the top to avoid unfolding it
+-- into use sites.
+-- Invariant: ch != '\NULL'
+{-# NOINLINE elemCStringUtf8_check# #-}
+elemCStringUtf8_check# :: Char# -> Char# -> Addr# -> Bool
+elemCStringUtf8_check# c_ub ch addr =
+ let !byte_count = getByteCount ch
+ !utf_ch = unpackUtf8Char# byte_count ch addr
+ !addr' = (addr `plusBytes` byte_count)
+ in if (isTrue# (utf_ch `eqChar#` c_ub))
+ then True
+ else case (indexCharOffAddr# addr' 0#) of
+ '\0'# -> False
+ ch' -> elemCStringUtf8_check# c_ub ch' addr'
+
-- There's really no point in inlining this for the same reasons as
-- unpackCString. See Note [Inlining unpackCString#] above for details.
unpackNBytes# :: Addr# -> Int# -> [Char]
@@ -174,3 +311,61 @@ unpackNBytes# addr len# = unpack [] (len# -# 1#)
case indexCharOffAddr# addr i# of
ch -> unpack (C# ch : acc) (i# -# 1#)
+
+
+------------------------------
+--- UTF8 decoding utilities
+------------------------------
+--
+-- These functions make explizit the logic that was originally
+-- part of unpackCStringUtf8. Since we want the same support for ascii
+-- and non-ascii a variety of functions needs the same logic. Instead
+-- of C&P'in the decoding logic all over we have it here once, and then
+-- force GHC to inline it.
+--
+-- All the overhead of the Bytes argument and calls goes away once all is
+-- said and done. And what remains is readable code in Haskell land and
+-- performant code in the resulting binary.
+
+data Bytes = One | Two | Three | Four
+
+{-# INLINE getByteCount #-}
+getByteCount :: Char# -> Bytes
+getByteCount ch
+ | isTrue# (ch `leChar#` '\x7F'#) = One
+ | isTrue# (ch `leChar#` '\xDF'#) = Two
+ | isTrue# (ch `leChar#` '\xEF'#) = Three
+ | True = Four
+
+{-# INLINE plusBytes #-}
+plusBytes :: Addr# -> Bytes -> Addr#
+plusBytes addr bytes =
+ case bytes of
+ One -> addr `plusAddr#` 1#
+ Two -> addr `plusAddr#` 2#
+ Three -> addr `plusAddr#` 3#
+ Four -> addr `plusAddr#` 4#
+
+-- | Take the current address, read unicode char of the given size.
+-- We obviously want the number of bytes, but we have to read one
+-- byte to determine the number of bytes for the current codepoint
+-- so we might as well reuse it and avoid a read.
+--
+-- Side Note: We don't dare to decode all 4 possibilities at once.
+-- Reading past the end of the addr might trigger an exception.
+-- For this reason we really have to check the width first and only
+-- decode after.
+{-# INLINE unpackUtf8Char# #-}
+unpackUtf8Char# :: Bytes -> Char# -> Addr# -> Char#
+unpackUtf8Char# bytes ch addr =
+ case bytes of
+ One -> ch
+ Two -> (chr# (((ord# ch -# 0xC0#) `uncheckedIShiftL#` 6#) +#
+ (ord# (indexCharOffAddr# (addr `plusAddr#` 1#) 0#) -# 0x80#)))
+ Three -> (chr# (((ord# ch -# 0xE0#) `uncheckedIShiftL#` 12#) +#
+ ((ord# (indexCharOffAddr# (addr `plusAddr#` 1#) 0#) -# 0x80#) `uncheckedIShiftL#` 6#) +#
+ (ord# (indexCharOffAddr# (addr `plusAddr#` 2#) 0#) -# 0x80#)))
+ Four -> (chr# (((ord# ch -# 0xF0#) `uncheckedIShiftL#` 18#) +#
+ ((ord# (indexCharOffAddr# (addr `plusAddr#` 1#) 0#) -# 0x80#) `uncheckedIShiftL#` 12#) +#
+ ((ord# (indexCharOffAddr# (addr `plusAddr#` 2#) 0#) -# 0x80#) `uncheckedIShiftL#` 6#) +#
+ (ord# (indexCharOffAddr# (addr `plusAddr#` 3#) 0#) -# 0x80#)))
=====================================
libraries/ghc-prim/changelog.md
=====================================
@@ -1,3 +1,17 @@
+## 0.6.2 (edit as necessary)
+
+- Shipped with GHC 8.12.1
+
+- Added to `GHC.CString`:
+
+ unpackAppendCStringUtf8# :: Addr# -> [Char] -> [Char]
+ unpackFoldrCStringUtf8# :: Addr# -> (Char -> a -> a) -> a -> a
+ elemCString# :: Char# -> Addr# -> Bool
+ elemCStringUtf8# :: Char# -> Addr# -> Bool
+
+ `elemCString*#` is a version of `elem` specialized for operations
+ over GHC String literals.
+
## 0.6.1 (edit as necessary)
- Shipped with GHC 8.10.1
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/d6c0da2267233daddc6d2f96013ec1394bf588e4
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/d6c0da2267233daddc6d2f96013ec1394bf588e4
You're receiving this email because of your account on gitlab.haskell.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20200401/c2c33d47/attachment-0001.html>
More information about the ghc-commits
mailing list