[Git][ghc/ghc][wip/andreask/elem_rule_fix] Fix "build/elem" RULE.

Fri Apr 17 08:55:38 UTC 2020

Andreas Klebinger pushed to branch wip/andreask/elem_rule_fix at Glasgow Haskell Compiler / GHC


Commits:
e9d9e1bb by Andreas Klebinger at 2020-04-17T10:53:39+02:00
Fix "build/elem" RULE.

An redundant constraint prevented the rule from matching.

Fixing this allows a call to elem on a known list to be translated
into a series of equality checks, and eventually a simple case
expression.

Surprisingly this seems to regress elem for strings. To avoid
this we now also allow foldrCString to inline and add an UTF8
variant. This usually results in elem to be compiled to a tight
non-allocating loop over the primitive string literal which
performs a linear search.

In the process this commit adds UTF8 variants for some of the
functions in CString. This is required to make this work for
both ASCII and UTF8 strings.

- - - - -


9 changed files:

- libraries/base/GHC/Base.hs
- libraries/base/GHC/List.hs
- libraries/base/changelog.md
- + libraries/base/tests/perf/Makefile
- + libraries/base/tests/perf/T17752.hs
- + libraries/base/tests/perf/T17752.stdout
- + libraries/base/tests/perf/all.T
- libraries/ghc-prim/GHC/CString.hs
- libraries/ghc-prim/changelog.md


Changes:

=====================================
libraries/base/GHC/Base.hs
=====================================
@@ -1624,6 +1624,10 @@ a `iShiftRL#` b | isTrue# (b >=# WORD_SIZE_IN_BITS#) = 0#
 "unpack-list"  [1]  forall a   . unpackFoldrCString# a (:) [] = unpackCString# a
 "unpack-append"     forall a n . unpackFoldrCString# a (:) n  = unpackAppendCString# a n
 
+"unpack-utf8"       [~1] forall a   . unpackCStringUtf8# a             = build (unpackFoldrCStringUtf8# a)
+"unpack-list-utf8"  [1]  forall a   . unpackFoldrCStringUtf8# a (:) [] = unpackCStringUtf8# a
+"unpack-append-utf8"     forall a n . unpackFoldrCStringUtf8# a (:) n  = unpackAppendCStringUtf8# a n
+
 -- There's a built-in rule (in GHC.Core.Op.ConstantFold) for
 --      unpackFoldr "foo" c (unpackFoldr "baz" c n)  =  unpackFoldr "foobaz" c n
 


=====================================
libraries/base/GHC/List.hs
=====================================
@@ -1149,7 +1149,7 @@ elem _ []       = False
 elem x (y:ys)   = x==y || elem x ys
 {-# NOINLINE [1] elem #-}
 {-# RULES
-"elem/build"    forall x (g :: forall b . Eq a => (a -> b -> b) -> b -> b)
+"elem/build"    forall x (g :: forall b . (a -> b -> b) -> b -> b)
    . elem x (build g) = g (\ y r -> (x == y) || r) False
  #-}
 #endif
@@ -1174,7 +1174,7 @@ notElem _ []    =  True
 notElem x (y:ys)=  x /= y && notElem x ys
 {-# NOINLINE [1] notElem #-}
 {-# RULES
-"notElem/build" forall x (g :: forall b . Eq a => (a -> b -> b) -> b -> b)
+"notElem/build" forall x (g :: forall b . (a -> b -> b) -> b -> b)
    . notElem x (build g) = g (\ y r -> (x /= y) && r) True
  #-}
 #endif


=====================================
libraries/base/changelog.md
=====================================
@@ -11,7 +11,9 @@
 
   * Add `singleton` function for `Data.List.NonEmpty`.
 
-
+  * An issue with list fusion and `elem` was fixed. `elem` applied to known
+    small lists will now compile to a simple case statement more often.
+   
 ## 4.14.0.0 *TBA*
   * Bundled with GHC 8.10.1
 


=====================================
libraries/base/tests/perf/Makefile
=====================================
@@ -0,0 +1,15 @@
+# This Makefile runs the tests using GHC's testsuite framework.  It
+# assumes the package is part of a GHC build tree with the testsuite
+# installed in ../../../testsuite.
+
+TOP=../../../../testsuite
+include $(TOP)/mk/boilerplate.mk
+include $(TOP)/mk/test.mk
+
+
+T17752:
+	'$(TEST_HC)' $(TEST_HC_OPTS) -O --make T17752 -rtsopts -ddump-simpl -ddump-to-file -dsuppress-uniques -dsuppress-all
+	# All occurences of elem should be optimized away.
+	# For strings these should result in loops after inlining foldCString.
+	# For lists it should result in a case expression.
+	echo $$(cat T17752.dump-simpl | grep "elem" -A4 )


=====================================
libraries/base/tests/perf/T17752.hs
=====================================
@@ -0,0 +1,18 @@
+module T17752 where
+
+--  All occurences of elem should be optimized away.
+-- 	 For strings these should result in loops after inlining foldCString.
+-- 	 For lists it should result in a case expression.
+
+-- Should compile to a pattern match if the rules fire
+isElemList    x = x `elem` ['a','b','c']
+isNotElemList x = x `elem` ['x','y','z']
+
+isOneOfThese x = x `elem` [1,2,3,4,5::Int]
+isNotOneOfThese x = x `notElem` [1,2,3,4,5::Int]
+
+isElemString x = elem x "foo"
+isNotElemString x = notElem x "bar"
+
+isElemStringUtf x = elem x "foö"
+isNotElemStringUtf x = notElem x "bär"


=====================================
libraries/base/tests/perf/T17752.stdout
=====================================
@@ -0,0 +1,2 @@
+[1 of 1] Compiling T17752           ( T17752.hs, T17752.o )
+


=====================================
libraries/base/tests/perf/all.T
=====================================
@@ -0,0 +1,5 @@
+#--------------------------------------
+# Check specialization of elem via rules
+#--------------------------------------
+
+test('T17752', [only_ways(['normal'])] , makefile_test, ['T17752'])
\ No newline at end of file


=====================================
libraries/ghc-prim/GHC/CString.hs
=====================================
@@ -17,13 +17,77 @@
 -----------------------------------------------------------------------------
 
 module GHC.CString (
+        -- * Ascii variants
         unpackCString#, unpackAppendCString#, unpackFoldrCString#,
-        unpackCStringUtf8#, unpackNBytes#
+
+        -- * Utf variants
+        unpackCStringUtf8#, unpackAppendCStringUtf8#, unpackFoldrCStringUtf8#,
+
+        -- * Other
+        unpackNBytes#,
     ) where
 
 import GHC.Types
 import GHC.Prim
 
+{-
+Note [String literals in GHC]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+String literals get quite a bit of special handling in GHC.  This Note
+summarises the moving parts.
+
+* Desugaring: see GHC.HsToCore.Match.Literal.dsLit, which in
+  turn calls GHC.Core.Make.mkStringExprFS.
+
+  The desugarer desugars the Haskell literal "foo" into Core
+     GHC.CString.unpackCString# "foo"#
+  where "foo"# is primitive string literal (of type Addr#).
+
+  When the string cannot be encoded as a C string, we use UTF8:
+     GHC.CString.unpackCStringUtf8# "foo"#
+
+* The library module ghc-prim:GHC.CString has a bunch of functions that
+  work over primitive strings, including GHC.CString.unpackCString#
+
+* GHC.Core.Op.ConstantFold has some RULES that optimise certain string
+  operations on literal strings. For example:
+
+    + Constant folding the desugared form of ("foo" ++ "bar")
+      into ("foobar")
+    + Comparing strings
+    + and more
+
+* GHC.Base has a number of regular rules for String literals.
+
+  + a rule "eqString": (==) @String = eqString
+    where GHC.Base.eqString :: String -> String -> Bool
+
+    ConstantFold has a RULE for eqString on literals:
+     eqString (Lit "foo"#) (Lit "bar"#) --> False
+
+    This allows compile time evaluation of things like "foo" == "bar"
+
+  + A bunch of rules to promote fusion:
+
+    "unpack"       [~1] forall a   . unpackCString# a             = build (unpackFoldrCString# a)
+    "unpack-list"  [1]  forall a   . unpackFoldrCString# a (:) [] = unpackCString# a
+    "unpack-append"     forall a n . unpackFoldrCString# a (:) n  = unpackAppendCString# a n
+
+    And UTF8 variants of these rules.
+
+* We allow primitive (unlifted) literal strings to be top-level
+  bindings, breaking out usual rule.  See GHC.Core
+  Note [Core top-level string literals]
+
+* TODO: There is work on a special code-gen path for top-level boxed strings
+     str :: [Char]
+     str = unpackCString# "foo"#
+  so that they can all share a common code pointer
+
+  There is a WIP MR on gitlab for this: !3012
+
+-}
+
 -----------------------------------------------------------------------------
 -- Unpacking C strings
 -----------------------------------------------------------------------------
@@ -71,8 +135,27 @@ Moreover, we want to make it CONLIKE, so that:
 All of this goes for unpackCStringUtf8# too.
 -}
 
-{- Note [unpackCString# iterating over addr]
-   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+{- Note [Inlining of unpackFoldrCString]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Usually the unpack-list rule turns unpackFoldrCString# into unpackCString#
+It also has a BuiltInRule in PrelRules.hs:
+     unpackFoldrCString# "foo" c (unpackFoldrCString# "baz" c n)
+       =  unpackFoldrCString# "foobaz" c n
+
+We use NOINLINE [0] on the grounds that, unlike
+unpackCString#, there *is* some point in inlining
+unpackFoldrCString#, because we get better code for the
+higher-order function call.
+
+This can cause a code size increase but it was minimal
+when looking at nofib.
+
+This is especially important for elem which then results in an
+allocation free loop.
+
+  Note [unpackCString# iterating over addr]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 When unpacking unpackCString# and friends repeatedly return a cons cell
 containing:
@@ -89,6 +172,10 @@ This works since these two expressions will read from the same address.
 
 This way we avoid the need for the thunks to close over both the start of
 the string and the current offset, saving a word for each character unpacked.
+
+This has the additional advantage the we can guarantee that only the
+increment will happen in the loop.
+
 -}
 
 unpackCString# :: Addr# -> [Char]
@@ -111,28 +198,17 @@ unpackAppendCString# addr rest
         -- See Note [unpackCString# iterating over addr]
         !ch = indexCharOffAddr# addr 0#
 
-unpackFoldrCString# :: Addr# -> (Char  -> a -> a) -> a -> a
-

--- Usually the unpack-list rule turns unpackFoldrCString# into unpackCString#
-
--- It also has a BuiltInRule in GHC.Core.Op.ConstantFold:
---      unpackFoldrCString# "foo" c (unpackFoldrCString# "baz" c n)
---        =  unpackFoldrCString# "foobaz" c n
-
-{-# NOINLINE unpackFoldrCString# #-}
--- At one stage I had NOINLINE [0] on the grounds that, unlike
--- unpackCString#, there *is* some point in inlining
--- unpackFoldrCString#, because we get better code for the
--- higher-order function call.  BUT there may be a lot of
--- literal strings, and making a separate 'unpack' loop for
--- each is highly gratuitous.  See nofib/real/anna/PrettyPrint.
-
-unpackFoldrCString# addr f z
-  | isTrue# (ch `eqChar#` '\0'#) = z
-  | True                         = C# ch `f` unpackFoldrCString# (addr `plusAddr#` 1#) f z
+-- See [Inlining of unpackFoldrCString]
+{-# NOINLINE[0] unpackFoldrCString# #-}
+unpackFoldrCString# :: Addr# -> (Char -> a -> a) -> a -> a
+unpackFoldrCString# str f z_init = go str z_init
   where
-    -- See Note [unpackCString# iterating over addr]
-    !ch = indexCharOffAddr# addr 0#
+    go addr z
+      | isTrue# (ch `eqChar#` '\0'#) = z
+      | True                         = C# ch `f` go (addr `plusAddr#` 1#) z
+      where
+        -- See Note [unpackCString# iterating over addr]
+        !ch = indexCharOffAddr# addr 0#
 
 -- There's really no point in inlining this for the same reasons as
 -- unpackCString. See Note [Inlining unpackCString#] above for details.
@@ -140,22 +216,43 @@ unpackCStringUtf8# :: Addr# -> [Char]
 {-# NOINLINE CONLIKE unpackCStringUtf8# #-}
 unpackCStringUtf8# addr
     | isTrue# (ch `eqChar#` '\0'#  ) = []
-    | isTrue# (ch `leChar#` '\x7F'#) = C# ch : unpackCStringUtf8# (addr `plusAddr#` 1#)
-    | isTrue# (ch `leChar#` '\xDF'#) =
-        let !c = C# (chr# (((ord# ch                                  -# 0xC0#) `uncheckedIShiftL#`  6#) +#
-                            (ord# (indexCharOffAddr# (addr `plusAddr#` 1#) 0#) -# 0x80#)))
-        in c : unpackCStringUtf8# (addr `plusAddr#` 2#)
-    | isTrue# (ch `leChar#` '\xEF'#) =
-        let !c = C# (chr# (((ord# ch                                             -# 0xE0#) `uncheckedIShiftL#` 12#) +#
-                           ((ord# (indexCharOffAddr# (addr `plusAddr#` 1#) 0#) -# 0x80#) `uncheckedIShiftL#`  6#) +#
-                            (ord# (indexCharOffAddr# (addr `plusAddr#` 2#) 0#) -# 0x80#)))
-        in c : unpackCStringUtf8# (addr `plusAddr#` 3#)
-    | True                           =
-        let !c = C# (chr# (((ord# ch                                  -# 0xF0#) `uncheckedIShiftL#` 18#) +#
-                           ((ord# (indexCharOffAddr# (addr `plusAddr#` 1#) 0#) -# 0x80#) `uncheckedIShiftL#` 12#) +#
-                           ((ord# (indexCharOffAddr# (addr `plusAddr#` 2#) 0#) -# 0x80#) `uncheckedIShiftL#`  6#) +#
-                            (ord# (indexCharOffAddr# (addr `plusAddr#` 3#) 0#) -# 0x80#)))
-        in c : unpackCStringUtf8# (addr `plusAddr#` 4#)
+    | True =
+        let !byte_count = getByteCount ch
+            !utf_ch = unpackUtf8Char# byte_count ch addr
+            !addr' = addr `plusBytes` byte_count
+        in  C# utf_ch : unpackCStringUtf8# addr'
+      where
+        -- See Note [unpackCString# iterating over addr]
+        !ch = indexCharOffAddr# addr 0#
+
+
+unpackAppendCStringUtf8# :: Addr# -> [Char] -> [Char]
+{-# NOINLINE unpackAppendCStringUtf8# #-}
+     -- See the NOINLINE note on unpackCString#
+unpackAppendCStringUtf8# addr rest
+    | isTrue# (ch `eqChar#` '\0'#) = rest
+    | True =
+        let !byte_count = getByteCount ch
+            !utf_ch = unpackUtf8Char# byte_count ch addr
+            !addr' = (addr `plusBytes` byte_count)
+        in  C# utf_ch : unpackAppendCStringUtf8# addr' rest
+      where
+        -- See Note [unpackCString# iterating over addr]
+        !ch = indexCharOffAddr# addr 0#
+
+-- See Note [Inlining of unpackFoldrCString]
+{-# NOINLINE[0] unpackFoldrCStringUtf8# #-}
+unpackFoldrCStringUtf8# :: Addr# -> (Char -> a -> a) -> a -> a
+unpackFoldrCStringUtf8# addr_init f z_init
+  = go addr_init z_init
+  where
+    go addr z
+      | isTrue# (ch `eqChar#` '\0'#) = z
+      | True =
+          let !byte_count = getByteCount ch
+              !utf_ch = unpackUtf8Char# byte_count ch addr
+              !addr' = (addr `plusBytes` byte_count)
+          in C# utf_ch `f` go addr' z
       where
         -- See Note [unpackCString# iterating over addr]
         !ch = indexCharOffAddr# addr 0#
@@ -174,3 +271,61 @@ unpackNBytes#  addr len# = unpack [] (len# -# 1#)
          case indexCharOffAddr# addr i# of
             ch -> unpack (C# ch : acc) (i# -# 1#)
 
+
+
+------------------------------
+--- UTF8 decoding utilities
+------------------------------
+--
+-- These functions make explicit the logic that was originally
+-- part of unpackCStringUtf8. Since we want the same support for ascii
+-- and non-ascii a variety of functions needs the same logic. Instead
+-- of C&P'in the decoding logic all over we have it here once, and then
+-- force GHC to inline it.
+--
+-- All the overhead of the Bytes argument and calls goes away once all is
+-- said and done. And what remains is readable code in Haskell land and
+-- performant code in the resulting binary.
+
+data Bytes = One | Two | Three | Four
+
+{-# INLINE getByteCount #-}
+getByteCount :: Char# -> Bytes
+getByteCount ch
+    | isTrue# (ch `leChar#` '\x7F'#) = One
+    | isTrue# (ch `leChar#` '\xDF'#) = Two
+    | isTrue# (ch `leChar#` '\xEF'#) = Three
+    | True                           = Four
+
+{-# INLINE plusBytes #-}
+plusBytes :: Addr# -> Bytes -> Addr#
+plusBytes addr bytes =
+  case bytes of
+    One   -> addr `plusAddr#` 1#
+    Two   -> addr `plusAddr#` 2#
+    Three -> addr `plusAddr#` 3#
+    Four  -> addr `plusAddr#` 4#
+
+-- | Take the current address, read unicode char of the given size.
+-- We obviously want the number of bytes, but we have to read one
+-- byte to determine the number of bytes for the current codepoint
+-- so we might as well reuse it and avoid a read.
+--
+-- Side Note: We don't dare to decode all 4 possibilities at once.
+-- Reading past the end of the addr might trigger an exception.
+-- For this reason we really have to check the width first and only
+-- decode after.
+{-# INLINE unpackUtf8Char# #-}
+unpackUtf8Char# :: Bytes -> Char# -> Addr# -> Char#
+unpackUtf8Char# bytes ch addr =
+  case bytes of
+    One -> ch
+    Two ->   (chr# (((ord# ch                                           -# 0xC0#) `uncheckedIShiftL#`  6#) +#
+                     (ord# (indexCharOffAddr# (addr `plusAddr#` 1#) 0#) -# 0x80#)))
+    Three -> (chr# (((ord# ch                                           -# 0xE0#) `uncheckedIShiftL#` 12#) +#
+                    ((ord# (indexCharOffAddr# (addr `plusAddr#` 1#) 0#) -# 0x80#) `uncheckedIShiftL#`  6#) +#
+                     (ord# (indexCharOffAddr# (addr `plusAddr#` 2#) 0#) -# 0x80#)))
+    Four ->  (chr# (((ord# ch                                           -# 0xF0#) `uncheckedIShiftL#` 18#) +#
+                    ((ord# (indexCharOffAddr# (addr `plusAddr#` 1#) 0#) -# 0x80#) `uncheckedIShiftL#` 12#) +#
+                    ((ord# (indexCharOffAddr# (addr `plusAddr#` 2#) 0#) -# 0x80#) `uncheckedIShiftL#`  6#) +#
+                     (ord# (indexCharOffAddr# (addr `plusAddr#` 3#) 0#) -# 0x80#)))


=====================================
libraries/ghc-prim/changelog.md
=====================================
@@ -1,3 +1,15 @@
+## 0.6.2 (edit as necessary)
+
+- Shipped with GHC 8.12.1
+
+- In order to support unicode better the following functions in `GHC.CString`
+  gained UTF8 counterparts:
+
+        unpackAppendCStringUtf8# :: Addr# -> [Char] -> [Char]
+        unpackFoldrCStringUtf8# :: Addr# -> (Char -> a -> a) -> a -> a
+
+- unpackFoldrCString* variants can now inline in phase [0].
+
 ## 0.6.1 (edit as necessary)
 
 - Shipped with GHC 8.10.1



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/e9d9e1bb4115a83b0f9130c63f248bc1a97590a7

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/e9d9e1bb4115a83b0f9130c63f248bc1a97590a7
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20200417/2e5d5227/attachment-0001.html>