[commit: ghc] wip/annotate-core: ghc-prim: Don't allocate a thunk for each unpacked UTF-8 character (60c3063)

git at git.haskell.org git at git.haskell.org
Tue Jul 25 17:54:19 UTC 2017


Repository : ssh://git@git.haskell.org/ghc

On branch  : wip/annotate-core
Link       : http://ghc.haskell.org/trac/ghc/changeset/60c306369c8344f378707894039c3901788dafb4/ghc

>---------------------------------------------------------------

commit 60c306369c8344f378707894039c3901788dafb4
Author: Ben Gamari <ben at smart-cactus.org>
Date:   Fri Jul 21 12:00:48 2017 -0400

    ghc-prim: Don't allocate a thunk for each unpacked UTF-8 character
    
    While debugging #14005 I noticed that unpackCStringUtf8# was allocating a thunk
    for each Unicode character that it unpacked. This seems hardly worthwhile given
    that the thunk's closure will be at least three words, whereas the Char itself
    is only two words and needs only a little bit twiddling to construct. Build the
    Char eagerly instead, by binding it with a strict (banged) let before consing it
    onto the result.


>---------------------------------------------------------------

60c306369c8344f378707894039c3901788dafb4
 libraries/ghc-prim/GHC/CString.hs | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/libraries/ghc-prim/GHC/CString.hs b/libraries/ghc-prim/GHC/CString.hs
index cdda2db..e739af7 100644
--- a/libraries/ghc-prim/GHC/CString.hs
+++ b/libraries/ghc-prim/GHC/CString.hs
@@ -129,20 +129,20 @@ unpackCStringUtf8# addr
       | isTrue# (ch `eqChar#` '\0'#  ) = []
       | isTrue# (ch `leChar#` '\x7F'#) = C# ch : unpack (nh +# 1#)
       | isTrue# (ch `leChar#` '\xDF'#) =
-          C# (chr# (((ord# ch                                  -# 0xC0#) `uncheckedIShiftL#`  6#) +#
-                     (ord# (indexCharOffAddr# addr (nh +# 1#)) -# 0x80#))) :
-          unpack (nh +# 2#)
+          let !c = C# (chr# (((ord# ch                                  -# 0xC0#) `uncheckedIShiftL#`  6#) +#
+                              (ord# (indexCharOffAddr# addr (nh +# 1#)) -# 0x80#)))
+          in c : unpack (nh +# 2#)
       | isTrue# (ch `leChar#` '\xEF'#) =
-          C# (chr# (((ord# ch                                  -# 0xE0#) `uncheckedIShiftL#` 12#) +#
-                    ((ord# (indexCharOffAddr# addr (nh +# 1#)) -# 0x80#) `uncheckedIShiftL#`  6#) +#
-                     (ord# (indexCharOffAddr# addr (nh +# 2#)) -# 0x80#))) :
-          unpack (nh +# 3#)
+          let !c = C# (chr# (((ord# ch                                  -# 0xE0#) `uncheckedIShiftL#` 12#) +#
+                             ((ord# (indexCharOffAddr# addr (nh +# 1#)) -# 0x80#) `uncheckedIShiftL#`  6#) +#
+                              (ord# (indexCharOffAddr# addr (nh +# 2#)) -# 0x80#)))
+          in c : unpack (nh +# 3#)
       | True                           =
-          C# (chr# (((ord# ch                                  -# 0xF0#) `uncheckedIShiftL#` 18#) +#
-                    ((ord# (indexCharOffAddr# addr (nh +# 1#)) -# 0x80#) `uncheckedIShiftL#` 12#) +#
-                    ((ord# (indexCharOffAddr# addr (nh +# 2#)) -# 0x80#) `uncheckedIShiftL#`  6#) +#
-                     (ord# (indexCharOffAddr# addr (nh +# 3#)) -# 0x80#))) :
-          unpack (nh +# 4#)
+          let !c = C# (chr# (((ord# ch                                  -# 0xF0#) `uncheckedIShiftL#` 18#) +#
+                             ((ord# (indexCharOffAddr# addr (nh +# 1#)) -# 0x80#) `uncheckedIShiftL#` 12#) +#
+                             ((ord# (indexCharOffAddr# addr (nh +# 2#)) -# 0x80#) `uncheckedIShiftL#`  6#) +#
+                              (ord# (indexCharOffAddr# addr (nh +# 3#)) -# 0x80#)))
+          in c : unpack (nh +# 4#)
       where
         !ch = indexCharOffAddr# addr nh
 



More information about the ghc-commits mailing list