[commit: ghc] wip/annotate-core: ghc-prim: Don't allocate a thunk for each unpacked UTF-8 character (60c3063)
git at git.haskell.org
git at git.haskell.org
Tue Jul 25 17:54:19 UTC 2017
Repository : ssh://git@git.haskell.org/ghc
On branch : wip/annotate-core
Link : http://ghc.haskell.org/trac/ghc/changeset/60c306369c8344f378707894039c3901788dafb4/ghc
>---------------------------------------------------------------
commit 60c306369c8344f378707894039c3901788dafb4
Author: Ben Gamari <ben at smart-cactus.org>
Date: Fri Jul 21 12:00:48 2017 -0400
ghc-prim: Don't allocate a thunk for each unpacked UTF-8 character
While debugging #14005 I noticed that unpackCStringUtf8# was allocating a thunk
for each multi-byte Unicode character that it unpacked. This is hardly worthwhile:
the thunk's closure is at least three words, whereas the Char itself is only two
words and requires only a little bit twiddling to construct. Construct the Char
strictly (via a bang-pattern let binding) so that no thunk is allocated.
>---------------------------------------------------------------
60c306369c8344f378707894039c3901788dafb4
libraries/ghc-prim/GHC/CString.hs | 24 ++++++++++++------------
1 file changed, 12 insertions(+), 12 deletions(-)
diff --git a/libraries/ghc-prim/GHC/CString.hs b/libraries/ghc-prim/GHC/CString.hs
index cdda2db..e739af7 100644
--- a/libraries/ghc-prim/GHC/CString.hs
+++ b/libraries/ghc-prim/GHC/CString.hs
@@ -129,20 +129,20 @@ unpackCStringUtf8# addr
| isTrue# (ch `eqChar#` '\0'# ) = []
| isTrue# (ch `leChar#` '\x7F'#) = C# ch : unpack (nh +# 1#)
| isTrue# (ch `leChar#` '\xDF'#) =
- C# (chr# (((ord# ch -# 0xC0#) `uncheckedIShiftL#` 6#) +#
- (ord# (indexCharOffAddr# addr (nh +# 1#)) -# 0x80#))) :
- unpack (nh +# 2#)
+ let !c = C# (chr# (((ord# ch -# 0xC0#) `uncheckedIShiftL#` 6#) +#
+ (ord# (indexCharOffAddr# addr (nh +# 1#)) -# 0x80#)))
+ in c : unpack (nh +# 2#)
| isTrue# (ch `leChar#` '\xEF'#) =
- C# (chr# (((ord# ch -# 0xE0#) `uncheckedIShiftL#` 12#) +#
- ((ord# (indexCharOffAddr# addr (nh +# 1#)) -# 0x80#) `uncheckedIShiftL#` 6#) +#
- (ord# (indexCharOffAddr# addr (nh +# 2#)) -# 0x80#))) :
- unpack (nh +# 3#)
+ let !c = C# (chr# (((ord# ch -# 0xE0#) `uncheckedIShiftL#` 12#) +#
+ ((ord# (indexCharOffAddr# addr (nh +# 1#)) -# 0x80#) `uncheckedIShiftL#` 6#) +#
+ (ord# (indexCharOffAddr# addr (nh +# 2#)) -# 0x80#)))
+ in c : unpack (nh +# 3#)
| True =
- C# (chr# (((ord# ch -# 0xF0#) `uncheckedIShiftL#` 18#) +#
- ((ord# (indexCharOffAddr# addr (nh +# 1#)) -# 0x80#) `uncheckedIShiftL#` 12#) +#
- ((ord# (indexCharOffAddr# addr (nh +# 2#)) -# 0x80#) `uncheckedIShiftL#` 6#) +#
- (ord# (indexCharOffAddr# addr (nh +# 3#)) -# 0x80#))) :
- unpack (nh +# 4#)
+ let !c = C# (chr# (((ord# ch -# 0xF0#) `uncheckedIShiftL#` 18#) +#
+ ((ord# (indexCharOffAddr# addr (nh +# 1#)) -# 0x80#) `uncheckedIShiftL#` 12#) +#
+ ((ord# (indexCharOffAddr# addr (nh +# 2#)) -# 0x80#) `uncheckedIShiftL#` 6#) +#
+ (ord# (indexCharOffAddr# addr (nh +# 3#)) -# 0x80#)))
+ in c : unpack (nh +# 4#)
where
!ch = indexCharOffAddr# addr nh
More information about the ghc-commits
mailing list