[commit: ghc] master: ghc-prim: Don't allocate a thunk for each unpacked UTF-8 character (85a295d)
git at git.haskell.org
git at git.haskell.org
Mon Jul 24 23:36:44 UTC 2017
Repository : ssh://git@git.haskell.org/ghc
On branch : master
Link : http://ghc.haskell.org/trac/ghc/changeset/85a295d5607b5f8015bb3517601ced0d1adc29ef/ghc
>---------------------------------------------------------------
commit 85a295d5607b5f8015bb3517601ced0d1adc29ef
Author: Ben Gamari <bgamari.foss at gmail.com>
Date: Mon Jul 24 19:01:58 2017 -0400
ghc-prim: Don't allocate a thunk for each unpacked UTF-8 character
While debugging #14005 I noticed that unpackCStringUtf8# was allocating
a thunk for each Unicode character that it unpacked. This seems hardly
worthwhile given that the thunk's closure will be at least three words,
whereas the Char itself will be only two and requires only a bit of bit
twiddling to construct.
Test Plan: Validate
Reviewers: simonmar, austin
Subscribers: dfeuer, rwbarton, thomie
Differential Revision: https://phabricator.haskell.org/D3769
>---------------------------------------------------------------
85a295d5607b5f8015bb3517601ced0d1adc29ef
libraries/ghc-prim/GHC/CString.hs | 28 ++++++++++++++++------------
1 file changed, 16 insertions(+), 12 deletions(-)
diff --git a/libraries/ghc-prim/GHC/CString.hs b/libraries/ghc-prim/GHC/CString.hs
index cdda2db..0e6199f 100644
--- a/libraries/ghc-prim/GHC/CString.hs
+++ b/libraries/ghc-prim/GHC/CString.hs
@@ -125,24 +125,28 @@ unpackCStringUtf8# :: Addr# -> [Char]
unpackCStringUtf8# addr
= unpack 0#
where
+ -- We take care to strictly evaluate the character decoding as
+ -- indexCharOffAddr# is marked with the can_fail flag and
+ -- consequently GHC won't evaluate the expression unless it is absolutely
+ -- needed.
unpack nh
| isTrue# (ch `eqChar#` '\0'# ) = []
| isTrue# (ch `leChar#` '\x7F'#) = C# ch : unpack (nh +# 1#)
| isTrue# (ch `leChar#` '\xDF'#) =
- C# (chr# (((ord# ch -# 0xC0#) `uncheckedIShiftL#` 6#) +#
- (ord# (indexCharOffAddr# addr (nh +# 1#)) -# 0x80#))) :
- unpack (nh +# 2#)
+ let !c = C# (chr# (((ord# ch -# 0xC0#) `uncheckedIShiftL#` 6#) +#
+ (ord# (indexCharOffAddr# addr (nh +# 1#)) -# 0x80#)))
+ in c : unpack (nh +# 2#)
| isTrue# (ch `leChar#` '\xEF'#) =
- C# (chr# (((ord# ch -# 0xE0#) `uncheckedIShiftL#` 12#) +#
- ((ord# (indexCharOffAddr# addr (nh +# 1#)) -# 0x80#) `uncheckedIShiftL#` 6#) +#
- (ord# (indexCharOffAddr# addr (nh +# 2#)) -# 0x80#))) :
- unpack (nh +# 3#)
+ let !c = C# (chr# (((ord# ch -# 0xE0#) `uncheckedIShiftL#` 12#) +#
+ ((ord# (indexCharOffAddr# addr (nh +# 1#)) -# 0x80#) `uncheckedIShiftL#` 6#) +#
+ (ord# (indexCharOffAddr# addr (nh +# 2#)) -# 0x80#)))
+ in c : unpack (nh +# 3#)
| True =
- C# (chr# (((ord# ch -# 0xF0#) `uncheckedIShiftL#` 18#) +#
- ((ord# (indexCharOffAddr# addr (nh +# 1#)) -# 0x80#) `uncheckedIShiftL#` 12#) +#
- ((ord# (indexCharOffAddr# addr (nh +# 2#)) -# 0x80#) `uncheckedIShiftL#` 6#) +#
- (ord# (indexCharOffAddr# addr (nh +# 3#)) -# 0x80#))) :
- unpack (nh +# 4#)
+ let !c = C# (chr# (((ord# ch -# 0xF0#) `uncheckedIShiftL#` 18#) +#
+ ((ord# (indexCharOffAddr# addr (nh +# 1#)) -# 0x80#) `uncheckedIShiftL#` 12#) +#
+ ((ord# (indexCharOffAddr# addr (nh +# 2#)) -# 0x80#) `uncheckedIShiftL#` 6#) +#
+ (ord# (indexCharOffAddr# addr (nh +# 3#)) -# 0x80#)))
+ in c : unpack (nh +# 4#)
where
!ch = indexCharOffAddr# addr nh
More information about the ghc-commits
mailing list