[Git][ghc/ghc][master] JS: Enable more efficient packing of string data (fixes #24706)

Marge Bot (@marge-bot) gitlab at gitlab.haskell.org
Sat May 4 00:48:21 UTC 2024



Marge Bot pushed to branch master at Glasgow Haskell Compiler / GHC


Commits:
ba480026 by Serge S. Gulin at 2024-05-03T20:47:01-04:00
JS: Enable more efficient packing of string data (fixes #24706)

- - - - -


2 changed files:

- compiler/GHC/StgToJS/Linker/Linker.hs
- compiler/GHC/StgToJS/Linker/Utils.hs


Changes:

=====================================
compiler/GHC/StgToJS/Linker/Linker.hs
=====================================
@@ -1284,7 +1284,16 @@ staticDeclStat (StaticInfo global_name static_value _) = jStgStatToJS decl
       StaticUnboxedBool b          -> app "h$p" [toJExpr b]
       StaticUnboxedInt i           -> app "h$p" [toJExpr i]
       StaticUnboxedDouble d        -> app "h$p" [toJExpr (unSaneDouble d)]
-      StaticUnboxedString str      -> app "h$rawStringData" [ValExpr (to_byte_list str)]
+      -- GHCJS used a function wrapper for this:
+      -- StaticUnboxedString str      -> ApplExpr (initStr str) []
+      -- But we are defining it statically for now.
+      StaticUnboxedString str      -> initStr str
       StaticUnboxedStringOffset {} -> 0
 
     to_byte_list = JList . map (Int . fromIntegral) . BS.unpack
+
+    initStr :: BS.ByteString -> JStgExpr
+    initStr str =
+      case decodeModifiedUTF8 str of
+        Just t  -> app "h$encodeModifiedUtf8" [ValExpr (JStr t)]
+        Nothing -> app "h$rawStringData" [ValExpr $ to_byte_list str]


=====================================
compiler/GHC/StgToJS/Linker/Utils.hs
=====================================
@@ -21,6 +21,7 @@ module GHC.StgToJS.Linker.Utils
   , getInstalledPackageLibDirs
   , getInstalledPackageHsLibs
   , commonCppDefs
+  , decodeModifiedUTF8
   )
 where
 
@@ -283,3 +284,15 @@ jsExeFileName dflags
     dropPrefix prefix xs
       | prefix `isPrefixOf` xs = drop (length prefix) xs
       | otherwise              = xs
+
+-- GHC produces string literals as ByteStrings.
+-- When a ByteString is valid UTF-8 and contains no zero bytes, we attempt to
+-- represent it as a FastString.
+-- Otherwise (for example, when the string literal encodes long integers or zero bytes) we
+-- leave it as is.
+-- A zero byte indicates that the literal was never meant to be Modified UTF-8 compatible.
+decodeModifiedUTF8 :: B.ByteString -> Maybe FastString
+decodeModifiedUTF8 bs
+  | B.any (==0) bs         = Nothing
+  | not $ B.isValidUtf8 bs = Nothing
+  | otherwise              = Just . mkFastStringByteString $ bs



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/ba480026903aa735e63818a64228ab13639ecdc9

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/ba480026903aa735e63818a64228ab13639ecdc9
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20240503/13058dc7/attachment-0001.html>


More information about the ghc-commits mailing list