[Git][ghc/ghc][master] Fix z-encoding of tuples (#25364)

Marge Bot (@marge-bot) gitlab at gitlab.haskell.org
Sat Oct 12 03:46:51 UTC 2024



Marge Bot pushed to branch master at Glasgow Haskell Compiler / GHC


Commits:
9291c125 by Sylvain Henry at 2024-10-11T23:46:26-04:00
Fix z-encoding of tuples (#25364)

Tuples with prefix/suffix strings weren't always properly encoded with
their shortcut notations. Fix this.

- - - - -


4 changed files:

- libraries/ghc-boot/GHC/Utils/Encoding.hs
- + testsuite/tests/codeGen/should_run/T25364.hs
- + testsuite/tests/codeGen/should_run/T25364.stdout
- testsuite/tests/codeGen/should_run/all.T


Changes:

=====================================
libraries/ghc-boot/GHC/Utils/Encoding.hs
=====================================
@@ -1,4 +1,5 @@
 {-# LANGUAGE CPP #-}
+{-# LANGUAGE LambdaCase #-}
 {-# LANGUAGE BangPatterns, MagicHash, UnboxedTuples, MultiWayIf #-}
 {-# OPTIONS_GHC -O2 -fno-warn-name-shadowing #-}
 -- We always optimise this, otherwise performance of a non-optimised
@@ -88,14 +89,47 @@ type EncodedString = String     -- Encoded form
 
 
 zEncodeString :: UserString -> EncodedString
-zEncodeString cs = case maybe_tuple cs of
-                Just n  -> n            -- Tuples go to Z2T etc
-                Nothing -> go cs
-          where
-                go []     = []
-                go (c:cs) = encode_digit_ch c ++ go' cs
-                go' []     = []
-                go' (c:cs) = encode_ch c ++ go' cs
+zEncodeString = \case
+  []     -> []
+  (c:cs)
+    -- If a digit is at the start of a symbol then we need to encode it.
+    -- Otherwise package names like 9pH-0.1 give linker errors.
+    | c >= '0' && c <= '9' -> encode_as_unicode_char c ++ go cs
+    | otherwise            -> go (c:cs)
+  where
+    go = \case
+      [] -> []
+      -- encode boxed/unboxed tuples respectively as ZnT/ZnH (e.g. Z3T/Z3H for
+      -- 3-tuples). Note that the arity corresponds to the number of
+      -- commas+1. No comma means 0-arity, i.e. Z0T/Z0H.
+      --
+      -- The 1-arity unboxed tuple "(# #)" (notice the space between the '#'s)
+      -- isn't special-cased, i.e. it is encoded as "ZLzhz20UzhZR". There is no
+      -- 1-arity boxed tuple (we use Solo/MkSolo instead).
+      --
+      -- arity        boxed       z-name        unboxed       z-name
+      -- 0            ()          Z0T           (##)          Z0H
+      -- 1            N/A         N/A           (# #)         ZLzhz20UzhZR
+      -- 2            (,)         Z2T           (#,#)         Z2H
+      -- 3            (,,)        Z3T           (#,,#)        Z3H
+      -- ...
+      --
+      '(':'#':'#':')':cs -> "Z0H" ++ go cs
+      '(':')':cs         -> "Z0T" ++ go cs
+      '(':'#':cs
+        | (n, '#':')':cs') <- count_commas cs
+        -> 'Z' : shows (n+1) ('H': go cs')
+      '(':cs
+        | (n, ')':cs') <- count_commas cs
+        -> 'Z' : shows (n+1) ('T': go cs')
+      c:cs -> encode_ch c ++ go cs
+
+count_commas :: String -> (Int, String)
+count_commas = go 0
+  where
+    go !n = \case
+      ',':cs -> go (n+1) cs
+      cs     -> (n,cs)
 
 unencodedChar :: Char -> Bool   -- True for chars that don't need encoding
 unencodedChar 'Z' = False
@@ -104,12 +138,6 @@ unencodedChar c   =  c >= 'a' && c <= 'z'
                   || c >= 'A' && c <= 'Z'
                   || c >= '0' && c <= '9'
 
--- If a digit is at the start of a symbol then we need to encode it.
--- Otherwise package names like 9pH-0.1 give linker errors.
-encode_digit_ch :: Char -> EncodedString
-encode_digit_ch c | c >= '0' && c <= '9' = encode_as_unicode_char c
-encode_digit_ch c | otherwise            = encode_ch c
-
 encode_ch :: Char -> EncodedString
 encode_ch c | unencodedChar c = [c]     -- Common case first
 
@@ -215,34 +243,6 @@ decode_tuple d rest
     go n ('H':rest)     = '(' : '#' : replicate (n-1) ',' ++ "#)" ++ zDecodeString rest
     go n other = error ("decode_tuple: " ++ show n ++ ' ':other)
 
-{-
-Tuples are encoded as
-        Z3T or Z3H
-for 3-tuples or unboxed 3-tuples respectively.  No other encoding starts
-        Z<digit>
-
-* "(##)" is the tycon for an unboxed 0-tuple
-
-* "()" is the tycon for a boxed 0-tuple
--}
-
-maybe_tuple :: UserString -> Maybe EncodedString
-
-maybe_tuple "(##)" = Just("Z0H")
-maybe_tuple ('(' : '#' : cs) = case count_commas (0::Int) cs of
-                                 (n, '#' : ')' : _) -> Just ('Z' : shows (n+1) "H")
-                                 _                  -> Nothing
-maybe_tuple "()" = Just("Z0T")
-maybe_tuple ('(' : cs)       = case count_commas (0::Int) cs of
-                                 (n, ')' : _) -> Just ('Z' : shows (n+1) "T")
-                                 _            -> Nothing
-maybe_tuple _                = Nothing
-
-count_commas :: Int -> String -> (Int, String)
-count_commas n (',' : cs) = count_commas (n+1) cs
-count_commas n cs         = (n,cs)
-
-
 {-
 ************************************************************************
 *                                                                      *


=====================================
testsuite/tests/codeGen/should_run/T25364.hs
=====================================
@@ -0,0 +1,19 @@
+module Main where
+
+import GHC.Utils.Encoding (zEncodeString,zDecodeString)
+import Control.Monad
+
+main :: IO ()
+main = mapM_ test
+  [ "ghc-prim_GHC.Types_$tc'(#,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,#)_closure"
+  , "ghc-prim_GHC.Tuple_$tc'(,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,)2_closure"
+  , "ghc-prim_GHC.Tuple_(,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,)_info"
+  , "ghc-prim_GHC.Types_$tc'(# #)3_bytes"
+  ]
+
+test :: String -> IO ()
+test s = do
+  let e = zEncodeString s
+  putStrLn e
+  when (zDecodeString e /= s) $ do
+    error $ "Invalid z-encoding roundtrip for: " ++ s


=====================================
testsuite/tests/codeGen/should_run/T25364.stdout
=====================================
@@ -0,0 +1,4 @@
+ghczmprimzuGHCziTypeszuzdtczqZ32Hzuclosure
+ghczmprimzuGHCziTuplezuzdtczqZ35T2zuclosure
+ghczmprimzuGHCziTuplezuZ47Tzuinfo
+ghczmprimzuGHCziTypeszuzdtczqZLzhz20UzhZR3zubytes


=====================================
testsuite/tests/codeGen/should_run/all.T
=====================================
@@ -252,3 +252,4 @@ test('T24700', normal, compile_and_run, ['-O'])
 test('T24893', normal, compile_and_run, ['-O'])
 
 test('CCallConv', [req_c], compile_and_run, ['CCallConv_c.c'])
+test('T25364', normal, compile_and_run, [''])



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/9291c125b6009f1531071d4591a9320f54c00b39

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/9291c125b6009f1531071d4591a9320f54c00b39
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20241011/6c6e93d5/attachment-0001.html>


More information about the ghc-commits mailing list