[Git][ghc/ghc][wip/T23210] 8 commits: testsuite: Fix badly escaped literals

Ben Gamari (@bgamari) gitlab at gitlab.haskell.org
Fri Nov 8 17:10:38 UTC 2024



Ben Gamari pushed to branch wip/T23210 at Glasgow Haskell Compiler / GHC


Commits:
42f1801d by Ben Gamari at 2024-11-01T15:04:01-04:00
testsuite: Fix badly escaped literals

Use raw string literals to ensure that `\s` is correctly interpreted as
a character class.

- - - - -
29f4d34d by Ben Gamari at 2024-11-01T15:04:01-04:00
rts: Improve documentation of SLIDE bytecode instruction

- - - - -
2f1a492a by Ben Gamari at 2024-11-01T15:04:01-04:00
rts/Interpreter: Assert that TEST*_P discriminators are valid

- - - - -
4d7afaaa by Ben Gamari at 2024-11-01T15:04:01-04:00
rts/Interpreter: Improve documentation of TEST*_P instructions

- - - - -
5f7c2d3e by Ben Gamari at 2024-11-01T15:04:01-04:00
rts: Annotate BCOs with their Name

- - - - -
d14d3b82 by Ben Gamari at 2024-11-08T12:08:42-05:00
StgToByteCode: Don't assume that data con workers are nullary

Previously StgToByteCode assumed that all data-con workers were of a
nullary representation. This is not a valid assumption, as seen
in #23210, where an unsaturated application of a unary data
constructor's worker resulted in invalid bytecode. Sadly, I have not yet
been able to reduce a minimal testcase for this.

Fixes #23210.

- - - - -
250e70bd by Ben Gamari at 2024-11-08T12:08:42-05:00
StgToByteCode: Fix handling of Addr# literals

Previously we assumed that all unlifted types were Addr#.

- - - - -
2d806c85 by Ben Gamari at 2024-11-08T12:08:42-05:00
testsuite: Mark T23146* as unbroken

- - - - -


9 changed files:

- compiler/GHC/ByteCode/Asm.hs
- compiler/GHC/ByteCode/Instr.hs
- compiler/GHC/Stg/Utils.hs
- compiler/GHC/StgToByteCode.hs
- rts/Disassembler.c
- rts/Interpreter.c
- rts/include/rts/Bytecodes.h
- testsuite/driver/testlib.py
- testsuite/tests/codeGen/should_run/T23146/all.T


Changes:

=====================================
compiler/GHC/ByteCode/Asm.hs
=====================================
@@ -106,7 +106,7 @@ assembleBCOs interp profile proto_bcos tycons top_strs modbreaks spt_entries = d
   bcos'   <- mallocStrings interp bcos
   return CompiledByteCode
     { bc_bcos = bcos'
-    , bc_itbls =  itblenv
+    , bc_itbls = itblenv
     , bc_ffis = concatMap protoBCOFFIs proto_bcos
     , bc_strs = top_strs
     , bc_breaks = modbreaks
@@ -178,11 +178,12 @@ assembleOneBCO interp profile pbco = do
   return ubco'
 
 assembleBCO :: Platform -> ProtoBCO Name -> IO UnlinkedBCO
-assembleBCO platform (ProtoBCO { protoBCOName       = nm
-                             , protoBCOInstrs     = instrs
-                             , protoBCOBitmap     = bitmap
-                             , protoBCOBitmapSize = bsize
-                             , protoBCOArity      = arity }) = do
+assembleBCO platform
+            (ProtoBCO { protoBCOName       = nm
+                      , protoBCOInstrs     = instrs
+                      , protoBCOBitmap     = bitmap
+                      , protoBCOBitmapSize = bsize
+                      , protoBCOArity      = arity }) = do
   -- pass 1: collect up the offsets of the local labels.
   let asm = mapM_ (assembleI platform) instrs
 
@@ -527,6 +528,8 @@ assembleI platform i = case i of
                                                   , SmallOp tickx, SmallOp infox
                                                   , Op np
                                                   ]
+  BCO_NAME name            -> do np <- lit [BCONPtrStr name]
+                                 emit bci_BCO_NAME [Op np]
 
   where
     literal (LitLabel fs _)   = litlabel fs


=====================================
compiler/GHC/ByteCode/Instr.hs
=====================================
@@ -27,6 +27,8 @@ import GHC.Runtime.Heap.Layout ( StgWord )
 import Data.Int
 import Data.Word
 
+import Data.ByteString (ByteString)
+
 import GHC.Stack.CCS (CostCentre)
 
 import GHC.Stg.Syntax
@@ -130,7 +132,18 @@ data BCInstr
    | PUSH_APPLY_PPPPP
    | PUSH_APPLY_PPPPPP
 
-   | SLIDE     !WordOff{-this many-} !WordOff{-down by this much-}
+   -- | Drop entries @(n, n+by]@ entries from the stack. Graphically:
+   -- @
+   -- a_1  ← top
+   -- ...
+   -- a_n
+   -- b_1              =>    a_1  ← top
+   -- ...                    ...
+   -- b_by                   a_n
+   -- k                      k
+   -- @
+   | SLIDE     !WordOff -- ^ n = this many
+               !WordOff -- ^ by = down by this much
 
    -- To do with the heap
    | ALLOC_AP  !HalfWord {- make an AP with this many payload words.
@@ -175,7 +188,12 @@ data BCInstr
    -- The Word16 value is a constructor number and therefore
    -- stored in the insn stream rather than as an offset into
    -- the literal pool.
+
+   -- | Test whether the tag of a closure pointer is less than the given value.
+   -- If not, jump to the given label.
    | TESTLT_P  !Word16 LocalLabel
+   -- | Test whether the tag of a closure pointer is equal to the given value.
+   -- If not, jump to the given label.
    | TESTEQ_P  !Word16 LocalLabel
 
    | CASEFAIL
@@ -213,6 +231,10 @@ data BCInstr
                       !Word16                -- breakpoint info index
                       (RemotePtr CostCentre)
 
+   -- | A "meta"-instruction for recording the name of a BCO for debugging purposes.
+   -- These are ignored by the interpreter but helpfully printed by the disassmbler.
+   | BCO_NAME         !ByteString
+
 -- -----------------------------------------------------------------------------
 -- Printing bytecode instructions
 
@@ -367,6 +389,7 @@ instance Outputable BCInstr where
                                <+> text "<tick_module>" <+> ppr tickx
                                <+> text "<info_module>" <+> ppr infox
                                <+> text "<cc>"
+   ppr (BCO_NAME nm)         = text "BCO_NAME" <+> text (show nm)
 
 
 
@@ -471,3 +494,4 @@ bciStackUse SLIDE{}               = 0
 bciStackUse MKAP{}                = 0
 bciStackUse MKPAP{}               = 0
 bciStackUse PACK{}                = 1 -- worst case is PACK 0 words
+bciStackUse BCO_NAME{}            = 0


=====================================
compiler/GHC/Stg/Utils.hs
=====================================
@@ -11,6 +11,7 @@ module GHC.Stg.Utils
     , mkUnarisedId, mkUnarisedIds
 
     , allowTopLevelConApp
+    , hasNoNonZeroWidthArgs
     ) where
 
 import GHC.Prelude
@@ -19,6 +20,7 @@ import GHC.Platform
 import GHC.Types.Id
 import GHC.Core.Type
 import GHC.Core.TyCon
+import GHC.Core.Multiplicity     ( scaledThing )
 import GHC.Core.DataCon
 import GHC.Core ( AltCon(..) )
 import GHC.Types.Tickish
@@ -35,6 +37,13 @@ import GHC.Utils.Panic
 
 import GHC.Data.FastString
 
+-- | Returns whether there are any arguments with a non-zero-width runtime
+-- representation.
+--
+-- Returns True if the datacon has no or /just/ zero-width arguments.
+hasNoNonZeroWidthArgs :: DataCon -> Bool
+hasNoNonZeroWidthArgs = all (isZeroBitTy . scaledThing) . dataConRepArgTys
+
 mkUnarisedIds :: MonadUnique m => FastString -> [NvUnaryType] -> m [Id]
 mkUnarisedIds fs tys = mapM (mkUnarisedId fs) tys
 


=====================================
compiler/GHC/StgToByteCode.hs
=====================================
@@ -81,6 +81,7 @@ import GHC.Unit.Home.ModInfo (lookupHpt)
 import Data.Array
 import Data.Coerce (coerce)
 import Data.ByteString (ByteString)
+import qualified Data.ByteString.Char8 as BS
 import Data.Map (Map)
 import Data.IntMap (IntMap)
 import qualified Data.Map as Map
@@ -236,7 +237,8 @@ ppBCEnv p
 -- Create a BCO and do a spot of peephole optimisation on the insns
 -- at the same time.
 mkProtoBCO
-   :: Platform
+   :: (Outputable name)
+   => Platform
    -> name
    -> BCInstrList
    -> Either  [CgStgAlt] (CgStgRhs)
@@ -250,7 +252,7 @@ mkProtoBCO
 mkProtoBCO platform nm instrs_ordlist origin arity bitmap_size bitmap is_ret ffis
    = ProtoBCO {
         protoBCOName = nm,
-        protoBCOInstrs = maybe_with_stack_check,
+        protoBCOInstrs = maybe_add_bco_name $ maybe_add_stack_check peep_d,
         protoBCOBitmap = bitmap,
         protoBCOBitmapSize = fromIntegral bitmap_size,
         protoBCOArity = arity,
@@ -258,6 +260,10 @@ mkProtoBCO platform nm instrs_ordlist origin arity bitmap_size bitmap is_ret ffi
         protoBCOFFIs = ffis
       }
      where
+        maybe_add_bco_name instrs = BCO_NAME str : instrs
+          where
+            str = BS.pack $ showSDocOneLine defaultSDocContext (ppr nm)
+
         -- Overestimate the stack usage (in words) of this BCO,
         -- and if >= iNTERP_STACK_CHECK_THRESH, add an explicit
         -- stack check.  (The interpreter always does a stack check
@@ -265,17 +271,17 @@ mkProtoBCO platform nm instrs_ordlist origin arity bitmap_size bitmap is_ret ffi
         -- BCO anyway, so we only need to add an explicit one in the
         -- (hopefully rare) cases when the (overestimated) stack use
         -- exceeds iNTERP_STACK_CHECK_THRESH.
-        maybe_with_stack_check
-           | is_ret && stack_usage < fromIntegral (pc_AP_STACK_SPLIM (platformConstants platform)) = peep_d
+        maybe_add_stack_check instrs
+           | is_ret && stack_usage < fromIntegral (pc_AP_STACK_SPLIM (platformConstants platform)) = instrs
                 -- don't do stack checks at return points,
                 -- everything is aggregated up to the top BCO
                 -- (which must be a function).
                 -- That is, unless the stack usage is >= AP_STACK_SPLIM,
                 -- see bug #1466.
            | stack_usage >= fromIntegral iNTERP_STACK_CHECK_THRESH
-           = STKCHECK stack_usage : peep_d
+           = STKCHECK stack_usage : instrs
            | otherwise
-           = peep_d     -- the supposedly common case
+           = instrs     -- the supposedly common case
 
         -- We assume that this sum doesn't wrap
         stack_usage = sum (map bciStackUse peep_d)
@@ -1926,20 +1932,18 @@ pushAtom d p (StgVarArg var)
         -- PUSH_G doesn't tag constructors. So we use PACK here
         -- if we are dealing with nullary constructor.
         case isDataConWorkId_maybe var of
-          Just con -> do
-            massert (isNullaryRepDataCon con)
-            return (unitOL (PACK con 0), szb)
+          Just con
+            -- See Note [LFInfo of DataCon workers and wrappers] in GHC.Types.Id.Make.
+            | isNullaryRepDataCon con -> do
+              return (unitOL (PACK con 0), szb)
 
-          Nothing
             -- see Note [Generating code for top-level string literal bindings]
-            | isUnliftedType (idType var) -> do
-              massert (idType var `eqType` addrPrimTy)
+          _ | idType var `eqType` addrPrimTy ->
               return (unitOL (PUSH_ADDR (getName var)), szb)
 
             | otherwise -> do
               return (unitOL (PUSH_G (getName var)), szb)
 
-
 pushAtom _ _ (StgLitArg lit) = pushLiteral True lit
 
 pushLiteral :: Bool -> Literal -> BcM (BCInstrList, ByteOff)


=====================================
rts/Disassembler.c
=====================================
@@ -452,6 +452,13 @@ disInstr ( StgBCO *bco, int pc )
          debugBelch("RETURN_T\n ");
          break;
 
+      case bci_BCO_NAME: {
+         const char *name = (const char*) literals[instrs[pc]];
+         debugBelch("BCO_NAME    \"%s\"\n ", name);
+         pc += 1;
+         break;
+      }
+
       default:
          barf("disInstr: unknown opcode %u", (unsigned int) instr);
    }
@@ -464,10 +471,9 @@ void disassemble( StgBCO *bco )
    StgWord16*     instrs  = (StgWord16*)(bco->instrs->payload);
    StgMutArrPtrs* ptrs    = bco->ptrs;
    uint32_t       nbcs    = (uint32_t)(bco->instrs->bytes / sizeof(StgWord16));
-   uint32_t       pc      = 1;
+   uint32_t       pc      = 0;
 
    debugBelch("BCO\n" );
-   pc = 0;
    while (pc < nbcs) {
       debugBelch("\t%2d:  ", pc );
       pc = disInstr ( bco, pc );


=====================================
rts/Interpreter.c
=====================================
@@ -1630,7 +1630,11 @@ run_BCO:
         case bci_SLIDE: {
             W_ n  = BCO_GET_LARGE_ARG;
             W_ by = BCO_GET_LARGE_ARG;
-            /* a_1, .. a_n, b_1, .. b_by, s => a_1, .. a_n, s */
+            /*
+             * a_1 ... a_n, b_1 ... b_by, k
+             *           =>
+             * a_1 ... a_n, k
+             */
             while(n-- > 0) {
                 SpW(n+by) = SpW(n);
             }
@@ -1757,7 +1761,6 @@ run_BCO:
             // n_nptrs=1, n_ptrs=0.
             ASSERT(n_ptrs + n_nptrs == n_words || (n_nptrs == 1 && n_ptrs == 0));
             ASSERT(n_ptrs + n_nptrs > 0);
-            //ASSERT(n_words > 0); // We shouldn't ever need to allocate nullary constructors
             for (W_ i = 0; i < n_words; i++) {
                 con->payload[i] = (StgClosure*)SpW(i);
             }
@@ -1781,6 +1784,7 @@ run_BCO:
         case bci_TESTLT_P: {
             unsigned int discr  = BCO_NEXT;
             int failto = BCO_GET_LARGE_ARG;
+            ASSERT(discr <= TAG_MASK);
             StgClosure* con = UNTAG_CLOSURE((StgClosure*)SpW(0));
             if (GET_TAG(con) >= discr) {
                 bciPtr = failto;
@@ -1791,6 +1795,7 @@ run_BCO:
         case bci_TESTEQ_P: {
             unsigned int discr  = BCO_NEXT;
             int failto = BCO_GET_LARGE_ARG;
+            ASSERT(discr <= TAG_MASK);
             StgClosure* con = UNTAG_CLOSURE((StgClosure*)SpW(0));
             if (GET_TAG(con) != discr) {
                 bciPtr = failto;
@@ -2081,6 +2086,10 @@ run_BCO:
             goto do_return_nonpointer;
         }
 
+        case bci_BCO_NAME:
+            BCO_NEXT;
+            goto nextInsn;
+
         case bci_SWIZZLE: {
             W_ stkoff = BCO_GET_LARGE_ARG;
             StgInt n = BCO_GET_LARGE_ARG;


=====================================
rts/include/rts/Bytecodes.h
=====================================
@@ -112,6 +112,8 @@
 
 #define bci_PRIMCALL                    87
 
+#define bci_BCO_NAME                    88
+
 /* If you need to go past 255 then you will run into the flags */
 
 /* If you need to go below 0x0100 then you will run into the instructions */


=====================================
testsuite/driver/testlib.py
=====================================
@@ -3000,12 +3000,12 @@ def normalise_prof (s: str) -> str:
     # Source locations from internal libraries, remove the source location
     # > libraries/ghc-internal/src/path/Foo.hs:204:1-18
     # => ghc-internal/src/path/Foo.hs
-    s = re.sub('\slibraries/(\S+)(:\S+){2}\s',' \\1 ', s)
+    s = re.sub(r'\slibraries/(\S+)(:\S+){2}\s', r' \1 ', s)
 
     # Source locations from internal libraries, remove the source location
     # > libraries/ghc-internal/src/path/Foo.hs::(2,1)-(5,38)
     # => ghc-internal/src/path/Foo.hs
-    s = re.sub('\slibraries/(\S+)(:\S+){1}\s',' \\1 ', s)
+    s = re.sub(r'\slibraries/(\S+)(:\S+){1}\s', r' \1 ', s)
 
     # We have something like this:
     #


=====================================
testsuite/tests/codeGen/should_run/T23146/all.T
=====================================
@@ -1,4 +1,4 @@
-test('T23146', expect_broken_for(23060, ghci_ways), compile_and_run, [''])
+test('T23146', normal, compile_and_run, [''])
 test('T23146_lifted', normal, compile_and_run, [''])
-test('T23146_liftedeq', expect_broken_for(23060, ghci_ways), compile_and_run, [''])
+test('T23146_liftedeq', normal, compile_and_run, [''])
 test('T23146_lifted_unlifted', normal, compile_and_run, [''])



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/f641356616a30753abb2c5e9a6d136532d5407d8...2d806c856dc7747c663ffdfa88f4ec36073b3077

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/f641356616a30753abb2c5e9a6d136532d5407d8...2d806c856dc7747c663ffdfa88f4ec36073b3077
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20241108/af94bfd1/attachment-0001.html>


More information about the ghc-commits mailing list