[Git][ghc/ghc][wip/T23210] 8 commits: testsuite: Fix badly escaped literals
Ben Gamari (@bgamari)
gitlab at gitlab.haskell.org
Fri Nov 8 17:10:38 UTC 2024
Ben Gamari pushed to branch wip/T23210 at Glasgow Haskell Compiler / GHC
Commits:
42f1801d by Ben Gamari at 2024-11-01T15:04:01-04:00
testsuite: Fix badly escaped literals
Use raw string literals to ensure that `\s` is correctly interpreted as
a character class.
- - - - -
29f4d34d by Ben Gamari at 2024-11-01T15:04:01-04:00
rts: Improve documentation of SLIDE bytecode instruction
- - - - -
2f1a492a by Ben Gamari at 2024-11-01T15:04:01-04:00
rts/Interpreter: Assert that TEST*_P discriminators are valid
- - - - -
4d7afaaa by Ben Gamari at 2024-11-01T15:04:01-04:00
rts/Interpreter: Improve documentation of TEST*_P instructions
- - - - -
5f7c2d3e by Ben Gamari at 2024-11-01T15:04:01-04:00
rts: Annotate BCOs with their Name
- - - - -
d14d3b82 by Ben Gamari at 2024-11-08T12:08:42-05:00
StgToByteCode: Don't assume that data con workers are nullary
Previously StgToByteCode assumed that all data-con workers were of a
nullary representation. This is not a valid assumption, as seen
in #23210, where an unsaturated application of a unary data
constructor's worker resulted in invalid bytecode. Sadly, I have not yet
been able to reduce a minimal testcase for this.
Fixes #23210.
- - - - -
250e70bd by Ben Gamari at 2024-11-08T12:08:42-05:00
StgToByteCode: Fix handling of Addr# literals
Previously we assumed that all unlifted types were Addr#.
- - - - -
2d806c85 by Ben Gamari at 2024-11-08T12:08:42-05:00
testsuite: Mark T23146* as unbroken
- - - - -
9 changed files:
- compiler/GHC/ByteCode/Asm.hs
- compiler/GHC/ByteCode/Instr.hs
- compiler/GHC/Stg/Utils.hs
- compiler/GHC/StgToByteCode.hs
- rts/Disassembler.c
- rts/Interpreter.c
- rts/include/rts/Bytecodes.h
- testsuite/driver/testlib.py
- testsuite/tests/codeGen/should_run/T23146/all.T
Changes:
=====================================
compiler/GHC/ByteCode/Asm.hs
=====================================
@@ -106,7 +106,7 @@ assembleBCOs interp profile proto_bcos tycons top_strs modbreaks spt_entries = d
bcos' <- mallocStrings interp bcos
return CompiledByteCode
{ bc_bcos = bcos'
- , bc_itbls = itblenv
+ , bc_itbls = itblenv
, bc_ffis = concatMap protoBCOFFIs proto_bcos
, bc_strs = top_strs
, bc_breaks = modbreaks
@@ -178,11 +178,12 @@ assembleOneBCO interp profile pbco = do
return ubco'
assembleBCO :: Platform -> ProtoBCO Name -> IO UnlinkedBCO
-assembleBCO platform (ProtoBCO { protoBCOName = nm
- , protoBCOInstrs = instrs
- , protoBCOBitmap = bitmap
- , protoBCOBitmapSize = bsize
- , protoBCOArity = arity }) = do
+assembleBCO platform
+ (ProtoBCO { protoBCOName = nm
+ , protoBCOInstrs = instrs
+ , protoBCOBitmap = bitmap
+ , protoBCOBitmapSize = bsize
+ , protoBCOArity = arity }) = do
-- pass 1: collect up the offsets of the local labels.
let asm = mapM_ (assembleI platform) instrs
@@ -527,6 +528,8 @@ assembleI platform i = case i of
, SmallOp tickx, SmallOp infox
, Op np
]
+ BCO_NAME name -> do np <- lit [BCONPtrStr name]
+ emit bci_BCO_NAME [Op np]
where
literal (LitLabel fs _) = litlabel fs
=====================================
compiler/GHC/ByteCode/Instr.hs
=====================================
@@ -27,6 +27,8 @@ import GHC.Runtime.Heap.Layout ( StgWord )
import Data.Int
import Data.Word
+import Data.ByteString (ByteString)
+
import GHC.Stack.CCS (CostCentre)
import GHC.Stg.Syntax
@@ -130,7 +132,18 @@ data BCInstr
| PUSH_APPLY_PPPPP
| PUSH_APPLY_PPPPPP
- | SLIDE !WordOff{-this many-} !WordOff{-down by this much-}
+ -- | Drop entries @(n, n+by]@ entries from the stack. Graphically:
+ -- @
+ -- a_1 ← top
+ -- ...
+ -- a_n
+ -- b_1 => a_1 ← top
+ -- ... ...
+ -- b_by a_n
+ -- k k
+ -- @
+ | SLIDE !WordOff -- ^ n = this many
+ !WordOff -- ^ by = down by this much
-- To do with the heap
| ALLOC_AP !HalfWord {- make an AP with this many payload words.
@@ -175,7 +188,12 @@ data BCInstr
-- The Word16 value is a constructor number and therefore
-- stored in the insn stream rather than as an offset into
-- the literal pool.
+
+ -- | Test whether the tag of a closure pointer is less than the given value.
+ -- If not, jump to the given label.
| TESTLT_P !Word16 LocalLabel
+ -- | Test whether the tag of a closure pointer is equal to the given value.
+ -- If not, jump to the given label.
| TESTEQ_P !Word16 LocalLabel
| CASEFAIL
@@ -213,6 +231,10 @@ data BCInstr
!Word16 -- breakpoint info index
(RemotePtr CostCentre)
+ -- | A "meta"-instruction for recording the name of a BCO for debugging purposes.
+ -- These are ignored by the interpreter but helpfully printed by the disassmbler.
+ | BCO_NAME !ByteString
+
-- -----------------------------------------------------------------------------
-- Printing bytecode instructions
@@ -367,6 +389,7 @@ instance Outputable BCInstr where
<+> text "<tick_module>" <+> ppr tickx
<+> text "<info_module>" <+> ppr infox
<+> text "<cc>"
+ ppr (BCO_NAME nm) = text "BCO_NAME" <+> text (show nm)
@@ -471,3 +494,4 @@ bciStackUse SLIDE{} = 0
bciStackUse MKAP{} = 0
bciStackUse MKPAP{} = 0
bciStackUse PACK{} = 1 -- worst case is PACK 0 words
+bciStackUse BCO_NAME{} = 0
=====================================
compiler/GHC/Stg/Utils.hs
=====================================
@@ -11,6 +11,7 @@ module GHC.Stg.Utils
, mkUnarisedId, mkUnarisedIds
, allowTopLevelConApp
+ , hasNoNonZeroWidthArgs
) where
import GHC.Prelude
@@ -19,6 +20,7 @@ import GHC.Platform
import GHC.Types.Id
import GHC.Core.Type
import GHC.Core.TyCon
+import GHC.Core.Multiplicity ( scaledThing )
import GHC.Core.DataCon
import GHC.Core ( AltCon(..) )
import GHC.Types.Tickish
@@ -35,6 +37,13 @@ import GHC.Utils.Panic
import GHC.Data.FastString
+-- | Returns whether there are any arguments with a non-zero-width runtime
+-- representation.
+--
+-- Returns True if the datacon has no or /just/ zero-width arguments.
+hasNoNonZeroWidthArgs :: DataCon -> Bool
+hasNoNonZeroWidthArgs = all (isZeroBitTy . scaledThing) . dataConRepArgTys
+
mkUnarisedIds :: MonadUnique m => FastString -> [NvUnaryType] -> m [Id]
mkUnarisedIds fs tys = mapM (mkUnarisedId fs) tys
=====================================
compiler/GHC/StgToByteCode.hs
=====================================
@@ -81,6 +81,7 @@ import GHC.Unit.Home.ModInfo (lookupHpt)
import Data.Array
import Data.Coerce (coerce)
import Data.ByteString (ByteString)
+import qualified Data.ByteString.Char8 as BS
import Data.Map (Map)
import Data.IntMap (IntMap)
import qualified Data.Map as Map
@@ -236,7 +237,8 @@ ppBCEnv p
-- Create a BCO and do a spot of peephole optimisation on the insns
-- at the same time.
mkProtoBCO
- :: Platform
+ :: (Outputable name)
+ => Platform
-> name
-> BCInstrList
-> Either [CgStgAlt] (CgStgRhs)
@@ -250,7 +252,7 @@ mkProtoBCO
mkProtoBCO platform nm instrs_ordlist origin arity bitmap_size bitmap is_ret ffis
= ProtoBCO {
protoBCOName = nm,
- protoBCOInstrs = maybe_with_stack_check,
+ protoBCOInstrs = maybe_add_bco_name $ maybe_add_stack_check peep_d,
protoBCOBitmap = bitmap,
protoBCOBitmapSize = fromIntegral bitmap_size,
protoBCOArity = arity,
@@ -258,6 +260,10 @@ mkProtoBCO platform nm instrs_ordlist origin arity bitmap_size bitmap is_ret ffi
protoBCOFFIs = ffis
}
where
+ maybe_add_bco_name instrs = BCO_NAME str : instrs
+ where
+ str = BS.pack $ showSDocOneLine defaultSDocContext (ppr nm)
+
-- Overestimate the stack usage (in words) of this BCO,
-- and if >= iNTERP_STACK_CHECK_THRESH, add an explicit
-- stack check. (The interpreter always does a stack check
@@ -265,17 +271,17 @@ mkProtoBCO platform nm instrs_ordlist origin arity bitmap_size bitmap is_ret ffi
-- BCO anyway, so we only need to add an explicit one in the
-- (hopefully rare) cases when the (overestimated) stack use
-- exceeds iNTERP_STACK_CHECK_THRESH.
- maybe_with_stack_check
- | is_ret && stack_usage < fromIntegral (pc_AP_STACK_SPLIM (platformConstants platform)) = peep_d
+ maybe_add_stack_check instrs
+ | is_ret && stack_usage < fromIntegral (pc_AP_STACK_SPLIM (platformConstants platform)) = instrs
-- don't do stack checks at return points,
-- everything is aggregated up to the top BCO
-- (which must be a function).
-- That is, unless the stack usage is >= AP_STACK_SPLIM,
-- see bug #1466.
| stack_usage >= fromIntegral iNTERP_STACK_CHECK_THRESH
- = STKCHECK stack_usage : peep_d
+ = STKCHECK stack_usage : instrs
| otherwise
- = peep_d -- the supposedly common case
+ = instrs -- the supposedly common case
-- We assume that this sum doesn't wrap
stack_usage = sum (map bciStackUse peep_d)
@@ -1926,20 +1932,18 @@ pushAtom d p (StgVarArg var)
-- PUSH_G doesn't tag constructors. So we use PACK here
-- if we are dealing with nullary constructor.
case isDataConWorkId_maybe var of
- Just con -> do
- massert (isNullaryRepDataCon con)
- return (unitOL (PACK con 0), szb)
+ Just con
+ -- See Note [LFInfo of DataCon workers and wrappers] in GHC.Types.Id.Make.
+ | isNullaryRepDataCon con -> do
+ return (unitOL (PACK con 0), szb)
- Nothing
-- see Note [Generating code for top-level string literal bindings]
- | isUnliftedType (idType var) -> do
- massert (idType var `eqType` addrPrimTy)
+ _ | idType var `eqType` addrPrimTy ->
return (unitOL (PUSH_ADDR (getName var)), szb)
| otherwise -> do
return (unitOL (PUSH_G (getName var)), szb)
-
pushAtom _ _ (StgLitArg lit) = pushLiteral True lit
pushLiteral :: Bool -> Literal -> BcM (BCInstrList, ByteOff)
=====================================
rts/Disassembler.c
=====================================
@@ -452,6 +452,13 @@ disInstr ( StgBCO *bco, int pc )
debugBelch("RETURN_T\n ");
break;
+ case bci_BCO_NAME: {
+ const char *name = (const char*) literals[instrs[pc]];
+ debugBelch("BCO_NAME \"%s\"\n ", name);
+ pc += 1;
+ break;
+ }
+
default:
barf("disInstr: unknown opcode %u", (unsigned int) instr);
}
@@ -464,10 +471,9 @@ void disassemble( StgBCO *bco )
StgWord16* instrs = (StgWord16*)(bco->instrs->payload);
StgMutArrPtrs* ptrs = bco->ptrs;
uint32_t nbcs = (uint32_t)(bco->instrs->bytes / sizeof(StgWord16));
- uint32_t pc = 1;
+ uint32_t pc = 0;
debugBelch("BCO\n" );
- pc = 0;
while (pc < nbcs) {
debugBelch("\t%2d: ", pc );
pc = disInstr ( bco, pc );
=====================================
rts/Interpreter.c
=====================================
@@ -1630,7 +1630,11 @@ run_BCO:
case bci_SLIDE: {
W_ n = BCO_GET_LARGE_ARG;
W_ by = BCO_GET_LARGE_ARG;
- /* a_1, .. a_n, b_1, .. b_by, s => a_1, .. a_n, s */
+ /*
+ * a_1 ... a_n, b_1 ... b_by, k
+ * =>
+ * a_1 ... a_n, k
+ */
while(n-- > 0) {
SpW(n+by) = SpW(n);
}
@@ -1757,7 +1761,6 @@ run_BCO:
// n_nptrs=1, n_ptrs=0.
ASSERT(n_ptrs + n_nptrs == n_words || (n_nptrs == 1 && n_ptrs == 0));
ASSERT(n_ptrs + n_nptrs > 0);
- //ASSERT(n_words > 0); // We shouldn't ever need to allocate nullary constructors
for (W_ i = 0; i < n_words; i++) {
con->payload[i] = (StgClosure*)SpW(i);
}
@@ -1781,6 +1784,7 @@ run_BCO:
case bci_TESTLT_P: {
unsigned int discr = BCO_NEXT;
int failto = BCO_GET_LARGE_ARG;
+ ASSERT(discr <= TAG_MASK);
StgClosure* con = UNTAG_CLOSURE((StgClosure*)SpW(0));
if (GET_TAG(con) >= discr) {
bciPtr = failto;
@@ -1791,6 +1795,7 @@ run_BCO:
case bci_TESTEQ_P: {
unsigned int discr = BCO_NEXT;
int failto = BCO_GET_LARGE_ARG;
+ ASSERT(discr <= TAG_MASK);
StgClosure* con = UNTAG_CLOSURE((StgClosure*)SpW(0));
if (GET_TAG(con) != discr) {
bciPtr = failto;
@@ -2081,6 +2086,10 @@ run_BCO:
goto do_return_nonpointer;
}
+ case bci_BCO_NAME:
+ BCO_NEXT;
+ goto nextInsn;
+
case bci_SWIZZLE: {
W_ stkoff = BCO_GET_LARGE_ARG;
StgInt n = BCO_GET_LARGE_ARG;
=====================================
rts/include/rts/Bytecodes.h
=====================================
@@ -112,6 +112,8 @@
#define bci_PRIMCALL 87
+#define bci_BCO_NAME 88
+
/* If you need to go past 255 then you will run into the flags */
/* If you need to go below 0x0100 then you will run into the instructions */
=====================================
testsuite/driver/testlib.py
=====================================
@@ -3000,12 +3000,12 @@ def normalise_prof (s: str) -> str:
# Source locations from internal libraries, remove the source location
# > libraries/ghc-internal/src/path/Foo.hs:204:1-18
# => ghc-internal/src/path/Foo.hs
- s = re.sub('\slibraries/(\S+)(:\S+){2}\s',' \\1 ', s)
+ s = re.sub(r'\slibraries/(\S+)(:\S+){2}\s', r' \1 ', s)
# Source locations from internal libraries, remove the source location
# > libraries/ghc-internal/src/path/Foo.hs::(2,1)-(5,38)
# => ghc-internal/src/path/Foo.hs
- s = re.sub('\slibraries/(\S+)(:\S+){1}\s',' \\1 ', s)
+ s = re.sub(r'\slibraries/(\S+)(:\S+){1}\s', r' \1 ', s)
# We have something like this:
#
=====================================
testsuite/tests/codeGen/should_run/T23146/all.T
=====================================
@@ -1,4 +1,4 @@
-test('T23146', expect_broken_for(23060, ghci_ways), compile_and_run, [''])
+test('T23146', normal, compile_and_run, [''])
test('T23146_lifted', normal, compile_and_run, [''])
-test('T23146_liftedeq', expect_broken_for(23060, ghci_ways), compile_and_run, [''])
+test('T23146_liftedeq', normal, compile_and_run, [''])
test('T23146_lifted_unlifted', normal, compile_and_run, [''])
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/f641356616a30753abb2c5e9a6d136532d5407d8...2d806c856dc7747c663ffdfa88f4ec36073b3077
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/f641356616a30753abb2c5e9a6d136532d5407d8...2d806c856dc7747c663ffdfa88f4ec36073b3077
You're receiving this email because of your account on gitlab.haskell.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20241108/af94bfd1/attachment-0001.html>
More information about the ghc-commits
mailing list