[Git][ghc/ghc][master] Support large stack frames/offsets in GHCi bytecode interpreter

Marge Bot (@marge-bot) gitlab at gitlab.haskell.org
Mon Jun 26 17:15:03 UTC 2023



Marge Bot pushed to branch master at Glasgow Haskell Compiler / GHC


Commits:
564164ef by Luite Stegeman at 2023-06-24T10:27:29+09:00
Support large stack frames/offsets in GHCi bytecode interpreter

Bytecode instructions like PUSH_L (push a local variable) contain
an operand that refers to the stack slot. Before this patch, the
operand type was SmallOp (Word16), limiting the maximum stack
offset to 65535 words. This could cause compiler panics in some
cases (See #22888).

This patch changes the operand type for stack offsets from
SmallOp to Op, removing the stack offset limit.

Fixes #22888

- - - - -


13 changed files:

- compiler/GHC/ByteCode/Asm.hs
- compiler/GHC/ByteCode/Instr.hs
- compiler/GHC/ByteCode/Types.hs
- compiler/GHC/CoreToIface.hs
- compiler/GHC/IfaceToCore.hs
- compiler/GHC/StgToByteCode.hs
- rts/Disassembler.c
- rts/Interpreter.c
- + testsuite/tests/ghci/should_run/LargeBCO.hs
- + testsuite/tests/ghci/should_run/LargeBCO.stdout
- + testsuite/tests/ghci/should_run/LargeBCO_A.hs
- + testsuite/tests/ghci/should_run/T22888.hs
- testsuite/tests/ghci/should_run/all.T


Changes:

=====================================
compiler/GHC/ByteCode/Asm.hs
=====================================
@@ -22,7 +22,7 @@ import GHC.ByteCode.InfoTable
 import GHC.ByteCode.Types
 import GHCi.RemoteTypes
 import GHC.Runtime.Interpreter
-import GHC.Runtime.Heap.Layout hiding ( WordOff )
+import GHC.Runtime.Heap.Layout ( fromStgWord, StgWord )
 
 import GHC.Types.Name
 import GHC.Types.Name.Set
@@ -199,8 +199,8 @@ assembleBCO platform (ProtoBCO { protoBCOName       = nm
       -- this BCO to be long.
       (n_insns0, lbl_map0) = inspectAsm platform False initial_offset asm
       ((n_insns, lbl_map), long_jumps)
-        | isLarge (fromIntegral $ Map.size lbl_map0)
-          || isLarge n_insns0
+        | isLargeW (fromIntegral $ Map.size lbl_map0)
+          || isLargeW n_insns0
                     = (inspectAsm platform True initial_offset asm, True)
         | otherwise = ((n_insns0, lbl_map0), False)
 
@@ -229,7 +229,7 @@ assembleBCO platform (ProtoBCO { protoBCOName       = nm
 
   return ul_bco
 
-mkBitmapArray :: Word16 -> [StgWord] -> UArray Int Word64
+mkBitmapArray :: Word -> [StgWord] -> UArray Int Word64
 -- Here the return type must be an array of Words, not StgWords,
 -- because the underlying ByteArray# will end up as a component
 -- of a BCO object.
@@ -244,9 +244,21 @@ type AsmState = (SizedSeq Word16,
 
 data Operand
   = Op Word
+  | IOp Int
   | SmallOp Word16
   | LabelOp LocalLabel
--- (unused)  | LargeOp Word
+
+wOp :: WordOff -> Operand
+wOp = Op . fromIntegral
+
+bOp :: ByteOff -> Operand
+bOp = Op . fromIntegral
+
+truncHalfWord :: Platform -> HalfWord -> Operand
+truncHalfWord platform w = case platformWordSize platform of
+  PW4 | w <= 65535      -> Op (fromIntegral w)
+  PW8 | w <= 4294967295 -> Op (fromIntegral w)
+  _ -> pprPanic "GHC.ByteCode.Asm.truncHalfWord" (ppr w)
 
 data Assembler a
   = AllocPtr (IO BCOPtr) (Word -> Assembler a)
@@ -287,9 +299,9 @@ type LabelEnv = LocalLabel -> Word
 largeOp :: Bool -> Operand -> Bool
 largeOp long_jumps op = case op of
    SmallOp _ -> False
-   Op w      -> isLarge w
+   Op w      -> isLargeW w
+   IOp i     -> isLargeI i
    LabelOp _ -> long_jumps
--- LargeOp _ -> True
 
 runAsm :: Platform -> Bool -> LabelEnv -> Assembler a -> StateT AsmState IO a
 runAsm platform long_jumps e = go
@@ -308,15 +320,15 @@ runAsm platform long_jumps e = go
       go $ k w
     go (AllocLabel _ k) = go k
     go (Emit w ops k) = do
-      let largeOps = any (largeOp long_jumps) ops
+      let largeArgs = any (largeOp long_jumps) ops
           opcode
-            | largeOps = largeArgInstr w
+            | largeArgs = largeArgInstr w
             | otherwise = w
           words = concatMap expand ops
           expand (SmallOp w) = [w]
           expand (LabelOp w) = expand (Op (e w))
-          expand (Op w) = if largeOps then largeArg platform (fromIntegral w) else [fromIntegral w]
---        expand (LargeOp w) = largeArg platform w
+          expand (Op w) = if largeArgs then largeArg platform (fromIntegral w) else [fromIntegral w]
+          expand (IOp i) = if largeArgs then largeArg platform (fromIntegral i) else [fromIntegral i]
       state $ \(st_i0,st_l0,st_p0) ->
         let st_i1 = addListToSS st_i0 (opcode : words)
         in ((), (st_i1,st_l0,st_p0))
@@ -350,7 +362,7 @@ inspectAsm platform long_jumps initial_offset
         count (SmallOp _) = 1
         count (LabelOp _) = count (Op 0)
         count (Op _) = if largeOps then largeArg16s platform else 1
---      count (LargeOp _) = largeArg16s platform
+        count (IOp _) = if largeOps then largeArg16s platform else 1
 
 -- Bring in all the bci_ bytecode constants.
 #include "Bytecodes.h"
@@ -379,15 +391,15 @@ assembleI :: Platform
           -> Assembler ()
 assembleI platform i = case i of
   STKCHECK n               -> emit bci_STKCHECK [Op n]
-  PUSH_L o1                -> emit bci_PUSH_L [SmallOp o1]
-  PUSH_LL o1 o2            -> emit bci_PUSH_LL [SmallOp o1, SmallOp o2]
-  PUSH_LLL o1 o2 o3        -> emit bci_PUSH_LLL [SmallOp o1, SmallOp o2, SmallOp o3]
-  PUSH8 o1                 -> emit bci_PUSH8 [SmallOp o1]
-  PUSH16 o1                -> emit bci_PUSH16 [SmallOp o1]
-  PUSH32 o1                -> emit bci_PUSH32 [SmallOp o1]
-  PUSH8_W o1               -> emit bci_PUSH8_W [SmallOp o1]
-  PUSH16_W o1              -> emit bci_PUSH16_W [SmallOp o1]
-  PUSH32_W o1              -> emit bci_PUSH32_W [SmallOp o1]
+  PUSH_L o1                -> emit bci_PUSH_L [wOp o1]
+  PUSH_LL o1 o2            -> emit bci_PUSH_LL [wOp o1, wOp o2]
+  PUSH_LLL o1 o2 o3        -> emit bci_PUSH_LLL [wOp o1, wOp o2, wOp o3]
+  PUSH8 o1                 -> emit bci_PUSH8 [bOp o1]
+  PUSH16 o1                -> emit bci_PUSH16 [bOp o1]
+  PUSH32 o1                -> emit bci_PUSH32 [bOp o1]
+  PUSH8_W o1               -> emit bci_PUSH8_W [bOp o1]
+  PUSH16_W o1              -> emit bci_PUSH16_W [bOp o1]
+  PUSH32_W o1              -> emit bci_PUSH32_W [bOp o1]
   PUSH_G nm                -> do p <- ptr (BCOPtrName nm)
                                  emit bci_PUSH_G [Op p]
   PUSH_PRIMOP op           -> do p <- ptr (BCOPtrPrimOp op)
@@ -419,7 +431,7 @@ assembleI platform i = case i of
   PUSH_UBX32 lit           -> do np <- literal lit
                                  emit bci_PUSH_UBX32 [Op np]
   PUSH_UBX lit nws         -> do np <- literal lit
-                                 emit bci_PUSH_UBX [Op np, SmallOp nws]
+                                 emit bci_PUSH_UBX [Op np, wOp nws]
 
   -- see Note [Generating code for top-level string literal bindings] in GHC.StgToByteCode
   PUSH_ADDR nm             -> do np <- lit [BCONPtrAddr nm]
@@ -437,15 +449,15 @@ assembleI platform i = case i of
   PUSH_APPLY_PPPPP         -> emit bci_PUSH_APPLY_PPPPP []
   PUSH_APPLY_PPPPPP        -> emit bci_PUSH_APPLY_PPPPPP []
 
-  SLIDE     n by           -> emit bci_SLIDE [SmallOp n, SmallOp by]
-  ALLOC_AP  n              -> emit bci_ALLOC_AP [SmallOp n]
-  ALLOC_AP_NOUPD n         -> emit bci_ALLOC_AP_NOUPD [SmallOp n]
-  ALLOC_PAP arity n        -> emit bci_ALLOC_PAP [SmallOp arity, SmallOp n]
-  MKAP      off sz         -> emit bci_MKAP [SmallOp off, SmallOp sz]
-  MKPAP     off sz         -> emit bci_MKPAP [SmallOp off, SmallOp sz]
-  UNPACK    n              -> emit bci_UNPACK [SmallOp n]
+  SLIDE     n by           -> emit bci_SLIDE [wOp n, wOp by]
+  ALLOC_AP  n              -> emit bci_ALLOC_AP [truncHalfWord platform n]
+  ALLOC_AP_NOUPD n         -> emit bci_ALLOC_AP_NOUPD [truncHalfWord platform n]
+  ALLOC_PAP arity n        -> emit bci_ALLOC_PAP [truncHalfWord platform arity, truncHalfWord platform n]
+  MKAP      off sz         -> emit bci_MKAP [wOp off, truncHalfWord platform sz]
+  MKPAP     off sz         -> emit bci_MKPAP [wOp off, truncHalfWord platform sz]
+  UNPACK    n              -> emit bci_UNPACK [wOp n]
   PACK      dcon sz        -> do itbl_no <- lit [BCONPtrItbl (getName dcon)]
-                                 emit bci_PACK [Op itbl_no, SmallOp sz]
+                                 emit bci_PACK [Op itbl_no, wOp sz]
   LABEL     lbl            -> label lbl
   TESTLT_I  i l            -> do np <- int i
                                  emit bci_TESTLT_I [Op np, LabelOp l]
@@ -498,13 +510,13 @@ assembleI platform i = case i of
   TESTLT_P  i l            -> emit bci_TESTLT_P [SmallOp i, LabelOp l]
   TESTEQ_P  i l            -> emit bci_TESTEQ_P [SmallOp i, LabelOp l]
   CASEFAIL                 -> emit bci_CASEFAIL []
-  SWIZZLE   stkoff n       -> emit bci_SWIZZLE [SmallOp stkoff, SmallOp n]
+  SWIZZLE   stkoff n       -> emit bci_SWIZZLE [wOp stkoff, IOp n]
   JMP       l              -> emit bci_JMP [LabelOp l]
   ENTER                    -> emit bci_ENTER []
   RETURN rep               -> emit (return_non_tuple rep) []
   RETURN_TUPLE             -> emit bci_RETURN_T []
   CCALL off m_addr i       -> do np <- addr m_addr
-                                 emit bci_CCALL [SmallOp off, Op np, SmallOp i]
+                                 emit bci_CCALL [wOp off, Op np, SmallOp i]
   PRIMCALL                 -> emit bci_PRIMCALL []
   BRK_FUN index uniq cc    -> do p1 <- ptr BCOPtrBreakArray
                                  q <- int (getKey uniq)
@@ -556,8 +568,11 @@ assembleI platform i = case i of
     words ws = lit (map BCONPtrWord ws)
     word w = words [w]
 
-isLarge :: Word -> Bool
-isLarge n = n > 65535
+isLargeW :: Word -> Bool
+isLargeW n = n > 65535
+
+isLargeI :: Int -> Bool
+isLargeI n = n > 32767 || n < -32768
 
 push_alts :: ArgRep -> Word16
 push_alts V   = bci_PUSH_ALTS_V


=====================================
compiler/GHC/ByteCode/Instr.hs
=====================================
@@ -23,7 +23,7 @@ import GHC.Types.Unique
 import GHC.Types.Literal
 import GHC.Core.DataCon
 import GHC.Builtin.PrimOps
-import GHC.Runtime.Heap.Layout
+import GHC.Runtime.Heap.Layout ( StgWord )
 
 import Data.Int
 import Data.Word
@@ -41,7 +41,7 @@ data ProtoBCO a
         protoBCOInstrs     :: [BCInstr],  -- instrs
         -- arity and GC info
         protoBCOBitmap     :: [StgWord],
-        protoBCOBitmapSize :: Word16,
+        protoBCOBitmapSize :: Word,
         protoBCOArity      :: Int,
         -- what the BCO came from, for debugging only
         protoBCOExpr       :: Either [CgStgAlt] CgStgRhs,
@@ -58,18 +58,18 @@ instance Outputable LocalLabel where
 
 data BCInstr
    -- Messing with the stack
-   = STKCHECK  Word
+   = STKCHECK  !Word
 
    -- Push locals (existing bits of the stack)
-   | PUSH_L    !Word16{-offset-}
-   | PUSH_LL   !Word16 !Word16{-2 offsets-}
-   | PUSH_LLL  !Word16 !Word16 !Word16{-3 offsets-}
+   | PUSH_L    !WordOff{-offset-}
+   | PUSH_LL   !WordOff !WordOff{-2 offsets-}
+   | PUSH_LLL  !WordOff !WordOff !WordOff{-3 offsets-}
 
    -- Push the specified local as a 8, 16, 32 bit value onto the stack. (i.e.,
    -- the stack will grow by 8, 16 or 32 bits)
-   | PUSH8  !Word16
-   | PUSH16 !Word16
-   | PUSH32 !Word16
+   | PUSH8  !ByteOff
+   | PUSH16 !ByteOff
+   | PUSH32 !ByteOff
 
    -- Push the specified local as a 8, 16, 32 bit value onto the stack, but the
    -- value will take the whole word on the stack (i.e., the stack will grow by
@@ -78,9 +78,9 @@ data BCInstr
    -- Currently we expect all values on the stack to take full words, except for
    -- the ones used for PACK (i.e., actually constructing new data types, in
    -- which case we use PUSH{8,16,32})
-   | PUSH8_W  !Word16
-   | PUSH16_W !Word16
-   | PUSH32_W !Word16
+   | PUSH8_W  !ByteOff
+   | PUSH16_W !ByteOff
+   | PUSH32_W !ByteOff
 
    -- Push a ptr  (these all map to PUSH_G really)
    | PUSH_G       Name
@@ -102,8 +102,8 @@ data BCInstr
    | PUSH_UBX8  Literal
    | PUSH_UBX16 Literal
    | PUSH_UBX32 Literal
-   | PUSH_UBX   Literal Word16
-        -- push this int/float/double/addr, on the stack. Word16
+   | PUSH_UBX   Literal !WordOff
+        -- push this int/float/double/addr, on the stack. Word
         -- is # of words to copy from literal pool.  Eitherness reflects
         -- the difficulty of dealing with MachAddr here, mostly due to
         -- the excessive (and unnecessary) restrictions imposed by the
@@ -129,58 +129,61 @@ data BCInstr
    | PUSH_APPLY_PPPPP
    | PUSH_APPLY_PPPPPP
 
-   | SLIDE     Word16{-this many-} Word16{-down by this much-}
+   | SLIDE     !WordOff{-this many-} !WordOff{-down by this much-}
 
    -- To do with the heap
-   | ALLOC_AP  !Word16 -- make an AP with this many payload words
-   | ALLOC_AP_NOUPD !Word16 -- make an AP_NOUPD with this many payload words
-   | ALLOC_PAP !Word16 !Word16 -- make a PAP with this arity / payload words
-   | MKAP      !Word16{-ptr to AP is this far down stack-} !Word16{-number of words-}
-   | MKPAP     !Word16{-ptr to PAP is this far down stack-} !Word16{-number of words-}
-   | UNPACK    !Word16 -- unpack N words from t.o.s Constr
-   | PACK      DataCon !Word16
+   | ALLOC_AP  !HalfWord {- make an AP with this many payload words.
+                            HalfWord matches the size of the n_args field in StgAP,
+                            make sure that we handle truncation when generating
+                            bytecode using this HalfWord type here -}
+   | ALLOC_AP_NOUPD !HalfWord -- make an AP_NOUPD with this many payload words
+   | ALLOC_PAP !HalfWord !HalfWord -- make a PAP with this arity / payload words
+   | MKAP      !WordOff{-ptr to AP is this far down stack-} !HalfWord{-number of words-}
+   | MKPAP     !WordOff{-ptr to PAP is this far down stack-} !HalfWord{-number of words-}
+   | UNPACK    !WordOff -- unpack N words from t.o.s Constr
+   | PACK      DataCon !WordOff
                         -- after assembly, the DataCon is an index into the
                         -- itbl array
    -- For doing case trees
    | LABEL     LocalLabel
-   | TESTLT_I  Int    LocalLabel
-   | TESTEQ_I  Int    LocalLabel
-   | TESTLT_W  Word   LocalLabel
-   | TESTEQ_W  Word   LocalLabel
-   | TESTLT_I64 Int64 LocalLabel
-   | TESTEQ_I64 Int64 LocalLabel
-   | TESTLT_I32 Int32 LocalLabel
-   | TESTEQ_I32 Int32 LocalLabel
-   | TESTLT_I16 Int16 LocalLabel
-   | TESTEQ_I16 Int16 LocalLabel
-   | TESTLT_I8 Int8   LocalLabel
-   | TESTEQ_I8 Int16  LocalLabel
-   | TESTLT_W64 Word64 LocalLabel
-   | TESTEQ_W64 Word64 LocalLabel
-   | TESTLT_W32 Word32 LocalLabel
-   | TESTEQ_W32 Word32 LocalLabel
-   | TESTLT_W16 Word16 LocalLabel
-   | TESTEQ_W16 Word16 LocalLabel
-   | TESTLT_W8 Word8 LocalLabel
-   | TESTEQ_W8 Word8 LocalLabel
-   | TESTLT_F  Float  LocalLabel
-   | TESTEQ_F  Float  LocalLabel
-   | TESTLT_D  Double LocalLabel
-   | TESTEQ_D  Double LocalLabel
+   | TESTLT_I   !Int    LocalLabel
+   | TESTEQ_I   !Int    LocalLabel
+   | TESTLT_W   !Word   LocalLabel
+   | TESTEQ_W   !Word   LocalLabel
+   | TESTLT_I64 !Int64  LocalLabel
+   | TESTEQ_I64 !Int64  LocalLabel
+   | TESTLT_I32 !Int32  LocalLabel
+   | TESTEQ_I32 !Int32  LocalLabel
+   | TESTLT_I16 !Int16  LocalLabel
+   | TESTEQ_I16 !Int16  LocalLabel
+   | TESTLT_I8  !Int8   LocalLabel
+   | TESTEQ_I8  !Int16  LocalLabel
+   | TESTLT_W64 !Word64 LocalLabel
+   | TESTEQ_W64 !Word64 LocalLabel
+   | TESTLT_W32 !Word32 LocalLabel
+   | TESTEQ_W32 !Word32 LocalLabel
+   | TESTLT_W16 !Word16 LocalLabel
+   | TESTEQ_W16 !Word16 LocalLabel
+   | TESTLT_W8  !Word8  LocalLabel
+   | TESTEQ_W8  !Word8  LocalLabel
+   | TESTLT_F   !Float  LocalLabel
+   | TESTEQ_F   !Float  LocalLabel
+   | TESTLT_D   !Double LocalLabel
+   | TESTEQ_D   !Double LocalLabel
 
    -- The Word16 value is a constructor number and therefore
    -- stored in the insn stream rather than as an offset into
    -- the literal pool.
-   | TESTLT_P  Word16 LocalLabel
-   | TESTEQ_P  Word16 LocalLabel
+   | TESTLT_P  !Word16 LocalLabel
+   | TESTEQ_P  !Word16 LocalLabel
 
    | CASEFAIL
    | JMP              LocalLabel
 
    -- For doing calls to C (via glue code generated by libffi)
-   | CCALL            Word16    -- stack frame size
+   | CCALL            !WordOff  -- stack frame size
                       (RemotePtr C_ffi_cif) -- addr of the glue code
-                      Word16    -- flags.
+                      !Word16   -- flags.
                                 --
                                 -- 0x1: call is interruptible
                                 -- 0x2: call is unsafe
@@ -191,8 +194,8 @@ data BCInstr
    | PRIMCALL
 
    -- For doing magic ByteArray passing to foreign calls
-   | SWIZZLE          Word16 -- to the ptr N words down the stack,
-                      Word16 -- add M (interpreted as a signed 16-bit entity)
+   | SWIZZLE          !WordOff -- to the ptr N words down the stack,
+                      !Int     -- add M
 
    -- To Infinity And Beyond
    | ENTER
@@ -202,7 +205,7 @@ data BCInstr
                    -- Note [unboxed tuple bytecodes and tuple_BCO] in GHC.StgToByteCode
 
    -- Breakpoints
-   | BRK_FUN          Word16 Unique (RemotePtr CostCentre)
+   | BRK_FUN         !Word16 Unique (RemotePtr CostCentre)
 
 -- -----------------------------------------------------------------------------
 -- Printing bytecode instructions


=====================================
compiler/GHC/ByteCode/Types.hs
=====================================
@@ -11,7 +11,7 @@ module GHC.ByteCode.Types
   , FFIInfo(..)
   , RegBitmap(..)
   , NativeCallType(..), NativeCallInfo(..), voidTupleReturnInfo, voidPrimCallInfo
-  , ByteOff(..), WordOff(..)
+  , ByteOff(..), WordOff(..), HalfWord(..)
   , UnlinkedBCO(..), BCOPtr(..), BCONPtr(..)
   , ItblEnv, ItblPtr(..)
   , AddrEnv, AddrPtr(..)
@@ -79,6 +79,12 @@ newtype ByteOff = ByteOff Int
 newtype WordOff = WordOff Int
     deriving (Enum, Eq, Show, Integral, Num, Ord, Real, Outputable)
 
+-- A type for values that are half the size of a word on the target
+-- platform where the interpreter runs (which may be a different
+-- wordsize than the compiler).
+newtype HalfWord = HalfWord Word
+    deriving (Enum, Eq, Show, Integral, Num, Ord, Real, Outputable)
+
 newtype RegBitmap = RegBitmap { unRegBitmap :: Word32 }
     deriving (Enum, Eq, Show, Integral, Num, Ord, Real, Bits, FiniteBits, Outputable)
 
@@ -188,7 +194,7 @@ instance NFData BCONPtr where
 data CgBreakInfo
    = CgBreakInfo
    { cgb_tyvars :: ![IfaceTvBndr] -- ^ Type variables in scope at the breakpoint
-   , cgb_vars   :: ![Maybe (IfaceIdBndr, Word16)]
+   , cgb_vars   :: ![Maybe (IfaceIdBndr, Word)]
    , cgb_resty  :: !IfaceType
    }
 -- See Note [Syncing breakpoint info] in GHC.Runtime.Eval


=====================================
compiler/GHC/CoreToIface.hs
=====================================
@@ -49,8 +49,6 @@ module GHC.CoreToIface
 
 import GHC.Prelude
 
-import Data.Word
-
 import GHC.StgToCmm.Types
 
 import GHC.ByteCode.Types
@@ -698,7 +696,7 @@ toIfaceLFInfo nm lfi = case lfi of
 
 -- Dehydrating CgBreakInfo
 
-dehydrateCgBreakInfo :: [TyVar] -> [Maybe (Id, Word16)] -> Type -> CgBreakInfo
+dehydrateCgBreakInfo :: [TyVar] -> [Maybe (Id, Word)] -> Type -> CgBreakInfo
 dehydrateCgBreakInfo ty_vars idOffSets tick_ty =
           CgBreakInfo
             { cgb_tyvars = map toIfaceTvBndr ty_vars


=====================================
compiler/GHC/IfaceToCore.hs
=====================================
@@ -35,8 +35,6 @@ import GHC.Prelude
 
 import GHC.ByteCode.Types
 
-import Data.Word
-
 import GHC.Driver.Env
 import GHC.Driver.Session
 import GHC.Driver.Config.Core.Lint ( initLintConfig )
@@ -2164,7 +2162,7 @@ bindIfaceTyConBinderX bind_tv (Bndr tv vis) thing_inside
 
 -- CgBreakInfo
 
-hydrateCgBreakInfo :: CgBreakInfo -> IfL ([Maybe (Id, Word16)], Type)
+hydrateCgBreakInfo :: CgBreakInfo -> IfL ([Maybe (Id, Word)], Type)
 hydrateCgBreakInfo CgBreakInfo{..} = do
   bindIfaceTyVars cgb_tyvars $ \_ -> do
     result_ty <- tcIfaceType cgb_resty


=====================================
compiler/GHC/StgToByteCode.hs
=====================================
@@ -242,7 +242,7 @@ mkProtoBCO
    -> Either  [CgStgAlt] (CgStgRhs)
                 -- ^ original expression; for debugging only
    -> Int       -- ^ arity
-   -> Word16    -- ^ bitmap size
+   -> WordOff   -- ^ bitmap size
    -> [StgWord] -- ^ bitmap
    -> Bool      -- ^ True <=> is a return point, rather than a function
    -> [FFIInfo]
@@ -252,7 +252,7 @@ mkProtoBCO platform nm instrs_ordlist origin arity bitmap_size bitmap is_ret ffi
         protoBCOName = nm,
         protoBCOInstrs = maybe_with_stack_check,
         protoBCOBitmap = bitmap,
-        protoBCOBitmapSize = bitmap_size,
+        protoBCOBitmapSize = fromIntegral bitmap_size,
         protoBCOArity = arity,
         protoBCOExpr = origin,
         protoBCOFFIs = ffis
@@ -396,7 +396,9 @@ schemeER_wrk d p (StgTick (Breakpoint tick_ty tick_no fvs) rhs)
         platform <- profilePlatform <$> getProfile
         let idOffSets = getVarOffSets platform d p fvs
             ty_vars   = tyCoVarsOfTypesWellScoped (tick_ty:map idType fvs)
-        let breakInfo = dehydrateCgBreakInfo ty_vars idOffSets tick_ty
+        let toWord :: Maybe (Id, WordOff) -> Maybe (Id, Word)
+            toWord = fmap (\(i, wo) -> (i, fromIntegral wo))
+            breakInfo  = dehydrateCgBreakInfo ty_vars (map toWord idOffSets) tick_ty
         newBreakInfo tick_no breakInfo
         hsc_env <- getHscEnv
         let cc | Just interp <- hsc_interp hsc_env
@@ -407,7 +409,7 @@ schemeER_wrk d p (StgTick (Breakpoint tick_ty tick_no fvs) rhs)
         return $ breakInstr `consOL` code
 schemeER_wrk d p rhs = schemeE d 0 p rhs
 
-getVarOffSets :: Platform -> StackDepth -> BCEnv -> [Id] -> [Maybe (Id, Word16)]
+getVarOffSets :: Platform -> StackDepth -> BCEnv -> [Id] -> [Maybe (Id, WordOff)]
 getVarOffSets platform depth env = map getOffSet
   where
     getOffSet id = case lookupBCEnv_maybe id env of
@@ -420,23 +422,9 @@ getVarOffSets platform depth env = map getOffSet
             -- this "adjustment" is needed due to stack manipulation for
             -- BRK_FUN in Interpreter.c In any case, this is used only when
             -- we trigger a breakpoint.
-            let !var_depth_ws =
-                    trunc16W $ bytesToWords platform (depth - offset) + 2
+            let !var_depth_ws = bytesToWords platform (depth - offset) + 2
             in Just (id, var_depth_ws)
 
-truncIntegral16 :: Integral a => a -> Word16
-truncIntegral16 w
-    | w > fromIntegral (maxBound :: Word16)
-    = panic "stack depth overflow"
-    | otherwise
-    = fromIntegral w
-
-trunc16B :: ByteOff -> Word16
-trunc16B = truncIntegral16
-
-trunc16W :: WordOff -> Word16
-trunc16W = truncIntegral16
-
 fvsToEnv :: BCEnv -> CgStgRhs -> [Id]
 -- Takes the free variables of a right-hand side, and
 -- delivers an ordered list of the local variables that will
@@ -493,7 +481,7 @@ returnUnliftedReps d s szb reps = do
                         PUSH_BCO tuple_bco `consOL`
                         unitOL RETURN_TUPLE
     return ( mkSlideB platform szb (d - s) -- clear to sequel
-             `appOL`  ret)                 -- go
+             `consOL` ret)                 -- go
 
 -- construct and return an unboxed tuple
 returnUnboxedTuple
@@ -557,7 +545,7 @@ schemeE d s p (StgLet _ext binds body) = do
          fvss  = map (fvsToEnv p') rhss
 
          -- Sizes of free vars
-         size_w = trunc16W . idSizeW platform
+         size_w = idSizeW platform
          sizes = map (\rhs_fvs -> sum (map size_w rhs_fvs)) fvss
 
          -- the arity of each rhs
@@ -576,13 +564,13 @@ schemeE d s p (StgLet _ext binds body) = do
          build_thunk
              :: StackDepth
              -> [Id]
-             -> Word16
+             -> WordOff
              -> ProtoBCO Name
-             -> Word16
-             -> Word16
+             -> WordOff
+             -> HalfWord
              -> BcM BCInstrList
          build_thunk _ [] size bco off arity
-            = return (PUSH_BCO bco `consOL` unitOL (mkap (off+size) size))
+            = return (PUSH_BCO bco `consOL` unitOL (mkap (off+size) (fromIntegral size)))
            where
                 mkap | arity == 0 = MKAP
                      | otherwise  = MKPAP
@@ -594,9 +582,9 @@ schemeE d s p (StgLet _ext binds body) = do
 
          alloc_code = toOL (zipWith mkAlloc sizes arities)
            where mkAlloc sz 0
-                    | is_tick     = ALLOC_AP_NOUPD sz
-                    | otherwise   = ALLOC_AP sz
-                 mkAlloc sz arity = ALLOC_PAP arity sz
+                    | is_tick     = ALLOC_AP_NOUPD (fromIntegral sz)
+                    | otherwise   = ALLOC_AP (fromIntegral sz)
+                 mkAlloc sz arity = ALLOC_PAP arity (fromIntegral sz)
 
          is_tick = case binds of
                      StgNonRec id _ -> occNameFS (getOccName id) == tickFS
@@ -607,7 +595,7 @@ schemeE d s p (StgLet _ext binds body) = do
                 build_thunk d' fvs size bco off arity
 
          compile_binds =
-            [ compile_bind d' fvs x rhs size arity (trunc16W n)
+            [ compile_bind d' fvs x rhs size arity n
             | (fvs, x, rhs, size, arity, n) <-
                 zip6 fvss xs rhss sizes arities [n_binds, n_binds-1 .. 1]
             ]
@@ -735,7 +723,7 @@ mkConAppCode orig_d _ p con args = app_code
                 more_push_code <- do_pushery (d + arg_bytes) args
                 return (push `appOL` more_push_code)
             do_pushery !d [] = do
-                let !n_arg_words = trunc16W $ bytesToWords platform (d - orig_d)
+                let !n_arg_words = bytesToWords platform (d - orig_d)
                 return (unitOL (PACK con n_arg_words))
 
         -- Push on the stack in the reverse order.
@@ -761,7 +749,7 @@ doTailCall init_d s p fn args = do
         platform <- profilePlatform <$> getProfile
         assert (sz == wordSize platform) return ()
         let slide = mkSlideB platform (d - init_d + wordSize platform) (init_d - s)
-        return (push_fn `appOL` (slide `appOL` unitOL ENTER))
+        return (push_fn `appOL` (slide `consOL` unitOL ENTER))
   do_pushes !d args reps = do
       let (push_apply, n, rest_of_reps) = findPushSeq reps
           (these_args, rest_of_args) = splitAt n args
@@ -948,7 +936,7 @@ doCase d s p scrut bndr alts
              massert isAlgCase
              rhs_code <- schemeE stack_bot s p' rhs
              return (my_discr alt,
-                     unitOL (UNPACK (trunc16W size)) `appOL` rhs_code)
+                     unitOL (UNPACK size) `appOL` rhs_code)
            where
              real_bndrs = filterOut isTyVar bndrs
 
@@ -1009,8 +997,9 @@ doCase d s p scrut bndr alts
            | ubx_tuple_frame              = ([1], 2) -- call_info, tuple_BCO
            | otherwise                    = ([], 0)
 
-        bitmap_size = trunc16W $ fromIntegral extra_slots +
-                                 bytesToWords platform (d - s)
+        bitmap_size :: WordOff
+        bitmap_size = fromIntegral extra_slots +
+                      bytesToWords platform (d - s)
 
         bitmap_size' :: Int
         bitmap_size' = fromIntegral bitmap_size
@@ -1028,15 +1017,15 @@ doCase d s p scrut bndr alts
                              isUnboxedSumType (idType id) = Nothing
                            | isFollowableArg (bcIdArgRep platform id) = Just (fromIntegral rel_offset)
                            | otherwise                      = Nothing
-                where rel_offset = trunc16W $ bytesToWords platform (d - offset)
+                where rel_offset = bytesToWords platform (d - offset)
 
-        bitmap = intsToReverseBitmap platform bitmap_size'{-size-} pointers
+        bitmap = intsToReverseBitmap platform bitmap_size' pointers
 
      alt_stuff <- mapM codeAlt alts
      alt_final0 <- mkMultiBranch maybe_ncons alt_stuff
 
      let alt_final
-           | ubx_tuple_frame    = mkSlideW 0 2 `mappend` alt_final0
+           | ubx_tuple_frame    = SLIDE 0 2 `consOL` alt_final0
            | otherwise          = alt_final0
 
      let
@@ -1306,11 +1295,11 @@ mkStackBitmap
   -- ^ The stack layout of the arguments, where each offset is relative to the
   -- /bottom/ of the stack space they occupy. Their offsets must be word-aligned,
   -- and the list must be sorted in order of ascending offset (i.e. bottom to top).
-  -> (Word16, [StgWord])
+  -> (WordOff, [StgWord])
 mkStackBitmap platform nptrs_prefix args_info args
   = (bitmap_size, bitmap)
   where
-    bitmap_size = trunc16W $ nptrs_prefix + arg_bottom
+    bitmap_size = nptrs_prefix + arg_bottom
     bitmap = intsToReverseBitmap platform (fromIntegral bitmap_size) ptr_offsets
 
     arg_bottom = nativeCallSize args_info
@@ -1384,7 +1373,7 @@ generatePrimCall d s p target _mb_unit _result_ty args
               (push_target `consOL`
                push_info `consOL`
                PUSH_BCO args_bco `consOL`
-               (mkSlideB platform szb (d - s) `appOL` unitOL PRIMCALL))
+               (mkSlideB platform szb (d - s) `consOL` unitOL PRIMCALL))
 
 -- -----------------------------------------------------------------------------
 -- Deal with a CCall.
@@ -1552,7 +1541,7 @@ generateCCall d0 s p (CCallSpec target cconv safety) result_ty args
          push_r =
              if returns_void
                 then nilOL
-                else unitOL (PUSH_UBX (mkDummyLiteral platform r_rep) (trunc16W r_sizeW))
+                else unitOL (PUSH_UBX (mkDummyLiteral platform r_rep) (r_sizeW))
 
          -- generate the marshalling code we're going to call
 
@@ -1560,7 +1549,7 @@ generateCCall d0 s p (CCallSpec target cconv safety) result_ty args
          -- instruction needs to describe the chunk of stack containing
          -- the ccall args to the GC, so it needs to know how large it
          -- is.  See comment in Interpreter.c with the CCALL instruction.
-         stk_offset   = trunc16W $ bytesToWords platform (d_after_r - s)
+         stk_offset   = bytesToWords platform (d_after_r - s)
 
          conv = case cconv of
            CCallConv -> FFICCall
@@ -1589,7 +1578,7 @@ generateCCall d0 s p (CCallSpec target cconv safety) result_ty args
 
          -- slide and return
          d_after_r_min_s = bytesToWords platform (d_after_r - s)
-         wrapup       = mkSlideW (trunc16W r_sizeW) (d_after_r_min_s - r_sizeW)
+         wrapup       = mkSlideW r_sizeW (d_after_r_min_s - r_sizeW)
                         `snocOL` RETURN (toArgRep platform r_rep)
          --trace (show (arg1_offW, args_offW  ,  (map argRepSizeW a_reps) )) $
      return (
@@ -1793,8 +1782,9 @@ pushAtom d p (StgVarArg var)
    = do platform <- targetPlatform <$> getDynFlags
 
         let !szb = idSizeCon platform var
+            with_instr :: (ByteOff -> BCInstr) -> BcM (OrdList BCInstr, ByteOff)
             with_instr instr = do
-                let !off_b = trunc16B $ d - d_v
+                let !off_b = d - d_v
                 return (unitOL (instr off_b), wordSize platform)
 
         case szb of
@@ -1803,7 +1793,7 @@ pushAtom d p (StgVarArg var)
             4 -> with_instr PUSH32_W
             _ -> do
                 let !szw = bytesToWords platform szb
-                    !off_w = trunc16W $ bytesToWords platform (d - d_v) + szw - 1
+                    !off_w = bytesToWords platform (d - d_v) + szw - 1
                 return (toOL (genericReplicate szw (PUSH_L off_w)),
                               wordsToBytes platform szw)
         -- d - d_v           offset from TOS to the first slot of the object
@@ -1864,7 +1854,7 @@ pushLiteral padded lit =
                 1  -> PUSH_UBX8 lit
                 2  -> PUSH_UBX16 lit
                 4  -> PUSH_UBX32 lit
-                _  -> PUSH_UBX lit (trunc16W $ bytesToWords platform size_bytes)
+                _  -> PUSH_UBX lit (bytesToWords platform size_bytes)
 
      case lit of
         LitLabel {}     -> code AddrRep
@@ -1903,7 +1893,7 @@ pushConstrAtom d p va@(StgVarArg v)
         platform <- targetPlatform <$> getDynFlags
         let !szb = idSizeCon platform v
             done instr = do
-                let !off = trunc16B $ d - d_v
+                let !off = d - d_v
                 return (unitOL (instr off), szb)
         case szb of
             1 -> done PUSH8
@@ -2153,25 +2143,20 @@ unsupportedCConvException = throwGhcException (ProgramError
   ("Error: bytecode compiler can't handle some foreign calling conventions\n"++
    "  Workaround: use -fobject-code, or compile this module to .o separately."))
 
-mkSlideB :: Platform -> ByteOff -> ByteOff -> OrdList BCInstr
-mkSlideB platform !nb !db = mkSlideW n d
+mkSlideB :: Platform -> ByteOff -> ByteOff -> BCInstr
+mkSlideB platform nb db = SLIDE n d
   where
-    !n = trunc16W $ bytesToWords platform nb
+    !n = bytesToWords platform nb
     !d = bytesToWords platform db
 
-mkSlideW :: Word16 -> WordOff -> OrdList BCInstr
+mkSlideW :: WordOff -> WordOff -> OrdList BCInstr
 mkSlideW !n !ws
-    | ws > fromIntegral limit
-    -- If the amount to slide doesn't fit in a Word16, generate multiple slide
-    -- instructions
-    = SLIDE n limit `consOL` mkSlideW n (ws - fromIntegral limit)
     | ws == 0
     = nilOL
     | otherwise
     = unitOL (SLIDE n $ fromIntegral ws)
-  where
-    limit :: Word16
-    limit = maxBound
+
+
 
 atomPrimRep :: StgArg -> PrimRep
 atomPrimRep (StgVarArg v) = bcIdPrimRep v


=====================================
rts/Disassembler.c
=====================================
@@ -28,7 +28,6 @@
 int
 disInstr ( StgBCO *bco, int pc )
 {
-   int i;
    StgWord16 instr;
 
    StgWord16*     instrs      = (StgWord16*)(bco->instrs->payload);
@@ -75,14 +74,15 @@ disInstr ( StgBCO *bco, int pc )
          debugBelch("\n");
          pc += 4;
          break;
-      case bci_SWIZZLE:
-         debugBelch("SWIZZLE stkoff %d by %d\n",
-                         instrs[pc], (signed int)instrs[pc+1]);
-         pc += 2; break;
-      case bci_CCALL:
+      case bci_SWIZZLE: {
+         W_     stkoff = BCO_GET_LARGE_ARG;
+         StgInt by     = BCO_GET_LARGE_ARG;
+         debugBelch("SWIZZLE stkoff %" FMT_Word " by %" FMT_Int "\n", stkoff, by);
+         break; }
+      case bci_CCALL: {
          debugBelch("CCALL    marshaller at 0x%" FMT_HexWord "\n",
                          literals[instrs[pc]] );
-         pc += 1; break;
+         pc += 1; break; }
       case bci_PRIMCALL:
          debugBelch("PRIMCALL\n");
          break;
@@ -91,34 +91,45 @@ disInstr ( StgBCO *bco, int pc )
          debugBelch("STKCHECK %" FMT_Word "\n", (W_)stk_words_reqd );
          break;
      }
-      case bci_PUSH_L:
-         debugBelch("PUSH_L   %d\n", instrs[pc] );
-         pc += 1; break;
-      case bci_PUSH_LL:
-         debugBelch("PUSH_LL  %d %d\n", instrs[pc], instrs[pc+1] );
-         pc += 2; break;
-      case bci_PUSH_LLL:
-         debugBelch("PUSH_LLL %d %d %d\n", instrs[pc], instrs[pc+1],
-                                                            instrs[pc+2] );
-         pc += 3; break;
-      case bci_PUSH8:
-         debugBelch("PUSH8    %d\n", instrs[pc] );
-         pc += 1; break;
-      case bci_PUSH16:
-         debugBelch("PUSH16   %d\n", instrs[pc] );
-         pc += 1; break;
-      case bci_PUSH32:
-         debugBelch("PUSH32   %d\n", instrs[pc] );
-         pc += 1; break;
-      case bci_PUSH8_W:
-         debugBelch("PUSH8_W  %d\n", instrs[pc] );
-         pc += 1; break;
-      case bci_PUSH16_W:
-         debugBelch("PUSH16_W %d\n", instrs[pc] );
-         pc += 1; break;
-      case bci_PUSH32_W:
-         debugBelch("PUSH32_W %d\n", instrs[pc] );
-         pc += 1; break;
+      case bci_PUSH_L: {
+         W_ x1 = BCO_GET_LARGE_ARG;
+         debugBelch("PUSH_L   %" FMT_Word "\n", x1 );
+         break; }
+      case bci_PUSH_LL: {
+         W_ x1 = BCO_GET_LARGE_ARG;
+         W_ x2 = BCO_GET_LARGE_ARG;
+         debugBelch("PUSH_LL  %" FMT_Word " %" FMT_Word "\n", x1, x2 );
+         break; }
+      case bci_PUSH_LLL: {
+         W_ x1 = BCO_GET_LARGE_ARG;
+         W_ x2 = BCO_GET_LARGE_ARG;
+         W_ x3 = BCO_GET_LARGE_ARG;
+         debugBelch("PUSH_LLL %" FMT_Word " %" FMT_Word " %" FMT_Word "\n", x1, x2, x3);
+         break; }
+      case bci_PUSH8: {
+         W_ x1 = BCO_GET_LARGE_ARG;
+         debugBelch("PUSH8    %" FMT_Word "\n", x1 );
+         break; }
+      case bci_PUSH16: {
+         W_ x1 = BCO_GET_LARGE_ARG;
+         debugBelch("PUSH16   %" FMT_Word "\n", x1 );
+         break; }
+      case bci_PUSH32: {
+         W_ x1 = BCO_GET_LARGE_ARG;
+         debugBelch("PUSH32   %" FMT_Word "\n", x1 );
+         break; }
+      case bci_PUSH8_W: {
+         W_ x1 = BCO_GET_LARGE_ARG;
+         debugBelch("PUSH8_W  %" FMT_Word "\n", x1 );
+         break; }
+      case bci_PUSH16_W: {
+         W_ x1 = BCO_GET_LARGE_ARG;
+         debugBelch("PUSH16_W %" FMT_Word "\n", x1 );
+         break; }
+      case bci_PUSH32_W: {
+         W_ x1 = BCO_GET_LARGE_ARG;
+         debugBelch("PUSH32_W %" FMT_Word "\n", x1 );
+         break; }
       case bci_PUSH_G:
          debugBelch("PUSH_G   " ); printPtr( ptrs[instrs[pc]] );
          debugBelch("\n" );
@@ -178,12 +189,14 @@ disInstr ( StgBCO *bco, int pc )
              "PUSH_UBX32 0x%" FMT_HexWord32 "\n",
              (StgWord32) literals[instrs[pc]] );
          pc += 1; break;
-      case bci_PUSH_UBX:
+      case bci_PUSH_UBX: {
          debugBelch("PUSH_UBX ");
-         for (i = 0; i < instrs[pc+1]; i++)
-            debugBelch("0x%" FMT_HexWord " ", literals[i + instrs[pc]] );
+         W_ offset = BCO_GET_LARGE_ARG;
+         W_ nwords = BCO_GET_LARGE_ARG;
+         for (W_ i = 0; i < nwords; i++)
+            debugBelch("0x%" FMT_HexWord " ", literals[i + offset] );
          debugBelch("\n");
-         pc += 2; break;
+         break; }
       case bci_PUSH_APPLY_N:
           debugBelch("PUSH_APPLY_N\n");
           break;
@@ -217,35 +230,48 @@ disInstr ( StgBCO *bco, int pc )
       case bci_PUSH_APPLY_PPPPPP:
           debugBelch("PUSH_APPLY_PPPPPP\n");
           break;
-      case bci_SLIDE:
-         debugBelch("SLIDE     %d down by %d\n", instrs[pc], instrs[pc+1] );
-         pc += 2; break;
-      case bci_ALLOC_AP:
-         debugBelch("ALLOC_AP  %d words\n", instrs[pc] );
-         pc += 1; break;
-      case bci_ALLOC_AP_NOUPD:
-         debugBelch("ALLOC_AP_NOUPD %d words\n", instrs[pc] );
-         pc += 1; break;
-      case bci_ALLOC_PAP:
-         debugBelch("ALLOC_PAP %d arity, %d words\n",
-                 instrs[pc], instrs[pc+1] );
-         pc += 2; break;
-      case bci_MKAP:
-         debugBelch("MKAP      %d words, %d stkoff\n", instrs[pc+1],
-                                                           instrs[pc] );
-         pc += 2; break;
-      case bci_MKPAP:
-         debugBelch("MKPAP     %d words, %d stkoff\n", instrs[pc+1],
-                                                           instrs[pc] );
-         pc += 2; break;
-      case bci_UNPACK:
-         debugBelch("UNPACK    %d\n", instrs[pc] );
-         pc += 1; break;
-      case bci_PACK:
-         debugBelch("PACK      %d words with itbl ", instrs[pc+1] );
-         printPtr( (StgPtr)literals[instrs[pc]] );
+      case bci_SLIDE: {
+         W_ nwords = BCO_GET_LARGE_ARG;
+         W_ by     = BCO_GET_LARGE_ARG;
+         debugBelch("SLIDE     %" FMT_Word " down by %" FMT_Word "\n", nwords, by );
+         break; }
+      case bci_ALLOC_AP: {
+         W_ nwords = BCO_GET_LARGE_ARG;
+         debugBelch("ALLOC_AP  %" FMT_Word " words\n", nwords );
+         break; }
+      case bci_ALLOC_AP_NOUPD: {
+         W_ nwords = BCO_GET_LARGE_ARG;
+         debugBelch("ALLOC_AP_NOUPD %" FMT_Word " words\n", nwords );
+         break; }
+      case bci_ALLOC_PAP: {
+         W_ arity = BCO_GET_LARGE_ARG;
+         W_ nwords = BCO_GET_LARGE_ARG;
+         debugBelch("ALLOC_PAP %" FMT_Word " arity, %" FMT_Word " words\n",
+                 arity, nwords );
+         break; }
+      case bci_MKAP: {
+         W_ stkoff = BCO_GET_LARGE_ARG;
+         W_ nwords = BCO_GET_LARGE_ARG;
+         debugBelch("MKAP      %" FMT_Word " words, %" FMT_Word " stkoff\n", nwords,
+                                                           stkoff );
+         break; }
+      case bci_MKPAP: {
+         W_ stkoff = BCO_GET_LARGE_ARG;
+         W_ nwords = BCO_GET_LARGE_ARG;
+         debugBelch("MKPAP     %" FMT_Word " words, %" FMT_Word " stkoff\n", nwords,
+                                                      stkoff );
+         break; }
+      case bci_UNPACK: {
+         W_ nwords = BCO_GET_LARGE_ARG;
+         debugBelch("UNPACK    %" FMT_Word "\n", nwords );
+         break; }
+      case bci_PACK: {
+         int itbl = BCO_NEXT;
+         W_ nwords = BCO_GET_LARGE_ARG;
+         debugBelch("PACK      %" FMT_Word " words with itbl ", nwords );
+         printPtr( (StgPtr)literals[itbl] );
          debugBelch("\n");
-         pc += 2; break;
+         break; }
 
       case bci_TESTLT_I: {
           unsigned int discr  = BCO_NEXT;


=====================================
rts/Interpreter.c
=====================================
@@ -1226,15 +1226,15 @@ run_BCO:
         }
 
         case bci_PUSH_L: {
-            int o1 = BCO_NEXT;
+            W_ o1 = BCO_GET_LARGE_ARG;
             SpW(-1) = SpW(o1);
             Sp_subW(1);
             goto nextInsn;
         }
 
         case bci_PUSH_LL: {
-            int o1 = BCO_NEXT;
-            int o2 = BCO_NEXT;
+            W_ o1 = BCO_GET_LARGE_ARG;
+            W_ o2 = BCO_GET_LARGE_ARG;
             SpW(-1) = SpW(o1);
             SpW(-2) = SpW(o2);
             Sp_subW(2);
@@ -1242,9 +1242,9 @@ run_BCO:
         }
 
         case bci_PUSH_LLL: {
-            int o1 = BCO_NEXT;
-            int o2 = BCO_NEXT;
-            int o3 = BCO_NEXT;
+            W_ o1 = BCO_GET_LARGE_ARG;
+            W_ o2 = BCO_GET_LARGE_ARG;
+            W_ o3 = BCO_GET_LARGE_ARG;
             SpW(-1) = SpW(o1);
             SpW(-2) = SpW(o2);
             SpW(-3) = SpW(o3);
@@ -1253,56 +1253,56 @@ run_BCO:
         }
 
         case bci_PUSH8: {
-            int off = BCO_NEXT;
+            W_ off = BCO_GET_LARGE_ARG;
             Sp_subB(1);
             *(StgWord8*)Sp = *(StgWord8*)(Sp_plusB(off+1));
             goto nextInsn;
         }
 
         case bci_PUSH16: {
-            int off = BCO_NEXT;
+            W_ off = BCO_GET_LARGE_ARG;
             Sp_subB(2);
             *(StgWord16*)Sp = *(StgWord16*)(Sp_plusB(off+2));
             goto nextInsn;
         }
 
         case bci_PUSH32: {
-            int off = BCO_NEXT;
+            W_ off = BCO_GET_LARGE_ARG;
             Sp_subB(4);
             *(StgWord32*)Sp = *(StgWord32*)(Sp_plusB(off+4));
             goto nextInsn;
         }
 
         case bci_PUSH8_W: {
-            int off = BCO_NEXT;
+            W_ off = BCO_GET_LARGE_ARG;
             *(StgWord*)(Sp_minusW(1)) = *(StgWord8*)(Sp_plusB(off));
             Sp_subW(1);
             goto nextInsn;
         }
 
         case bci_PUSH16_W: {
-            int off = BCO_NEXT;
+            W_ off = BCO_GET_LARGE_ARG;
             *(StgWord*)(Sp_minusW(1)) = *(StgWord16*)(Sp_plusB(off));
             Sp_subW(1);
             goto nextInsn;
         }
 
         case bci_PUSH32_W: {
-            int off = BCO_NEXT;
+            W_ off = BCO_GET_LARGE_ARG;
             *(StgWord*)(Sp_minusW(1)) = *(StgWord32*)(Sp_plusB(off));
             Sp_subW(1);
             goto nextInsn;
         }
 
         case bci_PUSH_G: {
-            int o1 = BCO_GET_LARGE_ARG;
+            W_ o1 = BCO_GET_LARGE_ARG;
             SpW(-1) = BCO_PTR(o1);
             Sp_subW(1);
             goto nextInsn;
         }
 
         case bci_PUSH_ALTS_P: {
-            int o_bco  = BCO_GET_LARGE_ARG;
+            W_ o_bco  = BCO_GET_LARGE_ARG;
             Sp_subW(2);
             SpW(1) = BCO_PTR(o_bco);
             SpW(0) = (W_)&stg_ctoi_R1p_info;
@@ -1315,7 +1315,7 @@ run_BCO:
         }
 
         case bci_PUSH_ALTS_N: {
-            int o_bco  = BCO_GET_LARGE_ARG;
+            W_ o_bco  = BCO_GET_LARGE_ARG;
             SpW(-2) = (W_)&stg_ctoi_R1n_info;
             SpW(-1) = BCO_PTR(o_bco);
             Sp_subW(2);
@@ -1328,7 +1328,7 @@ run_BCO:
         }
 
         case bci_PUSH_ALTS_F: {
-            int o_bco  = BCO_GET_LARGE_ARG;
+            W_ o_bco  = BCO_GET_LARGE_ARG;
             SpW(-2) = (W_)&stg_ctoi_F1_info;
             SpW(-1) = BCO_PTR(o_bco);
             Sp_subW(2);
@@ -1341,7 +1341,7 @@ run_BCO:
         }
 
         case bci_PUSH_ALTS_D: {
-            int o_bco  = BCO_GET_LARGE_ARG;
+            W_ o_bco  = BCO_GET_LARGE_ARG;
             SpW(-2) = (W_)&stg_ctoi_D1_info;
             SpW(-1) = BCO_PTR(o_bco);
             Sp_subW(2);
@@ -1354,7 +1354,7 @@ run_BCO:
         }
 
         case bci_PUSH_ALTS_L: {
-            int o_bco  = BCO_GET_LARGE_ARG;
+            W_ o_bco  = BCO_GET_LARGE_ARG;
             SpW(-2) = (W_)&stg_ctoi_L1_info;
             SpW(-1) = BCO_PTR(o_bco);
             Sp_subW(2);
@@ -1367,7 +1367,7 @@ run_BCO:
         }
 
         case bci_PUSH_ALTS_V: {
-            int o_bco  = BCO_GET_LARGE_ARG;
+            W_ o_bco  = BCO_GET_LARGE_ARG;
             SpW(-2) = (W_)&stg_ctoi_V_info;
             SpW(-1) = BCO_PTR(o_bco);
             Sp_subW(2);
@@ -1380,9 +1380,9 @@ run_BCO:
         }
 
         case bci_PUSH_ALTS_T: {
-            int o_bco = BCO_GET_LARGE_ARG;
+            W_ o_bco = BCO_GET_LARGE_ARG;
             W_ tuple_info = (W_)BCO_LIT(BCO_GET_LARGE_ARG);
-            int o_tuple_bco = BCO_GET_LARGE_ARG;
+            W_ o_tuple_bco = BCO_GET_LARGE_ARG;
 
 #if defined(PROFILING)
             SpW(-1) = (W_)cap->r.rCCCS;
@@ -1526,30 +1526,30 @@ run_BCO:
         }
 
         case bci_PUSH_UBX8: {
-            int o_lit = BCO_GET_LARGE_ARG;
+            W_ o_lit = BCO_GET_LARGE_ARG;
             Sp_subB(1);
             *(StgWord8*)Sp = *(StgWord8*)(literals+o_lit);
             goto nextInsn;
         }
 
         case bci_PUSH_UBX16: {
-            int o_lit = BCO_GET_LARGE_ARG;
+            W_ o_lit = BCO_GET_LARGE_ARG;
             Sp_subB(2);
             *(StgWord16*)Sp = *(StgWord16*)(literals+o_lit);
             goto nextInsn;
         }
 
         case bci_PUSH_UBX32: {
-            int o_lit = BCO_GET_LARGE_ARG;
+            W_ o_lit = BCO_GET_LARGE_ARG;
             Sp_subB(4);
             *(StgWord32*)Sp = *(StgWord32*)(literals+o_lit);
             goto nextInsn;
         }
 
         case bci_PUSH_UBX: {
-            int i;
-            int o_lits = BCO_GET_LARGE_ARG;
-            int n_words = BCO_NEXT;
+            W_ i;
+            W_ o_lits = BCO_GET_LARGE_ARG;
+            W_ n_words = BCO_GET_LARGE_ARG;
             Sp_subW(n_words);
             for (i = 0; i < n_words; i++) {
                 SpW(i) = (W_)BCO_LIT(o_lits+i);
@@ -1558,10 +1558,10 @@ run_BCO:
         }
 
         case bci_SLIDE: {
-            int n  = BCO_NEXT;
-            int by = BCO_NEXT;
+            W_ n  = BCO_GET_LARGE_ARG;
+            W_ by = BCO_GET_LARGE_ARG;
             /* a_1, .. a_n, b_1, .. b_by, s => a_1, .. a_n, s */
-            while(--n >= 0) {
+            while(n-- > 0) {
                 SpW(n+by) = SpW(n);
             }
             Sp_addW(by);
@@ -1570,7 +1570,7 @@ run_BCO:
         }
 
         case bci_ALLOC_AP: {
-            int n_payload = BCO_NEXT;
+            StgHalfWord n_payload = BCO_GET_LARGE_ARG;
             StgAP *ap = (StgAP*)allocate(cap, AP_sizeW(n_payload));
             SpW(-1) = (W_)ap;
             ap->n_args = n_payload;
@@ -1583,7 +1583,7 @@ run_BCO:
         }
 
         case bci_ALLOC_AP_NOUPD: {
-            int n_payload = BCO_NEXT;
+            StgHalfWord n_payload = BCO_GET_LARGE_ARG;
             StgAP *ap = (StgAP*)allocate(cap, AP_sizeW(n_payload));
             SpW(-1) = (W_)ap;
             ap->n_args = n_payload;
@@ -1597,8 +1597,8 @@ run_BCO:
 
         case bci_ALLOC_PAP: {
             StgPAP* pap;
-            int arity = BCO_NEXT;
-            int n_payload = BCO_NEXT;
+            StgHalfWord arity = BCO_GET_LARGE_ARG;
+            StgHalfWord n_payload = BCO_GET_LARGE_ARG;
             pap = (StgPAP*)allocate(cap, PAP_sizeW(n_payload));
             SpW(-1) = (W_)pap;
             pap->n_args = n_payload;
@@ -1611,11 +1611,11 @@ run_BCO:
         }
 
         case bci_MKAP: {
-            int i;
-            int stkoff = BCO_NEXT;
-            int n_payload = BCO_NEXT;
+            StgHalfWord i;
+            W_ stkoff = BCO_GET_LARGE_ARG;
+            StgHalfWord n_payload = BCO_GET_LARGE_ARG;
             StgAP* ap = (StgAP*)SpW(stkoff);
-            ASSERT((int)ap->n_args == n_payload);
+            ASSERT(ap->n_args == n_payload);
             ap->fun = (StgClosure*)SpW(0);
 
             // The function should be a BCO, and its bitmap should
@@ -1635,11 +1635,11 @@ run_BCO:
         }
 
         case bci_MKPAP: {
-            int i;
-            int stkoff = BCO_NEXT;
-            int n_payload = BCO_NEXT;
+            StgHalfWord i;
+            W_ stkoff = BCO_GET_LARGE_ARG;
+            StgHalfWord n_payload = BCO_GET_LARGE_ARG;
             StgPAP* pap = (StgPAP*)SpW(stkoff);
-            ASSERT((int)pap->n_args == n_payload);
+            ASSERT(pap->n_args == n_payload);
             pap->fun = (StgClosure*)SpW(0);
 
             // The function should be a BCO
@@ -1663,8 +1663,8 @@ run_BCO:
 
         case bci_UNPACK: {
             /* Unpack N ptr words from t.o.s constructor */
-            int i;
-            int n_words = BCO_NEXT;
+            W_ i;
+            W_ n_words = BCO_GET_LARGE_ARG;
             StgClosure* con = UNTAG_CLOSURE((StgClosure*)SpW(0));
             Sp_subW(n_words);
             for (i = 0; i < n_words; i++) {
@@ -1674,9 +1674,9 @@ run_BCO:
         }
 
         case bci_PACK: {
-            int i;
-            int o_itbl         = BCO_GET_LARGE_ARG;
-            int n_words        = BCO_NEXT;
+            W_ i;
+            W_ o_itbl         = BCO_GET_LARGE_ARG;
+            W_ n_words        = BCO_GET_LARGE_ARG;
             StgInfoTable* itbl = INFO_PTR_TO_STRUCT((StgInfoTable *)BCO_LIT(o_itbl));
             int request        = CONSTR_sizeW( itbl->layout.payload.ptrs,
                                                itbl->layout.payload.nptrs );
@@ -2006,9 +2006,9 @@ run_BCO:
         }
 
         case bci_SWIZZLE: {
-            int stkoff = BCO_NEXT;
-            signed short n = (signed short)(BCO_NEXT);
-            SpW(stkoff) += (W_)n;
+            W_ stkoff = BCO_GET_LARGE_ARG;
+            StgInt n = BCO_GET_LARGE_ARG;
+            (*(StgInt*)(Sp_plusW(stkoff))) += n;
             goto nextInsn;
         }
 
@@ -2020,7 +2020,7 @@ run_BCO:
 
         case bci_CCALL: {
             void *tok;
-            int stk_offset            = BCO_NEXT;
+            W_ stk_offset             = BCO_GET_LARGE_ARG;
             int o_itbl                = BCO_GET_LARGE_ARG;
             int flags                 = BCO_NEXT;
             bool interruptible        = flags & 0x1;
@@ -2056,7 +2056,7 @@ run_BCO:
             uint32_t nargs = cif->nargs;
             uint32_t ret_size;
             uint32_t i;
-            int j;
+            W_ j;
             StgPtr p;
             W_ ret[2];                  // max needed
             W_ *arguments[stk_offset];  // max needed


=====================================
testsuite/tests/ghci/should_run/LargeBCO.hs
=====================================
@@ -0,0 +1,32 @@
+
+{-
+   Test for BCOs that use larger than 16 bit stack offsets.
+
+   Using Template Haskell because loading the code directly into
+   GHCi produces different bytecode that does not have large stack
+   offsets.
+
+   testcase from #22888
+ -}
+
+{-# LANGUAGE TemplateHaskell #-}
+module Main where
+
+import LargeBCO_A
+
+import Data.Binary.Get (runGet, Get, getWord32be)
+import qualified Data.ByteString.Lazy as B
+import Data.Bits (Bits(..))
+import Data.Word (Word32)
+
+import Language.Haskell.TH.Lib
+
+result :: String
+result = $(let initState = SHA256S 1 2 3 4 5 6 7 8
+               input     = B.replicate 64 0
+               output    = runGet (processSHA256Block initState) input
+           in litE (stringL (show output))
+          )
+
+main :: IO ()
+main = putStrLn result


=====================================
testsuite/tests/ghci/should_run/LargeBCO.stdout
=====================================
@@ -0,0 +1 @@
+SHA256S 1251949539 2800197164 2023110800 2630081144 3831421046 3141654527 2982319529 2535435789


=====================================
testsuite/tests/ghci/should_run/LargeBCO_A.hs
=====================================
@@ -0,0 +1,215 @@
+{-# LANGUAGE TemplateHaskell #-}
+module LargeBCO_A (processSHA256Block, SHA256State(..)) where
+
+import Data.Binary.Get (runGet, Get, getWord32be)
+import qualified Data.ByteString.Lazy as B
+import Data.Binary.Get (Get, getWord32be)
+import Data.Bits (Bits(..))
+import Data.Word (Word32)
+import System.Environment
+
+data SHA256Sched = SHA256Sched !Word32 !Word32 !Word32 !Word32 !Word32 -- 00-04
+                               !Word32 !Word32 !Word32 !Word32 !Word32 -- 05-09
+                               !Word32 !Word32 !Word32 !Word32 !Word32 -- 10-04
+                               !Word32 !Word32 !Word32 !Word32 !Word32 -- 15-09
+                               !Word32 !Word32 !Word32 !Word32 !Word32 -- 20-04
+                               !Word32 !Word32 !Word32 !Word32 !Word32 -- 25-09
+                               !Word32 !Word32 !Word32 !Word32 !Word32 -- 30-04
+                               !Word32 !Word32 !Word32 !Word32 !Word32 -- 35-09
+                               !Word32 !Word32 !Word32 !Word32 !Word32 -- 40-04
+                               !Word32 !Word32 !Word32 !Word32 !Word32 -- 45-09
+                               !Word32 !Word32 !Word32 !Word32 !Word32 -- 50-04
+                               !Word32 !Word32 !Word32 !Word32 !Word32 -- 55-09
+                               !Word32 !Word32 !Word32 !Word32         -- 60-63
+
+data SHA256State = SHA256S !Word32 !Word32 !Word32 !Word32
+                           !Word32 !Word32 !Word32 !Word32
+    deriving (Show)
+
+{-# SPECIALIZE ch :: Word32 -> Word32 -> Word32 -> Word32 #-}
+ch :: Bits a => a -> a -> a -> a
+ch x y z = (x .&. y) `xor` (complement x .&. z)
+
+{-# SPECIALIZE maj :: Word32 -> Word32 -> Word32 -> Word32 #-}
+maj :: Bits a => a -> a -> a -> a
+maj x y z = (x .&. (y .|. z)) .|. (y .&. z)
+
+bsig256_0 :: Word32 -> Word32
+bsig256_0 x = rotateR x 2 `xor` rotateR x 13 `xor` rotateR x 22
+
+bsig256_1 :: Word32 -> Word32
+bsig256_1 x = rotateR x 6 `xor` rotateR x 11 `xor` rotateR x 25
+
+lsig256_0 :: Word32 -> Word32
+lsig256_0 x = rotateR x 7 `xor` rotateR x 18 `xor` shiftR x 3
+
+lsig256_1 :: Word32 -> Word32
+lsig256_1 x = rotateR x 17 `xor` rotateR x 19 `xor` shiftR x 10
+
+getSHA256Sched :: Get SHA256Sched
+getSHA256Sched = do
+  w00 <- getWord32be
+  w01 <- getWord32be
+  w02 <- getWord32be
+  w03 <- getWord32be
+  w04 <- getWord32be
+  w05 <- getWord32be
+  w06 <- getWord32be
+  w07 <- getWord32be
+  w08 <- getWord32be
+  w09 <- getWord32be
+  w10 <- getWord32be
+  w11 <- getWord32be
+  w12 <- getWord32be
+  w13 <- getWord32be
+  w14 <- getWord32be
+  w15 <- getWord32be
+  let w16 = lsig256_1 w14 + w09 + lsig256_0 w01 + w00
+      w17 = lsig256_1 w15 + w10 + lsig256_0 w02 + w01
+      w18 = lsig256_1 w16 + w11 + lsig256_0 w03 + w02
+      w19 = lsig256_1 w17 + w12 + lsig256_0 w04 + w03
+      w20 = lsig256_1 w18 + w13 + lsig256_0 w05 + w04
+      w21 = lsig256_1 w19 + w14 + lsig256_0 w06 + w05
+      w22 = lsig256_1 w20 + w15 + lsig256_0 w07 + w06
+      w23 = lsig256_1 w21 + w16 + lsig256_0 w08 + w07
+      w24 = lsig256_1 w22 + w17 + lsig256_0 w09 + w08
+      w25 = lsig256_1 w23 + w18 + lsig256_0 w10 + w09
+      w26 = lsig256_1 w24 + w19 + lsig256_0 w11 + w10
+      w27 = lsig256_1 w25 + w20 + lsig256_0 w12 + w11
+      w28 = lsig256_1 w26 + w21 + lsig256_0 w13 + w12
+      w29 = lsig256_1 w27 + w22 + lsig256_0 w14 + w13
+      w30 = lsig256_1 w28 + w23 + lsig256_0 w15 + w14
+      w31 = lsig256_1 w29 + w24 + lsig256_0 w16 + w15
+      w32 = lsig256_1 w30 + w25 + lsig256_0 w17 + w16
+      w33 = lsig256_1 w31 + w26 + lsig256_0 w18 + w17
+      w34 = lsig256_1 w32 + w27 + lsig256_0 w19 + w18
+      w35 = lsig256_1 w33 + w28 + lsig256_0 w20 + w19
+      w36 = lsig256_1 w34 + w29 + lsig256_0 w21 + w20
+      w37 = lsig256_1 w35 + w30 + lsig256_0 w22 + w21
+      w38 = lsig256_1 w36 + w31 + lsig256_0 w23 + w22
+      w39 = lsig256_1 w37 + w32 + lsig256_0 w24 + w23
+      w40 = lsig256_1 w38 + w33 + lsig256_0 w25 + w24
+      w41 = lsig256_1 w39 + w34 + lsig256_0 w26 + w25
+      w42 = lsig256_1 w40 + w35 + lsig256_0 w27 + w26
+      w43 = lsig256_1 w41 + w36 + lsig256_0 w28 + w27
+      w44 = lsig256_1 w42 + w37 + lsig256_0 w29 + w28
+      w45 = lsig256_1 w43 + w38 + lsig256_0 w30 + w29
+      w46 = lsig256_1 w44 + w39 + lsig256_0 w31 + w30
+      w47 = lsig256_1 w45 + w40 + lsig256_0 w32 + w31
+      w48 = lsig256_1 w46 + w41 + lsig256_0 w33 + w32
+      w49 = lsig256_1 w47 + w42 + lsig256_0 w34 + w33
+      w50 = lsig256_1 w48 + w43 + lsig256_0 w35 + w34
+      w51 = lsig256_1 w49 + w44 + lsig256_0 w36 + w35
+      w52 = lsig256_1 w50 + w45 + lsig256_0 w37 + w36
+      w53 = lsig256_1 w51 + w46 + lsig256_0 w38 + w37
+      w54 = lsig256_1 w52 + w47 + lsig256_0 w39 + w38
+      w55 = lsig256_1 w53 + w48 + lsig256_0 w40 + w39
+      w56 = lsig256_1 w54 + w49 + lsig256_0 w41 + w40
+      w57 = lsig256_1 w55 + w50 + lsig256_0 w42 + w41
+      w58 = lsig256_1 w56 + w51 + lsig256_0 w43 + w42
+      w59 = lsig256_1 w57 + w52 + lsig256_0 w44 + w43
+      w60 = lsig256_1 w58 + w53 + lsig256_0 w45 + w44
+      w61 = lsig256_1 w59 + w54 + lsig256_0 w46 + w45
+      w62 = lsig256_1 w60 + w55 + lsig256_0 w47 + w46
+      w63 = lsig256_1 w61 + w56 + lsig256_0 w48 + w47
+  return $! SHA256Sched w00 w01 w02 w03 w04 w05 w06 w07 w08 w09
+                        w10 w11 w12 w13 w14 w15 w16 w17 w18 w19
+                        w20 w21 w22 w23 w24 w25 w26 w27 w28 w29
+                        w30 w31 w32 w33 w34 w35 w36 w37 w38 w39
+                        w40 w41 w42 w43 w44 w45 w46 w47 w48 w49
+                        w50 w51 w52 w53 w54 w55 w56 w57 w58 w59
+                        w60 w61 w62 w63
+
+{-# NOINLINE processSHA256Block #-}
+processSHA256Block :: SHA256State -> Get SHA256State
+processSHA256Block !s00@(SHA256S a00 b00 c00 d00 e00 f00 g00 h00) = do
+  (SHA256Sched w00 w01 w02 w03 w04 w05 w06 w07 w08 w09
+               w10 w11 w12 w13 w14 w15 w16 w17 w18 w19
+               w20 w21 w22 w23 w24 w25 w26 w27 w28 w29
+               w30 w31 w32 w33 w34 w35 w36 w37 w38 w39
+               w40 w41 w42 w43 w44 w45 w46 w47 w48 w49
+               w50 w51 w52 w53 w54 w55 w56 w57 w58 w59
+               w60 w61 w62 w63) <- getSHA256Sched
+  let s01 = step256 s00 0x428a2f98 w00
+      s02 = step256 s01 0x71374491 w01
+      s03 = step256 s02 0xb5c0fbcf w02
+      s04 = step256 s03 0xe9b5dba5 w03
+      s05 = step256 s04 0x3956c25b w04
+      s06 = step256 s05 0x59f111f1 w05
+      s07 = step256 s06 0x923f82a4 w06
+      s08 = step256 s07 0xab1c5ed5 w07
+      s09 = step256 s08 0xd807aa98 w08
+      s10 = step256 s09 0x12835b01 w09
+      s11 = step256 s10 0x243185be w10
+      s12 = step256 s11 0x550c7dc3 w11
+      s13 = step256 s12 0x72be5d74 w12
+      s14 = step256 s13 0x80deb1fe w13
+      s15 = step256 s14 0x9bdc06a7 w14
+      s16 = step256 s15 0xc19bf174 w15
+      s17 = step256 s16 0xe49b69c1 w16
+      s18 = step256 s17 0xefbe4786 w17
+      s19 = step256 s18 0x0fc19dc6 w18
+      s20 = step256 s19 0x240ca1cc w19
+      s21 = step256 s20 0x2de92c6f w20
+      s22 = step256 s21 0x4a7484aa w21
+      s23 = step256 s22 0x5cb0a9dc w22
+      s24 = step256 s23 0x76f988da w23
+      s25 = step256 s24 0x983e5152 w24
+      s26 = step256 s25 0xa831c66d w25
+      s27 = step256 s26 0xb00327c8 w26
+      s28 = step256 s27 0xbf597fc7 w27
+      s29 = step256 s28 0xc6e00bf3 w28
+      s30 = step256 s29 0xd5a79147 w29
+      s31 = step256 s30 0x06ca6351 w30
+      s32 = step256 s31 0x14292967 w31
+      s33 = step256 s32 0x27b70a85 w32
+      s34 = step256 s33 0x2e1b2138 w33
+      s35 = step256 s34 0x4d2c6dfc w34
+      s36 = step256 s35 0x53380d13 w35
+      s37 = step256 s36 0x650a7354 w36
+      s38 = step256 s37 0x766a0abb w37
+      s39 = step256 s38 0x81c2c92e w38
+      s40 = step256 s39 0x92722c85 w39
+      s41 = step256 s40 0xa2bfe8a1 w40
+      s42 = step256 s41 0xa81a664b w41
+      s43 = step256 s42 0xc24b8b70 w42
+      s44 = step256 s43 0xc76c51a3 w43
+      s45 = step256 s44 0xd192e819 w44
+      s46 = step256 s45 0xd6990624 w45
+      s47 = step256 s46 0xf40e3585 w46
+      s48 = step256 s47 0x106aa070 w47
+      s49 = step256 s48 0x19a4c116 w48
+      s50 = step256 s49 0x1e376c08 w49
+      s51 = step256 s50 0x2748774c w50
+      s52 = step256 s51 0x34b0bcb5 w51
+      s53 = step256 s52 0x391c0cb3 w52
+      s54 = step256 s53 0x4ed8aa4a w53
+      s55 = step256 s54 0x5b9cca4f w54
+      s56 = step256 s55 0x682e6ff3 w55
+      s57 = step256 s56 0x748f82ee w56
+      s58 = step256 s57 0x78a5636f w57
+      s59 = step256 s58 0x84c87814 w58
+      s60 = step256 s59 0x8cc70208 w59
+      s61 = step256 s60 0x90befffa w60
+      s62 = step256 s61 0xa4506ceb w61
+      s63 = step256 s62 0xbef9a3f7 w62
+      s64 = step256 s63 0xc67178f2 w63
+      SHA256S a64 b64 c64 d64 e64 f64 g64 h64 = s64
+  return $! SHA256S (a00 + a64) (b00 + b64) (c00 + c64) (d00 + d64)
+                    (e00 + e64) (f00 + f64) (g00 + g64) (h00 + h64)
+
+{-# INLINE step256 #-}
+step256 :: SHA256State -> Word32 -> Word32 -> SHA256State
+step256 !(SHA256S a b c d e f g h) k w = SHA256S a' b' c' d' e' f' g' h'
+ where
+  t1 = h + bsig256_1 e + ch e f g + k + w
+  t2 = bsig256_0 a + maj a b c
+  h' = g
+  g' = f
+  f' = e
+  e' = d + t1
+  d' = c
+  c' = b
+  b' = a
+  a' = t1 + t2
+


=====================================
testsuite/tests/ghci/should_run/T22888.hs
=====================================
@@ -0,0 +1,221 @@
+{-
+
+  This module produced a panic when compiled with -fbyte-code-and-object-code
+  and optimization because it required stack offsets greater than 65535
+
+  See #22888
+
+ -}
+
+module Main (main, processSHA256Block) where
+
+import Data.Binary.Get (Get, getWord32be)
+import Data.Bits (Bits(..))
+import Data.Word (Word32)
+
+main :: IO ()
+main = pure ()
+
+data SHA256Sched = SHA256Sched !Word32 !Word32 !Word32 !Word32 !Word32 -- 00-04
+                               !Word32 !Word32 !Word32 !Word32 !Word32 -- 05-09
+                               !Word32 !Word32 !Word32 !Word32 !Word32 -- 10-04
+                               !Word32 !Word32 !Word32 !Word32 !Word32 -- 15-09
+                               !Word32 !Word32 !Word32 !Word32 !Word32 -- 20-04
+                               !Word32 !Word32 !Word32 !Word32 !Word32 -- 25-09
+                               !Word32 !Word32 !Word32 !Word32 !Word32 -- 30-04
+                               !Word32 !Word32 !Word32 !Word32 !Word32 -- 35-09
+                               !Word32 !Word32 !Word32 !Word32 !Word32 -- 40-04
+                               !Word32 !Word32 !Word32 !Word32 !Word32 -- 45-09
+                               !Word32 !Word32 !Word32 !Word32 !Word32 -- 50-04
+                               !Word32 !Word32 !Word32 !Word32 !Word32 -- 55-09
+                               !Word32 !Word32 !Word32 !Word32         -- 60-63
+
+data SHA256State = SHA256S !Word32 !Word32 !Word32 !Word32
+                           !Word32 !Word32 !Word32 !Word32
+
+{-# SPECIALIZE ch :: Word32 -> Word32 -> Word32 -> Word32 #-}
+ch :: Bits a => a -> a -> a -> a
+ch x y z = (x .&. y) `xor` (complement x .&. z)
+
+{-# SPECIALIZE maj :: Word32 -> Word32 -> Word32 -> Word32 #-}
+maj :: Bits a => a -> a -> a -> a
+maj x y z = (x .&. (y .|. z)) .|. (y .&. z)
+
+bsig256_0 :: Word32 -> Word32
+bsig256_0 x = rotateR x 2 `xor` rotateR x 13 `xor` rotateR x 22
+
+bsig256_1 :: Word32 -> Word32
+bsig256_1 x = rotateR x 6 `xor` rotateR x 11 `xor` rotateR x 25
+
+lsig256_0 :: Word32 -> Word32
+lsig256_0 x = rotateR x 7 `xor` rotateR x 18 `xor` shiftR x 3
+
+lsig256_1 :: Word32 -> Word32
+lsig256_1 x = rotateR x 17 `xor` rotateR x 19 `xor` shiftR x 10
+
+getSHA256Sched :: Get SHA256Sched
+getSHA256Sched = do
+  w00 <- getWord32be
+  w01 <- getWord32be
+  w02 <- getWord32be
+  w03 <- getWord32be
+  w04 <- getWord32be
+  w05 <- getWord32be
+  w06 <- getWord32be
+  w07 <- getWord32be
+  w08 <- getWord32be
+  w09 <- getWord32be
+  w10 <- getWord32be
+  w11 <- getWord32be
+  w12 <- getWord32be
+  w13 <- getWord32be
+  w14 <- getWord32be
+  w15 <- getWord32be
+  let w16 = lsig256_1 w14 + w09 + lsig256_0 w01 + w00
+      w17 = lsig256_1 w15 + w10 + lsig256_0 w02 + w01
+      w18 = lsig256_1 w16 + w11 + lsig256_0 w03 + w02
+      w19 = lsig256_1 w17 + w12 + lsig256_0 w04 + w03
+      w20 = lsig256_1 w18 + w13 + lsig256_0 w05 + w04
+      w21 = lsig256_1 w19 + w14 + lsig256_0 w06 + w05
+      w22 = lsig256_1 w20 + w15 + lsig256_0 w07 + w06
+      w23 = lsig256_1 w21 + w16 + lsig256_0 w08 + w07
+      w24 = lsig256_1 w22 + w17 + lsig256_0 w09 + w08
+      w25 = lsig256_1 w23 + w18 + lsig256_0 w10 + w09
+      w26 = lsig256_1 w24 + w19 + lsig256_0 w11 + w10
+      w27 = lsig256_1 w25 + w20 + lsig256_0 w12 + w11
+      w28 = lsig256_1 w26 + w21 + lsig256_0 w13 + w12
+      w29 = lsig256_1 w27 + w22 + lsig256_0 w14 + w13
+      w30 = lsig256_1 w28 + w23 + lsig256_0 w15 + w14
+      w31 = lsig256_1 w29 + w24 + lsig256_0 w16 + w15
+      w32 = lsig256_1 w30 + w25 + lsig256_0 w17 + w16
+      w33 = lsig256_1 w31 + w26 + lsig256_0 w18 + w17
+      w34 = lsig256_1 w32 + w27 + lsig256_0 w19 + w18
+      w35 = lsig256_1 w33 + w28 + lsig256_0 w20 + w19
+      w36 = lsig256_1 w34 + w29 + lsig256_0 w21 + w20
+      w37 = lsig256_1 w35 + w30 + lsig256_0 w22 + w21
+      w38 = lsig256_1 w36 + w31 + lsig256_0 w23 + w22
+      w39 = lsig256_1 w37 + w32 + lsig256_0 w24 + w23
+      w40 = lsig256_1 w38 + w33 + lsig256_0 w25 + w24
+      w41 = lsig256_1 w39 + w34 + lsig256_0 w26 + w25
+      w42 = lsig256_1 w40 + w35 + lsig256_0 w27 + w26
+      w43 = lsig256_1 w41 + w36 + lsig256_0 w28 + w27
+      w44 = lsig256_1 w42 + w37 + lsig256_0 w29 + w28
+      w45 = lsig256_1 w43 + w38 + lsig256_0 w30 + w29
+      w46 = lsig256_1 w44 + w39 + lsig256_0 w31 + w30
+      w47 = lsig256_1 w45 + w40 + lsig256_0 w32 + w31
+      w48 = lsig256_1 w46 + w41 + lsig256_0 w33 + w32
+      w49 = lsig256_1 w47 + w42 + lsig256_0 w34 + w33
+      w50 = lsig256_1 w48 + w43 + lsig256_0 w35 + w34
+      w51 = lsig256_1 w49 + w44 + lsig256_0 w36 + w35
+      w52 = lsig256_1 w50 + w45 + lsig256_0 w37 + w36
+      w53 = lsig256_1 w51 + w46 + lsig256_0 w38 + w37
+      w54 = lsig256_1 w52 + w47 + lsig256_0 w39 + w38
+      w55 = lsig256_1 w53 + w48 + lsig256_0 w40 + w39
+      w56 = lsig256_1 w54 + w49 + lsig256_0 w41 + w40
+      w57 = lsig256_1 w55 + w50 + lsig256_0 w42 + w41
+      w58 = lsig256_1 w56 + w51 + lsig256_0 w43 + w42
+      w59 = lsig256_1 w57 + w52 + lsig256_0 w44 + w43
+      w60 = lsig256_1 w58 + w53 + lsig256_0 w45 + w44
+      w61 = lsig256_1 w59 + w54 + lsig256_0 w46 + w45
+      w62 = lsig256_1 w60 + w55 + lsig256_0 w47 + w46
+      w63 = lsig256_1 w61 + w56 + lsig256_0 w48 + w47
+  return $! SHA256Sched w00 w01 w02 w03 w04 w05 w06 w07 w08 w09
+                        w10 w11 w12 w13 w14 w15 w16 w17 w18 w19
+                        w20 w21 w22 w23 w24 w25 w26 w27 w28 w29
+                        w30 w31 w32 w33 w34 w35 w36 w37 w38 w39
+                        w40 w41 w42 w43 w44 w45 w46 w47 w48 w49
+                        w50 w51 w52 w53 w54 w55 w56 w57 w58 w59
+                        w60 w61 w62 w63
+
+processSHA256Block :: SHA256State -> Get SHA256State
+processSHA256Block !s00@(SHA256S a00 b00 c00 d00 e00 f00 g00 h00) = do
+  (SHA256Sched w00 w01 w02 w03 w04 w05 w06 w07 w08 w09
+               w10 w11 w12 w13 w14 w15 w16 w17 w18 w19
+               w20 w21 w22 w23 w24 w25 w26 w27 w28 w29
+               w30 w31 w32 w33 w34 w35 w36 w37 w38 w39
+               w40 w41 w42 w43 w44 w45 w46 w47 w48 w49
+               w50 w51 w52 w53 w54 w55 w56 w57 w58 w59
+               w60 w61 w62 w63) <- getSHA256Sched
+  let s01 = step256 s00 0x428a2f98 w00
+      s02 = step256 s01 0x71374491 w01
+      s03 = step256 s02 0xb5c0fbcf w02
+      s04 = step256 s03 0xe9b5dba5 w03
+      s05 = step256 s04 0x3956c25b w04
+      s06 = step256 s05 0x59f111f1 w05
+      s07 = step256 s06 0x923f82a4 w06
+      s08 = step256 s07 0xab1c5ed5 w07
+      s09 = step256 s08 0xd807aa98 w08
+      s10 = step256 s09 0x12835b01 w09
+      s11 = step256 s10 0x243185be w10
+      s12 = step256 s11 0x550c7dc3 w11
+      s13 = step256 s12 0x72be5d74 w12
+      s14 = step256 s13 0x80deb1fe w13
+      s15 = step256 s14 0x9bdc06a7 w14
+      s16 = step256 s15 0xc19bf174 w15
+      s17 = step256 s16 0xe49b69c1 w16
+      s18 = step256 s17 0xefbe4786 w17
+      s19 = step256 s18 0x0fc19dc6 w18
+      s20 = step256 s19 0x240ca1cc w19
+      s21 = step256 s20 0x2de92c6f w20
+      s22 = step256 s21 0x4a7484aa w21
+      s23 = step256 s22 0x5cb0a9dc w22
+      s24 = step256 s23 0x76f988da w23
+      s25 = step256 s24 0x983e5152 w24
+      s26 = step256 s25 0xa831c66d w25
+      s27 = step256 s26 0xb00327c8 w26
+      s28 = step256 s27 0xbf597fc7 w27
+      s29 = step256 s28 0xc6e00bf3 w28
+      s30 = step256 s29 0xd5a79147 w29
+      s31 = step256 s30 0x06ca6351 w30
+      s32 = step256 s31 0x14292967 w31
+      s33 = step256 s32 0x27b70a85 w32
+      s34 = step256 s33 0x2e1b2138 w33
+      s35 = step256 s34 0x4d2c6dfc w34
+      s36 = step256 s35 0x53380d13 w35
+      s37 = step256 s36 0x650a7354 w36
+      s38 = step256 s37 0x766a0abb w37
+      s39 = step256 s38 0x81c2c92e w38
+      s40 = step256 s39 0x92722c85 w39
+      s41 = step256 s40 0xa2bfe8a1 w40
+      s42 = step256 s41 0xa81a664b w41
+      s43 = step256 s42 0xc24b8b70 w42
+      s44 = step256 s43 0xc76c51a3 w43
+      s45 = step256 s44 0xd192e819 w44
+      s46 = step256 s45 0xd6990624 w45
+      s47 = step256 s46 0xf40e3585 w46
+      s48 = step256 s47 0x106aa070 w47
+      s49 = step256 s48 0x19a4c116 w48
+      s50 = step256 s49 0x1e376c08 w49
+      s51 = step256 s50 0x2748774c w50
+      s52 = step256 s51 0x34b0bcb5 w51
+      s53 = step256 s52 0x391c0cb3 w52
+      s54 = step256 s53 0x4ed8aa4a w53
+      s55 = step256 s54 0x5b9cca4f w54
+      s56 = step256 s55 0x682e6ff3 w55
+      s57 = step256 s56 0x748f82ee w56
+      s58 = step256 s57 0x78a5636f w57
+      s59 = step256 s58 0x84c87814 w58
+      s60 = step256 s59 0x8cc70208 w59
+      s61 = step256 s60 0x90befffa w60
+      s62 = step256 s61 0xa4506ceb w61
+      s63 = step256 s62 0xbef9a3f7 w62
+      s64 = step256 s63 0xc67178f2 w63
+      SHA256S a64 b64 c64 d64 e64 f64 g64 h64 = s64
+  return $! SHA256S (a00 + a64) (b00 + b64) (c00 + c64) (d00 + d64)
+                    (e00 + e64) (f00 + f64) (g00 + g64) (h00 + h64)
+
+{-# INLINE step256 #-}
+step256 :: SHA256State -> Word32 -> Word32 -> SHA256State
+step256 !(SHA256S a b c d e f g h) k w = SHA256S a' b' c' d' e' f' g' h'
+ where
+  t1 = h + bsig256_1 e + ch e f g + k + w
+  t2 = bsig256_0 a + maj a b c
+  h' = g
+  g' = f
+  f' = e
+  e' = d + t1
+  d' = c
+  c' = b
+  b' = a
+  a' = t1 + t2
+


=====================================
testsuite/tests/ghci/should_run/all.T
=====================================
@@ -85,10 +85,10 @@ test('T19628', [extra_files(['T19628a.hs']), only_ways(['ghci']) ], compile_and_
 test('T21052', just_ghci, ghci_script, ['T21052.script'])
 test('T21300', just_ghci, ghci_script, ['T21300.script'])
 test('UnliftedDataType2', just_ghci, compile_and_run, [''])
-
 test('T22829', just_ghci + [extra_hc_opts("-Wmissing-import-lists -Werror")], compile_and_run, [''])
 test('T23229', just_ghci + [extra_hc_opts("-this-unit-id my-package -Wno-missing-methods T23229")], ghci_script, ['T23229.script'])
 test('T22958a', just_ghci, compile_and_run, [''])
 test('T22958b', just_ghci, compile_and_run, [''])
 test('T22958c', just_ghci, compile_and_run, [''])
 test('GhciMainIs', just_ghci, compile_and_run, ['-main-is otherMain'])
+test('LargeBCO', [extra_files(['LargeBCO_A.hs']), req_interp, extra_hc_opts("-O -fbyte-code-and-object-code -fprefer-byte-code")], compile_and_run, [''])



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/564164ef323a9f2cdeb8c69dcb2cf6df6382de4e

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/564164ef323a9f2cdeb8c69dcb2cf6df6382de4e
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20230626/dc2f2d92/attachment-0001.html>


More information about the ghc-commits mailing list