[Git][ghc/ghc][wip/andreask/interpreter_primops] Add Addr# indexing primops. Fix some obvious 32bit bugs.
Andreas Klebinger (@AndreasK)
gitlab at gitlab.haskell.org
Mon Mar 3 20:03:26 UTC 2025
Andreas Klebinger pushed to branch wip/andreask/interpreter_primops at Glasgow Haskell Compiler / GHC
Commits:
c5e9a83e by Andreas Klebinger at 2025-03-03T20:40:56+01:00
Add Addr# indexing primops. Fix some obvious 32bit bugs.
- - - - -
6 changed files:
- compiler/GHC/ByteCode/Asm.hs
- compiler/GHC/ByteCode/Instr.hs
- compiler/GHC/StgToByteCode.hs
- rts/Disassembler.c
- rts/Interpreter.c
- rts/include/rts/Bytecodes.h
Changes:
=====================================
compiler/GHC/ByteCode/Asm.hs
=====================================
@@ -544,109 +544,137 @@ assembleI platform i = case i of
W64 -> emit bci_OP_ADD_64 []
W32 -> emit bci_OP_ADD_32 []
W16 -> emit bci_OP_ADD_16 []
- W8 -> emit bci_OP_ADD_08 []
+ W8 -> emit bci_OP_ADD_08 []
+ _ -> unsupported_width
OP_SUB w -> case w of
W64 -> emit bci_OP_SUB_64 []
W32 -> emit bci_OP_SUB_32 []
W16 -> emit bci_OP_SUB_16 []
- W8 -> emit bci_OP_SUB_08 []
+ W8 -> emit bci_OP_SUB_08 []
+ _ -> unsupported_width
OP_AND w -> case w of
W64 -> emit bci_OP_AND_64 []
W32 -> emit bci_OP_AND_32 []
W16 -> emit bci_OP_AND_16 []
- W8 -> emit bci_OP_AND_08 []
+ W8 -> emit bci_OP_AND_08 []
+ _ -> unsupported_width
OP_XOR w -> case w of
W64 -> emit bci_OP_XOR_64 []
W32 -> emit bci_OP_XOR_32 []
W16 -> emit bci_OP_XOR_16 []
- W8 -> emit bci_OP_XOR_08 []
+ W8 -> emit bci_OP_XOR_08 []
+ _ -> unsupported_width
OP_OR w -> case w of
W64 -> emit bci_OP_OR_64 []
W32 -> emit bci_OP_OR_32 []
W16 -> emit bci_OP_OR_16 []
- W8 -> emit bci_OP_OR_08 []
+ W8 -> emit bci_OP_OR_08 []
+ _ -> unsupported_width
OP_NOT w -> case w of
W64 -> emit bci_OP_NOT_64 []
W32 -> emit bci_OP_NOT_32 []
W16 -> emit bci_OP_NOT_16 []
- W8 -> emit bci_OP_NOT_08 []
+ W8 -> emit bci_OP_NOT_08 []
+ _ -> unsupported_width
OP_NEG w -> case w of
W64 -> emit bci_OP_NEG_64 []
W32 -> emit bci_OP_NEG_32 []
W16 -> emit bci_OP_NEG_16 []
- W8 -> emit bci_OP_NEG_08 []
+ W8 -> emit bci_OP_NEG_08 []
+ _ -> unsupported_width
OP_MUL w -> case w of
W64 -> emit bci_OP_MUL_64 []
W32 -> emit bci_OP_MUL_32 []
W16 -> emit bci_OP_MUL_16 []
- W8 -> emit bci_OP_MUL_08 []
+ W8 -> emit bci_OP_MUL_08 []
+ _ -> unsupported_width
OP_SHL w -> case w of
W64 -> emit bci_OP_SHL_64 []
W32 -> emit bci_OP_SHL_32 []
W16 -> emit bci_OP_SHL_16 []
- W8 -> emit bci_OP_SHL_08 []
+ W8 -> emit bci_OP_SHL_08 []
+ _ -> unsupported_width
OP_ASR w -> case w of
W64 -> emit bci_OP_ASR_64 []
W32 -> emit bci_OP_ASR_32 []
W16 -> emit bci_OP_ASR_16 []
- W8 -> emit bci_OP_ASR_08 []
+ W8 -> emit bci_OP_ASR_08 []
+ _ -> unsupported_width
OP_LSR w -> case w of
W64 -> emit bci_OP_LSR_64 []
W32 -> emit bci_OP_LSR_32 []
W16 -> emit bci_OP_LSR_16 []
- W8 -> emit bci_OP_LSR_08 []
+ W8 -> emit bci_OP_LSR_08 []
+ _ -> unsupported_width
OP_NEQ w -> case w of
W64 -> emit bci_OP_NEQ_64 []
W32 -> emit bci_OP_NEQ_32 []
W16 -> emit bci_OP_NEQ_16 []
- W8 -> emit bci_OP_NEQ_08 []
+ W8 -> emit bci_OP_NEQ_08 []
+ _ -> unsupported_width
OP_EQ w -> case w of
W64 -> emit bci_OP_EQ_64 []
W32 -> emit bci_OP_EQ_32 []
W16 -> emit bci_OP_EQ_16 []
- W8 -> emit bci_OP_EQ_08 []
+ W8 -> emit bci_OP_EQ_08 []
+ _ -> unsupported_width
OP_U_LT w -> case w of
W64 -> emit bci_OP_U_LT_64 []
W32 -> emit bci_OP_U_LT_32 []
W16 -> emit bci_OP_U_LT_16 []
- W8 -> emit bci_OP_U_LT_08 []
+ W8 -> emit bci_OP_U_LT_08 []
+ _ -> unsupported_width
OP_S_LT w -> case w of
W64 -> emit bci_OP_S_LT_64 []
W32 -> emit bci_OP_S_LT_32 []
W16 -> emit bci_OP_S_LT_16 []
- W8 -> emit bci_OP_S_LT_08 []
+ W8 -> emit bci_OP_S_LT_08 []
+ _ -> unsupported_width
OP_U_GE w -> case w of
W64 -> emit bci_OP_U_GE_64 []
W32 -> emit bci_OP_U_GE_32 []
W16 -> emit bci_OP_U_GE_16 []
- W8 -> emit bci_OP_U_GE_08 []
+ W8 -> emit bci_OP_U_GE_08 []
+ _ -> unsupported_width
OP_S_GE w -> case w of
W64 -> emit bci_OP_S_GE_64 []
W32 -> emit bci_OP_S_GE_32 []
W16 -> emit bci_OP_S_GE_16 []
- W8 -> emit bci_OP_S_GE_08 []
+ W8 -> emit bci_OP_S_GE_08 []
+ _ -> unsupported_width
OP_U_GT w -> case w of
W64 -> emit bci_OP_U_GT_64 []
W32 -> emit bci_OP_U_GT_32 []
W16 -> emit bci_OP_U_GT_16 []
- W8 -> emit bci_OP_U_GT_08 []
+ W8 -> emit bci_OP_U_GT_08 []
+ _ -> unsupported_width
OP_S_GT w -> case w of
W64 -> emit bci_OP_S_GT_64 []
W32 -> emit bci_OP_S_GT_32 []
W16 -> emit bci_OP_S_GT_16 []
- W8 -> emit bci_OP_S_GT_08 []
+ W8 -> emit bci_OP_S_GT_08 []
+ _ -> unsupported_width
OP_U_LE w -> case w of
W64 -> emit bci_OP_U_LE_64 []
W32 -> emit bci_OP_U_LE_32 []
W16 -> emit bci_OP_U_LE_16 []
- W8 -> emit bci_OP_U_LE_08 []
+ W8 -> emit bci_OP_U_LE_08 []
+ _ -> unsupported_width
OP_S_LE w -> case w of
W64 -> emit bci_OP_S_LE_64 []
W32 -> emit bci_OP_S_LE_32 []
W16 -> emit bci_OP_S_LE_16 []
- W8 -> emit bci_OP_S_LE_08 []
+ W8 -> emit bci_OP_S_LE_08 []
+ _ -> unsupported_width
+
+ OP_INDEX_ADDR w -> case w of
+ W64 -> emit bci_OP_INDEX_ADDR_64 []
+ W32 -> emit bci_OP_INDEX_ADDR_32 []
+ W16 -> emit bci_OP_INDEX_ADDR_16 []
+ W8 -> emit bci_OP_INDEX_ADDR_08 []
+ _ -> unsupported_width
BRK_FUN arr tick_mod tickx info_mod infox cc ->
do p1 <- ptr (BCOPtrBreakArray arr)
@@ -664,6 +692,7 @@ assembleI platform i = case i of
#endif
where
+ unsupported_width = panic "GHC.ByteCode.Asm: Unsupported Width"
literal (LitLabel fs _) = litlabel fs
literal LitNullAddr = word 0
literal (LitFloat r) = float (fromRational r)
=====================================
compiler/GHC/ByteCode/Instr.hs
=====================================
@@ -36,7 +36,6 @@ import GHC.Stack.CCS (CostCentre)
import GHC.Stg.Syntax
import GHCi.BreakArray (BreakArray)
import Language.Haskell.Syntax.Module.Name (ModuleName)
-import GHC.Types.RepType (PrimRep)
import GHC.Cmm.Type (Width)
-- ----------------------------------------------------------------------------
@@ -245,6 +244,10 @@ data BCInstr
| OP_S_GT !Width
| OP_S_LE !Width
+ -- Always puts at least a machine word on the stack, with the low part of the stack containing the result.
+ -- We zero extend the result we put on the stack.
+ | OP_INDEX_ADDR !Width
+
-- For doing magic ByteArray passing to foreign calls
| SWIZZLE !WordOff -- to the ptr N words down the stack,
!Int -- add M
@@ -448,6 +451,8 @@ instance Outputable BCInstr where
ppr (OP_U_GT w) = text "OP_U_GT_" <> ppr w
ppr (OP_U_LE w) = text "OP_U_LE_" <> ppr w
+ ppr (OP_INDEX_ADDR w) = text "OP_INDEX_ADDR_" <> ppr w
+
ppr (SWIZZLE stkoff n) = text "SWIZZLE " <+> text "stkoff" <+> ppr stkoff
<+> text "by" <+> ppr n
ppr ENTER = text "ENTER"
@@ -578,6 +583,7 @@ bciStackUse OP_U_LT{} = 0
bciStackUse OP_U_GT{} = 0
bciStackUse OP_U_LE{} = 0
bciStackUse OP_U_GE{} = 0
+bciStackUse OP_INDEX_ADDR{} = 0
bciStackUse SWIZZLE{} = 0
bciStackUse BRK_FUN{} = 0
=====================================
compiler/GHC/StgToByteCode.hs
=====================================
@@ -734,9 +734,14 @@ schemeT d s p (StgOpApp (StgFCallOp (CCall ccall_spec) _ty) args result_ty)
then generateCCall d s p ccall_spec result_ty args
else unsupportedCConvException
-schemeT d s p (StgOpApp (StgPrimOp op) args _ty)
- | Just prim_code <- doPrimOp op d s p args = prim_code
- | otherwise = doTailCall d s p (primOpId op) (reverse args)
+schemeT d s p (StgOpApp (StgPrimOp op) args _ty) = do
+ profile <- getProfile
+ let platform = profilePlatform profile
+ case doPrimOp platform op d s p args of
+ -- Can we do this right in the interpreter?
+ Just prim_code -> prim_code
+ -- Otherwise we have to do a call to the primop wrapper instead :(
+ _ -> doTailCall d s p (primOpId op) (reverse args)
schemeT d s p (StgOpApp (StgPrimCallOp (PrimCall label unit)) args result_ty)
= generatePrimCall d s p label (Just unit) result_ty args
@@ -831,15 +836,15 @@ doTailCall init_d s p fn args = do
(final_d, more_push_code) <- push_seq (d + sz) args
return (final_d, push_code `appOL` more_push_code)
-doPrimOp :: PrimOp
+doPrimOp :: Platform
+ -> PrimOp
-> StackDepth
-> Sequel
-> BCEnv
-> [StgArg]
-> Maybe (BcM BCInstrList)
-doPrimOp op init_d s p args =
+doPrimOp platform op init_d s p args =
case op of
- -- TODO: IntAddOp and friends are only 64bit on 64bit platforms
IntAddOp -> primOp OP_ADD
Int64AddOp -> primOp OP_ADD
WordAddOp -> primOp OP_ADD
@@ -850,6 +855,7 @@ doPrimOp op init_d s p args =
WordSubOp -> primOp OP_SUB
Int64SubOp -> primOp OP_SUB
Word64SubOp -> primOp OP_SUB
+ AddrSubOp -> primOp OP_SUB
Int8SubOp -> primOp OP_SUB
Word8SubOp -> primOp OP_SUB
@@ -886,26 +892,33 @@ doPrimOp op init_d s p args =
IntNeOp -> primOp OP_NEQ
WordNeOp -> primOp OP_NEQ
Word64NeOp -> primOp OP_NEQ
+ AddrNeOp -> primOp OP_NEQ
IntEqOp -> primOp OP_EQ
WordEqOp -> primOp OP_EQ
Word64EqOp -> primOp OP_EQ
+ AddrEqOp -> primOp OP_EQ
+ CharEqOp -> primOp OP_EQ
IntLtOp -> primOp OP_S_LT
WordLtOp -> primOp OP_U_LT
Word64LtOp -> primOp OP_U_LT
+ AddrLtOp -> primOp OP_U_LT
IntGeOp -> primOp OP_S_GE
WordGeOp -> primOp OP_U_GE
Word64GeOp -> primOp OP_U_GE
+ AddrGeOp -> primOp OP_U_GE
IntGtOp -> primOp OP_S_GT
WordGtOp -> primOp OP_U_GT
Word64GtOp -> primOp OP_U_GT
+ AddrGtOp -> primOp OP_U_GT
IntLeOp -> primOp OP_S_LE
WordLeOp -> primOp OP_U_LE
Word64LeOp -> primOp OP_U_LE
+ AddrLeOp -> primOp OP_U_LE
IntNegOp -> primOp OP_NEG
Int64NegOp -> primOp OP_NEG
@@ -925,9 +938,14 @@ doPrimOp op init_d s p args =
ChrOp -> no_op -- Int# and Char# are rep'd the same
OrdOp -> no_op
+ IndexOffAddrOp_Word8 -> primOpWithRep (OP_INDEX_ADDR W8) W8
+ IndexOffAddrOp_Word16 -> primOpWithRep (OP_INDEX_ADDR W16) W16
+ IndexOffAddrOp_Word32 -> primOpWithRep (OP_INDEX_ADDR W32) W32
+ IndexOffAddrOp_Word64 -> primOpWithRep (OP_INDEX_ADDR W64) W64
+
_ -> Nothing
where
- primArg1Width platform (arg:_)
+ primArg1Width arg
| rep <- (stgArgRepU arg)
= case rep of
AddrRep -> platformWordWidth platform
@@ -953,19 +971,33 @@ doPrimOp op init_d s p args =
VecRep{} -> unexpectedRep
where
unexpectedRep = panic "doPrimOp: Unexpected argument rep"
- primArg1Width _ _ = panic "doPrimOp: Unexpected argument count"
+
+
+ -- TODO: The slides for the result need to be two words on 32bit for 64bit ops.
+ mkNReturn width
+ | W64 <- width = RETURN L -- L works for 64 bit on any platform
+ | otherwise = RETURN N -- <64bit width, fits in word on all platforms
+
+ mkSlideWords width = if platformWordWidth platform < width then 2 else 1
-- Push args, execute primop, slide, return_N
primOp op_inst = Just $ do
- platform <- profilePlatform <$> getProfile
- prim_code <- mkPrimOpCode init_d s p (op_inst $ primArg1Width platform args) $ args
- let slide = mkSlideW 1 (bytesToWords platform $ init_d - s) `snocOL` RETURN N
+ let width = primArg1Width (head args)
+ prim_code <- mkPrimOpCode init_d s p (op_inst width) $ args
+ let slide = mkSlideW (mkSlideWords width) (bytesToWords platform $ init_d - s) `snocOL` mkNReturn width
+ return $ prim_code `appOL` slide
+
+ primOpWithRep :: BCInstr -> Width -> Maybe (BcM (OrdList BCInstr))
+ primOpWithRep op_inst width = Just $ do
+ prim_code <- mkPrimOpCode init_d s p op_inst $ args
+
+ let slide = mkSlideW (mkSlideWords width) (bytesToWords platform $ init_d - s) `snocOL` mkNReturn width
return $ prim_code `appOL` slide
no_op = Just $ do
- platform <- profilePlatform <$> getProfile
+ let width = primArg1Width (head args)
prim_code <- terribleNoOp init_d s p undefined args
- let slide = mkSlideW 1 (bytesToWords platform $ init_d - s) `snocOL` RETURN N
+ let slide = mkSlideW (mkSlideWords width) (bytesToWords platform $ init_d - s) `snocOL` mkNReturn width
return $ prim_code `appOL` slide
-- It's horrible, but still better than calling intToWord ...
=====================================
rts/Disassembler.c
=====================================
@@ -487,6 +487,8 @@ disInstr ( StgBCO *bco, int pc )
BELCH_INSTR_NAME_ALL_SIZES(OP_S_GE);
BELCH_INSTR_NAME_ALL_SIZES(OP_S_LT);
+ BELCH_INSTR_NAME_ALL_SIZES(OP_INDEX_ADDR);
+
default:
barf("disInstr: unknown opcode %u", (unsigned int) instr);
}
=====================================
rts/Interpreter.c
=====================================
@@ -2274,6 +2274,40 @@ run_BCO:
case bci_OP_NOT_08: UN_SIZED_OP(~, StgWord8)
case bci_OP_NEG_08: UN_SIZED_OP(-, StgInt8)
+ case bci_OP_INDEX_ADDR_64:
+ {
+ StgWord64* addr = (StgWord64*) SpW(1);
+ StgWord offset = (StgWord) SpW(0);
+ if(sizeof(StgPtr) == sizeof(StgWord64)) {
+ Sp_addW(1);
+ }
+ SpW64(0) = *(addr+offset);
+ goto nextInsn;
+ }
+
+ case bci_OP_INDEX_ADDR_32:
+ {
+ StgWord32* addr = (StgWord32*) SpW(1);
+ StgWord offset = (StgWord) SpW(0);
+ Sp_addW(1);
+ SpW(0) = (StgWord) *(addr+offset);
+ goto nextInsn;
+ }
+ case bci_OP_INDEX_ADDR_16:
+ {
+ StgWord16* addr = (StgWord16*) SpW(0);
+ SpW(0) = (StgWord) *addr;
+ goto nextInsn;
+ }
+ case bci_OP_INDEX_ADDR_08:
+ {
+ StgWord8* addr = (StgWord8*) SpW(1);
+ StgWord offset = (StgWord) SpW(0);
+ Sp_addW(1);
+ SpW(0) = (StgWord) *(addr+offset);
+ goto nextInsn;
+ }
+
case bci_CCALL: {
void *tok;
W_ stk_offset = BCO_GET_LARGE_ARG;
=====================================
rts/include/rts/Bytecodes.h
=====================================
@@ -209,6 +209,11 @@
#define bci_OP_S_LT_08 228
#define bci_OP_S_LE_08 229
+#define bci_OP_INDEX_ADDR_08 240
+#define bci_OP_INDEX_ADDR_16 241
+#define bci_OP_INDEX_ADDR_32 242
+#define bci_OP_INDEX_ADDR_64 243
+
/* If you need to go past 255 then you will run into the flags */
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/c5e9a83e9bfb7d9b0cfc9406712a7cbcf3d32eec
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/c5e9a83e9bfb7d9b0cfc9406712a7cbcf3d32eec
You're receiving this email because of your account on gitlab.haskell.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20250303/852ca224/attachment-0001.html>
More information about the ghc-commits
mailing list