[Git][ghc/ghc][wip/andreask/interpreter_primops] Add more ops, add support for subword primops
Andreas Klebinger (@AndreasK)
gitlab at gitlab.haskell.org
Sun Feb 23 15:39:02 UTC 2025
Andreas Klebinger pushed to branch wip/andreask/interpreter_primops at Glasgow Haskell Compiler / GHC
Commits:
64939899 by Andreas Klebinger at 2025-02-23T16:16:44+01:00
Add more ops, add support for subword primops
- - - - -
6 changed files:
- compiler/GHC/ByteCode/Asm.hs
- compiler/GHC/ByteCode/Instr.hs
- compiler/GHC/StgToByteCode.hs
- rts/Disassembler.c
- rts/Interpreter.c
- rts/include/rts/Bytecodes.h
Changes:
=====================================
compiler/GHC/ByteCode/Asm.hs
=====================================
@@ -360,6 +360,37 @@ inspectAsm platform long_jumps initial_offset
largeArgInstr :: Word16 -> Word16
largeArgInstr bci = bci_FLAG_LARGE_ARGS .|. bci
+sizedInstr :: Platform -> Word16 -> PrimRep -> Word16
+sizedInstr platform bci rep =
+ bci .|. ((interpreterWidth rep) `shiftL` 13)
+ where
+ -- For operations currently not used or supported by the interpreter.
+ not_supported = panic "sizedInstr: Trying to get width for rep not supported by interpreter"
+ -- For performance reasons 64bit wide operations should always use fixed width operations.
+ err_w64 = panic "sizedInstr: Trying to get width for a W64 rep, these should use fixed rep instructions instead"
+ interpreterWidth rep =
+ case rep of
+ Int8Rep -> 0
+ Int16Rep -> 1
+ Int32Rep -> 2
+ Int64Rep -> err_w64
+ IntRep -> case platformWordSize platform of
+ PW4 -> 2
+ PW8 -> err_w64
+ Word8Rep -> 0
+ Word16Rep -> 1
+ Word32Rep -> 2
+ Word64Rep -> err_w64
+ WordRep -> case platformWordSize platform of
+ PW4 -> 2
+ PW8 -> err_w64
+ AddrRep -> err_w64
+ FloatRep -> not_supported
+ DoubleRep -> not_supported
+ VecRep{} -> not_supported
+ BoxedRep{} -> not_supported
+
+
largeArg :: Platform -> Word64 -> [Word16]
largeArg platform w = case platformWordSize platform of
PW8 -> [fromIntegral (w `shiftR` 48),
@@ -510,10 +541,25 @@ assembleI platform i = case i of
PRIMCALL -> emit bci_PRIMCALL []
OP_ADD -> emit bci_OP_ADD []
+ OP_SUB -> emit bci_OP_SUB []
OP_AND -> emit bci_OP_AND []
OP_XOR -> emit bci_OP_XOR []
OP_NOT -> emit bci_OP_NOT []
+ OP_NEG -> emit bci_OP_NEG []
+ OP_MUL -> emit bci_OP_MUL []
+ OP_SHL -> emit bci_OP_SHL []
+ OP_ASR -> emit bci_OP_ASR []
+ OP_LSR -> emit bci_OP_LSR []
+
OP_NEQ -> emit bci_OP_NEQ []
+ OP_EQ -> emit bci_OP_EQ []
+
+ OP_LT -> emit bci_OP_LT []
+ OP_GE -> emit bci_OP_GE []
+ OP_GT -> emit bci_OP_GT []
+ OP_LE -> emit bci_OP_LE []
+
+ OP_SIZED_SUB rep -> emit (sizedInstr platform bci_OP_SIZED_SUB rep) []
BRK_FUN arr tick_mod tickx info_mod infox cc ->
do p1 <- ptr (BCOPtrBreakArray arr)
=====================================
compiler/GHC/ByteCode/Instr.hs
=====================================
@@ -36,6 +36,7 @@ import GHC.Stack.CCS (CostCentre)
import GHC.Stg.Syntax
import GHCi.BreakArray (BreakArray)
import Language.Haskell.Syntax.Module.Name (ModuleName)
+import GHC.Types.RepType (PrimRep)
-- ----------------------------------------------------------------------------
-- Bytecode instructions
@@ -214,13 +215,29 @@ data BCInstr
| PRIMCALL
+ -- Primops
| OP_ADD
+ | OP_SUB
| OP_AND
| OP_XOR
+ | OP_MUL
+ | OP_SHL
+ | OP_ASR
+ | OP_LSR
+
| OP_NOT
+ | OP_NEG
+
| OP_NEQ
+ | OP_EQ
+
+ | OP_LT
+ | OP_GE
+ | OP_GT
+ | OP_LE
+
+ | OP_SIZED_SUB PrimRep
- -- Primops
-- For doing magic ByteArray passing to foreign calls
| SWIZZLE !WordOff -- to the ptr N words down the stack,
@@ -403,10 +420,25 @@ instance Outputable BCInstr where
ppr PRIMCALL = text "PRIMCALL"
ppr OP_ADD = text "OP_ADD"
+ ppr OP_SUB = text "OP_SUB"
ppr OP_AND = text "OP_AND"
ppr OP_XOR = text "OP_XOR"
ppr OP_NOT = text "OP_NOT"
- ppr OP_NEQ = text "OP_NEQ"
+ ppr OP_NEG = text "OP_NEG"
+ ppr OP_MUL = text "OP_MUL"
+ ppr OP_SHL = text "OP_SHL"
+ ppr OP_ASR = text "OP_ASR"
+ ppr OP_LSR = text "OP_LSR"
+
+ ppr OP_EQ = text "OP_EQ"
+ ppr OP_NEQ = text "OP_NEQ"
+ ppr OP_LT = text "OP_LT"
+ ppr OP_GE = text "OP_GE"
+ ppr OP_GT = text "OP_GT"
+ ppr OP_LE = text "OP_LE"
+
+ ppr (OP_SIZED_SUB rep) = text "OP_SIZED_SUB" <+> (ppr rep)
+
ppr (SWIZZLE stkoff n) = text "SWIZZLE " <+> text "stkoff" <+> ppr stkoff
<+> text "by" <+> ppr n
@@ -517,10 +549,24 @@ bciStackUse RETURN_TUPLE{} = 1 -- pushes stg_ret_t header
bciStackUse CCALL{} = 0
bciStackUse PRIMCALL{} = 1 -- pushes stg_primcall
bciStackUse OP_ADD{} = 0 -- We overestimate, it's -1 actually ...
+bciStackUse OP_SUB{} = 0
bciStackUse OP_AND{} = 0
bciStackUse OP_XOR{} = 0
bciStackUse OP_NOT{} = 0
+bciStackUse OP_NEG{} = 0
+bciStackUse OP_MUL{} = 0
+bciStackUse OP_SHL{} = 0
+bciStackUse OP_ASR{} = 0
+bciStackUse OP_LSR{} = 0
+
bciStackUse OP_NEQ{} = 0
+bciStackUse OP_EQ{} = 0
+bciStackUse OP_LT{} = 0
+bciStackUse OP_GT{} = 0
+bciStackUse OP_LE{} = 0
+bciStackUse OP_GE{} = 0
+
+bciStackUse OP_SIZED_SUB{} = 0
bciStackUse SWIZZLE{} = 0
bciStackUse BRK_FUN{} = 0
=====================================
compiler/GHC/StgToByteCode.hs
=====================================
@@ -838,28 +838,81 @@ doPrimOp :: PrimOp
-> Maybe (BcM BCInstrList)
doPrimOp op init_d s p args =
case op of
- -- TODO: No idea if the argument order here is correct.
- -- But it doesn't matter for the current set of primops.
+ -- TODO: IntAddOp and friends are only 64bit on 64bit platforms
IntAddOp -> primOp OP_ADD
+ Int64AddOp -> primOp OP_ADD
WordAddOp -> primOp OP_ADD
+ Word64AddOp -> primOp OP_ADD
+
+ IntSubOp -> primOp OP_SUB
+ WordSubOp -> primOp OP_SUB
+ Int64SubOp -> primOp OP_SUB
+ Word64SubOp -> primOp OP_SUB
+
+ Int8SubOp -> primOp (OP_SIZED_SUB primArg1Width)
+ Word8SubOp -> primOp (OP_SIZED_SUB primArg1Width)
+ Int16SubOp -> primOp (OP_SIZED_SUB primArg1Width)
+ Word16SubOp -> primOp (OP_SIZED_SUB primArg1Width)
+ Int32SubOp -> primOp (OP_SIZED_SUB primArg1Width)
+ Word32SubOp -> primOp (OP_SIZED_SUB primArg1Width)
IntAndOp -> primOp OP_AND
WordAndOp -> primOp OP_AND
+ Word64AndOp -> primOp OP_AND
IntNotOp -> primOp OP_NOT
WordNotOp -> primOp OP_NOT
+ Word64NotOp -> primOp OP_NOT
IntXorOp -> primOp OP_XOR
WordXorOp -> primOp OP_XOR
+ Word64XorOp -> primOp OP_XOR
IntNeOp -> primOp OP_NEQ
WordNeOp -> primOp OP_NEQ
-
- IntToWordOp -> no_op
- WordToIntOp -> no_op
+ Word64NeOp -> primOp OP_NEQ
+
+ IntEqOp -> primOp OP_EQ
+ WordEqOp -> primOp OP_EQ
+ Word64EqOp -> primOp OP_EQ
+
+ IntLtOp -> primOp OP_LT
+ WordLtOp -> primOp OP_LT
+ Word64LtOp -> primOp OP_LT
+
+ IntGeOp -> primOp OP_GE
+ WordGeOp -> primOp OP_GE
+ Word64GeOp -> primOp OP_GE
+
+ IntGtOp -> primOp OP_GT
+ WordGtOp -> primOp OP_GT
+ Word64GtOp -> primOp OP_GT
+
+ IntLeOp -> primOp OP_LE
+ WordLeOp -> primOp OP_LE
+ Word64LeOp -> primOp OP_LE
+
+ IntNegOp -> primOp OP_NEG
+ Int64NegOp -> primOp OP_NEG
+
+ IntToWordOp -> no_op
+ WordToIntOp -> no_op
+ Int8ToWord8Op -> no_op
+ Word8ToInt8Op -> no_op
+ Int16ToWord16Op -> no_op
+ Word16ToInt16Op -> no_op
+ Int32ToWord32Op -> no_op
+ Word32ToInt32Op -> no_op
+ Int64ToWord64Op -> no_op
+ Word64ToInt64Op -> no_op
+ IntToAddrOp -> no_op
+ AddrToIntOp -> no_op
+ ChrOp -> no_op -- Int# and Char# are rep'd the same
+ OrdOp -> no_op
_ -> Nothing
where
+ primArg1Width = (stgArgRepU $ head args) :: PrimRep
-- Push args, execute primop, slide, return_N
primOp op_inst = Just $ do
platform <- profilePlatform <$> getProfile
=====================================
rts/Disassembler.c
=====================================
@@ -62,6 +62,7 @@ disInstr ( StgBCO *bco, int pc )
#error Cannot cope with WORD_SIZE_IN_BITS being nether 32 nor 64
#endif
#define BCO_GET_LARGE_ARG ((instr & bci_FLAG_LARGE_ARGS) ? BCO_READ_NEXT_WORD : BCO_NEXT)
+#define BCO_GET_BCI_WIDTH(bci) ((bci & bci_FLAG_WIDTH) >> 13)
switch (instr & 0xff) {
case bci_BRK_FUN:
@@ -462,22 +463,57 @@ disInstr ( StgBCO *bco, int pc )
case bci_OP_ADD:
debugBelch("OP_ADD\n");
break;
-
+ case bci_OP_SUB:
+ debugBelch("OP_SUB\n");
+ break;
case bci_OP_AND:
debugBelch("OP_AND\n");
break;
-
case bci_OP_XOR:
debugBelch("OP_XOR\n");
break;
-
case bci_OP_NOT:
debugBelch("OP_NOT\n");
break;
+ case bci_OP_NEG:
+ debugBelch("OP_NEG\n");
+ break;
+ case bci_OP_MUL:
+ debugBelch("OP_MUL\n");
+ break;
+ case bci_OP_SHL:
+ debugBelch("OP_SHL\n");
+ break;
+ case bci_OP_ASR:
+ debugBelch("OP_ASR\n");
+ break;
+ case bci_OP_LSR:
+ debugBelch("OP_LSR\n");
+ break;
case bci_OP_NEQ:
debugBelch("OP_NEQ\n");
break;
+ case bci_OP_EQ:
+ debugBelch("OP_EQ\n");
+ break;
+
+ case bci_OP_GT:
+ debugBelch("OP_GT\n");
+ break;
+ case bci_OP_LE:
+ debugBelch("OP_LE\n");
+ break;
+ case bci_OP_GE:
+ debugBelch("OP_GE\n");
+ break;
+ case bci_OP_LT:
+ debugBelch("OP_LT\n");
+ break;
+
+ case bci_OP_SIZED_SUB:
+ debugBelch("OP_SIZED_SUB_%d\n",BCO_GET_BCI_WIDTH(instr));
+ break;
default:
=====================================
rts/Interpreter.c
=====================================
@@ -95,6 +95,7 @@ tag functions as tag inference currently doesn't rely on those being properly ta
#error Cannot cope with WORD_SIZE_IN_BITS being nether 32 nor 64
#endif
#define BCO_GET_LARGE_ARG ((bci & bci_FLAG_LARGE_ARGS) ? BCO_READ_NEXT_WORD : BCO_NEXT)
+#define BCO_GET_BCI_WIDTH(bci) ((bci & bci_FLAG_WIDTH) >> 13)
#define BCO_PTR(n) (W_)ptrs[n]
#define BCO_LIT(n) literals[n]
@@ -161,16 +162,46 @@ tag functions as tag inference currently doesn't rely on those being properly ta
#define Sp_minusB(n) ((void *)((StgWord8*)Sp - (ptrdiff_t)(n)))
#define Sp_plusW(n) (Sp_plusB((ptrdiff_t)(n) * (ptrdiff_t)sizeof(W_)))
+#define Sp_plusW64(n) (Sp_plusB((ptrdiff_t)(n) * (ptrdiff_t)sizeof(StgWord64)))
#define Sp_minusW(n) (Sp_minusB((ptrdiff_t)(n) * (ptrdiff_t)sizeof(W_)))
#define Sp_addB(n) (Sp = Sp_plusB(n))
#define Sp_subB(n) (Sp = Sp_minusB(n))
#define Sp_addW(n) (Sp = Sp_plusW(n))
+#define Sp_addW64(n) (Sp = Sp_plusW64(n))
#define Sp_subW(n) (Sp = Sp_minusW(n))
#define SpW(n) (*(StgWord*)(Sp_plusW(n)))
+#define Sp_T(n,ty) (*(ty*)(Sp_plusB(sizeof(ty))))
+#define SpW64(n) (*(StgWord64*)(Sp_plusW64(n)))
#define SpB(n) (*(StgWord*)(Sp_plusB(n)))
+
+/* Note [Interpreter subword primops]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+In general the interpreter stack is host-platform word aligned.
+However we make two exceptions:
+
+When allocating a constructor we push subwords on the stack, which are
+cleaned up by the PACK instruction afterwards.
+
+Similarly when pushing arguments for subword primops we take the liberty
+of pushing those arguments in their actual size, and pop them in the actual
+primop implementation.
+
+For the subword operations the operation will push the result to the
+stack zero-extended to platform word size.
+
+*/
+
+#define BIN_OP(op,ty) \
+ { \
+ (ty) r = (ty) Sp_T(0,ty) op (ty) *(ty*)(Sp_plusB(off+2)); \
+ SpW64(1) = r; \
+ Sp_addB(sizeof(ty)); \
+ goto nextInsn; \
+ } \
+
STATIC_INLINE StgPtr
allocate_NONUPD (Capability *cap, int n_words)
{
@@ -1113,9 +1144,9 @@ run_BCO:
#endif
bci = BCO_NEXT;
- /* We use the high 8 bits for flags, only the highest of which is
- * currently allocated */
- ASSERT((bci & 0xFF00) == (bci & 0x8000));
+ /* We use the high 8 bits for flags. The highest three of which are
+ * currently allocated to LARGE_ARGS and WIDTH */
+ ASSERT((bci & 0xFF00) == (bci & ( bci_FLAG_LARGE_ARGS | bci_FLAG_WIDTH )));
switch (bci & 0xFF) {
@@ -2110,38 +2141,61 @@ run_BCO:
RETURN_TO_SCHEDULER_NO_PAUSE(ThreadRunGHC, ThreadYielding);
}
- case bci_OP_ADD: {
- StgWord r = (StgWord) SpW(0) + (StgWord) SpW(1);
- SpW(1) = (W_)r;
- Sp_addW(1);
- goto nextInsn;
- }
-
- case bci_OP_AND: {
- StgWord r = (StgWord) SpW(0) & (StgWord) SpW(1);
- SpW(1) = (W_)r;
- Sp_addW(1);
- goto nextInsn;
- }
-
- case bci_OP_NOT: {
- StgWord r = ~ (StgWord) SpW(0);
- SpW(0) = (W_)r;
- goto nextInsn;
- }
-
- case bci_OP_XOR: {
- StgWord r = (StgWord) SpW(0) ^ (StgWord) SpW(1);
- SpW(1) = (W_)r;
- Sp_addW(1);
- goto nextInsn;
- }
+#define UN_SIZED_OP(op,ty) \
+ { \
+ ty r = op (*(ty*) Sp_plusB(sizeof(ty))); \
+ if(sizeof(ty) > sizeof(StgWord)) { \
+ /* 64bit op on 32bit platforms */ \
+ SpW64(0) = (StgWord64) r; \
+ } else { \
+ SpW(0) = (StgWord) r; \
+ } \
+ goto nextInsn; \
+ }
- case bci_OP_NEQ: {
- StgWord r = (StgWord) SpW(0) != (StgWord) SpW(1);
- SpW(1) = (W_)r;
- Sp_addW(1);
- goto nextInsn;
+#define SIZED_BIN_OP(op,ty) \
+ { \
+ ty r = (*(ty*) Sp_plusB(0)) - (*(ty*) Sp_plusB(sizeof(ty))); \
+ /* If the two arguments didn't fit in a single word we have to clean up the stack.*/ \
+ if(sizeof(ty)*2 > sizeof(StgWord)) { \
+ Sp_addW(sizeof(ty)*2/sizeof(StgWord) - 1); /*One word accounts for result*/ \
+ } \
+ SpW(0) = (StgWord) r; \
+ goto nextInsn; \
+ }
+
+#define UN_INT64_OP(op) UN_SIZED_OP(op,StgInt64)
+#define BIN_INT64_OP(op) SIZED_BIN_OP(op,StgInt64)
+#define BIN_WORD64_OP(op) SIZED_BIN_OP(op,StgWord64)
+
+ case bci_OP_ADD: BIN_INT64_OP(+)
+ case bci_OP_SUB: BIN_INT64_OP(-)
+ case bci_OP_AND: BIN_INT64_OP(&)
+ case bci_OP_XOR: BIN_INT64_OP(^)
+ case bci_OP_MUL: BIN_INT64_OP(^)
+ case bci_OP_SHL: BIN_WORD64_OP(<<)
+ case bci_OP_LSR: BIN_WORD64_OP(>>)
+ case bci_OP_ASR: BIN_INT64_OP(>>)
+
+ case bci_OP_NEQ: BIN_INT64_OP(!=)
+ case bci_OP_EQ: BIN_INT64_OP(!=)
+ case bci_OP_GT: BIN_INT64_OP(>)
+ case bci_OP_GE: BIN_INT64_OP(>=)
+ case bci_OP_LT: BIN_INT64_OP(<)
+ case bci_OP_LE: BIN_INT64_OP(<=)
+
+ case bci_OP_NOT: UN_INT64_OP(~)
+ case bci_OP_NEG: UN_INT64_OP(-)
+
+ case bci_OP_SIZED_SUB:
+ {
+ StgWord width = BCO_GET_BCI_WIDTH(bci);
+ switch (width) {
+ case 0: SIZED_BIN_OP(-,StgInt8 )
+ case 1: SIZED_BIN_OP(-,StgInt16)
+ case 2: SIZED_BIN_OP(-,StgInt32)
+ default: barf("Unexpected bci width.");
+ };
}
case bci_CCALL: {
=====================================
rts/include/rts/Bytecodes.h
=====================================
@@ -115,16 +115,37 @@
#define bci_BCO_NAME 88
#define bci_OP_ADD 89
-#define bci_OP_AND 90
-#define bci_OP_XOR 91
-#define bci_OP_NOT 92
-#define bci_OP_NEQ 93
+#define bci_OP_SUB 90
+#define bci_OP_AND 91
+#define bci_OP_XOR 92
+#define bci_OP_NOT 93
+#define bci_OP_NEG 94
+#define bci_OP_MUL 95
+#define bci_OP_SHL 96
+#define bci_OP_ASR 97
+#define bci_OP_LSR 98
+
+#define bci_OP_NEQ 110
+#define bci_OP_EQ 111
+#define bci_OP_GE 112
+#define bci_OP_GT 113
+#define bci_OP_LT 114
+#define bci_OP_LE 115
+
+#define bci_OP_SIZED_SUB 130
+
/* If you need to go past 255 then you will run into the flags */
/* If you need to go below 0x0100 then you will run into the instructions */
#define bci_FLAG_LARGE_ARGS 0x8000
+/* Width of primitiv operations if width-polymorphic. We use two bits to store
+ * the width.
+ * 0->Word8;1->Word16;2->Word32
+ * Word64 operations always should take the OP with fixed width instead. */
+#define bci_FLAG_WIDTH 0x6000
+
/* If a BCO definitely requires less than this many words of stack,
don't include an explicit STKCHECK insn in it. The interpreter
will check for this many words of stack before running each BCO,
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/64939899a0042662d1059953fa61a33377674d8f
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/64939899a0042662d1059953fa61a33377674d8f
You're receiving this email because of your account on gitlab.haskell.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20250223/65b67294/attachment-0001.html>
More information about the ghc-commits
mailing list