[Git][ghc/ghc][wip/andreask/interpreter_primops] Initial PoC for primop support in the Interpreter.
Andreas Klebinger (@AndreasK)
gitlab at gitlab.haskell.org
Wed Feb 19 09:33:39 UTC 2025
Andreas Klebinger pushed to branch wip/andreask/interpreter_primops at Glasgow Haskell Compiler / GHC
Commits:
df472bd3 by Andreas Klebinger at 2025-02-19T10:10:55+01:00
Initial PoC for primop support in the Interpreter.
This commit adds support for a very small number of primops directly
to the interpreter. Code decently heavy on those primops like code
involving IntSet runs about 25% faster with optimized core and these
changes.
For core without breakpoints this can go up to close to 50%.
- - - - -
6 changed files:
- compiler/GHC/ByteCode/Asm.hs
- compiler/GHC/ByteCode/Instr.hs
- compiler/GHC/StgToByteCode.hs
- rts/Disassembler.c
- rts/Interpreter.c
- rts/include/rts/Bytecodes.h
Changes:
=====================================
compiler/GHC/ByteCode/Asm.hs
=====================================
@@ -508,6 +508,13 @@ assembleI platform i = case i of
CCALL off m_addr i -> do np <- addr m_addr
emit bci_CCALL [wOp off, Op np, SmallOp i]
PRIMCALL -> emit bci_PRIMCALL []
+
+ OP_ADD -> emit bci_OP_ADD []
+ OP_AND -> emit bci_OP_AND []
+ OP_XOR -> emit bci_OP_XOR []
+ OP_NOT -> emit bci_OP_NOT []
+ OP_NEQ -> emit bci_OP_NEQ []
+
BRK_FUN arr tick_mod tickx info_mod infox cc ->
do p1 <- ptr (BCOPtrBreakArray arr)
tick_addr <- addr tick_mod
=====================================
compiler/GHC/ByteCode/Instr.hs
=====================================
@@ -214,6 +214,14 @@ data BCInstr
| PRIMCALL
+ | OP_ADD
+ | OP_AND
+ | OP_XOR
+ | OP_NOT
+ | OP_NEQ
+
+ -- Primops
+
-- For doing magic ByteArray passing to foreign calls
| SWIZZLE !WordOff -- to the ptr N words down the stack,
!Int -- add M
@@ -393,6 +401,13 @@ instance Outputable BCInstr where
0x2 -> text "(unsafe)"
_ -> empty)
ppr PRIMCALL = text "PRIMCALL"
+
+ ppr OP_ADD = text "OP_ADD"
+ ppr OP_AND = text "OP_AND"
+ ppr OP_XOR = text "OP_XOR"
+ ppr OP_NOT = text "OP_NOT"
+ ppr OP_NEQ = text "OP_NEQ"
+
ppr (SWIZZLE stkoff n) = text "SWIZZLE " <+> text "stkoff" <+> ppr stkoff
<+> text "by" <+> ppr n
ppr ENTER = text "ENTER"
@@ -501,6 +516,12 @@ bciStackUse RETURN{} = 1 -- pushes stg_ret_X for some X
bciStackUse RETURN_TUPLE{} = 1 -- pushes stg_ret_t header
bciStackUse CCALL{} = 0
bciStackUse PRIMCALL{} = 1 -- pushes stg_primcall
+bciStackUse OP_ADD{} = 0 -- We overestimate, it's -1 actually ...
+bciStackUse OP_AND{} = 0
+bciStackUse OP_XOR{} = 0
+bciStackUse OP_NOT{} = 0
+bciStackUse OP_NEQ{} = 0
+
bciStackUse SWIZZLE{} = 0
bciStackUse BRK_FUN{} = 0
=====================================
compiler/GHC/StgToByteCode.hs
=====================================
@@ -734,7 +734,8 @@ schemeT d s p (StgOpApp (StgFCallOp (CCall ccall_spec) _ty) args result_ty)
else unsupportedCConvException
schemeT d s p (StgOpApp (StgPrimOp op) args _ty)
- = doTailCall d s p (primOpId op) (reverse args)
+ | Just prim_code <- doPrimOp op d s p args = prim_code
+ | otherwise = doTailCall d s p (primOpId op) (reverse args)
schemeT d s p (StgOpApp (StgPrimCallOp (PrimCall label unit)) args result_ty)
= generatePrimCall d s p label (Just unit) result_ty args
@@ -829,6 +830,114 @@ doTailCall init_d s p fn args = do
(final_d, more_push_code) <- push_seq (d + sz) args
return (final_d, push_code `appOL` more_push_code)
+doPrimOp :: PrimOp
+ -> StackDepth
+ -> Sequel
+ -> BCEnv
+ -> [StgArg]
+ -> Maybe (BcM BCInstrList)
+doPrimOp op init_d s p args =
+ case op of
+ -- TODO: No idea if the argument order here is correct.
+ -- But it doesn't matter for the current set of primops.
+ IntAddOp -> primOp OP_ADD
+ WordAddOp -> primOp OP_ADD
+
+ IntAndOp -> primOp OP_AND
+ WordAndOp -> primOp OP_AND
+
+ IntNotOp -> primOp OP_NOT
+ WordNotOp -> primOp OP_NOT
+
+ IntXorOp -> primOp OP_XOR
+ WordXorOp -> primOp OP_XOR
+
+ IntNeOp -> primOp OP_NEQ
+ WordNeOp -> primOp OP_NEQ
+
+ IntToWordOp -> no_op
+ WordToIntOp -> no_op
+
+ _ -> Nothing
+ where
+ -- Push args, execute primop, slide, return_N
+ primOp op_inst = Just $ do
+ platform <- profilePlatform <$> getProfile
+ prim_code <- mkPrimOpCode init_d s p op_inst args
+ let slide = mkSlideW 1 (bytesToWords platform $ init_d - s) `snocOL` RETURN N
+ return $ prim_code `appOL` slide
+
+ no_op = Just $ do
+ platform <- profilePlatform <$> getProfile
+ prim_code <- terribleNoOp init_d s p undefined args
+ let slide = mkSlideW 1 (bytesToWords platform $ init_d - s) `snocOL` RETURN N
+ return $ prim_code `appOL` slide
+
+-- It's horrible, but still better than calling intToWord ...
+terribleNoOp
+ :: StackDepth
+ -> Sequel
+ -> BCEnv
+ -> BCInstr -- The operator
+ -> [StgArg] -- Args, in *reverse* order (must be fully applied)
+ -> BcM BCInstrList
+terribleNoOp orig_d _ p op_inst args = app_code
+ where
+ app_code = do
+ profile <- getProfile
+ let platform = profilePlatform profile
+
+ non_voids =
+ addArgReps (assertNonVoidStgArgs args)
+ (_, _, args_offsets) =
+ mkVirtHeapOffsetsWithPadding profile StdHeader non_voids
+
+ do_pushery !d (arg : args) = do
+ (push, arg_bytes) <- case arg of
+ (Padding l _) -> return $! pushPadding (ByteOff l)
+ (FieldOff a _) -> pushConstrAtom d p (fromNonVoid a)
+ more_push_code <- do_pushery (d + arg_bytes) args
+ return (push `appOL` more_push_code)
+ do_pushery !d [] = do
+ -- let !n_arg_words = bytesToWords platform (d - orig_d)
+ return (nilOL)
+
+ -- Push on the stack in the reverse order.
+ do_pushery orig_d (reverse args_offsets)
+
+-- Push the arguments on the stack and emit the given instruction
+mkPrimOpCode
+ :: StackDepth
+ -> Sequel
+ -> BCEnv
+ -> BCInstr -- The operator
+ -> [StgArg] -- Args, in *reverse* order (must be fully applied)
+ -> BcM BCInstrList
+mkPrimOpCode orig_d _ p op_inst args = app_code
+ where
+ app_code = do
+ profile <- getProfile
+ let platform = profilePlatform profile
+
+ non_voids =
+ addArgReps (assertNonVoidStgArgs args)
+ (_, _, args_offsets) =
+ mkVirtHeapOffsetsWithPadding profile StdHeader non_voids
+
+ do_pushery !d (arg : args) = do
+ (push, arg_bytes) <- case arg of
+ (Padding l _) -> return $! pushPadding (ByteOff l)
+ (FieldOff a _) -> pushConstrAtom d p (fromNonVoid a)
+ more_push_code <- do_pushery (d + arg_bytes) args
+ return (push `appOL` more_push_code)
+ do_pushery !d [] = do
+ -- let !n_arg_words = bytesToWords platform (d - orig_d)
+ return (unitOL op_inst)
+
+ -- Push on the stack in the reverse order.
+ do_pushery orig_d (reverse args_offsets)
+
+
-- v. similar to CgStackery.findMatch, ToDo: merge
findPushSeq :: [ArgRep] -> (BCInstr, Int, [ArgRep])
findPushSeq (P: P: P: P: P: P: rest)
=====================================
rts/Disassembler.c
=====================================
@@ -459,6 +459,27 @@ disInstr ( StgBCO *bco, int pc )
break;
}
+ case bci_OP_ADD:
+ debugBelch("OP_ADD\n");
+ break;
+
+ case bci_OP_AND:
+ debugBelch("OP_AND\n");
+ break;
+
+ case bci_OP_XOR:
+ debugBelch("OP_XOR\n");
+ break;
+
+ case bci_OP_NOT:
+ debugBelch("OP_NOT\n");
+ break;
+
+ case bci_OP_NEQ:
+ debugBelch("OP_NEQ\n");
+ break;
+
+
default:
barf("disInstr: unknown opcode %u", (unsigned int) instr);
}
=====================================
rts/Interpreter.c
=====================================
@@ -2110,6 +2110,40 @@ run_BCO:
RETURN_TO_SCHEDULER_NO_PAUSE(ThreadRunGHC, ThreadYielding);
}
+ case bci_OP_ADD: {
+ StgWord r = (StgWord) SpW(0) + (StgWord) SpW(1);
+ SpW(1) = (W_)r;
+ Sp_addW(1);
+ goto nextInsn;
+ }
+
+ case bci_OP_AND: {
+ StgWord r = (StgWord) SpW(0) & (StgWord) SpW(1);
+ SpW(1) = (W_)r;
+ Sp_addW(1);
+ goto nextInsn;
+ }
+
+ case bci_OP_NOT: {
+ StgWord r = ~ (StgWord) SpW(0);
+ SpW(0) = (W_)r;
+ goto nextInsn;
+ }
+
+ case bci_OP_XOR: {
+ StgWord r = (StgWord) SpW(0) ^ (StgWord) SpW(1);
+ SpW(1) = (W_)r;
+ Sp_addW(1);
+ goto nextInsn;
+ }
+
+ case bci_OP_NEQ: {
+ StgWord r = (StgWord) SpW(0) != (StgWord) SpW(1);
+ SpW(1) = (W_)r;
+ Sp_addW(1);
+ goto nextInsn;
+ }
+
case bci_CCALL: {
void *tok;
W_ stk_offset = BCO_GET_LARGE_ARG;
=====================================
rts/include/rts/Bytecodes.h
=====================================
@@ -114,6 +114,12 @@
#define bci_BCO_NAME 88
+#define bci_OP_ADD 89
+#define bci_OP_AND 90
+#define bci_OP_XOR 91
+#define bci_OP_NOT 92
+#define bci_OP_NEQ 93
+
/* If you need to go past 255 then you will run into the flags */
/* If you need to go below 0x0100 then you will run into the instructions */
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/df472bd3e029d1c11530144f442ff72071c8348a
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/df472bd3e029d1c11530144f442ff72071c8348a
You're receiving this email because of your account on gitlab.haskell.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20250219/296d12f6/attachment-0001.html>
More information about the ghc-commits
mailing list