[Git][ghc/ghc][wip/andreask/interpreter_primops] Initial PoC for primop support in the Interpreter.

Andreas Klebinger (@AndreasK) gitlab at gitlab.haskell.org
Wed Feb 19 09:33:39 UTC 2025



Andreas Klebinger pushed to branch wip/andreask/interpreter_primops at Glasgow Haskell Compiler / GHC


Commits:
df472bd3 by Andreas Klebinger at 2025-02-19T10:10:55+01:00
Initial PoC for primop support in the Interpreter.

This commit adds support for a very small number of primops directly
to the interpreter. Code decently heavy on those primops like code
involving IntSet runs about 25% faster with optimized core and these
changes.

For core without breakpoints this can go up to close to 50%.

- - - - -


6 changed files:

- compiler/GHC/ByteCode/Asm.hs
- compiler/GHC/ByteCode/Instr.hs
- compiler/GHC/StgToByteCode.hs
- rts/Disassembler.c
- rts/Interpreter.c
- rts/include/rts/Bytecodes.h


Changes:

=====================================
compiler/GHC/ByteCode/Asm.hs
=====================================
@@ -508,6 +508,13 @@ assembleI platform i = case i of
   CCALL off m_addr i       -> do np <- addr m_addr
                                  emit bci_CCALL [wOp off, Op np, SmallOp i]
   PRIMCALL                 -> emit bci_PRIMCALL []
+
+  OP_ADD                   -> emit bci_OP_ADD []
+  OP_AND                   -> emit bci_OP_AND []
+  OP_XOR                   -> emit bci_OP_XOR []
+  OP_NOT                   -> emit bci_OP_NOT []
+  OP_NEQ                   -> emit bci_OP_NEQ []
+
   BRK_FUN arr tick_mod tickx info_mod infox cc ->
                               do p1 <- ptr (BCOPtrBreakArray arr)
                                  tick_addr <- addr tick_mod


=====================================
compiler/GHC/ByteCode/Instr.hs
=====================================
@@ -214,6 +214,14 @@ data BCInstr
 
    | PRIMCALL
 
+   | OP_ADD
+   | OP_AND
+   | OP_XOR
+   | OP_NOT
+   | OP_NEQ
+
+   -- Primops
+
    -- For doing magic ByteArray passing to foreign calls
    | SWIZZLE          !WordOff -- to the ptr N words down the stack,
                       !Int     -- add M
@@ -393,6 +401,13 @@ instance Outputable BCInstr where
                                                       0x2 -> text "(unsafe)"
                                                       _   -> empty)
    ppr PRIMCALL              = text "PRIMCALL"
+
+   ppr OP_ADD                = text "OP_ADD"
+   ppr OP_AND                = text "OP_AND"
+   ppr OP_XOR                = text "OP_XOR"
+   ppr OP_NOT                = text "OP_NOT"
+   ppr OP_NEQ                = text "OP_NEQ"
+
    ppr (SWIZZLE stkoff n)    = text "SWIZZLE " <+> text "stkoff" <+> ppr stkoff
                                                <+> text "by" <+> ppr n
    ppr ENTER                 = text "ENTER"
@@ -501,6 +516,12 @@ bciStackUse RETURN{}              = 1 -- pushes stg_ret_X for some X
 bciStackUse RETURN_TUPLE{}        = 1 -- pushes stg_ret_t header
 bciStackUse CCALL{}               = 0
 bciStackUse PRIMCALL{}            = 1 -- pushes stg_primcall
+bciStackUse OP_ADD{}              = 0 -- We overestimate, it's -1 actually ...
+bciStackUse OP_AND{}              = 0
+bciStackUse OP_XOR{}              = 0
+bciStackUse OP_NOT{}              = 0
+bciStackUse OP_NEQ{}              = 0
+
 bciStackUse SWIZZLE{}             = 0
 bciStackUse BRK_FUN{}             = 0
 


=====================================
compiler/GHC/StgToByteCode.hs
=====================================
@@ -734,7 +734,8 @@ schemeT d s p (StgOpApp (StgFCallOp (CCall ccall_spec) _ty) args result_ty)
       else unsupportedCConvException
 
 schemeT d s p (StgOpApp (StgPrimOp op) args _ty)
-   = doTailCall d s p (primOpId op) (reverse args)
+  | Just prim_code <- doPrimOp op d s p args = prim_code
+  | otherwise = doTailCall d s p (primOpId op) (reverse args)
 
 schemeT d s p (StgOpApp (StgPrimCallOp (PrimCall label unit)) args result_ty)
    = generatePrimCall d s p label (Just unit) result_ty args
@@ -829,6 +830,114 @@ doTailCall init_d s p fn args = do
     (final_d, more_push_code) <- push_seq (d + sz) args
     return (final_d, push_code `appOL` more_push_code)
 
+doPrimOp  :: PrimOp
+          -> StackDepth
+          -> Sequel
+          -> BCEnv
+          -> [StgArg]
+          -> Maybe (BcM BCInstrList)
+doPrimOp op init_d s p args =
+  case op of
+    -- TODO: No idea if the argument order here is correct.
+    -- But it doesn't matter for the current set of primops.
+    IntAddOp -> primOp OP_ADD
+    WordAddOp -> primOp OP_ADD
+
+    IntAndOp -> primOp OP_AND
+    WordAndOp -> primOp OP_AND
+
+    IntNotOp -> primOp OP_NOT
+    WordNotOp -> primOp OP_NOT
+
+    IntXorOp -> primOp OP_XOR
+    WordXorOp -> primOp OP_XOR
+
+    IntNeOp -> primOp OP_NEQ
+    WordNeOp -> primOp OP_NEQ
+
+    IntToWordOp -> no_op
+    WordToIntOp -> no_op
+
+    _ -> Nothing
+    where
+      -- Push args, execute primop, slide, return_N
+      primOp op_inst = Just $ do
+        platform <- profilePlatform <$> getProfile
+        prim_code <- mkPrimOpCode init_d s p op_inst args
+        let slide = mkSlideW 1 (bytesToWords platform $ init_d - s) `snocOL` RETURN N
+        return $ prim_code `appOL` slide
+
+      no_op = Just $ do
+        platform <- profilePlatform <$> getProfile
+        prim_code <- terribleNoOp init_d s p undefined args
+        let slide = mkSlideW 1 (bytesToWords platform $ init_d - s) `snocOL` RETURN N
+        return $ prim_code `appOL` slide
+
+-- It's horrible, but still better than calling intToWord ...
+terribleNoOp
+    :: StackDepth
+    -> Sequel
+    -> BCEnv
+    -> BCInstr                  -- The operator
+    -> [StgArg]                 -- Args, in *reverse* order (must be fully applied)
+    -> BcM BCInstrList
+terribleNoOp orig_d _ p op_inst args = app_code
+  where
+    app_code = do
+        profile <- getProfile
+        let platform = profilePlatform profile
+
+            non_voids =
+                addArgReps (assertNonVoidStgArgs args)
+            (_, _, args_offsets) =
+                mkVirtHeapOffsetsWithPadding profile StdHeader non_voids
+
+            do_pushery !d (arg : args) = do
+                (push, arg_bytes) <- case arg of
+                    (Padding l _) -> return $! pushPadding (ByteOff l)
+                    (FieldOff a _) -> pushConstrAtom d p (fromNonVoid a)
+                more_push_code <- do_pushery (d + arg_bytes) args
+                return (push `appOL` more_push_code)
+            do_pushery !d [] = do
+                -- let !n_arg_words = bytesToWords platform (d - orig_d)
+                return (nilOL)
+
+        -- Push on the stack in the reverse order.
+        do_pushery orig_d (reverse args_offsets)
+
+-- Push the arguments on the stack and emit the given instruction
+mkPrimOpCode
+    :: StackDepth
+    -> Sequel
+    -> BCEnv
+    -> BCInstr                  -- The operator
+    -> [StgArg]                 -- Args, in *reverse* order (must be fully applied)
+    -> BcM BCInstrList
+mkPrimOpCode orig_d _ p op_inst args = app_code
+  where
+    app_code = do
+        profile <- getProfile
+        let platform = profilePlatform profile
+
+            non_voids =
+                addArgReps (assertNonVoidStgArgs args)
+            (_, _, args_offsets) =
+                mkVirtHeapOffsetsWithPadding profile StdHeader non_voids
+
+            do_pushery !d (arg : args) = do
+                (push, arg_bytes) <- case arg of
+                    (Padding l _) -> return $! pushPadding (ByteOff l)
+                    (FieldOff a _) -> pushConstrAtom d p (fromNonVoid a)
+                more_push_code <- do_pushery (d + arg_bytes) args
+                return (push `appOL` more_push_code)
+            do_pushery !d [] = do
+                -- let !n_arg_words = bytesToWords platform (d - orig_d)
+                return (unitOL op_inst)
+
+        -- Push on the stack in the reverse order.
+        do_pushery orig_d (reverse args_offsets)
+
+
 -- v. similar to CgStackery.findMatch, ToDo: merge
 findPushSeq :: [ArgRep] -> (BCInstr, Int, [ArgRep])
 findPushSeq (P: P: P: P: P: P: rest)


=====================================
rts/Disassembler.c
=====================================
@@ -459,6 +459,27 @@ disInstr ( StgBCO *bco, int pc )
          break;
       }
 
+      case bci_OP_ADD:
+         debugBelch("OP_ADD\n");
+         break;
+
+      case bci_OP_AND:
+         debugBelch("OP_AND\n");
+         break;
+
+      case bci_OP_XOR:
+         debugBelch("OP_XOR\n");
+         break;
+
+      case bci_OP_NOT:
+         debugBelch("OP_NOT\n");
+         break;
+
+      case bci_OP_NEQ:
+         debugBelch("OP_NEQ\n");
+         break;
+
+
       default:
          barf("disInstr: unknown opcode %u", (unsigned int) instr);
    }


=====================================
rts/Interpreter.c
=====================================
@@ -2110,6 +2110,40 @@ run_BCO:
             RETURN_TO_SCHEDULER_NO_PAUSE(ThreadRunGHC, ThreadYielding);
         }
 
+        case bci_OP_ADD: {
+            StgWord r = (StgWord) SpW(0) + (StgWord) SpW(1);
+            SpW(1) = (W_)r;
+            Sp_addW(1);
+            goto nextInsn;
+        }
+
+        case bci_OP_AND: {
+            StgWord r = (StgWord) SpW(0) & (StgWord) SpW(1);
+            SpW(1) = (W_)r;
+            Sp_addW(1);
+            goto nextInsn;
+        }
+
+        case bci_OP_NOT: {
+            StgWord r = ~ (StgWord) SpW(0);
+            SpW(0) = (W_)r;
+            goto nextInsn;
+        }
+
+        case bci_OP_XOR: {
+            StgWord r = (StgWord) SpW(0) ^ (StgWord) SpW(1);
+            SpW(1) = (W_)r;
+            Sp_addW(1);
+            goto nextInsn;
+        }
+
+        case bci_OP_NEQ: {
+            StgWord r = (StgWord) SpW(0) != (StgWord) SpW(1);
+            SpW(1) = (W_)r;
+            Sp_addW(1);
+            goto nextInsn;
+        }
+
         case bci_CCALL: {
             void *tok;
             W_ stk_offset             = BCO_GET_LARGE_ARG;


=====================================
rts/include/rts/Bytecodes.h
=====================================
@@ -114,6 +114,12 @@
 
 #define bci_BCO_NAME                    88
 
+#define bci_OP_ADD                      89
+#define bci_OP_AND                      90
+#define bci_OP_XOR                      91
+#define bci_OP_NOT                      92
+#define bci_OP_NEQ                      93
+
 /* If you need to go past 255 then you will run into the flags */
 
 /* If you need to go below 0x0100 then you will run into the instructions */



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/df472bd3e029d1c11530144f442ff72071c8348a

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/df472bd3e029d1c11530144f442ff72071c8348a
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20250219/296d12f6/attachment-0001.html>


More information about the ghc-commits mailing list