[Git][ghc/ghc][wip/andreask/interpreter_primops] Add Addr# indexing primops. Fix some obvious 32bit bugs.

Andreas Klebinger (@AndreasK) gitlab at gitlab.haskell.org
Mon Mar 3 20:03:26 UTC 2025



Andreas Klebinger pushed to branch wip/andreask/interpreter_primops at Glasgow Haskell Compiler / GHC


Commits:
c5e9a83e by Andreas Klebinger at 2025-03-03T20:40:56+01:00
Add Addr# indexing primops. Fix some obvious 32bit bugs.

- - - - -


6 changed files:

- compiler/GHC/ByteCode/Asm.hs
- compiler/GHC/ByteCode/Instr.hs
- compiler/GHC/StgToByteCode.hs
- rts/Disassembler.c
- rts/Interpreter.c
- rts/include/rts/Bytecodes.h


Changes:

=====================================
compiler/GHC/ByteCode/Asm.hs
=====================================
@@ -544,109 +544,137 @@ assembleI platform i = case i of
     W64                   -> emit bci_OP_ADD_64 []
     W32                   -> emit bci_OP_ADD_32 []
     W16                   -> emit bci_OP_ADD_16 []
-    W8                   -> emit bci_OP_ADD_08 []
+    W8                    -> emit bci_OP_ADD_08 []
+    _                     -> unsupported_width
   OP_SUB w -> case w of
     W64                   -> emit bci_OP_SUB_64 []
     W32                   -> emit bci_OP_SUB_32 []
     W16                   -> emit bci_OP_SUB_16 []
-    W8                   -> emit bci_OP_SUB_08 []
+    W8                    -> emit bci_OP_SUB_08 []
+    _                     -> unsupported_width
   OP_AND w -> case w of
     W64                   -> emit bci_OP_AND_64 []
     W32                   -> emit bci_OP_AND_32 []
     W16                   -> emit bci_OP_AND_16 []
-    W8                   -> emit bci_OP_AND_08 []
+    W8                    -> emit bci_OP_AND_08 []
+    _                     -> unsupported_width
   OP_XOR w -> case w of
     W64                   -> emit bci_OP_XOR_64 []
     W32                   -> emit bci_OP_XOR_32 []
     W16                   -> emit bci_OP_XOR_16 []
-    W8                   -> emit bci_OP_XOR_08 []
+    W8                    -> emit bci_OP_XOR_08 []
+    _                     -> unsupported_width
   OP_OR w -> case w of
     W64                    -> emit bci_OP_OR_64 []
     W32                    -> emit bci_OP_OR_32 []
     W16                    -> emit bci_OP_OR_16 []
-    W8                    -> emit bci_OP_OR_08 []
+    W8                     -> emit bci_OP_OR_08 []
+    _                      -> unsupported_width
   OP_NOT w -> case w of
     W64                   -> emit bci_OP_NOT_64 []
     W32                   -> emit bci_OP_NOT_32 []
     W16                   -> emit bci_OP_NOT_16 []
-    W8                   -> emit bci_OP_NOT_08 []
+    W8                    -> emit bci_OP_NOT_08 []
+    _                     -> unsupported_width
   OP_NEG w -> case w of
     W64                   -> emit bci_OP_NEG_64 []
     W32                   -> emit bci_OP_NEG_32 []
     W16                   -> emit bci_OP_NEG_16 []
-    W8                   -> emit bci_OP_NEG_08 []
+    W8                    -> emit bci_OP_NEG_08 []
+    _                     -> unsupported_width
   OP_MUL w -> case w of
     W64                   -> emit bci_OP_MUL_64 []
     W32                   -> emit bci_OP_MUL_32 []
     W16                   -> emit bci_OP_MUL_16 []
-    W8                   -> emit bci_OP_MUL_08 []
+    W8                    -> emit bci_OP_MUL_08 []
+    _                     -> unsupported_width
   OP_SHL w -> case w of
     W64                   -> emit bci_OP_SHL_64 []
     W32                   -> emit bci_OP_SHL_32 []
     W16                   -> emit bci_OP_SHL_16 []
-    W8                   -> emit bci_OP_SHL_08 []
+    W8                    -> emit bci_OP_SHL_08 []
+    _                     -> unsupported_width
   OP_ASR w -> case w of
     W64                   -> emit bci_OP_ASR_64 []
     W32                   -> emit bci_OP_ASR_32 []
     W16                   -> emit bci_OP_ASR_16 []
-    W8                   -> emit bci_OP_ASR_08 []
+    W8                    -> emit bci_OP_ASR_08 []
+    _                     -> unsupported_width
   OP_LSR w -> case w of
     W64                   -> emit bci_OP_LSR_64 []
     W32                   -> emit bci_OP_LSR_32 []
     W16                   -> emit bci_OP_LSR_16 []
-    W8                   -> emit bci_OP_LSR_08 []
+    W8                    -> emit bci_OP_LSR_08 []
+    _                     -> unsupported_width
 
   OP_NEQ w -> case w of
     W64                   -> emit bci_OP_NEQ_64 []
     W32                   -> emit bci_OP_NEQ_32 []
     W16                   -> emit bci_OP_NEQ_16 []
-    W8                   -> emit bci_OP_NEQ_08 []
+    W8                    -> emit bci_OP_NEQ_08 []
+    _                     -> unsupported_width
   OP_EQ w -> case w of
     W64                    -> emit bci_OP_EQ_64 []
     W32                    -> emit bci_OP_EQ_32 []
     W16                    -> emit bci_OP_EQ_16 []
-    W8                    -> emit bci_OP_EQ_08 []
+    W8                     -> emit bci_OP_EQ_08 []
+    _                      -> unsupported_width
 
   OP_U_LT w -> case w of
     W64                  -> emit bci_OP_U_LT_64 []
     W32                  -> emit bci_OP_U_LT_32 []
     W16                  -> emit bci_OP_U_LT_16 []
-    W8                  -> emit bci_OP_U_LT_08 []
+    W8                   -> emit bci_OP_U_LT_08 []
+    _                    -> unsupported_width
   OP_S_LT w -> case w of
     W64                  -> emit bci_OP_S_LT_64 []
     W32                  -> emit bci_OP_S_LT_32 []
     W16                  -> emit bci_OP_S_LT_16 []
-    W8                  -> emit bci_OP_S_LT_08 []
+    W8                   -> emit bci_OP_S_LT_08 []
+    _                    -> unsupported_width
   OP_U_GE w -> case w of
     W64                  -> emit bci_OP_U_GE_64 []
     W32                  -> emit bci_OP_U_GE_32 []
     W16                  -> emit bci_OP_U_GE_16 []
-    W8                  -> emit bci_OP_U_GE_08 []
+    W8                   -> emit bci_OP_U_GE_08 []
+    _                    -> unsupported_width
   OP_S_GE w -> case w of
     W64                  -> emit bci_OP_S_GE_64 []
     W32                  -> emit bci_OP_S_GE_32 []
     W16                  -> emit bci_OP_S_GE_16 []
-    W8                  -> emit bci_OP_S_GE_08 []
+    W8                   -> emit bci_OP_S_GE_08 []
+    _                    -> unsupported_width
   OP_U_GT w -> case w of
     W64                  -> emit bci_OP_U_GT_64 []
     W32                  -> emit bci_OP_U_GT_32 []
     W16                  -> emit bci_OP_U_GT_16 []
-    W8                  -> emit bci_OP_U_GT_08 []
+    W8                   -> emit bci_OP_U_GT_08 []
+    _                    -> unsupported_width
   OP_S_GT w -> case w of
     W64                  -> emit bci_OP_S_GT_64 []
     W32                  -> emit bci_OP_S_GT_32 []
     W16                  -> emit bci_OP_S_GT_16 []
-    W8                  -> emit bci_OP_S_GT_08 []
+    W8                   -> emit bci_OP_S_GT_08 []
+    _                    -> unsupported_width
   OP_U_LE w -> case w of
     W64                  -> emit bci_OP_U_LE_64 []
     W32                  -> emit bci_OP_U_LE_32 []
     W16                  -> emit bci_OP_U_LE_16 []
-    W8                  -> emit bci_OP_U_LE_08 []
+    W8                   -> emit bci_OP_U_LE_08 []
+    _                    -> unsupported_width
   OP_S_LE w -> case w of
     W64                  -> emit bci_OP_S_LE_64 []
     W32                  -> emit bci_OP_S_LE_32 []
     W16                  -> emit bci_OP_S_LE_16 []
-    W8                  -> emit bci_OP_S_LE_08 []
+    W8                   -> emit bci_OP_S_LE_08 []
+    _                    -> unsupported_width
+
+  OP_INDEX_ADDR w -> case w of
+    W64                  -> emit bci_OP_INDEX_ADDR_64 []
+    W32                  -> emit bci_OP_INDEX_ADDR_32 []
+    W16                  -> emit bci_OP_INDEX_ADDR_16 []
+    W8                   -> emit bci_OP_INDEX_ADDR_08 []
+    _                    -> unsupported_width
 
   BRK_FUN arr tick_mod tickx info_mod infox cc ->
                               do p1 <- ptr (BCOPtrBreakArray arr)
@@ -664,6 +692,7 @@ assembleI platform i = case i of
 #endif
 
   where
+    unsupported_width = panic "GHC.ByteCode.Asm: Unsupported Width"
     literal (LitLabel fs _)   = litlabel fs
     literal LitNullAddr       = word 0
     literal (LitFloat r)      = float (fromRational r)


=====================================
compiler/GHC/ByteCode/Instr.hs
=====================================
@@ -36,7 +36,6 @@ import GHC.Stack.CCS (CostCentre)
 import GHC.Stg.Syntax
 import GHCi.BreakArray (BreakArray)
 import Language.Haskell.Syntax.Module.Name (ModuleName)
-import GHC.Types.RepType (PrimRep)
 import GHC.Cmm.Type (Width)
 
 -- ----------------------------------------------------------------------------
@@ -245,6 +244,10 @@ data BCInstr
    | OP_S_GT !Width
    | OP_S_LE !Width
 
+   -- Always puts at least a machine word on the stack, with the low part of the stack containing the result.
+   -- We zero extend the result we put on the stack.
+   | OP_INDEX_ADDR !Width
+
    -- For doing magic ByteArray passing to foreign calls
    | SWIZZLE          !WordOff -- to the ptr N words down the stack,
                       !Int     -- add M
@@ -448,6 +451,8 @@ instance Outputable BCInstr where
    ppr (OP_U_GT w)           = text "OP_U_GT_" <> ppr w
    ppr (OP_U_LE w)           = text "OP_U_LE_" <> ppr w
 
+   ppr (OP_INDEX_ADDR w)     = text "OP_INDEX_ADDR_" <> ppr w
+
    ppr (SWIZZLE stkoff n)    = text "SWIZZLE " <+> text "stkoff" <+> ppr stkoff
                                                <+> text "by" <+> ppr n
    ppr ENTER                 = text "ENTER"
@@ -578,6 +583,7 @@ bciStackUse OP_U_LT{}               = 0
 bciStackUse OP_U_GT{}               = 0
 bciStackUse OP_U_LE{}               = 0
 bciStackUse OP_U_GE{}               = 0
+bciStackUse OP_INDEX_ADDR{}         = 0
 
 bciStackUse SWIZZLE{}             = 0
 bciStackUse BRK_FUN{}             = 0


=====================================
compiler/GHC/StgToByteCode.hs
=====================================
@@ -734,9 +734,14 @@ schemeT d s p (StgOpApp (StgFCallOp (CCall ccall_spec) _ty) args result_ty)
       then generateCCall d s p ccall_spec result_ty args
       else unsupportedCConvException
 
-schemeT d s p (StgOpApp (StgPrimOp op) args _ty)
-  | Just prim_code <- doPrimOp op d s p args = prim_code
-  | otherwise = doTailCall d s p (primOpId op) (reverse args)
+schemeT d s p (StgOpApp (StgPrimOp op) args _ty) = do
+  profile <- getProfile
+  let platform = profilePlatform profile
+  case doPrimOp platform op d s p args of
+    -- Can we do this right in the interpreter?
+    Just prim_code -> prim_code
+    -- Otherwise we have to do a call to the primop wrapper instead :(
+    _         -> doTailCall d s p (primOpId op) (reverse args)
 
 schemeT d s p (StgOpApp (StgPrimCallOp (PrimCall label unit)) args result_ty)
    = generatePrimCall d s p label (Just unit) result_ty args
@@ -831,15 +836,15 @@ doTailCall init_d s p fn args = do
     (final_d, more_push_code) <- push_seq (d + sz) args
     return (final_d, push_code `appOL` more_push_code)
 
-doPrimOp  :: PrimOp
+doPrimOp  :: Platform
+          -> PrimOp
           -> StackDepth
           -> Sequel
           -> BCEnv
           -> [StgArg]
           -> Maybe (BcM BCInstrList)
-doPrimOp op init_d s p args =
+doPrimOp platform op init_d s p args =
   case op of
-    -- TODO: IntAddOp and friends are only 64bit on 64bit platforms
     IntAddOp -> primOp OP_ADD
     Int64AddOp -> primOp OP_ADD
     WordAddOp -> primOp OP_ADD
@@ -850,6 +855,7 @@ doPrimOp op init_d s p args =
     WordSubOp -> primOp OP_SUB
     Int64SubOp -> primOp OP_SUB
     Word64SubOp -> primOp OP_SUB
+    AddrSubOp -> primOp OP_SUB
 
     Int8SubOp   -> primOp OP_SUB
     Word8SubOp  -> primOp OP_SUB
@@ -886,26 +892,33 @@ doPrimOp op init_d s p args =
     IntNeOp -> primOp OP_NEQ
     WordNeOp -> primOp OP_NEQ
     Word64NeOp -> primOp OP_NEQ
+    AddrNeOp -> primOp OP_NEQ
 
     IntEqOp -> primOp OP_EQ
     WordEqOp -> primOp OP_EQ
     Word64EqOp -> primOp OP_EQ
+    AddrEqOp -> primOp OP_EQ
+    CharEqOp -> primOp OP_EQ
 
     IntLtOp -> primOp OP_S_LT
     WordLtOp -> primOp OP_U_LT
     Word64LtOp -> primOp OP_U_LT
+    AddrLtOp -> primOp OP_U_LT
 
     IntGeOp -> primOp OP_S_GE
     WordGeOp -> primOp OP_U_GE
     Word64GeOp -> primOp OP_U_GE
+    AddrGeOp -> primOp OP_U_GE
 
     IntGtOp -> primOp OP_S_GT
     WordGtOp -> primOp OP_U_GT
     Word64GtOp -> primOp OP_U_GT
+    AddrGtOp -> primOp OP_U_GT
 
     IntLeOp -> primOp OP_S_LE
     WordLeOp -> primOp OP_U_LE
     Word64LeOp -> primOp OP_U_LE
+    AddrLeOp -> primOp OP_U_LE
 
     IntNegOp -> primOp OP_NEG
     Int64NegOp -> primOp OP_NEG
@@ -925,9 +938,14 @@ doPrimOp op init_d s p args =
     ChrOp           -> no_op   -- Int# and Char# are rep'd the same
     OrdOp           -> no_op
 
+    IndexOffAddrOp_Word8 -> primOpWithRep (OP_INDEX_ADDR W8) W8
+    IndexOffAddrOp_Word16 -> primOpWithRep (OP_INDEX_ADDR W16) W16
+    IndexOffAddrOp_Word32 -> primOpWithRep (OP_INDEX_ADDR W32) W32
+    IndexOffAddrOp_Word64 -> primOpWithRep (OP_INDEX_ADDR W64) W64
+
     _ -> Nothing
   where
-    primArg1Width platform (arg:_)
+    primArg1Width arg
       | rep <- (stgArgRepU arg)
       = case rep of
         AddrRep -> platformWordWidth platform
@@ -953,19 +971,33 @@ doPrimOp op init_d s p args =
         VecRep{} -> unexpectedRep
       where
         unexpectedRep = panic "doPrimOp: Unexpected argument rep"
-    primArg1Width _ _  = panic "doPrimOp: Unexpected argument count"
+
+
+    -- TODO: The slides for the result need to be two words on 32bit for 64bit ops.
+    mkNReturn width
+      | W64 <- width = RETURN L -- L works for 64 bit on any platform
+      | otherwise = RETURN N -- <64bit width, fits in word on all platforms
+
+    mkSlideWords width = if platformWordWidth platform < width then 2 else 1
 
     -- Push args, execute primop, slide, return_N
     primOp op_inst = Just $ do
-      platform <- profilePlatform <$> getProfile
-      prim_code <- mkPrimOpCode init_d s p (op_inst $ primArg1Width platform args) $ args
-      let slide = mkSlideW 1 (bytesToWords platform $ init_d - s) `snocOL` RETURN N
+      let width = primArg1Width (head args)
+      prim_code <- mkPrimOpCode init_d s p (op_inst width) $ args
+      let slide = mkSlideW (mkSlideWords width) (bytesToWords platform $ init_d - s) `snocOL` mkNReturn width
+      return $ prim_code `appOL` slide
+
+    primOpWithRep :: BCInstr -> Width -> Maybe (BcM (OrdList BCInstr))
+    primOpWithRep op_inst width = Just $ do
+      prim_code <- mkPrimOpCode init_d s p op_inst $ args
+
+      let slide = mkSlideW (mkSlideWords width) (bytesToWords platform $ init_d - s) `snocOL` mkNReturn width
       return $ prim_code `appOL` slide
 
     no_op = Just $ do
-      platform <- profilePlatform <$> getProfile
+      let width = primArg1Width (head args)
       prim_code <- terribleNoOp init_d s p undefined args
-      let slide = mkSlideW 1 (bytesToWords platform $ init_d - s) `snocOL` RETURN N
+      let slide = mkSlideW (mkSlideWords width) (bytesToWords platform $ init_d - s) `snocOL` mkNReturn width
       return $ prim_code `appOL` slide
 
 -- It's horrible, but still better than calling intToWord ...


=====================================
rts/Disassembler.c
=====================================
@@ -487,6 +487,8 @@ disInstr ( StgBCO *bco, int pc )
       BELCH_INSTR_NAME_ALL_SIZES(OP_S_GE);
       BELCH_INSTR_NAME_ALL_SIZES(OP_S_LT);
 
+      BELCH_INSTR_NAME_ALL_SIZES(OP_INDEX_ADDR);
+
       default:
          barf("disInstr: unknown opcode %u", (unsigned int) instr);
    }


=====================================
rts/Interpreter.c
=====================================
@@ -2274,6 +2274,40 @@ run_BCO:
         case bci_OP_NOT_08: UN_SIZED_OP(~, StgWord8)
         case bci_OP_NEG_08: UN_SIZED_OP(-, StgInt8)
 
+        case bci_OP_INDEX_ADDR_64:
+        {
+            StgWord64* addr = (StgWord64*) SpW(1);
+            StgWord offset =     (StgWord) SpW(0);
+            if(sizeof(StgPtr) == sizeof(StgWord64)) {
+                Sp_addW(1);
+            }
+            SpW64(0) = *(addr+offset);
+            goto nextInsn;
+        }
+
+        case bci_OP_INDEX_ADDR_32:
+        {
+            StgWord32* addr = (StgWord32*) SpW(1);
+            StgWord offset =     (StgWord) SpW(0);
+            Sp_addW(1);
+            SpW(0) = (StgWord) *(addr+offset);
+            goto nextInsn;
+        }
+        case bci_OP_INDEX_ADDR_16:
+        {
+            StgWord16* addr = (StgWord16*) SpW(0);
+            SpW(0) = (StgWord) *addr;
+            goto nextInsn;
+        }
+        case bci_OP_INDEX_ADDR_08:
+        {
+            StgWord8* addr = (StgWord8*) SpW(1);
+            StgWord offset =   (StgWord) SpW(0);
+            Sp_addW(1);
+            SpW(0) = (StgWord) *(addr+offset);
+            goto nextInsn;
+        }
+
         case bci_CCALL: {
             void *tok;
             W_ stk_offset             = BCO_GET_LARGE_ARG;


=====================================
rts/include/rts/Bytecodes.h
=====================================
@@ -209,6 +209,11 @@
 #define bci_OP_S_LT_08                 228
 #define bci_OP_S_LE_08                 229
 
+#define bci_OP_INDEX_ADDR_08           240
+#define bci_OP_INDEX_ADDR_16           241
+#define bci_OP_INDEX_ADDR_32           242
+#define bci_OP_INDEX_ADDR_64           243
+
 
 /* If you need to go past 255 then you will run into the flags */
 



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/c5e9a83e9bfb7d9b0cfc9406712a7cbcf3d32eec

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/c5e9a83e9bfb7d9b0cfc9406712a7cbcf3d32eec
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20250303/852ca224/attachment-0001.html>


More information about the ghc-commits mailing list