[Git][ghc/ghc][wip/specialise-assembler] perf: Speed up bytecode assembler by specialising assembleI function

Matthew Pickering (@mpickering) gitlab at gitlab.haskell.org
Tue Feb 4 16:21:10 UTC 2025



Matthew Pickering pushed to branch wip/specialise-assembler at Glasgow Haskell Compiler / GHC


Commits:
7c9d6afe by Matthew Pickering at 2025-02-04T16:20:52+00:00
perf: Speed up bytecode assembler by specialising assembleI function

By specialising the assembleI function for the two intepreters we avoid
having to materialise the intermediate free-monad like structure.

The resulting generated code is much more direct.

- - - - -


4 changed files:

- compiler/GHC/ByteCode/Asm.hs
- compiler/GHC/ByteCode/Types.hs
- compiler/GHC/Data/FlatBag.hs
- compiler/GHC/Data/SmallArray.hs


Changes:

=====================================
compiler/GHC/ByteCode/Asm.hs
=====================================
@@ -1,8 +1,12 @@
 {-# LANGUAGE CPP             #-}
 {-# LANGUAGE DeriveFunctor   #-}
 {-# LANGUAGE RecordWildCards #-}
+{-# LANGUAGE MagicHash       #-}
+{-# LANGUAGE UnboxedTuples   #-}
+{-# LANGUAGE PatternSynonyms   #-}
 {-# OPTIONS_GHC -optc-DNON_POSIX_SOURCE #-}
 --
+--
 --  (c) The University of Glasgow 2002-2006
 --
 
@@ -12,10 +16,11 @@ module GHC.ByteCode.Asm (
         bcoFreeNames,
         SizedSeq, sizeSS, ssElts,
         iNTERP_STACK_CHECK_THRESH,
-        mkNativeCallInfoLit
+        mkNativeCallInfoLit, assembleRunAsm
   ) where
 
-import GHC.Prelude
+import GHC.Prelude hiding ( any )
+
 
 import GHC.ByteCode.Instr
 import GHC.ByteCode.InfoTable
@@ -36,6 +41,9 @@ import GHC.Utils.Panic
 import GHC.Core.TyCon
 import GHC.Data.FlatBag
 import GHC.Data.SizedSeq
+import GHC.Data.SmallArray
+
+
 
 import GHC.StgToCmm.Layout     ( ArgRep(..) )
 import GHC.Cmm.Expr
@@ -45,11 +53,11 @@ import GHC.Platform
 import GHC.Platform.Profile
 
 import Control.Monad
-import Control.Monad.Trans.Class
-import Control.Monad.Trans.State.Strict
+import qualified Control.Monad.Trans.State.Strict as MTL
 
 import qualified Data.Array.Unboxed as Array
-import Data.Array.Base  ( UArray(..) )
+import qualified Data.Array.IO as Array
+import Data.Array.Base  ( UArray(..), unsafeWrite, numElements, unsafeFreeze )
 
 import Foreign hiding (shiftL, shiftR)
 import Data.Char        ( ord )
@@ -58,6 +66,10 @@ import Data.Maybe (fromMaybe)
 import qualified Data.Map.Strict as Map
 import GHC.Float (castFloatToWord32, castDoubleToWord64)
 
+import qualified Data.List as List ( any )
+import GHC.Exts
+
+
 -- -----------------------------------------------------------------------------
 -- Unlinked BCOs
 
@@ -134,9 +146,9 @@ assembleBCOs interp profile proto_bcos tycons top_strs modbreaks spt_entries = d
 --
 mallocStrings ::  Interp -> FlatBag UnlinkedBCO -> IO (FlatBag UnlinkedBCO)
 mallocStrings interp ulbcos = do
-  let bytestrings = reverse (execState (mapM_ collect ulbcos) [])
+  let bytestrings = reverse (MTL.execState (mapM_ collect ulbcos) [])
   ptrs <- interpCmd interp (MallocStrings bytestrings)
-  return (evalState (mapM splice ulbcos) ptrs)
+  return (MTL.evalState (mapM splice ulbcos) ptrs)
  where
   splice bco at UnlinkedBCO{..} = do
     lits <- mapM spliceLit unlinkedBCOLits
@@ -144,10 +156,10 @@ mallocStrings interp ulbcos = do
     return bco { unlinkedBCOLits = lits, unlinkedBCOPtrs = ptrs }
 
   spliceLit (BCONPtrStr _) = do
-    rptrs <- get
+    rptrs <- MTL.get
     case rptrs of
       (RemotePtr p : rest) -> do
-        put rest
+        MTL.put rest
         return (BCONPtrWord (fromIntegral p))
       _ -> panic "mallocStrings:spliceLit"
   spliceLit other = return other
@@ -160,8 +172,8 @@ mallocStrings interp ulbcos = do
     mapM_ collectPtr unlinkedBCOPtrs
 
   collectLit (BCONPtrStr bs) = do
-    strs <- get
-    put (bs:strs)
+    strs <- MTL.get
+    MTL.put (bs:strs)
   collectLit _ = return ()
 
   collectPtr (BCOPtrBCO bco) = collect bco
@@ -176,6 +188,47 @@ assembleOneBCO interp profile pbco = do
   UnitFlatBag ubco' <- mallocStrings interp (UnitFlatBag ubco)
   return ubco'
 
+
+{-# NOINLINE inspectInstrs #-}
+inspectInstrs :: Platform -> Bool -> Word -> [BCInstr] -> InspectState
+inspectInstrs platform long_jump e instrs =
+  inspectAsm long_jump e (mapM_ (assembleI @InspectAsm platform) instrs)
+
+data RunAsmReader = RunAsmReader { isn_array :: {-# UNPACK #-} !(Array.IOUArray Int Word16)
+                                  , ptr_array :: {-# UNPACK #-} !(SmallMutableArrayIO BCOPtr)
+                                  , lit_array :: {-# UNPACK #-} !(SmallMutableArrayIO BCONPtr )
+                                  }
+
+data RunAsmResult = RunAsmResult { final_isn_array :: !(Array.UArray Int Word16)
+                                 , final_ptr_array :: !(SmallArray BCOPtr)
+                                 , final_lit_array :: !(SmallArray BCONPtr) }
+
+-- How many words we have written so far.
+data AsmState = AsmState { nisn :: !Int, nptr :: !Int, nlit :: !Int }
+
+
+{-# NOINLINE runInstrs #-}
+runInstrs ::  Platform -> Bool -> InspectState -> [BCInstr] -> IO RunAsmResult
+runInstrs platform long_jumps is_state instrs = do
+  isn_array <- Array.newArray_ (0, (fromIntegral $ instrCount is_state) - 1)
+  ptr_array <- newSmallArrayIO (fromIntegral $ ptrCount is_state) undefined
+  lit_array <- newSmallArrayIO (fromIntegral $ litCount is_state) undefined
+  let env :: LocalLabel -> Word
+      env lbl = fromMaybe
+        (pprPanic "assembleBCO.findLabel" (ppr lbl))
+        (Map.lookup lbl (lblEnv is_state))
+  let initial_state  = AsmState 0 0 0
+  let initial_reader = RunAsmReader{..}
+  runAsm long_jumps env initial_reader initial_state (mapM_ (\i -> assembleRunAsm platform i) instrs)
+  final_isn_array <- unsafeFreeze isn_array
+  final_ptr_array <- unsafeFreezeSmallArrayIO ptr_array
+  final_lit_array <- unsafeFreezeSmallArrayIO lit_array
+  return $ RunAsmResult {..}
+
+{-# NOINLINE assembleRunAsm #-}
+assembleRunAsm :: Platform -> BCInstr -> RunAsm ()
+assembleRunAsm p i = assembleI @RunAsm p i
+
 assembleBCO :: Platform -> ProtoBCO Name -> IO UnlinkedBCO
 assembleBCO platform
             (ProtoBCO { protoBCOName       = nm
@@ -184,9 +237,7 @@ assembleBCO platform
                       , protoBCOBitmapSize = bsize
                       , protoBCOArity      = arity }) = do
   -- pass 1: collect up the offsets of the local labels.
-  let asm = mapM_ (assembleI platform) instrs
-
-      initial_offset = 0
+  let initial_offset = 0
 
       -- Jump instructions are variable-sized, there are long and short variants
       -- depending on the magnitude of the offset.  However, we can't tell what
@@ -196,30 +247,25 @@ assembleBCO platform
       -- and if the final size is indeed small enough for short jumps, we are
       -- done.  Otherwise, we repeat the calculation, and we force all jumps in
       -- this BCO to be long.
-      (n_insns0, lbl_map0) = inspectAsm platform False initial_offset asm
-      ((n_insns, lbl_map), long_jumps)
-        | isLargeW (fromIntegral $ Map.size lbl_map0)
-          || isLargeW n_insns0
-                    = (inspectAsm platform True initial_offset asm, True)
-        | otherwise = ((n_insns0, lbl_map0), False)
-
-      env :: LocalLabel -> Word
-      env lbl = fromMaybe
-        (pprPanic "assembleBCO.findLabel" (ppr lbl))
-        (Map.lookup lbl lbl_map)
+      is0 = inspectInstrs platform False initial_offset instrs
+      (is1, long_jumps)
+        | isLargeInspectState is0
+                    = (inspectInstrs platform True initial_offset instrs, True)
+        | otherwise = (is0, False)
+
 
   -- pass 2: run assembler and generate instructions, literals and pointers
-  let initial_state = (emptySS, emptySS, emptySS)
-  (final_insns, final_lits, final_ptrs) <- flip execStateT initial_state $ runAsm platform long_jumps env asm
+  RunAsmResult{..} <- runInstrs platform long_jumps is1 instrs
 
   -- precomputed size should be equal to final size
-  massertPpr (n_insns == sizeSS final_insns)
+  massertPpr (fromIntegral (instrCount is1) == numElements final_isn_array
+              && fromIntegral (ptrCount is1) == sizeofSmallArray final_ptr_array
+              && fromIntegral (litCount is1) == sizeofSmallArray final_lit_array)
              (text "bytecode instruction count mismatch")
 
-  let asm_insns = ssElts final_insns
-      !insns_arr =  mkBCOByteArray $ Array.listArray (0 :: Int, fromIntegral n_insns - 1) asm_insns
+  let !insns_arr =  mkBCOByteArray $ final_isn_array
       !bitmap_arr = mkBCOByteArray $ mkBitmapArray bsize bitmap
-      ul_bco = UnlinkedBCO nm arity insns_arr bitmap_arr (fromSizedSeq final_lits) (fromSizedSeq final_ptrs)
+      ul_bco = UnlinkedBCO nm arity insns_arr bitmap_arr (fromSmallArray final_lit_array) (fromSmallArray final_ptr_array)
 
   -- 8 Aug 01: Finalisers aren't safe when attached to non-primitive
   -- objects, since they might get run too early.  Disable this until
@@ -236,10 +282,6 @@ mkBitmapArray bsize bitmap
   = Array.listArray (0, length bitmap) $
       fromIntegral bsize : map (fromInteger . fromStgWord) bitmap
 
--- instrs nonptrs ptrs
-type AsmState = (SizedSeq Word16,
-                 SizedSeq BCONPtr,
-                 SizedSeq BCOPtr)
 
 data Operand
   = Op Word
@@ -259,39 +301,9 @@ truncHalfWord platform w = case platformWordSize platform of
   PW8 | w <= 4294967295 -> Op (fromIntegral w)
   _ -> pprPanic "GHC.ByteCode.Asm.truncHalfWord" (ppr w)
 
-data Assembler a
-  = AllocPtr (IO BCOPtr) (Word -> Assembler a)
-  | AllocLit [BCONPtr] (Word -> Assembler a)
-  | AllocLabel LocalLabel (Assembler a)
-  | Emit Word16 [Operand] (Assembler a)
-  | NullAsm a
-  deriving (Functor)
-
-instance Applicative Assembler where
-    pure = NullAsm
-    (<*>) = ap
-
-instance Monad Assembler where
-  NullAsm x >>= f = f x
-  AllocPtr p k >>= f = AllocPtr p (k >=> f)
-  AllocLit l k >>= f = AllocLit l (k >=> f)
-  AllocLabel lbl k >>= f = AllocLabel lbl (k >>= f)
-  Emit w ops k >>= f = Emit w ops (k >>= f)
-
-ioptr :: IO BCOPtr -> Assembler Word
-ioptr p = AllocPtr p return
-
-ptr :: BCOPtr -> Assembler Word
-ptr = ioptr . return
-
-lit :: [BCONPtr] -> Assembler Word
-lit l = AllocLit l return
-
-label :: LocalLabel -> Assembler ()
-label w = AllocLabel w (return ())
 
-emit :: Word16 -> [Operand] -> Assembler ()
-emit w ops = Emit w ops (return ())
+ptr :: MonadAssembler m => BCOPtr -> m Word
+ptr = ioptr . return
 
 type LabelEnv = LocalLabel -> Word
 
@@ -302,36 +314,130 @@ largeOp long_jumps op = case op of
    IOp i     -> isLargeI i
    LabelOp _ -> long_jumps
 
-runAsm :: Platform -> Bool -> LabelEnv -> Assembler a -> StateT AsmState IO a
-runAsm platform long_jumps e = go
+newtype RunAsm a = RunAsm' { runRunAsm :: Bool
+                                       -> LabelEnv
+                                       -> RunAsmReader
+                                       -> AsmState
+                                       -> IO (AsmState, a) }
+
+pattern RunAsm :: (Bool -> LabelEnv -> RunAsmReader -> AsmState -> IO (AsmState, a))
+                  -> RunAsm a
+pattern RunAsm m <- RunAsm' m
   where
-    go (NullAsm x) = return x
-    go (AllocPtr p_io k) = do
-      p <- lift p_io
-      w <- state $ \(st_i0,st_l0,st_p0) ->
-        let st_p1 = addToSS st_p0 p
-        in (sizeSS st_p0, (st_i0,st_l0,st_p1))
-      go $ k w
-    go (AllocLit lits k) = do
-      w <- state $ \(st_i0,st_l0,st_p0) ->
-        let st_l1 = addListToSS st_l0 lits
-        in (sizeSS st_l0, (st_i0,st_l1,st_p0))
-      go $ k w
-    go (AllocLabel _ k) = go k
-    go (Emit w ops k) = do
-      let largeArgs = any (largeOp long_jumps) ops
-          opcode
-            | largeArgs = largeArgInstr w
-            | otherwise = w
-          words = concatMap expand ops
-          expand (SmallOp w) = [w]
-          expand (LabelOp w) = expand (Op (e w))
-          expand (Op w) = if largeArgs then largeArg platform (fromIntegral w) else [fromIntegral w]
-          expand (IOp i) = if largeArgs then largeArg platform (fromIntegral i) else [fromIntegral i]
-      state $ \(st_i0,st_l0,st_p0) ->
-        let st_i1 = addListToSS st_i0 (opcode : words)
-        in ((), (st_i1,st_l0,st_p0))
-      go k
+    RunAsm m = RunAsm' (oneShot $ \a -> oneShot $ \b -> oneShot $ \c -> oneShot $ \d -> m a b c d)
+{-# COMPLETE RunAsm #-}
+
+instance Functor RunAsm where
+  fmap f (RunAsm x) = RunAsm (\a b c !s -> fmap (fmap f) (x a b c s))
+
+instance Applicative RunAsm where
+  pure x = RunAsm $ \_ _ _ !s -> pure (s, x)
+  (RunAsm f) <*> (RunAsm x) = RunAsm $ \a b c !s -> do
+                                  (!s', f') <- f a b c s
+                                  (!s'', x') <- x a b c s'
+                                  return (s'', f' x')
+  {-# INLINE (<*>) #-}
+
+
+instance Monad RunAsm where
+  return  = pure
+  (RunAsm m) >>= f = RunAsm $ \a b c !s -> m a b c s >>= \(s', r) -> runRunAsm (f r) a b c s'
+  {-# INLINE (>>=) #-}
+
+runAsm :: Bool -> LabelEnv -> RunAsmReader -> AsmState -> RunAsm a -> IO a
+runAsm long_jumps e r s (RunAsm'{runRunAsm}) = fmap snd $ runRunAsm long_jumps e r s
+
+-- Unrolled to avoid allocating intermediate lists.
+expand :: PlatformWordSize -> Bool -> Operand -> RunAsm ()
+expand word_size largeArgs o = do
+  e <- askEnv
+  case o of
+    (SmallOp w) -> writeIsn w
+    (LabelOp w) -> let !r = e w in handleLargeArg r
+    (Op w) -> handleLargeArg w
+    (IOp i) -> handleLargeArg i
+
+  where
+    handleLargeArg :: Integral a => a -> RunAsm ()
+    handleLargeArg w  =
+      if largeArgs
+        then largeArg word_size (fromIntegral w)
+        else writeIsn (fromIntegral w)
+
+
+{-# INLINE any #-}
+-- Any is unrolled manually so that the call in `emit` can be eliminated without
+-- relying on SpecConstr (which does not work across modules).
+any :: (a -> Bool) -> [a] -> Bool
+any _ [] = False
+any f [x] = f x
+any f [x,y] = f x && f y
+any f [x,y,z] = f x && f y && f z
+any f [x1,x2,x3,x4] = f x1 && f x2 && f x3 && f x4
+any f [x1,x2,x3,x4, x5] = f x1 && f x2 && f x3 && f x4 && f x5
+any f [x1,x2,x3,x4,x5,x6] = f x1 && f x2 && f x3 && f x4 && f x5 && f x6
+any f xs = List.any f xs
+
+lift :: IO a -> RunAsm a
+lift io = RunAsm $ \_ _ _ s -> io >>= \a -> pure (s, a)
+
+askLongJumps :: RunAsm Bool
+askLongJumps = RunAsm $ \a _ _ s -> pure (s, a)
+
+askEnv :: RunAsm LabelEnv
+askEnv = RunAsm $ \_ b _ s -> pure (s, b)
+
+writePtr :: BCOPtr -> RunAsm Word
+writePtr w
+            = RunAsm $ \_ _ (RunAsmReader{..}) asm -> do
+              writeSmallArrayIO ptr_array (nptr asm) w
+              let !n' = nptr asm + 1
+              let !asm' = asm { nptr = n' }
+              return (asm', fromIntegral (nptr asm))
+
+writeLit :: BCONPtr -> RunAsm Word
+writeLit w = RunAsm $ \_ _ (RunAsmReader{..}) asm -> do
+              writeSmallArrayIO lit_array (nlit asm) w
+              let !n' = nlit asm + 1
+              let !asm' = asm { nlit = n' }
+              return (asm', fromIntegral (nlit asm))
+
+writeLits :: [BCONPtr] -> RunAsm Word
+writeLits []  = error "empty"
+writeLits [l] = writeLit l
+writeLits [l1,l2] = writeLit l1 <* writeLit l2
+writeLits (l:ls)  = writeLit l  <* mapM_ writeLit ls
+
+writeIsn :: Word16 -> RunAsm ()
+writeIsn w = RunAsm $ \_ _ (RunAsmReader{..}) asm -> do
+              unsafeWrite isn_array (nisn asm) w
+              let !n' = nisn asm + 1
+              let !asm' = asm { nisn = n' }
+              return (asm', ())
+
+
+instance MonadAssembler RunAsm where
+  ioptr p_io = do
+                 p <- lift p_io
+                 writePtr p
+  lit lits = writeLits lits
+
+  label _ = return ()
+
+  emit pwordsize w ops = do
+                long_jumps <- askLongJumps
+                let largeArgs = any (largeOp long_jumps) ops
+                let opcode
+                      | largeArgs = largeArgInstr w
+                      | otherwise = w
+                writeIsn opcode
+                mapM_ (expand pwordsize largeArgs) ops
+
+  {-# INLINE emit #-}
+  {-# INLINE label #-}
+  {-# INLINE lit #-}
+  {-# INLINE ioptr #-}
+
 
 type LabelEnvMap = Map LocalLabel Word
 
@@ -342,27 +448,81 @@ data InspectState = InspectState
   , lblEnv :: LabelEnvMap
   }
 
+isLargeInspectState :: InspectState -> Bool
+isLargeInspectState InspectState{..} =
+  isLargeW (fromIntegral $ Map.size lblEnv)
+    || isLargeW instrCount
 
-inspectAsm :: Platform -> Bool -> Word -> Assembler a -> (Word, LabelEnvMap)
-inspectAsm platform long_jumps initial_offset
-  = go (InspectState initial_offset 0 0 Map.empty)
-  where
-    go s (NullAsm _) = (instrCount s, lblEnv s)
-    go s (AllocPtr _ k) = go (s { ptrCount = n + 1 }) (k n)
-      where n = ptrCount s
-    go s (AllocLit ls k) = go (s { litCount = n + strictGenericLength ls }) (k n)
-      where n = litCount s
-    go s (AllocLabel lbl k) = go s' k
-      where s' = s { lblEnv = Map.insert lbl (instrCount s) (lblEnv s) }
-    go s (Emit _ ops k) = go s' k
-      where
-        s' = s { instrCount = instrCount s + size }
-        size = sum (map count ops) + 1
+newtype InspectEnv = InspectEnv { _inspectLongJumps :: Bool
+                                }
+
+newtype InspectAsm a = InspectAsm { runInspectAsm :: InspectEnv -> InspectState -> (# InspectState,  a #) }
+
+instance Functor InspectAsm where
+  fmap f (InspectAsm k) = InspectAsm $ \a b -> case k a b of
+                                                  (# b', c #) -> (# b', f c #)
+
+instance Applicative InspectAsm where
+  pure x = InspectAsm $ \_ s -> (# s, x #)
+  (InspectAsm f) <*> (InspectAsm x) = InspectAsm $ \a b -> case f a b of
+                                                              (# s', f' #) ->
+                                                                case x a s' of
+                                                                  (# s'', x' #) -> (# s'', f' x' #)
+
+instance Monad InspectAsm where
+  return = pure
+  (InspectAsm m) >>= f = InspectAsm $ \ a b -> case m a b of
+                                                (# s', a' #) -> runInspectAsm (f a') a s'
+
+get_ :: InspectAsm InspectState
+get_ = InspectAsm $ \_ b -> (# b, b #)
+
+put_ :: InspectState -> InspectAsm ()
+put_ !s = InspectAsm $ \_ _ -> (# s, () #)
+
+modify_ :: (InspectState -> InspectState) -> InspectAsm ()
+modify_ f = InspectAsm $ \_ s -> let !s' = f s in (# s', () #)
+
+ask_ :: InspectAsm InspectEnv
+ask_ = InspectAsm $ \a b -> (# b, a #)
+
+inspectAsm :: Bool -> Word -> InspectAsm () -> InspectState
+inspectAsm long_jumps initial_offset (InspectAsm s) =
+  case s (InspectEnv long_jumps) (InspectState initial_offset 0 0 Map.empty) of
+    (# res, () #) -> res
+{-# INLINE inspectAsm #-}
+
+instance MonadAssembler InspectAsm where
+  ioptr _ = do
+    s <- get_
+    let n = ptrCount s
+    put_ (s { ptrCount = n + 1 })
+    return n
+
+  lit ls = do
+    s <- get_
+    let n = litCount s
+    put_ (s { litCount = n + strictGenericLength ls })
+    return n
+
+  label lbl = modify_ (\s -> s { lblEnv = Map.insert lbl (instrCount s) (lblEnv s) })
+
+  emit pwordsize _ ops = do
+    InspectEnv long_jumps <- ask_
+    let size = sum (map count ops) + 1
         largeOps = any (largeOp long_jumps) ops
         count (SmallOp _) = 1
         count (LabelOp _) = count (Op 0)
-        count (Op _) = if largeOps then largeArg16s platform else 1
-        count (IOp _) = if largeOps then largeArg16s platform else 1
+        count (Op _) = if largeOps then largeArg16s pwordsize else 1
+        count (IOp _) = if largeOps then largeArg16s pwordsize else 1
+    s <- get_
+    put_ (s { instrCount = instrCount s + size })
+
+  {-# INLINE emit #-}
+  {-# INLINE label #-}
+  {-# INLINE lit #-}
+  {-# INLINE ioptr #-}
+
 
 -- Bring in all the bci_ bytecode constants.
 #include "Bytecodes.h"
@@ -370,47 +530,58 @@ inspectAsm platform long_jumps initial_offset
 largeArgInstr :: Word16 -> Word16
 largeArgInstr bci = bci_FLAG_LARGE_ARGS .|. bci
 
-largeArg :: Platform -> Word64 -> [Word16]
-largeArg platform w = case platformWordSize platform of
-   PW8 -> [fromIntegral (w `shiftR` 48),
-           fromIntegral (w `shiftR` 32),
-           fromIntegral (w `shiftR` 16),
-           fromIntegral w]
+{-# INLINE largeArg #-}
+largeArg :: PlatformWordSize -> Word64 -> RunAsm ()
+largeArg wsize w = case wsize of
+   PW8 ->  do writeIsn (fromIntegral (w `shiftR` 48))
+              writeIsn (fromIntegral (w `shiftR` 32))
+              writeIsn (fromIntegral (w `shiftR` 16))
+              writeIsn (fromIntegral w)
    PW4 -> assertPpr (w < fromIntegral (maxBound :: Word32))
-                    (text "largeArg too big:" <+> ppr w) $
-          [fromIntegral (w `shiftR` 16),
-           fromIntegral w]
+                    (text "largeArg too big:" <+> ppr w) $ do
+          writeIsn (fromIntegral (w `shiftR` 16))
+          writeIsn (fromIntegral w)
 
-largeArg16s :: Platform -> Word
-largeArg16s platform = case platformWordSize platform of
+largeArg16s :: PlatformWordSize -> Word
+largeArg16s pwordsize = case pwordsize of
    PW8 -> 4
    PW4 -> 2
 
-assembleI :: Platform
+class Monad m => MonadAssembler m where
+  ioptr :: IO BCOPtr -> m Word
+  lit :: [BCONPtr] -> m Word
+  label :: LocalLabel -> m ()
+  emit :: PlatformWordSize -> Word16 -> [Operand] -> m ()
+
+{-# SPECIALISE assembleI :: Platform -> BCInstr -> InspectAsm () #-}
+{-# SPECIALISE assembleI :: Platform -> BCInstr -> RunAsm () #-}
+
+assembleI :: forall m . MonadAssembler m
+          => Platform
           -> BCInstr
-          -> Assembler ()
+          -> m ()
 assembleI platform i = case i of
-  STKCHECK n               -> emit bci_STKCHECK [Op n]
-  PUSH_L o1                -> emit bci_PUSH_L [wOp o1]
-  PUSH_LL o1 o2            -> emit bci_PUSH_LL [wOp o1, wOp o2]
-  PUSH_LLL o1 o2 o3        -> emit bci_PUSH_LLL [wOp o1, wOp o2, wOp o3]
-  PUSH8 o1                 -> emit bci_PUSH8 [bOp o1]
-  PUSH16 o1                -> emit bci_PUSH16 [bOp o1]
-  PUSH32 o1                -> emit bci_PUSH32 [bOp o1]
-  PUSH8_W o1               -> emit bci_PUSH8_W [bOp o1]
-  PUSH16_W o1              -> emit bci_PUSH16_W [bOp o1]
-  PUSH32_W o1              -> emit bci_PUSH32_W [bOp o1]
+  STKCHECK n               -> emit_ bci_STKCHECK [Op n]
+  PUSH_L o1                -> emit_ bci_PUSH_L [wOp o1]
+  PUSH_LL o1 o2            -> emit_ bci_PUSH_LL [wOp o1, wOp o2]
+  PUSH_LLL o1 o2 o3        -> emit_ bci_PUSH_LLL [wOp o1, wOp o2, wOp o3]
+  PUSH8 o1                 -> emit_ bci_PUSH8 [bOp o1]
+  PUSH16 o1                -> emit_ bci_PUSH16 [bOp o1]
+  PUSH32 o1                -> emit_ bci_PUSH32 [bOp o1]
+  PUSH8_W o1               -> emit_ bci_PUSH8_W [bOp o1]
+  PUSH16_W o1              -> emit_ bci_PUSH16_W [bOp o1]
+  PUSH32_W o1              -> emit_ bci_PUSH32_W [bOp o1]
   PUSH_G nm                -> do p <- ptr (BCOPtrName nm)
-                                 emit bci_PUSH_G [Op p]
+                                 emit_ bci_PUSH_G [Op p]
   PUSH_PRIMOP op           -> do p <- ptr (BCOPtrPrimOp op)
-                                 emit bci_PUSH_G [Op p]
+                                 emit_ bci_PUSH_G [Op p]
   PUSH_BCO proto           -> do let ul_bco = assembleBCO platform proto
                                  p <- ioptr (liftM BCOPtrBCO ul_bco)
-                                 emit bci_PUSH_G [Op p]
+                                 emit_ bci_PUSH_G [Op p]
   PUSH_ALTS proto pk
                            -> do let ul_bco = assembleBCO platform proto
                                  p <- ioptr (liftM BCOPtrBCO ul_bco)
-                                 emit (push_alts pk) [Op p]
+                                 emit_ (push_alts pk) [Op p]
   PUSH_ALTS_TUPLE proto call_info tuple_proto
                            -> do let ul_bco = assembleBCO platform proto
                                      ul_tuple_bco = assembleBCO platform
@@ -419,121 +590,125 @@ assembleI platform i = case i of
                                  p_tup <- ioptr (liftM BCOPtrBCO ul_tuple_bco)
                                  info <- word (fromIntegral $
                                               mkNativeCallInfoSig platform call_info)
-                                 emit bci_PUSH_ALTS_T
+                                 emit_ bci_PUSH_ALTS_T
                                       [Op p, Op info, Op p_tup]
-  PUSH_PAD8                -> emit bci_PUSH_PAD8 []
-  PUSH_PAD16               -> emit bci_PUSH_PAD16 []
-  PUSH_PAD32               -> emit bci_PUSH_PAD32 []
+  PUSH_PAD8                -> emit_ bci_PUSH_PAD8 []
+  PUSH_PAD16               -> emit_ bci_PUSH_PAD16 []
+  PUSH_PAD32               -> emit_ bci_PUSH_PAD32 []
   PUSH_UBX8 lit            -> do np <- literal lit
-                                 emit bci_PUSH_UBX8 [Op np]
+                                 emit_ bci_PUSH_UBX8 [Op np]
   PUSH_UBX16 lit           -> do np <- literal lit
-                                 emit bci_PUSH_UBX16 [Op np]
+                                 emit_ bci_PUSH_UBX16 [Op np]
   PUSH_UBX32 lit           -> do np <- literal lit
-                                 emit bci_PUSH_UBX32 [Op np]
+                                 emit_ bci_PUSH_UBX32 [Op np]
   PUSH_UBX lit nws         -> do np <- literal lit
-                                 emit bci_PUSH_UBX [Op np, wOp nws]
-
+                                 emit_ bci_PUSH_UBX [Op np, wOp nws]
   -- see Note [Generating code for top-level string literal bindings] in GHC.StgToByteCode
   PUSH_ADDR nm             -> do np <- lit [BCONPtrAddr nm]
-                                 emit bci_PUSH_UBX [Op np, SmallOp 1]
-
-  PUSH_APPLY_N             -> emit bci_PUSH_APPLY_N []
-  PUSH_APPLY_V             -> emit bci_PUSH_APPLY_V []
-  PUSH_APPLY_F             -> emit bci_PUSH_APPLY_F []
-  PUSH_APPLY_D             -> emit bci_PUSH_APPLY_D []
-  PUSH_APPLY_L             -> emit bci_PUSH_APPLY_L []
-  PUSH_APPLY_P             -> emit bci_PUSH_APPLY_P []
-  PUSH_APPLY_PP            -> emit bci_PUSH_APPLY_PP []
-  PUSH_APPLY_PPP           -> emit bci_PUSH_APPLY_PPP []
-  PUSH_APPLY_PPPP          -> emit bci_PUSH_APPLY_PPPP []
-  PUSH_APPLY_PPPPP         -> emit bci_PUSH_APPLY_PPPPP []
-  PUSH_APPLY_PPPPPP        -> emit bci_PUSH_APPLY_PPPPPP []
-
-  SLIDE     n by           -> emit bci_SLIDE [wOp n, wOp by]
-  ALLOC_AP  n              -> emit bci_ALLOC_AP [truncHalfWord platform n]
-  ALLOC_AP_NOUPD n         -> emit bci_ALLOC_AP_NOUPD [truncHalfWord platform n]
-  ALLOC_PAP arity n        -> emit bci_ALLOC_PAP [truncHalfWord platform arity, truncHalfWord platform n]
-  MKAP      off sz         -> emit bci_MKAP [wOp off, truncHalfWord platform sz]
-  MKPAP     off sz         -> emit bci_MKPAP [wOp off, truncHalfWord platform sz]
-  UNPACK    n              -> emit bci_UNPACK [wOp n]
+                                 emit_ bci_PUSH_UBX [Op np, SmallOp 1]
+
+  PUSH_APPLY_N             -> emit_ bci_PUSH_APPLY_N []
+  PUSH_APPLY_V             -> emit_ bci_PUSH_APPLY_V []
+  PUSH_APPLY_F             -> emit_ bci_PUSH_APPLY_F []
+  PUSH_APPLY_D             -> emit_ bci_PUSH_APPLY_D []
+  PUSH_APPLY_L             -> emit_ bci_PUSH_APPLY_L []
+  PUSH_APPLY_P             -> emit_ bci_PUSH_APPLY_P []
+  PUSH_APPLY_PP            -> emit_ bci_PUSH_APPLY_PP []
+  PUSH_APPLY_PPP           -> emit_ bci_PUSH_APPLY_PPP []
+  PUSH_APPLY_PPPP          -> emit_ bci_PUSH_APPLY_PPPP []
+  PUSH_APPLY_PPPPP         -> emit_ bci_PUSH_APPLY_PPPPP []
+  PUSH_APPLY_PPPPPP        -> emit_ bci_PUSH_APPLY_PPPPPP []
+
+  SLIDE     n by           -> emit_ bci_SLIDE [wOp n, wOp by]
+  ALLOC_AP  n              -> emit_ bci_ALLOC_AP [truncHalfWord platform n]
+  ALLOC_AP_NOUPD n         -> emit_ bci_ALLOC_AP_NOUPD [truncHalfWord platform n]
+  ALLOC_PAP arity n        -> emit_ bci_ALLOC_PAP [truncHalfWord platform arity, truncHalfWord platform n]
+  MKAP      off sz         -> emit_ bci_MKAP [wOp off, truncHalfWord platform sz]
+  MKPAP     off sz         -> emit_ bci_MKPAP [wOp off, truncHalfWord platform sz]
+  UNPACK    n              -> emit_ bci_UNPACK [wOp n]
   PACK      dcon sz        -> do itbl_no <- lit [BCONPtrItbl (getName dcon)]
-                                 emit bci_PACK [Op itbl_no, wOp sz]
+                                 emit_ bci_PACK [Op itbl_no, wOp sz]
   LABEL     lbl            -> label lbl
   TESTLT_I  i l            -> do np <- int i
-                                 emit bci_TESTLT_I [Op np, LabelOp l]
+                                 emit_ bci_TESTLT_I [Op np, LabelOp l]
   TESTEQ_I  i l            -> do np <- int i
-                                 emit bci_TESTEQ_I [Op np, LabelOp l]
+                                 emit_ bci_TESTEQ_I [Op np, LabelOp l]
   TESTLT_W  w l            -> do np <- word w
-                                 emit bci_TESTLT_W [Op np, LabelOp l]
+                                 emit_ bci_TESTLT_W [Op np, LabelOp l]
   TESTEQ_W  w l            -> do np <- word w
-                                 emit bci_TESTEQ_W [Op np, LabelOp l]
+                                 emit_ bci_TESTEQ_W [Op np, LabelOp l]
   TESTLT_I64  i l          -> do np <- word64 (fromIntegral i)
-                                 emit bci_TESTLT_I64 [Op np, LabelOp l]
+                                 emit_ bci_TESTLT_I64 [Op np, LabelOp l]
   TESTEQ_I64  i l          -> do np <- word64 (fromIntegral i)
-                                 emit bci_TESTEQ_I64 [Op np, LabelOp l]
+                                 emit_ bci_TESTEQ_I64 [Op np, LabelOp l]
   TESTLT_I32  i l          -> do np <- word (fromIntegral i)
-                                 emit bci_TESTLT_I32 [Op np, LabelOp l]
+                                 emit_ bci_TESTLT_I32 [Op np, LabelOp l]
   TESTEQ_I32 i l           -> do np <- word (fromIntegral i)
-                                 emit bci_TESTEQ_I32 [Op np, LabelOp l]
+                                 emit_ bci_TESTEQ_I32 [Op np, LabelOp l]
   TESTLT_I16  i l          -> do np <- word (fromIntegral i)
-                                 emit bci_TESTLT_I16 [Op np, LabelOp l]
+                                 emit_ bci_TESTLT_I16 [Op np, LabelOp l]
   TESTEQ_I16 i l           -> do np <- word (fromIntegral i)
-                                 emit bci_TESTEQ_I16 [Op np, LabelOp l]
+                                 emit_ bci_TESTEQ_I16 [Op np, LabelOp l]
   TESTLT_I8  i l           -> do np <- word (fromIntegral i)
-                                 emit bci_TESTLT_I8 [Op np, LabelOp l]
+                                 emit_ bci_TESTLT_I8 [Op np, LabelOp l]
   TESTEQ_I8 i l            -> do np <- word (fromIntegral i)
-                                 emit bci_TESTEQ_I8 [Op np, LabelOp l]
+                                 emit_ bci_TESTEQ_I8 [Op np, LabelOp l]
   TESTLT_W64  w l          -> do np <- word64 w
-                                 emit bci_TESTLT_W64 [Op np, LabelOp l]
+                                 emit_ bci_TESTLT_W64 [Op np, LabelOp l]
   TESTEQ_W64  w l          -> do np <- word64 w
-                                 emit bci_TESTEQ_W64 [Op np, LabelOp l]
+                                 emit_ bci_TESTEQ_W64 [Op np, LabelOp l]
   TESTLT_W32  w l          -> do np <- word (fromIntegral w)
-                                 emit bci_TESTLT_W32 [Op np, LabelOp l]
+                                 emit_ bci_TESTLT_W32 [Op np, LabelOp l]
   TESTEQ_W32  w l          -> do np <- word (fromIntegral w)
-                                 emit bci_TESTEQ_W32 [Op np, LabelOp l]
+                                 emit_ bci_TESTEQ_W32 [Op np, LabelOp l]
   TESTLT_W16  w l          -> do np <- word (fromIntegral w)
-                                 emit bci_TESTLT_W16 [Op np, LabelOp l]
+                                 emit_ bci_TESTLT_W16 [Op np, LabelOp l]
   TESTEQ_W16  w l          -> do np <- word (fromIntegral w)
-                                 emit bci_TESTEQ_W16 [Op np, LabelOp l]
+                                 emit_ bci_TESTEQ_W16 [Op np, LabelOp l]
   TESTLT_W8  w l           -> do np <- word (fromIntegral w)
-                                 emit bci_TESTLT_W8 [Op np, LabelOp l]
+                                 emit_ bci_TESTLT_W8 [Op np, LabelOp l]
   TESTEQ_W8  w l           -> do np <- word (fromIntegral w)
-                                 emit bci_TESTEQ_W8 [Op np, LabelOp l]
+                                 emit_ bci_TESTEQ_W8 [Op np, LabelOp l]
   TESTLT_F  f l            -> do np <- float f
-                                 emit bci_TESTLT_F [Op np, LabelOp l]
+                                 emit_ bci_TESTLT_F [Op np, LabelOp l]
   TESTEQ_F  f l            -> do np <- float f
-                                 emit bci_TESTEQ_F [Op np, LabelOp l]
+                                 emit_ bci_TESTEQ_F [Op np, LabelOp l]
   TESTLT_D  d l            -> do np <- double d
-                                 emit bci_TESTLT_D [Op np, LabelOp l]
+                                 emit_ bci_TESTLT_D [Op np, LabelOp l]
   TESTEQ_D  d l            -> do np <- double d
-                                 emit bci_TESTEQ_D [Op np, LabelOp l]
-  TESTLT_P  i l            -> emit bci_TESTLT_P [SmallOp i, LabelOp l]
-  TESTEQ_P  i l            -> emit bci_TESTEQ_P [SmallOp i, LabelOp l]
-  CASEFAIL                 -> emit bci_CASEFAIL []
-  SWIZZLE   stkoff n       -> emit bci_SWIZZLE [wOp stkoff, IOp n]
-  JMP       l              -> emit bci_JMP [LabelOp l]
-  ENTER                    -> emit bci_ENTER []
-  RETURN rep               -> emit (return_non_tuple rep) []
-  RETURN_TUPLE             -> emit bci_RETURN_T []
+                                 emit_ bci_TESTEQ_D [Op np, LabelOp l]
+  TESTLT_P  i l            -> emit_ bci_TESTLT_P [SmallOp i, LabelOp l]
+  TESTEQ_P  i l            -> emit_ bci_TESTEQ_P [SmallOp i, LabelOp l]
+  CASEFAIL                 -> emit_ bci_CASEFAIL []
+  SWIZZLE   stkoff n       -> emit_ bci_SWIZZLE [wOp stkoff, IOp n]
+  JMP       l              -> emit_ bci_JMP [LabelOp l]
+  ENTER                    -> emit_ bci_ENTER []
+  RETURN rep               -> emit_ (return_non_tuple rep) []
+  RETURN_TUPLE             -> emit_ bci_RETURN_T []
   CCALL off m_addr i       -> do np <- addr m_addr
-                                 emit bci_CCALL [wOp off, Op np, SmallOp i]
-  PRIMCALL                 -> emit bci_PRIMCALL []
+                                 emit_ bci_CCALL [wOp off, Op np, SmallOp i]
+  PRIMCALL                 -> emit_ bci_PRIMCALL []
   BRK_FUN arr tick_mod tickx info_mod infox cc ->
                               do p1 <- ptr (BCOPtrBreakArray arr)
                                  tick_addr <- addr tick_mod
                                  info_addr <- addr info_mod
                                  np <- addr cc
-                                 emit bci_BRK_FUN [ Op p1
+                                 emit_ bci_BRK_FUN [ Op p1
                                                   , Op tick_addr, Op info_addr
                                                   , SmallOp tickx, SmallOp infox
                                                   , Op np
                                                   ]
 #if MIN_VERSION_rts(1,0,3)
   BCO_NAME name            -> do np <- lit [BCONPtrStr name]
-                                 emit bci_BCO_NAME [Op np]
+                                 emit_ bci_BCO_NAME [Op np]
 #endif
 
+
+
   where
+    emit_ = emit word_size
+
+    literal :: Literal -> m Word
     literal (LitLabel fs _)   = litlabel fs
     literal LitNullAddr       = word 0
     literal (LitFloat r)      = float (fromRational r)
@@ -573,16 +748,16 @@ assembleI platform i = case i of
     -- Note that we only support host endianness == target endianness for now,
     -- even with the external interpreter. This would need to be fixed to
     -- support host endianness /= target endianness
-    int :: Int -> Assembler Word
+    int :: Int -> m Word
     int  i = word (fromIntegral i)
 
-    float :: Float -> Assembler Word
+    float :: Float -> m Word
     float f = word32 (castFloatToWord32 f)
 
-    double :: Double -> Assembler Word
+    double :: Double -> m Word
     double d = word64 (castDoubleToWord64 d)
 
-    word64 :: Word64 -> Assembler Word
+    word64 :: Word64 -> m Word
     word64 ww = case word_size of
        PW4 ->
         let !wl = fromIntegral ww
@@ -592,17 +767,17 @@ assembleI platform i = case i of
             BigEndian    -> words [wh,wl]
        PW8 -> word (fromIntegral ww)
 
-    word8 :: Word8 -> Assembler Word
+    word8 :: Word8 -> m Word
     word8  x = case platformByteOrder platform of
       LittleEndian -> word (fromIntegral x)
       BigEndian    -> word (fromIntegral x `unsafeShiftL` (word_size_bits - 8))
 
-    word16 :: Word16 -> Assembler Word
+    word16 :: Word16 -> m Word
     word16 x = case platformByteOrder platform of
       LittleEndian -> word (fromIntegral x)
       BigEndian    -> word (fromIntegral x `unsafeShiftL` (word_size_bits - 16))
 
-    word32 :: Word32 -> Assembler Word
+    word32 :: Word32 -> m Word
     word32 x = case platformByteOrder platform of
       LittleEndian -> word (fromIntegral x)
       BigEndian    -> case word_size of


=====================================
compiler/GHC/ByteCode/Types.hs
=====================================
@@ -21,7 +21,7 @@ module GHC.ByteCode.Types
   , CgBreakInfo(..)
   , ModBreaks (..), BreakIndex, emptyModBreaks
   , CCostCentre
-  , FlatBag, sizeFlatBag, fromSizedSeq, elemsFlatBag
+  , FlatBag, sizeFlatBag, fromSmallArray, elemsFlatBag
   ) where
 
 import GHC.Prelude


=====================================
compiler/GHC/Data/FlatBag.hs
=====================================
@@ -8,13 +8,11 @@ module GHC.Data.FlatBag
   , mappendFlatBag
   -- * Construction
   , fromList
-  , fromSizedSeq
+  , fromSmallArray
   ) where
 
 import GHC.Prelude
 
-import GHC.Data.SizedSeq (SizedSeq, ssElts, sizeSS)
-
 import Control.DeepSeq
 
 import GHC.Data.SmallArray
@@ -125,5 +123,10 @@ fromList n elts =
 -- | Convert a 'SizedSeq' into its flattened representation.
 -- A 'FlatBag a' is more memory efficient than '[a]', if no further modification
 -- is necessary.
-fromSizedSeq :: SizedSeq a -> FlatBag a
-fromSizedSeq s = fromList (sizeSS s) (ssElts s)
+fromSmallArray :: SmallArray a -> FlatBag a
+fromSmallArray s = case sizeofSmallArray s of
+                      0 -> EmptyFlatBag
+                      1 -> UnitFlatBag (indexSmallArray s 0)
+                      2 -> TupleFlatBag (indexSmallArray s 0) (indexSmallArray s 1)
+                      _ -> FlatBag s
+


=====================================
compiler/GHC/Data/SmallArray.hs
=====================================
@@ -16,11 +16,18 @@ module GHC.Data.SmallArray
   , mapSmallArray
   , foldMapSmallArray
   , rnfSmallArray
+
+  -- * IO Operations
+  , SmallMutableArrayIO
+  , newSmallArrayIO
+  , writeSmallArrayIO
+  , unsafeFreezeSmallArrayIO
   )
 where
 
 import GHC.Exts
 import GHC.Prelude
+import GHC.IO
 import GHC.ST
 import Control.DeepSeq
 
@@ -28,6 +35,8 @@ data SmallArray a = SmallArray (SmallArray# a)
 
 data SmallMutableArray s a = SmallMutableArray (SmallMutableArray# s a)
 
+type SmallMutableArrayIO a = SmallMutableArray RealWorld a
+
 newSmallArray
   :: Int  -- ^ size
   -> a    -- ^ initial contents
@@ -37,6 +46,9 @@ newSmallArray
 newSmallArray (I# sz) x s = case newSmallArray# sz x s of
   (# s', a #) -> (# s', SmallMutableArray a #)
 
+newSmallArrayIO :: Int -> a -> IO (SmallMutableArrayIO a)
+newSmallArrayIO sz x = IO $ \s -> newSmallArray sz x s
+
 writeSmallArray
   :: SmallMutableArray s a -- ^ array
   -> Int                   -- ^ index
@@ -46,6 +58,12 @@ writeSmallArray
 {-# INLINE writeSmallArray #-}
 writeSmallArray (SmallMutableArray a) (I# i) x = writeSmallArray# a i x
 
+writeSmallArrayIO :: SmallMutableArrayIO a
+                  -> Int
+                  -> a
+                  -> IO ()
+writeSmallArrayIO a ix v = IO $ \s -> (# writeSmallArray a ix v s, () #)
+
 
 -- | Copy and freeze a slice of a mutable array.
 freezeSmallArray
@@ -69,6 +87,9 @@ unsafeFreezeSmallArray (SmallMutableArray ma) s =
   case unsafeFreezeSmallArray# ma s of
     (# s', a #) -> (# s', SmallArray a #)
 
+unsafeFreezeSmallArrayIO :: SmallMutableArrayIO a -> IO (SmallArray a)
+unsafeFreezeSmallArrayIO arr = IO $ \s -> unsafeFreezeSmallArray arr s
+
 -- | Get the size of a 'SmallArray'
 sizeofSmallArray
   :: SmallArray a



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/7c9d6afe6a1e9bec12c5f2db226aa68b84ecc63c

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/7c9d6afe6a1e9bec12c5f2db226aa68b84ecc63c
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20250204/4223eb94/attachment-0001.html>


More information about the ghc-commits mailing list