[Git][ghc/ghc][wip/buggymcbugfix/arrayOf-primop] 2 commits: Implement general arrayOf# primop

Vilem-Benjamin Liepelt gitlab at gitlab.haskell.org
Mon Jun 22 22:31:25 UTC 2020



Vilem-Benjamin Liepelt pushed to branch wip/buggymcbugfix/arrayOf-primop at Glasgow Haskell Compiler / GHC


Commits:
df424446 by buggymcbugfix at 2020-06-22T23:31:08+01:00
Implement general arrayOf# primop

- - - - -
6b3daca3 by buggymcbugfix at 2020-06-22T23:31:08+01:00
Remove unnecessary function

- - - - -


7 changed files:

- compiler/GHC/Builtin/primops.txt.pp
- compiler/GHC/Cmm/CLabel.hs
- compiler/GHC/Cmm/Info/Build.hs
- compiler/GHC/Cmm/Parser.y
- compiler/GHC/StgToCmm/Closure.hs
- compiler/GHC/StgToCmm/Heap.hs
- compiler/GHC/StgToCmm/Prim.hs


Changes:

=====================================
compiler/GHC/Builtin/primops.txt.pp
=====================================
@@ -1249,7 +1249,7 @@ primop  NewSmallArrayOp "newSmallArray#" GenPrimOp
 
 primop SmallArrayOfOp "smallArrayOf#" GenPrimOp
    o -> SmallArray# b
-   {Create a new immutable array with two elements.}
+   {smallArrayOf# :: (# a, .., a #) -> SmallArray# a}
    with
    has_side_effects = True
 


=====================================
compiler/GHC/Cmm/CLabel.hs
=====================================
@@ -40,7 +40,7 @@ module GHC.Cmm.CLabel (
         mkAsmTempDerivedLabel,
         mkAsmTempEndLabel,
         mkAsmTempDieLabel,
-
+        mkUnliftedDataLabel,
         mkDirty_MUT_VAR_Label,
         mkNonmovingWriteBarrierEnabledLabel,
         mkUpdInfoLabel,
@@ -251,7 +251,6 @@ data CLabel
   -- | A label before an info table to prevent excessive dead-stripping on darwin
   | DeadStripPreventer CLabel
 
-
   -- | Per-module table of tick locations
   | HpcTicksLabel Module
 
@@ -263,8 +262,14 @@ data CLabel
   | LargeBitmapLabel
         {-# UNPACK #-} !Unique
 
+  -- | Static data from local definitions allocated in the data section,
+  -- arising from a primop, like 'arrayOf#'
+  | UnliftedDataLabel {-# UNPACK #-} !Unique PrimOp
   deriving Eq
 
+mkUnliftedDataLabel :: Unique -> PrimOp -> CLabel
+mkUnliftedDataLabel = UnliftedDataLabel
+
 isIdLabel :: CLabel -> Bool
 isIdLabel IdLabel{} = True
 isIdLabel _ = False
@@ -318,6 +323,8 @@ instance Ord CLabel where
     nonDetCmpUnique u1 u2
   compare (LargeBitmapLabel u1) (LargeBitmapLabel u2) =
     nonDetCmpUnique u1 u2
+  compare (UnliftedDataLabel u1 _) (UnliftedDataLabel u2 _) =
+    nonDetCmpUnique u1 u2
   compare IdLabel{} _ = LT
   compare _ IdLabel{} = GT
   compare CmmLabel{} _ = LT
@@ -348,6 +355,8 @@ instance Ord CLabel where
   compare _ HpcTicksLabel{} = GT
   compare SRTLabel{} _ = LT
   compare _ SRTLabel{} = GT
+  compare UnliftedDataLabel{} _ = LT
+  compare _ UnliftedDataLabel{} = GT
 
 -- | Record where a foreign label is stored.
 data ForeignLabelSource
@@ -622,6 +631,8 @@ isStaticClosureLabel :: CLabel -> Bool
 isStaticClosureLabel (IdLabel _ _ Closure) = True
 -- Closure defined in cmm
 isStaticClosureLabel (CmmLabel _ _ CmmClosure) = True
+-- Unlifted data allocated in the data section
+isStaticClosureLabel UnliftedDataLabel{} = True
 isStaticClosureLabel _lbl = False
 
 -- | Whether label is a .rodata label
@@ -716,6 +727,7 @@ mkAsmTempDieLabel l = mkAsmTempDerivedLabel l (fsLit "_die")
 toClosureLbl :: CLabel -> CLabel
 toClosureLbl (IdLabel n c _) = IdLabel n c Closure
 toClosureLbl (CmmLabel m str _) = CmmLabel m str CmmClosure
+toClosureLbl l@UnliftedDataLabel{} = l
 toClosureLbl l = pprPanic "toClosureLbl" (ppr l)
 
 toSlowEntryLbl :: CLabel -> CLabel
@@ -775,7 +787,7 @@ hasCAF _                            = False
 -- -----------------------------------------------------------------------------
 -- Does a CLabel need declaring before use or not?
 --
--- See wiki:commentary/compiler/backends/ppr-c#prototypes
+-- See wiki: https://gitlab.haskell.org/ghc/ghc/-/wikis/commentary/compiler/backends/ppr-c#prototypes
 
 needsCDecl :: CLabel -> Bool
   -- False <=> it's pre-declared; don't bother
@@ -803,10 +815,11 @@ needsCDecl l@(ForeignLabel{})           = not (isMathFun l)
 needsCDecl (CC_Label _)                 = True
 needsCDecl (CCS_Label _)                = True
 needsCDecl (HpcTicksLabel _)            = True
+needsCDecl UnliftedDataLabel{}          = True
+
 needsCDecl (DynamicLinkerLabel {})      = panic "needsCDecl DynamicLinkerLabel"
 needsCDecl PicBaseLabel                 = panic "needsCDecl PicBaseLabel"
 needsCDecl (DeadStripPreventer {})      = panic "needsCDecl DeadStripPreventer"
-
 -- | If a label is a local block label then return just its 'BlockId', otherwise
 -- 'Nothing'.
 maybeLocalBlockLabel :: CLabel -> Maybe BlockId
@@ -928,6 +941,7 @@ externallyVisibleCLabel (DynamicLinkerLabel _ _)  = False
 externallyVisibleCLabel (HpcTicksLabel _)       = True
 externallyVisibleCLabel (LargeBitmapLabel _)    = False
 externallyVisibleCLabel (SRTLabel _)            = False
+externallyVisibleCLabel UnliftedDataLabel{}     = False
 externallyVisibleCLabel (PicBaseLabel {}) = panic "externallyVisibleCLabel PicBaseLabel"
 externallyVisibleCLabel (DeadStripPreventer {}) = panic "externallyVisibleCLabel DeadStripPreventer"
 
@@ -988,6 +1002,7 @@ labelType PicBaseLabel                          = DataLabel
 labelType (DeadStripPreventer _)                = DataLabel
 labelType (HpcTicksLabel _)                     = DataLabel
 labelType (LargeBitmapLabel _)                  = DataLabel
+labelType UnliftedDataLabel{}                   = GcPtrLabel
 
 idInfoLabelType :: IdLabelInfo -> CLabelType
 idInfoLabelType info =
@@ -1295,7 +1310,7 @@ pprCLbl dflags = \case
    (CC_Label cc)       -> ppr cc
    (CCS_Label ccs)     -> ppr ccs
    (HpcTicksLabel mod) -> text "_hpc_tickboxes_"  <> ppr mod <> ptext (sLit "_hpc")
-
+   (UnliftedDataLabel u op) -> tempLabelPrefixOrUnderscore <> ppr op <> pprUniqueAlways u
    (AsmTempLabel {})        -> panic "pprCLbl AsmTempLabel"
    (AsmTempDerivedLabel {}) -> panic "pprCLbl AsmTempDerivedLabel"
    (DynamicLinkerLabel {})  -> panic "pprCLbl DynamicLinkerLabel"


=====================================
compiler/GHC/Cmm/Info/Build.hs
=====================================
@@ -47,8 +47,8 @@ import GHC.Types.Name.Set
 
 {- Note [SRTs]
 
-SRTs are the mechanism by which the garbage collector can determine
-the live CAFs in the program.
+Static Reference Tables (SRTs) are the mechanism by which the garbage collector
+can determine the live CAFs in the program.
 
 Representation
 ^^^^^^^^^^^^^^
@@ -481,9 +481,7 @@ addCafLabel l s
   | otherwise
   = s
 
-cafAnalData
-  :: CmmStatics
-  -> CAFSet
+cafAnalData :: CmmStatics -> CAFSet
 
 cafAnalData (CmmStaticsRaw _lbl _data) =
     Set.empty
@@ -1111,7 +1109,6 @@ buildSRTChain dflags cafSet =
   where
     mAX_SRT_SIZE = 16
 
-
 buildSRT :: DynFlags -> [SRTEntry] -> UniqSM (CmmDeclSRTs, SRTEntry)
 buildSRT dflags refs = do
   id <- getUniqueM
@@ -1121,6 +1118,7 @@ buildSRT dflags refs = do
     srt_n_info = mkSRTInfoLabel (length refs)
     fields =
       mkStaticClosure dflags srt_n_info dontCareCCS
+        [] -- no header
         [ CmmLabel lbl | SRTEntry lbl <- refs ]
         [] -- no padding
         [mkIntCLit platform 0] -- link field


=====================================
compiler/GHC/Cmm/Parser.y
=====================================
@@ -423,7 +423,7 @@ static  :: { CmmParse [CmmStatic] }
                         mkStaticClosure dflags (mkForeignLabel $3 Nothing ForeignLabelInExternalPackage IsData)
                          -- mkForeignLabel because these are only used
                          -- for CHARLIKE and INTLIKE closures in the RTS.
-                        dontCareCCS (map getLit lits) [] [] [] } }
+                        dontCareCCS (map getLit lits) [] [] [] [] } }
         -- arrays of closures required for the CHARLIKE & INTLIKE arrays
 
 lits    :: { [CmmParse CmmExpr] }
@@ -1166,7 +1166,7 @@ profilingInfo dflags desc_str ty_str
 staticClosure :: Unit -> FastString -> FastString -> [CmmLit] -> CmmParse ()
 staticClosure pkg cl_label info payload
   = do dflags <- getDynFlags
-       let lits = mkStaticClosure dflags (mkCmmInfoLabel pkg info) dontCareCCS payload [] [] []
+       let lits = mkStaticClosure dflags (mkCmmInfoLabel pkg info) dontCareCCS payload [] [] [] []
        code $ emitDataLits (mkCmmDataLabel pkg cl_label) lits
 
 foreignCall


=====================================
compiler/GHC/StgToCmm/Closure.hs
=====================================
@@ -60,6 +60,7 @@ module GHC.StgToCmm.Closure (
         cafBlackHoleInfoTable,
         indStaticInfoTable,
         staticClosureNeedsLink,
+        smallArrayStaticInfoTable,
     ) where
 
 #include "HsVersions.h"
@@ -986,6 +987,14 @@ indStaticInfoTable
                  , cit_srt  = Nothing
                  , cit_clo  = Nothing }
 
+smallArrayStaticInfoTable :: WordOff -> CmmInfoTable
+smallArrayStaticInfoTable n
+  = CmmInfoTable { cit_lbl  = mkSMAP_FROZEN_DIRTY_infoLabel
+                 , cit_rep  = smallArrPtrsRep (fromIntegral n)
+                 , cit_prof = NoProfilingInfo
+                 , cit_srt  = Nothing
+                 , cit_clo  = Nothing }
+
 staticClosureNeedsLink :: Bool -> CmmInfoTable -> Bool
 -- A static closure needs a link field to aid the GC when traversing
 -- the static closure graph.  But it only needs such a field if either


=====================================
compiler/GHC/StgToCmm/Heap.hs
=====================================
@@ -174,11 +174,13 @@ mkStaticClosureFields
         -> [CmmLit]             -- Payload
         -> [CmmLit]             -- The full closure
 mkStaticClosureFields dflags info_tbl ccs caf_refs payload
-  = mkStaticClosure dflags info_lbl ccs payload padding
+  = mkStaticClosure dflags (cit_lbl info_tbl) ccs header payload padding
         static_link_field saved_info_field
   where
     platform = targetPlatform dflags
-    info_lbl = cit_lbl info_tbl
+    header = case cit_rep info_tbl of
+      SmallArrayPtrsRep size -> [mkIntCLit (targetPlatform dflags) size]
+      _ -> []
 
     -- CAFs must have consistent layout, regardless of whether they
     -- are actually updatable or not.  The layout of a CAF is:
@@ -219,11 +221,12 @@ mkStaticClosureFields dflags info_tbl ccs caf_refs payload
                                       -- See Note [STATIC_LINK fields]
                                       -- in rts/sm/Storage.h
 
-mkStaticClosure :: DynFlags -> CLabel -> CostCentreStack -> [CmmLit]
+mkStaticClosure :: DynFlags -> CLabel -> CostCentreStack -> [CmmLit] -> [CmmLit]
   -> [CmmLit] -> [CmmLit] -> [CmmLit] -> [CmmLit]
-mkStaticClosure dflags info_lbl ccs payload padding static_link_field saved_info_field
+mkStaticClosure dflags info_lbl ccs header payload padding static_link_field saved_info_field
   =  [CmmLabel info_lbl]
   ++ staticProfHdr dflags ccs
+  ++ header
   ++ payload
   ++ padding
   ++ static_link_field


=====================================
compiler/GHC/StgToCmm/Prim.hs
=====================================
@@ -57,6 +57,9 @@ import Data.Maybe
 import Data.Bits ((.&.), bit)
 import Control.Monad (liftM, when, unless)
 
+import GHC.Types.CostCentre (dontCareCCS)
+import GHC.StgToCmm.Closure
+
 ------------------------------------------------------------------------
 --      Primitive operations and foreign calls
 ------------------------------------------------------------------------
@@ -171,13 +174,13 @@ emitPrimOp dflags = \case
   NewByteArrayOp_Char -> \case
     [(CmmLit (CmmInt n w))]
       | asUnsigned w n <= fromIntegral (maxInlineAllocSize dflags)
-      -> opAllDone  $ \ [res] -> doNewByteArrayOp res (fromInteger n)
+      -> PrimopCmmEmit_IntoRegs $ \ [res] -> doNewByteArrayOp res (fromInteger n)
     _ -> PrimopCmmEmit_External
 
   NewArrayOp -> \case
     [(CmmLit (CmmInt n w)), init]
       | wordsToBytes platform (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags)
-      -> opAllDone $ \[res] -> doNewArrayOp res (arrPtrsRep dflags (fromInteger n)) mkMAP_DIRTY_infoLabel
+      -> PrimopCmmEmit_IntoRegs $ \[res] -> doNewArrayOp res (arrPtrsRep dflags (fromInteger n)) mkMAP_DIRTY_infoLabel
         [ (mkIntExpr platform (fromInteger n),
            fixedHdrSize dflags + oFFSET_StgMutArrPtrs_ptrs dflags)
         , (mkIntExpr platform (nonHdrSizeW (arrPtrsRep dflags (fromInteger n))),
@@ -188,52 +191,52 @@ emitPrimOp dflags = \case
 
   CopyArrayOp -> \case
     [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] ->
-      opAllDone $ \ [] -> doCopyArrayOp src src_off dst dst_off (fromInteger n)
+      PrimopCmmEmit_IntoRegs $ \ [] -> doCopyArrayOp src src_off dst dst_off (fromInteger n)
     _ -> PrimopCmmEmit_External
 
   CopyMutableArrayOp -> \case
     [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] ->
-      opAllDone $ \ [] -> doCopyMutableArrayOp src src_off dst dst_off (fromInteger n)
+      PrimopCmmEmit_IntoRegs $ \ [] -> doCopyMutableArrayOp src src_off dst dst_off (fromInteger n)
     _ -> PrimopCmmEmit_External
 
   CopyArrayArrayOp -> \case
     [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] ->
-      opAllDone $ \ [] -> doCopyArrayOp src src_off dst dst_off (fromInteger n)
+      PrimopCmmEmit_IntoRegs $ \ [] -> doCopyArrayOp src src_off dst dst_off (fromInteger n)
     _ -> PrimopCmmEmit_External
 
   CopyMutableArrayArrayOp -> \case
     [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] ->
-      opAllDone $ \ [] -> doCopyMutableArrayOp src src_off dst dst_off (fromInteger n)
+      PrimopCmmEmit_IntoRegs $ \ [] -> doCopyMutableArrayOp src src_off dst dst_off (fromInteger n)
     _ -> PrimopCmmEmit_External
 
   CloneArrayOp -> \case
     [src, src_off, (CmmLit (CmmInt n w))]
       | wordsToBytes platform (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags)
-      -> opAllDone $ \ [res] -> emitCloneArray mkMAP_FROZEN_CLEAN_infoLabel res src src_off (fromInteger n)
+      -> PrimopCmmEmit_IntoRegs $ \ [res] -> emitCloneArray mkMAP_FROZEN_CLEAN_infoLabel res src src_off (fromInteger n)
     _ -> PrimopCmmEmit_External
 
   CloneMutableArrayOp -> \case
     [src, src_off, (CmmLit (CmmInt n w))]
       | wordsToBytes platform (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags)
-      -> opAllDone $ \ [res] -> emitCloneArray mkMAP_DIRTY_infoLabel res src src_off (fromInteger n)
+      -> PrimopCmmEmit_IntoRegs $ \ [res] -> emitCloneArray mkMAP_DIRTY_infoLabel res src src_off (fromInteger n)
     _ -> PrimopCmmEmit_External
 
   FreezeArrayOp -> \case
     [src, src_off, (CmmLit (CmmInt n w))]
       | wordsToBytes platform (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags)
-      -> opAllDone $ \ [res] -> emitCloneArray mkMAP_FROZEN_CLEAN_infoLabel res src src_off (fromInteger n)
+      -> PrimopCmmEmit_IntoRegs $ \ [res] -> emitCloneArray mkMAP_FROZEN_CLEAN_infoLabel res src src_off (fromInteger n)
     _ -> PrimopCmmEmit_External
 
   ThawArrayOp -> \case
     [src, src_off, (CmmLit (CmmInt n w))]
       | wordsToBytes platform (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags)
-      -> opAllDone $ \ [res] -> emitCloneArray mkMAP_DIRTY_infoLabel res src src_off (fromInteger n)
+      -> PrimopCmmEmit_IntoRegs $ \ [res] -> emitCloneArray mkMAP_DIRTY_infoLabel res src src_off (fromInteger n)
     _ -> PrimopCmmEmit_External
 
   NewSmallArrayOp -> \case
     [(CmmLit (CmmInt n w)), init]
       | wordsToBytes platform (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags)
-      -> opAllDone $ \ [res] ->
+      -> PrimopCmmEmit_IntoRegs $ \ [res] ->
         doNewArrayOp res (smallArrPtrsRep (fromInteger n)) mkSMAP_DIRTY_infoLabel
         [ (mkIntExpr platform (fromInteger n),
            fixedHdrSize dflags + oFFSET_StgSmallMutArrPtrs_ptrs dflags)
@@ -241,53 +244,65 @@ emitPrimOp dflags = \case
         (replicate (fromIntegral n) init)
     _ -> PrimopCmmEmit_External
 
-  SmallArrayOfOp -> \elems -> opAllDone $ \[res] ->
+  op@SmallArrayOfOp -> \elems -> PrimopCmmEmit_IntoRegs $ \[res] -> do
     let n = length elems
-      in doNewArrayOp
-          res
-          (smallArrPtrsRep (fromIntegral n))
-          mkSMAP_FROZEN_DIRTY_infoLabel
-          [ ( mkIntExpr platform n
-            , fixedHdrSize dflags + oFFSET_StgSmallMutArrPtrs_ptrs dflags ) ]
-          elems
+    case allStatic elems of
+      Just known -> do
+        u <- newUnique
+        let lbl = mkUnliftedDataLabel u op
+        emitDataCon lbl (smallArrayStaticInfoTable n) dontCareCCS known
+        emit $ mkAssign (CmmLocal res) (CmmLit $ CmmLabel lbl)
+      Nothing -> doNewArrayOp
+        res
+        (smallArrPtrsRep (fromIntegral n))
+        mkSMAP_FROZEN_DIRTY_infoLabel
+        [ ( mkIntExpr platform n
+          , fixedHdrSize dflags + oFFSET_StgSmallMutArrPtrs_ptrs dflags ) ]
+        elems
+      where
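+        -- Collect the elements as literals: 'Just' the list when every
+        -- element is a 'CmmLit', 'Nothing' as soon as one is not.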
+        allStatic = foldr step (Just [])
+
+        step (CmmLit l) (Just acc) = Just (l : acc) -- c.f. XXX getLit
+        step _ _ = Nothing
 
   CopySmallArrayOp -> \case
     [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] ->
-      opAllDone $ \ [] -> doCopySmallArrayOp src src_off dst dst_off (fromInteger n)
+      PrimopCmmEmit_IntoRegs $ \ [] -> doCopySmallArrayOp src src_off dst dst_off (fromInteger n)
     _ -> PrimopCmmEmit_External
 
   CopySmallMutableArrayOp -> \case
     [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] ->
-      opAllDone $ \ [] -> doCopySmallMutableArrayOp src src_off dst dst_off (fromInteger n)
+      PrimopCmmEmit_IntoRegs $ \ [] -> doCopySmallMutableArrayOp src src_off dst dst_off (fromInteger n)
     _ -> PrimopCmmEmit_External
 
   CloneSmallArrayOp -> \case
     [src, src_off, (CmmLit (CmmInt n w))]
       | wordsToBytes platform (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags)
-      -> opAllDone $ \ [res] -> emitCloneSmallArray mkSMAP_FROZEN_CLEAN_infoLabel res src src_off (fromInteger n)
+      -> PrimopCmmEmit_IntoRegs $ \ [res] -> emitCloneSmallArray mkSMAP_FROZEN_CLEAN_infoLabel res src src_off (fromInteger n)
     _ -> PrimopCmmEmit_External
 
   CloneSmallMutableArrayOp -> \case
     [src, src_off, (CmmLit (CmmInt n w))]
       | wordsToBytes platform (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags)
-      -> opAllDone $ \ [res] -> emitCloneSmallArray mkSMAP_DIRTY_infoLabel res src src_off (fromInteger n)
+      -> PrimopCmmEmit_IntoRegs $ \ [res] -> emitCloneSmallArray mkSMAP_DIRTY_infoLabel res src src_off (fromInteger n)
     _ -> PrimopCmmEmit_External
 
   FreezeSmallArrayOp -> \case
     [src, src_off, (CmmLit (CmmInt n w))]
       | wordsToBytes platform (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags)
-      -> opAllDone $ \ [res] -> emitCloneSmallArray mkSMAP_FROZEN_CLEAN_infoLabel res src src_off (fromInteger n)
+      -> PrimopCmmEmit_IntoRegs $ \ [res] -> emitCloneSmallArray mkSMAP_FROZEN_CLEAN_infoLabel res src src_off (fromInteger n)
     _ -> PrimopCmmEmit_External
 
   ThawSmallArrayOp -> \case
     [src, src_off, (CmmLit (CmmInt n w))]
       | wordsToBytes platform (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags)
-      -> opAllDone $ \ [res] -> emitCloneSmallArray mkSMAP_DIRTY_infoLabel res src src_off (fromInteger n)
+      -> PrimopCmmEmit_IntoRegs $ \ [res] -> emitCloneSmallArray mkSMAP_DIRTY_infoLabel res src src_off (fromInteger n)
     _ -> PrimopCmmEmit_External
 
 -- First we handle various awkward cases specially.
 
-  ParOp -> \[arg] -> opAllDone $ \[res] -> do
+  ParOp -> \[arg] -> PrimopCmmEmit_IntoRegs $ \[res] -> do
     -- for now, just implement this in a C function
     -- later, we might want to inline it.
     emitCCall
@@ -295,7 +310,7 @@ emitPrimOp dflags = \case
         (CmmLit (CmmLabel (mkForeignLabel (fsLit "newSpark") Nothing ForeignLabelInExternalPackage IsFunction)))
         [(baseExpr, AddrHint), (arg,AddrHint)]
 
-  SparkOp -> \[arg] -> opAllDone $ \[res] -> do
+  SparkOp -> \[arg] -> PrimopCmmEmit_IntoRegs $ \[res] -> do
     -- returns the value of arg in res.  We're going to therefore
     -- refer to arg twice (once to pass to newSpark(), and once to
     -- assign to res), so put it in a temporary.
@@ -307,23 +322,23 @@ emitPrimOp dflags = \case
         [(baseExpr, AddrHint), ((CmmReg (CmmLocal tmp)), AddrHint)]
     emitAssign (CmmLocal res) (CmmReg (CmmLocal tmp))
 
-  GetCCSOfOp -> \[arg] -> opAllDone $ \[res] -> do
+  GetCCSOfOp -> \[arg] -> PrimopCmmEmit_IntoRegs $ \[res] -> do
     let
       val
        | gopt Opt_SccProfilingOn dflags = costCentreFrom dflags (cmmUntag dflags arg)
        | otherwise                      = CmmLit (zeroCLit platform)
     emitAssign (CmmLocal res) val
 
-  GetCurrentCCSOp -> \[_] -> opAllDone $ \[res] -> do
+  GetCurrentCCSOp -> \[_] -> PrimopCmmEmit_IntoRegs $ \[res] -> do
     emitAssign (CmmLocal res) cccsExpr
 
-  MyThreadIdOp -> \[] -> opAllDone $ \[res] -> do
+  MyThreadIdOp -> \[] -> PrimopCmmEmit_IntoRegs $ \[res] -> do
     emitAssign (CmmLocal res) currentTSOExpr
 
-  ReadMutVarOp -> \[mutv] -> opAllDone $ \[res] -> do
+  ReadMutVarOp -> \[mutv] -> PrimopCmmEmit_IntoRegs $ \[res] -> do
     emitAssign (CmmLocal res) (cmmLoadIndexW platform mutv (fixedHdrSizeW dflags) (gcWord platform))
 
-  WriteMutVarOp -> \[mutv, var] -> opAllDone $ \res@[] -> do
+  WriteMutVarOp -> \[mutv, var] -> PrimopCmmEmit_IntoRegs $ \res@[] -> do
     old_val <- CmmLocal <$> newTemp (cmmExprType platform var)
     emitAssign old_val (cmmLoadIndexW platform mutv (fixedHdrSizeW dflags) (gcWord platform))
 
@@ -341,7 +356,7 @@ emitPrimOp dflags = \case
 
 --  #define sizzeofByteArrayzh(r,a) \
 --     r = ((StgArrBytes *)(a))->bytes
-  SizeofByteArrayOp -> \[arg] -> opAllDone $ \[res] -> do
+  SizeofByteArrayOp -> \[arg] -> PrimopCmmEmit_IntoRegs $ \[res] -> do
     emit $ mkAssign (CmmLocal res) (cmmLoadIndexW platform arg (fixedHdrSizeW dflags) (bWord platform))
 
 --  #define sizzeofMutableByteArrayzh(r,a) \
@@ -350,31 +365,31 @@ emitPrimOp dflags = \case
 
 --  #define getSizzeofMutableByteArrayzh(r,a) \
 --      r = ((StgArrBytes *)(a))->bytes
-  GetSizeofMutableByteArrayOp -> \[arg] -> opAllDone $ \[res] -> do
+  GetSizeofMutableByteArrayOp -> \[arg] -> PrimopCmmEmit_IntoRegs $ \[res] -> do
     emitAssign (CmmLocal res) (cmmLoadIndexW platform arg (fixedHdrSizeW dflags) (bWord platform))
 
 
 --  #define touchzh(o)                  /* nothing */
-  TouchOp -> \args@[_] -> opAllDone $ \res@[] -> do
+  TouchOp -> \args@[_] -> PrimopCmmEmit_IntoRegs $ \res@[] -> do
     emitPrimCall res MO_Touch args
 
 --  #define byteArrayContentszh(r,a) r = BYTE_ARR_CTS(a)
-  ByteArrayContents_Char -> \[arg] -> opAllDone $ \[res] -> do
+  ByteArrayContents_Char -> \[arg] -> PrimopCmmEmit_IntoRegs $ \[res] -> do
     emitAssign (CmmLocal res) (cmmOffsetB platform arg (arrWordsHdrSize dflags))
 
 --  #define stableNameToIntzh(r,s)   (r = ((StgStableName *)s)->sn)
-  StableNameToIntOp -> \[arg] -> opAllDone $ \[res] -> do
+  StableNameToIntOp -> \[arg] -> PrimopCmmEmit_IntoRegs $ \[res] -> do
     emitAssign (CmmLocal res) (cmmLoadIndexW platform arg (fixedHdrSizeW dflags) (bWord platform))
 
-  ReallyUnsafePtrEqualityOp -> \[arg1, arg2] -> opAllDone $ \[res] -> do
+  ReallyUnsafePtrEqualityOp -> \[arg1, arg2] -> PrimopCmmEmit_IntoRegs $ \[res] -> do
     emitAssign (CmmLocal res) (CmmMachOp (mo_wordEq platform) [arg1,arg2])
 
 --  #define addrToHValuezh(r,a) r=(P_)a
-  AddrToAnyOp -> \[arg] -> opAllDone $ \[res] -> do
+  AddrToAnyOp -> \[arg] -> PrimopCmmEmit_IntoRegs $ \[res] -> do
     emitAssign (CmmLocal res) arg
 
 --  #define hvalueToAddrzh(r, a) r=(W_)a
-  AnyToAddrOp -> \[arg] -> opAllDone $ \[res] -> do
+  AnyToAddrOp -> \[arg] -> PrimopCmmEmit_IntoRegs $ \[res] -> do
     emitAssign (CmmLocal res) arg
 
 {- Freezing arrays-of-ptrs requires changing an info table, for the
@@ -387,70 +402,70 @@ emitPrimOp dflags = \case
 --        SET_INFO((StgClosure *)a,&stg_MUT_ARR_PTRS_FROZEN_DIRTY_info);
 --        r = a;
 --      }
-  UnsafeFreezeArrayOp -> \[arg] -> opAllDone $ \[res] -> do
+  UnsafeFreezeArrayOp -> \[arg] -> PrimopCmmEmit_IntoRegs $ \[res] -> do
     emit $ catAGraphs
       [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN_DIRTY_infoLabel)),
         mkAssign (CmmLocal res) arg ]
-  UnsafeFreezeArrayArrayOp -> \[arg] -> opAllDone $ \[res] -> do
+  UnsafeFreezeArrayArrayOp -> \[arg] -> PrimopCmmEmit_IntoRegs $ \[res] -> do
     emit $ catAGraphs
       [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN_DIRTY_infoLabel)),
         mkAssign (CmmLocal res) arg ]
-  UnsafeFreezeSmallArrayOp -> \[arg] -> opAllDone $ \[res] -> do
+  UnsafeFreezeSmallArrayOp -> \[arg] -> PrimopCmmEmit_IntoRegs $ \[res] -> do
     emit $ catAGraphs
       [ setInfo arg (CmmLit (CmmLabel mkSMAP_FROZEN_DIRTY_infoLabel)),
         mkAssign (CmmLocal res) arg ]
 
 --  #define unsafeFreezzeByteArrayzh(r,a)       r=(a)
-  UnsafeFreezeByteArrayOp -> \[arg] -> opAllDone $ \[res] -> do
+  UnsafeFreezeByteArrayOp -> \[arg] -> PrimopCmmEmit_IntoRegs $ \[res] -> do
     emitAssign (CmmLocal res) arg
 
 -- Reading/writing pointer arrays
 
-  ReadArrayOp -> \[obj, ix] -> opAllDone $ \[res] -> do
+  ReadArrayOp -> \[obj, ix] -> PrimopCmmEmit_IntoRegs $ \[res] -> do
     doReadPtrArrayOp res obj ix
-  IndexArrayOp -> \[obj, ix] -> opAllDone $ \[res] -> do
+  IndexArrayOp -> \[obj, ix] -> PrimopCmmEmit_IntoRegs $ \[res] -> do
     doReadPtrArrayOp res obj ix
-  WriteArrayOp -> \[obj, ix, v] -> opAllDone $ \[] -> do
+  WriteArrayOp -> \[obj, ix, v] -> PrimopCmmEmit_IntoRegs $ \[] -> do
     doWritePtrArrayOp obj ix v
 
-  IndexArrayArrayOp_ByteArray -> \[obj, ix] -> opAllDone $ \[res] -> do
+  IndexArrayArrayOp_ByteArray -> \[obj, ix] -> PrimopCmmEmit_IntoRegs $ \[res] -> do
     doReadPtrArrayOp res obj ix
-  IndexArrayArrayOp_ArrayArray -> \[obj, ix] -> opAllDone $ \[res] -> do
+  IndexArrayArrayOp_ArrayArray -> \[obj, ix] -> PrimopCmmEmit_IntoRegs $ \[res] -> do
     doReadPtrArrayOp res obj ix
-  ReadArrayArrayOp_ByteArray -> \[obj, ix] -> opAllDone $ \[res] -> do
+  ReadArrayArrayOp_ByteArray -> \[obj, ix] -> PrimopCmmEmit_IntoRegs $ \[res] -> do
     doReadPtrArrayOp res obj ix
-  ReadArrayArrayOp_MutableByteArray -> \[obj, ix] -> opAllDone $ \[res] -> do
+  ReadArrayArrayOp_MutableByteArray -> \[obj, ix] -> PrimopCmmEmit_IntoRegs $ \[res] -> do
     doReadPtrArrayOp res obj ix
-  ReadArrayArrayOp_ArrayArray -> \[obj, ix] -> opAllDone $ \[res] -> do
+  ReadArrayArrayOp_ArrayArray -> \[obj, ix] -> PrimopCmmEmit_IntoRegs $ \[res] -> do
     doReadPtrArrayOp res obj ix
-  ReadArrayArrayOp_MutableArrayArray -> \[obj, ix] -> opAllDone $ \[res] -> do
+  ReadArrayArrayOp_MutableArrayArray -> \[obj, ix] -> PrimopCmmEmit_IntoRegs $ \[res] -> do
     doReadPtrArrayOp res obj ix
-  WriteArrayArrayOp_ByteArray -> \[obj,ix,v] -> opAllDone $ \[] -> do
+  WriteArrayArrayOp_ByteArray -> \[obj,ix,v] -> PrimopCmmEmit_IntoRegs $ \[] -> do
     doWritePtrArrayOp obj ix v
-  WriteArrayArrayOp_MutableByteArray -> \[obj,ix,v] -> opAllDone $ \[] -> do
+  WriteArrayArrayOp_MutableByteArray -> \[obj,ix,v] -> PrimopCmmEmit_IntoRegs $ \[] -> do
     doWritePtrArrayOp obj ix v
-  WriteArrayArrayOp_ArrayArray -> \[obj,ix,v] -> opAllDone $ \[] -> do
+  WriteArrayArrayOp_ArrayArray -> \[obj,ix,v] -> PrimopCmmEmit_IntoRegs $ \[] -> do
     doWritePtrArrayOp obj ix v
-  WriteArrayArrayOp_MutableArrayArray -> \[obj,ix,v] -> opAllDone $ \[] -> do
+  WriteArrayArrayOp_MutableArrayArray -> \[obj,ix,v] -> PrimopCmmEmit_IntoRegs $ \[] -> do
     doWritePtrArrayOp obj ix v
 
-  ReadSmallArrayOp -> \[obj, ix] -> opAllDone $ \[res] -> do
+  ReadSmallArrayOp -> \[obj, ix] -> PrimopCmmEmit_IntoRegs $ \[res] -> do
     doReadSmallPtrArrayOp res obj ix
-  IndexSmallArrayOp -> \[obj, ix] -> opAllDone $ \[res] -> do
+  IndexSmallArrayOp -> \[obj, ix] -> PrimopCmmEmit_IntoRegs $ \[res] -> do
     doReadSmallPtrArrayOp res obj ix
-  WriteSmallArrayOp -> \[obj,ix,v] -> opAllDone $ \[] -> do
+  WriteSmallArrayOp -> \[obj,ix,v] -> PrimopCmmEmit_IntoRegs $ \[] -> do
     doWriteSmallPtrArrayOp obj ix v
 
 -- Getting the size of pointer arrays
 
-  SizeofArrayOp -> \[arg] -> opAllDone $ \[res] -> do
+  SizeofArrayOp -> \[arg] -> PrimopCmmEmit_IntoRegs $ \[res] -> do
     emit $ mkAssign (CmmLocal res) (cmmLoadIndexW platform arg
       (fixedHdrSizeW dflags + bytesToWordsRoundUp platform (oFFSET_StgMutArrPtrs_ptrs dflags))
         (bWord platform))
   SizeofMutableArrayOp -> emitPrimOp dflags SizeofArrayOp
   SizeofArrayArrayOp -> emitPrimOp dflags SizeofArrayOp
   SizeofMutableArrayArrayOp -> emitPrimOp dflags SizeofArrayOp
-  SizeofSmallArrayOp -> \[arg] -> opAllDone $ \[res] -> do
+  SizeofSmallArrayOp -> \[arg] -> PrimopCmmEmit_IntoRegs $ \[res] -> do
     emit $ mkAssign (CmmLocal res)
      (cmmLoadIndexW platform arg
      (fixedHdrSizeW dflags + bytesToWordsRoundUp platform (oFFSET_StgSmallMutArrPtrs_ptrs dflags))
@@ -461,413 +476,413 @@ emitPrimOp dflags = \case
 
 -- IndexXXXoffAddr
 
-  IndexOffAddrOp_Char -> \args -> opAllDone $ \res -> do
+  IndexOffAddrOp_Char -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexOffAddrOp   (Just (mo_u_8ToWord platform)) b8 res args
-  IndexOffAddrOp_WideChar -> \args -> opAllDone $ \res -> do
+  IndexOffAddrOp_WideChar -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexOffAddrOp   (Just (mo_u_32ToWord platform)) b32 res args
-  IndexOffAddrOp_Int -> \args -> opAllDone $ \res -> do
+  IndexOffAddrOp_Int -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexOffAddrOp   Nothing (bWord platform) res args
-  IndexOffAddrOp_Word -> \args -> opAllDone $ \res -> do
+  IndexOffAddrOp_Word -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexOffAddrOp   Nothing (bWord platform) res args
-  IndexOffAddrOp_Addr -> \args -> opAllDone $ \res -> do
+  IndexOffAddrOp_Addr -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexOffAddrOp   Nothing (bWord platform) res args
-  IndexOffAddrOp_Float -> \args -> opAllDone $ \res -> do
+  IndexOffAddrOp_Float -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexOffAddrOp   Nothing f32 res args
-  IndexOffAddrOp_Double -> \args -> opAllDone $ \res -> do
+  IndexOffAddrOp_Double -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexOffAddrOp   Nothing f64 res args
-  IndexOffAddrOp_StablePtr -> \args -> opAllDone $ \res -> do
+  IndexOffAddrOp_StablePtr -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexOffAddrOp   Nothing (bWord platform) res args
-  IndexOffAddrOp_Int8 -> \args -> opAllDone $ \res -> do
+  IndexOffAddrOp_Int8 -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexOffAddrOp   (Just (mo_s_8ToWord platform)) b8  res args
-  IndexOffAddrOp_Int16 -> \args -> opAllDone $ \res -> do
+  IndexOffAddrOp_Int16 -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexOffAddrOp   (Just (mo_s_16ToWord platform)) b16 res args
-  IndexOffAddrOp_Int32 -> \args -> opAllDone $ \res -> do
+  IndexOffAddrOp_Int32 -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexOffAddrOp   (Just (mo_s_32ToWord platform)) b32 res args
-  IndexOffAddrOp_Int64 -> \args -> opAllDone $ \res -> do
+  IndexOffAddrOp_Int64 -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexOffAddrOp   Nothing b64 res args
-  IndexOffAddrOp_Word8 -> \args -> opAllDone $ \res -> do
+  IndexOffAddrOp_Word8 -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexOffAddrOp   (Just (mo_u_8ToWord platform)) b8  res args
-  IndexOffAddrOp_Word16 -> \args -> opAllDone $ \res -> do
+  IndexOffAddrOp_Word16 -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexOffAddrOp   (Just (mo_u_16ToWord platform)) b16 res args
-  IndexOffAddrOp_Word32 -> \args -> opAllDone $ \res -> do
+  IndexOffAddrOp_Word32 -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexOffAddrOp   (Just (mo_u_32ToWord platform)) b32 res args
-  IndexOffAddrOp_Word64 -> \args -> opAllDone $ \res -> do
+  IndexOffAddrOp_Word64 -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexOffAddrOp   Nothing b64 res args
 
 -- ReadXXXoffAddr, which are identical, for our purposes, to IndexXXXoffAddr.
 
-  ReadOffAddrOp_Char -> \args -> opAllDone $ \res -> do
+  ReadOffAddrOp_Char -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexOffAddrOp   (Just (mo_u_8ToWord platform)) b8 res args
-  ReadOffAddrOp_WideChar -> \args -> opAllDone $ \res -> do
+  ReadOffAddrOp_WideChar -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexOffAddrOp   (Just (mo_u_32ToWord platform)) b32 res args
-  ReadOffAddrOp_Int -> \args -> opAllDone $ \res -> do
+  ReadOffAddrOp_Int -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexOffAddrOp   Nothing (bWord platform) res args
-  ReadOffAddrOp_Word -> \args -> opAllDone $ \res -> do
+  ReadOffAddrOp_Word -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexOffAddrOp   Nothing (bWord platform) res args
-  ReadOffAddrOp_Addr -> \args -> opAllDone $ \res -> do
+  ReadOffAddrOp_Addr -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexOffAddrOp   Nothing (bWord platform) res args
-  ReadOffAddrOp_Float -> \args -> opAllDone $ \res -> do
+  ReadOffAddrOp_Float -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexOffAddrOp   Nothing f32 res args
-  ReadOffAddrOp_Double -> \args -> opAllDone $ \res -> do
+  ReadOffAddrOp_Double -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexOffAddrOp   Nothing f64 res args
-  ReadOffAddrOp_StablePtr -> \args -> opAllDone $ \res -> do
+  ReadOffAddrOp_StablePtr -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexOffAddrOp   Nothing (bWord platform) res args
-  ReadOffAddrOp_Int8 -> \args -> opAllDone $ \res -> do
+  ReadOffAddrOp_Int8 -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexOffAddrOp   (Just (mo_s_8ToWord platform)) b8  res args
-  ReadOffAddrOp_Int16 -> \args -> opAllDone $ \res -> do
+  ReadOffAddrOp_Int16 -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexOffAddrOp   (Just (mo_s_16ToWord platform)) b16 res args
-  ReadOffAddrOp_Int32 -> \args -> opAllDone $ \res -> do
+  ReadOffAddrOp_Int32 -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexOffAddrOp   (Just (mo_s_32ToWord platform)) b32 res args
-  ReadOffAddrOp_Int64 -> \args -> opAllDone $ \res -> do
+  ReadOffAddrOp_Int64 -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexOffAddrOp   Nothing b64 res args
-  ReadOffAddrOp_Word8 -> \args -> opAllDone $ \res -> do
+  ReadOffAddrOp_Word8 -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexOffAddrOp   (Just (mo_u_8ToWord platform)) b8  res args
-  ReadOffAddrOp_Word16 -> \args -> opAllDone $ \res -> do
+  ReadOffAddrOp_Word16 -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexOffAddrOp   (Just (mo_u_16ToWord platform)) b16 res args
-  ReadOffAddrOp_Word32 -> \args -> opAllDone $ \res -> do
+  ReadOffAddrOp_Word32 -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexOffAddrOp   (Just (mo_u_32ToWord platform)) b32 res args
-  ReadOffAddrOp_Word64 -> \args -> opAllDone $ \res -> do
+  ReadOffAddrOp_Word64 -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexOffAddrOp   Nothing b64 res args
 
 -- IndexXXXArray
 
-  IndexByteArrayOp_Char -> \args -> opAllDone $ \res -> do
+  IndexByteArrayOp_Char -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexByteArrayOp   (Just (mo_u_8ToWord platform)) b8 res args
-  IndexByteArrayOp_WideChar -> \args -> opAllDone $ \res -> do
+  IndexByteArrayOp_WideChar -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexByteArrayOp   (Just (mo_u_32ToWord platform)) b32 res args
-  IndexByteArrayOp_Int -> \args -> opAllDone $ \res -> do
+  IndexByteArrayOp_Int -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexByteArrayOp   Nothing (bWord platform) res args
-  IndexByteArrayOp_Word -> \args -> opAllDone $ \res -> do
+  IndexByteArrayOp_Word -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexByteArrayOp   Nothing (bWord platform) res args
-  IndexByteArrayOp_Addr -> \args -> opAllDone $ \res -> do
+  IndexByteArrayOp_Addr -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexByteArrayOp   Nothing (bWord platform) res args
-  IndexByteArrayOp_Float -> \args -> opAllDone $ \res -> do
+  IndexByteArrayOp_Float -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexByteArrayOp   Nothing f32 res args
-  IndexByteArrayOp_Double -> \args -> opAllDone $ \res -> do
+  IndexByteArrayOp_Double -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexByteArrayOp   Nothing f64 res args
-  IndexByteArrayOp_StablePtr -> \args -> opAllDone $ \res -> do
+  IndexByteArrayOp_StablePtr -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexByteArrayOp   Nothing (bWord platform) res args
-  IndexByteArrayOp_Int8 -> \args -> opAllDone $ \res -> do
+  IndexByteArrayOp_Int8 -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexByteArrayOp   (Just (mo_s_8ToWord platform)) b8  res args
-  IndexByteArrayOp_Int16 -> \args -> opAllDone $ \res -> do
+  IndexByteArrayOp_Int16 -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexByteArrayOp   (Just (mo_s_16ToWord platform)) b16  res args
-  IndexByteArrayOp_Int32 -> \args -> opAllDone $ \res -> do
+  IndexByteArrayOp_Int32 -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexByteArrayOp   (Just (mo_s_32ToWord platform)) b32  res args
-  IndexByteArrayOp_Int64 -> \args -> opAllDone $ \res -> do
+  IndexByteArrayOp_Int64 -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexByteArrayOp   Nothing b64  res args
-  IndexByteArrayOp_Word8 -> \args -> opAllDone $ \res -> do
+  IndexByteArrayOp_Word8 -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexByteArrayOp   (Just (mo_u_8ToWord platform)) b8  res args
-  IndexByteArrayOp_Word16 -> \args -> opAllDone $ \res -> do
+  IndexByteArrayOp_Word16 -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexByteArrayOp   (Just (mo_u_16ToWord platform)) b16  res args
-  IndexByteArrayOp_Word32 -> \args -> opAllDone $ \res -> do
+  IndexByteArrayOp_Word32 -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexByteArrayOp   (Just (mo_u_32ToWord platform)) b32  res args
-  IndexByteArrayOp_Word64 -> \args -> opAllDone $ \res -> do
+  IndexByteArrayOp_Word64 -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexByteArrayOp   Nothing b64  res args
 
 -- ReadXXXArray, identical to IndexXXXArray.
 
-  ReadByteArrayOp_Char -> \args -> opAllDone $ \res -> do
+  ReadByteArrayOp_Char -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexByteArrayOp   (Just (mo_u_8ToWord platform)) b8 res args
-  ReadByteArrayOp_WideChar -> \args -> opAllDone $ \res -> do
+  ReadByteArrayOp_WideChar -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexByteArrayOp   (Just (mo_u_32ToWord platform)) b32 res args
-  ReadByteArrayOp_Int -> \args -> opAllDone $ \res -> do
+  ReadByteArrayOp_Int -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexByteArrayOp   Nothing (bWord platform) res args
-  ReadByteArrayOp_Word -> \args -> opAllDone $ \res -> do
+  ReadByteArrayOp_Word -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexByteArrayOp   Nothing (bWord platform) res args
-  ReadByteArrayOp_Addr -> \args -> opAllDone $ \res -> do
+  ReadByteArrayOp_Addr -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexByteArrayOp   Nothing (bWord platform) res args
-  ReadByteArrayOp_Float -> \args -> opAllDone $ \res -> do
+  ReadByteArrayOp_Float -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexByteArrayOp   Nothing f32 res args
-  ReadByteArrayOp_Double -> \args -> opAllDone $ \res -> do
+  ReadByteArrayOp_Double -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexByteArrayOp   Nothing f64 res args
-  ReadByteArrayOp_StablePtr -> \args -> opAllDone $ \res -> do
+  ReadByteArrayOp_StablePtr -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexByteArrayOp   Nothing (bWord platform) res args
-  ReadByteArrayOp_Int8 -> \args -> opAllDone $ \res -> do
+  ReadByteArrayOp_Int8 -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexByteArrayOp   (Just (mo_s_8ToWord platform)) b8  res args
-  ReadByteArrayOp_Int16 -> \args -> opAllDone $ \res -> do
+  ReadByteArrayOp_Int16 -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexByteArrayOp   (Just (mo_s_16ToWord platform)) b16  res args
-  ReadByteArrayOp_Int32 -> \args -> opAllDone $ \res -> do
+  ReadByteArrayOp_Int32 -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexByteArrayOp   (Just (mo_s_32ToWord platform)) b32  res args
-  ReadByteArrayOp_Int64 -> \args -> opAllDone $ \res -> do
+  ReadByteArrayOp_Int64 -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexByteArrayOp   Nothing b64  res args
-  ReadByteArrayOp_Word8 -> \args -> opAllDone $ \res -> do
+  ReadByteArrayOp_Word8 -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexByteArrayOp   (Just (mo_u_8ToWord platform)) b8  res args
-  ReadByteArrayOp_Word16 -> \args -> opAllDone $ \res -> do
+  ReadByteArrayOp_Word16 -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexByteArrayOp   (Just (mo_u_16ToWord platform)) b16  res args
-  ReadByteArrayOp_Word32 -> \args -> opAllDone $ \res -> do
+  ReadByteArrayOp_Word32 -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexByteArrayOp   (Just (mo_u_32ToWord platform)) b32  res args
-  ReadByteArrayOp_Word64 -> \args -> opAllDone $ \res -> do
+  ReadByteArrayOp_Word64 -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexByteArrayOp   Nothing b64  res args
 
 -- IndexWord8ArrayAsXXX
 
-  IndexByteArrayOp_Word8AsChar -> \args -> opAllDone $ \res -> do
+  IndexByteArrayOp_Word8AsChar -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexByteArrayOpAs   (Just (mo_u_8ToWord platform)) b8 b8 res args
-  IndexByteArrayOp_Word8AsWideChar -> \args -> opAllDone $ \res -> do
+  IndexByteArrayOp_Word8AsWideChar -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexByteArrayOpAs   (Just (mo_u_32ToWord platform)) b32 b8 res args
-  IndexByteArrayOp_Word8AsInt -> \args -> opAllDone $ \res -> do
+  IndexByteArrayOp_Word8AsInt -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexByteArrayOpAs   Nothing (bWord platform) b8 res args
-  IndexByteArrayOp_Word8AsWord -> \args -> opAllDone $ \res -> do
+  IndexByteArrayOp_Word8AsWord -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexByteArrayOpAs   Nothing (bWord platform) b8 res args
-  IndexByteArrayOp_Word8AsAddr -> \args -> opAllDone $ \res -> do
+  IndexByteArrayOp_Word8AsAddr -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexByteArrayOpAs   Nothing (bWord platform) b8 res args
-  IndexByteArrayOp_Word8AsFloat -> \args -> opAllDone $ \res -> do
+  IndexByteArrayOp_Word8AsFloat -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexByteArrayOpAs   Nothing f32 b8 res args
-  IndexByteArrayOp_Word8AsDouble -> \args -> opAllDone $ \res -> do
+  IndexByteArrayOp_Word8AsDouble -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexByteArrayOpAs   Nothing f64 b8 res args
-  IndexByteArrayOp_Word8AsStablePtr -> \args -> opAllDone $ \res -> do
+  IndexByteArrayOp_Word8AsStablePtr -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexByteArrayOpAs   Nothing (bWord platform) b8 res args
-  IndexByteArrayOp_Word8AsInt16 -> \args -> opAllDone $ \res -> do
+  IndexByteArrayOp_Word8AsInt16 -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexByteArrayOpAs   (Just (mo_s_16ToWord platform)) b16 b8 res args
-  IndexByteArrayOp_Word8AsInt32 -> \args -> opAllDone $ \res -> do
+  IndexByteArrayOp_Word8AsInt32 -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexByteArrayOpAs   (Just (mo_s_32ToWord platform)) b32 b8 res args
-  IndexByteArrayOp_Word8AsInt64 -> \args -> opAllDone $ \res -> do
+  IndexByteArrayOp_Word8AsInt64 -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexByteArrayOpAs   Nothing b64 b8 res args
-  IndexByteArrayOp_Word8AsWord16 -> \args -> opAllDone $ \res -> do
+  IndexByteArrayOp_Word8AsWord16 -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexByteArrayOpAs   (Just (mo_u_16ToWord platform)) b16 b8 res args
-  IndexByteArrayOp_Word8AsWord32 -> \args -> opAllDone $ \res -> do
+  IndexByteArrayOp_Word8AsWord32 -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexByteArrayOpAs   (Just (mo_u_32ToWord platform)) b32 b8 res args
-  IndexByteArrayOp_Word8AsWord64 -> \args -> opAllDone $ \res -> do
+  IndexByteArrayOp_Word8AsWord64 -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexByteArrayOpAs   Nothing b64 b8 res args
 
 -- ReadInt8ArrayAsXXX, identical to IndexInt8ArrayAsXXX
 
-  ReadByteArrayOp_Word8AsChar -> \args -> opAllDone $ \res -> do
+  ReadByteArrayOp_Word8AsChar -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexByteArrayOpAs   (Just (mo_u_8ToWord platform)) b8 b8 res args
-  ReadByteArrayOp_Word8AsWideChar -> \args -> opAllDone $ \res -> do
+  ReadByteArrayOp_Word8AsWideChar -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexByteArrayOpAs   (Just (mo_u_32ToWord platform)) b32 b8 res args
-  ReadByteArrayOp_Word8AsInt -> \args -> opAllDone $ \res -> do
+  ReadByteArrayOp_Word8AsInt -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexByteArrayOpAs   Nothing (bWord platform) b8 res args
-  ReadByteArrayOp_Word8AsWord -> \args -> opAllDone $ \res -> do
+  ReadByteArrayOp_Word8AsWord -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexByteArrayOpAs   Nothing (bWord platform) b8 res args
-  ReadByteArrayOp_Word8AsAddr -> \args -> opAllDone $ \res -> do
+  ReadByteArrayOp_Word8AsAddr -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexByteArrayOpAs   Nothing (bWord platform) b8 res args
-  ReadByteArrayOp_Word8AsFloat -> \args -> opAllDone $ \res -> do
+  ReadByteArrayOp_Word8AsFloat -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexByteArrayOpAs   Nothing f32 b8 res args
-  ReadByteArrayOp_Word8AsDouble -> \args -> opAllDone $ \res -> do
+  ReadByteArrayOp_Word8AsDouble -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexByteArrayOpAs   Nothing f64 b8 res args
-  ReadByteArrayOp_Word8AsStablePtr -> \args -> opAllDone $ \res -> do
+  ReadByteArrayOp_Word8AsStablePtr -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexByteArrayOpAs   Nothing (bWord platform) b8 res args
-  ReadByteArrayOp_Word8AsInt16 -> \args -> opAllDone $ \res -> do
+  ReadByteArrayOp_Word8AsInt16 -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexByteArrayOpAs   (Just (mo_s_16ToWord platform)) b16 b8 res args
-  ReadByteArrayOp_Word8AsInt32 -> \args -> opAllDone $ \res -> do
+  ReadByteArrayOp_Word8AsInt32 -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexByteArrayOpAs   (Just (mo_s_32ToWord platform)) b32 b8 res args
-  ReadByteArrayOp_Word8AsInt64 -> \args -> opAllDone $ \res -> do
+  ReadByteArrayOp_Word8AsInt64 -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexByteArrayOpAs   Nothing b64 b8 res args
-  ReadByteArrayOp_Word8AsWord16 -> \args -> opAllDone $ \res -> do
+  ReadByteArrayOp_Word8AsWord16 -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexByteArrayOpAs   (Just (mo_u_16ToWord platform)) b16 b8 res args
-  ReadByteArrayOp_Word8AsWord32 -> \args -> opAllDone $ \res -> do
+  ReadByteArrayOp_Word8AsWord32 -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexByteArrayOpAs   (Just (mo_u_32ToWord platform)) b32 b8 res args
-  ReadByteArrayOp_Word8AsWord64 -> \args -> opAllDone $ \res -> do
+  ReadByteArrayOp_Word8AsWord64 -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doIndexByteArrayOpAs   Nothing b64 b8 res args
 
 -- WriteXXXoffAddr
 
-  WriteOffAddrOp_Char -> \args -> opAllDone $ \res -> do
+  WriteOffAddrOp_Char -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doWriteOffAddrOp (Just (mo_WordTo8 platform))  b8 res args
-  WriteOffAddrOp_WideChar -> \args -> opAllDone $ \res -> do
+  WriteOffAddrOp_WideChar -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doWriteOffAddrOp (Just (mo_WordTo32 platform)) b32 res args
-  WriteOffAddrOp_Int -> \args -> opAllDone $ \res -> do
+  WriteOffAddrOp_Int -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doWriteOffAddrOp Nothing (bWord platform) res args
-  WriteOffAddrOp_Word -> \args -> opAllDone $ \res -> do
+  WriteOffAddrOp_Word -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doWriteOffAddrOp Nothing (bWord platform) res args
-  WriteOffAddrOp_Addr -> \args -> opAllDone $ \res -> do
+  WriteOffAddrOp_Addr -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doWriteOffAddrOp Nothing (bWord platform) res args
-  WriteOffAddrOp_Float -> \args -> opAllDone $ \res -> do
+  WriteOffAddrOp_Float -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doWriteOffAddrOp Nothing f32 res args
-  WriteOffAddrOp_Double -> \args -> opAllDone $ \res -> do
+  WriteOffAddrOp_Double -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doWriteOffAddrOp Nothing f64 res args
-  WriteOffAddrOp_StablePtr -> \args -> opAllDone $ \res -> do
+  WriteOffAddrOp_StablePtr -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doWriteOffAddrOp Nothing (bWord platform) res args
-  WriteOffAddrOp_Int8 -> \args -> opAllDone $ \res -> do
+  WriteOffAddrOp_Int8 -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doWriteOffAddrOp (Just (mo_WordTo8 platform))  b8 res args
-  WriteOffAddrOp_Int16 -> \args -> opAllDone $ \res -> do
+  WriteOffAddrOp_Int16 -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doWriteOffAddrOp (Just (mo_WordTo16 platform)) b16 res args
-  WriteOffAddrOp_Int32 -> \args -> opAllDone $ \res -> do
+  WriteOffAddrOp_Int32 -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doWriteOffAddrOp (Just (mo_WordTo32 platform)) b32 res args
-  WriteOffAddrOp_Int64 -> \args -> opAllDone $ \res -> do
+  WriteOffAddrOp_Int64 -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doWriteOffAddrOp Nothing b64 res args
-  WriteOffAddrOp_Word8 -> \args -> opAllDone $ \res -> do
+  WriteOffAddrOp_Word8 -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doWriteOffAddrOp (Just (mo_WordTo8 platform))  b8 res args
-  WriteOffAddrOp_Word16 -> \args -> opAllDone $ \res -> do
+  WriteOffAddrOp_Word16 -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doWriteOffAddrOp (Just (mo_WordTo16 platform)) b16 res args
-  WriteOffAddrOp_Word32 -> \args -> opAllDone $ \res -> do
+  WriteOffAddrOp_Word32 -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doWriteOffAddrOp (Just (mo_WordTo32 platform)) b32 res args
-  WriteOffAddrOp_Word64 -> \args -> opAllDone $ \res -> do
+  WriteOffAddrOp_Word64 -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doWriteOffAddrOp Nothing b64 res args
 
 -- WriteXXXArray
 
-  WriteByteArrayOp_Char -> \args -> opAllDone $ \res -> do
+  WriteByteArrayOp_Char -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doWriteByteArrayOp (Just (mo_WordTo8 platform))  b8 res args
-  WriteByteArrayOp_WideChar -> \args -> opAllDone $ \res -> do
+  WriteByteArrayOp_WideChar -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doWriteByteArrayOp (Just (mo_WordTo32 platform)) b32 res args
-  WriteByteArrayOp_Int -> \args -> opAllDone $ \res -> do
+  WriteByteArrayOp_Int -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doWriteByteArrayOp Nothing (bWord platform) res args
-  WriteByteArrayOp_Word -> \args -> opAllDone $ \res -> do
+  WriteByteArrayOp_Word -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doWriteByteArrayOp Nothing (bWord platform) res args
-  WriteByteArrayOp_Addr -> \args -> opAllDone $ \res -> do
+  WriteByteArrayOp_Addr -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doWriteByteArrayOp Nothing (bWord platform) res args
-  WriteByteArrayOp_Float -> \args -> opAllDone $ \res -> do
+  WriteByteArrayOp_Float -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doWriteByteArrayOp Nothing f32 res args
-  WriteByteArrayOp_Double -> \args -> opAllDone $ \res -> do
+  WriteByteArrayOp_Double -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doWriteByteArrayOp Nothing f64 res args
-  WriteByteArrayOp_StablePtr -> \args -> opAllDone $ \res -> do
+  WriteByteArrayOp_StablePtr -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doWriteByteArrayOp Nothing (bWord platform) res args
-  WriteByteArrayOp_Int8 -> \args -> opAllDone $ \res -> do
+  WriteByteArrayOp_Int8 -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doWriteByteArrayOp (Just (mo_WordTo8 platform))  b8 res args
-  WriteByteArrayOp_Int16 -> \args -> opAllDone $ \res -> do
+  WriteByteArrayOp_Int16 -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doWriteByteArrayOp (Just (mo_WordTo16 platform)) b16 res args
-  WriteByteArrayOp_Int32 -> \args -> opAllDone $ \res -> do
+  WriteByteArrayOp_Int32 -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doWriteByteArrayOp (Just (mo_WordTo32 platform)) b32 res args
-  WriteByteArrayOp_Int64 -> \args -> opAllDone $ \res -> do
+  WriteByteArrayOp_Int64 -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doWriteByteArrayOp Nothing b64 res args
-  WriteByteArrayOp_Word8 -> \args -> opAllDone $ \res -> do
+  WriteByteArrayOp_Word8 -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doWriteByteArrayOp (Just (mo_WordTo8 platform))  b8  res args
-  WriteByteArrayOp_Word16 -> \args -> opAllDone $ \res -> do
+  WriteByteArrayOp_Word16 -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doWriteByteArrayOp (Just (mo_WordTo16 platform)) b16 res args
-  WriteByteArrayOp_Word32 -> \args -> opAllDone $ \res -> do
+  WriteByteArrayOp_Word32 -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doWriteByteArrayOp (Just (mo_WordTo32 platform)) b32 res args
-  WriteByteArrayOp_Word64 -> \args -> opAllDone $ \res -> do
+  WriteByteArrayOp_Word64 -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doWriteByteArrayOp Nothing b64 res args
 
 -- WriteInt8ArrayAsXXX
 
-  WriteByteArrayOp_Word8AsChar -> \args -> opAllDone $ \res -> do
+  WriteByteArrayOp_Word8AsChar -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doWriteByteArrayOp (Just (mo_WordTo8 platform))  b8 res args
-  WriteByteArrayOp_Word8AsWideChar -> \args -> opAllDone $ \res -> do
+  WriteByteArrayOp_Word8AsWideChar -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doWriteByteArrayOp (Just (mo_WordTo32 platform)) b8 res args
-  WriteByteArrayOp_Word8AsInt -> \args -> opAllDone $ \res -> do
+  WriteByteArrayOp_Word8AsInt -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doWriteByteArrayOp Nothing b8 res args
-  WriteByteArrayOp_Word8AsWord -> \args -> opAllDone $ \res -> do
+  WriteByteArrayOp_Word8AsWord -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doWriteByteArrayOp Nothing b8 res args
-  WriteByteArrayOp_Word8AsAddr -> \args -> opAllDone $ \res -> do
+  WriteByteArrayOp_Word8AsAddr -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doWriteByteArrayOp Nothing b8 res args
-  WriteByteArrayOp_Word8AsFloat -> \args -> opAllDone $ \res -> do
+  WriteByteArrayOp_Word8AsFloat -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doWriteByteArrayOp Nothing b8 res args
-  WriteByteArrayOp_Word8AsDouble -> \args -> opAllDone $ \res -> do
+  WriteByteArrayOp_Word8AsDouble -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doWriteByteArrayOp Nothing b8 res args
-  WriteByteArrayOp_Word8AsStablePtr -> \args -> opAllDone $ \res -> do
+  WriteByteArrayOp_Word8AsStablePtr -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doWriteByteArrayOp Nothing b8 res args
-  WriteByteArrayOp_Word8AsInt16 -> \args -> opAllDone $ \res -> do
+  WriteByteArrayOp_Word8AsInt16 -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doWriteByteArrayOp (Just (mo_WordTo16 platform)) b8 res args
-  WriteByteArrayOp_Word8AsInt32 -> \args -> opAllDone $ \res -> do
+  WriteByteArrayOp_Word8AsInt32 -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doWriteByteArrayOp (Just (mo_WordTo32 platform)) b8 res args
-  WriteByteArrayOp_Word8AsInt64 -> \args -> opAllDone $ \res -> do
+  WriteByteArrayOp_Word8AsInt64 -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doWriteByteArrayOp Nothing b8 res args
-  WriteByteArrayOp_Word8AsWord16 -> \args -> opAllDone $ \res -> do
+  WriteByteArrayOp_Word8AsWord16 -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doWriteByteArrayOp (Just (mo_WordTo16 platform)) b8 res args
-  WriteByteArrayOp_Word8AsWord32 -> \args -> opAllDone $ \res -> do
+  WriteByteArrayOp_Word8AsWord32 -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doWriteByteArrayOp (Just (mo_WordTo32 platform)) b8 res args
-  WriteByteArrayOp_Word8AsWord64 -> \args -> opAllDone $ \res -> do
+  WriteByteArrayOp_Word8AsWord64 -> \args -> PrimopCmmEmit_IntoRegs $ \res -> do
     doWriteByteArrayOp Nothing b8 res args
 
 -- Copying and setting byte arrays
-  CopyByteArrayOp -> \[src,src_off,dst,dst_off,n] -> opAllDone $ \[] -> do
+  CopyByteArrayOp -> \[src,src_off,dst,dst_off,n] -> PrimopCmmEmit_IntoRegs $ \[] -> do
     doCopyByteArrayOp src src_off dst dst_off n
-  CopyMutableByteArrayOp -> \[src,src_off,dst,dst_off,n] -> opAllDone $ \[] -> do
+  CopyMutableByteArrayOp -> \[src,src_off,dst,dst_off,n] -> PrimopCmmEmit_IntoRegs $ \[] -> do
     doCopyMutableByteArrayOp src src_off dst dst_off n
-  CopyByteArrayToAddrOp -> \[src,src_off,dst,n] -> opAllDone $ \[] -> do
+  CopyByteArrayToAddrOp -> \[src,src_off,dst,n] -> PrimopCmmEmit_IntoRegs $ \[] -> do
     doCopyByteArrayToAddrOp src src_off dst n
-  CopyMutableByteArrayToAddrOp -> \[src,src_off,dst,n] -> opAllDone $ \[] -> do
+  CopyMutableByteArrayToAddrOp -> \[src,src_off,dst,n] -> PrimopCmmEmit_IntoRegs $ \[] -> do
     doCopyMutableByteArrayToAddrOp src src_off dst n
-  CopyAddrToByteArrayOp -> \[src,dst,dst_off,n] -> opAllDone $ \[] -> do
+  CopyAddrToByteArrayOp -> \[src,dst,dst_off,n] -> PrimopCmmEmit_IntoRegs $ \[] -> do
     doCopyAddrToByteArrayOp src dst dst_off n
-  SetByteArrayOp -> \[ba,off,len,c] -> opAllDone $ \[] -> do
+  SetByteArrayOp -> \[ba,off,len,c] -> PrimopCmmEmit_IntoRegs $ \[] -> do
     doSetByteArrayOp ba off len c
 
 -- Comparing byte arrays
-  CompareByteArraysOp -> \[ba1,ba1_off,ba2,ba2_off,n] -> opAllDone $ \[res] -> do
+  CompareByteArraysOp -> \[ba1,ba1_off,ba2,ba2_off,n] -> PrimopCmmEmit_IntoRegs $ \[res] -> do
     doCompareByteArraysOp res ba1 ba1_off ba2 ba2_off n
 
-  BSwap16Op -> \[w] -> opAllDone $ \[res] -> do
+  BSwap16Op -> \[w] -> PrimopCmmEmit_IntoRegs $ \[res] -> do
     emitBSwapCall res w W16
-  BSwap32Op -> \[w] -> opAllDone $ \[res] -> do
+  BSwap32Op -> \[w] -> PrimopCmmEmit_IntoRegs $ \[res] -> do
     emitBSwapCall res w W32
-  BSwap64Op -> \[w] -> opAllDone $ \[res] -> do
+  BSwap64Op -> \[w] -> PrimopCmmEmit_IntoRegs $ \[res] -> do
     emitBSwapCall res w W64
-  BSwapOp -> \[w] -> opAllDone $ \[res] -> do
+  BSwapOp -> \[w] -> PrimopCmmEmit_IntoRegs $ \[res] -> do
     emitBSwapCall res w (wordWidth platform)
 
-  BRev8Op -> \[w] -> opAllDone $ \[res] -> do
+  BRev8Op -> \[w] -> PrimopCmmEmit_IntoRegs $ \[res] -> do
     emitBRevCall res w W8
-  BRev16Op -> \[w] -> opAllDone $ \[res] -> do
+  BRev16Op -> \[w] -> PrimopCmmEmit_IntoRegs $ \[res] -> do
     emitBRevCall res w W16
-  BRev32Op -> \[w] -> opAllDone $ \[res] -> do
+  BRev32Op -> \[w] -> PrimopCmmEmit_IntoRegs $ \[res] -> do
     emitBRevCall res w W32
-  BRev64Op -> \[w] -> opAllDone $ \[res] -> do
+  BRev64Op -> \[w] -> PrimopCmmEmit_IntoRegs $ \[res] -> do
     emitBRevCall res w W64
-  BRevOp -> \[w] -> opAllDone $ \[res] -> do
+  BRevOp -> \[w] -> PrimopCmmEmit_IntoRegs $ \[res] -> do
     emitBRevCall res w (wordWidth platform)
 
 -- Population count
-  PopCnt8Op -> \[w] -> opAllDone $ \[res] -> do
+  PopCnt8Op -> \[w] -> PrimopCmmEmit_IntoRegs $ \[res] -> do
     emitPopCntCall res w W8
-  PopCnt16Op -> \[w] -> opAllDone $ \[res] -> do
+  PopCnt16Op -> \[w] -> PrimopCmmEmit_IntoRegs $ \[res] -> do
     emitPopCntCall res w W16
-  PopCnt32Op -> \[w] -> opAllDone $ \[res] -> do
+  PopCnt32Op -> \[w] -> PrimopCmmEmit_IntoRegs $ \[res] -> do
     emitPopCntCall res w W32
-  PopCnt64Op -> \[w] -> opAllDone $ \[res] -> do
+  PopCnt64Op -> \[w] -> PrimopCmmEmit_IntoRegs $ \[res] -> do
     emitPopCntCall res w W64
-  PopCntOp -> \[w] -> opAllDone $ \[res] -> do
+  PopCntOp -> \[w] -> PrimopCmmEmit_IntoRegs $ \[res] -> do
     emitPopCntCall res w (wordWidth platform)
 
 -- Parallel bit deposit
-  Pdep8Op -> \[src, mask] -> opAllDone $ \[res] -> do
+  Pdep8Op -> \[src, mask] -> PrimopCmmEmit_IntoRegs $ \[res] -> do
     emitPdepCall res src mask W8
-  Pdep16Op -> \[src, mask] -> opAllDone $ \[res] -> do
+  Pdep16Op -> \[src, mask] -> PrimopCmmEmit_IntoRegs $ \[res] -> do
     emitPdepCall res src mask W16
-  Pdep32Op -> \[src, mask] -> opAllDone $ \[res] -> do
+  Pdep32Op -> \[src, mask] -> PrimopCmmEmit_IntoRegs $ \[res] -> do
     emitPdepCall res src mask W32
-  Pdep64Op -> \[src, mask] -> opAllDone $ \[res] -> do
+  Pdep64Op -> \[src, mask] -> PrimopCmmEmit_IntoRegs $ \[res] -> do
     emitPdepCall res src mask W64
-  PdepOp -> \[src, mask] -> opAllDone $ \[res] -> do
+  PdepOp -> \[src, mask] -> PrimopCmmEmit_IntoRegs $ \[res] -> do
     emitPdepCall res src mask (wordWidth platform)
 
 -- Parallel bit extract
-  Pext8Op -> \[src, mask] -> opAllDone $ \[res] -> do
+  Pext8Op -> \[src, mask] -> PrimopCmmEmit_IntoRegs $ \[res] -> do
     emitPextCall res src mask W8
-  Pext16Op -> \[src, mask] -> opAllDone $ \[res] -> do
+  Pext16Op -> \[src, mask] -> PrimopCmmEmit_IntoRegs $ \[res] -> do
     emitPextCall res src mask W16
-  Pext32Op -> \[src, mask] -> opAllDone $ \[res] -> do
+  Pext32Op -> \[src, mask] -> PrimopCmmEmit_IntoRegs $ \[res] -> do
     emitPextCall res src mask W32
-  Pext64Op -> \[src, mask] -> opAllDone $ \[res] -> do
+  Pext64Op -> \[src, mask] -> PrimopCmmEmit_IntoRegs $ \[res] -> do
     emitPextCall res src mask W64
-  PextOp -> \[src, mask] -> opAllDone $ \[res] -> do
+  PextOp -> \[src, mask] -> PrimopCmmEmit_IntoRegs $ \[res] -> do
     emitPextCall res src mask (wordWidth platform)
 
 -- count leading zeros
-  Clz8Op -> \[w] -> opAllDone $ \[res] -> do
+  Clz8Op -> \[w] -> PrimopCmmEmit_IntoRegs $ \[res] -> do
     emitClzCall res w W8
-  Clz16Op -> \[w] -> opAllDone $ \[res] -> do
+  Clz16Op -> \[w] -> PrimopCmmEmit_IntoRegs $ \[res] -> do
     emitClzCall res w W16
-  Clz32Op -> \[w] -> opAllDone $ \[res] -> do
+  Clz32Op -> \[w] -> PrimopCmmEmit_IntoRegs $ \[res] -> do
     emitClzCall res w W32
-  Clz64Op -> \[w] -> opAllDone $ \[res] -> do
+  Clz64Op -> \[w] -> PrimopCmmEmit_IntoRegs $ \[res] -> do
     emitClzCall res w W64
-  ClzOp -> \[w] -> opAllDone $ \[res] -> do
+  ClzOp -> \[w] -> PrimopCmmEmit_IntoRegs $ \[res] -> do
     emitClzCall res w (wordWidth platform)
 
 -- count trailing zeros
-  Ctz8Op -> \[w] -> opAllDone $ \[res] -> do
+  Ctz8Op -> \[w] -> PrimopCmmEmit_IntoRegs $ \[res] -> do
     emitCtzCall res w W8
-  Ctz16Op -> \[w] -> opAllDone $ \[res] -> do
+  Ctz16Op -> \[w] -> PrimopCmmEmit_IntoRegs $ \[res] -> do
     emitCtzCall res w W16
-  Ctz32Op -> \[w] -> opAllDone $ \[res] -> do
+  Ctz32Op -> \[w] -> PrimopCmmEmit_IntoRegs $ \[res] -> do
     emitCtzCall res w W32
-  Ctz64Op -> \[w] -> opAllDone $ \[res] -> do
+  Ctz64Op -> \[w] -> PrimopCmmEmit_IntoRegs $ \[res] -> do
     emitCtzCall res w W64
-  CtzOp -> \[w] -> opAllDone $ \[res] -> do
+  CtzOp -> \[w] -> PrimopCmmEmit_IntoRegs $ \[res] -> do
     emitCtzCall res w (wordWidth platform)
 
 -- Unsigned int to floating point conversions
-  Word2FloatOp -> \[w] -> opAllDone $ \[res] -> do
+  Word2FloatOp -> \[w] -> PrimopCmmEmit_IntoRegs $ \[res] -> do
     emitPrimCall [res] (MO_UF_Conv W32) [w]
-  Word2DoubleOp -> \[w] -> opAllDone $ \[res] -> do
+  Word2DoubleOp -> \[w] -> PrimopCmmEmit_IntoRegs $ \[res] -> do
     emitPrimCall [res] (MO_UF_Conv W64) [w]
 
 -- SIMD primops
-  (VecBroadcastOp vcat n w) -> \[e] -> opAllDone $ \[res] -> do
+  (VecBroadcastOp vcat n w) -> \[e] -> PrimopCmmEmit_IntoRegs $ \[res] -> do
     checkVecCompatibility dflags vcat n w
     doVecPackOp (vecElemInjectCast platform vcat w) ty zeros (replicate n e) res
    where
@@ -883,7 +898,7 @@ emitPrimOp dflags = \case
     ty :: CmmType
     ty = vecVmmType vcat n w
 
-  (VecPackOp vcat n w) -> \es -> opAllDone $ \[res] -> do
+  (VecPackOp vcat n w) -> \es -> PrimopCmmEmit_IntoRegs $ \[res] -> do
     checkVecCompatibility dflags vcat n w
     when (es `lengthIsNot` n) $
         panic "emitPrimOp: VecPackOp has wrong number of arguments"
@@ -901,7 +916,7 @@ emitPrimOp dflags = \case
     ty :: CmmType
     ty = vecVmmType vcat n w
 
-  (VecUnpackOp vcat n w) -> \[arg] -> opAllDone $ \res -> do
+  (VecUnpackOp vcat n w) -> \[arg] -> PrimopCmmEmit_IntoRegs $ \res -> do
     checkVecCompatibility dflags vcat n w
     when (res `lengthIsNot` n) $
         panic "emitPrimOp: VecUnpackOp has wrong number of results"
@@ -910,56 +925,56 @@ emitPrimOp dflags = \case
     ty :: CmmType
     ty = vecVmmType vcat n w
 
-  (VecInsertOp vcat n w) -> \[v,e,i] -> opAllDone $ \[res] -> do
+  (VecInsertOp vcat n w) -> \[v,e,i] -> PrimopCmmEmit_IntoRegs $ \[res] -> do
     checkVecCompatibility dflags vcat n w
     doVecInsertOp (vecElemInjectCast platform vcat w) ty v e i res
    where
     ty :: CmmType
     ty = vecVmmType vcat n w
 
-  (VecIndexByteArrayOp vcat n w) -> \args -> opAllDone $ \res0 -> do
+  (VecIndexByteArrayOp vcat n w) -> \args -> PrimopCmmEmit_IntoRegs $ \res0 -> do
     checkVecCompatibility dflags vcat n w
     doIndexByteArrayOp Nothing ty res0 args
    where
     ty :: CmmType
     ty = vecVmmType vcat n w
 
-  (VecReadByteArrayOp vcat n w) -> \args -> opAllDone $ \res0 -> do
+  (VecReadByteArrayOp vcat n w) -> \args -> PrimopCmmEmit_IntoRegs $ \res0 -> do
     checkVecCompatibility dflags vcat n w
     doIndexByteArrayOp Nothing ty res0 args
    where
     ty :: CmmType
     ty = vecVmmType vcat n w
 
-  (VecWriteByteArrayOp vcat n w) -> \args -> opAllDone $ \res0 -> do
+  (VecWriteByteArrayOp vcat n w) -> \args -> PrimopCmmEmit_IntoRegs $ \res0 -> do
     checkVecCompatibility dflags vcat n w
     doWriteByteArrayOp Nothing ty res0 args
    where
     ty :: CmmType
     ty = vecVmmType vcat n w
 
-  (VecIndexOffAddrOp vcat n w) -> \args -> opAllDone $ \res0 -> do
+  (VecIndexOffAddrOp vcat n w) -> \args -> PrimopCmmEmit_IntoRegs $ \res0 -> do
     checkVecCompatibility dflags vcat n w
     doIndexOffAddrOp Nothing ty res0 args
    where
     ty :: CmmType
     ty = vecVmmType vcat n w
 
-  (VecReadOffAddrOp vcat n w) -> \args -> opAllDone $ \res0 -> do
+  (VecReadOffAddrOp vcat n w) -> \args -> PrimopCmmEmit_IntoRegs $ \res0 -> do
     checkVecCompatibility dflags vcat n w
     doIndexOffAddrOp Nothing ty res0 args
    where
     ty :: CmmType
     ty = vecVmmType vcat n w
 
-  (VecWriteOffAddrOp vcat n w) -> \args -> opAllDone $ \res0 -> do
+  (VecWriteOffAddrOp vcat n w) -> \args -> PrimopCmmEmit_IntoRegs $ \res0 -> do
     checkVecCompatibility dflags vcat n w
     doWriteOffAddrOp Nothing ty res0 args
    where
     ty :: CmmType
     ty = vecVmmType vcat n w
 
-  (VecIndexScalarByteArrayOp vcat n w) -> \args -> opAllDone $ \res0 -> do
+  (VecIndexScalarByteArrayOp vcat n w) -> \args -> PrimopCmmEmit_IntoRegs $ \res0 -> do
     checkVecCompatibility dflags vcat n w
     doIndexByteArrayOpAs Nothing vecty ty res0 args
    where
@@ -969,7 +984,7 @@ emitPrimOp dflags = \case
     ty :: CmmType
     ty = vecCmmCat vcat w
 
-  (VecReadScalarByteArrayOp vcat n w) -> \args -> opAllDone $ \res0 -> do
+  (VecReadScalarByteArrayOp vcat n w) -> \args -> PrimopCmmEmit_IntoRegs $ \res0 -> do
     checkVecCompatibility dflags vcat n w
     doIndexByteArrayOpAs Nothing vecty ty res0 args
    where
@@ -979,14 +994,14 @@ emitPrimOp dflags = \case
     ty :: CmmType
     ty = vecCmmCat vcat w
 
-  (VecWriteScalarByteArrayOp vcat n w) -> \args -> opAllDone $ \res0 -> do
+  (VecWriteScalarByteArrayOp vcat n w) -> \args -> PrimopCmmEmit_IntoRegs $ \res0 -> do
     checkVecCompatibility dflags vcat n w
     doWriteByteArrayOp Nothing ty res0 args
    where
     ty :: CmmType
     ty = vecCmmCat vcat w
 
-  (VecIndexScalarOffAddrOp vcat n w) -> \args -> opAllDone $ \res0 -> do
+  (VecIndexScalarOffAddrOp vcat n w) -> \args -> PrimopCmmEmit_IntoRegs $ \res0 -> do
     checkVecCompatibility dflags vcat n w
     doIndexOffAddrOpAs Nothing vecty ty res0 args
    where
@@ -996,7 +1011,7 @@ emitPrimOp dflags = \case
     ty :: CmmType
     ty = vecCmmCat vcat w
 
-  (VecReadScalarOffAddrOp vcat n w) -> \args -> opAllDone $ \res0 -> do
+  (VecReadScalarOffAddrOp vcat n w) -> \args -> PrimopCmmEmit_IntoRegs $ \res0 -> do
     checkVecCompatibility dflags vcat n w
     doIndexOffAddrOpAs Nothing vecty ty res0 args
    where
@@ -1006,7 +1021,7 @@ emitPrimOp dflags = \case
     ty :: CmmType
     ty = vecCmmCat vcat w
 
-  (VecWriteScalarOffAddrOp vcat n w) -> \args -> opAllDone $ \res0 -> do
+  (VecWriteScalarOffAddrOp vcat n w) -> \args -> PrimopCmmEmit_IntoRegs $ \res0 -> do
     checkVecCompatibility dflags vcat n w
     doWriteOffAddrOp Nothing ty res0 args
    where
@@ -1014,59 +1029,59 @@ emitPrimOp dflags = \case
     ty = vecCmmCat vcat w
 
 -- Prefetch
-  PrefetchByteArrayOp3         -> \args -> opAllDone $ \[] -> do
+  PrefetchByteArrayOp3         -> \args -> PrimopCmmEmit_IntoRegs $ \[] -> do
     doPrefetchByteArrayOp 3  args
-  PrefetchMutableByteArrayOp3  -> \args -> opAllDone $ \[] -> do
+  PrefetchMutableByteArrayOp3  -> \args -> PrimopCmmEmit_IntoRegs $ \[] -> do
     doPrefetchMutableByteArrayOp 3  args
-  PrefetchAddrOp3              -> \args -> opAllDone $ \[] -> do
+  PrefetchAddrOp3              -> \args -> PrimopCmmEmit_IntoRegs $ \[] -> do
     doPrefetchAddrOp  3  args
-  PrefetchValueOp3             -> \args -> opAllDone $ \[] -> do
+  PrefetchValueOp3             -> \args -> PrimopCmmEmit_IntoRegs $ \[] -> do
     doPrefetchValueOp 3 args
 
-  PrefetchByteArrayOp2         -> \args -> opAllDone $ \[] -> do
+  PrefetchByteArrayOp2         -> \args -> PrimopCmmEmit_IntoRegs $ \[] -> do
     doPrefetchByteArrayOp 2  args
-  PrefetchMutableByteArrayOp2  -> \args -> opAllDone $ \[] -> do
+  PrefetchMutableByteArrayOp2  -> \args -> PrimopCmmEmit_IntoRegs $ \[] -> do
     doPrefetchMutableByteArrayOp 2  args
-  PrefetchAddrOp2              -> \args -> opAllDone $ \[] -> do
+  PrefetchAddrOp2              -> \args -> PrimopCmmEmit_IntoRegs $ \[] -> do
     doPrefetchAddrOp 2  args
-  PrefetchValueOp2             -> \args -> opAllDone $ \[] -> do
+  PrefetchValueOp2             -> \args -> PrimopCmmEmit_IntoRegs $ \[] -> do
     doPrefetchValueOp 2 args
-  PrefetchByteArrayOp1         -> \args -> opAllDone $ \[] -> do
+  PrefetchByteArrayOp1         -> \args -> PrimopCmmEmit_IntoRegs $ \[] -> do
     doPrefetchByteArrayOp 1  args
-  PrefetchMutableByteArrayOp1  -> \args -> opAllDone $ \[] -> do
+  PrefetchMutableByteArrayOp1  -> \args -> PrimopCmmEmit_IntoRegs $ \[] -> do
     doPrefetchMutableByteArrayOp 1  args
-  PrefetchAddrOp1              -> \args -> opAllDone $ \[] -> do
+  PrefetchAddrOp1              -> \args -> PrimopCmmEmit_IntoRegs $ \[] -> do
     doPrefetchAddrOp 1  args
-  PrefetchValueOp1             -> \args -> opAllDone $ \[] -> do
+  PrefetchValueOp1             -> \args -> PrimopCmmEmit_IntoRegs $ \[] -> do
     doPrefetchValueOp 1 args
 
-  PrefetchByteArrayOp0         -> \args -> opAllDone $ \[] -> do
+  PrefetchByteArrayOp0         -> \args -> PrimopCmmEmit_IntoRegs $ \[] -> do
     doPrefetchByteArrayOp 0  args
-  PrefetchMutableByteArrayOp0  -> \args -> opAllDone $ \[] -> do
+  PrefetchMutableByteArrayOp0  -> \args -> PrimopCmmEmit_IntoRegs $ \[] -> do
     doPrefetchMutableByteArrayOp 0  args
-  PrefetchAddrOp0              -> \args -> opAllDone $ \[] -> do
+  PrefetchAddrOp0              -> \args -> PrimopCmmEmit_IntoRegs $ \[] -> do
     doPrefetchAddrOp 0  args
-  PrefetchValueOp0             -> \args -> opAllDone $ \[] -> do
+  PrefetchValueOp0             -> \args -> PrimopCmmEmit_IntoRegs $ \[] -> do
     doPrefetchValueOp 0 args
 
 -- Atomic read-modify-write
-  FetchAddByteArrayOp_Int -> \[mba, ix, n] -> opAllDone $ \[res] -> do
+  FetchAddByteArrayOp_Int -> \[mba, ix, n] -> PrimopCmmEmit_IntoRegs $ \[res] -> do
     doAtomicRMW res AMO_Add mba ix (bWord platform) n
-  FetchSubByteArrayOp_Int -> \[mba, ix, n] -> opAllDone $ \[res] -> do
+  FetchSubByteArrayOp_Int -> \[mba, ix, n] -> PrimopCmmEmit_IntoRegs $ \[res] -> do
     doAtomicRMW res AMO_Sub mba ix (bWord platform) n
-  FetchAndByteArrayOp_Int -> \[mba, ix, n] -> opAllDone $ \[res] -> do
+  FetchAndByteArrayOp_Int -> \[mba, ix, n] -> PrimopCmmEmit_IntoRegs $ \[res] -> do
     doAtomicRMW res AMO_And mba ix (bWord platform) n
-  FetchNandByteArrayOp_Int -> \[mba, ix, n] -> opAllDone $ \[res] -> do
+  FetchNandByteArrayOp_Int -> \[mba, ix, n] -> PrimopCmmEmit_IntoRegs $ \[res] -> do
     doAtomicRMW res AMO_Nand mba ix (bWord platform) n
-  FetchOrByteArrayOp_Int -> \[mba, ix, n] -> opAllDone $ \[res] -> do
+  FetchOrByteArrayOp_Int -> \[mba, ix, n] -> PrimopCmmEmit_IntoRegs $ \[res] -> do
     doAtomicRMW res AMO_Or mba ix (bWord platform) n
-  FetchXorByteArrayOp_Int -> \[mba, ix, n] -> opAllDone $ \[res] -> do
+  FetchXorByteArrayOp_Int -> \[mba, ix, n] -> PrimopCmmEmit_IntoRegs $ \[res] -> do
     doAtomicRMW res AMO_Xor mba ix (bWord platform) n
-  AtomicReadByteArrayOp_Int -> \[mba, ix] -> opAllDone $ \[res] -> do
+  AtomicReadByteArrayOp_Int -> \[mba, ix] -> PrimopCmmEmit_IntoRegs $ \[res] -> do
     doAtomicReadByteArray res mba ix (bWord platform)
-  AtomicWriteByteArrayOp_Int -> \[mba, ix, val] -> opAllDone $ \[] -> do
+  AtomicWriteByteArrayOp_Int -> \[mba, ix, val] -> PrimopCmmEmit_IntoRegs $ \[] -> do
     doAtomicWriteByteArray mba ix (bWord platform) val
-  CasByteArrayOp_Int -> \[mba, ix, old, new] -> opAllDone $ \[res] -> do
+  CasByteArrayOp_Int -> \[mba, ix, old, new] -> PrimopCmmEmit_IntoRegs $ \[res] -> do
     doCasByteArray res mba ix (bWord platform) old new
 
 -- The rest just translate straightforwardly
@@ -1613,12 +1628,6 @@ opCallishHandledLater args callOrNot = PrimopCmmEmit_IntoRegs $ \res0 -> case ca
   Left op   -> emit $ mkUnsafeCall (PrimTarget op) res0 args
   Right gen -> gen res0 args
 
-opAllDone
-  :: ([LocalReg] -- where to put the results
-      -> FCode ())
-  -> PrimopCmmEmit
-opAllDone f = PrimopCmmEmit_IntoRegs $ f
-
 type GenericOp = [CmmFormal] -> [CmmActual] -> FCode ()
 
 genericIntQuotRemOp :: Width -> GenericOp



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/1263758be90f0a51c5f0d2e9ec6dfccb02419b5a...6b3daca3b85a5e9c1c3c8257f89aa703ec363400

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/1263758be90f0a51c5f0d2e9ec6dfccb02419b5a...6b3daca3b85a5e9c1c3c8257f89aa703ec363400
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20200622/b150cd53/attachment-0001.html>


More information about the ghc-commits mailing list