[Git][ghc/ghc][wip/ncg-simd] keep track of GlobalRegUse for register allocation

sheaf (@sheaf) gitlab at gitlab.haskell.org
Fri Jun 21 12:18:37 UTC 2024



sheaf pushed to branch wip/ncg-simd at Glasgow Haskell Compiler / GHC


Commits:
7d901da2 by sheaf at 2024-06-21T14:18:14+02:00
keep track of GlobalRegUse for register allocation

- - - - -


24 changed files:

- compiler/GHC/Cmm.hs
- compiler/GHC/Cmm/Graph.hs
- compiler/GHC/Cmm/Liveness.hs
- compiler/GHC/Cmm/Node.hs
- compiler/GHC/Cmm/Parser.y
- compiler/GHC/Cmm/ProcPoint.hs
- compiler/GHC/Cmm/Sink.hs
- compiler/GHC/CmmToAsm/AArch64.hs
- compiler/GHC/CmmToAsm/Instr.hs
- compiler/GHC/CmmToAsm/PPC.hs
- compiler/GHC/CmmToAsm/PPC/CodeGen.hs
- compiler/GHC/CmmToAsm/PPC/Instr.hs
- compiler/GHC/CmmToAsm/Reg/Graph/Spill.hs
- compiler/GHC/CmmToAsm/Reg/Linear.hs
- compiler/GHC/CmmToAsm/Reg/Linear/X86.hs
- compiler/GHC/CmmToAsm/Reg/Linear/X86_64.hs
- compiler/GHC/CmmToAsm/Reg/Liveness.hs
- compiler/GHC/CmmToAsm/X86/CodeGen.hs
- compiler/GHC/CmmToAsm/X86/Instr.hs
- compiler/GHC/CmmToAsm/X86/Regs.hs
- compiler/GHC/CmmToLlvm.hs
- compiler/GHC/CmmToLlvm/CodeGen.hs
- compiler/GHC/CmmToLlvm/Ppr.hs
- compiler/GHC/StgToCmm/Monad.hs


Changes:

=====================================
compiler/GHC/Cmm.hs
=====================================
@@ -100,7 +100,7 @@ data GenCmmDecl d h g
   = CmmProc     -- A procedure
      h                 -- Extra header such as the info table
      CLabel            -- Entry label
-     [GlobalReg]       -- Registers live on entry. Note that the set of live
+     [GlobalRegUse]    -- Registers live on entry. Note that the set of live
                        -- registers will be correct in generated C-- code, but
                        -- not in hand-written C-- code. However,
                        -- splitAtProcPoints calculates correct liveness


=====================================
compiler/GHC/Cmm/Graph.hs
=====================================
@@ -208,7 +208,7 @@ mkJump profile conv e actuals updfr_off =
 
 -- | A jump where the caller says what the live GlobalRegs are.  Used
 -- for low-level hand-written Cmm.
-mkRawJump       :: Profile -> CmmExpr -> UpdFrameOffset -> [GlobalReg]
+mkRawJump       :: Profile -> CmmExpr -> UpdFrameOffset -> [GlobalRegUse]
                 -> CmmAGraph
 mkRawJump profile e updfr_off vols =
   lastWithArgs profile Jump Old NativeNodeCall [] updfr_off $
@@ -297,7 +297,7 @@ stackStubExpr w = CmmLit (CmmInt 0 w)
 copyInOflow  :: Profile -> Convention -> Area
              -> [CmmFormal]
              -> [CmmFormal]
-             -> (Int, [GlobalReg], CmmAGraph)
+             -> (Int, [GlobalRegUse], CmmAGraph)
 
 copyInOflow profile conv area formals extra_stk
   = (offset, gregs, catAGraphs $ map mkMiddle nodes)
@@ -308,9 +308,9 @@ copyInOflow profile conv area formals extra_stk
 copyIn :: Profile -> Convention -> Area
        -> [CmmFormal]
        -> [CmmFormal]
-       -> (ByteOff, [GlobalReg], [CmmNode O O])
+       -> (ByteOff, [GlobalRegUse], [CmmNode O O])
 copyIn profile conv area formals extra_stk
-  = (stk_size, [r | (_, RegisterParam r) <- args], map ci (stk_args ++ args))
+  = (stk_size, [GlobalRegUse r (localRegType lr)| (lr, RegisterParam r) <- args], map ci (stk_args ++ args))
   where
     platform = profilePlatform profile
 
@@ -365,7 +365,7 @@ data Transfer = Call | JumpRet | Jump | Ret deriving Eq
 copyOutOflow :: Profile -> Convention -> Transfer -> Area -> [CmmExpr]
              -> UpdFrameOffset
              -> [CmmExpr] -- extra stack args
-             -> (Int, [GlobalReg], CmmAGraph)
+             -> (Int, [GlobalRegUse], CmmAGraph)
 
 -- Generate code to move the actual parameters into the locations
 -- required by the calling convention.  This includes a store for the
@@ -383,8 +383,8 @@ copyOutOflow profile conv transfer area actuals updfr_off extra_stack_stuff
     (regs, graph) = foldr co ([], mkNop) (setRA ++ args ++ stack_params)
 
     co :: (CmmExpr, ParamLocation)
-       -> ([GlobalReg], CmmAGraph)
-       -> ([GlobalReg], CmmAGraph)
+       -> ([GlobalRegUse], CmmAGraph)
+       -> ([GlobalRegUse], CmmAGraph)
     co (v, RegisterParam r@(VanillaReg {})) (rs, ms) =
         let width = cmmExprWidth platform v
             value
@@ -393,12 +393,14 @@ copyOutOflow profile conv transfer area actuals updfr_off extra_stack_stuff
                 | width < wordWidth platform =
                     CmmMachOp (MO_XX_Conv width (wordWidth platform)) [v]
                 | otherwise = panic "Parameter width greater than word width"
+            ru = GlobalRegUse r (cmmExprType platform value)
 
-        in (r:rs, mkAssign (CmmGlobal $ GlobalRegUse r (cmmExprType platform value)) value <*> ms)
+        in (ru:rs, mkAssign (CmmGlobal ru) value <*> ms)
 
     -- Non VanillaRegs
     co (v, RegisterParam r) (rs, ms) =
-        (r:rs, mkAssign (CmmGlobal $ GlobalRegUse r (cmmExprType platform v)) v <*> ms)
+      let ru = GlobalRegUse r (cmmExprType platform v)
+      in (ru:rs, mkAssign (CmmGlobal ru) v <*> ms)
 
     co (v, StackParam off)  (rs, ms)
       = (rs, mkStore (CmmStackSlot area off) (value v) <*> ms)
@@ -461,13 +463,13 @@ copyOutOflow profile conv transfer area actuals updfr_off extra_stack_stuff
 
 
 mkCallEntry :: Profile -> Convention -> [CmmFormal] -> [CmmFormal]
-            -> (Int, [GlobalReg], CmmAGraph)
+            -> (Int, [GlobalRegUse], CmmAGraph)
 mkCallEntry profile conv formals extra_stk
   = copyInOflow profile conv Old formals extra_stk
 
 lastWithArgs :: Profile -> Transfer -> Area -> Convention -> [CmmExpr]
              -> UpdFrameOffset
-             -> (ByteOff -> [GlobalReg] -> CmmAGraph)
+             -> (ByteOff -> [GlobalRegUse] -> CmmAGraph)
              -> CmmAGraph
 lastWithArgs profile transfer area conv actuals updfr_off last =
   lastWithArgsAndExtraStack profile transfer area conv actuals
@@ -476,7 +478,7 @@ lastWithArgs profile transfer area conv actuals updfr_off last =
 lastWithArgsAndExtraStack :: Profile
              -> Transfer -> Area -> Convention -> [CmmExpr]
              -> UpdFrameOffset -> [CmmExpr]
-             -> (ByteOff -> [GlobalReg] -> CmmAGraph)
+             -> (ByteOff -> [GlobalRegUse] -> CmmAGraph)
              -> CmmAGraph
 lastWithArgsAndExtraStack profile transfer area conv actuals updfr_off
                           extra_stack last =
@@ -490,7 +492,7 @@ noExtraStack :: [CmmExpr]
 noExtraStack = []
 
 toCall :: CmmExpr -> Maybe BlockId -> UpdFrameOffset -> ByteOff
-       -> ByteOff -> [GlobalReg]
+       -> ByteOff -> [GlobalRegUse]
        -> CmmAGraph
 toCall e cont updfr_off res_space arg_space regs =
   mkLast $ CmmCall e cont regs arg_space res_space updfr_off


=====================================
compiler/GHC/Cmm/Liveness.hs
=====================================
@@ -59,7 +59,7 @@ cmmLocalLiveness platform graph =
     check facts =
         noLiveOnEntry entry (expectJust "check" $ mapLookup entry facts) facts
 
-cmmGlobalLiveness :: Platform -> CmmGraph -> BlockEntryLiveness GlobalReg
+cmmGlobalLiveness :: Platform -> CmmGraph -> BlockEntryLiveness GlobalRegUse
 cmmGlobalLiveness platform graph =
     analyzeCmmBwd liveLattice (xferLive platform) graph mapEmpty
 
@@ -92,7 +92,7 @@ xferLive platform (BlockCC eNode middle xNode) fBase =
         !result = foldNodesBwdOO (gen_kill platform) middle joined
     in mapSingleton (entryLabel eNode) result
 {-# SPECIALIZE xferLive :: Platform -> TransferFun (CmmLive LocalReg) #-}
-{-# SPECIALIZE xferLive :: Platform -> TransferFun (CmmLive GlobalReg) #-}
+{-# SPECIALIZE xferLive :: Platform -> TransferFun (CmmLive GlobalRegUse) #-}
 
 -----------------------------------------------------------------------------
 -- | Specialization that only retains the keys for local variables.


=====================================
compiler/GHC/Cmm/Node.hs
=====================================
@@ -118,7 +118,7 @@ data CmmNode e x where
           -- occur in CmmExprs, namely as (CmmLit (CmmBlock b)) or
           -- (CmmStackSlot (Young b) _).
 
-      cml_args_regs :: [GlobalReg],
+      cml_args_regs :: [GlobalRegUse],
           -- The argument GlobalRegs (Rx, Fx, Dx, Lx) that are passed
           -- to the call.  This is essential information for the
           -- native code generator's register allocator; without
@@ -544,7 +544,7 @@ instance UserOfRegs LocalReg (CmmNode e x) where
                => (b -> LocalReg -> b) -> b -> a -> b
           fold f z n = foldRegsUsed platform f z n
 
-instance UserOfRegs GlobalReg (CmmNode e x) where
+instance UserOfRegs GlobalRegUse (CmmNode e x) where
   {-# INLINEABLE foldRegsUsed #-}
   foldRegsUsed platform f !z n = case n of
     CmmAssign _ expr -> fold f z expr
@@ -555,8 +555,8 @@ instance UserOfRegs GlobalReg (CmmNode e x) where
     CmmCall {cml_target=tgt, cml_args_regs=args} -> fold f (fold f z args) tgt
     CmmForeignCall {tgt=tgt, args=args} -> fold f (fold f z tgt) args
     _ -> z
-    where fold :: forall a b.  UserOfRegs GlobalReg a
-               => (b -> GlobalReg -> b) -> b -> a -> b
+    where fold :: forall a b.  UserOfRegs GlobalRegUse a
+               => (b -> GlobalRegUse -> b) -> b -> a -> b
           fold f z n = foldRegsUsed platform f z n
 instance (Ord r, UserOfRegs r CmmReg) => UserOfRegs r ForeignTarget where
   -- The (Ord r) in the context is necessary here
@@ -576,7 +576,7 @@ instance DefinerOfRegs LocalReg (CmmNode e x) where
                => (b -> LocalReg -> b) -> b -> a -> b
           fold f z n = foldRegsDefd platform f z n
 
-instance DefinerOfRegs GlobalReg (CmmNode e x) where
+instance DefinerOfRegs GlobalRegUse (CmmNode e x) where
   {-# INLINEABLE foldRegsDefd #-}
   foldRegsDefd platform f !z n = case n of
     CmmAssign lhs _ -> fold f z lhs
@@ -585,12 +585,13 @@ instance DefinerOfRegs GlobalReg (CmmNode e x) where
     CmmForeignCall {} -> fold f z activeRegs
                       -- See Note [Safe foreign calls clobber STG registers]
     _ -> z
-    where fold :: forall a b. DefinerOfRegs GlobalReg a
-               => (b -> GlobalReg -> b) -> b -> a -> b
+    where fold :: forall a b. DefinerOfRegs GlobalRegUse a
+               => (b -> GlobalRegUse -> b) -> b -> a -> b
           fold f z n = foldRegsDefd platform f z n
 
-          activeRegs = activeStgRegs platform
-          activeCallerSavesRegs = filter (callerSaves platform) activeRegs
+          activeRegs :: [GlobalRegUse]
+          activeRegs = map (\ r -> GlobalRegUse r (globalRegSpillType platform r)) $ activeStgRegs platform
+          activeCallerSavesRegs = filter (callerSaves platform . globalRegUseGlobalReg) activeRegs
 
           foreignTargetRegs (ForeignTarget _ (ForeignConvention _ _ _ CmmNeverReturns)) = []
           foreignTargetRegs _ = activeCallerSavesRegs


=====================================
compiler/GHC/Cmm/Parser.y
=====================================
@@ -770,13 +770,15 @@ safety  :: { Safety }
         : {- empty -}                   { PlayRisky }
         | STRING                        {% parseSafety $1 }
 
-vols    :: { [GlobalReg] }
+vols    :: { [GlobalRegUse] }
         : '[' ']'                       { [] }
-        | '[' '*' ']'                   {% do platform <- PD.getPlatform
-                                         ; return (realArgRegsCover platform) }
-                                           -- All of them. See comment attached
-                                           -- to realArgRegsCover
-        | '[' globals ']'               { map globalRegUseGlobalReg $2 }
+        | '[' '*' ']'                   {% do platform <- PD.getPlatform;
+                                              let { gregs = realArgRegsCover platform
+                                                  ; uses = map (\gr -> GlobalRegUse gr (globalRegSpillType platform gr)) gregs };
+                                              return uses }
+                                               -- All of them. See comment attached
+                                               -- to realArgRegsCover
+        | '[' globals ']'               { $2 }
 
 globals :: { [GlobalRegUse] }
         : GLOBALREG                     { [$1] }
@@ -1374,7 +1376,7 @@ mkReturnSimple profile actuals updfr_off =
   where e = entryCode platform (cmmLoadGCWord platform (CmmStackSlot Old updfr_off))
         platform = profilePlatform profile
 
-doRawJump :: CmmParse CmmExpr -> [GlobalReg] -> CmmParse ()
+doRawJump :: CmmParse CmmExpr -> [GlobalRegUse] -> CmmParse ()
 doRawJump expr_code vols = do
   profile <- getProfile
   expr <- expr_code


=====================================
compiler/GHC/Cmm/ProcPoint.hs
=====================================
@@ -262,7 +262,7 @@ splitAtProcPoints platform entry_label callPPs procPoints procMap cmmProc = do
 
 
   let liveness = cmmGlobalLiveness platform g
-  let ppLiveness pp = filter isArgReg $ regSetToList $
+  let ppLiveness pp = filter (isArgReg . globalRegUseGlobalReg) $ regSetToList $
                         expectJust "ppLiveness" $ mapLookup pp liveness
   graphEnv <- return $ foldlGraphBlocks add_block mapEmpty g
 


=====================================
compiler/GHC/Cmm/Sink.hs
=====================================
@@ -709,7 +709,7 @@ conflicts platform (r, rhs, addr) node
 globalRegistersConflict :: Platform -> CmmExpr -> CmmNode e x -> Bool
 globalRegistersConflict platform expr node =
    -- See Note [Inlining foldRegsDefd]
-   inline foldRegsDefd platform (\b r -> b || globalRegUsedIn platform r expr)
+   inline foldRegsDefd platform (\b r -> b || globalRegUsedIn platform (globalRegUseGlobalReg r) expr)
                 False node
 
 -- Returns True if node defines any local registers that are used in the


=====================================
compiler/GHC/CmmToAsm/AArch64.hs
=====================================
@@ -44,7 +44,7 @@ ncgAArch64 config
 -- | Instruction instance for aarch64
 instance Instruction AArch64.Instr where
         regUsageOfInstr         = AArch64.regUsageOfInstr
-        patchRegsOfInstr        = AArch64.patchRegsOfInstr
+        patchRegsOfInstr _      = AArch64.patchRegsOfInstr
         isJumpishInstr          = AArch64.isJumpishInstr
         jumpDestsOfInstr        = AArch64.jumpDestsOfInstr
         canFallthroughTo        = AArch64.canFallthroughTo


=====================================
compiler/GHC/CmmToAsm/Instr.hs
=====================================
@@ -62,7 +62,9 @@ class Instruction instr where
         -- | Apply a given mapping to all the register references in this
         --      instruction.
         patchRegsOfInstr
-                :: instr
+                :: HasDebugCallStack
+                => Platform
+                -> instr
                 -> (Reg -> Reg)
                 -> instr
 


=====================================
compiler/GHC/CmmToAsm/PPC.hs
=====================================
@@ -43,7 +43,7 @@ ncgPPC config = NcgImpl
 -- | Instruction instance for powerpc
 instance Instruction PPC.Instr where
    regUsageOfInstr     = PPC.regUsageOfInstr
-   patchRegsOfInstr    = PPC.patchRegsOfInstr
+   patchRegsOfInstr _  = PPC.patchRegsOfInstr
    isJumpishInstr      = PPC.isJumpishInstr
    jumpDestsOfInstr    = PPC.jumpDestsOfInstr
    canFallthroughTo    = PPC.canFallthroughTo


=====================================
compiler/GHC/CmmToAsm/PPC/CodeGen.hs
=====================================
@@ -195,8 +195,11 @@ stmtToInstrs stmt = do
     _ ->
       panic "stmtToInstrs: statement should have been cps'd away"
 
-jumpRegs :: Platform -> [GlobalReg] -> [Reg]
-jumpRegs platform gregs = [ RegReal r | Just r <- map (globalRegMaybe platform) gregs ]
+jumpRegs :: Platform -> [GlobalRegUse] -> [RegFormat]
+jumpRegs platform gregs =
+  [ RegFormat (RegReal r) (cmmTypeFormat ty)
+  | GlobalRegUse gr ty <- gregs
+  , Just r <- [globalRegMaybe platform gr] ]
 
 --------------------------------------------------------------------------------
 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
@@ -1091,7 +1094,7 @@ assignReg_FltCode = assignReg_IntCode
 
 
 
-genJump :: CmmExpr{-the branch target-} -> [Reg] -> NatM InstrBlock
+genJump :: CmmExpr{-the branch target-} -> [RegFormat] -> NatM InstrBlock
 
 genJump (CmmLit (CmmLabel lbl)) regs
   = return (unitOL $ JMP lbl regs)
@@ -1101,7 +1104,7 @@ genJump tree gregs
         platform <- getPlatform
         genJump' tree (platformToGCP platform) gregs
 
-genJump' :: CmmExpr -> GenCCallPlatform -> [Reg] -> NatM InstrBlock
+genJump' :: CmmExpr -> GenCCallPlatform -> [RegFormat] -> NatM InstrBlock
 
 genJump' tree (GCP64ELF 1) regs
   = do


=====================================
compiler/GHC/CmmToAsm/PPC/Instr.hs
=====================================
@@ -221,11 +221,11 @@ data Instr
                                     --    Just True:  branch likely taken
                                     --    Just False: branch likely not taken
                                     --    Nothing:    no hint
-    | JMP     CLabel [Reg]          -- same as branch,
+    | JMP     CLabel [RegFormat]    -- same as branch,
                                     -- but with CLabel instead of block ID
                                     -- and live global registers
     | MTCTR   Reg
-    | BCTR    [Maybe BlockId] (Maybe CLabel) [Reg]
+    | BCTR    [Maybe BlockId] (Maybe CLabel) [RegFormat]
                                     -- with list of local destinations, and
                                     -- jump table location if necessary
     | BL      CLabel [Reg]          -- with list of argument regs
@@ -333,9 +333,9 @@ regUsageOfInstr platform instr
     CMPL    _ reg ri         -> usage (reg : regRI ri,[])
     BCC     _ _ _            -> noUsage
     BCCFAR  _ _ _            -> noUsage
-    JMP     _ regs           -> usage (regs, [])
+    JMP     _ regs           -> usage (map regFormatReg regs, [])
     MTCTR   reg              -> usage ([reg],[])
-    BCTR    _ _ regs         -> usage (regs, [])
+    BCTR    _ _ regs         -> usage (map regFormatReg regs, [])
     BL      _ params         -> usage (params, callClobberedRegs platform)
     BCTRL   params           -> usage (params, callClobberedRegs platform)
 


=====================================
compiler/GHC/CmmToAsm/Reg/Graph/Spill.hs
=====================================
@@ -208,9 +208,9 @@ regSpill_instr platform regSlotMap (LiveInstr instr (Just _)) = do
   let rsSpillModify       = filter (\r -> elemUFM (regFormatReg r) regSlotMap) rsModify
 
   -- rewrite the instr and work out spill code.
-  (instr1, prepost1)      <- mapAccumLM (spillRead   regSlotMap) instr  rsSpillRead
-  (instr2, prepost2)      <- mapAccumLM (spillWrite  regSlotMap) instr1 rsSpillWritten
-  (instr3, prepost3)      <- mapAccumLM (spillModify regSlotMap) instr2 rsSpillModify
+  (instr1, prepost1)      <- mapAccumLM (spillRead   platform regSlotMap) instr  rsSpillRead
+  (instr2, prepost2)      <- mapAccumLM (spillWrite  platform regSlotMap) instr1 rsSpillWritten
+  (instr3, prepost3)      <- mapAccumLM (spillModify platform regSlotMap) instr2 rsSpillModify
 
   let (mPrefixes, mPostfixes) = unzip (prepost1 ++ prepost2 ++ prepost3)
   let prefixes                = concat mPrefixes
@@ -228,14 +228,15 @@ regSpill_instr platform regSlotMap (LiveInstr instr (Just _)) = do
 --   writes to a vreg that is being spilled.
 spillRead
         :: Instruction instr
-        => UniqFM Reg Int
+        => Platform
+        -> UniqFM Reg Int
         -> instr
         -> RegFormat
         -> SpillM (instr, ([LiveInstr instr'], [LiveInstr instr']))
 
-spillRead regSlotMap instr (RegFormat reg fmt)
+spillRead platform regSlotMap instr (RegFormat reg fmt)
  | Just slot     <- lookupUFM regSlotMap reg
- = do    (instr', nReg)  <- patchInstr reg instr
+ = do    (instr', nReg)  <- patchInstr platform reg instr
 
          modify $ \s -> s
                 { stateSpillSL  = addToUFM_C accSpillSL (stateSpillSL s) reg (reg, 0, 1) }
@@ -251,14 +252,15 @@ spillRead regSlotMap instr (RegFormat reg fmt)
 --   writes to a vreg that is being spilled.
 spillWrite
         :: Instruction instr
-        => UniqFM Reg Int
+        => Platform
+        -> UniqFM Reg Int
         -> instr
         -> RegFormat
         -> SpillM (instr, ([LiveInstr instr'], [LiveInstr instr']))
 
-spillWrite regSlotMap instr (RegFormat reg fmt)
+spillWrite platform regSlotMap instr (RegFormat reg fmt)
  | Just slot     <- lookupUFM regSlotMap reg
- = do    (instr', nReg)  <- patchInstr reg instr
+ = do    (instr', nReg)  <- patchInstr platform reg instr
 
          modify $ \s -> s
                 { stateSpillSL  = addToUFM_C accSpillSL (stateSpillSL s) reg (reg, 1, 0) }
@@ -274,14 +276,15 @@ spillWrite regSlotMap instr (RegFormat reg fmt)
 --   both reads and writes to a vreg that is being spilled.
 spillModify
         :: Instruction instr
-        => UniqFM Reg Int
+        => Platform
+        -> UniqFM Reg Int
         -> instr
         -> RegFormat
         -> SpillM (instr, ([LiveInstr instr'], [LiveInstr instr']))
 
-spillModify regSlotMap instr (RegFormat reg fmt)
+spillModify platform regSlotMap instr (RegFormat reg fmt)
  | Just slot     <- lookupUFM regSlotMap reg
- = do    (instr', nReg)  <- patchInstr reg instr
+ = do    (instr', nReg)  <- patchInstr platform reg instr
 
          modify $ \s -> s
                 { stateSpillSL  = addToUFM_C accSpillSL (stateSpillSL s) reg (reg, 1, 1) }
@@ -297,9 +300,9 @@ spillModify regSlotMap instr (RegFormat reg fmt)
 --   virtual reg.
 patchInstr
         :: Instruction instr
-        => Reg -> instr -> SpillM (instr, Reg)
+        => Platform -> Reg -> instr -> SpillM (instr, Reg)
 
-patchInstr reg instr
+patchInstr platform reg instr
  = do   nUnique         <- newUnique
 
         -- The register we're rewriting is supposed to be virtual.
@@ -312,19 +315,19 @@ patchInstr reg instr
                 RegReal{}
                  -> panic "RegAlloc.Graph.Spill.patchIntr: not patching real reg"
 
-        let instr'      = patchReg1 reg nReg instr
+        let instr'      = patchReg1 platform reg nReg instr
         return          (instr', nReg)
 
 
 patchReg1
         :: Instruction instr
-        => Reg -> Reg -> instr -> instr
+        => Platform -> Reg -> Reg -> instr -> instr
 
-patchReg1 old new instr
+patchReg1 platform old new instr
  = let  patchF r
                 | r == old      = new
                 | otherwise     = r
-   in   patchRegsOfInstr instr patchF
+   in   patchRegsOfInstr platform instr patchF
 
 
 -- Spiller monad --------------------------------------------------------------


=====================================
compiler/GHC/CmmToAsm/Reg/Linear.hs
=====================================
@@ -575,7 +575,7 @@ genRaInsn block_live new_instrs block_id instr r_dying w_dying = do
 
         patched_instr :: instr
         patched_instr
-                = patchRegsOfInstr adjusted_instr patchLookup
+                = patchRegsOfInstr platform adjusted_instr patchLookup
 
         patchLookup :: Reg -> Reg
         patchLookup x


=====================================
compiler/GHC/CmmToAsm/Reg/Linear/X86.hs
=====================================
@@ -32,12 +32,12 @@ getFreeRegs platform cls (FreeRegs f) =
   case cls of
     RcInteger ->
       [ RealRegSingle i
-      | i <- [ 0 .. lastint platform ]
+      | i <- intregnos platform
       , testBit f i
       ]
     RcFloatOrVector ->
       [ RealRegSingle i
-      | i <- [ lastint platform + 1 .. lastxmm platform ]
+      | i <- xmmregnos platform
       , testBit f i
       ]
 


=====================================
compiler/GHC/CmmToAsm/Reg/Linear/X86_64.hs
=====================================
@@ -32,12 +32,12 @@ getFreeRegs platform cls (FreeRegs f) =
   case cls of
     RcInteger ->
       [ RealRegSingle i
-      | i <- [ 0 .. lastint platform ]
+      | i <- intregnos platform
       , testBit f i
       ]
     RcFloatOrVector ->
       [ RealRegSingle i
-      | i <- [ lastint platform + 1 .. lastxmm platform ]
+      | i <- xmmregnos platform
       , testBit f i
       ]
 


=====================================
compiler/GHC/CmmToAsm/Reg/Liveness.hs
=====================================
@@ -114,9 +114,9 @@ instance Instruction instr => Instruction (InstrSR instr) where
                 SPILL  reg _  -> RU [reg] []
                 RELOAD _ reg  -> RU [] [reg]
 
-        patchRegsOfInstr i f
+        patchRegsOfInstr platform i f
          = case i of
-                Instr instr     -> Instr (patchRegsOfInstr instr f)
+                Instr instr     -> Instr (patchRegsOfInstr platform instr f)
                 SPILL  reg slot -> SPILL (updReg f reg) slot
                 RELOAD slot reg -> RELOAD slot (updReg f reg)
           where
@@ -648,7 +648,7 @@ patchEraseLive platform patchF cmm
                 | otherwise
                 = li' : patchInstrs lis
 
-                where   li'     = patchRegsLiveInstr patchF li
+                where   li'     = patchRegsLiveInstr platform patchF li
 
         eatMe   r1 r2 live
                 -- source and destination regs are the same
@@ -666,17 +666,18 @@ patchEraseLive platform patchF cmm
 --
 patchRegsLiveInstr
         :: (Instruction instr, HasDebugCallStack)
-        => (Reg -> Reg)
+        => Platform
+        -> (Reg -> Reg)
         -> LiveInstr instr -> LiveInstr instr
 
-patchRegsLiveInstr patchF li
+patchRegsLiveInstr platform patchF li
  = case li of
         LiveInstr instr Nothing
-         -> LiveInstr (patchRegsOfInstr instr patchF) Nothing
+         -> LiveInstr (patchRegsOfInstr platform instr patchF) Nothing
 
         LiveInstr instr (Just live)
          -> LiveInstr
-                (patchRegsOfInstr instr patchF)
+                (patchRegsOfInstr platform instr patchF)
                 (Just live
                         { -- WARNING: have to go via lists here because patchF changes the uniq in the Reg
                           liveBorn      = mapRegFormatSet patchF $ liveBorn live


=====================================
compiler/GHC/CmmToAsm/X86/CodeGen.hs
=====================================
@@ -378,8 +378,11 @@ stmtToInstrs bid stmt = do
         panic "stmtToInstrs: statement should have been cps'd away"
 
 
-jumpRegs :: Platform -> [GlobalReg] -> [Reg]
-jumpRegs platform gregs = [ RegReal r | Just r <- map (globalRegMaybe platform) gregs ]
+jumpRegs :: Platform -> [GlobalRegUse] -> [RegFormat]
+jumpRegs platform gregs =
+  [ RegFormat (RegReal r) (cmmTypeFormat ty)
+  | GlobalRegUse gr ty <- gregs
+  , Just r <- [globalRegMaybe platform gr] ]
 
 --------------------------------------------------------------------------------
 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
@@ -535,6 +538,8 @@ mkMOV platform fmt op1 op2 =
     cls1 = case op1 of { OpReg r1 -> Just (targetClassOfReg platform r1); _ -> Nothing }
     cls2 = case op2 of { OpReg r2 -> Just (targetClassOfReg platform r2); _ -> Nothing }
 
+
+
 assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
 assignMem_I64Code addrTree valueTree = do
   platform <- getPlatform
@@ -2848,7 +2853,7 @@ assignReg_VecCode format reg src = do
   let flag     = use_avx || use_sse
   return (src_code (getVecRegisterReg platform flag format reg))
 
-genJump :: CmmExpr{-the branch target-} -> [Reg] -> NatM InstrBlock
+genJump :: CmmExpr{-the branch target-} -> [RegFormat] -> NatM InstrBlock
 
 genJump (CmmLoad mem _ _) regs = do
   Amode target code <- getAmode mem
@@ -3479,11 +3484,11 @@ genCCall64 addr conv dest_regs args = do
     let prom_args = map (maybePromoteCArg platform W32) args
 
     let load_args :: [CmmExpr]
-                  -> [Reg]         -- int regs avail for args
-                  -> [Reg]         -- FP regs avail for args
+                  -> [RegFormat]         -- int regs avail for args
+                  -> [RegFormat]         -- FP regs avail for args
                   -> InstrBlock    -- code computing args
                   -> InstrBlock    -- code assigning args to ABI regs
-                  -> NatM ([CmmExpr],[Reg],[Reg],InstrBlock,InstrBlock)
+                  -> NatM ([CmmExpr],[RegFormat],[RegFormat],InstrBlock,InstrBlock)
         -- no more regs to use
         load_args args [] [] code acode     =
             return (args, [], [], code, acode)
@@ -3495,12 +3500,12 @@ genCCall64 addr conv dest_regs args = do
         load_args (arg : rest) aregs fregs code acode
             | isFloatType arg_rep = case fregs of
                  []     -> push_this_arg
-                 (r:rs) -> do
+                 (RegFormat r _fmt:rs) -> do
                     (code',acode') <- reg_this_arg r
                     load_args rest aregs rs code' acode'
             | otherwise           = case aregs of
                  []     -> push_this_arg
-                 (r:rs) -> do
+                 (RegFormat r _fmt:rs) -> do
                     (code',acode') <- reg_this_arg r
                     load_args rest rs fregs code' acode'
             where
@@ -3540,11 +3545,11 @@ genCCall64 addr conv dest_regs args = do
               arg_fmt = cmmTypeFormat arg_rep
 
         load_args_win :: [CmmExpr]
-                      -> [Reg]        -- used int regs
-                      -> [Reg]        -- used FP regs
+                      -> [RegFormat]        -- used int regs
+                      -> [RegFormat]        -- used FP regs
                       -> [(Reg, Reg)] -- (int, FP) regs avail for args
                       -> InstrBlock
-                      -> NatM ([CmmExpr],[Reg],[Reg],InstrBlock,InstrBlock)
+                      -> NatM ([CmmExpr],[RegFormat],[RegFormat],InstrBlock,InstrBlock)
         load_args_win args usedInt usedFP [] code
             = return (args, usedInt, usedFP, code, nilOL)
             -- no more regs to use
@@ -3555,16 +3560,19 @@ genCCall64 addr conv dest_regs args = do
                       ((ireg, freg) : regs) code
             | isFloatType arg_rep = do
                  arg_code <- getAnyReg arg
-                 load_args_win rest (ireg : usedInt) (freg : usedFP) regs
+                 load_args_win rest (mkRegFormat platform ireg II64: usedInt) (mkRegFormat platform freg FF64 : usedFP) regs
                                (code `appOL`
                                 arg_code freg `snocOL`
                                 -- If we are calling a varargs function
                                 -- then we need to define ireg as well
                                 -- as freg
-                                mkMOV platform II64 (OpReg freg) (OpReg ireg))
+                                CVTTSD2SIQ II64 (OpReg freg) ireg)
+                                  -- SLD TODO: I changed this from MOV FF64 (OpReg freg) (OpReg ireg)
+                                  -- to CVTTSD2SIQ ...
+                                  -- because it is going between two different types of register
             | otherwise = do
                  arg_code <- getAnyReg arg
-                 load_args_win rest (ireg : usedInt) usedFP regs
+                 load_args_win rest (mkRegFormat platform ireg II64: usedInt) usedFP regs
                                (code `appOL` arg_code ireg)
             where
               arg_rep = cmmExprType platform arg
@@ -3611,19 +3619,20 @@ genCCall64 addr conv dest_regs args = do
         if platformOS platform == OSMinGW32
         then load_args_win prom_args [] [] (allArgRegs platform) nilOL
         else do
+           let intArgRegs = map (\r -> mkRegFormat platform r II64) $ allIntArgRegs platform
+               fpArgRegs = map (\r -> mkRegFormat platform r FF64) $ allFPArgRegs platform
            (stack_args, aregs, fregs, load_args_code, assign_args_code)
-               <- load_args prom_args (allIntArgRegs platform)
-                                      (allFPArgRegs platform)
-                                      nilOL nilOL
+               <- load_args prom_args intArgRegs fpArgRegs nilOL nilOL
            let used_regs rs as = dropTail (length rs) as
-               fregs_used      = used_regs fregs (allFPArgRegs platform)
-               aregs_used      = used_regs aregs (allIntArgRegs platform)
+               fregs_used      = used_regs fregs fpArgRegs
+               aregs_used      = used_regs aregs intArgRegs
            return (stack_args, aregs_used, fregs_used, load_args_code
                                                       , assign_args_code)
 
     let
+        wordFmt = archWordFormat (target32Bit platform)
         arg_regs_used = int_regs_used ++ fp_regs_used
-        arg_regs = [eax] ++ arg_regs_used
+        arg_regs = [mkRegFormat platform eax wordFmt] ++ arg_regs_used
                 -- for annotating the call instruction with
         sse_regs = length fp_regs_used
         arg_stack_slots = if platformOS platform == OSMinGW32


=====================================
compiler/GHC/CmmToAsm/X86/Instr.hs
=====================================
@@ -71,7 +71,7 @@ import GHC.Types.Basic (Alignment)
 import GHC.Cmm.DebugBlock (UnwindTable)
 import GHC.Utils.Misc ( HasDebugCallStack )
 
-import Data.Maybe       (fromMaybe)
+import GHC.Data.Maybe
 
 -- Format of an x86/x86_64 memory address, in bytes.
 --
@@ -316,7 +316,7 @@ data Instr
         --  | POPA
 
         -- Jumping around.
-        | JMP         Operand [Reg] -- including live Regs at the call
+        | JMP         Operand [RegFormat] -- including live Regs at the call
         | JXX         Cond BlockId  -- includes unconditional branches
         | JXX_GBL     Cond Imm      -- non-local version of JXX
         -- Table jump
@@ -326,7 +326,7 @@ data Instr
                       CLabel    -- Label of jump table
         -- | X86 call instruction
         | CALL        (Either Imm Reg) -- ^ Jump target
-                      [Reg]            -- ^ Arguments (required for register allocation)
+                      [RegFormat]      -- ^ Arguments (required for register allocation)
 
         -- Other things.
         | CLTD Format            -- sign extend %eax into %edx:%eax
@@ -419,8 +419,16 @@ regUsageOfInstr :: Platform -> Instr -> RegUsage
 regUsageOfInstr platform instr
  = case instr of
     MOV    fmt src dst    -> usageRW fmt src dst
-    MOVD   fmt src dst    -> usageRW fmt src dst
-    CMOV _ fmt src dst    -> mkRU fmt (use_R src [dst]) [dst]
+    MOVD   fmt src dst    ->
+      mkRU (use_R fmt src []) (use_R out_fmt dst [])
+        where
+           out_fmt = case fmt of
+             II32 -> FF32
+             II64 -> FF64
+             FF32 -> II32
+             FF64 -> II64
+             _ -> panic "MOVD: not a scalar 32/64 bit format"
+    CMOV _ fmt src dst    -> mkRU (use_R fmt src [mk fmt dst]) [mk fmt dst]
     MOVZxL fmt src dst    -> usageRW fmt src dst
     MOVSxL fmt src dst    -> usageRW fmt src dst
     LEA    fmt src dst    -> usageRW fmt src dst
@@ -431,80 +439,80 @@ regUsageOfInstr platform instr
     IMUL   fmt src dst    -> usageRM fmt src dst
 
     -- Result of IMULB will be in just in %ax
-    IMUL2  II8 src       -> mkRU II8 (eax:use_R src []) [eax]
+    IMUL2  II8 src       -> mkRU (mk II8 eax:use_R II8 src []) [mk II8 eax]
     -- Result of IMUL for wider values, will be split between %dx/%edx/%rdx and
     -- %ax/%eax/%rax.
-    IMUL2  fmt src        -> mkRU fmt (eax:use_R src []) [eax,edx]
+    IMUL2  fmt src        -> mkRU (mk fmt eax:use_R fmt src []) [mk fmt eax,mk fmt edx]
 
     MUL    fmt src dst    -> usageRM fmt src dst
-    MUL2   fmt src        -> mkRU fmt (eax:use_R src []) [eax,edx]
-    DIV    fmt op -> mkRU fmt (eax:edx:use_R op []) [eax,edx]
-    IDIV   fmt op -> mkRU fmt (eax:edx:use_R op []) [eax,edx]
+    MUL2   fmt src        -> mkRU (mk fmt eax:use_R fmt src []) [mk fmt eax,mk fmt edx]
+    DIV    fmt op -> mkRU (mk fmt eax:mk fmt edx:use_R fmt op []) [mk fmt eax, mk fmt edx]
+    IDIV   fmt op -> mkRU (mk fmt eax:mk fmt edx:use_R fmt op []) [mk fmt eax, mk fmt edx]
     ADD_CC fmt src dst    -> usageRM fmt src dst
     SUB_CC fmt src dst    -> usageRM fmt src dst
     AND    fmt src dst    -> usageRM fmt src dst
     OR     fmt src dst    -> usageRM fmt src dst
 
     XOR    fmt (OpReg src) (OpReg dst)
-        | src == dst    -> mkRU fmt [] [dst]
+        | src == dst    -> mkRU [] [mk fmt dst]
 
     XOR    fmt src dst    -> usageRM fmt src dst
     NOT    fmt op         -> usageM fmt op
-    BSWAP  fmt reg        -> mkRU fmt [reg] [reg]
+    BSWAP  fmt reg        -> mkRU [mk fmt reg] [mk fmt reg]
     NEGI   fmt op         -> usageM fmt op
     SHL    fmt imm dst    -> usageRM fmt imm dst
     SAR    fmt imm dst    -> usageRM fmt imm dst
     SHR    fmt imm dst    -> usageRM fmt imm dst
     SHLD   fmt imm dst1 dst2 -> usageRMM fmt imm dst1 dst2
     SHRD   fmt imm dst1 dst2 -> usageRMM fmt imm dst1 dst2
-    BT     fmt _   src    -> mkRUR fmt (use_R src [])
-
-    PUSH   fmt op         -> mkRUR fmt (use_R op [])
-    POP    fmt op         -> mkRU fmt [] (def_W op)
-    TEST   fmt src dst    -> mkRUR fmt (use_R src $! use_R dst [])
-    CMP    fmt src dst    -> mkRUR fmt (use_R src $! use_R dst [])
-    SETCC  _ op         -> mkRUFormat [] (def_W op)
-    JXX    _ _          -> mkRUFormat [] []
-    JXX_GBL _ _         -> mkRUFormat [] []
-    JMP     op regs     -> mkRUFormat (use_R op regs) []
-    JMP_TBL op _ _ _    -> mkRUFormat (use_R op []) []
-    CALL (Left _)  params   -> mkRUFormat params (callClobberedRegs platform)
-    CALL (Right reg) params -> mkRUFormat (reg:params) (callClobberedRegs platform)
-    CLTD   fmt          -> mkRU fmt [eax] [edx]
-    NOP                 -> mkRUFormat [] []
-
-    X87Store    fmt  dst    -> mkRUR fmt ( use_EA dst [])
-
-    CVTSS2SD   src dst  -> mkRUFormat [src] [dst]
-    CVTSD2SS   src dst  -> mkRUFormat [src] [dst]
-    CVTTSS2SIQ _ src dst -> mkRUFormat (use_R src []) [dst]
-    CVTTSD2SIQ _ src dst -> mkRUFormat (use_R src []) [dst]
-    CVTSI2SS   _ src dst -> mkRUFormat (use_R src []) [dst]
-    CVTSI2SD   _ src dst -> mkRUFormat (use_R src []) [dst]
+    BT     fmt _   src    -> mkRUR (use_R fmt src [])
+
+    PUSH   fmt op         -> mkRUR (use_R fmt op [])
+    POP    fmt op         -> mkRU [] (def_W fmt op)
+    TEST   fmt src dst    -> mkRUR (use_R fmt src $! use_R fmt dst [])
+    CMP    fmt src dst    -> mkRUR (use_R fmt src $! use_R fmt dst [])
+    SETCC  _ op         -> mkRU [] (def_W II8 op)
+    JXX    _ _          -> mkRU [] []
+    JXX_GBL _ _         -> mkRU [] []
+    JMP     op regs     -> mkRU (use_R addrFmt op regs) []
+    JMP_TBL op _ _ _    -> mkRU (use_R addrFmt op []) []
+    CALL (Left _)  params   -> mkRU params (map mkFmt $ callClobberedRegs platform)
+    CALL (Right reg) params -> mkRU (mk addrFmt reg:params) (map mkFmt $ callClobberedRegs platform)
+    CLTD   fmt          -> mkRU [mk fmt eax] [mk fmt edx]
+    NOP                 -> mkRU [] []
+
+    X87Store _fmt  dst -> mkRUR ( use_EA dst [])
+
+    CVTSS2SD   src dst  -> mkRU [mk FF32 src] [mk FF64 dst]
+    CVTSD2SS   src dst  -> mkRU [mk FF64 src] [mk FF32 dst]
+    CVTTSS2SIQ fmt src dst -> mkRU (use_R FF32 src []) [mk fmt dst]
+    CVTTSD2SIQ fmt src dst -> mkRU (use_R FF64 src []) [mk fmt dst]
+    CVTSI2SS   fmt src dst -> mkRU (use_R fmt src []) [mk FF32 dst]
+    CVTSI2SD   fmt src dst -> mkRU (use_R fmt src []) [mk FF64 dst]
     FDIV fmt     src dst  -> usageRM fmt src dst
-    SQRT fmt src dst      -> mkRU fmt (use_R src []) [dst]
+    SQRT fmt src dst      -> mkRU (use_R fmt src []) [mk fmt dst]
 
-    FETCHGOT reg        -> mkRU II64 [] [reg]
-    FETCHPC  reg        -> mkRU II64 [] [reg]
+    FETCHGOT reg        -> mkRU [] [mk addrFmt reg]
+    FETCHPC  reg        -> mkRU [] [mk addrFmt reg]
 
     COMMENT _           -> noUsage
     LOCATION{}          -> noUsage
     UNWIND{}            -> noUsage
     DELTA   _           -> noUsage
 
-    POPCNT fmt src dst -> mkRU fmt (use_R src []) [dst]
-    LZCNT  fmt src dst -> mkRU fmt (use_R src []) [dst]
-    TZCNT  fmt src dst -> mkRU fmt (use_R src []) [dst]
-    BSF    fmt src dst -> mkRU fmt (use_R src []) [dst]
-    BSR    fmt src dst -> mkRU fmt (use_R src []) [dst]
+    POPCNT fmt src dst -> mkRU (use_R fmt src []) [mk fmt dst]
+    LZCNT  fmt src dst -> mkRU (use_R fmt src []) [mk fmt dst]
+    TZCNT  fmt src dst -> mkRU (use_R fmt src []) [mk fmt dst]
+    BSF    fmt src dst -> mkRU (use_R fmt src []) [mk fmt dst]
+    BSR    fmt src dst -> mkRU (use_R fmt src []) [mk fmt dst]
 
-    PDEP   fmt src mask dst -> mkRU fmt (use_R src $ use_R mask []) [dst]
-    PEXT   fmt src mask dst -> mkRU fmt (use_R src $ use_R mask []) [dst]
+    PDEP   fmt src mask dst -> mkRU (use_R fmt src $ use_R fmt mask []) [mk fmt dst]
+    PEXT   fmt src mask dst -> mkRU (use_R fmt src $ use_R fmt mask []) [mk fmt dst]
 
     FMA3 fmt _ _ src3 src2 dst -> usageFMA fmt src3 src2 dst
 
     -- note: might be a better way to do this
-    PREFETCH _  fmt src -> mkRU fmt (use_R src []) []
+    PREFETCH _  fmt src -> mkRU (use_R fmt src []) []
     LOCK i              -> regUsageOfInstr platform i
     XADD fmt src dst      -> usageMM fmt src dst
     CMPXCHG fmt src dst   -> usageRMM fmt src dst (OpReg eax)
@@ -512,10 +520,10 @@ regUsageOfInstr platform instr
     MFENCE -> noUsage
 
     -- vector instructions
-    VBROADCAST fmt src dst   -> mkRU fmt (use_EA src []) [dst]
-    VEXTRACT     fmt _off src dst -> mkRU fmt [src] (use_R dst [])
+    VBROADCAST fmt src dst   -> mkRU (use_EA src []) [mk fmt dst]
+    VEXTRACT     fmt _off src dst -> mkRU [mk fmt src] (use_R fmt dst [])
     INSERTPS     fmt (ImmInt off) src dst
-      -> mkRU fmt ((use_R src []) ++ [dst | not doesNotReadDst]) [dst]
+      -> mkRU ((use_R fmt src []) ++ [mk fmt dst | not doesNotReadDst]) [mk fmt dst]
         where
           -- Compute whether the instruction reads the destination register or not.
           -- Immediate bits: ss_dd_zzzz s = src pos, d = dst pos, z = zeroed components.
@@ -524,42 +532,42 @@ regUsageOfInstr platform instr
             -- are being zeroed.
             where pos = ( off `shiftR` 4 ) .&. 0b11
     INSERTPS fmt _off src dst
-      -> mkRU fmt ((use_R src []) ++ [dst]) [dst]
-
-    VMOVU        fmt src dst   -> mkRU fmt (use_R src []) (use_R dst [])
-    MOVU         fmt src dst   -> mkRU fmt (use_R src []) (use_R dst [])
-    MOVA         fmt src dst   -> mkRU fmt (use_R src []) (use_R dst [])
-    MOVL         fmt src dst   -> mkRU fmt (use_R src []) (use_R dst [])
-    MOVH         fmt src dst   -> mkRU fmt (use_R src []) (use_R dst [])
-    MOVDQU       fmt src dst   -> mkRU fmt (use_R src []) (use_R dst [])
-    VMOVDQU      fmt src dst   -> mkRU fmt (use_R src []) (use_R dst [])
-
-    VPXOR        fmt s1 s2 dst -> mkRU fmt [s1,s2] [dst]
-
-    VADD         fmt s1 s2 dst -> mkRU fmt ((use_R s1 []) ++ [s2]) [dst]
-    VSUB         fmt s1 s2 dst -> mkRU fmt ((use_R s1 []) ++ [s2]) [dst]
-    VMUL         fmt s1 s2 dst -> mkRU fmt ((use_R s1 []) ++ [s2]) [dst]
-    VDIV         fmt s1 s2 dst -> mkRU fmt ((use_R s1 []) ++ [s2]) [dst]
-
-    VPSHUFD      fmt _off src dst
-      -> mkRU fmt (use_R src []) [dst]
-    PSHUFD       fmt _off src dst
-      -> mkRU fmt (use_R src []) [dst]
-    SHUFPD      fmt _off src dst
-      -> mkRU fmt (use_R src [dst]) [dst]
-    SHUFPS      fmt _off src dst
-      -> mkRU fmt (use_R src [dst]) [dst]
-    VSHUFPD     fmt _off src1 src2 dst
-      -> mkRU fmt (use_R src1 [src2]) [dst]
-    VSHUFPS     fmt _off src1 src2 dst
-      -> mkRU fmt (use_R src1 [src2]) [dst]
-
-    PSLLDQ       fmt off dst -> mkRU fmt (use_R off []) [dst]
+      -> mkRU ((use_R fmt src []) ++ [mk fmt dst]) [mk fmt dst]
+
+    VMOVU        fmt src dst   -> mkRU (use_R fmt src []) (use_R fmt dst [])
+    MOVU         fmt src dst   -> mkRU (use_R fmt src []) (use_R fmt dst [])
+    MOVA         fmt src dst   -> mkRU (use_R fmt src []) (use_R fmt dst [])
+    MOVL         fmt src dst   -> mkRU (use_R fmt src []) (use_R fmt dst [])
+    MOVH         fmt src dst   -> mkRU (use_R fmt src []) (use_R fmt dst [])
+    MOVDQU       fmt src dst   -> mkRU (use_R fmt src []) (use_R fmt dst [])
+    VMOVDQU      fmt src dst   -> mkRU (use_R fmt src []) (use_R fmt dst [])
+
+    VPXOR        fmt s1 s2 dst -> mkRU (map (mk fmt) [s1,s2]) [mk fmt dst]
+
+    VADD         fmt s1 s2 dst -> mkRU ((use_R fmt s1 []) ++ [mk fmt s2]) [mk fmt dst]
+    VSUB         fmt s1 s2 dst -> mkRU ((use_R fmt s1 []) ++ [mk fmt s2]) [mk fmt dst]
+    VMUL         fmt s1 s2 dst -> mkRU ((use_R fmt s1 []) ++ [mk fmt s2]) [mk fmt dst]
+    VDIV         fmt s1 s2 dst -> mkRU ((use_R fmt s1 []) ++ [mk fmt s2]) [mk fmt dst]
+
+    VPSHUFD fmt _off src dst
+      -> mkRU (use_R fmt src []) [mk fmt dst]
+    PSHUFD fmt _off src dst
+      -> mkRU (use_R fmt src []) [mk fmt dst]
+    SHUFPD fmt _off src dst
+      -> mkRU (use_R fmt src [mk fmt dst]) [mk fmt dst]
+    SHUFPS fmt _off src dst
+      -> mkRU (use_R fmt src [mk fmt dst]) [mk fmt dst]
+    VSHUFPD fmt _off src1 src2 dst
+      -> mkRU (use_R fmt src1 [mk fmt src2]) [mk fmt dst]
+    VSHUFPS fmt _off src1 src2 dst
+      -> mkRU (use_R fmt src1 [mk fmt src2]) [mk fmt dst]
+
+    PSLLDQ       fmt off dst -> mkRU (use_R fmt off []) [mk fmt dst]
 
     MOVHLPS    fmt src dst
-      -> mkRU fmt (use_R src []) [dst]
+      -> mkRU (use_R fmt src []) [mk fmt dst]
     PUNPCKLQDQ fmt src dst
-      -> mkRU fmt (use_R src [dst]) [dst]
+      -> mkRU (use_R fmt src [mk fmt dst]) [mk fmt dst]
 
     _other              -> panic "regUsage: unrecognised instr"
  where
@@ -574,81 +582,91 @@ regUsageOfInstr platform instr
 
     -- 2 operand form; first operand Read; second Written
     usageRW :: HasDebugCallStack => Format -> Operand -> Operand -> RegUsage
-    usageRW fmt op (OpReg reg)      = mkRU fmt (use_R op []) [reg]
-    usageRW fmt op (OpAddr ea)      = mkRUR fmt (use_R op $! use_EA ea [])
+    usageRW fmt op (OpReg reg)      = mkRU (use_R fmt op []) [mk fmt reg]
+    usageRW fmt op (OpAddr ea)      = mkRUR (use_R fmt op $! use_EA ea [])
     usageRW _ _ _                   = panic "X86.RegInfo.usageRW: no match"
 
     -- 2 operand form; first operand Read; second Modified
     usageRM :: HasDebugCallStack => Format -> Operand -> Operand -> RegUsage
-    usageRM fmt op (OpReg reg)      = mkRU fmt (use_R op [reg]) [reg]
-    usageRM fmt op (OpAddr ea)      = mkRUR fmt (use_R op $! use_EA ea [])
+    usageRM fmt op (OpReg reg)      = mkRU (use_R fmt op [mk fmt reg]) [mk fmt reg]
+    usageRM fmt op (OpAddr ea)      = mkRUR (use_R fmt op $! use_EA ea [])
     usageRM _ _ _                   = panic "X86.RegInfo.usageRM: no match"
 
     -- 2 operand form; first operand Modified; second Modified
     usageMM :: HasDebugCallStack => Format -> Operand -> Operand -> RegUsage
-    usageMM fmt (OpReg src) (OpReg dst) = mkRU fmt [src, dst] [src, dst]
-    usageMM fmt (OpReg src) (OpAddr ea) = mkRU fmt (use_EA ea [src]) [src]
-    usageMM fmt (OpAddr ea) (OpReg dst) = mkRU fmt (use_EA ea [dst]) [dst]
+    usageMM fmt (OpReg src) (OpReg dst) = mkRU (map (mk fmt) [src, dst]) (map (mk fmt) [src, dst])
+    usageMM fmt (OpReg src) (OpAddr ea) = mkRU (use_EA ea [mk fmt src]) [mk fmt src]
+    usageMM fmt (OpAddr ea) (OpReg dst) = mkRU (use_EA ea [mk fmt dst]) [mk fmt dst]
     usageMM _ _ _                       = panic "X86.RegInfo.usageMM: no match"
 
     -- 3 operand form; first operand Read; second Modified; third Modified
     usageRMM :: HasDebugCallStack => Format -> Operand -> Operand -> Operand -> RegUsage
-    usageRMM fmt (OpReg src) (OpReg dst) (OpReg reg) = mkRU fmt [src, dst, reg] [dst, reg]
-    usageRMM fmt (OpReg src) (OpAddr ea) (OpReg reg) = mkRU fmt (use_EA ea [src, reg]) [reg]
+    usageRMM fmt (OpReg src) (OpReg dst) (OpReg reg) = mkRU (map (mk fmt) [src, dst, reg]) (map (mk fmt) [dst, reg])
+    usageRMM fmt (OpReg src) (OpAddr ea) (OpReg reg) = mkRU (use_EA ea (map (mk fmt) [src, reg])) [mk fmt reg]
     usageRMM _ _ _ _                                 = panic "X86.RegInfo.usageRMM: no match"
 
     -- 3 operand form of FMA instructions.
     usageFMA :: HasDebugCallStack => Format -> Operand -> Reg -> Reg -> RegUsage
-    usageFMA fmt (OpReg src1) src2 dst
-      = mkRU fmt [src1, src2, dst] [dst]
+    usageFMA fmt (OpReg src1) src2 dst =
+      mkRU (map (\r -> mkRegFormat platform r fmt) [src1, src2, dst]) [ mkRegFormat platform dst fmt ]
     usageFMA fmt (OpAddr ea1) src2 dst
-      = mkRU fmt (use_EA ea1 [src2, dst]) [dst]
+      = mkRU (use_EA ea1 (map (\r -> mkRegFormat platform r fmt) [src2, dst])) [ mkRegFormat platform dst fmt ]
     usageFMA _ _ _ _
       = panic "X86.RegInfo.usageFMA: no match"
 
     -- 1 operand form; operand Modified
     usageM :: HasDebugCallStack => Format -> Operand -> RegUsage
-    usageM fmt (OpReg reg)        = mkRU fmt [reg] [reg]
-    usageM fmt (OpAddr ea)        = mkRUR fmt (use_EA ea [])
-    usageM _ _                    = panic "X86.RegInfo.usageM: no match"
+    usageM fmt (OpReg reg) =
+      let r' = mk fmt reg
+      in mkRU [r'] [r']
+    usageM _ (OpAddr ea) = mkRUR (use_EA ea [])
+    usageM _ _ = panic "X86.RegInfo.usageM: no match"
 
     -- Registers defd when an operand is written.
-    def_W (OpReg reg)           = [reg]
-    def_W (OpAddr _ )           = []
-    def_W _                     = panic "X86.RegInfo.def_W: no match"
+    def_W fmt (OpReg reg)         = [mk fmt reg]
+    def_W _   (OpAddr _ )         = []
+    def_W _   _                   = panic "X86.RegInfo.def_W: no match"
 
     -- Registers used when an operand is read.
-    use_R (OpReg reg)  tl = reg : tl
-    use_R (OpImm _)    tl = tl
-    use_R (OpAddr ea)  tl = use_EA ea tl
+    use_R fmt (OpReg reg)  tl = mk fmt reg : tl
+    use_R _   (OpImm _)    tl = tl
+    use_R _   (OpAddr ea)  tl = use_EA ea tl
 
     -- Registers used to compute an effective address.
     use_EA (ImmAddr _ _) tl = tl
     use_EA (AddrBaseIndex base index _) tl =
         use_base base $! use_index index tl
-        where use_base (EABaseReg r)  tl = r : tl
+        where use_base (EABaseReg r)  tl = mk addrFmt r : tl
               use_base _              tl = tl
               use_index EAIndexNone   tl = tl
-              use_index (EAIndex i _) tl = i : tl
-
-    mkRUR :: HasDebugCallStack => Format -> [Reg] -> RegUsage
-    mkRUR fmt src = src' `seq` RU (map (\ r -> mkRegFormat platform r fmt) src') []
-        where src' = filter (interesting platform) src
-
-    mkRU :: HasDebugCallStack => Format -> [Reg] -> [Reg] -> RegUsage
-    mkRU fmt src dst = src' `seq` dst' `seq` RU (map (\ r -> mkRegFormat platform r fmt) src') (map (\ r -> mkRegFormat platform r fmt) dst')
-        where src' = filter (interesting platform) src
-              dst' = filter (interesting platform) dst
-
-    mkRUFormat :: HasDebugCallStack => [Reg] -> [Reg] -> RegUsage
-    mkRUFormat src dst = src' `seq` dst' `seq` RU (map mkFormat src') (map mkFormat dst')
-        where src' = filter (interesting platform) src
-              dst' = filter (interesting platform) dst
-              mkFormat reg =
-                mkRegFormat platform reg $
-                  case targetClassOfReg platform reg of
-                    RcInteger -> archWordFormat (target32Bit platform)
-                    RcFloatOrVector -> FF64
+              use_index (EAIndex i _) tl = mk addrFmt i : tl
+
+    mkRUR :: [RegFormat] -> RegUsage
+    mkRUR src = mkRU src []
+
+    mkRU :: [RegFormat] -> [RegFormat] -> RegUsage
+    mkRU src dst = src' `seq` dst' `seq` RU src' dst'
+        where src' = filter (interesting platform . regFormatReg) src
+              dst' = filter (interesting platform . regFormatReg) dst
+
+    addrFmt = archWordFormat (target32Bit platform)
+    mk :: HasDebugCallStack => Format -> Reg -> RegFormat
+    mk fmt r = mkRegFormat platform r fmt
+
+    mkFmt :: HasDebugCallStack => Reg -> RegFormat
+    mkFmt r = RegFormat r $ case targetClassOfReg platform r of
+      RcInteger -> addrFmt
+      RcFloatOrVector -> FF64
+
+    --mkRUFormat :: HasDebugCallStack => [Reg] -> [Reg] -> RegUsage
+    --mkRUFormat src dst = src' `seq` dst' `seq` RU (map mkFormat src') (map mkFormat dst')
+    --    where src' = filter (interesting platform) src
+    --          dst' = filter (interesting platform) dst
+    --          mkFormat reg =
+    --            mkRegFormat platform reg $
+    --              case targetClassOfReg platform reg of
+    --                RcInteger -> archWordFormat (target32Bit platform)
+    --                RcFloatOrVector -> FF64
 
 -- | Is this register interesting for the register allocator?
 interesting :: Platform -> Reg -> Bool
@@ -659,10 +677,26 @@ interesting platform (RegReal (RealRegSingle i)) = freeReg platform i
 
 -- | Applies the supplied function to all registers in instructions.
 -- Typically used to change virtual registers to real registers.
-patchRegsOfInstr :: Instr -> (Reg -> Reg) -> Instr
-patchRegsOfInstr instr env
- = case instr of
-    MOV  fmt src dst     -> patch2 (MOV  fmt) src dst
+patchRegsOfInstr :: HasDebugCallStack => Platform -> Instr -> (Reg -> Reg) -> Instr
+patchRegsOfInstr platform instr env
+  = case instr of
+    MOV fmt src dst ->
+      mkMOV fmt (patchOp src) (patchOp dst)
+        where
+          fmtCls = if isIntFormat fmt then RcInteger else RcFloatOrVector
+          mkMOV :: HasDebugCallStack => Format -> Operand -> Operand -> Instr
+          mkMOV fmt op1 op2 =
+            assertPpr (all (== fmtCls) $ catMaybes [cls1, cls2])
+              (vcat [ text "patchRegsOfInstr produced invalid MOV instruction"
+                    , text "fmt:" <+> ppr fmt
+                    , case op1 of { OpReg r1 -> ppr r1 <+> dcolon <+> ppr (fromJust cls1); _ -> empty }
+                    , case op2 of { OpReg r2 -> ppr r2 <+> dcolon <+> ppr (fromJust cls2); _ -> empty }
+                    ])
+              $ MOV fmt op1 op2
+            where
+              cls1 = case op1 of { OpReg r1 -> Just (targetClassOfReg platform r1); _ -> Nothing }
+              cls2 = case op2 of { OpReg r2 -> Just (targetClassOfReg platform r2); _ -> Nothing }
+
     MOVD fmt src dst     -> patch2 (MOVD fmt) src dst
     CMOV cc fmt src dst  -> CMOV cc fmt (patchOp src) (env dst)
     MOVZxL fmt src dst   -> patch2 (MOVZxL fmt) src dst
@@ -740,7 +774,7 @@ patchRegsOfInstr instr env
 
     PREFETCH lvl format src -> PREFETCH lvl format (patchOp src)
 
-    LOCK i               -> LOCK (patchRegsOfInstr i env)
+    LOCK i               -> LOCK (patchRegsOfInstr platform i env)
     XADD fmt src dst     -> patch2 (XADD fmt) src dst
     CMPXCHG fmt src dst  -> patch2 (CMPXCHG fmt) src dst
     XCHG fmt src dst     -> XCHG fmt (patchOp src) (env dst)
@@ -1138,7 +1172,7 @@ mkStackAllocInstr platform amount
         case platformArch platform of
             ArchX86_64 | needs_probe_call platform amount ->
                            [ MOV II64 (OpImm (ImmInt amount)) (OpReg rax)
-                           , CALL (Left $ strImmLit (fsLit "___chkstk_ms")) [rax]
+                           , CALL (Left $ strImmLit (fsLit "___chkstk_ms")) [mkRegFormat platform rax II64]
                            , SUB II64 (OpReg rax) (OpReg rsp)
                            ]
                        | otherwise ->


=====================================
compiler/GHC/CmmToAsm/X86/Regs.hs
=====================================
@@ -38,6 +38,7 @@ module GHC.CmmToAsm.X86.Regs (
         xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,
         xmm,
         firstxmm, lastxmm,
+        intregnos, xmmregnos,
 
         ripRel,
         allFPArgRegs,


=====================================
compiler/GHC/CmmToLlvm.hs
=====================================
@@ -139,7 +139,7 @@ llvmGroupLlvmGens cmm = do
                          Nothing                   -> l
                          Just (CmmStaticsRaw info_lbl _) -> info_lbl
               lml <- strCLabel_llvm l'
-              funInsert lml =<< llvmFunTy live
+              funInsert lml =<< llvmFunTy (map globalRegUseGlobalReg live)
               return Nothing
         cdata <- fmap catMaybes $ mapM split cmm
 


=====================================
compiler/GHC/CmmToLlvm/CodeGen.hs
=====================================
@@ -57,7 +57,7 @@ genLlvmProc :: RawCmmDecl -> LlvmM [LlvmCmmDecl]
 genLlvmProc (CmmProc infos lbl live graph) = do
     let blocks = toBlockListEntryFirstFalseFallthrough graph
 
-    (lmblocks, lmdata) <- basicBlocksCodeGen live blocks
+    (lmblocks, lmdata) <- basicBlocksCodeGen (map globalRegUseGlobalReg live) blocks
     let info = mapLookup (g_entry graph) infos
         proc = CmmProc info lbl live (ListGraph lmblocks)
     return (proc:lmdata)
@@ -152,7 +152,7 @@ stmtToInstrs ubid stmt = case stmt of
 
     -- Tail call
     CmmCall { cml_target = arg,
-              cml_args_regs = live } -> genJump arg live
+              cml_args_regs = live } -> genJump arg $ map globalRegUseGlobalReg live
 
     _ -> panic "Llvm.CodeGen.stmtToInstrs"
 


=====================================
compiler/GHC/CmmToLlvm/Ppr.hs
=====================================
@@ -49,8 +49,9 @@ pprLlvmCmmDecl (CmmData _ lmdata) = do
   return ( vcat $ map (pprLlvmData opts) lmdata
          , vcat $ map (pprLlvmData opts) lmdata)
 
-pprLlvmCmmDecl (CmmProc mb_info entry_lbl live (ListGraph blks))
-  = do let lbl = case mb_info of
+pprLlvmCmmDecl (CmmProc mb_info entry_lbl liveWithUses (ListGraph blks))
+  = do let live = map globalRegUseGlobalReg liveWithUses
+           lbl = case mb_info of
                      Nothing -> entry_lbl
                      Just (CmmStaticsRaw info_lbl _) -> info_lbl
            link = if externallyVisibleCLabel lbl


=====================================
compiler/GHC/StgToCmm/Monad.hs
=====================================
@@ -778,7 +778,7 @@ emitProcWithConvention :: Convention -> Maybe CmmInfoTable -> CLabel
 emitProcWithConvention conv mb_info lbl args blocks
   = emitProcWithStackFrame conv mb_info lbl [] args blocks True
 
-emitProc :: Maybe CmmInfoTable -> CLabel -> [GlobalReg] -> CmmAGraphScoped
+emitProc :: Maybe CmmInfoTable -> CLabel -> [GlobalRegUse] -> CmmAGraphScoped
          -> Int -> Bool -> FCode ()
 emitProc mb_info lbl live blocks offset do_layout
   = do  { l <- newBlockId



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/7d901da2e8b1c972a92a9988757d0a57439b3ec3

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/7d901da2e8b1c972a92a9988757d0a57439b3ec3
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20240621/c2546910/attachment-0001.html>


More information about the ghc-commits mailing list