[Git][ghc/ghc][wip/ncg-simd] 2 commits: SIMD cleanups

Sat Jun 22 10:15:51 UTC 2024

sheaf pushed to branch wip/ncg-simd at Glasgow Haskell Compiler / GHC


Commits:
9d2a6591 by sheaf at 2024-06-22T11:29:49+02:00
SIMD cleanups

- - - - -
02a7018e by sheaf at 2024-06-22T12:15:13+02:00
SIMD: fix MOV between rax & xmm

- - - - -


13 changed files:

- compiler/GHC/CmmToAsm/AArch64/Instr.hs
- compiler/GHC/CmmToAsm/Format.hs
- compiler/GHC/CmmToAsm/Instr.hs
- compiler/GHC/CmmToAsm/PPC/Instr.hs
- compiler/GHC/CmmToAsm/Reg/Graph.hs
- compiler/GHC/CmmToAsm/Reg/Graph/SpillClean.hs
- compiler/GHC/CmmToAsm/Reg/Linear.hs
- compiler/GHC/CmmToAsm/Reg/Linear/Base.hs
- compiler/GHC/CmmToAsm/Reg/Linear/JoinToTargets.hs
- compiler/GHC/CmmToAsm/Reg/Target.hs
- compiler/GHC/CmmToAsm/X86/CodeGen.hs
- compiler/GHC/CmmToAsm/X86/Instr.hs
- compiler/GHC/CmmToAsm/X86/Ppr.hs


Changes:

=====================================
compiler/GHC/CmmToAsm/AArch64/Instr.hs
=====================================
@@ -15,6 +15,7 @@ import GHC.CmmToAsm.Types
 import GHC.CmmToAsm.Utils
 import GHC.CmmToAsm.Config
 import GHC.Platform.Reg
+import GHC.Platform.Reg.Class
 
 import GHC.Platform.Regs
 import GHC.Cmm.BlockId
@@ -30,6 +31,7 @@ import GHC.Utils.Panic
 import Data.Maybe (fromMaybe)
 
 import GHC.Stack
+import GHC.CmmToAsm.Reg.Target (targetClassOfReg)
 
 -- | LR and FP (8 byte each) are the prologue of each stack frame
 stackFrameHeaderSize :: Int
@@ -149,11 +151,15 @@ regUsageOfInstr platform instr = case instr of
         -- filtering the usage is necessary, otherwise the register
         -- allocator will try to allocate pre-defined fixed stg
         -- registers as well, as they show up.
-        usage (src, dst) = RU (map (\r -> RegFormat r II64) $ filter (interesting platform) src)
-                              (map (\r -> RegFormat r II64) $ filter (interesting platform) dst)
+        usage (src, dst) = RU (map mkFmt $ filter (interesting platform) src)
+                              (map mkFmt $ filter (interesting platform) dst)
           -- SIMD NCG TODO: the format here is used for register spilling/unspilling.
           -- As the AArch64 NCG does not currently support SIMD registers,
-          -- we simply use II64 format for all instructions.
+          -- this simple logic is OK.
+        mkFmt r = RegFormat r fmt
+          where fmt = case targetClassOfReg platform r of
+                        RcInteger -> II64
+                        RcFloatOrVector -> FF64
 
         regAddr :: AddrMode -> [Reg]
         regAddr (AddrRegReg r1 r2) = [r1, r2]


=====================================
compiler/GHC/CmmToAsm/Format.hs
=====================================
@@ -42,7 +42,6 @@ import GHC.Types.Unique.Set
 import GHC.Utils.Outputable
 import GHC.Utils.Panic
 
-
 {- Note [GHC's data format representations]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 GHC has severals types that represent various aspects of data format.


=====================================
compiler/GHC/CmmToAsm/Instr.hs
=====================================
@@ -138,7 +138,7 @@ class Instruction instr where
         --      Must work for all register classes.
         mkRegRegMoveInstr
                 :: HasDebugCallStack
-                => Platform
+                => NCGConfig
                 -> Format
                 -> Reg -- ^ source register
                 -> Reg -- ^ destination register


=====================================
compiler/GHC/CmmToAsm/PPC/Instr.hs
=====================================
@@ -391,11 +391,15 @@ regUsageOfInstr platform instr
     FMADD _ _ rt ra rc rb   -> usage ([ra, rc, rb], [rt])
     _                       -> noUsage
   where
-    usage (src, dst) = RU (map (\ r -> RegFormat r II64) $ filter (interesting platform) src)
-                          (map (\ r -> RegFormat r II64) $ filter (interesting platform) dst)
+    usage (src, dst) = RU (map mkFmt $ filter (interesting platform) src)
+                          (map mkFmt $ filter (interesting platform) dst)
       -- SIMD NCG TODO: the format here is used for register spilling/unspilling.
-      -- As the PowerPC NCG does not currently support SIMD registers,
-      -- we simply use II64 format for all instructions.
+      -- As the AArch64 NCG does not currently support SIMD registers,
+      -- this simple logic is OK.
+    mkFmt r = RegFormat r fmt
+      where fmt = case targetClassOfReg platform r of
+                    RcInteger -> archWordFormat (target32Bit platform)
+                    RcFloatOrVector -> FF64
     regAddr (AddrRegReg r1 r2) = [r1, r2]
     regAddr (AddrRegImm r1 _)  = [r1]
 


=====================================
compiler/GHC/CmmToAsm/Reg/Graph.hs
=====================================
@@ -216,7 +216,7 @@ regAlloc_spin config spinCount triv regsFree slotsFree slotsCount debug_codeGrap
                 --   of a vreg, but it might not need to be on the stack for
                 --   its entire lifetime.
                 let code_spillclean
-                        = map (cleanSpills platform) code_patched
+                        = map (cleanSpills config) code_patched
 
                 -- Strip off liveness information from the allocated code.
                 -- Also rewrite SPILL/RELOAD meta instructions into real machine


=====================================
compiler/GHC/CmmToAsm/Reg/Graph/SpillClean.hs
=====================================
@@ -35,6 +35,7 @@ module GHC.CmmToAsm.Reg.Graph.SpillClean (
 ) where
 import GHC.Prelude
 
+import GHC.CmmToAsm.Config
 import GHC.CmmToAsm.Reg.Liveness
 import GHC.CmmToAsm.Format
 import GHC.CmmToAsm.Instr
@@ -49,7 +50,6 @@ import GHC.Builtin.Uniques
 import GHC.Utils.Monad.State.Strict
 import GHC.Utils.Outputable
 import GHC.Utils.Panic
-import GHC.Platform
 import GHC.Cmm.Dataflow.Label
 
 import Data.List (nub, foldl1', find)
@@ -58,6 +58,7 @@ import Data.IntSet              (IntSet)
 import qualified Data.IntSet    as IntSet
 
 
+
 -- | The identification number of a spill slot.
 --   A value is stored in a spill slot when we don't have a free
 --   register to hold it.
@@ -67,23 +68,23 @@ type Slot = Int
 -- | Clean out unneeded spill\/reloads from this top level thing.
 cleanSpills
         :: Instruction instr
-        => Platform
+        => NCGConfig
         -> LiveCmmDecl statics instr
         -> LiveCmmDecl statics instr
 
-cleanSpills platform cmm
-        = evalState (cleanSpin platform 0 cmm) initCleanS
+cleanSpills config cmm
+        = evalState (cleanSpin config 0 cmm) initCleanS
 
 
 -- | Do one pass of cleaning.
 cleanSpin
         :: Instruction instr
-        => Platform
+        => NCGConfig
         -> Int                              -- ^ Iteration number for the cleaner.
         -> LiveCmmDecl statics instr        -- ^ Liveness annotated code to clean.
         -> CleanM (LiveCmmDecl statics instr)
 
-cleanSpin platform spinCount code
+cleanSpin config spinCount code
  = do
         -- Initialise count of cleaned spill and reload instructions.
         modify $ \s -> s
@@ -91,7 +92,7 @@ cleanSpin platform spinCount code
                 , sCleanedReloadsAcc    = 0
                 , sReloadedBy           = emptyUFM }
 
-        code_forward    <- mapBlockTopM (cleanBlockForward platform) code
+        code_forward    <- mapBlockTopM (cleanBlockForward config) code
         code_backward   <- cleanTopBackward code_forward
 
         -- During the cleaning of each block we collected information about
@@ -113,7 +114,7 @@ cleanSpin platform spinCount code
            then return code
 
         -- otherwise go around again
-           else cleanSpin platform (spinCount + 1) code_backward
+           else cleanSpin config (spinCount + 1) code_backward
 
 
 -------------------------------------------------------------------------------
@@ -121,11 +122,11 @@ cleanSpin platform spinCount code
 --   while walking forward over the code.
 cleanBlockForward
         :: Instruction instr
-        => Platform
+        => NCGConfig
         -> LiveBasicBlock instr
         -> CleanM (LiveBasicBlock instr)
 
-cleanBlockForward platform (BasicBlock blockId instrs)
+cleanBlockForward config (BasicBlock blockId instrs)
  = do
         -- See if we have a valid association for the entry to this block.
         jumpValid       <- gets sJumpValid
@@ -133,7 +134,7 @@ cleanBlockForward platform (BasicBlock blockId instrs)
                                 Just assoc      -> assoc
                                 Nothing         -> emptyAssoc
 
-        instrs_reload   <- cleanForward platform blockId assoc [] instrs
+        instrs_reload   <- cleanForward config blockId assoc [] instrs
         return  $ BasicBlock blockId instrs_reload
 
 
@@ -146,7 +147,7 @@ cleanBlockForward platform (BasicBlock blockId instrs)
 --
 cleanForward
         :: Instruction instr
-        => Platform
+        => NCGConfig
         -> BlockId                  -- ^ the block that we're currently in
         -> Assoc Store              -- ^ two store locations are associated if
                                     --     they have the same value
@@ -159,24 +160,23 @@ cleanForward _ _ _ acc []
 
 -- Rewrite live range joins via spill slots to just a spill and a reg-reg move
 -- hopefully the spill will be also be cleaned in the next pass
-cleanForward platform blockId assoc acc (li1 : li2 : instrs)
-
+cleanForward config blockId assoc acc (li1 : li2 : instrs)
         | LiveInstr (SPILL  reg1  slot1) _ <- li1
         , LiveInstr (RELOAD slot2 reg2)  _ <- li2
         , slot1 == slot2
         = do
                 modify $ \s -> s { sCleanedReloadsAcc = sCleanedReloadsAcc s + 1 }
-                cleanForward platform blockId assoc acc
-                 $ li1 : LiveInstr (mkRegRegMoveInstr platform (regFormatFormat reg2) (regFormatReg reg1) (regFormatReg reg2)) Nothing
+                cleanForward config blockId assoc acc
+                 $ li1 : LiveInstr (mkRegRegMoveInstr config (regFormatFormat reg2) (regFormatReg reg1) (regFormatReg reg2)) Nothing
                        : instrs
 
-cleanForward platform blockId assoc acc (li@(LiveInstr i1 _) : instrs)
-        | Just (r1, r2) <- takeRegRegMoveInstr platform i1
+cleanForward config blockId assoc acc (li@(LiveInstr i1 _) : instrs)
+        | Just (r1, r2) <- takeRegRegMoveInstr (ncgPlatform config) i1
         = if r1 == r2
                 -- Erase any left over nop reg reg moves while we're here
                 -- this will also catch any nop moves that the previous case
                 -- happens to add.
-                then cleanForward platform blockId assoc acc instrs
+                then cleanForward config blockId assoc acc instrs
 
                 -- If r1 has the same value as some slots and we copy r1 to r2,
                 --      then r2 is now associated with those slots instead
@@ -184,26 +184,26 @@ cleanForward platform blockId assoc acc (li@(LiveInstr i1 _) : instrs)
                                         $ delAssoc (SReg r2)
                                         $ assoc
 
-                        cleanForward platform blockId assoc' (li : acc) instrs
+                        cleanForward config blockId assoc' (li : acc) instrs
 
 
-cleanForward platform blockId assoc acc (li : instrs)
+cleanForward config blockId assoc acc (li : instrs)
 
         -- Update association due to the spill.
         | LiveInstr (SPILL reg slot) _  <- li
         = let   assoc'  = addAssoc (SReg $ regFormatReg reg)  (SSlot slot)
                         $ delAssoc (SSlot slot)
                         $ assoc
-          in    cleanForward platform blockId assoc' (li : acc) instrs
+          in    cleanForward config blockId assoc' (li : acc) instrs
 
         -- Clean a reload instr.
         | LiveInstr (RELOAD{}) _        <- li
-        = do    (assoc', mli)   <- cleanReload platform blockId assoc li
+        = do    (assoc', mli)   <- cleanReload config blockId assoc li
                 case mli of
-                 Nothing        -> cleanForward platform blockId assoc' acc
+                 Nothing        -> cleanForward config blockId assoc' acc
                                                 instrs
 
-                 Just li'       -> cleanForward platform blockId assoc' (li' : acc)
+                 Just li'       -> cleanForward config blockId assoc' (li' : acc)
                                                 instrs
 
         -- Remember the association over a jump.
@@ -211,26 +211,26 @@ cleanForward platform blockId assoc acc (li : instrs)
         , targets               <- jumpDestsOfInstr instr
         , not $ null targets
         = do    mapM_ (accJumpValid assoc) targets
-                cleanForward platform blockId assoc (li : acc) instrs
+                cleanForward config blockId assoc (li : acc) instrs
 
         -- Writing to a reg changes its value.
         | LiveInstr instr _     <- li
-        , RU _ written          <- regUsageOfInstr platform instr
+        , RU _ written          <- regUsageOfInstr (ncgPlatform config) instr
         = let assoc'    = foldr delAssoc assoc (map SReg $ nub $ map regFormatReg written)
-          in  cleanForward platform blockId assoc' (li : acc) instrs
+          in  cleanForward config blockId assoc' (li : acc) instrs
 
 
 
 -- | Try and rewrite a reload instruction to something more pleasing
 cleanReload
         :: Instruction instr
-        => Platform
+        => NCGConfig
         -> BlockId
         -> Assoc Store
         -> LiveInstr instr
         -> CleanM (Assoc Store, Maybe (LiveInstr instr))
 
-cleanReload platform blockId assoc li@(LiveInstr (RELOAD slot (RegFormat reg fmt)) _)
+cleanReload config blockId assoc li@(LiveInstr (RELOAD slot (RegFormat reg fmt)) _)
 
         -- If the reg we're reloading already has the same value as the slot
         --      then we can erase the instruction outright.
@@ -248,7 +248,7 @@ cleanReload platform blockId assoc li@(LiveInstr (RELOAD slot (RegFormat reg fmt
                                 $ assoc
 
                 return  ( assoc'
-                        , Just $ LiveInstr (mkRegRegMoveInstr platform fmt reg2 reg) Nothing )
+                        , Just $ LiveInstr (mkRegRegMoveInstr config fmt reg2 reg) Nothing )
 
         -- Gotta keep this instr.
         | otherwise


=====================================
compiler/GHC/CmmToAsm/Reg/Linear.hs
=====================================
@@ -440,7 +440,7 @@ raInsn block_live new_instrs id (LiveInstr (Instr instr) (Just live))
                           not (dst `elemUFM` assig),
                           isRealReg src || isInReg src assig -> do
            case src of
-              RegReal rr -> setAssigR (addToUFM assig dst (InReg $ mkRealRegUsage platform rr fmt))
+              RegReal rr -> setAssigR (addToUFM assig dst (InReg $ RealRegUsage rr fmt))
                 -- if src is a fixed reg, then we just map dest to this
                 -- reg in the assignment.  src must be an allocatable reg,
                 -- otherwise it wouldn't be in r_dying.
@@ -509,7 +509,7 @@ genRaInsn block_live new_instrs block_id instr r_dying w_dying = do
     -- only read by this instr.  (the list is typically ~2 elements,
     -- so using nub isn't a problem).
     let virt_read :: [VirtualRegFormat]
-        virt_read = nubOrdOn virtualRegFormatReg [ mkVirtualRegFormat vr fmt | RegFormat (RegVirtual vr) fmt <- read ]
+        virt_read = nubOrdOn virtualRegFormatReg [ VirtualRegFormat vr fmt | RegFormat (RegVirtual vr) fmt <- read ]
 
 --     do
 --         let real_read       = nub [ rr      | (RegReal rr) <- read]
@@ -701,7 +701,8 @@ saveClobberedTemps clobbered dying
      -- See Note [UniqFM and the register allocator]
      clobber :: Unique -> (RegMap Loc,[instr]) -> RealRegUsage -> RegM freeRegs (RegMap Loc,[instr])
      clobber temp (assig,instrs) (RealRegUsage reg fmt)
-       = do platform <- getPlatform
+       = do config <- getConfig
+            platform <- getPlatform
 
             freeRegs <- getFreeRegsR
             let regclass
@@ -719,20 +720,20 @@ saveClobberedTemps clobbered dying
               (my_reg : _) -> do
                   setFreeRegsR (frAllocateReg platform my_reg freeRegs)
 
-                  let new_assign = addToUFM_Directly assig temp (InReg (mkRealRegUsage platform my_reg fmt))
-                  let instr = mkRegRegMoveInstr platform fmt
+                  let new_assign = addToUFM_Directly assig temp (InReg (RealRegUsage my_reg fmt))
+                  let instr = mkRegRegMoveInstr config fmt
                                   (RegReal reg) (RegReal my_reg)
 
                   return (new_assign,(instr : instrs))
 
               -- (2) no free registers: spill the value
               [] -> do
-                  (spill, slot)   <- spillR (mkRegFormat platform (RegReal reg) fmt) temp
+                  (spill, slot)   <- spillR (RegFormat (RegReal reg) fmt) temp
 
                   -- record why this reg was spilled for profiling
                   recordSpill (SpillClobber temp)
 
-                  let new_assign  = addToUFM_Directly assig temp (InBoth (mkRealRegUsage platform reg fmt) slot)
+                  let new_assign  = addToUFM_Directly assig temp (InBoth (RealRegUsage reg fmt) slot)
 
                   return (new_assign, (spill ++ instrs))
 
@@ -891,10 +892,11 @@ allocRegsAndSpill_spill reading keep spills alloc r@(VirtualRegFormat vr fmt) rs
                         = reg
                         | otherwise
                         = first_free
-                spills'   <- loadTemp platform r spill_loc final_reg spills
+
+                spills'   <- loadTemp r spill_loc final_reg spills
 
                 setAssigR $ toRegMap
-                          $ (addToUFM assig vr $! newLocation spill_loc $ mkRealRegUsage platform final_reg fmt)
+                          $ (addToUFM assig vr $! newLocation spill_loc $ RealRegUsage final_reg fmt)
                 setFreeRegsR $  frAllocateReg platform final_reg freeRegs
 
                 allocateRegsAndSpill reading keep spills' (final_reg : alloc) rs
@@ -937,9 +939,9 @@ allocRegsAndSpill_spill reading keep spills alloc r@(VirtualRegFormat vr fmt) rs
                         -- we have a temporary that is in both register and mem,
                         -- just free up its register for use.
                         | (temp, (RealRegUsage my_reg _old_fmt), slot) : _ <- candidates_inBoth
-                        = do    spills' <- loadTemp platform r spill_loc my_reg spills
+                        = do    spills' <- loadTemp r spill_loc my_reg spills
                                 let assig1  = addToUFM_Directly assig temp (InMem slot)
-                                let assig2  = addToUFM assig1 vr $! newLocation spill_loc (mkRealRegUsage platform my_reg fmt)
+                                let assig2  = addToUFM assig1 vr $! newLocation spill_loc (RealRegUsage my_reg fmt)
 
                                 setAssigR $ toRegMap assig2
                                 allocateRegsAndSpill reading keep spills' (my_reg:alloc) rs
@@ -949,18 +951,18 @@ allocRegsAndSpill_spill reading keep spills alloc r@(VirtualRegFormat vr fmt) rs
                         | (temp_to_push_out, RealRegUsage my_reg fmt) : _
                                         <- candidates_inReg
                         = do
-                                (spill_store, slot) <- spillR (mkRegFormat platform (RegReal my_reg) fmt) temp_to_push_out
+                                (spill_store, slot) <- spillR (RegFormat (RegReal my_reg) fmt) temp_to_push_out
 
                                 -- record that this temp was spilled
                                 recordSpill (SpillAlloc temp_to_push_out)
 
                                 -- update the register assignment
                                 let assig1  = addToUFM_Directly assig temp_to_push_out (InMem slot)
-                                let assig2  = addToUFM assig1 vr $! newLocation spill_loc (mkRealRegUsage platform my_reg fmt)
+                                let assig2  = addToUFM assig1 vr $! newLocation spill_loc (RealRegUsage my_reg fmt)
                                 setAssigR $ toRegMap assig2
 
                                 -- if need be, load up a spilled temp into the reg we've just freed up.
-                                spills' <- loadTemp platform r spill_loc my_reg spills
+                                spills' <- loadTemp r spill_loc my_reg spills
 
                                 allocateRegsAndSpill reading keep
                                         (spill_store ++ spills')
@@ -978,6 +980,7 @@ allocRegsAndSpill_spill reading keep spills alloc r@(VirtualRegFormat vr fmt) rs
                                 , text "initFreeRegs:     " <> text (showRegs (frInitFreeRegs platform `asTypeOf` freeRegs))
                                 ]
                         where showRegs = show . map (\reg -> (reg, targetClassOfRealReg platform reg)) . allFreeRegs platform
+
                 result
 
 
@@ -991,18 +994,17 @@ newLocation _ my_reg = InReg my_reg
 -- | Load up a spilled temporary if we need to (read from memory).
 loadTemp
         :: (Instruction instr)
-        => Platform
-        -> VirtualRegFormat   -- the temp being loaded
+        => VirtualRegFormat   -- the temp being loaded
         -> SpillLoc     -- the current location of this temp
         -> RealReg      -- the hreg to load the temp into
         -> [instr]
         -> RegM freeRegs [instr]
 
-loadTemp platform (VirtualRegFormat vreg fmt) (ReadMem slot) hreg spills
+loadTemp (VirtualRegFormat vreg fmt) (ReadMem slot) hreg spills
  = do
-        insn <- loadR (mkRegFormat platform (RegReal hreg) fmt) slot
+        insn <- loadR (RegFormat (RegReal hreg) fmt) slot
         recordSpill (SpillLoad $ getUnique vreg)
         return  $  {- mkComment (text "spill load") : -} insn ++ spills
 
-loadTemp _ _ _ _ spills =
+loadTemp _ _ _ spills =
    return spills


=====================================
compiler/GHC/CmmToAsm/Reg/Linear/Base.hs
=====================================
@@ -12,7 +12,6 @@ module GHC.CmmToAsm.Reg.Linear.Base (
         Loc(..),
         regsOfLoc,
         RealRegUsage(..),
-        mkRealRegUsage,
 
         -- for stats
         SpillReason(..),
@@ -29,7 +28,6 @@ import GHC.Prelude
 import GHC.CmmToAsm.Reg.Linear.StackMap
 import GHC.CmmToAsm.Reg.Liveness
 import GHC.CmmToAsm.Config
-import GHC.Platform
 import GHC.Platform.Reg
 
 import GHC.Utils.Outputable
@@ -40,9 +38,6 @@ import GHC.Cmm.BlockId
 import GHC.Cmm.Dataflow.Label
 import GHC.CmmToAsm.Reg.Utils
 import GHC.CmmToAsm.Format
-import GHC.Platform.Reg.Class
-import GHC.Utils.Panic
-import GHC.CmmToAsm.Reg.Target (targetClassOfRealReg)
 
 import Data.Function ( on )
 
@@ -114,40 +109,28 @@ data Loc
                    {-# UNPACK #-} !StackSlot
         deriving (Eq, Ord, Show)
 
+instance Outputable Loc where
+        ppr l = text (show l)
+
+-- | A 'RealReg', together with the specific 'Format' it is used at.
 data RealRegUsage
   = RealRegUsage
     { realReg :: !RealReg
     , realRegFormat :: !Format
     } deriving Show
 
+instance Outputable RealRegUsage where
+  ppr (RealRegUsage r fmt) = ppr r <> dcolon <+> ppr fmt
+
+-- NB: these instances only compare the underlying 'RealReg', as that is what
+-- is important for register allocation.
+--
+-- (It would nonetheless be a good idea to remove these instances.)
 instance Eq RealRegUsage where
   (==) = (==) `on` realReg
 instance Ord RealRegUsage where
   compare = compare `on` realReg
 
-mkRealRegUsage :: Platform -> RealReg -> Format -> RealRegUsage
-mkRealRegUsage platform reg fmt
-  = assertPpr (regCls == fmtCls)
-    (vcat [ text "mkRealRegUsage: incompatible register & format"
-          , text "reg:" <+> ppr reg <+> dcolon <+> ppr regCls
-          , text "fmt:" <+> ppr fmt <+> parens (ppr fmtCls) ])
-  $ RealRegUsage reg fmt
-  where
-    regCls = targetClassOfRealReg platform reg
-    fmtCls = formatRegClass fmt
-

--- TODO: SIMD debugging
-formatRegClass :: Format -> RegClass
-formatRegClass fmt
-  | isIntFormat fmt
-  = RcInteger
-  | otherwise
-  = RcFloatOrVector
-
-instance Outputable Loc where
-        ppr l = text (show l)
-
-
 -- | Get the reg numbers stored in this Loc.
 regsOfLoc :: Loc -> [RealRegUsage]
 regsOfLoc (InReg r)    = [r]


=====================================
compiler/GHC/CmmToAsm/Reg/Linear/JoinToTargets.hs
=====================================
@@ -361,12 +361,10 @@ makeMove
 
 makeMove delta vreg src dst
  = do config <- getConfig
-      let platform = ncgPlatform config
-
       case (src, dst) of
           (InReg (RealRegUsage s _), InReg (RealRegUsage d fmt)) ->
               do recordSpill (SpillJoinRR vreg)
-                 return $ [mkRegRegMoveInstr platform fmt (RegReal s) (RegReal d)]
+                 return $ [mkRegRegMoveInstr config fmt (RegReal s) (RegReal d)]
           (InMem s, InReg (RealRegUsage d cls)) ->
               do recordSpill (SpillJoinRM vreg)
                  return $ mkLoadInstr config (RegFormat (RegReal d) cls) delta s


=====================================
compiler/GHC/CmmToAsm/Reg/Target.hs
=====================================
@@ -15,7 +15,7 @@ module GHC.CmmToAsm.Reg.Target (
         targetMkVirtualReg,
         targetRegDotColor,
         targetClassOfReg,
-        mkVirtualRegFormat, mkRegFormat, mapRegFormatSet,
+        mapRegFormatSet,
 )
 
 where
@@ -142,35 +142,5 @@ targetClassOfReg platform reg
    RegVirtual vr -> classOfVirtualReg vr
    RegReal rr -> targetClassOfRealReg platform rr
 
-mkVirtualRegFormat :: HasDebugCallStack => VirtualReg -> Format -> VirtualRegFormat
-mkVirtualRegFormat reg fmt
-  = assertPpr (regCls == fmtCls)
-    (vcat [ text "mkVirtualRegFormat: incompatible register & format"
-          , text "reg:" <+> ppr reg <+> dcolon <+> ppr regCls
-          , text "fmt:" <+> ppr fmt <+> parens (ppr fmtCls) ])
-  $ VirtualRegFormat reg fmt
-  where
-    regCls = classOfVirtualReg reg
-    fmtCls = formatRegClass fmt
-
-mkRegFormat :: HasDebugCallStack => Platform -> Reg -> Format -> RegFormat
-mkRegFormat platform reg fmt
-  = assertPpr (regCls == fmtCls)
-    (vcat [ text "mkRegFormat: incompatible register & format"
-          , text "reg:" <+> ppr reg <+> dcolon <+> ppr regCls
-          , text "fmt:" <+> ppr fmt <+> parens (ppr fmtCls) ])
-  $ RegFormat reg fmt
-  where
-    regCls = targetClassOfReg platform reg
-    fmtCls = formatRegClass fmt
-
--- TODO: SIMD debugging
-formatRegClass :: Format -> RegClass
-formatRegClass fmt
-  | isIntFormat fmt
-  = RcInteger
-  | otherwise
-  = RcFloatOrVector
-
 mapRegFormatSet :: HasDebugCallStack => (Reg -> Reg) -> UniqSet RegFormat -> UniqSet RegFormat
 mapRegFormatSet f = mapUniqSet (\ ( RegFormat r fmt ) -> RegFormat ( f r ) fmt)


=====================================
compiler/GHC/CmmToAsm/X86/CodeGen.hs
=====================================
@@ -35,7 +35,6 @@ import GHC.CmmToAsm.X86.Cond
 import GHC.CmmToAsm.X86.Regs
 import GHC.CmmToAsm.X86.Ppr
 import GHC.CmmToAsm.X86.RegInfo
-import GHC.CmmToAsm.Reg.Target
 
 import GHC.Platform.Regs
 import GHC.CmmToAsm.CPrim
@@ -90,7 +89,6 @@ import Data.Maybe
 import Data.Word
 
 import qualified Data.Map as M
-import GHC.Platform.Reg.Class (RegClass(..))
 
 is32BitPlatform :: NatM Bool
 is32BitPlatform = do
@@ -522,44 +520,24 @@ getSomeReg expr = do
     Fixed _ reg code ->
         return (reg, code)
 
-
-mkMOV :: HasDebugCallStack => Platform -> Format -> Operand -> Operand -> Instr
-mkMOV platform fmt op1 op2 =
-  assertPpr (all (== fmtCls) $ catMaybes [cls1, cls2])
-    (vcat [ text "invalid mkMOV instruction"
-          , text "fmt:" <+> ppr fmt
-          , case op1 of { OpReg r1 -> ppr r1 <+> dcolon <+> ppr (fromJust cls1); _ -> empty }
-          , case op2 of { OpReg r2 -> ppr r2 <+> dcolon <+> ppr (fromJust cls2); _ -> empty }
-          ])
-    $ MOV fmt op1 op2
-
-  where
-    fmtCls = if isIntFormat fmt then RcInteger else RcFloatOrVector
-    cls1 = case op1 of { OpReg r1 -> Just (targetClassOfReg platform r1); _ -> Nothing }
-    cls2 = case op2 of { OpReg r2 -> Just (targetClassOfReg platform r2); _ -> Nothing }
-
-
-
 assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
 assignMem_I64Code addrTree valueTree = do
-  platform <- getPlatform
   Amode addr addr_code <- getAmode addrTree
   RegCode64 vcode rhi rlo <- iselExpr64 valueTree
   let
         -- Little-endian store
-        mov_lo = mkMOV platform II32 (OpReg rlo) (OpAddr addr)
-        mov_hi = mkMOV platform II32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
+        mov_lo = MOV II32 (OpReg rlo) (OpAddr addr)
+        mov_hi = MOV II32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
   return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
 
 
 assignReg_I64Code :: CmmReg  -> CmmExpr -> NatM InstrBlock
 assignReg_I64Code (CmmLocal dst) valueTree = do
-   platform <- getPlatform
    RegCode64 vcode r_src_hi r_src_lo <- iselExpr64 valueTree
    let
          Reg64 r_dst_hi r_dst_lo = localReg64 dst
-         mov_lo = mkMOV platform II32 (OpReg r_src_lo) (OpReg r_dst_lo)
-         mov_hi = mkMOV platform II32 (OpReg r_src_hi) (OpReg r_dst_hi)
+         mov_lo = MOV II32 (OpReg r_src_lo) (OpReg r_dst_lo)
+         mov_hi = MOV II32 (OpReg r_src_hi) (OpReg r_dst_hi)
    return (
         vcode `snocOL` mov_lo `snocOL` mov_hi
      )
@@ -569,24 +547,22 @@ assignReg_I64Code _ _
 
 iselExpr64 :: HasDebugCallStack => CmmExpr -> NatM (RegCode64 InstrBlock)
 iselExpr64 (CmmLit (CmmInt i _)) = do
-  platform <- getPlatform
   Reg64 rhi rlo <- getNewReg64
   let
         r = fromIntegral (fromIntegral i :: Word32)
         q = fromIntegral (fromIntegral (i `shiftR` 32) :: Word32)
         code = toOL [
-                mkMOV platform II32 (OpImm (ImmInteger r)) (OpReg rlo),
-                mkMOV platform II32 (OpImm (ImmInteger q)) (OpReg rhi)
+                MOV II32 (OpImm (ImmInteger r)) (OpReg rlo),
+                MOV II32 (OpImm (ImmInteger q)) (OpReg rhi)
                 ]
   return (RegCode64 code rhi rlo)
 
 iselExpr64 (CmmLoad addrTree ty _) | isWord64 ty = do
-   platform <- getPlatform
    Amode addr addr_code <- getAmode addrTree
    Reg64 rhi rlo <- getNewReg64
    let
-        mov_lo = mkMOV platform II32 (OpAddr addr) (OpReg rlo)
-        mov_hi = mkMOV platform II32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
+        mov_lo = MOV II32 (OpAddr addr) (OpReg rlo)
+        mov_hi = MOV II32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
    return (
             RegCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi) rhi rlo
      )
@@ -596,44 +572,41 @@ iselExpr64 (CmmReg (CmmLocal local_reg)) = do
   return (RegCode64 nilOL hi lo)
 
 iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
-   platform <- getPlatform
    RegCode64 code1 r1hi r1lo <- iselExpr64 e1
    Reg64 rhi rlo <- getNewReg64
    let
         r = fromIntegral (fromIntegral i :: Word32)
         q = fromIntegral (fromIntegral (i `shiftR` 32) :: Word32)
         code =  code1 `appOL`
-                toOL [ mkMOV platform II32 (OpReg r1lo) (OpReg rlo),
+                toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
                        ADD II32 (OpImm (ImmInteger r)) (OpReg rlo),
-                       mkMOV platform II32 (OpReg r1hi) (OpReg rhi),
+                       MOV II32 (OpReg r1hi) (OpReg rhi),
                        ADC II32 (OpImm (ImmInteger q)) (OpReg rhi) ]
    return (RegCode64 code rhi rlo)
 
 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
-   platform <- getPlatform
    RegCode64 code1 r1hi r1lo <- iselExpr64 e1
    RegCode64 code2 r2hi r2lo <- iselExpr64 e2
    Reg64 rhi rlo <- getNewReg64
    let
         code =  code1 `appOL`
                 code2 `appOL`
-                toOL [ mkMOV platform II32 (OpReg r1lo) (OpReg rlo),
+                toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
                        ADD II32 (OpReg r2lo) (OpReg rlo),
-                       mkMOV platform II32 (OpReg r1hi) (OpReg rhi),
+                       MOV II32 (OpReg r1hi) (OpReg rhi),
                        ADC II32 (OpReg r2hi) (OpReg rhi) ]
    return (RegCode64 code rhi rlo)
 
 iselExpr64 (CmmMachOp (MO_Sub _) [e1,e2]) = do
-   platform <- getPlatform
    RegCode64 code1 r1hi r1lo <- iselExpr64 e1
    RegCode64 code2 r2hi r2lo <- iselExpr64 e2
    Reg64 rhi rlo <- getNewReg64
    let
         code =  code1 `appOL`
                 code2 `appOL`
-                toOL [ mkMOV platform II32 (OpReg r1lo) (OpReg rlo),
+                toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
                        SUB II32 (OpReg r2lo) (OpReg rlo),
-                       mkMOV platform II32 (OpReg r1hi) (OpReg rhi),
+                       MOV II32 (OpReg r1hi) (OpReg rhi),
                        SBB II32 (OpReg r2hi) (OpReg rhi) ]
    return (RegCode64 code rhi rlo)
 
@@ -666,48 +639,44 @@ iselExpr64 (CmmMachOp (MO_UU_Conv W8 W64) [expr]) = do
                           r_dst_lo
 
 iselExpr64 (CmmMachOp (MO_SS_Conv W32 W64) [expr]) = do
-     platform <- getPlatform
      code <- getAnyReg expr
      Reg64 r_dst_hi r_dst_lo <- getNewReg64
      return $ RegCode64 (code r_dst_lo `snocOL`
-                          mkMOV platform II32 (OpReg r_dst_lo) (OpReg eax) `snocOL`
+                          MOV II32 (OpReg r_dst_lo) (OpReg eax) `snocOL`
                           CLTD II32 `snocOL`
-                          mkMOV platform II32 (OpReg eax) (OpReg r_dst_lo) `snocOL`
-                          mkMOV platform II32 (OpReg edx) (OpReg r_dst_hi))
+                          MOV II32 (OpReg eax) (OpReg r_dst_lo) `snocOL`
+                          MOV II32 (OpReg edx) (OpReg r_dst_hi))
                           r_dst_hi
                           r_dst_lo
 
 iselExpr64 (CmmMachOp (MO_SS_Conv W16 W64) [expr]) = do
-     platform <- getPlatform
      (r, code) <- getByteReg expr
      Reg64 r_dst_hi r_dst_lo <- getNewReg64
      return $ RegCode64 (code `appOL` toOL [
                           MOVSxL II16 (OpReg r) (OpReg eax),
                           CLTD II32,
-                          mkMOV platform II32 (OpReg eax) (OpReg r_dst_lo),
-                          mkMOV platform II32 (OpReg edx) (OpReg r_dst_hi)])
+                          MOV II32 (OpReg eax) (OpReg r_dst_lo),
+                          MOV II32 (OpReg edx) (OpReg r_dst_hi)])
                           r_dst_hi
                           r_dst_lo
 
 iselExpr64 (CmmMachOp (MO_SS_Conv W8 W64) [expr]) = do
-     platform <- getPlatform
      (r, code) <- getByteReg expr
      Reg64 r_dst_hi r_dst_lo <- getNewReg64
      return $ RegCode64 (code `appOL` toOL [
                           MOVSxL II8 (OpReg r) (OpReg eax),
                           CLTD II32,
-                          mkMOV platform II32 (OpReg eax) (OpReg r_dst_lo),
-                          mkMOV platform II32 (OpReg edx) (OpReg r_dst_hi)])
+                          MOV II32 (OpReg eax) (OpReg r_dst_lo),
+                          MOV II32 (OpReg edx) (OpReg r_dst_hi)])
                           r_dst_hi
                           r_dst_lo
 
 iselExpr64 (CmmMachOp (MO_S_Neg _) [expr]) = do
-   platform <- getPlatform
    RegCode64 code rhi rlo <- iselExpr64 expr
    Reg64 rohi rolo <- getNewReg64
    let
         ocode = code `appOL`
-                toOL [ mkMOV platform II32 (OpReg rlo) (OpReg rolo),
+                toOL [ MOV II32 (OpReg rlo) (OpReg rolo),
                        XOR II32 (OpReg rohi) (OpReg rohi),
                        NEGI II32 (OpReg rolo),
                        SBB II32 (OpReg rhi) (OpReg rohi) ]
@@ -723,7 +692,6 @@ iselExpr64 (CmmMachOp (MO_S_Neg _) [expr]) = do
 -- Note that @(r1hi * r2hi) << 64@ can be dropped because it overflows completely.
 
 iselExpr64 (CmmMachOp (MO_Mul _) [e1,e2]) = do
-   platform <- getPlatform
    RegCode64 code1 r1hi r1lo <- iselExpr64 e1
    RegCode64 code2 r2hi r2lo <- iselExpr64 e2
    Reg64 rhi rlo <- getNewReg64
@@ -731,27 +699,26 @@ iselExpr64 (CmmMachOp (MO_Mul _) [e1,e2]) = do
    let
         code =  code1 `appOL`
                 code2 `appOL`
-                toOL [ mkMOV platform  II32 (OpReg r1lo) (OpReg eax),
-                       mkMOV platform  II32 (OpReg r2lo) (OpReg tmp),
-                       mkMOV platform  II32 (OpReg r1hi) (OpReg rhi),
+                toOL [ MOV  II32 (OpReg r1lo) (OpReg eax),
+                       MOV  II32 (OpReg r2lo) (OpReg tmp),
+                       MOV  II32 (OpReg r1hi) (OpReg rhi),
                        IMUL II32 (OpReg tmp) (OpReg rhi),
-                       mkMOV platform  II32 (OpReg r2hi) (OpReg rlo),
+                       MOV  II32 (OpReg r2hi) (OpReg rlo),
                        IMUL II32 (OpReg eax) (OpReg rlo),
                        ADD  II32 (OpReg rlo) (OpReg rhi),
                        MUL2 II32 (OpReg tmp),
                        ADD  II32 (OpReg edx) (OpReg rhi),
-                       mkMOV platform  II32 (OpReg eax) (OpReg rlo)
+                       MOV  II32 (OpReg eax) (OpReg rlo)
                      ]
    return (RegCode64 code rhi rlo)
 
 iselExpr64 (CmmMachOp (MO_S_MulMayOflo W64) _) = do
-   platform <- getPlatform
    -- Performance sensitive users won't use 32 bit so let's keep it simple:
    -- We always return a (usually false) positive.
    Reg64 rhi rlo <- getNewReg64
    let code = toOL   [
-                       mkMOV platform  II32 (OpImm (ImmInt 1)) (OpReg rhi),
-                       mkMOV platform  II32 (OpImm (ImmInt 1)) (OpReg rlo)
+                       MOV  II32 (OpImm (ImmInt 1)) (OpReg rhi),
+                       MOV  II32 (OpImm (ImmInt 1)) (OpReg rlo)
                      ]
    return (RegCode64 code rhi rlo)
 
@@ -765,7 +732,6 @@ iselExpr64 (CmmMachOp (MO_S_MulMayOflo W64) _) = do
 -- the contents of @rlo@ to @rhi@ and clear @rlo at .
 
 iselExpr64 (CmmMachOp (MO_Shl _) [e1,e2]) = do
-   platform <- getPlatform
    RegCode64 code1 r1hi r1lo <- iselExpr64 e1
    code2 <- getAnyReg e2
    Reg64 rhi rlo <- getNewReg64
@@ -774,15 +740,15 @@ iselExpr64 (CmmMachOp (MO_Shl _) [e1,e2]) = do
    let
         code =  code1 `appOL`
                 code2 ecx `appOL`
-                toOL [ mkMOV platform II32 (OpReg r1lo) (OpReg rlo),
-                       mkMOV platform II32 (OpReg r1hi) (OpReg rhi),
+                toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
+                       MOV II32 (OpReg r1hi) (OpReg rhi),
                        SHLD II32 (OpReg ecx) (OpReg rlo) (OpReg rhi),
                        SHL II32 (OpReg ecx) (OpReg rlo),
                        TEST II32 (OpImm (ImmInt 32)) (OpReg ecx),
                        JXX EQQ lbl2,
                        JXX ALWAYS lbl1,
                        NEWBLOCK lbl1,
-                       mkMOV platform II32 (OpReg rlo) (OpReg rhi),
+                       MOV II32 (OpReg rlo) (OpReg rhi),
                        XOR II32 (OpReg rlo) (OpReg rlo),
                        JXX ALWAYS lbl2,
                        NEWBLOCK lbl2
@@ -796,7 +762,6 @@ iselExpr64 (CmmMachOp (MO_Shl _) [e1,e2]) = do
 -- To accomplish that we shift @rhi@ by 31.
 
 iselExpr64 (CmmMachOp (MO_S_Shr _) [e1,e2]) = do
-   platform <- getPlatform
    RegCode64 code1 r1hi r1lo <- iselExpr64 e1
    (r2, code2) <- getSomeReg e2
    Reg64 rhi rlo <- getNewReg64
@@ -805,16 +770,16 @@ iselExpr64 (CmmMachOp (MO_S_Shr _) [e1,e2]) = do
    let
         code =  code1 `appOL`
                 code2 `appOL`
-                toOL [ mkMOV platform II32 (OpReg r1lo) (OpReg rlo),
-                       mkMOV platform II32 (OpReg r1hi) (OpReg rhi),
-                       mkMOV platform II32 (OpReg r2) (OpReg ecx),
+                toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
+                       MOV II32 (OpReg r1hi) (OpReg rhi),
+                       MOV II32 (OpReg r2) (OpReg ecx),
                        SHRD II32 (OpReg ecx) (OpReg rhi) (OpReg rlo),
                        SAR II32 (OpReg ecx) (OpReg rhi),
                        TEST II32 (OpImm (ImmInt 32)) (OpReg ecx),
                        JXX EQQ lbl2,
                        JXX ALWAYS lbl1,
                        NEWBLOCK lbl1,
-                       mkMOV platform II32 (OpReg rhi) (OpReg rlo),
+                       MOV II32 (OpReg rhi) (OpReg rlo),
                        SAR II32 (OpImm (ImmInt 31)) (OpReg rhi),
                        JXX ALWAYS lbl2,
                        NEWBLOCK lbl2
@@ -824,7 +789,6 @@ iselExpr64 (CmmMachOp (MO_S_Shr _) [e1,e2]) = do
 -- Similar to the above.
 
 iselExpr64 (CmmMachOp (MO_U_Shr _) [e1,e2]) = do
-   platform <- getPlatform
    RegCode64 code1 r1hi r1lo <- iselExpr64 e1
    (r2, code2) <- getSomeReg e2
    Reg64 rhi rlo <- getNewReg64
@@ -833,16 +797,16 @@ iselExpr64 (CmmMachOp (MO_U_Shr _) [e1,e2]) = do
    let
         code =  code1 `appOL`
                 code2 `appOL`
-                toOL [ mkMOV platform II32 (OpReg r1lo) (OpReg rlo),
-                       mkMOV platform II32 (OpReg r1hi) (OpReg rhi),
-                       mkMOV platform II32 (OpReg r2) (OpReg ecx),
+                toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
+                       MOV II32 (OpReg r1hi) (OpReg rhi),
+                       MOV II32 (OpReg r2) (OpReg ecx),
                        SHRD II32 (OpReg ecx) (OpReg rhi) (OpReg rlo),
                        SHR II32 (OpReg ecx) (OpReg rhi),
                        TEST II32 (OpImm (ImmInt 32)) (OpReg ecx),
                        JXX EQQ lbl2,
                        JXX ALWAYS lbl1,
                        NEWBLOCK lbl1,
-                       mkMOV platform II32 (OpReg rhi) (OpReg rlo),
+                       MOV II32 (OpReg rhi) (OpReg rlo),
                        XOR II32 (OpReg rhi) (OpReg rhi),
                        JXX ALWAYS lbl2,
                        NEWBLOCK lbl2
@@ -854,13 +818,12 @@ iselExpr64 (CmmMachOp (MO_Or  _) [e1,e2]) = iselExpr64ParallelBin OR  e1 e2
 iselExpr64 (CmmMachOp (MO_Xor _) [e1,e2]) = iselExpr64ParallelBin XOR e1 e2
 
 iselExpr64 (CmmMachOp (MO_Not _) [e1]) = do
-   platform <- getPlatform
    RegCode64 code1 r1hi r1lo <- iselExpr64 e1
    Reg64 rhi rlo <- getNewReg64
    let
         code =  code1 `appOL`
-                toOL [ mkMOV platform II32 (OpReg r1lo) (OpReg rlo),
-                       mkMOV platform II32 (OpReg r1hi) (OpReg rhi),
+                toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
+                       MOV II32 (OpReg r1hi) (OpReg rhi),
                        NOT II32 (OpReg rlo),
                        NOT II32 (OpReg rhi)
                      ]
@@ -876,15 +839,14 @@ iselExpr64 expr
 iselExpr64ParallelBin :: (Format -> Operand -> Operand -> Instr)
                       -> CmmExpr -> CmmExpr -> NatM (RegCode64 (OrdList Instr))
 iselExpr64ParallelBin op e1 e2 = do
-   platform <- getPlatform
    RegCode64 code1 r1hi r1lo <- iselExpr64 e1
    RegCode64 code2 r2hi r2lo <- iselExpr64 e2
    Reg64 rhi rlo <- getNewReg64
    let
         code =  code1 `appOL`
                 code2 `appOL`
-                toOL [ mkMOV platform II32 (OpReg r1lo) (OpReg rlo),
-                       mkMOV platform II32 (OpReg r1hi) (OpReg rhi),
+                toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
+                       MOV II32 (OpReg r1hi) (OpReg rhi),
                        op  II32 (OpReg r2lo) (OpReg rlo),
                        op  II32 (OpReg r2hi) (OpReg rhi)
                      ]
@@ -1033,8 +995,7 @@ getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad addr _ _])
 
 getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad addr _ _])
  | not is32Bit = do
-  platform <- getPlatform
-  code <- intLoadCode (mkMOV platform II32) addr -- 32-bit loads zero-extend
+  code <- intLoadCode (MOV II32) addr -- 32-bit loads zero-extend
   return (Any II64 code)
 
 getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad addr _ _])
@@ -1101,16 +1062,16 @@ getRegister' platform is32Bit (CmmMachOp mop [x]) = do -- unary MachOps
       MO_SS_Conv W16 W32 -> integerExtend W16 W32 MOVSxL x
       MO_SS_Conv W8  W16 -> integerExtend W8  W16 MOVSxL x
 
-      -- We don't care about the upper bits for MO_XX_Conv, so mkMOV platform is enough. However, on 32-bit we
+      -- We don't care about the upper bits for MO_XX_Conv, so MOV is enough. However, on 32-bit we
       -- have 8-bit registers only for a few registers (as opposed to x86-64 where every register
       -- has 8-bit version). So for 32-bit code, we'll just zero-extend.
       MO_XX_Conv W8  W32
           | is32Bit   -> integerExtend W8 W32 MOVZxL x
-          | otherwise -> integerExtend W8 W32 (mkMOV platform) x
+          | otherwise -> integerExtend W8 W32 (MOV) x
       MO_XX_Conv W8  W16
           | is32Bit   -> integerExtend W8 W16 MOVZxL x
-          | otherwise -> integerExtend W8 W16 (mkMOV platform) x
-      MO_XX_Conv W16 W32 -> integerExtend W16 W32 (mkMOV platform) x
+          | otherwise -> integerExtend W8 W16 (MOV) x
+      MO_XX_Conv W16 W32 -> integerExtend W16 W32 (MOV) x
 
       MO_UU_Conv W8  W64 | not is32Bit -> integerExtend W8  W64 MOVZxL x
       MO_UU_Conv W16 W64 | not is32Bit -> integerExtend W16 W64 MOVZxL x
@@ -1123,10 +1084,10 @@ getRegister' platform is32Bit (CmmMachOp mop [x]) = do -- unary MachOps
       -- away as an unnecessary reg-to-reg move, so we keep it in
       -- the form of a movzl and print it as a movl later.
       -- This doesn't apply to MO_XX_Conv since in this case we don't care about
-      -- the upper bits. So we can just use mkMOV platform.
-      MO_XX_Conv W8  W64 | not is32Bit -> integerExtend W8  W64 (mkMOV platform) x
-      MO_XX_Conv W16 W64 | not is32Bit -> integerExtend W16 W64 (mkMOV platform) x
-      MO_XX_Conv W32 W64 | not is32Bit -> integerExtend W32 W64 (mkMOV platform) x
+      -- the upper bits. So we can just use MOV.
+      MO_XX_Conv W8  W64 | not is32Bit -> integerExtend W8  W64 (MOV) x
+      MO_XX_Conv W16 W64 | not is32Bit -> integerExtend W16 W64 (MOV) x
+      MO_XX_Conv W32 W64 | not is32Bit -> integerExtend W32 W64 (MOV) x
 
       MO_FF_Conv W32 W64 -> coerceFP2FP W64 x
       MO_FF_Conv W64 W32 -> coerceFP2FP W32 x
@@ -1350,10 +1311,7 @@ getRegister' platform is32Bit (CmmMachOp mop [x]) = do -- unary MachOps
           (reg, exp) <- getSomeReg expr
           let fmt = VecFormat len FmtInt64
           return $ Any fmt (\dst -> exp `snocOL`
-                                    (mkMOV platform II64 (OpReg reg) (OpReg dst)) `snocOL`
-                                    (PUNPCKLQDQ fmt (OpReg dst) dst) `snocOL`
-                                    (PUNPCKLQDQ fmt (OpReg dst) dst) `snocOL`
-                                    (PUNPCKLQDQ fmt (OpReg dst) dst) `snocOL`
+                                    (MOV2 II64 fmt (OpReg reg) (OpReg dst)) `snocOL`
                                     (PUNPCKLQDQ fmt (OpReg dst) dst)
                                     )
         vector_int_broadcast _ _ c
@@ -1745,7 +1703,7 @@ getRegister' platform is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
           imm      = litToImm lit
           code dst
             = case lit of
-                CmmInt 0 _ -> exp `snocOL` (mkMOV platform FF32 (OpReg r) (OpReg dst))
+                CmmInt 0 _ -> exp `snocOL` (MOV FF32 (OpReg r) (OpReg dst))
                 CmmInt _ _ -> exp `snocOL` (VPSHUFD format imm (OpReg r) dst)
                 _          -> panic "Error in offset while unpacking"
       return (Any format code)
@@ -1756,7 +1714,7 @@ getRegister' platform is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
           code dst
             = case lit of
                 CmmInt 0 _ -> exp `snocOL`
-                              (mkMOV platform FF64 (OpReg r) (OpReg dst))
+                              (MOV FF64 (OpReg r) (OpReg dst))
                 CmmInt 1 _ -> exp `snocOL`
                               (MOVHLPS format (OpReg r) dst)
                 _          -> panic "Error in offset while unpacking"
@@ -1798,10 +1756,10 @@ getRegister' platform is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
       let code dst
             = case lit of
                 CmmInt 0 _ -> exp `snocOL`
-                              (mkMOV platform II64 (OpReg r) (OpReg dst))
+                              (MOV2 fmt II64 (OpReg r) (OpReg dst))
                 CmmInt 1 _ -> exp `snocOL`
                               (MOVHLPS fmt (OpReg r) tmp) `snocOL`
-                              (mkMOV platform II64 (OpReg tmp) (OpReg dst))
+                              (MOV2 fmt II64 (OpReg tmp) (OpReg dst))
                 _          -> panic "Error in offset while unpacking"
       return (Any fmt code)
     vector_int_unpack_sse _ w c e
@@ -1942,11 +1900,11 @@ getRegister' platform _is32Bit (CmmMachOp mop [x, y, z]) = do -- ternary MachOps
               = case offset of
                   CmmInt 0 _ -> valExp `appOL`
                                 vecExp `snocOL`
-                                (mkMOV platform FF64 (OpReg valReg) (OpReg dst)) `snocOL`
+                                (MOV FF64 (OpReg valReg) (OpReg dst)) `snocOL`
                                 (SHUFPD fmt (ImmInt 0b00) (OpReg vecReg) dst)
                   CmmInt 1 _ -> valExp `appOL`
                                 vecExp `snocOL`
-                                (mkMOV platform FF64 (OpReg vecReg) (OpReg dst)) `snocOL`
+                                (MOV FF64 (OpReg vecReg) (OpReg dst)) `snocOL`
                                 (SHUFPD fmt (ImmInt 0b00) (OpReg valReg) dst)
                   _ -> pprPanic "MO_VF_Insert DoubleX2: unsupported offset" (ppr offset)
          in return $ Any fmt code
@@ -1972,23 +1930,21 @@ getRegister' platform _is32Bit (CmmMachOp mop [x, y, z]) = do -- ternary MachOps
     -- Int64X2
     vector_int_insert_sse len at 2 W64 vecExpr valExpr (CmmLit offset)
       = do
-        pprTraceM "vecExpr:" (pdoc platform vecExpr)
         (valReg, valExp) <- getSomeReg valExpr
         (vecReg, vecExp) <- getSomeReg vecExpr
         let fmt = VecFormat len FmtInt64
         tmp <- getNewRegNat fmt
-        pprTraceM "tmp:" (ppr tmp)
         let code dst
               = case offset of
                   CmmInt 0 _ -> valExp `appOL`
                                 vecExp `snocOL`
                                 (MOVHLPS fmt (OpReg vecReg) tmp) `snocOL`
-                                (mkMOV platform II64 (OpReg valReg) (OpReg dst)) `snocOL`
+                                (MOV2 II64 fmt (OpReg valReg) (OpReg dst)) `snocOL`
                                 (PUNPCKLQDQ fmt (OpReg tmp) dst)
                   CmmInt 1 _ -> valExp `appOL`
                                 vecExp `snocOL`
-                                (mkMOV platform fmt (OpReg vecReg) (OpReg dst)) `snocOL`
-                                (mkMOV platform II64 (OpReg valReg) (OpReg tmp)) `snocOL`
+                                (MOV fmt (OpReg vecReg) (OpReg dst)) `snocOL`
+                                (MOV2 II64 fmt (OpReg valReg) (OpReg tmp)) `snocOL`
                                 (PUNPCKLQDQ fmt (OpReg tmp) dst)
                   _ -> pprPanic "MO_V_Insert Int64X2: unsupported offset" (ppr offset)
          in return $ Any fmt code
@@ -2024,11 +1980,10 @@ getRegister' _ _ (CmmLoad mem pk _)
 getRegister' _ is32Bit (CmmLoad mem pk _)
   | is32Bit && not (isWord64 pk)
   = do
-    platform <- getPlatform
     let
       instr = case width of
                 W8     -> MOVZxL II8
-                _other -> mkMOV platform format
+                _other -> MOV format
     code <- intLoadCode instr mem
     return (Any format code)
   where
@@ -2044,8 +1999,7 @@ getRegister' _ is32Bit (CmmLoad mem pk _)
 getRegister' _ is32Bit (CmmLoad mem pk _)
  | not is32Bit
   = do
-    platform <- getPlatform
-    code <- intLoadCode (mkMOV platform format) mem
+    code <- intLoadCode (MOV format) mem
     return (Any format code)
   where format = intFormat $ typeWidth pk
 
@@ -2086,7 +2040,7 @@ getRegister' platform is32Bit (CmmLit lit)
   | not is32Bit, isWord64 (cmmLitType platform lit), not (isBigLit lit)
   = let
         imm = litToImm lit
-        code dst = unitOL (mkMOV platform II32 (OpImm imm) (OpReg dst))
+        code dst = unitOL (MOV II32 (OpImm imm) (OpReg dst))
     in
         return (Any II64 code)
   where
@@ -2115,7 +2069,7 @@ getRegister' platform _ (CmmLit lit)
       = do
       let format = cmmTypeFormat ctype
           imm = litToImm lit
-          code dst = unitOL (mkMOV platform format (OpImm imm) (OpReg dst))
+          code dst = unitOL (MOV format (OpImm imm) (OpReg dst))
       return (Any format code)
 
 getRegister' platform _ other
@@ -2138,15 +2092,15 @@ getAnyReg expr = do
 anyReg :: HasDebugCallStack => Register -> NatM (Reg -> InstrBlock)
 anyReg (Any _ code)          = return code
 anyReg (Fixed rep reg fcode) = do
-  platform <- getPlatform
-  return (\dst -> fcode `snocOL` mkRegRegMoveInstr platform rep reg dst)
+  config <- getConfig
+  return (\dst -> fcode `snocOL` mkRegRegMoveInstr config rep reg dst)
 
 -- A bit like getSomeReg, but we want a reg that can be byte-addressed.
 -- Fixed registers might not be byte-addressable, so we make sure we've
 -- got a temporary, inserting an extra reg copy if necessary.
 getByteReg :: HasDebugCallStack => CmmExpr -> NatM (Reg, InstrBlock)
 getByteReg expr = do
-  platform <- getPlatform
+  config <- getConfig
   is32Bit <- is32BitPlatform
   if is32Bit
       then do r <- getRegister expr
@@ -2158,7 +2112,7 @@ getByteReg expr = do
                     | isVirtualReg reg -> return (reg,code)
                     | otherwise -> do
                         tmp <- getNewRegNat rep
-                        return (tmp, code `snocOL` mkRegRegMoveInstr platform rep reg tmp)
+                        return (tmp, code `snocOL` mkRegRegMoveInstr config rep reg tmp)
                     -- ToDo: could optimise slightly by checking for
                     -- byte-addressable real registers, but that will
                     -- happen very rarely if at all.
@@ -2169,7 +2123,8 @@ getByteReg expr = do
 getNonClobberedReg :: HasDebugCallStack => CmmExpr -> NatM (Reg, InstrBlock)
 getNonClobberedReg expr = do
   r <- getRegister expr
-  platform <- ncgPlatform <$> getConfig
+  config <- getConfig
+  let platform = ncgPlatform config
   case r of
     Any rep code -> do
         tmp <- getNewRegNat rep
@@ -2179,7 +2134,7 @@ getNonClobberedReg expr = do
         | reg `elem` instrClobberedRegs platform
         -> do
                 tmp <- getNewRegNat rep
-                return (tmp, code `snocOL` mkRegRegMoveInstr platform rep reg tmp)
+                return (tmp, code `snocOL` mkRegRegMoveInstr config rep reg tmp)
         | otherwise ->
                 return (reg, code)
 
@@ -2453,10 +2408,9 @@ memConstant align lit = do
 
 loadFloatAmode :: Width -> AddrMode -> InstrBlock -> NatM Register
 loadFloatAmode w addr addr_code = do
-  platform <- getPlatform
   let format = floatFormat w
       code dst = addr_code `snocOL`
-                    mkMOV platform format (OpAddr addr) (OpReg dst)
+                    MOV format (OpAddr addr) (OpReg dst)
 
   return (Any format code)
 
@@ -2629,19 +2583,19 @@ condIntCode' platform cond x y
         cmpExact :: OrdList Instr
         cmpExact =
           toOL
-            [ mkMOV platform II32 (OpReg r1_hi) (OpReg tmp1)
-            , mkMOV platform II32 (OpReg r1_lo) (OpReg tmp2)
+            [ MOV II32 (OpReg r1_hi) (OpReg tmp1)
+            , MOV II32 (OpReg r1_lo) (OpReg tmp2)
             , XOR II32 (OpReg r2_hi) (OpReg tmp1)
             , XOR II32 (OpReg r2_lo) (OpReg tmp2)
             , OR  II32 (OpReg tmp1)  (OpReg tmp2)
             ]
         cmpGE = toOL
-            [ mkMOV platform II32 (OpReg r1_hi) (OpReg tmp1)
+            [ MOV II32 (OpReg r1_hi) (OpReg tmp1)
             , CMP II32 (OpReg r2_lo) (OpReg r1_lo)
             , SBB II32 (OpReg r2_hi) (OpReg tmp1)
             ]
         cmpLE = toOL
-            [ mkMOV platform II32 (OpReg r2_hi) (OpReg tmp1)
+            [ MOV II32 (OpReg r2_hi) (OpReg tmp1)
             , CMP II32 (OpReg r1_lo) (OpReg r2_lo)
             , SBB II32 (OpReg r1_hi) (OpReg tmp1)
             ]
@@ -2781,10 +2735,10 @@ assignMem_IntCode pk addr src = do
     let
         code = code_src `appOL`
                code_addr `snocOL`
-                  mkMOV platform pk op_src (OpAddr addr)
+                  MOV pk op_src (OpAddr addr)
         -- NOTE: op_src is stable, so it will still be valid
         -- after code_addr.  This may involve the introduction
-        -- of an extra mkMOV platform to a temporary register, but we hope
+        -- of an extra MOV to a temporary register, but we hope
         -- the register allocator will get rid of it.
     --
     return code
@@ -2799,8 +2753,7 @@ assignMem_IntCode pk addr src = do
 
 -- Assign; dst is a reg, rhs is mem
 assignReg_IntCode pk reg (CmmLoad src _ _) = do
-  platform <- getPlatform
-  load_code <- intLoadCode (mkMOV platform pk) src
+  load_code <- intLoadCode (MOV pk) src
   platform <- ncgPlatform <$> getConfig
   return (load_code (getRegisterReg platform reg))
 
@@ -2813,13 +2766,12 @@ assignReg_IntCode _ reg src = do
 
 -- Floating point assignment to memory
 assignMem_FltCode pk addr src = do
-  platform <- getPlatform
   (src_reg, src_code) <- getNonClobberedReg src
   Amode addr addr_code <- getAmode addr
   let
         code = src_code `appOL`
                addr_code `snocOL`
-               mkMOV platform pk (OpReg src_reg) (OpAddr addr)
+               MOV pk (OpReg src_reg) (OpAddr addr)
 
   return code
 
@@ -3371,7 +3323,7 @@ genCCall32 addr _ dest_regs args = do
                                       in
 
                                       -- assume SSE2
-                                       mkMOV platform format (OpReg reg) (OpAddr addr)
+                                       MOV format (OpReg reg) (OpAddr addr)
 
                                      ]
                                )
@@ -3453,12 +3405,12 @@ genCCall32 addr _ dest_regs args = do
                                    -- NB: This code will need to be
                                    -- revisited once GHC does more work around
                                    -- SIGFPE f
-                                   mkMOV platform fmt (OpAddr tmp_amode) (OpReg r_dest),
+                                   MOV fmt (OpAddr tmp_amode) (OpReg r_dest),
                                    ADD II32 (OpImm (ImmInt b)) (OpReg esp),
                                    DELTA delta0]
-              | isWord64 ty    = toOL [mkMOV platform II32 (OpReg eax) (OpReg r_dest),
-                                        mkMOV platform II32 (OpReg edx) (OpReg r_dest_hi)]
-              | otherwise      = unitOL (mkMOV platform (intFormat w)
+              | isWord64 ty    = toOL [MOV II32 (OpReg eax) (OpReg r_dest),
+                                        MOV II32 (OpReg edx) (OpReg r_dest_hi)]
+              | otherwise      = unitOL (MOV (intFormat w)
                                              (OpReg eax)
                                              (OpReg r_dest))
               where
@@ -3479,7 +3431,8 @@ genCCall64 :: CmmExpr           -- ^ address of function to call
            -> [CmmActual]       -- ^ arguments (of mixed type)
            -> NatM InstrBlock
 genCCall64 addr conv dest_regs args = do
-    platform <- getPlatform
+    config <- getConfig
+    let platform = ncgPlatform config
     -- load up the register arguments
     let prom_args = map (maybePromoteCArg platform W32) args
 
@@ -3538,7 +3491,7 @@ genCCall64 addr conv dest_regs args = do
                     tmp      <- getNewRegNat arg_fmt
                     let
                       code'  = code `appOL` arg_code tmp
-                      acode' = acode `snocOL` mkRegRegMoveInstr platform arg_fmt tmp r
+                      acode' = acode `snocOL` mkRegRegMoveInstr config arg_fmt tmp r
                     return (code',acode')
 
               arg_rep = cmmExprType platform arg
@@ -3560,7 +3513,7 @@ genCCall64 addr conv dest_regs args = do
                       ((ireg, freg) : regs) code
             | isFloatType arg_rep = do
                  arg_code <- getAnyReg arg
-                 load_args_win rest (mkRegFormat platform ireg II64: usedInt) (mkRegFormat platform freg FF64 : usedFP) regs
+                 load_args_win rest (RegFormat ireg II64: usedInt) (RegFormat freg FF64 : usedFP) regs
                                (code `appOL`
                                 arg_code freg `snocOL`
                                 -- If we are calling a varargs function
@@ -3572,7 +3525,7 @@ genCCall64 addr conv dest_regs args = do
                                   -- because it is going between two different types of register
             | otherwise = do
                  arg_code <- getAnyReg arg
-                 load_args_win rest (mkRegFormat platform ireg II64: usedInt) usedFP regs
+                 load_args_win rest (RegFormat ireg II64: usedInt) usedFP regs
                                (code `appOL` arg_code ireg)
             where
               arg_rep = cmmExprType platform arg
@@ -3588,7 +3541,7 @@ genCCall64 addr conv dest_regs args = do
              let code' = code `appOL` arg_code `appOL` toOL [
                             SUB (intFormat (wordWidth platform)) (OpImm (ImmInt arg_size)) (OpReg rsp),
                             DELTA (delta-arg_size),
-                            mkMOV platform (floatFormat width) (OpReg arg_reg) (OpAddr (spRel platform 0))]
+                            MOV (floatFormat width) (OpReg arg_reg) (OpAddr (spRel platform 0))]
              push_args rest code'
 
            | otherwise = do
@@ -3619,8 +3572,8 @@ genCCall64 addr conv dest_regs args = do
         if platformOS platform == OSMinGW32
         then load_args_win prom_args [] [] (allArgRegs platform) nilOL
         else do
-           let intArgRegs = map (\r -> mkRegFormat platform r II64) $ allIntArgRegs platform
-               fpArgRegs = map (\r -> mkRegFormat platform r FF64) $ allFPArgRegs platform
+           let intArgRegs = map (\r -> RegFormat r II64) $ allIntArgRegs platform
+               fpArgRegs = map (\r -> RegFormat r FF64) $ allFPArgRegs platform
            (stack_args, aregs, fregs, load_args_code, assign_args_code)
                <- load_args prom_args intArgRegs fpArgRegs nilOL nilOL
            let used_regs rs as = dropTail (length rs) as
@@ -3632,7 +3585,7 @@ genCCall64 addr conv dest_regs args = do
     let
         wordFmt = archWordFormat (target32Bit platform)
         arg_regs_used = int_regs_used ++ fp_regs_used
-        arg_regs = [mkRegFormat platform eax wordFmt] ++ arg_regs_used
+        arg_regs = [RegFormat eax wordFmt] ++ arg_regs_used
                 -- for annotating the call instruction with
         sse_regs = length fp_regs_used
         arg_stack_slots = if platformOS platform == OSMinGW32
@@ -3681,7 +3634,7 @@ genCCall64 addr conv dest_regs args = do
         -- It's not safe to omit this assignment, even if the number
         -- of SSE2 regs in use is zero.  If %al is larger than 8
         -- on entry to a varargs function, seg faults ensue.
-        assign_eax n = unitOL (mkMOV platform II32 (OpImm (ImmInt n)) (OpReg eax))
+        assign_eax n = unitOL (MOV II32 (OpImm (ImmInt n)) (OpReg eax))
 
     let call = callinsns `appOL`
                toOL (
@@ -3698,13 +3651,13 @@ genCCall64 addr conv dest_regs args = do
         assign_code []     = nilOL
         assign_code [dest] =
           case typeWidth rep of
-                W32 | isFloatType rep -> unitOL (mkMOV platform (floatFormat W32)
+                W32 | isFloatType rep -> unitOL (MOV (floatFormat W32)
                                                      (OpReg xmm0)
                                                      (OpReg r_dest))
-                W64 | isFloatType rep -> unitOL (mkMOV platform (floatFormat W64)
+                W64 | isFloatType rep -> unitOL (MOV (floatFormat W64)
                                                      (OpReg xmm0)
                                                      (OpReg r_dest))
-                _ -> unitOL (mkMOV platform (cmmTypeFormat rep) (OpReg rax) (OpReg r_dest))
+                _ -> unitOL (MOV (cmmTypeFormat rep) (OpReg rax) (OpReg r_dest))
           where
                 rep = localRegType dest
                 r_dest = getRegisterReg platform  (CmmLocal dest)
@@ -3818,7 +3771,7 @@ genSwitch expr targets = do
             let op = OpAddr (AddrBaseIndex (EABaseReg tableReg) (EAIndex reg (platformWordSizeInBytes platform)) (ImmInt 0))
                 code = e_code `appOL` toOL
                     [ LEA (archWordFormat is32bit) (OpAddr (AddrBaseIndex EABaseRip EAIndexNone (ImmCLbl lbl))) (OpReg tableReg)
-                    , mkMOV platform (archWordFormat is32bit) op (OpReg targetReg)
+                    , MOV (archWordFormat is32bit) op (OpReg targetReg)
                     , JMP_TBL (OpReg targetReg) ids (Section ReadOnlyData lbl) lbl
                     ]
             return code
@@ -4053,7 +4006,6 @@ trivialCode' _ width instr _ a b
 genTrivialCode :: Format -> (Operand -> Operand -> Instr)
                -> CmmExpr -> CmmExpr -> NatM Register
 genTrivialCode rep instr a b = do
-  platform <- getPlatform
   (b_op, b_code) <- getNonClobberedOperand b
   a_code <- getAnyReg a
   tmp <- getNewRegNat rep
@@ -4067,7 +4019,7 @@ genTrivialCode rep instr a b = do
      code dst
         | dst `regClashesWithOp` b_op =
                 b_code `appOL`
-                unitOL (mkMOV platform rep b_op (OpReg tmp)) `appOL`
+                unitOL (MOV rep b_op (OpReg tmp)) `appOL`
                 a_code dst `snocOL`
                 instr (OpReg tmp) (OpReg dst)
         | otherwise =
@@ -4088,7 +4040,7 @@ genFMA3Code :: Length
             -> FMASign
             -> CmmExpr -> CmmExpr -> CmmExpr -> NatM Register
 genFMA3Code l w signs x y z = do
-  platform <- getPlatform
+  config <- getConfig
   -- For the FMA instruction, we want to compute x * y + z
   --
   -- There are three possible instructions we could emit:
@@ -4134,7 +4086,7 @@ genFMA3Code l w signs x y z = do
      -- Fallback: Compute the result into a tmp reg and then move it.
      code_mov dst    = x_code x_tmp `snocOL`
                        fma213 z_op y_reg x_tmp `snocOL`
-                       mkRegRegMoveInstr platform rep x_tmp dst
+                       mkRegRegMoveInstr config rep x_tmp dst
 
      code dst =
         y_code `appOL`
@@ -4221,7 +4173,6 @@ coerceFP2FP to x = do
 
 sse2NegCode :: Width -> CmmExpr -> NatM Register
 sse2NegCode w x = do
-  platform <- getPlatform
   let fmt = floatFormat w
   x_code <- getAnyReg x
   -- This is how gcc does it, so it can't be that bad:
@@ -4241,7 +4192,7 @@ sse2NegCode w x = do
   tmp <- getNewRegNat fmt
   let
     code dst = x_code dst `appOL` amode_code `appOL` toOL [
-        mkMOV platform fmt (OpAddr amode) (OpReg tmp),
+        MOV fmt (OpAddr amode) (OpReg tmp),
         XOR fmt (OpReg tmp) (OpReg dst)
         ]
   --
@@ -4339,17 +4290,16 @@ genAtomicRMW bid width amop dst addr n = do
             -> AddrMode  -- Address of location to mutate
             -> NatM (OrdList Instr,BlockId) -- TODO: Return Maybe BlockId
     op_code dst_r arg amode = do
-        platform <- getPlatform
         case amop of
           -- In the common case where dst_r is a virtual register the
           -- final move should go away, because it's the last use of arg
           -- and the first use of dst_r.
           AMO_Add  -> return $ (toOL [ LOCK (XADD format (OpReg arg) (OpAddr amode))
-                                     , mkMOV platform format (OpReg arg) (OpReg dst_r)
+                                     , MOV format (OpReg arg) (OpReg dst_r)
                                      ], bid)
           AMO_Sub  -> return $ (toOL [ NEGI format (OpReg arg)
                                      , LOCK (XADD format (OpReg arg) (OpAddr amode))
-                                     , mkMOV platform format (OpReg arg) (OpReg dst_r)
+                                     , MOV format (OpReg arg) (OpReg dst_r)
                                      ], bid)
           -- In these cases we need a new block id, and have to return it so
           -- that later instruction selection can reference it.
@@ -4365,7 +4315,6 @@ genAtomicRMW bid width amop dst addr n = do
         cmpxchg_code :: (Operand -> Operand -> OrdList Instr)
                      -> NatM (OrdList Instr, BlockId)
         cmpxchg_code instrs = do
-            platform <- getPlatform
             lbl1 <- getBlockIdNat
             lbl2 <- getBlockIdNat
             tmp <- getNewRegNat format
@@ -4378,12 +4327,12 @@ genAtomicRMW bid width amop dst addr n = do
             updateCfgNat (addWeightEdge lbl1 lbl1 0)
 
             return $ (toOL
-                [ mkMOV platform format (OpAddr amode) (OpReg eax)
+                [ MOV format (OpAddr amode) (OpReg eax)
                 , JXX ALWAYS lbl1
                 , NEWBLOCK lbl1
                   -- Keep old value so we can return it:
-                , mkMOV platform format (OpReg eax) (OpReg dst_r)
-                , mkMOV platform format (OpReg eax) (OpReg tmp)
+                , MOV format (OpReg eax) (OpReg dst_r)
+                , MOV format (OpReg eax) (OpReg tmp)
                 ]
                 `appOL` instrs (OpReg arg) (OpReg tmp) `appOL` toOL
                 [ LOCK (CMPXCHG format (OpReg tmp) (OpAddr amode))
@@ -4413,7 +4362,6 @@ genCtz64_32
   -> CmmExpr
   -> NatM (InstrBlock, Maybe BlockId)
 genCtz64_32 bid dst src = do
-  platform <- getPlatform
   RegCode64 vcode rhi rlo <- iselExpr64 src
   let dst_r = getLocalRegReg dst
   lbl1 <- getBlockIdNat
@@ -4437,9 +4385,9 @@ genCtz64_32 bid dst src = do
   --    dst = 64;
   --  }
   let instrs = vcode `appOL` toOL
-           ([ mkMOV platform      II32 (OpReg rhi)         (OpReg tmp_r)
+           ([ MOV      II32 (OpReg rhi)         (OpReg tmp_r)
             , OR       II32 (OpReg rlo)         (OpReg tmp_r)
-            , mkMOV platform      II32 (OpImm (ImmInt 64)) (OpReg dst_r)
+            , MOV      II32 (OpImm (ImmInt 64)) (OpReg dst_r)
             , JXX EQQ    lbl2
             , JXX ALWAYS lbl1
 
@@ -4459,7 +4407,6 @@ genCtz64_32 bid dst src = do
 -- Generic case (width <= word size)
 genCtzGeneric :: Width -> LocalReg -> CmmExpr -> NatM InstrBlock
 genCtzGeneric width dst src = do
-  platform <- getPlatform
   code_src <- getAnyReg src
   config <- getConfig
   let bw = widthInBits width
@@ -4487,10 +4434,10 @@ genCtzGeneric width dst src = do
       let instrs = code_src src_r `appOL` toOL
                ([ MOVZxL  II8    (OpReg src_r) (OpReg src_r) | width == W8 ] ++
                 [ BSF     format (OpReg src_r) tmp_r
-                , mkMOV platform     II32   (OpImm (ImmInt bw)) (OpReg dst_r)
+                , MOV     II32   (OpImm (ImmInt bw)) (OpReg dst_r)
                 , CMOV NE format (OpReg tmp_r) dst_r
                 ]) -- NB: We don't need to zero-extend the result for the
-                   -- W8/W16 cases because the 'mkMOV platform' insn already
+                   -- W8/W16 cases because the 'MOV' insn already
                    -- took care of implicitly clearing the upper bits
       return instrs
 
@@ -4552,21 +4499,21 @@ genMemCpyInlineMaybe align dst src n = do
       go :: Reg -> Reg -> Reg -> Integer -> OrdList Instr
       go dst src tmp i
           | i >= sizeBytes =
-              unitOL (mkMOV platform format (OpAddr src_addr) (OpReg tmp)) `appOL`
-              unitOL (mkMOV platform format (OpReg tmp) (OpAddr dst_addr)) `appOL`
+              unitOL (MOV format (OpAddr src_addr) (OpReg tmp)) `appOL`
+              unitOL (MOV format (OpReg tmp) (OpAddr dst_addr)) `appOL`
               go dst src tmp (i - sizeBytes)
           -- Deal with remaining bytes.
           | i >= 4 =  -- Will never happen on 32-bit
-              unitOL (mkMOV platform II32 (OpAddr src_addr) (OpReg tmp)) `appOL`
-              unitOL (mkMOV platform II32 (OpReg tmp) (OpAddr dst_addr)) `appOL`
+              unitOL (MOV II32 (OpAddr src_addr) (OpReg tmp)) `appOL`
+              unitOL (MOV II32 (OpReg tmp) (OpAddr dst_addr)) `appOL`
               go dst src tmp (i - 4)
           | i >= 2 =
               unitOL (MOVZxL II16 (OpAddr src_addr) (OpReg tmp)) `appOL`
-              unitOL (mkMOV platform II16 (OpReg tmp) (OpAddr dst_addr)) `appOL`
+              unitOL (MOV II16 (OpReg tmp) (OpAddr dst_addr)) `appOL`
               go dst src tmp (i - 2)
           | i >= 1 =
               unitOL (MOVZxL II8 (OpAddr src_addr) (OpReg tmp)) `appOL`
-              unitOL (mkMOV platform II8 (OpReg tmp) (OpAddr dst_addr)) `appOL`
+              unitOL (MOV II8 (OpReg tmp) (OpAddr dst_addr)) `appOL`
               go dst src tmp (i - 1)
           | otherwise = nilOL
         where
@@ -4639,24 +4586,24 @@ genMemSetInlineMaybe align dst c n = do
     sizeBytes :: Integer
     sizeBytes = fromIntegral (formatInBytes format)
 
-    -- Depending on size returns the widest mkMOV platform instruction and its
+    -- Depending on size returns the widest MOV instruction and its
     -- width.
     gen4 :: AddrMode -> Integer -> (InstrBlock, Integer)
     gen4 addr size
         | size >= 4 =
-            (unitOL (mkMOV platform II32 (OpImm (ImmInteger c4)) (OpAddr addr)), 4)
+            (unitOL (MOV II32 (OpImm (ImmInteger c4)) (OpAddr addr)), 4)
         | size >= 2 =
-            (unitOL (mkMOV platform II16 (OpImm (ImmInteger c2)) (OpAddr addr)), 2)
+            (unitOL (MOV II16 (OpImm (ImmInteger c2)) (OpAddr addr)), 2)
         | size >= 1 =
-            (unitOL (mkMOV platform II8 (OpImm (ImmInteger c)) (OpAddr addr)), 1)
+            (unitOL (MOV II8 (OpImm (ImmInteger c)) (OpAddr addr)), 1)
         | otherwise = (nilOL, 0)
 
-    -- Generates a 64-bit wide mkMOV platform instruction from REG to MEM.
+    -- Generates a 64-bit wide MOV instruction from REG to MEM.
     gen8 :: AddrMode -> Reg -> InstrBlock
     gen8 addr reg8byte =
-      unitOL (mkMOV platform format (OpReg reg8byte) (OpAddr addr))
+      unitOL (MOV format (OpReg reg8byte) (OpAddr addr))
 
-    -- Unrolls memset when the widest mkMOV platform is <= 4 bytes.
+    -- Unrolls memset when the widest MOV is <= 4 bytes.
     go4 :: Reg -> Integer -> InstrBlock
     go4 dst left =
       if left <= 0 then nilOL
@@ -4666,7 +4613,7 @@ genMemSetInlineMaybe align dst c n = do
         dst_addr = AddrBaseIndex (EABaseReg dst) EAIndexNone (ImmInteger (n - left))
         (curMov, curWidth) = gen4 dst_addr possibleWidth
 
-    -- Unrolls memset when the widest mkMOV platform is 8 bytes (thus another Reg
+    -- Unrolls memset when the widest MOV is 8 bytes (thus another Reg
     -- argument). Falls back to go4 when all 8 byte moves are
     -- exhausted.
     go8 :: Reg -> Reg -> Integer -> InstrBlock
@@ -4733,7 +4680,6 @@ genPrefetchData n src = do
 
 genByteSwap :: Width -> LocalReg -> CmmExpr -> NatM InstrBlock
 genByteSwap width dst src = do
-  platform <- getPlatform
   is32Bit <- is32BitPlatform
   let format = intFormat width
   case width of
@@ -4741,8 +4687,8 @@ genByteSwap width dst src = do
         let Reg64 dst_hi dst_lo = localReg64 dst
         RegCode64 vcode rhi rlo <- iselExpr64 src
         return $ vcode `appOL`
-                 toOL [ mkMOV platform II32 (OpReg rlo) (OpReg dst_hi),
-                        mkMOV platform II32 (OpReg rhi) (OpReg dst_lo),
+                 toOL [ MOV II32 (OpReg rlo) (OpReg dst_hi),
+                        MOV II32 (OpReg rhi) (OpReg dst_lo),
                         BSWAP II32 dst_hi,
                         BSWAP II32 dst_lo ]
       W16 -> do
@@ -4855,7 +4801,6 @@ genPext bid width dst src mask = do
 
 genClz :: BlockId -> Width -> CmmFormal -> CmmActual -> NatM InstrBlock
 genClz bid width dst src = do
-  platform <- getPlatform
   is32Bit <- is32BitPlatform
   config <- getConfig
   if is32Bit && width == W64
@@ -4889,11 +4834,11 @@ genClz bid width dst src = do
           return $ code_src src_r `appOL` toOL
                    ([ MOVZxL  II8    (OpReg src_r) (OpReg src_r) | width == W8 ] ++
                     [ BSR     format (OpReg src_r) tmp_r
-                    , mkMOV platform     II32   (OpImm (ImmInt (2*bw-1))) (OpReg dst_r)
+                    , MOV     II32   (OpImm (ImmInt (2*bw-1))) (OpReg dst_r)
                     , CMOV NE format (OpReg tmp_r) dst_r
                     , XOR     format (OpImm (ImmInt (bw-1))) (OpReg dst_r)
                     ]) -- NB: We don't need to zero-extend the result for the
-                       -- W8/W16 cases because the 'mkMOV platform' insn already
+                       -- W8/W16 cases because the 'MOV' insn already
                        -- took care of implicitly clearing the upper bits
 
 genWordToFloat :: BlockId -> Width -> CmmFormal -> CmmActual -> NatM InstrBlock
@@ -4903,8 +4848,7 @@ genWordToFloat bid width dst src =
 
 genAtomicRead :: Width -> MemoryOrdering -> LocalReg -> CmmExpr -> NatM InstrBlock
 genAtomicRead width _mord dst addr = do
-  platform <- getPlatform
-  load_code <- intLoadCode (mkMOV platform (intFormat width)) addr
+  load_code <- intLoadCode (MOV (intFormat width)) addr
   return (load_code (getLocalRegReg dst))
 
 genAtomicWrite :: Width -> MemoryOrdering -> CmmExpr -> CmmExpr -> NatM InstrBlock
@@ -4941,9 +4885,9 @@ genCmpXchg bid width dst addr old new = do
       platform <- getPlatform
       let dst_r    = getRegisterReg platform  (CmmLocal dst)
           code     = toOL
-                     [ mkMOV platform format (OpReg oldval) (OpReg eax)
+                     [ MOV format (OpReg oldval) (OpReg eax)
                      , LOCK (CMPXCHG format (OpReg newval) (OpAddr amode))
-                     , mkMOV platform format (OpReg eax) (OpReg dst_r)
+                     , MOV format (OpReg eax) (OpReg dst_r)
                      ]
       return $ addr_code `appOL` newval_code newval `appOL` oldval_code oldval
           `appOL` code
@@ -4954,7 +4898,6 @@ genCmpXchg bid width dst addr old new = do
 
 genXchg :: Width -> LocalReg -> CmmExpr -> CmmExpr -> NatM InstrBlock
 genXchg width dst addr value = do
-  platform <- getPlatform
   is32Bit <- is32BitPlatform
 
   when (is32Bit && width == W64) $
@@ -4966,7 +4909,7 @@ genXchg width dst addr value = do
   let dst_r    = getLocalRegReg dst
   -- Copy the value into the target register, perform the exchange.
   let code     = toOL
-                 [ mkMOV platform format (OpReg newval) (OpReg dst_r)
+                 [ MOV format (OpReg newval) (OpReg dst_r)
                   -- On X86 xchg implies a lock prefix if we use a memory argument.
                   -- so this is atomic.
                  , XCHG format (OpAddr amode) dst_r
@@ -4976,7 +4919,6 @@ genXchg width dst addr value = do
 
 genFloatAbs :: Width -> LocalReg -> CmmExpr -> NatM InstrBlock
 genFloatAbs width dst src = do
-  platform <- getPlatform
   let
     format = floatFormat width
     const = case width of
@@ -4988,7 +4930,7 @@ genFloatAbs width dst src = do
   tmp <- getNewRegNat format
   let dst_r = getLocalRegReg dst
   pure $ src_code dst_r `appOL` amode_code `appOL` toOL
-           [ mkMOV platform format (OpAddr amode) (OpReg tmp)
+           [ MOV format (OpAddr amode) (OpReg tmp)
            , AND format (OpReg tmp) (OpReg dst_r)
            ]
 
@@ -5053,7 +4995,6 @@ genSignedLargeMul
   -> CmmExpr
   -> NatM (OrdList Instr)
 genSignedLargeMul width res_c res_h res_l arg_x arg_y = do
-  platform <- getPlatform
   (y_reg, y_code) <- getRegOrMem arg_y
   x_code <- getAnyReg arg_x
   reg_tmp <- getNewRegNat II8
@@ -5064,8 +5005,8 @@ genSignedLargeMul width res_c res_h res_l arg_x arg_y = do
       code = y_code `appOL`
              x_code rax `appOL`
              toOL [ IMUL2 format y_reg
-                  , mkMOV platform format (OpReg rdx) (OpReg reg_h)
-                  , mkMOV platform format (OpReg rax) (OpReg reg_l)
+                  , MOV format (OpReg rdx) (OpReg reg_h)
+                  , MOV format (OpReg rax) (OpReg reg_l)
                   , SETCC CARRY (OpReg reg_tmp)
                   , MOVZxL II8 (OpReg reg_tmp) (OpReg reg_c)
                   ]
@@ -5079,7 +5020,6 @@ genUnsignedLargeMul
   -> CmmExpr
   -> NatM (OrdList Instr)
 genUnsignedLargeMul width res_h res_l arg_x arg_y = do
-  platform <- getPlatform
   (y_reg, y_code) <- getRegOrMem arg_y
   x_code <- getAnyReg arg_x
   let format = intFormat width
@@ -5088,8 +5028,8 @@ genUnsignedLargeMul width res_h res_l arg_x arg_y = do
       code = y_code `appOL`
              x_code rax `appOL`
              toOL [MUL2 format y_reg,
-                   mkMOV platform format (OpReg rdx) (OpReg reg_h),
-                   mkMOV platform format (OpReg rax) (OpReg reg_l)]
+                   MOV format (OpReg rdx) (OpReg reg_h),
+                   MOV format (OpReg rax) (OpReg reg_l)]
   return code
 
 
@@ -5103,7 +5043,6 @@ genQuotRem
   -> CmmExpr
   -> NatM InstrBlock
 genQuotRem width signed res_q res_r m_arg_x_high arg_x_low arg_y = do
-  platform <- getPlatform
   case width of
     W8 -> do
       -- See Note [DIV/IDIV for bytes]
@@ -5133,5 +5072,5 @@ genQuotRem width signed res_q res_r m_arg_x_high arg_x_low arg_y = do
                x_low_code rax `appOL`
                x_high_code rdx `appOL`
                toOL [instr format y_reg,
-                     mkMOV platform format (OpReg rax) (OpReg reg_q),
-                     mkMOV platform format (OpReg rdx) (OpReg reg_r)]
+                     MOV format (OpReg rax) (OpReg reg_q),
+                     MOV format (OpReg rdx) (OpReg reg_r)]


=====================================
compiler/GHC/CmmToAsm/X86/Instr.hs
=====================================
@@ -1,4 +1,5 @@
 {-# LANGUAGE LambdaCase #-}
+{-# LANGUAGE MultiWayIf #-}
 {-# LANGUAGE TypeFamilies #-}
 
 -----------------------------------------------------------------------------
@@ -37,6 +38,7 @@ module GHC.CmmToAsm.X86.Instr
    , patchJumpInstr
    , isMetaInstr
    , isJumpishInstr
+   , movdOutFormat
    )
 where
 
@@ -46,7 +48,7 @@ import GHC.Data.FastString
 import GHC.CmmToAsm.X86.Cond
 import GHC.CmmToAsm.X86.Regs
 import GHC.CmmToAsm.Format
-import GHC.CmmToAsm.Reg.Target (targetClassOfReg, mkRegFormat)
+import GHC.CmmToAsm.Reg.Target (targetClassOfReg)
 import GHC.CmmToAsm.Types
 import GHC.CmmToAsm.Utils
 import GHC.CmmToAsm.Instr (RegUsage(..), noUsage)
@@ -207,12 +209,18 @@ data Instr
              -- True 64-bit operands need to be either first moved to a register or moved
              -- with @MOVABS@; we currently do not use this instruction in GHC.
              -- See https://stackoverflow.com/questions/52434073/whats-the-difference-between-the-x86-64-att-instructions-movq-and-movabsq.
+        | MOV2 Format Format Operand Operand
+         -- ^ Like MOV, but between two different kinds of registers
+         -- (e.g. moving rax to xmm1)
+         --
+         -- SIMD NCG TODO: this is a bit of a hack, but the alternative would
+         -- be to have MOV store two Formats to handle xmm -> rax and rax -> xmm.
 
         | MOVD   Format Operand Operand -- ^ MOVD/MOVQ SSE2 instructions
                                         -- (bitcast between a general purpose
                                         -- register and a float register).
                                         -- Format is input format, output format is
-                                        -- calculated in Ppr.hs
+                                        -- calculated in the 'movdOutFormat' function.
         | CMOV   Cond Format Operand Reg
         | MOVZxL      Format Operand Operand
               -- ^ The format argument is the size of operand 1 (the number of bits we keep)
@@ -419,15 +427,8 @@ regUsageOfInstr :: Platform -> Instr -> RegUsage
 regUsageOfInstr platform instr
  = case instr of
     MOV    fmt src dst    -> usageRW fmt src dst
-    MOVD   fmt src dst    ->
-      mkRU (use_R fmt src []) (use_R out_fmt dst [])
-        where
-           out_fmt = case fmt of
-             II32 -> FF32
-             II64 -> FF64
-             FF32 -> II32
-             FF64 -> II64
-             _ -> panic "MOVD: not a scalar 32/64 bit format"
+    MOV2   srcFmt dstFmt src dst -> mkRU (use_R srcFmt src []) (use_R dstFmt dst [])
+    MOVD   fmt src dst    -> mkRU (use_R fmt src []) (use_R (movdOutFormat fmt) dst [])
     CMOV _ fmt src dst    -> mkRU (use_R fmt src [mk fmt dst]) [mk fmt dst]
     MOVZxL fmt src dst    -> usageRW fmt src dst
     MOVSxL fmt src dst    -> usageRW fmt src dst
@@ -562,7 +563,7 @@ regUsageOfInstr platform instr
     VSHUFPS fmt _off src1 src2 dst
       -> mkRU (use_R fmt src1 [mk fmt src2]) [mk fmt dst]
 
-    PSLLDQ       fmt off dst -> mkRU (use_R fmt off []) [mk fmt dst]
+    PSLLDQ fmt off dst -> mkRU (use_R fmt off []) [mk fmt dst]
 
     MOVHLPS    fmt src dst
       -> mkRU (use_R fmt src []) [mk fmt dst]
@@ -608,9 +609,9 @@ regUsageOfInstr platform instr
     -- 3 operand form of FMA instructions.
     usageFMA :: HasDebugCallStack => Format -> Operand -> Reg -> Reg -> RegUsage
     usageFMA fmt (OpReg src1) src2 dst =
-      mkRU (map (\r -> mkRegFormat platform r fmt) [src1, src2, dst]) [ mkRegFormat platform dst fmt ]
+      mkRU (map (\r -> RegFormat r fmt) [src1, src2, dst]) [ RegFormat dst fmt ]
     usageFMA fmt (OpAddr ea1) src2 dst
-      = mkRU (use_EA ea1 (map (\r -> mkRegFormat platform r fmt) [src2, dst])) [ mkRegFormat platform dst fmt ]
+      = mkRU (use_EA ea1 (map (\r -> RegFormat r fmt) [src2, dst])) [ RegFormat dst fmt ]
     usageFMA _ _ _ _
       = panic "X86.RegInfo.usageFMA: no match"
 
@@ -628,6 +629,7 @@ regUsageOfInstr platform instr
     def_W _   _                   = panic "X86.RegInfo.def_W: no match"
 
     -- Registers used when an operand is read.
+    use_R :: HasDebugCallStack => Format -> Operand -> [RegFormat] -> [RegFormat]
     use_R fmt (OpReg reg)  tl = mk fmt reg : tl
     use_R _   (OpImm _)    tl = tl
     use_R _   (OpAddr ea)  tl = use_EA ea tl
@@ -650,29 +652,26 @@ regUsageOfInstr platform instr
               dst' = filter (interesting platform . regFormatReg) dst
 
     addrFmt = archWordFormat (target32Bit platform)
-    mk :: HasDebugCallStack => Format -> Reg -> RegFormat
-    mk fmt r = mkRegFormat platform r fmt
+    mk :: Format -> Reg -> RegFormat
+    mk fmt r = RegFormat r fmt
 
-    mkFmt :: HasDebugCallStack => Reg -> RegFormat
+    mkFmt :: Reg -> RegFormat
     mkFmt r = RegFormat r $ case targetClassOfReg platform r of
       RcInteger -> addrFmt
       RcFloatOrVector -> FF64
 
-    --mkRUFormat :: HasDebugCallStack => [Reg] -> [Reg] -> RegUsage
-    --mkRUFormat src dst = src' `seq` dst' `seq` RU (map mkFormat src') (map mkFormat dst')
-    --    where src' = filter (interesting platform) src
-    --          dst' = filter (interesting platform) dst
-    --          mkFormat reg =
-    --            mkRegFormat platform reg $
-    --              case targetClassOfReg platform reg of
-    --                RcInteger -> archWordFormat (target32Bit platform)
-    --                RcFloatOrVector -> FF64
-
 -- | Is this register interesting for the register allocator?
 interesting :: Platform -> Reg -> Bool
 interesting _        (RegVirtual _)              = True
 interesting platform (RegReal (RealRegSingle i)) = freeReg platform i
 
+movdOutFormat :: Format -> Format
+movdOutFormat format = case format of
+  II32 -> FF32
+  II64 -> FF64
+  FF32 -> II32
+  FF64 -> II64
+  _    -> pprPanic "X86: improper format for movd/movq" (ppr format)
 
 
 -- | Applies the supplied function to all registers in instructions.
@@ -680,23 +679,8 @@ interesting platform (RegReal (RealRegSingle i)) = freeReg platform i
 patchRegsOfInstr :: HasDebugCallStack => Platform -> Instr -> (Reg -> Reg) -> Instr
 patchRegsOfInstr platform instr env
   = case instr of
-    MOV fmt src dst ->
-      mkMOV fmt (patchOp src) (patchOp dst)
-        where
-          fmtCls = if isIntFormat fmt then RcInteger else RcFloatOrVector
-          mkMOV :: HasDebugCallStack => Format -> Operand -> Operand -> Instr
-          mkMOV fmt op1 op2 =
-            assertPpr (all (== fmtCls) $ catMaybes [cls1, cls2])
-              (vcat [ text "patchRegsOfInstr produced invalid MOV instruction"
-                    , text "fmt:" <+> ppr fmt
-                    , case op1 of { OpReg r1 -> ppr r1 <+> dcolon <+> ppr (fromJust cls1); _ -> empty }
-                    , case op2 of { OpReg r2 -> ppr r2 <+> dcolon <+> ppr (fromJust cls2); _ -> empty }
-                    ])
-              $ MOV fmt op1 op2
-            where
-              cls1 = case op1 of { OpReg r1 -> Just (targetClassOfReg platform r1); _ -> Nothing }
-              cls2 = case op2 of { OpReg r2 -> Just (targetClassOfReg platform r2); _ -> Nothing }
-
+    MOV fmt src dst      -> MOV fmt (patchOp src) (patchOp dst)
+    MOV2 srcFmt dstFmt src dst -> MOV2 srcFmt dstFmt (patchOp src) (patchOp dst)
     MOVD fmt src dst     -> patch2 (MOVD fmt) src dst
     CMOV cc fmt src dst  -> CMOV cc fmt (patchOp src) (env dst)
     MOVZxL fmt src dst   -> patch2 (MOVZxL fmt) src dst
@@ -914,28 +898,11 @@ mkSpillInstr
     -> Int       -- spill slot to use
     -> [Instr]
 
-mkSpillInstr config (RegFormat reg fmt) delta slot
-  = let off = spillSlotToOffset platform slot - delta
-    in case fmt of
-        VecFormat {}
-          | formatInBytes fmt > 16
-          -> [VMOVU fmt (OpReg reg) (OpAddr (spRel platform off))]
-          | otherwise
-          -> [MOVU fmt (OpReg reg) (OpAddr (spRel platform off))]
-            -- NB: not using MOVA, because we have no guarantees about the stack
-            -- being sufficiently aligned, including even numbered stack slots.
-        _ ->
-          let fmt' = scalarMoveFormat platform fmt
-              cls_f = if isIntFormat fmt' then RcInteger else RcFloatOrVector
-              cls1 = targetClassOfReg platform reg
-            in
-              assertPpr (all (== cls_f) [cls1])
-                  (vcat [ text "mkSpillInstr: incompatible formats"
-                        , text "format:" <+> ppr fmt <+> parens (ppr cls_f)
-                        , text "src:" <+> ppr reg <+> parens (ppr cls1)
-                        , callStackDoc ])
-                $ [MOV fmt' (OpReg reg) (OpAddr (spRel platform off))]
-    where platform = ncgPlatform config
+mkSpillInstr config (RegFormat reg fmt) delta slot =
+  [ movInstr config fmt (OpReg reg) (OpAddr (spRel platform off)) ]
+  where
+    platform = ncgPlatform config
+    off = spillSlotToOffset platform slot - delta
 
 -- | Make a spill reload instruction.
 mkLoadInstr
@@ -946,28 +913,35 @@ mkLoadInstr
     -> Int      -- spill slot to use
     -> [Instr]
 
-mkLoadInstr config (RegFormat reg fmt) delta slot
-  = let off = spillSlotToOffset platform slot - delta
-    in case fmt of
-          VecFormat {}
-            | formatInBytes fmt > 16
-            -> [VMOVU fmt (OpAddr (spRel platform off)) (OpReg reg)]
+mkLoadInstr config (RegFormat reg fmt) delta slot =
+  [ movInstr config fmt (OpAddr (spRel platform off)) (OpReg reg) ]
+  where
+    platform = ncgPlatform config
+    off = spillSlotToOffset platform slot - delta
+
+movInstr :: NCGConfig -> Format -> (Operand -> Operand -> Instr)
+movInstr config fmt =
+  case fmt of
+    -- NB: we are using {V}MOVU and not {V}MOVA, because we have no guarantees
+    -- about the stack being sufficiently aligned (even for even numbered stack slots).
+    VecFormat {}
+      | formatInBytes fmt > 16
+      -> if avx
+         then VMOVU fmt
+         else sorry "256-bit wide vectors require -mavx"
+      | otherwise
+      -> if | avx
+            -> VMOVU fmt
+            | sse >= Just SSE2
+            -> MOVU fmt
             | otherwise
-            -> [MOVU fmt (OpAddr (spRel platform off)) (OpReg reg)]
-            -- NB: not using MOVA, because we have no guarantees about the stack
-            -- being sufficiently aligned, including even numbered stack slots.
-          _ ->
-            let fmt' = scalarMoveFormat platform fmt
-                cls_f = if isIntFormat fmt' then RcInteger else RcFloatOrVector
-                cls2 = targetClassOfReg platform reg
-            in
-              assertPpr (all (== cls_f) [cls2])
-                  (vcat [ text "mkLoadInstr: incompatible formats"
-                        , text "format:" <+> ppr fmt <+> parens (ppr cls_f)
-                        , text "dst:" <+> ppr reg <+> parens (ppr cls2)
-                        , callStackDoc ])
-                $ [MOV fmt' (OpAddr (spRel platform off)) (OpReg reg)]
-    where platform = ncgPlatform config
+            -> sorry "128-bit wide vectors require either -msse2 or -mavx"
+    _ -> MOV (scalarMoveFormat platform fmt)
+  where
+    platform = ncgPlatform config
+    avx = ncgAvxEnabled config
+    sse = ncgSseVersion config
+
 
 spillSlotSize :: Platform -> Int
 spillSlotSize platform
@@ -1020,35 +994,19 @@ isMetaInstr instr
 -- | Make a reg-reg move instruction.
 mkRegRegMoveInstr
     :: HasDebugCallStack
-    => Platform
+    => NCGConfig
     -> Format
     -> Reg
     -> Reg
     -> Instr
-mkRegRegMoveInstr platform fmt src dst =
-  case fmt of
-    VecFormat _ s
-      | isIntScalarFormat s ->
-        if widthInBytes (formatToWidth fmt) <= 128
-         then MOVDQU  fmt (OpReg src) (OpReg dst)
-         else VMOVDQU fmt (OpReg src) (OpReg dst)
-      | otherwise ->
-        if widthInBytes (formatToWidth fmt) <= 128
-        then MOVU fmt (OpReg src) (OpReg dst)
-        else VMOVU fmt (OpReg src) (OpReg dst)
-    _ ->
-      let fmt' = scalarMoveFormat platform fmt
-          cls_f = if isIntFormat fmt' then RcInteger else RcFloatOrVector
-          cls1 = targetClassOfReg platform src
-          cls2 = targetClassOfReg platform dst
-      in
-        assertPpr (all (== cls_f) [cls1, cls2])
-          (vcat [ text "mkRegRegMoveInstr: incompatible formats"
-                , text "format:" <+> ppr fmt <+> parens (ppr cls_f)
-                , text "src:" <+> ppr src <+> parens (ppr cls1)
-                , text "dst:" <+> ppr dst <+> parens (ppr cls2)
-                , callStackDoc ])
-        $ MOV fmt' (OpReg src) (OpReg dst)
+mkRegRegMoveInstr config fmt src dst =
+  assertPpr (targetClassOfReg platform src == targetClassOfReg platform dst)
+    (vcat [ text "mkRegRegMoveInstr: incompatible register classes"
+          , text "src:" <+> ppr src
+          , text "dst:" <+> ppr dst ]) $
+    movInstr config fmt (OpReg src) (OpReg dst)
+  where
+    platform = ncgPlatform config
 
 scalarMoveFormat :: Platform -> Format -> Format
 scalarMoveFormat platform fmt
@@ -1074,6 +1032,8 @@ takeRegRegMoveInstr platform = \case
     -- so it is not a real "move" in that case.
     | not (isVecFormat fmt)
     -> go r1 r2
+  MOVD _ (OpReg r1) (OpReg r2)
+    -> go r1 r2
   MOVA _ (OpReg r1) (OpReg r2)
     -> go r1 r2
   MOVU _ (OpReg r1) (OpReg r2)
@@ -1172,7 +1132,7 @@ mkStackAllocInstr platform amount
         case platformArch platform of
             ArchX86_64 | needs_probe_call platform amount ->
                            [ MOV II64 (OpImm (ImmInt amount)) (OpReg rax)
-                           , CALL (Left $ strImmLit (fsLit "___chkstk_ms")) [mkRegFormat platform rax II64]
+                           , CALL (Left $ strImmLit (fsLit "___chkstk_ms")) [RegFormat rax II64]
                            , SUB II64 (OpReg rax) (OpReg rsp)
                            ]
                        | otherwise ->


=====================================
compiler/GHC/CmmToAsm/X86/Ppr.hs
=====================================
@@ -291,10 +291,10 @@ pprReg platform f r
       RegReal    (RealRegSingle i) ->
           if target32Bit platform then ppr32_reg_no f i
                                   else ppr64_reg_no f i
-      RegVirtual (VirtualRegI  u)  -> text "%vI_"   <> pprUniqueAlways u
-      RegVirtual (VirtualRegHi u)  -> text "%vHi_"  <> pprUniqueAlways u
-      RegVirtual (VirtualRegD  u)  -> text "%vD_"   <> pprUniqueAlways u
-      RegVirtual (VirtualRegV128 u) -> text "%vVec_" <> pprUniqueAlways u
+      RegVirtual (VirtualRegI    u) -> text "%vI_"   <> pprUniqueAlways u
+      RegVirtual (VirtualRegHi   u) -> text "%vHi_"  <> pprUniqueAlways u
+      RegVirtual (VirtualRegD    u) -> text "%vD_"   <> pprUniqueAlways u
+      RegVirtual (VirtualRegV128 u) -> text "%vV128_" <> pprUniqueAlways u
 
   where
     ppr32_reg_no :: Format -> Int -> doc
@@ -622,6 +622,13 @@ pprInstr platform i = case i of
    MOV format src dst
      -> pprFormatOpOp (text "mov") format src dst
 
+   MOV2 srcFmt dstFmt src dst
+     -> pprFormatOpOp (text "mov") fmt src dst
+     where
+      fmt = if formatInBytes srcFmt <= formatInBytes dstFmt
+            then srcFmt
+            else dstFmt
+
    CMOV cc format src dst
      -> pprCondOpReg (text "cmov") format cc src dst
 
@@ -1102,17 +1109,11 @@ pprInstr platform i = case i of
              FF32 -> text "d"
              FF64 -> text "q"
              _    -> panic "X86.Ppr.pprMovdOpOp: improper format for movd/movq."
-           out_fmt = case format of
-             II32 -> FF32
-             II64 -> FF64
-             FF32 -> II32
-             FF64 -> II64
-             _    -> panic "X86.Ppr.pprMovdOpOp: improper format for movd/movq."
        in line $ hcat [
            char '\t' <> name <> instr <> space,
            pprOperand platform format op1,
            comma,
-           pprOperand platform out_fmt op2
+           pprOperand platform (movdOutFormat format) op2
            ]
 
    pprFormatImmRegOp :: Line doc -> Format -> Imm -> Reg -> Operand -> doc



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/7d901da2e8b1c972a92a9988757d0a57439b3ec3...02a7018e1e141b2db9e997a59f314ba493a3ed29

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/7d901da2e8b1c972a92a9988757d0a57439b3ec3...02a7018e1e141b2db9e997a59f314ba493a3ed29
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20240622/1402b19a/attachment-0001.html>