[Git][ghc/ghc][wip/ncg-simd] more SIMD debugging

Thu Jun 20 17:43:36 UTC 2024


sheaf pushed to branch wip/ncg-simd at Glasgow Haskell Compiler / GHC


Commits:
85a0b5b0 by sheaf at 2024-06-20T19:43:04+02:00
more SIMD debugging

- - - - -


6 changed files:

- compiler/GHC/CmmToAsm/Instr.hs
- compiler/GHC/CmmToAsm/Reg/Linear.hs
- compiler/GHC/CmmToAsm/Reg/Target.hs
- compiler/GHC/CmmToAsm/X86/CodeGen.hs
- compiler/GHC/CmmToAsm/X86/Instr.hs
- compiler/GHC/CmmToAsm/X86/Regs.hs


Changes:

=====================================
compiler/GHC/CmmToAsm/Instr.hs
=====================================
@@ -97,7 +97,8 @@ class Instruction instr where
 
         -- | An instruction to spill a register into a spill slot.
         mkSpillInstr
-                :: NCGConfig
+                :: HasDebugCallStack
+                => NCGConfig
                 -> RegFormat    -- ^ the reg to spill
                 -> Int          -- ^ the current stack delta
                 -> Int          -- ^ spill slots to use
@@ -106,7 +107,8 @@ class Instruction instr where
 
         -- | An instruction to reload a register from a spill slot.
         mkLoadInstr
-                :: NCGConfig
+                :: HasDebugCallStack
+                => NCGConfig
                 -> RegFormat    -- ^ the reg to reload.
                 -> Int          -- ^ the current stack delta
                 -> Int          -- ^ the spill slot to use


=====================================
compiler/GHC/CmmToAsm/Reg/Linear.hs
=====================================
@@ -440,7 +440,7 @@ raInsn block_live new_instrs id (LiveInstr (Instr instr) (Just live))
                           not (dst `elemUFM` assig),
                           isRealReg src || isInReg src assig -> do
            case src of
-              RegReal rr -> setAssigR (addToUFM assig dst (InReg $ RealRegUsage rr fmt))
+              RegReal rr -> setAssigR (addToUFM assig dst (InReg $ mkRealRegUsage platform rr fmt))
                 -- if src is a fixed reg, then we just map dest to this
                 -- reg in the assignment.  src must be an allocatable reg,
                 -- otherwise it wouldn't be in r_dying.
@@ -509,7 +509,7 @@ genRaInsn block_live new_instrs block_id instr r_dying w_dying = do
     -- only read by this instr.  (the list is typically ~2 elements,
     -- so using nub isn't a problem).
     let virt_read :: [VirtualRegFormat]
-        virt_read = nubOrdOn virtualRegFormatReg [ VirtualRegFormat vr fmt | RegFormat (RegVirtual vr) fmt <- read ]
+        virt_read = nubOrdOn virtualRegFormatReg [ mkVirtualRegFormat vr fmt | RegFormat (RegVirtual vr) fmt <- read ]
 
 --     do
 --         let real_read       = nub [ rr      | (RegReal rr) <- read]
@@ -719,7 +719,7 @@ saveClobberedTemps clobbered dying
               (my_reg : _) -> do
                   setFreeRegsR (frAllocateReg platform my_reg freeRegs)
 
-                  let new_assign = addToUFM_Directly assig temp (InReg (RealRegUsage my_reg fmt))
+                  let new_assign = addToUFM_Directly assig temp (InReg (mkRealRegUsage platform my_reg fmt))
                   let instr = mkRegRegMoveInstr platform fmt
                                   (RegReal reg) (RegReal my_reg)
 
@@ -732,7 +732,7 @@ saveClobberedTemps clobbered dying
                   -- record why this reg was spilled for profiling
                   recordSpill (SpillClobber temp)
 
-                  let new_assign  = addToUFM_Directly assig temp (InBoth (RealRegUsage reg fmt) slot)
+                  let new_assign  = addToUFM_Directly assig temp (InBoth (mkRealRegUsage platform reg fmt) slot)
 
                   return (new_assign, (spill ++ instrs))
 
@@ -816,12 +816,12 @@ allocateRegsAndSpill
 allocateRegsAndSpill _       _    spills alloc []
         = return (spills, reverse alloc)
 
-allocateRegsAndSpill reading keep spills alloc (VirtualRegFormat r fmt:rs)
+allocateRegsAndSpill reading keep spills alloc (r@(VirtualRegFormat vr _fmt):rs)
  = do   assig <- toVRegMap <$> getAssigR
         -- pprTraceM "allocateRegsAndSpill:assig" (ppr (r:rs) $$ ppr assig)
         -- See Note [UniqFM and the register allocator]
-        let doSpill = allocRegsAndSpill_spill reading keep spills alloc (VirtualRegFormat r fmt) rs assig
-        case lookupUFM assig r of
+        let doSpill = allocRegsAndSpill_spill reading keep spills alloc r rs assig
+        case lookupUFM assig vr of
                 -- case (1a): already in a register
                 Just (InReg my_reg) ->
                         allocateRegsAndSpill reading keep spills (realReg my_reg:alloc) rs
@@ -832,14 +832,14 @@ allocateRegsAndSpill reading keep spills alloc (VirtualRegFormat r fmt:rs)
                 -- NB2. This is why we must process written registers here, even if they
                 -- are also read by the same instruction.
                 Just (InBoth my_reg _)
-                 -> do  when (not reading) (setAssigR $ toRegMap (addToUFM assig r (InReg my_reg)))
+                 -> do  when (not reading) (setAssigR $ toRegMap (addToUFM assig vr (InReg my_reg)))
                         allocateRegsAndSpill reading keep spills (realReg my_reg:alloc) rs
 
                 -- Not already in a register, so we need to find a free one...
                 Just (InMem slot) | reading   -> doSpill (ReadMem slot)
                                   | otherwise -> doSpill WriteMem
                 Nothing | reading   ->
-                   pprPanic "allocateRegsAndSpill: Cannot read from uninitialized register" (ppr r)
+                   pprPanic "allocateRegsAndSpill: Cannot read from uninitialized register" (ppr vr)
                    -- NOTE: if the input to the NCG contains some
                    -- unreachable blocks with junk code, this panic
                    -- might be triggered.  Make sure you only feed
@@ -873,14 +873,14 @@ allocRegsAndSpill_spill :: (FR freeRegs, Instruction instr)
                         -> UniqFM VirtualReg Loc
                         -> SpillLoc
                         -> RegM freeRegs ([instr], [RealReg])
-allocRegsAndSpill_spill reading keep spills alloc (VirtualRegFormat r fmt) rs assig spill_loc
+allocRegsAndSpill_spill reading keep spills alloc r@(VirtualRegFormat vr fmt) rs assig spill_loc
  = do   platform <- getPlatform
         freeRegs <- getFreeRegsR
-        let regclass = classOfVirtualReg r
-            freeRegs_thisClass  = frGetFreeRegs platform regclass freeRegs :: [RealReg]
+        let regclass = if isIntFormat fmt then RcInteger else RcFloatOrVector
+            freeRegs_thisClass = frGetFreeRegs platform regclass freeRegs :: [RealReg]
 
         -- Can we put the variable into a register it already was?
-        pref_reg <- findPrefRealReg r
+        pref_reg <- findPrefRealReg vr
 
         case freeRegs_thisClass of
          -- case (2): we have a free register
@@ -891,10 +891,10 @@ allocRegsAndSpill_spill reading keep spills alloc (VirtualRegFormat r fmt) rs as
                         = reg
                         | otherwise
                         = first_free
-                spills'   <- loadTemp platform (VirtualRegFormat r fmt) spill_loc final_reg spills
+                spills'   <- loadTemp platform r spill_loc final_reg spills
 
                 setAssigR $ toRegMap
-                          $ (addToUFM assig r $! newLocation spill_loc $ RealRegUsage final_reg fmt)
+                          $ (addToUFM assig vr $! newLocation spill_loc $ mkRealRegUsage platform final_reg fmt)
                 setFreeRegsR $  frAllocateReg platform final_reg freeRegs
 
                 allocateRegsAndSpill reading keep spills' (final_reg : alloc) rs
@@ -916,30 +916,30 @@ allocRegsAndSpill_spill reading keep spills alloc (VirtualRegFormat r fmt) rs as
                 let candidates = nonDetUFMToList candidates'
 
                 -- the vregs we could kick out that are already in a slot
-                let compat reg' r'
+                let compat reg'
                       =  targetClassOfRealReg platform reg'
-                      == classOfVirtualReg r'
+                      == regclass
                     candidates_inBoth :: [(Unique, RealRegUsage, StackSlot)]
                     candidates_inBoth
                         = [ (temp, reg, mem)
                           | (temp, InBoth reg mem) <- candidates
-                          , compat (realReg reg) r ]
+                          , compat (realReg reg) ]
 
                 -- the vregs we could kick out that are only in a reg
                 --      this would require writing the reg to a new slot before using it.
                 let candidates_inReg
                         = [ (temp, reg)
                           | (temp, InReg reg) <- candidates
-                          , compat (realReg reg) r ]
+                          , compat (realReg reg) ]
 
                 let result
 
                         -- we have a temporary that is in both register and mem,
                         -- just free up its register for use.
-                        | (temp, myRegUse@(RealRegUsage my_reg fmt), slot) : _      <- candidates_inBoth
-                        = do    spills' <- loadTemp platform (VirtualRegFormat r fmt) spill_loc my_reg spills
+                        | (temp, (RealRegUsage my_reg _old_fmt), slot) : _ <- candidates_inBoth
+                        = do    spills' <- loadTemp platform r spill_loc my_reg spills
                                 let assig1  = addToUFM_Directly assig temp (InMem slot)
-                                let assig2  = addToUFM assig1 r $! newLocation spill_loc myRegUse
+                                let assig2  = addToUFM assig1 vr $! newLocation spill_loc (mkRealRegUsage platform my_reg fmt)
 
                                 setAssigR $ toRegMap assig2
                                 allocateRegsAndSpill reading keep spills' (my_reg:alloc) rs
@@ -955,12 +955,12 @@ allocRegsAndSpill_spill reading keep spills alloc (VirtualRegFormat r fmt) rs as
                                 recordSpill (SpillAlloc temp_to_push_out)
 
                                 -- update the register assignment
-                                let assig1  = addToUFM_Directly assig temp_to_push_out   (InMem slot)
-                                let assig2  = addToUFM assig1 r                 $! newLocation spill_loc (RealRegUsage my_reg fmt)
+                                let assig1  = addToUFM_Directly assig temp_to_push_out (InMem slot)
+                                let assig2  = addToUFM assig1 vr $! newLocation spill_loc (mkRealRegUsage platform my_reg fmt)
                                 setAssigR $ toRegMap assig2
 
                                 -- if need be, load up a spilled temp into the reg we've just freed up.
-                                spills' <- loadTemp platform (VirtualRegFormat r fmt) spill_loc my_reg spills
+                                spills' <- loadTemp platform r spill_loc my_reg spills
 
                                 allocateRegsAndSpill reading keep
                                         (spill_store ++ spills')
@@ -971,8 +971,9 @@ allocRegsAndSpill_spill reading keep spills alloc (VirtualRegFormat r fmt) rs as
                         | otherwise
                         = pprPanic ("RegAllocLinear.allocRegsAndSpill: no spill candidates\n")
                         $ vcat
-                                [ text "allocating vreg:  " <> text (show r)
+                                [ text "allocating vreg:  " <> text (show vr)
                                 , text "assignment:       " <> ppr assig
+                                , text "format:           " <> ppr fmt
                                 , text "freeRegs:         " <> text (showRegs freeRegs)
                                 , text "initFreeRegs:     " <> text (showRegs (frInitFreeRegs platform `asTypeOf` freeRegs))
                                 ]


=====================================
compiler/GHC/CmmToAsm/Reg/Target.hs
=====================================
@@ -15,7 +15,7 @@ module GHC.CmmToAsm.Reg.Target (
         targetMkVirtualReg,
         targetRegDotColor,
         targetClassOfReg,
-        mkRegFormat, mapRegFormatSet,
+        mkVirtualRegFormat, mkRegFormat, mapRegFormatSet,
 )
 
 where
@@ -142,6 +142,17 @@ targetClassOfReg platform reg
    RegVirtual vr -> classOfVirtualReg vr
    RegReal rr -> targetClassOfRealReg platform rr
 
+mkVirtualRegFormat :: HasDebugCallStack => VirtualReg -> Format -> VirtualRegFormat
+mkVirtualRegFormat reg fmt
+  = assertPpr (regCls == fmtCls)
+    (vcat [ text "mkVirtualRegFormat: incompatible register & format"
+          , text "reg:" <+> ppr reg <+> dcolon <+> ppr regCls
+          , text "fmt:" <+> ppr fmt <+> parens (ppr fmtCls) ])
+  $ VirtualRegFormat reg fmt
+  where
+    regCls = classOfVirtualReg reg
+    fmtCls = formatRegClass fmt
+
 mkRegFormat :: HasDebugCallStack => Platform -> Reg -> Format -> RegFormat
 mkRegFormat platform reg fmt
   = assertPpr (regCls == fmtCls)


=====================================
compiler/GHC/CmmToAsm/X86/CodeGen.hs
=====================================
@@ -35,6 +35,7 @@ import GHC.CmmToAsm.X86.Cond
 import GHC.CmmToAsm.X86.Regs
 import GHC.CmmToAsm.X86.Ppr
 import GHC.CmmToAsm.X86.RegInfo
+import GHC.CmmToAsm.Reg.Target
 
 import GHC.Platform.Regs
 import GHC.CmmToAsm.CPrim
@@ -89,6 +90,7 @@ import Data.Maybe
 import Data.Word
 
 import qualified Data.Map as M
+import GHC.Platform.Reg.Class (RegClass(..))
 
 is32BitPlatform :: NatM Bool
 is32BitPlatform = do
@@ -518,24 +520,41 @@ getSomeReg expr = do
         return (reg, code)
 
 
+mkMOV :: HasDebugCallStack => Platform -> Format -> Operand -> Operand -> Instr
+mkMOV platform fmt op1 op2 =
+  assertPpr (all (== fmtCls) $ catMaybes [cls1, cls2])
+    (vcat [ text "invalid mkMOV instruction"
+          , text "fmt:" <+> ppr fmt
+          , case op1 of { OpReg r1 -> ppr r1 <+> dcolon <+> ppr (fromJust cls1); _ -> empty }
+          , case op2 of { OpReg r2 -> ppr r2 <+> dcolon <+> ppr (fromJust cls2); _ -> empty }
+          ])
+    $ MOV fmt op1 op2
+
+  where
+    fmtCls = if isIntFormat fmt then RcInteger else RcFloatOrVector
+    cls1 = case op1 of { OpReg r1 -> Just (targetClassOfReg platform r1); _ -> Nothing }
+    cls2 = case op2 of { OpReg r2 -> Just (targetClassOfReg platform r2); _ -> Nothing }
+
 assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
 assignMem_I64Code addrTree valueTree = do
+  platform <- getPlatform
   Amode addr addr_code <- getAmode addrTree
   RegCode64 vcode rhi rlo <- iselExpr64 valueTree
   let
         -- Little-endian store
-        mov_lo = MOV II32 (OpReg rlo) (OpAddr addr)
-        mov_hi = MOV II32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
+        mov_lo = mkMOV platform II32 (OpReg rlo) (OpAddr addr)
+        mov_hi = mkMOV platform II32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
   return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
 
 
 assignReg_I64Code :: CmmReg  -> CmmExpr -> NatM InstrBlock
 assignReg_I64Code (CmmLocal dst) valueTree = do
+   platform <- getPlatform
    RegCode64 vcode r_src_hi r_src_lo <- iselExpr64 valueTree
    let
          Reg64 r_dst_hi r_dst_lo = localReg64 dst
-         mov_lo = MOV II32 (OpReg r_src_lo) (OpReg r_dst_lo)
-         mov_hi = MOV II32 (OpReg r_src_hi) (OpReg r_dst_hi)
+         mov_lo = mkMOV platform II32 (OpReg r_src_lo) (OpReg r_dst_lo)
+         mov_hi = mkMOV platform II32 (OpReg r_src_hi) (OpReg r_dst_hi)
    return (
         vcode `snocOL` mov_lo `snocOL` mov_hi
      )
@@ -545,22 +564,24 @@ assignReg_I64Code _ _
 
 iselExpr64 :: HasDebugCallStack => CmmExpr -> NatM (RegCode64 InstrBlock)
 iselExpr64 (CmmLit (CmmInt i _)) = do
+  platform <- getPlatform
   Reg64 rhi rlo <- getNewReg64
   let
         r = fromIntegral (fromIntegral i :: Word32)
         q = fromIntegral (fromIntegral (i `shiftR` 32) :: Word32)
         code = toOL [
-                MOV II32 (OpImm (ImmInteger r)) (OpReg rlo),
-                MOV II32 (OpImm (ImmInteger q)) (OpReg rhi)
+                mkMOV platform II32 (OpImm (ImmInteger r)) (OpReg rlo),
+                mkMOV platform II32 (OpImm (ImmInteger q)) (OpReg rhi)
                 ]
   return (RegCode64 code rhi rlo)
 
 iselExpr64 (CmmLoad addrTree ty _) | isWord64 ty = do
+   platform <- getPlatform
    Amode addr addr_code <- getAmode addrTree
    Reg64 rhi rlo <- getNewReg64
    let
-        mov_lo = MOV II32 (OpAddr addr) (OpReg rlo)
-        mov_hi = MOV II32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
+        mov_lo = mkMOV platform II32 (OpAddr addr) (OpReg rlo)
+        mov_hi = mkMOV platform II32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
    return (
             RegCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi) rhi rlo
      )
@@ -570,41 +591,44 @@ iselExpr64 (CmmReg (CmmLocal local_reg)) = do
   return (RegCode64 nilOL hi lo)
 
 iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
+   platform <- getPlatform
    RegCode64 code1 r1hi r1lo <- iselExpr64 e1
    Reg64 rhi rlo <- getNewReg64
    let
         r = fromIntegral (fromIntegral i :: Word32)
         q = fromIntegral (fromIntegral (i `shiftR` 32) :: Word32)
         code =  code1 `appOL`
-                toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
+                toOL [ mkMOV platform II32 (OpReg r1lo) (OpReg rlo),
                        ADD II32 (OpImm (ImmInteger r)) (OpReg rlo),
-                       MOV II32 (OpReg r1hi) (OpReg rhi),
+                       mkMOV platform II32 (OpReg r1hi) (OpReg rhi),
                        ADC II32 (OpImm (ImmInteger q)) (OpReg rhi) ]
    return (RegCode64 code rhi rlo)
 
 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
+   platform <- getPlatform
    RegCode64 code1 r1hi r1lo <- iselExpr64 e1
    RegCode64 code2 r2hi r2lo <- iselExpr64 e2
    Reg64 rhi rlo <- getNewReg64
    let
         code =  code1 `appOL`
                 code2 `appOL`
-                toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
+                toOL [ mkMOV platform II32 (OpReg r1lo) (OpReg rlo),
                        ADD II32 (OpReg r2lo) (OpReg rlo),
-                       MOV II32 (OpReg r1hi) (OpReg rhi),
+                       mkMOV platform II32 (OpReg r1hi) (OpReg rhi),
                        ADC II32 (OpReg r2hi) (OpReg rhi) ]
    return (RegCode64 code rhi rlo)
 
 iselExpr64 (CmmMachOp (MO_Sub _) [e1,e2]) = do
+   platform <- getPlatform
    RegCode64 code1 r1hi r1lo <- iselExpr64 e1
    RegCode64 code2 r2hi r2lo <- iselExpr64 e2
    Reg64 rhi rlo <- getNewReg64
    let
         code =  code1 `appOL`
                 code2 `appOL`
-                toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
+                toOL [ mkMOV platform II32 (OpReg r1lo) (OpReg rlo),
                        SUB II32 (OpReg r2lo) (OpReg rlo),
-                       MOV II32 (OpReg r1hi) (OpReg rhi),
+                       mkMOV platform II32 (OpReg r1hi) (OpReg rhi),
                        SBB II32 (OpReg r2hi) (OpReg rhi) ]
    return (RegCode64 code rhi rlo)
 
@@ -637,44 +661,48 @@ iselExpr64 (CmmMachOp (MO_UU_Conv W8 W64) [expr]) = do
                           r_dst_lo
 
 iselExpr64 (CmmMachOp (MO_SS_Conv W32 W64) [expr]) = do
+     platform <- getPlatform
      code <- getAnyReg expr
      Reg64 r_dst_hi r_dst_lo <- getNewReg64
      return $ RegCode64 (code r_dst_lo `snocOL`
-                          MOV II32 (OpReg r_dst_lo) (OpReg eax) `snocOL`
+                          mkMOV platform II32 (OpReg r_dst_lo) (OpReg eax) `snocOL`
                           CLTD II32 `snocOL`
-                          MOV II32 (OpReg eax) (OpReg r_dst_lo) `snocOL`
-                          MOV II32 (OpReg edx) (OpReg r_dst_hi))
+                          mkMOV platform II32 (OpReg eax) (OpReg r_dst_lo) `snocOL`
+                          mkMOV platform II32 (OpReg edx) (OpReg r_dst_hi))
                           r_dst_hi
                           r_dst_lo
 
 iselExpr64 (CmmMachOp (MO_SS_Conv W16 W64) [expr]) = do
+     platform <- getPlatform
      (r, code) <- getByteReg expr
      Reg64 r_dst_hi r_dst_lo <- getNewReg64
      return $ RegCode64 (code `appOL` toOL [
                           MOVSxL II16 (OpReg r) (OpReg eax),
                           CLTD II32,
-                          MOV II32 (OpReg eax) (OpReg r_dst_lo),
-                          MOV II32 (OpReg edx) (OpReg r_dst_hi)])
+                          mkMOV platform II32 (OpReg eax) (OpReg r_dst_lo),
+                          mkMOV platform II32 (OpReg edx) (OpReg r_dst_hi)])
                           r_dst_hi
                           r_dst_lo
 
 iselExpr64 (CmmMachOp (MO_SS_Conv W8 W64) [expr]) = do
+     platform <- getPlatform
      (r, code) <- getByteReg expr
      Reg64 r_dst_hi r_dst_lo <- getNewReg64
      return $ RegCode64 (code `appOL` toOL [
                           MOVSxL II8 (OpReg r) (OpReg eax),
                           CLTD II32,
-                          MOV II32 (OpReg eax) (OpReg r_dst_lo),
-                          MOV II32 (OpReg edx) (OpReg r_dst_hi)])
+                          mkMOV platform II32 (OpReg eax) (OpReg r_dst_lo),
+                          mkMOV platform II32 (OpReg edx) (OpReg r_dst_hi)])
                           r_dst_hi
                           r_dst_lo
 
 iselExpr64 (CmmMachOp (MO_S_Neg _) [expr]) = do
+   platform <- getPlatform
    RegCode64 code rhi rlo <- iselExpr64 expr
    Reg64 rohi rolo <- getNewReg64
    let
         ocode = code `appOL`
-                toOL [ MOV II32 (OpReg rlo) (OpReg rolo),
+                toOL [ mkMOV platform II32 (OpReg rlo) (OpReg rolo),
                        XOR II32 (OpReg rohi) (OpReg rohi),
                        NEGI II32 (OpReg rolo),
                        SBB II32 (OpReg rhi) (OpReg rohi) ]
@@ -690,6 +718,7 @@ iselExpr64 (CmmMachOp (MO_S_Neg _) [expr]) = do
 -- Note that @(r1hi * r2hi) << 64@ can be dropped because it overflows completely.
 
 iselExpr64 (CmmMachOp (MO_Mul _) [e1,e2]) = do
+   platform <- getPlatform
    RegCode64 code1 r1hi r1lo <- iselExpr64 e1
    RegCode64 code2 r2hi r2lo <- iselExpr64 e2
    Reg64 rhi rlo <- getNewReg64
@@ -697,26 +726,27 @@ iselExpr64 (CmmMachOp (MO_Mul _) [e1,e2]) = do
    let
         code =  code1 `appOL`
                 code2 `appOL`
-                toOL [ MOV  II32 (OpReg r1lo) (OpReg eax),
-                       MOV  II32 (OpReg r2lo) (OpReg tmp),
-                       MOV  II32 (OpReg r1hi) (OpReg rhi),
+                toOL [ mkMOV platform  II32 (OpReg r1lo) (OpReg eax),
+                       mkMOV platform  II32 (OpReg r2lo) (OpReg tmp),
+                       mkMOV platform  II32 (OpReg r1hi) (OpReg rhi),
                        IMUL II32 (OpReg tmp) (OpReg rhi),
-                       MOV  II32 (OpReg r2hi) (OpReg rlo),
+                       mkMOV platform  II32 (OpReg r2hi) (OpReg rlo),
                        IMUL II32 (OpReg eax) (OpReg rlo),
                        ADD  II32 (OpReg rlo) (OpReg rhi),
                        MUL2 II32 (OpReg tmp),
                        ADD  II32 (OpReg edx) (OpReg rhi),
-                       MOV  II32 (OpReg eax) (OpReg rlo)
+                       mkMOV platform  II32 (OpReg eax) (OpReg rlo)
                      ]
    return (RegCode64 code rhi rlo)
 
 iselExpr64 (CmmMachOp (MO_S_MulMayOflo W64) _) = do
+   platform <- getPlatform
    -- Performance sensitive users won't use 32 bit so let's keep it simple:
    -- We always return a (usually false) positive.
    Reg64 rhi rlo <- getNewReg64
    let code = toOL   [
-                       MOV  II32 (OpImm (ImmInt 1)) (OpReg rhi),
-                       MOV  II32 (OpImm (ImmInt 1)) (OpReg rlo)
+                       mkMOV platform  II32 (OpImm (ImmInt 1)) (OpReg rhi),
+                       mkMOV platform  II32 (OpImm (ImmInt 1)) (OpReg rlo)
                      ]
    return (RegCode64 code rhi rlo)
 
@@ -730,6 +760,7 @@ iselExpr64 (CmmMachOp (MO_S_MulMayOflo W64) _) = do
 -- the contents of @rlo@ to @rhi@ and clear @rlo at .
 
 iselExpr64 (CmmMachOp (MO_Shl _) [e1,e2]) = do
+   platform <- getPlatform
    RegCode64 code1 r1hi r1lo <- iselExpr64 e1
    code2 <- getAnyReg e2
    Reg64 rhi rlo <- getNewReg64
@@ -738,15 +769,15 @@ iselExpr64 (CmmMachOp (MO_Shl _) [e1,e2]) = do
    let
         code =  code1 `appOL`
                 code2 ecx `appOL`
-                toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
-                       MOV II32 (OpReg r1hi) (OpReg rhi),
+                toOL [ mkMOV platform II32 (OpReg r1lo) (OpReg rlo),
+                       mkMOV platform II32 (OpReg r1hi) (OpReg rhi),
                        SHLD II32 (OpReg ecx) (OpReg rlo) (OpReg rhi),
                        SHL II32 (OpReg ecx) (OpReg rlo),
                        TEST II32 (OpImm (ImmInt 32)) (OpReg ecx),
                        JXX EQQ lbl2,
                        JXX ALWAYS lbl1,
                        NEWBLOCK lbl1,
-                       MOV II32 (OpReg rlo) (OpReg rhi),
+                       mkMOV platform II32 (OpReg rlo) (OpReg rhi),
                        XOR II32 (OpReg rlo) (OpReg rlo),
                        JXX ALWAYS lbl2,
                        NEWBLOCK lbl2
@@ -760,6 +791,7 @@ iselExpr64 (CmmMachOp (MO_Shl _) [e1,e2]) = do
 -- To accomplish that we shift @rhi@ by 31.
 
 iselExpr64 (CmmMachOp (MO_S_Shr _) [e1,e2]) = do
+   platform <- getPlatform
    RegCode64 code1 r1hi r1lo <- iselExpr64 e1
    (r2, code2) <- getSomeReg e2
    Reg64 rhi rlo <- getNewReg64
@@ -768,16 +800,16 @@ iselExpr64 (CmmMachOp (MO_S_Shr _) [e1,e2]) = do
    let
         code =  code1 `appOL`
                 code2 `appOL`
-                toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
-                       MOV II32 (OpReg r1hi) (OpReg rhi),
-                       MOV II32 (OpReg r2) (OpReg ecx),
+                toOL [ mkMOV platform II32 (OpReg r1lo) (OpReg rlo),
+                       mkMOV platform II32 (OpReg r1hi) (OpReg rhi),
+                       mkMOV platform II32 (OpReg r2) (OpReg ecx),
                        SHRD II32 (OpReg ecx) (OpReg rhi) (OpReg rlo),
                        SAR II32 (OpReg ecx) (OpReg rhi),
                        TEST II32 (OpImm (ImmInt 32)) (OpReg ecx),
                        JXX EQQ lbl2,
                        JXX ALWAYS lbl1,
                        NEWBLOCK lbl1,
-                       MOV II32 (OpReg rhi) (OpReg rlo),
+                       mkMOV platform II32 (OpReg rhi) (OpReg rlo),
                        SAR II32 (OpImm (ImmInt 31)) (OpReg rhi),
                        JXX ALWAYS lbl2,
                        NEWBLOCK lbl2
@@ -787,6 +819,7 @@ iselExpr64 (CmmMachOp (MO_S_Shr _) [e1,e2]) = do
 -- Similar to the above.
 
 iselExpr64 (CmmMachOp (MO_U_Shr _) [e1,e2]) = do
+   platform <- getPlatform
    RegCode64 code1 r1hi r1lo <- iselExpr64 e1
    (r2, code2) <- getSomeReg e2
    Reg64 rhi rlo <- getNewReg64
@@ -795,16 +828,16 @@ iselExpr64 (CmmMachOp (MO_U_Shr _) [e1,e2]) = do
    let
         code =  code1 `appOL`
                 code2 `appOL`
-                toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
-                       MOV II32 (OpReg r1hi) (OpReg rhi),
-                       MOV II32 (OpReg r2) (OpReg ecx),
+                toOL [ mkMOV platform II32 (OpReg r1lo) (OpReg rlo),
+                       mkMOV platform II32 (OpReg r1hi) (OpReg rhi),
+                       mkMOV platform II32 (OpReg r2) (OpReg ecx),
                        SHRD II32 (OpReg ecx) (OpReg rhi) (OpReg rlo),
                        SHR II32 (OpReg ecx) (OpReg rhi),
                        TEST II32 (OpImm (ImmInt 32)) (OpReg ecx),
                        JXX EQQ lbl2,
                        JXX ALWAYS lbl1,
                        NEWBLOCK lbl1,
-                       MOV II32 (OpReg rhi) (OpReg rlo),
+                       mkMOV platform II32 (OpReg rhi) (OpReg rlo),
                        XOR II32 (OpReg rhi) (OpReg rhi),
                        JXX ALWAYS lbl2,
                        NEWBLOCK lbl2
@@ -816,12 +849,13 @@ iselExpr64 (CmmMachOp (MO_Or  _) [e1,e2]) = iselExpr64ParallelBin OR  e1 e2
 iselExpr64 (CmmMachOp (MO_Xor _) [e1,e2]) = iselExpr64ParallelBin XOR e1 e2
 
 iselExpr64 (CmmMachOp (MO_Not _) [e1]) = do
+   platform <- getPlatform
    RegCode64 code1 r1hi r1lo <- iselExpr64 e1
    Reg64 rhi rlo <- getNewReg64
    let
         code =  code1 `appOL`
-                toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
-                       MOV II32 (OpReg r1hi) (OpReg rhi),
+                toOL [ mkMOV platform II32 (OpReg r1lo) (OpReg rlo),
+                       mkMOV platform II32 (OpReg r1hi) (OpReg rhi),
                        NOT II32 (OpReg rlo),
                        NOT II32 (OpReg rhi)
                      ]
@@ -837,14 +871,15 @@ iselExpr64 expr
 iselExpr64ParallelBin :: (Format -> Operand -> Operand -> Instr)
                       -> CmmExpr -> CmmExpr -> NatM (RegCode64 (OrdList Instr))
 iselExpr64ParallelBin op e1 e2 = do
+   platform <- getPlatform
    RegCode64 code1 r1hi r1lo <- iselExpr64 e1
    RegCode64 code2 r2hi r2lo <- iselExpr64 e2
    Reg64 rhi rlo <- getNewReg64
    let
         code =  code1 `appOL`
                 code2 `appOL`
-                toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
-                       MOV II32 (OpReg r1hi) (OpReg rhi),
+                toOL [ mkMOV platform II32 (OpReg r1lo) (OpReg rlo),
+                       mkMOV platform II32 (OpReg r1hi) (OpReg rhi),
                        op  II32 (OpReg r2lo) (OpReg rlo),
                        op  II32 (OpReg r2hi) (OpReg rhi)
                      ]
@@ -993,7 +1028,8 @@ getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad addr _ _])
 
 getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad addr _ _])
  | not is32Bit = do
-  code <- intLoadCode (MOV II32) addr -- 32-bit loads zero-extend
+  platform <- getPlatform
+  code <- intLoadCode (mkMOV platform II32) addr -- 32-bit loads zero-extend
   return (Any II64 code)
 
 getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad addr _ _])
@@ -1060,16 +1096,16 @@ getRegister' platform is32Bit (CmmMachOp mop [x]) = do -- unary MachOps
       MO_SS_Conv W16 W32 -> integerExtend W16 W32 MOVSxL x
       MO_SS_Conv W8  W16 -> integerExtend W8  W16 MOVSxL x
 
-      -- We don't care about the upper bits for MO_XX_Conv, so MOV is enough. However, on 32-bit we
+      -- We don't care about the upper bits for MO_XX_Conv, so mkMOV platform is enough. However, on 32-bit we
       -- have 8-bit registers only for a few registers (as opposed to x86-64 where every register
       -- has 8-bit version). So for 32-bit code, we'll just zero-extend.
       MO_XX_Conv W8  W32
           | is32Bit   -> integerExtend W8 W32 MOVZxL x
-          | otherwise -> integerExtend W8 W32 MOV x
+          | otherwise -> integerExtend W8 W32 (mkMOV platform) x
       MO_XX_Conv W8  W16
           | is32Bit   -> integerExtend W8 W16 MOVZxL x
-          | otherwise -> integerExtend W8 W16 MOV x
-      MO_XX_Conv W16 W32 -> integerExtend W16 W32 MOV x
+          | otherwise -> integerExtend W8 W16 (mkMOV platform) x
+      MO_XX_Conv W16 W32 -> integerExtend W16 W32 (mkMOV platform) x
 
       MO_UU_Conv W8  W64 | not is32Bit -> integerExtend W8  W64 MOVZxL x
       MO_UU_Conv W16 W64 | not is32Bit -> integerExtend W16 W64 MOVZxL x
@@ -1082,10 +1118,10 @@ getRegister' platform is32Bit (CmmMachOp mop [x]) = do -- unary MachOps
       -- away as an unnecessary reg-to-reg move, so we keep it in
       -- the form of a movzl and print it as a movl later.
       -- This doesn't apply to MO_XX_Conv since in this case we don't care about
-      -- the upper bits. So we can just use MOV.
-      MO_XX_Conv W8  W64 | not is32Bit -> integerExtend W8  W64 MOV x
-      MO_XX_Conv W16 W64 | not is32Bit -> integerExtend W16 W64 MOV x
-      MO_XX_Conv W32 W64 | not is32Bit -> integerExtend W32 W64 MOV x
+      -- the upper bits. So we can just use mkMOV platform.
+      MO_XX_Conv W8  W64 | not is32Bit -> integerExtend W8  W64 (mkMOV platform) x
+      MO_XX_Conv W16 W64 | not is32Bit -> integerExtend W16 W64 (mkMOV platform) x
+      MO_XX_Conv W32 W64 | not is32Bit -> integerExtend W32 W64 (mkMOV platform) x
 
       MO_FF_Conv W32 W64 -> coerceFP2FP W64 x
       MO_FF_Conv W64 W32 -> coerceFP2FP W32 x
@@ -1309,7 +1345,7 @@ getRegister' platform is32Bit (CmmMachOp mop [x]) = do -- unary MachOps
           (reg, exp) <- getSomeReg expr
           let fmt = VecFormat len FmtInt64
           return $ Any fmt (\dst -> exp `snocOL`
-                                    (MOV II64 (OpReg reg) (OpReg dst)) `snocOL`
+                                    (mkMOV platform II64 (OpReg reg) (OpReg dst)) `snocOL`
                                     (PUNPCKLQDQ fmt (OpReg dst) dst) `snocOL`
                                     (PUNPCKLQDQ fmt (OpReg dst) dst) `snocOL`
                                     (PUNPCKLQDQ fmt (OpReg dst) dst) `snocOL`
@@ -1704,7 +1740,7 @@ getRegister' platform is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
           imm      = litToImm lit
           code dst
             = case lit of
-                CmmInt 0 _ -> exp `snocOL` (MOV FF32 (OpReg r) (OpReg dst))
+                CmmInt 0 _ -> exp `snocOL` (mkMOV platform FF32 (OpReg r) (OpReg dst))
                 CmmInt _ _ -> exp `snocOL` (VPSHUFD format imm (OpReg r) dst)
                 _          -> panic "Error in offset while unpacking"
       return (Any format code)
@@ -1715,7 +1751,7 @@ getRegister' platform is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
           code dst
             = case lit of
                 CmmInt 0 _ -> exp `snocOL`
-                              (MOV FF64 (OpReg r) (OpReg dst))
+                              (mkMOV platform FF64 (OpReg r) (OpReg dst))
                 CmmInt 1 _ -> exp `snocOL`
                               (MOVHLPS format (OpReg r) dst)
                 _          -> panic "Error in offset while unpacking"
@@ -1757,10 +1793,10 @@ getRegister' platform is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
       let code dst
             = case lit of
                 CmmInt 0 _ -> exp `snocOL`
-                              (MOV II64 (OpReg r) (OpReg dst))
+                              (mkMOV platform II64 (OpReg r) (OpReg dst))
                 CmmInt 1 _ -> exp `snocOL`
                               (MOVHLPS fmt (OpReg r) tmp) `snocOL`
-                              (MOV II64 (OpReg tmp) (OpReg dst))
+                              (mkMOV platform II64 (OpReg tmp) (OpReg dst))
                 _          -> panic "Error in offset while unpacking"
       return (Any fmt code)
     vector_int_unpack_sse _ w c e
@@ -1901,11 +1937,11 @@ getRegister' platform _is32Bit (CmmMachOp mop [x, y, z]) = do -- ternary MachOps
               = case offset of
                   CmmInt 0 _ -> valExp `appOL`
                                 vecExp `snocOL`
-                                (MOV FF64 (OpReg valReg) (OpReg dst)) `snocOL`
+                                (mkMOV platform FF64 (OpReg valReg) (OpReg dst)) `snocOL`
                                 (SHUFPD fmt (ImmInt 0b00) (OpReg vecReg) dst)
                   CmmInt 1 _ -> valExp `appOL`
                                 vecExp `snocOL`
-                                (MOV FF64 (OpReg vecReg) (OpReg dst)) `snocOL`
+                                (mkMOV platform FF64 (OpReg vecReg) (OpReg dst)) `snocOL`
                                 (SHUFPD fmt (ImmInt 0b00) (OpReg valReg) dst)
                   _ -> pprPanic "MO_VF_Insert DoubleX2: unsupported offset" (ppr offset)
          in return $ Any fmt code
@@ -1942,12 +1978,12 @@ getRegister' platform _is32Bit (CmmMachOp mop [x, y, z]) = do -- ternary MachOps
                   CmmInt 0 _ -> valExp `appOL`
                                 vecExp `snocOL`
                                 (MOVHLPS fmt (OpReg vecReg) tmp) `snocOL`
-                                (MOV II64 (OpReg valReg) (OpReg dst)) `snocOL`
+                                (mkMOV platform II64 (OpReg valReg) (OpReg dst)) `snocOL`
                                 (PUNPCKLQDQ fmt (OpReg tmp) dst)
                   CmmInt 1 _ -> valExp `appOL`
                                 vecExp `snocOL`
-                                (MOV fmt (OpReg vecReg) (OpReg dst)) `snocOL`
-                                (MOV II64 (OpReg valReg) (OpReg tmp)) `snocOL`
+                                (mkMOV platform fmt (OpReg vecReg) (OpReg dst)) `snocOL`
+                                (mkMOV platform II64 (OpReg valReg) (OpReg tmp)) `snocOL`
                                 (PUNPCKLQDQ fmt (OpReg tmp) dst)
                   _ -> pprPanic "MO_V_Insert Int64X2: unsupported offset" (ppr offset)
          in return $ Any fmt code
@@ -1983,14 +2019,16 @@ getRegister' _ _ (CmmLoad mem pk _)
 getRegister' _ is32Bit (CmmLoad mem pk _)
   | is32Bit && not (isWord64 pk)
   = do
+    platform <- getPlatform
+    let
+      instr = case width of
+                W8     -> MOVZxL II8
+                _other -> mkMOV platform format
     code <- intLoadCode instr mem
     return (Any format code)
   where
     width = typeWidth pk
     format = intFormat width
-    instr = case width of
-                W8     -> MOVZxL II8
-                _other -> MOV format
         -- We always zero-extend 8-bit loads, if we
         -- can't think of anything better.  This is because
         -- we can't guarantee access to an 8-bit variant of every register
@@ -2001,7 +2039,8 @@ getRegister' _ is32Bit (CmmLoad mem pk _)
 getRegister' _ is32Bit (CmmLoad mem pk _)
  | not is32Bit
   = do
-    code <- intLoadCode (MOV format) mem
+    platform <- getPlatform
+    code <- intLoadCode (mkMOV platform format) mem
     return (Any format code)
   where format = intFormat $ typeWidth pk
 
@@ -2042,7 +2081,7 @@ getRegister' platform is32Bit (CmmLit lit)
   | not is32Bit, isWord64 (cmmLitType platform lit), not (isBigLit lit)
   = let
         imm = litToImm lit
-        code dst = unitOL (MOV II32 (OpImm imm) (OpReg dst))
+        code dst = unitOL (mkMOV platform II32 (OpImm imm) (OpReg dst))
     in
         return (Any II64 code)
   where
@@ -2071,7 +2110,7 @@ getRegister' platform _ (CmmLit lit)
       = do
       let format = cmmTypeFormat ctype
           imm = litToImm lit
-          code dst = unitOL (MOV format (OpImm imm) (OpReg dst))
+          code dst = unitOL (mkMOV platform format (OpImm imm) (OpReg dst))
       return (Any format code)
 
 getRegister' platform _ other
@@ -2409,9 +2448,10 @@ memConstant align lit = do
 
 loadFloatAmode :: Width -> AddrMode -> InstrBlock -> NatM Register
 loadFloatAmode w addr addr_code = do
+  platform <- getPlatform
   let format = floatFormat w
       code dst = addr_code `snocOL`
-                    MOV format (OpAddr addr) (OpReg dst)
+                    mkMOV platform format (OpAddr addr) (OpReg dst)
 
   return (Any format code)
 
@@ -2584,19 +2624,19 @@ condIntCode' platform cond x y
         cmpExact :: OrdList Instr
         cmpExact =
           toOL
-            [ MOV II32 (OpReg r1_hi) (OpReg tmp1)
-            , MOV II32 (OpReg r1_lo) (OpReg tmp2)
+            [ mkMOV platform II32 (OpReg r1_hi) (OpReg tmp1)
+            , mkMOV platform II32 (OpReg r1_lo) (OpReg tmp2)
             , XOR II32 (OpReg r2_hi) (OpReg tmp1)
             , XOR II32 (OpReg r2_lo) (OpReg tmp2)
             , OR  II32 (OpReg tmp1)  (OpReg tmp2)
             ]
         cmpGE = toOL
-            [ MOV II32 (OpReg r1_hi) (OpReg tmp1)
+            [ mkMOV platform II32 (OpReg r1_hi) (OpReg tmp1)
             , CMP II32 (OpReg r2_lo) (OpReg r1_lo)
             , SBB II32 (OpReg r2_hi) (OpReg tmp1)
             ]
         cmpLE = toOL
-            [ MOV II32 (OpReg r2_hi) (OpReg tmp1)
+            [ mkMOV platform II32 (OpReg r2_hi) (OpReg tmp1)
             , CMP II32 (OpReg r1_lo) (OpReg r2_lo)
             , SBB II32 (OpReg r1_hi) (OpReg tmp1)
             ]
@@ -2736,10 +2776,10 @@ assignMem_IntCode pk addr src = do
     let
         code = code_src `appOL`
                code_addr `snocOL`
-                  MOV pk op_src (OpAddr addr)
+                  mkMOV platform pk op_src (OpAddr addr)
         -- NOTE: op_src is stable, so it will still be valid
         -- after code_addr.  This may involve the introduction
-        -- of an extra MOV to a temporary register, but we hope
+        -- of an extra mkMOV platform to a temporary register, but we hope
         -- the register allocator will get rid of it.
     --
     return code
@@ -2754,7 +2794,8 @@ assignMem_IntCode pk addr src = do
 
 -- Assign; dst is a reg, rhs is mem
 assignReg_IntCode pk reg (CmmLoad src _ _) = do
-  load_code <- intLoadCode (MOV pk) src
+  platform <- getPlatform
+  load_code <- intLoadCode (mkMOV platform pk) src
   platform <- ncgPlatform <$> getConfig
   return (load_code (getRegisterReg platform reg))
 
@@ -2767,12 +2808,13 @@ assignReg_IntCode _ reg src = do
 
 -- Floating point assignment to memory
 assignMem_FltCode pk addr src = do
+  platform <- getPlatform
   (src_reg, src_code) <- getNonClobberedReg src
   Amode addr addr_code <- getAmode addr
   let
         code = src_code `appOL`
                addr_code `snocOL`
-               MOV pk (OpReg src_reg) (OpAddr addr)
+               mkMOV platform pk (OpReg src_reg) (OpAddr addr)
 
   return code
 
@@ -3324,7 +3366,7 @@ genCCall32 addr _ dest_regs args = do
                                       in
 
                                       -- assume SSE2
-                                       MOV format (OpReg reg) (OpAddr addr)
+                                       mkMOV platform format (OpReg reg) (OpAddr addr)
 
                                      ]
                                )
@@ -3406,12 +3448,12 @@ genCCall32 addr _ dest_regs args = do
                                    -- NB: This code will need to be
                                    -- revisited once GHC does more work around
                                    -- SIGFPE f
-                                   MOV fmt (OpAddr tmp_amode) (OpReg r_dest),
+                                   mkMOV platform fmt (OpAddr tmp_amode) (OpReg r_dest),
                                    ADD II32 (OpImm (ImmInt b)) (OpReg esp),
                                    DELTA delta0]
-              | isWord64 ty    = toOL [MOV II32 (OpReg eax) (OpReg r_dest),
-                                        MOV II32 (OpReg edx) (OpReg r_dest_hi)]
-              | otherwise      = unitOL (MOV (intFormat w)
+              | isWord64 ty    = toOL [mkMOV platform II32 (OpReg eax) (OpReg r_dest),
+                                        mkMOV platform II32 (OpReg edx) (OpReg r_dest_hi)]
+              | otherwise      = unitOL (mkMOV platform (intFormat w)
                                              (OpReg eax)
                                              (OpReg r_dest))
               where
@@ -3519,7 +3561,7 @@ genCCall64 addr conv dest_regs args = do
                                 -- If we are calling a varargs function
                                 -- then we need to define ireg as well
                                 -- as freg
-                                MOV II64 (OpReg freg) (OpReg ireg))
+                                mkMOV platform II64 (OpReg freg) (OpReg ireg))
             | otherwise = do
                  arg_code <- getAnyReg arg
                  load_args_win rest (ireg : usedInt) usedFP regs
@@ -3538,7 +3580,7 @@ genCCall64 addr conv dest_regs args = do
              let code' = code `appOL` arg_code `appOL` toOL [
                             SUB (intFormat (wordWidth platform)) (OpImm (ImmInt arg_size)) (OpReg rsp),
                             DELTA (delta-arg_size),
-                            MOV (floatFormat width) (OpReg arg_reg) (OpAddr (spRel platform 0))]
+                            mkMOV platform (floatFormat width) (OpReg arg_reg) (OpAddr (spRel platform 0))]
              push_args rest code'
 
            | otherwise = do
@@ -3630,7 +3672,7 @@ genCCall64 addr conv dest_regs args = do
         -- It's not safe to omit this assignment, even if the number
         -- of SSE2 regs in use is zero.  If %al is larger than 8
         -- on entry to a varargs function, seg faults ensue.
-        assign_eax n = unitOL (MOV II32 (OpImm (ImmInt n)) (OpReg eax))
+        assign_eax n = unitOL (mkMOV platform II32 (OpImm (ImmInt n)) (OpReg eax))
 
     let call = callinsns `appOL`
                toOL (
@@ -3647,13 +3689,13 @@ genCCall64 addr conv dest_regs args = do
         assign_code []     = nilOL
         assign_code [dest] =
           case typeWidth rep of
-                W32 | isFloatType rep -> unitOL (MOV (floatFormat W32)
+                W32 | isFloatType rep -> unitOL (mkMOV platform (floatFormat W32)
                                                      (OpReg xmm0)
                                                      (OpReg r_dest))
-                W64 | isFloatType rep -> unitOL (MOV (floatFormat W64)
+                W64 | isFloatType rep -> unitOL (mkMOV platform (floatFormat W64)
                                                      (OpReg xmm0)
                                                      (OpReg r_dest))
-                _ -> unitOL (MOV (cmmTypeFormat rep) (OpReg rax) (OpReg r_dest))
+                _ -> unitOL (mkMOV platform (cmmTypeFormat rep) (OpReg rax) (OpReg r_dest))
           where
                 rep = localRegType dest
                 r_dest = getRegisterReg platform  (CmmLocal dest)
@@ -3767,7 +3809,7 @@ genSwitch expr targets = do
             let op = OpAddr (AddrBaseIndex (EABaseReg tableReg) (EAIndex reg (platformWordSizeInBytes platform)) (ImmInt 0))
                 code = e_code `appOL` toOL
                     [ LEA (archWordFormat is32bit) (OpAddr (AddrBaseIndex EABaseRip EAIndexNone (ImmCLbl lbl))) (OpReg tableReg)
-                    , MOV (archWordFormat is32bit) op (OpReg targetReg)
+                    , mkMOV platform (archWordFormat is32bit) op (OpReg targetReg)
                     , JMP_TBL (OpReg targetReg) ids (Section ReadOnlyData lbl) lbl
                     ]
             return code
@@ -4002,6 +4044,7 @@ trivialCode' _ width instr _ a b
 genTrivialCode :: Format -> (Operand -> Operand -> Instr)
                -> CmmExpr -> CmmExpr -> NatM Register
 genTrivialCode rep instr a b = do
+  platform <- getPlatform
   (b_op, b_code) <- getNonClobberedOperand b
   a_code <- getAnyReg a
   tmp <- getNewRegNat rep
@@ -4015,7 +4058,7 @@ genTrivialCode rep instr a b = do
      code dst
         | dst `regClashesWithOp` b_op =
                 b_code `appOL`
-                unitOL (MOV rep b_op (OpReg tmp)) `appOL`
+                unitOL (mkMOV platform rep b_op (OpReg tmp)) `appOL`
                 a_code dst `snocOL`
                 instr (OpReg tmp) (OpReg dst)
         | otherwise =
@@ -4169,6 +4212,7 @@ coerceFP2FP to x = do
 
 sse2NegCode :: Width -> CmmExpr -> NatM Register
 sse2NegCode w x = do
+  platform <- getPlatform
   let fmt = floatFormat w
   x_code <- getAnyReg x
   -- This is how gcc does it, so it can't be that bad:
@@ -4188,7 +4232,7 @@ sse2NegCode w x = do
   tmp <- getNewRegNat fmt
   let
     code dst = x_code dst `appOL` amode_code `appOL` toOL [
-        MOV fmt (OpAddr amode) (OpReg tmp),
+        mkMOV platform fmt (OpAddr amode) (OpReg tmp),
         XOR fmt (OpReg tmp) (OpReg dst)
         ]
   --
@@ -4285,31 +4329,34 @@ genAtomicRMW bid width amop dst addr n = do
             -> Reg       -- Register containing argument
             -> AddrMode  -- Address of location to mutate
             -> NatM (OrdList Instr,BlockId) -- TODO: Return Maybe BlockId
-    op_code dst_r arg amode = case amop of
-        -- In the common case where dst_r is a virtual register the
-        -- final move should go away, because it's the last use of arg
-        -- and the first use of dst_r.
-        AMO_Add  -> return $ (toOL [ LOCK (XADD format (OpReg arg) (OpAddr amode))
-                                   , MOV format (OpReg arg) (OpReg dst_r)
-                                   ], bid)
-        AMO_Sub  -> return $ (toOL [ NEGI format (OpReg arg)
-                                   , LOCK (XADD format (OpReg arg) (OpAddr amode))
-                                   , MOV format (OpReg arg) (OpReg dst_r)
-                                   ], bid)
-        -- In these cases we need a new block id, and have to return it so
-        -- that later instruction selection can reference it.
-        AMO_And  -> cmpxchg_code (\ src dst -> unitOL $ AND format src dst)
-        AMO_Nand -> cmpxchg_code (\ src dst -> toOL [ AND format src dst
-                                                    , NOT format dst
-                                                    ])
-        AMO_Or   -> cmpxchg_code (\ src dst -> unitOL $ OR format src dst)
-        AMO_Xor  -> cmpxchg_code (\ src dst -> unitOL $ XOR format src dst)
+    op_code dst_r arg amode = do
+        platform <- getPlatform
+        case amop of
+          -- In the common case where dst_r is a virtual register the
+          -- final move should go away, because it's the last use of arg
+          -- and the first use of dst_r.
+          AMO_Add  -> return $ (toOL [ LOCK (XADD format (OpReg arg) (OpAddr amode))
+                                     , mkMOV platform format (OpReg arg) (OpReg dst_r)
+                                     ], bid)
+          AMO_Sub  -> return $ (toOL [ NEGI format (OpReg arg)
+                                     , LOCK (XADD format (OpReg arg) (OpAddr amode))
+                                     , mkMOV platform format (OpReg arg) (OpReg dst_r)
+                                     ], bid)
+          -- In these cases we need a new block id, and have to return it so
+          -- that later instruction selection can reference it.
+          AMO_And  -> cmpxchg_code (\ src dst -> unitOL $ AND format src dst)
+          AMO_Nand -> cmpxchg_code (\ src dst -> toOL [ AND format src dst
+                                                      , NOT format dst
+                                                      ])
+          AMO_Or   -> cmpxchg_code (\ src dst -> unitOL $ OR format src dst)
+          AMO_Xor  -> cmpxchg_code (\ src dst -> unitOL $ XOR format src dst)
       where
         -- Simulate operation that lacks a dedicated instruction using
         -- cmpxchg.
         cmpxchg_code :: (Operand -> Operand -> OrdList Instr)
                      -> NatM (OrdList Instr, BlockId)
         cmpxchg_code instrs = do
+            platform <- getPlatform
             lbl1 <- getBlockIdNat
             lbl2 <- getBlockIdNat
             tmp <- getNewRegNat format
@@ -4322,12 +4369,12 @@ genAtomicRMW bid width amop dst addr n = do
             updateCfgNat (addWeightEdge lbl1 lbl1 0)
 
             return $ (toOL
-                [ MOV format (OpAddr amode) (OpReg eax)
+                [ mkMOV platform format (OpAddr amode) (OpReg eax)
                 , JXX ALWAYS lbl1
                 , NEWBLOCK lbl1
                   -- Keep old value so we can return it:
-                , MOV format (OpReg eax) (OpReg dst_r)
-                , MOV format (OpReg eax) (OpReg tmp)
+                , mkMOV platform format (OpReg eax) (OpReg dst_r)
+                , mkMOV platform format (OpReg eax) (OpReg tmp)
                 ]
                 `appOL` instrs (OpReg arg) (OpReg tmp) `appOL` toOL
                 [ LOCK (CMPXCHG format (OpReg tmp) (OpAddr amode))
@@ -4357,6 +4404,7 @@ genCtz64_32
   -> CmmExpr
   -> NatM (InstrBlock, Maybe BlockId)
 genCtz64_32 bid dst src = do
+  platform <- getPlatform
   RegCode64 vcode rhi rlo <- iselExpr64 src
   let dst_r = getLocalRegReg dst
   lbl1 <- getBlockIdNat
@@ -4380,9 +4428,9 @@ genCtz64_32 bid dst src = do
   --    dst = 64;
   --  }
   let instrs = vcode `appOL` toOL
-           ([ MOV      II32 (OpReg rhi)         (OpReg tmp_r)
+           ([ mkMOV platform      II32 (OpReg rhi)         (OpReg tmp_r)
             , OR       II32 (OpReg rlo)         (OpReg tmp_r)
-            , MOV      II32 (OpImm (ImmInt 64)) (OpReg dst_r)
+            , mkMOV platform      II32 (OpImm (ImmInt 64)) (OpReg dst_r)
             , JXX EQQ    lbl2
             , JXX ALWAYS lbl1
 
@@ -4402,6 +4450,7 @@ genCtz64_32 bid dst src = do
 -- Generic case (width <= word size)
 genCtzGeneric :: Width -> LocalReg -> CmmExpr -> NatM InstrBlock
 genCtzGeneric width dst src = do
+  platform <- getPlatform
   code_src <- getAnyReg src
   config <- getConfig
   let bw = widthInBits width
@@ -4429,10 +4478,10 @@ genCtzGeneric width dst src = do
       let instrs = code_src src_r `appOL` toOL
                ([ MOVZxL  II8    (OpReg src_r) (OpReg src_r) | width == W8 ] ++
                 [ BSF     format (OpReg src_r) tmp_r
-                , MOV     II32   (OpImm (ImmInt bw)) (OpReg dst_r)
+                , mkMOV platform     II32   (OpImm (ImmInt bw)) (OpReg dst_r)
                 , CMOV NE format (OpReg tmp_r) dst_r
                 ]) -- NB: We don't need to zero-extend the result for the
-                   -- W8/W16 cases because the 'MOV' insn already
+                   -- W8/W16 cases because the 'mkMOV platform' insn already
                    -- took care of implicitly clearing the upper bits
       return instrs
 
@@ -4494,21 +4543,21 @@ genMemCpyInlineMaybe align dst src n = do
       go :: Reg -> Reg -> Reg -> Integer -> OrdList Instr
       go dst src tmp i
           | i >= sizeBytes =
-              unitOL (MOV format (OpAddr src_addr) (OpReg tmp)) `appOL`
-              unitOL (MOV format (OpReg tmp) (OpAddr dst_addr)) `appOL`
+              unitOL (mkMOV platform format (OpAddr src_addr) (OpReg tmp)) `appOL`
+              unitOL (mkMOV platform format (OpReg tmp) (OpAddr dst_addr)) `appOL`
               go dst src tmp (i - sizeBytes)
           -- Deal with remaining bytes.
           | i >= 4 =  -- Will never happen on 32-bit
-              unitOL (MOV II32 (OpAddr src_addr) (OpReg tmp)) `appOL`
-              unitOL (MOV II32 (OpReg tmp) (OpAddr dst_addr)) `appOL`
+              unitOL (mkMOV platform II32 (OpAddr src_addr) (OpReg tmp)) `appOL`
+              unitOL (mkMOV platform II32 (OpReg tmp) (OpAddr dst_addr)) `appOL`
               go dst src tmp (i - 4)
           | i >= 2 =
               unitOL (MOVZxL II16 (OpAddr src_addr) (OpReg tmp)) `appOL`
-              unitOL (MOV II16 (OpReg tmp) (OpAddr dst_addr)) `appOL`
+              unitOL (mkMOV platform II16 (OpReg tmp) (OpAddr dst_addr)) `appOL`
               go dst src tmp (i - 2)
           | i >= 1 =
               unitOL (MOVZxL II8 (OpAddr src_addr) (OpReg tmp)) `appOL`
-              unitOL (MOV II8 (OpReg tmp) (OpAddr dst_addr)) `appOL`
+              unitOL (mkMOV platform II8 (OpReg tmp) (OpAddr dst_addr)) `appOL`
               go dst src tmp (i - 1)
           | otherwise = nilOL
         where
@@ -4581,24 +4630,24 @@ genMemSetInlineMaybe align dst c n = do
     sizeBytes :: Integer
     sizeBytes = fromIntegral (formatInBytes format)
 
-    -- Depending on size returns the widest MOV instruction and its
+    -- Depending on size returns the widest mkMOV platform instruction and its
     -- width.
     gen4 :: AddrMode -> Integer -> (InstrBlock, Integer)
     gen4 addr size
         | size >= 4 =
-            (unitOL (MOV II32 (OpImm (ImmInteger c4)) (OpAddr addr)), 4)
+            (unitOL (mkMOV platform II32 (OpImm (ImmInteger c4)) (OpAddr addr)), 4)
         | size >= 2 =
-            (unitOL (MOV II16 (OpImm (ImmInteger c2)) (OpAddr addr)), 2)
+            (unitOL (mkMOV platform II16 (OpImm (ImmInteger c2)) (OpAddr addr)), 2)
         | size >= 1 =
-            (unitOL (MOV II8 (OpImm (ImmInteger c)) (OpAddr addr)), 1)
+            (unitOL (mkMOV platform II8 (OpImm (ImmInteger c)) (OpAddr addr)), 1)
         | otherwise = (nilOL, 0)
 
-    -- Generates a 64-bit wide MOV instruction from REG to MEM.
+    -- Generates a 64-bit wide mkMOV platform instruction from REG to MEM.
     gen8 :: AddrMode -> Reg -> InstrBlock
     gen8 addr reg8byte =
-      unitOL (MOV format (OpReg reg8byte) (OpAddr addr))
+      unitOL (mkMOV platform format (OpReg reg8byte) (OpAddr addr))
 
-    -- Unrolls memset when the widest MOV is <= 4 bytes.
+    -- Unrolls memset when the widest mkMOV platform is <= 4 bytes.
     go4 :: Reg -> Integer -> InstrBlock
     go4 dst left =
       if left <= 0 then nilOL
@@ -4608,7 +4657,7 @@ genMemSetInlineMaybe align dst c n = do
         dst_addr = AddrBaseIndex (EABaseReg dst) EAIndexNone (ImmInteger (n - left))
         (curMov, curWidth) = gen4 dst_addr possibleWidth
 
-    -- Unrolls memset when the widest MOV is 8 bytes (thus another Reg
+    -- Unrolls memset when the widest mkMOV platform is 8 bytes (thus another Reg
     -- argument). Falls back to go4 when all 8 byte moves are
     -- exhausted.
     go8 :: Reg -> Reg -> Integer -> InstrBlock
@@ -4675,6 +4724,7 @@ genPrefetchData n src = do
 
 genByteSwap :: Width -> LocalReg -> CmmExpr -> NatM InstrBlock
 genByteSwap width dst src = do
+  platform <- getPlatform
   is32Bit <- is32BitPlatform
   let format = intFormat width
   case width of
@@ -4682,8 +4732,8 @@ genByteSwap width dst src = do
         let Reg64 dst_hi dst_lo = localReg64 dst
         RegCode64 vcode rhi rlo <- iselExpr64 src
         return $ vcode `appOL`
-                 toOL [ MOV II32 (OpReg rlo) (OpReg dst_hi),
-                        MOV II32 (OpReg rhi) (OpReg dst_lo),
+                 toOL [ mkMOV platform II32 (OpReg rlo) (OpReg dst_hi),
+                        mkMOV platform II32 (OpReg rhi) (OpReg dst_lo),
                         BSWAP II32 dst_hi,
                         BSWAP II32 dst_lo ]
       W16 -> do
@@ -4796,6 +4846,7 @@ genPext bid width dst src mask = do
 
 genClz :: BlockId -> Width -> CmmFormal -> CmmActual -> NatM InstrBlock
 genClz bid width dst src = do
+  platform <- getPlatform
   is32Bit <- is32BitPlatform
   config <- getConfig
   if is32Bit && width == W64
@@ -4829,11 +4880,11 @@ genClz bid width dst src = do
           return $ code_src src_r `appOL` toOL
                    ([ MOVZxL  II8    (OpReg src_r) (OpReg src_r) | width == W8 ] ++
                     [ BSR     format (OpReg src_r) tmp_r
-                    , MOV     II32   (OpImm (ImmInt (2*bw-1))) (OpReg dst_r)
+                    , mkMOV platform     II32   (OpImm (ImmInt (2*bw-1))) (OpReg dst_r)
                     , CMOV NE format (OpReg tmp_r) dst_r
                     , XOR     format (OpImm (ImmInt (bw-1))) (OpReg dst_r)
                     ]) -- NB: We don't need to zero-extend the result for the
-                       -- W8/W16 cases because the 'MOV' insn already
+                       -- W8/W16 cases because the 'mkMOV platform' insn already
                        -- took care of implicitly clearing the upper bits
 
 genWordToFloat :: BlockId -> Width -> CmmFormal -> CmmActual -> NatM InstrBlock
@@ -4843,7 +4894,8 @@ genWordToFloat bid width dst src =
 
 genAtomicRead :: Width -> MemoryOrdering -> LocalReg -> CmmExpr -> NatM InstrBlock
 genAtomicRead width _mord dst addr = do
-  load_code <- intLoadCode (MOV (intFormat width)) addr
+  platform <- getPlatform
+  load_code <- intLoadCode (mkMOV platform (intFormat width)) addr
   return (load_code (getLocalRegReg dst))
 
 genAtomicWrite :: Width -> MemoryOrdering -> CmmExpr -> CmmExpr -> NatM InstrBlock
@@ -4880,9 +4932,9 @@ genCmpXchg bid width dst addr old new = do
       platform <- getPlatform
       let dst_r    = getRegisterReg platform  (CmmLocal dst)
           code     = toOL
-                     [ MOV format (OpReg oldval) (OpReg eax)
+                     [ mkMOV platform format (OpReg oldval) (OpReg eax)
                      , LOCK (CMPXCHG format (OpReg newval) (OpAddr amode))
-                     , MOV format (OpReg eax) (OpReg dst_r)
+                     , mkMOV platform format (OpReg eax) (OpReg dst_r)
                      ]
       return $ addr_code `appOL` newval_code newval `appOL` oldval_code oldval
           `appOL` code
@@ -4893,6 +4945,7 @@ genCmpXchg bid width dst addr old new = do
 
 genXchg :: Width -> LocalReg -> CmmExpr -> CmmExpr -> NatM InstrBlock
 genXchg width dst addr value = do
+  platform <- getPlatform
   is32Bit <- is32BitPlatform
 
   when (is32Bit && width == W64) $
@@ -4904,7 +4957,7 @@ genXchg width dst addr value = do
   let dst_r    = getLocalRegReg dst
   -- Copy the value into the target register, perform the exchange.
   let code     = toOL
-                 [ MOV format (OpReg newval) (OpReg dst_r)
+                 [ mkMOV platform format (OpReg newval) (OpReg dst_r)
                   -- On X86 xchg implies a lock prefix if we use a memory argument.
                   -- so this is atomic.
                  , XCHG format (OpAddr amode) dst_r
@@ -4914,6 +4967,7 @@ genXchg width dst addr value = do
 
 genFloatAbs :: Width -> LocalReg -> CmmExpr -> NatM InstrBlock
 genFloatAbs width dst src = do
+  platform <- getPlatform
   let
     format = floatFormat width
     const = case width of
@@ -4925,7 +4979,7 @@ genFloatAbs width dst src = do
   tmp <- getNewRegNat format
   let dst_r = getLocalRegReg dst
   pure $ src_code dst_r `appOL` amode_code `appOL` toOL
-           [ MOV format (OpAddr amode) (OpReg tmp)
+           [ mkMOV platform format (OpAddr amode) (OpReg tmp)
            , AND format (OpReg tmp) (OpReg dst_r)
            ]
 
@@ -4990,6 +5044,7 @@ genSignedLargeMul
   -> CmmExpr
   -> NatM (OrdList Instr)
 genSignedLargeMul width res_c res_h res_l arg_x arg_y = do
+  platform <- getPlatform
   (y_reg, y_code) <- getRegOrMem arg_y
   x_code <- getAnyReg arg_x
   reg_tmp <- getNewRegNat II8
@@ -5000,8 +5055,8 @@ genSignedLargeMul width res_c res_h res_l arg_x arg_y = do
       code = y_code `appOL`
              x_code rax `appOL`
              toOL [ IMUL2 format y_reg
-                  , MOV format (OpReg rdx) (OpReg reg_h)
-                  , MOV format (OpReg rax) (OpReg reg_l)
+                  , mkMOV platform format (OpReg rdx) (OpReg reg_h)
+                  , mkMOV platform format (OpReg rax) (OpReg reg_l)
                   , SETCC CARRY (OpReg reg_tmp)
                   , MOVZxL II8 (OpReg reg_tmp) (OpReg reg_c)
                   ]
@@ -5015,6 +5070,7 @@ genUnsignedLargeMul
   -> CmmExpr
   -> NatM (OrdList Instr)
 genUnsignedLargeMul width res_h res_l arg_x arg_y = do
+  platform <- getPlatform
   (y_reg, y_code) <- getRegOrMem arg_y
   x_code <- getAnyReg arg_x
   let format = intFormat width
@@ -5023,8 +5079,8 @@ genUnsignedLargeMul width res_h res_l arg_x arg_y = do
       code = y_code `appOL`
              x_code rax `appOL`
              toOL [MUL2 format y_reg,
-                   MOV format (OpReg rdx) (OpReg reg_h),
-                   MOV format (OpReg rax) (OpReg reg_l)]
+                   mkMOV platform format (OpReg rdx) (OpReg reg_h),
+                   mkMOV platform format (OpReg rax) (OpReg reg_l)]
   return code
 
 
@@ -5038,6 +5094,7 @@ genQuotRem
   -> CmmExpr
   -> NatM InstrBlock
 genQuotRem width signed res_q res_r m_arg_x_high arg_x_low arg_y = do
+  platform <- getPlatform
   case width of
     W8 -> do
       -- See Note [DIV/IDIV for bytes]
@@ -5067,5 +5124,5 @@ genQuotRem width signed res_q res_r m_arg_x_high arg_x_low arg_y = do
                x_low_code rax `appOL`
                x_high_code rdx `appOL`
                toOL [instr format y_reg,
-                     MOV format (OpReg rax) (OpReg reg_q),
-                     MOV format (OpReg rdx) (OpReg reg_r)]
+                     mkMOV platform format (OpReg rax) (OpReg reg_q),
+                     mkMOV platform format (OpReg rdx) (OpReg reg_r)]


=====================================
compiler/GHC/CmmToAsm/X86/Instr.hs
=====================================
@@ -46,7 +46,7 @@ import GHC.Data.FastString
 import GHC.CmmToAsm.X86.Cond
 import GHC.CmmToAsm.X86.Regs
 import GHC.CmmToAsm.Format
-import GHC.CmmToAsm.Reg.Target (targetClassOfReg)
+import GHC.CmmToAsm.Reg.Target (targetClassOfReg, mkRegFormat)
 import GHC.CmmToAsm.Types
 import GHC.CmmToAsm.Utils
 import GHC.CmmToAsm.Instr (RegUsage(..), noUsage)
@@ -463,24 +463,24 @@ regUsageOfInstr platform instr
     POP    fmt op         -> mkRU fmt [] (def_W op)
     TEST   fmt src dst    -> mkRUR fmt (use_R src $! use_R dst [])
     CMP    fmt src dst    -> mkRUR fmt (use_R src $! use_R dst [])
-    SETCC  _ op         -> mkRU II64 [] (def_W op)
-    JXX    _ _          -> mkRU II64 [] []
-    JXX_GBL _ _         -> mkRU II64 [] []
-    JMP     op regs     -> mkRUR II64 (use_R op regs)
-    JMP_TBL op _ _ _    -> mkRUR II64 (use_R op [])
-    CALL (Left _)  params   -> mkRU II64 params (callClobberedRegs platform)
-    CALL (Right reg) params -> mkRU II64 (reg:params) (callClobberedRegs platform)
-    CLTD   _            -> mkRU II64 [eax] [edx]
-    NOP                 -> mkRU II64 [] []
+    SETCC  _ op         -> mkRUFormat [] (def_W op)
+    JXX    _ _          -> mkRUFormat [] []
+    JXX_GBL _ _         -> mkRUFormat [] []
+    JMP     op regs     -> mkRUFormat (use_R op regs) []
+    JMP_TBL op _ _ _    -> mkRUFormat (use_R op []) []
+    CALL (Left _)  params   -> mkRUFormat params (callClobberedRegs platform)
+    CALL (Right reg) params -> mkRUFormat (reg:params) (callClobberedRegs platform)
+    CLTD   fmt          -> mkRU fmt [eax] [edx]
+    NOP                 -> mkRUFormat [] []
 
     X87Store    fmt  dst    -> mkRUR fmt ( use_EA dst [])
 
-    CVTSS2SD   src dst  -> mkRU FF64 [src] [dst]
-    CVTSD2SS   src dst  -> mkRU FF32 [src] [dst]
-    CVTTSS2SIQ _ src dst -> mkRU FF32 (use_R src []) [dst]
-    CVTTSD2SIQ _ src dst -> mkRU FF64 (use_R src []) [dst]
-    CVTSI2SS   _ src dst -> mkRU FF32 (use_R src []) [dst]
-    CVTSI2SD   _ src dst -> mkRU FF64 (use_R src []) [dst]
+    CVTSS2SD   src dst  -> mkRUFormat [src] [dst]
+    CVTSD2SS   src dst  -> mkRUFormat [src] [dst]
+    CVTTSS2SIQ _ src dst -> mkRUFormat (use_R src []) [dst]
+    CVTTSD2SIQ _ src dst -> mkRUFormat (use_R src []) [dst]
+    CVTSI2SS   _ src dst -> mkRUFormat (use_R src []) [dst]
+    CVTSI2SD   _ src dst -> mkRUFormat (use_R src []) [dst]
     FDIV fmt     src dst  -> usageRM fmt src dst
     SQRT fmt src dst      -> mkRU fmt (use_R src []) [dst]
 
@@ -573,32 +573,32 @@ regUsageOfInstr platform instr
     -- are read.
 
     -- 2 operand form; first operand Read; second Written
-    usageRW :: Format -> Operand -> Operand -> RegUsage
+    usageRW :: HasDebugCallStack => Format -> Operand -> Operand -> RegUsage
     usageRW fmt op (OpReg reg)      = mkRU fmt (use_R op []) [reg]
     usageRW fmt op (OpAddr ea)      = mkRUR fmt (use_R op $! use_EA ea [])
     usageRW _ _ _                   = panic "X86.RegInfo.usageRW: no match"
 
     -- 2 operand form; first operand Read; second Modified
-    usageRM :: Format -> Operand -> Operand -> RegUsage
+    usageRM :: HasDebugCallStack => Format -> Operand -> Operand -> RegUsage
     usageRM fmt op (OpReg reg)      = mkRU fmt (use_R op [reg]) [reg]
     usageRM fmt op (OpAddr ea)      = mkRUR fmt (use_R op $! use_EA ea [])
     usageRM _ _ _                   = panic "X86.RegInfo.usageRM: no match"
 
     -- 2 operand form; first operand Modified; second Modified
-    usageMM :: Format -> Operand -> Operand -> RegUsage
+    usageMM :: HasDebugCallStack => Format -> Operand -> Operand -> RegUsage
     usageMM fmt (OpReg src) (OpReg dst) = mkRU fmt [src, dst] [src, dst]
     usageMM fmt (OpReg src) (OpAddr ea) = mkRU fmt (use_EA ea [src]) [src]
     usageMM fmt (OpAddr ea) (OpReg dst) = mkRU fmt (use_EA ea [dst]) [dst]
     usageMM _ _ _                       = panic "X86.RegInfo.usageMM: no match"
 
     -- 3 operand form; first operand Read; second Modified; third Modified
-    usageRMM :: Format -> Operand -> Operand -> Operand -> RegUsage
+    usageRMM :: HasDebugCallStack => Format -> Operand -> Operand -> Operand -> RegUsage
     usageRMM fmt (OpReg src) (OpReg dst) (OpReg reg) = mkRU fmt [src, dst, reg] [dst, reg]
     usageRMM fmt (OpReg src) (OpAddr ea) (OpReg reg) = mkRU fmt (use_EA ea [src, reg]) [reg]
     usageRMM _ _ _ _                                 = panic "X86.RegInfo.usageRMM: no match"
 
     -- 3 operand form of FMA instructions.
-    usageFMA :: Format -> Operand -> Reg -> Reg -> RegUsage
+    usageFMA :: HasDebugCallStack => Format -> Operand -> Reg -> Reg -> RegUsage
     usageFMA fmt (OpReg src1) src2 dst
       = mkRU fmt [src1, src2, dst] [dst]
     usageFMA fmt (OpAddr ea1) src2 dst
@@ -607,7 +607,7 @@ regUsageOfInstr platform instr
       = panic "X86.RegInfo.usageFMA: no match"
 
     -- 1 operand form; operand Modified
-    usageM :: Format -> Operand -> RegUsage
+    usageM :: HasDebugCallStack => Format -> Operand -> RegUsage
     usageM fmt (OpReg reg)        = mkRU fmt [reg] [reg]
     usageM fmt (OpAddr ea)        = mkRUR fmt (use_EA ea [])
     usageM _ _                    = panic "X86.RegInfo.usageM: no match"
@@ -631,13 +631,25 @@ regUsageOfInstr platform instr
               use_index EAIndexNone   tl = tl
               use_index (EAIndex i _) tl = i : tl
 
-    mkRUR fmt src = src' `seq` RU (map (\ r -> RegFormat r fmt) src') []
+    mkRUR :: HasDebugCallStack => Format -> [Reg] -> RegUsage
+    mkRUR fmt src = src' `seq` RU (map (\ r -> mkRegFormat platform r fmt) src') []
         where src' = filter (interesting platform) src
 
-    mkRU fmt src dst = src' `seq` dst' `seq` RU (map (\ r -> RegFormat r fmt) src') (map (\ r -> RegFormat r fmt) dst')
+    mkRU :: HasDebugCallStack => Format -> [Reg] -> [Reg] -> RegUsage
+    mkRU fmt src dst = src' `seq` dst' `seq` RU (map (\ r -> mkRegFormat platform r fmt) src') (map (\ r -> mkRegFormat platform r fmt) dst')
         where src' = filter (interesting platform) src
               dst' = filter (interesting platform) dst
 
+    mkRUFormat :: HasDebugCallStack => [Reg] -> [Reg] -> RegUsage
+    mkRUFormat src dst = src' `seq` dst' `seq` RU (map mkFormat src') (map mkFormat dst')
+        where src' = filter (interesting platform) src
+              dst' = filter (interesting platform) dst
+              mkFormat reg =
+                mkRegFormat platform reg $
+                  case targetClassOfReg platform reg of
+                    RcInteger -> archWordFormat (target32Bit platform)
+                    RcFloatOrVector -> FF64
+
 -- | Is this register interesting for the register allocator?
 interesting :: Platform -> Reg -> Bool
 interesting _        (RegVirtual _)              = True
@@ -861,7 +873,8 @@ patchJumpInstr insn patchF
 -- -----------------------------------------------------------------------------
 -- | Make a spill instruction.
 mkSpillInstr
-    :: NCGConfig
+    :: HasDebugCallStack
+    => NCGConfig
     -> RegFormat -- register to spill
     -> Int       -- current stack delta
     -> Int       -- spill slot to use
@@ -877,12 +890,23 @@ mkSpillInstr config (RegFormat reg fmt) delta slot
           -> [MOVU fmt (OpReg reg) (OpAddr (spRel platform off))]
             -- NB: not using MOVA, because we have no guarantees about the stack
             -- being sufficiently aligned, including even numbered stack slots.
-        _ -> [MOV (scalarMoveFormat platform fmt) (OpReg reg) (OpAddr (spRel platform off))]
+        _ ->
+          let fmt' = scalarMoveFormat platform fmt
+              cls_f = if isIntFormat fmt' then RcInteger else RcFloatOrVector
+              cls1 = targetClassOfReg platform reg
+            in
+              assertPpr (all (== cls_f) [cls1])
+                  (vcat [ text "mkSpillInstr: incompatible formats"
+                        , text "format:" <+> ppr fmt <+> parens (ppr cls_f)
+                        , text "src:" <+> ppr reg <+> parens (ppr cls1)
+                        , callStackDoc ])
+                $ [MOV fmt' (OpReg reg) (OpAddr (spRel platform off))]
     where platform = ncgPlatform config
 
 -- | Make a spill reload instruction.
 mkLoadInstr
-    :: NCGConfig
+    :: HasDebugCallStack
+    => NCGConfig
     -> RegFormat      -- register to load
     -> Int      -- current stack delta
     -> Int      -- spill slot to use
@@ -898,7 +922,17 @@ mkLoadInstr config (RegFormat reg fmt) delta slot
             -> [MOVU fmt (OpAddr (spRel platform off)) (OpReg reg)]
             -- NB: not using MOVA, because we have no guarantees about the stack
             -- being sufficiently aligned, including even numbered stack slots.
-          _ -> [MOV (scalarMoveFormat platform fmt) (OpAddr (spRel platform off)) (OpReg reg)]
+          _ ->
+            let fmt' = scalarMoveFormat platform fmt
+                cls_f = if isIntFormat fmt' then RcInteger else RcFloatOrVector
+                cls2 = targetClassOfReg platform reg
+            in
+              assertPpr (all (== cls_f) [cls2])
+                  (vcat [ text "mkLoadInstr: incompatible formats"
+                        , text "format:" <+> ppr fmt <+> parens (ppr cls_f)
+                        , text "dst:" <+> ppr reg <+> parens (ppr cls2)
+                        , callStackDoc ])
+                $ [MOV fmt' (OpAddr (spRel platform off)) (OpReg reg)]
     where platform = ncgPlatform config
 
 spillSlotSize :: Platform -> Int
@@ -980,7 +1014,7 @@ mkRegRegMoveInstr platform fmt src dst =
                 , text "src:" <+> ppr src <+> parens (ppr cls1)
                 , text "dst:" <+> ppr dst <+> parens (ppr cls2)
                 , callStackDoc ])
-        $ MOV (scalarMoveFormat platform fmt) (OpReg src) (OpReg dst)
+        $ MOV fmt' (OpReg src) (OpReg dst)
 
 scalarMoveFormat :: Platform -> Format -> Format
 scalarMoveFormat platform fmt


=====================================
compiler/GHC/CmmToAsm/X86/Regs.hs
=====================================
@@ -378,7 +378,7 @@ callClobberedRegs platform
    -- Only xmm0-5 are caller-saves registers on 64-bit windows.
    -- For details check the Win64 ABI:
    -- https://docs.microsoft.com/en-us/cpp/build/x64-software-conventions
-   ++ map xmm [0  .. 5]
+   ++ map xmm [0 .. 5]
  | otherwise
     -- all xmm regs are caller-saves
     -- caller-saves registers



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/85a0b5b00d73758142c936a0d4584f02b409f62e

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/85a0b5b00d73758142c936a0d4584f02b409f62e
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20240620/256ea729/attachment-0001.html>