[Git][ghc/ghc][wip/ncg-simd] SIMD NCG WIP: fix stack spilling

sheaf (@sheaf) gitlab at gitlab.haskell.org
Thu Jun 6 16:52:44 UTC 2024



sheaf pushed to branch wip/ncg-simd at Glasgow Haskell Compiler / GHC


Commits:
b2ca03a3 by sheaf at 2024-06-06T18:51:57+02:00
SIMD NCG WIP: fix stack spilling

- - - - -


22 changed files:

- compiler/GHC/CmmToAsm/AArch64.hs
- compiler/GHC/CmmToAsm/AArch64/Instr.hs
- compiler/GHC/CmmToAsm/Format.hs
- compiler/GHC/CmmToAsm/Instr.hs
- compiler/GHC/CmmToAsm/PPC.hs
- compiler/GHC/CmmToAsm/PPC/Instr.hs
- compiler/GHC/CmmToAsm/Reg/Graph.hs
- compiler/GHC/CmmToAsm/Reg/Graph/Coalesce.hs
- compiler/GHC/CmmToAsm/Reg/Graph/Spill.hs
- compiler/GHC/CmmToAsm/Reg/Graph/SpillClean.hs
- compiler/GHC/CmmToAsm/Reg/Graph/SpillCost.hs
- compiler/GHC/CmmToAsm/Reg/Linear.hs
- compiler/GHC/CmmToAsm/Reg/Linear/Base.hs
- compiler/GHC/CmmToAsm/Reg/Linear/JoinToTargets.hs
- compiler/GHC/CmmToAsm/Reg/Linear/StackMap.hs
- compiler/GHC/CmmToAsm/Reg/Linear/State.hs
- compiler/GHC/CmmToAsm/Reg/Linear/X86.hs
- compiler/GHC/CmmToAsm/Reg/Linear/X86_64.hs
- compiler/GHC/CmmToAsm/Reg/Liveness.hs
- compiler/GHC/CmmToAsm/X86/Instr.hs
- compiler/GHC/Platform/Reg.hs
- compiler/GHC/Platform/Reg/Class.hs


Changes:

=====================================
compiler/GHC/CmmToAsm/AArch64.hs
=====================================
@@ -49,8 +49,8 @@ instance Instruction AArch64.Instr where
         jumpDestsOfInstr        = AArch64.jumpDestsOfInstr
         canFallthroughTo        = AArch64.canFallthroughTo
         patchJumpInstr          = AArch64.patchJumpInstr
-        mkSpillInstr            = AArch64.mkSpillInstr
-        mkLoadInstr             = AArch64.mkLoadInstr
+        mkSpillInstr cfg reg _ i j = AArch64.mkSpillInstr cfg reg i j
+        mkLoadInstr cfg reg _ i j = AArch64.mkLoadInstr cfg reg i j
         takeDeltaInstr          = AArch64.takeDeltaInstr
         isMetaInstr             = AArch64.isMetaInstr
         mkRegRegMoveInstr _     = AArch64.mkRegRegMoveInstr


=====================================
compiler/GHC/CmmToAsm/AArch64/Instr.hs
=====================================
@@ -145,8 +145,9 @@ regUsageOfInstr platform instr = case instr of
         -- filtering the usage is necessary, otherwise the register
         -- allocator will try to allocate pre-defined fixed stg
         -- registers as well, as they show up.
-        usage (src, dst) = RU (filter (interesting platform) src)
-                              (filter (interesting platform) dst)
+        usage (src, dst) = RU (map (,II64) $ filter (interesting platform) src)
+                              (map (,II64) $ filter (interesting platform) dst)
+                       -- SIMD NCG TODO: remove this hack
 
         regAddr :: AddrMode -> [Reg]
         regAddr (AddrRegReg r1 r2) = [r1, r2]


=====================================
compiler/GHC/CmmToAsm/Format.hs
=====================================
@@ -1,3 +1,7 @@
+{-# LANGUAGE LambdaCase #-}
+{-# LANGUAGE PatternSynonyms #-}
+{-# LANGUAGE ViewPatterns #-}
+
 -- | Formats on this architecture
 --      A Format is a combination of width and class
 --
@@ -9,7 +13,7 @@
 --              properly. eg SPARC doesn't care about FF80.
 --
 module GHC.CmmToAsm.Format (
-    Format(..),
+    Format(.., IntegerFormat),
     ScalarFormat(..),
     intFormat,
     floatFormat,
@@ -18,7 +22,7 @@ module GHC.CmmToAsm.Format (
     isVecFormat,
     cmmTypeFormat,
     formatToWidth,
-    formatInBytes
+    formatInBytes,
 )
 
 where
@@ -73,7 +77,23 @@ data Format
         | FF32
         | FF64
         | VecFormat !Length !ScalarFormat !Width
-        deriving (Show, Eq)
+        deriving (Show, Eq, Ord)
+
+pattern IntegerFormat :: Format
+pattern IntegerFormat <- ( isIntegerFormat -> True )
+{-# COMPLETE IntegerFormat, FF32, FF64, VecFormat #-}
+
+isIntegerFormat :: Format -> Bool
+isIntegerFormat = \case
+  II8  -> True
+  II16 -> True
+  II32 -> True
+  II64 -> True
+  _    -> False
+
+
+instance Outputable Format where
+  ppr fmt = text (show fmt)
 
 data ScalarFormat = FmtInt8
                   | FmtInt16
@@ -81,7 +101,7 @@ data ScalarFormat = FmtInt8
                   | FmtInt64
                   | FmtFloat
                   | FmtDouble
-                  deriving (Show, Eq)
+                  deriving (Show, Eq, Ord)
 
 -- | Get the integer format of this width.
 intFormat :: Width -> Format


=====================================
compiler/GHC/CmmToAsm/Instr.hs
=====================================
@@ -16,6 +16,7 @@ import GHC.Cmm.BlockId
 
 import GHC.CmmToAsm.Config
 import GHC.Data.FastString
+import GHC.CmmToAsm.Format
 
 -- | Holds a list of source and destination registers used by a
 --      particular instruction.
@@ -29,8 +30,8 @@ import GHC.Data.FastString
 --
 data RegUsage
         = RU    {
-                reads :: [Reg],
-                writes :: [Reg]
+                reads :: [(Reg, Format)],
+                writes :: [(Reg, Format)]
                 }
         deriving Show
 
@@ -96,15 +97,17 @@ class Instruction instr where
         mkSpillInstr
                 :: NCGConfig
                 -> Reg          -- ^ the reg to spill
+                -> Format
                 -> Int          -- ^ the current stack delta
-                -> Int          -- ^ spill slot to use
-                -> [instr]        -- ^ instructions
+                -> Int          -- ^ spill slots to use
+                -> [instr]      -- ^ instructions
 
 
         -- | An instruction to reload a register from a spill slot.
         mkLoadInstr
                 :: NCGConfig
                 -> Reg          -- ^ the reg to reload.
+                -> Format
                 -> Int          -- ^ the current stack delta
                 -> Int          -- ^ the spill slot to use
                 -> [instr]        -- ^ instructions


=====================================
compiler/GHC/CmmToAsm/PPC.hs
=====================================
@@ -48,8 +48,8 @@ instance Instruction PPC.Instr where
    jumpDestsOfInstr    = PPC.jumpDestsOfInstr
    canFallthroughTo    = PPC.canFallthroughTo
    patchJumpInstr      = PPC.patchJumpInstr
-   mkSpillInstr        = PPC.mkSpillInstr
-   mkLoadInstr         = PPC.mkLoadInstr
+   mkSpillInstr cfg reg _ i j       = PPC.mkSpillInstr cfg reg i j
+   mkLoadInstr cfg reg _ i j = PPC.mkLoadInstr cfg reg i j
    takeDeltaInstr      = PPC.takeDeltaInstr
    isMetaInstr         = PPC.isMetaInstr
    mkRegRegMoveInstr _ = PPC.mkRegRegMoveInstr


=====================================
compiler/GHC/CmmToAsm/PPC/Instr.hs
=====================================
@@ -391,8 +391,9 @@ regUsageOfInstr platform instr
     FMADD _ _ rt ra rc rb   -> usage ([ra, rc, rb], [rt])
     _                       -> noUsage
   where
-    usage (src, dst) = RU (filter (interesting platform) src)
-                          (filter (interesting platform) dst)
+    usage (src, dst) = RU (map (,II64) $ filter (interesting platform) src)
+                          (map (,II64) $ filter (interesting platform) dst)
+                       -- SIMD NCG TODO: remove this hack
     regAddr (AddrRegReg r1 r2) = [r1, r2]
     regAddr (AddrRegImm r1 _)  = [r1]
 


=====================================
compiler/GHC/CmmToAsm/Reg/Graph.hs
=====================================
@@ -335,21 +335,21 @@ buildGraph code
 -- | Add some conflict edges to the graph.
 --   Conflicts between virtual and real regs are recorded as exclusions.
 graphAddConflictSet
-        :: UniqSet Reg
+        :: RegMap (Reg, fmt)
         -> Color.Graph VirtualReg RegClass RealReg
         -> Color.Graph VirtualReg RegClass RealReg
 
-graphAddConflictSet set graph
+graphAddConflictSet regs graph
  = let  virtuals        = mkUniqSet
-                        [ vr | RegVirtual vr <- nonDetEltsUniqSet set ]
+                        [ vr | (RegVirtual vr, _) <- nonDetEltsUFM regs ]
 
         graph1  = Color.addConflicts virtuals classOfVirtualReg graph
 
         graph2  = foldr (\(r1, r2) -> Color.addExclusion r1 classOfVirtualReg r2)
                         graph1
                         [ (vr, rr)
-                                | RegVirtual vr <- nonDetEltsUniqSet set
-                                , RegReal    rr <- nonDetEltsUniqSet set]
+                                | (RegVirtual vr, _) <- nonDetEltsUFM regs
+                                , (RegReal    rr, _) <- nonDetEltsUFM regs]
                           -- See Note [Unique Determinism and code generation]
 
    in   graph2


=====================================
compiler/GHC/CmmToAsm/Reg/Graph/Coalesce.hs
=====================================
@@ -13,7 +13,6 @@ import GHC.Cmm
 import GHC.Data.Bag
 import GHC.Data.Graph.Directed
 import GHC.Types.Unique.FM
-import GHC.Types.Unique.Set
 import GHC.Types.Unique.Supply
 
 
@@ -85,8 +84,8 @@ slurpJoinMovs live
         slurpLI    rs (LiveInstr _      Nothing)    = rs
         slurpLI    rs (LiveInstr instr (Just live))
                 | Just (r1, r2) <- takeRegRegMoveInstr instr
-                , elementOfUniqSet r1 $ liveDieRead live
-                , elementOfUniqSet r2 $ liveBorn live
+                , elemUFM r1 $ liveDieRead live
+                , elemUFM r2 $ liveBorn live
 
                 -- only coalesce movs between two virtuals for now,
                 -- else we end up with allocatable regs in the live


=====================================
compiler/GHC/CmmToAsm/Reg/Graph/Spill.hs
=====================================
@@ -31,6 +31,7 @@ import Data.List (nub, (\\), intersect)
 import Data.Maybe
 import Data.IntSet              (IntSet)
 import qualified Data.IntSet    as IntSet
+import GHC.CmmToAsm.Format
 
 
 -- | Spill all these virtual regs to stack slots.
@@ -138,7 +139,7 @@ regSpill_top platform regSlotMap cmm
         -- then record the fact that these slots are now live in those blocks
         -- in the given slotmap.
         patchLiveSlot
-                :: BlockMap IntSet -> BlockId -> RegSet -> BlockMap IntSet
+                :: BlockMap IntSet -> BlockId -> RegMap (Reg, Format) -> BlockMap IntSet
 
         patchLiveSlot slotMap blockId regsLive
          = let
@@ -148,7 +149,8 @@ regSpill_top platform regSlotMap cmm
 
                 moreSlotsLive   = IntSet.fromList
                                 $ mapMaybe (lookupUFM regSlotMap)
-                                $ nonDetEltsUniqSet regsLive
+                                $ map fst
+                                $ nonDetEltsUFM regsLive
                     -- See Note [Unique Determinism and code generation]
 
                 slotMap'
@@ -197,9 +199,9 @@ regSpill_instr platform regSlotMap (LiveInstr instr (Just _)) = do
   let rsModify            = intersect rsRead_ rsWritten_
 
   -- work out if any of the regs being used are currently being spilled.
-  let rsSpillRead         = filter (\r -> elemUFM r regSlotMap) rsRead
-  let rsSpillWritten      = filter (\r -> elemUFM r regSlotMap) rsWritten
-  let rsSpillModify       = filter (\r -> elemUFM r regSlotMap) rsModify
+  let rsSpillRead         = filter (\(r,_) -> elemUFM r regSlotMap) rsRead
+  let rsSpillWritten      = filter (\(r,_) -> elemUFM r regSlotMap) rsWritten
+  let rsSpillModify       = filter (\(r,_) -> elemUFM r regSlotMap) rsModify
 
   -- rewrite the instr and work out spill code.
   (instr1, prepost1)      <- mapAccumLM (spillRead   regSlotMap) instr  rsSpillRead
@@ -224,10 +226,10 @@ spillRead
         :: Instruction instr
         => UniqFM Reg Int
         -> instr
-        -> Reg
+        -> (Reg, Format)
         -> SpillM (instr, ([LiveInstr instr'], [LiveInstr instr']))
 
-spillRead regSlotMap instr reg
+spillRead regSlotMap instr (reg, fmt)
  | Just slot     <- lookupUFM regSlotMap reg
  = do    (instr', nReg)  <- patchInstr reg instr
 
@@ -235,7 +237,7 @@ spillRead regSlotMap instr reg
                 { stateSpillSL  = addToUFM_C accSpillSL (stateSpillSL s) reg (reg, 0, 1) }
 
          return  ( instr'
-                 , ( [LiveInstr (RELOAD slot nReg) Nothing]
+                 , ( [LiveInstr (RELOAD slot nReg fmt) Nothing]
                  , []) )
 
  | otherwise     = panic "RegSpill.spillRead: no slot defined for spilled reg"
@@ -247,10 +249,10 @@ spillWrite
         :: Instruction instr
         => UniqFM Reg Int
         -> instr
-        -> Reg
+        -> (Reg, Format)
         -> SpillM (instr, ([LiveInstr instr'], [LiveInstr instr']))
 
-spillWrite regSlotMap instr reg
+spillWrite regSlotMap instr (reg, fmt)
  | Just slot     <- lookupUFM regSlotMap reg
  = do    (instr', nReg)  <- patchInstr reg instr
 
@@ -259,7 +261,7 @@ spillWrite regSlotMap instr reg
 
          return  ( instr'
                  , ( []
-                   , [LiveInstr (SPILL nReg slot) Nothing]))
+                   , [LiveInstr (SPILL nReg fmt slot) Nothing]))
 
  | otherwise     = panic "RegSpill.spillWrite: no slot defined for spilled reg"
 
@@ -270,10 +272,10 @@ spillModify
         :: Instruction instr
         => UniqFM Reg Int
         -> instr
-        -> Reg
+        -> (Reg, Format)
         -> SpillM (instr, ([LiveInstr instr'], [LiveInstr instr']))
 
-spillModify regSlotMap instr reg
+spillModify regSlotMap instr (reg, fmt)
  | Just slot     <- lookupUFM regSlotMap reg
  = do    (instr', nReg)  <- patchInstr reg instr
 
@@ -281,8 +283,8 @@ spillModify regSlotMap instr reg
                 { stateSpillSL  = addToUFM_C accSpillSL (stateSpillSL s) reg (reg, 1, 1) }
 
          return  ( instr'
-                 , ( [LiveInstr (RELOAD slot nReg) Nothing]
-                   , [LiveInstr (SPILL nReg slot) Nothing]))
+                 , ( [LiveInstr (RELOAD slot nReg fmt) Nothing]
+                   , [LiveInstr (SPILL nReg fmt slot) Nothing]))
 
  | otherwise     = panic "RegSpill.spillModify: no slot defined for spilled reg"
 


=====================================
compiler/GHC/CmmToAsm/Reg/Graph/SpillClean.hs
=====================================
@@ -160,12 +160,13 @@ cleanForward _ _ _ acc []
 -- hopefully the spill will be also be cleaned in the next pass
 cleanForward platform blockId assoc acc (li1 : li2 : instrs)
 
-        | LiveInstr (SPILL  reg1  slot1) _      <- li1
-        , LiveInstr (RELOAD slot2 reg2)  _      <- li2
+        | LiveInstr (SPILL  reg1  _ slot1) _      <- li1
+        , LiveInstr (RELOAD slot2 reg2 _)  _      <- li2
         , slot1 == slot2
         = do
                 modify $ \s -> s { sCleanedReloadsAcc = sCleanedReloadsAcc s + 1 }
                 cleanForward platform blockId assoc acc
+                -- SIMD NCG TODO: is mkRegRegMoveInstr here OK for vectors?
                  $ li1 : LiveInstr (mkRegRegMoveInstr platform reg1 reg2) Nothing
                        : instrs
 
@@ -189,7 +190,7 @@ cleanForward platform blockId assoc acc (li@(LiveInstr i1 _) : instrs)
 cleanForward platform blockId assoc acc (li : instrs)
 
         -- Update association due to the spill.
-        | LiveInstr (SPILL reg slot) _  <- li
+        | LiveInstr (SPILL reg _ slot) _  <- li
         = let   assoc'  = addAssoc (SReg reg)  (SSlot slot)
                         $ delAssoc (SSlot slot)
                         $ assoc
@@ -215,7 +216,7 @@ cleanForward platform blockId assoc acc (li : instrs)
         -- Writing to a reg changes its value.
         | LiveInstr instr _     <- li
         , RU _ written          <- regUsageOfInstr platform instr
-        = let assoc'    = foldr delAssoc assoc (map SReg $ nub written)
+        = let assoc'    = foldr delAssoc assoc (map SReg $ nub $ map fst written)
           in  cleanForward platform blockId assoc' (li : acc) instrs
 
 
@@ -229,7 +230,7 @@ cleanReload
         -> LiveInstr instr
         -> CleanM (Assoc Store, Maybe (LiveInstr instr))
 
-cleanReload platform blockId assoc li@(LiveInstr (RELOAD slot reg) _)
+cleanReload platform blockId assoc li@(LiveInstr (RELOAD slot reg _) _)
 
         -- If the reg we're reloading already has the same value as the slot
         --      then we can erase the instruction outright.
@@ -355,12 +356,12 @@ cleanBackward' _ _ _      acc []
 cleanBackward' liveSlotsOnEntry reloadedBy noReloads acc (li : instrs)
 
         -- If nothing ever reloads from this slot then we don't need the spill.
-        | LiveInstr (SPILL _ slot) _    <- li
+        | LiveInstr (SPILL _ _ slot) _    <- li
         , Nothing       <- lookupUFM reloadedBy (SSlot slot)
         = do    modify $ \s -> s { sCleanedSpillsAcc = sCleanedSpillsAcc s + 1 }
                 cleanBackward liveSlotsOnEntry noReloads acc instrs
 
-        | LiveInstr (SPILL _ slot) _    <- li
+        | LiveInstr (SPILL _ _ slot) _    <- li
         = if elementOfUniqSet slot noReloads
 
            -- We can erase this spill because the slot won't be read until
@@ -375,7 +376,7 @@ cleanBackward' liveSlotsOnEntry reloadedBy noReloads acc (li : instrs)
                 cleanBackward liveSlotsOnEntry noReloads' (li : acc) instrs
 
         -- if we reload from a slot then it's no longer unused
-        | LiveInstr (RELOAD slot _) _   <- li
+        | LiveInstr (RELOAD slot _ _) _   <- li
         , noReloads'            <- delOneFromUniqSet noReloads slot
         = cleanBackward liveSlotsOnEntry noReloads' (li : acc) instrs
 


=====================================
compiler/GHC/CmmToAsm/Reg/Graph/SpillCost.hs
=====================================
@@ -129,8 +129,8 @@ slurpSpillCostInfo platform cfg cmm
 
                 -- Increment counts for what regs were read/written from.
                 let (RU read written)   = regUsageOfInstr platform instr
-                mapM_ (incUses scale) $ mapMaybe takeVirtualReg $ nub read
-                mapM_ (incDefs scale) $ mapMaybe takeVirtualReg $ nub written
+                mapM_ (incUses scale) $ mapMaybe takeVirtualReg $ nub $ map fst read
+                mapM_ (incDefs scale) $ mapMaybe takeVirtualReg $ nub $ map fst written
 
                 -- Compute liveness for entry to next instruction.
                 let liveDieRead_virt    = takeVirtuals (liveDieRead  live)
@@ -158,9 +158,9 @@ slurpSpillCostInfo platform cfg cmm
           = 1.0 -- Only if no cfg given
 
 -- | Take all the virtual registers from this set.
-takeVirtuals :: UniqSet Reg -> UniqSet VirtualReg
-takeVirtuals set = mkUniqSet
-  [ vr | RegVirtual vr <- nonDetEltsUniqSet set ]
+takeVirtuals :: RegMap (Reg, fmt) -> UniqSet VirtualReg
+takeVirtuals m = mkUniqSet
+  [ vr | (RegVirtual vr, _) <- nonDetEltsUFM m ]
   -- See Note [Unique Determinism and code generation]
 
 


=====================================
compiler/GHC/CmmToAsm/Reg/Linear.hs
=====================================
@@ -127,7 +127,6 @@ import GHC.Cmm hiding (RegSet)
 
 import GHC.Data.Graph.Directed
 import GHC.Types.Unique
-import GHC.Types.Unique.Set
 import GHC.Types.Unique.FM
 import GHC.Types.Unique.Supply
 import GHC.Utils.Outputable
@@ -137,6 +136,7 @@ import GHC.Platform
 import Data.Maybe
 import Data.List (partition, nub)
 import Control.Monad
+import GHC.CmmToAsm.Format
 
 -- -----------------------------------------------------------------------------
 -- Top level of the register allocator
@@ -203,7 +203,7 @@ linearRegAlloc
         :: forall instr. (Instruction instr)
         => NCGConfig
         -> [BlockId] -- ^ entry points
-        -> BlockMap RegSet
+        -> BlockMap (UniqFM Reg (Reg, Format))
               -- ^ live regs on entry to each basic block
         -> [SCC (LiveBasicBlock instr)]
               -- ^ instructions annotated with "deaths"
@@ -242,7 +242,7 @@ linearRegAlloc'
         => NCGConfig
         -> freeRegs
         -> [BlockId]                    -- ^ entry points
-        -> BlockMap RegSet              -- ^ live regs on entry to each basic block
+        -> BlockMap (UniqFM Reg (Reg, Format))              -- ^ live regs on entry to each basic block
         -> [SCC (LiveBasicBlock instr)] -- ^ instructions annotated with "deaths"
         -> UniqSM ([NatBasicBlock instr], RegAllocStats, Int)
 
@@ -256,7 +256,7 @@ linearRegAlloc' config initFreeRegs entry_ids block_live sccs
 
 linearRA_SCCs :: OutputableRegConstraint freeRegs instr
               => [BlockId]
-              -> BlockMap RegSet
+              -> BlockMap (UniqFM Reg (Reg, Format))
               -> [NatBasicBlock instr]
               -> [SCC (LiveBasicBlock instr)]
               -> RegM freeRegs [NatBasicBlock instr]
@@ -291,7 +291,7 @@ linearRA_SCCs entry_ids block_live blocksAcc (CyclicSCC blocks : sccs)
 
 process :: forall freeRegs instr. (OutputableRegConstraint freeRegs instr)
         => [BlockId]
-        -> BlockMap RegSet
+        -> BlockMap (UniqFM Reg (Reg, Format))
         -> [GenBasicBlock (LiveInstr instr)]
         -> RegM freeRegs [[NatBasicBlock instr]]
 process entry_ids block_live =
@@ -330,7 +330,7 @@ process entry_ids block_live =
 --
 processBlock
         :: OutputableRegConstraint freeRegs instr
-        => BlockMap RegSet              -- ^ live regs on entry to each basic block
+        => BlockMap (UniqFM Reg (Reg, Format))              -- ^ live regs on entry to each basic block
         -> LiveBasicBlock instr         -- ^ block to do register allocation on
         -> RegM freeRegs [NatBasicBlock instr]   -- ^ block with registers allocated
 
@@ -347,7 +347,7 @@ processBlock block_live (BasicBlock id instrs)
 -- | Load the freeregs and current reg assignment into the RegM state
 --      for the basic block with this BlockId.
 initBlock :: FR freeRegs
-          => BlockId -> BlockMap RegSet -> RegM freeRegs ()
+          => BlockId -> BlockMap (UniqFM Reg (Reg, Format)) -> RegM freeRegs ()
 initBlock id block_live
  = do   platform    <- getPlatform
         block_assig <- getBlockAssigR
@@ -364,7 +364,7 @@ initBlock id block_live
                             setFreeRegsR    (frInitFreeRegs platform)
                           Just live ->
                             setFreeRegsR $ foldl' (flip $ frAllocateReg platform) (frInitFreeRegs platform)
-                                                  [ r | RegReal r <- nonDetEltsUniqSet live ]
+                                                  [ r | ( RegReal r, _ ) <- nonDetEltsUFM live ]
                             -- See Note [Unique Determinism and code generation]
                         setAssigR       emptyRegMap
 
@@ -377,7 +377,7 @@ initBlock id block_live
 -- | Do allocation for a sequence of instructions.
 linearRA
         :: forall freeRegs instr. (OutputableRegConstraint freeRegs instr)
-        => BlockMap RegSet                      -- ^ map of what vregs are live on entry to each block.
+        => BlockMap (UniqFM Reg (Reg, Format))                      -- ^ map of what vregs are live on entry to each block.
         -> BlockId                              -- ^ id of the current block, for debugging.
         -> [LiveInstr instr]                    -- ^ liveness annotated instructions in this block.
         -> RegM freeRegs
@@ -402,7 +402,7 @@ linearRA block_live block_id = go [] []
 -- | Do allocation for a single instruction.
 raInsn
         :: OutputableRegConstraint freeRegs instr
-        => BlockMap RegSet                      -- ^ map of what vregs are love on entry to each block.
+        => BlockMap (UniqFM Reg (Reg, Format))                      -- ^ map of what vregs are love on entry to each block.
         -> [instr]                              -- ^ accumulator for instructions already processed.
         -> BlockId                              -- ^ the id of the current block, for debugging
         -> LiveInstr instr                      -- ^ the instr to have its regs allocated, with liveness info.
@@ -432,12 +432,12 @@ raInsn block_live new_instrs id (LiveInstr (Instr instr) (Just live))
     -- (we can't eliminate it if the source register is on the stack, because
     --  we do not want to use one spill slot for different virtual registers)
     case takeRegRegMoveInstr instr of
-        Just (src,dst)  | src `elementOfUniqSet` (liveDieRead live),
+        Just (src,dst)  | Just (_, fmt) <- lookupUFM (liveDieRead live) src,
                           isVirtualReg dst,
                           not (dst `elemUFM` assig),
                           isRealReg src || isInReg src assig -> do
            case src of
-              (RegReal rr) -> setAssigR (addToUFM assig dst (InReg rr))
+              RegReal rr -> setAssigR (addToUFM assig dst (InReg $ RealRegUsage rr fmt))
                 -- if src is a fixed reg, then we just map dest to this
                 -- reg in the assignment.  src must be an allocatable reg,
                 -- otherwise it wouldn't be in r_dying.
@@ -456,8 +456,8 @@ raInsn block_live new_instrs id (LiveInstr (Instr instr) (Just live))
            return (new_instrs, [])
 
         _ -> genRaInsn block_live new_instrs id instr
-                        (nonDetEltsUniqSet $ liveDieRead live)
-                        (nonDetEltsUniqSet $ liveDieWrite live)
+                        (map fst $ nonDetEltsUFM $ liveDieRead live)
+                        (map fst $ nonDetEltsUFM $ liveDieWrite live)
                         -- See Note [Unique Determinism and code generation]
 
 raInsn _ _ _ instr
@@ -486,7 +486,7 @@ isInReg src assig | Just (InReg _) <- lookupUFM assig src = True
 
 genRaInsn :: forall freeRegs instr.
              (OutputableRegConstraint freeRegs instr)
-          => BlockMap RegSet
+          => BlockMap (UniqFM Reg (Reg, Format))
           -> [instr]
           -> BlockId
           -> instr
@@ -499,13 +499,13 @@ genRaInsn block_live new_instrs block_id instr r_dying w_dying = do
   platform <- getPlatform
   case regUsageOfInstr platform instr of { RU read written ->
     do
-    let real_written    = [ rr  | (RegReal     rr) <- written ] :: [RealReg]
-    let virt_written    = [ vr  | (RegVirtual  vr) <- written ]
+    let real_written    = [ rr  | (RegReal     rr, _) <- written ] :: [RealReg]
+    let virt_written    = [ vr  | (RegVirtual  vr, _) <- written ]
 
     -- we don't need to do anything with real registers that are
     -- only read by this instr.  (the list is typically ~2 elements,
     -- so using nub isn't a problem).
-    let virt_read       = nub [ vr      | (RegVirtual vr) <- read ] :: [VirtualReg]
+    let virt_read       = nub [ vr      | (RegVirtual vr, _) <- read ] :: [VirtualReg]
 
 --     do
 --         let real_read       = nub [ rr      | (RegReal rr) <- read]
@@ -638,9 +638,9 @@ releaseRegs regs = do
       loop assig !free (r:rs) =
          case lookupUFM assig r of
          Just (InBoth real _) -> loop (delFromUFM assig r)
-                                      (frReleaseReg platform real free) rs
+                                      (frReleaseReg platform (realReg real) free) rs
          Just (InReg real)    -> loop (delFromUFM assig r)
-                                      (frReleaseReg platform real free) rs
+                                      (frReleaseReg platform (realReg real) free) rs
          _                    -> loop (delFromUFM assig r) free rs
   loop assig free regs
 
@@ -688,15 +688,15 @@ saveClobberedTemps clobbered dying
                 -- currently support deterministic code-generation.
                 -- See Note [Unique Determinism and code generation]
                 InReg reg
-                    | any (realRegsAlias reg) clobbered
+                    | any (realRegsAlias $ realReg reg) clobbered
                     , temp `notElem` map getUnique dying
-                    -> clobber temp (assig,instrs) (reg)
+                    -> clobber temp (assig,instrs) reg
                 _ -> return (assig,instrs)
 
 
      -- See Note [UniqFM and the register allocator]
-     clobber :: Unique -> (RegMap Loc,[instr]) -> (RealReg) -> RegM freeRegs (RegMap Loc,[instr])
-     clobber temp (assig,instrs) (reg)
+     clobber :: Unique -> (RegMap Loc,[instr]) -> RealRegUsage -> RegM freeRegs (RegMap Loc,[instr])
+     clobber temp (assig,instrs) (RealRegUsage reg fmt)
        = do platform <- getPlatform
 
             freeRegs <- getFreeRegsR
@@ -711,7 +711,7 @@ saveClobberedTemps clobbered dying
               (my_reg : _) -> do
                   setFreeRegsR (frAllocateReg platform my_reg freeRegs)
 
-                  let new_assign = addToUFM_Directly assig temp (InReg my_reg)
+                  let new_assign = addToUFM_Directly assig temp (InReg (RealRegUsage my_reg fmt))
                   let instr = mkRegRegMoveInstr platform
                                   (RegReal reg) (RegReal my_reg)
 
@@ -719,12 +719,12 @@ saveClobberedTemps clobbered dying
 
               -- (2) no free registers: spill the value
               [] -> do
-                  (spill, slot)   <- spillR (RegReal reg) temp
+                  (spill, slot)   <- spillR (RegReal reg) fmt temp
 
                   -- record why this reg was spilled for profiling
                   recordSpill (SpillClobber temp)
 
-                  let new_assign  = addToUFM_Directly assig temp (InBoth reg slot)
+                  let new_assign  = addToUFM_Directly assig temp (InBoth (RealRegUsage reg fmt) slot)
 
                   return (new_assign, (spill ++ instrs))
 
@@ -771,7 +771,7 @@ clobberRegs clobbered
                 = assig
 
         clobber assig ((temp, InBoth reg slot) : rest)
-                | any (realRegsAlias reg) clobbered
+                | any (realRegsAlias $ realReg reg) clobbered
                 = clobber (addToUFM_Directly assig temp (InMem slot)) rest
 
         clobber assig (_:rest)
@@ -817,7 +817,7 @@ allocateRegsAndSpill reading keep spills alloc (r:rs)
         case lookupUFM assig r of
                 -- case (1a): already in a register
                 Just (InReg my_reg) ->
-                        allocateRegsAndSpill reading keep spills (my_reg:alloc) rs
+                        allocateRegsAndSpill reading keep spills (realReg my_reg:alloc) rs
 
                 -- case (1b): already in a register (and memory)
                 -- NB1. if we're writing this register, update its assignment to be
@@ -826,7 +826,7 @@ allocateRegsAndSpill reading keep spills alloc (r:rs)
                 -- are also read by the same instruction.
                 Just (InBoth my_reg _)
                  -> do  when (not reading) (setAssigR $ toRegMap (addToUFM assig r (InReg my_reg)))
-                        allocateRegsAndSpill reading keep spills (my_reg:alloc) rs
+                        allocateRegsAndSpill reading keep spills (realReg my_reg:alloc) rs
 
                 -- Not already in a register, so we need to find a free one...
                 Just (InMem slot) | reading   -> doSpill (ReadMem slot)
@@ -869,7 +869,15 @@ allocRegsAndSpill_spill :: (FR freeRegs, Instruction instr)
 allocRegsAndSpill_spill reading keep spills alloc r rs assig spill_loc
  = do   platform <- getPlatform
         freeRegs <- getFreeRegsR
-        let freeRegs_thisClass  = frGetFreeRegs platform (classOfVirtualReg r) freeRegs :: [RealReg]
+        let regclass = classOfVirtualReg r
+            freeRegs_thisClass  = frGetFreeRegs platform regclass freeRegs :: [RealReg]
+            -- SIMD NCG TODO: this is not the right thing to be doing,
+            -- and is indicative we should not use Format but a more
+            -- trimmed down datatype that only keeps track of e.g.
+            -- how many stack slots something uses up.
+            vr_fmt = case r of
+                VirtualRegVec {} -> VecFormat 2 FmtDouble W64
+                _ -> II64
 
         -- Can we put the variable into a register it already was?
         pref_reg <- findPrefRealReg r
@@ -883,10 +891,10 @@ allocRegsAndSpill_spill reading keep spills alloc r rs assig spill_loc
                         = reg
                         | otherwise
                         = first_free
-                spills'   <- loadTemp r spill_loc final_reg spills
+                spills'   <- loadTemp r vr_fmt spill_loc final_reg spills
 
                 setAssigR $ toRegMap
-                          $ (addToUFM assig r $! newLocation spill_loc final_reg)
+                          $ (addToUFM assig r $! newLocation spill_loc $ RealRegUsage final_reg vr_fmt)
                 setFreeRegsR $  frAllocateReg platform final_reg freeRegs
 
                 allocateRegsAndSpill reading keep spills' (final_reg : alloc) rs
@@ -908,48 +916,53 @@ allocRegsAndSpill_spill reading keep spills alloc r rs assig spill_loc
                 let candidates = nonDetUFMToList candidates'
 
                 -- the vregs we could kick out that are already in a slot
-                let candidates_inBoth :: [(Unique, RealReg, StackSlot)]
+                let compat reg' r'
+                      = let cls1 = targetClassOfRealReg platform reg'
+                            cls2 = classOfVirtualReg r'
+                        in  (if cls1 == RcVector128 then RcDouble else cls1)
+                         == (if cls2 == RcVector128 then RcDouble else cls2)
+                    candidates_inBoth :: [(Unique, RealRegUsage, StackSlot)]
                     candidates_inBoth
                         = [ (temp, reg, mem)
                           | (temp, InBoth reg mem) <- candidates
-                          , targetClassOfRealReg platform reg == classOfVirtualReg r ]
+                          , compat (realReg reg) r ]
 
                 -- the vregs we could kick out that are only in a reg
                 --      this would require writing the reg to a new slot before using it.
                 let candidates_inReg
                         = [ (temp, reg)
                           | (temp, InReg reg) <- candidates
-                          , targetClassOfRealReg platform reg == classOfVirtualReg r ]
+                          , compat (realReg reg) r ]
 
                 let result
 
                         -- we have a temporary that is in both register and mem,
                         -- just free up its register for use.
-                        | (temp, my_reg, slot) : _      <- candidates_inBoth
-                        = do    spills' <- loadTemp r spill_loc my_reg spills
+                        | (temp, myRegUse@(RealRegUsage my_reg fmt), slot) : _      <- candidates_inBoth
+                        = do    spills' <- loadTemp r fmt spill_loc my_reg spills
                                 let assig1  = addToUFM_Directly assig temp (InMem slot)
-                                let assig2  = addToUFM assig1 r $! newLocation spill_loc my_reg
+                                let assig2  = addToUFM assig1 r $! newLocation spill_loc myRegUse
 
                                 setAssigR $ toRegMap assig2
                                 allocateRegsAndSpill reading keep spills' (my_reg:alloc) rs
 
                         -- otherwise, we need to spill a temporary that currently
                         -- resides in a register.
-                        | (temp_to_push_out, (my_reg :: RealReg)) : _
+                        | (temp_to_push_out, RealRegUsage my_reg fmt) : _
                                         <- candidates_inReg
                         = do
-                                (spill_store, slot) <- spillR (RegReal my_reg) temp_to_push_out
+                                (spill_store, slot) <- spillR (RegReal my_reg) fmt temp_to_push_out
 
                                 -- record that this temp was spilled
                                 recordSpill (SpillAlloc temp_to_push_out)
 
                                 -- update the register assignment
                                 let assig1  = addToUFM_Directly assig temp_to_push_out   (InMem slot)
-                                let assig2  = addToUFM assig1 r                 $! newLocation spill_loc my_reg
+                                let assig2  = addToUFM assig1 r                 $! newLocation spill_loc (RealRegUsage my_reg fmt)
                                 setAssigR $ toRegMap assig2
 
                                 -- if need be, load up a spilled temp into the reg we've just freed up.
-                                spills' <- loadTemp r spill_loc my_reg spills
+                                spills' <- loadTemp r fmt spill_loc my_reg spills
 
                                 allocateRegsAndSpill reading keep
                                         (spill_store ++ spills')
@@ -970,7 +983,7 @@ allocRegsAndSpill_spill reading keep spills alloc r rs assig spill_loc
 
 
 -- | Calculate a new location after a register has been loaded.
-newLocation :: SpillLoc -> RealReg -> Loc
+newLocation :: SpillLoc -> RealRegUsage -> Loc
 -- if the tmp was read from a slot, then now its in a reg as well
 newLocation (ReadMem slot) my_reg = InBoth my_reg slot
 -- writes will always result in only the register being available
@@ -980,16 +993,17 @@ newLocation _ my_reg = InReg my_reg
 loadTemp
         :: (Instruction instr)
         => VirtualReg   -- the temp being loaded
+        -> Format
         -> SpillLoc     -- the current location of this temp
         -> RealReg      -- the hreg to load the temp into
         -> [instr]
         -> RegM freeRegs [instr]
 
-loadTemp vreg (ReadMem slot) hreg spills
+loadTemp vreg fmt (ReadMem slot) hreg spills
  = do
-        insn <- loadR (RegReal hreg) slot
+        insn <- loadR (RegReal hreg) fmt slot
         recordSpill (SpillLoad $ getUnique vreg)
         return  $  {- mkComment (text "spill load") : -} insn ++ spills
 
-loadTemp _ _ _ spills =
+loadTemp _ _ _ _ spills =
    return spills


=====================================
compiler/GHC/CmmToAsm/Reg/Linear/Base.hs
=====================================
@@ -11,6 +11,7 @@ module GHC.CmmToAsm.Reg.Linear.Base (
 
         Loc(..),
         regsOfLoc,
+        RealRegUsage(..),
 
         -- for stats
         SpillReason(..),
@@ -36,6 +37,7 @@ import GHC.Types.Unique.Supply
 import GHC.Cmm.BlockId
 import GHC.Cmm.Dataflow.Label
 import GHC.CmmToAsm.Reg.Utils
+import GHC.CmmToAsm.Format
 
 data ReadingOrWriting = Reading | Writing deriving (Eq,Ord)
 
@@ -76,8 +78,8 @@ updateBlockAssignment dest (freeRegs, regMap) (BlockAssignment {..}) =
     combWithExisting old_reg _ = Just $ old_reg
 
     fromLoc :: Loc -> Maybe RealReg
-    fromLoc (InReg rr) = Just rr
-    fromLoc (InBoth rr _) = Just rr
+    fromLoc (InReg rr) = Just $ realReg rr
+    fromLoc (InBoth rr _) = Just $ realReg rr
     fromLoc _ = Nothing
 
 
@@ -94,23 +96,29 @@ updateBlockAssignment dest (freeRegs, regMap) (BlockAssignment {..}) =
 --
 data Loc
         -- | vreg is in a register
-        = InReg   !RealReg
+        = InReg   {-# UNPACK #-} !RealRegUsage
 
-        -- | vreg is held in a stack slot
+        -- | vreg is held in stack slots
         | InMem   {-# UNPACK #-}  !StackSlot
 
 
-        -- | vreg is held in both a register and a stack slot
-        | InBoth   !RealReg
+        -- | vreg is held in both a register and stack slots
+        | InBoth   {-# UNPACK #-} !RealRegUsage
                    {-# UNPACK #-} !StackSlot
         deriving (Eq, Show, Ord)
 
+data RealRegUsage
+  = RealRegUsage
+    { realReg :: !RealReg
+    , realRegFormat :: !Format
+    } deriving (Eq, Show, Ord)
+
 instance Outputable Loc where
         ppr l = text (show l)
 
 
 -- | Get the reg numbers stored in this Loc.
-regsOfLoc :: Loc -> [RealReg]
+regsOfLoc :: Loc -> [RealRegUsage]
 regsOfLoc (InReg r)    = [r]
 regsOfLoc (InBoth r _) = [r]
 regsOfLoc (InMem _)    = []


=====================================
compiler/GHC/CmmToAsm/Reg/Linear/JoinToTargets.hs
=====================================
@@ -29,16 +29,16 @@ import GHC.Utils.Panic
 import GHC.Utils.Monad (concatMapM)
 import GHC.Types.Unique
 import GHC.Types.Unique.FM
-import GHC.Types.Unique.Set
 
 import GHC.Utils.Outputable
+import GHC.CmmToAsm.Format
 
 -- | For a jump instruction at the end of a block, generate fixup code so its
 --      vregs are in the correct regs for its destination.
 --
 joinToTargets
         :: (FR freeRegs, Instruction instr)
-        => BlockMap RegSet              -- ^ maps the unique of the blockid to the set of vregs
+        => BlockMap (RegMap (Reg, Format)) -- ^ maps the unique of the blockid to the set of vregs
                                         --      that are known to be live on the entry to each block.
 
         -> BlockId                      -- ^ id of the current block
@@ -62,7 +62,7 @@ joinToTargets block_live id instr
 -----
 joinToTargets'
         :: (FR freeRegs, Instruction instr)
-        => BlockMap RegSet              -- ^ maps the unique of the blockid to the set of vregs
+        => BlockMap (RegMap (Reg, Format))               -- ^ maps the unique of the blockid to the set of vregs
                                         --      that are known to be live on the entry to each block.
 
         -> [NatBasicBlock instr]        -- ^ acc blocks of fixup code.
@@ -90,7 +90,7 @@ joinToTargets' block_live new_blocks block_id instr (dest:dests)
         -- adjust the current assignment to remove any vregs that are not live
         -- on entry to the destination block.
         let Just live_set       = mapLookup dest block_live
-        let still_live uniq _   = uniq `elemUniqSet_Directly` live_set
+        let still_live uniq _   = uniq `elemUFM_Directly` live_set
         let adjusted_assig      = filterUFM_Directly still_live assig
 
         -- and free up those registers which are now free.
@@ -99,14 +99,14 @@ joinToTargets' block_live new_blocks block_id instr (dest:dests)
                         -- This is non-deterministic but we do not
                         -- currently support deterministic code-generation.
                         -- See Note [Unique Determinism and code generation]
-                        , not (elemUniqSet_Directly reg live_set)
+                        , not (elemUFM_Directly reg live_set)
                         , r          <- regsOfLoc loc ]
 
         case lookupBlockAssignment  dest block_assig of
          Nothing
           -> joinToTargets_first
                         block_live new_blocks block_id instr dest dests
-                        block_assig adjusted_assig to_free
+                        block_assig adjusted_assig $ map realReg to_free
 
          Just (_, dest_assig)
           -> joinToTargets_again
@@ -116,7 +116,7 @@ joinToTargets' block_live new_blocks block_id instr (dest:dests)
 
 -- this is the first time we jumped to this block.
 joinToTargets_first :: (FR freeRegs, Instruction instr)
-                    => BlockMap RegSet
+                    => BlockMap (UniqFM Reg (Reg, Format))
                     -> [NatBasicBlock instr]
                     -> BlockId
                     -> instr
@@ -145,7 +145,7 @@ joinToTargets_first block_live new_blocks block_id instr dest dests
 
 -- we've jumped to this block before
 joinToTargets_again :: (Instruction instr, FR freeRegs)
-                    => BlockMap RegSet
+                    => BlockMap (UniqFM Reg (Reg, Format))
                     -> [NatBasicBlock instr]
                     -> BlockId
                     -> instr
@@ -327,15 +327,15 @@ handleComponent delta _  (AcyclicSCC (DigraphNode vreg src dsts))
 --      require a fixup.
 --
 handleComponent delta instr
-        (CyclicSCC ((DigraphNode vreg (InReg sreg) ((InReg dreg: _))) : rest))
+        (CyclicSCC ((DigraphNode vreg (InReg (RealRegUsage sreg scls)) ((InReg (RealRegUsage dreg dcls): _))) : rest))
         -- dest list may have more than one element, if the reg is also InMem.
  = do
         -- spill the source into its slot
         (instrSpill, slot)
-                        <- spillR (RegReal sreg) vreg
+                        <- spillR (RegReal sreg) scls vreg
 
         -- reload into destination reg
-        instrLoad       <- loadR (RegReal dreg) slot
+        instrLoad       <- loadR (RegReal dreg) dcls slot
 
         remainingFixUps <- mapM (handleComponent delta instr)
                                 (stronglyConnCompFromEdgedVerticesOrdR rest)
@@ -363,15 +363,16 @@ makeMove delta vreg src dst
       let platform = ncgPlatform config
 
       case (src, dst) of
-          (InReg s, InReg d) ->
+          (InReg (RealRegUsage s _), InReg (RealRegUsage d _)) ->
               do recordSpill (SpillJoinRR vreg)
+                 -- SIMD NCG TODO: does reg-2-reg work for vector registers?
                  return $ [mkRegRegMoveInstr platform (RegReal s) (RegReal d)]
-          (InMem s, InReg d) ->
+          (InMem s, InReg (RealRegUsage d cls)) ->
               do recordSpill (SpillJoinRM vreg)
-                 return $ mkLoadInstr config (RegReal d) delta s
-          (InReg s, InMem d) ->
+                 return $ mkLoadInstr config (RegReal d) cls delta s
+          (InReg (RealRegUsage s cls), InMem d) ->
               do recordSpill (SpillJoinRM vreg)
-                 return $ mkSpillInstr config (RegReal s) delta d
+                 return $ mkSpillInstr config (RegReal s) cls delta d
           _ ->
               -- we don't handle memory to memory moves.
               -- they shouldn't happen because we don't share


=====================================
compiler/GHC/CmmToAsm/Reg/Linear/StackMap.hs
=====================================
@@ -24,6 +24,7 @@ import GHC.Prelude
 
 import GHC.Types.Unique.FM
 import GHC.Types.Unique
+import GHC.CmmToAsm.Format
 
 
 -- | Identifier for a stack slot.
@@ -47,13 +48,18 @@ emptyStackMap = StackMap 0 emptyUFM
 -- | If this vreg unique already has a stack assignment then return the slot number,
 --      otherwise allocate a new slot, and update the map.
 --
-getStackSlotFor :: StackMap -> Unique -> (StackMap, Int)
-
-getStackSlotFor fs@(StackMap _ reserved) reg
-  | Just slot <- lookupUFM reserved reg  =  (fs, slot)
-
-getStackSlotFor (StackMap freeSlot reserved) reg =
-    (StackMap (freeSlot+1) (addToUFM reserved reg freeSlot), freeSlot)
+getStackSlotFor :: StackMap -> Format -> Unique -> (StackMap, Int)
+
+getStackSlotFor fs@(StackMap _ reserved) _fmt regUnique
+  | Just slot <- lookupUFM reserved regUnique  =  (fs, slot)
+
+getStackSlotFor (StackMap freeSlot reserved) fmt regUnique =
+  let
+    nbSlots = case fmt of
+      VecFormat {} -> 2 -- SIMD NCG TODO: panic for unsupported vectors
+      _ -> 1
+  in
+    (StackMap (freeSlot+nbSlots) (addToUFM reserved regUnique freeSlot), freeSlot)
 
 -- | Return the number of stack slots that were allocated
 getStackUse :: StackMap -> Int


=====================================
compiler/GHC/CmmToAsm/Reg/Linear/State.hs
=====================================
@@ -53,6 +53,7 @@ import GHC.Types.Unique.Supply
 import GHC.Exts (oneShot)
 
 import Control.Monad (ap)
+import GHC.CmmToAsm.Format
 
 type RA_Result freeRegs a = (# RA_State freeRegs, a #)
 
@@ -121,20 +122,20 @@ makeRAStats state
 
 
 spillR :: Instruction instr
-       => Reg -> Unique -> RegM freeRegs ([instr], Int)
+       => Reg -> Format -> Unique -> RegM freeRegs ([instr], Int)
 
-spillR reg temp = mkRegM $ \s ->
-  let (stack1,slot) = getStackSlotFor (ra_stack s) temp
-      instr  = mkSpillInstr (ra_config s) reg (ra_delta s) slot
+spillR reg fmt temp = mkRegM $ \s ->
+  let (stack1,slots) = getStackSlotFor (ra_stack s) fmt temp
+      instr  = mkSpillInstr (ra_config s) reg fmt (ra_delta s) slots
   in
-  RA_Result s{ra_stack=stack1} (instr,slot)
+  RA_Result s{ra_stack=stack1} (instr,slots)
 
 
 loadR :: Instruction instr
-      => Reg -> Int -> RegM freeRegs [instr]
+      => Reg -> Format -> Int -> RegM freeRegs [instr]
 
-loadR reg slot = mkRegM $ \s ->
-  RA_Result s (mkLoadInstr (ra_config s) reg (ra_delta s) slot)
+loadR reg fmt slot = mkRegM $ \s ->
+  RA_Result s (mkLoadInstr (ra_config s) reg fmt (ra_delta s) slot)
 
 getFreeRegsR :: RegM freeRegs freeRegs
 getFreeRegsR = mkRegM $ \ s at RA_State{ra_freeregs = freeregs} ->


=====================================
compiler/GHC/CmmToAsm/Reg/Linear/X86.hs
=====================================
@@ -32,13 +32,20 @@ getFreeRegs platform cls (FreeRegs f) = go f 0
 
   where go 0 _ = []
         go n m
-          | n .&. 1 /= 0 && classOfRealReg platform (RealRegSingle m) == cls
+          | n .&. 1 /= 0 && compatibleClass m
           = RealRegSingle m : (go (n `shiftR` 1) $! (m+1))
 
           | otherwise
           = go (n `shiftR` 1) $! (m+1)
         -- ToDo: there's no point looking through all the integer registers
         -- in order to find a floating-point one.
+        compatibleClass i =
+          let regClass = classOfRealReg platform (RealRegSingle i)
+          in   (if regClass == RcVector128 then RcDouble else regClass)
+            == (if cls == RcVector128 then RcDouble else cls)
+            -- SIMD NCG TODO: giant hack to account for xmm registers being
+            -- used for Double with SSE2.
+
 
 allocateReg :: RealReg -> FreeRegs -> FreeRegs
 allocateReg (RealRegSingle r) (FreeRegs f)


=====================================
compiler/GHC/CmmToAsm/Reg/Linear/X86_64.hs
=====================================
@@ -32,13 +32,19 @@ getFreeRegs platform cls (FreeRegs f) = go f 0
 
   where go 0 _ = []
         go n m
-          | n .&. 1 /= 0 && classOfRealReg platform (RealRegSingle m) == cls
+          | n .&. 1 /= 0 && compatibleClass m
           = RealRegSingle m : (go (n `shiftR` 1) $! (m+1))
 
           | otherwise
           = go (n `shiftR` 1) $! (m+1)
         -- ToDo: there's no point looking through all the integer registers
         -- in order to find a floating-point one.
+        compatibleClass i =
+          let regClass = classOfRealReg platform (RealRegSingle i)
+          in   (if regClass == RcVector128 then RcDouble else regClass)
+            == (if cls == RcVector128 then RcDouble else cls)
+            -- SIMD NCG TODO: giant hack to account for xmm registers being
+            -- used for Double with SSE2.
 
 allocateReg :: RealReg -> FreeRegs -> FreeRegs
 allocateReg (RealRegSingle r) (FreeRegs f)


=====================================
compiler/GHC/CmmToAsm/Reg/Liveness.hs
=====================================
@@ -63,6 +63,8 @@ import GHC.Utils.Monad.State.Strict
 import Data.List (mapAccumL, partition)
 import Data.Maybe
 import Data.IntSet              (IntSet)
+import GHC.CmmToAsm.Format
+import GHC.Types.Unique (Uniquable)
 
 -----------------------------------------------------------------------------
 type RegSet = UniqSet Reg
@@ -77,9 +79,6 @@ type RegMap a = UniqFM Reg a
 emptyRegMap :: RegMap a
 emptyRegMap = emptyUFM
 
-emptyRegSet :: RegSet
-emptyRegSet = emptyUniqSet
-
 type BlockMap a = LabelMap a
 
 type SlotMap a = UniqFM Slot a
@@ -101,10 +100,10 @@ data InstrSR instr
         = Instr  instr
 
         -- | spill this reg to a stack slot
-        | SPILL  Reg Int
+        | SPILL  Reg Format Int
 
         -- | reload this reg from a stack slot
-        | RELOAD Int Reg
+        | RELOAD Int Reg Format
 
         deriving (Functor)
 
@@ -112,14 +111,14 @@ instance Instruction instr => Instruction (InstrSR instr) where
         regUsageOfInstr platform i
          = case i of
                 Instr  instr    -> regUsageOfInstr platform instr
-                SPILL  reg _    -> RU [reg] []
-                RELOAD _ reg    -> RU [] [reg]
+                SPILL  reg fmt _    -> RU [(reg, fmt)] []
+                RELOAD _ reg fmt    -> RU [] [(reg, fmt)]
 
         patchRegsOfInstr i f
          = case i of
                 Instr instr     -> Instr (patchRegsOfInstr instr f)
-                SPILL  reg slot -> SPILL (f reg) slot
-                RELOAD slot reg -> RELOAD slot (f reg)
+                SPILL  reg cls slot -> SPILL (f reg) cls slot
+                RELOAD slot reg cls -> RELOAD slot (f reg) cls
 
         isJumpishInstr i
          = case i of
@@ -189,9 +188,9 @@ data LiveInstr instr
 
 data Liveness
         = Liveness
-        { liveBorn      :: RegSet       -- ^ registers born in this instruction (written to for first time).
-        , liveDieRead   :: RegSet       -- ^ registers that died because they were read for the last time.
-        , liveDieWrite  :: RegSet }     -- ^ registers that died because they were clobbered by something.
+        { liveBorn      :: RegMap (Reg, Format)       -- ^ registers born in this instruction (written to for first time).
+        , liveDieRead   :: RegMap (Reg, Format)       -- ^ registers that died because they were read for the last time.
+        , liveDieWrite  :: RegMap (Reg, Format) }     -- ^ registers that died because they were clobbered by something.
 
 
 -- | Stash regs live on entry to each basic block in the info part of the cmm code.
@@ -200,7 +199,7 @@ data LiveInfo
                 (LabelMap RawCmmStatics)  -- cmm info table static stuff
                 [BlockId]                 -- entry points (first one is the
                                           -- entry point for the proc).
-                (BlockMap RegSet)         -- argument locals live on entry to this block
+                (BlockMap (UniqFM Reg (Reg, Format)))         -- argument locals live on entry to this block
                 (BlockMap IntSet)         -- stack slots live on entry to this block
 
 
@@ -215,7 +214,7 @@ instance Outputable instr
         ppr (Instr realInstr)
            = ppr realInstr
 
-        ppr (SPILL reg slot)
+        ppr (SPILL reg _cls slot)
            = hcat [
                 text "\tSPILL",
                 char ' ',
@@ -223,7 +222,7 @@ instance Outputable instr
                 comma,
                 text "SLOT" <> parens (int slot)]
 
-        ppr (RELOAD slot reg)
+        ppr (RELOAD slot reg _cls)
            = hcat [
                 text "\tRELOAD",
                 char ' ',
@@ -246,11 +245,11 @@ instance Outputable instr
                         , pprRegs (text "# w_dying: ") (liveDieWrite live) ]
                     $+$ space)
 
-         where  pprRegs :: SDoc -> RegSet -> SDoc
+         where  pprRegs :: Outputable a => SDoc -> RegMap a -> SDoc
                 pprRegs name regs
-                 | isEmptyUniqSet regs  = empty
+                 | isNullUFM regs  = empty
                  | otherwise            = name <>
-                     (pprUFM (getUniqSet regs) (hcat . punctuate space . map ppr))
+                     (pprUFM regs (hcat . punctuate space . map ppr))
 
 instance OutputableP env instr => OutputableP env (LiveInstr instr) where
    pdoc env i = ppr (fmap (pdoc env) i)
@@ -329,7 +328,7 @@ mapGenBlockTopM f (CmmProc header label live (ListGraph blocks))
 slurpConflicts
         :: Instruction instr
         => LiveCmmDecl statics instr
-        -> (Bag (UniqSet Reg), Bag (Reg, Reg))
+        -> (Bag (UniqFM Reg (Reg, Format)), Bag (Reg, Reg))
 
 slurpConflicts live
         = slurpCmm (emptyBag, emptyBag) live
@@ -363,23 +362,23 @@ slurpConflicts live
          = let
                 -- regs that die because they are read for the last time at the start of an instruction
                 --      are not live across it.
-                rsLiveAcross    = rsLiveEntry `minusUniqSet` (liveDieRead live)
+                rsLiveAcross    = rsLiveEntry `minusUFM` (liveDieRead live)
 
                 -- regs live on entry to the next instruction.
                 --      be careful of orphans, make sure to delete dying regs _after_ unioning
                 --      in the ones that are born here.
-                rsLiveNext      = (rsLiveAcross `unionUniqSets` (liveBorn     live))
-                                                `minusUniqSet`  (liveDieWrite live)
+                rsLiveNext      = (rsLiveAcross `plusUFM` (liveBorn     live))
+                                                `minusUFM`  (liveDieWrite live)
 
                 -- orphan vregs are the ones that die in the same instruction they are born in.
                 --      these are likely to be results that are never used, but we still
                 --      need to assign a hreg to them..
-                rsOrphans       = intersectUniqSets
+                rsOrphans       = intersectUFM
                                         (liveBorn live)
-                                        (unionUniqSets (liveDieWrite live) (liveDieRead live))
+                                        (plusUFM (liveDieWrite live) (liveDieRead live))
 
                 --
-                rsConflicts     = unionUniqSets rsLiveNext rsOrphans
+                rsConflicts     = plusUFM rsLiveNext rsOrphans
 
           in    case takeRegRegMoveInstr instr of
                  Just rr        -> slurpLIs rsLiveNext
@@ -458,12 +457,12 @@ slurpReloadCoalesce live
         slurpLI slotMap li
 
                 -- remember what reg was stored into the slot
-                | LiveInstr (SPILL reg slot) _  <- li
-                , slotMap'                      <- addToUFM slotMap slot reg
+                | LiveInstr (SPILL reg _cls slot) _  <- li
+                , slotMap'                          <- addToUFM slotMap slot reg
                 = return (slotMap', Nothing)
 
                 -- add an edge between the this reg and the last one stored into the slot
-                | LiveInstr (RELOAD slot reg) _ <- li
+                | LiveInstr (RELOAD slot reg _cls) _ <- li
                 = case lookupUFM slotMap slot of
                         Just reg2
                          | reg /= reg2  -> return (slotMap, Just (reg, reg2))
@@ -572,13 +571,13 @@ stripLiveBlock config (BasicBlock i lis)
 
         -- The SPILL/RELOAD cases do not appear to be exercised by our codegens
         --
-        spillNat acc (LiveInstr (SPILL reg slot) _ : instrs)
+        spillNat acc (LiveInstr (SPILL reg cls slot) _ : instrs)
          = do   delta   <- get
-                spillNat (mkSpillInstr config reg delta slot ++ acc) instrs
+                spillNat (mkSpillInstr config reg cls delta slot ++ acc) instrs
 
-        spillNat acc (LiveInstr (RELOAD slot reg) _ : instrs)
+        spillNat acc (LiveInstr (RELOAD slot reg cls) _ : instrs)
          = do   delta   <- get
-                spillNat (mkLoadInstr config reg delta slot ++ acc) instrs
+                spillNat (mkLoadInstr config reg cls delta slot ++ acc) instrs
 
         spillNat acc (LiveInstr (Instr instr) _ : instrs)
          | Just i <- takeDeltaInstr instr
@@ -621,9 +620,8 @@ patchEraseLive patchF cmm
         patchCmm (CmmProc info label live sccs)
          | LiveInfo static id blockMap mLiveSlots <- info
          = let
-                patchRegSet set = mkUniqSet $ map patchF $ nonDetEltsUFM set
                   -- See Note [Unique Determinism and code generation]
-                blockMap'       = mapMap (patchRegSet . getUniqSet) blockMap
+                blockMap'       = mapMap (mapKeysUFM patchF) blockMap
 
                 info'           = LiveInfo static id blockMap' mLiveSlots
            in   CmmProc info' label live $ map patchSCC sccs
@@ -652,8 +650,8 @@ patchEraseLive patchF cmm
                 | r1 == r2      = True
 
                 -- destination reg is never used
-                | elementOfUniqSet r2 (liveBorn live)
-                , elementOfUniqSet r2 (liveDieRead live) || elementOfUniqSet r2 (liveDieWrite live)
+                | elemUFM r2 (liveBorn live)
+                , elemUFM r2 (liveDieRead live) || elemUFM r2 (liveDieWrite live)
                 = True
 
                 | otherwise     = False
@@ -676,11 +674,14 @@ patchRegsLiveInstr patchF li
                 (patchRegsOfInstr instr patchF)
                 (Just live
                         { -- WARNING: have to go via lists here because patchF changes the uniq in the Reg
-                          liveBorn      = mapUniqSet patchF $ liveBorn live
-                        , liveDieRead   = mapUniqSet patchF $ liveDieRead live
-                        , liveDieWrite  = mapUniqSet patchF $ liveDieWrite live })
+                          liveBorn      = mapKeysUFM patchF $ liveBorn live
+                        , liveDieRead   = mapKeysUFM patchF $ liveDieRead live
+                        , liveDieWrite  = mapKeysUFM patchF $ liveDieWrite live })
                           -- See Note [Unique Determinism and code generation]
 
+-- SIMD NCG TODO: move this to Unique.FM module
+mapKeysUFM :: Uniquable a => (t -> a) -> UniqFM key (t, b) -> UniqFM a (a, b)
+mapKeysUFM f m = listToUFM $ map ( \ (r, fmt) -> let r' = f r in (r', (r', fmt)) ) $ nonDetEltsUFM m
 
 --------------------------------------------------------------------------------
 -- | Convert a NatCmmDecl to a LiveCmmDecl, with liveness information
@@ -869,7 +870,7 @@ computeLiveness
         -> [SCC (LiveBasicBlock instr)]
         -> ([SCC (LiveBasicBlock instr)],       -- instructions annotated with list of registers
                                                 -- which are "dead after this instruction".
-               BlockMap RegSet)                 -- blocks annotated with set of live registers
+               BlockMap (UniqFM Reg (Reg, Format)))                 -- blocks annotated with set of live registers
                                                 -- on entry to the block.
 
 computeLiveness platform sccs
@@ -884,11 +885,11 @@ computeLiveness platform sccs
 livenessSCCs
        :: Instruction instr
        => Platform
-       -> BlockMap RegSet
+       -> BlockMap (UniqFM Reg (Reg, Format))
        -> [SCC (LiveBasicBlock instr)]          -- accum
        -> [SCC (LiveBasicBlock instr)]
        -> ( [SCC (LiveBasicBlock instr)]
-          , BlockMap RegSet)
+          , BlockMap (UniqFM Reg (Reg, Format)))
 
 livenessSCCs _ blockmap done []
         = (done, blockmap)
@@ -917,8 +918,8 @@ livenessSCCs platform blockmap done
 
             linearLiveness
                 :: Instruction instr
-                => BlockMap RegSet -> [LiveBasicBlock instr]
-                -> (BlockMap RegSet, [LiveBasicBlock instr])
+                => BlockMap (UniqFM Reg (Reg, Format)) -> [LiveBasicBlock instr]
+                -> (BlockMap (UniqFM Reg (Reg, Format)), [LiveBasicBlock instr])
 
             linearLiveness = mapAccumL (livenessBlock platform)
 
@@ -926,9 +927,8 @@ livenessSCCs platform blockmap done
                 -- BlockMaps for equality.
             equalBlockMaps a b
                 = a' == b'
-              where a' = map f $ mapToList a
-                    b' = map f $ mapToList b
-                    f (key,elt) = (key, nonDetEltsUniqSet elt)
+              where a' = mapToList a
+                    b' = mapToList b
                     -- See Note [Unique Determinism and code generation]
 
 
@@ -938,14 +938,14 @@ livenessSCCs platform blockmap done
 livenessBlock
         :: Instruction instr
         => Platform
-        -> BlockMap RegSet
+        -> BlockMap (UniqFM Reg (Reg, Format))
         -> LiveBasicBlock instr
-        -> (BlockMap RegSet, LiveBasicBlock instr)
+        -> (BlockMap (UniqFM Reg (Reg, Format)), LiveBasicBlock instr)
 
 livenessBlock platform blockmap (BasicBlock block_id instrs)
  = let
         (regsLiveOnEntry, instrs1)
-            = livenessBack platform emptyUniqSet blockmap [] (reverse instrs)
+            = livenessBack platform emptyUFM blockmap [] (reverse instrs)
         blockmap'       = mapInsert block_id regsLiveOnEntry blockmap
 
         instrs2         = livenessForward platform regsLiveOnEntry instrs1
@@ -960,7 +960,7 @@ livenessBlock platform blockmap (BasicBlock block_id instrs)
 livenessForward
         :: Instruction instr
         => Platform
-        -> RegSet                       -- regs live on this instr
+        -> UniqFM Reg (Reg, Format) -- regs live on this instr
         -> [LiveInstr instr] -> [LiveInstr instr]
 
 livenessForward _        _           []  = []
@@ -970,12 +970,14 @@ livenessForward platform rsLiveEntry (li@(LiveInstr instr mLive) : lis)
                 RU _ written  = regUsageOfInstr platform instr
                 -- Regs that are written to but weren't live on entry to this instruction
                 --      are recorded as being born here.
-                rsBorn          = mkUniqSet
-                                $ filter (\r -> not $ elementOfUniqSet r rsLiveEntry) written
+                rsBorn          = listToUFM
+                                $ map ( \ ( r, fmt ) -> ( r, ( r, fmt ) ) )
+                                $ filter (\( r, _) -> not $ elemUFM r rsLiveEntry)
+                                $ written
 
-                rsLiveNext      = (rsLiveEntry `unionUniqSets` rsBorn)
-                                        `minusUniqSet` (liveDieRead live)
-                                        `minusUniqSet` (liveDieWrite live)
+                rsLiveNext      = (rsLiveEntry `plusUFM` rsBorn)
+                                        `minusUFM` (liveDieRead live)
+                                        `minusUFM` (liveDieWrite live)
 
         in LiveInstr instr (Just live { liveBorn = rsBorn })
                 : livenessForward platform rsLiveNext lis
@@ -990,11 +992,11 @@ livenessForward platform rsLiveEntry (li@(LiveInstr instr mLive) : lis)
 livenessBack
         :: Instruction instr
         => Platform
-        -> RegSet                       -- regs live on this instr
-        -> BlockMap RegSet              -- regs live on entry to other BBs
+        -> UniqFM Reg (Reg, Format)            -- regs live on this instr
+        -> BlockMap (UniqFM Reg (Reg, Format)) -- regs live on entry to other BBs
         -> [LiveInstr instr]            -- instructions (accum)
         -> [LiveInstr instr]            -- instructions
-        -> (RegSet, [LiveInstr instr])
+        -> (UniqFM Reg (Reg, Format), [LiveInstr instr])
 
 livenessBack _        liveregs _        done []  = (liveregs, done)
 
@@ -1007,10 +1009,10 @@ livenessBack platform liveregs blockmap acc (instr : instrs)
 liveness1
         :: Instruction instr
         => Platform
-        -> RegSet
-        -> BlockMap RegSet
+        -> UniqFM Reg (Reg, Format)
+        -> BlockMap (UniqFM Reg (Reg, Format))
         -> LiveInstr instr
-        -> (RegSet, LiveInstr instr)
+        -> (UniqFM Reg (Reg, Format), LiveInstr instr)
 
 liveness1 _ liveregs _ (LiveInstr instr _)
         | isMetaInstr instr
@@ -1021,15 +1023,15 @@ liveness1 platform liveregs blockmap (LiveInstr instr _)
         | not_a_branch
         = (liveregs1, LiveInstr instr
                         (Just $ Liveness
-                        { liveBorn      = emptyUniqSet
+                        { liveBorn      = emptyUFM
                         , liveDieRead   = r_dying
                         , liveDieWrite  = w_dying }))
 
         | otherwise
         = (liveregs_br, LiveInstr instr
                         (Just $ Liveness
-                        { liveBorn      = emptyUniqSet
-                        , liveDieRead   = mkUniqSet r_dying_br
+                        { liveBorn      = emptyUFM
+                        , liveDieRead   = r_dying_br
                         , liveDieWrite  = w_dying }))
 
         where
@@ -1037,18 +1039,18 @@ liveness1 platform liveregs blockmap (LiveInstr instr _)
 
             -- registers that were written here are dead going backwards.
             -- registers that were read here are live going backwards.
-            liveregs1   = (liveregs `delListFromUniqSet` written)
-                                    `addListToUniqSet` read
+            liveregs1   = (liveregs `delListFromUFM` (map fst written))
+                                    `addListToUFM` (map (\(r, fmt) -> (r, (r,fmt))) read)
 
             -- registers that are not live beyond this point, are recorded
             --  as dying here.
-            r_dying     = mkUniqSet
-                          [ reg | reg <- read, reg `notElem` written,
-                              not (elementOfUniqSet reg liveregs) ]
+            r_dying     = listToUFM
+                          [ (reg, (reg, fmt)) | (reg, fmt) <- read, reg `notElem` map fst written,
+                              not (elemUFM reg liveregs) ]
 
-            w_dying     = mkUniqSet
-                          [ reg | reg <- written,
-                             not (elementOfUniqSet reg liveregs) ]
+            w_dying     = listToUFM
+                          [ (reg, (reg, fmt)) | (reg, fmt) <- written,
+                             not (elemUFM reg liveregs) ]
 
             -- union in the live regs from all the jump destinations of this
             -- instruction.
@@ -1058,15 +1060,14 @@ liveness1 platform liveregs blockmap (LiveInstr instr _)
             targetLiveRegs target
                   = case mapLookup target blockmap of
                                 Just ra -> ra
-                                Nothing -> emptyRegSet
+                                Nothing -> emptyUFM
 
-            live_from_branch = unionManyUniqSets (map targetLiveRegs targets)
+            live_from_branch = plusUFMList (map targetLiveRegs targets)
 
-            liveregs_br = liveregs1 `unionUniqSets` live_from_branch
+            liveregs_br = liveregs1 `plusUFM` live_from_branch
 
             -- registers that are live only in the branch targets should
             -- be listed as dying here.
-            live_branch_only = live_from_branch `minusUniqSet` liveregs
-            r_dying_br  = nonDetEltsUniqSet (r_dying `unionUniqSets`
-                                             live_branch_only)
+            live_branch_only = live_from_branch `minusUFM` liveregs
+            r_dying_br  = r_dying `plusUFM` live_branch_only
                           -- See Note [Unique Determinism and code generation]


=====================================
compiler/GHC/CmmToAsm/X86/Instr.hs
=====================================
@@ -380,6 +380,8 @@ data Instr
         | VSHUFPS    Format Imm Operand Reg
         | SHUFPD     Format Imm Operand Reg
         | VSHUFPD    Format Imm Operand Reg
+        -- SIMD NCG TODO: don't store the Format (or only what we need)
+        -- in order to emit these instructions.
 
         -- Shift
         | PSLLDQ     Format Operand Reg
@@ -401,129 +403,129 @@ data FMAPermutation = FMA132 | FMA213 | FMA231
 regUsageOfInstr :: Platform -> Instr -> RegUsage
 regUsageOfInstr platform instr
  = case instr of
-    MOV    _ src dst    -> usageRW src dst
-    CMOV _ _ src dst    -> mkRU (use_R src [dst]) [dst]
-    MOVZxL _ src dst    -> usageRW src dst
-    MOVSxL _ src dst    -> usageRW src dst
-    LEA    _ src dst    -> usageRW src dst
-    ADD    _ src dst    -> usageRM src dst
-    ADC    _ src dst    -> usageRM src dst
-    SUB    _ src dst    -> usageRM src dst
-    SBB    _ src dst    -> usageRM src dst
-    IMUL   _ src dst    -> usageRM src dst
+    MOV    fmt src dst    -> usageRW fmt src dst
+    CMOV _ fmt src dst    -> mkRU fmt (use_R src [dst]) [dst]
+    MOVZxL fmt src dst    -> usageRW fmt src dst
+    MOVSxL fmt src dst    -> usageRW fmt src dst
+    LEA    fmt src dst    -> usageRW fmt src dst
+    ADD    fmt src dst    -> usageRM fmt src dst
+    ADC    fmt src dst    -> usageRM fmt src dst
+    SUB    fmt src dst    -> usageRM fmt src dst
+    SBB    fmt src dst    -> usageRM fmt src dst
+    IMUL   fmt src dst    -> usageRM fmt src dst
 
     -- Result of IMULB will be in just in %ax
-    IMUL2  II8 src       -> mkRU (eax:use_R src []) [eax]
+    IMUL2  II8 src       -> mkRU II8 (eax:use_R src []) [eax]
     -- Result of IMUL for wider values, will be split between %dx/%edx/%rdx and
     -- %ax/%eax/%rax.
-    IMUL2  _ src        -> mkRU (eax:use_R src []) [eax,edx]
-
-    MUL    _ src dst    -> usageRM src dst
-    MUL2   _ src        -> mkRU (eax:use_R src []) [eax,edx]
-    DIV    _ op -> mkRU (eax:edx:use_R op []) [eax,edx]
-    IDIV   _ op -> mkRU (eax:edx:use_R op []) [eax,edx]
-    ADD_CC _ src dst    -> usageRM src dst
-    SUB_CC _ src dst    -> usageRM src dst
-    AND    _ src dst    -> usageRM src dst
-    OR     _ src dst    -> usageRM src dst
-
-    XOR    _ (OpReg src) (OpReg dst)
-        | src == dst    -> mkRU [] [dst]
-
-    XOR    _ src dst    -> usageRM src dst
-    NOT    _ op         -> usageM op
-    BSWAP  _ reg        -> mkRU [reg] [reg]
-    NEGI   _ op         -> usageM op
-    SHL    _ imm dst    -> usageRM imm dst
-    SAR    _ imm dst    -> usageRM imm dst
-    SHR    _ imm dst    -> usageRM imm dst
-    SHLD   _ imm dst1 dst2 -> usageRMM imm dst1 dst2
-    SHRD   _ imm dst1 dst2 -> usageRMM imm dst1 dst2
-    BT     _ _   src    -> mkRUR (use_R src [])
-
-    PUSH   _ op         -> mkRUR (use_R op [])
-    POP    _ op         -> mkRU [] (def_W op)
-    TEST   _ src dst    -> mkRUR (use_R src $! use_R dst [])
-    CMP    _ src dst    -> mkRUR (use_R src $! use_R dst [])
-    SETCC  _ op         -> mkRU [] (def_W op)
-    JXX    _ _          -> mkRU [] []
-    JXX_GBL _ _         -> mkRU [] []
-    JMP     op regs     -> mkRUR (use_R op regs)
-    JMP_TBL op _ _ _    -> mkRUR (use_R op [])
-    CALL (Left _)  params   -> mkRU params (callClobberedRegs platform)
-    CALL (Right reg) params -> mkRU (reg:params) (callClobberedRegs platform)
-    CLTD   _            -> mkRU [eax] [edx]
-    NOP                 -> mkRU [] []
-
-    X87Store    _  dst    -> mkRUR ( use_EA dst [])
-
-    CVTSS2SD   src dst  -> mkRU [src] [dst]
-    CVTSD2SS   src dst  -> mkRU [src] [dst]
-    CVTTSS2SIQ _ src dst -> mkRU (use_R src []) [dst]
-    CVTTSD2SIQ _ src dst -> mkRU (use_R src []) [dst]
-    CVTSI2SS   _ src dst -> mkRU (use_R src []) [dst]
-    CVTSI2SD   _ src dst -> mkRU (use_R src []) [dst]
-    FDIV _     src dst  -> usageRM src dst
-    SQRT _ src dst      -> mkRU (use_R src []) [dst]
-
-    FETCHGOT reg        -> mkRU [] [reg]
-    FETCHPC  reg        -> mkRU [] [reg]
+    IMUL2  fmt src        -> mkRU fmt (eax:use_R src []) [eax,edx]
+
+    MUL    fmt src dst    -> usageRM fmt src dst
+    MUL2   fmt src        -> mkRU fmt (eax:use_R src []) [eax,edx]
+    DIV    fmt op -> mkRU fmt (eax:edx:use_R op []) [eax,edx]
+    IDIV   fmt op -> mkRU fmt (eax:edx:use_R op []) [eax,edx]
+    ADD_CC fmt src dst    -> usageRM fmt src dst
+    SUB_CC fmt src dst    -> usageRM fmt src dst
+    AND    fmt src dst    -> usageRM fmt src dst
+    OR     fmt src dst    -> usageRM fmt src dst
+
+    XOR    fmt (OpReg src) (OpReg dst)
+        | src == dst    -> mkRU fmt [] [dst]
+
+    XOR    fmt src dst    -> usageRM fmt src dst
+    NOT    fmt op         -> usageM fmt op
+    BSWAP  fmt reg        -> mkRU fmt [reg] [reg]
+    NEGI   fmt op         -> usageM fmt op
+    SHL    fmt imm dst    -> usageRM fmt imm dst
+    SAR    fmt imm dst    -> usageRM fmt imm dst
+    SHR    fmt imm dst    -> usageRM fmt imm dst
+    SHLD   fmt imm dst1 dst2 -> usageRMM fmt imm dst1 dst2
+    SHRD   fmt imm dst1 dst2 -> usageRMM fmt imm dst1 dst2
+    BT     fmt _   src    -> mkRUR fmt (use_R src [])
+
+    PUSH   fmt op         -> mkRUR fmt (use_R op [])
+    POP    fmt op         -> mkRU fmt [] (def_W op)
+    TEST   fmt src dst    -> mkRUR fmt (use_R src $! use_R dst [])
+    CMP    fmt src dst    -> mkRUR fmt (use_R src $! use_R dst [])
+    SETCC  _ op         -> mkRU II64 [] (def_W op)
+    JXX    _ _          -> mkRU II64 [] []
+    JXX_GBL _ _         -> mkRU II64 [] []
+    JMP     op regs     -> mkRUR II64 (use_R op regs)
+    JMP_TBL op _ _ _    -> mkRUR II64 (use_R op [])
+    CALL (Left _)  params   -> mkRU II64 params (callClobberedRegs platform)
+    CALL (Right reg) params -> mkRU II64 (reg:params) (callClobberedRegs platform)
+    CLTD   _            -> mkRU II64 [eax] [edx]
+    NOP                 -> mkRU II64 [] []
+
+    X87Store    fmt  dst    -> mkRUR fmt ( use_EA dst [])
+
+    CVTSS2SD   src dst  -> mkRU FF64 [src] [dst]
+    CVTSD2SS   src dst  -> mkRU FF32 [src] [dst]
+    CVTTSS2SIQ _ src dst -> mkRU FF32 (use_R src []) [dst]
+    CVTTSD2SIQ _ src dst -> mkRU FF64 (use_R src []) [dst]
+    CVTSI2SS   _ src dst -> mkRU FF32 (use_R src []) [dst]
+    CVTSI2SD   _ src dst -> mkRU FF64 (use_R src []) [dst]
+    FDIV fmt     src dst  -> usageRM fmt src dst
+    SQRT fmt src dst      -> mkRU fmt (use_R src []) [dst]
+
+    FETCHGOT reg        -> mkRU II64 [] [reg]
+    FETCHPC  reg        -> mkRU II64 [] [reg]
 
     COMMENT _           -> noUsage
     LOCATION{}          -> noUsage
     UNWIND{}            -> noUsage
     DELTA   _           -> noUsage
 
-    POPCNT _ src dst -> mkRU (use_R src []) [dst]
-    LZCNT  _ src dst -> mkRU (use_R src []) [dst]
-    TZCNT  _ src dst -> mkRU (use_R src []) [dst]
-    BSF    _ src dst -> mkRU (use_R src []) [dst]
-    BSR    _ src dst -> mkRU (use_R src []) [dst]
+    POPCNT fmt src dst -> mkRU fmt (use_R src []) [dst]
+    LZCNT  fmt src dst -> mkRU fmt (use_R src []) [dst]
+    TZCNT  fmt src dst -> mkRU fmt (use_R src []) [dst]
+    BSF    fmt src dst -> mkRU fmt (use_R src []) [dst]
+    BSR    fmt src dst -> mkRU fmt (use_R src []) [dst]
 
-    PDEP   _ src mask dst -> mkRU (use_R src $ use_R mask []) [dst]
-    PEXT   _ src mask dst -> mkRU (use_R src $ use_R mask []) [dst]
+    PDEP   fmt src mask dst -> mkRU fmt (use_R src $ use_R mask []) [dst]
+    PEXT   fmt src mask dst -> mkRU fmt (use_R src $ use_R mask []) [dst]
 
-    FMA3 _ _ _ src3 src2 dst -> usageFMA src3 src2 dst
+    FMA3 fmt _ _ src3 src2 dst -> usageFMA fmt src3 src2 dst
 
     -- note: might be a better way to do this
-    PREFETCH _  _ src -> mkRU (use_R src []) []
+    PREFETCH _  fmt src -> mkRU fmt (use_R src []) []
     LOCK i              -> regUsageOfInstr platform i
-    XADD _ src dst      -> usageMM src dst
-    CMPXCHG _ src dst   -> usageRMM src dst (OpReg eax)
-    XCHG _ src dst      -> usageMM src (OpReg dst)
+    XADD fmt src dst      -> usageMM fmt src dst
+    CMPXCHG fmt src dst   -> usageRMM fmt src dst (OpReg eax)
+    XCHG fmt src dst      -> usageMM fmt src (OpReg dst)
     MFENCE -> noUsage
 
     -- vector instructions
-    VBROADCAST _ src dst   -> mkRU (use_EA src []) [dst]
-    VEXTRACT     _ off src dst -> mkRU ((use_R off []) ++ [src]) (use_R dst [])
-    INSERTPS     _ off src dst
-      -> mkRU ((use_R off []) ++ (use_R src []) ++ [dst]) [dst]
-
-    VMOVU        _ src dst   -> mkRU (use_R src []) (use_R dst [])
-    MOVU         _ src dst   -> mkRU (use_R src []) (use_R dst [])
-    MOVL         _ src dst   -> mkRU (use_R src []) (use_R dst [])
-    MOVH         _ src dst   -> mkRU (use_R src []) (use_R dst [])
-    VPXOR        _ s1 s2 dst -> mkRU [s1,s2] [dst]
-
-    VADD         _ s1 s2 dst -> mkRU ((use_R s1 []) ++ [s2]) [dst]
-    VSUB         _ s1 s2 dst -> mkRU ((use_R s1 []) ++ [s2]) [dst]
-    VMUL         _ s1 s2 dst -> mkRU ((use_R s1 []) ++ [s2]) [dst]
-    VDIV         _ s1 s2 dst -> mkRU ((use_R s1 []) ++ [s2]) [dst]
-
-    VPSHUFD      _ _off src dst
-      -> mkRU (use_R src []) [dst]
-    PSHUFD       _ _off src dst
-      -> mkRU (use_R src []) [dst]
-    SHUFPD      _ _off src dst
-      -> mkRU (use_R src [dst]) [dst]
-    SHUFPS      _ _off src dst
-      -> mkRU (use_R src [dst]) [dst]
-    VSHUFPD     _ _off src dst
-      -> mkRU (use_R src [dst]) [dst]
-    VSHUFPS     _ _off src dst
-      -> mkRU (use_R src [dst]) [dst]
-
-    PSLLDQ       _ off dst -> mkRU (use_R off []) [dst]
+    VBROADCAST fmt src dst   -> mkRU fmt (use_EA src []) [dst]
+    VEXTRACT     fmt off src dst -> mkRU fmt ((use_R off []) ++ [src]) (use_R dst [])
+    INSERTPS     fmt off src dst
+      -> mkRU fmt ((use_R off []) ++ (use_R src []) ++ [dst]) [dst]
+
+    VMOVU        fmt src dst   -> mkRU fmt (use_R src []) (use_R dst [])
+    MOVU         fmt src dst   -> mkRU fmt (use_R src []) (use_R dst [])
+    MOVL         fmt src dst   -> mkRU fmt (use_R src []) (use_R dst [])
+    MOVH         fmt src dst   -> mkRU fmt (use_R src []) (use_R dst [])
+    VPXOR        fmt s1 s2 dst -> mkRU fmt [s1,s2] [dst]
+
+    VADD         fmt s1 s2 dst -> mkRU fmt ((use_R s1 []) ++ [s2]) [dst]
+    VSUB         fmt s1 s2 dst -> mkRU fmt ((use_R s1 []) ++ [s2]) [dst]
+    VMUL         fmt s1 s2 dst -> mkRU fmt ((use_R s1 []) ++ [s2]) [dst]
+    VDIV         fmt s1 s2 dst -> mkRU fmt ((use_R s1 []) ++ [s2]) [dst]
+
+    VPSHUFD      fmt _off src dst
+      -> mkRU fmt (use_R src []) [dst]
+    PSHUFD       fmt _off src dst
+      -> mkRU fmt (use_R src []) [dst]
+    SHUFPD      fmt _off src dst
+      -> mkRU fmt (use_R src [dst]) [dst]
+    SHUFPS      fmt _off src dst
+      -> mkRU fmt (use_R src [dst]) [dst]
+    VSHUFPD     fmt _off src dst
+      -> mkRU fmt (use_R src [dst]) [dst]
+    VSHUFPS     fmt _off src dst
+      -> mkRU fmt (use_R src [dst]) [dst]
+
+    PSLLDQ       fmt off dst -> mkRU fmt (use_R off []) [dst]
 
     _other              -> panic "regUsage: unrecognised instr"
  where
@@ -537,44 +539,44 @@ regUsageOfInstr platform instr
     -- are read.
 
     -- 2 operand form; first operand Read; second Written
-    usageRW :: Operand -> Operand -> RegUsage
-    usageRW op (OpReg reg)      = mkRU (use_R op []) [reg]
-    usageRW op (OpAddr ea)      = mkRUR (use_R op $! use_EA ea [])
-    usageRW _ _                 = panic "X86.RegInfo.usageRW: no match"
+    usageRW :: Format -> Operand -> Operand -> RegUsage
+    usageRW fmt op (OpReg reg)      = mkRU fmt (use_R op []) [reg]
+    usageRW fmt op (OpAddr ea)      = mkRUR fmt (use_R op $! use_EA ea [])
+    usageRW _ _ _                   = panic "X86.RegInfo.usageRW: no match"
 
     -- 2 operand form; first operand Read; second Modified
-    usageRM :: Operand -> Operand -> RegUsage
-    usageRM op (OpReg reg)      = mkRU (use_R op [reg]) [reg]
-    usageRM op (OpAddr ea)      = mkRUR (use_R op $! use_EA ea [])
-    usageRM _ _                 = panic "X86.RegInfo.usageRM: no match"
+    usageRM :: Format -> Operand -> Operand -> RegUsage
+    usageRM fmt op (OpReg reg)      = mkRU fmt (use_R op [reg]) [reg]
+    usageRM fmt op (OpAddr ea)      = mkRUR fmt (use_R op $! use_EA ea [])
+    usageRM _ _ _                   = panic "X86.RegInfo.usageRM: no match"
 
     -- 2 operand form; first operand Modified; second Modified
-    usageMM :: Operand -> Operand -> RegUsage
-    usageMM (OpReg src) (OpReg dst) = mkRU [src, dst] [src, dst]
-    usageMM (OpReg src) (OpAddr ea) = mkRU (use_EA ea [src]) [src]
-    usageMM (OpAddr ea) (OpReg dst) = mkRU (use_EA ea [dst]) [dst]
-    usageMM _ _                     = panic "X86.RegInfo.usageMM: no match"
+    usageMM :: Format -> Operand -> Operand -> RegUsage
+    usageMM fmt (OpReg src) (OpReg dst) = mkRU fmt [src, dst] [src, dst]
+    usageMM fmt (OpReg src) (OpAddr ea) = mkRU fmt (use_EA ea [src]) [src]
+    usageMM fmt (OpAddr ea) (OpReg dst) = mkRU fmt (use_EA ea [dst]) [dst]
+    usageMM _ _ _                       = panic "X86.RegInfo.usageMM: no match"
 
     -- 3 operand form; first operand Read; second Modified; third Modified
-    usageRMM :: Operand -> Operand -> Operand -> RegUsage
-    usageRMM (OpReg src) (OpReg dst) (OpReg reg) = mkRU [src, dst, reg] [dst, reg]
-    usageRMM (OpReg src) (OpAddr ea) (OpReg reg) = mkRU (use_EA ea [src, reg]) [reg]
-    usageRMM _ _ _                               = panic "X86.RegInfo.usageRMM: no match"
+    usageRMM :: Format -> Operand -> Operand -> Operand -> RegUsage
+    usageRMM fmt (OpReg src) (OpReg dst) (OpReg reg) = mkRU fmt [src, dst, reg] [dst, reg]
+    usageRMM fmt (OpReg src) (OpAddr ea) (OpReg reg) = mkRU fmt (use_EA ea [src, reg]) [reg]
+    usageRMM _ _ _ _                                 = panic "X86.RegInfo.usageRMM: no match"
 
     -- 3 operand form of FMA instructions.
-    usageFMA :: Operand -> Reg -> Reg -> RegUsage
-    usageFMA (OpReg src1) src2 dst
-      = mkRU [src1, src2, dst] [dst]
-    usageFMA (OpAddr ea1) src2 dst
-      = mkRU (use_EA ea1 [src2, dst]) [dst]
-    usageFMA _ _ _
+    usageFMA :: Format -> Operand -> Reg -> Reg -> RegUsage
+    usageFMA fmt (OpReg src1) src2 dst
+      = mkRU fmt [src1, src2, dst] [dst]
+    usageFMA fmt (OpAddr ea1) src2 dst
+      = mkRU fmt (use_EA ea1 [src2, dst]) [dst]
+    usageFMA _ _ _ _
       = panic "X86.RegInfo.usageFMA: no match"
 
     -- 1 operand form; operand Modified
-    usageM :: Operand -> RegUsage
-    usageM (OpReg reg)          = mkRU [reg] [reg]
-    usageM (OpAddr ea)          = mkRUR (use_EA ea [])
-    usageM _                    = panic "X86.RegInfo.usageM: no match"
+    usageM :: Format -> Operand -> RegUsage
+    usageM fmt (OpReg reg)        = mkRU fmt [reg] [reg]
+    usageM fmt (OpAddr ea)        = mkRUR fmt (use_EA ea [])
+    usageM _ _                    = panic "X86.RegInfo.usageM: no match"
 
     -- Registers defd when an operand is written.
     def_W (OpReg reg)           = [reg]
@@ -595,10 +597,11 @@ regUsageOfInstr platform instr
               use_index EAIndexNone   tl = tl
               use_index (EAIndex i _) tl = i : tl
 
-    mkRUR src = src' `seq` RU src' []
+    mkRUR fmt src = src' `seq` RU (map (,fmt) src') []
         where src' = filter (interesting platform) src
 
-    mkRU src dst = src' `seq` dst' `seq` RU src' dst'
+
+    mkRU fmt src dst = src' `seq` dst' `seq` RU (map (,fmt) src') (map (,fmt) dst')
         where src' = filter (interesting platform) src
               dst' = filter (interesting platform) dst
 
@@ -817,18 +820,27 @@ patchJumpInstr insn patchF
 mkSpillInstr
     :: NCGConfig
     -> Reg      -- register to spill
+    -> Format
     -> Int      -- current stack delta
     -> Int      -- spill slot to use
     -> [Instr]
 
-mkSpillInstr config reg delta slot
-  = let off     = spillSlotToOffset platform slot - delta
-    in
-    case targetClassOfReg platform reg of
-           RcInteger   -> [MOV (archWordFormat is32Bit)
-                                   (OpReg reg) (OpAddr (spRel platform off))]
-           RcDouble    -> [MOV FF64 (OpReg reg) (OpAddr (spRel platform off))]
-           _         -> panic "X86.mkSpillInstr: no match"
+mkSpillInstr config reg fmt delta slot
+  = let off s = spillSlotToOffset platform s - delta
+    in case fmt of
+        IntegerFormat -> [MOV (archWordFormat is32Bit)
+                                   (OpReg reg) (OpAddr (spRel platform $ off slot))]
+        FF64 -> [MOV FF64 (OpReg reg) (OpAddr (spRel platform $ off slot))]
+        FF32 -> panic "X86_mkSpillInstr: RcFloat"
+        VecFormat {} ->
+          -- SIMD NCG TODO: panic for unsupported VecFormats (& same in mkLoadInstr)
+          [MOV FF64 (OpReg reg) (OpAddr (spRel platform $ off slot))
+           -- Now shuffle the register, putting the high half into the lower half.
+          ,SHUFPD (VecFormat 2 FmtDouble W64) (ImmInt 0b01) (OpReg reg) reg
+            -- NB: this format doesn't matter, we emit the same instruction
+            -- regardless of what is stored...
+            -- SIMD NCG TODO: avoid using MOV by using SHUFPD with an OpAddr argument?
+          ,MOV FF64 (OpReg reg) (OpAddr (spRel platform $ off (slot + 1)))]
     where platform = ncgPlatform config
           is32Bit = target32Bit platform
 
@@ -836,18 +848,23 @@ mkSpillInstr config reg delta slot
 mkLoadInstr
     :: NCGConfig
     -> Reg      -- register to load
+    -> Format
     -> Int      -- current stack delta
     -> Int      -- spill slot to use
     -> [Instr]
 
-mkLoadInstr config reg delta slot
-  = let off     = spillSlotToOffset platform slot - delta
+mkLoadInstr config reg fmt delta slot
+  = let off s     = spillSlotToOffset platform s - delta
     in
-        case targetClassOfReg platform reg of
-              RcInteger -> ([MOV (archWordFormat is32Bit)
-                                 (OpAddr (spRel platform off)) (OpReg reg)])
-              RcDouble  -> ([MOV FF64 (OpAddr (spRel platform off)) (OpReg reg)])
-              _         -> panic "X86.mkLoadInstr"
+        case fmt of
+              IntegerFormat -> ([MOV (archWordFormat is32Bit)
+                                 (OpAddr (spRel platform $ off slot)) (OpReg reg)])
+              FF64  -> ([MOV FF64 (OpAddr (spRel platform $ off slot)) (OpReg reg)])
+              FF32   -> panic "X86.mkLoadInstr RcFloat"
+              VecFormat {} ->
+                [MOVH (VecFormat 2 FmtDouble W64) (OpAddr (spRel platform $ off (slot + 1))) (OpReg reg)
+                ,MOVL (VecFormat 2 FmtDouble W64) (OpAddr (spRel platform $ off slot)) (OpReg reg)]
+
     where platform = ncgPlatform config
           is32Bit = target32Bit platform
 


=====================================
compiler/GHC/Platform/Reg.hs
=====================================
@@ -116,17 +116,12 @@ renameVirtualReg u r
 
 classOfVirtualReg :: VirtualReg -> RegClass
 classOfVirtualReg vr
- = case vr of
+  = case vr of
         VirtualRegI{}   -> RcInteger
         VirtualRegHi{}  -> RcInteger
         VirtualRegF{}   -> RcFloat
         VirtualRegD{}   -> RcDouble
-        -- Below is an awful, largely x86-specific hack
-        VirtualRegVec{} -> RcDouble
-        -- SIMD NCG TODO: this seems very wrong and potentially the source of
-        -- bug #16927, because we use this function to determine how to spill
-        -- the contents of a virtual register
-        -- (see e.g. GHC.CmmToAsm.X86.Instr.mkSpillInstr).
+        VirtualRegVec{} -> RcVector128
 
 
 -- Determine the upper-half vreg for a 64-bit quantity on a 32-bit platform


=====================================
compiler/GHC/Platform/Reg/Class.hs
=====================================
@@ -1,3 +1,4 @@
+{-# LANGUAGE LambdaCase #-}
 -- | An architecture independent description of a register's class.
 module GHC.Platform.Reg.Class
         ( RegClass (..)
@@ -18,21 +19,26 @@ import GHC.Builtin.Uniques
 --      We treat all registers in a class as being interchangeable.
 --
 data RegClass
-        = RcInteger
-        | RcFloat
-        | RcDouble
-        deriving (Eq, Show)
+  = RcInteger
+  | RcFloat
+  | RcDouble
+  | RcVector128
+  deriving (Eq, Ord, Show)
 
 allRegClasses :: [RegClass]
 allRegClasses =
-  [RcInteger, RcFloat, RcDouble]
+  [ RcInteger, RcFloat, RcDouble, RcVector128 ]
 
 instance Uniquable RegClass where
-    getUnique RcInteger = mkRegClassUnique 0
-    getUnique RcFloat   = mkRegClassUnique 1
-    getUnique RcDouble  = mkRegClassUnique 2
+    getUnique = \case
+      RcInteger   -> mkRegClassUnique 0
+      RcFloat     -> mkRegClassUnique 1
+      RcDouble    -> mkRegClassUnique 2
+      RcVector128 -> mkRegClassUnique 3
 
 instance Outputable RegClass where
-    ppr RcInteger       = Outputable.text "I"
-    ppr RcFloat         = Outputable.text "F"
-    ppr RcDouble        = Outputable.text "D"
+    ppr = \case
+      RcInteger   -> Outputable.text "I"
+      RcFloat     -> Outputable.text "F"
+      RcDouble    -> Outputable.text "D"
+      RcVector128 -> Outputable.text "V"



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/b2ca03a3e8080aa16efbab292bab7716bcf27dd3

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/b2ca03a3e8080aa16efbab292bab7716bcf27dd3
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20240606/027da1d1/attachment-0001.html>


More information about the ghc-commits mailing list