[Git][ghc/ghc][wip/supersven/riscv-vectors] 12 commits: Add comment about vector registers in allocatableRegs

Sven Tennie (@supersven) gitlab at gitlab.haskell.org
Sat Jan 25 16:35:55 UTC 2025



Sven Tennie pushed to branch wip/supersven/riscv-vectors at Glasgow Haskell Compiler / GHC


Commits:
80022d79 by Sven Tennie at 2025-01-10T18:19:00+01:00
Add comment about vector registers in allocatableRegs

- - - - -
3312b409 by Sven Tennie at 2025-01-10T18:19:29+01:00
Formatting

- - - - -
5c1e6851 by Sven Tennie at 2025-01-10T18:48:04+01:00
Adjust TODO

- - - - -
a51c11f8 by Sven Tennie at 2025-01-19T16:14:29+01:00
Delete unused function

- - - - -
b2fd45e2 by Sven Tennie at 2025-01-25T15:05:34+01:00
WIP: Use Format instead of Width in OpReg Operand

Ints, floats and vectors are very different things. So, it is very
helpful to know to which of this three an OpReg Operand relates.

- - - - -
4bcb4ddd by Sven Tennie at 2025-01-25T15:15:27+01:00
Simplify VID

- - - - -
4ddc51ae by Sven Tennie at 2025-01-25T15:23:27+01:00
Simplify VMV

- - - - -
153016bd by Sven Tennie at 2025-01-25T15:37:48+01:00
Simplify VMERGE

- - - - -
90cea4e6 by Sven Tennie at 2025-01-25T15:42:54+01:00
Simplify VSLIDEDOWN

- - - - -
52b65116 by Sven Tennie at 2025-01-25T15:56:41+01:00
Simplify other vector instructions

- - - - -
c2fb7c65 by Sven Tennie at 2025-01-25T16:02:39+01:00
Simplify VFMA

- - - - -
715a80b1 by Sven Tennie at 2025-01-25T17:34:57+01:00
regUsageOfInstr with correct format

- - - - -


6 changed files:

- compiler/GHC/CmmToAsm/Format.hs
- compiler/GHC/CmmToAsm/RV64.hs
- compiler/GHC/CmmToAsm/RV64/CodeGen.hs
- compiler/GHC/CmmToAsm/RV64/Instr.hs
- compiler/GHC/CmmToAsm/RV64/Ppr.hs
- compiler/GHC/CmmToAsm/RV64/Regs.hs


Changes:

=====================================
compiler/GHC/CmmToAsm/Format.hs
=====================================
@@ -22,6 +22,8 @@ module GHC.CmmToAsm.Format (
     intScalarFormat,
     isFloatFormat,
     vecFormat,
+    floatVecFormat,
+    intVecFormat,
     isVecFormat,
     cmmTypeFormat,
     formatToWidth,
@@ -210,6 +212,12 @@ vecFormat ty =
              W64 -> VecFormat l FmtInt64
              _   -> pprPanic "Incorrect vector element width" (ppr elemTy)
 
+floatVecFormat :: Int -> Width -> Format
+floatVecFormat length width = vecFormat (cmmVec length (cmmFloat width))
+
+intVecFormat :: Int -> Width -> Format
+intVecFormat length width = vecFormat (cmmVec length (cmmBits width))
+
 -- | Check if a format represents a vector
 isVecFormat :: Format -> Bool
 isVecFormat (VecFormat {}) = True


=====================================
compiler/GHC/CmmToAsm/RV64.hs
=====================================
@@ -49,7 +49,7 @@ instance Instruction RV64.Instr where
   mkLoadInstr = RV64.mkLoadInstr
   takeDeltaInstr = RV64.takeDeltaInstr
   isMetaInstr = RV64.isMetaInstr
-  mkRegRegMoveInstr _ _ = RV64.mkRegRegMoveInstr
+  mkRegRegMoveInstr _ = RV64.mkRegRegMoveInstr 
   takeRegRegMoveInstr _ = RV64.takeRegRegMoveInstr
   mkJumpInstr = RV64.mkJumpInstr
   mkStackAllocInstr = RV64.mkStackAllocInstr


=====================================
compiler/GHC/CmmToAsm/RV64/CodeGen.hs
=====================================
@@ -290,13 +290,13 @@ genSwitch config expr targets = do
           `appOL` toOL
             [ COMMENT (ftext "Jump table for switch"),
               -- index to offset into the table (relative to tableReg)
-              annExpr expr (SLL (OpReg (formatToWidth fmt1) reg) (OpReg (formatToWidth fmt1) reg) (OpImm (ImmInt 3))),
+              annExpr expr (SLL (OpReg fmt1 reg) (OpReg fmt1 reg) (OpImm (ImmInt 3))),
               -- calculate table entry address
-              ADD (OpReg W64 targetReg) (OpReg (formatToWidth fmt1) reg) (OpReg (formatToWidth fmt2) tableReg),
+              ADD (OpReg II64 targetReg) (OpReg fmt1 reg) (OpReg fmt2 tableReg),
               -- load table entry (relative offset from tableReg (first entry) to target label)
-              LDRU II64 (OpReg W64 targetReg) (OpAddr (AddrRegImm targetReg (ImmInt 0))),
+              LDRU II64 (OpReg II64 targetReg) (OpAddr (AddrRegImm targetReg (ImmInt 0))),
               -- calculate absolute address of the target label
-              ADD (OpReg W64 targetReg) (OpReg W64 targetReg) (OpReg W64 tableReg),
+              ADD (OpReg II64 targetReg) (OpReg II64 targetReg) (OpReg II64 tableReg),
               -- prepare jump to target label
               J_TBL ids (Just lbl) targetReg
             ]
@@ -575,80 +575,93 @@ getRegister' config plat expr =
           -- narrowU is important: Negative immediates may be
           -- sign-extended on load!
           let imm = OpImm . ImmInteger $ narrowU w i
-           in pure (Any (intFormat w) (\dst -> unitOL $ annExpr expr (MOV (OpReg w dst) imm)))
+              format = intFormat w
+           in pure $ Any format (\dst -> unitOL $ annExpr expr (MOV (OpReg format dst) imm))
         CmmFloat 0 w -> do
           let op = litToImm' lit
-          pure (Any (floatFormat w) (\dst -> unitOL $ annExpr expr (MOV (OpReg w dst) op)))
+              format = floatFormat w
+          pure $ Any format (\dst -> unitOL $ annExpr expr (MOV (OpReg format dst) op))
         CmmFloat _f W8 -> pprPanic "getRegister' (CmmLit:CmmFloat), no support for bytes" (pdoc plat expr)
         CmmFloat _f W16 -> pprPanic "getRegister' (CmmLit:CmmFloat), no support for halfs" (pdoc plat expr)
         CmmFloat f W32 -> do
+          -- TODO: Besides width, much duplication with the W64 case!
           let word = castFloatToWord32 (fromRational f) :: Word32
-          intReg <- getNewRegNat (intFormat W32)
+              format_int = intFormat W32
+              format_dst = floatFormat W32
+          intReg <- getNewRegNat format_int
           return
             ( Any
-                (floatFormat W32)
+                format_dst
                 ( \dst ->
                     toOL
                       [ annExpr expr
-                          $ MOV (OpReg W32 intReg) (OpImm (ImmInteger (fromIntegral word))),
-                        MOV (OpReg W32 dst) (OpReg W32 intReg)
+                          $ MOV (OpReg format_int intReg) (OpImm (ImmInteger (fromIntegral word))),
+                        MOV (OpReg format_dst dst) (OpReg format_int intReg)
                       ]
                 )
             )
         CmmFloat f W64 -> do
           let word = castDoubleToWord64 (fromRational f) :: Word64
-          intReg <- getNewRegNat (intFormat W64)
+              format_int = intFormat W64
+              format_dst = floatFormat W64
+          intReg <- getNewRegNat format_int
           return
             ( Any
-                (floatFormat W64)
+                format_dst
                 ( \dst ->
                     toOL
                       [ annExpr expr
-                          $ MOV (OpReg W64 intReg) (OpImm (ImmInteger (fromIntegral word))),
-                        MOV (OpReg W64 dst) (OpReg W64 intReg)
+                          $ MOV (OpReg format_int intReg) (OpImm (ImmInteger (fromIntegral word))),
+                        MOV (OpReg format_dst dst) (OpReg format_int intReg)
                       ]
                 )
             )
         CmmFloat _f _w -> pprPanic "getRegister' (CmmLit:CmmFloat), unsupported float lit" (pdoc plat expr)
-
-        CmmVec lits |
-          VecFormat l sFmt <- cmmTypeFormat $ cmmLitType plat lit
-          , (f:fs) <- lits
-          , all (== f) fs ->  do
+        CmmVec lits
+          | VecFormat l sFmt <- cmmTypeFormat $ cmmLitType plat lit,
+            (f : fs) <- lits,
+            all (== f) fs -> do
               -- All vector elements are equal literals -> broadcast (splat)
               let w = scalarWidth sFmt
-                  broadcast = if isFloatScalarFormat sFmt
-                              then MO_VF_Broadcast l w
-                              else MO_V_Broadcast l w
+                  broadcast =
+                    if isFloatScalarFormat sFmt
+                      then MO_VF_Broadcast l w
+                      else MO_V_Broadcast l w
                   fmt = cmmTypeFormat $ cmmLitType plat lit
-              (reg, format,code) <- getSomeReg $ CmmMachOp broadcast [CmmLit f]
-              return $ Any fmt (\dst -> code `snocOL` annExpr expr
-                                          (MOV (OpReg w dst) (OpReg (formatToWidth format) reg)))
-
+              (reg, format, code) <- getSomeReg $ CmmMachOp broadcast [CmmLit f]
+              return
+                $ Any
+                  fmt
+                  ( \dst ->
+                      code
+                        `snocOL` annExpr
+                          expr
+                          (MOV (OpReg fmt dst) (OpReg format reg))
+                  )
         CmmVec _lits -> pprPanic "getRegister' (CmmLit:CmmVec): " (pdoc plat expr)
         CmmLabel lbl -> do
           let op = OpImm (ImmCLbl lbl)
               rep = cmmLitType plat lit
               format = cmmTypeFormat rep
-          return (Any format (\dst -> unitOL $ annExpr expr (LDR format (OpReg (formatToWidth format) dst) op)))
+          return (Any format (\dst -> unitOL $ annExpr expr (LDR format (OpReg format dst) op)))
         CmmLabelOff lbl off | isNbitEncodeable 12 (fromIntegral off) -> do
           let op = OpImm (ImmIndex lbl off)
               rep = cmmLitType plat lit
               format = cmmTypeFormat rep
-          return (Any format (\dst -> unitOL $ LDR format (OpReg (formatToWidth format) dst) op))
+          return (Any format (\dst -> unitOL $ LDR format (OpReg format dst) op))
         CmmLabelOff lbl off -> do
           let op = litToImm' (CmmLabel lbl)
               rep = cmmLitType plat lit
               format = cmmTypeFormat rep
               width = typeWidth rep
-          (off_r, _off_format, off_code) <- getSomeReg $ CmmLit (CmmInt (fromIntegral off) width)
+          (off_r, off_format, off_code) <- getSomeReg $ CmmLit (CmmInt (fromIntegral off) width)
           return
             ( Any
                 format
                 ( \dst ->
                     off_code
-                      `snocOL` LDR format (OpReg (formatToWidth format) dst) op
-                      `snocOL` ADD (OpReg width dst) (OpReg width dst) (OpReg width off_r)
+                      `snocOL` LDR format (OpReg format dst) op
+                      `snocOL` ADD (OpReg format dst) (OpReg format dst) (OpReg off_format off_r)
                 )
             )
         CmmLabelDiffOff {} -> pprPanic "getRegister' (CmmLit:CmmLabelOff): " (pdoc plat expr)
@@ -670,7 +683,7 @@ getRegister' config plat expr =
                           annExpr expr
                             -- We pattern match on the format in the pretty-printer.
                             -- So, we can here simply emit LDRU for all vectors.
-                            (LDRU format (OpReg width dst) (OpAddr addr))
+                            (LDRU format (OpReg format dst) (OpAddr addr))
                     )
                 )
         (w, _f)
@@ -681,7 +694,7 @@ getRegister' config plat expr =
                     format
                     ( \dst ->
                         addr_code
-                          `snocOL` LDRU format (OpReg width dst) (OpAddr addr)
+                          `snocOL` LDRU format (OpReg format dst) (OpAddr addr)
                     )
                 )
         _ -> pprPanic ("Width too big! Cannot load: " ++ show width) (pdoc plat expr)
@@ -700,15 +713,16 @@ getRegister' config plat expr =
       where
         width = typeWidth (cmmRegType reg)
     CmmRegOff reg off -> do
-      (off_r, _off_format, off_code) <- getSomeReg $ CmmLit (CmmInt (fromIntegral off) width)
-      (reg, _format, code) <- getSomeReg $ CmmReg reg
+      (off_r, off_format, off_code) <- getSomeReg $ CmmLit (CmmInt (fromIntegral off) width)
+      (reg, reg_format, code) <- getSomeReg $ CmmReg reg
+      let dst_format = intFormat width
       return
         $ Any
-          (intFormat width)
+          dst_format
           ( \dst ->
               off_code
                 `appOL` code
-                `snocOL` ADD (OpReg width dst) (OpReg width reg) (OpReg width off_r)
+                `snocOL` ADD (OpReg dst_format dst) (OpReg reg_format reg) (OpReg off_format off_r)
           )
       where
         width = typeWidth (cmmRegType reg)
@@ -720,134 +734,159 @@ getRegister' config plat expr =
     -- for MachOps, see GHC.Cmm.MachOp
     -- For CmmMachOp, see GHC.Cmm.Expr
     CmmMachOp op [e] -> do
-      (reg, _format, code) <- getSomeReg e
+      (e_reg, e_format, e_code) <- getSomeReg e
       case op of
-        MO_Not w -> return $ Any (intFormat w) $ \dst ->
-          let w' = opRegWidth w
-           in code
-                `snocOL`
-                -- pseudo instruction `not` is `xori rd, rs, -1`
-                ann (text "not") (XORI (OpReg w' dst) (OpReg w' reg) (OpImm (ImmInt (-1))))
-                `appOL` truncateReg w' w dst -- See Note [Signed arithmetic on RISCV64]
-        MO_S_Neg w -> negate code w reg
+        MO_Not w ->
+          let format_dst = intFormat w
+           in pure $ Any format_dst $ \dst ->
+                let w' = opRegWidth w
+                 in e_code
+                      `snocOL`
+                      -- pseudo instruction `not` is `xori rd, rs, -1`
+                      ann (text "not") (XORI (OpReg format_dst dst) (OpReg e_format e_reg) (OpImm (ImmInt (-1))))
+                      `appOL` truncateReg w' w dst -- See Note [Signed arithmetic on RISCV64]
+        MO_S_Neg w -> negate e_code w e_reg
         MO_F_Neg w ->
-          return
-            $ Any
-              (floatFormat w)
-              ( \dst ->
-                  code
-                    `snocOL` NEG (OpReg w dst) (OpReg w reg)
-              )
+          let format = floatFormat w
+           in return
+                $ Any
+                  format
+                  ( \dst ->
+                      e_code
+                        `snocOL` NEG (OpReg format dst) (OpReg e_format e_reg)
+                  )
         -- TODO: Can this case happen?
         MO_SF_Round from to | from < W32 -> do
           -- extend to the smallest available representation
-          (reg_x, code_x) <- signExtendReg from W32 reg
+          (reg_x, code_x) <- signExtendReg from W32 e_reg
+          let format = floatFormat to
           pure
             $ Any
-              (floatFormat to)
+              format
               ( \dst ->
-                  code
+                  e_code
                     `appOL` code_x
-                    `snocOL` annExpr expr (FCVT IntToFloat (OpReg to dst) (OpReg from reg_x)) -- (Signed ConVerT Float)
+                    `snocOL` annExpr expr (FCVT IntToFloat (OpReg format dst) (OpReg (intFormat from) reg_x)) -- (Signed ConVerT Float)
               )
         MO_SF_Round from to ->
-          pure
-            $ Any
-              (floatFormat to)
-              ( \dst ->
-                  code
-                    `snocOL` annExpr expr (FCVT IntToFloat (OpReg to dst) (OpReg from reg)) -- (Signed ConVerT Float)
-              )
+          let toFmt = floatFormat to
+              fromFmt = intFormat from
+           in pure
+                $ Any
+                  (floatFormat to)
+                  ( \dst ->
+                      e_code
+                        `snocOL` annExpr expr (FCVT IntToFloat (OpReg toFmt dst) (OpReg fromFmt e_reg)) -- (Signed ConVerT Float)
+                  )
         -- TODO: Can this case happen?
         MO_FS_Truncate from to
           | to < W32 ->
-              pure
+              let toFmt = intFormat to
+                  fromFmt = floatFormat from
+               in pure
+                    $ Any
+                      toFmt
+                      ( \dst ->
+                          e_code
+                            `snocOL`
+                            -- W32 is the smallest width to convert to. Decrease width afterwards.
+                            annExpr expr (FCVT FloatToInt (OpReg II32 dst) (OpReg fromFmt e_reg))
+                            `appOL` signExtendAdjustPrecission W32 to dst dst -- (float convert (-> zero) signed)
+                      )
+        MO_FS_Truncate from to ->
+          let toFmt = intFormat to
+              fromFmt = floatFormat from
+           in pure
                 $ Any
-                  (intFormat to)
+                  toFmt
                   ( \dst ->
-                      code
-                        `snocOL`
-                        -- W32 is the smallest width to convert to. Decrease width afterwards.
-                        annExpr expr (FCVT FloatToInt (OpReg W32 dst) (OpReg from reg))
-                        `appOL` signExtendAdjustPrecission W32 to dst dst -- (float convert (-> zero) signed)
+                      e_code
+                        `snocOL` annExpr expr (FCVT FloatToInt (OpReg toFmt dst) (OpReg fromFmt e_reg))
+                        `appOL` truncateReg from to dst -- (float convert (-> zero) signed)
                   )
-        MO_FS_Truncate from to ->
-          pure
-            $ Any
-              (intFormat to)
-              ( \dst ->
-                  code
-                    `snocOL` annExpr expr (FCVT FloatToInt (OpReg to dst) (OpReg from reg))
-                    `appOL` truncateReg from to dst -- (float convert (-> zero) signed)
-              )
         MO_UU_Conv from to
           | from <= to ->
-              pure
+              let toFmt = intFormat to
+                  fromFmt = intFormat from
+               in pure
+                    $ Any
+                      toFmt
+                      ( \dst ->
+                          e_code
+                            `snocOL` annExpr e (MOV (OpReg toFmt dst) (OpReg fromFmt e_reg))
+                      )
+        MO_UU_Conv from to ->
+          let toFmt = intFormat to
+              fromFmt = intFormat from
+           in pure
                 $ Any
-                  (intFormat to)
+                  toFmt
                   ( \dst ->
-                      code
-                        `snocOL` annExpr e (MOV (OpReg to dst) (OpReg from reg))
+                      e_code
+                        `snocOL` annExpr e (MOV (OpReg fromFmt dst) (OpReg fromFmt e_reg))
+                        `appOL` truncateReg from to dst
                   )
-        MO_UU_Conv from to ->
-          pure
-            $ Any
-              (intFormat to)
-              ( \dst ->
-                  code
-                    `snocOL` annExpr e (MOV (OpReg from dst) (OpReg from reg))
-                    `appOL` truncateReg from to dst
-              )
-        MO_SS_Conv from to -> ss_conv from to reg code
-        MO_FF_Conv from to -> return $ Any (floatFormat to) (\dst -> code `snocOL` annExpr e (FCVT FloatToFloat (OpReg to dst) (OpReg from reg)))
-        MO_WF_Bitcast w    -> return $ Any (floatFormat w)  (\dst -> code `snocOL` MOV (OpReg w dst) (OpReg w reg))
-        MO_FW_Bitcast w    -> return $ Any (intFormat w)    (\dst -> code `snocOL` MOV (OpReg w dst) (OpReg w reg))
-
+        MO_SS_Conv from to -> ss_conv from to e_reg e_code
+        MO_FF_Conv from to ->
+          let toFmt = floatFormat to
+              fromFmt = floatFormat from
+           in pure $ Any toFmt (\dst -> e_code `snocOL` annExpr e (FCVT FloatToFloat (OpReg toFmt dst) (OpReg fromFmt e_reg)))
+        MO_WF_Bitcast w ->
+          let toFmt = floatFormat w
+              fromFmt = intFormat w
+           in pure $ Any toFmt (\dst -> e_code `snocOL` MOV (OpReg toFmt dst) (OpReg fromFmt e_reg))
+        MO_FW_Bitcast w ->
+          let toFmt = intFormat w
+              fromFmt = floatFormat w
+           in pure $ Any toFmt (\dst -> e_code `snocOL` MOV (OpReg toFmt dst) (OpReg fromFmt e_reg))
         -- Conversions
         -- TODO: Duplication with MO_UU_Conv
         MO_XX_Conv from to
           | to < from ->
-              pure
-                $ Any
-                  (intFormat to)
-                  ( \dst ->
-                      code
-                        `snocOL` annExpr e (MOV (OpReg from dst) (OpReg from reg))
-                        `appOL` truncateReg from to dst
-                  )
+              let toFmt = intFormat to
+                  fromFmt = intFormat from
+               in pure
+                    $ Any
+                      toFmt 
+                      ( \dst ->
+                          e_code
+                            `snocOL` annExpr e (MOV (OpReg fromFmt dst) (OpReg fromFmt e_reg))
+                            `appOL` truncateReg from to dst
+                      )
         MO_XX_Conv _from to -> swizzleRegisterRep (intFormat to) <$> getRegister e
         MO_AlignmentCheck align wordWidth -> do
           reg <- getRegister' config plat e
           addAlignmentCheck align wordWidth reg
 
-        --TODO: MO_V_Broadcast with immediate: If the right value is a literal,
+        -- TODO: MO_V_Broadcast with immediate: If the right value is a literal,
         -- it may use vmv.v.i (simpler)
         -- TODO: Duplication with MO_VF_Broadcast
         MO_V_Broadcast length w -> do
           (reg_val, format_val, code_val) <- getSomeReg e
-          let w_val = formatToWidth format_val
-          pure $ Any (vecFormat (cmmVec length (cmmBits w))) $ \dst ->
-            code_val `snocOL`
-            annExpr expr
-              (VMV (VecFormat length (intScalarFormat w)) (OpReg w dst) (OpReg w_val reg_val))
-
+          let toFmt = VecFormat length (intScalarFormat w)
+          pure $ Any toFmt $ \dst ->
+            code_val
+              `snocOL` annExpr
+                expr
+                (VMV (OpReg toFmt dst) (OpReg format_val reg_val))
         MO_VF_Broadcast length w -> do
           (reg_val, format_val, code_val) <- getSomeReg e
-          let w_val = formatToWidth format_val
+          let toFmt = VecFormat length (floatScalarFormat w)
           pure $ Any (vecFormat (cmmVec length (cmmFloat w))) $ \dst ->
-            code_val `snocOL`
-            annExpr expr
-              (VMV (VecFormat length (floatScalarFormat w)) (OpReg w dst) (OpReg w_val reg_val))
+            code_val
+              `snocOL` annExpr
+                expr
+                (VMV (OpReg toFmt dst) (OpReg format_val reg_val))
 
         -- TODO: NO MO_V_Neg? Why?
         MO_VF_Neg length w -> do
           (reg_v, format_v, code_v) <- getSomeReg e
-          let w_v = formatToWidth format_v
-          pure $ Any (vecFormat (cmmVec length (cmmFloat w))) $ \dst ->
-            code_v `snocOL`
-            annExpr expr
-              (VNEG (VecFormat length (floatScalarFormat w)) (OpReg w dst) (OpReg w_v reg_v))
-
+          let toFmt = VecFormat length (floatScalarFormat w)
+          pure $ Any toFmt $ \dst ->
+            code_v
+              `snocOL` annExpr
+                expr
+                (VNEG (OpReg toFmt dst) (OpReg format_v reg_v))
         x -> pprPanic ("getRegister' (monadic CmmMachOp): " ++ show x) (pdoc plat expr)
       where
         -- In the case of 16- or 8-bit values we need to sign-extend to 32-bits
@@ -855,10 +894,11 @@ getRegister' config plat expr =
         negate code w reg = do
           let w' = opRegWidth w
           (reg', code_sx) <- signExtendReg w w' reg
-          return $ Any (intFormat w) $ \dst ->
+          let fmt = intFormat w
+          return $ Any fmt $ \dst ->
             code
               `appOL` code_sx
-              `snocOL` NEG (OpReg w' dst) (OpReg w' reg')
+              `snocOL` NEG (OpReg fmt dst) (OpReg fmt reg')
               `appOL` truncateReg w' w dst
 
         ss_conv from to reg code
@@ -868,20 +908,23 @@ getRegister' config plat expr =
                   `appOL` signExtend from to reg dst
                   `appOL` truncateReg from to dst
           | from > to =
-              pure $ Any (intFormat to) $ \dst ->
-                code
-                  `appOL` toOL
-                    [ ann
-                        (text "MO_SS_Conv: narrow register signed" <+> ppr reg <+> ppr from <> text "->" <> ppr to)
-                        (SLL (OpReg to dst) (OpReg from reg) (OpImm (ImmInt shift))),
-                      -- signed right shift
-                      SRA (OpReg to dst) (OpReg to dst) (OpImm (ImmInt shift))
-                    ]
-                  `appOL` truncateReg from to dst
+              let fromFmt = intFormat from
+                  toFmt = intFormat to
+               in pure $ Any toFmt $ \dst ->
+                    code
+                      `appOL` toOL
+                        [ ann
+                            (text "MO_SS_Conv: narrow register signed" <+> ppr reg <+> ppr from <> text "->" <> ppr to)
+                            (SLL (OpReg toFmt dst) (OpReg fromFmt reg) (OpImm (ImmInt shift))),
+                          -- signed right shift
+                          SRA (OpReg toFmt dst) (OpReg toFmt dst) (OpImm (ImmInt shift))
+                        ]
+                      `appOL` truncateReg from to dst
           | otherwise =
               -- No conversion necessary: Just copy.
-              pure $ Any (intFormat from) $ \dst ->
-                code `snocOL` MOV (OpReg from dst) (OpReg from reg)
+              let fmt = intFormat from
+               in pure $ Any fmt $ \dst ->
+                    code `snocOL` MOV (OpReg fmt dst) (OpReg fmt reg)
           where
             shift = 64 - (widthInBits from - widthInBits to)
 
@@ -901,29 +944,32 @@ getRegister' config plat expr =
     -- 1. Compute Reg +/- n directly.
     --    For Add/Sub we can directly encode 12bits, or 12bits lsl #12.
     CmmMachOp (MO_Add w) [CmmReg reg, CmmLit (CmmInt n _)]
-      | fitsIn12bitImm n -> return $ Any (intFormat w) (\d -> unitOL $ annExpr expr (ADD (OpReg w d) (OpReg w' r') (OpImm (ImmInteger n))))
+      | fitsIn12bitImm n -> return $ Any toFmt (\d -> unitOL $ annExpr expr (ADD (OpReg toFmt d) (OpReg fromFmt r') (OpImm (ImmInteger n))))
       where
         -- TODO: 12bits lsl #12; e.g. lower 12 bits of n are 0; shift n >> 12, and set lsl to #12.
-        w' = formatToWidth (cmmTypeFormat (cmmRegType reg))
         r' = getRegisterReg plat reg
+        toFmt = intFormat w
+        fromFmt = cmmTypeFormat (cmmRegType reg)
     CmmMachOp (MO_Sub w) [CmmReg reg, CmmLit (CmmInt n _)]
-      | fitsIn12bitImm n -> return $ Any (intFormat w) (\d -> unitOL $ annExpr expr (SUB (OpReg w d) (OpReg w' r') (OpImm (ImmInteger n))))
+      | fitsIn12bitImm n -> return $ Any toFmt (\d -> unitOL $ annExpr expr (SUB (OpReg toFmt d) (OpReg fromFmt r') (OpImm (ImmInteger n))))
       where
         -- TODO: 12bits lsl #12; e.g. lower 12 bits of n are 0; shift n >> 12, and set lsl to #12.
-        w' = formatToWidth (cmmTypeFormat (cmmRegType reg))
         r' = getRegisterReg plat reg
+        toFmt = intFormat w
+        fromFmt = cmmTypeFormat (cmmRegType reg)
     CmmMachOp (MO_U_Quot w) [x, y] | w == W8 || w == W16 -> do
       (reg_x, format_x, code_x) <- getSomeReg x
       (reg_y, format_y, code_y) <- getSomeReg y
+      let toFmt = intFormat w
       return
         $ Any
-          (intFormat w)
+          toFmt
           ( \dst ->
               code_x
                 `appOL` truncateReg (formatToWidth format_x) w reg_x
                 `appOL` code_y
                 `appOL` truncateReg (formatToWidth format_y) w reg_y
-                `snocOL` annExpr expr (DIVU (OpReg w dst) (OpReg w reg_x) (OpReg w reg_y))
+                `snocOL` annExpr expr (DIVU (OpReg toFmt dst) (OpReg format_x reg_x) (OpReg format_y reg_y))
           )
 
     -- 2. Shifts. x << n, x >> n.
@@ -931,129 +977,140 @@ getRegister' config plat expr =
       | w == W32,
         0 <= n,
         n < 32 -> do
-          (reg_x, _format_x, code_x) <- getSomeReg x
+          (reg_x, format_x, code_x) <- getSomeReg x
+          let toFmt = intFormat w
           return
             $ Any
-              (intFormat w)
+              toFmt
               ( \dst ->
                   code_x
-                    `snocOL` annExpr expr (SLL (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n)))
+                    `snocOL` annExpr expr (SLL (OpReg toFmt dst) (OpReg format_x reg_x) (OpImm (ImmInteger n)))
                     `appOL` truncateReg w w dst
               )
     CmmMachOp (MO_Shl w) [x, CmmLit (CmmInt n _)]
       | w == W64,
         0 <= n,
         n < 64 -> do
-          (reg_x, _format_x, code_x) <- getSomeReg x
+          (reg_x, format_x, code_x) <- getSomeReg x
+          let toFmt = intFormat w
           return
             $ Any
               (intFormat w)
               ( \dst ->
                   code_x
-                    `snocOL` annExpr expr (SLL (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n)))
+                    `snocOL` annExpr expr (SLL (OpReg toFmt dst) (OpReg format_x reg_x) (OpImm (ImmInteger n)))
                     `appOL` truncateReg w w dst
               )
     CmmMachOp (MO_S_Shr w) [x, CmmLit (CmmInt n _)] | fitsIn12bitImm n -> do
       (reg_x, format_x, code_x) <- getSomeReg x
       (reg_x', code_x') <- signExtendReg (formatToWidth format_x) w reg_x
+      let toFmt = intFormat w
       return
         $ Any
-          (intFormat w)
+          toFmt
           ( \dst ->
               code_x
                 `appOL` code_x'
-                `snocOL` annExpr expr (SRA (OpReg w dst) (OpReg w reg_x') (OpImm (ImmInteger n)))
+                `snocOL` annExpr expr (SRA (OpReg toFmt dst) (OpReg format_x reg_x') (OpImm (ImmInteger n)))
           )
     CmmMachOp (MO_S_Shr w) [x, y] -> do
       (reg_x, format_x, code_x) <- getSomeReg x
-      (reg_y, _format_y, code_y) <- getSomeReg y
+      (reg_y, format_y, code_y) <- getSomeReg y
       (reg_x', code_x') <- signExtendReg (formatToWidth format_x) w reg_x
+      let toFmt = intFormat w
       return
         $ Any
-          (intFormat w)
+          toFmt
           ( \dst ->
               code_x
                 `appOL` code_x'
                 `appOL` code_y
-                `snocOL` annExpr expr (SRA (OpReg w dst) (OpReg w reg_x') (OpReg w reg_y))
+                `snocOL` annExpr expr (SRA (OpReg toFmt dst) (OpReg format_x reg_x') (OpReg format_y reg_y))
           )
     CmmMachOp (MO_U_Shr w) [x, CmmLit (CmmInt n _)]
       | w == W8,
         0 <= n,
         n < 8 -> do
           (reg_x, format_x, code_x) <- getSomeReg x
+          let toFmt = intFormat w
           return
             $ Any
-              (intFormat w)
+              toFmt
               ( \dst ->
                   code_x
                     `appOL` truncateReg (formatToWidth format_x) w reg_x
-                    `snocOL` annExpr expr (SRL (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n)))
+                    `snocOL` annExpr expr (SRL (OpReg toFmt dst) (OpReg toFmt reg_x) (OpImm (ImmInteger n)))
               )
     CmmMachOp (MO_U_Shr w) [x, CmmLit (CmmInt n _)]
       | w == W16,
         0 <= n,
         n < 16 -> do
           (reg_x, format_x, code_x) <- getSomeReg x
+          let toFmt = intFormat w
           return
             $ Any
-              (intFormat w)
+              toFmt
               ( \dst ->
                   code_x
                     `appOL` truncateReg (formatToWidth format_x) w reg_x
-                    `snocOL` annExpr expr (SRL (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n)))
+                    `snocOL` annExpr expr (SRL (OpReg toFmt dst) (OpReg toFmt reg_x) (OpImm (ImmInteger n)))
               )
     CmmMachOp (MO_U_Shr w) [x, y] | w == W8 || w == W16 -> do
       (reg_x, format_x, code_x) <- getSomeReg x
-      (reg_y, _format_y, code_y) <- getSomeReg y
+      (reg_y, format_y, code_y) <- getSomeReg y
+      let toFmt = intFormat w
       return
         $ Any
-          (intFormat w)
+          toFmt
           ( \dst ->
               code_x
                 `appOL` code_y
                 `appOL` truncateReg (formatToWidth format_x) w reg_x
-                `snocOL` annExpr expr (SRL (OpReg w dst) (OpReg w reg_x) (OpReg w reg_y))
+                `snocOL` annExpr expr (SRL (OpReg toFmt dst) (OpReg format_x reg_x) (OpReg format_y reg_y))
           )
     CmmMachOp (MO_U_Shr w) [x, CmmLit (CmmInt n _)]
       | w == W32,
         0 <= n,
         n < 32 -> do
-          (reg_x, _format_x, code_x) <- getSomeReg x
+          (reg_x, format_x, code_x) <- getSomeReg x
+          let toFmt = intFormat w
           return
             $ Any
-              (intFormat w)
+              toFmt
               ( \dst ->
                   code_x
-                    `snocOL` annExpr expr (SRL (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n)))
+                    `snocOL` annExpr expr (SRL (OpReg toFmt dst) (OpReg format_x reg_x) (OpImm (ImmInteger n)))
               )
     CmmMachOp (MO_U_Shr w) [x, CmmLit (CmmInt n _)]
       | w == W64,
         0 <= n,
         n < 64 -> do
-          (reg_x, _format_x, code_x) <- getSomeReg x
+          (reg_x, format_x, code_x) <- getSomeReg x
+          let toFmt = intFormat w
           return
             $ Any
-              (intFormat w)
+              toFmt
               ( \dst ->
                   code_x
-                    `snocOL` annExpr expr (SRL (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n)))
+                    `snocOL` annExpr expr (SRL (OpReg toFmt dst) (OpReg format_x reg_x) (OpImm (ImmInteger n)))
               )
 
     -- 3. Logic &&, ||
     CmmMachOp (MO_And w) [CmmReg reg, CmmLit (CmmInt n _)]
       | fitsIn12bitImm n ->
-          return $ Any (intFormat w) (\d -> unitOL $ annExpr expr (AND (OpReg w d) (OpReg w' r') (OpImm (ImmInteger n))))
+          return $ Any toFmt (\d -> unitOL $ annExpr expr (AND (OpReg toFmt d) (OpReg fromFmt r') (OpImm (ImmInteger n))))
       where
-        w' = formatToWidth (cmmTypeFormat (cmmRegType reg))
         r' = getRegisterReg plat reg
+        toFmt = intFormat w
+        fromFmt = (cmmTypeFormat (cmmRegType reg))
     CmmMachOp (MO_Or w) [CmmReg reg, CmmLit (CmmInt n _)]
       | fitsIn12bitImm n ->
-          return $ Any (intFormat w) (\d -> unitOL $ annExpr expr (ORI (OpReg w d) (OpReg w' r') (OpImm (ImmInteger n))))
+          return $ Any toFmt (\d -> unitOL $ annExpr expr (ORI (OpReg toFmt d) (OpReg fromFmt r') (OpImm (ImmInteger n))))
       where
         w' = formatToWidth (cmmTypeFormat (cmmRegType reg))
         r' = getRegisterReg plat reg
-
+        toFmt = intFormat w
+        fromFmt = (cmmTypeFormat (cmmRegType reg))
     -- Generic binary case.
     CmmMachOp op [x, y] -> do
       let -- A "plain" operation.
@@ -1064,13 +1121,14 @@ getRegister' config plat expr =
             (reg_x, format_x, code_x) <- getSomeReg x
             (reg_y, format_y, code_y) <- getSomeReg y
             massertPpr (isIntFormat format_x == isIntFormat format_y) $ text "bitOp: incompatible"
+            let toFmt = intFormat w
             return
               $ Any
-                (intFormat w)
+                toFmt
                 ( \dst ->
                     code_x
                       `appOL` code_y
-                      `appOL` op (OpReg w dst) (OpReg w reg_x) (OpReg w reg_y)
+                      `appOL` op (OpReg toFmt dst) (OpReg format_x reg_x) (OpReg format_y reg_y)
                 )
 
           -- A (potentially signed) integer operation.
@@ -1087,6 +1145,7 @@ getRegister' config plat expr =
             -- This is the width of the registers on which the operation
             -- should be performed.
             let w' = opRegWidth w
+                fmt' = intFormat w'
                 signExt r
                   | not is_signed = return (r, nilOL)
                   | otherwise = signExtendReg w w' r
@@ -1099,19 +1158,20 @@ getRegister' config plat expr =
                 -- sign-extend both operands
                 code_x_sx
                 `appOL` code_y_sx
-                `appOL` op (OpReg w' dst) (OpReg w' reg_x_sx) (OpReg w' reg_y_sx)
+                `appOL` op (OpReg fmt' dst) (OpReg fmt' reg_x_sx) (OpReg fmt' reg_y_sx)
                 `appOL` truncateReg w' w dst -- truncate back to the operand's original width
           floatOp w op = do
             (reg_fx, format_x, code_fx) <- getFloatReg x
             (reg_fy, format_y, code_fy) <- getFloatReg y
             massertPpr (isFloatFormat format_x && isFloatFormat format_y) $ text "floatOp: non-float"
+            let format_dst = floatFormat w
             return
               $ Any
-                (floatFormat w)
+                format_dst
                 ( \dst ->
                     code_fx
                       `appOL` code_fy
-                      `appOL` op (OpReg w dst) (OpReg w reg_fx) (OpReg w reg_fy)
+                      `appOL` op (OpReg format_dst dst) (OpReg format_x reg_fx) (OpReg format_x reg_fy)
                 )
 
           -- need a special one for conditionals, as they return ints
@@ -1119,25 +1179,29 @@ getRegister' config plat expr =
             (reg_fx, format_x, code_fx) <- getFloatReg x
             (reg_fy, format_y, code_fy) <- getFloatReg y
             massertPpr (isFloatFormat format_x && isFloatFormat format_y) $ text "floatCond: non-float"
+            let format_dst = intFormat w
             return
               $ Any
-                (intFormat w)
+                format_dst
                 ( \dst ->
                     code_fx
                       `appOL` code_fy
-                      `appOL` op (OpReg w dst) (OpReg w reg_fx) (OpReg w reg_fy)
+                      `appOL` op (OpReg format_dst dst) (OpReg format_x reg_fx) (OpReg format_y reg_fy)
                 )
 
-          vecOp length w op =  do
+          vecOp format op = do
             (reg_x, format_x, code_x) <- getSomeReg x
             (reg_y, format_y, code_y) <- getSomeReg y
-            massertPpr (isVecFormat format_x && isVecFormat format_y) $
-              text "vecOp: non-vector operand. operands: " <+> ppr format_x <+> ppr format_y
-            pure $ Any (vecFormat (cmmVec length (cmmFloat w))) $ \dst ->
-              code_x `appOL`
-              code_y `snocOL`
-              annExpr expr
-                (op (OpReg w dst) (OpReg w reg_x) (OpReg w reg_y))
+            massertPpr (isVecFormat format_x && isVecFormat format_y)
+              $ text "vecOp: non-vector operand. operands: "
+              <+> ppr format_x
+              <+> ppr format_y
+            pure $ Any format $ \dst ->
+              code_x
+                `appOL` code_y
+                `snocOL` annExpr
+                  expr
+                  (op (OpReg format dst) (OpReg format_x reg_x) (OpReg format_y reg_y))
 
       case op of
         -- Integer operations
@@ -1194,56 +1258,56 @@ getRegister' config plat expr =
         MO_VF_Extract length w -> do
           (reg_v, format_v, code_v) <- getSomeReg x
           (reg_idx, format_idx, code_idx) <- getSomeReg y
-          let tmpFormat = VecFormat length (floatScalarFormat w)
-              width_v = formatToWidth format_v
-          tmp <- getNewRegNat tmpFormat
-          pure $ Any (floatFormat w) $ \dst ->
-            code_v `appOL`
-            code_idx `snocOL`
-            -- Setup
-            -- TODO: Use width
-            annExpr expr
-            -- Move selected element to index 0
-            -- vslidedown.vi v8, v9, 2
-              (VSLIDEDOWN (VecFormat length (floatScalarFormat w)) (OpReg width_v tmp) (OpReg width_v reg_v) (OpReg (formatToWidth format_idx) reg_idx)) `snocOL`
-            -- Move to float register
-            -- vmv.x.s a0, v8
-            VMV (VecFormat length (floatScalarFormat w)) (OpReg w dst) (OpReg (formatToWidth tmpFormat) tmp)
+          let format_dst = floatFormat w
+          tmp <- getNewRegNat format_v
+          pure $ Any format_dst $ \dst ->
+            code_v
+              `appOL` code_idx
+              `snocOL`
+              -- Setup
+              -- TODO: Use width
+              annExpr
+                expr
+                -- Move selected element to index 0
+                -- vslidedown.vi v8, v9, 2
+                (VSLIDEDOWN (OpReg format_v tmp) (OpReg format_v reg_v) (OpReg format_idx reg_idx))
+              `snocOL`
+              -- Move to float register
+              -- vmv.x.s a0, v8
+              VMV (OpReg format_dst dst) (OpReg format_v tmp)
 
         -- TODO: Duplication with MO_VF_Extract
         MO_V_Extract length w -> do
           (reg_v, format_v, code_v) <- getSomeReg x
           (reg_idx, format_idx, code_idx) <- getSomeReg y
-          let tmpFormat = VecFormat length (intScalarFormat w)
-              width_v = formatToWidth format_v
-          tmp <- getNewRegNat tmpFormat
-          pure $ Any (intFormat w) $ \dst ->
-            code_v `appOL`
-            code_idx `snocOL`
-            -- Setup
-            -- TODO: Use width
-            annExpr expr
-            -- Move selected element to index 0
-            -- vslidedown.vi v8, v9, 2
-              (VSLIDEDOWN (VecFormat length (intScalarFormat w)) (OpReg width_v tmp) (OpReg width_v reg_v) (OpReg (formatToWidth format_idx) reg_idx)) `snocOL`
-            -- Move to float register
-            -- vmv.x.s a0, v8
-            VMV (VecFormat length (intScalarFormat w)) (OpReg w dst) (OpReg (formatToWidth tmpFormat) tmp)
-
-        MO_VF_Add  length w -> vecOp length w (\d x y -> (VADD (VecFormat length (floatScalarFormat w)) d x y))
-        MO_VF_Sub  length w -> vecOp length w (\d x y -> (VSUB (VecFormat length (floatScalarFormat w)) d x y))
-        MO_VF_Mul  length w -> vecOp length w (\d x y -> (VMUL (VecFormat length (floatScalarFormat w)) d x y))
-        MO_VF_Quot length w -> vecOp length w (\d x y -> (VQUOT (VecFormat length (floatScalarFormat w)) d x y))
-
+          tmp <- getNewRegNat format_v
+          let format_dst = floatFormat w
+          pure $ Any format_dst $ \dst ->
+            code_v
+              `appOL` code_idx
+              `snocOL`
+              -- Setup
+              -- TODO: Use width
+              annExpr
+                expr
+                -- Move selected element to index 0
+                -- vslidedown.vi v8, v9, 2
+                (VSLIDEDOWN (OpReg format_v tmp) (OpReg format_v reg_v) (OpReg format_idx reg_idx))
+              `snocOL`
+              -- Move to float register
+              -- vmv.x.s a0, v8
+              VMV (OpReg format_dst dst) (OpReg format_v tmp)
+        MO_VF_Add length w -> vecOp (floatVecFormat length w) (\d x y -> (VADD d x y))
+        MO_VF_Sub length w -> vecOp (floatVecFormat length w) (\d x y -> (VSUB d x y))
+        MO_VF_Mul length w -> vecOp (floatVecFormat length w) (\d x y -> (VMUL d x y))
+        MO_VF_Quot length w -> vecOp (floatVecFormat length w) (\d x y -> (VQUOT d x y))
         -- See https://godbolt.org/z/PvcWKMKoW
-        MO_VS_Min length w -> vecOp length w (\d x y -> (VSMIN (VecFormat length (intScalarFormat w)) d x y))
-        MO_VS_Max length w -> vecOp length w (\d x y -> (VSMAX (VecFormat length (intScalarFormat w)) d x y))
-        MO_VU_Min length w -> vecOp length w (\d x y -> (VUMIN (VecFormat length (intScalarFormat w)) d x y))
-        MO_VU_Max length w -> vecOp length w (\d x y -> (VUMAX (VecFormat length (intScalarFormat w)) d x y))
-        MO_VF_Min length w -> vecOp length w (\d x y -> (VFMIN (VecFormat length (floatScalarFormat w)) d x y))
-        MO_VF_Max length w -> vecOp length w (\d x y -> (VFMAX (VecFormat length (floatScalarFormat w)) d x y))
-
-
+        MO_VS_Min length w -> vecOp (intVecFormat length w) (\d x y -> (VSMIN d x y))
+        MO_VS_Max length w -> vecOp (intVecFormat length w) (\d x y -> (VSMAX d x y))
+        MO_VU_Min length w -> vecOp (intVecFormat length w) (\d x y -> (VUMIN d x y))
+        MO_VU_Max length w -> vecOp (intVecFormat length w) (\d x y -> (VUMAX d x y))
+        MO_VF_Min length w -> vecOp (floatVecFormat length w) (\d x y -> (VFMIN d x y))
+        MO_VF_Max length w -> vecOp (floatVecFormat length w) (\d x y -> (VFMAX d x y))
         _e -> panic $ "Missing operation " ++ show expr
 
         -- Vectors
@@ -1282,23 +1346,23 @@ getRegister' config plat expr =
         -- x86 fnmadd - x * y + z <=> RISCV64 fmsub : d = - r1 * r2 + r3
         -- x86 fnmsub - x * y - z <=> RISCV64 fnmadd: d = - r1 * r2 - r3
         MO_FMA var length w
-          | length == 1
-          -> float3Op w (\d n m a -> unitOL $ FMA var d n m a)
+          | length == 1 ->
+              float3Op w (\d n m a -> unitOL $ FMA var d n m a)
           | otherwise -> do
-            (reg_x, format_x, code_x) <- getSomeReg x
-            (reg_y, format_y, code_y) <- getSomeReg y
-            (reg_z, format_z, code_z) <- getSomeReg z
-            let targetFormat = VecFormat length (floatScalarFormat w)
-                width_z = formatToWidth format_z
-                negate_z = if var `elem` [FNMAdd, FNMSub] then unitOL (VNEG format_z (OpReg width_z reg_z) (OpReg width_z reg_z)) else nilOL
-            pure $ Any targetFormat $ \dst ->
-              code_x `appOL`
-              code_y `appOL`
-              code_z `appOL`
-              negate_z `snocOL`
-              annExpr expr
-                (VMV (VecFormat length (floatScalarFormat w)) (OpReg w dst) (OpReg (formatToWidth format_x) reg_x)) `snocOL`
-              VFMA var format_x (OpReg w dst) (OpReg (formatToWidth format_y) reg_y) (OpReg (formatToWidth format_z) reg_z)
+              (reg_x, format_x, code_x) <- getSomeReg x
+              (reg_y, format_y, code_y) <- getSomeReg y
+              (reg_z, format_z, code_z) <- getSomeReg z
+              let targetFormat = VecFormat length (floatScalarFormat w)
+                  negate_z = if var `elem` [FNMAdd, FNMSub] then unitOL (VNEG (OpReg format_z reg_z) (OpReg format_z reg_z)) else nilOL
+              pure $ Any targetFormat $ \dst ->
+                code_x
+                  `appOL` code_y
+                  `appOL` code_z
+                  `appOL` negate_z
+                  `snocOL` annExpr
+                    expr
+                    (VMV (OpReg targetFormat dst) (OpReg format_x reg_x))
+                  `snocOL` VFMA var (OpReg targetFormat dst) (OpReg format_y reg_y) (OpReg format_z reg_z)
 
         -- TODO: Implement length as immediate
 
@@ -1311,40 +1375,60 @@ getRegister' config plat expr =
         --   ret
         --
         -- https://godbolt.org/z/sEG8MrM8P
-        MO_VF_Insert length width ->vecInsert floatScalarFormat length width
-
-        MO_V_Insert length width -> vecInsert intScalarFormat length width
-
+        MO_VF_Insert length width -> vecInsert floatVecFormat length width
+        MO_V_Insert length width -> vecInsert intVecFormat length width
         _ ->
           pprPanic "getRegister' (unhandled ternary CmmMachOp): "
             $ pprMachOp op
             <+> text "in"
             <+> pdoc plat expr
       where
-        vecInsert :: (Width -> ScalarFormat) -> Int -> Width -> NatM Register
-        vecInsert widthToScalarFormat length width =
-            do
-              let targetVecFormat = VecFormat length (widthToScalarFormat width)
-              (reg_v, format_v, code_v) <- getSomeReg x
-              (reg_f, format_f, code_f) <- getSomeReg y
-              (reg_idx, format_idx, code_idx) <- getSomeReg z
-              tmp <- getNewRegNat targetVecFormat
-              pure $ Any targetVecFormat $ \dst ->
-                code_v `appOL`
-                code_f `appOL`
-                code_idx `snocOL`
-                annExpr expr
-                -- 1. fill elements with index numbers
-                -- TODO: The Width is made up
-                -- TODO: Is it safe to use v0 (default mask register) here? Instructions may be shuffled around...
-                -- Can we use an explicitly fetched register as mask (depends on instructions)?
-                 (VID targetVecFormat (OpReg W8 v0Reg)) `snocOL`
+        vecInsert :: (Int -> Width -> Format) -> Int -> Width -> NatM Register
+        vecInsert toFormat length width =
+          do
+            (reg_v, format_v, code_v) <- getSomeReg x
+            (reg_f, format_f, code_f) <- getSomeReg y
+            (reg_idx, format_idx, code_idx) <- getSomeReg z
+            let format = toFormat length width
+                format_mask = intVecFormat length W8 -- Actually, W1 (one bit) would be correct, but that does not exist.
+                format_vid = intVecFormat length vidWidth
+            vidReg <- getNewRegNat format_vid
+            tmp <- getNewRegNat format
+            pure $ Any format $ \dst ->
+              code_v
+                `appOL` code_f
+                `appOL` code_idx
+                `snocOL` annExpr
+                  expr
+                  -- 1. fill elements with index numbers
+                  -- TODO: The Width is made up
+                  -- TODO: Is it safe to use v0 (default mask register) here? Instructions may be shuffled around...
+                  -- Can we use an explicitly fetched register as mask (depends on instructions)?
+                  (VID (OpReg format_vid vidReg))
+                `snocOL`
                 -- 2. Build mask
-                VMSEQ  targetVecFormat(OpReg W8 v0Reg) (OpReg W8 v0Reg) (OpReg (formatToWidth format_idx) reg_idx) `snocOL`
+                VMSEQ (OpReg format_mask v0Reg) (OpReg format_vid vidReg) (OpReg format_idx reg_idx)
+                `snocOL`
                 -- 3. Splat value into tmp vector
-                VMV targetVecFormat (OpReg width tmp) (OpReg (formatToWidth format_f) reg_f) `snocOL`
+                VMV (OpReg format tmp) (OpReg format_f reg_f)
+                `snocOL`
                 -- 4. Merge with mask -> set element at index
-                VMERGE targetVecFormat (OpReg width dst) (OpReg (formatToWidth format_v) reg_v) (OpReg width tmp) (OpReg W8 v0Reg)
+                VMERGE (OpReg format dst) (OpReg format_v reg_v) (OpReg format tmp) (OpReg format_mask v0Reg)
+          where
+            -- Which element width do I need in my vector to store indexes in it?
+            vidWidth = case bitWidthFixed (fromIntegral length :: Word) of
+              x
+                | x <= widthInBits W8 -> W8
+                | x <= widthInBits W16 -> W16
+                | x <= widthInBits W32 -> W32
+                | x <= widthInBits W64 -> W64
+                | x <= widthInBits W128 -> W128
+                | x <= widthInBits W256 -> W256
+                | x <= widthInBits W512 -> W512
+              e -> panic $ "length " ++ show length ++ "not representable in a single element's Width (" ++ show e ++ ")"
+            bitWidthFixed :: Word -> Int
+            bitWidthFixed 0 = 1
+            bitWidthFixed n = finiteBitSize n - countLeadingZeros n
 
         float3Op w op = do
           (reg_fx, format_x, code_fx) <- getFloatReg x
@@ -1352,13 +1436,14 @@ getRegister' config plat expr =
           (reg_fz, format_z, code_fz) <- getFloatReg z
           massertPpr (isFloatFormat format_x && isFloatFormat format_y && isFloatFormat format_z)
             $ text "float3Op: non-float"
+          let format_dst = floatFormat w
           pure
-            $ Any (floatFormat w)
+            $ Any format_dst
             $ \dst ->
               code_fx
                 `appOL` code_fy
                 `appOL` code_fz
-                `appOL` op (OpReg w dst) (OpReg w reg_fx) (OpReg w reg_fy) (OpReg w reg_fz)
+                `appOL` op (OpReg format_dst dst) (OpReg format_x reg_fx) (OpReg format_x reg_fy) (OpReg format_z reg_fz)
     CmmMachOp _op _xs ->
       pprPanic "getRegister' (variadic CmmMachOp): " (pdoc plat expr)
   where
@@ -1372,26 +1457,27 @@ getRegister' config plat expr =
       (reg_x, format_x, code_x) <- getSomeReg x
       (reg_y, format_y, code_y) <- getSomeReg y
       -- TODO: Can't we clobber reg_x and reg_y to save registers?
-      lo <- getNewRegNat II64
-      hi <- getNewRegNat II64
+      let format = II64
+      lo <- getNewRegNat format
+      hi <- getNewRegNat format
       -- TODO: Overhaul CSET: 3rd operand isn't needed for SNEZ
       let nonSense = OpImm (ImmInt 0)
       pure
         $ Any
-          (intFormat w)
+          format
           ( \dst ->
               code_x
                 `appOL` signExtend (formatToWidth format_x) W64 reg_x reg_x
                 `appOL` code_y
                 `appOL` signExtend (formatToWidth format_y) W64 reg_y reg_y
                 `appOL` toOL
-                  [ annExpr expr (MULH (OpReg w hi) (OpReg w reg_x) (OpReg w reg_y)),
-                    MUL (OpReg w lo) (OpReg w reg_x) (OpReg w reg_y),
-                    SRA (OpReg w lo) (OpReg w lo) (OpImm (ImmInt (widthInBits W64 - 1))),
+                  [ annExpr expr (MULH (OpReg format hi) (OpReg format_x reg_x) (OpReg format_y reg_y)),
+                    MUL (OpReg format lo) (OpReg format_x reg_x) (OpReg format_y reg_y),
+                    SRA (OpReg format lo) (OpReg format lo) (OpImm (ImmInt (widthInBits (formatToWidth format) - 1))),
                     ann
                       (text "Set flag if result of MULH contains more than sign bits.")
-                      (XOR (OpReg w hi) (OpReg w hi) (OpReg w lo)),
-                    CSET (OpReg w dst) (OpReg w hi) nonSense NE
+                      (XOR (OpReg format hi) (OpReg format hi) (OpReg format lo)),
+                    CSET (OpReg format dst) (OpReg format hi) nonSense NE
                   ]
           )
     do_mul_may_oflo w x y = do
@@ -1399,16 +1485,17 @@ getRegister' config plat expr =
       (reg_y, format_y, code_y) <- getSomeReg y
       let width_x = formatToWidth format_x
           width_y = formatToWidth format_y
+          format = intFormat w
       if w > width_x && w > width_y
         then
           pure
             $ Any
-              (intFormat w)
+              format
               ( \dst ->
                   -- 8bit * 8bit cannot overflow 16bit
                   -- 16bit * 16bit cannot overflow 32bit
                   -- 32bit * 32bit cannot overflow 64bit
-                  unitOL $ annExpr expr (ADD (OpReg w dst) zero (OpImm (ImmInt 0)))
+                  unitOL $ annExpr expr (ADD (OpReg format dst) zero (OpImm (ImmInt 0)))
               )
         else do
           let use32BitMul = w <= W32 && width_x <= W32 && width_y <= W32
@@ -1418,19 +1505,19 @@ getRegister' config plat expr =
               narrowedReg <- getNewRegNat II64
               pure
                 $ Any
-                  (intFormat w)
+                  format
                   ( \dst ->
                       code_x
                         `appOL` signExtend (formatToWidth format_x) W32 reg_x reg_x
                         `appOL` code_y
                         `appOL` signExtend (formatToWidth format_y) W32 reg_y reg_y
-                        `snocOL` annExpr expr (MUL (OpReg W32 dst) (OpReg W32 reg_x) (OpReg W32 reg_y))
+                        `snocOL` annExpr expr (MUL (OpReg II32 dst) (OpReg II32 reg_x) (OpReg II32 reg_y))
                         `appOL` signExtendAdjustPrecission W32 w dst narrowedReg
                         `appOL` toOL
                           [ ann
                               (text "Check if the multiplied value fits in the narrowed register")
-                              (SUB (OpReg w dst) (OpReg w dst) (OpReg w narrowedReg)),
-                            CSET (OpReg w dst) (OpReg w dst) nonSense NE
+                              (SUB (OpReg format dst) (OpReg format dst) (OpReg II32 narrowedReg)),
+                            CSET (OpReg format dst) (OpReg format dst) nonSense NE
                           ]
                   )
             else
@@ -1439,7 +1526,7 @@ getRegister' config plat expr =
                   (intFormat w)
                   ( \dst ->
                       -- Do not handle this unlikely case. Just tell that it may overflow.
-                      unitOL $ annExpr expr (ADD (OpReg w dst) zero (OpImm (ImmInt 1)))
+                      unitOL $ annExpr expr (ADD (OpReg format dst) zero (OpImm (ImmInt 1)))
                   )
 
 -- TODO: Missing MachOps:
@@ -1471,21 +1558,21 @@ signExtend :: Width -> Width -> Reg -> Reg -> OrdList Instr
 signExtend w w' _r _r' | w > w' = pprPanic "This is not a sign extension, but a truncation." $ ppr w <> text "->" <+> ppr w'
 signExtend w w' _r _r' | w > W64 || w' > W64 = pprPanic "Unexpected width (max is 64bit):" $ ppr w <> text "->" <+> ppr w'
 signExtend w w' r r' | w == W64 && w' == W64 && r == r' = nilOL
-signExtend w w' r r' | w == W64 && w' == W64 = unitOL $ MOV (OpReg w' r') (OpReg w r)
+signExtend w w' r r' | w == W64 && w' == W64 = unitOL $ MOV (OpReg (intFormat w') r') (OpReg (intFormat w) r)
 signExtend w w' r r'
   | w == W32 && w' == W64 =
       unitOL
         $ ann
           (text "sign-extend register (SEXT.W)" <+> ppr r <+> ppr w <> text "->" <> ppr w')
           -- `ADDIW r r 0` is the pseudo-op SEXT.W
-          (ADD (OpReg w' r') (OpReg w r) (OpImm (ImmInt 0)))
+          (ADD (OpReg (intFormat w') r') (OpReg (intFormat w) r) (OpImm (ImmInt 0)))
 signExtend w w' r r' =
   toOL
     [ ann
         (text "narrow register signed" <+> ppr r <> char ':' <> ppr w <> text "->" <> ppr r <> char ':' <> ppr w')
-        (SLL (OpReg w' r') (OpReg w r) (OpImm (ImmInt shift))),
+        (SLL (OpReg (intFormat w') r') (OpReg (intFormat w) r) (OpImm (ImmInt shift))),
       -- signed (arithmetic) right shift
-      SRA (OpReg w' r') (OpReg w' r') (OpImm (ImmInt shift))
+      SRA (OpReg (intFormat w') r') (OpReg (intFormat w') r') (OpImm (ImmInt shift))
     ]
   where
     shift = 64 - widthInBits w
@@ -1497,22 +1584,22 @@ signExtend w w' r r' =
 signExtendAdjustPrecission :: Width -> Width -> Reg -> Reg -> OrdList Instr
 signExtendAdjustPrecission w w' _r _r' | w > W64 || w' > W64 = pprPanic "Unexpected width (max is 64bit):" $ ppr w <> text "->" <+> ppr w'
 signExtendAdjustPrecission w w' r r' | w == W64 && w' == W64 && r == r' = nilOL
-signExtendAdjustPrecission w w' r r' | w == W64 && w' == W64 = unitOL $ MOV (OpReg w' r') (OpReg w r)
+signExtendAdjustPrecission w w' r r' | w == W64 && w' == W64 = unitOL $ MOV (OpReg (intFormat w') r') (OpReg (intFormat w) r)
 signExtendAdjustPrecission w w' r r'
   | w == W32 && w' == W64 =
       unitOL
         $ ann
           (text "sign-extend register (SEXT.W)" <+> ppr r <+> ppr w <> text "->" <> ppr w')
           -- `ADDIW r r 0` is the pseudo-op SEXT.W
-          (ADD (OpReg w' r') (OpReg w r) (OpImm (ImmInt 0)))
+          (ADD (OpReg (intFormat w') r') (OpReg (intFormat w) r) (OpImm (ImmInt 0)))
 signExtendAdjustPrecission w w' r r'
   | w > w' =
       toOL
         [ ann
             (text "narrow register signed" <+> ppr r <> char ':' <> ppr w <> text "->" <> ppr r <> char ':' <> ppr w')
-            (SLL (OpReg w' r') (OpReg w r) (OpImm (ImmInt shift))),
+            (SLL (OpReg (intFormat w') r') (OpReg (intFormat w) r) (OpImm (ImmInt shift))),
           -- signed (arithmetic) right shift
-          SRA (OpReg w' r') (OpReg w' r') (OpImm (ImmInt shift))
+          SRA (OpReg (intFormat w') r') (OpReg (intFormat w') r') (OpImm (ImmInt shift))
         ]
   where
     shift = 64 - widthInBits w'
@@ -1520,9 +1607,9 @@ signExtendAdjustPrecission w w' r r' =
   toOL
     [ ann
         (text "sign extend register" <+> ppr r <> char ':' <> ppr w <> text "->" <> ppr r <> char ':' <> ppr w')
-        (SLL (OpReg w' r') (OpReg w r) (OpImm (ImmInt shift))),
+        (SLL (OpReg (intFormat w') r') (OpReg (intFormat w) r) (OpImm (ImmInt shift))),
       -- signed (arithmetic) right shift
-      SRA (OpReg w' r') (OpReg w' r') (OpImm (ImmInt shift))
+      SRA (OpReg (intFormat w') r') (OpReg (intFormat w') r') (OpImm (ImmInt shift))
     ]
   where
     shift = 64 - widthInBits w
@@ -1540,9 +1627,9 @@ truncateReg w w' r =
   toOL
     [ ann
         (text "truncate register" <+> ppr r <+> ppr w <> text "->" <> ppr w')
-        (SLL (OpReg w' r) (OpReg w r) (OpImm (ImmInt shift))),
+        (SLL (OpReg (intFormat w') r) (OpReg (intFormat w) r) (OpImm (ImmInt shift))),
       -- SHL ignores signedness!
-      SRL (OpReg w' r) (OpReg w r) (OpImm (ImmInt shift))
+      SRL (OpReg (intFormat w') r) (OpReg (intFormat w) r) (OpImm (ImmInt shift))
     ]
   where
     shift = 64 - widthInBits w'
@@ -1561,18 +1648,17 @@ addAlignmentCheck align wordWidth reg = do
   where
     check :: Format -> Reg -> Reg -> BlockId -> Reg -> InstrBlock
     check fmt jumpReg cmpReg okayLblId reg =
-      let width = formatToWidth fmt
-       in assert (not $ isFloatFormat fmt)
-            $ toOL
-              [ ann
-                  (text "Alignment check - alignment: " <> int align <> text ", word width: " <> text (show wordWidth))
-                  (AND (OpReg width cmpReg) (OpReg width reg) (OpImm $ ImmInt $ align - 1)),
-                BCOND EQ (OpReg width cmpReg) zero (TBlock okayLblId),
-                COMMENT (text "Alignment check failed"),
-                LDR II64 (OpReg W64 jumpReg) (OpImm $ ImmCLbl mkBadAlignmentLabel),
-                B (TReg jumpReg),
-                NEWBLOCK okayLblId
-              ]
+      assert (not $ isFloatFormat fmt)
+        $ toOL
+          [ ann
+              (text "Alignment check - alignment: " <> int align <> text ", word width: " <> text (show wordWidth))
+              (AND (OpReg fmt cmpReg) (OpReg fmt reg) (OpImm $ ImmInt $ align - 1)),
+            BCOND EQ (OpReg fmt cmpReg) zero (TBlock okayLblId),
+            COMMENT (text "Alignment check failed"),
+            LDR II64 (OpReg II64 jumpReg) (OpImm $ ImmCLbl mkBadAlignmentLabel),
+            B (TReg jumpReg),
+            NEWBLOCK okayLblId
+          ]
 
 -- -----------------------------------------------------------------------------
 --  The 'Amode' type: Memory addressing modes passed up the tree.
@@ -1609,18 +1695,18 @@ getAmode _platform _ (CmmMachOp (MO_Add _w) [expr, CmmLit (CmmInt off _w')])
   | fitsIn12bitImm off =
       do
         (reg, _format, code) <- getSomeReg expr
-        return $ Amode (AddrRegImm reg (ImmInteger off)) $ COMMENT (text "getAmode generic" <+> (text . show)  expr) `consOL` code
+        return $ Amode (AddrRegImm reg (ImmInteger off)) $ COMMENT (text "getAmode generic" <+> (text . show) expr) `consOL` code
 getAmode _platform _ (CmmMachOp (MO_Sub _w) [expr, CmmLit (CmmInt off _w')])
   | fitsIn12bitImm (-off) =
       do
         (reg, _format, code) <- getSomeReg expr
-        return $ Amode (AddrRegImm reg (ImmInteger (-off))) $ COMMENT (text "getAmode generic" <+> (text . show)  expr) `consOL` code
+        return $ Amode (AddrRegImm reg (ImmInteger (-off))) $ COMMENT (text "getAmode generic" <+> (text . show) expr) `consOL` code
 
 -- Generic case
 getAmode _platform _ expr =
   do
     (reg, _format, code) <- getSomeReg expr
-    return $ Amode (AddrReg reg) $ COMMENT (text "getAmode generic" <+> (text . show)  expr) `consOL` code
+    return $ Amode (AddrReg reg) $ COMMENT (text "getAmode generic" <+> (text . show) expr) `consOL` code
 
 -- -----------------------------------------------------------------------------
 -- Generating assignments
@@ -1637,14 +1723,14 @@ getAmode _platform _ expr =
 assignMem :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock
 assignMem rep addrE srcE =
   do
-    (src_reg, _format, code) <- getSomeReg srcE
+    (src_reg, src_format, code) <- getSomeReg srcE
     platform <- getPlatform
     let w = formatToWidth rep
     Amode addr addr_code <- getAmode platform w addrE
     return $ COMMENT (text "CmmStore" <+> parens (text (show addrE)) <+> parens (text (show srcE)))
       `consOL` ( code
                    `appOL` addr_code
-                   `snocOL` STR rep (OpReg w src_reg) (OpAddr addr)
+                   `snocOL` STR rep (OpReg src_format src_reg) (OpAddr addr)
                )
 
 assignReg :: Format -> CmmReg -> CmmExpr -> NatM InstrBlock
@@ -1660,7 +1746,7 @@ assignReg _ reg src =
       Fixed format freg fcode ->
         COMMENT (text "CmmAssign" <+> parens (text (show reg)) <+> parens (text (show src)))
           `consOL` ( fcode
-                       `snocOL` MOV (OpReg (formatToWidth format) dst) (OpReg (formatToWidth format) freg)
+                       `snocOL` MOV (OpReg format dst) (OpReg format freg)
                    )
 
 -- -----------------------------------------------------------------------------
@@ -1697,22 +1783,23 @@ genCondJump bid expr = do
   case expr of
     -- Optimized == 0 case.
     CmmMachOp (MO_Eq w) [x, CmmLit (CmmInt 0 _)] -> do
-      (reg_x, _format_x, code_x) <- getSomeReg x
-      return $ code_x `snocOL` annExpr expr (BCOND EQ zero (OpReg w reg_x) (TBlock bid))
+      (reg_x, format_x, code_x) <- getSomeReg x
+      return $ code_x `snocOL` annExpr expr (BCOND EQ zero (OpReg format_x reg_x) (TBlock bid))
 
     -- Optimized /= 0 case.
     CmmMachOp (MO_Ne w) [x, CmmLit (CmmInt 0 _)] -> do
-      (reg_x, _format_x, code_x) <- getSomeReg x
-      return $ code_x `snocOL` annExpr expr (BCOND NE zero (OpReg w reg_x) (TBlock bid))
+      (reg_x, format_x, code_x) <- getSomeReg x
+      return $ code_x `snocOL` annExpr expr (BCOND NE zero (OpReg format_x reg_x) (TBlock bid))
 
     -- Generic case.
     CmmMachOp mop [x, y] -> do
-      let ubcond w cmp = do
+      let ubcond :: Width -> Cond -> NatM (OrdList Instr)
+          ubcond w cmp = do
             -- compute both sides.
             (reg_x, format_x, code_x) <- getSomeReg x
             (reg_y, format_y, code_y) <- getSomeReg y
-            let x' = OpReg w reg_x
-                y' = OpReg w reg_y
+            let x' = OpReg format_x reg_x
+                y' = OpReg format_y reg_y
             return $ case w of
               w
                 | w == W8 || w == W16 ->
@@ -1727,12 +1814,13 @@ genCondJump bid expr = do
                   `appOL` code_y
                   `snocOL` annExpr expr (BCOND cmp x' y' (TBlock bid))
 
+          sbcond :: Width -> Cond -> NatM (OrdList Instr)
           sbcond w cmp = do
             -- compute both sides.
             (reg_x, format_x, code_x) <- getSomeReg x
             (reg_y, format_y, code_y) <- getSomeReg y
-            let x' = OpReg w reg_x
-                y' = OpReg w reg_y
+            let x' = OpReg format_x reg_x
+                y' = OpReg format_y reg_y
             return $ case w of
               w
                 | w `elem` [W8, W16, W32] ->
@@ -1743,17 +1831,18 @@ genCondJump bid expr = do
                       `appOL` unitOL (annExpr expr (BCOND cmp x' y' (TBlock bid)))
               _ -> code_x `appOL` code_y `appOL` unitOL (annExpr expr (BCOND cmp x' y' (TBlock bid)))
 
+          fbcond :: Width -> Cond -> NatM (OrdList Instr)
           fbcond w cmp = do
             -- ensure we get float regs
-            (reg_fx, _format_fx, code_fx) <- getFloatReg x
-            (reg_fy, _format_fy, code_fy) <- getFloatReg y
-            condOpReg <- OpReg W64 <$> getNewRegNat II64
+            (reg_fx, format_fx, code_fx) <- getFloatReg x
+            (reg_fy, format_fy, code_fy) <- getFloatReg y
+            condOpReg <- OpReg II64 <$> getNewRegNat II64
             oneReg <- getNewRegNat II64
             return $ code_fx
               `appOL` code_fy
-              `snocOL` annExpr expr (CSET condOpReg (OpReg w reg_fx) (OpReg w reg_fy) cmp)
-              `snocOL` MOV (OpReg W64 oneReg) (OpImm (ImmInt 1))
-              `snocOL` BCOND EQ condOpReg (OpReg w oneReg) (TBlock bid)
+              `snocOL` annExpr expr (CSET condOpReg (OpReg format_fx reg_fx) (OpReg format_fy reg_fy) cmp)
+              `snocOL` MOV (OpReg II64 oneReg) (OpImm (ImmInt 1))
+              `snocOL` BCOND EQ condOpReg (OpReg II64 oneReg) (TBlock bid)
 
       case mop of
         MO_F_Eq w -> fbcond w EQ
@@ -1850,7 +1939,7 @@ genCCall target@(ForeignTarget expr _cconv) dest_regs arg_regs = do
       moveStackDown i =
         toOL
           [ PUSH_STACK_FRAME,
-            SUB (OpReg W64 spMachReg) (OpReg W64 spMachReg) (OpImm (ImmInt (8 * i))),
+            SUB (OpReg II64 spMachReg) (OpReg II64 spMachReg) (OpImm (ImmInt (8 * i))),
             DELTA (-8 * i - 16)
           ]
       moveStackUp 0 =
@@ -1861,7 +1950,7 @@ genCCall target@(ForeignTarget expr _cconv) dest_regs arg_regs = do
       moveStackUp i | odd i = moveStackUp (i + 1)
       moveStackUp i =
         toOL
-          [ ADD (OpReg W64 spMachReg) (OpReg W64 spMachReg) (OpImm (ImmInt (8 * i))),
+          [ ADD (OpReg II64 spMachReg) (OpReg II64 spMachReg) (OpImm (ImmInt (8 * i))),
             POP_STACK_FRAME,
             DELTA 0
           ]
@@ -1869,7 +1958,7 @@ genCCall target@(ForeignTarget expr _cconv) dest_regs arg_regs = do
   let code =
         call_target_code -- compute the label (possibly into a register)
           `appOL` moveStackDown stackSpaceWords
-          `snocOL` COMMENT ((text . show)  (map (\(a,b,_c,_d) -> (a,b)) arg_regs''))
+          `snocOL` COMMENT ((text . show) (map (\(a, b, _c, _d) -> (a, b)) arg_regs''))
           `snocOL` COMMENT ((text . show) stackSpaceWords <+> (text . show) passRegs)
           `appOL` passArgumentsCode -- put the arguments into x0, ...
           `snocOL` COMMENT (text "CCALL")
@@ -1897,7 +1986,7 @@ genCCall target@(ForeignTarget expr _cconv) dest_regs arg_regs = do
               else
                 toOL
                   [ COMMENT (text "Pass gp argument sign-extended (SignedHint): " <> ppr r),
-                    MOV (OpReg w gpReg) (OpReg w r)
+                    MOV (OpReg format gpReg) (OpReg format r)
                   ]
           accumCode' =
             accumCode
@@ -1907,8 +1996,7 @@ genCCall target@(ForeignTarget expr _cconv) dest_regs arg_regs = do
 
     -- Still have FP regs, and we want to pass an FP argument.
     passArguments gpRegs (fpReg : fpRegs) vRegs ((r, format, _hint, code_r) : args) stackSpaceWords accumRegs accumCode | isFloatFormat format = do
-      let w = formatToWidth format
-          mov = MOV (OpReg w fpReg) (OpReg w r)
+      let mov = MOV (OpReg format fpReg) (OpReg format r)
           accumCode' =
             accumCode
               `appOL` code_r
@@ -1919,7 +2007,7 @@ genCCall target@(ForeignTarget expr _cconv) dest_regs arg_regs = do
     passArguments [] [] vRegs ((r, format, hint, code_r) : args) stackSpaceWords accumRegs accumCode | not (isVecFormat format) = do
       let w = formatToWidth format
           spOffet = 8 * stackSpaceWords
-          str = STR format (OpReg w r) (OpAddr (AddrRegImm spMachReg (ImmInt spOffet)))
+          str = STR format (OpReg format r) (OpAddr (AddrRegImm spMachReg (ImmInt spOffet)))
           stackCode =
             if hint == SignedHint
               then
@@ -1935,7 +2023,7 @@ genCCall target@(ForeignTarget expr _cconv) dest_regs arg_regs = do
     passArguments [] fpRegs vRegs ((r, format, _hint, code_r) : args) stackSpaceWords accumRegs accumCode | isIntFormat format = do
       let w = formatToWidth format
           spOffet = 8 * stackSpaceWords
-          str = STR format (OpReg w r) (OpAddr (AddrRegImm spMachReg (ImmInt spOffet)))
+          str = STR format (OpReg format r) (OpAddr (AddrRegImm spMachReg (ImmInt spOffet)))
           stackCode =
             code_r
               `snocOL` ann (text "Pass argument (size " <> ppr w <> text ") on the stack: " <> ppr r) str
@@ -1943,8 +2031,8 @@ genCCall target@(ForeignTarget expr _cconv) dest_regs arg_regs = do
 
     -- Still have gpRegs left, but want to pass a FP argument. Must be passed in gpReg then.
     passArguments (gpReg : gpRegs) [] vRegs ((r, format, _hint, code_r) : args) stackSpaceWords accumRegs accumCode | isFloatFormat format = do
-      let w = formatToWidth format
-          mov = MOV (OpReg w gpReg) (OpReg w r)
+      let gp_format = (intFormat . formatToWidth) format
+          mov = MOV (OpReg gp_format gpReg) (OpReg format r)
           accumCode' =
             accumCode
               `appOL` code_r
@@ -1953,8 +2041,7 @@ genCCall target@(ForeignTarget expr _cconv) dest_regs arg_regs = do
 
     -- Still have vector regs, and we want to pass a vector argument.
     passArguments gpRegs fpRegs (vReg : vRegs) ((r, format, _hint, code_r) : args) stackSpaceWords accumRegs accumCode | isVecFormat format = do
-      let w = formatToWidth format
-          mov = MOV (OpReg w vReg) (OpReg w r)
+      let mov = MOV (OpReg format vReg) (OpReg format r)
           accumCode' =
             accumCode
               `appOL` code_r
@@ -1962,9 +2049,9 @@ genCCall target@(ForeignTarget expr _cconv) dest_regs arg_regs = do
       passArguments gpRegs fpRegs vRegs args stackSpaceWords (vReg : accumRegs) accumCode'
 
     -- No more vector regs, and we want to pass a vector argument.
-    passArguments gpRegs fpRegs (vReg : vRegs) ((r, format, _hint, code_r) : args) stackSpaceWords accumRegs accumCode | isVecFormat format =
-      pprPanic "passArguments" (text "TODO: Implement and test vector argument passing on the stack.")
-
+    passArguments gpRegs fpRegs (vReg : vRegs) ((r, format, _hint, code_r) : args) stackSpaceWords accumRegs accumCode
+      | isVecFormat format =
+          pprPanic "passArguments" (text "TODO: Implement and test vector argument passing on the stack.")
     passArguments _ _ _ _ _ _ _ = pprPanic "passArguments" (text "invalid state")
 
     readResults :: [Reg] -> [Reg] -> [Reg] -> [LocalReg] -> [Reg] -> InstrBlock -> NatM InstrBlock
@@ -1986,30 +2073,28 @@ genCCall target@(ForeignTarget expr _cconv) dest_regs arg_regs = do
           w = cmmRegWidth (CmmLocal dst)
           r_dst = getRegisterReg platform (CmmLocal dst)
       case format of
-        format | isFloatFormat format
-                   -> readResults (gpReg : gpRegs) fpRegs (vReg: vRegs) dsts (fpReg : accumRegs) (accumCode `snocOL` MOV (OpReg w r_dst) (OpReg w fpReg))
-              | isVecFormat format
-                   -> readResults (gpReg : gpRegs) (fpReg: fpRegs) vRegs dsts (vReg : accumRegs) (accumCode `snocOL` MOV (OpReg w r_dst) (OpReg w vReg))
-              | otherwise ->
-                readResults gpRegs (fpReg : fpRegs) (vReg: vRegs) dsts (gpReg : accumRegs)
-                  $ accumCode
-                  `snocOL` MOV (OpReg w r_dst) (OpReg w gpReg)
-                  `appOL`
-                  -- truncate, otherwise an unexpectedly big value might be used in upfollowing calculations
-                  -- truncate, otherwise an unexpectedly big value might be used in upfollowing calculations
-                  -- truncate, otherwise an unexpectedly big value might be used in upfollowing calculations
-                  -- truncate, otherwise an unexpectedly big value might be used in upfollowing calculations
-                  truncateReg W64 w r_dst
+        format
+          | isFloatFormat format ->
+              readResults (gpReg : gpRegs) fpRegs (vReg : vRegs) dsts (fpReg : accumRegs) (accumCode `snocOL` MOV (OpReg format r_dst) (OpReg format fpReg))
+          | isVecFormat format ->
+              readResults (gpReg : gpRegs) (fpReg : fpRegs) vRegs dsts (vReg : accumRegs) (accumCode `snocOL` MOV (OpReg format r_dst) (OpReg format vReg))
+          | otherwise ->
+              readResults gpRegs (fpReg : fpRegs) (vReg : vRegs) dsts (gpReg : accumRegs)
+                $ accumCode
+                `snocOL` MOV (OpReg format r_dst) (OpReg format gpReg)
+                `appOL`
+                -- truncate, otherwise an unexpectedly big value might be used in upfollowing calculations
+                truncateReg W64 w r_dst
 genCCall (PrimTarget mop) dest_regs arg_regs = do
   case mop of
     MO_F32_Fabs
       | [arg_reg] <- arg_regs,
         [dest_reg] <- dest_regs ->
-          unaryFloatOp W32 (\d x -> unitOL $ FABS d x) arg_reg dest_reg
+          unaryFloatOp FF32 (\d x -> unitOL $ FABS d x) arg_reg dest_reg
     MO_F64_Fabs
       | [arg_reg] <- arg_regs,
         [dest_reg] <- dest_regs ->
-          unaryFloatOp W64 (\d x -> unitOL $ FABS d x) arg_reg dest_reg
+          unaryFloatOp FF64 (\d x -> unitOL $ FABS d x) arg_reg dest_reg
     -- 64 bit float ops
     MO_F64_Pwr -> mkCCall "pow"
     MO_F64_Sin -> mkCCall "sin"
@@ -2133,22 +2218,23 @@ genCCall (PrimTarget mop) dest_regs arg_regs = do
           -- __atomic_load_n(&a, __ATOMIC_ACQUIRE);
           -- __atomic_load_n(&a, __ATOMIC_SEQ_CST);
           let instrs = case ord of
-                MemOrderRelaxed -> unitOL $ ann moDescr (LDR (intFormat w) (OpReg w dst) (OpAddr $ AddrReg p))
+                MemOrderRelaxed -> unitOL $ ann moDescr (LDR format (OpReg format dst) (OpAddr $ AddrReg p))
                 MemOrderAcquire ->
                   toOL
-                    [ ann moDescr (LDR (intFormat w) (OpReg w dst) (OpAddr $ AddrReg p)),
+                    [ ann moDescr (LDR format (OpReg format dst) (OpAddr $ AddrReg p)),
                       FENCE FenceRead FenceReadWrite
                     ]
                 MemOrderSeqCst ->
                   toOL
                     [ ann moDescr (FENCE FenceReadWrite FenceReadWrite),
-                      LDR (intFormat w) (OpReg w dst) (OpAddr $ AddrReg p),
+                      LDR format (OpReg format dst) (OpAddr $ AddrReg p),
                       FENCE FenceRead FenceReadWrite
                     ]
                 MemOrderRelease -> panic $ "Unexpected MemOrderRelease on an AtomicRead: " ++ show mo
               dst = getRegisterReg platform (CmmLocal dst_reg)
               moDescr = (text . show) mo
               code = code_p `appOL` instrs
+              format = intFormat w
           return code
       | otherwise -> panic "mal-formed AtomicRead"
     mo@(MO_AtomicWrite w ord)
@@ -2161,17 +2247,17 @@ genCCall (PrimTarget mop) dest_regs arg_regs = do
           -- __atomic_store_n(&a, 23, __ATOMIC_SEQ_CST);
           -- __atomic_store_n(&a, 23, __ATOMIC_RELEASE);
           let instrs = case ord of
-                MemOrderRelaxed -> unitOL $ ann moDescr (STR fmt_val (OpReg w val) (OpAddr $ AddrReg p))
+                MemOrderRelaxed -> unitOL $ ann moDescr (STR fmt_val (OpReg fmt_val val) (OpAddr $ AddrReg p))
                 MemOrderSeqCst ->
                   toOL
                     [ ann moDescr (FENCE FenceReadWrite FenceWrite),
-                      STR fmt_val (OpReg w val) (OpAddr $ AddrReg p),
+                      STR fmt_val (OpReg fmt_val val) (OpAddr $ AddrReg p),
                       FENCE FenceReadWrite FenceReadWrite
                     ]
                 MemOrderRelease ->
                   toOL
                     [ ann moDescr (FENCE FenceReadWrite FenceWrite),
-                      STR fmt_val (OpReg w val) (OpAddr $ AddrReg p)
+                      STR fmt_val (OpReg fmt_val val) (OpAddr $ AddrReg p)
                     ]
                 MemOrderAcquire -> panic $ "Unexpected MemOrderAcquire on an AtomicWrite" ++ show mo
               moDescr = (text . show) mo
@@ -2204,11 +2290,12 @@ genCCall (PrimTarget mop) dest_regs arg_regs = do
       let cconv = ForeignConvention CCallConv [NoHint] [NoHint] CmmMayReturn
       genCCall (ForeignTarget target cconv) dest_regs arg_regs
 
-    unaryFloatOp w op arg_reg dest_reg = do
+    unaryFloatOp :: Format -> (Operand -> Operand -> OrdList Instr) -> CmmExpr -> LocalReg -> NatM (OrdList Instr)
+    unaryFloatOp fmt op arg_reg dest_reg = do
       platform <- getPlatform
-      (reg_fx, _format_x, code_fx) <- getFloatReg arg_reg
+      (reg_fx, format_x, code_fx) <- getFloatReg arg_reg
       let dst = getRegisterReg platform (CmmLocal dest_reg)
-      let code = code_fx `appOL` op (OpReg w dst) (OpReg w reg_fx)
+      let code = code_fx `appOL` op (OpReg fmt dst) (OpReg format_x reg_fx)
       pure code
 
 {- Note [RISCV64 far jumps]
@@ -2286,7 +2373,7 @@ genCondFarJump cond op1 op2 far_target = do
           $ BCOND cond op1 op2 (TBlock jmp_lbl_id),
         B (TBlock skip_lbl_id),
         NEWBLOCK jmp_lbl_id,
-        LDR II64 (OpReg W64 tmpReg) (OpImm (ImmCLbl (blockLbl far_target))),
+        LDR II64 (OpReg II64 tmpReg) (OpImm (ImmCLbl (blockLbl far_target))),
         B (TReg tmpReg),
         NEWBLOCK skip_lbl_id
       ]
@@ -2300,7 +2387,7 @@ genFarJump far_target =
   return
     $ toOL
       [ ann (text "Unconditional far jump to: " <> ppr far_target)
-          $ LDR II64 (OpReg W64 tmpReg) (OpImm (ImmCLbl (blockLbl far_target))),
+          $ LDR II64 (OpReg II64 tmpReg) (OpImm (ImmCLbl (blockLbl far_target))),
         B (TReg tmpReg)
       ]
 


=====================================
compiler/GHC/CmmToAsm/RV64/Instr.hs
=====================================
@@ -20,8 +20,8 @@ import GHC.CmmToAsm.Utils
 import GHC.Data.FastString (LexicalFastString)
 import GHC.Platform
 import GHC.Platform.Reg
-import GHC.Platform.Regs
 import GHC.Platform.Reg.Class.Separate
+import GHC.Platform.Regs
 import GHC.Prelude
 import GHC.Stack
 import GHC.Types.Unique.DSM
@@ -96,10 +96,10 @@ regUsageOfInstr platform instr = case instr of
   -- ORI's third operand is always an immediate
   ORI dst src1 _ -> usage (regOp src1, regOp dst)
   XORI dst src1 _ -> usage (regOp src1, regOp dst)
-  J_TBL _ _ t -> usage ([t], [])
+  J_TBL _ _ t -> usage ([(t, II64)], [])
   B t -> usage (regTarget t, [])
   BCOND _ l r t -> usage (regTarget t ++ regOp l ++ regOp r, [])
-  BL t ps -> usage (t : ps, callerSavedRegisters)
+  BL t ps -> usage ((t, II64) : map (\p -> (p, II64)) ps, callerSavedRegisters)
   CSET dst l r _ -> usage (regOp l ++ regOp r, regOp dst)
   STR _ src dst -> usage (regOp src ++ regOp dst, [])
   LDR _ dst src -> usage (regOp src, regOp dst)
@@ -109,84 +109,76 @@ regUsageOfInstr platform instr = case instr of
   FABS dst src -> usage (regOp src, regOp dst)
   FMIN dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
   FMAX dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
-  VMV fmt dst src1 -> usage (regOp src1, regOp dst)
-  VID fmt dst -> usage ([], regOp dst)
-  VMSEQ fmt dst src op -> usage (regOp src ++ regOp op, regOp dst)
-  VMERGE fmt dst op1 op2 opm -> usage (regOp op1 ++ regOp op2 ++ regOp opm, regOp dst)
-  VSLIDEDOWN fmt dst op1 op2 -> usage (regOp op1 ++ regOp op2, regOp dst)
+  VMV dst src1 -> usage (regOp src1, regOp dst)
+  VID dst -> usage ([], regOp dst)
+  VMSEQ dst src op -> usage (regOp src ++ regOp op, regOp dst)
+  VMERGE dst op1 op2 opm -> usage (regOp op1 ++ regOp op2 ++ regOp opm, regOp dst)
+  VSLIDEDOWN dst op1 op2 -> usage (regOp op1 ++ regOp op2, regOp dst)
   -- WARNING: VSETIVLI is a special case. It changes the interpretation of all vector registers!
-  VSETIVLI dst _ _ _ _ _ -> usage ([], [dst])
-  VNEG fmt dst src1 -> usage (regOp src1, regOp dst)
-  VADD fmt dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
-  VSUB fmt dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
-  VMUL fmt dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
-  VQUOT fmt dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
-  VSMIN fmt dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
-  VSMAX fmt dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
-  VUMIN fmt dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
-  VUMAX fmt dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
-  VFMIN fmt dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
-  VFMAX fmt dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
+  VSETIVLI (OpReg fmt reg)  _ _ _ _ _ -> usage ([], [(reg, fmt)])
+  VNEG dst src1 -> usage (regOp src1, regOp dst)
+  VADD dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
+  VSUB dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
+  VMUL dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
+  VQUOT dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
+  VSMIN dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
+  VSMAX dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
+  VUMIN dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
+  VUMAX dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
+  VFMIN dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
+  VFMAX dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
   FMA _ dst src1 src2 src3 ->
     usage (regOp src1 ++ regOp src2 ++ regOp src3, regOp dst)
-  VFMA _ _fmt op1 op2 op3 ->
+  VFMA _ op1 op2 op3 ->
     usage (regOp op1 ++ regOp op2 ++ regOp op3, regOp op1)
   _ -> panic $ "regUsageOfInstr: " ++ instrCon instr
   where
     -- filtering the usage is necessary, otherwise the register
     -- allocator will try to allocate pre-defined fixed stg
     -- registers as well, as they show up.
-    usage :: ([Reg], [Reg]) -> RegUsage
+    usage :: ([(Reg, Format)], [(Reg, Format)]) -> RegUsage
     usage (srcRegs, dstRegs) =
       RU
         (map mkFmt $ filter (interesting platform) srcRegs)
         (map mkFmt $ filter (interesting platform) dstRegs)
 
-      -- SIMD NCG TODO: the format here is used for register spilling/unspilling.
-      -- As the RISCV64 NCG does not currently support SIMD registers,
-      -- this simple logic is OK.
-    mkFmt r = RegWithFormat r fmt
-      where
-        fmt = case cls of
-                RcInteger -> II64
-                RcFloat   -> FF64
-                -- TODO: We're expecting 128bit vector registers here. This
-                -- needs to be calculated from real format. Probably, we need to
-                -- hand around the format instead of the width for vector regs.
-                RcVector -> VecFormat 2 FmtInt64
-        cls = case r of
-                RegVirtual vr -> classOfVirtualReg (platformArch platform) vr
-                RegReal rr -> classOfRealReg rr
-
-    regAddr :: AddrMode -> [Reg]
-    regAddr (AddrRegImm r1 _imm) = [r1]
-    regAddr (AddrReg r1) = [r1]
-
-    regOp :: Operand -> [Reg]
-    regOp (OpReg _w r1) = [r1]
+    mkFmt (r, fmt) = RegWithFormat r fmt
+
+    regAddr :: AddrMode -> [(Reg, Format)]
+    regAddr (AddrRegImm r1 _imm) = [(r1, II64)]
+    regAddr (AddrReg r1) = [(r1, II64)]
+
+    regOp :: Operand -> [(Reg, Format)]
+    regOp (OpReg fmt r1) = [(r1, fmt)]
     regOp (OpAddr a) = regAddr a
     regOp (OpImm _imm) = []
 
-    regTarget :: Target -> [Reg]
+    regTarget :: Target -> [(Reg, Format)]
     regTarget (TBlock _bid) = []
-    regTarget (TReg r1) = [r1]
+    regTarget (TReg r1) = [(r1, II64)]
 
     -- Is this register interesting for the register allocator?
-    interesting :: Platform -> Reg -> Bool
-    interesting _ (RegVirtual _) = True
-    interesting platform (RegReal (RealRegSingle i)) = freeReg platform i
+    interesting :: Platform -> (Reg, Format) -> Bool
+    interesting _ ((RegVirtual _), _) = True
+    interesting platform ((RegReal (RealRegSingle i)), _) = freeReg platform i
 
 -- | Caller-saved registers (according to calling convention)
 --
 -- These registers may be clobbered after a jump.
-callerSavedRegisters :: [Reg]
+callerSavedRegisters :: [(Reg, Format)]
 callerSavedRegisters =
-  [regSingle raRegNo]
-    ++ map regSingle [t0RegNo .. t2RegNo]
-    ++ map regSingle [a0RegNo .. a7RegNo]
-    ++ map regSingle [t3RegNo .. t6RegNo]
-    ++ map regSingle [ft0RegNo .. ft7RegNo]
-    ++ map regSingle [fa0RegNo .. fa7RegNo]
+  [(toTuple . regSingle) raRegNo]
+    ++ map (toTuple . regSingle) [t0RegNo .. t2RegNo]
+    ++ map (toTuple . regSingle) [a0RegNo .. a7RegNo]
+    ++ map (toTuple . regSingle) [t3RegNo .. t6RegNo]
+    ++ map (toTuple . regSingle) [ft0RegNo .. ft7RegNo]
+    ++ map (toTuple . regSingle) [fa0RegNo .. fa7RegNo]
+  where
+    toTuple :: Reg -> (Reg, Format)
+    toTuple r = (r, format r)
+    format r | isIntReg r = II64
+             | isFloatReg r = FF64
+             | otherwise = panic $ "Unexpected register: " ++ show r
 
 -- | Apply a given mapping to all the register references in this instruction.
 patchRegsOfInstr :: Instr -> (Reg -> Reg) -> Instr
@@ -230,27 +222,27 @@ patchRegsOfInstr instr env = case instr of
   FABS o1 o2 -> FABS (patchOp o1) (patchOp o2)
   FMIN o1 o2 o3 -> FMIN (patchOp o1) (patchOp o2) (patchOp o3)
   FMAX o1 o2 o3 -> FMAX (patchOp o1) (patchOp o2) (patchOp o3)
-  VMV fmt o1 o2 -> VMV fmt (patchOp o1) (patchOp o2)
-  VID fmt o1 -> VID fmt (patchOp o1)
-  VMSEQ fmt o1 o2 o3 -> VMSEQ fmt (patchOp o1) (patchOp o2) (patchOp o3)
-  VMERGE fmt o1 o2 o3 o4 -> VMERGE fmt (patchOp o1) (patchOp o2) (patchOp o3) (patchOp o4)
-  VSLIDEDOWN fmt o1 o2 o3 -> VSLIDEDOWN fmt (patchOp o1) (patchOp o2) (patchOp o3)
-  VSETIVLI o1 o2 o3 o4 o5 o6 -> VSETIVLI (env o1) o2 o3 o4 o5 o6
-  VNEG fmt o1 o2 -> VNEG fmt (patchOp o1) (patchOp o2)
-  VADD fmt o1 o2 o3 -> VADD fmt (patchOp o1) (patchOp o2) (patchOp o3)
-  VSUB fmt o1 o2 o3 -> VSUB fmt (patchOp o1) (patchOp o2) (patchOp o3)
-  VMUL fmt o1 o2 o3 -> VMUL fmt (patchOp o1) (patchOp o2) (patchOp o3)
-  VQUOT fmt o1 o2 o3 -> VQUOT fmt (patchOp o1) (patchOp o2) (patchOp o3)
-  VSMIN fmt o1 o2 o3 -> VSMIN fmt (patchOp o1) (patchOp o2) (patchOp o3)
-  VSMAX fmt o1 o2 o3 -> VSMAX fmt (patchOp o1) (patchOp o2) (patchOp o3)
-  VUMIN fmt o1 o2 o3 -> VUMIN fmt (patchOp o1) (patchOp o2) (patchOp o3)
-  VUMAX fmt o1 o2 o3 -> VUMAX fmt (patchOp o1) (patchOp o2) (patchOp o3)
-  VFMIN fmt o1 o2 o3 -> VFMIN fmt (patchOp o1) (patchOp o2) (patchOp o3)
-  VFMAX fmt o1 o2 o3 -> VFMAX fmt (patchOp o1) (patchOp o2) (patchOp o3)
+  VMV o1 o2 -> VMV (patchOp o1) (patchOp o2)
+  VID o1 -> VID (patchOp o1)
+  VMSEQ o1 o2 o3 -> VMSEQ (patchOp o1) (patchOp o2) (patchOp o3)
+  VMERGE o1 o2 o3 o4 -> VMERGE (patchOp o1) (patchOp o2) (patchOp o3) (patchOp o4)
+  VSLIDEDOWN o1 o2 o3 -> VSLIDEDOWN (patchOp o1) (patchOp o2) (patchOp o3)
+  VSETIVLI o1 o2 o3 o4 o5 o6 -> VSETIVLI (patchOp o1) o2 o3 o4 o5 o6
+  VNEG o1 o2 -> VNEG (patchOp o1) (patchOp o2)
+  VADD o1 o2 o3 -> VADD (patchOp o1) (patchOp o2) (patchOp o3)
+  VSUB o1 o2 o3 -> VSUB (patchOp o1) (patchOp o2) (patchOp o3)
+  VMUL o1 o2 o3 -> VMUL (patchOp o1) (patchOp o2) (patchOp o3)
+  VQUOT o1 o2 o3 -> VQUOT (patchOp o1) (patchOp o2) (patchOp o3)
+  VSMIN o1 o2 o3 -> VSMIN (patchOp o1) (patchOp o2) (patchOp o3)
+  VSMAX o1 o2 o3 -> VSMAX (patchOp o1) (patchOp o2) (patchOp o3)
+  VUMIN o1 o2 o3 -> VUMIN (patchOp o1) (patchOp o2) (patchOp o3)
+  VUMAX o1 o2 o3 -> VUMAX (patchOp o1) (patchOp o2) (patchOp o3)
+  VFMIN o1 o2 o3 -> VFMIN (patchOp o1) (patchOp o2) (patchOp o3)
+  VFMAX o1 o2 o3 -> VFMAX (patchOp o1) (patchOp o2) (patchOp o3)
   FMA s o1 o2 o3 o4 ->
     FMA s (patchOp o1) (patchOp o2) (patchOp o3) (patchOp o4)
-  VFMA s fmt o1 o2 o3 ->
-    VFMA s fmt (patchOp o1) (patchOp o2) (patchOp o3)
+  VFMA s o1 o2 o3 ->
+    VFMA s (patchOp o1) (patchOp o2) (patchOp o3)
   _ -> panic $ "patchRegsOfInstr: " ++ instrCon instr
   where
     patchOp :: Operand -> Operand
@@ -362,7 +354,7 @@ mkSpillInstr _config (RegWithFormat reg fmt) delta slot =
       = scalarMoveFormat fmt
     mkStrSpImm imm =
       ANN (text "Spill@" <> int (off - delta))
-        $ STR fmt' (OpReg W64 reg) (OpAddr (AddrRegImm spMachReg (ImmInt imm)))
+        $ STR fmt' (OpReg fmt' reg) (OpAddr (AddrRegImm spMachReg (ImmInt imm)))
     movImmToTmp imm =
       ANN (text "Spill: TMP <- " <> int imm)
         $ MOV tmp (OpImm (ImmInt imm))
@@ -371,7 +363,7 @@ mkSpillInstr _config (RegWithFormat reg fmt) delta slot =
         $ ADD tmp tmp sp
     mkStrTmp =
       ANN (text "Spill@" <> int (off - delta))
-        $ STR fmt' (OpReg W64 reg) (OpAddr (AddrReg tmpReg))
+        $ STR fmt' (OpReg fmt' reg) (OpAddr (AddrReg tmpReg))
 
     off = spillSlotToOffset slot
 
@@ -401,7 +393,7 @@ mkLoadInstr _config (RegWithFormat reg fmt) delta slot =
       = scalarMoveFormat fmt
     mkLdrSpImm imm =
       ANN (text "Reload@" <> int (off - delta))
-        $ LDR fmt' (OpReg W64 reg) (OpAddr (AddrRegImm spMachReg (ImmInt imm)))
+        $ LDR fmt' (OpReg fmt' reg) (OpAddr (AddrRegImm spMachReg (ImmInt imm)))
     movImmToTmp imm =
       ANN (text "Reload: TMP <- " <> int imm)
         $ MOV tmp (OpImm (ImmInt imm))
@@ -410,7 +402,7 @@ mkLoadInstr _config (RegWithFormat reg fmt) delta slot =
         $ ADD tmp tmp sp
     mkLdrTmp =
       ANN (text "Reload@" <> int (off - delta))
-        $ LDR fmt' (OpReg W64 reg) (OpAddr (AddrReg tmpReg))
+        $ LDR fmt' (OpReg fmt' reg) (OpAddr (AddrReg tmpReg))
 
     off = spillSlotToOffset slot
 
@@ -444,11 +436,11 @@ isMetaInstr instr =
 -- | Copy the value in a register to another one.
 --
 -- Must work for all register classes.
-mkRegRegMoveInstr :: Reg -> Reg -> Instr
-mkRegRegMoveInstr src dst = ANN desc instr
+mkRegRegMoveInstr :: Format -> Reg -> Reg -> Instr
+mkRegRegMoveInstr fmt src dst = ANN desc instr
   where
     desc = text "Reg->Reg Move: " <> ppr src <> text " -> " <> ppr dst
-    instr = MOV (operandFromReg dst) (operandFromReg src)
+    instr = MOV (operandFromReg fmt dst) (operandFromReg fmt src)
 
 -- | Take the source and destination from this (potential) reg -> reg move instruction
 --
@@ -675,24 +667,24 @@ data Instr
     FMA FMASign Operand Operand Operand Operand
 
   -- TODO: Care about the variants (<instr>.x.y) -> sum type
-  | VMV Format Operand Operand
-  | VID Format Operand 
-  | VMSEQ Format Operand Operand Operand
-  | VMERGE Format Operand Operand Operand Operand
-  | VSLIDEDOWN Format Operand Operand Operand
-  | VSETIVLI Reg Word Width VectorGrouping TailAgnosticFlag MaskAgnosticFlag
-  | VNEG Format Operand Operand
-  | VADD Format Operand Operand Operand
-  | VSUB Format Operand Operand Operand
-  | VMUL Format Operand Operand Operand
-  | VQUOT Format Operand Operand Operand
-  | VSMIN Format Operand Operand Operand
-  | VSMAX Format Operand Operand Operand
-  | VUMIN Format Operand Operand Operand
-  | VUMAX Format Operand Operand Operand
-  | VFMIN Format Operand Operand Operand
-  | VFMAX Format Operand Operand Operand
-  | VFMA FMASign Format Operand Operand Operand
+  | VMV Operand Operand
+  | VID Operand 
+  | VMSEQ Operand Operand Operand
+  | VMERGE Operand Operand Operand Operand
+  | VSLIDEDOWN Operand Operand Operand
+  | VSETIVLI Operand Word Width VectorGrouping TailAgnosticFlag MaskAgnosticFlag
+  | VNEG Operand Operand
+  | VADD Operand Operand Operand
+  | VSUB Operand Operand Operand
+  | VMUL Operand Operand Operand
+  | VQUOT Operand Operand Operand
+  | VSMIN Operand Operand Operand
+  | VSMAX Operand Operand Operand
+  | VUMIN Operand Operand Operand
+  | VUMAX Operand Operand Operand
+  | VFMIN Operand Operand Operand
+  | VFMAX Operand Operand Operand
+  | VFMA FMASign Operand Operand Operand
 
 -- | Operand of a FENCE instruction (@r@, @w@ or @rw@)
 data FenceType = FenceRead | FenceWrite | FenceReadWrite
@@ -780,7 +772,7 @@ instrCon i =
         FMSub -> "FMSUB"
         FNMAdd -> "FNMADD"
         FNMSub -> "FNMSUB"
-    VFMA variant _ _ _ _ ->
+    VFMA variant _ _ _ ->
       case variant of
         FMAdd -> "VFMADD"
         FMSub -> "VFMSUB"
@@ -791,152 +783,132 @@ data Target
   = TBlock BlockId
   | TReg Reg
 
--- TODO: OpReg should carry the format, not only the width. This would unify OpReg and OpVecReg.
 data Operand
   = -- | register
-    OpReg Width Reg
+    OpReg Format Reg
   | -- | immediate value
     OpImm Imm
   | -- | memory reference
     OpAddr AddrMode
   deriving (Eq, Show)
 
-operandFromReg :: Reg -> Operand
-operandFromReg = OpReg W64
+-- TODO: This just wraps a constructor... Inline?
+operandFromReg :: Format -> Reg -> Operand
+operandFromReg = OpReg
 
-operandFromRegNo :: RegNo -> Operand
-operandFromRegNo = operandFromReg . regSingle
+operandFromRegNo :: Format -> RegNo -> Operand
+operandFromRegNo fmt = operandFromReg fmt . regSingle
 
 zero, ra, sp, gp, tp, fp, tmp :: Operand
-zero = operandFromReg zeroReg
-ra = operandFromReg raReg
-sp = operandFromReg spMachReg
-gp = operandFromRegNo 3
-tp = operandFromRegNo 4
-fp = operandFromRegNo 8
-tmp = operandFromReg tmpReg
+zero = operandFromReg II64 zeroReg
+ra = operandFromReg II64 raReg
+sp = operandFromReg II64 spMachReg
+gp = operandFromRegNo II64 3
+tp = operandFromRegNo II64 4
+fp = operandFromRegNo II64 8
+tmp = operandFromReg II64 tmpReg
 
 x0, x1, x2, x3, x4, x5, x6, x7 :: Operand
 x8, x9, x10, x11, x12, x13, x14, x15 :: Operand
 x16, x17, x18, x19, x20, x21, x22, x23 :: Operand
 x24, x25, x26, x27, x28, x29, x30, x31 :: Operand
-x0 = operandFromRegNo x0RegNo
-x1 = operandFromRegNo 1
-x2 = operandFromRegNo 2
-x3 = operandFromRegNo 3
-x4 = operandFromRegNo 4
-x5 = operandFromRegNo x5RegNo
-x6 = operandFromRegNo 6
-x7 = operandFromRegNo x7RegNo
-
-x8 = operandFromRegNo 8
-
-x9 = operandFromRegNo 9
-
-x10 = operandFromRegNo x10RegNo
-
-x11 = operandFromRegNo 11
-
-x12 = operandFromRegNo 12
-
-x13 = operandFromRegNo 13
-
-x14 = operandFromRegNo 14
-
-x15 = operandFromRegNo 15
-
-x16 = operandFromRegNo 16
-
-x17 = operandFromRegNo x17RegNo
-
-x18 = operandFromRegNo 18
-
-x19 = operandFromRegNo 19
-
-x20 = operandFromRegNo 20
-
-x21 = operandFromRegNo 21
-
-x22 = operandFromRegNo 22
-
-x23 = operandFromRegNo 23
-
-x24 = operandFromRegNo 24
-
-x25 = operandFromRegNo 25
-
-x26 = operandFromRegNo 26
-
-x27 = operandFromRegNo 27
-
-x28 = operandFromRegNo x28RegNo
-
-x29 = operandFromRegNo 29
-
-x30 = operandFromRegNo 30
-
-x31 = operandFromRegNo x31RegNo
+x0 = operandFromRegNo II64 x0RegNo
+x1 = operandFromRegNo II64 1
+x2 = operandFromRegNo II64 2
+x3 = operandFromRegNo II64 3
+x4 = operandFromRegNo II64 4
+x5 = operandFromRegNo II64 x5RegNo
+x6 = operandFromRegNo II64 6
+x7 = operandFromRegNo II64 x7RegNo
+
+x8 = operandFromRegNo II64 8
+
+x9 = operandFromRegNo II64 9
+
+x10 = operandFromRegNo II64 x10RegNo
+
+x11 = operandFromRegNo II64 11
+x12 = operandFromRegNo II64 12
+x13 = operandFromRegNo II64 13
+x14 = operandFromRegNo II64 14
+x15 = operandFromRegNo II64 15
+x16 = operandFromRegNo II64 16
+x17 = operandFromRegNo II64 x17RegNo
+x18 = operandFromRegNo II64 18
+x19 = operandFromRegNo II64 19
+x20 = operandFromRegNo II64 20
+x21 = operandFromRegNo II64 21
+x22 = operandFromRegNo II64 22
+x23 = operandFromRegNo II64 23
+x24 = operandFromRegNo II64 24
+x25 = operandFromRegNo II64 25
+x26 = operandFromRegNo II64 26
+x27 = operandFromRegNo II64 27
+x28 = operandFromRegNo II64 x28RegNo
+x29 = operandFromRegNo II64 29
+x30 = operandFromRegNo II64 30
+x31 = operandFromRegNo II64 x31RegNo
 
 d0, d1, d2, d3, d4, d5, d6, d7 :: Operand
 d8, d9, d10, d11, d12, d13, d14, d15 :: Operand
 d16, d17, d18, d19, d20, d21, d22, d23 :: Operand
 d24, d25, d26, d27, d28, d29, d30, d31 :: Operand
-d0 = operandFromRegNo d0RegNo
-d1 = operandFromRegNo 33
-d2 = operandFromRegNo 34
-d3 = operandFromRegNo 35
-d4 = operandFromRegNo 36
-d5 = operandFromRegNo 37
-d6 = operandFromRegNo 38
-d7 = operandFromRegNo d7RegNo
+d0 = operandFromRegNo FF64 d0RegNo
+d1 = operandFromRegNo FF64 33
+d2 = operandFromRegNo FF64 34
+d3 = operandFromRegNo FF64 35
+d4 = operandFromRegNo FF64 36
+d5 = operandFromRegNo FF64 37
+d6 = operandFromRegNo FF64 38
+d7 = operandFromRegNo FF64 d7RegNo
 
-d8 = operandFromRegNo 40
+d8 = operandFromRegNo FF64 40
 
-d9 = operandFromRegNo 41
+d9 = operandFromRegNo FF64 41
 
-d10 = operandFromRegNo d10RegNo
+d10 = operandFromRegNo FF64 d10RegNo
 
-d11 = operandFromRegNo 43
+d11 = operandFromRegNo FF64 43
 
-d12 = operandFromRegNo 44
+d12 = operandFromRegNo FF64 44
 
-d13 = operandFromRegNo 45
+d13 = operandFromRegNo FF64 45
 
-d14 = operandFromRegNo 46
+d14 = operandFromRegNo FF64 46
 
-d15 = operandFromRegNo 47
+d15 = operandFromRegNo FF64 47
 
-d16 = operandFromRegNo 48
+d16 = operandFromRegNo FF64 48
 
-d17 = operandFromRegNo d17RegNo
+d17 = operandFromRegNo FF64 d17RegNo
 
-d18 = operandFromRegNo 50
+d18 = operandFromRegNo FF64 50
 
-d19 = operandFromRegNo 51
+d19 = operandFromRegNo FF64 51
 
-d20 = operandFromRegNo 52
+d20 = operandFromRegNo FF64 52
 
-d21 = operandFromRegNo 53
+d21 = operandFromRegNo FF64 53
 
-d22 = operandFromRegNo 54
+d22 = operandFromRegNo FF64 54
 
-d23 = operandFromRegNo 55
+d23 = operandFromRegNo FF64 55
 
-d24 = operandFromRegNo 56
+d24 = operandFromRegNo FF64 56
 
-d25 = operandFromRegNo 57
+d25 = operandFromRegNo FF64 57
 
-d26 = operandFromRegNo 58
+d26 = operandFromRegNo FF64 58
 
-d27 = operandFromRegNo 59
+d27 = operandFromRegNo FF64 59
 
-d28 = operandFromRegNo 60
+d28 = operandFromRegNo FF64 60
 
-d29 = operandFromRegNo 61
+d29 = operandFromRegNo FF64 61
 
-d30 = operandFromRegNo 62
+d30 = operandFromRegNo FF64 62
 
-d31 = operandFromRegNo d31RegNo
+d31 = operandFromRegNo FF64 d31RegNo
 
 fitsIn12bitImm :: (Num a, Ord a) => a -> Bool
 fitsIn12bitImm off = off >= intMin12bit && off <= intMax12bit
@@ -957,7 +929,7 @@ isEncodeableInWidth :: Width -> Integer -> Bool
 isEncodeableInWidth = isNbitEncodeable . widthInBits
 
 isIntRegOp :: Operand -> Bool
-isIntRegOp (OpReg _ reg) | isIntReg reg = True
+isIntRegOp (OpReg fmt reg) | isIntReg reg = assertFmtReg fmt reg $ True
 isIntRegOp _ = False
 
 isIntImmOp :: Operand -> Bool
@@ -969,7 +941,7 @@ isIntOp :: Operand -> Bool
 isIntOp op = isIntRegOp op || isIntImmOp op
 
 isFloatRegOp :: Operand -> Bool
-isFloatRegOp (OpReg _ reg) | isFloatReg reg = True
+isFloatRegOp (OpReg fmt reg) | isFloatReg reg = assertFmtReg fmt reg $ True
 isFloatRegOp _ = False
 
 isFloatImmOp :: Operand -> Bool
@@ -980,8 +952,21 @@ isFloatImmOp _ = False
 isFloatOp :: Operand -> Bool
 isFloatOp op = isFloatRegOp op || isFloatImmOp op
 
+-- TODO: Hide OpReg (Operand) constructor and use this guard to ensure only sane fmt/reg combinations can be used
+assertFmtReg :: HasCallStack => Format -> Reg -> a -> a
+assertFmtReg fmt reg| fmtRegCombinationIsSane fmt reg = id  
+assertFmtReg fmt reg = pprPanic 
+                              "Format does not fit to register."
+                              (text "fmt" <> colon <+> ppr fmt <+> text "reg" <> colon <+> ppr reg)
+
+fmtRegCombinationIsSane :: Format -> Reg -> Bool
+fmtRegCombinationIsSane fmt reg = 
+                           (isFloatFormat fmt && isFloatReg reg) ||
+                           (isIntFormat fmt && isIntReg reg) ||
+                           (isVecFormat fmt && isVectorReg reg)
+
 isVectorRegOp :: Operand -> Bool
-isVectorRegOp (OpReg _ reg) | isVectorReg reg = True
+isVectorRegOp (OpReg fmt reg) | isVectorReg reg = assertFmtReg fmt reg $ True
 isVectorRegOp _ = False
 
 isFloatReg :: Reg -> Bool


=====================================
compiler/GHC/CmmToAsm/RV64/Ppr.hs
=====================================
@@ -320,20 +320,20 @@ pprOp :: (IsLine doc) => Platform -> Operand -> doc
 pprOp plat op = case op of
   OpReg w r -> pprReg w r
   OpImm im -> pprOpImm plat im
-  OpAddr (AddrRegImm r1 im) -> pprOpImm plat im <> char '(' <> pprReg W64 r1 <> char ')'
-  OpAddr (AddrReg r1) -> text "0(" <+> pprReg W64 r1 <+> char ')'
+  OpAddr (AddrRegImm r1 im) -> pprOpImm plat im <> char '(' <> pprReg II64 r1 <> char ')'
+  OpAddr (AddrReg r1) -> text "0(" <+> pprReg II64 r1 <+> char ')'
 
 -- | Pretty print register with calling convention name
 --
 -- This representation makes it easier to reason about the emitted assembly
 -- code.
-pprReg :: forall doc. (IsLine doc) => Width -> Reg -> doc
-pprReg w r = case r of
+pprReg :: forall doc. (IsLine doc) => Format -> Reg -> doc
+pprReg fmt r = assertFmtReg fmt r $ case r of
   RegReal (RealRegSingle i) -> ppr_reg_no i
   -- virtual regs should not show up, but this is helpful for debugging.
   RegVirtual (VirtualRegI u) -> text "%vI_" <> pprUniqueAlways u
   RegVirtual (VirtualRegD u) -> text "%vD_" <> pprUniqueAlways u
-  _ -> pprPanic "RiscV64.pprReg" (text (show r) <+> ppr w)
+  _ -> pprPanic "RiscV64.pprReg" (text (show r) <+> ppr fmt)
   where
     ppr_reg_no :: Int -> doc
     -- General Purpose Registers
@@ -435,19 +435,19 @@ pprReg w r = case r of
     ppr_reg_no 94 = text "v30"
     ppr_reg_no 95 = text "v31"
     ppr_reg_no i
-      | i < 0 = pprPanic "Unexpected register number (min is 0)" (ppr w <+> int i)
-      | i > 95 = pprPanic "Unexpected register number (max is 95)" (ppr w <+> int i)
+      | i < 0 = pprPanic "Unexpected register number (min is 0)" (ppr fmt <+> int i)
+      | i > 95 = pprPanic "Unexpected register number (max is 95)" (ppr fmt <+> int i)
       -- no support for widths > W64.
-      | otherwise = pprPanic "Unsupported width in register (max is 95)" (ppr w <+> int i)
+      | otherwise = pprPanic "Unsupported width in register (max is 95)" (ppr fmt <+> int i)
 
 -- | Single precission `Operand` (floating-point)
 isSingleOp :: Operand -> Bool
-isSingleOp (OpReg W32 _) = True
+isSingleOp (OpReg FF32 _) = True
 isSingleOp _ = False
 
 -- | Double precission `Operand` (floating-point)
 isDoubleOp :: Operand -> Bool
-isDoubleOp (OpReg W64 _) = True
+isDoubleOp (OpReg FF64 _) = True
 isDoubleOp _ = False
 
 -- | `Operand` is an immediate value
@@ -508,7 +508,7 @@ pprInstr platform instr = case instr of
   ADD o1 o2 o3
     | isFloatOp o1 && isFloatOp o2 && isFloatOp o3 -> op3 (text "\tfadd." <> if isSingleOp o1 then text "s" else text "d") o1 o2 o3
     -- This case is used for sign extension: SEXT.W op
-    | OpReg W64 _ <- o1, OpReg W32 _ <- o2, isImmOp o3 -> op3 (text "\taddiw") o1 o2 o3
+    | OpReg II64 _ <- o1, OpReg II32  _ <- o2, isImmOp o3 -> op3 (text "\taddiw") o1 o2 o3
     | otherwise -> op3 (text "\tadd") o1 o2 o3
   MUL o1 o2 o3
     | isFloatOp o1 && isFloatOp o2 && isFloatOp o3 -> op3 (text "\tfmul." <> if isSingleOp o1 then text "s" else text "d") o1 o2 o3
@@ -580,8 +580,8 @@ pprInstr platform instr = case instr of
   XORI o1 o2 o3 -> op3 (text "\txori") o1 o2 o3
   J_TBL _ _ r -> pprInstr platform (B (TReg r))
   B l | isLabel l -> line $ text "\tjal" <+> pprOp platform x0 <> comma <+> getLabel platform l
-  B (TReg r) -> line $ text "\tjalr" <+> pprOp platform x0 <> comma <+> pprReg W64 r <> comma <+> text "0"
-  BL r _ -> line $ text "\tjalr" <+> text "x1" <> comma <+> pprReg W64 r <> comma <+> text "0"
+  B (TReg r) -> line $ text "\tjalr" <+> pprOp platform x0 <> comma <+> pprReg II64 r <> comma <+> text "0"
+  BL r _ -> line $ text "\tjalr" <+> text "x1" <> comma <+> pprReg II64 r <> comma <+> text "0"
   BCOND c l r t
     | isLabel t ->
         line $ text "\t" <> pprBcond c <+> pprOp platform l <> comma <+> pprOp platform r <> comma <+> getLabel platform t
@@ -699,22 +699,22 @@ pprInstr platform instr = case instr of
   LDRU fmt@(VecFormat _ FmtDouble) o1 o2 -> configVec fmt $$ op2 (text "\tvle64.v") o1 o2
   LDRU f o1 o2 -> pprPanic "Unsupported unsigned load" ((text . show) f <+> pprOp platform o1 <+> pprOp platform o2)
   FENCE r w -> line $ text "\tfence" <+> pprFenceType r <> char ',' <+> pprFenceType w
-  FCVT FloatToFloat o1@(OpReg W32 _) o2@(OpReg W64 _) -> op2 (text "\tfcvt.s.d") o1 o2
-  FCVT FloatToFloat o1@(OpReg W64 _) o2@(OpReg W32 _) -> op2 (text "\tfcvt.d.s") o1 o2
+  FCVT FloatToFloat o1@(OpReg FF32 _) o2@(OpReg FF64 _) -> op2 (text "\tfcvt.s.d") o1 o2
+  FCVT FloatToFloat o1@(OpReg FF64 _) o2@(OpReg FF32 _) -> op2 (text "\tfcvt.d.s") o1 o2
   FCVT FloatToFloat o1 o2 ->
     pprPanic "RV64.pprInstr - impossible float to float conversion"
       $ line (pprOp platform o1 <> text "->" <> pprOp platform o2)
-  FCVT IntToFloat o1@(OpReg W32 _) o2@(OpReg W32 _) -> op2 (text "\tfcvt.s.w") o1 o2
-  FCVT IntToFloat o1@(OpReg W32 _) o2@(OpReg W64 _) -> op2 (text "\tfcvt.s.l") o1 o2
-  FCVT IntToFloat o1@(OpReg W64 _) o2@(OpReg W32 _) -> op2 (text "\tfcvt.d.w") o1 o2
-  FCVT IntToFloat o1@(OpReg W64 _) o2@(OpReg W64 _) -> op2 (text "\tfcvt.d.l") o1 o2
+  FCVT IntToFloat o1@(OpReg FF32 _) o2@(OpReg II32 _) -> op2 (text "\tfcvt.s.w") o1 o2
+  FCVT IntToFloat o1@(OpReg FF32 _) o2@(OpReg II64 _) -> op2 (text "\tfcvt.s.l") o1 o2
+  FCVT IntToFloat o1@(OpReg FF64 _) o2@(OpReg II32 _) -> op2 (text "\tfcvt.d.w") o1 o2
+  FCVT IntToFloat o1@(OpReg FF64 _) o2@(OpReg II64 _) -> op2 (text "\tfcvt.d.l") o1 o2
   FCVT IntToFloat o1 o2 ->
     pprPanic "RV64.pprInstr - impossible integer to float conversion"
       $ line (pprOp platform o1 <> text "->" <> pprOp platform o2)
-  FCVT FloatToInt o1@(OpReg W32 _) o2@(OpReg W32 _) -> op2 (text "\tfcvt.w.s") o1 o2
-  FCVT FloatToInt o1@(OpReg W32 _) o2@(OpReg W64 _) -> op2 (text "\tfcvt.w.d") o1 o2
-  FCVT FloatToInt o1@(OpReg W64 _) o2@(OpReg W32 _) -> op2 (text "\tfcvt.l.s") o1 o2
-  FCVT FloatToInt o1@(OpReg W64 _) o2@(OpReg W64 _) -> op2 (text "\tfcvt.l.d") o1 o2
+  FCVT FloatToInt o1@(OpReg II32 _) o2@(OpReg FF32 _) -> op2 (text "\tfcvt.w.s") o1 o2
+  FCVT FloatToInt o1@(OpReg II32 _) o2@(OpReg FF64 _) -> op2 (text "\tfcvt.w.d") o1 o2
+  FCVT FloatToInt o1@(OpReg II64 _) o2@(OpReg FF32 _) -> op2 (text "\tfcvt.l.s") o1 o2
+  FCVT FloatToInt o1@(OpReg II64 _) o2@(OpReg FF64 _) -> op2 (text "\tfcvt.l.d") o1 o2
   FCVT FloatToInt o1 o2 ->
     pprPanic "RV64.pprInstr - impossible float to integer conversion"
       $ line (pprOp platform o1 <> text "->" <> pprOp platform o2)
@@ -733,7 +733,7 @@ pprInstr platform instr = case instr of
           FNMAdd -> text "\tfnmadd" <> dot <> floatPrecission d
           FNMSub -> text "\tfnmsub" <> dot <> floatPrecission d
      in op4 fma d r1 r2 r3
-  VFMA variant fmt o1 o2 o3 | VecFormat l fmt' <- fmt ->
+  VFMA variant o1@(OpReg fmt _reg) o2 o3 | VecFormat l fmt' <- fmt ->
     let formatString = if (isFloatFormat . scalarFormatFormat) fmt' then text "f" else text ""
         prefix = text "v" <> formatString
         suffix = text "vv"
@@ -743,26 +743,31 @@ pprInstr platform instr = case instr of
             FNMAdd -> text "nmadd" -- TODO: Works only for floats!
             FNMSub -> text "nmsub"
      in op3 (tab <> prefix <> fma <> dot <> suffix) o1 o2 o3
-  VMV fmt o1 o2 | isFloatOp o1 && isVectorRegOp o2 -> configVec fmt $$ op2 (text "\tvfmv" <> dot <> text "f" <> dot <> text "s") o1 o2
+  VFMA _variant o1 _o2 _o3 -> pprPanic "RV64.pprInstr - VFMA can only target registers." (pprOp platform o1)
+  VMV o1@(OpReg fmt _reg) o2 | isFloatOp o1 && isVectorRegOp o2 -> configVec fmt $$ op2 (text "\tvfmv" <> dot <> text "f" <> dot <> text "s") o1 o2
                 | isFloatOp o2 -> configVec fmt $$ op2 (text "\tvfmv" <> dot <> opToVInstrSuffix o1 <> dot <> text "f") o1 o2
                 | isIntRegOp o1 && isVectorRegOp o2 -> configVec fmt $$ op2 (text "\tvmv" <> dot <> text "x" <> dot <> text "s") o1 o2
                 | isIntRegOp o2 -> configVec fmt $$ op2 (text "\tvmv" <> dot <> opToVInstrSuffix o1 <> dot <> text "x") o1 o2
                 | isVectorRegOp o1 && isVectorRegOp o2 -> configVec fmt $$ op2 (text "\tvmv" <> dot <> opToVInstrSuffix o1 <> dot <> text "v") o1 o2
                 | True -> pprPanic "RV64.pprInstr - impossible vector move (VMV)" (pprOp platform o1 <+> pprOp platform o2 <+> text "fmt" <> colon <> (text . show) fmt)
-  -- TODO: Remove o2 from constructor
-  VID fmt o1 -> configVec fmt $$ op1 (text "\tvid.v") o1
+  VMV o1 _o2 -> pprPanic "RV64.pprInstr - VMV can only target registers." (pprOp platform o1)
+  VID op@(OpReg fmt _reg) -> configVec fmt $$ op1 (text "\tvid.v") op
+  VID op -> pprPanic "RV64.pprInstr - VID can only target registers." (pprOp platform op)
   -- TODO: This expects int register as third operand: Generalize by calculating
   -- the instruction suffix (".vx")
-  VMSEQ fmt o1 o2 o3 -> configVec fmt $$ op3 (text "\tvmseq.vx") o1 o2 o3
+  VMSEQ o1@(OpReg fmt _reg) o2 o3 -> configVec fmt $$ op3 (text "\tvmseq.vx") o1 o2 o3
+  VMSEQ o1 _o2 _o3 -> pprPanic "RV64.pprInstr - VMSEQ can only target registers." (pprOp platform o1)
   -- TODO: All operands need to be vector registers. Make this more general or
   -- validate this constraint.
-  VMERGE fmt o1 o2 o3 o4 -> configVec fmt $$ op4 (text "\tvmerge.vvm") o1 o2 o3 o4
-  VSLIDEDOWN fmt o1 o2 o3 -> configVec fmt $$ op3 (text "\tvslidedown.vx") o1 o2 o3
-  -- TODO: Use configVec, adjust VSETIVLI to contain only format?
-  VSETIVLI dst len width grouping ta ma ->
+  VMERGE o1@(OpReg fmt _reg) o2 o3 o4 -> configVec fmt $$ op4 (text "\tvmerge.vvm") o1 o2 o3 o4
+  VMERGE o1 _o2 _o3 _o4 -> pprPanic "RV64.pprInstr - VMERGE can only target registers." (pprOp platform o1)
+  VSLIDEDOWN o1@(OpReg fmt _reg) o2 o3 -> configVec fmt $$ op3 (text "\tvslidedown.vx") o1 o2 o3
+  VSLIDEDOWN o1 _o2 _o3 -> pprPanic "RV64.pprInstr - VSLIDEDOWN can only target registers." (pprOp platform o1)
+  -- TODO: adjust VSETIVLI to contain only format?
+  VSETIVLI (OpReg fmt dst) len width grouping ta ma ->
     line
       $ text "\tvsetivli"
-      <+> pprReg W64 dst
+      <+> pprReg fmt dst
       <> comma
       <+> (text . show) len
       <> comma
@@ -773,17 +778,29 @@ pprInstr platform instr = case instr of
       <+> pprTA ta
       <> comma
       <+> pprMasking ma
-  VNEG fmt o1 o2 -> configVec fmt $$ op2 (text "\tvfneg.v") o1 o2
-  VADD fmt o1 o2 o3 -> configVec fmt $$ op3 (text "\tvfadd.vv") o1 o2 o3
-  VSUB fmt o1 o2 o3 -> configVec fmt $$ op3 (text "\tvfsub.vv") o1 o2 o3
-  VMUL fmt o1 o2 o3 -> configVec fmt $$ op3 (text "\tvfmul.vv") o1 o2 o3
-  VQUOT fmt o1 o2 o3 -> configVec fmt $$ op3 (text "\tvfdiv.vv") o1 o2 o3
-  VSMIN fmt o1 o2 o3 -> configVec fmt $$ op3 (text "\tvmin.vv") o1 o2 o3
-  VSMAX fmt o1 o2 o3 -> configVec fmt $$ op3 (text "\tvmax.vv") o1 o2 o3
-  VUMIN fmt o1 o2 o3 -> configVec fmt $$ op3 (text "\tvminu.vv") o1 o2 o3
-  VUMAX fmt o1 o2 o3 -> configVec fmt $$ op3 (text "\tvmaxu.vv") o1 o2 o3
-  VFMIN fmt o1 o2 o3 -> configVec fmt $$ op3 (text "\tvfmin.vv") o1 o2 o3
-  VFMAX fmt o1 o2 o3 -> configVec fmt $$ op3 (text "\tvfmax.vv") o1 o2 o3
+  VSETIVLI o1 _ _ _ _ _ -> pprPanic "RV64.pprInstr - VSETIVLI can only target registers." (pprOp platform o1)
+  VNEG o1@(OpReg fmt _reg) o2 -> configVec fmt $$ op2 (text "\tvfneg.v") o1 o2
+  VNEG o1 _o2 -> pprPanic "RV64.pprInstr - VNEG can only target registers." (pprOp platform o1)
+  VADD o1@(OpReg fmt _reg) o2 o3 -> configVec fmt $$ op3 (text "\tvfadd.vv") o1 o2 o3
+  VADD o1 _o2 _o3 -> pprPanic "RV64.pprInstr - VADD can only target registers." (pprOp platform o1)
+  VSUB o1@(OpReg fmt _reg) o2 o3 -> configVec fmt $$ op3 (text "\tvfsub.vv") o1 o2 o3
+  VSUB o1 _o2 _o3 -> pprPanic "RV64.pprInstr - VSUB can only target registers." (pprOp platform o1)
+  VMUL o1@(OpReg fmt _reg) o2 o3 -> configVec fmt $$ op3 (text "\tvfmul.vv") o1 o2 o3
+  VMUL o1 _o2 _o3 -> pprPanic "RV64.pprInstr - VMUL can only target registers." (pprOp platform o1)
+  VQUOT o1@(OpReg fmt _reg) o2 o3 -> configVec fmt $$ op3 (text "\tvfdiv.vv") o1 o2 o3
+  VQUOT o1 _o2 _o3 -> pprPanic "RV64.pprInstr - VQUOT can only target registers." (pprOp platform o1)
+  VSMIN o1@(OpReg fmt _reg) o2 o3 -> configVec fmt $$ op3 (text "\tvmin.vv") o1 o2 o3
+  VSMIN o1 _o2 _o3 -> pprPanic "RV64.pprInstr - VSMIN can only target registers." (pprOp platform o1)
+  VSMAX o1@(OpReg fmt _reg) o2 o3 -> configVec fmt $$ op3 (text "\tvmax.vv") o1 o2 o3
+  VSMAX o1 _o2 _o3 -> pprPanic "RV64.pprInstr - VSMAX can only target registers." (pprOp platform o1)
+  VUMIN o1@(OpReg fmt _reg) o2 o3 -> configVec fmt $$ op3 (text "\tvminu.vv") o1 o2 o3
+  VUMIN o1 _o2 _o3 -> pprPanic "RV64.pprInstr - VUMIN can only target registers." (pprOp platform o1)
+  VUMAX o1@(OpReg fmt _reg) o2 o3 -> configVec fmt $$ op3 (text "\tvmaxu.vv") o1 o2 o3
+  VUMAX o1 _o2 _o3 -> pprPanic "RV64.pprInstr - VUMAX can only target registers." (pprOp platform o1)
+  VFMIN o1@(OpReg fmt _reg) o2 o3 -> configVec fmt $$ op3 (text "\tvfmin.vv") o1 o2 o3
+  VFMIN o1 _o2 _o3 -> pprPanic "RV64.pprInstr - VFMIN can only target registers." (pprOp platform o1)
+  VFMAX o1@(OpReg fmt _reg) o2 o3 -> configVec fmt $$ op3 (text "\tvfmax.vv") o1 o2 o3
+  VFMAX o1 _o2 _o3 -> pprPanic "RV64.pprInstr - VFMAX can only target registers." (pprOp platform o1)
   instr -> panic $ "RV64.pprInstr - Unknown instruction: " ++ instrCon instr
   where
     op1 op o1 = line $ op <+> pprOp platform o1
@@ -825,15 +842,9 @@ pprInstr platform instr = case instr of
     opToVInstrSuffix op | isVectorRegOp op = text "v"
     opToVInstrSuffix op = pprPanic "Unsupported operand for vector instruction" (pprOp platform op)
 
-    -- TODO: unused?
-    floatWidthSuffix :: (IsLine doc) => Width -> doc
-    floatWidthSuffix W32 = text "s"
-    floatWidthSuffix W64 = text "d"
-    floatWidthSuffix w = pprPanic "Unsupported floating point vector operation width" (ppr w)
-
     configVec :: (IsDoc doc) => Format -> doc
     configVec (VecFormat length fmt) =
-      pprInstr platform (VSETIVLI zeroReg (fromIntegral length) ((formatToWidth . scalarFormatFormat) fmt) M1 TA MA)
+      pprInstr platform (VSETIVLI (OpReg II64 zeroReg) (fromIntegral length) ((formatToWidth . scalarFormatFormat) fmt) M1 TA MA)
     configVec fmt = pprPanic "Unsupported vector configuration" ((text . show) fmt)
 
 floatOpPrecision :: Platform -> Operand -> Operand -> String


=====================================
compiler/GHC/CmmToAsm/RV64/Regs.hs
=====================================
@@ -72,7 +72,7 @@ fa7RegNo, d17RegNo :: RegNo
 d17RegNo = 49
 fa7RegNo = d17RegNo
 
-v0RegNo ::RegNo
+v0RegNo :: RegNo
 v0RegNo = 64
 
 v8RegNo :: RegNo
@@ -123,7 +123,7 @@ tmpReg = regSingle tmpRegNo
 v0Reg :: Reg
 v0Reg = regSingle v0RegNo
 
--- | All machine register numbers.
+-- | All machine register numbers. Including potential vector registers.
 allMachRegNos :: [RegNo]
 allMachRegNos = intRegs ++ fpRegs ++ vRegs
   where
@@ -137,6 +137,11 @@ allMachRegNos = intRegs ++ fpRegs ++ vRegs
 -- These are all registers minus those with a fixed role in RISCV ABI (zero, lr,
 -- sp, gp, tp, fp, tmp) and GHC RTS (Base, Sp, Hp, HpLim, R1..R8, F1..F6,
 -- D1..D6.)
+--
+-- We pretend that vector registers are always available. If they aren't, we
+-- simply don't emit instructions using them. This is much simpler than fixing
+-- the register allocators which expect a configuration per platform (which we
+-- can only set when GHC itself gets build.)
 allocatableRegs :: Platform -> [RealReg]
 allocatableRegs platform =
   let isFree = freeReg platform



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/91169343d138c1afd81ee9aaa04711b9659d731b...715a80b12d5479512c9bd0130c8070c28b65f1cb

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/91169343d138c1afd81ee9aaa04711b9659d731b...715a80b12d5479512c9bd0130c8070c28b65f1cb
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20250125/c688f745/attachment-0001.html>


More information about the ghc-commits mailing list