[Git][ghc/ghc][wip/supersven/riscv-vectors] 4 commits: simd001 green

Sun Nov 10 15:38:59 UTC 2024

Sven Tennie pushed to branch wip/supersven/riscv-vectors at Glasgow Haskell Compiler / GHC


Commits:
558ed8e4 by Sven Tennie at 2024-11-02T10:46:28+01:00
simd001 green

- - - - -
03f2767a by Sven Tennie at 2024-11-02T12:22:36+01:00
simd003 green

- - - - -
a9b9a340 by Sven Tennie at 2024-11-02T13:19:05+01:00
simd006 green

- - - - -
0682aed3 by Sven Tennie at 2024-11-10T16:37:14+01:00
simd007 green

- - - - -


5 changed files:

- compiler/GHC/CmmToAsm/RV64/CodeGen.hs
- compiler/GHC/CmmToAsm/RV64/Instr.hs
- compiler/GHC/CmmToAsm/RV64/Ppr.hs
- compiler/GHC/CmmToAsm/RV64/Regs.hs
- testsuite/tests/simd/should_run/all.T


Changes:

=====================================
compiler/GHC/CmmToAsm/RV64/CodeGen.hs
=====================================
@@ -364,15 +364,11 @@ stmtToInstrs stmt = do
       genCCall target result_regs args
     CmmComment s -> pure (unitOL (COMMENT (ftext s)))
     CmmTick {} -> pure nilOL
-    CmmAssign reg src
-      | isFloatType ty -> assignReg_FltCode format reg src
-      | otherwise -> assignReg_IntCode format reg src
+    CmmAssign reg src -> assignReg format reg src
       where
         ty = cmmRegType reg
         format = cmmTypeFormat ty
-    CmmStore addr src _alignment
-      | isFloatType ty -> assignMem_FltCode format addr src
-      | otherwise -> assignMem_IntCode format addr src
+    CmmStore addr src _alignment -> assignMem format addr src
       where
         ty = cmmExprType platform src
         format = cmmTypeFormat ty
@@ -662,9 +658,21 @@ getRegister' config plat expr =
       let format = cmmTypeFormat rep
           width = typeWidth rep
       Amode addr addr_code <- getAmode plat width mem
-      case width of
-        w
-          | (w <= W64) || isVecFormat format ->
+      case (width, format) of
+        (_w, f)
+          | VecFormat l vf <- f ->
+              pure
+                ( Any
+                    format
+                    ( \dst ->
+                        unitOL (COMMENT (text "XXX here")) `appOL`
+                        addr_code `snocOL`
+                          annExpr expr
+                            (LDRU format (OpReg width dst) (OpAddr addr))
+                    )
+                )
+        (w, _f)
+          | w <= W64 ->
               -- Load without sign-extension. See Note [Signed arithmetic on RISCV64]
               pure
                 ( Any
@@ -828,20 +836,29 @@ getRegister' config plat expr =
         --TODO: MO_V_Broadcast with immediate: If the right value is a literal,
         -- it may use vmv.v.i (simpler)
         MO_V_Broadcast length w -> do
-          (reg_idx, format_idx, code_idx) <- getSomeReg e
-          let w_idx = formatToWidth format_idx
+          (reg_val, format_val, code_val) <- getSomeReg e
+          let w_val = formatToWidth format_val
           pure $ Any (intFormat w) $ \dst ->
-            code_idx `snocOL`
-            annExpr expr (VSETIVLI zeroReg (fromIntegral length) w M1 TA MA) `snocOL`
-            VMV (OpReg w dst) (OpReg w_idx reg_idx)
+            code_val `snocOL`
+            annExpr expr
+              (VMV (VecFormat length (intScalarFormat w)) (OpReg w dst) (OpReg w_val reg_val))
 
         MO_VF_Broadcast length w -> do
-          (reg_idx, format_idx, code_idx) <- getSomeReg e
-          let w_idx = formatToWidth format_idx
+          (reg_val, format_val, code_val) <- getSomeReg e
+          let w_val = formatToWidth format_val
           pure $ Any (vecFormat (cmmVec length (cmmFloat w))) $ \dst ->
-            code_idx `snocOL`
-            annExpr expr (VSETIVLI zeroReg (fromIntegral length) w M1 TA MA) `snocOL`
-            VMV (OpReg w dst) (OpReg w_idx reg_idx)
+            code_val `snocOL`
+            annExpr expr
+              (VMV (VecFormat length (floatScalarFormat w)) (OpReg w dst) (OpReg w_val reg_val))
+
+        -- TODO: Why is there no MO_V_Neg (integer counterpart to MO_VF_Neg)?
+        MO_VF_Neg length w -> do
+          (reg_v, format_v, code_v) <- getSomeReg e
+          let w_v = formatToWidth format_v
+          pure $ Any (vecFormat (cmmVec length (cmmFloat w))) $ \dst ->
+            code_v `snocOL`
+            annExpr expr
+              (VNEG (VecFormat length (floatScalarFormat w))(OpReg w dst) (OpReg w_v reg_v))
 
         x -> pprPanic ("getRegister' (monadic CmmMachOp): " ++ show x) (pdoc plat expr)
       where
@@ -1123,6 +1140,17 @@ getRegister' config plat expr =
                       `appOL` op (OpReg w dst) (OpReg w reg_fx) (OpReg w reg_fy)
                 )
 
+          vecOp length w op =  do
+            (reg_x, format_x, code_x) <- getSomeReg x
+            (reg_y, format_y, code_y) <- getSomeReg y
+            massertPpr (isVecFormat format_x && isVecFormat format_y) $
+              text "vecOp: non-vector operand. operands: " <+> ppr format_x <+> ppr format_y
+            pure $ Any (vecFormat (cmmVec length (cmmFloat w))) $ \dst ->
+              code_x `appOL`
+              code_y `snocOL`
+              annExpr expr
+                (op (OpReg w dst) (OpReg w reg_x) (OpReg w reg_y))
+
       case op of
         -- Integer operations
         -- Add/Sub should only be Integer Options.
@@ -1174,6 +1202,7 @@ getRegister' config plat expr =
         MO_U_Shr w -> intOp False w (\d x y -> unitOL $ annExpr expr (SRL d x y))
         MO_S_Shr w -> intOp True w (\d x y -> unitOL $ annExpr expr (SRA d x y))
 
+        -- TODO: Use vecOp here
         MO_VF_Extract length w -> do
           (reg_v, format_v, code_v) <- getSomeReg x
           (reg_idx, format_idx, code_idx) <- getSomeReg y
@@ -1184,15 +1213,27 @@ getRegister' config plat expr =
             code_v `appOL`
             code_idx `snocOL`
             -- Setup
-            -- vsetivli zero, 1, e32, m1, ta, ma
             -- TODO: Use width
-            annExpr expr (VSETIVLI zeroReg (fromIntegral length) w M1 TA MA) `snocOL`
+            annExpr expr
             -- Move selected element to index 0
             -- vslidedown.vi v8, v9, 2
-            VSLIDEDOWN (OpReg width_v tmp) (OpReg width_v reg_v) (OpReg (formatToWidth format_idx) reg_idx) `snocOL`
+              (VSLIDEDOWN (VecFormat length (floatScalarFormat w)) (OpReg width_v tmp) (OpReg width_v reg_v) (OpReg (formatToWidth format_idx) reg_idx)) `snocOL`
             -- Move to float register
             -- vmv.x.s a0, v8
-            VMV (OpReg w dst) (OpReg (formatToWidth tmpFormat) tmp)
+            VMV (VecFormat length (floatScalarFormat w))(OpReg w dst) (OpReg (formatToWidth tmpFormat) tmp)
+        MO_VF_Add  length w -> vecOp length w (\d x y -> (VADD (VecFormat length (floatScalarFormat w)) d x y))
+        MO_VF_Sub  length w -> vecOp length w (\d x y -> (VSUB (VecFormat length (floatScalarFormat w)) d x y))
+        MO_VF_Mul  length w -> vecOp length w (\d x y -> (VMUL (VecFormat length (floatScalarFormat w)) d x y))
+        MO_VF_Quot length w -> vecOp length w (\d x y -> (VQUOT (VecFormat length (floatScalarFormat w)) d x y))
+
+        -- See https://godbolt.org/z/PvcWKMKoW
+        MO_VS_Min length w -> vecOp length w (\d x y -> (VSMIN (VecFormat length (intScalarFormat w)) d x y))
+        MO_VS_Max length w -> vecOp length w (\d x y -> (VSMAX (VecFormat length (intScalarFormat w)) d x y))
+        MO_VU_Min length w -> vecOp length w (\d x y -> (VUMIN (VecFormat length (intScalarFormat w)) d x y))
+        MO_VU_Max length w -> vecOp length w (\d x y -> (VUMAX (VecFormat length (intScalarFormat w)) d x y))
+        MO_VF_Min length w -> vecOp length w (\d x y -> (VFMIN (VecFormat length (floatScalarFormat w)) d x y))
+        MO_VF_Max length w -> vecOp length w (\d x y -> (VFMAX (VecFormat length (floatScalarFormat w)) d x y))
+
 
         _e -> panic $ "Missing operation " ++ show expr
 
@@ -1264,16 +1305,16 @@ getRegister' config plat expr =
               code_f `appOL`
               code_idx `appOL`
               code_l `snocOL`
-              annExpr expr (VSETIVLI zeroReg (fromIntegral length) w M1 TA MA) `snocOL`
+              annExpr expr
               -- Build mask for index
               -- 1. fill elements with index numbers
               -- TODO: The Width is made up
-              VID (OpReg W8 v0Reg) (OpReg (formatToWidth format_l) reg_l) `snocOL`
+               (VID (VecFormat length (intScalarFormat w)) (OpReg W8 v0Reg) (OpReg (formatToWidth format_l) reg_l)) `snocOL`
               -- 2. Splat value into tmp vector
-              VMV (OpReg w tmp) (OpReg (formatToWidth format_f) reg_f) `snocOL`
+              VMV (VecFormat length (floatScalarFormat w)) (OpReg w tmp) (OpReg (formatToWidth format_f) reg_f) `snocOL`
               -- 3. Merge with mask -> set element at index
-              VMSEQ (OpReg W8 v0Reg) (OpReg W8 v0Reg) (OpReg (formatToWidth format_idx) reg_idx) `snocOL`
-              VMERGE (OpReg w dst) (OpReg (formatToWidth format_v) reg_v)  (OpReg w tmp) (OpReg W8 v0Reg)
+              VMSEQ (VecFormat length (floatScalarFormat w)) (OpReg W8 v0Reg) (OpReg W8 v0Reg) (OpReg (formatToWidth format_idx) reg_idx) `snocOL`
+              VMERGE (VecFormat length (floatScalarFormat w)) (OpReg w dst) (OpReg (formatToWidth format_v) reg_v)  (OpReg w tmp) (OpReg W8 v0Reg)
 
         _ ->
           pprPanic "getRegister' (unhandled ternary CmmMachOp): "
@@ -1532,18 +1573,18 @@ getAmode _platform _ (CmmMachOp (MO_Add _w) [expr, CmmLit (CmmInt off _w')])
   | fitsIn12bitImm off =
       do
         (reg, _format, code) <- getSomeReg expr
-        return $ Amode (AddrRegImm reg (ImmInteger off)) code
+        return $ Amode (AddrRegImm reg (ImmInteger off)) $ COMMENT (text "getAmode generic" <+> (text . show)  expr) `consOL` code
 getAmode _platform _ (CmmMachOp (MO_Sub _w) [expr, CmmLit (CmmInt off _w')])
   | fitsIn12bitImm (-off) =
       do
         (reg, _format, code) <- getSomeReg expr
-        return $ Amode (AddrRegImm reg (ImmInteger (-off))) code
+        return $ Amode (AddrRegImm reg (ImmInteger (-off))) $ COMMENT (text "getAmode generic" <+> (text . show)  expr) `consOL` code
 
 -- Generic case
 getAmode _platform _ expr =
   do
     (reg, _format, code) <- getSomeReg expr
-    return $ Amode (AddrReg reg) code
+    return $ Amode (AddrReg reg) $ COMMENT (text "getAmode generic" <+> (text . show)  expr) `consOL` code
 
 -- -----------------------------------------------------------------------------
 -- Generating assignments
@@ -1557,11 +1598,8 @@ getAmode _platform _ expr =
 -- fails when the right hand side is forced into a fixed register
 -- (e.g. the result of a call).
 
-assignMem_IntCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock
-assignReg_IntCode :: Format -> CmmReg -> CmmExpr -> NatM InstrBlock
-assignMem_FltCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock
-assignReg_FltCode :: Format -> CmmReg -> CmmExpr -> NatM InstrBlock
-assignMem_IntCode rep addrE srcE =
+assignMem :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock
+assignMem rep addrE srcE =
   do
     (src_reg, _format, code) <- getSomeReg srcE
     platform <- getPlatform
@@ -1573,7 +1611,8 @@ assignMem_IntCode rep addrE srcE =
                    `snocOL` STR rep (OpReg w src_reg) (OpAddr addr)
                )
 
-assignReg_IntCode _ reg src =
+assignReg :: Format -> CmmReg -> CmmExpr -> NatM InstrBlock
+assignReg _ reg src =
   do
     platform <- getPlatform
     let dst = getRegisterReg platform reg
@@ -1588,12 +1627,6 @@ assignReg_IntCode _ reg src =
                        `snocOL` MOV (OpReg (formatToWidth format) dst) (OpReg (formatToWidth format) freg)
                    )

--- Let's treat Floating point stuff
--- as integer code for now. Opaque.
-assignMem_FltCode = assignMem_IntCode
-
-assignReg_FltCode = assignReg_IntCode
-
 -- -----------------------------------------------------------------------------
 -- Jumps
 -- AArch64 has 26bits for targets, whereas RiscV only has 20.
@@ -2336,19 +2369,30 @@ makeFarBranches {- only used when debugging -} _platform statics basic_blocks =
       CSET {} -> 2
       STR {} -> 1
       LDR {} -> 3
-      LDRU {} -> 1
+      LDRU {} -> 2
       FENCE {} -> 1
       FCVT {} -> 1
       FABS {} -> 1
       FMIN {} -> 1
       FMAX {} -> 1
       FMA {} -> 1
-      VMV {} -> 1
-      VID {} -> 1
-      VMSEQ {} -> 1
-      VMERGE {} -> 1
-      VSLIDEDOWN {} -> 1
+      VMV {} -> 2
+      VID {} -> 2
+      VMSEQ {} -> 2
+      VMERGE {} -> 2
+      VSLIDEDOWN {} -> 2
       VSETIVLI {} -> 1
+      VNEG {} -> 2
+      VADD {} -> 2
+      VSUB {} -> 2
+      VMUL {} -> 2
+      VQUOT {} -> 2
+      VSMIN {} -> 2
+      VSMAX {} -> 2
+      VUMIN {} -> 2
+      VUMAX {} -> 2
+      VFMIN {} -> 2
+      VFMAX {} -> 2
       -- estimate the subsituted size for jumps to lables
       -- jumps to registers have size 1
       BCOND {} -> long_bc_jump_size


=====================================
compiler/GHC/CmmToAsm/RV64/Instr.hs
=====================================
@@ -109,12 +109,24 @@ regUsageOfInstr platform instr = case instr of
   FABS dst src -> usage (regOp src, regOp dst)
   FMIN dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
   FMAX dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
-  VMV dst src1 -> usage (regOp src1, regOp dst)
-  VID dst src1 -> usage (regOp src1, regOp dst)
-  VMSEQ dst src op -> usage (regOp src ++ regOp op, regOp dst)
-  VMERGE dst op1 op2 opm -> usage (regOp op1 ++ regOp op2 ++ regOp opm, regOp dst)
-  VSLIDEDOWN dst op1 op2 -> usage (regOp op1 ++ regOp op2, regOp dst)
+  VMV fmt dst src1 -> usage (regOp src1, regOp dst)
+  VID fmt dst src1 -> usage (regOp src1, regOp dst)
+  VMSEQ fmt dst src op -> usage (regOp src ++ regOp op, regOp dst)
+  VMERGE fmt dst op1 op2 opm -> usage (regOp op1 ++ regOp op2 ++ regOp opm, regOp dst)
+  VSLIDEDOWN fmt dst op1 op2 -> usage (regOp op1 ++ regOp op2, regOp dst)
+  -- WARNING: VSETIVLI is a special case. It changes the interpretation of all vector registers!
   VSETIVLI dst _ _ _ _ _ -> usage ([], [dst])
+  VNEG fmt dst src1 -> usage (regOp src1, regOp dst)
+  VADD fmt dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
+  VSUB fmt dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
+  VMUL fmt dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
+  VQUOT fmt dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
+  VSMIN fmt dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
+  VSMAX fmt dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
+  VUMIN fmt dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
+  VUMAX fmt dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
+  VFMIN fmt dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
+  VFMAX fmt dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
   FMA _ dst src1 src2 src3 ->
     usage (regOp src1 ++ regOp src2 ++ regOp src3, regOp dst)
   _ -> panic $ "regUsageOfInstr: " ++ instrCon instr
@@ -216,12 +228,23 @@ patchRegsOfInstr instr env = case instr of
   FABS o1 o2 -> FABS (patchOp o1) (patchOp o2)
   FMIN o1 o2 o3 -> FMIN (patchOp o1) (patchOp o2) (patchOp o3)
   FMAX o1 o2 o3 -> FMAX (patchOp o1) (patchOp o2) (patchOp o3)
-  VMV o1 o2 -> VMV (patchOp o1) (patchOp o2)
-  VID o1 o2 -> VID (patchOp o1) (patchOp o2)
-  VMSEQ o1 o2 o3 -> VMSEQ (patchOp o1) (patchOp o2) (patchOp o3)
-  VMERGE o1 o2 o3 o4 -> VMERGE (patchOp o1) (patchOp o2) (patchOp o3) (patchOp o4)
-  VSLIDEDOWN o1 o2 o3 -> VSLIDEDOWN (patchOp o1) (patchOp o2) (patchOp o3)
+  VMV fmt o1 o2 -> VMV fmt (patchOp o1) (patchOp o2)
+  VID fmt o1 o2 -> VID fmt (patchOp o1) (patchOp o2)
+  VMSEQ fmt o1 o2 o3 -> VMSEQ fmt (patchOp o1) (patchOp o2) (patchOp o3)
+  VMERGE fmt o1 o2 o3 o4 -> VMERGE fmt (patchOp o1) (patchOp o2) (patchOp o3) (patchOp o4)
+  VSLIDEDOWN fmt o1 o2 o3 -> VSLIDEDOWN fmt (patchOp o1) (patchOp o2) (patchOp o3)
   VSETIVLI o1 o2 o3 o4 o5 o6 -> VSETIVLI (env o1) o2 o3 o4 o5 o6
+  VNEG fmt o1 o2 -> VNEG fmt (patchOp o1) (patchOp o2)
+  VADD fmt o1 o2 o3 -> VADD fmt (patchOp o1) (patchOp o2) (patchOp o3)
+  VSUB fmt o1 o2 o3 -> VSUB fmt (patchOp o1) (patchOp o2) (patchOp o3)
+  VMUL fmt o1 o2 o3 -> VMUL fmt (patchOp o1) (patchOp o2) (patchOp o3)
+  VQUOT fmt o1 o2 o3 -> VQUOT fmt (patchOp o1) (patchOp o2) (patchOp o3)
+  VSMIN fmt o1 o2 o3 -> VSMIN fmt (patchOp o1) (patchOp o2) (patchOp o3)
+  VSMAX fmt o1 o2 o3 -> VSMAX fmt (patchOp o1) (patchOp o2) (patchOp o3)
+  VUMIN fmt o1 o2 o3 -> VUMIN fmt (patchOp o1) (patchOp o2) (patchOp o3)
+  VUMAX fmt o1 o2 o3 -> VUMAX fmt (patchOp o1) (patchOp o2) (patchOp o3)
+  VFMIN fmt o1 o2 o3 -> VFMIN fmt (patchOp o1) (patchOp o2) (patchOp o3)
+  VFMAX fmt o1 o2 o3 -> VFMAX fmt (patchOp o1) (patchOp o2) (patchOp o3)
   FMA s o1 o2 o3 o4 ->
     FMA s (patchOp o1) (patchOp o2) (patchOp o3) (patchOp o4)
   _ -> panic $ "patchRegsOfInstr: " ++ instrCon instr
@@ -319,21 +342,23 @@ mkSpillInstr ::
   -- | spill slot to use
   Int ->
   [Instr]
-mkSpillInstr _config (RegWithFormat reg _fmt) delta slot =
+mkSpillInstr _config (RegWithFormat reg fmt) delta slot =
   case off - delta of
-    imm | fitsIn12bitImm imm -> [mkStrSpImm imm]
+    imm | fitsIn12bitImm imm && not (isVecFormat fmt) -> [mkStrSpImm imm]
     imm ->
       [ movImmToTmp imm,
         addSpToTmp,
         mkStrTmp
       ]
   where
-    fmt = case reg of
-      RegReal (RealRegSingle n) | n < d0RegNo -> II64
-      _ -> FF64
+    fmt'
+      | isVecFormat fmt
+      = fmt
+      | otherwise
+      = scalarMoveFormat fmt
     mkStrSpImm imm =
       ANN (text "Spill@" <> int (off - delta))
-        $ STR fmt (OpReg W64 reg) (OpAddr (AddrRegImm spMachReg (ImmInt imm)))
+        $ STR fmt' (OpReg W64 reg) (OpAddr (AddrRegImm spMachReg (ImmInt imm)))
     movImmToTmp imm =
       ANN (text "Spill: TMP <- " <> int imm)
         $ MOV tmp (OpImm (ImmInt imm))
@@ -342,7 +367,7 @@ mkSpillInstr _config (RegWithFormat reg _fmt) delta slot =
         $ ADD tmp tmp sp
     mkStrTmp =
       ANN (text "Spill@" <> int (off - delta))
-        $ STR fmt (OpReg W64 reg) (OpAddr (AddrReg tmpReg))
+        $ STR fmt' (OpReg W64 reg) (OpAddr (AddrReg tmpReg))
 
     off = spillSlotToOffset slot
 
@@ -356,21 +381,23 @@ mkLoadInstr ::
   -- | spill slot to use
   Int ->
   [Instr]
-mkLoadInstr _config (RegWithFormat reg _fmt) delta slot =
+mkLoadInstr _config (RegWithFormat reg fmt) delta slot =
   case off - delta of
-    imm | fitsIn12bitImm imm -> [mkLdrSpImm imm]
+    imm | fitsIn12bitImm imm && not (isVecFormat fmt) -> [mkLdrSpImm imm]
     imm ->
       [ movImmToTmp imm,
         addSpToTmp,
         mkLdrTmp
       ]
   where
-    fmt = case reg of
-      RegReal (RealRegSingle n) | n < d0RegNo -> II64
-      _ -> FF64
+    fmt'
+      | isVecFormat fmt
+      = fmt
+      | otherwise
+      = scalarMoveFormat fmt
     mkLdrSpImm imm =
       ANN (text "Reload@" <> int (off - delta))
-        $ LDR fmt (OpReg W64 reg) (OpAddr (AddrRegImm spMachReg (ImmInt imm)))
+        $ LDR fmt' (OpReg W64 reg) (OpAddr (AddrRegImm spMachReg (ImmInt imm)))
     movImmToTmp imm =
       ANN (text "Reload: TMP <- " <> int imm)
         $ MOV tmp (OpImm (ImmInt imm))
@@ -379,10 +406,16 @@ mkLoadInstr _config (RegWithFormat reg _fmt) delta slot =
         $ ADD tmp tmp sp
     mkLdrTmp =
       ANN (text "Reload@" <> int (off - delta))
-        $ LDR fmt (OpReg W64 reg) (OpAddr (AddrReg tmpReg))
+        $ LDR fmt' (OpReg W64 reg) (OpAddr (AddrReg tmpReg))
 
     off = spillSlotToOffset slot
 
+scalarMoveFormat :: Format -> Format
+scalarMoveFormat fmt
+  | isFloatFormat fmt = FF64
+  | otherwise = II64
+
+
 -- | See if this instruction is telling us the current C stack delta
 takeDeltaInstr :: Instr -> Maybe Int
 takeDeltaInstr (ANN _ i) = takeDeltaInstr i
@@ -638,12 +671,23 @@ data Instr
     FMA FMASign Operand Operand Operand Operand
 
   -- TODO: Care about the variants (<instr>.x.y) -> sum type
-  | VMV Operand Operand
-  | VID Operand Operand
-  | VMSEQ Operand Operand Operand
-  | VMERGE Operand Operand Operand Operand
-  | VSLIDEDOWN Operand Operand Operand
+  | VMV Format Operand Operand
+  | VID Format Operand Operand
+  | VMSEQ Format Operand Operand Operand
+  | VMERGE Format Operand Operand Operand Operand
+  | VSLIDEDOWN Format Operand Operand Operand
   | VSETIVLI Reg Word Width VectorGrouping TailAgnosticFlag MaskAgnosticFlag
+  | VNEG Format Operand Operand
+  | VADD Format Operand Operand Operand
+  | VSUB Format Operand Operand Operand
+  | VMUL Format Operand Operand Operand
+  | VQUOT Format Operand Operand Operand
+  | VSMIN Format Operand Operand Operand
+  | VSMAX Format Operand Operand Operand
+  | VUMIN Format Operand Operand Operand
+  | VUMAX Format Operand Operand Operand
+  | VFMIN Format Operand Operand Operand
+  | VFMAX Format Operand Operand Operand
 
 -- | Operand of a FENCE instruction (@r@, @w@ or @rw@)
 data FenceType = FenceRead | FenceWrite | FenceReadWrite
@@ -714,6 +758,17 @@ instrCon i =
     VMERGE {} -> "VMERGE"
     VSLIDEDOWN {} -> "VSLIDEDOWN"
     VSETIVLI {} -> "VSETIVLI"
+    VNEG {} -> "VNEG"
+    VADD {} -> "VADD"
+    VSUB {} -> "VSUB"
+    VMUL {} -> "VMUL"
+    VQUOT {} -> "VQUOT"
+    VSMIN {} -> "VSMIN"
+    VSMAX {} -> "VSMAX"
+    VUMIN {} -> "VUMIN"
+    VUMAX {} -> "VUMAX"
+    VFMIN {} -> "VFMIN"
+    VFMAX {} -> "VFMAX"
     FMA variant _ _ _ _ ->
       case variant of
         FMAdd -> "FMADD"
@@ -725,6 +780,7 @@ data Target
   = TBlock BlockId
   | TReg Reg
 
+-- TODO: OpReg should carry the format, not only the width. This would unify OpReg and OpVecReg.
 data Operand
   = -- | register
     OpReg Width Reg


=====================================
compiler/GHC/CmmToAsm/RV64/Ppr.hs
=====================================
@@ -59,7 +59,6 @@ pprNatCmmDecl config proc@(CmmProc top_info lbl _ (ListGraph blocks)) =
               -- elimination, it might be the target of a goto.
               ( if platformHasSubsectionsViaSymbols platform
                   then -- See Note [Subsections Via Symbols]
-
                     line
                       $ text "\t.long "
                       <+> pprAsmLabel platform info_lbl
@@ -655,9 +654,12 @@ pprInstr platform instr = case instr of
   STR II64 o1 o2 -> op2 (text "\tsd") o1 o2
   STR FF32 o1 o2 -> op2 (text "\tfsw") o1 o2
   STR FF64 o1 o2 -> op2 (text "\tfsd") o1 o2
-  STR (VecFormat 2 FmtFloat) o1 o2@(OpAddr _) -> op2 (text "\tvse32.v") o1 o2
-  STR (VecFormat 2 FmtDouble) o1 o2@(OpAddr _) -> op2 (text "\tvse64.v") o1 o2
-  STR f o1 o2 -> pprPanic "Unsupported store" ((text . show) f <+> pprOp platform o1 <+> pprOp platform o2)
+  STR fmt@(VecFormat _ FmtInt8) o1 o2 -> configVec fmt $$ op2 (text "\tvse8.v") o1 o2
+  STR fmt@(VecFormat _ FmtInt16) o1 o2 -> configVec fmt $$ op2 (text "\tvse16.v") o1 o2
+  STR fmt@(VecFormat _ FmtInt32) o1 o2 -> configVec fmt $$ op2 (text "\tvse32.v") o1 o2
+  STR fmt@(VecFormat _ FmtInt64) o1 o2 -> configVec fmt $$ op2 (text "\tvse64.v") o1 o2
+  STR fmt@(VecFormat _ FmtFloat) o1 o2 -> configVec fmt $$ op2 (text "\tvse32.v") o1 o2
+  STR fmt@(VecFormat _ FmtDouble) o1 o2 -> configVec fmt $$ op2 (text "\tvse64.v") o1 o2
   LDR _f o1 (OpImm (ImmIndex lbl off)) ->
     lines_
       [ text "\tla" <+> pprOp platform o1 <> comma <+> pprAsmLabel platform lbl,
@@ -671,6 +673,12 @@ pprInstr platform instr = case instr of
   LDR II64 o1 o2 -> op2 (text "\tld") o1 o2
   LDR FF32 o1 o2 -> op2 (text "\tflw") o1 o2
   LDR FF64 o1 o2 -> op2 (text "\tfld") o1 o2
+  LDR fmt@(VecFormat _ FmtInt8) o1 o2 -> configVec fmt $$ op2 (text "\tvle8.v") o1 o2
+  LDR fmt@(VecFormat _ FmtInt16) o1 o2 -> configVec fmt $$ op2 (text "\tvle16.v") o1 o2
+  LDR fmt@(VecFormat _ FmtInt32) o1 o2 -> configVec fmt $$ op2 (text "\tvle32.v") o1 o2
+  LDR fmt@(VecFormat _ FmtInt64) o1 o2 -> configVec fmt $$ op2 (text "\tvle64.v") o1 o2
+  LDR fmt@(VecFormat _ FmtFloat) o1 o2 -> configVec fmt $$ op2 (text "\tvle32.v") o1 o2
+  LDR fmt@(VecFormat _ FmtDouble) o1 o2 -> configVec fmt $$ op2 (text "\tvle64.v") o1 o2
   LDRU II8 o1 o2 -> op2 (text "\tlbu") o1 o2
   LDRU II16 o1 o2 -> op2 (text "\tlhu") o1 o2
   LDRU II32 o1 o2 -> op2 (text "\tlwu") o1 o2
@@ -681,8 +689,12 @@ pprInstr platform instr = case instr of
   LDRU FF64 o1 o2@(OpAddr (AddrReg _)) -> op2 (text "\tfld") o1 o2
   LDRU FF64 o1 o2@(OpAddr (AddrRegImm _ _)) -> op2 (text "\tfld") o1 o2
   -- vectors
-  LDRU (VecFormat 2 FmtFloat) o1 o2@(OpAddr _) -> op2 (text "\tvle32.v") o1 o2
-  LDRU (VecFormat 2 FmtDouble) o1 o2@(OpAddr _) -> op2 (text "\tvle64.v") o1 o2
+  LDRU fmt@(VecFormat _ FmtInt8) o1 o2 -> configVec fmt $$ op2 (text "\tvle8.v") o1 o2
+  LDRU fmt@(VecFormat _ FmtInt16) o1 o2 -> configVec fmt $$ op2 (text "\tvle16.v") o1 o2
+  LDRU fmt@(VecFormat _ FmtInt32) o1 o2 -> configVec fmt $$ op2 (text "\tvle32.v") o1 o2
+  LDRU fmt@(VecFormat _ FmtInt64) o1 o2 -> configVec fmt $$ op2 (text "\tvle64.v") o1 o2
+  LDRU fmt@(VecFormat _ FmtFloat) o1 o2 -> configVec fmt $$ op2 (text "\tvle32.v") o1 o2
+  LDRU fmt@(VecFormat _ FmtDouble) o1 o2 -> configVec fmt $$ op2 (text "\tvle64.v") o1 o2
   LDRU f o1 o2 -> pprPanic "Unsupported unsigned load" ((text . show) f <+> pprOp platform o1 <+> pprOp platform o2)
   FENCE r w -> line $ text "\tfence" <+> pprFenceType r <> char ',' <+> pprFenceType w
   FCVT FloatToFloat o1@(OpReg W32 _) o2@(OpReg W64 _) -> op2 (text "\tfcvt.s.d") o1 o2
@@ -706,10 +718,12 @@ pprInstr platform instr = case instr of
       $ line (pprOp platform o1 <> text "->" <> pprOp platform o2)
   FABS o1 o2 | isSingleOp o2 -> op2 (text "\tfabs.s") o1 o2
   FABS o1 o2 | isDoubleOp o2 -> op2 (text "\tfabs.d") o1 o2
-  FMIN o1 o2 o3 | isSingleOp o1 -> op3 (text "\tfmin.s") o1 o2 o3
-                | isDoubleOp o2 -> op3 (text "\tfmin.d") o1 o2 o3
-  FMAX o1 o2 o3 | isSingleOp o1 -> op3 (text "\tfmax.s") o1 o2 o3
-                | isDoubleOp o2 -> op3 (text "\tfmax.d") o1 o2 o3
+  FMIN o1 o2 o3
+    | isSingleOp o1 -> op3 (text "\tfmin.s") o1 o2 o3
+    | isDoubleOp o2 -> op3 (text "\tfmin.d") o1 o2 o3
+  FMAX o1 o2 o3
+    | isSingleOp o1 -> op3 (text "\tfmax.s") o1 o2 o3
+    | isDoubleOp o2 -> op3 (text "\tfmax.d") o1 o2 o3
   FMA variant d r1 r2 r3 ->
     let fma = case variant of
           FMAdd -> text "\tfmadd" <> dot <> floatPrecission d
@@ -717,21 +731,44 @@ pprInstr platform instr = case instr of
           FNMAdd -> text "\tfnmadd" <> dot <> floatPrecission d
           FNMSub -> text "\tfnmsub" <> dot <> floatPrecission d
      in op4 fma d r1 r2 r3
-
-  VMV o1@(OpReg w _) o2 | isFloatOp o1 && isVectorOp o2 -> op2 (text "\tvfmv" <> dot <> text "f" <> dot <> text "s") o1 o2
-  VMV o1@(OpReg _w _) o2 | isFloatOp o2 -> op2 (text "\tvfmv" <> dot <> opToVInstrSuffix o1 <> dot <> text "f") o1 o2
-  VMV o1 o2 -> op2 (text "\tvmv" <> dot <> opToVInstrSuffix o1 <> dot <> opToVInstrSuffix o2) o1 o2
+  VMV fmt o1@(OpReg w _) o2 | isFloatOp o1 && isVectorOp o2 -> configVec fmt $$ op2 (text "\tvfmv" <> dot <> text "f" <> dot <> text "s") o1 o2
+  VMV fmt o1@(OpReg _w _) o2 | isFloatOp o2 -> configVec fmt $$ op2 (text "\tvfmv" <> dot <> opToVInstrSuffix o1 <> dot <> text "f") o1 o2
+  VMV fmt o1 o2 -> configVec fmt $$ op2 (text "\tvmv" <> dot <> opToVInstrSuffix o1 <> dot <> opToVInstrSuffix o2) o1 o2
   -- TODO: Remove o2 from constructor
-  VID o1 _o2 -> op1 (text "\tvid.v") o1
+  VID fmt o1 _o2 -> configVec fmt $$ op1 (text "\tvid.v") o1
   -- TODO: This expects int register as third operand: Generalize by calculating
   -- the instruction suffix (".vx")
-  VMSEQ o1 o2 o3 -> op3 (text "\tvmseq.vx") o1 o2 o3
+  VMSEQ fmt o1 o2 o3 -> configVec fmt $$ op3 (text "\tvmseq.vx") o1 o2 o3
   -- TODO: All operands need to be vector registers. Make this more general or
   -- validate this constraint.
-  VMERGE o1 o2 o3 o4 -> op4 (text "\tvmerge.vvm") o1 o2 o3 o4
-  VSLIDEDOWN o1 o2 o3 -> op3 (text "\tvslidedown.vx") o1 o2 o3
-  VSETIVLI dst len width grouping ta ma -> line $
-    text "\tvsetivli" <+> pprReg W64 dst <> comma <+> (text.show) len <> comma <+> pprVWidth width <> comma <+> pprGrouping grouping <> comma <+> pprTA ta <> comma <+> pprMasking ma
+  VMERGE fmt o1 o2 o3 o4 -> configVec fmt $$ op4 (text "\tvmerge.vvm") o1 o2 o3 o4
+  VSLIDEDOWN fmt o1 o2 o3 -> configVec fmt $$ op3 (text "\tvslidedown.vx") o1 o2 o3
+  -- TODO: Use configVec, adjust VSETIVLI to contain only format?
+  VSETIVLI dst len width grouping ta ma ->
+    line
+      $ text "\tvsetivli"
+      <+> pprReg W64 dst
+      <> comma
+      <+> (text . show) len
+      <> comma
+      <+> pprVWidth width
+      <> comma
+      <+> pprGrouping grouping
+      <> comma
+      <+> pprTA ta
+      <> comma
+      <+> pprMasking ma
+  VNEG fmt o1 o2 -> configVec fmt $$ op2 (text "\tvfneg.v") o1 o2
+  VADD fmt o1 o2 o3 -> configVec fmt $$ op3 (text "\tvfadd.vv") o1 o2 o3
+  VSUB fmt o1 o2 o3 -> configVec fmt $$ op3 (text "\tvfsub.vv") o1 o2 o3
+  VMUL fmt o1 o2 o3 -> configVec fmt $$ op3 (text "\tvfmul.vv") o1 o2 o3
+  VQUOT fmt o1 o2 o3 -> configVec fmt $$ op3 (text "\tvfdiv.vv") o1 o2 o3
+  VSMIN fmt o1 o2 o3 -> configVec fmt $$ op3 (text "\tvmin.vv") o1 o2 o3
+  VSMAX fmt o1 o2 o3 -> configVec fmt $$ op3 (text "\tvmax.vv") o1 o2 o3
+  VUMIN fmt o1 o2 o3 -> configVec fmt $$ op3 (text "\tvminu.vv") o1 o2 o3
+  VUMAX fmt o1 o2 o3 -> configVec fmt $$ op3 (text "\tvmaxu.vv") o1 o2 o3
+  VFMIN fmt o1 o2 o3 -> configVec fmt $$ op3 (text "\tvfmin.vv") o1 o2 o3
+  VFMAX fmt o1 o2 o3 -> configVec fmt $$ op3 (text "\tvfmax.vv") o1 o2 o3
   instr -> panic $ "RV64.pprInstr - Unknown instruction: " ++ instrCon instr
   where
     op1 op o1 = line $ op <+> pprOp platform o1
@@ -749,7 +786,7 @@ pprInstr platform instr = case instr of
     pprTA TA = text "ta"
     pprTA TU = text "tu"
 
-    pprVWidth :: IsLine doc => Width -> doc
+    pprVWidth :: (IsLine doc) => Width -> doc
     pprVWidth W8 = text "e8"
     pprVWidth W16 = text "e16"
     pprVWidth W32 = text "e32"
@@ -767,17 +804,22 @@ pprInstr platform instr = case instr of
     pprMasking MA = text "ma"
     pprMasking MU = text "mu"
 
-    opToVInstrSuffix :: IsLine doc => Operand -> doc
+    opToVInstrSuffix :: (IsLine doc) => Operand -> doc
     opToVInstrSuffix op | isIntOp op = text "x"
     opToVInstrSuffix op | isFloatOp op = text "f"
     opToVInstrSuffix op | isVectorOp op = text "v"
     opToVInstrSuffix op = pprPanic "Unsupported operand for vector instruction" (pprOp platform op)
 
-    floatWidthSuffix :: IsLine doc => Width -> doc
+    floatWidthSuffix :: (IsLine doc) => Width -> doc
     floatWidthSuffix W32 = text "s"
     floatWidthSuffix W64 = text "d"
     floatWidthSuffix w = pprPanic "Unsupported floating point vector operation width" (ppr w)
 
+    configVec :: (IsDoc doc) => Format -> doc
+    configVec (VecFormat length fmt) =
+      pprInstr platform (VSETIVLI zeroReg (fromIntegral length) ((formatToWidth . scalarFormatFormat) fmt) M1 TA MA)
+    configVec fmt = pprPanic "Unsupported vector configuration" ((text . show) fmt)
+
 floatOpPrecision :: Platform -> Operand -> Operand -> String
 floatOpPrecision _p l r | isFloatOp l && isFloatOp r && isSingleOp l && isSingleOp r = "s" -- single precision
 floatOpPrecision _p l r | isFloatOp l && isFloatOp r && isDoubleOp l && isDoubleOp r = "d" -- double precision


=====================================
compiler/GHC/CmmToAsm/RV64/Regs.hs
=====================================
@@ -144,6 +144,10 @@ allGpArgRegs = map regSingle [a0RegNo .. a7RegNo]
 allFpArgRegs :: [Reg]
 allFpArgRegs = map regSingle [fa0RegNo .. fa7RegNo]
 
+-- | Literally all general vector registers (no status registers)
+allVecRegs :: [Reg]
+allVecRegs = map regSingle [v0RegNo .. v31RegNo]
+
 -- * Addressing modes
 
 -- | Addressing modes


=====================================
testsuite/tests/simd/should_run/all.T
=====================================
@@ -26,14 +26,14 @@ setTestOpts(
 
 # TODO: Revert debug trace dumps
 test('simd000', [], compile_and_run, ['-opta=-march=rv64gv -dppr-debug -ddump-to-file -ddump-asm'])
-test('simd001', [], compile_and_run, [''])
-test('simd002', [], compile_and_run, [''])
-test('simd003', [], compile_and_run, [''])
-test('simd004', [], compile_and_run, ['-O2'])
-test('simd005', [], compile_and_run, [''])
-test('simd006', [], compile_and_run, [''])
-test('simd007', [], compile_and_run, [''])
-test('simd008', [], compile_and_run, [''])
+test('simd001', [], compile_and_run, ['-opta=-march=rv64gv -dppr-debug -ddump-to-file -ddump-asm'])
+test('simd002', [], compile_and_run, ['-opta=-march=rv64gv -dppr-debug -ddump-to-file -ddump-asm'])
+test('simd003', [], compile_and_run, ['-opta=-march=rv64gv -dppr-debug -ddump-to-file -ddump-asm'])
+test('simd004', [], compile_and_run, ['-opta=-march=rv64gv -dppr-debug -ddump-to-file -ddump-asm -O2'])
+test('simd005', [], compile_and_run, ['-opta=-march=rv64gv -dppr-debug -ddump-to-file -ddump-asm'])
+test('simd006', [], compile_and_run, ['-opta=-march=rv64gv -dppr-debug -ddump-to-file -ddump-asm'])
+test('simd007', [], compile_and_run, ['-opta=-march=rv64gv -dppr-debug -ddump-to-file -ddump-asm'])
+test('simd008', [], compile_and_run, ['-opta=-march=rv64gv -dppr-debug -ddump-to-file -ddump-asm'])
 test('simd009', [ req_th
                 , extra_files(['Simd009b.hs', 'Simd009c.hs'])
                 , unless(have_cpu_feature('avx'), skip)



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/e8d84f6a448d91eebe48c130b6c66a9ee0ffb9fd...0682aed34baf0f7a4767d1d1571982851086f7dc

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/e8d84f6a448d91eebe48c130b6c66a9ee0ffb9fd...0682aed34baf0f7a4767d1d1571982851086f7dc
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20241110/fbe15279/attachment-0001.html>