[Git][ghc/ghc][wip/supersven/riscv-vectors] simd000 green

Fri Nov 1 17:29:25 UTC 2024

Sven Tennie pushed to branch wip/supersven/riscv-vectors at Glasgow Haskell Compiler / GHC


Commits:
e8d84f6a by Sven Tennie at 2024-11-01T18:28:38+01:00
simd000 green

- - - - -


5 changed files:

- compiler/CodeGen.Platform.h
- compiler/GHC/CmmToAsm/RV64/CodeGen.hs
- compiler/GHC/CmmToAsm/RV64/Ppr.hs
- compiler/GHC/CmmToAsm/Reg/Linear/RV64.hs
- testsuite/tests/simd/should_run/simd000.hs


Changes:

=====================================
compiler/CodeGen.Platform.h
=====================================
@@ -1171,6 +1171,8 @@ freeReg 8 = False
 -- made-up inter-procedural (ip) register
 -- See Note [The made-up RISCV64 TMP (IP) register]
 freeReg 31 = False
+-- reserve v0 as mask operand
+freeReg 64 = False
 
 # if defined(REG_Base)
 freeReg REG_Base  = False


=====================================
compiler/GHC/CmmToAsm/RV64/CodeGen.hs
=====================================
@@ -827,19 +827,21 @@ getRegister' config plat expr =
 
         --TODO: MO_V_Broadcast with immediate: If the right value is a literal,
         -- it may use vmv.v.i (simpler)
-        MO_V_Broadcast _length w -> do
+        MO_V_Broadcast length w -> do
           (reg_idx, format_idx, code_idx) <- getSomeReg e
           let w_idx = formatToWidth format_idx
           pure $ Any (intFormat w) $ \dst ->
             code_idx `snocOL`
-            annExpr expr (VMV (OpReg w dst) (OpReg w_idx reg_idx))
+            annExpr expr (VSETIVLI zeroReg (fromIntegral length) w M1 TA MA) `snocOL`
+            VMV (OpReg w dst) (OpReg w_idx reg_idx)
 
-        MO_VF_Broadcast l w -> do
+        MO_VF_Broadcast length w -> do
           (reg_idx, format_idx, code_idx) <- getSomeReg e
           let w_idx = formatToWidth format_idx
-          pure $ Any (vecFormat (cmmVec l (cmmFloat w))) $ \dst ->
+          pure $ Any (vecFormat (cmmVec length (cmmFloat w))) $ \dst ->
             code_idx `snocOL`
-            annExpr expr (VMV (OpReg w dst) (OpReg w_idx reg_idx))
+            annExpr expr (VSETIVLI zeroReg (fromIntegral length) w M1 TA MA) `snocOL`
+            VMV (OpReg w dst) (OpReg w_idx reg_idx)
 
         x -> pprPanic ("getRegister' (monadic CmmMachOp): " ++ show x) (pdoc plat expr)
       where
@@ -1183,7 +1185,8 @@ getRegister' config plat expr =
             code_idx `snocOL`
             -- Setup
             -- vsetivli zero, 1, e32, m1, ta, ma
-            annExpr expr (VSETIVLI zeroReg 1 W32 M1 TA MA) `snocOL`
+            -- TODO: Use width
+            annExpr expr (VSETIVLI zeroReg (fromIntegral length) w M1 TA MA) `snocOL`
             -- Move selected element to index 0
             -- vslidedown.vi v8, v9, 2
             VSLIDEDOWN (OpReg width_v tmp) (OpReg width_v reg_v) (OpReg (formatToWidth format_idx) reg_idx) `snocOL`
@@ -1238,6 +1241,16 @@ getRegister' config plat expr =
           | otherwise
           -> sorry "The RISCV64 backend does not (yet) support vectors."
         -- TODO: Implement length as immediate
+
+        -- insert_float_into_vector:
+        --   vsetivli        zero, 4, e32, m1, ta, ma
+        --   vid.v   v8
+        --   vmseq.vi        v0, v8, 2
+        --   vfmv.v.f        v8, fa0
+        --   vmerge.vvm      v8, v8, v8, v0
+        --   ret
+        --
+        -- https://godbolt.org/z/sEG8MrM8P
         MO_VF_Insert length w ->
           do
             (reg_v, format_v, code_v) <- getSomeReg x
@@ -1245,21 +1258,22 @@ getRegister' config plat expr =
             (reg_idx, format_idx, code_idx) <- getSomeReg z
             (reg_l, format_l, code_l) <- getSomeReg (CmmLit (CmmInt (toInteger length) W64))
             tmp <- getNewRegNat (VecFormat length (floatScalarFormat w))
-            -- TODO: FmtInt8 should be FmtInt1 (which does not exist yet, so we're lying here)
-            reg_mask <- getNewRegNat (VecFormat length FmtInt8)
             let targetFormat = VecFormat length (floatScalarFormat w)
             pure $ Any targetFormat $ \dst ->
               code_v `appOL`
               code_f `appOL`
               code_idx `appOL`
               code_l `snocOL`
+              annExpr expr (VSETIVLI zeroReg (fromIntegral length) w M1 TA MA) `snocOL`
               -- Build mask for index
               -- 1. fill elements with index numbers
               -- TODO: The Width is made up
-              annExpr expr (VID (OpReg W8 reg_mask) (OpReg (formatToWidth format_l) reg_l)) `snocOL`
-              -- Merge with mask -> set element at index
-              VMSEQ (OpReg W8 reg_mask) (OpReg W8 reg_mask) (OpReg (formatToWidth format_f) reg_f) `snocOL`
-              VMERGE (OpReg (formatToWidth format_v) dst) (OpReg (formatToWidth format_v) reg_v)  (OpReg (formatToWidth format_f) reg_f) (OpReg W8 reg_mask)
+              VID (OpReg W8 v0Reg) (OpReg (formatToWidth format_l) reg_l) `snocOL`
+              -- 2. Splat value into tmp vector
+              VMV (OpReg w tmp) (OpReg (formatToWidth format_f) reg_f) `snocOL`
+              -- 3. Merge with mask -> set element at index
+              VMSEQ (OpReg W8 v0Reg) (OpReg W8 v0Reg) (OpReg (formatToWidth format_idx) reg_idx) `snocOL`
+              VMERGE (OpReg w dst) (OpReg (formatToWidth format_v) reg_v)  (OpReg w tmp) (OpReg W8 v0Reg)
 
         _ ->
           pprPanic "getRegister' (unhandled ternary CmmMachOp): "


=====================================
compiler/GHC/CmmToAsm/RV64/Ppr.hs
=====================================
@@ -718,17 +718,23 @@ pprInstr platform instr = case instr of
           FNMSub -> text "\tfnmsub" <> dot <> floatPrecission d
      in op4 fma d r1 r2 r3
 
-  VMV o1@(OpReg w _) o2 | isFloatOp o1 -> op2 (text "\tvfmv" <> dot <> text "f" <> dot <> floatWidthSuffix w) o1 o2
+  VMV o1@(OpReg w _) o2 | isFloatOp o1 && isVectorOp o2 -> op2 (text "\tvfmv" <> dot <> text "f" <> dot <> text "s") o1 o2
   VMV o1@(OpReg _w _) o2 | isFloatOp o2 -> op2 (text "\tvfmv" <> dot <> opToVInstrSuffix o1 <> dot <> text "f") o1 o2
   VMV o1 o2 -> op2 (text "\tvmv" <> dot <> opToVInstrSuffix o1 <> dot <> opToVInstrSuffix o2) o1 o2
-  VID o1 o2 -> op2 (text "\tvid.v") o1 o2
-  VMSEQ o1 o2 o3 -> op3 (text "\tvmseq.v.x") o1 o2 o3
-  VMERGE o1 o2 o3 o4 -> op4 (text "\tvmerge.vxm") o1 o2 o3 o4
+  -- TODO: Remove o2 from constructor
+  VID o1 _o2 -> op1 (text "\tvid.v") o1
+  -- TODO: This expects int register as third operand: Generalize by calculating
+  -- the instruction suffix (".vx")
+  VMSEQ o1 o2 o3 -> op3 (text "\tvmseq.vx") o1 o2 o3
+  -- TODO: All operands need to be vector registers. Make this more general or
+  -- validate this constraint.
+  VMERGE o1 o2 o3 o4 -> op4 (text "\tvmerge.vvm") o1 o2 o3 o4
   VSLIDEDOWN o1 o2 o3 -> op3 (text "\tvslidedown.vx") o1 o2 o3
   VSETIVLI dst len width grouping ta ma -> line $
     text "\tvsetivli" <+> pprReg W64 dst <> comma <+> (text.show) len <> comma <+> pprVWidth width <> comma <+> pprGrouping grouping <> comma <+> pprTA ta <> comma <+> pprMasking ma
   instr -> panic $ "RV64.pprInstr - Unknown instruction: " ++ instrCon instr
   where
+    op1 op o1 = line $ op <+> pprOp platform o1
     op2 op o1 o2 = line $ op <+> pprOp platform o1 <> comma <+> pprOp platform o2
     op3 op o1 o2 o3 = line $ op <+> pprOp platform o1 <> comma <+> pprOp platform o2 <> comma <+> pprOp platform o3
     op4 op o1 o2 o3 o4 = line $ op <+> pprOp platform o1 <> comma <+> pprOp platform o2 <> comma <+> pprOp platform o3 <> comma <+> pprOp platform o4


=====================================
compiler/GHC/CmmToAsm/Reg/Linear/RV64.hs
=====================================
@@ -84,7 +84,7 @@ getFreeRegs cls (FreeRegs g f v) =
     -- change often.)
     allocatableIntRegs = [5 .. 7] ++ [10 .. 17] ++ [28 .. 30]
     allocatableDoubleRegs = [0 .. 7] ++ [10 .. 17] ++ [28 .. 31]
-    allocatableVectorRegs = 0 : [7 .. 31]
+    allocatableVectorRegs = 1 : [7 .. 31]
 
 -- | Set corresponding register bit to 0
 allocateReg :: (HasCallStack) => RealReg -> FreeRegs -> FreeRegs


=====================================
testsuite/tests/simd/should_run/simd000.hs
=====================================
@@ -9,12 +9,11 @@ main = do
     -- FloatX4#
     case unpackFloatX4# (broadcastFloatX4# 1.5#) of
         (# a, b, c, d #) -> print (F# a, F# b, F# c, F# d)

--- TODO: Uncomment again
---    case unpackFloatX4# (packFloatX4# (# 4.5#,7.8#, 2.3#, 6.5# #)) of
---        (# a, b, c, d #) -> print (F# a, F# b, F# c, F# d)
---
---    -- DoubleX2#
---    case unpackDoubleX2# (broadcastDoubleX2# 6.5##) of
---        (# a, b #) -> print (D# a, D# b)
---    case unpackDoubleX2# (packDoubleX2# (# 8.9##,7.2## #)) of
---        (# a, b #) -> print (D# a, D# b)
+    case unpackFloatX4# (packFloatX4# (# 4.5#,7.8#, 2.3#, 6.5# #)) of
+        (# a, b, c, d #) -> print (F# a, F# b, F# c, F# d)
+
+    -- DoubleX2#
+    case unpackDoubleX2# (broadcastDoubleX2# 6.5##) of
+        (# a, b #) -> print (D# a, D# b)
+    case unpackDoubleX2# (packDoubleX2# (# 8.9##,7.2## #)) of
+        (# a, b #) -> print (D# a, D# b)



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/e8d84f6a448d91eebe48c130b6c66a9ee0ffb9fd

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/e8d84f6a448d91eebe48c130b6c66a9ee0ffb9fd
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20241101/8404b021/attachment-0001.html>