[Git][ghc/ghc][wip/supersven/riscv-vectors] WIP: simd000 - hacked further

Sun Oct 20 18:07:25 UTC 2024


Sven Tennie pushed to branch wip/supersven/riscv-vectors at Glasgow Haskell Compiler / GHC


Commits:
94ec8cb6 by Sven Tennie at 2024-10-20T20:06:20+02:00
WIP: simd000 - hacked further

- - - - -


13 changed files:

- compiler/CodeGen.Platform.h
- compiler/GHC/CmmToAsm/RV64/CodeGen.hs
- compiler/GHC/CmmToAsm/RV64/Instr.hs
- compiler/GHC/CmmToAsm/RV64/Ppr.hs
- compiler/GHC/CmmToAsm/RV64/Regs.hs
- compiler/GHC/CmmToAsm/Reg/Graph/TrivColorable.hs
- compiler/GHC/CmmToAsm/Reg/Linear.hs
- compiler/GHC/CmmToAsm/Reg/Linear/RV64.hs
- compiler/GHC/Platform/Reg/Class.hs
- hadrian/src/Flavour.hs
- rts/CheckVectorSupport.c
- rts/include/stg/MachRegs/riscv64.h
- testsuite/tests/simd/should_run/all.T


Changes:

=====================================
compiler/CodeGen.Platform.h
=====================================
@@ -411,6 +411,39 @@ import GHC.Platform.Reg
 # define ft10 62
 # define ft11 63
 
+# define v0  64
+# define v1  65
+# define v2  66
+# define v3  67
+# define v4  68
+# define v5  69
+# define v6  70
+# define v7  71
+# define v8  72
+# define v9  73
+# define v10 74
+# define v11 75
+# define v12 76
+# define v13 77
+# define v14 78
+# define v15 79
+# define v16 80
+# define v17 81
+# define v18 82
+# define v19 83
+# define v20 84
+# define v21 85
+# define v22 86
+# define v23 87
+# define v24 88
+# define v25 89
+# define v26 90
+# define v27 91
+# define v28 92
+# define v29 93
+# define v30 94
+# define v31 95
+
 #elif defined(MACHREGS_loongarch64)
 
 # define zero 0
@@ -1218,6 +1251,25 @@ freeReg REG_D5    = False
 freeReg REG_D6    = False
 # endif
 
+# if defined(REG_XMM1)
+freeReg REG_XMM1    = False
+# endif
+# if defined(REG_XMM2)
+freeReg REG_XMM2    = False
+# endif
+# if defined(REG_XMM3)
+freeReg REG_XMM3    = False
+# endif
+# if defined(REG_XMM4)
+freeReg REG_XMM4    = False
+# endif
+# if defined(REG_XMM5)
+freeReg REG_XMM5    = False
+# endif
+# if defined(REG_XMM6)
+freeReg REG_XMM6    = False
+# endif
+
 freeReg _ = True
 
 #else


=====================================
compiler/GHC/CmmToAsm/RV64/CodeGen.hs
=====================================
@@ -664,7 +664,7 @@ getRegister' config plat expr =
       Amode addr addr_code <- getAmode plat width mem
       case width of
         w
-          | w <= W64 ->
+          | (w <= W64) || isVecFormat format ->
               -- Load without sign-extension. See Note [Signed arithmetic on RISCV64]
               pure
                 ( Any
@@ -674,6 +674,20 @@ getRegister' config plat expr =
                           `snocOL` LDRU format (OpReg width dst) (OpAddr addr)
                     )
                 )
+        -- TODO: Load vector - instructions VLW, VLB, VLH, ... Encode in ppr of LDRU?
+        -- riscv64-unknown-linux-gnu-ghc: panic! (the 'impossible' happened)
+        -- GHC version 9.13.20241013:
+        --       Width too big! Cannot load: W128
+        -- Fx2V128[Sp + 8]
+        -- Call stack:
+        --     CallStack (from HasCallStack):
+        --       callStackDoc, called at compiler/GHC/Utils/Panic.hs:190:37 in ghc-9.13-inplace:GHC.Utils.Panic
+        --       pprPanic, called at compiler/GHC/CmmToAsm/RV64/CodeGen.hs:678:11 in ghc-9.13-inplace:GHC.CmmToAsm.RV64.CodeGen
+        -- CallStack (from HasCallStack):
+        --   panic, called at compiler/GHC/Utils/Error.hs:507:29 in ghc-9.13-inplace:GHC.Utils.Error
+
+        -- Fx2V128 -> cat= Float, length = 2, widthInBits = 128
+
         _ ->
           pprPanic ("Width too big! Cannot load: " ++ show width) (pdoc plat expr)
     CmmStackSlot _ _ ->
@@ -820,10 +834,10 @@ getRegister' config plat expr =
             code_idx `snocOL`
             annExpr expr (VMV (OpReg w dst) (OpReg w_idx reg_idx))
 
-        MO_VF_Broadcast _length w -> do
+        MO_VF_Broadcast l w -> do
           (reg_idx, format_idx, code_idx) <- getSomeReg e
           let w_idx = formatToWidth format_idx
-          pure $ Any (intFormat w) $ \dst ->
+          pure $ Any (vecFormat (cmmVec l (cmmFloat w))) $ \dst ->
             code_idx `snocOL`
             annExpr expr (VMV (OpReg w dst) (OpReg w_idx reg_idx))
 


=====================================
compiler/GHC/CmmToAsm/RV64/Instr.hs
=====================================
@@ -136,7 +136,10 @@ regUsageOfInstr platform instr = case instr of
         fmt = case cls of
                 RcInteger -> II64
                 RcFloat   -> FF64
-                RcVector  -> sorry "The RISCV64 NCG does not (yet) support vectors; please use -fllvm."
+                -- TODO: We're expecting 128bit vector registers here. This
+                -- needs to be calculated from real format. Probably, we need to
+                -- hand around the format instead of the width for vector regs.
+                RcVector -> VecFormat 2 FmtInt64
         cls = case r of
                 RegVirtual vr -> classOfVirtualReg (platformArch platform) vr
                 RegReal rr -> classOfRealReg rr
@@ -887,13 +890,27 @@ isEncodeableInWidth :: Width -> Integer -> Bool
 isEncodeableInWidth = isNbitEncodeable . widthInBits
 
 isIntOp :: Operand -> Bool
-isIntOp = not . isFloatOp
+isIntOp o = not (isFloatOp o || isVectorOp o)
 
 isFloatOp :: Operand -> Bool
 isFloatOp (OpReg _ reg) | isFloatReg reg = True
 isFloatOp _ = False
 
+isVectorOp :: Operand -> Bool
+isVectorOp (OpReg _ reg) | isVectorReg reg = True
+isVectorOp _ = False
+
 isFloatReg :: Reg -> Bool
-isFloatReg (RegReal (RealRegSingle i)) | i > 31 = True
+isFloatReg (RegReal (RealRegSingle i)) | isFloatRegNo i = True
 isFloatReg (RegVirtual (VirtualRegD _)) = True
 isFloatReg _ = False
+
+isIntReg :: Reg -> Bool
+isIntReg (RegReal (RealRegSingle i)) | isIntRegNo i = True
+isIntReg (RegVirtual (VirtualRegD _)) = True
+isIntReg _ = False
+
+isVectorReg :: Reg -> Bool
+isVectorReg (RegReal (RealRegSingle i)) | isVectorRegNo i = True
+isVectorReg (RegVirtual (VirtualRegD _)) = True
+isVectorReg _ = False


=====================================
compiler/GHC/CmmToAsm/RV64/Ppr.hs
=====================================
@@ -403,11 +403,43 @@ pprReg w r = case r of
     ppr_reg_no 61 = text "ft9"
     ppr_reg_no 62 = text "ft10"
     ppr_reg_no 63 = text "ft11"
+    ppr_reg_no 64 = text "v0"
+    ppr_reg_no 65 = text "v1"
+    ppr_reg_no 66 = text "v2"
+    ppr_reg_no 67 = text "v3"
+    ppr_reg_no 68 = text "v4"
+    ppr_reg_no 69 = text "v5"
+    ppr_reg_no 70 = text "v6"
+    ppr_reg_no 71 = text "v7"
+    ppr_reg_no 72 = text "v8"
+    ppr_reg_no 73 = text "v9"
+    ppr_reg_no 74 = text "v10"
+    ppr_reg_no 75 = text "v11"
+    ppr_reg_no 76 = text "v12"
+    ppr_reg_no 77 = text "v13"
+    ppr_reg_no 78 = text "v14"
+    ppr_reg_no 79 = text "v15"
+    ppr_reg_no 80 = text "v16"
+    ppr_reg_no 81 = text "v17"
+    ppr_reg_no 82 = text "v18"
+    ppr_reg_no 83 = text "v19"
+    ppr_reg_no 84 = text "v20"
+    ppr_reg_no 85 = text "v21"
+    ppr_reg_no 86 = text "v22"
+    ppr_reg_no 87 = text "v23"
+    ppr_reg_no 88 = text "v24"
+    ppr_reg_no 89 = text "v25"
+    ppr_reg_no 90 = text "v26"
+    ppr_reg_no 91 = text "v27"
+    ppr_reg_no 92 = text "v28"
+    ppr_reg_no 93 = text "v29"
+    ppr_reg_no 94 = text "v30"
+    ppr_reg_no 95 = text "v31"
     ppr_reg_no i
       | i < 0 = pprPanic "Unexpected register number (min is 0)" (ppr w <+> int i)
-      | i > 63 = pprPanic "Unexpected register number (max is 63)" (ppr w <+> int i)
+      | i > 95 = pprPanic "Unexpected register number (max is 95)" (ppr w <+> int i)
       -- no support for widths > W64.
-      | otherwise = pprPanic "Unsupported width in register (max is 64)" (ppr w <+> int i)
+      | otherwise = pprPanic "Unsupported width in register (max is 95)" (ppr w <+> int i)
 
 -- | Single precission `Operand` (floating-point)
 isSingleOp :: Operand -> Bool
@@ -621,6 +653,9 @@ pprInstr platform instr = case instr of
   STR II64 o1 o2 -> op2 (text "\tsd") o1 o2
   STR FF32 o1 o2 -> op2 (text "\tfsw") o1 o2
   STR FF64 o1 o2 -> op2 (text "\tfsd") o1 o2
+  STR (VecFormat 2 FmtFloat) o1 o2@(OpAddr _) -> op2 (text "\tvse32.v") o1 o2
+  STR (VecFormat 2 FmtDouble) o1 o2@(OpAddr _) -> op2 (text "\tvse64.v") o1 o2
+  STR f o1 o2 -> pprPanic "Unsupported store" ((text . show) f <+> pprOp platform o1 <+> pprOp platform o2)
   LDR _f o1 (OpImm (ImmIndex lbl off)) ->
     lines_
       [ text "\tla" <+> pprOp platform o1 <> comma <+> pprAsmLabel platform lbl,
@@ -643,6 +678,9 @@ pprInstr platform instr = case instr of
   LDRU FF32 o1 o2@(OpAddr (AddrRegImm _ _)) -> op2 (text "\tflw") o1 o2
   LDRU FF64 o1 o2@(OpAddr (AddrReg _)) -> op2 (text "\tfld") o1 o2
   LDRU FF64 o1 o2@(OpAddr (AddrRegImm _ _)) -> op2 (text "\tfld") o1 o2
+  -- vectors
+  LDRU (VecFormat 2 FmtFloat) o1 o2@(OpAddr _) -> op2 (text "\tvle32.v") o1 o2
+  LDRU (VecFormat 2 FmtDouble) o1 o2@(OpAddr _) -> op2 (text "\tvle64.v") o1 o2
   LDRU f o1 o2 -> pprPanic "Unsupported unsigned load" ((text . show) f <+> pprOp platform o1 <+> pprOp platform o2)
   FENCE r w -> line $ text "\tfence" <+> pprFenceType r <> char ',' <+> pprFenceType w
   FCVT FloatToFloat o1@(OpReg W32 _) o2@(OpReg W64 _) -> op2 (text "\tfcvt.s.d") o1 o2
@@ -677,7 +715,10 @@ pprInstr platform instr = case instr of
           FNMAdd -> text "\tfnmadd" <> dot <> floatPrecission d
           FNMSub -> text "\tfnmsub" <> dot <> floatPrecission d
      in op4 fma d r1 r2 r3
-  VMV o1 o2 -> op2 (text "\tvmv.v.x") o1 o2
+
+  VMV o1@(OpReg w _) o2 | isFloatOp o2 -> op2 (text "\tvfmv" <> dot <> text "f" <> dot <> floatWidthSuffix w) o1 o2
+  VMV o1@(OpReg w _) o2 | isFloatOp o1 -> op2 (text "\tvfmv" <> dot <> opToVInstrSuffix o1 <> dot <> floatWidthSuffix w) o1 o2
+  VMV o1 o2 -> op2 (text "\tvmv" <> dot <> opToVInstrSuffix o1 <> dot <> opToVInstrSuffix o2) o1 o2
   VID o1 o2 -> op2 (text "\tvid.v") o1 o2
   VMSEQ o1 o2 o3 -> op3 (text "\tvmseq.v.x") o1 o2 o3
   VMERGE o1 o2 o3 o4 -> op4 (text "\tvmerge.vxm") o1 o2 o3 o4
@@ -718,6 +759,17 @@ pprInstr platform instr = case instr of
     pprMasking MA = text "ma"
     pprMasking MU = text "mu"
 
+    opToVInstrSuffix :: IsLine doc => Operand -> doc
+    opToVInstrSuffix op | isIntOp op = text "x"
+    opToVInstrSuffix op | isFloatOp op = text "f"
+    opToVInstrSuffix op | isVectorOp op = text "v"
+    opToVInstrSuffix op = pprPanic "Unsupported operand for vector instruction" (pprOp platform op)
+
+    floatWidthSuffix :: IsLine doc => Width -> doc
+    floatWidthSuffix W32 = text "s"
+    floatWidthSuffix W64 = text "d"
+    floatWidthSuffix w = pprPanic "Unsupported floating point vector operation width" (ppr w)
+
 floatOpPrecision :: Platform -> Operand -> Operand -> String
 floatOpPrecision _p l r | isFloatOp l && isFloatOp r && isSingleOp l && isSingleOp r = "s" -- single precision
 floatOpPrecision _p l r | isFloatOp l && isFloatOp r && isDoubleOp l && isDoubleOp r = "d" -- double precision


=====================================
compiler/GHC/CmmToAsm/RV64/Regs.hs
=====================================
@@ -78,6 +78,15 @@ v0RegNo = 64
 v31RegNo :: RegNo
 v31RegNo = 95
 
+isVectorRegNo :: RegNo -> Bool
+isVectorRegNo r = v0RegNo <= r && r <= v31RegNo
+
+isFloatRegNo :: RegNo -> Bool
+isFloatRegNo r = d0RegNo <= r && r <= d31RegNo
+
+isIntRegNo :: RegNo -> Bool
+isIntRegNo r = x0RegNo <= r && r <= x31RegNo
+
 -- Note [The made-up RISCV64 TMP (IP) register]
 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 --
@@ -110,10 +119,12 @@ v0Reg = regSingle v0RegNo
 
 -- | All machine register numbers.
 allMachRegNos :: [RegNo]
-allMachRegNos = intRegs ++ fpRegs
+allMachRegNos = intRegs ++ fpRegs ++ vRegs
   where
     intRegs = [x0RegNo .. x31RegNo]
     fpRegs = [d0RegNo .. d31RegNo]
+    -- TODO: If Vector extension is turned off, this should become the empty list
+    vRegs = [v0RegNo .. v31RegNo]
 
 -- | Registers available to the register allocator.
 --
@@ -210,41 +221,33 @@ realRegSqueeze cls rr =
     RcInteger ->
       case rr of
         RealRegSingle regNo
-          | regNo < d0RegNo
-          -> 1
-          | otherwise
-          -> 0
+          | regNo <= x31RegNo -> 1
+          | otherwise -> 0
     RcFloat ->
       case rr of
         RealRegSingle regNo
-          |  regNo < d0RegNo
-          || regNo > d31RegNo
-          -> 0
-          | otherwise
-          -> 1
+          | regNo <= d31RegNo -> 1
+          | otherwise -> 0
     RcVector ->
       case rr of
         RealRegSingle regNo
-          | regNo > d31RegNo
-          -> 1
-          | otherwise
-          -> 0
+          | regNo <= v31RegNo -> 1
+          | otherwise -> 0
 
 mkVirtualReg :: Unique -> Format -> VirtualReg
 mkVirtualReg u format
-  | not (isFloatFormat format) = VirtualRegI u
-  | otherwise =
-      case format of
-        FF32 -> VirtualRegD u
-        FF64 -> VirtualRegD u
-        _ -> panic "RV64.mkVirtualReg"
+  | isIntFormat format = VirtualRegI u
+  | isFloatFormat format = VirtualRegD u
+  | isVecFormat format = VirtualRegV128 u
+  | otherwise = panic $ "RV64.mkVirtualReg: No virtual register type for " ++ show format
 
 {-# INLINE classOfRealReg #-}
 classOfRealReg :: RealReg -> RegClass
 classOfRealReg (RealRegSingle i)
-  | i < d0RegNo = RcInteger
-  | i > d31RegNo = RcVector
-  | otherwise = RcFloat
+  | i <= x31RegNo = RcInteger
+  | i <= d31RegNo = RcFloat
+  | i <= v31RegNo = RcVector
+  | otherwise = panic $ "RV64.classOfRealReg: Unknown register number: " ++ show i
 
 regDotColor :: RealReg -> SDoc
 regDotColor reg =


=====================================
compiler/GHC/CmmToAsm/Reg/Graph/TrivColorable.hs
=====================================
@@ -144,7 +144,7 @@ allocatableRegs arch rc =
     ArchMipsel    -> panic "trivColorable ArchMipsel"
     ArchS390X     -> panic "trivColorable ArchS390X"
     ArchRISCV64   -> case rc of
-      -- TODO for Sven Tennie
+      -- TODO: for Sven Tennie
       Separate.RcInteger -> 14
       Separate.RcFloat   -> 20
       Separate.RcVector  -> 20


=====================================
compiler/GHC/CmmToAsm/Reg/Linear.hs
=====================================
@@ -977,6 +977,7 @@ allocRegsAndSpill_spill reading keep spills alloc r@(VirtualRegWithFormat vr fmt
                         = pprPanic ("RegAllocLinear.allocRegsAndSpill: no spill candidates\n")
                         $ vcat
                                 [ text "allocating vreg:  " <> text (show vr)
+                                , text "regClass:         " <> text (show regclass)
                                 , text "assignment:       " <> ppr assig
                                 , text "format:           " <> ppr fmt
                                 , text "freeRegs:         " <> text (showRegs freeRegs)


=====================================
compiler/GHC/CmmToAsm/Reg/Linear/RV64.hs
=====================================
@@ -31,22 +31,26 @@ data FreeRegs
       !Word32
       -- | floating point registers (`RcDouble`)
       !Word32
+      -- | vector registers (`RcVector`)
+      !Word32
 
 instance Show FreeRegs where
-  show (FreeRegs g f) = "FreeRegs 0b" ++ showBits g ++ " 0b" ++ showBits f
+  show (FreeRegs g f v) = "FreeRegs 0b" ++ showBits g ++ " 0b" ++ showBits f ++ " 0b" ++ showBits v
 
 -- | Show bits as a `String` of @1 at s and @0 at s
 showBits :: Word32 -> String
 showBits w = map (\i -> if testBit w i then '1' else '0') [0 .. 31]
 
 instance Outputable FreeRegs where
-  ppr (FreeRegs g f) =
+  ppr (FreeRegs g f v) =
     text "   "
       <+> foldr (\i x -> pad_int i <+> x) (text "") [0 .. 31]
       $$ text "GPR"
       <+> foldr (\i x -> show_bit g i <+> x) (text "") [0 .. 31]
       $$ text "FPR"
       <+> foldr (\i x -> show_bit f i <+> x) (text "") [0 .. 31]
+      $$ text "VPR"
+      <+> foldr (\i x -> show_bit v i <+> x) (text "") [0 .. 31]
     where
       pad_int i | i < 10 = char ' ' <> int i
       pad_int i = int i
@@ -59,17 +63,16 @@ initFreeRegs :: Platform -> FreeRegs
 initFreeRegs platform = foldl' (flip releaseReg) noFreeRegs (allocatableRegs platform)
   where
     noFreeRegs :: FreeRegs
-    noFreeRegs = FreeRegs 0 0
+    noFreeRegs = FreeRegs 0 0 0
 
 -- | Get all free `RealReg`s (i.e. those where the corresponding bit is 1)
 getFreeRegs :: RegClass -> FreeRegs -> [RealReg]
-getFreeRegs cls (FreeRegs g f) =
+getFreeRegs cls (FreeRegs g f v) =
   case cls of
     RcInteger -> go 0 g allocatableIntRegs
     RcFloat -> go 32 f allocatableDoubleRegs
-    RcVector ->
-      sorry "Linear.RV64.getFreeRegs: vector registers are not supported"
-
+    -- TODO: If there's no Vector support, we should return an empty list or panic.
+    RcVector -> go 64 v allocatableVectorRegs
   where
     go _ _ [] = []
     go off x (i : is)
@@ -81,19 +84,24 @@ getFreeRegs cls (FreeRegs g f) =
     -- change often.)
     allocatableIntRegs = [5 .. 7] ++ [10 .. 17] ++ [28 .. 30]
     allocatableDoubleRegs = [0 .. 7] ++ [10 .. 17] ++ [28 .. 31]
+    allocatableVectorRegs = 0 : [7 .. 31]
 
 -- | Set corresponding register bit to 0
 allocateReg :: (HasCallStack) => RealReg -> FreeRegs -> FreeRegs
-allocateReg (RealRegSingle r) (FreeRegs g f)
-  | r > 31 && testBit f (r - 32) = FreeRegs g (clearBit f (r - 32))
-  | r < 32 && testBit g r = FreeRegs (clearBit g r) f
-  | r > 31 = panic $ "Linear.RV64.allocReg: double allocation of float reg v" ++ show (r - 32) ++ "; " ++ showBits f
-  | otherwise = pprPanic "Linear.RV64.allocReg" $ text ("double allocation of gp reg x" ++ show r ++ "; " ++ showBits g)
+allocateReg (RealRegSingle r) (FreeRegs g f v)
+  | r < 32 && testBit g r = FreeRegs (clearBit g r) f v
+  | r >= 32 && testBit f (r - 32) = FreeRegs g (clearBit f (r - 32)) v
+  | r >= 64 && testBit v (r - 64) = FreeRegs g f (clearBit v (r - 64))
+  | otherwise =
+      pprPanic "Linear.RV64.allocateReg"
+        $ text ("invalid allocation of register " ++ show r ++ "; g:" ++ showBits g ++ "; f:" ++ showBits f ++ "; v:" ++ showBits v)
 
 -- | Set corresponding register bit to 1
 releaseReg :: (HasCallStack) => RealReg -> FreeRegs -> FreeRegs
-releaseReg (RealRegSingle r) (FreeRegs g f)
-  | r > 31 && testBit f (r - 32) = pprPanic "Linear.RV64.releaseReg" (text "can't release non-allocated reg v" <> int (r - 32))
-  | r < 32 && testBit g r = pprPanic "Linear.RV64.releaseReg" (text "can't release non-allocated reg x" <> int r)
-  | r > 31 = FreeRegs g (setBit f (r - 32))
-  | otherwise = FreeRegs (setBit g r) f
+releaseReg (RealRegSingle r) (FreeRegs g f v)
+  | r < 32 && not (testBit g r) = FreeRegs (setBit g r) f v
+  | r >= 32 && r <= 63 && not (testBit f (r - 32)) = FreeRegs g (setBit f (r - 32)) v
+  | r >= 64 && not (testBit v (r - 64)) = FreeRegs g f (setBit v (r - 64))
+  | otherwise =
+      pprPanic "Linear.RV64.releaseReg"
+        $ text ("invalid release of register " ++ show r ++ "; g:" ++ showBits g ++ "; f:" ++ showBits f ++ "; v:" ++ showBits v)


=====================================
compiler/GHC/Platform/Reg/Class.hs
=====================================
@@ -49,8 +49,5 @@ registerArch arch =
     ArchPPC       -> Unified
     ArchPPC_64 {} -> Unified
     ArchAArch64   -> Unified
-    -- Support for vector registers not yet implemented for RISC-V
-    -- see panic in `getFreeRegs`.
-    --ArchRISCV64   -> Separate
-    ArchRISCV64   -> NoVectors
+    ArchRISCV64   -> Separate
     _             -> NoVectors


=====================================
hadrian/src/Flavour.hs
=====================================
@@ -70,6 +70,7 @@ flavourTransformers = M.fromList
     , "late_ccs"         =: enableLateCCS
     , "boot_nonmoving_gc" =: enableBootNonmovingGc
     , "dump_stg"         =: enableDumpStg
+    , "vectors"          =: enableVectorSupport
     ]
   where (=:) = (,)
 
@@ -163,6 +164,15 @@ enableDebugInfo = addArgs $ notStage0 ? mconcat
     , builder (Cabal Setup) ? arg "--disable-executable-stripping"
     ]
 
+-- TODO: A bit hand-wavy; this likely needs to be part of autoconf
+enableVectorSupport :: Flavour -> Flavour
+enableVectorSupport = addArgs $ notStage0 ? mconcat
+    [ builder (Ghc CompileHs) ? pure ["-optc=-march=rv64gv", "-opta=-march=rv64gv"]
+    , builder (Ghc CompileCWithGhc) ? pure ["-optc=-march=rv64gv", "-opta=-march=rv64gv"]
+    , builder (Ghc ToolArgs) ? pure ["-optc=-march=rv64gv", "-opta=-march=rv64gv"]
+    , builder (Cc CompileC) ? pure ["-optc=-march=rv64gv", "-opta=-march=rv64gv"]
+    ]
+
 -- | Enable the ticky-ticky profiler in stage2 GHC
 enableTickyGhc :: Flavour -> Flavour
 enableTickyGhc f =


=====================================
rts/CheckVectorSupport.c
=====================================
@@ -1,6 +1,10 @@
 #include <stdio.h>
 #include "CheckVectorSupport.h"
 
+#if defined(__riscv_v) && defined(__riscv_v_intrinsic)
+  #include <riscv_vector.h>
+#endif
+
 // Check support for vector registers (conservative).
 //
 // 0: no support for vector registers
@@ -69,8 +73,7 @@ int checkVectorSupport(void) {
     // supports vectors.
 
     // TODO: Check the machine supports V extension 1.0. Or, implement the older
-    // comman versions.
-    #include <riscv_vector.h>
+    // command versions.
 
     unsigned vlenb = __riscv_vlenb();
 


=====================================
rts/include/stg/MachRegs/riscv64.h
=====================================
@@ -57,5 +57,12 @@
 #define REG_D5          fs10
 #define REG_D6          fs11
 
+#define REG_XMM1        v1
+#define REG_XMM2        v2
+#define REG_XMM3        v3
+#define REG_XMM4        v4
+#define REG_XMM5        v5
+#define REG_XMM6        v6
+
 #define MAX_REAL_FLOAT_REG   6
-#define MAX_REAL_DOUBLE_REG  6
\ No newline at end of file
+#define MAX_REAL_DOUBLE_REG  6


=====================================
testsuite/tests/simd/should_run/all.T
=====================================
@@ -24,7 +24,8 @@ setTestOpts(
   , when(have_cpu_feature('avx512f'), extra_hc_opts('-mavx512f'))
   ])
 
-test('simd000', [], compile_and_run, [''])
+# TODO: Revert debug trace dumps
+test('simd000', [], compile_and_run, ['-opta=-march=rv64gv -dppr-debug -ddump-to-file -ddump-asm'])
 test('simd001', [], compile_and_run, [''])
 test('simd002', [], compile_and_run, [''])
 test('simd003', [], compile_and_run, [''])



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/94ec8cb6f225cb1f46067f643ebef75f1861c810

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/94ec8cb6f225cb1f46067f643ebef75f1861c810
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20241020/51724fb7/attachment-0001.html>