[Git][ghc/ghc][wip/supersven/riscv-vectors] 3 commits: Compile vector helper files with vector support

Sat Feb 1 10:51:22 UTC 2025


Sven Tennie pushed to branch wip/supersven/riscv-vectors at Glasgow Haskell Compiler / GHC


Commits:
f2c4ae33 by Sven Tennie at 2025-02-01T09:47:30+01:00
Compile vector helper files with vector support

- - - - -
8ab58512 by Sven Tennie at 2025-02-01T11:12:32+01:00
-mriscv-vlen makes more sense to RISC-V people

VLEN is a well defined term.

- - - - -
c1667712 by Sven Tennie at 2025-02-01T11:50:58+01:00
Simplify expressions

- - - - -


4 changed files:

- compiler/GHC/CmmToAsm/RV64/CodeGen.hs
- compiler/GHC/Driver/Session.hs
- docs/users_guide/using.rst
- hadrian/src/Settings/Packages.hs


Changes:

=====================================
compiler/GHC/CmmToAsm/RV64/CodeGen.hs
=====================================
@@ -483,11 +483,11 @@ assertVectorRegWidth expr = do
   if isVecFormat format then
    case mbRegMinBits of
     Nothing -> pprPanic
-                "CmmExpr results in vector format, but no vector register configured (see -mvector-min-width-bits in docs)"
+                "CmmExpr results in vector format, but no vector register configured (see -mriscv-vlen in docs)"
                 (pdoc platform expr)
     Just regMinBits | (formatInBytes format) * 8 <= regMinBits -> pure ()
                     | otherwise -> pprPanic 
-                      "CmmExpr results in vector format which is bigger than the configured vector register size (see -mvector-min-width-bits in docs)"
+                      "CmmExpr results in vector format which is bigger than the configured vector register size (see -mriscv-vlen in docs)"
                       (pdoc platform expr)
   else
     pure ()
@@ -680,7 +680,6 @@ getRegister' config plat expr =
       case (width, format) of
         (_w, f)
           | isVecFormat f ->
-              -- TODO: Check for configured vectorMinBits
               pure
                 ( Any
                     format
@@ -866,23 +865,8 @@ getRegister' config plat expr =
 
         -- TODO: MO_V_Broadcast with immediate: If the right value is a literal,
         -- it may use vmv.v.i (simpler)
-        -- TODO: Duplication with MO_VF_Broadcast
-        MO_V_Broadcast length w -> do
-          (reg_val, format_val, code_val) <- getSomeReg e
-          let toFmt = VecFormat length (intScalarFormat w)
-          pure $ Any toFmt $ \dst ->
-            code_val
-              `snocOL` annExpr
-                expr
-                (VMV (OpReg toFmt dst) (OpReg format_val reg_val))
-        MO_VF_Broadcast length w -> do
-          (reg_val, format_val, code_val) <- getSomeReg e
-          let toFmt = VecFormat length (floatScalarFormat w)
-          pure $ Any (vecFormat (cmmVec length (cmmFloat w))) $ \dst ->
-            code_val
-              `snocOL` annExpr
-                expr
-                (VMV (OpReg toFmt dst) (OpReg format_val reg_val))
+        MO_V_Broadcast length w ->vectorBroadcast (intVecFormat length w)
+        MO_VF_Broadcast length w -> vectorBroadcast (floatVecFormat length w)
 
         -- TODO: NO MO_V_Neg? Why?
         MO_VF_Neg length w -> do
@@ -934,6 +918,15 @@ getRegister' config plat expr =
           where
             shift = 64 - (widthInBits from - widthInBits to)
 
+        vectorBroadcast :: Format -> NatM Register
+        vectorBroadcast targetFormat = do
+          (reg_val, format_val, code_val) <- getSomeReg e
+          pure $ Any targetFormat $ \dst ->
+            code_val
+              `snocOL` annExpr
+                expr
+                (VMV (OpReg targetFormat dst) (OpReg format_val reg_val))
+
     -- Dyadic machops:
     --
     -- The general idea is:
@@ -1260,60 +1253,21 @@ getRegister' config plat expr =
         MO_U_Shr w -> intOp False w (\d x y -> unitOL $ annExpr expr (SRL d x y))
         MO_S_Shr w -> intOp True w (\d x y -> unitOL $ annExpr expr (SRA d x y))
 
-        -- TODO: Use vecOp here
-        MO_VF_Extract length w -> do
-          (reg_v, format_v, code_v) <- getSomeReg x
-          (reg_idx, format_idx, code_idx) <- getSomeReg y
-          let format_dst = floatFormat w
-          tmp <- getNewRegNat format_v
-          pure $ Any format_dst $ \dst ->
-            code_v
-              `appOL` code_idx
-              `snocOL`
-              -- Setup
-              -- TODO: Use width
-              annExpr
-                expr
-                -- Move selected element to index 0
-                -- vslidedown.vi v8, v9, 2
-                (VSLIDEDOWN (OpReg format_v tmp) (OpReg format_v reg_v) (OpReg format_idx reg_idx))
-              `snocOL`
-              -- Move to float register
-              -- vmv.x.s a0, v8
-              VMV (OpReg format_dst dst) (OpReg format_v tmp)
-
-        -- TODO: Duplication with MO_VF_Extract
-        MO_V_Extract length w -> do
-          (reg_v, format_v, code_v) <- getSomeReg x
-          (reg_idx, format_idx, code_idx) <- getSomeReg y
-          tmp <- getNewRegNat format_v
-          let format_dst = floatFormat w
-          pure $ Any format_dst $ \dst ->
-            code_v
-              `appOL` code_idx
-              `snocOL`
-              -- Setup
-              -- TODO: Use width
-              annExpr
-                expr
-                -- Move selected element to index 0
-                -- vslidedown.vi v8, v9, 2
-                (VSLIDEDOWN (OpReg format_v tmp) (OpReg format_v reg_v) (OpReg format_idx reg_idx))
-              `snocOL`
-              -- Move to float register
-              -- vmv.x.s a0, v8
-              VMV (OpReg format_dst dst) (OpReg format_v tmp)
-        MO_VF_Add length w -> vecOp (floatVecFormat length w) (\d x y -> (VADD d x y))
-        MO_VF_Sub length w -> vecOp (floatVecFormat length w) (\d x y -> (VSUB d x y))
-        MO_VF_Mul length w -> vecOp (floatVecFormat length w) (\d x y -> (VMUL d x y))
-        MO_VF_Quot length w -> vecOp (floatVecFormat length w) (\d x y -> (VQUOT d x y))
+        -- Vector operations
+        MO_VF_Extract length w -> vecOp (floatVecFormat length w) VSLIDEDOWN
+        MO_V_Extract length w -> vecOp (intVecFormat length w) VSLIDEDOWN
+
+        MO_VF_Add length w -> vecOp (floatVecFormat length w) VADD
+        MO_VF_Sub length w -> vecOp (floatVecFormat length w) VSUB
+        MO_VF_Mul length w -> vecOp (floatVecFormat length w) VMUL
+        MO_VF_Quot length w -> vecOp (floatVecFormat length w) VQUOT
         -- See https://godbolt.org/z/PvcWKMKoW
-        MO_VS_Min length w -> vecOp (intVecFormat length w) (\d x y -> (VSMIN d x y))
-        MO_VS_Max length w -> vecOp (intVecFormat length w) (\d x y -> (VSMAX d x y))
-        MO_VU_Min length w -> vecOp (intVecFormat length w) (\d x y -> (VUMIN d x y))
-        MO_VU_Max length w -> vecOp (intVecFormat length w) (\d x y -> (VUMAX d x y))
-        MO_VF_Min length w -> vecOp (floatVecFormat length w) (\d x y -> (VFMIN d x y))
-        MO_VF_Max length w -> vecOp (floatVecFormat length w) (\d x y -> (VFMAX d x y))
+        MO_VS_Min length w -> vecOp (intVecFormat length w) VSMIN
+        MO_VS_Max length w -> vecOp (intVecFormat length w) VSMAX
+        MO_VU_Min length w -> vecOp (intVecFormat length w) VUMIN
+        MO_VU_Max length w -> vecOp (intVecFormat length w) VUMAX
+        MO_VF_Min length w -> vecOp (floatVecFormat length w) VFMIN
+        MO_VF_Max length w -> vecOp (floatVecFormat length w) VFMAX
         _e -> panic $ "Missing operation " ++ show expr
 
         -- Vectors


=====================================
compiler/GHC/Driver/Session.hs
=====================================
@@ -1685,7 +1685,7 @@ dynamic_flags_deps = [
   , make_ord_flag defGhcFlag "mavx512f"     (noArg (\d -> d { avx512f = True }))
   , make_ord_flag defGhcFlag "mavx512pf"    (noArg (\d ->
                                                          d { avx512pf = True }))
-  , make_ord_flag defGhcFlag "mvector-min-width-bits"
+  , make_ord_flag defGhcFlag "mriscv-vlen"
                                             (word64SuffixM setVectorMinBits)
   , make_ord_flag defGhcFlag "mfma"         (noArg (\d -> d { fma = True }))
 


=====================================
docs/users_guide/using.rst
=====================================
@@ -1743,7 +1743,7 @@ Some flags only make sense for particular target platforms.
     multiply-add, which might perform non-IEEE-compliant software emulation on
     some platforms (depending on the implementation of the C standard library).
 
-.. ghc-flag:: -mvector-min-width-bits
+.. ghc-flag:: -mriscv-vlen
    :shortdesc: (RISC-V NCG only) Minimal width of vector registers in bits
    :type: dynamic
    :category: platform-options


=====================================
hadrian/src/Settings/Packages.hs
=====================================
@@ -325,6 +325,7 @@ rtsPackageArgs = package rts ? do
     libzstdLibraryDir <- getSetting LibZstdLibDir
 
     x86 <- queryTarget (\ tgt -> archOS_arch (tgtArchOs tgt) `elem` [ ArchX86, ArchX86_64 ])
+    riscv64 <- queryTarget (\ tgt -> archOS_arch (tgtArchOs tgt) == ArchRISCV64)
 
     -- Arguments passed to GHC when compiling C and .cmm sources.
     let ghcArgs = mconcat
@@ -340,11 +341,13 @@ rtsPackageArgs = package rts ? do
             --
             -- In particular, we **do not** pass -mavx when compiling
             -- AutoApply_V16.cmm, as that would lock out targets with SSE2 but not AVX.
-          , inputs ["**/AutoApply_V32.cmm"] ? pure [ "-mavx2"    | x86 ]
-          , inputs ["**/AutoApply_V64.cmm"] ? pure [ "-mavx512f" | x86 ]
+          , inputs ["**/AutoApply_V16.cmm"] ? pure [ "-mriscv-vlen 128" | riscv64]
+          , inputs ["**/AutoApply_V32.cmm"] ? pure ([ "-mavx2"    | x86 ] ++ [ "-mriscv-vlen 256" | riscv64])
+          , inputs ["**/AutoApply_V64.cmm"] ? pure ([ "-mavx512f" | x86 ] ++ [ "-mriscv-vlen 512" | riscv64])
 
-          , inputs ["**/Jumps_V32.cmm"] ? pure [ "-mavx2"    | x86 ]
-          , inputs ["**/Jumps_V64.cmm"] ? pure [ "-mavx512f" | x86 ]
+          , inputs ["**/Jumps_V16.cmm"] ? pure [ "-mriscv-vlen 128" | riscv64]
+          , inputs ["**/Jumps_V32.cmm"] ? pure ([ "-mavx2"    | x86 ] ++ [ "-mriscv-vlen 256" | riscv64])
+          , inputs ["**/Jumps_V64.cmm"] ? pure ([ "-mavx512f" | x86 ] ++ [ "-mriscv-vlen 512" | riscv64])
           ]
 
     let cArgs = mconcat
@@ -420,11 +423,13 @@ rtsPackageArgs = package rts ? do
                    , "**/AutoApply_V64.c" ] ? pure ["-fno-PIC", "-static"]
 
             -- See Note [AutoApply.cmm for vectors] in genapply/Main.hs
-          , inputs ["**/AutoApply_V32.c"] ? pure [ "-mavx2"    | x86 ]
-          , inputs ["**/AutoApply_V64.c"] ? pure [ "-mavx512f" | x86 ]
+          , inputs ["**/AutoApply_V16.c"] ? pure [ "-march=rv64g_zvl128b" | riscv64]
+          , inputs ["**/AutoApply_V32.c"] ? pure ([ "-mavx2"    | x86 ] ++ [ "-march=rv64g_zvl256b" | riscv64])
+          , inputs ["**/AutoApply_V64.c"] ? pure ([ "-mavx512f" | x86 ] ++ [ "-march=rv64g_zvl512b" | riscv64])
 
-          , inputs ["**/Jumps_V32.c"] ? pure [ "-mavx2"    | x86 ]
-          , inputs ["**/Jumps_V64.c"] ? pure [ "-mavx512f" | x86 ]
+          , inputs ["**/Jumps_V16.c"] ? pure [ "-mriscv-vlen 128" | riscv64]
+          , inputs ["**/Jumps_V32.c"] ? pure ([ "-mavx2"    | x86 ] ++ [ "-march=rv64g_zvl256b" | riscv64])
+          , inputs ["**/Jumps_V64.c"] ? pure ([ "-mavx512f" | x86 ] ++ [ "-march=rv64g_zvl512b" | riscv64])
 
           -- inlining warnings happen in Compact
           , inputs ["**/Compact.c"] ? arg "-Wno-inline"



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/fa321f9c00874feb7900690feb90cf991c4e5c63...c166771226e3d4be115b5aaaffe0af81d7b80d27

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/fa321f9c00874feb7900690feb90cf991c4e5c63...c166771226e3d4be115b5aaaffe0af81d7b80d27
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20250201/e462f2c3/attachment-0001.html>