[Git][ghc/ghc][wip/ncg-simd] 3 commits: update docs & genprimopcode

Wed Jun 26 10:23:18 UTC 2024


sheaf pushed to branch wip/ncg-simd at Glasgow Haskell Compiler / GHC


Commits:
e13380c2 by sheaf at 2024-06-26T12:13:13+02:00
update docs & genprimopcode

- - - - -
3583556a by sheaf at 2024-06-26T12:13:24+02:00
Revert "fixup Reg.Liveness.equalBlockMaps"

This reverts commit bdfd16720337a004f1ea37737b5e6d69aef7a7a5.

- - - - -
dc286e78 by sheaf at 2024-06-26T12:14:23+02:00
Empty commit allowing metric increases

-------------------------
Metric Increase:
    T12707
    T13035
    T13379
    T3294
    T4801
    T5321FD
    T5321Fun
    T783
-------------------------

- - - - -


6 changed files:

- compiler/GHC/Builtin/primops.txt.pp
- compiler/GHC/CmmToAsm/PPC/Instr.hs
- compiler/GHC/CmmToAsm/Reg/Liveness.hs
- compiler/GHC/CmmToAsm/X86/Instr.hs
- docs/users_guide/9.12.1-notes.rst
- utils/genprimopcode/Main.hs


Changes:

=====================================
compiler/GHC/Builtin/primops.txt.pp
=====================================
@@ -145,7 +145,7 @@ defaults
    cheap            = { primOpOkForSpeculation _thisOp }
    strictness       = { \ arity -> mkClosedDmdSig (replicate arity topDmd) topDiv }
    fixity           = Nothing
-   llvm_only        = False
+   simd             = False
    vector           = []
    deprecated_msg   = {}      -- A non-empty message indicates deprecation
 
@@ -4022,86 +4022,86 @@ section "SIMD Vectors"
   ,<Word8,Word8#,64>,<Word16,Word16#,32>,<Word32,Word32#,16>,<Word64,Word64#,8>]
 
 primtype VECTOR
-   with llvm_only = True
+   with simd = True
         vector = ALL_VECTOR_TYPES
 
 primop VecBroadcastOp "broadcast#" GenPrimOp
    SCALAR -> VECTOR
    { Broadcast a scalar to all elements of a vector. }
-   with llvm_only = True
+   with simd = True
         vector = ALL_VECTOR_TYPES
 
 primop VecPackOp "pack#" GenPrimOp
    VECTUPLE -> VECTOR
    { Pack the elements of an unboxed tuple into a vector. }
-   with llvm_only = True
+   with simd = True
         vector = ALL_VECTOR_TYPES
 
 primop VecUnpackOp "unpack#" GenPrimOp
    VECTOR -> VECTUPLE
    { Unpack the elements of a vector into an unboxed tuple. #}
-   with llvm_only = True
+   with simd = True
         vector = ALL_VECTOR_TYPES
 
 primop VecInsertOp "insert#" GenPrimOp
    VECTOR -> SCALAR -> Int# -> VECTOR
    { Insert a scalar at the given position in a vector. }
    with effect = CanFail
-        llvm_only = True
+        simd = True
         vector = ALL_VECTOR_TYPES
 
 primop VecAddOp "plus#" GenPrimOp
    VECTOR -> VECTOR -> VECTOR
    { Add two vectors element-wise. }
    with commutable = True
-        llvm_only = True
+        simd = True
         vector = ALL_VECTOR_TYPES
 
 primop VecSubOp "minus#" GenPrimOp
    VECTOR -> VECTOR -> VECTOR
    { Subtract two vectors element-wise. }
-   with llvm_only = True
+   with simd = True
         vector = ALL_VECTOR_TYPES
 
 primop VecMulOp "times#" GenPrimOp
    VECTOR -> VECTOR -> VECTOR
    { Multiply two vectors element-wise. }
    with commutable = True
-        llvm_only = True
+        simd = True
         vector = ALL_VECTOR_TYPES
 
 primop VecDivOp "divide#" GenPrimOp
    VECTOR -> VECTOR -> VECTOR
    { Divide two vectors element-wise. }
    with effect = CanFail
-        llvm_only = True
+        simd = True
         vector = FLOAT_VECTOR_TYPES
 
 primop VecQuotOp "quot#" GenPrimOp
    VECTOR -> VECTOR -> VECTOR
    { Rounds towards zero element-wise. }
    with effect = CanFail
-        llvm_only = True
+        simd = True
         vector = INT_VECTOR_TYPES
 
 primop VecRemOp "rem#" GenPrimOp
    VECTOR -> VECTOR -> VECTOR
    { Satisfies @('quot#' x y) 'times#' y 'plus#' ('rem#' x y) == x@. }
    with effect = CanFail
-        llvm_only = True
+        simd = True
         vector = INT_VECTOR_TYPES
 
 primop VecNegOp "negate#" GenPrimOp
    VECTOR -> VECTOR
    { Negate element-wise. }
-   with llvm_only = True
+   with simd = True
         vector = SIGNED_VECTOR_TYPES
 
 primop VecIndexByteArrayOp "indexArray#" GenPrimOp
    ByteArray# -> Int# -> VECTOR
    { Read a vector from specified index of immutable array. }
    with effect = CanFail
-        llvm_only = True
+        simd = True
         vector = ALL_VECTOR_TYPES
 
 primop VecReadByteArrayOp "readArray#" GenPrimOp
@@ -4109,7 +4109,7 @@ primop VecReadByteArrayOp "readArray#" GenPrimOp
    { Read a vector from specified index of mutable array. }
    with effect = ReadWriteEffect
         can_fail_warning = YesWarnCanFail
-        llvm_only = True
+        simd = True
         vector = ALL_VECTOR_TYPES
 
 primop VecWriteByteArrayOp "writeArray#" GenPrimOp
@@ -4117,14 +4117,14 @@ primop VecWriteByteArrayOp "writeArray#" GenPrimOp
    { Write a vector to specified index of mutable array. }
    with effect = ReadWriteEffect
         can_fail_warning = YesWarnCanFail
-        llvm_only = True
+        simd = True
         vector = ALL_VECTOR_TYPES
 
 primop VecIndexOffAddrOp "indexOffAddr#" GenPrimOp
    Addr# -> Int# -> VECTOR
    { Reads vector; offset in bytes. }
    with effect = CanFail
-        llvm_only = True
+        simd = True
         vector = ALL_VECTOR_TYPES
 
 primop VecReadOffAddrOp "readOffAddr#" GenPrimOp
@@ -4132,7 +4132,7 @@ primop VecReadOffAddrOp "readOffAddr#" GenPrimOp
    { Reads vector; offset in bytes. }
    with effect = ReadWriteEffect
         can_fail_warning = YesWarnCanFail
-        llvm_only = True
+        simd = True
         vector = ALL_VECTOR_TYPES
 
 primop VecWriteOffAddrOp "writeOffAddr#" GenPrimOp
@@ -4140,7 +4140,7 @@ primop VecWriteOffAddrOp "writeOffAddr#" GenPrimOp
    { Write vector; offset in bytes. }
    with effect = ReadWriteEffect
         can_fail_warning = YesWarnCanFail
-        llvm_only = True
+        simd = True
         vector = ALL_VECTOR_TYPES
 
 
@@ -4148,7 +4148,7 @@ primop VecIndexScalarByteArrayOp "indexArrayAs#" GenPrimOp
    ByteArray# -> Int# -> VECTOR
    { Read a vector from specified index of immutable array of scalars; offset is in scalar elements. }
    with effect = CanFail
-        llvm_only = True
+        simd = True
         vector = ALL_VECTOR_TYPES
 
 primop VecReadScalarByteArrayOp "readArrayAs#" GenPrimOp
@@ -4156,7 +4156,7 @@ primop VecReadScalarByteArrayOp "readArrayAs#" GenPrimOp
    { Read a vector from specified index of mutable array of scalars; offset is in scalar elements. }
    with effect = ReadWriteEffect
         can_fail_warning = YesWarnCanFail
-        llvm_only = True
+        simd = True
         vector = ALL_VECTOR_TYPES
 
 primop VecWriteScalarByteArrayOp "writeArrayAs#" GenPrimOp
@@ -4164,14 +4164,14 @@ primop VecWriteScalarByteArrayOp "writeArrayAs#" GenPrimOp
    { Write a vector to specified index of mutable array of scalars; offset is in scalar elements. }
    with effect = ReadWriteEffect
         can_fail_warning = YesWarnCanFail
-        llvm_only = True
+        simd = True
         vector = ALL_VECTOR_TYPES
 
 primop VecIndexScalarOffAddrOp "indexOffAddrAs#" GenPrimOp
    Addr# -> Int# -> VECTOR
    { Reads vector; offset in scalar elements. }
    with effect = CanFail
-        llvm_only = True
+        simd = True
         vector = ALL_VECTOR_TYPES
 
 primop VecReadScalarOffAddrOp "readOffAddrAs#" GenPrimOp
@@ -4179,7 +4179,7 @@ primop VecReadScalarOffAddrOp "readOffAddrAs#" GenPrimOp
    { Reads vector; offset in scalar elements. }
    with effect = ReadWriteEffect
         can_fail_warning = YesWarnCanFail
-        llvm_only = True
+        simd = True
         vector = ALL_VECTOR_TYPES
 
 primop VecWriteScalarOffAddrOp "writeOffAddrAs#" GenPrimOp
@@ -4187,39 +4187,39 @@ primop VecWriteScalarOffAddrOp "writeOffAddrAs#" GenPrimOp
    { Write vector; offset in scalar elements. }
    with effect = ReadWriteEffect
         can_fail_warning = YesWarnCanFail
-        llvm_only = True
+        simd = True
         vector = ALL_VECTOR_TYPES
 
 primop   VecFMAdd   "fmadd#" GenPrimOp
    VECTOR -> VECTOR -> VECTOR -> VECTOR
    {Fused multiply-add operation @x*y+z@. See "GHC.Prim#fma".}
    with
-      llvm_only = True
+      simd = True
       vector = FLOAT_VECTOR_TYPES
 primop   VecFMSub   "fmsub#" GenPrimOp
    VECTOR -> VECTOR -> VECTOR -> VECTOR
    {Fused multiply-subtract operation @x*y-z@. See "GHC.Prim#fma".}
    with
-      llvm_only = True
+      simd = True
       vector = FLOAT_VECTOR_TYPES
 primop   VecFNMAdd   "fnmadd#" GenPrimOp
    VECTOR -> VECTOR -> VECTOR -> VECTOR
    {Fused negate-multiply-add operation @-x*y+z@. See "GHC.Prim#fma".}
    with
-      llvm_only = True
+      simd = True
       vector = FLOAT_VECTOR_TYPES
 primop   VecFNMSub   "fnmsub#" GenPrimOp
    VECTOR -> VECTOR -> VECTOR -> VECTOR
    {Fused negate-multiply-subtract operation @-x*y-z@. See "GHC.Prim#fma".}
    with
-      llvm_only = True
+      simd = True
       vector = FLOAT_VECTOR_TYPES
 
 primop VecShuffleOp "shuffle#" GenPrimOp
   VECTOR -> VECTOR -> INTVECTUPLE -> VECTOR
-  { Shuffle elements of the concatenation of the input two vectors
+  {Shuffle elements of the concatenation of the input two vectors
   into the result vector.}
-   with llvm_only = True
+   with simd = True
         vector = ALL_VECTOR_TYPES
 
 ------------------------------------------------------------------------


=====================================
compiler/GHC/CmmToAsm/PPC/Instr.hs
=====================================
@@ -394,7 +394,7 @@ regUsageOfInstr platform instr
     usage (src, dst) = RU (map mkFmt $ filter (interesting platform) src)
                           (map mkFmt $ filter (interesting platform) dst)
       -- SIMD NCG TODO: the format here is used for register spilling/unspilling.
-      -- As the AArch64 NCG does not currently support SIMD registers,
+      -- As the PowerPC NCG does not currently support SIMD registers,
       -- this simple logic is OK.
     mkFmt r = RegFormat r fmt
       where fmt = case targetClassOfReg platform r of


=====================================
compiler/GHC/CmmToAsm/Reg/Liveness.hs
=====================================
@@ -927,15 +927,10 @@ livenessSCCs platform blockmap done
 
                 -- probably the least efficient way to compare two
                 -- BlockMaps for equality.
-            equalBlockMaps
-                :: BlockMap (UniqSet RegFormat)
-                -> BlockMap (UniqSet RegFormat)
-                -> Bool
             equalBlockMaps a b
                 = a' == b'
-              where a' = map f $ mapToList a
-                    b' = map f $ mapToList b
-                    f (key,elt) = (key, map regFormatReg $ nonDetEltsUniqSet elt)
+              where a' = mapToList a
+                    b' = mapToList b
                     -- See Note [Unique Determinism and code generation]
 
 


=====================================
compiler/GHC/CmmToAsm/X86/Instr.hs
=====================================
@@ -206,7 +206,7 @@ data Instr
         --
         -- When used at a vector format, only moves the lower 64 bits of data;
         -- the rest of the data in the destination may either be zeroed or
-        -- preserved, depending on the specific format.
+        -- preserved, depending on the specific format and operands.
         | MOV Format Operand Operand
              -- N.B. Due to AT&T assembler quirks, when used with 'II64'
              -- 'Format' immediate source and memory target operand, the source


=====================================
docs/users_guide/9.12.1-notes.rst
=====================================
@@ -62,6 +62,12 @@ Compiler
 
 - :ghc-flag:`-Wderiving-typeable` has been added to :ghc-flag:`-Wall`.
 
+- SIMD support has been added to the X86 native code generator.
+  For the time being, only 128 bit wide vectors are supported, with most
+  floating-point operations implemented, together with a few integer vector
+  operations. Other operations still require the LLVM backend. Contributors
+  welcome!
+
 - i386 Windows support is now completely removed amid massive cleanup
   of legacy code to pave way for Arm64 Windows support (`#24883
   <https://gitlab.haskell.org/ghc/ghc/-/issues/24883>`_). Rest
@@ -98,6 +104,20 @@ Runtime system
 ``ghc-prim`` library
 ~~~~~~~~~~~~~~~~~~~~
 
+- New fused multiply-add instructions for vectors of floating-point values,
+  such as ``fmaddFloatX4# :: FloatX4# -> FloatX4# -> FloatX4# -> FloatX4#`` and
+  ``fnmsubDoubleX2# :: DoubleX2# -> DoubleX2# -> DoubleX2# -> DoubleX2#``.
+  These follow the same semantics as ``fmadd``/``fmsub``/``fnmadd``/``fnmsub``,
+  operating in parallel on vectors of floating-point values.
+
+- New vector shuffle instructions, such as ``shuffleFloatX4# :: FloatX4# -> FloatX4# -> (# Int#, Int#, Int#, Int# #) -> FloatX4#``.
+  These instructions take two input vectors and a collection of indices (which must
+  be compile-time literal integers), and construct a result vector by extracting
+  out the values at those indices. For instance, ``shuffleFloatX4#`` on input vectors with
+  components ``(# 0.1#, 11.1#, 22.1#, 33.1# #)`` and ``(# 44.1#, 55.1#, 66.1#, 77.1# #)``,
+  and indices ``(# 4#, 3#, 6#, 1# #)``, will return a vector with components
+  ``(# 44.1#, 33.1#, 66.1#, 11.1# #)``.
+
 ``ghc`` library
 ~~~~~~~~~~~~~~~
 


=====================================
utils/genprimopcode/Main.hs
=====================================
@@ -343,15 +343,10 @@ gen_hs_source (Info defaults entries) =
                  Section { } -> error "Section is not an entity"
              ]
 
-           extra options = case on_llvm_only options ++ can_fail options of
-             [m1,m2] -> "\n\n__/Warning:/__ this " ++ m1 ++ " and " ++ m2 ++ "."
+           extra options = case can_fail options of
              [m] -> "\n\n__/Warning:/__ this " ++ m ++ "."
              _ -> ""
 
-           on_llvm_only options
-             = [ "is only available on LLVM"
-               | Just (OptionTrue _) <- [lookup_attrib "llvm_only" options] ]
-
            can_fail options
              = [ "can fail with an unchecked exception"
                | Just (OptionEffect eff) <- [lookup_attrib "effect" options]
@@ -473,37 +468,50 @@ gen_wrappers (Info _ entries)
      ++ "import qualified GHC.Prim\n"
      ++ "import GHC.Tuple ()\n"
      ++ "import GHC.Prim (" ++ types ++ ")\n"
-     ++ unlines (concatMap f specs)
+     ++ unlines (concatMap mkWrapper wrappers)
      where
-        specs = filter (not.dodgy) $
-                filter (not.is_llvm_only) $
-                filter is_primop entries
-        tycons = foldr union [] $ map (tyconsIn . ty) specs
+        wrappers = filter wantWrapper entries
+        tycons = foldr union [] $ map (tyconsIn . ty) wrappers
         tycons' = filter (`notElem` [TyCon "()", TyCon "Bool"]) tycons
         types = concat $ intersperse ", " $ map show tycons'
-        f spec = let args = map (\n -> "a" ++ show n) [1 .. arity (ty spec)]
-                     src_name = wrap (name spec)
-                     lhs = src_name ++ " " ++ unwords args
-                     rhs = wrapQual (name spec) ++ " " ++ unwords args
-                 in ["{-# NOINLINE " ++ src_name ++ " #-}",
-                     src_name ++ " :: " ++ pprTy (ty spec),
-                     lhs ++ " = " ++ rhs]
+        mkWrapper spec =
+          let args = map (\n -> "a" ++ show n) [1 .. arity (ty spec)]
+              src_name = wrap (name spec)
+              lhs = src_name ++ " " ++ unwords args
+              rhs = wrapQual (name spec) ++ " " ++ unwords args
+          in ["{-# NOINLINE " ++ src_name ++ " #-}",
+              src_name ++ " :: " ++ pprTy (ty spec),
+              lhs ++ " = " ++ rhs]
         wrap nm | isLower (head nm) = nm
                 | otherwise = "(" ++ nm ++ ")"
         wrapQual nm | isLower (head nm) = "GHC.Prim." ++ nm
                     | otherwise         = "(GHC.Prim." ++ nm ++ ")"
 
-        dodgy spec
-           = name spec `elem`
-             [-- tagToEnum# is really magical, and can't have
+        wantWrapper :: Entry -> Bool
+        wantWrapper entry =
+          and
+            [ not $ name entry `elem` magicalPrimops
+            , is_primop entry
+                -- NB: is_primop rules out vector primops; not sure why this is necessary.
+            , not $ is_simd entry
+                -- Don't produce wrappers for SIMD primops.
+                --
+                -- SIMD NCG TODO: this was the logic in place when SIMD primops
+                -- were LLVM only; but now that this is no longer the case I
+                -- suppose this choice can be revisited?
+            ]
+
+        magicalPrimops :: [String]
+        magicalPrimops =
+          [ "tagToEnum#"
+              -- tagToEnum# is really magical, and can't have
               -- a wrapper since its implementation depends on
               -- the type of its result
-              "tagToEnum#"
-             ]
+          ]
 
-        is_llvm_only :: Entry -> Bool
-        is_llvm_only entry =
-            case lookup_attrib "llvm_only" (opts entry) of
+        is_simd :: Entry -> Bool
+        is_simd entry =
+            case lookup_attrib "simd" (opts entry) of
               Just (OptionTrue _) -> True
               _                   -> False
 



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/125fb19cec33bdd5bb0de9117222b6dbac665830...dc286e78d53b857cd2ac6f8b27103ffd85d2e3e4

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/125fb19cec33bdd5bb0de9117222b6dbac665830...dc286e78d53b857cd2ac6f8b27103ffd85d2e3e4
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20240626/1e9b2da6/attachment-0001.html>