[Git][ghc/ghc][master] Support 128-bit SIMD on AArch64 via LLVM backend

Marge Bot (@marge-bot) gitlab at gitlab.haskell.org
Tue Aug 22 19:13:39 UTC 2023



Marge Bot pushed to branch master at Glasgow Haskell Compiler / GHC


Commits:
015886ec by ARATA Mizuki at 2023-08-22T15:13:13-04:00
Support 128-bit SIMD on AArch64 via LLVM backend

- - - - -


6 changed files:

- compiler/CodeGen.Platform.h
- compiler/GHC/Cmm/CallConv.hs
- compiler/GHC/StgToCmm/Prim.hs
- rts/include/stg/MachRegs.h
- testsuite/tests/codeGen/should_run/all.T
- testsuite/tests/unboxedsums/all.T


Changes:

=====================================
compiler/CodeGen.Platform.h
=====================================
@@ -203,6 +203,39 @@ import GHC.Platform.Reg
 # define d29 61
 # define d30 62
 # define d31 63
+
+# define q0 32
+# define q1 33
+# define q2 34
+# define q3 35
+# define q4 36
+# define q5 37
+# define q6 38
+# define q7 39
+# define q8 40
+# define q9 41
+# define q10 42
+# define q11 43
+# define q12 44
+# define q13 45
+# define q14 46
+# define q15 47
+# define q16 48
+# define q17 49
+# define q18 50
+# define q19 51
+# define q20 52
+# define q21 53
+# define q22 54
+# define q23 55
+# define q24 56
+# define q25 57
+# define q26 58
+# define q27 59
+# define q28 60
+# define q29 61
+# define q30 62
+# define q31 63
 #endif
 
 # if defined(MACHREGS_darwin)


=====================================
compiler/GHC/Cmm/CallConv.hs
=====================================
@@ -193,8 +193,10 @@ realLongRegs    platform = map LongReg    $ regList (pc_MAX_Real_Long_REG    (pl
 
 realXmmRegNos :: Platform -> [Int]
 realXmmRegNos platform
-    | isSse2Enabled platform = regList (pc_MAX_Real_XMM_REG (platformConstants platform))
-    | otherwise              = []
+    | isSse2Enabled platform || platformArch platform == ArchAArch64
+    = regList (pc_MAX_Real_XMM_REG (platformConstants platform))
+    | otherwise
+    = []
 
 regList :: Int -> [Int]
 regList n = [1 .. n]


=====================================
compiler/GHC/StgToCmm/Prim.hs
=====================================
@@ -2303,7 +2303,7 @@ vecCmmCat FloatVec = cmmFloat
 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 -- Check to make sure that we can generate code for the specified vector type
 -- given the current set of dynamic flags.
--- Currently these checks are specific to x86 and x86_64 architecture.
+-- Currently these checks are specific to x86, x86_64 and AArch64 architectures.
 -- This should be fixed!
 -- In particular,
 -- 1) Add better support for other architectures! (this may require a redesign)
@@ -2334,27 +2334,38 @@ vecCmmCat FloatVec = cmmFloat
 checkVecCompatibility :: StgToCmmConfig -> PrimOpVecCat -> Length -> Width -> FCode ()
 checkVecCompatibility cfg vcat l w =
   case stgToCmmVecInstrsErr cfg of
-    Nothing  -> check vecWidth vcat l w  -- We are in a compatible backend
-    Just err -> sorry err                -- incompatible backend, do panic
+    Nothing | isX86 -> checkX86 vecWidth vcat l w
+            | platformArch platform == ArchAArch64 -> checkAArch64 vecWidth
+            | otherwise -> sorry "SIMD vector instructions are not supported on this architecture."
+    Just err -> sorry err  -- incompatible backend, do panic
   where
     platform = stgToCmmPlatform cfg
-    check :: Width -> PrimOpVecCat -> Length -> Width -> FCode ()
-    check W128 FloatVec 4 W32 | not (isSseEnabled platform) =
+    isX86 = case platformArch platform of
+      ArchX86_64 -> True
+      ArchX86 -> True
+      _ -> False
+    checkX86 :: Width -> PrimOpVecCat -> Length -> Width -> FCode ()
+    checkX86 W128 FloatVec 4 W32 | not (isSseEnabled platform) =
         sorry $ "128-bit wide single-precision floating point " ++
                 "SIMD vector instructions require at least -msse."
-    check W128 _ _ _ | not (isSse2Enabled platform) =
+    checkX86 W128 _ _ _ | not (isSse2Enabled platform) =
         sorry $ "128-bit wide integer and double precision " ++
                 "SIMD vector instructions require at least -msse2."
-    check W256 FloatVec _ _ | not (stgToCmmAvx cfg) =
+    checkX86 W256 FloatVec _ _ | not (stgToCmmAvx cfg) =
         sorry $ "256-bit wide floating point " ++
                 "SIMD vector instructions require at least -mavx."
-    check W256 _ _ _ | not (stgToCmmAvx2 cfg) =
+    checkX86 W256 _ _ _ | not (stgToCmmAvx2 cfg) =
         sorry $ "256-bit wide integer " ++
                 "SIMD vector instructions require at least -mavx2."
-    check W512 _ _ _ | not (stgToCmmAvx512f cfg) =
+    checkX86 W512 _ _ _ | not (stgToCmmAvx512f cfg) =
         sorry $ "512-bit wide " ++
                 "SIMD vector instructions require -mavx512f."
-    check _ _ _ _ = return ()
+    checkX86 _ _ _ _ = return ()
+
+    checkAArch64 :: Width -> FCode ()
+    checkAArch64 W256 = sorry $ "256-bit wide SIMD vector instructions are not supported."
+    checkAArch64 W512 = sorry $ "512-bit wide SIMD vector instructions are not supported."
+    checkAArch64 _ = return ()
 
     vecWidth = typeWidth (vecVmmType vcat l w)
 


=====================================
rts/include/stg/MachRegs.h
=====================================
@@ -457,6 +457,12 @@ the stack. See Note [Overlapping global registers] for implications.
 #define REG_D3          d14
 #define REG_D4          d15
 
+#define REG_XMM1        q4
+#define REG_XMM2        q5
+
+#define CALLER_SAVES_XMM1
+#define CALLER_SAVES_XMM2
+
 /* -----------------------------------------------------------------------------
    The s390x register mapping
 


=====================================
testsuite/tests/codeGen/should_run/all.T
=====================================
@@ -222,7 +222,7 @@ test('T21186', normal, compile_and_run, [''])
 test('T20640a', normal, compile_and_run, [''])
 test('T20640b', normal, compile_and_run, [''])
 test('T22296',[only_ways(llvm_ways)
-              ,unless(arch('x86_64'), skip)],compile_and_run,[''])
+              ,unless(arch('x86_64') or arch('aarch64'), skip)],compile_and_run,[''])
 test('T22798', normal, compile_and_run, ['-fregs-graph'])
 test('CheckBoundsOK', normal, compile_and_run, ['-fcheck-prim-bounds'])
 test('OrigThunkInfo', normal, compile_and_run, ['-forig-thunk-info'])


=====================================
testsuite/tests/unboxedsums/all.T
=====================================
@@ -38,7 +38,7 @@ test('T20859', normal, compile, [''])
 
 test('T22187',[only_ways(llvm_ways)],compile,[''])
 test('T22187_run',[only_ways(llvm_ways)
-                  ,unless(arch('x86_64'), skip)],compile_and_run,[''])
+                  ,unless(arch('x86_64') or arch('aarch64'), skip)],compile_and_run,[''])
 
 test('unpack_sums_1', normal, compile_and_run, ['-O'])
 test('unpack_sums_2', normal, compile, ['-O'])



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/015886ec78e598f850c4202efdee239bac63b8c7

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/015886ec78e598f850c4202efdee239bac63b8c7
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20230822/00603e62/attachment-0001.html>


More information about the ghc-commits mailing list