[Git][ghc/ghc][master] Support 128-bit SIMD on AArch64 via LLVM backend
Marge Bot (@marge-bot)
gitlab at gitlab.haskell.org
Tue Aug 22 19:13:39 UTC 2023
Marge Bot pushed to branch master at Glasgow Haskell Compiler / GHC
Commits:
015886ec by ARATA Mizuki at 2023-08-22T15:13:13-04:00
Support 128-bit SIMD on AArch64 via LLVM backend
- - - - -
6 changed files:
- compiler/CodeGen.Platform.h
- compiler/GHC/Cmm/CallConv.hs
- compiler/GHC/StgToCmm/Prim.hs
- rts/include/stg/MachRegs.h
- testsuite/tests/codeGen/should_run/all.T
- testsuite/tests/unboxedsums/all.T
Changes:
=====================================
compiler/CodeGen.Platform.h
=====================================
@@ -203,6 +203,39 @@ import GHC.Platform.Reg
# define d29 61
# define d30 62
# define d31 63
+
+# define q0 32
+# define q1 33
+# define q2 34
+# define q3 35
+# define q4 36
+# define q5 37
+# define q6 38
+# define q7 39
+# define q8 40
+# define q9 41
+# define q10 42
+# define q11 43
+# define q12 44
+# define q13 45
+# define q14 46
+# define q15 47
+# define q16 48
+# define q17 49
+# define q18 50
+# define q19 51
+# define q20 52
+# define q21 53
+# define q22 54
+# define q23 55
+# define q24 56
+# define q25 57
+# define q26 58
+# define q27 59
+# define q28 60
+# define q29 61
+# define q30 62
+# define q31 63
#endif
# if defined(MACHREGS_darwin)
=====================================
compiler/GHC/Cmm/CallConv.hs
=====================================
@@ -193,8 +193,10 @@ realLongRegs platform = map LongReg $ regList (pc_MAX_Real_Long_REG (pl
realXmmRegNos :: Platform -> [Int]
realXmmRegNos platform
- | isSse2Enabled platform = regList (pc_MAX_Real_XMM_REG (platformConstants platform))
- | otherwise = []
+ | isSse2Enabled platform || platformArch platform == ArchAArch64
+ = regList (pc_MAX_Real_XMM_REG (platformConstants platform))
+ | otherwise
+ = []
regList :: Int -> [Int]
regList n = [1 .. n]
=====================================
compiler/GHC/StgToCmm/Prim.hs
=====================================
@@ -2303,7 +2303,7 @@ vecCmmCat FloatVec = cmmFloat
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-- Check to make sure that we can generate code for the specified vector type
-- given the current set of dynamic flags.
--- Currently these checks are specific to x86 and x86_64 architecture.
+-- Currently these checks are specific to x86, x86_64 and AArch64 architectures.
-- This should be fixed!
-- In particular,
-- 1) Add better support for other architectures! (this may require a redesign)
@@ -2334,27 +2334,38 @@ vecCmmCat FloatVec = cmmFloat
checkVecCompatibility :: StgToCmmConfig -> PrimOpVecCat -> Length -> Width -> FCode ()
checkVecCompatibility cfg vcat l w =
case stgToCmmVecInstrsErr cfg of
- Nothing -> check vecWidth vcat l w -- We are in a compatible backend
- Just err -> sorry err -- incompatible backend, do panic
+ Nothing | isX86 -> checkX86 vecWidth vcat l w
+ | platformArch platform == ArchAArch64 -> checkAArch64 vecWidth
+ | otherwise -> sorry "SIMD vector instructions are not supported on this architecture."
+ Just err -> sorry err -- incompatible backend, do panic
where
platform = stgToCmmPlatform cfg
- check :: Width -> PrimOpVecCat -> Length -> Width -> FCode ()
- check W128 FloatVec 4 W32 | not (isSseEnabled platform) =
+ isX86 = case platformArch platform of
+ ArchX86_64 -> True
+ ArchX86 -> True
+ _ -> False
+ checkX86 :: Width -> PrimOpVecCat -> Length -> Width -> FCode ()
+ checkX86 W128 FloatVec 4 W32 | not (isSseEnabled platform) =
sorry $ "128-bit wide single-precision floating point " ++
"SIMD vector instructions require at least -msse."
- check W128 _ _ _ | not (isSse2Enabled platform) =
+ checkX86 W128 _ _ _ | not (isSse2Enabled platform) =
sorry $ "128-bit wide integer and double precision " ++
"SIMD vector instructions require at least -msse2."
- check W256 FloatVec _ _ | not (stgToCmmAvx cfg) =
+ checkX86 W256 FloatVec _ _ | not (stgToCmmAvx cfg) =
sorry $ "256-bit wide floating point " ++
"SIMD vector instructions require at least -mavx."
- check W256 _ _ _ | not (stgToCmmAvx2 cfg) =
+ checkX86 W256 _ _ _ | not (stgToCmmAvx2 cfg) =
sorry $ "256-bit wide integer " ++
"SIMD vector instructions require at least -mavx2."
- check W512 _ _ _ | not (stgToCmmAvx512f cfg) =
+ checkX86 W512 _ _ _ | not (stgToCmmAvx512f cfg) =
sorry $ "512-bit wide " ++
"SIMD vector instructions require -mavx512f."
- check _ _ _ _ = return ()
+ checkX86 _ _ _ _ = return ()
+
+ checkAArch64 :: Width -> FCode ()
+ checkAArch64 W256 = sorry $ "256-bit wide SIMD vector instructions are not supported."
+ checkAArch64 W512 = sorry $ "512-bit wide SIMD vector instructions are not supported."
+ checkAArch64 _ = return ()
vecWidth = typeWidth (vecVmmType vcat l w)
=====================================
rts/include/stg/MachRegs.h
=====================================
@@ -457,6 +457,12 @@ the stack. See Note [Overlapping global registers] for implications.
#define REG_D3 d14
#define REG_D4 d15
+#define REG_XMM1 q4
+#define REG_XMM2 q5
+
+#define CALLER_SAVES_XMM1
+#define CALLER_SAVES_XMM2
+
/* -----------------------------------------------------------------------------
The s390x register mapping
=====================================
testsuite/tests/codeGen/should_run/all.T
=====================================
@@ -222,7 +222,7 @@ test('T21186', normal, compile_and_run, [''])
test('T20640a', normal, compile_and_run, [''])
test('T20640b', normal, compile_and_run, [''])
test('T22296',[only_ways(llvm_ways)
- ,unless(arch('x86_64'), skip)],compile_and_run,[''])
+ ,unless(arch('x86_64') or arch('aarch64'), skip)],compile_and_run,[''])
test('T22798', normal, compile_and_run, ['-fregs-graph'])
test('CheckBoundsOK', normal, compile_and_run, ['-fcheck-prim-bounds'])
test('OrigThunkInfo', normal, compile_and_run, ['-forig-thunk-info'])
=====================================
testsuite/tests/unboxedsums/all.T
=====================================
@@ -38,7 +38,7 @@ test('T20859', normal, compile, [''])
test('T22187',[only_ways(llvm_ways)],compile,[''])
test('T22187_run',[only_ways(llvm_ways)
- ,unless(arch('x86_64'), skip)],compile_and_run,[''])
+ ,unless(arch('x86_64') or arch('aarch64'), skip)],compile_and_run,[''])
test('unpack_sums_1', normal, compile_and_run, ['-O'])
test('unpack_sums_2', normal, compile, ['-O'])
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/015886ec78e598f850c4202efdee239bac63b8c7
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/015886ec78e598f850c4202efdee239bac63b8c7
You're receiving this email because of your account on gitlab.haskell.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20230822/00603e62/attachment-0001.html>
More information about the ghc-commits mailing list