[Git][ghc/ghc][wip/ncg-simd] NCG SIMD: fix shuffle lowering
sheaf (@sheaf)
gitlab at gitlab.haskell.org
Tue Jun 11 09:49:38 UTC 2024
sheaf pushed to branch wip/ncg-simd at Glasgow Haskell Compiler / GHC
Commits:
16faf448 by sheaf at 2024-06-11T11:38:23+02:00
NCG SIMD: fix shuffle lowering
- - - - -
5 changed files:
- compiler/GHC/CmmToAsm/X86/CodeGen.hs
- compiler/GHC/CmmToAsm/X86/Instr.hs
- + testsuite/tests/codeGen/should_run/Simd009b.hs
- + testsuite/tests/codeGen/should_run/Simd009c.hs
- + testsuite/tests/codeGen/should_run/simd009.hs
Changes:
=====================================
compiler/GHC/CmmToAsm/X86/CodeGen.hs
=====================================
@@ -1668,11 +1668,10 @@ getRegister' platform is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
VecFormat 2 FmtDouble _ ->
case is of
[i1, i2] -> case (i1, i2) of
- (i, j) | i == j
- -> let v = if i < 2 then v1 else v2
- mov = if i == 0 || i == 2 then MOVL else MOVH
- in unitOL (mov fmt (OpReg v) (OpReg dst)) `snocOL`
- SHUFPD fmt (ImmInt 0b00) (OpReg dst) dst
+ (0,0) -> unitOL (VSHUFPD fmt (ImmInt 0b00) (OpReg v1) v1 dst)
+ (1,1) -> unitOL (VSHUFPD fmt (ImmInt 0b11) (OpReg v1) v1 dst)
+ (2,2) -> unitOL (VSHUFPD fmt (ImmInt 0b00) (OpReg v2) v2 dst)
+ (3,3) -> unitOL (VSHUFPD fmt (ImmInt 0b11) (OpReg v2) v2 dst)
(0,1) -> unitOL (MOVU fmt (OpReg v1) (OpReg dst))
(2,3) -> unitOL (MOVU fmt (OpReg v2) (OpReg dst))
(1,0) -> unitOL (VSHUFPD fmt (ImmInt 0b01) (OpReg v1) v1 dst)
@@ -1680,9 +1679,9 @@ getRegister' platform is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
(0,2) -> unitOL (VSHUFPD fmt (ImmInt 0b00) (OpReg v2) v1 dst)
(2,0) -> unitOL (VSHUFPD fmt (ImmInt 0b00) (OpReg v1) v2 dst)
(0,3) -> unitOL (VSHUFPD fmt (ImmInt 0b10) (OpReg v2) v1 dst)
- (3,0) -> unitOL (VSHUFPD fmt (ImmInt 0b10) (OpReg v1) v2 dst)
+ (3,0) -> unitOL (VSHUFPD fmt (ImmInt 0b01) (OpReg v1) v2 dst)
(1,2) -> unitOL (VSHUFPD fmt (ImmInt 0b01) (OpReg v2) v1 dst)
- (2,1) -> unitOL (VSHUFPD fmt (ImmInt 0b01) (OpReg v1) v2 dst)
+ (2,1) -> unitOL (VSHUFPD fmt (ImmInt 0b10) (OpReg v1) v2 dst)
(1,3) -> unitOL (VSHUFPD fmt (ImmInt 0b11) (OpReg v2) v1 dst)
(3,1) -> unitOL (VSHUFPD fmt (ImmInt 0b11) (OpReg v1) v2 dst)
_ -> pprPanic "vector shuffle: indices out of bounds 0 <= i <= 3" (ppr is)
@@ -1701,11 +1700,11 @@ getRegister' platform is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
| i1 <= 3, i2 <= 3
, i3 >= 4, i4 >= 4
, let imm = i1 + i2 `shiftL` 2 + (i3 - 4) `shiftL` 4 + (i4 - 4) `shiftL` 6
- -> unitOL (VSHUFPS fmt (ImmInt imm) (OpReg v1) v2 dst)
+ -> unitOL (VSHUFPS fmt (ImmInt imm) (OpReg v2) v1 dst)
| i1 >= 4, i2 >= 4
, i3 <= 3, i4 <= 3
- , let imm = i3 + i4 `shiftL` 2 + (i1 - 4) `shiftL` 4 + (i2 - 4) `shiftL` 6
- -> unitOL (VSHUFPS fmt (ImmInt imm) (OpReg v2) v1 dst)
+ , let imm = (i1 - 4) + (i2 - 4) `shiftL` 2 + i3 `shiftL` 4 + i4 `shiftL` 6
+ -> unitOL (VSHUFPS fmt (ImmInt imm) (OpReg v1) v2 dst)
| otherwise
->
-- Fall-back code with 4 INSERTPS operations.
=====================================
compiler/GHC/CmmToAsm/X86/Instr.hs
=====================================
@@ -48,9 +48,7 @@ import GHC.CmmToAsm.Format
import GHC.CmmToAsm.Types
import GHC.CmmToAsm.Utils
import GHC.CmmToAsm.Instr (RegUsage(..), noUsage)
-import GHC.Platform.Reg.Class
import GHC.Platform.Reg
-import GHC.CmmToAsm.Reg.Target
import GHC.CmmToAsm.Config
import GHC.Cmm.BlockId
@@ -500,7 +498,7 @@ regUsageOfInstr platform instr
VBROADCAST fmt src dst -> mkRU fmt (use_EA src []) [dst]
VEXTRACT fmt off src dst -> mkRU fmt ((use_R off []) ++ [src]) (use_R dst [])
INSERTPS fmt off src dst
- -> mkRU fmt ((use_R off []) ++ (use_R src []) ++ [dst]) [dst]
+ -> mkRU fmt ((use_R off []) ++ (use_R src [])) [dst]
VMOVU fmt src dst -> mkRU fmt (use_R src []) (use_R dst [])
MOVU fmt src dst -> mkRU fmt (use_R src []) (use_R dst [])
=====================================
testsuite/tests/codeGen/should_run/Simd009b.hs
=====================================
@@ -0,0 +1,70 @@
+{-# OPTIONS_GHC -O2 #-}
+{-# OPTIONS_GHC -msse #-}
+{-# OPTIONS_GHC -msse2 #-}
+{-# OPTIONS_GHC -msse4 #-}
+{-# OPTIONS_GHC -mavx #-}
+{-# LANGUAGE MagicHash #-}
+{-# LANGUAGE UnboxedTuples #-}
+{-# LANGUAGE RecordWildCards #-}
+
+module Simd009b where
+
+import Control.Monad ( unless )
+import Data.Foldable ( for_ )
+import GHC.Exts
+
+data FloatX4 = FX4# FloatX4#
+
+instance Show FloatX4 where
+ show (FX4# f) = case (unpackFloatX4# f) of
+ (# a, b, c, d #) -> show ((F# a), (F# b), (F# c), (F# d))
+
+
+instance Eq FloatX4 where
+ (FX4# a) == (FX4# b)
+ = case (unpackFloatX4# a) of
+ (# a1, a2, a3, a4 #) ->
+ case (unpackFloatX4# b) of
+ (# b1, b2, b3, b4 #) -> (F# a1) == (F# b1) &&
+ (F# a2) == (F# b2) &&
+ (F# a3) == (F# b3) &&
+ (F# a4) == (F# b4)
+
+data DoubleX2 = DX2# DoubleX2#
+
+instance Show DoubleX2 where
+ show (DX2# d) = case (unpackDoubleX2# d) of
+ (# a, b #) -> show ((D# a), (D# b))
+
+instance Eq DoubleX2 where
+ (DX2# a) == (DX2# b)
+ = case (unpackDoubleX2# a) of
+ (# a1, a2 #) ->
+ case (unpackDoubleX2# b) of
+ (# b1, b2 #) -> (D# a1) == (D# b1) &&
+ (D# a2) == (D# b2)
+
+myShuffleDoubleX2 :: DoubleX2# -> DoubleX2# -> (# Int#, Int# #) -> DoubleX2#
+myShuffleDoubleX2 v1 v2 (# i1, i2 #) =
+ case unpackDoubleX2# v1 of
+ (# d1, d2 #) ->
+ case unpackDoubleX2# v2 of
+ (# d3, d4 #) ->
+ let ds = [ D# d1, D# d2, D# d3, D# d4 ]
+ D# x = ds !! I# i1
+ D# y = ds !! I# i2
+ in packDoubleX2# (# x, y #)
+
+myShuffleFloatX4 :: FloatX4# -> FloatX4# -> (# Int#, Int#, Int#, Int# #) -> FloatX4#
+myShuffleFloatX4 v1 v2 (# i1, i2, i3, i4 #) =
+ case unpackFloatX4# v1 of
+ (# f1, f2, f3, f4 #) ->
+ case unpackFloatX4# v2 of
+ (# f5, f6, f7, f8 #) ->
+ let fs = [ F# f1, F# f2, F# f3, F# f4
+ , F# f5, F# f6, F# f7, F# f8 ]
+ F# x = fs !! I# i1
+ F# y = fs !! I# i2
+ F# z = fs !! I# i3
+ F# w = fs !! I# i4
+ in packFloatX4# (# x, y, z, w #)
=====================================
testsuite/tests/codeGen/should_run/Simd009c.hs
=====================================
@@ -0,0 +1,53 @@
+{-# OPTIONS_GHC -O2 #-}
+{-# OPTIONS_GHC -msse #-}
+{-# OPTIONS_GHC -msse2 #-}
+{-# OPTIONS_GHC -msse4 #-}
+{-# OPTIONS_GHC -mavx #-}
+{-# LANGUAGE MagicHash #-}
+{-# LANGUAGE UnboxedTuples #-}
+{-# LANGUAGE RecordWildCards #-}
+{-# LANGUAGE TemplateHaskell #-}
+
+module Simd009c where
+
+import Control.Monad ( unless )
+import Data.Foldable ( for_ )
+import GHC.Exts
+import Language.Haskell.TH ( CodeQ )
+import Language.Haskell.TH.Syntax ( Lift(liftTyped) )
+
+import Simd009b
+
+floatX4ShuffleTest :: CodeQ FloatX4 -> CodeQ FloatX4 -> CodeQ (Int, Int, Int, Int) -> CodeQ (IO ())
+floatX4ShuffleTest v1 v2 ijkl =
+ [||
+ do
+ let (I# i#, I# j#, I# k#, I# l#) = $$ijkl
+ FX4# v1# = $$v1
+ FX4# v2# = $$v2
+ s1 = shuffleFloatX4# v1# v2# (# i#, j#, k#, l# #)
+ s2 = myShuffleFloatX4 v1# v2# (# i#, j#, k#, l# #)
+ unless (FX4# s1 == FX4# s2) $ do
+ putStrLn $ "Failed test: FloatX4# shuffle " ++ show (I# i#, I# j#, I# k#, I# l# )
+ putStrLn $ " SIMD: " ++ show (FX4# s1)
+ putStrLn $ "reference: " ++ show (FX4# s2)
+ ||]
+
+doubleX2ShuffleTest :: CodeQ DoubleX2 -> CodeQ DoubleX2 -> CodeQ (Int, Int) -> CodeQ (IO ())
+doubleX2ShuffleTest v1 v2 ij =
+ [||
+ do
+ let (I# i#, I# j#) = $$ij
+ DX2# v1# = $$v1
+ DX2# v2# = $$v2
+ s1 = shuffleDoubleX2# v1# v2# (# i#, j# #)
+ s2 = myShuffleDoubleX2 v1# v2# (# i#, j# #)
+ unless (DX2# s1 == DX2# s2) $ do
+ putStrLn $ "Failed test:DoubleX2# shuffle " ++ show (I# i#, I# j#)
+ putStrLn $ " SIMD: " ++ show (DX2# s1)
+ putStrLn $ "reference: " ++ show (DX2# s2)
+ ||]
+
+forQ_ :: Lift i => [i] -> (CodeQ i -> CodeQ (IO ())) -> CodeQ (IO ())
+forQ_ [] _ = [|| return () ||]
+forQ_ (i:is) f = [|| $$( f (liftTyped i) ) *> $$( forQ_ is f ) ||]
=====================================
testsuite/tests/codeGen/should_run/simd009.hs
=====================================
@@ -0,0 +1,32 @@
+{-# OPTIONS_GHC -msse #-}
+{-# OPTIONS_GHC -msse2 #-}
+{-# OPTIONS_GHC -msse4 #-}
+{-# OPTIONS_GHC -mavx #-}
+{-# LANGUAGE MagicHash #-}
+{-# LANGUAGE UnboxedTuples #-}
+{-# LANGUAGE RecordWildCards #-}
+{-# LANGUAGE TemplateHaskell #-}
+-- test shuffle instructions
+
+import Control.Monad ( unless )
+import Data.Foldable ( for_ )
+import GHC.Exts
+import Language.Haskell.TH ( CodeQ )
+
+import Simd009b
+import Simd009c
+
+main :: IO ()
+main = do
+ let x = packDoubleX2# (# 1.1##, 2.2## #)
+ y = packDoubleX2# (# 3.3##, 4.4## #)
+ a = packFloatX4# (# 1.1#, 2.2#, 3.3#, 4.4# #)
+ b = packFloatX4# (# 5.5#, 6.6#, 7.7#, 8.8# #)
+ $$(forQ_ [(i,j) | i <- [0..3], j <- [0..3]] (doubleX2ShuffleTest [|| DX2# x ||] [|| DX2# y ||]) )
+ $$(forQ_ [ (0,0,0,0), (3,3,3,3), (7,7,7,7)
+ , (0,1,2,3), (4,5,6,7)
+ , (3,2,1,0), (7,6,5,4)
+ , (0,1,4,5), (4,5,0,1)
+ , (2,1,7,6), (7,6,2,1)
+ , (1,2,7,7), (6,6,3,2)
+ ] (floatX4ShuffleTest [|| FX4# a ||] [|| FX4# b ||]) )
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/16faf448673a302c7c0a54d36e8f61a7519afcac
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/16faf448673a302c7c0a54d36e8f61a7519afcac
You're receiving this email because of your account on gitlab.haskell.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20240611/962fe8e3/attachment-0001.html>
More information about the ghc-commits
mailing list