[Git][ghc/ghc][wip/ncg-simd] NCG SIMD: fix shuffle lowering

sheaf (@sheaf) gitlab at gitlab.haskell.org
Tue Jun 11 09:49:38 UTC 2024



sheaf pushed to branch wip/ncg-simd at Glasgow Haskell Compiler / GHC


Commits:
16faf448 by sheaf at 2024-06-11T11:38:23+02:00
NCG SIMD: fix shuffle lowering

- - - - -


5 changed files:

- compiler/GHC/CmmToAsm/X86/CodeGen.hs
- compiler/GHC/CmmToAsm/X86/Instr.hs
- + testsuite/tests/codeGen/should_run/Simd009b.hs
- + testsuite/tests/codeGen/should_run/Simd009c.hs
- + testsuite/tests/codeGen/should_run/simd009.hs


Changes:

=====================================
compiler/GHC/CmmToAsm/X86/CodeGen.hs
=====================================
@@ -1668,11 +1668,10 @@ getRegister' platform is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
         VecFormat 2 FmtDouble _ ->
           case is of
             [i1, i2] -> case (i1, i2) of
-              (i, j) | i == j
-                -> let v = if i < 2 then v1 else v2
-                       mov = if i == 0 || i == 2 then MOVL else MOVH
-                   in unitOL (mov fmt (OpReg v) (OpReg dst)) `snocOL`
-                      SHUFPD fmt (ImmInt 0b00) (OpReg dst) dst
+              (0,0) -> unitOL (VSHUFPD fmt (ImmInt 0b00) (OpReg v1) v1 dst)
+              (1,1) -> unitOL (VSHUFPD fmt (ImmInt 0b11) (OpReg v1) v1 dst)
+              (2,2) -> unitOL (VSHUFPD fmt (ImmInt 0b00) (OpReg v2) v2 dst)
+              (3,3) -> unitOL (VSHUFPD fmt (ImmInt 0b11) (OpReg v2) v2 dst)
               (0,1) -> unitOL (MOVU fmt (OpReg v1) (OpReg dst))
               (2,3) -> unitOL (MOVU fmt (OpReg v2) (OpReg dst))
               (1,0) -> unitOL (VSHUFPD fmt (ImmInt 0b01) (OpReg v1) v1 dst)
@@ -1680,9 +1679,9 @@ getRegister' platform is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
               (0,2) -> unitOL (VSHUFPD fmt (ImmInt 0b00) (OpReg v2) v1 dst)
               (2,0) -> unitOL (VSHUFPD fmt (ImmInt 0b00) (OpReg v1) v2 dst)
               (0,3) -> unitOL (VSHUFPD fmt (ImmInt 0b10) (OpReg v2) v1 dst)
-              (3,0) -> unitOL (VSHUFPD fmt (ImmInt 0b10) (OpReg v1) v2 dst)
+              (3,0) -> unitOL (VSHUFPD fmt (ImmInt 0b01) (OpReg v1) v2 dst)
               (1,2) -> unitOL (VSHUFPD fmt (ImmInt 0b01) (OpReg v2) v1 dst)
-              (2,1) -> unitOL (VSHUFPD fmt (ImmInt 0b01) (OpReg v1) v2 dst)
+              (2,1) -> unitOL (VSHUFPD fmt (ImmInt 0b10) (OpReg v1) v2 dst)
               (1,3) -> unitOL (VSHUFPD fmt (ImmInt 0b11) (OpReg v2) v1 dst)
               (3,1) -> unitOL (VSHUFPD fmt (ImmInt 0b11) (OpReg v1) v2 dst)
               _ -> pprPanic "vector shuffle: indices out of bounds 0 <= i <= 3" (ppr is)
@@ -1701,11 +1700,11 @@ getRegister' platform is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
               | i1 <= 3, i2 <= 3
               , i3 >= 4, i4 >= 4
               , let imm = i1 + i2 `shiftL` 2 + (i3 - 4) `shiftL` 4 + (i4 - 4) `shiftL` 6
-              -> unitOL (VSHUFPS fmt (ImmInt imm) (OpReg v1) v2 dst)
+              -> unitOL (VSHUFPS fmt (ImmInt imm) (OpReg v2) v1 dst)
               | i1 >= 4, i2 >= 4
               , i3 <= 3, i4 <= 3
-              , let imm = i3 + i4 `shiftL` 2 + (i1 - 4) `shiftL` 4 + (i2 - 4) `shiftL` 6
-              -> unitOL (VSHUFPS fmt (ImmInt imm) (OpReg v2) v1 dst)
+              , let imm = (i1 - 4) + (i2 - 4) `shiftL` 2 + i3 `shiftL` 4 + i4 `shiftL` 6
+              -> unitOL (VSHUFPS fmt (ImmInt imm) (OpReg v1) v2 dst)
               | otherwise
               ->
               -- Fall-back code with 4 INSERTPS operations.


=====================================
compiler/GHC/CmmToAsm/X86/Instr.hs
=====================================
@@ -48,9 +48,7 @@ import GHC.CmmToAsm.Format
 import GHC.CmmToAsm.Types
 import GHC.CmmToAsm.Utils
 import GHC.CmmToAsm.Instr (RegUsage(..), noUsage)
-import GHC.Platform.Reg.Class
 import GHC.Platform.Reg
-import GHC.CmmToAsm.Reg.Target
 import GHC.CmmToAsm.Config
 
 import GHC.Cmm.BlockId
@@ -500,7 +498,7 @@ regUsageOfInstr platform instr
     VBROADCAST fmt src dst   -> mkRU fmt (use_EA src []) [dst]
     VEXTRACT     fmt off src dst -> mkRU fmt ((use_R off []) ++ [src]) (use_R dst [])
     INSERTPS     fmt off src dst
-      -> mkRU fmt ((use_R off []) ++ (use_R src []) ++ [dst]) [dst]
+      -> mkRU fmt ((use_R off []) ++ (use_R src [])) [dst]
 
     VMOVU        fmt src dst   -> mkRU fmt (use_R src []) (use_R dst [])
     MOVU         fmt src dst   -> mkRU fmt (use_R src []) (use_R dst [])


=====================================
testsuite/tests/codeGen/should_run/Simd009b.hs
=====================================
@@ -0,0 +1,70 @@
+{-# OPTIONS_GHC -O2 #-}
+{-# OPTIONS_GHC -msse #-}
+{-# OPTIONS_GHC -msse2 #-}
+{-# OPTIONS_GHC -msse4 #-}
+{-# OPTIONS_GHC -mavx #-}
+{-# LANGUAGE MagicHash #-}
+{-# LANGUAGE UnboxedTuples #-}
+{-# LANGUAGE RecordWildCards #-}
+
+module Simd009b where
+
+import Control.Monad ( unless )
+import Data.Foldable ( for_ )
+import GHC.Exts
+
+data FloatX4  = FX4# FloatX4#
+
+instance Show FloatX4 where
+  show (FX4# f) = case (unpackFloatX4# f) of
+    (# a, b, c, d #) -> show ((F# a), (F# b), (F# c), (F# d))
+
+
+instance Eq FloatX4 where
+  (FX4# a) == (FX4# b)
+    = case (unpackFloatX4# a) of
+        (# a1, a2, a3, a4 #) ->
+          case (unpackFloatX4# b) of
+            (# b1, b2, b3, b4 #) -> (F# a1) == (F# b1) &&
+                                    (F# a2) == (F# b2) &&
+                                    (F# a3) == (F# b3) &&
+                                    (F# a4) == (F# b4)
+
+data DoubleX2 = DX2# DoubleX2#
+
+instance Show DoubleX2 where
+  show (DX2# d) = case (unpackDoubleX2# d) of
+    (# a, b #) -> show ((D# a), (D# b))
+
+instance Eq DoubleX2 where
+  (DX2# a) == (DX2# b)
+    = case (unpackDoubleX2# a) of
+        (# a1, a2 #) ->
+          case (unpackDoubleX2# b) of
+            (# b1, b2 #) -> (D# a1) == (D# b1) &&
+                            (D# a2) == (D# b2)
+
+myShuffleDoubleX2 :: DoubleX2# -> DoubleX2# -> (# Int#, Int# #) -> DoubleX2#
+myShuffleDoubleX2 v1 v2 (# i1, i2 #) =
+  case unpackDoubleX2# v1 of
+    (# d1, d2 #) ->
+      case unpackDoubleX2# v2 of
+        (# d3, d4 #) ->
+          let ds = [ D# d1, D# d2, D# d3, D# d4 ]
+              D# x = ds !! I# i1
+              D# y = ds !! I# i2
+          in packDoubleX2# (# x, y #)
+
+myShuffleFloatX4 :: FloatX4# -> FloatX4# -> (# Int#, Int#, Int#, Int# #) -> FloatX4#
+myShuffleFloatX4 v1 v2 (# i1, i2, i3, i4 #) =
+  case unpackFloatX4# v1 of
+    (# f1, f2, f3, f4 #) ->
+      case unpackFloatX4# v2 of
+        (# f5, f6, f7, f8 #) ->
+          let fs = [ F# f1, F# f2, F# f3, F# f4
+                   , F# f5, F# f6, F# f7, F# f8 ]
+              F# x = fs !! I# i1
+              F# y = fs !! I# i2
+              F# z = fs !! I# i3
+              F# w = fs !! I# i4
+          in packFloatX4# (# x, y, z, w #)


=====================================
testsuite/tests/codeGen/should_run/Simd009c.hs
=====================================
@@ -0,0 +1,53 @@
+{-# OPTIONS_GHC -O2 #-}
+{-# OPTIONS_GHC -msse #-}
+{-# OPTIONS_GHC -msse2 #-}
+{-# OPTIONS_GHC -msse4 #-}
+{-# OPTIONS_GHC -mavx #-}
+{-# LANGUAGE MagicHash #-}
+{-# LANGUAGE UnboxedTuples #-}
+{-# LANGUAGE RecordWildCards #-}
+{-# LANGUAGE TemplateHaskell #-}
+
+module Simd009c where
+
+import Control.Monad ( unless )
+import Data.Foldable ( for_ )
+import GHC.Exts
+import Language.Haskell.TH ( CodeQ )
+import Language.Haskell.TH.Syntax ( Lift(liftTyped) )
+
+import Simd009b
+
+floatX4ShuffleTest :: CodeQ FloatX4 -> CodeQ FloatX4 -> CodeQ (Int, Int, Int, Int) -> CodeQ (IO ())
+floatX4ShuffleTest v1 v2 ijkl =
+  [||
+  do
+    let (I# i#, I# j#, I# k#, I# l#) = $$ijkl
+        FX4# v1# = $$v1
+        FX4# v2# = $$v2
+        s1 = shuffleFloatX4# v1# v2# (# i#, j#, k#, l# #)
+        s2 = myShuffleFloatX4 v1# v2# (# i#, j#, k#, l# #)
+    unless (FX4# s1 == FX4# s2) $ do
+      putStrLn $ "Failed test: FloatX4# shuffle " ++ show (I# i#, I# j#, I# k#, I# l# )
+      putStrLn $ "     SIMD: " ++ show (FX4# s1)
+      putStrLn $ "reference: " ++ show (FX4# s2)
+  ||]
+
+doubleX2ShuffleTest :: CodeQ DoubleX2 -> CodeQ DoubleX2 -> CodeQ (Int, Int) -> CodeQ (IO ())
+doubleX2ShuffleTest v1 v2 ij =
+  [||
+  do
+    let (I# i#, I# j#) = $$ij
+        DX2# v1# = $$v1
+        DX2# v2# = $$v2
+        s1 = shuffleDoubleX2# v1# v2# (# i#, j# #)
+        s2 = myShuffleDoubleX2 v1# v2# (# i#, j# #)
+    unless (DX2# s1 == DX2# s2) $ do
+      putStrLn $ "Failed test:DoubleX2# shuffle " ++ show (I# i#, I# j#)
+      putStrLn $ "     SIMD: " ++ show (DX2# s1)
+      putStrLn $ "reference: " ++ show (DX2# s2)
+  ||]
+
+forQ_ :: Lift i => [i] -> (CodeQ i -> CodeQ (IO ())) -> CodeQ (IO ())
+forQ_ [] _ = [|| return () ||]
+forQ_ (i:is) f = [|| $$( f (liftTyped i) ) *> $$( forQ_ is f ) ||]


=====================================
testsuite/tests/codeGen/should_run/simd009.hs
=====================================
@@ -0,0 +1,32 @@
+{-# OPTIONS_GHC -msse #-}
+{-# OPTIONS_GHC -msse2 #-}
+{-# OPTIONS_GHC -msse4 #-}
+{-# OPTIONS_GHC -mavx #-}
+{-# LANGUAGE MagicHash #-}
+{-# LANGUAGE UnboxedTuples #-}
+{-# LANGUAGE RecordWildCards #-}
+{-# LANGUAGE TemplateHaskell #-}
+-- test shuffle instructions
+
+import Control.Monad ( unless )
+import Data.Foldable ( for_ )
+import GHC.Exts
+import Language.Haskell.TH ( CodeQ )
+
+import Simd009b
+import Simd009c
+
+main :: IO ()
+main = do
+  let x = packDoubleX2# (# 1.1##, 2.2## #)
+      y = packDoubleX2# (# 3.3##, 4.4## #)
+      a = packFloatX4# (# 1.1#, 2.2#, 3.3#, 4.4# #)
+      b = packFloatX4# (# 5.5#, 6.6#, 7.7#, 8.8# #)
+  $$(forQ_ [(i,j) | i <- [0..3], j <- [0..3]] (doubleX2ShuffleTest [|| DX2# x ||] [|| DX2# y ||]) )
+  $$(forQ_ [ (0,0,0,0), (3,3,3,3), (7,7,7,7)
+           , (0,1,2,3), (4,5,6,7)
+           , (3,2,1,0), (7,6,5,4)
+           , (0,1,4,5), (4,5,0,1)
+           , (2,1,7,6), (7,6,2,1)
+           , (1,2,7,7), (6,6,3,2)
+           ] (floatX4ShuffleTest [|| FX4# a ||] [|| FX4# b ||]) )



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/16faf448673a302c7c0a54d36e8f61a7519afcac

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/16faf448673a302c7c0a54d36e8f61a7519afcac
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20240611/962fe8e3/attachment-0001.html>


More information about the ghc-commits mailing list