[Git][ghc/ghc][wip/ncg-simd] SIMD NCG WIP: fix stack spilling

sheaf (@sheaf) gitlab at gitlab.haskell.org
Thu Jun 6 12:31:48 UTC 2024



sheaf pushed to branch wip/ncg-simd at Glasgow Haskell Compiler / GHC


Commits:
2b451078 by sheaf at 2024-06-06T14:26:29+02:00
SIMD NCG WIP: fix stack spilling

- - - - -


9 changed files:

- compiler/GHC/CmmToAsm/Instr.hs
- compiler/GHC/CmmToAsm/Reg/Linear.hs
- compiler/GHC/CmmToAsm/Reg/Linear/Base.hs
- compiler/GHC/CmmToAsm/Reg/Linear/JoinToTargets.hs
- compiler/GHC/CmmToAsm/Reg/Linear/StackMap.hs
- compiler/GHC/CmmToAsm/Reg/Linear/State.hs
- compiler/GHC/CmmToAsm/X86/Instr.hs
- compiler/GHC/Platform/Reg.hs
- compiler/GHC/Platform/Reg/Class.hs


Changes:

=====================================
compiler/GHC/CmmToAsm/Instr.hs
=====================================
@@ -97,8 +97,8 @@ class Instruction instr where
                 :: NCGConfig
                 -> Reg          -- ^ the reg to spill
                 -> Int          -- ^ the current stack delta
-                -> Int          -- ^ spill slot to use
-                -> [instr]        -- ^ instructions
+                -> Int          -- ^ spill slots to use
+                -> [instr]      -- ^ instructions
 
 
         -- | An instruction to reload a register from a spill slot.


=====================================
compiler/GHC/CmmToAsm/Reg/Linear.hs
=====================================
@@ -719,7 +719,7 @@ saveClobberedTemps clobbered dying
 
               -- (2) no free registers: spill the value
               [] -> do
-                  (spill, slot)   <- spillR (RegReal reg) temp
+                  (spill, slot)   <- spillR (RegReal reg) regclass temp
 
                   -- record why this reg was spilled for profiling
                   recordSpill (SpillClobber temp)
@@ -869,7 +869,8 @@ allocRegsAndSpill_spill :: (FR freeRegs, Instruction instr)
 allocRegsAndSpill_spill reading keep spills alloc r rs assig spill_loc
  = do   platform <- getPlatform
         freeRegs <- getFreeRegsR
-        let freeRegs_thisClass  = frGetFreeRegs platform (classOfVirtualReg r) freeRegs :: [RealReg]
+        let regclass = classOfVirtualReg r
+            freeRegs_thisClass  = frGetFreeRegs platform regclass freeRegs :: [RealReg]
 
         -- Can we put the variable into a register it already was?
         pref_reg <- findPrefRealReg r
@@ -938,7 +939,7 @@ allocRegsAndSpill_spill reading keep spills alloc r rs assig spill_loc
                         | (temp_to_push_out, (my_reg :: RealReg)) : _
                                         <- candidates_inReg
                         = do
-                                (spill_store, slot) <- spillR (RegReal my_reg) temp_to_push_out
+                                (spill_store, slot) <- spillR (RegReal my_reg) regclass temp_to_push_out
 
                                 -- record that this temp was spilled
                                 recordSpill (SpillAlloc temp_to_push_out)


=====================================
compiler/GHC/CmmToAsm/Reg/Linear/Base.hs
=====================================
@@ -96,11 +96,11 @@ data Loc
         -- | vreg is in a register
         = InReg   !RealReg
 
-        -- | vreg is held in a stack slot
+        -- | vreg is held in stack slots
         | InMem   {-# UNPACK #-}  !StackSlot
 
 
-        -- | vreg is held in both a register and a stack slot
+        -- | vreg is held in both a register and stack slots
         | InBoth   !RealReg
                    {-# UNPACK #-} !StackSlot
         deriving (Eq, Show, Ord)


=====================================
compiler/GHC/CmmToAsm/Reg/Linear/JoinToTargets.hs
=====================================
@@ -32,6 +32,7 @@ import GHC.Types.Unique.FM
 import GHC.Types.Unique.Set
 
 import GHC.Utils.Outputable
+import GHC.CmmToAsm.Reg.Target (targetClassOfRealReg)
 
 -- | For a jump instruction at the end of a block, generate fixup code so its
 --      vregs are in the correct regs for its destination.
@@ -330,9 +331,10 @@ handleComponent delta instr
         (CyclicSCC ((DigraphNode vreg (InReg sreg) ((InReg dreg: _))) : rest))
         -- dest list may have more than one element, if the reg is also InMem.
  = do
+        platform <- getPlatform
         -- spill the source into its slot
         (instrSpill, slot)
-                        <- spillR (RegReal sreg) vreg
+                        <- spillR (RegReal sreg) (targetClassOfRealReg platform sreg) vreg
 
         -- reload into destination reg
         instrLoad       <- loadR (RegReal dreg) slot


=====================================
compiler/GHC/CmmToAsm/Reg/Linear/StackMap.hs
=====================================
@@ -22,6 +22,7 @@ where
 
 import GHC.Prelude
 
+import GHC.Platform.Reg.Class
 import GHC.Types.Unique.FM
 import GHC.Types.Unique
 
@@ -47,13 +48,20 @@ emptyStackMap = StackMap 0 emptyUFM
 -- | If this vreg unique already has a stack assignment then return the slot number,
 --      otherwise allocate a new slot, and update the map.
 --
-getStackSlotFor :: StackMap -> Unique -> (StackMap, Int)
-
-getStackSlotFor fs@(StackMap _ reserved) reg
-  | Just slot <- lookupUFM reserved reg  =  (fs, slot)
-
-getStackSlotFor (StackMap freeSlot reserved) reg =
-    (StackMap (freeSlot+1) (addToUFM reserved reg freeSlot), freeSlot)
+getStackSlotFor :: StackMap -> RegClass -> Unique -> (StackMap, Int)
+
+getStackSlotFor fs@(StackMap _ reserved) _regUse regUnique
+  | Just slot <- lookupUFM reserved regUnique  =  (fs, slot)
+
+getStackSlotFor (StackMap freeSlot reserved) regClass regUnique =
+  let
+    nbSlots = case regClass of
+      RcInteger -> 1
+      RcFloat -> 1
+      RcDouble -> 1
+      RcVector128 -> 2
+  in
+    (StackMap (freeSlot+nbSlots) (addToUFM reserved regUnique freeSlot), freeSlot)
 
 -- | Return the number of stack slots that were allocated
 getStackUse :: StackMap -> Int


=====================================
compiler/GHC/CmmToAsm/Reg/Linear/State.hs
=====================================
@@ -48,6 +48,7 @@ import GHC.Platform.Reg
 import GHC.Cmm.BlockId
 
 import GHC.Platform
+import GHC.Platform.Reg.Class
 import GHC.Types.Unique
 import GHC.Types.Unique.Supply
 import GHC.Exts (oneShot)
@@ -121,13 +122,13 @@ makeRAStats state
 
 
 spillR :: Instruction instr
-       => Reg -> Unique -> RegM freeRegs ([instr], Int)
+       => Reg -> RegClass -> Unique -> RegM freeRegs ([instr], Int)
 
-spillR reg temp = mkRegM $ \s ->
-  let (stack1,slot) = getStackSlotFor (ra_stack s) temp
-      instr  = mkSpillInstr (ra_config s) reg (ra_delta s) slot
+spillR reg regClass temp = mkRegM $ \s ->
+  let (stack1,slots) = getStackSlotFor (ra_stack s) regClass temp
+      instr  = mkSpillInstr (ra_config s) reg (ra_delta s) slots
   in
-  RA_Result s{ra_stack=stack1} (instr,slot)
+  RA_Result s{ra_stack=stack1} (instr,slots)
 
 
 loadR :: Instruction instr


=====================================
compiler/GHC/CmmToAsm/X86/Instr.hs
=====================================
@@ -380,6 +380,8 @@ data Instr
         | VSHUFPS    Format Imm Operand Reg
         | SHUFPD     Format Imm Operand Reg
         | VSHUFPD    Format Imm Operand Reg
+        -- SIMD NCG TODO: don't store the Format (or only what we need)
+        -- in order to emit these instructions.
 
         -- Shift
         | PSLLDQ     Format Operand Reg
@@ -822,13 +824,20 @@ mkSpillInstr
     -> [Instr]
 
 mkSpillInstr config reg delta slot
-  = let off     = spillSlotToOffset platform slot - delta
+  = let off s = spillSlotToOffset platform s - delta
     in
     case targetClassOfReg platform reg of
            RcInteger   -> [MOV (archWordFormat is32Bit)
-                                   (OpReg reg) (OpAddr (spRel platform off))]
-           RcDouble    -> [MOV FF64 (OpReg reg) (OpAddr (spRel platform off))]
-           _         -> panic "X86.mkSpillInstr: no match"
+                                   (OpReg reg) (OpAddr (spRel platform $ off slot))]
+           RcDouble    -> [MOV FF64 (OpReg reg) (OpAddr (spRel platform $ off slot))]
+           RcFloat     -> panic "X86_mkSpillInstr: RcFloat"
+           RcVector128 -> [MOV FF64 (OpReg reg) (OpAddr (spRel platform $ off slot))
+                           -- Now shuffle the register, putting the high half into the lower half.
+                          ,SHUFPD (VecFormat 2 FmtDouble W64) (ImmInt 0b00) (OpReg reg) reg
+                            -- NB: this format doesn't matter, we emit the same instruction
+                            -- regardless of what is stored...
+                            -- SIMD NCG TODO: avoid using MOV by using SHUFPD with an OpAddr argument?
+                          ,MOV FF64 (OpReg reg) (OpAddr (spRel platform $ off (slot + 1)))]
     where platform = ncgPlatform config
           is32Bit = target32Bit platform
 
@@ -841,13 +850,24 @@ mkLoadInstr
     -> [Instr]
 
 mkLoadInstr config reg delta slot
-  = let off     = spillSlotToOffset platform slot - delta
+  = let off s     = spillSlotToOffset platform s - delta
     in
         case targetClassOfReg platform reg of
               RcInteger -> ([MOV (archWordFormat is32Bit)
-                                 (OpAddr (spRel platform off)) (OpReg reg)])
-              RcDouble  -> ([MOV FF64 (OpAddr (spRel platform off)) (OpReg reg)])
-              _         -> panic "X86.mkLoadInstr"
+                                 (OpAddr (spRel platform $ off slot)) (OpReg reg)])
+              RcDouble  -> ([MOV FF64 (OpAddr (spRel platform $ off slot)) (OpReg reg)])
+              RcFloat   -> panic "X86.mkLoadInstr RcFloat"
+              RcVector128 ->
+                -- Load the higher half into the lower part of register from the second stack slot,
+                -- shuffle it into the higher part of the register,
+                -- and load then lower half into the lower part of the register.
+                [MOV FF64 (OpAddr (spRel platform $ off (slot + 1))) (OpReg reg)
+                ,SHUFPD (VecFormat 2 FmtDouble W64) (ImmInt 0b01) (OpReg reg) reg
+                  -- SIMD NCG TODO: not sure about this immediate
+                  -- SIMD NCG TODO: can we avoid the MOV instructions and directly
+                  -- use SHUFPD for an Addr to Reg move?
+                ,MOV FF64 (OpAddr (spRel platform $ off slot)) (OpReg reg)]
+
     where platform = ncgPlatform config
           is32Bit = target32Bit platform
 


=====================================
compiler/GHC/Platform/Reg.hs
=====================================
@@ -116,17 +116,12 @@ renameVirtualReg u r
 
 classOfVirtualReg :: VirtualReg -> RegClass
 classOfVirtualReg vr
- = case vr of
+  = case vr of
         VirtualRegI{}   -> RcInteger
         VirtualRegHi{}  -> RcInteger
         VirtualRegF{}   -> RcFloat
         VirtualRegD{}   -> RcDouble
-        -- Below is an awful, largely x86-specific hack
-        VirtualRegVec{} -> RcDouble
-        -- SIMD NCG TODO: this seems very wrong and potentially the source of
-        -- bug #16927, because we use this function to determine how to spill
-        -- the contents of a virtual register
-        -- (see e.g. GHC.CmmToAsm.X86.Instr.mkSpillInstr).
+        VirtualRegVec{} -> RcVector128
 
 
 -- Determine the upper-half vreg for a 64-bit quantity on a 32-bit platform


=====================================
compiler/GHC/Platform/Reg/Class.hs
=====================================
@@ -1,3 +1,4 @@
+{-# LANGUAGE LambdaCase #-}
 -- | An architecture independent description of a register's class.
 module GHC.Platform.Reg.Class
         ( RegClass (..)
@@ -18,21 +19,26 @@ import GHC.Builtin.Uniques
 --      We treat all registers in a class as being interchangeable.
 --
 data RegClass
-        = RcInteger
-        | RcFloat
-        | RcDouble
-        deriving (Eq, Show)
+  = RcInteger
+  | RcFloat
+  | RcDouble
+  | RcVector128
+  deriving (Eq, Ord, Show)
 
 allRegClasses :: [RegClass]
 allRegClasses =
-  [RcInteger, RcFloat, RcDouble]
+  [ RcInteger, RcFloat, RcDouble, RcVector128 ]
 
 instance Uniquable RegClass where
-    getUnique RcInteger = mkRegClassUnique 0
-    getUnique RcFloat   = mkRegClassUnique 1
-    getUnique RcDouble  = mkRegClassUnique 2
+    getUnique = \case
+      RcInteger   -> mkRegClassUnique 0
+      RcFloat     -> mkRegClassUnique 1
+      RcDouble    -> mkRegClassUnique 2
+      RcVector128 -> mkRegClassUnique 3
 
 instance Outputable RegClass where
-    ppr RcInteger       = Outputable.text "I"
-    ppr RcFloat         = Outputable.text "F"
-    ppr RcDouble        = Outputable.text "D"
+    ppr = \case
+      RcInteger   -> Outputable.text "I"
+      RcFloat     -> Outputable.text "F"
+      RcDouble    -> Outputable.text "D"
+      RcVector128 -> Outputable.text "V"



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/2b451078203820b4f3a55c02935a01fa7382631e

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/2b451078203820b4f3a55c02935a01fa7382631e
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20240606/c2ab9323/attachment-0001.html>


More information about the ghc-commits mailing list