[Git][ghc/ghc][wip/ncg-simd] SIMD NCG WIP: fix stack spilling
sheaf (@sheaf)
gitlab at gitlab.haskell.org
Thu Jun 6 12:31:48 UTC 2024
sheaf pushed to branch wip/ncg-simd at Glasgow Haskell Compiler / GHC
Commits:
2b451078 by sheaf at 2024-06-06T14:26:29+02:00
SIMD NCG WIP: fix stack spilling
- - - - -
9 changed files:
- compiler/GHC/CmmToAsm/Instr.hs
- compiler/GHC/CmmToAsm/Reg/Linear.hs
- compiler/GHC/CmmToAsm/Reg/Linear/Base.hs
- compiler/GHC/CmmToAsm/Reg/Linear/JoinToTargets.hs
- compiler/GHC/CmmToAsm/Reg/Linear/StackMap.hs
- compiler/GHC/CmmToAsm/Reg/Linear/State.hs
- compiler/GHC/CmmToAsm/X86/Instr.hs
- compiler/GHC/Platform/Reg.hs
- compiler/GHC/Platform/Reg/Class.hs
Changes:
=====================================
compiler/GHC/CmmToAsm/Instr.hs
=====================================
@@ -97,8 +97,8 @@ class Instruction instr where
:: NCGConfig
-> Reg -- ^ the reg to spill
-> Int -- ^ the current stack delta
- -> Int -- ^ spill slot to use
- -> [instr] -- ^ instructions
+ -> Int -- ^ spill slots to use
+ -> [instr] -- ^ instructions
-- | An instruction to reload a register from a spill slot.
=====================================
compiler/GHC/CmmToAsm/Reg/Linear.hs
=====================================
@@ -719,7 +719,7 @@ saveClobberedTemps clobbered dying
-- (2) no free registers: spill the value
[] -> do
- (spill, slot) <- spillR (RegReal reg) temp
+ (spill, slot) <- spillR (RegReal reg) regclass temp
-- record why this reg was spilled for profiling
recordSpill (SpillClobber temp)
@@ -869,7 +869,8 @@ allocRegsAndSpill_spill :: (FR freeRegs, Instruction instr)
allocRegsAndSpill_spill reading keep spills alloc r rs assig spill_loc
= do platform <- getPlatform
freeRegs <- getFreeRegsR
- let freeRegs_thisClass = frGetFreeRegs platform (classOfVirtualReg r) freeRegs :: [RealReg]
+ let regclass = classOfVirtualReg r
+ freeRegs_thisClass = frGetFreeRegs platform regclass freeRegs :: [RealReg]
-- Can we put the variable into a register it already was?
pref_reg <- findPrefRealReg r
@@ -938,7 +939,7 @@ allocRegsAndSpill_spill reading keep spills alloc r rs assig spill_loc
| (temp_to_push_out, (my_reg :: RealReg)) : _
<- candidates_inReg
= do
- (spill_store, slot) <- spillR (RegReal my_reg) temp_to_push_out
+ (spill_store, slot) <- spillR (RegReal my_reg) regclass temp_to_push_out
-- record that this temp was spilled
recordSpill (SpillAlloc temp_to_push_out)
=====================================
compiler/GHC/CmmToAsm/Reg/Linear/Base.hs
=====================================
@@ -96,11 +96,11 @@ data Loc
-- | vreg is in a register
= InReg !RealReg
- -- | vreg is held in a stack slot
+ -- | vreg is held in stack slots
| InMem {-# UNPACK #-} !StackSlot
- -- | vreg is held in both a register and a stack slot
+ -- | vreg is held in both a register and stack slots
| InBoth !RealReg
{-# UNPACK #-} !StackSlot
deriving (Eq, Show, Ord)
=====================================
compiler/GHC/CmmToAsm/Reg/Linear/JoinToTargets.hs
=====================================
@@ -32,6 +32,7 @@ import GHC.Types.Unique.FM
import GHC.Types.Unique.Set
import GHC.Utils.Outputable
+import GHC.CmmToAsm.Reg.Target (targetClassOfRealReg)
-- | For a jump instruction at the end of a block, generate fixup code so its
-- vregs are in the correct regs for its destination.
@@ -330,9 +331,10 @@ handleComponent delta instr
(CyclicSCC ((DigraphNode vreg (InReg sreg) ((InReg dreg: _))) : rest))
-- dest list may have more than one element, if the reg is also InMem.
= do
+ platform <- getPlatform
-- spill the source into its slot
(instrSpill, slot)
- <- spillR (RegReal sreg) vreg
+ <- spillR (RegReal sreg) (targetClassOfRealReg platform sreg) vreg
-- reload into destination reg
instrLoad <- loadR (RegReal dreg) slot
=====================================
compiler/GHC/CmmToAsm/Reg/Linear/StackMap.hs
=====================================
@@ -22,6 +22,7 @@ where
import GHC.Prelude
+import GHC.Platform.Reg.Class
import GHC.Types.Unique.FM
import GHC.Types.Unique
@@ -47,13 +48,20 @@ emptyStackMap = StackMap 0 emptyUFM
-- | If this vreg unique already has a stack assignment then return the slot number,
-- otherwise allocate a new slot, and update the map.
--
-getStackSlotFor :: StackMap -> Unique -> (StackMap, Int)
-
-getStackSlotFor fs@(StackMap _ reserved) reg
- | Just slot <- lookupUFM reserved reg = (fs, slot)
-
-getStackSlotFor (StackMap freeSlot reserved) reg =
- (StackMap (freeSlot+1) (addToUFM reserved reg freeSlot), freeSlot)
+getStackSlotFor :: StackMap -> RegClass -> Unique -> (StackMap, Int)
+
+getStackSlotFor fs@(StackMap _ reserved) _regUse regUnique
+ | Just slot <- lookupUFM reserved regUnique = (fs, slot)
+
+getStackSlotFor (StackMap freeSlot reserved) regClass regUnique =
+ let
+ nbSlots = case regClass of
+ RcInteger -> 1
+ RcFloat -> 1
+ RcDouble -> 1
+ RcVector128 -> 2
+ in
+ (StackMap (freeSlot+nbSlots) (addToUFM reserved regUnique freeSlot), freeSlot)
-- | Return the number of stack slots that were allocated
getStackUse :: StackMap -> Int
=====================================
compiler/GHC/CmmToAsm/Reg/Linear/State.hs
=====================================
@@ -48,6 +48,7 @@ import GHC.Platform.Reg
import GHC.Cmm.BlockId
import GHC.Platform
+import GHC.Platform.Reg.Class
import GHC.Types.Unique
import GHC.Types.Unique.Supply
import GHC.Exts (oneShot)
@@ -121,13 +122,13 @@ makeRAStats state
spillR :: Instruction instr
- => Reg -> Unique -> RegM freeRegs ([instr], Int)
+ => Reg -> RegClass -> Unique -> RegM freeRegs ([instr], Int)
-spillR reg temp = mkRegM $ \s ->
- let (stack1,slot) = getStackSlotFor (ra_stack s) temp
- instr = mkSpillInstr (ra_config s) reg (ra_delta s) slot
+spillR reg regClass temp = mkRegM $ \s ->
+ let (stack1,slots) = getStackSlotFor (ra_stack s) regClass temp
+ instr = mkSpillInstr (ra_config s) reg (ra_delta s) slots
in
- RA_Result s{ra_stack=stack1} (instr,slot)
+ RA_Result s{ra_stack=stack1} (instr,slots)
loadR :: Instruction instr
=====================================
compiler/GHC/CmmToAsm/X86/Instr.hs
=====================================
@@ -380,6 +380,8 @@ data Instr
| VSHUFPS Format Imm Operand Reg
| SHUFPD Format Imm Operand Reg
| VSHUFPD Format Imm Operand Reg
+ -- SIMD NCG TODO: don't store the Format (or only what we need)
+ -- in order to emit these instructions.
-- Shift
| PSLLDQ Format Operand Reg
@@ -822,13 +824,20 @@ mkSpillInstr
-> [Instr]
mkSpillInstr config reg delta slot
- = let off = spillSlotToOffset platform slot - delta
+ = let off s = spillSlotToOffset platform s - delta
in
case targetClassOfReg platform reg of
RcInteger -> [MOV (archWordFormat is32Bit)
- (OpReg reg) (OpAddr (spRel platform off))]
- RcDouble -> [MOV FF64 (OpReg reg) (OpAddr (spRel platform off))]
- _ -> panic "X86.mkSpillInstr: no match"
+ (OpReg reg) (OpAddr (spRel platform $ off slot))]
+ RcDouble -> [MOV FF64 (OpReg reg) (OpAddr (spRel platform $ off slot))]
+ RcFloat -> panic "X86_mkSpillInstr: RcFloat"
+ RcVector128 -> [MOV FF64 (OpReg reg) (OpAddr (spRel platform $ off slot))
+ -- Now shuffle the register, putting the high half into the lower half.
+ ,SHUFPD (VecFormat 2 FmtDouble W64) (ImmInt 0b00) (OpReg reg) reg
+ -- NB: this format doesn't matter, we emit the same instruction
+ -- regardless of what is stored...
+ -- SIMD NCG TODO: avoid using MOV by using SHUFPD with an OpAddr argument?
+ ,MOV FF64 (OpReg reg) (OpAddr (spRel platform $ off (slot + 1)))]
where platform = ncgPlatform config
is32Bit = target32Bit platform
@@ -841,13 +850,24 @@ mkLoadInstr
-> [Instr]
mkLoadInstr config reg delta slot
- = let off = spillSlotToOffset platform slot - delta
+ = let off s = spillSlotToOffset platform s - delta
in
case targetClassOfReg platform reg of
RcInteger -> ([MOV (archWordFormat is32Bit)
- (OpAddr (spRel platform off)) (OpReg reg)])
- RcDouble -> ([MOV FF64 (OpAddr (spRel platform off)) (OpReg reg)])
- _ -> panic "X86.mkLoadInstr"
+ (OpAddr (spRel platform $ off slot)) (OpReg reg)])
+ RcDouble -> ([MOV FF64 (OpAddr (spRel platform $ off slot)) (OpReg reg)])
+ RcFloat -> panic "X86.mkLoadInstr RcFloat"
+ RcVector128 ->
+ -- Load the higher half into the lower part of register from the second stack slot,
+ -- shuffle it into the higher part of the register,
+ -- and load then lower half into the lower part of the register.
+ [MOV FF64 (OpAddr (spRel platform $ off (slot + 1))) (OpReg reg)
+ ,SHUFPD (VecFormat 2 FmtDouble W64) (ImmInt 0b01) (OpReg reg) reg
+ -- SIMD NCG TODO: not sure about this immediate
+ -- SIMD NCG TODO: can we avoid the MOV instructions and directly
+ -- use SHUFPD for an Addr to Reg move?
+ ,MOV FF64 (OpAddr (spRel platform $ off slot)) (OpReg reg)]
+
where platform = ncgPlatform config
is32Bit = target32Bit platform
=====================================
compiler/GHC/Platform/Reg.hs
=====================================
@@ -116,17 +116,12 @@ renameVirtualReg u r
classOfVirtualReg :: VirtualReg -> RegClass
classOfVirtualReg vr
- = case vr of
+ = case vr of
VirtualRegI{} -> RcInteger
VirtualRegHi{} -> RcInteger
VirtualRegF{} -> RcFloat
VirtualRegD{} -> RcDouble
- -- Below is an awful, largely x86-specific hack
- VirtualRegVec{} -> RcDouble
- -- SIMD NCG TODO: this seems very wrong and potentially the source of
- -- bug #16927, because we use this function to determine how to spill
- -- the contents of a virtual register
- -- (see e.g. GHC.CmmToAsm.X86.Instr.mkSpillInstr).
+ VirtualRegVec{} -> RcVector128
-- Determine the upper-half vreg for a 64-bit quantity on a 32-bit platform
=====================================
compiler/GHC/Platform/Reg/Class.hs
=====================================
@@ -1,3 +1,4 @@
+{-# LANGUAGE LambdaCase #-}
-- | An architecture independent description of a register's class.
module GHC.Platform.Reg.Class
( RegClass (..)
@@ -18,21 +19,26 @@ import GHC.Builtin.Uniques
-- We treat all registers in a class as being interchangeable.
--
data RegClass
- = RcInteger
- | RcFloat
- | RcDouble
- deriving (Eq, Show)
+ = RcInteger
+ | RcFloat
+ | RcDouble
+ | RcVector128
+ deriving (Eq, Ord, Show)
allRegClasses :: [RegClass]
allRegClasses =
- [RcInteger, RcFloat, RcDouble]
+ [ RcInteger, RcFloat, RcDouble, RcVector128 ]
instance Uniquable RegClass where
- getUnique RcInteger = mkRegClassUnique 0
- getUnique RcFloat = mkRegClassUnique 1
- getUnique RcDouble = mkRegClassUnique 2
+ getUnique = \case
+ RcInteger -> mkRegClassUnique 0
+ RcFloat -> mkRegClassUnique 1
+ RcDouble -> mkRegClassUnique 2
+ RcVector128 -> mkRegClassUnique 3
instance Outputable RegClass where
- ppr RcInteger = Outputable.text "I"
- ppr RcFloat = Outputable.text "F"
- ppr RcDouble = Outputable.text "D"
+ ppr = \case
+ RcInteger -> Outputable.text "I"
+ RcFloat -> Outputable.text "F"
+ RcDouble -> Outputable.text "D"
+ RcVector128 -> Outputable.text "V"
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/2b451078203820b4f3a55c02935a01fa7382631e
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/2b451078203820b4f3a55c02935a01fa7382631e
You're receiving this email because of your account on gitlab.haskell.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20240606/c2ab9323/attachment-0001.html>
More information about the ghc-commits
mailing list