[Git][ghc/ghc][wip/ncg-simd] SIMD NCG WIP: fix stack spilling
sheaf (@sheaf)
gitlab at gitlab.haskell.org
Thu Jun 6 15:50:16 UTC 2024
sheaf pushed to branch wip/ncg-simd at Glasgow Haskell Compiler / GHC
Commits:
34d7a170 by sheaf at 2024-06-06T17:49:05+02:00
SIMD NCG WIP: fix stack spilling
- - - - -
22 changed files:
- compiler/GHC/CmmToAsm/AArch64.hs
- compiler/GHC/CmmToAsm/AArch64/Instr.hs
- compiler/GHC/CmmToAsm/Format.hs
- compiler/GHC/CmmToAsm/Instr.hs
- compiler/GHC/CmmToAsm/PPC.hs
- compiler/GHC/CmmToAsm/PPC/Instr.hs
- compiler/GHC/CmmToAsm/Reg/Graph.hs
- compiler/GHC/CmmToAsm/Reg/Graph/Coalesce.hs
- compiler/GHC/CmmToAsm/Reg/Graph/Spill.hs
- compiler/GHC/CmmToAsm/Reg/Graph/SpillClean.hs
- compiler/GHC/CmmToAsm/Reg/Graph/SpillCost.hs
- compiler/GHC/CmmToAsm/Reg/Linear.hs
- compiler/GHC/CmmToAsm/Reg/Linear/Base.hs
- compiler/GHC/CmmToAsm/Reg/Linear/JoinToTargets.hs
- compiler/GHC/CmmToAsm/Reg/Linear/StackMap.hs
- compiler/GHC/CmmToAsm/Reg/Linear/State.hs
- compiler/GHC/CmmToAsm/Reg/Linear/X86.hs
- compiler/GHC/CmmToAsm/Reg/Linear/X86_64.hs
- compiler/GHC/CmmToAsm/Reg/Liveness.hs
- compiler/GHC/CmmToAsm/X86/Instr.hs
- compiler/GHC/Platform/Reg.hs
- compiler/GHC/Platform/Reg/Class.hs
Changes:
=====================================
compiler/GHC/CmmToAsm/AArch64.hs
=====================================
@@ -49,8 +49,8 @@ instance Instruction AArch64.Instr where
jumpDestsOfInstr = AArch64.jumpDestsOfInstr
canFallthroughTo = AArch64.canFallthroughTo
patchJumpInstr = AArch64.patchJumpInstr
- mkSpillInstr = AArch64.mkSpillInstr
- mkLoadInstr = AArch64.mkLoadInstr
+ mkSpillInstr cfg reg _ i j = AArch64.mkSpillInstr cfg reg i j
+ mkLoadInstr cfg reg _ i j = AArch64.mkLoadInstr cfg reg i j
takeDeltaInstr = AArch64.takeDeltaInstr
isMetaInstr = AArch64.isMetaInstr
mkRegRegMoveInstr _ = AArch64.mkRegRegMoveInstr
=====================================
compiler/GHC/CmmToAsm/AArch64/Instr.hs
=====================================
@@ -145,8 +145,9 @@ regUsageOfInstr platform instr = case instr of
-- filtering the usage is necessary, otherwise the register
-- allocator will try to allocate pre-defined fixed stg
-- registers as well, as they show up.
- usage (src, dst) = RU (filter (interesting platform) src)
- (filter (interesting platform) dst)
+ usage (src, dst) = RU (map (,II64) $ filter (interesting platform) src)
+ (map (,II64) $ filter (interesting platform) dst)
+ -- SIMD NCG TODO: remove this hack
regAddr :: AddrMode -> [Reg]
regAddr (AddrRegReg r1 r2) = [r1, r2]
=====================================
compiler/GHC/CmmToAsm/Format.hs
=====================================
@@ -1,3 +1,7 @@
+{-# LANGUAGE LambdaCase #-}
+{-# LANGUAGE PatternSynonyms #-}
+{-# LANGUAGE ViewPatterns #-}
+
-- | Formats on this architecture
-- A Format is a combination of width and class
--
@@ -9,7 +13,7 @@
-- properly. eg SPARC doesn't care about FF80.
--
module GHC.CmmToAsm.Format (
- Format(..),
+ Format(.., IntegerFormat),
ScalarFormat(..),
intFormat,
floatFormat,
@@ -18,7 +22,7 @@ module GHC.CmmToAsm.Format (
isVecFormat,
cmmTypeFormat,
formatToWidth,
- formatInBytes
+ formatInBytes,
)
where
@@ -73,7 +77,23 @@ data Format
| FF32
| FF64
| VecFormat !Length !ScalarFormat !Width
- deriving (Show, Eq)
+ deriving (Show, Eq, Ord)
+
+pattern IntegerFormat :: Format
+pattern IntegerFormat <- ( isIntegerFormat -> True )
+{-# COMPLETE IntegerFormat, FF32, FF64, VecFormat #-}
+
+isIntegerFormat :: Format -> Bool
+isIntegerFormat = \case
+ II8 -> True
+ II16 -> True
+ II32 -> True
+ II64 -> True
+ _ -> False
+
+
+instance Outputable Format where
+ ppr fmt = text (show fmt)
data ScalarFormat = FmtInt8
| FmtInt16
@@ -81,7 +101,7 @@ data ScalarFormat = FmtInt8
| FmtInt64
| FmtFloat
| FmtDouble
- deriving (Show, Eq)
+ deriving (Show, Eq, Ord)
-- | Get the integer format of this width.
intFormat :: Width -> Format
=====================================
compiler/GHC/CmmToAsm/Instr.hs
=====================================
@@ -16,6 +16,7 @@ import GHC.Cmm.BlockId
import GHC.CmmToAsm.Config
import GHC.Data.FastString
+import GHC.CmmToAsm.Format
-- | Holds a list of source and destination registers used by a
-- particular instruction.
@@ -29,8 +30,8 @@ import GHC.Data.FastString
--
data RegUsage
= RU {
- reads :: [Reg],
- writes :: [Reg]
+ reads :: [(Reg, Format)],
+ writes :: [(Reg, Format)]
}
deriving Show
@@ -96,15 +97,17 @@ class Instruction instr where
mkSpillInstr
:: NCGConfig
-> Reg -- ^ the reg to spill
+ -> Format
-> Int -- ^ the current stack delta
- -> Int -- ^ spill slot to use
- -> [instr] -- ^ instructions
+ -> Int -- ^ spill slots to use
+ -> [instr] -- ^ instructions
-- | An instruction to reload a register from a spill slot.
mkLoadInstr
:: NCGConfig
-> Reg -- ^ the reg to reload.
+ -> Format
-> Int -- ^ the current stack delta
-> Int -- ^ the spill slot to use
-> [instr] -- ^ instructions
=====================================
compiler/GHC/CmmToAsm/PPC.hs
=====================================
@@ -48,8 +48,8 @@ instance Instruction PPC.Instr where
jumpDestsOfInstr = PPC.jumpDestsOfInstr
canFallthroughTo = PPC.canFallthroughTo
patchJumpInstr = PPC.patchJumpInstr
- mkSpillInstr = PPC.mkSpillInstr
- mkLoadInstr = PPC.mkLoadInstr
+ mkSpillInstr cfg reg _ i j = PPC.mkSpillInstr cfg reg i j
+ mkLoadInstr cfg reg _ i j = PPC.mkLoadInstr cfg reg i j
takeDeltaInstr = PPC.takeDeltaInstr
isMetaInstr = PPC.isMetaInstr
mkRegRegMoveInstr _ = PPC.mkRegRegMoveInstr
=====================================
compiler/GHC/CmmToAsm/PPC/Instr.hs
=====================================
@@ -391,8 +391,9 @@ regUsageOfInstr platform instr
FMADD _ _ rt ra rc rb -> usage ([ra, rc, rb], [rt])
_ -> noUsage
where
- usage (src, dst) = RU (filter (interesting platform) src)
- (filter (interesting platform) dst)
+ usage (src, dst) = RU (map (,II64) $ filter (interesting platform) src)
+ (map (,II64) $ filter (interesting platform) dst)
+ -- SIMD NCG TODO: remove this hack
regAddr (AddrRegReg r1 r2) = [r1, r2]
regAddr (AddrRegImm r1 _) = [r1]
=====================================
compiler/GHC/CmmToAsm/Reg/Graph.hs
=====================================
@@ -335,21 +335,21 @@ buildGraph code
-- | Add some conflict edges to the graph.
-- Conflicts between virtual and real regs are recorded as exclusions.
graphAddConflictSet
- :: UniqSet Reg
+ :: RegMap (Reg, fmt)
-> Color.Graph VirtualReg RegClass RealReg
-> Color.Graph VirtualReg RegClass RealReg
-graphAddConflictSet set graph
+graphAddConflictSet regs graph
= let virtuals = mkUniqSet
- [ vr | RegVirtual vr <- nonDetEltsUniqSet set ]
+ [ vr | (RegVirtual vr, _) <- nonDetEltsUFM regs ]
graph1 = Color.addConflicts virtuals classOfVirtualReg graph
graph2 = foldr (\(r1, r2) -> Color.addExclusion r1 classOfVirtualReg r2)
graph1
[ (vr, rr)
- | RegVirtual vr <- nonDetEltsUniqSet set
- , RegReal rr <- nonDetEltsUniqSet set]
+ | (RegVirtual vr, _) <- nonDetEltsUFM regs
+ , (RegReal rr, _) <- nonDetEltsUFM regs]
-- See Note [Unique Determinism and code generation]
in graph2
=====================================
compiler/GHC/CmmToAsm/Reg/Graph/Coalesce.hs
=====================================
@@ -13,7 +13,6 @@ import GHC.Cmm
import GHC.Data.Bag
import GHC.Data.Graph.Directed
import GHC.Types.Unique.FM
-import GHC.Types.Unique.Set
import GHC.Types.Unique.Supply
@@ -85,8 +84,8 @@ slurpJoinMovs live
slurpLI rs (LiveInstr _ Nothing) = rs
slurpLI rs (LiveInstr instr (Just live))
| Just (r1, r2) <- takeRegRegMoveInstr instr
- , elementOfUniqSet r1 $ liveDieRead live
- , elementOfUniqSet r2 $ liveBorn live
+ , elemUFM r1 $ liveDieRead live
+ , elemUFM r2 $ liveBorn live
-- only coalesce movs between two virtuals for now,
-- else we end up with allocatable regs in the live
=====================================
compiler/GHC/CmmToAsm/Reg/Graph/Spill.hs
=====================================
@@ -31,6 +31,7 @@ import Data.List (nub, (\\), intersect)
import Data.Maybe
import Data.IntSet (IntSet)
import qualified Data.IntSet as IntSet
+import GHC.CmmToAsm.Format
-- | Spill all these virtual regs to stack slots.
@@ -138,7 +139,7 @@ regSpill_top platform regSlotMap cmm
-- then record the fact that these slots are now live in those blocks
-- in the given slotmap.
patchLiveSlot
- :: BlockMap IntSet -> BlockId -> RegSet -> BlockMap IntSet
+ :: BlockMap IntSet -> BlockId -> RegMap (Reg, Format) -> BlockMap IntSet
patchLiveSlot slotMap blockId regsLive
= let
@@ -148,7 +149,8 @@ regSpill_top platform regSlotMap cmm
moreSlotsLive = IntSet.fromList
$ mapMaybe (lookupUFM regSlotMap)
- $ nonDetEltsUniqSet regsLive
+ $ map fst
+ $ nonDetEltsUFM regsLive
-- See Note [Unique Determinism and code generation]
slotMap'
@@ -197,9 +199,9 @@ regSpill_instr platform regSlotMap (LiveInstr instr (Just _)) = do
let rsModify = intersect rsRead_ rsWritten_
-- work out if any of the regs being used are currently being spilled.
- let rsSpillRead = filter (\r -> elemUFM r regSlotMap) rsRead
- let rsSpillWritten = filter (\r -> elemUFM r regSlotMap) rsWritten
- let rsSpillModify = filter (\r -> elemUFM r regSlotMap) rsModify
+ let rsSpillRead = filter (\(r,_) -> elemUFM r regSlotMap) rsRead
+ let rsSpillWritten = filter (\(r,_) -> elemUFM r regSlotMap) rsWritten
+ let rsSpillModify = filter (\(r,_) -> elemUFM r regSlotMap) rsModify
-- rewrite the instr and work out spill code.
(instr1, prepost1) <- mapAccumLM (spillRead regSlotMap) instr rsSpillRead
@@ -224,10 +226,10 @@ spillRead
:: Instruction instr
=> UniqFM Reg Int
-> instr
- -> Reg
+ -> (Reg, Format)
-> SpillM (instr, ([LiveInstr instr'], [LiveInstr instr']))
-spillRead regSlotMap instr reg
+spillRead regSlotMap instr (reg, fmt)
| Just slot <- lookupUFM regSlotMap reg
= do (instr', nReg) <- patchInstr reg instr
@@ -235,7 +237,7 @@ spillRead regSlotMap instr reg
{ stateSpillSL = addToUFM_C accSpillSL (stateSpillSL s) reg (reg, 0, 1) }
return ( instr'
- , ( [LiveInstr (RELOAD slot nReg) Nothing]
+ , ( [LiveInstr (RELOAD slot nReg fmt) Nothing]
, []) )
| otherwise = panic "RegSpill.spillRead: no slot defined for spilled reg"
@@ -247,10 +249,10 @@ spillWrite
:: Instruction instr
=> UniqFM Reg Int
-> instr
- -> Reg
+ -> (Reg, Format)
-> SpillM (instr, ([LiveInstr instr'], [LiveInstr instr']))
-spillWrite regSlotMap instr reg
+spillWrite regSlotMap instr (reg, fmt)
| Just slot <- lookupUFM regSlotMap reg
= do (instr', nReg) <- patchInstr reg instr
@@ -259,7 +261,7 @@ spillWrite regSlotMap instr reg
return ( instr'
, ( []
- , [LiveInstr (SPILL nReg slot) Nothing]))
+ , [LiveInstr (SPILL nReg fmt slot) Nothing]))
| otherwise = panic "RegSpill.spillWrite: no slot defined for spilled reg"
@@ -270,10 +272,10 @@ spillModify
:: Instruction instr
=> UniqFM Reg Int
-> instr
- -> Reg
+ -> (Reg, Format)
-> SpillM (instr, ([LiveInstr instr'], [LiveInstr instr']))
-spillModify regSlotMap instr reg
+spillModify regSlotMap instr (reg, fmt)
| Just slot <- lookupUFM regSlotMap reg
= do (instr', nReg) <- patchInstr reg instr
@@ -281,8 +283,8 @@ spillModify regSlotMap instr reg
{ stateSpillSL = addToUFM_C accSpillSL (stateSpillSL s) reg (reg, 1, 1) }
return ( instr'
- , ( [LiveInstr (RELOAD slot nReg) Nothing]
- , [LiveInstr (SPILL nReg slot) Nothing]))
+ , ( [LiveInstr (RELOAD slot nReg fmt) Nothing]
+ , [LiveInstr (SPILL nReg fmt slot) Nothing]))
| otherwise = panic "RegSpill.spillModify: no slot defined for spilled reg"
=====================================
compiler/GHC/CmmToAsm/Reg/Graph/SpillClean.hs
=====================================
@@ -160,12 +160,13 @@ cleanForward _ _ _ acc []
-- hopefully the spill will be also be cleaned in the next pass
cleanForward platform blockId assoc acc (li1 : li2 : instrs)
- | LiveInstr (SPILL reg1 slot1) _ <- li1
- , LiveInstr (RELOAD slot2 reg2) _ <- li2
+ | LiveInstr (SPILL reg1 _ slot1) _ <- li1
+ , LiveInstr (RELOAD slot2 reg2 _) _ <- li2
, slot1 == slot2
= do
modify $ \s -> s { sCleanedReloadsAcc = sCleanedReloadsAcc s + 1 }
cleanForward platform blockId assoc acc
+ -- SIMD NCG TODO: is mkRegRegMoveInstr here OK for vectors?
$ li1 : LiveInstr (mkRegRegMoveInstr platform reg1 reg2) Nothing
: instrs
@@ -189,7 +190,7 @@ cleanForward platform blockId assoc acc (li@(LiveInstr i1 _) : instrs)
cleanForward platform blockId assoc acc (li : instrs)
-- Update association due to the spill.
- | LiveInstr (SPILL reg slot) _ <- li
+ | LiveInstr (SPILL reg _ slot) _ <- li
= let assoc' = addAssoc (SReg reg) (SSlot slot)
$ delAssoc (SSlot slot)
$ assoc
@@ -215,7 +216,7 @@ cleanForward platform blockId assoc acc (li : instrs)
-- Writing to a reg changes its value.
| LiveInstr instr _ <- li
, RU _ written <- regUsageOfInstr platform instr
- = let assoc' = foldr delAssoc assoc (map SReg $ nub written)
+ = let assoc' = foldr delAssoc assoc (map SReg $ nub $ map fst written)
in cleanForward platform blockId assoc' (li : acc) instrs
@@ -229,7 +230,7 @@ cleanReload
-> LiveInstr instr
-> CleanM (Assoc Store, Maybe (LiveInstr instr))
-cleanReload platform blockId assoc li@(LiveInstr (RELOAD slot reg) _)
+cleanReload platform blockId assoc li@(LiveInstr (RELOAD slot reg _) _)
-- If the reg we're reloading already has the same value as the slot
-- then we can erase the instruction outright.
@@ -355,12 +356,12 @@ cleanBackward' _ _ _ acc []
cleanBackward' liveSlotsOnEntry reloadedBy noReloads acc (li : instrs)
-- If nothing ever reloads from this slot then we don't need the spill.
- | LiveInstr (SPILL _ slot) _ <- li
+ | LiveInstr (SPILL _ _ slot) _ <- li
, Nothing <- lookupUFM reloadedBy (SSlot slot)
= do modify $ \s -> s { sCleanedSpillsAcc = sCleanedSpillsAcc s + 1 }
cleanBackward liveSlotsOnEntry noReloads acc instrs
- | LiveInstr (SPILL _ slot) _ <- li
+ | LiveInstr (SPILL _ _ slot) _ <- li
= if elementOfUniqSet slot noReloads
-- We can erase this spill because the slot won't be read until
@@ -375,7 +376,7 @@ cleanBackward' liveSlotsOnEntry reloadedBy noReloads acc (li : instrs)
cleanBackward liveSlotsOnEntry noReloads' (li : acc) instrs
-- if we reload from a slot then it's no longer unused
- | LiveInstr (RELOAD slot _) _ <- li
+ | LiveInstr (RELOAD slot _ _) _ <- li
, noReloads' <- delOneFromUniqSet noReloads slot
= cleanBackward liveSlotsOnEntry noReloads' (li : acc) instrs
=====================================
compiler/GHC/CmmToAsm/Reg/Graph/SpillCost.hs
=====================================
@@ -129,8 +129,8 @@ slurpSpillCostInfo platform cfg cmm
-- Increment counts for what regs were read/written from.
let (RU read written) = regUsageOfInstr platform instr
- mapM_ (incUses scale) $ mapMaybe takeVirtualReg $ nub read
- mapM_ (incDefs scale) $ mapMaybe takeVirtualReg $ nub written
+ mapM_ (incUses scale) $ mapMaybe takeVirtualReg $ nub $ map fst read
+ mapM_ (incDefs scale) $ mapMaybe takeVirtualReg $ nub $ map fst written
-- Compute liveness for entry to next instruction.
let liveDieRead_virt = takeVirtuals (liveDieRead live)
@@ -158,9 +158,9 @@ slurpSpillCostInfo platform cfg cmm
= 1.0 -- Only if no cfg given
-- | Take all the virtual registers from this set.
-takeVirtuals :: UniqSet Reg -> UniqSet VirtualReg
-takeVirtuals set = mkUniqSet
- [ vr | RegVirtual vr <- nonDetEltsUniqSet set ]
+takeVirtuals :: RegMap (Reg, fmt) -> UniqSet VirtualReg
+takeVirtuals m = mkUniqSet
+ [ vr | (RegVirtual vr, _) <- nonDetEltsUFM m ]
-- See Note [Unique Determinism and code generation]
=====================================
compiler/GHC/CmmToAsm/Reg/Linear.hs
=====================================
@@ -127,7 +127,6 @@ import GHC.Cmm hiding (RegSet)
import GHC.Data.Graph.Directed
import GHC.Types.Unique
-import GHC.Types.Unique.Set
import GHC.Types.Unique.FM
import GHC.Types.Unique.Supply
import GHC.Utils.Outputable
@@ -137,6 +136,7 @@ import GHC.Platform
import Data.Maybe
import Data.List (partition, nub)
import Control.Monad
+import GHC.CmmToAsm.Format
-- -----------------------------------------------------------------------------
-- Top level of the register allocator
@@ -203,7 +203,7 @@ linearRegAlloc
:: forall instr. (Instruction instr)
=> NCGConfig
-> [BlockId] -- ^ entry points
- -> BlockMap RegSet
+ -> BlockMap (UniqFM Reg (Reg, Format))
-- ^ live regs on entry to each basic block
-> [SCC (LiveBasicBlock instr)]
-- ^ instructions annotated with "deaths"
@@ -242,7 +242,7 @@ linearRegAlloc'
=> NCGConfig
-> freeRegs
-> [BlockId] -- ^ entry points
- -> BlockMap RegSet -- ^ live regs on entry to each basic block
+ -> BlockMap (UniqFM Reg (Reg, Format)) -- ^ live regs on entry to each basic block
-> [SCC (LiveBasicBlock instr)] -- ^ instructions annotated with "deaths"
-> UniqSM ([NatBasicBlock instr], RegAllocStats, Int)
@@ -256,7 +256,7 @@ linearRegAlloc' config initFreeRegs entry_ids block_live sccs
linearRA_SCCs :: OutputableRegConstraint freeRegs instr
=> [BlockId]
- -> BlockMap RegSet
+ -> BlockMap (UniqFM Reg (Reg, Format))
-> [NatBasicBlock instr]
-> [SCC (LiveBasicBlock instr)]
-> RegM freeRegs [NatBasicBlock instr]
@@ -291,7 +291,7 @@ linearRA_SCCs entry_ids block_live blocksAcc (CyclicSCC blocks : sccs)
process :: forall freeRegs instr. (OutputableRegConstraint freeRegs instr)
=> [BlockId]
- -> BlockMap RegSet
+ -> BlockMap (UniqFM Reg (Reg, Format))
-> [GenBasicBlock (LiveInstr instr)]
-> RegM freeRegs [[NatBasicBlock instr]]
process entry_ids block_live =
@@ -330,7 +330,7 @@ process entry_ids block_live =
--
processBlock
:: OutputableRegConstraint freeRegs instr
- => BlockMap RegSet -- ^ live regs on entry to each basic block
+ => BlockMap (UniqFM Reg (Reg, Format)) -- ^ live regs on entry to each basic block
-> LiveBasicBlock instr -- ^ block to do register allocation on
-> RegM freeRegs [NatBasicBlock instr] -- ^ block with registers allocated
@@ -347,7 +347,7 @@ processBlock block_live (BasicBlock id instrs)
-- | Load the freeregs and current reg assignment into the RegM state
-- for the basic block with this BlockId.
initBlock :: FR freeRegs
- => BlockId -> BlockMap RegSet -> RegM freeRegs ()
+ => BlockId -> BlockMap (UniqFM Reg (Reg, Format)) -> RegM freeRegs ()
initBlock id block_live
= do platform <- getPlatform
block_assig <- getBlockAssigR
@@ -364,7 +364,7 @@ initBlock id block_live
setFreeRegsR (frInitFreeRegs platform)
Just live ->
setFreeRegsR $ foldl' (flip $ frAllocateReg platform) (frInitFreeRegs platform)
- [ r | RegReal r <- nonDetEltsUniqSet live ]
+ [ r | ( RegReal r, _ ) <- nonDetEltsUFM live ]
-- See Note [Unique Determinism and code generation]
setAssigR emptyRegMap
@@ -377,7 +377,7 @@ initBlock id block_live
-- | Do allocation for a sequence of instructions.
linearRA
:: forall freeRegs instr. (OutputableRegConstraint freeRegs instr)
- => BlockMap RegSet -- ^ map of what vregs are live on entry to each block.
+ => BlockMap (UniqFM Reg (Reg, Format)) -- ^ map of what vregs are live on entry to each block.
-> BlockId -- ^ id of the current block, for debugging.
-> [LiveInstr instr] -- ^ liveness annotated instructions in this block.
-> RegM freeRegs
@@ -402,7 +402,7 @@ linearRA block_live block_id = go [] []
-- | Do allocation for a single instruction.
raInsn
:: OutputableRegConstraint freeRegs instr
- => BlockMap RegSet -- ^ map of what vregs are love on entry to each block.
+ => BlockMap (UniqFM Reg (Reg, Format)) -- ^ map of what vregs are love on entry to each block.
-> [instr] -- ^ accumulator for instructions already processed.
-> BlockId -- ^ the id of the current block, for debugging
-> LiveInstr instr -- ^ the instr to have its regs allocated, with liveness info.
@@ -432,12 +432,12 @@ raInsn block_live new_instrs id (LiveInstr (Instr instr) (Just live))
-- (we can't eliminate it if the source register is on the stack, because
-- we do not want to use one spill slot for different virtual registers)
case takeRegRegMoveInstr instr of
- Just (src,dst) | src `elementOfUniqSet` (liveDieRead live),
+ Just (src,dst) | Just (_, fmt) <- lookupUFM (liveDieRead live) src,
isVirtualReg dst,
not (dst `elemUFM` assig),
isRealReg src || isInReg src assig -> do
case src of
- (RegReal rr) -> setAssigR (addToUFM assig dst (InReg rr))
+ RegReal rr -> setAssigR (addToUFM assig dst (InReg $ RealRegUsage rr fmt))
-- if src is a fixed reg, then we just map dest to this
-- reg in the assignment. src must be an allocatable reg,
-- otherwise it wouldn't be in r_dying.
@@ -456,8 +456,8 @@ raInsn block_live new_instrs id (LiveInstr (Instr instr) (Just live))
return (new_instrs, [])
_ -> genRaInsn block_live new_instrs id instr
- (nonDetEltsUniqSet $ liveDieRead live)
- (nonDetEltsUniqSet $ liveDieWrite live)
+ (map fst $ nonDetEltsUFM $ liveDieRead live)
+ (map fst $ nonDetEltsUFM $ liveDieWrite live)
-- See Note [Unique Determinism and code generation]
raInsn _ _ _ instr
@@ -486,7 +486,7 @@ isInReg src assig | Just (InReg _) <- lookupUFM assig src = True
genRaInsn :: forall freeRegs instr.
(OutputableRegConstraint freeRegs instr)
- => BlockMap RegSet
+ => BlockMap (UniqFM Reg (Reg, Format))
-> [instr]
-> BlockId
-> instr
@@ -499,13 +499,13 @@ genRaInsn block_live new_instrs block_id instr r_dying w_dying = do
platform <- getPlatform
case regUsageOfInstr platform instr of { RU read written ->
do
- let real_written = [ rr | (RegReal rr) <- written ] :: [RealReg]
- let virt_written = [ vr | (RegVirtual vr) <- written ]
+ let real_written = [ rr | (RegReal rr, _) <- written ] :: [RealReg]
+ let virt_written = [ vr | (RegVirtual vr, _) <- written ]
-- we don't need to do anything with real registers that are
-- only read by this instr. (the list is typically ~2 elements,
-- so using nub isn't a problem).
- let virt_read = nub [ vr | (RegVirtual vr) <- read ] :: [VirtualReg]
+ let virt_read = nub [ vr | (RegVirtual vr, _) <- read ] :: [VirtualReg]
-- do
-- let real_read = nub [ rr | (RegReal rr) <- read]
@@ -638,9 +638,9 @@ releaseRegs regs = do
loop assig !free (r:rs) =
case lookupUFM assig r of
Just (InBoth real _) -> loop (delFromUFM assig r)
- (frReleaseReg platform real free) rs
+ (frReleaseReg platform (realReg real) free) rs
Just (InReg real) -> loop (delFromUFM assig r)
- (frReleaseReg platform real free) rs
+ (frReleaseReg platform (realReg real) free) rs
_ -> loop (delFromUFM assig r) free rs
loop assig free regs
@@ -688,15 +688,15 @@ saveClobberedTemps clobbered dying
-- currently support deterministic code-generation.
-- See Note [Unique Determinism and code generation]
InReg reg
- | any (realRegsAlias reg) clobbered
+ | any (realRegsAlias $ realReg reg) clobbered
, temp `notElem` map getUnique dying
- -> clobber temp (assig,instrs) (reg)
+ -> clobber temp (assig,instrs) reg
_ -> return (assig,instrs)
-- See Note [UniqFM and the register allocator]
- clobber :: Unique -> (RegMap Loc,[instr]) -> (RealReg) -> RegM freeRegs (RegMap Loc,[instr])
- clobber temp (assig,instrs) (reg)
+ clobber :: Unique -> (RegMap Loc,[instr]) -> RealRegUsage -> RegM freeRegs (RegMap Loc,[instr])
+ clobber temp (assig,instrs) (RealRegUsage reg fmt)
= do platform <- getPlatform
freeRegs <- getFreeRegsR
@@ -711,7 +711,7 @@ saveClobberedTemps clobbered dying
(my_reg : _) -> do
setFreeRegsR (frAllocateReg platform my_reg freeRegs)
- let new_assign = addToUFM_Directly assig temp (InReg my_reg)
+ let new_assign = addToUFM_Directly assig temp (InReg (RealRegUsage my_reg fmt))
let instr = mkRegRegMoveInstr platform
(RegReal reg) (RegReal my_reg)
@@ -719,12 +719,12 @@ saveClobberedTemps clobbered dying
-- (2) no free registers: spill the value
[] -> do
- (spill, slot) <- spillR (RegReal reg) temp
+ (spill, slot) <- spillR (RegReal reg) fmt temp
-- record why this reg was spilled for profiling
recordSpill (SpillClobber temp)
- let new_assign = addToUFM_Directly assig temp (InBoth reg slot)
+ let new_assign = addToUFM_Directly assig temp (InBoth (RealRegUsage reg fmt) slot)
return (new_assign, (spill ++ instrs))
@@ -771,7 +771,7 @@ clobberRegs clobbered
= assig
clobber assig ((temp, InBoth reg slot) : rest)
- | any (realRegsAlias reg) clobbered
+ | any (realRegsAlias $ realReg reg) clobbered
= clobber (addToUFM_Directly assig temp (InMem slot)) rest
clobber assig (_:rest)
@@ -817,7 +817,7 @@ allocateRegsAndSpill reading keep spills alloc (r:rs)
case lookupUFM assig r of
-- case (1a): already in a register
Just (InReg my_reg) ->
- allocateRegsAndSpill reading keep spills (my_reg:alloc) rs
+ allocateRegsAndSpill reading keep spills (realReg my_reg:alloc) rs
-- case (1b): already in a register (and memory)
-- NB1. if we're writing this register, update its assignment to be
@@ -826,7 +826,7 @@ allocateRegsAndSpill reading keep spills alloc (r:rs)
-- are also read by the same instruction.
Just (InBoth my_reg _)
-> do when (not reading) (setAssigR $ toRegMap (addToUFM assig r (InReg my_reg)))
- allocateRegsAndSpill reading keep spills (my_reg:alloc) rs
+ allocateRegsAndSpill reading keep spills (realReg my_reg:alloc) rs
-- Not already in a register, so we need to find a free one...
Just (InMem slot) | reading -> doSpill (ReadMem slot)
@@ -869,7 +869,15 @@ allocRegsAndSpill_spill :: (FR freeRegs, Instruction instr)
allocRegsAndSpill_spill reading keep spills alloc r rs assig spill_loc
= do platform <- getPlatform
freeRegs <- getFreeRegsR
- let freeRegs_thisClass = frGetFreeRegs platform (classOfVirtualReg r) freeRegs :: [RealReg]
+ let regclass = classOfVirtualReg r
+ freeRegs_thisClass = frGetFreeRegs platform regclass freeRegs :: [RealReg]
+ -- SIMD NCG TODO: this is not the right thing to be doing,
+ -- and is indicative we should not use Format but a more
+ -- trimmed down datatype that only keeps track of e.g.
+ -- how many stack slots something uses up.
+ vr_fmt = case r of
+ VirtualRegVec {} -> VecFormat 2 FmtDouble W64
+ _ -> II64
-- Can we put the variable into a register it already was?
pref_reg <- findPrefRealReg r
@@ -883,10 +891,10 @@ allocRegsAndSpill_spill reading keep spills alloc r rs assig spill_loc
= reg
| otherwise
= first_free
- spills' <- loadTemp r spill_loc final_reg spills
+ spills' <- loadTemp r vr_fmt spill_loc final_reg spills
setAssigR $ toRegMap
- $ (addToUFM assig r $! newLocation spill_loc final_reg)
+ $ (addToUFM assig r $! newLocation spill_loc $ RealRegUsage final_reg vr_fmt)
setFreeRegsR $ frAllocateReg platform final_reg freeRegs
allocateRegsAndSpill reading keep spills' (final_reg : alloc) rs
@@ -908,48 +916,53 @@ allocRegsAndSpill_spill reading keep spills alloc r rs assig spill_loc
let candidates = nonDetUFMToList candidates'
-- the vregs we could kick out that are already in a slot
- let candidates_inBoth :: [(Unique, RealReg, StackSlot)]
+ let compat reg' r'
+ = let cls1 = targetClassOfRealReg platform reg'
+ cls2 = classOfVirtualReg r'
+ in (if cls1 == RcVector128 then RcDouble else cls1)
+ == (if cls2 == RcVector128 then RcDouble else cls2)
+ candidates_inBoth :: [(Unique, RealRegUsage, StackSlot)]
candidates_inBoth
= [ (temp, reg, mem)
| (temp, InBoth reg mem) <- candidates
- , targetClassOfRealReg platform reg == classOfVirtualReg r ]
+ , compat (realReg reg) r ]
-- the vregs we could kick out that are only in a reg
-- this would require writing the reg to a new slot before using it.
let candidates_inReg
= [ (temp, reg)
| (temp, InReg reg) <- candidates
- , targetClassOfRealReg platform reg == classOfVirtualReg r ]
+ , compat (realReg reg) r ]
let result
-- we have a temporary that is in both register and mem,
-- just free up its register for use.
- | (temp, my_reg, slot) : _ <- candidates_inBoth
- = do spills' <- loadTemp r spill_loc my_reg spills
+ | (temp, myRegUse@(RealRegUsage my_reg fmt), slot) : _ <- candidates_inBoth
+ = do spills' <- loadTemp r fmt spill_loc my_reg spills
let assig1 = addToUFM_Directly assig temp (InMem slot)
- let assig2 = addToUFM assig1 r $! newLocation spill_loc my_reg
+ let assig2 = addToUFM assig1 r $! newLocation spill_loc myRegUse
setAssigR $ toRegMap assig2
allocateRegsAndSpill reading keep spills' (my_reg:alloc) rs
-- otherwise, we need to spill a temporary that currently
-- resides in a register.
- | (temp_to_push_out, (my_reg :: RealReg)) : _
+ | (temp_to_push_out, RealRegUsage my_reg fmt) : _
<- candidates_inReg
= do
- (spill_store, slot) <- spillR (RegReal my_reg) temp_to_push_out
+ (spill_store, slot) <- spillR (RegReal my_reg) fmt temp_to_push_out
-- record that this temp was spilled
recordSpill (SpillAlloc temp_to_push_out)
-- update the register assignment
let assig1 = addToUFM_Directly assig temp_to_push_out (InMem slot)
- let assig2 = addToUFM assig1 r $! newLocation spill_loc my_reg
+ let assig2 = addToUFM assig1 r $! newLocation spill_loc (RealRegUsage my_reg fmt)
setAssigR $ toRegMap assig2
-- if need be, load up a spilled temp into the reg we've just freed up.
- spills' <- loadTemp r spill_loc my_reg spills
+ spills' <- loadTemp r fmt spill_loc my_reg spills
allocateRegsAndSpill reading keep
(spill_store ++ spills')
@@ -970,7 +983,7 @@ allocRegsAndSpill_spill reading keep spills alloc r rs assig spill_loc
-- | Calculate a new location after a register has been loaded.
-newLocation :: SpillLoc -> RealReg -> Loc
+newLocation :: SpillLoc -> RealRegUsage -> Loc
-- if the tmp was read from a slot, then now its in a reg as well
newLocation (ReadMem slot) my_reg = InBoth my_reg slot
-- writes will always result in only the register being available
@@ -980,16 +993,17 @@ newLocation _ my_reg = InReg my_reg
loadTemp
:: (Instruction instr)
=> VirtualReg -- the temp being loaded
+ -> Format
-> SpillLoc -- the current location of this temp
-> RealReg -- the hreg to load the temp into
-> [instr]
-> RegM freeRegs [instr]
-loadTemp vreg (ReadMem slot) hreg spills
+loadTemp vreg fmt (ReadMem slot) hreg spills
= do
- insn <- loadR (RegReal hreg) slot
+ insn <- loadR (RegReal hreg) fmt slot
recordSpill (SpillLoad $ getUnique vreg)
return $ {- mkComment (text "spill load") : -} insn ++ spills
-loadTemp _ _ _ spills =
+loadTemp _ _ _ _ spills =
return spills
=====================================
compiler/GHC/CmmToAsm/Reg/Linear/Base.hs
=====================================
@@ -11,6 +11,7 @@ module GHC.CmmToAsm.Reg.Linear.Base (
Loc(..),
regsOfLoc,
+ RealRegUsage(..),
-- for stats
SpillReason(..),
@@ -36,6 +37,7 @@ import GHC.Types.Unique.Supply
import GHC.Cmm.BlockId
import GHC.Cmm.Dataflow.Label
import GHC.CmmToAsm.Reg.Utils
+import GHC.CmmToAsm.Format
data ReadingOrWriting = Reading | Writing deriving (Eq,Ord)
@@ -76,8 +78,8 @@ updateBlockAssignment dest (freeRegs, regMap) (BlockAssignment {..}) =
combWithExisting old_reg _ = Just $ old_reg
fromLoc :: Loc -> Maybe RealReg
- fromLoc (InReg rr) = Just rr
- fromLoc (InBoth rr _) = Just rr
+ fromLoc (InReg rr) = Just $ realReg rr
+ fromLoc (InBoth rr _) = Just $ realReg rr
fromLoc _ = Nothing
@@ -94,23 +96,29 @@ updateBlockAssignment dest (freeRegs, regMap) (BlockAssignment {..}) =
--
data Loc
-- | vreg is in a register
- = InReg !RealReg
+ = InReg {-# UNPACK #-} !RealRegUsage
- -- | vreg is held in a stack slot
+ -- | vreg is held in stack slots
| InMem {-# UNPACK #-} !StackSlot
- -- | vreg is held in both a register and a stack slot
- | InBoth !RealReg
+ -- | vreg is held in both a register and stack slots
+ | InBoth {-# UNPACK #-} !RealRegUsage
{-# UNPACK #-} !StackSlot
deriving (Eq, Show, Ord)
+data RealRegUsage
+ = RealRegUsage
+ { realReg :: !RealReg
+ , realRegFormat :: !Format
+ } deriving (Eq, Show, Ord)
+
instance Outputable Loc where
ppr l = text (show l)
-- | Get the reg numbers stored in this Loc.
-regsOfLoc :: Loc -> [RealReg]
+regsOfLoc :: Loc -> [RealRegUsage]
regsOfLoc (InReg r) = [r]
regsOfLoc (InBoth r _) = [r]
regsOfLoc (InMem _) = []
=====================================
compiler/GHC/CmmToAsm/Reg/Linear/JoinToTargets.hs
=====================================
@@ -29,16 +29,16 @@ import GHC.Utils.Panic
import GHC.Utils.Monad (concatMapM)
import GHC.Types.Unique
import GHC.Types.Unique.FM
-import GHC.Types.Unique.Set
import GHC.Utils.Outputable
+import GHC.CmmToAsm.Format
-- | For a jump instruction at the end of a block, generate fixup code so its
-- vregs are in the correct regs for its destination.
--
joinToTargets
:: (FR freeRegs, Instruction instr)
- => BlockMap RegSet -- ^ maps the unique of the blockid to the set of vregs
+ => BlockMap (RegMap (Reg, Format)) -- ^ maps the unique of the blockid to the set of vregs
-- that are known to be live on the entry to each block.
-> BlockId -- ^ id of the current block
@@ -62,7 +62,7 @@ joinToTargets block_live id instr
-----
joinToTargets'
:: (FR freeRegs, Instruction instr)
- => BlockMap RegSet -- ^ maps the unique of the blockid to the set of vregs
+ => BlockMap (RegMap (Reg, Format)) -- ^ maps the unique of the blockid to the set of vregs
-- that are known to be live on the entry to each block.
-> [NatBasicBlock instr] -- ^ acc blocks of fixup code.
@@ -90,7 +90,7 @@ joinToTargets' block_live new_blocks block_id instr (dest:dests)
-- adjust the current assignment to remove any vregs that are not live
-- on entry to the destination block.
let Just live_set = mapLookup dest block_live
- let still_live uniq _ = uniq `elemUniqSet_Directly` live_set
+ let still_live uniq _ = uniq `elemUFM_Directly` live_set
let adjusted_assig = filterUFM_Directly still_live assig
-- and free up those registers which are now free.
@@ -99,14 +99,14 @@ joinToTargets' block_live new_blocks block_id instr (dest:dests)
-- This is non-deterministic but we do not
-- currently support deterministic code-generation.
-- See Note [Unique Determinism and code generation]
- , not (elemUniqSet_Directly reg live_set)
+ , not (elemUFM_Directly reg live_set)
, r <- regsOfLoc loc ]
case lookupBlockAssignment dest block_assig of
Nothing
-> joinToTargets_first
block_live new_blocks block_id instr dest dests
- block_assig adjusted_assig to_free
+ block_assig adjusted_assig $ map realReg to_free
Just (_, dest_assig)
-> joinToTargets_again
@@ -116,7 +116,7 @@ joinToTargets' block_live new_blocks block_id instr (dest:dests)
-- this is the first time we jumped to this block.
joinToTargets_first :: (FR freeRegs, Instruction instr)
- => BlockMap RegSet
+ => BlockMap (UniqFM Reg (Reg, Format))
-> [NatBasicBlock instr]
-> BlockId
-> instr
@@ -145,7 +145,7 @@ joinToTargets_first block_live new_blocks block_id instr dest dests
-- we've jumped to this block before
joinToTargets_again :: (Instruction instr, FR freeRegs)
- => BlockMap RegSet
+ => BlockMap (UniqFM Reg (Reg, Format))
-> [NatBasicBlock instr]
-> BlockId
-> instr
@@ -327,15 +327,15 @@ handleComponent delta _ (AcyclicSCC (DigraphNode vreg src dsts))
-- require a fixup.
--
handleComponent delta instr
- (CyclicSCC ((DigraphNode vreg (InReg sreg) ((InReg dreg: _))) : rest))
+ (CyclicSCC ((DigraphNode vreg (InReg (RealRegUsage sreg scls)) ((InReg (RealRegUsage dreg dcls): _))) : rest))
-- dest list may have more than one element, if the reg is also InMem.
= do
-- spill the source into its slot
(instrSpill, slot)
- <- spillR (RegReal sreg) vreg
+ <- spillR (RegReal sreg) scls vreg
-- reload into destination reg
- instrLoad <- loadR (RegReal dreg) slot
+ instrLoad <- loadR (RegReal dreg) dcls slot
remainingFixUps <- mapM (handleComponent delta instr)
(stronglyConnCompFromEdgedVerticesOrdR rest)
@@ -363,15 +363,16 @@ makeMove delta vreg src dst
let platform = ncgPlatform config
case (src, dst) of
- (InReg s, InReg d) ->
+ (InReg (RealRegUsage s _), InReg (RealRegUsage d _)) ->
do recordSpill (SpillJoinRR vreg)
+ -- SIMD NCG TODO: does reg-2-reg work for vector registers?
return $ [mkRegRegMoveInstr platform (RegReal s) (RegReal d)]
- (InMem s, InReg d) ->
+ (InMem s, InReg (RealRegUsage d cls)) ->
do recordSpill (SpillJoinRM vreg)
- return $ mkLoadInstr config (RegReal d) delta s
- (InReg s, InMem d) ->
+ return $ mkLoadInstr config (RegReal d) cls delta s
+ (InReg (RealRegUsage s cls), InMem d) ->
do recordSpill (SpillJoinRM vreg)
- return $ mkSpillInstr config (RegReal s) delta d
+ return $ mkSpillInstr config (RegReal s) cls delta d
_ ->
-- we don't handle memory to memory moves.
-- they shouldn't happen because we don't share
=====================================
compiler/GHC/CmmToAsm/Reg/Linear/StackMap.hs
=====================================
@@ -24,6 +24,7 @@ import GHC.Prelude
import GHC.Types.Unique.FM
import GHC.Types.Unique
+import GHC.CmmToAsm.Format
-- | Identifier for a stack slot.
@@ -47,13 +48,18 @@ emptyStackMap = StackMap 0 emptyUFM
-- | If this vreg unique already has a stack assignment then return the slot number,
-- otherwise allocate a new slot, and update the map.
--
-getStackSlotFor :: StackMap -> Unique -> (StackMap, Int)
-
-getStackSlotFor fs@(StackMap _ reserved) reg
- | Just slot <- lookupUFM reserved reg = (fs, slot)
-
-getStackSlotFor (StackMap freeSlot reserved) reg =
- (StackMap (freeSlot+1) (addToUFM reserved reg freeSlot), freeSlot)
+getStackSlotFor :: StackMap -> Format -> Unique -> (StackMap, Int)
+
+getStackSlotFor fs@(StackMap _ reserved) _fmt regUnique
+ | Just slot <- lookupUFM reserved regUnique = (fs, slot)
+
+getStackSlotFor (StackMap freeSlot reserved) fmt regUnique =
+ let
+ nbSlots = case fmt of
+ VecFormat {} -> 2 -- SIMD NCG TODO: panic for unsupported vectors
+ _ -> 1
+ in
+ (StackMap (freeSlot+nbSlots) (addToUFM reserved regUnique freeSlot), freeSlot)
-- | Return the number of stack slots that were allocated
getStackUse :: StackMap -> Int
=====================================
compiler/GHC/CmmToAsm/Reg/Linear/State.hs
=====================================
@@ -53,6 +53,7 @@ import GHC.Types.Unique.Supply
import GHC.Exts (oneShot)
import Control.Monad (ap)
+import GHC.CmmToAsm.Format
type RA_Result freeRegs a = (# RA_State freeRegs, a #)
@@ -121,20 +122,20 @@ makeRAStats state
spillR :: Instruction instr
- => Reg -> Unique -> RegM freeRegs ([instr], Int)
+ => Reg -> Format -> Unique -> RegM freeRegs ([instr], Int)
-spillR reg temp = mkRegM $ \s ->
- let (stack1,slot) = getStackSlotFor (ra_stack s) temp
- instr = mkSpillInstr (ra_config s) reg (ra_delta s) slot
+spillR reg fmt temp = mkRegM $ \s ->
+ let (stack1,slots) = getStackSlotFor (ra_stack s) fmt temp
+ instr = mkSpillInstr (ra_config s) reg fmt (ra_delta s) slots
in
- RA_Result s{ra_stack=stack1} (instr,slot)
+ RA_Result s{ra_stack=stack1} (instr,slots)
loadR :: Instruction instr
- => Reg -> Int -> RegM freeRegs [instr]
+ => Reg -> Format -> Int -> RegM freeRegs [instr]
-loadR reg slot = mkRegM $ \s ->
- RA_Result s (mkLoadInstr (ra_config s) reg (ra_delta s) slot)
+loadR reg fmt slot = mkRegM $ \s ->
+ RA_Result s (mkLoadInstr (ra_config s) reg fmt (ra_delta s) slot)
getFreeRegsR :: RegM freeRegs freeRegs
getFreeRegsR = mkRegM $ \ s at RA_State{ra_freeregs = freeregs} ->
=====================================
compiler/GHC/CmmToAsm/Reg/Linear/X86.hs
=====================================
@@ -32,13 +32,20 @@ getFreeRegs platform cls (FreeRegs f) = go f 0
where go 0 _ = []
go n m
- | n .&. 1 /= 0 && classOfRealReg platform (RealRegSingle m) == cls
+ | n .&. 1 /= 0 && compatibleClass m
= RealRegSingle m : (go (n `shiftR` 1) $! (m+1))
| otherwise
= go (n `shiftR` 1) $! (m+1)
-- ToDo: there's no point looking through all the integer registers
-- in order to find a floating-point one.
+ compatibleClass i =
+ let regClass = classOfRealReg platform (RealRegSingle i)
+ in (if regClass == RcVector128 then RcDouble else regClass)
+ == (if cls == RcVector128 then RcDouble else cls)
+ -- SIMD NCG TODO: giant hack to account for xmm registers being
+ -- used for Double with SSE2.
+
allocateReg :: RealReg -> FreeRegs -> FreeRegs
allocateReg (RealRegSingle r) (FreeRegs f)
=====================================
compiler/GHC/CmmToAsm/Reg/Linear/X86_64.hs
=====================================
@@ -32,13 +32,19 @@ getFreeRegs platform cls (FreeRegs f) = go f 0
where go 0 _ = []
go n m
- | n .&. 1 /= 0 && classOfRealReg platform (RealRegSingle m) == cls
+ | n .&. 1 /= 0 && compatibleClass m
= RealRegSingle m : (go (n `shiftR` 1) $! (m+1))
| otherwise
= go (n `shiftR` 1) $! (m+1)
-- ToDo: there's no point looking through all the integer registers
-- in order to find a floating-point one.
+ compatibleClass i =
+ let regClass = classOfRealReg platform (RealRegSingle i)
+ in (if regClass == RcVector128 then RcDouble else regClass)
+ == (if cls == RcVector128 then RcDouble else cls)
+ -- SIMD NCG TODO: giant hack to account for xmm registers being
+ -- used for Double with SSE2.
allocateReg :: RealReg -> FreeRegs -> FreeRegs
allocateReg (RealRegSingle r) (FreeRegs f)
=====================================
compiler/GHC/CmmToAsm/Reg/Liveness.hs
=====================================
@@ -63,6 +63,8 @@ import GHC.Utils.Monad.State.Strict
import Data.List (mapAccumL, partition)
import Data.Maybe
import Data.IntSet (IntSet)
+import GHC.CmmToAsm.Format
+import GHC.Types.Unique (Uniquable)
-----------------------------------------------------------------------------
type RegSet = UniqSet Reg
@@ -77,9 +79,6 @@ type RegMap a = UniqFM Reg a
emptyRegMap :: RegMap a
emptyRegMap = emptyUFM
-emptyRegSet :: RegSet
-emptyRegSet = emptyUniqSet
-
type BlockMap a = LabelMap a
type SlotMap a = UniqFM Slot a
@@ -101,10 +100,10 @@ data InstrSR instr
= Instr instr
-- | spill this reg to a stack slot
- | SPILL Reg Int
+ | SPILL Reg Format Int
-- | reload this reg from a stack slot
- | RELOAD Int Reg
+ | RELOAD Int Reg Format
deriving (Functor)
@@ -112,14 +111,14 @@ instance Instruction instr => Instruction (InstrSR instr) where
regUsageOfInstr platform i
= case i of
Instr instr -> regUsageOfInstr platform instr
- SPILL reg _ -> RU [reg] []
- RELOAD _ reg -> RU [] [reg]
+ SPILL reg fmt _ -> RU [(reg, fmt)] []
+ RELOAD _ reg fmt -> RU [] [(reg, fmt)]
patchRegsOfInstr i f
= case i of
Instr instr -> Instr (patchRegsOfInstr instr f)
- SPILL reg slot -> SPILL (f reg) slot
- RELOAD slot reg -> RELOAD slot (f reg)
+ SPILL reg cls slot -> SPILL (f reg) cls slot
+ RELOAD slot reg cls -> RELOAD slot (f reg) cls
isJumpishInstr i
= case i of
@@ -189,9 +188,9 @@ data LiveInstr instr
data Liveness
= Liveness
- { liveBorn :: RegSet -- ^ registers born in this instruction (written to for first time).
- , liveDieRead :: RegSet -- ^ registers that died because they were read for the last time.
- , liveDieWrite :: RegSet } -- ^ registers that died because they were clobbered by something.
+ { liveBorn :: RegMap (Reg, Format) -- ^ registers born in this instruction (written to for first time).
+ , liveDieRead :: RegMap (Reg, Format) -- ^ registers that died because they were read for the last time.
+ , liveDieWrite :: RegMap (Reg, Format) } -- ^ registers that died because they were clobbered by something.
-- | Stash regs live on entry to each basic block in the info part of the cmm code.
@@ -200,7 +199,7 @@ data LiveInfo
(LabelMap RawCmmStatics) -- cmm info table static stuff
[BlockId] -- entry points (first one is the
-- entry point for the proc).
- (BlockMap RegSet) -- argument locals live on entry to this block
+ (BlockMap (UniqFM Reg (Reg, Format))) -- argument locals live on entry to this block
(BlockMap IntSet) -- stack slots live on entry to this block
@@ -215,7 +214,7 @@ instance Outputable instr
ppr (Instr realInstr)
= ppr realInstr
- ppr (SPILL reg slot)
+ ppr (SPILL reg _cls slot)
= hcat [
text "\tSPILL",
char ' ',
@@ -223,7 +222,7 @@ instance Outputable instr
comma,
text "SLOT" <> parens (int slot)]
- ppr (RELOAD slot reg)
+ ppr (RELOAD slot reg _cls)
= hcat [
text "\tRELOAD",
char ' ',
@@ -246,11 +245,11 @@ instance Outputable instr
, pprRegs (text "# w_dying: ") (liveDieWrite live) ]
$+$ space)
- where pprRegs :: SDoc -> RegSet -> SDoc
+ where pprRegs :: Outputable a => SDoc -> RegMap a -> SDoc
pprRegs name regs
- | isEmptyUniqSet regs = empty
+ | isNullUFM regs = empty
| otherwise = name <>
- (pprUFM (getUniqSet regs) (hcat . punctuate space . map ppr))
+ (pprUFM regs (hcat . punctuate space . map ppr))
instance OutputableP env instr => OutputableP env (LiveInstr instr) where
pdoc env i = ppr (fmap (pdoc env) i)
@@ -329,7 +328,7 @@ mapGenBlockTopM f (CmmProc header label live (ListGraph blocks))
slurpConflicts
:: Instruction instr
=> LiveCmmDecl statics instr
- -> (Bag (UniqSet Reg), Bag (Reg, Reg))
+ -> (Bag (UniqFM Reg (Reg, Format)), Bag (Reg, Reg))
slurpConflicts live
= slurpCmm (emptyBag, emptyBag) live
@@ -363,23 +362,23 @@ slurpConflicts live
= let
-- regs that die because they are read for the last time at the start of an instruction
-- are not live across it.
- rsLiveAcross = rsLiveEntry `minusUniqSet` (liveDieRead live)
+ rsLiveAcross = rsLiveEntry `minusUFM` (liveDieRead live)
-- regs live on entry to the next instruction.
-- be careful of orphans, make sure to delete dying regs _after_ unioning
-- in the ones that are born here.
- rsLiveNext = (rsLiveAcross `unionUniqSets` (liveBorn live))
- `minusUniqSet` (liveDieWrite live)
+ rsLiveNext = (rsLiveAcross `plusUFM` (liveBorn live))
+ `minusUFM` (liveDieWrite live)
-- orphan vregs are the ones that die in the same instruction they are born in.
-- these are likely to be results that are never used, but we still
-- need to assign a hreg to them..
- rsOrphans = intersectUniqSets
+ rsOrphans = intersectUFM
(liveBorn live)
- (unionUniqSets (liveDieWrite live) (liveDieRead live))
+ (plusUFM (liveDieWrite live) (liveDieRead live))
--
- rsConflicts = unionUniqSets rsLiveNext rsOrphans
+ rsConflicts = plusUFM rsLiveNext rsOrphans
in case takeRegRegMoveInstr instr of
Just rr -> slurpLIs rsLiveNext
@@ -458,12 +457,12 @@ slurpReloadCoalesce live
slurpLI slotMap li
-- remember what reg was stored into the slot
- | LiveInstr (SPILL reg slot) _ <- li
- , slotMap' <- addToUFM slotMap slot reg
+ | LiveInstr (SPILL reg _cls slot) _ <- li
+ , slotMap' <- addToUFM slotMap slot reg
= return (slotMap', Nothing)
-- add an edge between the this reg and the last one stored into the slot
- | LiveInstr (RELOAD slot reg) _ <- li
+ | LiveInstr (RELOAD slot reg _cls) _ <- li
= case lookupUFM slotMap slot of
Just reg2
| reg /= reg2 -> return (slotMap, Just (reg, reg2))
@@ -572,13 +571,13 @@ stripLiveBlock config (BasicBlock i lis)
-- The SPILL/RELOAD cases do not appear to be exercised by our codegens
--
- spillNat acc (LiveInstr (SPILL reg slot) _ : instrs)
+ spillNat acc (LiveInstr (SPILL reg cls slot) _ : instrs)
= do delta <- get
- spillNat (mkSpillInstr config reg delta slot ++ acc) instrs
+ spillNat (mkSpillInstr config reg cls delta slot ++ acc) instrs
- spillNat acc (LiveInstr (RELOAD slot reg) _ : instrs)
+ spillNat acc (LiveInstr (RELOAD slot reg cls) _ : instrs)
= do delta <- get
- spillNat (mkLoadInstr config reg delta slot ++ acc) instrs
+ spillNat (mkLoadInstr config reg cls delta slot ++ acc) instrs
spillNat acc (LiveInstr (Instr instr) _ : instrs)
| Just i <- takeDeltaInstr instr
@@ -621,9 +620,8 @@ patchEraseLive patchF cmm
patchCmm (CmmProc info label live sccs)
| LiveInfo static id blockMap mLiveSlots <- info
= let
- patchRegSet set = mkUniqSet $ map patchF $ nonDetEltsUFM set
-- See Note [Unique Determinism and code generation]
- blockMap' = mapMap (patchRegSet . getUniqSet) blockMap
+ blockMap' = mapMap (mapKeysUFM patchF) blockMap
info' = LiveInfo static id blockMap' mLiveSlots
in CmmProc info' label live $ map patchSCC sccs
@@ -652,8 +650,8 @@ patchEraseLive patchF cmm
| r1 == r2 = True
-- destination reg is never used
- | elementOfUniqSet r2 (liveBorn live)
- , elementOfUniqSet r2 (liveDieRead live) || elementOfUniqSet r2 (liveDieWrite live)
+ | elemUFM r2 (liveBorn live)
+ , elemUFM r2 (liveDieRead live) || elemUFM r2 (liveDieWrite live)
= True
| otherwise = False
@@ -676,11 +674,14 @@ patchRegsLiveInstr patchF li
(patchRegsOfInstr instr patchF)
(Just live
{ -- WARNING: have to go via lists here because patchF changes the uniq in the Reg
- liveBorn = mapUniqSet patchF $ liveBorn live
- , liveDieRead = mapUniqSet patchF $ liveDieRead live
- , liveDieWrite = mapUniqSet patchF $ liveDieWrite live })
+ liveBorn = mapKeysUFM patchF $ liveBorn live
+ , liveDieRead = mapKeysUFM patchF $ liveDieRead live
+ , liveDieWrite = mapKeysUFM patchF $ liveDieWrite live })
-- See Note [Unique Determinism and code generation]
+-- SIMD NCG TODO: move this to Unique.FM module
+mapKeysUFM :: Uniquable a => (t -> a) -> UniqFM key (t, b) -> UniqFM a (a, b)
+mapKeysUFM f m = listToUFM $ map ( \ (r, fmt) -> let r' = f r in (r', (r', fmt)) ) $ nonDetEltsUFM m
--------------------------------------------------------------------------------
-- | Convert a NatCmmDecl to a LiveCmmDecl, with liveness information
@@ -869,7 +870,7 @@ computeLiveness
-> [SCC (LiveBasicBlock instr)]
-> ([SCC (LiveBasicBlock instr)], -- instructions annotated with list of registers
-- which are "dead after this instruction".
- BlockMap RegSet) -- blocks annotated with set of live registers
+ BlockMap (UniqFM Reg (Reg, Format))) -- blocks annotated with set of live registers
-- on entry to the block.
computeLiveness platform sccs
@@ -884,11 +885,11 @@ computeLiveness platform sccs
livenessSCCs
:: Instruction instr
=> Platform
- -> BlockMap RegSet
+ -> BlockMap (UniqFM Reg (Reg, Format))
-> [SCC (LiveBasicBlock instr)] -- accum
-> [SCC (LiveBasicBlock instr)]
-> ( [SCC (LiveBasicBlock instr)]
- , BlockMap RegSet)
+ , BlockMap (UniqFM Reg (Reg, Format)))
livenessSCCs _ blockmap done []
= (done, blockmap)
@@ -917,8 +918,8 @@ livenessSCCs platform blockmap done
linearLiveness
:: Instruction instr
- => BlockMap RegSet -> [LiveBasicBlock instr]
- -> (BlockMap RegSet, [LiveBasicBlock instr])
+ => BlockMap (UniqFM Reg (Reg, Format)) -> [LiveBasicBlock instr]
+ -> (BlockMap (UniqFM Reg (Reg, Format)), [LiveBasicBlock instr])
linearLiveness = mapAccumL (livenessBlock platform)
@@ -926,9 +927,8 @@ livenessSCCs platform blockmap done
-- BlockMaps for equality.
equalBlockMaps a b
= a' == b'
- where a' = map f $ mapToList a
- b' = map f $ mapToList b
- f (key,elt) = (key, nonDetEltsUniqSet elt)
+ where a' = mapToList a
+ b' = mapToList b
-- See Note [Unique Determinism and code generation]
@@ -938,14 +938,14 @@ livenessSCCs platform blockmap done
livenessBlock
:: Instruction instr
=> Platform
- -> BlockMap RegSet
+ -> BlockMap (UniqFM Reg (Reg, Format))
-> LiveBasicBlock instr
- -> (BlockMap RegSet, LiveBasicBlock instr)
+ -> (BlockMap (UniqFM Reg (Reg, Format)), LiveBasicBlock instr)
livenessBlock platform blockmap (BasicBlock block_id instrs)
= let
(regsLiveOnEntry, instrs1)
- = livenessBack platform emptyUniqSet blockmap [] (reverse instrs)
+ = livenessBack platform emptyUFM blockmap [] (reverse instrs)
blockmap' = mapInsert block_id regsLiveOnEntry blockmap
instrs2 = livenessForward platform regsLiveOnEntry instrs1
@@ -960,7 +960,7 @@ livenessBlock platform blockmap (BasicBlock block_id instrs)
livenessForward
:: Instruction instr
=> Platform
- -> RegSet -- regs live on this instr
+ -> UniqFM Reg (Reg, Format) -- regs live on this instr
-> [LiveInstr instr] -> [LiveInstr instr]
livenessForward _ _ [] = []
@@ -970,12 +970,14 @@ livenessForward platform rsLiveEntry (li@(LiveInstr instr mLive) : lis)
RU _ written = regUsageOfInstr platform instr
-- Regs that are written to but weren't live on entry to this instruction
-- are recorded as being born here.
- rsBorn = mkUniqSet
- $ filter (\r -> not $ elementOfUniqSet r rsLiveEntry) written
+ rsBorn = listToUFM
+ $ map ( \ ( r, fmt ) -> ( r, ( r, fmt ) ) )
+ $ filter (\( r, _) -> not $ elemUFM r rsLiveEntry)
+ $ written
- rsLiveNext = (rsLiveEntry `unionUniqSets` rsBorn)
- `minusUniqSet` (liveDieRead live)
- `minusUniqSet` (liveDieWrite live)
+ rsLiveNext = (rsLiveEntry `plusUFM` rsBorn)
+ `minusUFM` (liveDieRead live)
+ `minusUFM` (liveDieWrite live)
in LiveInstr instr (Just live { liveBorn = rsBorn })
: livenessForward platform rsLiveNext lis
@@ -990,11 +992,11 @@ livenessForward platform rsLiveEntry (li@(LiveInstr instr mLive) : lis)
livenessBack
:: Instruction instr
=> Platform
- -> RegSet -- regs live on this instr
- -> BlockMap RegSet -- regs live on entry to other BBs
+ -> UniqFM Reg (Reg, Format) -- regs live on this instr
+ -> BlockMap (UniqFM Reg (Reg, Format)) -- regs live on entry to other BBs
-> [LiveInstr instr] -- instructions (accum)
-> [LiveInstr instr] -- instructions
- -> (RegSet, [LiveInstr instr])
+ -> (UniqFM Reg (Reg, Format), [LiveInstr instr])
livenessBack _ liveregs _ done [] = (liveregs, done)
@@ -1007,10 +1009,10 @@ livenessBack platform liveregs blockmap acc (instr : instrs)
liveness1
:: Instruction instr
=> Platform
- -> RegSet
- -> BlockMap RegSet
+ -> UniqFM Reg (Reg, Format)
+ -> BlockMap (UniqFM Reg (Reg, Format))
-> LiveInstr instr
- -> (RegSet, LiveInstr instr)
+ -> (UniqFM Reg (Reg, Format), LiveInstr instr)
liveness1 _ liveregs _ (LiveInstr instr _)
| isMetaInstr instr
@@ -1021,15 +1023,15 @@ liveness1 platform liveregs blockmap (LiveInstr instr _)
| not_a_branch
= (liveregs1, LiveInstr instr
(Just $ Liveness
- { liveBorn = emptyUniqSet
+ { liveBorn = emptyUFM
, liveDieRead = r_dying
, liveDieWrite = w_dying }))
| otherwise
= (liveregs_br, LiveInstr instr
(Just $ Liveness
- { liveBorn = emptyUniqSet
- , liveDieRead = mkUniqSet r_dying_br
+ { liveBorn = emptyUFM
+ , liveDieRead = r_dying_br
, liveDieWrite = w_dying }))
where
@@ -1037,18 +1039,18 @@ liveness1 platform liveregs blockmap (LiveInstr instr _)
-- registers that were written here are dead going backwards.
-- registers that were read here are live going backwards.
- liveregs1 = (liveregs `delListFromUniqSet` written)
- `addListToUniqSet` read
+ liveregs1 = (liveregs `delListFromUFM` (map fst written))
+ `addListToUFM` (map (\(r, fmt) -> (r, (r,fmt))) read)
-- registers that are not live beyond this point, are recorded
-- as dying here.
- r_dying = mkUniqSet
- [ reg | reg <- read, reg `notElem` written,
- not (elementOfUniqSet reg liveregs) ]
+ r_dying = listToUFM
+ [ (reg, (reg, fmt)) | (reg, fmt) <- read, reg `notElem` map fst written,
+ not (elemUFM reg liveregs) ]
- w_dying = mkUniqSet
- [ reg | reg <- written,
- not (elementOfUniqSet reg liveregs) ]
+ w_dying = listToUFM
+ [ (reg, (reg, fmt)) | (reg, fmt) <- written,
+ not (elemUFM reg liveregs) ]
-- union in the live regs from all the jump destinations of this
-- instruction.
@@ -1058,15 +1060,14 @@ liveness1 platform liveregs blockmap (LiveInstr instr _)
targetLiveRegs target
= case mapLookup target blockmap of
Just ra -> ra
- Nothing -> emptyRegSet
+ Nothing -> emptyUFM
- live_from_branch = unionManyUniqSets (map targetLiveRegs targets)
+ live_from_branch = plusUFMList (map targetLiveRegs targets)
- liveregs_br = liveregs1 `unionUniqSets` live_from_branch
+ liveregs_br = liveregs1 `plusUFM` live_from_branch
-- registers that are live only in the branch targets should
-- be listed as dying here.
- live_branch_only = live_from_branch `minusUniqSet` liveregs
- r_dying_br = nonDetEltsUniqSet (r_dying `unionUniqSets`
- live_branch_only)
+ live_branch_only = live_from_branch `minusUFM` liveregs
+ r_dying_br = r_dying `plusUFM` live_branch_only
-- See Note [Unique Determinism and code generation]
=====================================
compiler/GHC/CmmToAsm/X86/Instr.hs
=====================================
@@ -380,6 +380,8 @@ data Instr
| VSHUFPS Format Imm Operand Reg
| SHUFPD Format Imm Operand Reg
| VSHUFPD Format Imm Operand Reg
+ -- SIMD NCG TODO: don't store the Format (or only what we need)
+ -- in order to emit these instructions.
-- Shift
| PSLLDQ Format Operand Reg
@@ -401,129 +403,129 @@ data FMAPermutation = FMA132 | FMA213 | FMA231
regUsageOfInstr :: Platform -> Instr -> RegUsage
regUsageOfInstr platform instr
= case instr of
- MOV _ src dst -> usageRW src dst
- CMOV _ _ src dst -> mkRU (use_R src [dst]) [dst]
- MOVZxL _ src dst -> usageRW src dst
- MOVSxL _ src dst -> usageRW src dst
- LEA _ src dst -> usageRW src dst
- ADD _ src dst -> usageRM src dst
- ADC _ src dst -> usageRM src dst
- SUB _ src dst -> usageRM src dst
- SBB _ src dst -> usageRM src dst
- IMUL _ src dst -> usageRM src dst
+ MOV fmt src dst -> usageRW fmt src dst
+ CMOV _ fmt src dst -> mkRU fmt (use_R src [dst]) [dst]
+ MOVZxL fmt src dst -> usageRW fmt src dst
+ MOVSxL fmt src dst -> usageRW fmt src dst
+ LEA fmt src dst -> usageRW fmt src dst
+ ADD fmt src dst -> usageRM fmt src dst
+ ADC fmt src dst -> usageRM fmt src dst
+ SUB fmt src dst -> usageRM fmt src dst
+ SBB fmt src dst -> usageRM fmt src dst
+ IMUL fmt src dst -> usageRM fmt src dst
-- Result of IMULB will be in just in %ax
- IMUL2 II8 src -> mkRU (eax:use_R src []) [eax]
+ IMUL2 II8 src -> mkRU II8 (eax:use_R src []) [eax]
-- Result of IMUL for wider values, will be split between %dx/%edx/%rdx and
-- %ax/%eax/%rax.
- IMUL2 _ src -> mkRU (eax:use_R src []) [eax,edx]
-
- MUL _ src dst -> usageRM src dst
- MUL2 _ src -> mkRU (eax:use_R src []) [eax,edx]
- DIV _ op -> mkRU (eax:edx:use_R op []) [eax,edx]
- IDIV _ op -> mkRU (eax:edx:use_R op []) [eax,edx]
- ADD_CC _ src dst -> usageRM src dst
- SUB_CC _ src dst -> usageRM src dst
- AND _ src dst -> usageRM src dst
- OR _ src dst -> usageRM src dst
-
- XOR _ (OpReg src) (OpReg dst)
- | src == dst -> mkRU [] [dst]
-
- XOR _ src dst -> usageRM src dst
- NOT _ op -> usageM op
- BSWAP _ reg -> mkRU [reg] [reg]
- NEGI _ op -> usageM op
- SHL _ imm dst -> usageRM imm dst
- SAR _ imm dst -> usageRM imm dst
- SHR _ imm dst -> usageRM imm dst
- SHLD _ imm dst1 dst2 -> usageRMM imm dst1 dst2
- SHRD _ imm dst1 dst2 -> usageRMM imm dst1 dst2
- BT _ _ src -> mkRUR (use_R src [])
-
- PUSH _ op -> mkRUR (use_R op [])
- POP _ op -> mkRU [] (def_W op)
- TEST _ src dst -> mkRUR (use_R src $! use_R dst [])
- CMP _ src dst -> mkRUR (use_R src $! use_R dst [])
- SETCC _ op -> mkRU [] (def_W op)
- JXX _ _ -> mkRU [] []
- JXX_GBL _ _ -> mkRU [] []
- JMP op regs -> mkRUR (use_R op regs)
- JMP_TBL op _ _ _ -> mkRUR (use_R op [])
- CALL (Left _) params -> mkRU params (callClobberedRegs platform)
- CALL (Right reg) params -> mkRU (reg:params) (callClobberedRegs platform)
- CLTD _ -> mkRU [eax] [edx]
- NOP -> mkRU [] []
-
- X87Store _ dst -> mkRUR ( use_EA dst [])
-
- CVTSS2SD src dst -> mkRU [src] [dst]
- CVTSD2SS src dst -> mkRU [src] [dst]
- CVTTSS2SIQ _ src dst -> mkRU (use_R src []) [dst]
- CVTTSD2SIQ _ src dst -> mkRU (use_R src []) [dst]
- CVTSI2SS _ src dst -> mkRU (use_R src []) [dst]
- CVTSI2SD _ src dst -> mkRU (use_R src []) [dst]
- FDIV _ src dst -> usageRM src dst
- SQRT _ src dst -> mkRU (use_R src []) [dst]
-
- FETCHGOT reg -> mkRU [] [reg]
- FETCHPC reg -> mkRU [] [reg]
+ IMUL2 fmt src -> mkRU fmt (eax:use_R src []) [eax,edx]
+
+ MUL fmt src dst -> usageRM fmt src dst
+ MUL2 fmt src -> mkRU fmt (eax:use_R src []) [eax,edx]
+ DIV fmt op -> mkRU fmt (eax:edx:use_R op []) [eax,edx]
+ IDIV fmt op -> mkRU fmt (eax:edx:use_R op []) [eax,edx]
+ ADD_CC fmt src dst -> usageRM fmt src dst
+ SUB_CC fmt src dst -> usageRM fmt src dst
+ AND fmt src dst -> usageRM fmt src dst
+ OR fmt src dst -> usageRM fmt src dst
+
+ XOR fmt (OpReg src) (OpReg dst)
+ | src == dst -> mkRU fmt [] [dst]
+
+ XOR fmt src dst -> usageRM fmt src dst
+ NOT fmt op -> usageM fmt op
+ BSWAP fmt reg -> mkRU fmt [reg] [reg]
+ NEGI fmt op -> usageM fmt op
+ SHL fmt imm dst -> usageRM fmt imm dst
+ SAR fmt imm dst -> usageRM fmt imm dst
+ SHR fmt imm dst -> usageRM fmt imm dst
+ SHLD fmt imm dst1 dst2 -> usageRMM fmt imm dst1 dst2
+ SHRD fmt imm dst1 dst2 -> usageRMM fmt imm dst1 dst2
+ BT fmt _ src -> mkRUR fmt (use_R src [])
+
+ PUSH fmt op -> mkRUR fmt (use_R op [])
+ POP fmt op -> mkRU fmt [] (def_W op)
+ TEST fmt src dst -> mkRUR fmt (use_R src $! use_R dst [])
+ CMP fmt src dst -> mkRUR fmt (use_R src $! use_R dst [])
+ SETCC _ op -> mkRU II64 [] (def_W op)
+ JXX _ _ -> mkRU II64 [] []
+ JXX_GBL _ _ -> mkRU II64 [] []
+ JMP op regs -> mkRUR II64 (use_R op regs)
+ JMP_TBL op _ _ _ -> mkRUR II64 (use_R op [])
+ CALL (Left _) params -> mkRU II64 params (callClobberedRegs platform)
+ CALL (Right reg) params -> mkRU II64 (reg:params) (callClobberedRegs platform)
+ CLTD _ -> mkRU II64 [eax] [edx]
+ NOP -> mkRU II64 [] []
+
+ X87Store fmt dst -> mkRUR fmt ( use_EA dst [])
+
+ CVTSS2SD src dst -> mkRU FF64 [src] [dst]
+ CVTSD2SS src dst -> mkRU FF32 [src] [dst]
+ CVTTSS2SIQ _ src dst -> mkRU FF32 (use_R src []) [dst]
+ CVTTSD2SIQ _ src dst -> mkRU FF64 (use_R src []) [dst]
+ CVTSI2SS _ src dst -> mkRU FF32 (use_R src []) [dst]
+ CVTSI2SD _ src dst -> mkRU FF64 (use_R src []) [dst]
+ FDIV fmt src dst -> usageRM fmt src dst
+ SQRT fmt src dst -> mkRU fmt (use_R src []) [dst]
+
+ FETCHGOT reg -> mkRU II64 [] [reg]
+ FETCHPC reg -> mkRU II64 [] [reg]
COMMENT _ -> noUsage
LOCATION{} -> noUsage
UNWIND{} -> noUsage
DELTA _ -> noUsage
- POPCNT _ src dst -> mkRU (use_R src []) [dst]
- LZCNT _ src dst -> mkRU (use_R src []) [dst]
- TZCNT _ src dst -> mkRU (use_R src []) [dst]
- BSF _ src dst -> mkRU (use_R src []) [dst]
- BSR _ src dst -> mkRU (use_R src []) [dst]
+ POPCNT fmt src dst -> mkRU fmt (use_R src []) [dst]
+ LZCNT fmt src dst -> mkRU fmt (use_R src []) [dst]
+ TZCNT fmt src dst -> mkRU fmt (use_R src []) [dst]
+ BSF fmt src dst -> mkRU fmt (use_R src []) [dst]
+ BSR fmt src dst -> mkRU fmt (use_R src []) [dst]
- PDEP _ src mask dst -> mkRU (use_R src $ use_R mask []) [dst]
- PEXT _ src mask dst -> mkRU (use_R src $ use_R mask []) [dst]
+ PDEP fmt src mask dst -> mkRU fmt (use_R src $ use_R mask []) [dst]
+ PEXT fmt src mask dst -> mkRU fmt (use_R src $ use_R mask []) [dst]
- FMA3 _ _ _ src3 src2 dst -> usageFMA src3 src2 dst
+ FMA3 fmt _ _ src3 src2 dst -> usageFMA fmt src3 src2 dst
-- note: might be a better way to do this
- PREFETCH _ _ src -> mkRU (use_R src []) []
+ PREFETCH _ fmt src -> mkRU fmt (use_R src []) []
LOCK i -> regUsageOfInstr platform i
- XADD _ src dst -> usageMM src dst
- CMPXCHG _ src dst -> usageRMM src dst (OpReg eax)
- XCHG _ src dst -> usageMM src (OpReg dst)
+ XADD fmt src dst -> usageMM fmt src dst
+ CMPXCHG fmt src dst -> usageRMM fmt src dst (OpReg eax)
+ XCHG fmt src dst -> usageMM fmt src (OpReg dst)
MFENCE -> noUsage
-- vector instructions
- VBROADCAST _ src dst -> mkRU (use_EA src []) [dst]
- VEXTRACT _ off src dst -> mkRU ((use_R off []) ++ [src]) (use_R dst [])
- INSERTPS _ off src dst
- -> mkRU ((use_R off []) ++ (use_R src []) ++ [dst]) [dst]
-
- VMOVU _ src dst -> mkRU (use_R src []) (use_R dst [])
- MOVU _ src dst -> mkRU (use_R src []) (use_R dst [])
- MOVL _ src dst -> mkRU (use_R src []) (use_R dst [])
- MOVH _ src dst -> mkRU (use_R src []) (use_R dst [])
- VPXOR _ s1 s2 dst -> mkRU [s1,s2] [dst]
-
- VADD _ s1 s2 dst -> mkRU ((use_R s1 []) ++ [s2]) [dst]
- VSUB _ s1 s2 dst -> mkRU ((use_R s1 []) ++ [s2]) [dst]
- VMUL _ s1 s2 dst -> mkRU ((use_R s1 []) ++ [s2]) [dst]
- VDIV _ s1 s2 dst -> mkRU ((use_R s1 []) ++ [s2]) [dst]
-
- VPSHUFD _ _off src dst
- -> mkRU (use_R src []) [dst]
- PSHUFD _ _off src dst
- -> mkRU (use_R src []) [dst]
- SHUFPD _ _off src dst
- -> mkRU (use_R src [dst]) [dst]
- SHUFPS _ _off src dst
- -> mkRU (use_R src [dst]) [dst]
- VSHUFPD _ _off src dst
- -> mkRU (use_R src [dst]) [dst]
- VSHUFPS _ _off src dst
- -> mkRU (use_R src [dst]) [dst]
-
- PSLLDQ _ off dst -> mkRU (use_R off []) [dst]
+ VBROADCAST fmt src dst -> mkRU fmt (use_EA src []) [dst]
+ VEXTRACT fmt off src dst -> mkRU fmt ((use_R off []) ++ [src]) (use_R dst [])
+ INSERTPS fmt off src dst
+ -> mkRU fmt ((use_R off []) ++ (use_R src []) ++ [dst]) [dst]
+
+ VMOVU fmt src dst -> mkRU fmt (use_R src []) (use_R dst [])
+ MOVU fmt src dst -> mkRU fmt (use_R src []) (use_R dst [])
+ MOVL fmt src dst -> mkRU fmt (use_R src []) (use_R dst [])
+ MOVH fmt src dst -> mkRU fmt (use_R src []) (use_R dst [])
+ VPXOR fmt s1 s2 dst -> mkRU fmt [s1,s2] [dst]
+
+ VADD fmt s1 s2 dst -> mkRU fmt ((use_R s1 []) ++ [s2]) [dst]
+ VSUB fmt s1 s2 dst -> mkRU fmt ((use_R s1 []) ++ [s2]) [dst]
+ VMUL fmt s1 s2 dst -> mkRU fmt ((use_R s1 []) ++ [s2]) [dst]
+ VDIV fmt s1 s2 dst -> mkRU fmt ((use_R s1 []) ++ [s2]) [dst]
+
+ VPSHUFD fmt _off src dst
+ -> mkRU fmt (use_R src []) [dst]
+ PSHUFD fmt _off src dst
+ -> mkRU fmt (use_R src []) [dst]
+ SHUFPD fmt _off src dst
+ -> mkRU fmt (use_R src [dst]) [dst]
+ SHUFPS fmt _off src dst
+ -> mkRU fmt (use_R src [dst]) [dst]
+ VSHUFPD fmt _off src dst
+ -> mkRU fmt (use_R src [dst]) [dst]
+ VSHUFPS fmt _off src dst
+ -> mkRU fmt (use_R src [dst]) [dst]
+
+ PSLLDQ fmt off dst -> mkRU fmt (use_R off []) [dst]
_other -> panic "regUsage: unrecognised instr"
where
@@ -537,44 +539,44 @@ regUsageOfInstr platform instr
-- are read.
-- 2 operand form; first operand Read; second Written
- usageRW :: Operand -> Operand -> RegUsage
- usageRW op (OpReg reg) = mkRU (use_R op []) [reg]
- usageRW op (OpAddr ea) = mkRUR (use_R op $! use_EA ea [])
- usageRW _ _ = panic "X86.RegInfo.usageRW: no match"
+ usageRW :: Format -> Operand -> Operand -> RegUsage
+ usageRW fmt op (OpReg reg) = mkRU fmt (use_R op []) [reg]
+ usageRW fmt op (OpAddr ea) = mkRUR fmt (use_R op $! use_EA ea [])
+ usageRW _ _ _ = panic "X86.RegInfo.usageRW: no match"
-- 2 operand form; first operand Read; second Modified
- usageRM :: Operand -> Operand -> RegUsage
- usageRM op (OpReg reg) = mkRU (use_R op [reg]) [reg]
- usageRM op (OpAddr ea) = mkRUR (use_R op $! use_EA ea [])
- usageRM _ _ = panic "X86.RegInfo.usageRM: no match"
+ usageRM :: Format -> Operand -> Operand -> RegUsage
+ usageRM fmt op (OpReg reg) = mkRU fmt (use_R op [reg]) [reg]
+ usageRM fmt op (OpAddr ea) = mkRUR fmt (use_R op $! use_EA ea [])
+ usageRM _ _ _ = panic "X86.RegInfo.usageRM: no match"
-- 2 operand form; first operand Modified; second Modified
- usageMM :: Operand -> Operand -> RegUsage
- usageMM (OpReg src) (OpReg dst) = mkRU [src, dst] [src, dst]
- usageMM (OpReg src) (OpAddr ea) = mkRU (use_EA ea [src]) [src]
- usageMM (OpAddr ea) (OpReg dst) = mkRU (use_EA ea [dst]) [dst]
- usageMM _ _ = panic "X86.RegInfo.usageMM: no match"
+ usageMM :: Format -> Operand -> Operand -> RegUsage
+ usageMM fmt (OpReg src) (OpReg dst) = mkRU fmt [src, dst] [src, dst]
+ usageMM fmt (OpReg src) (OpAddr ea) = mkRU fmt (use_EA ea [src]) [src]
+ usageMM fmt (OpAddr ea) (OpReg dst) = mkRU fmt (use_EA ea [dst]) [dst]
+ usageMM _ _ _ = panic "X86.RegInfo.usageMM: no match"
-- 3 operand form; first operand Read; second Modified; third Modified
- usageRMM :: Operand -> Operand -> Operand -> RegUsage
- usageRMM (OpReg src) (OpReg dst) (OpReg reg) = mkRU [src, dst, reg] [dst, reg]
- usageRMM (OpReg src) (OpAddr ea) (OpReg reg) = mkRU (use_EA ea [src, reg]) [reg]
- usageRMM _ _ _ = panic "X86.RegInfo.usageRMM: no match"
+ usageRMM :: Format -> Operand -> Operand -> Operand -> RegUsage
+ usageRMM fmt (OpReg src) (OpReg dst) (OpReg reg) = mkRU fmt [src, dst, reg] [dst, reg]
+ usageRMM fmt (OpReg src) (OpAddr ea) (OpReg reg) = mkRU fmt (use_EA ea [src, reg]) [reg]
+ usageRMM _ _ _ _ = panic "X86.RegInfo.usageRMM: no match"
-- 3 operand form of FMA instructions.
- usageFMA :: Operand -> Reg -> Reg -> RegUsage
- usageFMA (OpReg src1) src2 dst
- = mkRU [src1, src2, dst] [dst]
- usageFMA (OpAddr ea1) src2 dst
- = mkRU (use_EA ea1 [src2, dst]) [dst]
- usageFMA _ _ _
+ usageFMA :: Format -> Operand -> Reg -> Reg -> RegUsage
+ usageFMA fmt (OpReg src1) src2 dst
+ = mkRU fmt [src1, src2, dst] [dst]
+ usageFMA fmt (OpAddr ea1) src2 dst
+ = mkRU fmt (use_EA ea1 [src2, dst]) [dst]
+ usageFMA _ _ _ _
= panic "X86.RegInfo.usageFMA: no match"
-- 1 operand form; operand Modified
- usageM :: Operand -> RegUsage
- usageM (OpReg reg) = mkRU [reg] [reg]
- usageM (OpAddr ea) = mkRUR (use_EA ea [])
- usageM _ = panic "X86.RegInfo.usageM: no match"
+ usageM :: Format -> Operand -> RegUsage
+ usageM fmt (OpReg reg) = mkRU fmt [reg] [reg]
+ usageM fmt (OpAddr ea) = mkRUR fmt (use_EA ea [])
+ usageM _ _ = panic "X86.RegInfo.usageM: no match"
-- Registers defd when an operand is written.
def_W (OpReg reg) = [reg]
@@ -595,10 +597,11 @@ regUsageOfInstr platform instr
use_index EAIndexNone tl = tl
use_index (EAIndex i _) tl = i : tl
- mkRUR src = src' `seq` RU src' []
+ mkRUR fmt src = src' `seq` RU (map (,fmt) src') []
where src' = filter (interesting platform) src
- mkRU src dst = src' `seq` dst' `seq` RU src' dst'
+
+ mkRU fmt src dst = src' `seq` dst' `seq` RU (map (,fmt) src') (map (,fmt) dst')
where src' = filter (interesting platform) src
dst' = filter (interesting platform) dst
@@ -817,18 +820,27 @@ patchJumpInstr insn patchF
mkSpillInstr
:: NCGConfig
-> Reg -- register to spill
+ -> Format
-> Int -- current stack delta
-> Int -- spill slot to use
-> [Instr]
-mkSpillInstr config reg delta slot
- = let off = spillSlotToOffset platform slot - delta
- in
- case targetClassOfReg platform reg of
- RcInteger -> [MOV (archWordFormat is32Bit)
- (OpReg reg) (OpAddr (spRel platform off))]
- RcDouble -> [MOV FF64 (OpReg reg) (OpAddr (spRel platform off))]
- _ -> panic "X86.mkSpillInstr: no match"
+mkSpillInstr config reg fmt delta slot
+ = let off s = spillSlotToOffset platform s - delta
+ in case fmt of
+ IntegerFormat -> [MOV (archWordFormat is32Bit)
+ (OpReg reg) (OpAddr (spRel platform $ off slot))]
+ FF64 -> [MOV FF64 (OpReg reg) (OpAddr (spRel platform $ off slot))]
+ FF32 -> panic "X86_mkSpillInstr: RcFloat"
+ VecFormat {} ->
+ -- SIMD NCG TODO: panic for unsupported VecFormats (& same in mkLoadInstr)
+ [MOV FF64 (OpReg reg) (OpAddr (spRel platform $ off slot))
+ -- Now shuffle the register, putting the high half into the lower half.
+ ,SHUFPD (VecFormat 2 FmtDouble W64) (ImmInt 0b01) (OpReg reg) reg
+ -- NB: this format doesn't matter, we emit the same instruction
+ -- regardless of what is stored...
+ -- SIMD NCG TODO: avoid using MOV by using SHUFPD with an OpAddr argument?
+ ,MOV FF64 (OpReg reg) (OpAddr (spRel platform $ off (slot + 1)))]
where platform = ncgPlatform config
is32Bit = target32Bit platform
@@ -836,18 +848,30 @@ mkSpillInstr config reg delta slot
mkLoadInstr
:: NCGConfig
-> Reg -- register to load
+ -> Format
-> Int -- current stack delta
-> Int -- spill slot to use
-> [Instr]
-mkLoadInstr config reg delta slot
- = let off = spillSlotToOffset platform slot - delta
+mkLoadInstr config reg fmt delta slot
+ = let off s = spillSlotToOffset platform s - delta
in
- case targetClassOfReg platform reg of
- RcInteger -> ([MOV (archWordFormat is32Bit)
- (OpAddr (spRel platform off)) (OpReg reg)])
- RcDouble -> ([MOV FF64 (OpAddr (spRel platform off)) (OpReg reg)])
- _ -> panic "X86.mkLoadInstr"
+ case fmt of
+ IntegerFormat -> ([MOV (archWordFormat is32Bit)
+ (OpAddr (spRel platform $ off slot)) (OpReg reg)])
+ FF64 -> ([MOV FF64 (OpAddr (spRel platform $ off slot)) (OpReg reg)])
+ FF32 -> panic "X86.mkLoadInstr RcFloat"
+ VecFormat {} ->
+ -- Load the higher half into the lower part of register from the second stack slot,
+ -- shuffle it into the higher part of the register,
+ -- and load then lower half into the lower part of the register.
+ [MOV FF64 (OpAddr (spRel platform $ off (slot + 1))) (OpReg reg)
+ ,SHUFPD (VecFormat 2 FmtDouble W64) (ImmInt 0b01) (OpReg reg) reg
+ -- SIMD NCG TODO: not sure about this immediate
+ -- SIMD NCG TODO: can we avoid the MOV instructions and directly
+ -- use SHUFPD for an Addr to Reg move?
+ ,MOV FF64 (OpAddr (spRel platform $ off slot)) (OpReg reg)]
+
where platform = ncgPlatform config
is32Bit = target32Bit platform
=====================================
compiler/GHC/Platform/Reg.hs
=====================================
@@ -116,17 +116,12 @@ renameVirtualReg u r
classOfVirtualReg :: VirtualReg -> RegClass
classOfVirtualReg vr
- = case vr of
+ = case vr of
VirtualRegI{} -> RcInteger
VirtualRegHi{} -> RcInteger
VirtualRegF{} -> RcFloat
VirtualRegD{} -> RcDouble
- -- Below is an awful, largely x86-specific hack
- VirtualRegVec{} -> RcDouble
- -- SIMD NCG TODO: this seems very wrong and potentially the source of
- -- bug #16927, because we use this function to determine how to spill
- -- the contents of a virtual register
- -- (see e.g. GHC.CmmToAsm.X86.Instr.mkSpillInstr).
+ VirtualRegVec{} -> RcVector128
-- Determine the upper-half vreg for a 64-bit quantity on a 32-bit platform
=====================================
compiler/GHC/Platform/Reg/Class.hs
=====================================
@@ -1,3 +1,4 @@
+{-# LANGUAGE LambdaCase #-}
-- | An architecture independent description of a register's class.
module GHC.Platform.Reg.Class
( RegClass (..)
@@ -18,21 +19,26 @@ import GHC.Builtin.Uniques
-- We treat all registers in a class as being interchangeable.
--
data RegClass
- = RcInteger
- | RcFloat
- | RcDouble
- deriving (Eq, Show)
+ = RcInteger
+ | RcFloat
+ | RcDouble
+ | RcVector128
+ deriving (Eq, Ord, Show)
allRegClasses :: [RegClass]
allRegClasses =
- [RcInteger, RcFloat, RcDouble]
+ [ RcInteger, RcFloat, RcDouble, RcVector128 ]
instance Uniquable RegClass where
- getUnique RcInteger = mkRegClassUnique 0
- getUnique RcFloat = mkRegClassUnique 1
- getUnique RcDouble = mkRegClassUnique 2
+ getUnique = \case
+ RcInteger -> mkRegClassUnique 0
+ RcFloat -> mkRegClassUnique 1
+ RcDouble -> mkRegClassUnique 2
+ RcVector128 -> mkRegClassUnique 3
instance Outputable RegClass where
- ppr RcInteger = Outputable.text "I"
- ppr RcFloat = Outputable.text "F"
- ppr RcDouble = Outputable.text "D"
+ ppr = \case
+ RcInteger -> Outputable.text "I"
+ RcFloat -> Outputable.text "F"
+ RcDouble -> Outputable.text "D"
+ RcVector128 -> Outputable.text "V"
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/34d7a1702f9fc170f754726a665ddb3e36044992
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/34d7a1702f9fc170f754726a665ddb3e36044992
You're receiving this email because of your account on gitlab.haskell.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20240606/13ba626f/attachment-0001.html>
More information about the ghc-commits
mailing list