[Git][ghc/ghc][wip/andreask/refactor_cmm_weights] Refactor linear reg alloc to remember past assignments.

Andreas Klebinger gitlab at gitlab.haskell.org
Sat Apr 4 01:39:14 UTC 2020



Andreas Klebinger pushed to branch wip/andreask/refactor_cmm_weights at Glasgow Haskell Compiler / GHC


Commits:
a8e74bc9 by Andreas Klebinger at 2020-04-04T03:39:00+02:00
Refactor linear reg alloc to remember past assignments.

We used to pick the first free register, which
is damn fast but produces fixup blocks on the regular.

Now we look for past assignments first. This means for
loops with blocks A, B, C if a variable is live in A & C
we will pick the right register (if available) in C.

This avoids some needless fixup blocks.

- - - - -


12 changed files:

- compiler/GHC/Cmm/Pipeline.hs
- compiler/GHC/CmmToAsm/BlockLayout.hs
- compiler/GHC/CmmToAsm/CFG.hs
- compiler/GHC/CmmToAsm/Instr.hs
- compiler/GHC/CmmToAsm/Reg/Linear.hs
- compiler/GHC/CmmToAsm/Reg/Linear/Base.hs
- compiler/GHC/CmmToAsm/Reg/Linear/PPC.hs
- compiler/GHC/CmmToAsm/Reg/Linear/SPARC.hs
- compiler/GHC/CmmToAsm/Reg/Linear/State.hs
- compiler/GHC/CmmToAsm/Reg/Linear/X86.hs
- compiler/GHC/CmmToAsm/Reg/Linear/X86_64.hs
- compiler/utils/Outputable.hs


Changes:

=====================================
compiler/GHC/Cmm/Pipeline.hs
=====================================
@@ -116,6 +116,10 @@ cpsTop hsc_env proc =
             condPass Opt_CmmSink (cmmSink dflags) g
                      Opt_D_dump_cmm_sink "Sink assignments"
 
+       g <- {-# SCC "sink" #-} -- See Note [Sinking after stack layout]
+            condPass Opt_CmmSink (cmmSink dflags) g
+                     Opt_D_dump_cmm_sink "Sink assignments"
+
        ------------- CAF analysis ----------------------------------------------
        let cafEnv = {-# SCC "cafAnal" #-} cafAnal call_pps l g
        dumpWith dflags Opt_D_dump_cmm_caf "CAFEnv" FormatText (ppr cafEnv)


=====================================
compiler/GHC/CmmToAsm/BlockLayout.hs
=====================================
@@ -639,7 +639,8 @@ sequenceChain _info _weights    [x] = [x]
 sequenceChain  info weights'     blocks@((BasicBlock entry _):_) =
     let weights :: CFG
         weights = --pprTrace "cfg'" (pprEdgeWeights cfg')
-                  cfg'
+                  -- cfg'
+                  weights'
           where
             (_, globalEdgeWeights) = {-# SCC mkGlobalWeights #-} mkGlobalWeights entry weights'
             cfg' = {-# SCC rewriteEdges #-}


=====================================
compiler/GHC/CmmToAsm/CFG.hs
=====================================
@@ -670,11 +670,20 @@ findBackEdges root cfg =
     typedEdges =
       classifyEdges root getSuccs edges :: [((BlockId,BlockId),EdgeType)]
 
-
 optimizeCFG :: D.CfgWeights -> RawCmmDecl -> CFG -> CFG
 optimizeCFG _ (CmmData {}) cfg = cfg
-optimizeCFG weights (CmmProc info _lab _live graph) cfg =
-    {-# SCC optimizeCFG #-}
+optimizeCFG weights proc@(CmmProc _info _lab _live graph) cfg =
+  staticPredCfg (g_entry graph) . optHsPatterns weights proc $ cfg
+
+-- | Modify branch weights based on educated guess on
+-- patterns GHC tends to produce and how they affect
+-- performance.
+--
+-- Most importantly we penalize jumps across info tables.
+optHsPatterns :: D.CfgWeights -> RawCmmDecl -> CFG -> CFG
+optHsPatterns _ (CmmData {}) cfg = cfg
+optHsPatterns weights (CmmProc info _lab _live graph) cfg =
+    {-# SCC optHsPatterns #-}
     -- pprTrace "Initial:" (pprEdgeWeights cfg) $
     -- pprTrace "Initial:" (ppr $ mkGlobalWeights (g_entry graph) cfg) $
 
@@ -749,6 +758,21 @@ optimizeCFG weights (CmmProc info _lab _live graph) cfg =
           | CmmSource { trans_cmmNode = CmmCondBranch {} } <- source = True
           | otherwise = False
 
+-- |
+staticPredCfg :: BlockId -> CFG -> CFG
+staticPredCfg entry cfg = cfg'
+  where
+    (_, globalEdgeWeights) = {-# SCC mkGlobalWeights #-}
+                             mkGlobalWeights entry cfg
+    cfg' = {-# SCC rewriteEdges #-}
+            mapFoldlWithKey
+                (\cfg from m ->
+                    mapFoldlWithKey
+                        (\cfg to w -> setEdgeWeight cfg (EdgeWeight w) from to )
+                        cfg m )
+                cfg
+                globalEdgeWeights
+
 -- | Determine loop membership of blocks based on SCC analysis
 --   This is faster but only gives yes/no answers.
 loopMembers :: HasDebugCallStack => CFG -> LabelMap Bool
@@ -922,6 +946,10 @@ revPostorderFrom cfg root =
 --   reverse post order. Which is required for diamond control flow to work probably.
 --
 --   We also apply a few prediction heuristics (based on the same paper)
+--
+--   The returned result represents frequences.
+--   For blocks it's the expected number of executions and
+--   for edges is the number of traversals.
 
 {-# NOINLINE mkGlobalWeights #-}
 {-# SCC mkGlobalWeights #-}


=====================================
compiler/GHC/CmmToAsm/Instr.hs
=====================================
@@ -37,7 +37,10 @@ import GHC.CmmToAsm.Config
 --      (for allocation purposes, anyway).
 --
 data RegUsage
-        = RU [Reg] [Reg]
+        = RU    {
+                reads :: [Reg],
+                writes :: [Reg]
+                }
 
 -- | No regs read or written to.
 noUsage :: RegUsage


=====================================
compiler/GHC/CmmToAsm/Reg/Linear.hs
=====================================
@@ -1,4 +1,5 @@
 {-# LANGUAGE BangPatterns, CPP, ScopedTypeVariables #-}
+{-# LANGUAGE ConstraintKinds #-}
 
 {-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-}
 
@@ -137,6 +138,7 @@ import GHC.Platform
 import Data.Maybe
 import Data.List
 import Control.Monad
+import Control.Applicative
 
 -- -----------------------------------------------------------------------------
 -- Top level of the register allocator
@@ -229,8 +231,12 @@ linearRegAlloc config entry_ids block_live sccs
   go f = linearRegAlloc' config f entry_ids block_live sccs
   platform = ncgPlatform config
 
+-- |
+type OutputableRegConstraint freeRegs instr =
+        (FR freeRegs, Outputable freeRegs, Outputable instr, Instruction instr)
+
 linearRegAlloc'
-        :: (FR freeRegs, Outputable instr, Instruction instr)
+        :: OutputableRegConstraint freeRegs instr
         => NCGConfig
         -> freeRegs
         -> [BlockId]                    -- ^ entry points
@@ -246,7 +252,7 @@ linearRegAlloc' config initFreeRegs entry_ids block_live sccs
         return  (blocks, stats, getStackUse stack)
 
 
-linearRA_SCCs :: (FR freeRegs, Instruction instr, Outputable instr)
+linearRA_SCCs :: OutputableRegConstraint freeRegs instr
               => [BlockId]
               -> BlockMap RegSet
               -> [NatBasicBlock instr]
@@ -281,7 +287,7 @@ linearRA_SCCs entry_ids block_live blocksAcc (CyclicSCC blocks : sccs)
    more sanity checking to guard against this eventuality.
 -}
 
-process :: (FR freeRegs, Instruction instr, Outputable instr)
+process :: OutputableRegConstraint freeRegs instr
         => [BlockId]
         -> BlockMap RegSet
         -> [GenBasicBlock (LiveInstr instr)]
@@ -325,15 +331,20 @@ process entry_ids block_live (b@(BasicBlock id _) : blocks)
 -- | Do register allocation on this basic block
 --
 processBlock
-        :: (FR freeRegs, Outputable instr, Instruction instr)
+        :: OutputableRegConstraint freeRegs instr
         => BlockMap RegSet              -- ^ live regs on entry to each basic block
         -> LiveBasicBlock instr         -- ^ block to do register allocation on
         -> RegM freeRegs [NatBasicBlock instr]   -- ^ block with registers allocated
 
 processBlock block_live (BasicBlock id instrs)
- = do   initBlock id block_live
+ = do   -- pprTraceM "processBlock" $ text "" $$ ppr (BasicBlock id instrs)
+        initBlock id block_live
+        assig <- getBlockAssigR
+
+        -- pprTraceM "assignment" $ ppr assig
         (instrs', fixups)
                 <- linearRA block_live [] [] id instrs
+        -- pprTraceM "blockResult" $ ppr (instrs', fixups)
         return  $ BasicBlock id instrs' : fixups
 
 
@@ -369,7 +380,7 @@ initBlock id block_live
 
 -- | Do allocation for a sequence of instructions.
 linearRA
-        :: (FR freeRegs, Outputable instr, Instruction instr)
+        :: OutputableRegConstraint freeRegs instr
         => BlockMap RegSet                      -- ^ map of what vregs are live on entry to each block.
         -> [instr]                              -- ^ accumulator for instructions already processed.
         -> [NatBasicBlock instr]                -- ^ accumulator for blocks of fixup code.
@@ -396,7 +407,7 @@ linearRA block_live accInstr accFixups id (instr:instrs)
 
 -- | Do allocation for a single instruction.
 raInsn
-        :: (FR freeRegs, Outputable instr, Instruction instr)
+        :: OutputableRegConstraint freeRegs instr
         => BlockMap RegSet                      -- ^ map of what vregs are love on entry to each block.
         -> [instr]                              -- ^ accumulator for instructions already processed.
         -> BlockId                              -- ^ the id of the current block, for debugging
@@ -476,7 +487,7 @@ isInReg src assig | Just (InReg _) <- lookupUFM assig src = True
                   | otherwise = False
 
 
-genRaInsn :: (FR freeRegs, Instruction instr, Outputable instr)
+genRaInsn :: OutputableRegConstraint freeRegs instr
           => BlockMap RegSet
           -> [instr]
           -> BlockId
@@ -486,7 +497,9 @@ genRaInsn :: (FR freeRegs, Instruction instr, Outputable instr)
           -> RegM freeRegs ([instr], [NatBasicBlock instr])
 
 genRaInsn block_live new_instrs block_id instr r_dying w_dying = do
+--   pprTraceM "genRaInsn" $ ppr (block_id, instr)
   platform <- getPlatform
+  block_assig <- getBlockAssigR
   case regUsageOfInstr platform instr of { RU read written ->
     do
     let real_written    = [ rr  | (RegReal     rr) <- written ]
@@ -525,10 +538,12 @@ genRaInsn block_live new_instrs block_id instr r_dying w_dying = do
     (fixup_blocks, adjusted_instr)
         <- joinToTargets block_live block_id instr
 
+--     when (not $ null fixup_blocks) $ pprTraceM "genRA:FixBlocks" $ ppr fixup_blocks
+
     -- Debugging - show places where the reg alloc inserted
     -- assignment fixup blocks.
-    -- when (not $ null fixup_blocks) $
-    --    pprTrace "fixup_blocks" (ppr fixup_blocks) (return ())
+--     when (not $ null fixup_blocks) $
+--        pprTrace "fixup_blocks" (ppr fixup_blocks) (return ())
 
     -- (e) Delete all register assignments for temps which are read
     --     (only) and die here.  Update the free register list.
@@ -737,7 +752,7 @@ data SpillLoc = ReadMem StackSlot  -- reading from register only in memory
 --   the list of free registers and free stack slots.
 
 allocateRegsAndSpill
-        :: (FR freeRegs, Outputable instr, Instruction instr)
+        :: forall freeRegs instr. (FR freeRegs, Outputable instr, Instruction instr)
         => Bool                 -- True <=> reading (load up spilled regs)
         -> [VirtualReg]         -- don't push these out
         -> [instr]              -- spill insns
@@ -749,7 +764,8 @@ allocateRegsAndSpill _       _    spills alloc []
         = return (spills, reverse alloc)
 
 allocateRegsAndSpill reading keep spills alloc (r:rs)
- = do   assig <- getAssigR
+ = do   assig <- getAssigR :: RegM freeRegs (RegMap Loc)
+        -- pprTraceM "allocateRegsAndSpill:assig" (ppr (r:rs) $$ ppr assig)
         let doSpill = allocRegsAndSpill_spill reading keep spills alloc r rs assig
         case lookupUFM assig r of
                 -- case (1a): already in a register
@@ -779,6 +795,15 @@ allocateRegsAndSpill reading keep spills alloc (r:rs)
 
                         | otherwise -> doSpill WriteNew
 
+findVirtRegAny :: forall freeRegs u. Uniquable u
+               => u -> RegM freeRegs (Maybe Loc)
+findVirtRegAny vreg = do
+  bassig <- getBlockAssigR :: RegM freeRegs (BlockMap (freeRegs,RegMap Loc))
+  return $ foldr (findVirtRegAssig) Nothing bassig
+  where
+    findVirtRegAssig :: (freeRegs,RegMap Loc) -> Maybe Loc -> Maybe Loc
+    findVirtRegAssig assig z =
+        lookupUFM (snd assig) vreg <|> z
 
 -- reading is redundant with reason, but we keep it around because it's
 -- convenient and it maintains the recursive structure of the allocator. -- EZY
@@ -795,13 +820,20 @@ allocRegsAndSpill_spill :: (FR freeRegs, Instruction instr, Outputable instr)
 allocRegsAndSpill_spill reading keep spills alloc r rs assig spill_loc
  = do   platform <- getPlatform
         freeRegs <- getFreeRegsR
-        let freeRegs_thisClass  = frGetFreeRegs platform (classOfVirtualReg r) freeRegs
+        let freeRegs_thisClass  = frGetFreeRegs platform (classOfVirtualReg r) freeRegs :: [RealReg]
 
         case freeRegs_thisClass of
 
          -- case (2): we have a free register
          (my_reg : _) ->
-           do   spills'   <- loadTemp r spill_loc my_reg spills
+           do   _r <- findVirtRegAny r
+                -- pprTraceM "free" $ ppr (_r, my_reg, freeRegs_thisClass)
+                my_reg <- return $ case _r of
+                                        Just (InReg my_reg')
+                                                | my_reg' `elem` freeRegs_thisClass -> my_reg'
+                                        _ -> my_reg :: RealReg
+                -- pprTraceM "findVirtRegAny" $ ppr (my_reg, _r)
+                spills'   <- loadTemp r spill_loc my_reg spills
 
                 setAssigR       (addToUFM assig r $! newLocation spill_loc my_reg)
                 setFreeRegsR $  frAllocateReg platform my_reg freeRegs


=====================================
compiler/GHC/CmmToAsm/Reg/Linear/Base.hs
=====================================
@@ -138,6 +138,9 @@ data RA_State freeRegs
         , ra_config     :: !NCGConfig
 
         -- | (from,fixup,to) : We inserted fixup code between from and to
-        , ra_fixups     :: [(BlockId,BlockId,BlockId)] }
+        , ra_fixups     :: [(BlockId,BlockId,BlockId)]
+
+        -- | Map virtual regs to regs they have been assigned in the past.
+        , ra_sugg_assig :: RegMap Loc  }
 
 


=====================================
compiler/GHC/CmmToAsm/Reg/Linear/PPC.hs
=====================================
@@ -1,3 +1,5 @@
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
+
 -- | Free regs map for PowerPC
 module GHC.CmmToAsm.Reg.Linear.PPC where
 
@@ -27,6 +29,9 @@ import Data.Bits
 data FreeRegs = FreeRegs !Word32 !Word32
               deriving( Show )  -- The Show is used in an ASSERT
 
+instance Outputable FreeRegs where
+    ppr = text . show
+
 noFreeRegs :: FreeRegs
 noFreeRegs = FreeRegs 0 0
 


=====================================
compiler/GHC/CmmToAsm/Reg/Linear/SPARC.hs
=====================================
@@ -1,4 +1,5 @@
 {-# LANGUAGE CPP #-}
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
 
 -- | Free regs map for SPARC
 module GHC.CmmToAsm.Reg.Linear.SPARC where
@@ -38,6 +39,9 @@ data FreeRegs
 instance Show FreeRegs where
         show = showFreeRegs
 
+instance Outputable FreeRegs where
+        ppr = text . showFreeRegs
+
 -- | A reg map where no regs are free to be allocated.
 noFreeRegs :: FreeRegs
 noFreeRegs = FreeRegs 0 0 0


=====================================
compiler/GHC/CmmToAsm/Reg/Linear/State.hs
=====================================
@@ -1,4 +1,5 @@
 {-# LANGUAGE CPP, PatternSynonyms, DeriveFunctor #-}
+{-# LANGUAGE ScopedTypeVariables #-}
 
 #if !defined(GHC_LOADED_INTO_GHCI)
 {-# LANGUAGE UnboxedTuples #-}
@@ -52,7 +53,9 @@ import GHC.Cmm.BlockId
 import GHC.Platform
 import GHC.Types.Unique
 import GHC.Types.Unique.Supply
+import GHC.Types.Unique.FM
 
+import Control.Applicative
 import Control.Monad (ap)
 
 -- Avoids using unboxed tuples when loading into GHCi
@@ -111,7 +114,8 @@ runR config block_assig freeregs assig stack us thing =
                 , ra_us         = us
                 , ra_spills     = []
                 , ra_config     = config
-                , ra_fixups     = [] })
+                , ra_fixups     = []
+                , ra_sugg_assig = assig })
    of
         RA_Result state returned_thing
          ->     (ra_blockassig state, ra_stack state, makeRAStats state, returned_thing)
@@ -157,6 +161,24 @@ setAssigR :: RegMap Loc -> RegM freeRegs ()
 setAssigR assig = RegM $ \ s ->
   RA_Result s{ra_assig=assig} ()
 
+findVirtRegAny :: forall freeRegs u. Uniquable u
+               => u -> RegM freeRegs (Maybe Loc)
+findVirtRegAny vreg = do
+  bassig <- getBlockAssigR :: RegM freeRegs (BlockMap (freeRegs,RegMap Loc))
+  return $ foldr (findVirtRegAssig) Nothing bassig
+  where
+    findVirtRegAssig :: (freeRegs,RegMap Loc) -> Maybe Loc -> Maybe Loc
+    findVirtRegAssig assig z =
+        lookupUFM (snd assig) vreg <|> z
+
+-- suggestAssig :: RegMap Loc -> RegM ()
+-- suggestAssig assig = RegM $ \ s ->
+--   RA_Result s{ra_sugg_assig=plusUFM (ra_sugg_assig s) assig} ()
+
+-- getSug :: Unique -> RegM Loc
+-- getSug = RegM $ \ s at RA_State{ra_sugg_assig = sugg_assig} ->
+--   RA_Result s sugg_assig
+
 getBlockAssigR :: RegM freeRegs (BlockAssignment freeRegs)
 getBlockAssigR = RegM $ \ s at RA_State{ra_blockassig = assig} ->
   RA_Result s assig


=====================================
compiler/GHC/CmmToAsm/Reg/Linear/X86.hs
=====================================
@@ -1,3 +1,4 @@
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
 
 -- | Free regs map for i386
 module GHC.CmmToAsm.Reg.Linear.X86 where
@@ -9,12 +10,13 @@ import GHC.Platform.Reg.Class
 import GHC.Platform.Reg
 import Panic
 import GHC.Platform
+import Outputable
 
 import Data.Word
 import Data.Bits
 
 newtype FreeRegs = FreeRegs Word32
-    deriving Show
+    deriving (Show,Outputable)
 
 noFreeRegs :: FreeRegs
 noFreeRegs = FreeRegs 0


=====================================
compiler/GHC/CmmToAsm/Reg/Linear/X86_64.hs
=====================================
@@ -1,3 +1,4 @@
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
 
 -- | Free regs map for x86_64
 module GHC.CmmToAsm.Reg.Linear.X86_64 where
@@ -9,12 +10,13 @@ import GHC.Platform.Reg.Class
 import GHC.Platform.Reg
 import Panic
 import GHC.Platform
+import Outputable
 
 import Data.Word
 import Data.Bits
 
 newtype FreeRegs = FreeRegs Word64
-    deriving Show
+    deriving (Show,Outputable)
 
 noFreeRegs :: FreeRegs
 noFreeRegs = FreeRegs 0


=====================================
compiler/utils/Outputable.hs
=====================================
@@ -847,6 +847,9 @@ instance Outputable Word16 where
 instance Outputable Word32 where
     ppr n = integer $ fromIntegral n
 
+instance Outputable Word64 where
+    ppr n = integer $ fromIntegral n
+
 instance Outputable Word where
     ppr n = integer $ fromIntegral n
 



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/a8e74bc95970f2c2f6b8c2d1821d11fefdb2eb37

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/a8e74bc95970f2c2f6b8c2d1821d11fefdb2eb37
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20200403/34f5e7d9/attachment-0001.html>


More information about the ghc-commits mailing list