[commit: ghc] master: CmmSink: Use an IntSet instead of a list (4353756)

git at git.haskell.org git at git.haskell.org
Fri Nov 3 00:16:05 UTC 2017


Repository : ssh://git@git.haskell.org/ghc

On branch  : master
Link       : http://ghc.haskell.org/trac/ghc/changeset/43537568579a63cb6b8d70b4815d76c46bb9a692/ghc

>---------------------------------------------------------------

commit 43537568579a63cb6b8d70b4815d76c46bb9a692
Author: alexbiehl <alex.biehl at gmail.com>
Date:   Thu Nov 2 17:27:37 2017 -0400

    CmmSink: Use an IntSet instead of a list
    
    CmmProcs which have *lots* of local variables take a considerable
    amount of time in CmmSink. This was noticed by @tdammers in #7258
    while compiling files with large records (~200-400 fields).
    
    Before:
    
    ```
            Sun Oct 29 19:58 2017 Time and Allocation Profiling Report (Final)
    
               ghc-stage2 +RTS -p -RTS -B/Users/alexbiehl/git/ghc/inplace/lib /Users/alexbiehl/Downloads/W2.hs -fforce-recomp -O2
    
            total time  =       26.00 secs   (25996 ticks @ 1000 us, 1 processor)
            total alloc = 14,921,627,912 bytes  (excludes profiling overheads)
    
    COST CENTRE     MODULE      SRC %time %alloc
    
    sink            CmmPipeline compiler/cmm/CmmPipeline.hs:(104,13)-(105,59)        55.7   15.9
    SimplTopBinds   SimplCore   compiler/simplCore/SimplCore.hs:761:39-74 19.5   30.6
    FloatOutwards   SimplCore   compiler/simplCore/SimplCore.hs:471:40-66 4.2    9.0
    RegAlloc-linear AsmCodeGen compiler/nativeGen/AsmCodeGen.hs:(658,27)-(660,55)    4.0   11.1
    pprNativeCode   AsmCodeGen compiler/nativeGen/AsmCodeGen.hs:(529,37)-(530,65)    2.8    6.3
    NewStranal      SimplCore   compiler/simplCore/SimplCore.hs:480:40-63 1.6    3.7
    OccAnal         SimplCore compiler/simplCore/SimplCore.hs:(739,22)-(740,67)     1.5    3.5
    StgCmm          HscMain compiler/main/HscMain.hs:(1426,13)-(1427,62)          1.2    2.4
    regLiveness     AsmCodeGen compiler/nativeGen/AsmCodeGen.hs:(591,17)-(593,52)    1.2    1.9
    genMachCode     AsmCodeGen compiler/nativeGen/AsmCodeGen.hs:(580,17)-(582,62)    0.9    1.8
    NativeCodeGen   CodeOutput  compiler/main/CodeOutput.hs:171:18-78 0.9    2.1
    CoreTidy        HscMain     compiler/main/HscMain.hs:1253:27-67 0.8    1.9
    ```
    
    After:
    
    ```
            Sun Oct 29 19:18 2017 Time and Allocation Profiling Report (Final)
    
               ghc-stage2 +RTS -p -RTS -B/Users/alexbiehl/git/ghc/inplace/lib /Users/alexbiehl/Downloads/W2.hs -fforce-recomp -O2
    
            total time  =       13.31 secs   (13307 ticks @ 1000 us, 1 processor)
            total alloc = 15,772,184,488 bytes  (excludes profiling overheads)
    
    COST CENTRE     MODULE         SRC %time %alloc
    
    SimplTopBinds   SimplCore   compiler/simplCore/SimplCore.hs:761:39-74            38.3   29.0
    sink            CmmPipeline compiler/cmm/CmmPipeline.hs:(104,13)-(105,59)        13.2   20.3
    RegAlloc-linear AsmCodeGen compiler/nativeGen/AsmCodeGen.hs:(658,27)-(660,55)    8.3   10.5
    FloatOutwards   SimplCore compiler/simplCore/SimplCore.hs:471:40-66             8.1    8.5
    pprNativeCode   AsmCodeGen compiler/nativeGen/AsmCodeGen.hs:(529,37)-(530,65)    5.4    5.9
    NewStranal      SimplCore compiler/simplCore/SimplCore.hs:480:40-63             3.1    3.5
    OccAnal         SimplCore compiler/simplCore/SimplCore.hs:(739,22)-(740,67)     2.9    3.3
    StgCmm          HscMain compiler/main/HscMain.hs:(1426,13)-(1427,62)          2.3    2.3
    regLiveness     AsmCodeGen compiler/nativeGen/AsmCodeGen.hs:(591,17)-(593,52)    2.1    1.8
    NativeCodeGen   CodeOutput     compiler/main/CodeOutput.hs:171:18-78 1.7    2.0
    genMachCode     AsmCodeGen compiler/nativeGen/AsmCodeGen.hs:(580,17)-(582,62)    1.6    1.7
    CoreTidy        HscMain        compiler/main/HscMain.hs:1253:27-67 1.4    1.8
    foldNodesBwdOO  Hoopl.Dataflow compiler/cmm/Hoopl/Dataflow.hs:(397,1)-(403,17)       1.1    0.8
    ```
    
    Reviewers: austin, bgamari, simonmar
    
    Reviewed By: bgamari
    
    Subscribers: duog, rwbarton, thomie, tdammers
    
    GHC Trac Issues: #7258
    
    Differential Revision: https://phabricator.haskell.org/D4145


>---------------------------------------------------------------

43537568579a63cb6b8d70b4815d76c46bb9a692
 compiler/cmm/CmmSink.hs | 32 +++++++++++++++++++++++++-------
 1 file changed, 25 insertions(+), 7 deletions(-)

diff --git a/compiler/cmm/CmmSink.hs b/compiler/cmm/CmmSink.hs
index a674e54..3633ed3 100644
--- a/compiler/cmm/CmmSink.hs
+++ b/compiler/cmm/CmmSink.hs
@@ -17,13 +17,31 @@ import CodeGen.Platform
 import Platform (isARM, platformArch)
 
 import DynFlags
+import Unique
 import UniqFM
 import PprCmm ()
 
+import qualified Data.IntSet as IntSet
 import Data.List (partition)
 import qualified Data.Set as Set
 import Data.Maybe
 
+-- Compact sets for membership tests of local variables.
+
+type LRegSet = IntSet.IntSet
+
+emptyLRegSet :: LRegSet
+emptyLRegSet = IntSet.empty
+
+nullLRegSet :: LRegSet -> Bool
+nullLRegSet = IntSet.null
+
+insertLRegSet :: LocalReg -> LRegSet -> LRegSet
+insertLRegSet l = IntSet.insert (getKey (getUnique l))
+
+elemLRegSet :: LocalReg -> LRegSet -> Bool
+elemLRegSet l = IntSet.member (getKey (getUnique l))
+
 -- -----------------------------------------------------------------------------
 -- Sinking and inlining
 
@@ -399,7 +417,7 @@ tryToInline
       , Assignments             -- Remaining assignments
       )
 
-tryToInline dflags live node assigs = go usages node [] assigs
+tryToInline dflags live node assigs = go usages node emptyLRegSet assigs
  where
   usages :: UniqFM Int -- Maps each LocalReg to a count of how often it is used
   usages = foldLocalRegsUsed dflags addUsage emptyUFM node
@@ -422,7 +440,7 @@ tryToInline dflags live node assigs = go usages node [] assigs
         inline_and_keep    = keep inl_node -- inline the assignment, keep it
 
         keep node' = (final_node, a : rest')
-          where (final_node, rest') = go usages' node' (l:skipped) rest
+          where (final_node, rest') = go usages' node' (insertLRegSet l skipped) rest
                 usages' = foldLocalRegsUsed dflags (\m r -> addToUFM m r 2)
                                             usages rhs
                 -- we must not inline anything that is mentioned in the RHS
@@ -430,7 +448,7 @@ tryToInline dflags live node assigs = go usages node [] assigs
                 -- usages of the regs on the RHS to 2.
 
         cannot_inline = skipped `regsUsedIn` rhs -- Note [dependent assignments]
-                        || l `elem` skipped
+                        || l `elemLRegSet` skipped
                         || not (okToInline dflags rhs node)
 
         l_usages = lookupUFM usages l
@@ -521,11 +539,11 @@ And we do that right here in tryToInline, just as we do cmmMachOpFold.
 addUsage :: UniqFM Int -> LocalReg -> UniqFM Int
 addUsage m r = addToUFM_C (+) m r 1
 
-regsUsedIn :: [LocalReg] -> CmmExpr -> Bool
-regsUsedIn [] _ = False
+regsUsedIn :: LRegSet -> CmmExpr -> Bool
+regsUsedIn ls _ | nullLRegSet ls = False
 regsUsedIn ls e = wrapRecExpf f e False
-  where f (CmmReg (CmmLocal l))      _ | l `elem` ls = True
-        f (CmmRegOff (CmmLocal l) _) _ | l `elem` ls = True
+  where f (CmmReg (CmmLocal l))      _ | l `elemLRegSet` ls = True
+        f (CmmRegOff (CmmLocal l) _) _ | l `elemLRegSet` ls = True
         f _ z = z
 
 -- we don't inline into CmmUnsafeForeignCall if the expression refers



More information about the ghc-commits mailing list