[commit: ghc] master: CmmPipeline: add a second pass of CmmCommonBlockElim (d5c4d46)

git at git.haskell.org git at git.haskell.org
Tue Mar 27 13:53:41 UTC 2018


Repository : ssh://git@git.haskell.org/ghc

On branch  : master
Link       : http://ghc.haskell.org/trac/ghc/changeset/d5c4d46a62ce6a0cfa6440344f707136eff18119/ghc

>---------------------------------------------------------------

commit d5c4d46a62ce6a0cfa6440344f707136eff18119
Author: Michal Terepeta <michal.terepeta at gmail.com>
Date:   Mon Mar 26 15:18:44 2018 -0400

    CmmPipeline: add a second pass of CmmCommonBlockElim
    
    The sinking pass often gets rid of unnecessary registers
    registers/assignements exposing more opportunities for CBE, so this
    commit adds a second round of CBE after the sinking pass and should
    fix #12915 (and some examples in #14226).
    
    Nofib results:
    * Binary size:         0.9% reduction on average
    * Compile allocations: 0.7% increase on average
    * Runtime:             noisy, two separate runs of nofib showed a tiny
                           reduction on average, (~0.2-0.3%), but I think
                           this is mostly noise
    * Compile time:        very noisy, but generally within +/- 0.5% (one
                           run faster, one slower)
    
    One interesting part of this change is that running CBE invalidates
    results of proc-point analysis. But instead of re-doing the whole
    analysis, we can use the map that CBE creates for replacing/comparing
    block labels (maps a redundant label to a useful one) to update the
    results of proc-point analysis. This lowers the overhead compared to the
    previous experiment in #12915.
    
    Signed-off-by: Michal Terepeta <michal.terepeta at gmail.com>
    
    Test Plan: ./validate
    
    Reviewers: bgamari, simonmar
    
    Reviewed By: simonmar
    
    Subscribers: rwbarton, thomie, carter
    
    GHC Trac Issues: #12915, #14226
    
    Differential Revision: https://phabricator.haskell.org/D4417


>---------------------------------------------------------------

d5c4d46a62ce6a0cfa6440344f707136eff18119
 compiler/cmm/CmmCommonBlockElim.hs |  8 ++++----
 compiler/cmm/CmmPipeline.hs        | 42 +++++++++++++++++++++++++++++++++++---
 2 files changed, 43 insertions(+), 7 deletions(-)

diff --git a/compiler/cmm/CmmCommonBlockElim.hs b/compiler/cmm/CmmCommonBlockElim.hs
index c91d553..ad10511 100644
--- a/compiler/cmm/CmmCommonBlockElim.hs
+++ b/compiler/cmm/CmmCommonBlockElim.hs
@@ -59,9 +59,10 @@ import Data.List (foldl')
 -- hashes, and at most once otherwise. Previously, we were slower, and people
 -- rightfully complained: #10397
 
--- TODO: Use optimization fuel
-elimCommonBlocks :: CmmGraph -> CmmGraph
-elimCommonBlocks g = replaceLabels env $ copyTicks env g
+type Subst = LabelMap BlockId
+
+elimCommonBlocks :: CmmGraph -> (CmmGraph, Subst)
+elimCommonBlocks g = (replaceLabels env $ copyTicks env g, env)
   where
      env = iterate mapEmpty blocks_with_key
      -- The order of blocks doesn't matter here, but revPostorder also drops any
@@ -73,7 +74,6 @@ elimCommonBlocks g = replaceLabels env $ copyTicks env g
 -- (so avoid comparing them again)
 type DistinctBlocks = [CmmBlock]
 type Key = [Label]
-type Subst = LabelMap BlockId
 
 -- The outer list groups by hash. We retain this grouping throughout.
 iterate :: Subst -> [[(Key, DistinctBlocks)]] -> Subst
diff --git a/compiler/cmm/CmmPipeline.hs b/compiler/cmm/CmmPipeline.hs
index 4d109a4..babdb0b 100644
--- a/compiler/cmm/CmmPipeline.hs
+++ b/compiler/cmm/CmmPipeline.hs
@@ -1,4 +1,5 @@
 {-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE TypeFamilies #-}
 
 module CmmPipeline (
   -- | Converts C-- with an implicit stack and native C-- calls into
@@ -28,6 +29,8 @@ import Control.Monad
 import Outputable
 import Platform
 
+import Data.Maybe
+
 -----------------------------------------------------------------------------
 -- | Top level driver for C-- pipeline
 -----------------------------------------------------------------------------
@@ -67,9 +70,9 @@ cpsTop hsc_env proc =
                                           , do_layout = do_layout }} = h
 
        ----------- Eliminate common blocks -------------------------------------
-       g <- {-# SCC "elimCommonBlocks" #-}
-            condPass Opt_CmmElimCommonBlocks elimCommonBlocks g
-                          Opt_D_dump_cmm_cbe "Post common block elimination"
+       (g, _) <- {-# SCC "elimCommonBlocks" #-}
+                 condPass2 Opt_CmmElimCommonBlocks elimCommonBlocks g mapEmpty
+                           Opt_D_dump_cmm_cbe "Post common block elimination"
 
        -- Any work storing block Labels must be performed _after_
        -- elimCommonBlocks
@@ -104,6 +107,32 @@ cpsTop hsc_env proc =
             condPass Opt_CmmSink (cmmSink dflags) g
                      Opt_D_dump_cmm_sink "Sink assignments"
 
+       (g, call_pps, proc_points) <- do
+         -- Only do the second CBE if we did the sinking pass. Otherwise,
+         -- it's unlikely we'll have any new opportunities to find redundant
+         -- blocks.
+         if not (gopt Opt_CmmSink dflags)
+           then pure (g, call_pps, proc_points)
+           else do
+             (g, cbe_subst) <- {-# SCC "elimCommonBlocks2" #-}
+               condPass2
+                 Opt_CmmElimCommonBlocks elimCommonBlocks g mapEmpty
+                 Opt_D_dump_cmm_cbe "Post common block elimination 2"
+
+             -- CBE might invalidate the results of proc-point analysis (by
+             -- removing labels). So we need to fix it. Instead of re-doing
+             -- the whole analysis, we use the final substitution env from
+             -- CBE to update existing results.
+             let cbe_fix set bid =
+                   setInsert (fromMaybe bid (mapLookup bid cbe_subst)) set
+             let !new_call_pps = setFoldl cbe_fix setEmpty call_pps
+             let !new_proc_points
+                   | splitting_proc_points =
+                       setFoldl cbe_fix setEmpty proc_points
+                   | otherwise = new_call_pps
+
+             return (g, new_call_pps, new_proc_points)
+
        ------------- CAF analysis ----------------------------------------------
        let cafEnv = {-# SCC "cafAnal" #-} cafAnal g
        dumpWith dflags Opt_D_dump_cmm_caf "CAFEnv" (ppr cafEnv)
@@ -155,6 +184,13 @@ cpsTop hsc_env proc =
                     return g
                else return g
 
+        condPass2 flag pass g a dumpflag dumpname =
+            if gopt flag dflags
+               then do
+                    (g, a) <- return $ pass g
+                    dump dumpflag dumpname g
+                    return (g, a)
+               else return (g, a)
 
         -- we don't need to split proc points for the NCG, unless
         -- tablesNextToCode is off.  The latter is because we have no



More information about the ghc-commits mailing list