[commit: ghc] ghc-7.10: Greatly speed up nativeCodeGen/seqBlocks (009e285)

git at git.haskell.org git at git.haskell.org
Mon May 18 13:37:46 UTC 2015


Repository : ssh://git@git.haskell.org/ghc

On branch  : ghc-7.10
Link       : http://ghc.haskell.org/trac/ghc/changeset/009e28520c045a5d0479af5f5c27bca4736e88ae/ghc

>---------------------------------------------------------------

commit 009e28520c045a5d0479af5f5c27bca4736e88ae
Author: Joachim Breitner <mail at joachim-breitner.de>
Date:   Sat May 16 17:47:31 2015 +0200

    Greatly speed up nativeCodeGen/seqBlocks
    
    When working on #10397, I noticed that "reorder" in
    nativeCodeGen/seqBlocks took more than 60% of the time. With this
    refactoring, it does not even show up in the profile any more. This
    fixes #10422.
    
    Differential Revision: https://phabricator.haskell.org/D893
    
    (cherry picked from commit 8e4dc8fb63b8d3bfee485c1c830776f3ed704f4d)


>---------------------------------------------------------------

009e28520c045a5d0479af5f5c27bca4736e88ae
 compiler/nativeGen/AsmCodeGen.hs    | 53 ++++++++++++++++++++++++-------------
 testsuite/tests/perf/compiler/all.T |  4 ++-
 2 files changed, 38 insertions(+), 19 deletions(-)

diff --git a/compiler/nativeGen/AsmCodeGen.hs b/compiler/nativeGen/AsmCodeGen.hs
index 4080398..9c57e76 100644
--- a/compiler/nativeGen/AsmCodeGen.hs
+++ b/compiler/nativeGen/AsmCodeGen.hs
@@ -63,6 +63,7 @@ import UniqFM
 import UniqSupply
 import DynFlags
 import Util
+import Unique
 
 import BasicTypes       ( Alignment )
 import Digraph
@@ -779,25 +780,41 @@ mkNode block@(BasicBlock id instrs) = (block, id, getOutEdges instrs)
 
 seqBlocks :: BlockEnv i -> [(GenBasicBlock t1, BlockId, [BlockId])]
                         -> [GenBasicBlock t1]
-seqBlocks _ [] = []
-seqBlocks infos ((block,_,[]) : rest)
-  = block : seqBlocks infos rest
-seqBlocks infos ((block@(BasicBlock id instrs),_,[next]) : rest)
-  | can_fallthrough = BasicBlock id (init instrs) : seqBlocks infos rest'
-  | otherwise       = block : seqBlocks infos rest'
+seqBlocks infos blocks = placeNext pullable0 todo0
   where
-        can_fallthrough = not (mapMember next infos) && can_reorder
-        (can_reorder, rest') = reorder next [] rest
-          -- TODO: we should do a better job for cycles; try to maximise the
-          -- fallthroughs within a loop.
-seqBlocks _ _ = panic "AsmCodegen:seqBlocks"
-
-reorder :: (Eq a) => a -> [(t, a, t1)] -> [(t, a, t1)] -> (Bool, [(t, a, t1)])
-reorder  _ accum [] = (False, reverse accum)
-reorder id accum (b@(block,id',out) : rest)
-  | id == id'  = (True, (block,id,out) : reverse accum ++ rest)
-  | otherwise  = reorder id (b:accum) rest
-
+    -- pullable: Blocks that are not yet placed
+    -- todo:     Original order of blocks, to be followed if we have no good
+    --           reason not to;
+    --           may include blocks that have already been placed, but then
+    --           these are not in pullable
+    pullable0 = listToUFM [ (i,(b,n)) | (b,i,n) <- blocks ]
+    todo0     = [i | (_,i,_) <- blocks ]
+
+    placeNext _ [] = []
+    placeNext pullable (i:rest)
+        | Just (block, pullable') <- lookupDeleteUFM pullable i
+        = place pullable' rest block
+        | otherwise
+        -- We already placed this block, so ignore
+        = placeNext pullable rest
+
+    place pullable todo (block,[])
+                          = block : placeNext pullable todo
+    place pullable todo (block@(BasicBlock id instrs),[next])
+        | mapMember next infos
+        = block : placeNext pullable todo
+        | Just (nextBlock, pullable') <- lookupDeleteUFM pullable next
+        = BasicBlock id (init instrs) : place pullable' todo nextBlock
+        | otherwise
+        = block : placeNext pullable todo
+    place _ _ (_,tooManyNextNodes)
+        = pprPanic "seqBlocks" (ppr tooManyNextNodes)
+
+
+lookupDeleteUFM :: Uniquable key => UniqFM elt -> key -> Maybe (elt, UniqFM elt)
+lookupDeleteUFM m k = do -- Maybe monad
+    v <- lookupUFM m k
+    return (v, delFromUFM m k)
 
 -- -----------------------------------------------------------------------------
 -- Generate jump tables
diff --git a/testsuite/tests/perf/compiler/all.T b/testsuite/tests/perf/compiler/all.T
index 31b0a5a..e6d31c4 100644
--- a/testsuite/tests/perf/compiler/all.T
+++ b/testsuite/tests/perf/compiler/all.T
@@ -379,7 +379,7 @@ test('T783',
             # 2014-04-04: 319179104 (x86 Windows, 64 bit machine)
             # 2014-09-03: 223377364 (Windows, better specialisation, raft of core-to-core optimisations)
 
-           (wordsize(64), 441932632, 10)]),
+           (wordsize(64), 452933048, 10)]),
             # prev:       349263216 (amd64/Linux)
             # 07/08/2012: 384479856 (amd64/Linux)
             # 29/08/2012: 436927840 (amd64/Linux)
@@ -394,6 +394,8 @@ test('T783',
             #   (general round of updates)
             # 2014-08-29: 441932632  (amd64/Linux)
             #   (better specialisation, raft of core-to-core optimisations)
+            # 2015-05-16: 452933048  (amd64/Linux)
+	    #   (improved sequenceBlocks in nativeCodeGen, #10422)
       extra_hc_opts('-static')
       ],
       compile,[''])



More information about the ghc-commits mailing list