[commit: ghc] master: Squash space leaks in the result of byteCodeGen (648fd73)

git at git.haskell.org git at git.haskell.org
Fri Jul 22 12:57:31 UTC 2016


Repository : ssh://git@git.haskell.org/ghc

On branch  : master
Link       : http://ghc.haskell.org/trac/ghc/changeset/648fd73a7b8fbb7955edc83330e2910428e76147/ghc

>---------------------------------------------------------------

commit 648fd73a7b8fbb7955edc83330e2910428e76147
Author: Simon Marlow <smarlow at fb.com>
Date:   Thu Jul 21 04:51:05 2016 -0700

    Squash space leaks in the result of byteCodeGen
    
    When loading a large number of modules into GHCi, we collect
    CompiledByteCode for every module and then link it all at the end.
    Space leaks in the CompiledByteCode linger until we traverse it all for
    linking, and possibly longer, if there are bits we don't look at.
    
    This is the nuke-it-from-orbit approach: we deepseq the whole thing
    after code generation. It's the only way to be sure.
    
    Test Plan:
    Heap profile of GHCi while loading nofib/real/anna into GHCi, this patch
    reduces the peak heap usage from ~100M to ~50M.
    
    Reviewers: hvr, austin, bgamari, erikd
    
    Subscribers: thomie
    
    Differential Revision: https://phabricator.haskell.org/D2419


>---------------------------------------------------------------

648fd73a7b8fbb7955edc83330e2910428e76147
 compiler/ghci/ByteCodeGen.hs   | 15 ++++++++-
 compiler/ghci/ByteCodeTypes.hs | 73 ++++++++++++++++++++++++++++++++----------
 2 files changed, 70 insertions(+), 18 deletions(-)

diff --git a/compiler/ghci/ByteCodeGen.hs b/compiler/ghci/ByteCodeGen.hs
index 9c7d25a..90e2174 100644
--- a/compiler/ghci/ByteCodeGen.hs
+++ b/compiler/ghci/ByteCodeGen.hs
@@ -1,4 +1,5 @@
 {-# LANGUAGE CPP, MagicHash, RecordWildCards #-}
+{-# OPTIONS_GHC -fprof-auto-top #-}
 --
 --  (c) The University of Glasgow 2002-2006
 --
@@ -57,6 +58,7 @@ import UniqSupply
 import Module
 import Control.Arrow ( second )
 
+import Control.Exception
 import Data.Array
 import Data.Map (Map)
 import Data.IntMap (IntMap)
@@ -93,10 +95,21 @@ byteCodeGen hsc_env this_mod binds tycs mb_modBreaks
         dumpIfSet_dyn dflags Opt_D_dump_BCOs
            "Proto-BCOs" (vcat (intersperse (char ' ') (map ppr proto_bcos)))
 
-        assembleBCOs hsc_env proto_bcos tycs
+        cbc <- assembleBCOs hsc_env proto_bcos tycs
           (case modBreaks of
              Nothing -> Nothing
              Just mb -> Just mb{ modBreaks_breakInfo = breakInfo })
+
+        -- Squash space leaks in the CompiledByteCode.  This is really
+        -- important, because when loading a set of modules into GHCi
+        -- we don't touch the CompiledByteCode until the end when we
+        -- do linking.  Forcing out the thunks here reduces space
+        -- usage by more than 50% when loading a large number of
+        -- modules.
+        evaluate (seqCompiledByteCode cbc)
+
+        return cbc
+
   where dflags = hsc_dflags hsc_env
 
 -- -----------------------------------------------------------------------------
diff --git a/compiler/ghci/ByteCodeTypes.hs b/compiler/ghci/ByteCodeTypes.hs
index 99e2ba2..3537a2b 100644
--- a/compiler/ghci/ByteCodeTypes.hs
+++ b/compiler/ghci/ByteCodeTypes.hs
@@ -1,11 +1,11 @@
-{-# LANGUAGE MagicHash, RecordWildCards #-}
+{-# LANGUAGE MagicHash, RecordWildCards, GeneralizedNewtypeDeriving #-}
 --
 --  (c) The University of Glasgow 2002-2006
 --
 
 -- | Bytecode assembler types
 module ByteCodeTypes
-  ( CompiledByteCode(..), FFIInfo(..)
+  ( CompiledByteCode(..), seqCompiledByteCode, FFIInfo(..)
   , UnlinkedBCO(..), BCOPtr(..), BCONPtr(..)
   , ItblEnv, ItblPtr(..)
   , CgBreakInfo(..)
@@ -26,6 +26,7 @@ import GHCi.BreakArray
 import GHCi.RemoteTypes
 import GHCi.FFI
 import GHCi.InfoTable
+import Control.DeepSeq
 
 import Foreign
 import Data.Array
@@ -48,38 +49,61 @@ data CompiledByteCode = CompiledByteCode
   }
                 -- ToDo: we're not tracking strings that we malloc'd
 newtype FFIInfo = FFIInfo (RemotePtr C_ffi_cif)
-  deriving Show
+  deriving (Show, NFData)
 
 instance Outputable CompiledByteCode where
   ppr CompiledByteCode{..} = ppr bc_bcos
 
+-- Not a real NFData instance, because ModBreaks contains some things
+-- we can't rnf
+seqCompiledByteCode :: CompiledByteCode -> ()
+seqCompiledByteCode CompiledByteCode{..} =
+  rnf bc_bcos `seq`
+  rnf (nameEnvElts bc_itbls) `seq`
+  rnf bc_ffis `seq`
+  rnf bc_strs `seq`
+  rnf (fmap seqModBreaks bc_breaks)
+
 type ItblEnv = NameEnv (Name, ItblPtr)
         -- We need the Name in the range so we know which
         -- elements to filter out when unloading a module
 
-newtype ItblPtr = ItblPtr (RemotePtr StgInfoTable) deriving Show
+newtype ItblPtr = ItblPtr (RemotePtr StgInfoTable)
+  deriving (Show, NFData)
 
 data UnlinkedBCO
    = UnlinkedBCO {
-        unlinkedBCOName   :: Name,
-        unlinkedBCOArity  :: Int,
-        unlinkedBCOInstrs :: UArray Int Word16,         -- insns
-        unlinkedBCOBitmap :: UArray Int Word,           -- bitmap
-        unlinkedBCOLits   :: (SizedSeq BCONPtr),        -- non-ptrs
-        unlinkedBCOPtrs   :: (SizedSeq BCOPtr)          -- ptrs
+        unlinkedBCOName   :: !Name,
+        unlinkedBCOArity  :: {-# UNPACK #-} !Int,
+        unlinkedBCOInstrs :: !(UArray Int Word16),      -- insns
+        unlinkedBCOBitmap :: !(UArray Int Word),        -- bitmap
+        unlinkedBCOLits   :: !(SizedSeq BCONPtr),       -- non-ptrs
+        unlinkedBCOPtrs   :: !(SizedSeq BCOPtr)         -- ptrs
    }
 
+instance NFData UnlinkedBCO where
+  rnf UnlinkedBCO{..} =
+    rnf unlinkedBCOLits `seq`
+    rnf unlinkedBCOPtrs
+
 data BCOPtr
-  = BCOPtrName   Name
-  | BCOPtrPrimOp PrimOp
-  | BCOPtrBCO    UnlinkedBCO
+  = BCOPtrName   !Name
+  | BCOPtrPrimOp !PrimOp
+  | BCOPtrBCO    !UnlinkedBCO
   | BCOPtrBreakArray  -- a pointer to this module's BreakArray
 
+instance NFData BCOPtr where
+  rnf (BCOPtrBCO bco) = rnf bco
+  rnf x = x `seq` ()
+
 data BCONPtr
-  = BCONPtrWord  Word
-  | BCONPtrLbl   FastString
-  | BCONPtrItbl  Name
-  | BCONPtrStr   ByteString
+  = BCONPtrWord  {-# UNPACK #-} !Word
+  | BCONPtrLbl   !FastString
+  | BCONPtrItbl  !Name
+  | BCONPtrStr   !ByteString
+
+instance NFData BCONPtr where
+  rnf x = x `seq` ()
 
 -- | Information about a breakpoint that we know at code-generation time
 data CgBreakInfo
@@ -88,6 +112,12 @@ data CgBreakInfo
    , cgb_resty  :: Type
    }
 
+-- Not a real NFData instance because we can't rnf Id or Type
+seqCgBreakInfo :: CgBreakInfo -> ()
+seqCgBreakInfo CgBreakInfo{..} =
+  rnf (map snd cgb_vars) `seq`
+  seqType cgb_resty
+
 instance Outputable UnlinkedBCO where
    ppr (UnlinkedBCO nm _arity _insns _bitmap lits ptrs)
       = sep [text "BCO", ppr nm, text "with",
@@ -126,6 +156,15 @@ data ModBreaks
         -- ^ info about each breakpoint from the bytecode generator
    }
 
+seqModBreaks :: ModBreaks -> ()
+seqModBreaks ModBreaks{..} =
+  rnf modBreaks_flags `seq`
+  rnf modBreaks_locs `seq`
+  rnf modBreaks_vars `seq`
+  rnf modBreaks_decls `seq`
+  rnf modBreaks_ccs `seq`
+  rnf (fmap seqCgBreakInfo modBreaks_breakInfo)
+
 -- | Construct an empty ModBreaks
 emptyModBreaks :: ModBreaks
 emptyModBreaks = ModBreaks



More information about the ghc-commits mailing list