[Git][ghc/ghc][wip/T17609] 2 commits: nativeGen: Deduplicate DWARF strings

Ben Gamari gitlab at gitlab.haskell.org
Mon Nov 2 06:08:16 UTC 2020



Ben Gamari pushed to branch wip/T17609 at Glasgow Haskell Compiler / GHC


Commits:
e2760490 by Ben Gamari at 2020-11-02T01:08:08-05:00
nativeGen: Deduplicate DWARF strings

As noted in #17609, we previously made no attempt to deduplicate
strings. This resulted in unnecessarily long compile times and large
object files. Fix this.

Fixes #17609.

- - - - -
b037879d by Ben Gamari at 2020-11-02T01:08:08-05:00
Add Note cross-reference for unique tag allocations

- - - - -


5 changed files:

- compiler/GHC/Builtin/Uniques.hs
- compiler/GHC/CmmToAsm/Dwarf.hs
- compiler/GHC/CmmToAsm/Dwarf/Constants.hs
- compiler/GHC/CmmToAsm/Dwarf/Types.hs
- compiler/GHC/Types/Unique.hs


Changes:

=====================================
compiler/GHC/Builtin/Uniques.hs
=====================================
@@ -296,6 +296,9 @@ getTupleDataConName boxity n =
 *                                                                      *
 ************************************************************************
 
+Note [Unique tag allocation]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
 Allocation of unique supply characters:
         v,t,u : for renumbering value-, type- and usage- vars.
         B:   builtin


=====================================
compiler/GHC/CmmToAsm/Dwarf.hs
=====================================
@@ -12,6 +12,7 @@ import GHC.Cmm.DebugBlock
 import GHC.Unit.Module
 import GHC.Utils.Outputable
 import GHC.Platform
+import GHC.Types.SrcLoc
 import GHC.Types.Unique
 import GHC.Types.Unique.Supply
 
@@ -46,11 +47,12 @@ dwarfGen config modLoc us blocks = do
   compPath <- getCurrentDirectory
   let lowLabel = dblCLabel $ head procs
       highLabel = mkAsmTempEndLabel $ dblCLabel $ last procs
+      producer = dwarfStringFromString $ cProjectName ++ " " ++ cProjectVersion
       dwarfUnit = DwarfCompileUnit
         { dwChildren = map (procToDwarf config) (map stripBlocks procs)
-        , dwName = fromMaybe "" (ml_hs_file modLoc)
-        , dwCompDir = addTrailingPathSeparator compPath
-        , dwProducer = cProjectName ++ " " ++ cProjectVersion
+        , dwName = dwarfStringFromString $ fromMaybe "" (ml_hs_file modLoc)
+        , dwCompDir = dwarfStringFromString $ addTrailingPathSeparator compPath
+        , dwProducer = producer
         , dwLowLabel = lowLabel
         , dwHighLabel = highLabel
         , dwLineLabel = dwarfLineLabel
@@ -76,6 +78,9 @@ dwarfGen config modLoc us blocks = do
                      , compileUnitFooter platform unitU
                      ]
 
+  -- .debug_str section: Strings
+  let stringsSct = dwarfStringsSection platform (dwarfInfoStrings dwarfUnit)
+
   -- .debug_line section: Generated mainly by the assembler, but we
   -- need to label it
   let lineSct = dwarfLineSection platform $$
@@ -92,7 +97,7 @@ dwarfGen config modLoc us blocks = do
                | otherwise               = [DwarfARange lowLabel highLabel]
   let aranges = dwarfARangesSection platform $$ pprDwarfARanges platform aranges' unitU
 
-  return (infoSct $$ abbrevSct $$ lineSct $$ frameSct $$ aranges, us'')
+  return (infoSct $$ stringsSct $$ abbrevSct $$ lineSct $$ frameSct $$ aranges, us'')
 
 -- | Build an address range entry for one proc.
 -- With split sections, each proc needs its own entry, since they may get
@@ -177,7 +182,7 @@ parent, B.
 procToDwarf :: NCGConfig -> DebugBlock -> DwarfInfo
 procToDwarf config prc
   = DwarfSubprogram { dwChildren = map blockToDwarf (dblBlocks prc)
-                    , dwName     = case dblSourceTick prc of
+                    , dwName     = dwarfStringFromString $ case dblSourceTick prc of
                          Just s at SourceNote{} -> sourceName s
                          _otherwise -> show (dblLabel prc)
                     , dwLabel    = dblCLabel prc
@@ -208,7 +213,13 @@ blockToDwarf blk
       | otherwise                 = Nothing   -- block was optimized out
 
 tickToDwarf :: Tickish () -> [DwarfInfo]
-tickToDwarf  (SourceNote ss _) = [DwarfSrcNote ss]
+tickToDwarf  (SourceNote ss _) =
+  [DwarfSrcNote { dwSpanFile = dwarfStringFromFastString (srcSpanFile ss)
+                , dwSpanStartLine = srcSpanStartLine ss
+                , dwSpanStartCol  = srcSpanStartCol  ss
+                , dwSpanEndLine = srcSpanEndLine ss
+                , dwSpanEndCol  = srcSpanEndCol  ss
+                }]
 tickToDwarf _ = []
 
 -- | Generates the data for the debug frame section, which encodes the


=====================================
compiler/GHC/CmmToAsm/Dwarf/Constants.hs
=====================================
@@ -86,12 +86,14 @@ dW_CHILDREN_no, dW_CHILDREN_yes :: Word8
 dW_CHILDREN_no  = 0
 dW_CHILDREN_yes = 1
 
-dW_FORM_addr, dW_FORM_data2, dW_FORM_data4, dW_FORM_string, dW_FORM_flag,
+dW_FORM_addr, dW_FORM_data2, dW_FORM_data4,
+  dW_FORM_strp,dW_FORM_string, dW_FORM_flag,
   dW_FORM_block1, dW_FORM_ref4, dW_FORM_ref_addr, dW_FORM_flag_present :: Word
 dW_FORM_addr   = 0x01
 dW_FORM_data2  = 0x05
 dW_FORM_data4  = 0x06
 dW_FORM_string = 0x08
+dW_FORM_strp   = 0x0e
 dW_FORM_flag   = 0x0c
 dW_FORM_block1 = 0x0a
 dW_FORM_ref_addr     = 0x10
@@ -145,11 +147,13 @@ dW_OP_call_frame_cfa = 0x9c
 
 -- * Dwarf section declarations
 dwarfInfoSection, dwarfAbbrevSection, dwarfLineSection,
-  dwarfFrameSection, dwarfGhcSection, dwarfARangesSection :: Platform -> SDoc
+  dwarfFrameSection, dwarfStringSection,
+  dwarfGhcSection, dwarfARangesSection :: Platform -> SDoc
 dwarfInfoSection    platform = dwarfSection platform "info"
 dwarfAbbrevSection  platform = dwarfSection platform "abbrev"
 dwarfLineSection    platform = dwarfSection platform "line"
 dwarfFrameSection   platform = dwarfSection platform "frame"
+dwarfStringSection  platform = dwarfSection platform "str"
 dwarfGhcSection     platform = dwarfSection platform "ghc"
 dwarfARangesSection platform = dwarfSection platform "aranges"
 
@@ -165,11 +169,13 @@ dwarfSection platform name =
        -> text "\t.section .debug_" <> text name <> text ",\"dr\""
 
 -- * Dwarf section labels
-dwarfInfoLabel, dwarfAbbrevLabel, dwarfLineLabel, dwarfFrameLabel :: PtrString
+dwarfInfoLabel, dwarfAbbrevLabel, dwarfLineLabel, dwarfFrameLabel,
+  dwarfStringLabel :: PtrString
 dwarfInfoLabel   = sLit ".Lsection_info"
 dwarfAbbrevLabel = sLit ".Lsection_abbrev"
 dwarfLineLabel   = sLit ".Lsection_line"
 dwarfFrameLabel  = sLit ".Lsection_frame"
+dwarfStringLabel = sLit ".Lsection_str"
 
 -- | Mapping of registers to DWARF register numbers
 dwarfRegNo :: Platform -> Reg -> Word8


=====================================
compiler/GHC/CmmToAsm/Dwarf/Types.hs
=====================================
@@ -2,12 +2,19 @@
 {-# LANGUAGE FlexibleInstances #-}
 {-# LANGUAGE FlexibleContexts #-}
 {-# LANGUAGE UndecidableInstances #-}
+{-# LANGUAGE RecordWildCards #-}
 
 module GHC.CmmToAsm.Dwarf.Types
   ( -- * Dwarf information
     DwarfInfo(..)
   , pprDwarfInfo
   , pprAbbrevDecls
+  , dwarfInfoStrings
+    -- * Dwarf Strings section
+  , DwarfString
+  , dwarfStringsSection
+  , dwarfStringFromString
+  , dwarfStringFromFastString
     -- * Dwarf address range table
   , DwarfARange(..)
   , pprDwarfARanges
@@ -32,18 +39,15 @@ import GHC.Prelude
 import GHC.Cmm.DebugBlock
 import GHC.Cmm.CLabel
 import GHC.Cmm.Expr         ( GlobalReg(..) )
-import GHC.Utils.Encoding
 import GHC.Data.FastString
 import GHC.Utils.Outputable
 import GHC.Platform
 import GHC.Types.Unique
+import GHC.Types.Unique.Set
 import GHC.Platform.Reg
-import GHC.Types.SrcLoc
-import GHC.Utils.Misc
 
 import GHC.CmmToAsm.Dwarf.Constants
 
-import qualified Data.ByteString as BS
 import qualified Control.Monad.Trans.State.Strict as S
 import Control.Monad (zipWithM, join)
 import Data.Bits
@@ -53,18 +57,49 @@ import Data.Char
 
 import GHC.Platform.Regs
 
+-- | A string in the DWARF @.debug_str@ section.
+newtype DwarfString = DwarfString FastString
+
+instance Uniquable DwarfString where
+  getUnique (DwarfString fs) = getUnique fs
+
+dwarfStringFromString :: String -> DwarfString
+dwarfStringFromString = dwarfStringFromFastString . fsLit
+
+dwarfStringFromFastString :: FastString -> DwarfString
+dwarfStringFromFastString = DwarfString
+
+dwarfStringSymbol :: DwarfString -> SDoc
+dwarfStringSymbol (DwarfString fs) =
+    text "_dbgfs_" <> ppr (getKey $ getUnique fs)
+
+pprDwarfString :: Platform -> DwarfString -> SDoc
+pprDwarfString plat s =
+    sectionOffset plat (dwarfStringSymbol s) (ptext dwarfStringLabel)
+
+dwarfStringsSection :: Platform -> UniqSet DwarfString -> SDoc
+dwarfStringsSection platform xs = vcat
+    [ ptext dwarfStringLabel <> colon
+    , dwarfStringSection platform
+    , vcat (map string $ nonDetEltsUniqSet xs)
+    ]
+  where
+    string :: DwarfString -> SDoc
+    string dstr@(DwarfString fstr) =
+      dwarfStringSymbol dstr <> colon $$ pprFastString fstr
+
 -- | Individual dwarf records. Each one will be encoded as an entry in
 -- the @.debug_info@ section.
 data DwarfInfo
   = DwarfCompileUnit { dwChildren :: [DwarfInfo]
-                     , dwName :: String
-                     , dwProducer :: String
-                     , dwCompDir :: String
+                     , dwName :: DwarfString
+                     , dwProducer :: DwarfString
+                     , dwCompDir :: DwarfString
                      , dwLowLabel :: CLabel
                      , dwHighLabel :: CLabel
                      , dwLineLabel :: PtrString }
   | DwarfSubprogram { dwChildren :: [DwarfInfo]
-                    , dwName :: String
+                    , dwName :: DwarfString
                     , dwLabel :: CLabel
                     , dwParent :: Maybe CLabel
                       -- ^ label of DIE belonging to the parent tick
@@ -73,9 +108,23 @@ data DwarfInfo
                , dwLabel :: CLabel
                , dwMarker :: Maybe CLabel
                }
-  | DwarfSrcNote { dwSrcSpan :: RealSrcSpan
+  | DwarfSrcNote { dwSpanFile      :: !DwarfString
+                 , dwSpanStartLine :: !Int
+                 , dwSpanStartCol  :: !Int
+                 , dwSpanEndLine   :: !Int
+                 , dwSpanEndCol    :: !Int
                  }
 
+-- | 'DwarfStrings' mentioned by the given 'DwarfInfo'.
+dwarfInfoStrings :: DwarfInfo -> UniqSet DwarfString
+dwarfInfoStrings dwinfo =
+  case dwinfo of
+    DwarfCompileUnit {..} -> mkUniqSet [dwName, dwProducer, dwCompDir] `unionUniqSets` foldMap dwarfInfoStrings dwChildren
+    DwarfSubprogram {..} -> unitUniqSet dwName `unionUniqSets` foldMap dwarfInfoStrings dwChildren
+    DwarfBlock {..} -> foldMap dwarfInfoStrings dwChildren
+    DwarfSrcNote {..} -> unitUniqSet dwSpanFile
+
+
 -- | Abbreviation codes used for encoding above records in the
 -- @.debug_info@ section.
 data DwarfAbbrev
@@ -138,7 +187,7 @@ pprAbbrevDecls platform haveDebugLine =
        , (dW_AT_high_pc, dW_FORM_addr)
        ] $$
      mkAbbrev DwAbbrGhcSrcNote dW_TAG_ghc_src_note dW_CHILDREN_no
-       [ (dW_AT_ghc_span_file, dW_FORM_string)
+       [ (dW_AT_ghc_span_file, dW_FORM_strp)
        , (dW_AT_ghc_span_start_line, dW_FORM_data4)
        , (dW_AT_ghc_span_start_col, dW_FORM_data2)
        , (dW_AT_ghc_span_end_line, dW_FORM_data4)
@@ -174,10 +223,10 @@ pprDwarfInfoOpen :: Platform -> Bool -> DwarfInfo -> SDoc
 pprDwarfInfoOpen platform haveSrc (DwarfCompileUnit _ name producer compDir lowLabel
                                            highLabel lineLbl) =
   pprAbbrev DwAbbrCompileUnit
-  $$ pprString name
-  $$ pprString producer
+  $$ pprDwarfString platform name
+  $$ pprDwarfString platform producer
   $$ pprData4 dW_LANG_Haskell
-  $$ pprString compDir
+  $$ pprDwarfString platform compDir
   $$ pprWord platform (pdoc platform lowLabel)
   $$ pprWord platform (pdoc platform highLabel)
   $$ if haveSrc
@@ -186,7 +235,7 @@ pprDwarfInfoOpen platform haveSrc (DwarfCompileUnit _ name producer compDir lowL
 pprDwarfInfoOpen platform _ (DwarfSubprogram _ name label parent) =
   pdoc platform (mkAsmTempDieLabel label) <> colon
   $$ pprAbbrev abbrev
-  $$ pprString name
+  $$ pprDwarfString platform name
   $$ pprLabelString platform label
   $$ pprFlag (externallyVisibleCLabel label)
   $$ pprWord platform (pdoc platform label)
@@ -209,13 +258,13 @@ pprDwarfInfoOpen platform _ (DwarfBlock _ label (Just marker)) =
   $$ pprLabelString platform label
   $$ pprWord platform (pdoc platform marker)
   $$ pprWord platform (pdoc platform $ mkAsmTempEndLabel marker)
-pprDwarfInfoOpen _ _ (DwarfSrcNote ss) =
+pprDwarfInfoOpen platform _ (DwarfSrcNote {..}) =
   pprAbbrev DwAbbrGhcSrcNote
-  $$ pprString' (ftext $ srcSpanFile ss)
-  $$ pprData4 (fromIntegral $ srcSpanStartLine ss)
-  $$ pprHalf (fromIntegral $ srcSpanStartCol ss)
-  $$ pprData4 (fromIntegral $ srcSpanEndLine ss)
-  $$ pprHalf (fromIntegral $ srcSpanEndCol ss)
+  $$ pprDwarfString platform dwSpanFile
+  $$ pprData4 (fromIntegral dwSpanStartLine)
+  $$ pprHalf (fromIntegral dwSpanStartCol)
+  $$ pprData4 (fromIntegral dwSpanEndLine)
+  $$ pprHalf (fromIntegral dwSpanEndCol)
 
 -- | Close a DWARF info record with children
 pprDwarfInfoClose :: SDoc
@@ -584,12 +633,8 @@ pprString' :: SDoc -> SDoc
 pprString' str = text "\t.asciz \"" <> str <> char '"'
 
 -- | Generate a string constant. We take care to escape the string.
-pprString :: String -> SDoc
-pprString str
-  = pprString' $ hcat $ map escapeChar $
-    if str `lengthIs` utf8EncodedLength str
-    then str
-    else map (chr . fromIntegral) $ BS.unpack $ bytesFS $ mkFastString str
+pprFastString :: FastString -> SDoc
+pprFastString = pprString' . hcat . map escapeChar . unpackFS
 
 -- | Escape a single non-unicode character
 escapeChar :: Char -> SDoc


=====================================
compiler/GHC/Types/Unique.hs
=====================================
@@ -68,8 +68,11 @@ import Data.Bits
 *                                                                      *
 ************************************************************************
 
-The @Chars@ are ``tag letters'' that identify the @UniqueSupply at .
-Fast comparison is everything on @Uniques@:
+The @Chars@ are ``tag letters'' that identify the @UniqueSupply at . The
+allocation of these is documented in Note [Unique tag allocation] in
+GHC.Builtin.Uniques.
+
+Fast comparison is everything on @Uniques at .
 -}
 
 -- | Unique identifier.



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/a8792d493d30978d83a91456692dca34fbafd474...b037879d83f6751e5b4a48ec14952217626385af

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/a8792d493d30978d83a91456692dca34fbafd474...b037879d83f6751e5b4a48ec14952217626385af
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20201102/85d33509/attachment-0001.html>


More information about the ghc-commits mailing list