[Git][ghc/ghc][wip/T17609] nativeGen: Deduplicate DWARF strings

Ben Gamari gitlab at gitlab.haskell.org
Wed Apr 29 16:48:03 UTC 2020



Ben Gamari pushed to branch wip/T17609 at Glasgow Haskell Compiler / GHC


Commits:
66e11eff by Ben Gamari at 2020-04-29T12:47:50-04:00
nativeGen: Deduplicate DWARF strings

As noted in #17609, we previously made no attempt to deduplicate
strings. This resulted in unnecessarily long compile times and large
object files. Fix this.

Fixes #17609.

- - - - -


2 changed files:

- compiler/GHC/CmmToAsm/Dwarf.hs
- compiler/GHC/CmmToAsm/Dwarf/Types.hs


Changes:

=====================================
compiler/GHC/CmmToAsm/Dwarf.hs
=====================================
@@ -47,11 +47,12 @@ dwarfGen df modLoc us blocks = do
   compPath <- getCurrentDirectory
   let lowLabel = dblCLabel $ head procs
       highLabel = mkAsmTempEndLabel $ dblCLabel $ last procs
+      producer = dwarfStringFromString $ cProjectName ++ " " ++ cProjectVersion
       dwarfUnit = DwarfCompileUnit
         { dwChildren = map (procToDwarf df) (map stripBlocks procs)
         , dwName = fromMaybe "" (ml_hs_file modLoc)
-        , dwCompDir = addTrailingPathSeparator compPath
-        , dwProducer = cProjectName ++ " " ++ cProjectVersion
+        , dwCompDir = dwarfStringFromString $ addTrailingPathSeparator compPath
+        , dwProducer = producer
         , dwLowLabel = lowLabel
         , dwHighLabel = highLabel
         , dwLineLabel = dwarfLineLabel
@@ -77,6 +78,9 @@ dwarfGen df modLoc us blocks = do
                      , compileUnitFooter unitU
                      ]
 
+  -- .debug_str section: Strings
+  let stringsSct = dwarfStringsSection (dwarfInfoStrings dwarfUnit)
+
   -- .debug_line section: Generated mainly by the assembler, but we
   -- need to label it
   let lineSct = dwarfLineSection platform $$
@@ -93,7 +97,7 @@ dwarfGen df modLoc us blocks = do
                | otherwise                 = [DwarfARange lowLabel highLabel]
   let aranges = dwarfARangesSection platform $$ pprDwarfARanges platform aranges' unitU
 
-  return (infoSct $$ abbrevSct $$ lineSct $$ frameSct $$ aranges, us'')
+  return (infoSct $$ stringsSct $$ abbrevSct $$ lineSct $$ frameSct $$ aranges, us'')
 
 -- | Build an address range entry for one proc.
 -- With split sections, each proc needs its own entry, since they may get
@@ -178,7 +182,7 @@ parent, B.
 procToDwarf :: DynFlags -> DebugBlock -> DwarfInfo
 procToDwarf df prc
   = DwarfSubprogram { dwChildren = map blockToDwarf (dblBlocks prc)
-                    , dwName     = case dblSourceTick prc of
+                    , dwName     = dwarfStringFromString $ case dblSourceTick prc of
                          Just s at SourceNote{} -> sourceName s
                          _otherwise -> showSDocDump df $ ppr $ dblLabel prc
                     , dwLabel    = dblCLabel prc


=====================================
compiler/GHC/CmmToAsm/Dwarf/Types.hs
=====================================
@@ -1,8 +1,14 @@
+{-# LANGUAGE RecordWildCards #-}
+
 module GHC.CmmToAsm.Dwarf.Types
   ( -- * Dwarf information
     DwarfInfo(..)
   , pprDwarfInfo
   , pprAbbrevDecls
+  , dwarfInfoStrings
+    -- * Dwarf Strings section
+  , DwarfString
+  , dwarfStringsSection
     -- * Dwarf address range table
   , DwarfARange(..)
   , pprDwarfARanges
@@ -48,18 +54,45 @@ import Data.Char
 
 import GHC.Platform.Regs
 
+-- | A string in the DWARF @.debug_str@ section.
+newtype DwarfString = DwarfString FastString
+
+dwarfStringFromString :: String -> DwarfString
+dwarfStringFromString = dwarfStringFromFastString . fsLit
+
+dwarfStringFromFastString :: String -> DwarfString
+dwarfStringFromFastString = DwarfString
+
+dwarfStringSymbol :: DwarfString -> SDoc
+dwarfStringSymbol (DwarfString fs) =
+    text "_dbgfs_" <> getUnique fs
+
+debugStrSection :: SDoc
+debugStrSection = text ".debug_str"
+
+pprDwarfString :: Platform -> DwarfString -> SDoc
+pprDwarfString plat s =
+    sectionOffset plat (dwarfStringSymbol s) debugStrSection
+
+dwarfStringsSection :: [DwarfString] -> SDoc
+dwarfStringsSection xs = text ".section" <+> debugStrSection $$ foldMap string xs
+  where
+    string :: DwarfString -> SDoc
+    string dstr@(DwarfString fstr) =
+      dwarfStringSymbol dstr <> colon $$ pprFastString fstr
+
 -- | Individual dwarf records. Each one will be encoded as an entry in
 -- the @.debug_info@ section.
 data DwarfInfo
   = DwarfCompileUnit { dwChildren :: [DwarfInfo]
-                     , dwName :: String
-                     , dwProducer :: String
-                     , dwCompDir :: String
+                     , dwName :: DwarfString
+                     , dwProducer :: DwarfString
+                     , dwCompDir :: DwarfString
                      , dwLowLabel :: CLabel
                      , dwHighLabel :: CLabel
                      , dwLineLabel :: PtrString }
   | DwarfSubprogram { dwChildren :: [DwarfInfo]
-                    , dwName :: String
+                    , dwName :: DwarfString
                     , dwLabel :: CLabel
                     , dwParent :: Maybe CLabel
                       -- ^ label of DIE belonging to the parent tick
@@ -68,9 +101,23 @@ data DwarfInfo
                , dwLabel :: CLabel
                , dwMarker :: Maybe CLabel
                }
-  | DwarfSrcNote { dwSrcSpan :: RealSrcSpan
+  | DwarfSrcNote { dwSpanFile  :: !DwarfString
+                 , dwSpanSLine :: !Int
+                 , dwSpanSCol  :: !Int
+                 , dwSpanELine :: !Int
+                 , dwSpanECol  :: !Int
                  }
 
+-- | 'DwarfStrings' mentioned by the given 'DwarfInfo'.
+dwarfInfoStrings :: DwarfInfo -> [DwarfString]
+dwarfInfoStrings dwinfo =
+  case dwinfo of
+    DwarfCompileUnit {..} -> [dwName, dwProducer, dwCompDir] <> foldMap dwarfInfoStrings dwChildren
+    DwarfSubprogram {..} -> [dwName] <> foldMap dwarfInfoStrings dwChildren
+    DwarfBlock {..} -> foldMap dwarfInfoStrings dwChildren
+    DwarfSrcNote {..} -> [dwSpanFile]
+
+
 -- | Abbreviation codes used for encoding above records in the
 -- @.debug_info@ section.
 data DwarfAbbrev
@@ -133,7 +180,7 @@ pprAbbrevDecls platform haveDebugLine =
        , (dW_AT_high_pc, dW_FORM_addr)
        ] $$
      mkAbbrev DwAbbrGhcSrcNote dW_TAG_ghc_src_note dW_CHILDREN_no
-       [ (dW_AT_ghc_span_file, dW_FORM_string)
+       [ (dW_AT_ghc_span_file, dW_FORM_strp)
        , (dW_AT_ghc_span_start_line, dW_FORM_data4)
        , (dW_AT_ghc_span_start_col, dW_FORM_data2)
        , (dW_AT_ghc_span_end_line, dW_FORM_data4)
@@ -163,10 +210,10 @@ pprDwarfInfoOpen :: Platform -> Bool -> DwarfInfo -> SDoc
 pprDwarfInfoOpen platform haveSrc (DwarfCompileUnit _ name producer compDir lowLabel
                                            highLabel lineLbl) =
   pprAbbrev DwAbbrCompileUnit
-  $$ pprString name
-  $$ pprString producer
+  $$ pprDwarfString name
+  $$ pprDwarfString producer
   $$ pprData4 dW_LANG_Haskell
-  $$ pprString compDir
+  $$ pprDwarfString compDir
   $$ pprWord platform (ppr lowLabel)
   $$ pprWord platform (ppr highLabel)
   $$ if haveSrc
@@ -176,7 +223,7 @@ pprDwarfInfoOpen platform _ (DwarfSubprogram _ name label
                                     parent) = sdocWithDynFlags $ \df ->
   ppr (mkAsmTempDieLabel label) <> colon
   $$ pprAbbrev abbrev
-  $$ pprString name
+  $$ pprDwarfString name
   $$ pprString (renderWithStyle (initSDocContext df (mkCodeStyle CStyle)) (ppr label))
   $$ pprFlag (externallyVisibleCLabel label)
   $$ pprWord platform (ppr label)
@@ -201,7 +248,7 @@ pprDwarfInfoOpen platform _ (DwarfBlock _ label (Just marker)) = sdocWithDynFlag
   $$ pprWord platform (ppr $ mkAsmTempEndLabel marker)
 pprDwarfInfoOpen _ _ (DwarfSrcNote ss) =
   pprAbbrev DwAbbrGhcSrcNote
-  $$ pprString' (ftext $ srcSpanFile ss)
+  $$ pprDwarfString (ftext $ srcSpanFile ss)
   $$ pprData4 (fromIntegral $ srcSpanStartLine ss)
   $$ pprHalf (fromIntegral $ srcSpanStartCol ss)
   $$ pprData4 (fromIntegral $ srcSpanEndLine ss)
@@ -573,13 +620,13 @@ pprLEBInt x | x >= -64 && x < 64
 pprString' :: SDoc -> SDoc
 pprString' str = text "\t.asciz \"" <> str <> char '"'
 
+-- | Generate a string constant. We take care to escape the string.
+pprFastString :: FastString -> SDoc
+pprFastString = pprString' . hcat . map escapeChar . unpackFS
+
 -- | Generate a string constant. We take care to escape the string.
 pprString :: String -> SDoc
-pprString str
-  = pprString' $ hcat $ map escapeChar $
-    if str `lengthIs` utf8EncodedLength str
-    then str
-    else map (chr . fromIntegral) $ BS.unpack $ bytesFS $ mkFastString str
+pprString = pprFastString . mkFastString
 
 -- | Escape a single non-unicode character
 escapeChar :: Char -> SDoc



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/66e11eff47a3d298e75c17abda987e4ab4e09975

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/66e11eff47a3d298e75c17abda987e4ab4e09975
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20200429/5c4c3e5e/attachment-0001.html>


More information about the ghc-commits mailing list