[Git][ghc/ghc][wip/marge_bot_batch_merge_job] 3 commits: DynFlags: refactor DmdAnal

Marge Bot gitlab at gitlab.haskell.org
Mon Oct 12 22:52:51 UTC 2020



 Marge Bot pushed to branch wip/marge_bot_batch_merge_job at Glasgow Haskell Compiler / GHC


Commits:
9bbc84d2 by Sylvain Henry at 2020-10-12T18:21:51-04:00
DynFlags: refactor DmdAnal

Make demand analysis usable without having to provide DynFlags.

- - - - -
824988ab by Wander Hillen at 2020-10-12T18:52:36-04:00
Initial ShortText code and conversion of package db code

Metric Decrease:
    Naperian
    T10421
    T10421a
    T10547
    T12150
    T12234
    T12425
    T13035
    T18140
    T18304
    T5837
    T6048
    T13253-spj
    T18282
    T18223
    T3064
    T9961
Metric Increase
    T13701

HFSKJH

- - - - -
d8a9cdb6 by Sylvain Henry at 2020-10-12T18:52:40-04:00
Parser: don't require the HomeUnitId

The HomeUnitId is only used by the Cmm parser and this one has access to
the DynFlags, so it can grab the UnitId of the HomeUnit from them.

Bump haddock submodule

- - - - -


21 changed files:

- compiler/GHC/Cmm/Lexer.x
- compiler/GHC/Cmm/Parser.y
- compiler/GHC/Cmm/Monad.hs → compiler/GHC/Cmm/Parser/Monad.hs
- compiler/GHC/Core/Opt/DmdAnal.hs
- compiler/GHC/Core/Opt/Pipeline.hs
- compiler/GHC/Driver/Backpack.hs
- compiler/GHC/Driver/CodeOutput.hs
- compiler/GHC/Driver/Config.hs
- compiler/GHC/Driver/Finder.hs
- compiler/GHC/Parser/Lexer.x
- compiler/GHC/Runtime/Linker.hs
- compiler/GHC/SysTools/ExtraObj.hs
- compiler/GHC/Unit/Info.hs
- compiler/GHC/Unit/State.hs
- compiler/ghc.cabal.in
- + libraries/ghc-boot/GHC/Data/ShortText.hs
- libraries/ghc-boot/GHC/Unit/Database.hs
- compiler/GHC/Utils/Encoding.hs → libraries/ghc-boot/GHC/Utils/Encoding.hs
- libraries/ghc-boot/ghc-boot.cabal.in
- utils/ghc-pkg/Main.hs
- utils/haddock


Changes:

=====================================
compiler/GHC/Cmm/Lexer.x
=====================================
@@ -20,7 +20,7 @@ import GHC.Prelude
 import GHC.Cmm.Expr
 
 import GHC.Parser.Lexer
-import GHC.Cmm.Monad
+import GHC.Cmm.Parser.Monad
 import GHC.Types.SrcLoc
 import GHC.Types.Unique.FM
 import GHC.Data.StringBuffer


=====================================
compiler/GHC/Cmm/Parser.y
=====================================
@@ -234,8 +234,8 @@ import GHC.Cmm.Info
 import GHC.Cmm.BlockId
 import GHC.Cmm.Lexer
 import GHC.Cmm.CLabel
-import GHC.Cmm.Monad hiding (getPlatform, getProfile, getPtrOpts)
-import qualified GHC.Cmm.Monad as PD
+import GHC.Cmm.Parser.Monad hiding (getPlatform, getProfile, getPtrOpts)
+import qualified GHC.Cmm.Parser.Monad as PD
 import GHC.Cmm.CallConv
 import GHC.Runtime.Heap.Layout
 import GHC.Parser.Lexer
@@ -385,9 +385,11 @@ cmmtop  :: { CmmParse () }
         | cmmdata                       { $1 }
         | decl                          { $1 }
         | 'CLOSURE' '(' NAME ',' NAME lits ')' ';'
-                {% liftP . withHomeUnitId $ \pkg ->
-                   do lits <- sequence $6;
-                      staticClosure pkg $3 $5 (map getLit lits) }
+                {% do
+                      home_unit_id <- getHomeUnitId
+                      liftP $ pure $ do
+                        lits <- sequence $6;
+                        staticClosure home_unit_id $3 $5 (map getLit lits) }
 
 -- The only static closures in the RTS are dummy closures like
 -- stg_END_TSO_QUEUE_closure and stg_dummy_ret.  We don't need
@@ -406,8 +408,10 @@ cmmdata :: { CmmParse () }
 
 data_label :: { CmmParse CLabel }
     : NAME ':'
-                {% liftP . withHomeUnitId $ \pkg ->
-                   return (mkCmmDataLabel pkg (NeedExternDecl False) $1) }
+                {% do
+                   home_unit_id <- getHomeUnitId
+                   liftP $ pure $ do
+                     pure (mkCmmDataLabel home_unit_id (NeedExternDecl False) $1) }
 
 statics :: { [CmmParse [CmmStatic]] }
         : {- empty -}                   { [] }
@@ -464,103 +468,117 @@ maybe_body :: { CmmParse () }
 
 info    :: { CmmParse (CLabel, Maybe CmmInfoTable, [LocalReg]) }
         : NAME
-                {% liftP . withHomeUnitId $ \pkg ->
-                   do   newFunctionName $1 pkg
-                        return (mkCmmCodeLabel pkg $1, Nothing, []) }
+                {% do
+                     home_unit_id <- getHomeUnitId
+                     liftP $ pure $ do
+                       newFunctionName $1 home_unit_id
+                       return (mkCmmCodeLabel home_unit_id $1, Nothing, []) }
 
 
         | 'INFO_TABLE' '(' NAME ',' INT ',' INT ',' INT ',' STRING ',' STRING ')'
                 -- ptrs, nptrs, closure type, description, type
-                {% liftP . withHomeUnitId $ \pkg ->
-                   do profile <- getProfile
-                      let prof = profilingInfo profile $11 $13
-                          rep  = mkRTSRep (fromIntegral $9) $
-                                   mkHeapRep profile False (fromIntegral $5)
-                                                   (fromIntegral $7) Thunk
-                              -- not really Thunk, but that makes the info table
-                              -- we want.
-                      return (mkCmmEntryLabel pkg $3,
-                              Just $ CmmInfoTable { cit_lbl = mkCmmInfoLabel pkg $3
-                                           , cit_rep = rep
-                                           , cit_prof = prof, cit_srt = Nothing, cit_clo = Nothing },
-                              []) }
+                {% do
+                      home_unit_id <- getHomeUnitId
+                      liftP $ pure $ do
+                        profile <- getProfile
+                        let prof = profilingInfo profile $11 $13
+                            rep  = mkRTSRep (fromIntegral $9) $
+                                     mkHeapRep profile False (fromIntegral $5)
+                                                     (fromIntegral $7) Thunk
+                                -- not really Thunk, but that makes the info table
+                                -- we want.
+                        return (mkCmmEntryLabel home_unit_id $3,
+                                Just $ CmmInfoTable { cit_lbl = mkCmmInfoLabel home_unit_id $3
+                                             , cit_rep = rep
+                                             , cit_prof = prof, cit_srt = Nothing, cit_clo = Nothing },
+                                []) }
 
         | 'INFO_TABLE_FUN' '(' NAME ',' INT ',' INT ',' INT ',' STRING ',' STRING ',' INT ')'
                 -- ptrs, nptrs, closure type, description, type, fun type
-                {% liftP . withHomeUnitId $ \pkg ->
-                   do profile <- getProfile
-                      let prof = profilingInfo profile $11 $13
-                          ty   = Fun 0 (ArgSpec (fromIntegral $15))
-                                -- Arity zero, arg_type $15
-                          rep = mkRTSRep (fromIntegral $9) $
-                                    mkHeapRep profile False (fromIntegral $5)
-                                                    (fromIntegral $7) ty
-                      return (mkCmmEntryLabel pkg $3,
-                              Just $ CmmInfoTable { cit_lbl = mkCmmInfoLabel pkg $3
-                                           , cit_rep = rep
-                                           , cit_prof = prof, cit_srt = Nothing, cit_clo = Nothing },
-                              []) }
+                {% do
+                      home_unit_id <- getHomeUnitId
+                      liftP $ pure $ do
+                        profile <- getProfile
+                        let prof = profilingInfo profile $11 $13
+                            ty   = Fun 0 (ArgSpec (fromIntegral $15))
+                                  -- Arity zero, arg_type $15
+                            rep = mkRTSRep (fromIntegral $9) $
+                                      mkHeapRep profile False (fromIntegral $5)
+                                                      (fromIntegral $7) ty
+                        return (mkCmmEntryLabel home_unit_id $3,
+                                Just $ CmmInfoTable { cit_lbl = mkCmmInfoLabel home_unit_id $3
+                                             , cit_rep = rep
+                                             , cit_prof = prof, cit_srt = Nothing, cit_clo = Nothing },
+                                []) }
                 -- we leave most of the fields zero here.  This is only used
                 -- to generate the BCO info table in the RTS at the moment.
 
         | 'INFO_TABLE_CONSTR' '(' NAME ',' INT ',' INT ',' INT ',' INT ',' STRING ',' STRING ')'
                 -- ptrs, nptrs, tag, closure type, description, type
-                {% liftP . withHomeUnitId $ \pkg ->
-                   do profile <- getProfile
-                      let prof = profilingInfo profile $13 $15
-                          ty  = Constr (fromIntegral $9)  -- Tag
-                                       (BS8.pack $13)
-                          rep = mkRTSRep (fromIntegral $11) $
-                                  mkHeapRep profile False (fromIntegral $5)
-                                                  (fromIntegral $7) ty
-                      return (mkCmmEntryLabel pkg $3,
-                              Just $ CmmInfoTable { cit_lbl = mkCmmInfoLabel pkg $3
-                                           , cit_rep = rep
-                                           , cit_prof = prof, cit_srt = Nothing,cit_clo = Nothing },
-                              []) }
+                {% do
+                      home_unit_id <- getHomeUnitId
+                      liftP $ pure $ do
+                        profile <- getProfile
+                        let prof = profilingInfo profile $13 $15
+                            ty  = Constr (fromIntegral $9)  -- Tag
+                                         (BS8.pack $13)
+                            rep = mkRTSRep (fromIntegral $11) $
+                                    mkHeapRep profile False (fromIntegral $5)
+                                                    (fromIntegral $7) ty
+                        return (mkCmmEntryLabel home_unit_id $3,
+                                Just $ CmmInfoTable { cit_lbl = mkCmmInfoLabel home_unit_id $3
+                                             , cit_rep = rep
+                                             , cit_prof = prof, cit_srt = Nothing,cit_clo = Nothing },
+                                []) }
 
                      -- If profiling is on, this string gets duplicated,
                      -- but that's the way the old code did it we can fix it some other time.
 
         | 'INFO_TABLE_SELECTOR' '(' NAME ',' INT ',' INT ',' STRING ',' STRING ')'
                 -- selector, closure type, description, type
-                {% liftP . withHomeUnitId $ \pkg ->
-                   do profile <- getProfile
-                      let prof = profilingInfo profile $9 $11
-                          ty  = ThunkSelector (fromIntegral $5)
-                          rep = mkRTSRep (fromIntegral $7) $
-                                   mkHeapRep profile False 0 0 ty
-                      return (mkCmmEntryLabel pkg $3,
-                              Just $ CmmInfoTable { cit_lbl = mkCmmInfoLabel pkg $3
-                                           , cit_rep = rep
-                                           , cit_prof = prof, cit_srt = Nothing, cit_clo = Nothing },
-                              []) }
+                {% do
+                      home_unit_id <- getHomeUnitId
+                      liftP $ pure $ do
+                        profile <- getProfile
+                        let prof = profilingInfo profile $9 $11
+                            ty  = ThunkSelector (fromIntegral $5)
+                            rep = mkRTSRep (fromIntegral $7) $
+                                     mkHeapRep profile False 0 0 ty
+                        return (mkCmmEntryLabel home_unit_id $3,
+                                Just $ CmmInfoTable { cit_lbl = mkCmmInfoLabel home_unit_id $3
+                                             , cit_rep = rep
+                                             , cit_prof = prof, cit_srt = Nothing, cit_clo = Nothing },
+                                []) }
 
         | 'INFO_TABLE_RET' '(' NAME ',' INT ')'
                 -- closure type (no live regs)
-                {% liftP . withHomeUnitId $ \pkg ->
-                   do let prof = NoProfilingInfo
-                          rep  = mkRTSRep (fromIntegral $5) $ mkStackRep []
-                      return (mkCmmRetLabel pkg $3,
-                              Just $ CmmInfoTable { cit_lbl = mkCmmRetInfoLabel pkg $3
-                                           , cit_rep = rep
-                                           , cit_prof = prof, cit_srt = Nothing, cit_clo = Nothing },
-                              []) }
+                {% do
+                      home_unit_id <- getHomeUnitId
+                      liftP $ pure $ do
+                        let prof = NoProfilingInfo
+                            rep  = mkRTSRep (fromIntegral $5) $ mkStackRep []
+                        return (mkCmmRetLabel home_unit_id $3,
+                                Just $ CmmInfoTable { cit_lbl = mkCmmRetInfoLabel home_unit_id $3
+                                             , cit_rep = rep
+                                             , cit_prof = prof, cit_srt = Nothing, cit_clo = Nothing },
+                                []) }
 
         | 'INFO_TABLE_RET' '(' NAME ',' INT ',' formals0 ')'
                 -- closure type, live regs
-                {% liftP . withHomeUnitId $ \pkg ->
-                   do platform <- getPlatform
-                      live <- sequence $7
-                      let prof = NoProfilingInfo
-                          -- drop one for the info pointer
-                          bitmap = mkLiveness platform (drop 1 live)
-                          rep  = mkRTSRep (fromIntegral $5) $ mkStackRep bitmap
-                      return (mkCmmRetLabel pkg $3,
-                              Just $ CmmInfoTable { cit_lbl = mkCmmRetInfoLabel pkg $3
-                                           , cit_rep = rep
-                                           , cit_prof = prof, cit_srt = Nothing, cit_clo = Nothing },
-                              live) }
+                {% do
+                      home_unit_id <- getHomeUnitId
+                      liftP $ pure $ do
+                        platform <- getPlatform
+                        live <- sequence $7
+                        let prof = NoProfilingInfo
+                            -- drop one for the info pointer
+                            bitmap = mkLiveness platform (drop 1 live)
+                            rep  = mkRTSRep (fromIntegral $5) $ mkStackRep bitmap
+                        return (mkCmmRetLabel home_unit_id $3,
+                                Just $ CmmInfoTable { cit_lbl = mkCmmRetInfoLabel home_unit_id $3
+                                             , cit_rep = rep
+                                             , cit_prof = prof, cit_srt = Nothing, cit_clo = Nothing },
+                                live) }
 
 body    :: { CmmParse () }
         : {- empty -}                   { return () }


=====================================
compiler/GHC/Cmm/Monad.hs → compiler/GHC/Cmm/Parser/Monad.hs
=====================================
@@ -7,13 +7,14 @@
 -- The parser for C-- requires access to a lot more of the 'DynFlags',
 -- so 'PD' provides access to 'DynFlags' via a 'HasDynFlags' instance.
 -----------------------------------------------------------------------------
-module GHC.Cmm.Monad (
+module GHC.Cmm.Parser.Monad (
     PD(..)
   , liftP
   , failMsgPD
   , getProfile
   , getPlatform
   , getPtrOpts
+  , getHomeUnitId
   ) where
 
 import GHC.Prelude
@@ -28,6 +29,8 @@ import GHC.Driver.Session
 import GHC.Parser.Lexer
 import GHC.Parser.Errors
 import GHC.Types.SrcLoc
+import GHC.Unit.Types
+import GHC.Unit.Home
 
 newtype PD a = PD { unPD :: DynFlags -> PState -> ParseResult a }
 
@@ -73,3 +76,9 @@ getPtrOpts = do
       { po_profile     = profile
       , po_align_check = gopt Opt_AlignmentSanitisation dflags
       }
+
+-- | Return the UnitId of the home-unit. This is used to create labels.
+getHomeUnitId :: PD UnitId
+getHomeUnitId = do
+   dflags <- getDynFlags
+   pure (homeUnitId (mkHomeUnitFromFlags dflags))


=====================================
compiler/GHC/Core/Opt/DmdAnal.hs
=====================================
@@ -9,18 +9,20 @@
 
 {-# LANGUAGE CPP #-}
 
-module GHC.Core.Opt.DmdAnal ( dmdAnalProgram ) where
+module GHC.Core.Opt.DmdAnal
+   ( DmdAnalOpts(..)
+   , dmdAnalProgram
+   )
+where
 
 #include "HsVersions.h"
 
 import GHC.Prelude
 
-import GHC.Driver.Session
 import GHC.Core.Opt.WorkWrap.Utils
 import GHC.Types.Demand   -- All of it
 import GHC.Core
 import GHC.Core.Multiplicity ( scaledThing )
-import GHC.Core.Seq     ( seqBinds )
 import GHC.Utils.Outputable
 import GHC.Types.Var.Env
 import GHC.Types.Var.Set
@@ -29,7 +31,6 @@ import Data.List        ( mapAccumL )
 import GHC.Core.DataCon
 import GHC.Types.ForeignCall ( isSafeForeignCall )
 import GHC.Types.Id
-import GHC.Types.Id.Info
 import GHC.Core.Utils
 import GHC.Core.TyCon
 import GHC.Core.Type
@@ -41,7 +42,6 @@ import GHC.Utils.Panic
 import GHC.Data.Maybe         ( isJust )
 import GHC.Builtin.PrimOps
 import GHC.Builtin.Types.Prim ( realWorldStatePrimTy )
-import GHC.Utils.Error        ( dumpIfSet_dyn, DumpFormat (..) )
 import GHC.Types.Unique.Set
 
 {-
@@ -52,14 +52,21 @@ import GHC.Types.Unique.Set
 ************************************************************************
 -}
 
-dmdAnalProgram :: DynFlags -> FamInstEnvs -> CoreProgram -> IO CoreProgram
-dmdAnalProgram dflags fam_envs binds = do
-  let env             = emptyAnalEnv dflags fam_envs
-  let binds_plus_dmds = snd $ mapAccumL dmdAnalTopBind env binds
-  dumpIfSet_dyn dflags Opt_D_dump_str_signatures "Strictness signatures" FormatText $
-    dumpIdInfoOfProgram (pprIfaceStrictSig . strictnessInfo) binds_plus_dmds
-  -- See Note [Stamp out space leaks in demand analysis]
-  seqBinds binds_plus_dmds `seq` return binds_plus_dmds
+-- | Options for the demand analysis
+data DmdAnalOpts = DmdAnalOpts
+   { dmd_strict_dicts :: !Bool -- ^ Use strict dictionaries
+   }
+
+-- | Outputs a new copy of the Core program in which binders have been annotated
+-- with demand and strictness information.
+--
+-- Note: use `seqBinds` on the result to avoid leaks due to lazyness (cf Note
+-- [Stamp out space leaks in demand analysis])
+dmdAnalProgram :: DmdAnalOpts -> FamInstEnvs -> CoreProgram -> CoreProgram
+dmdAnalProgram opts fam_envs binds = binds_plus_dmds
+   where
+      env             = emptyAnalEnv opts fam_envs
+      binds_plus_dmds = snd $ mapAccumL dmdAnalTopBind env binds
 
 -- Analyse a (group of) top-level binding(s)
 dmdAnalTopBind :: AnalEnv
@@ -1235,31 +1242,13 @@ type DFunFlag = Bool  -- indicates if the lambda being considered is in the
 notArgOfDfun :: DFunFlag
 notArgOfDfun = False
 
-{-  Note [dmdAnalEnv performance]
-    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-It's tempting to think that removing the dynflags from AnalEnv would improve
-performance. After all when analysing recursive groups we end up allocating
-a lot of environments. However this is not the case.
-
-We do get some performance by making AnalEnv smaller. However very often we
-defer computation which means we have to capture the dynflags in the thunks
-we allocate. Doing this naively in practice causes more allocation than the
-removal of DynFlags saves us.
-
-In theory it should be possible to make this better if we are stricter in
-the analysis and therefore allocate fewer thunks. But I couldn't get there
-in a few hours and overall the impact on GHC here is small, and there are
-bigger fish to fry. So for new the env will keep a reference to the flags.
--}
-
-data AnalEnv
-  = AE { ae_dflags :: DynFlags -- See Note [dmdAnalEnv performance]
-       , ae_sigs   :: SigEnv
-       , ae_virgin :: Bool    -- True on first iteration only
+data AnalEnv = AE
+   { ae_strict_dicts :: !Bool -- ^ Enable strict dict
+   , ae_sigs         :: !SigEnv
+   , ae_virgin       :: !Bool -- ^ True on first iteration only
                               -- See Note [Initialising strictness]
-       , ae_fam_envs :: FamInstEnvs
- }
+   , ae_fam_envs     :: !FamInstEnvs
+   }
 
         -- We use the se_env to tell us whether to
         -- record info about a variable in the DmdEnv
@@ -1271,17 +1260,18 @@ data AnalEnv
 type SigEnv = VarEnv (StrictSig, TopLevelFlag)
 
 instance Outputable AnalEnv where
-  ppr (AE { ae_sigs = env, ae_virgin = virgin })
-    = text "AE" <+> braces (vcat
-         [ text "ae_virgin =" <+> ppr virgin
-         , text "ae_sigs =" <+> ppr env ])
-
-emptyAnalEnv :: DynFlags -> FamInstEnvs -> AnalEnv
-emptyAnalEnv dflags fam_envs
-    = AE { ae_dflags = dflags
-         , ae_sigs = emptySigEnv
-         , ae_virgin = True
-         , ae_fam_envs = fam_envs
+  ppr env = text "AE" <+> braces (vcat
+         [ text "ae_virgin =" <+> ppr (ae_virgin env)
+         , text "ae_strict_dicts =" <+> ppr (ae_strict_dicts env)
+         , text "ae_sigs =" <+> ppr (ae_sigs env)
+         ])
+
+emptyAnalEnv :: DmdAnalOpts -> FamInstEnvs -> AnalEnv
+emptyAnalEnv opts fam_envs
+    = AE { ae_strict_dicts = dmd_strict_dicts opts
+         , ae_sigs         = emptySigEnv
+         , ae_virgin       = True
+         , ae_fam_envs     = fam_envs
          }
 
 emptySigEnv :: SigEnv
@@ -1334,7 +1324,7 @@ findBndrDmd env arg_of_dfun dmd_ty id
     id_ty = idType id
 
     strictify dmd
-      | gopt Opt_DictsStrict (ae_dflags env)
+      | ae_strict_dicts env
              -- We never want to strictify a recursive let. At the moment
              -- annotateBndr is only call for non-recursive lets; if that
              -- changes, we need a RecFlag parameter and another guard here.


=====================================
compiler/GHC/Core/Opt/Pipeline.hs
=====================================
@@ -24,7 +24,7 @@ import GHC.Core.Ppr     ( pprCoreBindings, pprCoreExpr )
 import GHC.Core.Opt.OccurAnal ( occurAnalysePgm, occurAnalyseExpr )
 import GHC.Types.Id.Info
 import GHC.Core.Stats   ( coreBindsSize, coreBindsStats, exprSize )
-import GHC.Core.Utils   ( mkTicks, stripTicksTop )
+import GHC.Core.Utils   ( mkTicks, stripTicksTop, dumpIdInfoOfProgram )
 import GHC.Core.Lint    ( endPass, lintPassResult, dumpPassResult,
                           lintAnnots )
 import GHC.Core.Opt.Simplify       ( simplTopBinds, simplExpr, simplRules )
@@ -41,15 +41,17 @@ import GHC.Utils.Error  ( withTiming, withTimingD, DumpFormat (..) )
 import GHC.Types.Basic
 import GHC.Types.Var.Set
 import GHC.Types.Var.Env
+import GHC.Types.Demand
 import GHC.Core.Opt.LiberateCase ( liberateCase )
 import GHC.Core.Opt.StaticArgs   ( doStaticArgs )
 import GHC.Core.Opt.Specialise   ( specProgram)
 import GHC.Core.Opt.SpecConstr   ( specConstrProgram)
-import GHC.Core.Opt.DmdAnal      ( dmdAnalProgram )
+import GHC.Core.Opt.DmdAnal
 import GHC.Core.Opt.CprAnal      ( cprAnalProgram )
 import GHC.Core.Opt.CallArity    ( callArityAnalProgram )
 import GHC.Core.Opt.Exitify      ( exitifyProgram )
 import GHC.Core.Opt.WorkWrap     ( wwTopBinds )
+import GHC.Core.Seq (seqBinds)
 import GHC.Types.SrcLoc
 import GHC.Utils.Misc
 import GHC.Unit.Module.Env
@@ -484,7 +486,7 @@ doCorePass CoreDoExitify             = {-# SCC "Exitify" #-}
                                        doPass exitifyProgram
 
 doCorePass CoreDoDemand              = {-# SCC "DmdAnal" #-}
-                                       doPassDFM dmdAnalProgram
+                                       doPassDFM dmdAnal
 
 doCorePass CoreDoCpr                 = {-# SCC "CprAnal" #-}
                                        doPassDFM cprAnalProgram
@@ -1074,3 +1076,16 @@ transferIdInfo exported_id local_id
                                (ruleInfo local_info)
         -- Remember to set the function-name field of the
         -- rules as we transfer them from one function to another
+
+
+
+dmdAnal :: DynFlags -> FamInstEnvs -> CoreProgram -> IO CoreProgram
+dmdAnal dflags fam_envs binds = do
+  let opts = DmdAnalOpts
+               { dmd_strict_dicts = gopt Opt_DictsStrict dflags
+               }
+      binds_plus_dmds = dmdAnalProgram opts fam_envs binds
+  Err.dumpIfSet_dyn dflags Opt_D_dump_str_signatures "Strictness signatures" FormatText $
+    dumpIdInfoOfProgram (pprIfaceStrictSig . strictnessInfo) binds_plus_dmds
+  -- See Note [Stamp out space leaks in demand analysis] in GHC.Core.Opt.DmdAnal
+  seqBinds binds_plus_dmds `seq` return binds_plus_dmds


=====================================
compiler/GHC/Driver/Backpack.hs
=====================================
@@ -1,6 +1,8 @@
 {-# LANGUAGE CPP #-}
 {-# LANGUAGE FlexibleInstances #-}
 {-# LANGUAGE NondecreasingIndentation #-}
+{-# LANGUAGE OverloadedStrings #-}
+
 
 -- | This is the driver for the 'ghc --backpack' mode, which
 -- is a reimplementation of the "package manager" bits of
@@ -38,6 +40,7 @@ import GHC.Unit.State
 import GHC.Driver.Types
 import GHC.Data.StringBuffer
 import GHC.Data.FastString
+import qualified GHC.Data.ShortText as ST
 import GHC.Utils.Error
 import GHC.Types.SrcLoc
 import GHC.Driver.Main
@@ -340,8 +343,8 @@ buildUnit session cid insts lunit = do
             unitAbiDepends = [],
             unitLinkerOptions = case session of
                                  TcSession -> []
-                                 _ -> obj_files,
-            unitImportDirs = [ hi_dir ],
+                                 _ -> map ST.pack $ obj_files,
+            unitImportDirs = [ ST.pack $ hi_dir ],
             unitIsExposed = False,
             unitIsIndefinite = case session of
                                  TcSession -> True


=====================================
compiler/GHC/Driver/CodeOutput.hs
=====================================
@@ -32,6 +32,7 @@ import GHC.Cmm.CLabel
 import GHC.Driver.Types
 import GHC.Driver.Session
 import GHC.Driver.Ppr
+import qualified GHC.Data.ShortText as ST
 import GHC.Data.Stream           ( Stream )
 import qualified GHC.Data.Stream as Stream
 import GHC.SysTools.FileCleanup
@@ -211,7 +212,7 @@ outputForeignStubs dflags mod location stubs
         let rts_includes =
                let rts_pkg = unsafeLookupUnitId (unitState dflags) rtsUnitId in
                concatMap mk_include (unitIncludes rts_pkg)
-            mk_include i = "#include \"" ++ i ++ "\"\n"
+            mk_include i = "#include \"" ++ ST.unpack i ++ "\"\n"
 
             -- wrapper code mentions the ffi_arg type, which comes from ffi.h
             ffi_includes


=====================================
compiler/GHC/Driver/Config.hs
=====================================
@@ -32,7 +32,6 @@ initParserOpts =
   mkParserOpts
     <$> warningFlags
     <*> extensionFlags
-    <*> homeUnitId_
     <*> safeImportsOn
     <*> gopt Opt_Haddock
     <*> gopt Opt_KeepRawTokenStream


=====================================
compiler/GHC/Driver/Finder.hs
=====================================
@@ -43,6 +43,7 @@ import GHC.Unit.State
 
 import GHC.Driver.Types
 import GHC.Data.FastString
+import qualified GHC.Data.ShortText as ST
 import GHC.Utils.Misc
 import GHC.Builtin.Names ( gHC_PRIM )
 import GHC.Driver.Session
@@ -380,7 +381,7 @@ findPackageModule_ hsc_env mod pkg_conf =
 
      mk_hi_loc = mkHiOnlyModLocation dflags package_hisuf
 
-     import_dirs = unitImportDirs pkg_conf
+     import_dirs = map ST.unpack $ unitImportDirs pkg_conf
       -- we never look for a .hi-boot file in an external package;
       -- .hi-boot files only make sense for the home package.
   in


=====================================
compiler/GHC/Parser/Lexer.x
=====================================
@@ -55,7 +55,7 @@ module GHC.Parser.Lexer (
    P(..), ParseResult(..),
    allocateComments,
    MonadP(..),
-   getRealSrcLoc, getPState, withHomeUnitId,
+   getRealSrcLoc, getPState,
    failMsgP, failLocMsgP, srcParseFail,
    getErrorMessages, getMessages,
    popContext, pushModuleContext, setLastToken, setSrcLoc,
@@ -104,7 +104,6 @@ import GHC.Data.OrdList
 import GHC.Utils.Misc ( readRational, readHexRational )
 
 import GHC.Types.SrcLoc
-import GHC.Unit.Types
 import GHC.Types.Basic ( InlineSpec(..), RuleMatchInfo(..),
                          IntegralLit(..), FractionalLit(..),
                          SourceText(..) )
@@ -2210,10 +2209,8 @@ warnopt f options = f `EnumSet.member` pWarningFlags options
 -- | Parser options.
 --
 -- See 'mkParserOpts' to construct this.
-data ParserOpts = ParserOpts {
-    pWarningFlags   :: EnumSet WarningFlag
-  , pHomeUnitId     :: UnitId      -- ^ id of the unit currently being compiled
-                                   -- (only used in Cmm parser)
+data ParserOpts = ParserOpts
+  { pWarningFlags   :: EnumSet WarningFlag -- ^ enabled warning flags
   , pExtsBitmap     :: !ExtsBitmap -- ^ bitmap of permitted extensions
   }
 
@@ -2322,9 +2319,6 @@ failLocMsgP loc1 loc2 f =
 getPState :: P PState
 getPState = P $ \s -> POk s s
 
-withHomeUnitId :: (UnitId -> a) -> P a
-withHomeUnitId f = P $ \s@(PState{options = o}) -> POk s (f (pHomeUnitId o))
-
 getExts :: P ExtsBitmap
 getExts = P $ \s -> POk s (pExtsBitmap . options $ s)
 
@@ -2637,8 +2631,6 @@ data ExtBits
 mkParserOpts
   :: EnumSet WarningFlag        -- ^ warnings flags enabled
   -> EnumSet LangExt.Extension  -- ^ permitted language extensions enabled
-  -> UnitId                     -- ^ id of the unit currently being compiled
-                                -- (used in Cmm parser)
   -> Bool                       -- ^ are safe imports on?
   -> Bool                       -- ^ keeping Haddock comment tokens
   -> Bool                       -- ^ keep regular comment tokens
@@ -2650,11 +2642,10 @@ mkParserOpts
 
   -> ParserOpts
 -- ^ Given exactly the information needed, set up the 'ParserOpts'
-mkParserOpts warningFlags extensionFlags homeUnitId
+mkParserOpts warningFlags extensionFlags
   safeImports isHaddock rawTokStream usePosPrags =
     ParserOpts {
       pWarningFlags = warningFlags
-    , pHomeUnitId   = homeUnitId
     , pExtsBitmap   = safeHaskellBit .|. langExtBits .|. optBits
     }
   where


=====================================
compiler/GHC/Runtime/Linker.hs
=====================================
@@ -61,6 +61,7 @@ import GHC.Types.SrcLoc
 import qualified GHC.Data.Maybe as Maybes
 import GHC.Types.Unique.DSet
 import GHC.Data.FastString
+import qualified GHC.Data.ShortText as ST
 import GHC.Platform
 import GHC.SysTools
 import GHC.SysTools.FileCleanup
@@ -1282,10 +1283,10 @@ linkPackage hsc_env pkg
         let dflags    = hsc_dflags hsc_env
             platform  = targetPlatform dflags
             is_dyn    = interpreterDynamic (hscInterp hsc_env)
-            dirs | is_dyn    = Packages.unitLibraryDynDirs pkg
-                 | otherwise = Packages.unitLibraryDirs pkg
+            dirs | is_dyn    = map ST.unpack $ Packages.unitLibraryDynDirs pkg
+                 | otherwise = map ST.unpack $ Packages.unitLibraryDirs pkg
 
-        let hs_libs   =  Packages.unitLibraries pkg
+        let hs_libs   = map ST.unpack $ Packages.unitLibraries pkg
             -- The FFI GHCi import lib isn't needed as
             -- GHC.Runtime.Linker + rts/Linker.c link the
             -- interpreted references to FFI to the compiled FFI.
@@ -1300,11 +1301,12 @@ linkPackage hsc_env pkg
         -- libs do not exactly match the .so/.dll equivalents. So if the
         -- package file provides an "extra-ghci-libraries" field then we use
         -- that instead of the "extra-libraries" field.
-            extra_libs =
-                      (if null (Packages.unitExtDepLibsGhc pkg)
-                            then Packages.unitExtDepLibsSys pkg
-                            else Packages.unitExtDepLibsGhc pkg)
-                      ++ [ lib | '-':'l':lib <- Packages.unitLinkerOptions pkg ]
+            extdeplibs = map ST.unpack (if null (Packages.unitExtDepLibsGhc pkg)
+                                      then Packages.unitExtDepLibsSys pkg
+                                      else Packages.unitExtDepLibsGhc pkg)
+            linkerlibs = [ lib | '-':'l':lib <- (map ST.unpack $ Packages.unitLinkerOptions pkg) ]
+            extra_libs = extdeplibs ++ linkerlibs
+
         -- See Note [Fork/Exec Windows]
         gcc_paths <- getGCCPaths dflags (platformOS platform)
         dirs_env <- addEnvPaths "LIBRARY_PATH" dirs
@@ -1434,8 +1436,8 @@ loadFrameworks :: HscEnv -> Platform -> UnitInfo -> IO ()
 loadFrameworks hsc_env platform pkg
     = when (platformUsesFrameworks platform) $ mapM_ load frameworks
   where
-    fw_dirs    = Packages.unitExtDepFrameworkDirs pkg
-    frameworks = Packages.unitExtDepFrameworks pkg
+    fw_dirs    = map ST.unpack $ Packages.unitExtDepFrameworkDirs pkg
+    frameworks = map ST.unpack $ Packages.unitExtDepFrameworks pkg
 
     load fw = do  r <- loadFramework hsc_env fw_dirs fw
                   case r of


=====================================
compiler/GHC/SysTools/ExtraObj.hs
=====================================
@@ -25,6 +25,7 @@ import GHC.Unit
 import GHC.SysTools.Elf
 import GHC.Utils.Misc
 import GHC.Prelude
+import qualified GHC.Data.ShortText as ST
 
 import Control.Monad
 import Data.Maybe
@@ -57,7 +58,7 @@ mkExtraObj dflags extn xs
       -- we're compiling C or assembler. When compiling C, we pass the usual
       -- set of include directories and PIC flags.
       cOpts = map Option (picCCOpts dflags)
-                    ++ map (FileOption "-I")
+                    ++ map (FileOption "-I" . ST.unpack)
                             (unitIncludeDirs $ unsafeLookupUnit pkgs rtsUnit)
 
       -- When compiling assembler code, we drop the usual C options, and if the


=====================================
compiler/GHC/Unit/Info.hs
=====================================
@@ -31,6 +31,7 @@ import Data.Version
 import Data.Bifunctor
 
 import GHC.Data.FastString
+import qualified GHC.Data.ShortText as ST
 import GHC.Utils.Outputable
 import GHC.Unit.Module as Module
 import GHC.Types.Unique
@@ -124,21 +125,21 @@ pprUnitInfo GenericUnitInfo {..} =
       field "exposed-modules"      (ppr unitExposedModules),
       field "hidden-modules"       (fsep (map ppr unitHiddenModules)),
       field "trusted"              (ppr unitIsTrusted),
-      field "import-dirs"          (fsep (map text unitImportDirs)),
-      field "library-dirs"         (fsep (map text unitLibraryDirs)),
-      field "dynamic-library-dirs" (fsep (map text unitLibraryDynDirs)),
-      field "hs-libraries"         (fsep (map text unitLibraries)),
-      field "extra-libraries"      (fsep (map text unitExtDepLibsSys)),
-      field "extra-ghci-libraries" (fsep (map text unitExtDepLibsGhc)),
-      field "include-dirs"         (fsep (map text unitIncludeDirs)),
-      field "includes"             (fsep (map text unitIncludes)),
+      field "import-dirs"          (fsep (map (text . ST.unpack) unitImportDirs)),
+      field "library-dirs"         (fsep (map (text . ST.unpack) unitLibraryDirs)),
+      field "dynamic-library-dirs" (fsep (map (text . ST.unpack) unitLibraryDynDirs)),
+      field "hs-libraries"         (fsep (map (text . ST.unpack) unitLibraries)),
+      field "extra-libraries"      (fsep (map (text . ST.unpack) unitExtDepLibsSys)),
+      field "extra-ghci-libraries" (fsep (map (text . ST.unpack) unitExtDepLibsGhc)),
+      field "include-dirs"         (fsep (map (text . ST.unpack) unitIncludeDirs)),
+      field "includes"             (fsep (map (text . ST.unpack) unitIncludes)),
       field "depends"              (fsep (map ppr  unitDepends)),
-      field "cc-options"           (fsep (map text unitCcOptions)),
-      field "ld-options"           (fsep (map text unitLinkerOptions)),
-      field "framework-dirs"       (fsep (map text unitExtDepFrameworkDirs)),
-      field "frameworks"           (fsep (map text unitExtDepFrameworks)),
-      field "haddock-interfaces"   (fsep (map text unitHaddockInterfaces)),
-      field "haddock-html"         (fsep (map text unitHaddockHTMLs))
+      field "cc-options"           (fsep (map (text . ST.unpack) unitCcOptions)),
+      field "ld-options"           (fsep (map (text . ST.unpack) unitLinkerOptions)),
+      field "framework-dirs"       (fsep (map (text . ST.unpack) unitExtDepFrameworkDirs)),
+      field "frameworks"           (fsep (map (text . ST.unpack) unitExtDepFrameworks)),
+      field "haddock-interfaces"   (fsep (map (text . ST.unpack) unitHaddockInterfaces)),
+      field "haddock-html"         (fsep (map (text . ST.unpack) unitHaddockHTMLs))
     ]
   where
     field name body = text name <> colon <+> nest 4 body


=====================================
compiler/GHC/Unit/State.hs
=====================================
@@ -99,6 +99,7 @@ import GHC.Data.Maybe
 
 import System.Environment ( getEnv )
 import GHC.Data.FastString
+import qualified GHC.Data.ShortText as ST
 import GHC.Utils.Error  ( debugTraceMsg, MsgDoc, dumpIfSet_dyn,
                           withTiming, DumpFormat (..) )
 import GHC.Utils.Exception
@@ -749,7 +750,7 @@ mungeUnitInfo :: FilePath -> FilePath
                    -> UnitInfo -> UnitInfo
 mungeUnitInfo top_dir pkgroot =
     mungeDynLibFields
-  . mungeUnitInfoPaths top_dir pkgroot
+  . mungeUnitInfoPaths (ST.pack top_dir) (ST.pack pkgroot)
 
 mungeDynLibFields :: UnitInfo -> UnitInfo
 mungeDynLibFields pkg =
@@ -1797,7 +1798,7 @@ getUnitIncludePath ctx unit_state home_unit pkgs =
   collectIncludeDirs `fmap` getPreloadUnitsAnd ctx unit_state home_unit pkgs
 
 collectIncludeDirs :: [UnitInfo] -> [FilePath]
-collectIncludeDirs ps = ordNub (filter notNull (concatMap unitIncludeDirs ps))
+collectIncludeDirs ps = map ST.unpack $ ordNub (filter (not . ST.null) (concatMap unitIncludeDirs ps))
 
 -- | Find all the library paths in these and the preload packages
 getUnitLibraryPath :: SDocContext -> UnitState -> HomeUnit -> Ways -> [UnitId] -> IO [String]
@@ -1822,8 +1823,8 @@ collectLinkOpts :: DynFlags -> [UnitInfo] -> ([String], [String], [String])
 collectLinkOpts dflags ps =
     (
         concatMap (map ("-l" ++) . packageHsLibs dflags) ps,
-        concatMap (map ("-l" ++) . unitExtDepLibsSys) ps,
-        concatMap unitLinkerOptions ps
+        concatMap (map ("-l" ++) . map ST.unpack . unitExtDepLibsSys) ps,
+        concatMap (map ST.unpack . unitLinkerOptions) ps
     )
 collectArchives :: DynFlags -> UnitInfo -> IO [FilePath]
 collectArchives dflags pc =
@@ -1831,7 +1832,7 @@ collectArchives dflags pc =
                         | searchPath <- searchPaths
                         , lib <- libs ]
   where searchPaths = ordNub . filter notNull . libraryDirsForWay (ways dflags) $ pc
-        libs        = packageHsLibs dflags pc ++ unitExtDepLibsSys pc
+        libs        = packageHsLibs dflags pc ++ (map ST.unpack $ unitExtDepLibsSys pc)
 
 getLibs :: DynFlags -> [UnitId] -> IO [(String,String)]
 getLibs dflags pkgs = do
@@ -1846,7 +1847,7 @@ getLibs dflags pkgs = do
     filterM (doesFileExist . fst) candidates
 
 packageHsLibs :: DynFlags -> UnitInfo -> [String]
-packageHsLibs dflags p = map (mkDynName . addSuffix) (unitLibraries p)
+packageHsLibs dflags p = map (mkDynName . addSuffix . ST.unpack) (unitLibraries p)
   where
         ways0 = ways dflags
 
@@ -1895,27 +1896,27 @@ packageHsLibs dflags p = map (mkDynName . addSuffix) (unitLibraries p)
 
 -- | Either the 'unitLibraryDirs' or 'unitLibraryDynDirs' as appropriate for the way.
 libraryDirsForWay :: Ways -> UnitInfo -> [String]
-libraryDirsForWay ws
-  | WayDyn `elem` ws = unitLibraryDynDirs
-  | otherwise        = unitLibraryDirs
+libraryDirsForWay ws ui
+  | WayDyn `elem` ws = map ST.unpack $ unitLibraryDynDirs ui
+  | otherwise        = map ST.unpack $ unitLibraryDirs ui
 
 -- | Find all the C-compiler options in these and the preload packages
 getUnitExtraCcOpts :: SDocContext -> UnitState -> HomeUnit -> [UnitId] -> IO [String]
 getUnitExtraCcOpts ctx unit_state home_unit pkgs = do
   ps <- getPreloadUnitsAnd ctx unit_state home_unit pkgs
-  return (concatMap unitCcOptions ps)
+  return $ map ST.unpack (concatMap unitCcOptions ps)
 
 -- | Find all the package framework paths in these and the preload packages
 getUnitFrameworkPath :: SDocContext -> UnitState -> HomeUnit -> [UnitId] -> IO [String]
 getUnitFrameworkPath ctx unit_state home_unit pkgs = do
   ps <- getPreloadUnitsAnd ctx unit_state home_unit pkgs
-  return (ordNub (filter notNull (concatMap unitExtDepFrameworkDirs ps)))
+  return $ map ST.unpack (ordNub (filter (not . ST.null) (concatMap unitExtDepFrameworkDirs ps)))
 
 -- | Find all the package frameworks in these and the preload packages
 getUnitFrameworks :: SDocContext -> UnitState -> HomeUnit -> [UnitId] -> IO [String]
 getUnitFrameworks ctx unit_state home_unit pkgs = do
   ps <- getPreloadUnitsAnd ctx unit_state home_unit pkgs
-  return (concatMap unitExtDepFrameworks ps)
+  return $ map ST.unpack (concatMap unitExtDepFrameworks ps)
 
 -- -----------------------------------------------------------------------------
 -- Package Utils


=====================================
compiler/ghc.cabal.in
=====================================
@@ -239,7 +239,7 @@ Library
         GHC.Cmm.Lint
         GHC.Cmm.Liveness
         GHC.Cmm.MachOp
-        GHC.Cmm.Monad
+        GHC.Cmm.Parser.Monad
         GHC.Cmm.Switch
         GHC.Cmm.Node
         GHC.Cmm.Opt
@@ -547,7 +547,6 @@ Library
         GHC.Data.BooleanFormula
         GHC.Utils.BufHandle
         GHC.Data.Graph.Directed
-        GHC.Utils.Encoding
         GHC.Utils.IO.Unsafe
         GHC.Data.FastMutInt
         GHC.Data.FastString


=====================================
libraries/ghc-boot/GHC/Data/ShortText.hs
=====================================
@@ -0,0 +1,112 @@
+{-# LANGUAGE BangPatterns, MagicHash, UnboxedTuples, GeneralizedNewtypeDeriving, DerivingStrategies #-}
+{-# OPTIONS_GHC -O2 -funbox-strict-fields #-}
+
+-- |
+-- An Unicode string for internal GHC use. Meant to replace String
+-- in places where being a lazy linked is not very useful and a more
+-- memory efficient data structure is desirable.
+
+-- Very similar to FastString, but not hash-consed and with some extra instances and
+-- functions for serialisation and I/O. Should be imported qualified.
+
+module GHC.Data.ShortText (
+        -- * ShortText
+        ShortText(..),
+        -- ** Conversion to and from String
+        pack,
+        unpack,
+        -- ** Operations
+        codepointLength,
+        byteLength,
+        GHC.Data.ShortText.null,
+        splitFilePath,
+        GHC.Data.ShortText.head,
+        stripPrefix
+  ) where
+
+import Prelude
+
+import Control.Monad (guard)
+import Control.DeepSeq as DeepSeq
+import Data.Binary
+import qualified Data.ByteString.Char8 as B8
+import qualified Data.ByteString.Short.Internal as SBS
+import GHC.Exts
+import GHC.IO
+import GHC.Utils.Encoding
+import System.FilePath (isPathSeparator)
+
+{-| A 'ShortText' is a modified UTF-8 encoded string meant for short strings like
+file paths, module descriptions, etc.
+-}
+newtype ShortText = ShortText { contents :: SBS.ShortByteString
+                              }
+                              deriving stock (Show)
+                              deriving newtype (Eq, Ord, Binary, Semigroup, Monoid, NFData)
+
+-- We don't want to derive this one from ShortByteString since that one won't handle
+-- UTF-8 characters correctly.
+instance IsString ShortText where
+  fromString = pack
+
+-- | /O(n)/ Returns the length of the 'ShortText' in characters.
+codepointLength :: ShortText -> Int
+codepointLength st = unsafeDupablePerformIO $ countUTF8Chars (contents st)
+-- | /O(1)/ Returns the length of the 'ShortText' in bytes.
+byteLength :: ShortText -> Int
+byteLength st = SBS.length $ contents st
+
+-- | /O(n)/ Convert a 'String' into a 'ShortText'.
+pack :: String -> ShortText
+pack s = unsafeDupablePerformIO $ ShortText <$> utf8EncodeShortByteString s
+
+-- | /O(n)/ Convert a 'ShortText' into a 'String'.
+unpack :: ShortText -> String
+unpack st = utf8DecodeShortByteString $ contents st
+
+-- | /O(1)/ Test whether the 'ShortText' is the empty string.
+null :: ShortText -> Bool
+null st = SBS.null $ contents st
+
+-- | /O(n)/ Split a 'ShortText' representing a file path into its components by separating
+-- on the file separator characters for this platform.
+splitFilePath :: ShortText -> [ShortText]
+-- This seems dangerous, but since the path separators are in the ASCII set they map down
+-- to a single byte when encoded in UTF-8 and so this should work even when casting to ByteString.
+-- We DeepSeq.force the resulting list so that we can be sure that no references to the
+-- bytestring in `st'` remain in unevaluated thunks, which might prevent `st'` from being
+-- collected by the GC.
+splitFilePath st = DeepSeq.force $ map (ShortText . SBS.toShort) $ B8.splitWith isPathSeparator st'
+  where st' = SBS.fromShort $ contents st
+
+-- | /O(1)/ Returns the first UTF-8 codepoint in the 'ShortText'. Depending on the string in
+-- question, this may or may not be the actual first character in the string due to Unicode
+-- non-printable characters.
+head :: ShortText -> Char
+head st
+  | SBS.null $ contents st = error "head: Empty ShortText"
+  | otherwise              = Prelude.head $ unpack st
+
+-- | /O(n)/ The 'stripPrefix' function takes two 'ShortText's and returns 'Just' the remainder of
+-- the second iff the first is its prefix, and otherwise Nothing.
+stripPrefix :: ShortText -> ShortText -> Maybe ShortText
+stripPrefix prefix st = do
+  let !(SBS.SBS prefixBA) = contents prefix
+  let !(SBS.SBS stBA)     = contents st
+  let prefixLength        = sizeofByteArray# prefixBA
+  let stLength            = sizeofByteArray# stBA
+  -- If the length of 'st' is not >= than the length of 'prefix', it is impossible for 'prefix'
+  -- to be the prefix of `st`.
+  guard $ (I# stLength) >= (I# prefixLength)
+  -- 'prefix' is a prefix of 'st' if the first <length of prefix> bytes of 'st'
+  -- are equal to 'prefix'
+  guard $ I# (compareByteArrays# prefixBA 0# stBA 0# prefixLength) == 0
+  -- Allocate a new ByteArray# and copy the remainder of the 'st' into it
+  unsafeDupablePerformIO $ do
+    let newBAsize = (stLength -# prefixLength)
+    newSBS <- IO $ \s0 ->
+      let !(# s1, ba #)  = newByteArray# newBAsize s0
+          s2             = copyByteArray# stBA prefixLength ba 0# newBAsize s1
+          !(# s3, fba #) = unsafeFreezeByteArray# ba s2
+      in  (# s3, SBS.SBS fba #)
+    return . Just . ShortText $ newSBS


=====================================
libraries/ghc-boot/GHC/Unit/Database.hs
=====================================
@@ -12,6 +12,7 @@
 {-# LANGUAGE TypeSynonymInstances #-}
 {-# LANGUAGE ExplicitNamespaces #-}
 {-# LANGUAGE RecordWildCards #-}
+{-# LANGUAGE OverloadedStrings #-}
 
 -----------------------------------------------------------------------------
 -- |
@@ -82,16 +83,16 @@ import Data.Bifunctor
 import Data.Binary as Bin
 import Data.Binary.Put as Bin
 import Data.Binary.Get as Bin
+import Data.List (intersperse)
 import Control.Exception as Exception
 import Control.Monad (when)
 import System.FilePath as FilePath
-import qualified System.FilePath.Posix as FilePath.Posix
 import System.IO
 import System.IO.Error
 import GHC.IO.Exception (IOErrorType(InappropriateType))
+import qualified GHC.Data.ShortText as ST
 import GHC.IO.Handle.Lock
 import System.Directory
-import Data.List (stripPrefix)
 
 -- | @ghc-boot@'s UnitInfo, serialized to the database.
 type DbUnitInfo      = GenericUnitInfo BS.ByteString BS.ByteString BS.ByteString BS.ByteString BS.ByteString DbModule
@@ -142,28 +143,28 @@ data GenericUnitInfo compid srcpkgid srcpkgname uid modulename mod = GenericUnit
       -- components that can be registered in a database and used by other
       -- modules.
 
-   , unitAbiHash        :: String
+   , unitAbiHash        :: ST.ShortText
       -- ^ ABI hash used to avoid mixing up units compiled with different
       -- dependencies, compiler, options, etc.
 
    , unitDepends        :: [uid]
       -- ^ Identifiers of the units this one depends on
 
-   , unitAbiDepends     :: [(uid, String)]
+   , unitAbiDepends     :: [(uid, ST.ShortText)]
      -- ^ Like 'unitDepends', but each dependency is annotated with the ABI hash
      -- we expect the dependency to respect.
 
-   , unitImportDirs     :: [FilePath]
+   , unitImportDirs     :: [FilePathST]
       -- ^ Directories containing module interfaces
 
-   , unitLibraries      :: [String]
+   , unitLibraries      :: [ST.ShortText]
       -- ^ Names of the Haskell libraries provided by this unit
 
-   , unitExtDepLibsSys  :: [String]
+   , unitExtDepLibsSys  :: [ST.ShortText]
       -- ^ Names of the external system libraries that this unit depends on. See
       -- also `unitExtDepLibsGhc` field.
 
-   , unitExtDepLibsGhc  :: [String]
+   , unitExtDepLibsGhc  :: [ST.ShortText]
       -- ^ Because of slight differences between the GHC dynamic linker (in
       -- GHC.Runtime.Linker) and the
       -- native system linker, some packages have to link with a different list
@@ -174,46 +175,46 @@ data GenericUnitInfo compid srcpkgid srcpkgname uid modulename mod = GenericUnit
       -- If this field is set, then we use that instead of the
       -- `unitExtDepLibsSys` field.
 
-   , unitLibraryDirs    :: [FilePath]
+   , unitLibraryDirs    :: [FilePathST]
       -- ^ Directories containing libraries provided by this unit. See also
       -- `unitLibraryDynDirs`.
       --
       -- It seems to be used to store paths to external library dependencies
       -- too.
 
-   , unitLibraryDynDirs :: [FilePath]
+   , unitLibraryDynDirs :: [FilePathST]
       -- ^ Directories containing the dynamic libraries provided by this unit.
       -- See also `unitLibraryDirs`.
       --
       -- It seems to be used to store paths to external dynamic library
       -- dependencies too.
 
-   , unitExtDepFrameworks :: [String]
+   , unitExtDepFrameworks :: [ST.ShortText]
       -- ^ Names of the external MacOS frameworks that this unit depends on.
 
-   , unitExtDepFrameworkDirs :: [FilePath]
+   , unitExtDepFrameworkDirs :: [FilePathST]
       -- ^ Directories containing MacOS frameworks that this unit depends
       -- on.
 
-   , unitLinkerOptions  :: [String]
+   , unitLinkerOptions  :: [ST.ShortText]
       -- ^ Linker (e.g. ld) command line options
 
-   , unitCcOptions      :: [String]
+   , unitCcOptions      :: [ST.ShortText]
       -- ^ C compiler options that needs to be passed to the C compiler when we
       -- compile some C code against this unit.
 
-   , unitIncludes       :: [String]
+   , unitIncludes       :: [ST.ShortText]
       -- ^ C header files that are required by this unit (provided by this unit
       -- or external)
 
-   , unitIncludeDirs    :: [FilePath]
+   , unitIncludeDirs    :: [FilePathST]
       -- ^ Directories containing C header files that this unit depends
       -- on.
 
-   , unitHaddockInterfaces :: [FilePath]
+   , unitHaddockInterfaces :: [FilePathST]
       -- ^ Paths to Haddock interface files for this unit
 
-   , unitHaddockHTMLs   :: [FilePath]
+   , unitHaddockHTMLs   :: [FilePathST]
       -- ^ Paths to Haddock directories containing HTML files
 
    , unitExposedModules :: [(modulename, Maybe mod)]
@@ -242,6 +243,8 @@ data GenericUnitInfo compid srcpkgid srcpkgname uid modulename mod = GenericUnit
    }
    deriving (Eq, Show)
 
+type FilePathST = ST.ShortText
+
 -- | Convert between GenericUnitInfo instances
 mapGenericUnitInfo
    :: (uid1 -> uid2)
@@ -646,12 +649,12 @@ instance Binary DbInstUnitId where
 -- Also perform a similar substitution for the older GHC-specific
 -- "$topdir" variable. The "topdir" is the location of the ghc
 -- installation (obtained from the -B option).
-mkMungePathUrl :: FilePath -> FilePath -> (FilePath -> FilePath, FilePath -> FilePath)
+mkMungePathUrl :: FilePathST -> FilePathST -> (FilePathST -> FilePathST, FilePathST -> FilePathST)
 mkMungePathUrl top_dir pkgroot = (munge_path, munge_url)
    where
     munge_path p
-      | Just p' <- stripVarPrefix "${pkgroot}" p = pkgroot ++ p'
-      | Just p' <- stripVarPrefix "$topdir"    p = top_dir ++ p'
+      | Just p' <- stripVarPrefix "${pkgroot}" p = mappend pkgroot p'
+      | Just p' <- stripVarPrefix "$topdir"    p = mappend top_dir p'
       | otherwise                                = p
 
     munge_url p
@@ -659,20 +662,19 @@ mkMungePathUrl top_dir pkgroot = (munge_path, munge_url)
       | Just p' <- stripVarPrefix "$httptopdir"   p = toUrlPath top_dir p'
       | otherwise                                   = p
 
-    toUrlPath r p = "file:///"
-                 -- URLs always use posix style '/' separators:
-                 ++ FilePath.Posix.joinPath
-                        (r : -- We need to drop a leading "/" or "\\"
-                             -- if there is one:
-                             dropWhile (all isPathSeparator)
-                                       (FilePath.splitDirectories p))
+    toUrlPath r p = mconcat $ "file:///" : (intersperse "/" (r : (splitDirectories p)))
+                                          -- URLs always use posix style '/' separators
+
+    -- We need to drop a leading "/" or "\\" if there is one:
+    splitDirectories :: FilePathST -> [FilePathST]
+    splitDirectories p  = filter (not . ST.null) $ ST.splitFilePath p
 
     -- We could drop the separator here, and then use </> above. However,
     -- by leaving it in and using ++ we keep the same path separator
     -- rather than letting FilePath change it to use \ as the separator
-    stripVarPrefix var path = case stripPrefix var path of
-                              Just [] -> Just []
-                              Just cs@(c : _) | isPathSeparator c -> Just cs
+    stripVarPrefix var path = case ST.stripPrefix var path of
+                              Just "" -> Just ""
+                              Just cs | isPathSeparator (ST.head cs) -> Just cs
                               _ -> Nothing
 
 
@@ -684,7 +686,7 @@ mkMungePathUrl top_dir pkgroot = (munge_path, munge_url)
 -- Also perform a similar substitution for the older GHC-specific
 -- "$topdir" variable. The "topdir" is the location of the ghc
 -- installation (obtained from the -B option).
-mungeUnitInfoPaths :: FilePath -> FilePath -> GenericUnitInfo a b c d e f -> GenericUnitInfo a b c d e f
+mungeUnitInfoPaths :: FilePathST -> FilePathST -> GenericUnitInfo a b c d e f -> GenericUnitInfo a b c d e f
 mungeUnitInfoPaths top_dir pkgroot pkg =
    -- TODO: similar code is duplicated in utils/ghc-pkg/Main.hs
     pkg


=====================================
compiler/GHC/Utils/Encoding.hs → libraries/ghc-boot/GHC/Utils/Encoding.hs
=====================================
@@ -1,7 +1,10 @@
 {-# LANGUAGE BangPatterns, MagicHash, UnboxedTuples #-}
-{-# OPTIONS_GHC -O2 #-}
+{-# OPTIONS_GHC -O2 -fno-warn-name-shadowing #-}
 -- We always optimise this, otherwise performance of a non-optimised
--- compiler is severely affected
+-- compiler is severely affected. This module used to live in the `ghc`
+-- package but has been moved to `ghc-boot` because the definition
+-- of the package database (needed in both ghc and in ghc-pkg) lives in
+-- `ghc-boot` and uses ShortText, which in turn depends on this module.
 
 -- -----------------------------------------------------------------------------
 --
@@ -36,7 +39,7 @@ module GHC.Utils.Encoding (
         toBase62Padded
   ) where
 
-import GHC.Prelude
+import Prelude
 
 import Foreign
 import Foreign.ForeignPtr.Unsafe (unsafeForeignPtrToPtr)


=====================================
libraries/ghc-boot/ghc-boot.cabal.in
=====================================
@@ -38,6 +38,8 @@ Library
 
     exposed-modules:
             GHC.BaseDir
+            GHC.Data.ShortText
+            GHC.Utils.Encoding
             GHC.LanguageExtensions
             GHC.Unit.Database
             GHC.Serialized
@@ -68,4 +70,5 @@ Library
                    containers >= 0.5 && < 0.7,
                    directory  >= 1.2 && < 1.4,
                    filepath   >= 1.3 && < 1.5,
+                   deepseq    >= 1.4 && < 1.5,
                    ghc-boot-th == @ProjectVersionMunged@


=====================================
utils/ghc-pkg/Main.hs
=====================================
@@ -31,12 +31,13 @@
 module Main (main) where
 
 import qualified GHC.Unit.Database as GhcPkg
-import GHC.Unit.Database
+import GHC.Unit.Database hiding (mkMungePathUrl)
 import GHC.HandleEncoding
 import GHC.BaseDir (getBaseDir)
 import GHC.Settings.Utils (getTargetArchOS, maybeReadFuzzy)
 import GHC.Platform.Host (hostPlatformArchOS)
 import GHC.UniqueSubdir (uniqueSubdir)
+import qualified GHC.Data.ShortText as ST
 import GHC.Version ( cProjectVersion )
 import qualified Distribution.Simple.PackageIndex as PackageIndex
 import qualified Data.Graph as Graph
@@ -56,6 +57,7 @@ import Distribution.Types.MungedPackageId
 import Distribution.Simple.Utils (toUTF8BS, writeUTF8File, readUTF8File)
 import qualified Data.Version as Version
 import System.FilePath as FilePath
+import qualified System.FilePath.Posix as FilePath.Posix
 import System.Directory ( getAppUserDataDirectory, createDirectoryIfMissing,
                           getModificationTime )
 import Text.Printf
@@ -990,6 +992,35 @@ mungePackagePaths top_dir pkgroot pkg =
     munge_urls  = map munge_url
     (munge_path,munge_url) = mkMungePathUrl top_dir pkgroot
 
+mkMungePathUrl :: FilePath -> FilePath -> (FilePath -> FilePath, FilePath -> FilePath)
+mkMungePathUrl top_dir pkgroot = (munge_path, munge_url)
+   where
+    munge_path p
+      | Just p' <- stripVarPrefix "${pkgroot}" p = pkgroot ++ p'
+      | Just p' <- stripVarPrefix "$topdir"    p = top_dir ++ p'
+      | otherwise                                = p
+
+    munge_url p
+      | Just p' <- stripVarPrefix "${pkgrooturl}" p = toUrlPath pkgroot p'
+      | Just p' <- stripVarPrefix "$httptopdir"   p = toUrlPath top_dir p'
+      | otherwise                                   = p
+
+    toUrlPath r p = "file:///"
+                 -- URLs always use posix style '/' separators:
+                 ++ FilePath.Posix.joinPath
+                        (r : -- We need to drop a leading "/" or "\\"
+                             -- if there is one:
+                             dropWhile (all isPathSeparator)
+                                       (FilePath.splitDirectories p))
+
+    -- We could drop the separator here, and then use </> above. However,
+    -- by leaving it in and using ++ we keep the same path separator
+    -- rather than letting FilePath change it to use \ as the separator
+    stripVarPrefix var path = case stripPrefix var path of
+                              Just [] -> Just []
+                              Just cs@(c : _) | isPathSeparator c -> Just cs
+                              _ -> Nothing
+
 -- -----------------------------------------------------------------------------
 -- Workaround for old single-file style package dbs
 
@@ -1331,7 +1362,7 @@ recomputeValidAbiDeps db pkg =
     newAbiDeps =
       catMaybes . flip map (GhcPkg.unitAbiDepends pkg) $ \(k, _) ->
         case filter (\d -> installedUnitId d == k) db of
-          [x] -> Just (k, unAbiHash (abiHash x))
+          [x] -> Just (k, ST.pack $ unAbiHash (abiHash x))
           _   -> Nothing
     abiDepsUpdated =
       GhcPkg.unitAbiDepends pkg /= newAbiDeps
@@ -1370,22 +1401,22 @@ convertPackageInfoToCacheFormat pkg =
        GhcPkg.unitComponentName  =
          fmap (mkPackageName . unUnqualComponentName) (libraryNameString $ sourceLibName pkg),
        GhcPkg.unitDepends        = depends pkg,
-       GhcPkg.unitAbiDepends     = map (\(AbiDependency k v) -> (k,unAbiHash v)) (abiDepends pkg),
-       GhcPkg.unitAbiHash        = unAbiHash (abiHash pkg),
-       GhcPkg.unitImportDirs     = importDirs pkg,
-       GhcPkg.unitLibraries      = hsLibraries pkg,
-       GhcPkg.unitExtDepLibsSys  = extraLibraries pkg,
-       GhcPkg.unitExtDepLibsGhc  = extraGHCiLibraries pkg,
-       GhcPkg.unitLibraryDirs    = libraryDirs pkg,
-       GhcPkg.unitLibraryDynDirs = libraryDynDirs pkg,
-       GhcPkg.unitExtDepFrameworks = frameworks pkg,
-       GhcPkg.unitExtDepFrameworkDirs = frameworkDirs pkg,
-       GhcPkg.unitLinkerOptions  = ldOptions pkg,
-       GhcPkg.unitCcOptions      = ccOptions pkg,
-       GhcPkg.unitIncludes       = includes pkg,
-       GhcPkg.unitIncludeDirs    = includeDirs pkg,
-       GhcPkg.unitHaddockInterfaces = haddockInterfaces pkg,
-       GhcPkg.unitHaddockHTMLs   = haddockHTMLs pkg,
+       GhcPkg.unitAbiDepends     = map (\(AbiDependency k v) -> (k,ST.pack $ unAbiHash v)) (abiDepends pkg),
+       GhcPkg.unitAbiHash        = ST.pack $ unAbiHash (abiHash pkg),
+       GhcPkg.unitImportDirs     = map ST.pack $ importDirs pkg,
+       GhcPkg.unitLibraries      = map ST.pack $ hsLibraries pkg,
+       GhcPkg.unitExtDepLibsSys  = map ST.pack $ extraLibraries pkg,
+       GhcPkg.unitExtDepLibsGhc  = map ST.pack $ extraGHCiLibraries pkg,
+       GhcPkg.unitLibraryDirs    = map ST.pack $ libraryDirs pkg,
+       GhcPkg.unitLibraryDynDirs = map ST.pack $ libraryDynDirs pkg,
+       GhcPkg.unitExtDepFrameworks = map ST.pack $ frameworks pkg,
+       GhcPkg.unitExtDepFrameworkDirs = map ST.pack $ frameworkDirs pkg,
+       GhcPkg.unitLinkerOptions  = map ST.pack $ ldOptions pkg,
+       GhcPkg.unitCcOptions      = map ST.pack $ ccOptions pkg,
+       GhcPkg.unitIncludes       = map ST.pack $ includes pkg,
+       GhcPkg.unitIncludeDirs    = map ST.pack $ includeDirs pkg,
+       GhcPkg.unitHaddockInterfaces = map ST.pack $ haddockInterfaces pkg,
+       GhcPkg.unitHaddockHTMLs   = map ST.pack $ haddockHTMLs pkg,
        GhcPkg.unitExposedModules = map convertExposed (exposedModules pkg),
        GhcPkg.unitHiddenModules  = hiddenModules pkg,
        GhcPkg.unitIsIndefinite   = indefinite pkg,


=====================================
utils/haddock
=====================================
@@ -1 +1 @@
-Subproject commit 7b5972402afad755cd45aaad1a96aac509e9d5d2
+Subproject commit 6f16399e0320d0ef5e6c3dd0329ce7ed3715b6b2



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/431aea3ecec5acafcdd71f24a48562d9f13121b9...d8a9cdb6eaa8f70d8a6c4c7f84f762fab3f557ff

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/431aea3ecec5acafcdd71f24a48562d9f13121b9...d8a9cdb6eaa8f70d8a6c4c7f84f762fab3f557ff
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20201012/568861dd/attachment-0001.html>


More information about the ghc-commits mailing list