[Git][ghc/ghc][master] Initial ShortText code and conversion of package db code

Marge Bot gitlab at gitlab.haskell.org
Tue Oct 13 04:12:56 UTC 2020



 Marge Bot pushed to branch master at Glasgow Haskell Compiler / GHC


Commits:
7fdcce6d by Wander Hillen at 2020-10-13T00:12:47-04:00
Initial ShortText code and conversion of package db code

Metric Decrease:
    Naperian
    T10421
    T10421a
    T10547
    T12150
    T12234
    T12425
    T13035
    T18140
    T18304
    T5837
    T6048
    T13253-spj
    T18282
    T18223
    T3064
    T9961
Metric Increase
    T13701

HFSKJH

- - - - -


13 changed files:

- compiler/GHC/Driver/Backpack.hs
- compiler/GHC/Driver/CodeOutput.hs
- compiler/GHC/Driver/Finder.hs
- compiler/GHC/Runtime/Linker.hs
- compiler/GHC/SysTools/ExtraObj.hs
- compiler/GHC/Unit/Info.hs
- compiler/GHC/Unit/State.hs
- compiler/ghc.cabal.in
- + libraries/ghc-boot/GHC/Data/ShortText.hs
- libraries/ghc-boot/GHC/Unit/Database.hs
- compiler/GHC/Utils/Encoding.hs → libraries/ghc-boot/GHC/Utils/Encoding.hs
- libraries/ghc-boot/ghc-boot.cabal.in
- utils/ghc-pkg/Main.hs


Changes:

=====================================
compiler/GHC/Driver/Backpack.hs
=====================================
@@ -1,6 +1,8 @@
 {-# LANGUAGE CPP #-}
 {-# LANGUAGE FlexibleInstances #-}
 {-# LANGUAGE NondecreasingIndentation #-}
+{-# LANGUAGE OverloadedStrings #-}
+
 
 -- | This is the driver for the 'ghc --backpack' mode, which
 -- is a reimplementation of the "package manager" bits of
@@ -38,6 +40,7 @@ import GHC.Unit.State
 import GHC.Driver.Types
 import GHC.Data.StringBuffer
 import GHC.Data.FastString
+import qualified GHC.Data.ShortText as ST
 import GHC.Utils.Error
 import GHC.Types.SrcLoc
 import GHC.Driver.Main
@@ -340,8 +343,8 @@ buildUnit session cid insts lunit = do
             unitAbiDepends = [],
             unitLinkerOptions = case session of
                                  TcSession -> []
-                                 _ -> obj_files,
-            unitImportDirs = [ hi_dir ],
+                                 _ -> map ST.pack $ obj_files,
+            unitImportDirs = [ ST.pack $ hi_dir ],
             unitIsExposed = False,
             unitIsIndefinite = case session of
                                  TcSession -> True


=====================================
compiler/GHC/Driver/CodeOutput.hs
=====================================
@@ -32,6 +32,7 @@ import GHC.Cmm.CLabel
 import GHC.Driver.Types
 import GHC.Driver.Session
 import GHC.Driver.Ppr
+import qualified GHC.Data.ShortText as ST
 import GHC.Data.Stream           ( Stream )
 import qualified GHC.Data.Stream as Stream
 import GHC.SysTools.FileCleanup
@@ -211,7 +212,7 @@ outputForeignStubs dflags mod location stubs
         let rts_includes =
                let rts_pkg = unsafeLookupUnitId (unitState dflags) rtsUnitId in
                concatMap mk_include (unitIncludes rts_pkg)
-            mk_include i = "#include \"" ++ i ++ "\"\n"
+            mk_include i = "#include \"" ++ ST.unpack i ++ "\"\n"
 
             -- wrapper code mentions the ffi_arg type, which comes from ffi.h
             ffi_includes


=====================================
compiler/GHC/Driver/Finder.hs
=====================================
@@ -43,6 +43,7 @@ import GHC.Unit.State
 
 import GHC.Driver.Types
 import GHC.Data.FastString
+import qualified GHC.Data.ShortText as ST
 import GHC.Utils.Misc
 import GHC.Builtin.Names ( gHC_PRIM )
 import GHC.Driver.Session
@@ -380,7 +381,7 @@ findPackageModule_ hsc_env mod pkg_conf =
 
      mk_hi_loc = mkHiOnlyModLocation dflags package_hisuf
 
-     import_dirs = unitImportDirs pkg_conf
+     import_dirs = map ST.unpack $ unitImportDirs pkg_conf
       -- we never look for a .hi-boot file in an external package;
       -- .hi-boot files only make sense for the home package.
   in


=====================================
compiler/GHC/Runtime/Linker.hs
=====================================
@@ -61,6 +61,7 @@ import GHC.Types.SrcLoc
 import qualified GHC.Data.Maybe as Maybes
 import GHC.Types.Unique.DSet
 import GHC.Data.FastString
+import qualified GHC.Data.ShortText as ST
 import GHC.Platform
 import GHC.SysTools
 import GHC.SysTools.FileCleanup
@@ -1282,10 +1283,10 @@ linkPackage hsc_env pkg
         let dflags    = hsc_dflags hsc_env
             platform  = targetPlatform dflags
             is_dyn    = interpreterDynamic (hscInterp hsc_env)
-            dirs | is_dyn    = Packages.unitLibraryDynDirs pkg
-                 | otherwise = Packages.unitLibraryDirs pkg
+            dirs | is_dyn    = map ST.unpack $ Packages.unitLibraryDynDirs pkg
+                 | otherwise = map ST.unpack $ Packages.unitLibraryDirs pkg
 
-        let hs_libs   =  Packages.unitLibraries pkg
+        let hs_libs   = map ST.unpack $ Packages.unitLibraries pkg
             -- The FFI GHCi import lib isn't needed as
             -- GHC.Runtime.Linker + rts/Linker.c link the
             -- interpreted references to FFI to the compiled FFI.
@@ -1300,11 +1301,12 @@ linkPackage hsc_env pkg
         -- libs do not exactly match the .so/.dll equivalents. So if the
         -- package file provides an "extra-ghci-libraries" field then we use
         -- that instead of the "extra-libraries" field.
-            extra_libs =
-                      (if null (Packages.unitExtDepLibsGhc pkg)
-                            then Packages.unitExtDepLibsSys pkg
-                            else Packages.unitExtDepLibsGhc pkg)
-                      ++ [ lib | '-':'l':lib <- Packages.unitLinkerOptions pkg ]
+            extdeplibs = map ST.unpack (if null (Packages.unitExtDepLibsGhc pkg)
+                                      then Packages.unitExtDepLibsSys pkg
+                                      else Packages.unitExtDepLibsGhc pkg)
+            linkerlibs = [ lib | '-':'l':lib <- (map ST.unpack $ Packages.unitLinkerOptions pkg) ]
+            extra_libs = extdeplibs ++ linkerlibs
+
         -- See Note [Fork/Exec Windows]
         gcc_paths <- getGCCPaths dflags (platformOS platform)
         dirs_env <- addEnvPaths "LIBRARY_PATH" dirs
@@ -1434,8 +1436,8 @@ loadFrameworks :: HscEnv -> Platform -> UnitInfo -> IO ()
 loadFrameworks hsc_env platform pkg
     = when (platformUsesFrameworks platform) $ mapM_ load frameworks
   where
-    fw_dirs    = Packages.unitExtDepFrameworkDirs pkg
-    frameworks = Packages.unitExtDepFrameworks pkg
+    fw_dirs    = map ST.unpack $ Packages.unitExtDepFrameworkDirs pkg
+    frameworks = map ST.unpack $ Packages.unitExtDepFrameworks pkg
 
     load fw = do  r <- loadFramework hsc_env fw_dirs fw
                   case r of


=====================================
compiler/GHC/SysTools/ExtraObj.hs
=====================================
@@ -25,6 +25,7 @@ import GHC.Unit
 import GHC.SysTools.Elf
 import GHC.Utils.Misc
 import GHC.Prelude
+import qualified GHC.Data.ShortText as ST
 
 import Control.Monad
 import Data.Maybe
@@ -57,7 +58,7 @@ mkExtraObj dflags extn xs
       -- we're compiling C or assembler. When compiling C, we pass the usual
       -- set of include directories and PIC flags.
       cOpts = map Option (picCCOpts dflags)
-                    ++ map (FileOption "-I")
+                    ++ map (FileOption "-I" . ST.unpack)
                             (unitIncludeDirs $ unsafeLookupUnit pkgs rtsUnit)
 
       -- When compiling assembler code, we drop the usual C options, and if the


=====================================
compiler/GHC/Unit/Info.hs
=====================================
@@ -31,6 +31,7 @@ import Data.Version
 import Data.Bifunctor
 
 import GHC.Data.FastString
+import qualified GHC.Data.ShortText as ST
 import GHC.Utils.Outputable
 import GHC.Unit.Module as Module
 import GHC.Types.Unique
@@ -124,21 +125,21 @@ pprUnitInfo GenericUnitInfo {..} =
       field "exposed-modules"      (ppr unitExposedModules),
       field "hidden-modules"       (fsep (map ppr unitHiddenModules)),
       field "trusted"              (ppr unitIsTrusted),
-      field "import-dirs"          (fsep (map text unitImportDirs)),
-      field "library-dirs"         (fsep (map text unitLibraryDirs)),
-      field "dynamic-library-dirs" (fsep (map text unitLibraryDynDirs)),
-      field "hs-libraries"         (fsep (map text unitLibraries)),
-      field "extra-libraries"      (fsep (map text unitExtDepLibsSys)),
-      field "extra-ghci-libraries" (fsep (map text unitExtDepLibsGhc)),
-      field "include-dirs"         (fsep (map text unitIncludeDirs)),
-      field "includes"             (fsep (map text unitIncludes)),
+      field "import-dirs"          (fsep (map (text . ST.unpack) unitImportDirs)),
+      field "library-dirs"         (fsep (map (text . ST.unpack) unitLibraryDirs)),
+      field "dynamic-library-dirs" (fsep (map (text . ST.unpack) unitLibraryDynDirs)),
+      field "hs-libraries"         (fsep (map (text . ST.unpack) unitLibraries)),
+      field "extra-libraries"      (fsep (map (text . ST.unpack) unitExtDepLibsSys)),
+      field "extra-ghci-libraries" (fsep (map (text . ST.unpack) unitExtDepLibsGhc)),
+      field "include-dirs"         (fsep (map (text . ST.unpack) unitIncludeDirs)),
+      field "includes"             (fsep (map (text . ST.unpack) unitIncludes)),
       field "depends"              (fsep (map ppr  unitDepends)),
-      field "cc-options"           (fsep (map text unitCcOptions)),
-      field "ld-options"           (fsep (map text unitLinkerOptions)),
-      field "framework-dirs"       (fsep (map text unitExtDepFrameworkDirs)),
-      field "frameworks"           (fsep (map text unitExtDepFrameworks)),
-      field "haddock-interfaces"   (fsep (map text unitHaddockInterfaces)),
-      field "haddock-html"         (fsep (map text unitHaddockHTMLs))
+      field "cc-options"           (fsep (map (text . ST.unpack) unitCcOptions)),
+      field "ld-options"           (fsep (map (text . ST.unpack) unitLinkerOptions)),
+      field "framework-dirs"       (fsep (map (text . ST.unpack) unitExtDepFrameworkDirs)),
+      field "frameworks"           (fsep (map (text . ST.unpack) unitExtDepFrameworks)),
+      field "haddock-interfaces"   (fsep (map (text . ST.unpack) unitHaddockInterfaces)),
+      field "haddock-html"         (fsep (map (text . ST.unpack) unitHaddockHTMLs))
     ]
   where
     field name body = text name <> colon <+> nest 4 body


=====================================
compiler/GHC/Unit/State.hs
=====================================
@@ -99,6 +99,7 @@ import GHC.Data.Maybe
 
 import System.Environment ( getEnv )
 import GHC.Data.FastString
+import qualified GHC.Data.ShortText as ST
 import GHC.Utils.Error  ( debugTraceMsg, MsgDoc, dumpIfSet_dyn,
                           withTiming, DumpFormat (..) )
 import GHC.Utils.Exception
@@ -749,7 +750,7 @@ mungeUnitInfo :: FilePath -> FilePath
                    -> UnitInfo -> UnitInfo
 mungeUnitInfo top_dir pkgroot =
     mungeDynLibFields
-  . mungeUnitInfoPaths top_dir pkgroot
+  . mungeUnitInfoPaths (ST.pack top_dir) (ST.pack pkgroot)
 
 mungeDynLibFields :: UnitInfo -> UnitInfo
 mungeDynLibFields pkg =
@@ -1797,7 +1798,7 @@ getUnitIncludePath ctx unit_state home_unit pkgs =
   collectIncludeDirs `fmap` getPreloadUnitsAnd ctx unit_state home_unit pkgs
 
 collectIncludeDirs :: [UnitInfo] -> [FilePath]
-collectIncludeDirs ps = ordNub (filter notNull (concatMap unitIncludeDirs ps))
+collectIncludeDirs ps = map ST.unpack $ ordNub (filter (not . ST.null) (concatMap unitIncludeDirs ps))
 
 -- | Find all the library paths in these and the preload packages
 getUnitLibraryPath :: SDocContext -> UnitState -> HomeUnit -> Ways -> [UnitId] -> IO [String]
@@ -1822,8 +1823,8 @@ collectLinkOpts :: DynFlags -> [UnitInfo] -> ([String], [String], [String])
 collectLinkOpts dflags ps =
     (
         concatMap (map ("-l" ++) . packageHsLibs dflags) ps,
-        concatMap (map ("-l" ++) . unitExtDepLibsSys) ps,
-        concatMap unitLinkerOptions ps
+        concatMap (map ("-l" ++) . map ST.unpack . unitExtDepLibsSys) ps,
+        concatMap (map ST.unpack . unitLinkerOptions) ps
     )
 collectArchives :: DynFlags -> UnitInfo -> IO [FilePath]
 collectArchives dflags pc =
@@ -1831,7 +1832,7 @@ collectArchives dflags pc =
                         | searchPath <- searchPaths
                         , lib <- libs ]
   where searchPaths = ordNub . filter notNull . libraryDirsForWay (ways dflags) $ pc
-        libs        = packageHsLibs dflags pc ++ unitExtDepLibsSys pc
+        libs        = packageHsLibs dflags pc ++ (map ST.unpack $ unitExtDepLibsSys pc)
 
 getLibs :: DynFlags -> [UnitId] -> IO [(String,String)]
 getLibs dflags pkgs = do
@@ -1846,7 +1847,7 @@ getLibs dflags pkgs = do
     filterM (doesFileExist . fst) candidates
 
 packageHsLibs :: DynFlags -> UnitInfo -> [String]
-packageHsLibs dflags p = map (mkDynName . addSuffix) (unitLibraries p)
+packageHsLibs dflags p = map (mkDynName . addSuffix . ST.unpack) (unitLibraries p)
   where
         ways0 = ways dflags
 
@@ -1895,27 +1896,27 @@ packageHsLibs dflags p = map (mkDynName . addSuffix) (unitLibraries p)
 
 -- | Either the 'unitLibraryDirs' or 'unitLibraryDynDirs' as appropriate for the way.
 libraryDirsForWay :: Ways -> UnitInfo -> [String]
-libraryDirsForWay ws
-  | WayDyn `elem` ws = unitLibraryDynDirs
-  | otherwise        = unitLibraryDirs
+libraryDirsForWay ws ui
+  | WayDyn `elem` ws = map ST.unpack $ unitLibraryDynDirs ui
+  | otherwise        = map ST.unpack $ unitLibraryDirs ui
 
 -- | Find all the C-compiler options in these and the preload packages
 getUnitExtraCcOpts :: SDocContext -> UnitState -> HomeUnit -> [UnitId] -> IO [String]
 getUnitExtraCcOpts ctx unit_state home_unit pkgs = do
   ps <- getPreloadUnitsAnd ctx unit_state home_unit pkgs
-  return (concatMap unitCcOptions ps)
+  return $ map ST.unpack (concatMap unitCcOptions ps)
 
 -- | Find all the package framework paths in these and the preload packages
 getUnitFrameworkPath :: SDocContext -> UnitState -> HomeUnit -> [UnitId] -> IO [String]
 getUnitFrameworkPath ctx unit_state home_unit pkgs = do
   ps <- getPreloadUnitsAnd ctx unit_state home_unit pkgs
-  return (ordNub (filter notNull (concatMap unitExtDepFrameworkDirs ps)))
+  return $ map ST.unpack (ordNub (filter (not . ST.null) (concatMap unitExtDepFrameworkDirs ps)))
 
 -- | Find all the package frameworks in these and the preload packages
 getUnitFrameworks :: SDocContext -> UnitState -> HomeUnit -> [UnitId] -> IO [String]
 getUnitFrameworks ctx unit_state home_unit pkgs = do
   ps <- getPreloadUnitsAnd ctx unit_state home_unit pkgs
-  return (concatMap unitExtDepFrameworks ps)
+  return $ map ST.unpack (concatMap unitExtDepFrameworks ps)
 
 -- -----------------------------------------------------------------------------
 -- Package Utils


=====================================
compiler/ghc.cabal.in
=====================================
@@ -547,7 +547,6 @@ Library
         GHC.Data.BooleanFormula
         GHC.Utils.BufHandle
         GHC.Data.Graph.Directed
-        GHC.Utils.Encoding
         GHC.Utils.IO.Unsafe
         GHC.Data.FastMutInt
         GHC.Data.FastString


=====================================
libraries/ghc-boot/GHC/Data/ShortText.hs
=====================================
@@ -0,0 +1,112 @@
+{-# LANGUAGE BangPatterns, MagicHash, UnboxedTuples, GeneralizedNewtypeDeriving, DerivingStrategies #-}
+{-# OPTIONS_GHC -O2 -funbox-strict-fields #-}
+
+-- |
+-- An Unicode string for internal GHC use. Meant to replace String
+-- in places where being a lazy linked is not very useful and a more
+-- memory efficient data structure is desirable.
+
+-- Very similar to FastString, but not hash-consed and with some extra instances and
+-- functions for serialisation and I/O. Should be imported qualified.
+
+module GHC.Data.ShortText (
+        -- * ShortText
+        ShortText(..),
+        -- ** Conversion to and from String
+        pack,
+        unpack,
+        -- ** Operations
+        codepointLength,
+        byteLength,
+        GHC.Data.ShortText.null,
+        splitFilePath,
+        GHC.Data.ShortText.head,
+        stripPrefix
+  ) where
+
+import Prelude
+
+import Control.Monad (guard)
+import Control.DeepSeq as DeepSeq
+import Data.Binary
+import qualified Data.ByteString.Char8 as B8
+import qualified Data.ByteString.Short.Internal as SBS
+import GHC.Exts
+import GHC.IO
+import GHC.Utils.Encoding
+import System.FilePath (isPathSeparator)
+
+{-| A 'ShortText' is a modified UTF-8 encoded string meant for short strings like
+file paths, module descriptions, etc.
+-}
+newtype ShortText = ShortText { contents :: SBS.ShortByteString
+                              }
+                              deriving stock (Show)
+                              deriving newtype (Eq, Ord, Binary, Semigroup, Monoid, NFData)
+
+-- We don't want to derive this one from ShortByteString since that one won't handle
+-- UTF-8 characters correctly.
+instance IsString ShortText where
+  fromString = pack
+
+-- | /O(n)/ Returns the length of the 'ShortText' in characters.
+codepointLength :: ShortText -> Int
+codepointLength st = unsafeDupablePerformIO $ countUTF8Chars (contents st)
+-- | /O(1)/ Returns the length of the 'ShortText' in bytes.
+byteLength :: ShortText -> Int
+byteLength st = SBS.length $ contents st
+
+-- | /O(n)/ Convert a 'String' into a 'ShortText'.
+pack :: String -> ShortText
+pack s = unsafeDupablePerformIO $ ShortText <$> utf8EncodeShortByteString s
+
+-- | /O(n)/ Convert a 'ShortText' into a 'String'.
+unpack :: ShortText -> String
+unpack st = utf8DecodeShortByteString $ contents st
+
+-- | /O(1)/ Test whether the 'ShortText' is the empty string.
+null :: ShortText -> Bool
+null st = SBS.null $ contents st
+
+-- | /O(n)/ Split a 'ShortText' representing a file path into its components by separating
+-- on the file separator characters for this platform.
+splitFilePath :: ShortText -> [ShortText]
+-- This seems dangerous, but since the path separators are in the ASCII set they map down
+-- to a single byte when encoded in UTF-8 and so this should work even when casting to ByteString.
+-- We DeepSeq.force the resulting list so that we can be sure that no references to the
+-- bytestring in `st'` remain in unevaluated thunks, which might prevent `st'` from being
+-- collected by the GC.
+splitFilePath st = DeepSeq.force $ map (ShortText . SBS.toShort) $ B8.splitWith isPathSeparator st'
+  where st' = SBS.fromShort $ contents st
+
+-- | /O(1)/ Returns the first UTF-8 codepoint in the 'ShortText'. Depending on the string in
+-- question, this may or may not be the actual first character in the string due to Unicode
+-- non-printable characters.
+head :: ShortText -> Char
+head st
+  | SBS.null $ contents st = error "head: Empty ShortText"
+  | otherwise              = Prelude.head $ unpack st
+
+-- | /O(n)/ The 'stripPrefix' function takes two 'ShortText's and returns 'Just' the remainder of
+-- the second iff the first is its prefix, and otherwise Nothing.
+stripPrefix :: ShortText -> ShortText -> Maybe ShortText
+stripPrefix prefix st = do
+  let !(SBS.SBS prefixBA) = contents prefix
+  let !(SBS.SBS stBA)     = contents st
+  let prefixLength        = sizeofByteArray# prefixBA
+  let stLength            = sizeofByteArray# stBA
+  -- If the length of 'st' is not >= than the length of 'prefix', it is impossible for 'prefix'
+  -- to be the prefix of `st`.
+  guard $ (I# stLength) >= (I# prefixLength)
+  -- 'prefix' is a prefix of 'st' if the first <length of prefix> bytes of 'st'
+  -- are equal to 'prefix'
+  guard $ I# (compareByteArrays# prefixBA 0# stBA 0# prefixLength) == 0
+  -- Allocate a new ByteArray# and copy the remainder of the 'st' into it
+  unsafeDupablePerformIO $ do
+    let newBAsize = (stLength -# prefixLength)
+    newSBS <- IO $ \s0 ->
+      let !(# s1, ba #)  = newByteArray# newBAsize s0
+          s2             = copyByteArray# stBA prefixLength ba 0# newBAsize s1
+          !(# s3, fba #) = unsafeFreezeByteArray# ba s2
+      in  (# s3, SBS.SBS fba #)
+    return . Just . ShortText $ newSBS


=====================================
libraries/ghc-boot/GHC/Unit/Database.hs
=====================================
@@ -12,6 +12,7 @@
 {-# LANGUAGE TypeSynonymInstances #-}
 {-# LANGUAGE ExplicitNamespaces #-}
 {-# LANGUAGE RecordWildCards #-}
+{-# LANGUAGE OverloadedStrings #-}
 
 -----------------------------------------------------------------------------
 -- |
@@ -82,16 +83,16 @@ import Data.Bifunctor
 import Data.Binary as Bin
 import Data.Binary.Put as Bin
 import Data.Binary.Get as Bin
+import Data.List (intersperse)
 import Control.Exception as Exception
 import Control.Monad (when)
 import System.FilePath as FilePath
-import qualified System.FilePath.Posix as FilePath.Posix
 import System.IO
 import System.IO.Error
 import GHC.IO.Exception (IOErrorType(InappropriateType))
+import qualified GHC.Data.ShortText as ST
 import GHC.IO.Handle.Lock
 import System.Directory
-import Data.List (stripPrefix)
 
 -- | @ghc-boot@'s UnitInfo, serialized to the database.
 type DbUnitInfo      = GenericUnitInfo BS.ByteString BS.ByteString BS.ByteString BS.ByteString BS.ByteString DbModule
@@ -142,28 +143,28 @@ data GenericUnitInfo compid srcpkgid srcpkgname uid modulename mod = GenericUnit
       -- components that can be registered in a database and used by other
       -- modules.
 
-   , unitAbiHash        :: String
+   , unitAbiHash        :: ST.ShortText
       -- ^ ABI hash used to avoid mixing up units compiled with different
       -- dependencies, compiler, options, etc.
 
    , unitDepends        :: [uid]
       -- ^ Identifiers of the units this one depends on
 
-   , unitAbiDepends     :: [(uid, String)]
+   , unitAbiDepends     :: [(uid, ST.ShortText)]
      -- ^ Like 'unitDepends', but each dependency is annotated with the ABI hash
      -- we expect the dependency to respect.
 
-   , unitImportDirs     :: [FilePath]
+   , unitImportDirs     :: [FilePathST]
       -- ^ Directories containing module interfaces
 
-   , unitLibraries      :: [String]
+   , unitLibraries      :: [ST.ShortText]
       -- ^ Names of the Haskell libraries provided by this unit
 
-   , unitExtDepLibsSys  :: [String]
+   , unitExtDepLibsSys  :: [ST.ShortText]
       -- ^ Names of the external system libraries that this unit depends on. See
       -- also `unitExtDepLibsGhc` field.
 
-   , unitExtDepLibsGhc  :: [String]
+   , unitExtDepLibsGhc  :: [ST.ShortText]
       -- ^ Because of slight differences between the GHC dynamic linker (in
       -- GHC.Runtime.Linker) and the
       -- native system linker, some packages have to link with a different list
@@ -174,46 +175,46 @@ data GenericUnitInfo compid srcpkgid srcpkgname uid modulename mod = GenericUnit
       -- If this field is set, then we use that instead of the
       -- `unitExtDepLibsSys` field.
 
-   , unitLibraryDirs    :: [FilePath]
+   , unitLibraryDirs    :: [FilePathST]
       -- ^ Directories containing libraries provided by this unit. See also
       -- `unitLibraryDynDirs`.
       --
       -- It seems to be used to store paths to external library dependencies
       -- too.
 
-   , unitLibraryDynDirs :: [FilePath]
+   , unitLibraryDynDirs :: [FilePathST]
       -- ^ Directories containing the dynamic libraries provided by this unit.
       -- See also `unitLibraryDirs`.
       --
       -- It seems to be used to store paths to external dynamic library
       -- dependencies too.
 
-   , unitExtDepFrameworks :: [String]
+   , unitExtDepFrameworks :: [ST.ShortText]
       -- ^ Names of the external MacOS frameworks that this unit depends on.
 
-   , unitExtDepFrameworkDirs :: [FilePath]
+   , unitExtDepFrameworkDirs :: [FilePathST]
       -- ^ Directories containing MacOS frameworks that this unit depends
       -- on.
 
-   , unitLinkerOptions  :: [String]
+   , unitLinkerOptions  :: [ST.ShortText]
       -- ^ Linker (e.g. ld) command line options
 
-   , unitCcOptions      :: [String]
+   , unitCcOptions      :: [ST.ShortText]
       -- ^ C compiler options that needs to be passed to the C compiler when we
       -- compile some C code against this unit.
 
-   , unitIncludes       :: [String]
+   , unitIncludes       :: [ST.ShortText]
       -- ^ C header files that are required by this unit (provided by this unit
       -- or external)
 
-   , unitIncludeDirs    :: [FilePath]
+   , unitIncludeDirs    :: [FilePathST]
       -- ^ Directories containing C header files that this unit depends
       -- on.
 
-   , unitHaddockInterfaces :: [FilePath]
+   , unitHaddockInterfaces :: [FilePathST]
       -- ^ Paths to Haddock interface files for this unit
 
-   , unitHaddockHTMLs   :: [FilePath]
+   , unitHaddockHTMLs   :: [FilePathST]
       -- ^ Paths to Haddock directories containing HTML files
 
    , unitExposedModules :: [(modulename, Maybe mod)]
@@ -242,6 +243,8 @@ data GenericUnitInfo compid srcpkgid srcpkgname uid modulename mod = GenericUnit
    }
    deriving (Eq, Show)
 
+type FilePathST = ST.ShortText
+
 -- | Convert between GenericUnitInfo instances
 mapGenericUnitInfo
    :: (uid1 -> uid2)
@@ -646,12 +649,12 @@ instance Binary DbInstUnitId where
 -- Also perform a similar substitution for the older GHC-specific
 -- "$topdir" variable. The "topdir" is the location of the ghc
 -- installation (obtained from the -B option).
-mkMungePathUrl :: FilePath -> FilePath -> (FilePath -> FilePath, FilePath -> FilePath)
+mkMungePathUrl :: FilePathST -> FilePathST -> (FilePathST -> FilePathST, FilePathST -> FilePathST)
 mkMungePathUrl top_dir pkgroot = (munge_path, munge_url)
    where
     munge_path p
-      | Just p' <- stripVarPrefix "${pkgroot}" p = pkgroot ++ p'
-      | Just p' <- stripVarPrefix "$topdir"    p = top_dir ++ p'
+      | Just p' <- stripVarPrefix "${pkgroot}" p = mappend pkgroot p'
+      | Just p' <- stripVarPrefix "$topdir"    p = mappend top_dir p'
       | otherwise                                = p
 
     munge_url p
@@ -659,20 +662,19 @@ mkMungePathUrl top_dir pkgroot = (munge_path, munge_url)
       | Just p' <- stripVarPrefix "$httptopdir"   p = toUrlPath top_dir p'
       | otherwise                                   = p
 
-    toUrlPath r p = "file:///"
-                 -- URLs always use posix style '/' separators:
-                 ++ FilePath.Posix.joinPath
-                        (r : -- We need to drop a leading "/" or "\\"
-                             -- if there is one:
-                             dropWhile (all isPathSeparator)
-                                       (FilePath.splitDirectories p))
+    toUrlPath r p = mconcat $ "file:///" : (intersperse "/" (r : (splitDirectories p)))
+                                          -- URLs always use posix style '/' separators
+
+    -- We need to drop a leading "/" or "\\" if there is one:
+    splitDirectories :: FilePathST -> [FilePathST]
+    splitDirectories p  = filter (not . ST.null) $ ST.splitFilePath p
 
     -- We could drop the separator here, and then use </> above. However,
     -- by leaving it in and using ++ we keep the same path separator
     -- rather than letting FilePath change it to use \ as the separator
-    stripVarPrefix var path = case stripPrefix var path of
-                              Just [] -> Just []
-                              Just cs@(c : _) | isPathSeparator c -> Just cs
+    stripVarPrefix var path = case ST.stripPrefix var path of
+                              Just "" -> Just ""
+                              Just cs | isPathSeparator (ST.head cs) -> Just cs
                               _ -> Nothing
 
 
@@ -684,7 +686,7 @@ mkMungePathUrl top_dir pkgroot = (munge_path, munge_url)
 -- Also perform a similar substitution for the older GHC-specific
 -- "$topdir" variable. The "topdir" is the location of the ghc
 -- installation (obtained from the -B option).
-mungeUnitInfoPaths :: FilePath -> FilePath -> GenericUnitInfo a b c d e f -> GenericUnitInfo a b c d e f
+mungeUnitInfoPaths :: FilePathST -> FilePathST -> GenericUnitInfo a b c d e f -> GenericUnitInfo a b c d e f
 mungeUnitInfoPaths top_dir pkgroot pkg =
    -- TODO: similar code is duplicated in utils/ghc-pkg/Main.hs
     pkg


=====================================
compiler/GHC/Utils/Encoding.hs → libraries/ghc-boot/GHC/Utils/Encoding.hs
=====================================
@@ -1,7 +1,10 @@
 {-# LANGUAGE BangPatterns, MagicHash, UnboxedTuples #-}
-{-# OPTIONS_GHC -O2 #-}
+{-# OPTIONS_GHC -O2 -fno-warn-name-shadowing #-}
 -- We always optimise this, otherwise performance of a non-optimised
--- compiler is severely affected
+-- compiler is severely affected. This module used to live in the `ghc`
+-- package but has been moved to `ghc-boot` because the definition
+-- of the package database (needed in both ghc and in ghc-pkg) lives in
+-- `ghc-boot` and uses ShortText, which in turn depends on this module.
 
 -- -----------------------------------------------------------------------------
 --
@@ -36,7 +39,7 @@ module GHC.Utils.Encoding (
         toBase62Padded
   ) where
 
-import GHC.Prelude
+import Prelude
 
 import Foreign
 import Foreign.ForeignPtr.Unsafe (unsafeForeignPtrToPtr)


=====================================
libraries/ghc-boot/ghc-boot.cabal.in
=====================================
@@ -38,6 +38,8 @@ Library
 
     exposed-modules:
             GHC.BaseDir
+            GHC.Data.ShortText
+            GHC.Utils.Encoding
             GHC.LanguageExtensions
             GHC.Unit.Database
             GHC.Serialized
@@ -68,4 +70,5 @@ Library
                    containers >= 0.5 && < 0.7,
                    directory  >= 1.2 && < 1.4,
                    filepath   >= 1.3 && < 1.5,
+                   deepseq    >= 1.4 && < 1.5,
                    ghc-boot-th == @ProjectVersionMunged@


=====================================
utils/ghc-pkg/Main.hs
=====================================
@@ -31,12 +31,13 @@
 module Main (main) where
 
 import qualified GHC.Unit.Database as GhcPkg
-import GHC.Unit.Database
+import GHC.Unit.Database hiding (mkMungePathUrl)
 import GHC.HandleEncoding
 import GHC.BaseDir (getBaseDir)
 import GHC.Settings.Utils (getTargetArchOS, maybeReadFuzzy)
 import GHC.Platform.Host (hostPlatformArchOS)
 import GHC.UniqueSubdir (uniqueSubdir)
+import qualified GHC.Data.ShortText as ST
 import GHC.Version ( cProjectVersion )
 import qualified Distribution.Simple.PackageIndex as PackageIndex
 import qualified Data.Graph as Graph
@@ -56,6 +57,7 @@ import Distribution.Types.MungedPackageId
 import Distribution.Simple.Utils (toUTF8BS, writeUTF8File, readUTF8File)
 import qualified Data.Version as Version
 import System.FilePath as FilePath
+import qualified System.FilePath.Posix as FilePath.Posix
 import System.Directory ( getAppUserDataDirectory, createDirectoryIfMissing,
                           getModificationTime )
 import Text.Printf
@@ -990,6 +992,35 @@ mungePackagePaths top_dir pkgroot pkg =
     munge_urls  = map munge_url
     (munge_path,munge_url) = mkMungePathUrl top_dir pkgroot
 
+mkMungePathUrl :: FilePath -> FilePath -> (FilePath -> FilePath, FilePath -> FilePath)
+mkMungePathUrl top_dir pkgroot = (munge_path, munge_url)
+   where
+    munge_path p
+      | Just p' <- stripVarPrefix "${pkgroot}" p = pkgroot ++ p'
+      | Just p' <- stripVarPrefix "$topdir"    p = top_dir ++ p'
+      | otherwise                                = p
+
+    munge_url p
+      | Just p' <- stripVarPrefix "${pkgrooturl}" p = toUrlPath pkgroot p'
+      | Just p' <- stripVarPrefix "$httptopdir"   p = toUrlPath top_dir p'
+      | otherwise                                   = p
+
+    toUrlPath r p = "file:///"
+                 -- URLs always use posix style '/' separators:
+                 ++ FilePath.Posix.joinPath
+                        (r : -- We need to drop a leading "/" or "\\"
+                             -- if there is one:
+                             dropWhile (all isPathSeparator)
+                                       (FilePath.splitDirectories p))
+
+    -- We could drop the separator here, and then use </> above. However,
+    -- by leaving it in and using ++ we keep the same path separator
+    -- rather than letting FilePath change it to use \ as the separator
+    stripVarPrefix var path = case stripPrefix var path of
+                              Just [] -> Just []
+                              Just cs@(c : _) | isPathSeparator c -> Just cs
+                              _ -> Nothing
+
 -- -----------------------------------------------------------------------------
 -- Workaround for old single-file style package dbs
 
@@ -1331,7 +1362,7 @@ recomputeValidAbiDeps db pkg =
     newAbiDeps =
       catMaybes . flip map (GhcPkg.unitAbiDepends pkg) $ \(k, _) ->
         case filter (\d -> installedUnitId d == k) db of
-          [x] -> Just (k, unAbiHash (abiHash x))
+          [x] -> Just (k, ST.pack $ unAbiHash (abiHash x))
           _   -> Nothing
     abiDepsUpdated =
       GhcPkg.unitAbiDepends pkg /= newAbiDeps
@@ -1370,22 +1401,22 @@ convertPackageInfoToCacheFormat pkg =
        GhcPkg.unitComponentName  =
          fmap (mkPackageName . unUnqualComponentName) (libraryNameString $ sourceLibName pkg),
        GhcPkg.unitDepends        = depends pkg,
-       GhcPkg.unitAbiDepends     = map (\(AbiDependency k v) -> (k,unAbiHash v)) (abiDepends pkg),
-       GhcPkg.unitAbiHash        = unAbiHash (abiHash pkg),
-       GhcPkg.unitImportDirs     = importDirs pkg,
-       GhcPkg.unitLibraries      = hsLibraries pkg,
-       GhcPkg.unitExtDepLibsSys  = extraLibraries pkg,
-       GhcPkg.unitExtDepLibsGhc  = extraGHCiLibraries pkg,
-       GhcPkg.unitLibraryDirs    = libraryDirs pkg,
-       GhcPkg.unitLibraryDynDirs = libraryDynDirs pkg,
-       GhcPkg.unitExtDepFrameworks = frameworks pkg,
-       GhcPkg.unitExtDepFrameworkDirs = frameworkDirs pkg,
-       GhcPkg.unitLinkerOptions  = ldOptions pkg,
-       GhcPkg.unitCcOptions      = ccOptions pkg,
-       GhcPkg.unitIncludes       = includes pkg,
-       GhcPkg.unitIncludeDirs    = includeDirs pkg,
-       GhcPkg.unitHaddockInterfaces = haddockInterfaces pkg,
-       GhcPkg.unitHaddockHTMLs   = haddockHTMLs pkg,
+       GhcPkg.unitAbiDepends     = map (\(AbiDependency k v) -> (k,ST.pack $ unAbiHash v)) (abiDepends pkg),
+       GhcPkg.unitAbiHash        = ST.pack $ unAbiHash (abiHash pkg),
+       GhcPkg.unitImportDirs     = map ST.pack $ importDirs pkg,
+       GhcPkg.unitLibraries      = map ST.pack $ hsLibraries pkg,
+       GhcPkg.unitExtDepLibsSys  = map ST.pack $ extraLibraries pkg,
+       GhcPkg.unitExtDepLibsGhc  = map ST.pack $ extraGHCiLibraries pkg,
+       GhcPkg.unitLibraryDirs    = map ST.pack $ libraryDirs pkg,
+       GhcPkg.unitLibraryDynDirs = map ST.pack $ libraryDynDirs pkg,
+       GhcPkg.unitExtDepFrameworks = map ST.pack $ frameworks pkg,
+       GhcPkg.unitExtDepFrameworkDirs = map ST.pack $ frameworkDirs pkg,
+       GhcPkg.unitLinkerOptions  = map ST.pack $ ldOptions pkg,
+       GhcPkg.unitCcOptions      = map ST.pack $ ccOptions pkg,
+       GhcPkg.unitIncludes       = map ST.pack $ includes pkg,
+       GhcPkg.unitIncludeDirs    = map ST.pack $ includeDirs pkg,
+       GhcPkg.unitHaddockInterfaces = map ST.pack $ haddockInterfaces pkg,
+       GhcPkg.unitHaddockHTMLs   = map ST.pack $ haddockHTMLs pkg,
        GhcPkg.unitExposedModules = map convertExposed (exposedModules pkg),
        GhcPkg.unitHiddenModules  = hiddenModules pkg,
        GhcPkg.unitIsIndefinite   = indefinite pkg,



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/7fdcce6d4d13a10a1b2336c1d40482c64dba664d

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/7fdcce6d4d13a10a1b2336c1d40482c64dba664d
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20201013/75eb85f5/attachment-0001.html>


More information about the ghc-commits mailing list