[Git][ghc/ghc][wip/js-staging] 7 commits: Linker: remove dead code (base, compactor)

Sylvain Henry (@hsyl20) gitlab at gitlab.haskell.org
Fri Oct 7 14:44:59 UTC 2022



Sylvain Henry pushed to branch wip/js-staging at Glasgow Haskell Compiler / GHC


Commits:
7a38d5e3 by Sylvain Henry at 2022-10-07T13:43:36+02:00
Linker: remove dead code (base, compactor)

- - - - -
9e999499 by Sylvain Henry at 2022-10-07T14:01:25+02:00
Better shims linking

- - - - -
87cfb9d3 by Sylvain Henry at 2022-10-07T14:20:30+02:00
Make isJsFile a bit faster by only reading the header

- - - - -
04f6c81e by Sylvain Henry at 2022-10-07T15:02:24+02:00
Linker: write directly into output file

- - - - -
5aa0a13a by Sylvain Henry at 2022-10-07T15:51:09+02:00
Linker: more refactoring

- - - - -
dd60ba5f by Sylvain Henry at 2022-10-07T16:11:15+02:00
Lazy loading of JStat in object code

- - - - -
b9333376 by Sylvain Henry at 2022-10-07T16:32:33+02:00
Use Ppr hack to render directly into a file

- - - - -


6 changed files:

- compiler/GHC/Driver/Pipeline.hs
- compiler/GHC/StgToJS/Linker/Linker.hs
- compiler/GHC/StgToJS/Linker/Types.hs
- compiler/GHC/StgToJS/Object.hs
- compiler/GHC/StgToJS/Types.hs
- compiler/GHC/Utils/Ppr.hs


Changes:

=====================================
compiler/GHC/Driver/Pipeline.hs
=====================================
@@ -446,7 +446,7 @@ link' logger tmpfs dflags unit_env batch_attempt_linking mHscMessager hpt
               let lc_cfg   = mempty
               let extra_js = mempty
               let cfg      = initStgToJSConfig dflags
-              jsLinkBinary lc_cfg cfg extra_js logger tmpfs dflags unit_env obj_files pkg_deps
+              jsLinkBinary lc_cfg cfg extra_js logger dflags unit_env obj_files pkg_deps
             | otherwise -> linkBinary logger tmpfs dflags unit_env obj_files pkg_deps
           LinkStaticLib -> linkStaticLib logger dflags unit_env obj_files pkg_deps
           LinkDynLib    -> linkDynLibCheck logger tmpfs dflags unit_env obj_files pkg_deps
@@ -572,7 +572,7 @@ doLink hsc_env o_files = do
         let lc_cfg   = mempty
         let extra_js = mempty
         let cfg      = initStgToJSConfig dflags
-        jsLinkBinary lc_cfg cfg extra_js logger tmpfs dflags unit_env o_files []
+        jsLinkBinary lc_cfg cfg extra_js logger dflags unit_env o_files []
       | otherwise -> linkBinary logger tmpfs dflags unit_env o_files []
     LinkStaticLib -> linkStaticLib      logger       dflags unit_env o_files []
     LinkDynLib    -> linkDynLibCheck    logger tmpfs dflags unit_env o_files []


=====================================
compiler/GHC/StgToJS/Linker/Linker.hs
=====================================
@@ -29,93 +29,79 @@ import Prelude
 
 import GHC.Platform.Host (hostPlatformArchOS)
 
-import           GHC.StgToJS.Linker.Types
-import           GHC.StgToJS.Linker.Utils
-import           GHC.StgToJS.Linker.Compactor
-import           GHC.StgToJS.Linker.Shims
+import GHC.JS.Syntax
 
-import           GHC.StgToJS.Rts.Rts
+import GHC.Driver.Session (DynFlags(..))
+import Language.Haskell.Syntax.Module.Name
 
-import           GHC.JS.Syntax
+import GHC.Linker.Static.Utils (exeFileName)
+
+import GHC.StgToJS.Linker.Types
+import GHC.StgToJS.Linker.Utils
+import GHC.StgToJS.Linker.Compactor
+import GHC.StgToJS.Rts.Rts
+import GHC.StgToJS.Object
+import GHC.StgToJS.Types hiding (LinkableUnit)
+import GHC.StgToJS.UnitUtils
+import GHC.StgToJS.Printer
+
+import GHC.Unit.State
+import GHC.Unit.Env
+import GHC.Unit.Home
+import GHC.Unit.Types
+import GHC.Unit.Module (moduleStableString)
 
-import           GHC.StgToJS.Object
-import           GHC.StgToJS.Types hiding (LinkableUnit)
-import           GHC.StgToJS.UnitUtils
-import           GHC.StgToJS.Printer
+import GHC.Utils.Encoding
+import GHC.Utils.Outputable hiding ((<>))
+import GHC.Utils.Panic
+import GHC.Utils.Error
+import GHC.Utils.Logger (Logger, logVerbAtLeast)
+import GHC.Utils.Binary
+import qualified GHC.Utils.Ppr as Ppr
+import GHC.Utils.CliOption
+import GHC.Utils.Monad
 
 import qualified GHC.SysTools.Ar          as Ar
-import           GHC.Utils.Encoding
-import           GHC.Utils.Outputable hiding ((<>))
-import           GHC.Utils.Panic
-import           GHC.Unit.State
-import           GHC.Unit.Env
-import           GHC.Unit.Home
-import           GHC.Unit.Types
-import           GHC.Utils.Error
-import           GHC.Data.FastString
-
-import           Control.Concurrent.MVar
-import           Control.Monad
-
-import           Data.Array
+
+import GHC.Data.FastString
+
+import Control.Concurrent.MVar
+import Control.Monad
+
+import Data.Array
 import qualified Data.ByteString          as B
 import qualified Data.ByteString.Char8    as BC
 import qualified Data.ByteString.Lazy.Char8 as BLC
 import qualified Data.ByteString.Lazy     as BL
-import           Data.Function            (on)
-import           Data.Int
-import           Data.IntSet              (IntSet)
+import Data.Function            (on)
+import Data.IntSet              (IntSet)
 import qualified Data.IntSet              as IS
-import           Data.IORef
-import           Data.List  ( partition, nub, intercalate, group, sort
-                            , groupBy, intersperse
-                            )
-import           Data.Map.Strict          (Map)
+import Data.IORef
+import Data.List  ( partition, nub, intercalate, group, sort
+                  , groupBy, intersperse
+                  )
+import Data.Map.Strict          (Map)
 import qualified Data.Map.Strict          as M
-import           Data.Maybe
-import           Data.Set                 (Set)
+import Data.Maybe
+import Data.Set                 (Set)
 import qualified Data.Set                 as S
-import           Data.Word
-
-import           GHC.Generics (Generic)
-
-import           System.FilePath ((<.>), (</>), dropExtension)
-import           System.Directory ( createDirectoryIfMissing
-                                  , doesFileExist
-                                  , getCurrentDirectory
-                                  , Permissions(..)
-                                  , setPermissions
-                                  , getPermissions
-                                  )
-
-import GHC.Driver.Session (DynFlags(..))
-import Language.Haskell.Syntax.Module.Name
-import GHC.Unit.Module (moduleStableString)
-import GHC.Utils.Logger (Logger, logVerbAtLeast)
-import GHC.Utils.TmpFs (TmpFs)
-import GHC.Utils.Binary
-import GHC.Utils.Ppr (Style(..), renderStyle, Mode(..))
-import GHC.Utils.CliOption
-import GHC.Utils.Monad
-
-import GHC.Linker.Static.Utils (exeFileName)
-
-newtype LinkerStats = LinkerStats
-  { bytesPerModule :: Map Module Word64 -- ^ number of bytes linked per module
+import Data.Word
+
+import System.IO
+import System.FilePath ((<.>), (</>), dropExtension)
+import System.Directory ( createDirectoryIfMissing
+                        , doesFileExist
+                        , getCurrentDirectory
+                        , Permissions(..)
+                        , setPermissions
+                        , getPermissions
+                        )
+
+data LinkerStats = LinkerStats
+  { bytesPerModule     :: !(Map Module Word64) -- ^ number of bytes linked per module
+  , packedMetaDataSize :: !Word64              -- ^ number of bytes for metadata
   }
 
--- | result of a link pass
-data LinkResult = LinkResult
-  { linkOut         :: FilePath -> IO () -- ^ compiled Haskell code
-  , linkOutStats    :: LinkerStats       -- ^ statistics about generated code
-  , linkOutMetaSize :: Int64             -- ^ size of packed metadata in generated code
-  , linkForeignRefs :: [ForeignJSRef]    -- ^ foreign code references in compiled haskell code
-  , linkLibRTS      :: [FilePath]        -- ^ library code to load with the RTS
-  , linkLibA        :: [FilePath]        -- ^ library code to load after RTS
-  , linkLibAArch    :: [FilePath]        -- ^ library code to load from archives after RTS
-  , linkBase        :: Base              -- ^ base metadata to use if we want to link incrementally against this result
-  } deriving (Generic)
-
 newtype ArchiveState = ArchiveState { loadedArchives :: IORef (Map FilePath Ar.Archive) }
 
 emptyArchiveState :: IO ArchiveState
@@ -126,33 +112,30 @@ jsLinkBinary
   -> StgToJSConfig
   -> [FilePath]
   -> Logger
-  -> TmpFs
   -> DynFlags
   -> UnitEnv
   -> [FilePath]
   -> [UnitId]
   -> IO ()
-jsLinkBinary lc_cfg cfg js_srcs logger tmpfs dflags u_env objs dep_pkgs = do
-  -- additional objects to link are passed as FileOption ldInputs...
-  let cmdline_objs = [ f | FileOption _ f <- ldInputs dflags ]
-  -- discriminate JavaScript sources from real object files.
-  (cmdline_js_srcs, cmdline_js_objs) <- partitionM isJsFile cmdline_objs
-  let
-      objs'    = map ObjFile (objs ++ cmdline_js_objs)
-      js_srcs' = js_srcs ++ cmdline_js_srcs
-      isRoot _ = True
-      exe      = jsExeFileName dflags
-
-  env <- newGhcjsEnv
-  void $ link env lc_cfg cfg logger tmpfs dflags u_env exe mempty dep_pkgs objs' js_srcs' isRoot mempty
+jsLinkBinary lc_cfg cfg js_srcs logger dflags u_env objs dep_pkgs
+  | lcNoJSExecutables lc_cfg = return ()
+  | otherwise = do
+    -- additional objects to link are passed as FileOption ldInputs...
+    let cmdline_objs = [ f | FileOption _ f <- ldInputs dflags ]
+    -- discriminate JavaScript sources from real object files.
+    (cmdline_js_srcs, cmdline_js_objs) <- partitionM isJsFile cmdline_objs
+    let
+        objs'    = map ObjFile (objs ++ cmdline_js_objs)
+        js_srcs' = js_srcs ++ cmdline_js_srcs
+        isRoot _ = True
+        exe      = jsExeFileName dflags
+
+    void $ link lc_cfg cfg logger u_env exe mempty dep_pkgs objs' js_srcs' isRoot mempty
 
 -- | link and write result to disk (jsexe directory)
-link :: GhcjsEnv
-     -> JSLinkConfig
+link :: JSLinkConfig
      -> StgToJSConfig
      -> Logger
-     -> TmpFs
-     -> DynFlags
      -> UnitEnv
      -> FilePath               -- ^ output file/directory
      -> [FilePath]             -- ^ include path for home package
@@ -162,193 +145,153 @@ link :: GhcjsEnv
      -> (ExportedFun -> Bool)  -- ^ functions from the objects to use as roots (include all their deps)
      -> Set ExportedFun        -- ^ extra symbols to link in
      -> IO ()
-link env lc_cfg cfg logger tmpfs dflags unit_env out include pkgs objFiles jsFiles isRootFun extraStaticDeps
-  | lcNoJSExecutables lc_cfg = return ()
-  | otherwise = do
-      link_res <- link' env lc_cfg cfg logger unit_env out include pkgs objFiles jsFiles
-                    isRootFun extraStaticDeps
+link lc_cfg cfg logger unit_env out _include units objFiles jsFiles isRootFun extraStaticDeps = do
 
-      let genBase = isJust (lcGenBase lc_cfg)
-          jsExt | genBase   = "base.js"
-                | otherwise = "js"
+      -- create output directory
       createDirectoryIfMissing False out
-      linkOut link_res (out </> "out" <.> jsExt)
+
+      -------------------------------------------------------------
+      -- link all Haskell code (program + dependencies) into out.js
+
+      -- compute dependencies
+      (dep_map, dep_units, all_deps, rts_wired_functions, dep_archives)
+        <- computeLinkDependencies cfg logger out unit_env units objFiles extraStaticDeps isRootFun
+
+      -- retrieve code for dependencies
+      mods <- collectDeps dep_map dep_units all_deps
+
+      -- LTO + rendering of JS code
+      link_stats <- withBinaryFile (out </> "out.js") WriteMode $ \h -> do
+        (_compactorState, stats) <- renderLinker lc_cfg cfg h emptyCompactorState rts_wired_functions mods jsFiles
+        pure stats
+
+      -------------------------------------------------------------
 
       -- dump foreign references file (.frefs)
       unless (lcOnlyOut lc_cfg) $ do
-        let frefsFile   = if genBase then "out.base.frefs" else "out.frefs"
-            jsonFrefs  = mempty
+        let frefsFile  = "out.frefs"
+            -- frefs      = concatMap mc_frefs mods
+            jsonFrefs  = mempty -- FIXME: toJson frefs
 
         BL.writeFile (out </> frefsFile <.> "json") jsonFrefs
         BL.writeFile (out </> frefsFile <.> "js")
                      ("h$checkForeignRefs(" <> jsonFrefs <> ");")
 
-        -- dump stats
-        unless (lcNoStats lc_cfg) $ do
-          let statsFile = if genBase then "out.base.stats" else "out.stats"
-          let stats = linkerStats (linkOutMetaSize link_res) (linkOutStats link_res)
-          writeFile (out </> statsFile) stats
-
-        -- link with the RTS
-        unless (lcNoRts lc_cfg) $ do
-          BL.writeFile (out </> "rts.js") ( BLC.pack rtsDeclsText
-                                           <> BLC.pack (rtsText cfg))
-
-        let all_lib_js = linkLibA link_res
-        lla'    <- streamShims <$> readShimFiles logger tmpfs dflags unit_env all_lib_js
-        llarch' <- mapM readShimsArchive (linkLibAArch link_res)
-        let lib_js = BL.fromChunks $! llarch' ++ lla'
-        BL.writeFile (out </> "lib" <.> jsExt) lib_js
-
-        if genBase
-          then panic "support for base bundle not implemented"
-            -- generateBase out (linkBase link_res)
-          else when (    not (lcOnlyOut lc_cfg)
-                      && not (lcNoRts   lc_cfg)
-                      && not (usingBase lc_cfg)
-                    )
-               $ do
-                 _ <- combineFiles lc_cfg out
-                 writeHtml    out
-                 writeRunMain out
-                 writeRunner lc_cfg out
-                 writeExterns out
-
-readShimsArchive :: FilePath -> IO B.ByteString
-readShimsArchive ar_file = do
-  (Ar.Archive entries) <- Ar.loadAr ar_file
-  jsdata <- catMaybes <$> mapM readEntry entries
-  return (B.intercalate "\n" jsdata)
-    where
-      readEntry :: Ar.ArchiveEntry -> IO (Maybe B.ByteString)
-      readEntry ar_entry
-        | isJsArchiveEntry ar_entry = pure $ Just (Ar.filedata ar_entry)
-        | otherwise = pure Nothing
-
-
-
--- | link in memory
-link' :: GhcjsEnv
-      -> JSLinkConfig
-      -> StgToJSConfig
-      -> Logger
-      -> UnitEnv
-      -> String                     -- ^ target (for progress message)
-      -> [FilePath]                 -- ^ include path for home package
-      -> [UnitId]                   -- ^ packages to link
-      -> [LinkedObj]                -- ^ the object files we're linking
-      -> [FilePath]                 -- ^ extra js files to include
-      -> (ExportedFun -> Bool)      -- ^ functions from the objects to use as roots (include all their deps)
-      -> Set ExportedFun            -- ^ extra symbols to link in
-      -> IO LinkResult
-link' env lc_cfg cfg logger unit_env target _include pkgs objFiles jsFiles isRootFun extraStaticDeps
-  = do
-      let ue_state = ue_units $ unit_env
-      (objDepsMap, objRequiredUnits) <- loadObjDeps objFiles
-
-      let rootSelector | Just baseMod <- lcGenBase lc_cfg =
-                           \(ExportedFun  m _s) -> m == baseMod
-                       | otherwise = isRootFun
-          roots    = S.fromList . filter rootSelector $ concatMap (M.keys . depsHaskellExported . fst) (M.elems objDepsMap)
-          rootMods = map (moduleNameString . moduleName . head) . group . sort . map funModule . S.toList $ roots
-          objPkgs  = map moduleUnitId $ nub (M.keys objDepsMap)
-
-      when (logVerbAtLeast logger 2) $ void $
-        compilationProgressMsg logger . text $
-          case lcGenBase lc_cfg of
-            Just baseMod -> "Linking base bundle " ++ target ++ " (" ++ moduleNameString (moduleName baseMod) ++ ")"
-            _            -> "Linking " ++ target ++ " (" ++ intercalate "," rootMods ++ ")"
-
-      base <- case lcUseBase lc_cfg of
-        NoBase        -> return emptyBase
-        BaseFile _file -> panic "support for base bundle not implemented" -- loadBase file
-        BaseState b   -> return b
-
-      let (rts_wired_units, rts_wired_functions) = rtsDeps pkgs
-
-      let preload_units = preloadUnits (ue_units unit_env)
-
-      -- all the units we want to link together, without their dependencies
-      let root_units = nub (preload_units ++ rts_wired_units ++ reverse objPkgs ++ reverse pkgs)
-
-      -- all the units we want to link together, including their dependencies
-      let all_units = transitive_units root_units
-
-          -- compute transitive unit dependencies
-          transitive_units = reverse . transitive_units_ []
-          transitive_units_ xs = \case
-            []     -> xs
-            (u:us)
-              | u == mainUnitId -> transitive_units_ (u:xs) us
-              | otherwise       -> case lookupUnitId ue_state u of
-                  Nothing -> unit_not_found u
-                  Just d  ->
-                    let deps         = unitDepends d
-                        is_new_dep x = x `notElem` xs
-                        new_deps     = filter is_new_dep deps
-                    in case new_deps of
-                      []
-                        | u `elem` xs -> transitive_units_ xs us
-                        | otherwise   -> transitive_units_ (u:xs) us
-                      ds -> transitive_units_ xs     (ds ++ (u:us))
-
-          unit_not_found u = throwGhcException (CmdLineError ("unknown unit: " ++ unpackFS (unitIdFS u)))
-
-      let
-        -- units that weren't already linked in the base bundle
-        -- (or all of them, if no base bundle)
-        bundle_diff = filter (not . is_in_bundle) all_units
-        is_in_bundle uid = uid `elem` basePkgs base
-
-      (archsDepsMap, archsRequiredUnits) <- loadArchiveDeps env =<< getPackageArchives cfg (map snd $ mkPkgLibPaths ue_state all_units)
-      pkgArchs <- getPackageArchives cfg (map snd $ mkPkgLibPaths ue_state bundle_diff)
-
-      when (logVerbAtLeast logger 2) $
-        logInfo logger $ hang (text "Linking with archives:") 2 (vcat (fmap text pkgArchs))
+      -- dump stats
+      unless (lcNoStats lc_cfg) $ do
+        let statsFile = "out.stats"
+        writeFile (out </> statsFile) (renderLinkerStats link_stats)
+
+      -- link generated RTS parts into rts.js
+      unless (lcNoRts lc_cfg) $ do
+        BL.writeFile (out </> "rts.js") ( BLC.pack rtsDeclsText
+                                         <> BLC.pack (rtsText cfg))
+
+      -- link dependencies' JS files into lib.js
+      withBinaryFile (out </> "lib.js") WriteMode $ \h -> do
+        forM_ dep_archives $ \archive_file -> do
+          Ar.Archive entries <- Ar.loadAr archive_file
+          forM_ entries $ \entry -> do
+            case getJsArchiveEntry entry of
+              Nothing -> return ()
+              Just bs -> do
+                B.hPut   h bs
+                hPutChar h '\n'
+
+      -- link everything together into all.js
+      when (generateAllJs lc_cfg) $ do
+        _ <- combineFiles lc_cfg out
+        writeHtml    out
+        writeRunMain out
+        writeRunner lc_cfg out
+        writeExterns out
+
+
+computeLinkDependencies
+  :: StgToJSConfig
+  -> Logger
+  -> String
+  -> UnitEnv
+  -> [UnitId]
+  -> [LinkedObj]
+  -> Set ExportedFun
+  -> (ExportedFun -> Bool)
+  -> IO (Map Module (Deps, DepsLocation), [UnitId], Set LinkableUnit, Set ExportedFun, [FilePath])
+computeLinkDependencies cfg logger target unit_env units objFiles extraStaticDeps isRootFun = do
+  env <- newGhcjsEnv
+  (objDepsMap, objRequiredUnits) <- loadObjDeps objFiles
 
-      -- compute dependencies
-      let dep_units      = all_units ++ [homeUnitId (ue_unsafeHomeUnit $ unit_env)]
-          dep_map        = objDepsMap `M.union` archsDepsMap
-          excluded_units = baseUnits base -- already linked units
-          dep_fun_roots  = roots `S.union` rts_wired_functions `S.union` extraStaticDeps
-          dep_unit_roots = archsRequiredUnits ++ objRequiredUnits
+  let roots    = S.fromList . filter isRootFun $ concatMap (M.keys . depsHaskellExported . fst) (M.elems objDepsMap)
+      rootMods = map (moduleNameString . moduleName . head) . group . sort . map funModule . S.toList $ roots
+      objPkgs  = map moduleUnitId $ nub (M.keys objDepsMap)
 
-      all_deps <- getDeps (fmap fst dep_map) excluded_units dep_fun_roots dep_unit_roots
+  when (logVerbAtLeast logger 2) $ void $
+    compilationProgressMsg logger $ hcat
+      [ text "Linking ", text target, text " (", text (intercalate "," rootMods), char ')' ]
 
-      when (logVerbAtLeast logger 2) $
-        logInfo logger $ hang (text "Units to link:") 2 (vcat (fmap ppr dep_units))
-        -- logInfo logger $ hang (text "All deps:") 2 (vcat (fmap ppr (S.toList all_deps)))
+  let (rts_wired_units, rts_wired_functions) = rtsDeps units
 
-      -- retrieve code for dependencies
-      code <- collectDeps dep_map dep_units all_deps
-
-      (outJs, metaSize, compactorState, stats) <- renderLinker lc_cfg cfg (baseCompactorState base) rts_wired_functions code jsFiles
-      let base'  = Base compactorState (nub $ basePkgs base ++ bundle_diff)
-                         (all_deps `S.union` baseUnits base)
-
-      return $ LinkResult
-        { linkOut         = outJs
-        , linkOutStats    = stats
-        , linkOutMetaSize = metaSize
-        , linkForeignRefs = concatMap mc_frefs code
-        , linkLibRTS      = [] -- (filter (`notElem` alreadyLinkedBefore) shimsBefore)
-        , linkLibA        = [] -- (filter (`notElem` alreadyLinkedAfter)  shimsAfter)
-        , linkLibAArch    = pkgArchs
-        , linkBase        = base'
-        }
-  where
+  let ue_state = ue_units $ unit_env
+  let preload_units = preloadUnits (ue_units unit_env)
+
+  -- all the units we want to link together, without their dependencies
+  let root_units = nub (preload_units ++ rts_wired_units ++ reverse objPkgs ++ reverse units)
 
-    mkPkgLibPaths :: UnitState -> [UnitId] -> [(UnitId, ([FilePath],[String]))]
-    mkPkgLibPaths u_st
-      = map (\k -> ( k
-                   , (getInstalledPackageLibDirs u_st k
-                   , getInstalledPackageHsLibs u_st k)
-                   ))
+  -- all the units we want to link together, including their dependencies
+  let all_units = transitive_units root_units
+
+      -- compute transitive unit dependencies
+      transitive_units = reverse . transitive_units_ []
+      transitive_units_ xs = \case
+        []     -> xs
+        (u:us)
+          | u == mainUnitId -> transitive_units_ (u:xs) us
+          | otherwise       -> case lookupUnitId ue_state u of
+              Nothing -> unit_not_found u
+              Just d  ->
+                let deps         = unitDepends d
+                    is_new_dep x = x `notElem` xs
+                    new_deps     = filter is_new_dep deps
+                in case new_deps of
+                  []
+                    | u `elem` xs -> transitive_units_ xs us
+                    | otherwise   -> transitive_units_ (u:xs) us
+                  ds -> transitive_units_ xs     (ds ++ (u:us))
+
+      unit_not_found u = throwGhcException (CmdLineError ("unknown unit: " ++ unpackFS (unitIdFS u)))
+
+      mkPkgLibPaths :: UnitState -> [UnitId] -> [(UnitId, ([FilePath],[String]))]
+      mkPkgLibPaths u_st
+        = map (\k -> ( k
+                     , (getInstalledPackageLibDirs u_st k
+                     , getInstalledPackageHsLibs u_st k)
+                     ))
+
+  dep_archives <- getPackageArchives cfg (map snd $ mkPkgLibPaths ue_state all_units)
+  (archsDepsMap, archsRequiredUnits) <- loadArchiveDeps env dep_archives
+
+  when (logVerbAtLeast logger 2) $
+    logInfo logger $ hang (text "Linking with archives:") 2 (vcat (fmap text dep_archives))
+
+  -- compute dependencies
+  let dep_units      = all_units ++ [homeUnitId (ue_unsafeHomeUnit $ unit_env)]
+      dep_map        = objDepsMap `M.union` archsDepsMap
+      excluded_units = S.empty
+      dep_fun_roots  = roots `S.union` rts_wired_functions `S.union` extraStaticDeps
+      dep_unit_roots = archsRequiredUnits ++ objRequiredUnits
+
+  all_deps <- getDeps (fmap fst dep_map) excluded_units dep_fun_roots dep_unit_roots
+
+  when (logVerbAtLeast logger 2) $
+    logInfo logger $ hang (text "Units to link:") 2 (vcat (fmap ppr dep_units))
+    -- logInfo logger $ hang (text "All deps:") 2 (vcat (fmap ppr (S.toList all_deps)))
+
+  return (dep_map, dep_units, all_deps, rts_wired_functions, dep_archives)
 
 
 data ModuleCode = ModuleCode
   { mc_module   :: !Module
-  , mc_js_code  :: !JStat
+  , mc_js_code  :: JStat
   , mc_exports  :: !B.ByteString        -- ^ rendered exports
   , mc_closures :: ![ClosureInfo]
   , mc_statics  :: ![StaticInfo]
@@ -358,12 +301,13 @@ data ModuleCode = ModuleCode
 renderLinker
   :: JSLinkConfig
   -> StgToJSConfig
+  -> Handle
   -> CompactorState
   -> Set ExportedFun
   -> [ModuleCode] -- ^ linked code per module
   -> [FilePath]   -- ^ additional JS files
-  -> IO (FilePath -> IO (), Int64, CompactorState, LinkerStats)
-renderLinker settings cfg renamer_state rtsDeps code jsFiles = do
+  -> IO (CompactorState, LinkerStats)
+renderLinker settings cfg h renamer_state rtsDeps mods jsFiles = do
 
   -- extract ModuleCode fields required to make a LinkedUnit
   let code_to_linked_unit c = LinkedUnit
@@ -375,47 +319,53 @@ renderLinker settings cfg renamer_state rtsDeps code jsFiles = do
   -- call the compactor
   let (renamer_state', compacted, meta) = compact settings cfg renamer_state
                                             (map ((\(LexicalFastString f) -> f) . funSymbol) $ S.toList rtsDeps)
-                                            (map code_to_linked_unit code)
-
-  js_files_contents <- mconcat <$> mapM BL.readFile jsFiles
+                                            (map code_to_linked_unit mods)
 
   let
-    render_all fp = do
-      BL.writeFile fp (rendered_all <> js_files_contents)
-
-    -- render result into JS code
-    rendered_all     = mconcat [mconcat rendered_mods, rendered_meta, rendered_exports]
-    rendered_mods    = fmap render_js compacted
-    rendered_meta    = render_js meta
-    doc_str          = renderStyle (Style
-                          { lineLength = 100
-                          , ribbonsPerLine = 1.5
-                          , mode = LeftMode
-                            -- Faster to write but uglier code.
-                            -- Use "PageMode False" to enable nicer code instead
-                          })
-    render_js x      = BL.fromChunks [BC.pack (doc_str (pretty x)), BC.pack "\n"]
-    rendered_exports = BL.fromChunks (map mc_exports code)
-    meta_length      = fromIntegral (BL.length rendered_meta)
-    -- make LinkerStats entry for the given ModuleCode.
-    -- For now, only associate generated code size in bytes to each module
-    mk_stat c b = (mc_module c, fromIntegral . BL.length $ b)
-    stats = LinkerStats $ M.fromList $ zipWith mk_stat code rendered_mods
-
-  pure
-    ( render_all
-    , meta_length
-    , renamer_state'
-    , stats
-    )
+    putBS   = B.hPut h
+    putJS x = do
+      before <- hTell h
+      Ppr.printLeftRender h (pretty x)
+      hPutChar h '\n'
+      after <- hTell h
+      pure $! (after - before)
+
+  ---------------------------------------------------------
+  -- Pretty-print JavaScript code for all the dependencies.
+  --
+  -- We have to pretty-print at link time because we want to be able to perform
+  -- global link-time optimisations (e.g. renamings) on the whole generated JS
+  -- file.
+
+  -- modules themselves
+  mod_sizes <- forM (mods `zip` compacted) $ \(mod,compacted_mod) -> do
+    !mod_size <- fromIntegral <$> putJS compacted_mod
+    let !mod_mod  = mc_module mod
+    pure (mod_mod, mod_size)
+
+  -- metadata
+  !meta_length <- fromIntegral <$> putJS meta
+
+  -- exports
+  mapM_ (putBS . mc_exports) mods
+
+  -- explicit additional JS files
+  mapM_ (\i -> B.readFile i >>= putBS) jsFiles
+
+  -- stats
+  let link_stats = LinkerStats
+        { bytesPerModule     = M.fromList mod_sizes
+        , packedMetaDataSize = meta_length
+        }
+
+  pure (renamer_state', link_stats)
 
 -- | Render linker stats
-linkerStats :: Int64         -- ^ code size of packed metadata
-            -> LinkerStats   -- ^ code size per module
-            -> String
-linkerStats meta s =
+renderLinkerStats :: LinkerStats -> String
+renderLinkerStats s =
   intercalate "\n\n" [meta_stats, package_stats, module_stats] <> "\n\n"
   where
+    meta = packedMetaDataSize s
     meta_stats = "number of modules: " <> show (length bytes_per_mod)
                  <> "\npacked metadata:   " <> show meta
 
@@ -453,18 +403,19 @@ getPackageArchives cfg pkgs =
     profSuff | csProf cfg = "_p"
              | otherwise  = ""
 
-{- | convenience: combine rts.js, lib.js, out.js to all.js that can be run
-     directly with node.js or SpiderMonkey jsshell
- -}
+
+-- | Combine rts.js, lib.js, out.js to all.js that can be run
+-- directly with node.js or SpiderMonkey jsshell
 combineFiles :: JSLinkConfig
              -> FilePath
              -> IO ()
 combineFiles cfg fp = do
-  files   <- mapM (B.readFile.(fp</>)) ["rts.js", "lib.js", "out.js"]
-  let runMain
-        | lcNoHsMain cfg = mempty
-        | otherwise      = runMainJS
-  writeBinaryFile (fp</>"all.js") (mconcat (files ++ [runMain]))
+  let files = map (fp </>) ["rts.js", "lib.js", "out.js"]
+  withBinaryFile (fp </> "all.js") WriteMode $ \h -> do
+    let cpy i = B.readFile i >>= B.hPut h
+    mapM_ cpy files
+    unless (lcNoHsMain cfg) $ do
+      B.hPut h runMainJS
 
 -- | write the index.html file that loads the program if it does not exit
 writeHtml
@@ -808,20 +759,30 @@ loadArchiveDeps' archives = do
               pure $ Just (deps, ArchiveFile ar_file)
 
 -- | Predicate to check that an entry in Ar is a JS source
-isJsArchiveEntry :: Ar.ArchiveEntry -> Bool
-isJsArchiveEntry entry = isJsBS (Ar.filedata entry)
+-- and to return it without its header
+getJsArchiveEntry :: Ar.ArchiveEntry -> Maybe B.ByteString
+getJsArchiveEntry entry = getJsBS (Ar.filedata entry)
 
 -- | Predicate to check that a file is a JS source
 isJsFile :: FilePath -> IO Bool
-isJsFile fp = isJsBS <$> B.readFile fp
+isJsFile fp = withBinaryFile fp ReadMode $ \h -> do
+  bs <- B.hGet h jsHeaderLength
+  pure (isJsBS bs)
 
 isJsBS :: B.ByteString -> Bool
-isJsBS bs = B.take (B.length jsHeader) bs == jsHeader
-  where
-    -- Header added to JS sources to discriminate them from other object files.
-    -- They all have .o extension but JS sources have this header.
-    jsHeader :: B.ByteString
-    jsHeader = "//JavaScript"
+isJsBS bs = isJust (getJsBS bs)
+
+-- | Get JS source with its header (if it's one)
+getJsBS :: B.ByteString -> Maybe B.ByteString
+getJsBS bs = B.stripPrefix jsHeader bs
+
+-- Header added to JS sources to discriminate them from other object files.
+-- They all have .o extension but JS sources have this header.
+jsHeader :: B.ByteString
+jsHeader = "//JavaScript"
+
+jsHeaderLength :: Int
+jsHeaderLength = B.length jsHeader
 
 
 


=====================================
compiler/GHC/StgToJS/Linker/Types.hs
=====================================
@@ -15,23 +15,6 @@
 --                Josh Meredith  <josh.meredith at iohk.io>
 -- Stability   :  experimental
 --
---  A base bundle is used for incremental linking. it contains information about
---  the symbols that have already been linked. These symbols are not included
---  again in the incrementally linked program.
---
--- The Base data structure contains the information we need to do incremental
--- linking against a base bundle.
---
---  base file format:
---  - GHCJSBASE
---  - [renamer state]
---  - [linkedPackages]
---  - [packages]
---  - [modules]
---  - [symbols]
---
---  The base contains a CompactorState for consistent renaming of private names
---  and packed initialization of info tables and static closures.
 -----------------------------------------------------------------------------
 
 module GHC.StgToJS.Linker.Types where
@@ -41,10 +24,7 @@ import           GHC.StgToJS.Object
 import           GHC.StgToJS.Types (ClosureInfo, StaticInfo)
 
 import           GHC.Unit.Types
-import           GHC.Utils.Outputable hiding ((<>))
 import           GHC.Data.FastString
-import           GHC.Driver.Env.Types (HscEnv)
-import           GHC.Types.Error      (Messages)
 import           GHC.Types.Unique.Map
 
 import           Control.Monad
@@ -54,14 +34,10 @@ import           Data.ByteString      (ByteString)
 import           Data.Map.Strict      (Map)
 import qualified Data.Map.Strict      as M
 import           Data.Set             (Set)
-import qualified Data.Set             as S
-import qualified Data.IntMap          as I
 
 import           Control.Concurrent.MVar
-import qualified Control.Exception as E
 
 import           System.IO
-import           System.Process
 
 import           Prelude
 
@@ -279,72 +255,6 @@ addLabel new cs =
                    newCnt = cnt + 1
                in cs {csEntries = newLabels, csNumLabels = newCnt}
 
-
---------------------------------------------------------------------------------
--- Base
---------------------------------------------------------------------------------
-
--- | The Base bundle. Used for incremental linking it maintains the compactor
--- state the base packages and units.
-data Base = Base { baseCompactorState :: CompactorState
-                 , basePkgs           :: [UnitId]
-                 , baseUnits          :: Set (Module, Int)
-                 }
-
--- | Custom Show for the @Base@ bundle
-showBase :: Base -> String
-showBase b = unlines
-  [ "Base:"
-  , "  packages: " ++ showSDocUnsafe (ppr (basePkgs b))
-  , "  number of units: " ++ show (S.size $ baseUnits b)
-  , "  renaming table size: " ++
-    show (sizeUniqMap . csNameMap . baseCompactorState $ b)
-  ]
-
--- | The empty @Base@ bundle
-emptyBase :: Base
-emptyBase = Base emptyCompactorState [] S.empty
-
--- | make a @Base@ state from a @CompactorState@: empty the current symbols
---   sets, move everything to the parent
-makeCompactorParent :: CompactorState -> CompactorState
-makeCompactorParent (CompactorState is nm es nes ss nss ls nls pes pss pls sts)
-  = CompactorState is
-                   nm
-                   emptyUniqMap 0
-                   emptyUniqMap 0
-                   emptyUniqMap 0
-                   (plusUniqMap (fmap (+nes) pes) es)
-                   (plusUniqMap (fmap (+nss) pss) ss)
-                   (plusUniqMap (fmap (+nls) pls) ls)
-                   sts
-
--- | There are 3 ways the linker can use @Base at . We can not use it, and thus not
--- do any incremental linking. We can load it from a file, where we assume that
--- the symbols from the bundle and their dependencies have already been loaded.
--- In this case We must save the CompactorState so that we can do consistent
--- renaming. Or we can use a Base that is already in memory.
---
--- Incremental linking greatly improves link time and can also be used in
--- multi-page or repl-type applications to serve most of the code from a static
--- location, reloading only the small parts that are actually different.
-data UseBase = NoBase             -- ^ don't use incremental linking
-             | BaseFile  FilePath -- ^ load base from file
-             | BaseState Base     -- ^ use this base
-
-instance Show UseBase where
-  show NoBase       = "NoBase"
-  show BaseFile {}  = "BaseFile"
-  show BaseState {} = "BaseState"
-
-instance Monoid UseBase where
-  mempty             = NoBase
-
-instance Semigroup UseBase where
-  x <> NoBase = x
-  _ <> x      = x
-
-
 --------------------------------------------------------------------------------
 -- Linker Config
 --------------------------------------------------------------------------------
@@ -361,53 +271,57 @@ data JSLinkConfig =
                , lcOnlyOut            :: Bool
                , lcNoRts              :: Bool
                , lcNoStats            :: Bool
-               , lcGenBase            :: Maybe Module   -- ^ base module
-               , lcUseBase            :: UseBase
                , lcLinkJsLib          :: Maybe String
                , lcJsLibOutputDir     :: Maybe FilePath
                , lcJsLibSrcs          :: [FilePath]
                , lcDedupe             :: Bool
                }
 
--- | Check if we are using the @Base@ bundle, or not.
-usingBase :: JSLinkConfig -> Bool
-usingBase s | NoBase <- lcUseBase s = False
-            | otherwise             = True
-
 -- | we generate a runnable all.js only if we link a complete application,
 --   no incremental linking and no skipped parts
 generateAllJs :: JSLinkConfig -> Bool
-generateAllJs s
-  | NoBase <- lcUseBase s = not (lcOnlyOut s) && not (lcNoRts s)
-  | otherwise             = False
+generateAllJs s = not (lcOnlyOut s) && not (lcNoRts s)
 
 instance Monoid JSLinkConfig where
-  mempty = JSLinkConfig False   False   False   False False
-                        Nothing Nothing Nothing False
-                        False   False   Nothing NoBase
-                        Nothing Nothing mempty  False
+  mempty = JSLinkConfig
+            { lcNativeExecutables  = False
+            , lcNativeToo          = False
+            , lcBuildRunner        = False
+            , lcNoJSExecutables    = False
+            , lcNoHsMain           = False
+            , lcStripProgram       = Nothing
+            , lcLogCommandLine     = Nothing
+            , lcGhc                = Nothing
+            , lcOnlyOut            = False
+            , lcNoRts              = False
+            , lcNoStats            = False
+            , lcLinkJsLib          = Nothing
+            , lcJsLibOutputDir     = Nothing
+            , lcJsLibSrcs          = mempty
+            , lcDedupe             = False
+            }
 
 instance Semigroup JSLinkConfig where
-  (<>) (JSLinkConfig ne1 nn1 bc1 nj1 noHs1 sp1 lc1 gh1 oo1 nr1 ns1 gb1 ub1 ljsl1 jslo1 jslsrc1 dd1)
-       (JSLinkConfig ne2 nn2 bc2 nj2 noHs2 sp2 lc2 gh2 oo2 nr2 ns2 gb2 ub2 ljsl2 jslo2 jslsrc2 dd2) =
-          JSLinkConfig (ne1 || ne2)
-                        (nn1 || nn2)
-                        (bc1 || bc2)
-                        (nj1 || nj2)
-                        (noHs1 || noHs2)
-                        (sp1 `mplus` sp2)
-                        (lc1 `mplus` lc2)
-                        (gh1 `mplus` gh2)
-                        (oo1 || oo2)
-                        (nr1 || nr2)
-                        (ns1 || ns2)
-                        (gb1 `mplus` gb2)
-                        (ub1 <> ub2)
-                        (ljsl1 <> ljsl2)
-                        (jslo1 <> jslo2)
-                        (jslsrc1 <> jslsrc2)
-                        (dd1 || dd2)
-
+  (<>) c1 c2 =
+    let comb :: (a -> a -> a) -> (JSLinkConfig -> a) -> a
+        comb f a = f (a c1) (a c2)
+    in JSLinkConfig
+            { lcNativeExecutables  = comb (||) lcNativeExecutables
+            , lcNativeToo          = comb (||) lcNativeToo
+            , lcBuildRunner        = comb (||) lcBuildRunner
+            , lcNoJSExecutables    = comb (||) lcNoJSExecutables
+            , lcNoHsMain           = comb (||) lcNoHsMain
+            , lcStripProgram       = comb mplus lcStripProgram
+            , lcLogCommandLine     = comb mplus lcLogCommandLine
+            , lcGhc                = comb mplus lcGhc
+            , lcOnlyOut            = comb (||) lcOnlyOut
+            , lcNoRts              = comb (||) lcNoRts
+            , lcNoStats            = comb (||) lcNoStats
+            , lcLinkJsLib          = comb (<>) lcLinkJsLib
+            , lcJsLibOutputDir     = comb (<>) lcJsLibOutputDir
+            , lcJsLibSrcs          = comb (<>) lcJsLibSrcs
+            , lcDedupe             = comb (||) lcDedupe
+            }
 
 --------------------------------------------------------------------------------
 -- Linker Environment
@@ -430,68 +344,13 @@ data LinkedObj
   | ObjLoaded String Object -- ^ already loaded: description and payload
 
 data GhcjsEnv = GhcjsEnv
-  { compiledModules   :: MVar (Map Module ByteString)  -- ^ keep track of already compiled modules so we don't compile twice for dynamic-too
-  , thRunners         :: MVar THRunnerState            -- ^ template haskell runners
-  , thSplice          :: MVar Int
-  , linkerArchiveDeps :: MVar (Map (Set FilePath)
+  { linkerArchiveDeps :: MVar (Map (Set FilePath)
                                    (Map Module (Deps, DepsLocation)
                                    , [LinkableUnit]
                                    )
                               )
-  , pluginState       :: MVar (Maybe HscEnv)
   }
 
 -- | return a fresh @GhcjsEnv@
 newGhcjsEnv :: IO GhcjsEnv
 newGhcjsEnv = GhcjsEnv <$> newMVar M.empty
-                       <*> newMVar emptyTHRunnerState
-                       <*> newMVar 0
-                       <*> newMVar M.empty
-                       <*> newMVar Nothing
-
-
---------------------------------------------------------------------------------
--- Template Haskell
---------------------------------------------------------------------------------
-
-data THRunnerState = THRunnerState
-  { activeRunners :: Map String THRunner
-  , idleRunners   :: [THRunner]
-  }
-
-data THRunner =
-  THRunner { thrProcess        :: ProcessHandle
-           , thrHandleIn       :: Handle
-           , thrHandleErr      :: Handle
-           , thrBase           :: MVar Base
-           , thrRecover        :: MVar [Messages String]
-           , thrExceptions     :: MVar (I.IntMap E.SomeException)
-           }
-
-emptyTHRunnerState :: THRunnerState
-emptyTHRunnerState = THRunnerState mempty mempty
-
-
---------------------------------------------------------------------------------
--- Template Haskell helpers
---------------------------------------------------------------------------------
-
--- | Add an idle runner to the set of @idleRunners@ in @THRunnerState@
-consIdleRunner :: THRunner -> THRunnerState -> THRunnerState
-consIdleRunner r s = s { idleRunners = r : idleRunners s }
-
--- | Remove an idle runner from the set of @idleRunners@ in @THRunnerState@
-unconsIdleRunner :: THRunnerState -> Maybe (THRunner, THRunnerState)
-unconsIdleRunner s
-  | (r:xs) <- idleRunners s = Just (r, s { idleRunners = xs })
-  | otherwise               = Nothing
-
--- | Remove an active runner from the set of @activeRunners@ in @THRunnerState@
-deleteActiveRunner :: String -> THRunnerState -> THRunnerState
-deleteActiveRunner m s =
-  s { activeRunners = M.delete m (activeRunners s) }
-
--- | Add an active runner to the set of @activeRunners@ in @THRunnerState@
-insertActiveRunner :: String -> THRunner -> THRunnerState -> THRunnerState
-insertActiveRunner m runner s =
-  s { activeRunners = M.insert m runner (activeRunners s) }


=====================================
compiler/GHC/StgToJS/Object.hs
=====================================
@@ -168,7 +168,7 @@ putObjUnit :: BinHandle -> ObjUnit -> IO ()
 putObjUnit bh (ObjUnit _syms b c d e f g) = do
     put_ bh b
     put_ bh c
-    put_ bh d
+    lazyPut bh d
     put_ bh e
     put_ bh f
     put_ bh g
@@ -179,11 +179,19 @@ getObjUnit :: [FastString] -> BinHandle -> IO ObjUnit
 getObjUnit syms bh = do
     b <- get bh
     c <- get bh
-    d <- get bh
+    d <- lazyGet bh
     e <- get bh
     f <- get bh
     g <- get bh
-    pure (ObjUnit syms b c d e f g)
+    pure $ ObjUnit
+      { oiSymbols  = syms
+      , oiClInfo   = b
+      , oiStatic   = c
+      , oiStat     = d
+      , oiRaw      = e
+      , oiFExports = f
+      , oiFImports = g
+      }
 
 
 -- | A tag that determines the kind of payload in the .o file. See


=====================================
compiler/GHC/StgToJS/Types.hs
=====================================
@@ -318,7 +318,7 @@ data ObjUnit = ObjUnit
   { oiSymbols  :: ![FastString]   -- ^ toplevel symbols (stored in index)
   , oiClInfo   :: ![ClosureInfo]  -- ^ closure information of all closures in block
   , oiStatic   :: ![StaticInfo]   -- ^ static closure data
-  , oiStat     :: !JStat          -- ^ the code
+  , oiStat     :: JStat           -- ^ the code
   , oiRaw      :: !BS.ByteString  -- ^ raw JS code
   , oiFExports :: ![ExpFun]
   , oiFImports :: ![ForeignJSRef]


=====================================
compiler/GHC/Utils/Ppr.hs
=====================================
@@ -107,7 +107,7 @@ module GHC.Utils.Ppr (
 
         -- ** GHC-specific rendering
         printDoc, printDoc_,
-        bufLeftRender -- performance hack
+        bufLeftRender, printLeftRender -- performance hack
 
   ) where
 



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/99736889a3dc19deea9196ed63cf048511ef4c4d...b9333376918223418ba9d3e6d6724778866908cb

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/99736889a3dc19deea9196ed63cf048511ef4c4d...b9333376918223418ba9d3e6d6724778866908cb
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20221007/102afdfb/attachment-0001.html>


More information about the ghc-commits mailing list