[Git][ghc/ghc][wip/romes/hardwire-ghc-unit-id] 2 commits: Hardwire a better unit-id for ghc

Rodrigo Mesquita (@alt-romes) gitlab at gitlab.haskell.org
Tue Mar 14 16:13:23 UTC 2023



Rodrigo Mesquita pushed to branch wip/romes/hardwire-ghc-unit-id at Glasgow Haskell Compiler / GHC


Commits:
45ea44ba by romes at 2023-03-14T16:13:03+00:00
Hardwire a better unit-id for ghc

Previously, the unit-id of ghc-the-library was fixed as `ghc`.
This was done primarily because the compiler must know the unit-id of
some packages (including ghc) a-priori to define wired-in names.

However, as seen in #20742, a reinstallable `ghc` whose unit-id is fixed
to `ghc` might result in subtle bugs when different ghc's interact.

A good example of this is having GHC_A load a plugin compiled by GHC_B,
where GHC_A and GHC_B are linked to ghc-libraries that are ABI
incompatible. Without a distinction between the unit-id of the ghc library
GHC_A is linked against and the ghc library the plugin it is loading was
compiled against, we can't check compatibility.

This patch gives a slightly better unit-id to ghc (ghc-version) by
(1) Not setting -this-unit-id to ghc, but rather to the new unit-id (modulo stage0)
(2) Adding a definition to `GHC.Version` whose value is the new unit-id.
This unit-id definition is imported by `GHC.Unit.Types` and used to
set the wired-in unit-id of "ghc", which was previously fixed to "ghc"

The commits following this one will improve the unit-id with a
cabal-style package hash, ensure cabal-built ghcs also correctly use
a better unit-id, and check compatibility when loading plugins.

Note that we also ensure that ghc's unit key matches unit id, and no
longer add ghc to the WiringMap

- - - - -
0aa3abb8 by romes at 2023-03-14T16:13:12+00:00
Validate compatibility of ghcs when loading plugins

- - - - -


9 changed files:

- compiler/GHC/Driver/Session.hs
- compiler/GHC/Runtime/Loader.hs
- compiler/GHC/Unit/Types.hs
- compiler/ghc.cabal.in
- hadrian/src/Rules/Generate.hs
- hadrian/src/Settings/Builders/Ghc.hs
- hadrian/src/Settings/Packages.hs
- testsuite/tests/driver/j-space/jspace.hs
- utils/count-deps/Main.hs


Changes:

=====================================
compiler/GHC/Driver/Session.hs
=====================================
@@ -4703,6 +4703,7 @@ compilerInfo dflags
        ("Project Patch Level",         cProjectPatchLevel),
        ("Project Patch Level1",        cProjectPatchLevel1),
        ("Project Patch Level2",        cProjectPatchLevel2),
+       ("Project Unit Id",             cProjectUnitId),
        ("Booter version",              cBooterVersion),
        ("Stage",                       cStage),
        ("Build platform",              cBuildPlatformString),


=====================================
compiler/GHC/Runtime/Loader.hs
=====================================
@@ -42,10 +42,10 @@ import GHC.Driver.Env
 import GHCi.RemoteTypes     ( HValue )
 import GHC.Core.Type        ( Type, mkTyConTy )
 import GHC.Core.TyCo.Compare( eqType )
-import GHC.Core.TyCon       ( TyCon )
+import GHC.Core.TyCon       ( TyCon(tyConName) )
 
 import GHC.Types.SrcLoc        ( noSrcSpan )
-import GHC.Types.Name    ( Name, nameModule_maybe )
+import GHC.Types.Name    ( Name, nameModule, nameModule_maybe )
 import GHC.Types.Id      ( idType )
 import GHC.Types.TyThing
 import GHC.Types.Name.Occurrence ( OccName, mkVarOccFS )
@@ -55,7 +55,7 @@ import GHC.Types.Name.Reader   ( RdrName, ImportSpec(..), ImpDeclSpec(..)
 
 import GHC.Unit.Finder         ( findPluginModule, FindResult(..) )
 import GHC.Driver.Config.Finder ( initFinderOpts )
-import GHC.Unit.Module   ( Module, ModuleName )
+import GHC.Unit.Module   ( Module, ModuleName, thisGhcUnit, GenModule(moduleUnit) )
 import GHC.Unit.Module.ModIface
 import GHC.Unit.Env
 
@@ -171,7 +171,14 @@ loadPlugin' occ_name plugin_name hsc_env mod_name
             Just (name, mod_iface) ->
 
      do { plugin_tycon <- forceLoadTyCon hsc_env plugin_name
-        ; eith_plugin <- getValueSafely hsc_env name (mkTyConTy plugin_tycon)
+        ; case thisGhcUnit == (moduleUnit . nameModule . tyConName) plugin_tycon of {
+            False ->
+                throwGhcExceptionIO (CmdLineError $ showSDoc dflags $ hsep
+                          [ text "The plugin module", ppr mod_name
+                          , text "was built with a compiler that is incompatible with the one loading it"
+                          ]) ;
+            True ->
+     do { eith_plugin <- getValueSafely hsc_env name (mkTyConTy plugin_tycon)
         ; case eith_plugin of
             Left actual_type ->
                 throwGhcExceptionIO (CmdLineError $
@@ -182,7 +189,7 @@ loadPlugin' occ_name plugin_name hsc_env mod_name
                           , text "did not have the type"
                           , text "GHC.Plugins.Plugin"
                           , text "as required"])
-            Right (plugin, links, pkgs) -> return (plugin, mod_iface, links, pkgs) } } }
+            Right (plugin, links, pkgs) -> return (plugin, mod_iface, links, pkgs) } } } } }
 
 
 -- | Force the interfaces for the given modules to be loaded. The 'SDoc' parameter is used


=====================================
compiler/GHC/Unit/Types.hs
=====================================
@@ -99,6 +99,7 @@ import GHC.Data.FastString
 import GHC.Utils.Encoding
 import GHC.Utils.Fingerprint
 import GHC.Utils.Misc
+import GHC.Version (cProjectUnitId)
 
 import Control.DeepSeq
 import Data.Data
@@ -597,7 +598,7 @@ primUnitId        = UnitId (fsLit "ghc-prim")
 bignumUnitId      = UnitId (fsLit "ghc-bignum")
 baseUnitId        = UnitId (fsLit "base")
 rtsUnitId         = UnitId (fsLit "rts")
-thisGhcUnitId     = UnitId (fsLit "ghc")
+thisGhcUnitId     = UnitId (fsLit cProjectUnitId)
 interactiveUnitId = UnitId (fsLit "interactive")
 thUnitId          = UnitId (fsLit "template-haskell")
 
@@ -625,8 +626,15 @@ wiredInUnitIds =
    , baseUnitId
    , rtsUnitId
    , thUnitId
-   , thisGhcUnitId
    ]
+   -- NB: ghc is no longer part of the wired-in units since its unit-id, given
+   -- by hadrian or cabal, is no longer overwritten and now matches both the
+   -- cProjectUnitId defined in build-time-generated module GHC.Version, and
+   -- the unit key.
+   --
+   -- See also Note [About units], taking into consideration ghc is still a
+   -- wired-in unit but whose unit-id no longer needs special handling because
+   -- we take care that it matches the unit key.
 
 ---------------------------------------------------------------------
 -- Boot Modules


=====================================
compiler/ghc.cabal.in
=====================================
@@ -57,6 +57,12 @@ Flag build-tool-depends
     Description: Use build-tool-depends
     Default: True
 
+-- While the boot compiler fixes ghc's unit-id to `ghc`, the stage0 compiler must still be compiled with `-this-unit-id ghc`
+Flag hadrian-stage0
+    Description: Enable if compiling the stage0 compiler with hadrian
+    Default: False
+    Manual: True
+
 Library
     Default-Language: Haskell2010
     Exposed: False
@@ -136,9 +142,10 @@ Library
 
     Include-Dirs: .
 
-    -- We need to set the unit id to ghc (without a version number)
-    -- as it's magic.
-    GHC-Options: -this-unit-id ghc
+    if flag(hadrian-stage0)
+        -- We need to set the unit id to ghc (without a version number)
+        -- as it's magic.
+        GHC-Options: -this-unit-id ghc
 
     c-sources:
         cbits/cutils.c


=====================================
hadrian/src/Rules/Generate.hs
=====================================
@@ -533,6 +533,19 @@ generateVersionHs = do
     cProjectPatchLevel  <- getSetting ProjectPatchLevel
     cProjectPatchLevel1 <- getSetting ProjectPatchLevel1
     cProjectPatchLevel2 <- getSetting ProjectPatchLevel2
+    cProjectVersionMunged  <- getSetting ProjectVersionMunged
+    -- ROMES:TODO: First we attempt a fixed unit-id with version but without hash.
+    -- We now use a more informative unit-id for ghc. This same logic must be
+    -- done when passing -this-unit-id when building ghc (at stage0 one must
+    -- pass -this-unit-id ghc).
+    --
+    -- It's crucial that the unit-id matches the unit-key -- ghc is no longer
+    -- part of the WiringMap, so we don't to go back and forth between the
+    -- unit-id and the unit-key, because we take care here that they are the same.
+    --
+    -- One worry: How to guarantee this is the same when we install ghc with cabal
+    let cProjectUnitId = "ghc-" ++ cProjectVersionMunged
+
     return $ unlines
         [ "module GHC.Version where"
         , ""
@@ -555,6 +568,9 @@ generateVersionHs = do
         , ""
         , "cProjectPatchLevel2   :: String"
         , "cProjectPatchLevel2   = " ++ show cProjectPatchLevel2
+        , ""
+        , "cProjectUnitId :: String"
+        , "cProjectUnitId = " ++ show cProjectUnitId
         ]
 
 -- | Generate @Platform/Host.hs@ files.


=====================================
hadrian/src/Settings/Builders/Ghc.hs
=====================================
@@ -247,6 +247,16 @@ packageGhcArgs :: Args
 packageGhcArgs = do
     package <- getPackage
     ghc_ver <- readVersion <$> (expr . ghcVersionStage =<< getStage)
+    -- ROMES: Until the boot compiler no longer needs ghc's
+    -- unit-id to be "ghc", the stage0 compiler must be built
+    -- with `-this-unit-id ghc`, while the wired-in unit-id of
+    -- ghc is correctly set to the unit-id we'll generate for
+    -- stage1 (set in generateVersionHs in Rules.Generate).
+    --
+    -- However, we don't need to set the unit-id of "ghc" to "ghc" when
+    -- building stage0 because we have a flag in compiler/ghc.cabal.in that is
+    -- sets `-this-unit-id ghc` when hadrian is building stage0, which will
+    -- overwrite this one.
     pkgId   <- expr $ pkgIdentifier package
     mconcat [ arg "-hide-all-packages"
             , arg "-no-user-package-db"


=====================================
hadrian/src/Settings/Packages.hs
=====================================
@@ -77,6 +77,11 @@ packageArgs = do
             [ andM [expr ghcWithInterpreter, notStage0] `cabalFlag` "internal-interpreter"
             , notM cross `cabalFlag` "terminfo"
             , arg "-build-tool-depends"
+            -- ROMES: While the boot compiler is not updated wrt -this-unit-id
+            -- not being fixed to `ghc`, when building stage0, we must set
+            -- -this-unit-id to `ghc` because the boot compiler expects that.
+            -- We do it through a cabal flag in ghc.cabal
+            , stage0 ? arg "+hadrian-stage0"
             ]
 
           , builder (Haddock BuildPackage) ? arg ("--optghc=-I" ++ path) ]


=====================================
testsuite/tests/driver/j-space/jspace.hs
=====================================
@@ -2,6 +2,7 @@ module Main where
 
 import GHC
 import GHC.Driver.Monad
+import GHC.Driver.Session
 import System.Environment
 import GHC.Driver.Env.Types
 import GHC.Profiling
@@ -25,6 +26,9 @@ initGhcM xs = do
     let cmdOpts = ["-fforce-recomp"] ++ xs
     (df2, leftovers, _) <- parseDynamicFlags (hsc_logger session) df1 (map noLoc cmdOpts)
     setSessionDynFlags df2
+    ghcUnitId <- case lookup "Project Unit Id" (compilerInfo df2) of
+                    Nothing -> fail "failed to find ghc's unit-id in the compiler info"
+                    Just ghcUnitId -> pure ghcUnitId
     ts <- mapM (\s -> guessTarget s Nothing Nothing) $ map unLoc leftovers
     setTargets ts
     _ <- load LoadAllTargets
@@ -36,7 +40,7 @@ initGhcM xs = do
     liftIO $ do
       requestHeapCensus
       performGC
-      [ys] <- filter (isPrefixOf "ghc:GHC.Unit.Module.ModDetails.ModDetails") . lines <$> readFile "jspace.hp"
+      [ys] <- filter (isPrefixOf (ghcUnitId <> ":GHC.Unit.Module.ModDetails.ModDetails")) . lines <$> readFile "jspace.hp"
       let (n :: Int) = read (last (words ys))
       -- The output should be 50 * 8 * word_size (i.e. 3200, or 1600 on 32-bit architectures):
       -- the test contains DEPTH + WIDTH + 2 = 50 modules J, H_0, .., H_DEPTH, W_1, .., W_WIDTH,


=====================================
utils/count-deps/Main.hs
=====================================
@@ -56,25 +56,28 @@ calcDeps modName libdir =
         logger <- getLogger
         (df, _, _) <- parseDynamicFlags logger df [noLoc "-package=ghc"]
         setSessionDynFlags df
-        env <- getSession
-        loop env Map.empty [mkModuleName modName]
+        case lookup "Project Unit Id" (compilerInfo df) of
+          Nothing -> fail "failed to find ghc's unit-id in the compiler info"
+          Just ghcUnitId -> do
+            env <- getSession
+            loop ghcUnitId env Map.empty [mkModuleName modName]
   where
     -- Source imports are only guaranteed to show up in the 'mi_deps'
     -- of modules that import them directly and don’t propagate
     -- transitively so we loop.
-    loop :: HscEnv -> Map.Map ModuleName [ModuleName] -> [ModuleName] -> Ghc (Map.Map ModuleName [ModuleName])
-    loop env modules (m : ms) =
+    loop :: String -> HscEnv -> Map.Map ModuleName [ModuleName] -> [ModuleName] -> Ghc (Map.Map ModuleName [ModuleName])
+    loop ghcUnitId env modules (m : ms) =
       if m `Map.member` modules
-        then loop env modules ms
+        then loop ghcUnitId env modules ms
         else do
-          mi <- liftIO $ hscGetModuleInterface env (mkModule m)
+          mi <- liftIO $ hscGetModuleInterface env (mkModule ghcUnitId m)
           let deps = modDeps mi
           modules <- return $ Map.insert m [] modules
-          loop env (Map.insert m deps modules) $ ms ++ filter (not . (`Map.member` modules)) deps
-    loop _ modules [] = return modules
+          loop ghcUnitId env (Map.insert m deps modules) $ ms ++ filter (not . (`Map.member` modules)) deps
+    loop _ _ modules [] = return modules
 
-    mkModule :: ModuleName -> Module
-    mkModule = Module (stringToUnit "ghc")
+    mkModule :: String -> ModuleName -> Module
+    mkModule ghcUnitId = Module (stringToUnit ghcUnitId)
 
     modDeps :: ModIface -> [ModuleName]
     modDeps mi = map (gwib_mod . snd) $ Set.toList $ dep_direct_mods (mi_deps mi)



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/f5444fdc215485cefd6f928746fce545c68beea7...0aa3abb8a6ab4315c9d4ea301bf09c9915f59778

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/f5444fdc215485cefd6f928746fce545c68beea7...0aa3abb8a6ab4315c9d4ea301bf09c9915f59778
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20230314/d4c4fe37/attachment-0001.html>


More information about the ghc-commits mailing list