[Git][ghc/ghc][wip/romes/hardwire-ghc-unit-id] 2 commits: Hardwire a better unit-id for ghc

Rodrigo Mesquita (@alt-romes) gitlab at gitlab.haskell.org
Wed Mar 15 15:34:27 UTC 2023



Rodrigo Mesquita pushed to branch wip/romes/hardwire-ghc-unit-id at Glasgow Haskell Compiler / GHC


Commits:
49a43ba3 by romes at 2023-03-15T15:33:47+00:00
Hardwire a better unit-id for ghc

Previously, the unit-id of ghc-the-library was fixed as `ghc`.
This was done primarily because the compiler must know the unit-id of
some packages (including ghc) a-priori to define wired-in names.

However, as seen in #20742, a reinstallable `ghc` whose unit-id is fixed
to `ghc` might result in subtle bugs when different ghc's interact.

A good example of this is having GHC_A load a plugin compiled by GHC_B,
where GHC_A and GHC_B are linked to ghc-libraries that are ABI
incompatible. Without a distinction between the unit-id of the ghc library
GHC_A is linked against and the ghc library the plugin it is loading was
compiled against, we can't check compatibility.

This patch gives a slightly better unit-id to ghc (ghc-version) by
(1) Not setting -this-unit-id to ghc, but rather to the new unit-id (modulo stage0)
(2) Adding a definition to `GHC.Settings.Config` whose value is the new unit-id.
    (2.1) `GHC.Settings.Config` is generated by Hadrian
    (2.2) and also by cabal through `compiler/Setup.hs`
This unit-id definition is imported by `GHC.Unit.Types` and used to
set the wired-in unit-id of "ghc", which was previously fixed to "ghc"

The commits following this one will improve the unit-id with a
cabal-style package hash and check compatibility when loading plugins.

Note that we also ensure that ghc's unit key matches unit id both when
hadrian or cabal builds ghc, and in this way we no longer need to add
`ghc` to the WiringMap.

- - - - -
74411c8b by romes at 2023-03-15T15:34:16+00:00
Validate compatibility of ghcs when loading plugins

- - - - -


10 changed files:

- compiler/GHC/Driver/Session.hs
- compiler/GHC/Runtime/Loader.hs
- compiler/GHC/Unit/Types.hs
- compiler/Setup.hs
- compiler/ghc.cabal.in
- hadrian/src/Rules/Generate.hs
- hadrian/src/Settings/Builders/Ghc.hs
- hadrian/src/Settings/Packages.hs
- testsuite/tests/driver/j-space/jspace.hs
- utils/count-deps/Main.hs


Changes:

=====================================
compiler/GHC/Driver/Session.hs
=====================================
@@ -4703,6 +4703,7 @@ compilerInfo dflags
        ("Project Patch Level",         cProjectPatchLevel),
        ("Project Patch Level1",        cProjectPatchLevel1),
        ("Project Patch Level2",        cProjectPatchLevel2),
+       ("Project Unit Id",             cProjectUnitId),
        ("Booter version",              cBooterVersion),
        ("Stage",                       cStage),
        ("Build platform",              cBuildPlatformString),


=====================================
compiler/GHC/Runtime/Loader.hs
=====================================
@@ -42,10 +42,10 @@ import GHC.Driver.Env
 import GHCi.RemoteTypes     ( HValue )
 import GHC.Core.Type        ( Type, mkTyConTy )
 import GHC.Core.TyCo.Compare( eqType )
-import GHC.Core.TyCon       ( TyCon )
+import GHC.Core.TyCon       ( TyCon(tyConName) )
 
 import GHC.Types.SrcLoc        ( noSrcSpan )
-import GHC.Types.Name    ( Name, nameModule_maybe )
+import GHC.Types.Name    ( Name, nameModule, nameModule_maybe )
 import GHC.Types.Id      ( idType )
 import GHC.Types.TyThing
 import GHC.Types.Name.Occurrence ( OccName, mkVarOccFS )
@@ -55,7 +55,7 @@ import GHC.Types.Name.Reader   ( RdrName, ImportSpec(..), ImpDeclSpec(..)
 
 import GHC.Unit.Finder         ( findPluginModule, FindResult(..) )
 import GHC.Driver.Config.Finder ( initFinderOpts )
-import GHC.Unit.Module   ( Module, ModuleName )
+import GHC.Unit.Module   ( Module, ModuleName, thisGhcUnit, GenModule(moduleUnit) )
 import GHC.Unit.Module.ModIface
 import GHC.Unit.Env
 
@@ -171,7 +171,14 @@ loadPlugin' occ_name plugin_name hsc_env mod_name
             Just (name, mod_iface) ->
 
      do { plugin_tycon <- forceLoadTyCon hsc_env plugin_name
-        ; eith_plugin <- getValueSafely hsc_env name (mkTyConTy plugin_tycon)
+        ; case thisGhcUnit == (moduleUnit . nameModule . tyConName) plugin_tycon of {
+            False ->
+                throwGhcExceptionIO (CmdLineError $ showSDoc dflags $ hsep
+                          [ text "The plugin module", ppr mod_name
+                          , text "was built with a compiler that is incompatible with the one loading it"
+                          ]) ;
+            True ->
+     do { eith_plugin <- getValueSafely hsc_env name (mkTyConTy plugin_tycon)
         ; case eith_plugin of
             Left actual_type ->
                 throwGhcExceptionIO (CmdLineError $
@@ -182,7 +189,7 @@ loadPlugin' occ_name plugin_name hsc_env mod_name
                           , text "did not have the type"
                           , text "GHC.Plugins.Plugin"
                           , text "as required"])
-            Right (plugin, links, pkgs) -> return (plugin, mod_iface, links, pkgs) } } }
+            Right (plugin, links, pkgs) -> return (plugin, mod_iface, links, pkgs) } } } } }
 
 
 -- | Force the interfaces for the given modules to be loaded. The 'SDoc' parameter is used


=====================================
compiler/GHC/Unit/Types.hs
=====================================
@@ -99,6 +99,7 @@ import GHC.Data.FastString
 import GHC.Utils.Encoding
 import GHC.Utils.Fingerprint
 import GHC.Utils.Misc
+import GHC.Settings.Config (cProjectUnitId)
 
 import Control.DeepSeq
 import Data.Data
@@ -597,7 +598,7 @@ primUnitId        = UnitId (fsLit "ghc-prim")
 bignumUnitId      = UnitId (fsLit "ghc-bignum")
 baseUnitId        = UnitId (fsLit "base")
 rtsUnitId         = UnitId (fsLit "rts")
-thisGhcUnitId     = UnitId (fsLit "ghc")
+thisGhcUnitId     = UnitId (fsLit cProjectUnitId)
 interactiveUnitId = UnitId (fsLit "interactive")
 thUnitId          = UnitId (fsLit "template-haskell")
 
@@ -625,8 +626,15 @@ wiredInUnitIds =
    , baseUnitId
    , rtsUnitId
    , thUnitId
-   , thisGhcUnitId
    ]
+   -- NB: ghc is no longer part of the wired-in units since its unit-id, given
+   -- by hadrian or cabal, is no longer overwritten and now matches both the
+   -- cProjectUnitId defined in build-time-generated module GHC.Version, and
+   -- the unit key.
+   --
+   -- See also Note [About units], taking into consideration ghc is still a
+   -- wired-in unit but whose unit-id no longer needs special handling because
+   -- we take care that it matches the unit key.
 
 ---------------------------------------------------------------------
 -- Boot Modules


=====================================
compiler/Setup.hs
=====================================
@@ -3,7 +3,10 @@ module Main where
 
 import Distribution.Simple
 import Distribution.Simple.BuildPaths
+import Distribution.Types.ComponentLocalBuildInfo
+import Distribution.Types.ComponentName (ComponentName(CLibName))
 import Distribution.Types.LocalBuildInfo
+import Distribution.Types.LibraryName (LibraryName(LMainLibName))
 import Distribution.Verbosity
 import Distribution.Simple.Program
 import Distribution.Simple.Utils
@@ -15,6 +18,7 @@ import System.Directory
 import System.FilePath
 import Control.Monad
 import Data.Char
+import qualified Data.Map as Map
 import GHC.ResponseFile
 import System.Environment
 
@@ -85,9 +89,13 @@ ghcAutogen verbosity lbi at LocalBuildInfo{..} = do
     callProcess "deriveConstants" ["--gen-haskell-type","-o",tmp,"--target-os",targetOS]
     renameFile tmp platformConstantsPath
 
+  let cProjectUnitId = case Map.lookup (CLibName LMainLibName) componentNameMap of
+                         Just [LibComponentLocalBuildInfo{componentUnitId}] -> unUnitId componentUnitId
+                         _ -> error "Couldn't find unique cabal library when building ghc"
+
   -- Write GHC.Settings.Config
-  let configHsPath = autogenPackageModulesDir lbi </> "GHC/Settings/Config.hs"
-      configHs = generateConfigHs settings
+      configHsPath = autogenPackageModulesDir lbi </> "GHC/Settings/Config.hs"
+      configHs = generateConfigHs cProjectUnitId settings
   createDirectoryIfMissingVerbose verbosity True (takeDirectory configHsPath)
   rewriteFileEx verbosity configHsPath configHs
 
@@ -98,8 +106,9 @@ getSetting settings kh kr = go settings kr
       Nothing -> Left (show k ++ " not found in settings: " ++ show settings)
       Just v -> Right v
 
-generateConfigHs :: [(String,String)] -> String
-generateConfigHs settings = either error id $ do
+generateConfigHs :: String -- ^ ghc's cabal-generated unit-id, which matches its package-id/key
+                 -> [(String,String)] -> String
+generateConfigHs cProjectUnitId settings = either error id $ do
     let getSetting' = getSetting $ (("cStage","2"):) settings
     buildPlatform  <- getSetting' "cBuildPlatformString" "Host platform"
     hostPlatform   <- getSetting' "cHostPlatformString" "Target platform"
@@ -114,6 +123,7 @@ generateConfigHs settings = either error id $ do
         , "  , cProjectName"
         , "  , cBooterVersion"
         , "  , cStage"
+        , "  , cProjectUnitId"
         , "  ) where"
         , ""
         , "import GHC.Prelude.Basic"
@@ -134,4 +144,7 @@ generateConfigHs settings = either error id $ do
         , ""
         , "cStage                :: String"
         , "cStage                = show ("++ cStage ++ " :: Int)"
+        , ""
+        , "cProjectUnitId :: String"
+        , "cProjectUnitId = " ++ show cProjectUnitId
         ]


=====================================
compiler/ghc.cabal.in
=====================================
@@ -39,7 +39,7 @@ extra-source-files:
 
 
 custom-setup
-    setup-depends: base >= 3 && < 5, Cabal >= 1.6 && <3.10, directory, process, filepath
+    setup-depends: base >= 3 && < 5, Cabal >= 1.6 && <3.10, directory, process, filepath, containers
 
 Flag internal-interpreter
     Description: Build with internal interpreter support.
@@ -57,6 +57,12 @@ Flag build-tool-depends
     Description: Use build-tool-depends
     Default: True
 
+-- While the boot compiler fixes ghc's unit-id to `ghc`, the stage0 compiler must still be compiled with `-this-unit-id ghc`
+Flag hadrian-stage0
+    Description: Enable if compiling the stage0 compiler with hadrian
+    Default: False
+    Manual: True
+
 Library
     Default-Language: Haskell2010
     Exposed: False
@@ -136,9 +142,10 @@ Library
 
     Include-Dirs: .
 
-    -- We need to set the unit id to ghc (without a version number)
-    -- as it's magic.
-    GHC-Options: -this-unit-id ghc
+    if flag(hadrian-stage0)
+        -- We need to set the unit id to ghc (without a version number)
+        -- as it's magic.
+        GHC-Options: -this-unit-id ghc
 
     c-sources:
         cbits/cutils.c


=====================================
hadrian/src/Rules/Generate.hs
=====================================
@@ -486,6 +486,15 @@ generateConfigHs = do
     trackGenerateHs
     cProjectName        <- getSetting ProjectName
     cBooterVersion      <- getSetting GhcVersion
+    cProjectVersionMunged  <- getSetting ProjectVersionMunged
+    -- ROMES:TODO:HASH First we attempt a fixed unit-id with version but without hash.
+    --
+    -- We now use a more informative unit-id for ghc. See Note [TODO:GHC-UNITID]
+    --
+    -- It's crucial that the unit-id matches the unit-key -- ghc is no longer
+    -- part of the WiringMap, so we don't to go back and forth between the
+    -- unit-id and the unit-key -- we take care here that they are the same.
+    let cProjectUnitId = "ghc-" ++ cProjectVersionMunged -- ROMES:TODO:HASH
     return $ unlines
         [ "module GHC.Settings.Config"
         , "  ( module GHC.Version"
@@ -494,6 +503,7 @@ generateConfigHs = do
         , "  , cProjectName"
         , "  , cBooterVersion"
         , "  , cStage"
+        , "  , cProjectUnitId"
         , "  ) where"
         , ""
         , "import GHC.Prelude.Basic"
@@ -514,6 +524,9 @@ generateConfigHs = do
         , ""
         , "cStage                :: String"
         , "cStage                = show (" ++ stageString stage ++ " :: Int)"
+        , ""
+        , "cProjectUnitId :: String"
+        , "cProjectUnitId = " ++ show cProjectUnitId
         ]
   where
     stageString (Stage0 InTreeLibs) = "1"
@@ -533,6 +546,7 @@ generateVersionHs = do
     cProjectPatchLevel  <- getSetting ProjectPatchLevel
     cProjectPatchLevel1 <- getSetting ProjectPatchLevel1
     cProjectPatchLevel2 <- getSetting ProjectPatchLevel2
+
     return $ unlines
         [ "module GHC.Version where"
         , ""


=====================================
hadrian/src/Settings/Builders/Ghc.hs
=====================================
@@ -247,6 +247,16 @@ packageGhcArgs :: Args
 packageGhcArgs = do
     package <- getPackage
     ghc_ver <- readVersion <$> (expr . ghcVersionStage =<< getStage)
+    -- ROMES: Until the boot compiler no longer needs ghc's
+    -- unit-id to be "ghc", the stage0 compiler must be built
+    -- with `-this-unit-id ghc`, while the wired-in unit-id of
+    -- ghc is correctly set to the unit-id we'll generate for
+    -- stage1 (set in generateVersionHs in Rules.Generate).
+    --
+    -- However, we don't need to set the unit-id of "ghc" to "ghc" when
+    -- building stage0 because we have a flag in compiler/ghc.cabal.in that is
+    -- sets `-this-unit-id ghc` when hadrian is building stage0, which will
+    -- overwrite this one.
     pkgId   <- expr $ pkgIdentifier package
     mconcat [ arg "-hide-all-packages"
             , arg "-no-user-package-db"


=====================================
hadrian/src/Settings/Packages.hs
=====================================
@@ -77,6 +77,11 @@ packageArgs = do
             [ andM [expr ghcWithInterpreter, notStage0] `cabalFlag` "internal-interpreter"
             , notM cross `cabalFlag` "terminfo"
             , arg "-build-tool-depends"
+            -- ROMES: While the boot compiler is not updated wrt -this-unit-id
+            -- not being fixed to `ghc`, when building stage0, we must set
+            -- -this-unit-id to `ghc` because the boot compiler expects that.
+            -- We do it through a cabal flag in ghc.cabal
+            , stage0 ? arg "+hadrian-stage0"
             ]
 
           , builder (Haddock BuildPackage) ? arg ("--optghc=-I" ++ path) ]


=====================================
testsuite/tests/driver/j-space/jspace.hs
=====================================
@@ -2,6 +2,7 @@ module Main where
 
 import GHC
 import GHC.Driver.Monad
+import GHC.Driver.Session
 import System.Environment
 import GHC.Driver.Env.Types
 import GHC.Profiling
@@ -25,6 +26,9 @@ initGhcM xs = do
     let cmdOpts = ["-fforce-recomp"] ++ xs
     (df2, leftovers, _) <- parseDynamicFlags (hsc_logger session) df1 (map noLoc cmdOpts)
     setSessionDynFlags df2
+    ghcUnitId <- case lookup "Project Unit Id" (compilerInfo df2) of
+                    Nothing -> fail "failed to find ghc's unit-id in the compiler info"
+                    Just ghcUnitId -> pure ghcUnitId
     ts <- mapM (\s -> guessTarget s Nothing Nothing) $ map unLoc leftovers
     setTargets ts
     _ <- load LoadAllTargets
@@ -36,7 +40,7 @@ initGhcM xs = do
     liftIO $ do
       requestHeapCensus
       performGC
-      [ys] <- filter (isPrefixOf "ghc:GHC.Unit.Module.ModDetails.ModDetails") . lines <$> readFile "jspace.hp"
+      [ys] <- filter (isPrefixOf (ghcUnitId <> ":GHC.Unit.Module.ModDetails.ModDetails")) . lines <$> readFile "jspace.hp"
       let (n :: Int) = read (last (words ys))
       -- The output should be 50 * 8 * word_size (i.e. 3200, or 1600 on 32-bit architectures):
       -- the test contains DEPTH + WIDTH + 2 = 50 modules J, H_0, .., H_DEPTH, W_1, .., W_WIDTH,


=====================================
utils/count-deps/Main.hs
=====================================
@@ -56,25 +56,28 @@ calcDeps modName libdir =
         logger <- getLogger
         (df, _, _) <- parseDynamicFlags logger df [noLoc "-package=ghc"]
         setSessionDynFlags df
-        env <- getSession
-        loop env Map.empty [mkModuleName modName]
+        case lookup "Project Unit Id" (compilerInfo df) of
+          Nothing -> fail "failed to find ghc's unit-id in the compiler info"
+          Just ghcUnitId -> do
+            env <- getSession
+            loop ghcUnitId env Map.empty [mkModuleName modName]
   where
     -- Source imports are only guaranteed to show up in the 'mi_deps'
     -- of modules that import them directly and don’t propagate
     -- transitively so we loop.
-    loop :: HscEnv -> Map.Map ModuleName [ModuleName] -> [ModuleName] -> Ghc (Map.Map ModuleName [ModuleName])
-    loop env modules (m : ms) =
+    loop :: String -> HscEnv -> Map.Map ModuleName [ModuleName] -> [ModuleName] -> Ghc (Map.Map ModuleName [ModuleName])
+    loop ghcUnitId env modules (m : ms) =
       if m `Map.member` modules
-        then loop env modules ms
+        then loop ghcUnitId env modules ms
         else do
-          mi <- liftIO $ hscGetModuleInterface env (mkModule m)
+          mi <- liftIO $ hscGetModuleInterface env (mkModule ghcUnitId m)
           let deps = modDeps mi
           modules <- return $ Map.insert m [] modules
-          loop env (Map.insert m deps modules) $ ms ++ filter (not . (`Map.member` modules)) deps
-    loop _ modules [] = return modules
+          loop ghcUnitId env (Map.insert m deps modules) $ ms ++ filter (not . (`Map.member` modules)) deps
+    loop _ _ modules [] = return modules
 
-    mkModule :: ModuleName -> Module
-    mkModule = Module (stringToUnit "ghc")
+    mkModule :: String -> ModuleName -> Module
+    mkModule ghcUnitId = Module (stringToUnit ghcUnitId)
 
     modDeps :: ModIface -> [ModuleName]
     modDeps mi = map (gwib_mod . snd) $ Set.toList $ dep_direct_mods (mi_deps mi)



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/0aa3abb8a6ab4315c9d4ea301bf09c9915f59778...74411c8bbaf14398b965259e6ea8b8838d92b579

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/0aa3abb8a6ab4315c9d4ea301bf09c9915f59778...74411c8bbaf14398b965259e6ea8b8838d92b579
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20230315/229c9878/attachment-0001.html>


More information about the ghc-commits mailing list