[Git][ghc/ghc][wip/romes/hardwire-ghc-unit-id] WIP: Better Hash

Rodrigo Mesquita (@alt-romes) gitlab at gitlab.haskell.org
Thu Mar 16 11:35:25 UTC 2023



Rodrigo Mesquita pushed to branch wip/romes/hardwire-ghc-unit-id at Glasgow Haskell Compiler / GHC


Commits:
848c2265 by romes at 2023-03-16T11:35:17+00:00
WIP: Better Hash

- - - - -


5 changed files:

- hadrian/hadrian.cabal
- hadrian/src/Hadrian/Package.hs
- + hadrian/src/Hadrian/Package/Hash.hs
- hadrian/src/Rules/Generate.hs
- hadrian/src/Settings/Builders/Ghc.hs


Changes:

=====================================
hadrian/hadrian.cabal
=====================================
@@ -163,6 +163,8 @@ executable hadrian
                        , transformers         >= 0.4     && < 0.7
                        , unordered-containers >= 0.2.1   && < 0.3
                        , text                 >= 1.2     && < 3
+                       , cryptohash-sha256    >= 0.11    && < 0.12
+                       , base16-bytestring    >= 0.1.1 && < 1.1.0.0
     ghc-options:       -Wall
                        -Wincomplete-record-updates
                        -Wredundant-constraints


=====================================
hadrian/src/Hadrian/Package.hs
=====================================
@@ -81,4 +81,4 @@ instance NFData   PackageType
 
 instance Binary   Package
 instance Hashable Package
-instance NFData   Package
\ No newline at end of file
+instance NFData   Package


=====================================
hadrian/src/Hadrian/Package/Hash.hs
=====================================
@@ -0,0 +1,138 @@
+{-# LANGUAGE RecordWildCards #-}
+{-# LANGUAGE NamedFieldPuns #-}
+module Hadrian.Package.Hash where
+
+import Hadrian.Haskell.Cabal.Type
+import Hadrian.Oracles.Cabal
+import Hadrian.Package
+
+import qualified Crypto.Hash.SHA256     as SHA256
+import qualified Data.ByteString.Base16 as Base16
+import qualified Data.ByteString.Char8  as BS
+
+-- | Compute the unit-id of a package
+pkgUnitId :: Package -> String
+pkgUnitId pkg = do
+  pid   <- pkgIdentifier pkg
+  phash <- pkgHash pkg
+  pure $ pkgId <> "-" <> hash
+
+
+data PackageHashInputs = PackageHashInputs {
+       pkgHashPkgId         :: String, -- ^ name-version
+       pkgHashComponent     :: PackageType,
+       pkgHashSourceHash    :: BS.ByteString,
+       -- pkgHashPkgConfigDeps :: Set (PkgconfigName, Maybe PkgconfigVersion),
+       pkgHashDirectDeps    :: [PackageName], -- Set InstalledPackageId, -- pkgDependencies are names only, not their installed unit-ids
+       pkgHashOtherConfig   :: PackageHashConfigInputs
+     }
+
+-- | Those parts of the package configuration that contribute to the
+-- package hash computed by hadrian (which is simpler than cabal's).
+--
+-- setting in Oracle.setting, which come from system.config
+data PackageHashConfigInputs = PackageHashConfigInputs {
+       pkgHashCompilerId          :: String,
+       pkgHashPlatform            :: String,
+       -- pkgHashFlagAssignment      :: FlagAssignment, -- complete not partial
+       -- pkgHashConfigureScriptArgs :: [String], -- just ./configure for build-type Configure
+       pkgHashVanillaLib          :: Bool,
+       pkgHashSharedLib           :: Bool,
+       pkgHashDynExe              :: Bool,
+       pkgHashFullyStaticExe      :: Bool,
+       pkgHashGHCiLib             :: Bool,
+       pkgHashProfLib             :: Bool,
+       pkgHashProfExe             :: Bool,
+       pkgHashProfLibDetail       :: ProfDetailLevel,
+       pkgHashProfExeDetail       :: ProfDetailLevel,
+       pkgHashCoverage            :: Bool,
+       pkgHashOptimization        :: OptimisationLevel,
+       pkgHashSplitObjs           :: Bool,
+       pkgHashSplitSections       :: Bool,
+       pkgHashStripLibs           :: Bool,
+       pkgHashStripExes           :: Bool,
+       pkgHashDebugInfo           :: DebugInfoLevel,
+       pkgHashProgramArgs         :: Map String [String],
+       pkgHashExtraLibDirs        :: [FilePath],
+       pkgHashExtraLibDirsStatic  :: [FilePath],
+       pkgHashExtraFrameworkDirs  :: [FilePath],
+       pkgHashExtraIncludeDirs    :: [FilePath],
+       -- pkgHashProgPrefix          :: Maybe PathTemplate,
+       -- pkgHashProgSuffix          :: Maybe PathTemplate,
+       pkgHashPackageDbs          :: [Maybe PackageDB]
+     }
+  deriving Show
+
+pkgHash :: Package -> Action String
+pkgHash pkg = BS.unpack $ Base16.encode $ SHA256.hash $ do
+  pkgIdentifier
+  renderPackageHashInputs $ PackageHashInputs
+    {
+       pkgHashPkgId       = undefined
+    ,  pkgHashComponent   = undefined
+    ,  pkgHashSourceHash  = undefined
+    ,  pkgHashDirectDeps  = undefined
+    ,  pkgHashOtherConfig = undefined
+    }
+
+renderPackageHashInputs :: PackageHashInputs -> BS.ByteString
+renderPackageHashInputs PackageHashInputs{
+                          pkgHashPkgId,
+                          pkgHashComponent,
+                          pkgHashSourceHash,
+                          pkgHashDirectDeps,
+                          -- pkgHashPkgConfigDeps,
+                          pkgHashOtherConfig =
+                            PackageHashConfigInputs{..}
+                        } =
+    -- The purpose of this somewhat laboured rendering (e.g. why not just
+    -- use show?) is so that existing package hashes do not change
+    -- unnecessarily when new configuration inputs are added into the hash.
+    BS.pack $ unlines $ catMaybes $
+      [ entry "pkgid"       prettyShow pkgHashPkgId
+      , mentry "component"  show pkgHashComponent
+      , entry "src"         showHashValue pkgHashSourceHash
+      , entry "pkg-config-deps"
+                            (intercalate ", " . map (\(pn, mb_v) -> prettyShow pn ++
+                                                    case mb_v of
+                                                        Nothing -> ""
+                                                        Just v -> " " ++ prettyShow v)
+                                              . Set.toList) pkgHashPkgConfigDeps
+      , entry "deps"        (intercalate ", " . map prettyShow
+                                              . Set.toList) pkgHashDirectDeps
+        -- and then all the config
+      , entry "compilerid"  prettyShow pkgHashCompilerId
+      , entry "platform" prettyShow pkgHashPlatform
+      , opt   "flags" mempty showFlagAssignment pkgHashFlagAssignment
+      , opt   "configure-script" [] unwords pkgHashConfigureScriptArgs
+      , opt   "vanilla-lib" True  prettyShow pkgHashVanillaLib
+      , opt   "shared-lib"  False prettyShow pkgHashSharedLib
+      , opt   "dynamic-exe" False prettyShow pkgHashDynExe
+      , opt   "fully-static-exe" False prettyShow pkgHashFullyStaticExe
+      , opt   "ghci-lib"    False prettyShow pkgHashGHCiLib
+      , opt   "prof-lib"    False prettyShow pkgHashProfLib
+      , opt   "prof-exe"    False prettyShow pkgHashProfExe
+      , opt   "prof-lib-detail" ProfDetailDefault showProfDetailLevel pkgHashProfLibDetail
+      , opt   "prof-exe-detail" ProfDetailDefault showProfDetailLevel pkgHashProfExeDetail
+      , opt   "hpc"          False prettyShow pkgHashCoverage
+      , opt   "optimisation" NormalOptimisation (show . fromEnum) pkgHashOptimization
+      , opt   "split-objs"   False prettyShow pkgHashSplitObjs
+      , opt   "split-sections" False prettyShow pkgHashSplitSections
+      , opt   "stripped-lib" False prettyShow pkgHashStripLibs
+      , opt   "stripped-exe" True  prettyShow pkgHashStripExes
+      , opt   "debug-info"   NormalDebugInfo (show . fromEnum) pkgHashDebugInfo
+      , opt   "extra-lib-dirs"     [] unwords pkgHashExtraLibDirs
+      , opt   "extra-lib-dirs-static" [] unwords pkgHashExtraLibDirsStatic
+      , opt   "extra-framework-dirs" [] unwords pkgHashExtraFrameworkDirs
+      , opt   "extra-include-dirs" [] unwords pkgHashExtraIncludeDirs
+      , opt   "prog-prefix" Nothing (maybe "" fromPathTemplate) pkgHashProgPrefix
+      , opt   "prog-suffix" Nothing (maybe "" fromPathTemplate) pkgHashProgSuffix
+      , opt   "package-dbs" [] (unwords . map show) pkgHashPackageDbs
+
+      ] ++ Map.foldrWithKey (\prog args acc -> opt (prog ++ "-options") [] unwords args : acc) [] pkgHashProgramArgs
+  where
+    entry key     format value = Just (key ++ ": " ++ format value)
+    mentry key    format value = fmap (\v -> key ++ ": " ++ format v) value
+    opt   key def format value
+         | value == def = Nothing
+         | otherwise    = entry key format value


=====================================
hadrian/src/Rules/Generate.hs
=====================================
@@ -486,16 +486,14 @@ generateConfigHs = do
     trackGenerateHs
     cProjectName        <- getSetting ProjectName
     cBooterVersion      <- getSetting GhcVersion
-    cProjectVersionMunged  <- getSetting ProjectVersionMunged
-    -- ROMES:TODO:HASH First we attempt a fixed unit-id with version but without hash.
-    --
-    -- We now use a more informative unit-id for ghc. See Note [GHC's Unit Id]
-    -- in GHC.Unit.Types
+    -- We now give a unit-id with a version and a hash to ghc.
+    -- See Note [GHC's Unit Id] in GHC.Unit.Types
     --
     -- It's crucial that the unit-id matches the unit-key -- ghc is no longer
     -- part of the WiringMap, so we don't to go back and forth between the
-    -- unit-id and the unit-key -- we take care here that they are the same.
-    let cProjectUnitId = "ghc-" ++ cProjectVersionMunged -- ROMES:TODO:HASH
+    -- unit-id and the unit-key -- we take care that they are the same by using
+    -- 'pkgUnitId' to create the unit-id in both situations.
+    cProjectUnitId <- pkgUnitId <$> getPackage
     return $ unlines
         [ "module GHC.Settings.Config"
         , "  ( module GHC.Version"
@@ -592,3 +590,4 @@ generatePlatformHostHs = do
         , "hostPlatformArchOS :: ArchOS"
         , "hostPlatformArchOS = ArchOS hostPlatformArch hostPlatformOS"
         ]
+


=====================================
hadrian/src/Settings/Builders/Ghc.hs
=====================================
@@ -243,6 +243,8 @@ wayGhcArgs = do
             , (way == debug || way == debugDynamic) ?
               pure ["-ticky", "-DTICKY_TICKY"] ]
 
+-- | Args related to correct handling of packages, such as setting
+-- -this-unit-id and passing -package-id for dependencies
 packageGhcArgs :: Args
 packageGhcArgs = do
     package <- getPackage
@@ -251,13 +253,13 @@ packageGhcArgs = do
     -- unit-id to be "ghc", the stage0 compiler must be built
     -- with `-this-unit-id ghc`, while the wired-in unit-id of
     -- ghc is correctly set to the unit-id we'll generate for
-    -- stage1 (set in generateVersionHs in Rules.Generate).
+    -- stage1 (set in generateConfigHs in Rules.Generate).
     --
     -- However, we don't need to set the unit-id of "ghc" to "ghc" when
     -- building stage0 because we have a flag in compiler/ghc.cabal.in that is
     -- sets `-this-unit-id ghc` when hadrian is building stage0, which will
     -- overwrite this one.
-    pkgId   <- expr $ pkgIdentifier package
+    pkgId   <- expr $ pkgUnitId package
     mconcat [ arg "-hide-all-packages"
             , arg "-no-user-package-db"
             , arg "-package-env -"



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/848c2265e8ae73176b8da9065595992a0c60e640

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/848c2265e8ae73176b8da9065595992a0c60e640
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20230316/70a11293/attachment-0001.html>


More information about the ghc-commits mailing list