[commit: ghc] master: Fix interface hashes including time stamp of dependent files. (677820e)
git at git.haskell.org
git at git.haskell.org
Thu Aug 22 23:25:45 CEST 2013
Repository : ssh://git@git.haskell.org/ghc
On branch : master
Link : http://ghc.haskell.org/trac/ghc/changeset/677820ee3a3aadbf2ed414deb3926381d94b13a8/ghc
>---------------------------------------------------------------
commit 677820ee3a3aadbf2ed414deb3926381d94b13a8
Author: Niklas Hambüchen <mail at nh2.me>
Date: Thu Aug 22 11:05:56 2013 +0900
Fix interface hashes including time stamp of dependent files.
Fixes #8144.
Before, the modification time of e.g. #included files (and everything
that ends up as a UsageFile, e.g. via addDependentFile) was taken as
input for the interface hash of a module.
This lead to different hashes for identical inputs on every compilation.
We now use file content hashes instead.
This changes the interface file format.
You will get "Binary.get(Usage): 50" when you try to do an incremental
using .hi files that were created with a GHC 7.7 (only) older than this commit.
To calculate the md5 hash (`Fingerprint`) of a file in constant space,
there now is GHC.Fingerprint.getFileHash, and a fallback version
for older GHCs that needs to load the file into memory completely
(only used when compiling stage1 with an older GHC).
Signed-off-by: Austin Seipp <aseipp at pobox.com>
>---------------------------------------------------------------
677820ee3a3aadbf2ed414deb3926381d94b13a8
compiler/iface/MkIface.lhs | 18 +++++++++---------
compiler/main/HscTypes.lhs | 10 +++++-----
compiler/utils/Fingerprint.hsc | 31 +++++++++++++++++++++++++++++--
3 files changed, 43 insertions(+), 16 deletions(-)
diff --git a/compiler/iface/MkIface.lhs b/compiler/iface/MkIface.lhs
index 5819964..4c78955 100644
--- a/compiler/iface/MkIface.lhs
+++ b/compiler/iface/MkIface.lhs
@@ -883,16 +883,16 @@ mkUsageInfo :: HscEnv -> Module -> ImportedMods -> NameSet -> [FilePath] -> IO [
mkUsageInfo hsc_env this_mod dir_imp_mods used_names dependent_files
= do
eps <- hscEPS hsc_env
- mtimes <- mapM getModificationUTCTime dependent_files
+ hashes <- mapM getFileHash dependent_files
let mod_usages = mk_mod_usage_info (eps_PIT eps) hsc_env this_mod
dir_imp_mods used_names
- let usages = mod_usages ++ map to_file_usage (zip dependent_files mtimes)
- usages `seqList` return usages
+ let usages = mod_usages ++ [ UsageFile { usg_file_path = f
+ , usg_file_hash = hash }
+ | (f, hash) <- zip dependent_files hashes ]
+ usages `seqList` return usages
-- seq the list of Usages returned: occasionally these
-- don't get evaluated for a while and we can end up hanging on to
-- the entire collection of Ifaces.
- where
- to_file_usage (f, mtime) = UsageFile { usg_file_path = f, usg_mtime = mtime }
mk_mod_usage_info :: PackageIfaceTable
-> HscEnv
@@ -1343,15 +1343,15 @@ checkModUsage this_pkg UsageHomeModule{
checkModUsage _this_pkg UsageFile{ usg_file_path = file,
- usg_mtime = old_mtime } =
+ usg_file_hash = old_hash } =
liftIO $
handleIO handle $ do
- new_mtime <- getModificationUTCTime file
- if (old_mtime /= new_mtime)
+ new_hash <- getFileHash file
+ if (old_hash /= new_hash)
then return recomp
else return UpToDate
where
- recomp = RecompBecause (file ++ " time stamp changed")
+ recomp = RecompBecause (file ++ " changed")
handle =
#ifdef DEBUG
\e -> pprTrace "UsageFile" (text (show e)) $ return recomp
diff --git a/compiler/main/HscTypes.lhs b/compiler/main/HscTypes.lhs
index e022ae3..07e78f3 100644
--- a/compiler/main/HscTypes.lhs
+++ b/compiler/main/HscTypes.lhs
@@ -1796,7 +1796,7 @@ data Usage
} -- ^ Module from the current package
| UsageFile {
usg_file_path :: FilePath,
- usg_mtime :: UTCTime
+ usg_file_hash :: Fingerprint
-- ^ External file dependency. From a CPP #include or TH addDependentFile. Should be absolute.
}
deriving( Eq )
@@ -1831,7 +1831,7 @@ instance Binary Usage where
put_ bh usg at UsageFile{} = do
putByte bh 2
put_ bh (usg_file_path usg)
- put_ bh (usg_mtime usg)
+ put_ bh (usg_file_hash usg)
get bh = do
h <- getByte bh
@@ -1850,9 +1850,9 @@ instance Binary Usage where
return UsageHomeModule { usg_mod_name = nm, usg_mod_hash = mod,
usg_exports = exps, usg_entities = ents, usg_safe = safe }
2 -> do
- fp <- get bh
- mtime <- get bh
- return UsageFile { usg_file_path = fp, usg_mtime = mtime }
+ fp <- get bh
+ hash <- get bh
+ return UsageFile { usg_file_path = fp, usg_file_hash = hash }
i -> error ("Binary.get(Usage): " ++ show i)
\end{code}
diff --git a/compiler/utils/Fingerprint.hsc b/compiler/utils/Fingerprint.hsc
index 95f31c0..895906a 100644
--- a/compiler/utils/Fingerprint.hsc
+++ b/compiler/utils/Fingerprint.hsc
@@ -8,18 +8,22 @@
--
-- ----------------------------------------------------------------------------
-{-# OPTIONS_GHC -fno-warn-orphans #-}
module Fingerprint (
Fingerprint(..), fingerprint0,
readHexFingerprint,
fingerprintData,
- fingerprintString
+ fingerprintString,
+ -- Re-exported rom GHC.Fingerprint for GHC >= 7.7, local otherwise
+ getFileHash
) where
#include "md5.h"
##include "HsVersions.h"
import Numeric ( readHex )
+import Foreign
+import Panic
+import System.IO
import GHC.Fingerprint
@@ -30,3 +34,26 @@ readHexFingerprint s = Fingerprint w1 w2
[(w1,"")] = readHex s1
[(w2,"")] = readHex (take 16 s2)
+
+#if __GLASGOW_HASKELL__ < 707
+-- Only use this if we're smaller than GHC 7.7, otherwise
+-- GHC.Fingerprint exports a better version of this function.
+
+-- | Computes the hash of a given file.
+-- It loads the full file into memory an does not work with files bigger than
+-- MAXINT.
+getFileHash :: FilePath -> IO Fingerprint
+getFileHash path = withBinaryFile path ReadMode $ \h -> do
+
+ fileSize <- toIntFileSize `fmap` hFileSize h
+
+ allocaBytes fileSize (\bufPtr -> fingerprintData bufPtr fileSize)
+
+ where
+ toIntFileSize :: Integer -> Int
+ toIntFileSize size
+ | size > fromIntegral (maxBound :: Int) = throwGhcException $
+ Sorry $ "Fingerprint.getFileHash: Tried to calculate hash of file "
+ ++ path ++ " with size > maxBound :: Int. This is not supported."
+ | otherwise = fromIntegral size
+#endif
More information about the ghc-commits
mailing list