[commit: ghc] master: Fix interface hashes including time stamp of dependent files. (677820e)

git at git.haskell.org git at git.haskell.org
Thu Aug 22 23:25:45 CEST 2013


Repository : ssh://git@git.haskell.org/ghc

On branch  : master
Link       : http://ghc.haskell.org/trac/ghc/changeset/677820ee3a3aadbf2ed414deb3926381d94b13a8/ghc

>---------------------------------------------------------------

commit 677820ee3a3aadbf2ed414deb3926381d94b13a8
Author: Niklas Hambüchen <mail at nh2.me>
Date:   Thu Aug 22 11:05:56 2013 +0900

    Fix interface hashes including time stamp of dependent files.
    
    Fixes #8144.
    
    Before, the modification time of e.g. #included files (and everything
    that ends up as a UsageFile, e.g. via addDependentFile) was taken as
    input for the interface hash of a module.
    
    This lead to different hashes for identical inputs on every compilation.
    
    We now use file content hashes instead.
    
    This changes the interface file format.
    You will get "Binary.get(Usage): 50" when you try to do an incremental
    using .hi files that were created with a GHC 7.7 (only) older than this commit.
    
    To calculate the md5 hash (`Fingerprint`) of a file in constant space,
    there now is GHC.Fingerprint.getFileHash, and a fallback version
    for older GHCs that needs to load the file into memory completely
    (only used when compiling stage1 with an older GHC).
    
    Signed-off-by: Austin Seipp <aseipp at pobox.com>


>---------------------------------------------------------------

677820ee3a3aadbf2ed414deb3926381d94b13a8
 compiler/iface/MkIface.lhs     |   18 +++++++++---------
 compiler/main/HscTypes.lhs     |   10 +++++-----
 compiler/utils/Fingerprint.hsc |   31 +++++++++++++++++++++++++++++--
 3 files changed, 43 insertions(+), 16 deletions(-)

diff --git a/compiler/iface/MkIface.lhs b/compiler/iface/MkIface.lhs
index 5819964..4c78955 100644
--- a/compiler/iface/MkIface.lhs
+++ b/compiler/iface/MkIface.lhs
@@ -883,16 +883,16 @@ mkUsageInfo :: HscEnv -> Module -> ImportedMods -> NameSet -> [FilePath] -> IO [
 mkUsageInfo hsc_env this_mod dir_imp_mods used_names dependent_files
   = do
     eps <- hscEPS hsc_env
-    mtimes <- mapM getModificationUTCTime dependent_files
+    hashes <- mapM getFileHash dependent_files
     let mod_usages = mk_mod_usage_info (eps_PIT eps) hsc_env this_mod
                                        dir_imp_mods used_names
-    let usages = mod_usages ++ map to_file_usage (zip dependent_files mtimes)
-    usages `seqList`  return usages
+    let usages = mod_usages ++ [ UsageFile { usg_file_path = f
+                                           , usg_file_hash = hash }
+                               | (f, hash) <- zip dependent_files hashes ]
+    usages `seqList` return usages
     -- seq the list of Usages returned: occasionally these
     -- don't get evaluated for a while and we can end up hanging on to
     -- the entire collection of Ifaces.
-   where
-     to_file_usage (f, mtime) = UsageFile { usg_file_path = f, usg_mtime = mtime }
 
 mk_mod_usage_info :: PackageIfaceTable
               -> HscEnv
@@ -1343,15 +1343,15 @@ checkModUsage this_pkg UsageHomeModule{
 
 
 checkModUsage _this_pkg UsageFile{ usg_file_path = file,
-                                   usg_mtime = old_mtime } =
+                                   usg_file_hash = old_hash } =
   liftIO $
     handleIO handle $ do
-      new_mtime <- getModificationUTCTime file
-      if (old_mtime /= new_mtime)
+      new_hash <- getFileHash file
+      if (old_hash /= new_hash)
          then return recomp
          else return UpToDate
  where
-   recomp = RecompBecause (file ++ " time stamp changed")
+   recomp = RecompBecause (file ++ " changed")
    handle =
 #ifdef DEBUG
        \e -> pprTrace "UsageFile" (text (show e)) $ return recomp
diff --git a/compiler/main/HscTypes.lhs b/compiler/main/HscTypes.lhs
index e022ae3..07e78f3 100644
--- a/compiler/main/HscTypes.lhs
+++ b/compiler/main/HscTypes.lhs
@@ -1796,7 +1796,7 @@ data Usage
     }                                           -- ^ Module from the current package
   | UsageFile {
         usg_file_path  :: FilePath,
-        usg_mtime      :: UTCTime
+        usg_file_hash  :: Fingerprint
         -- ^ External file dependency. From a CPP #include or TH addDependentFile. Should be absolute.
   }
     deriving( Eq )
@@ -1831,7 +1831,7 @@ instance Binary Usage where
     put_ bh usg at UsageFile{} = do 
         putByte bh 2
         put_ bh (usg_file_path usg)
-        put_ bh (usg_mtime     usg)
+        put_ bh (usg_file_hash usg)
 
     get bh = do
         h <- getByte bh
@@ -1850,9 +1850,9 @@ instance Binary Usage where
             return UsageHomeModule { usg_mod_name = nm, usg_mod_hash = mod,
                      usg_exports = exps, usg_entities = ents, usg_safe = safe }
           2 -> do
-            fp    <- get bh
-            mtime <- get bh
-            return UsageFile { usg_file_path = fp, usg_mtime = mtime }
+            fp   <- get bh
+            hash <- get bh
+            return UsageFile { usg_file_path = fp, usg_file_hash = hash }
           i -> error ("Binary.get(Usage): " ++ show i)
 
 \end{code}
diff --git a/compiler/utils/Fingerprint.hsc b/compiler/utils/Fingerprint.hsc
index 95f31c0..895906a 100644
--- a/compiler/utils/Fingerprint.hsc
+++ b/compiler/utils/Fingerprint.hsc
@@ -8,18 +8,22 @@
 --
 -- ----------------------------------------------------------------------------
 
-{-# OPTIONS_GHC -fno-warn-orphans #-}
 module Fingerprint (
         Fingerprint(..), fingerprint0,
         readHexFingerprint,
         fingerprintData,
-        fingerprintString
+        fingerprintString,
+        -- Re-exported rom GHC.Fingerprint for GHC >= 7.7, local otherwise
+        getFileHash
    ) where
 
 #include "md5.h"
 ##include "HsVersions.h"
 
 import Numeric          ( readHex )
+import Foreign
+import Panic
+import System.IO
 
 import GHC.Fingerprint
 
@@ -30,3 +34,26 @@ readHexFingerprint s = Fingerprint w1 w2
        [(w1,"")] = readHex s1
        [(w2,"")] = readHex (take 16 s2)
 
+
+#if __GLASGOW_HASKELL__ < 707
+-- Only use this if we're smaller than GHC 7.7, otherwise
+-- GHC.Fingerprint exports a better version of this function.
+
+-- | Computes the hash of a given file.
+-- It loads the full file into memory an does not work with files bigger than
+-- MAXINT.
+getFileHash :: FilePath -> IO Fingerprint
+getFileHash path = withBinaryFile path ReadMode $ \h -> do
+
+  fileSize <- toIntFileSize `fmap` hFileSize h
+
+  allocaBytes fileSize (\bufPtr -> fingerprintData bufPtr fileSize)
+
+  where
+    toIntFileSize :: Integer -> Int
+    toIntFileSize size
+      | size > fromIntegral (maxBound :: Int) = throwGhcException $
+          Sorry $ "Fingerprint.getFileHash: Tried to calculate hash of file "
+                  ++ path ++ " with size > maxBound :: Int. This is not supported."
+      | otherwise = fromIntegral size
+#endif





More information about the ghc-commits mailing list