[Git][ghc/ghc][master] Fix and enforce validation of header for .hie files

Marge Bot gitlab at gitlab.haskell.org
Sat Jun 1 03:55:20 UTC 2019



 Marge Bot pushed to branch master at Glasgow Haskell Compiler / GHC


Commits:
0e0d87da by Zubin Duggal at 2019-05-31T06:34:57Z
Fix and enforce validation of header for .hie files

Implements #16686

The files version is automatically generated from the current GHC
version in the same manner as normal interface files.

This means that clients can first read the version and then decide how
to read the rest of the file.

- - - - -


6 changed files:

- compiler/hieFile/HieAst.hs
- compiler/hieFile/HieBin.hs
- compiler/hieFile/HieDebug.hs
- compiler/hieFile/HieTypes.hs
- compiler/main/HscMain.hs
- utils/haddock


Changes:

=====================================
compiler/hieFile/HieAst.hs
=====================================
@@ -1,3 +1,6 @@
+{-
+Main functions for .hie file generation
+-}
 {-# LANGUAGE OverloadedStrings #-}
 {-# LANGUAGE FlexibleInstances #-}
 {-# LANGUAGE UndecidableInstances #-}
@@ -20,7 +23,6 @@ import BooleanFormula
 import Class                      ( FunDep )
 import CoreUtils                  ( exprType )
 import ConLike                    ( conLikeName )
-import Config                     ( cProjectVersion )
 import Desugar                    ( deSugarExpr )
 import FieldLabel
 import HsSyn
@@ -42,7 +44,6 @@ import HieUtils
 
 import qualified Data.Array as A
 import qualified Data.ByteString as BS
-import qualified Data.ByteString.Char8 as BSC
 import qualified Data.Map as M
 import qualified Data.Set as S
 import Data.Data                  ( Data, Typeable )
@@ -98,9 +99,7 @@ mkHieFile ms ts rs = do
   let Just src_file = ml_hs_file $ ms_location ms
   src <- liftIO $ BS.readFile src_file
   return $ HieFile
-      { hie_version = curHieVersion
-      , hie_ghc_version = BSC.pack cProjectVersion
-      , hie_hs_file = src_file
+      { hie_hs_file = src_file
       , hie_module = ms_mod ms
       , hie_types = arr
       , hie_asts = asts'


=====================================
compiler/hieFile/HieBin.hs
=====================================
@@ -1,8 +1,11 @@
+{-
+Binary serialization for .hie files.
+-}
 {-# LANGUAGE ScopedTypeVariables #-}
-module HieBin ( readHieFile, writeHieFile, HieName(..), toHieName ) where
+module HieBin ( readHieFile, readHieFileWithVersion, HieHeader, writeHieFile, HieName(..), toHieName, HieFileResult(..), hieMagic) where
 
+import Config                     ( cProjectVersion )
 import GhcPrelude
-
 import Binary
 import BinIface                   ( getDictFastString )
 import FastMutInt
@@ -14,17 +17,23 @@ import Outputable
 import PrelInfo
 import SrcLoc
 import UniqSupply                 ( takeUniqFromSupply )
+import Util                       ( maybeRead )
 import Unique
 import UniqFM
 
 import qualified Data.Array as A
 import Data.IORef
+import Data.ByteString            ( ByteString )
+import qualified Data.ByteString  as BS
+import qualified Data.ByteString.Char8 as BSC
 import Data.List                  ( mapAccumR )
-import Data.Word                  ( Word32 )
-import Control.Monad              ( replicateM )
+import Data.Word                  ( Word8, Word32 )
+import Control.Monad              ( replicateM, when )
 import System.Directory           ( createDirectoryIfMissing )
 import System.FilePath            ( takeDirectory )
 
+import HieTypes
+
 -- | `Name`'s get converted into `HieName`'s before being written into @.hie@
 -- files. See 'toHieName' and 'fromHieName' for logic on how to convert between
 -- these two types.
@@ -63,10 +72,33 @@ data HieDictionary = HieDictionary
 initBinMemSize :: Int
 initBinMemSize = 1024*1024
 
-writeHieFile :: Binary a => FilePath -> a -> IO ()
+-- | The header for HIE files - Capital ASCII letters "HIE".
+hieMagic :: [Word8]
+hieMagic = [72,73,69]
+
+hieMagicLen :: Int
+hieMagicLen = length hieMagic
+
+ghcVersion :: ByteString
+ghcVersion = BSC.pack cProjectVersion
+
+putBinLine :: BinHandle -> ByteString -> IO ()
+putBinLine bh xs = do
+  mapM_ (putByte bh) $ BS.unpack xs
+  putByte bh 10 -- newline char
+
+-- | Write a `HieFile` to the given `FilePath`, with a proper header and
+-- symbol tables for `Name`s and `FastString`s
+writeHieFile :: FilePath -> HieFile -> IO ()
 writeHieFile hie_file_path hiefile = do
   bh0 <- openBinMem initBinMemSize
 
+  -- Write the header: hieHeader followed by the
+  -- hieVersion and the GHC version used to generate this file
+  mapM_ (putByte bh0) hieMagic
+  putBinLine bh0 $ BSC.pack $ show hieVersion
+  putBinLine bh0 $ ghcVersion
+
   -- remember where the dictionary pointer will go
   dict_p_p <- tellBin bh0
   put_ bh0 dict_p_p
@@ -105,7 +137,7 @@ writeHieFile hie_file_path hiefile = do
   symtab_map'  <- readIORef symtab_map
   putSymbolTable bh symtab_next' symtab_map'
 
-  -- write the dictionary pointer at the fornt of the file
+  -- write the dictionary pointer at the front of the file
   dict_p <- tellBin bh
   putAt bh dict_p_p dict_p
   seekBin bh dict_p
@@ -120,10 +152,87 @@ writeHieFile hie_file_path hiefile = do
   writeBinMem bh hie_file_path
   return ()
 
-readHieFile :: Binary a => NameCache -> FilePath -> IO (a, NameCache)
+data HieFileResult
+  = HieFileResult
+  { hie_file_result_version :: Integer
+  , hie_file_result_ghc_version :: ByteString
+  , hie_file_result :: HieFile
+  }
+
+type HieHeader = (Integer, ByteString)
+
+-- | Read a `HieFile` from a `FilePath`. Can use
+-- an existing `NameCache`. Allows you to specify
+-- which versions of hieFile to attempt to read.
+-- `Left` case returns the failing header versions.
+readHieFileWithVersion :: (HieHeader -> Bool) -> NameCache -> FilePath -> IO (Either HieHeader (HieFileResult, NameCache))
+readHieFileWithVersion readVersion nc file = do
+  bh0 <- readBinMem file
+
+  (hieVersion, ghcVersion) <- readHieFileHeader file bh0
+
+  if readVersion (hieVersion, ghcVersion)
+  then do
+    (hieFile, nc') <- readHieFileContents bh0 nc
+    return $ Right (HieFileResult hieVersion ghcVersion hieFile, nc')
+  else return $ Left (hieVersion, ghcVersion)
+
+
+-- | Read a `HieFile` from a `FilePath`. Can use
+-- an existing `NameCache`.
+readHieFile :: NameCache -> FilePath -> IO (HieFileResult, NameCache)
 readHieFile nc file = do
+
   bh0 <- readBinMem file
 
+  (readHieVersion, ghcVersion) <- readHieFileHeader file bh0
+
+  -- Check if the versions match
+  when (readHieVersion /= hieVersion) $
+    panic $ unwords ["readHieFile: hie file versions don't match for file:"
+                    , file
+                    , "Expected"
+                    , show hieVersion
+                    , "but got", show readHieVersion
+                    ]
+  (hieFile, nc') <- readHieFileContents bh0 nc
+  return $ (HieFileResult hieVersion ghcVersion hieFile, nc')
+
+readBinLine :: BinHandle -> IO ByteString
+readBinLine bh = BS.pack . reverse <$> loop []
+  where
+    loop acc = do
+      char <- get bh :: IO Word8
+      if char == 10 -- ASCII newline '\n'
+      then return acc
+      else loop (char : acc)
+
+readHieFileHeader :: FilePath -> BinHandle -> IO HieHeader
+readHieFileHeader file bh0 = do
+  -- Read the header
+  magic <- replicateM hieMagicLen (get bh0)
+  version <- BSC.unpack <$> readBinLine bh0
+  case maybeRead version of
+    Nothing ->
+      panic $ unwords ["readHieFileHeader: hieVersion isn't an Integer:"
+                      , show version
+                      ]
+    Just readHieVersion -> do
+      ghcVersion <- readBinLine bh0
+
+      -- Check if the header is valid
+      when (magic /= hieMagic) $
+        panic $ unwords ["readHieFileHeader: headers don't match for file:"
+                        , file
+                        , "Expected"
+                        , show hieMagic
+                        , "but got", show magic
+                        ]
+      return (readHieVersion, ghcVersion)
+
+readHieFileContents :: BinHandle -> NameCache -> IO (HieFile, NameCache)
+readHieFileContents bh0 nc = do
+
   dict  <- get_dictionary bh0
 
   -- read the symbol table so we are capable of reading the actual data


=====================================
compiler/hieFile/HieDebug.hs
=====================================
@@ -1,3 +1,6 @@
+{-
+Functions to validate and check .hie file ASTs generated by GHC.
+-}
 {-# LANGUAGE StandaloneDeriving #-}
 {-# LANGUAGE OverloadedStrings #-}
 {-# LANGUAGE FlexibleContexts #-}


=====================================
compiler/hieFile/HieTypes.hs
=====================================
@@ -1,3 +1,8 @@
+{-
+Types for the .hie file format are defined here.
+
+For more information see https://gitlab.haskell.org/ghc/ghc/wikis/hie-files
+-}
 {-# LANGUAGE DeriveTraversable #-}
 {-# LANGUAGE DeriveDataTypeable #-}
 {-# LANGUAGE TypeSynonymInstances #-}
@@ -7,6 +12,7 @@ module HieTypes where
 
 import GhcPrelude
 
+import Config
 import Binary
 import FastString                 ( FastString )
 import IfaceType
@@ -28,8 +34,8 @@ import Control.Applicative        ( (<|>) )
 type Span = RealSrcSpan
 
 -- | Current version of @.hie@ files
-curHieVersion :: Word8
-curHieVersion = 0
+hieVersion :: Integer
+hieVersion = read (cProjectVersionInt ++ cProjectPatchLevel) :: Integer
 
 {- |
 GHC builds up a wealth of information about Haskell source as it compiles it.
@@ -48,13 +54,7 @@ Besides saving compilation cycles, @.hie@ files also offer a more stable
 interface than the GHC API.
 -}
 data HieFile = HieFile
-    { hie_version :: Word8
-    -- ^ version of the HIE format
-
-    , hie_ghc_version :: ByteString
-    -- ^ Version of GHC that produced this file
-
-    , hie_hs_file :: FilePath
+    { hie_hs_file :: FilePath
     -- ^ Initial Haskell source file path
 
     , hie_module :: Module
@@ -74,11 +74,8 @@ data HieFile = HieFile
     , hie_hs_src :: ByteString
     -- ^ Raw bytes of the initial Haskell source
     }
-
 instance Binary HieFile where
   put_ bh hf = do
-    put_ bh $ hie_version hf
-    put_ bh $ hie_ghc_version hf
     put_ bh $ hie_hs_file hf
     put_ bh $ hie_module hf
     put_ bh $ hie_types hf
@@ -93,8 +90,6 @@ instance Binary HieFile where
     <*> get bh
     <*> get bh
     <*> get bh
-    <*> get bh
-    <*> get bh
 
 
 {-


=====================================
compiler/main/HscMain.hs
=====================================
@@ -174,7 +174,7 @@ import Data.Set (Set)
 
 import HieAst           ( mkHieFile )
 import HieTypes         ( getAsts, hie_asts )
-import HieBin           ( readHieFile, writeHieFile )
+import HieBin           ( readHieFile, writeHieFile , hie_file_result)
 import HieDebug         ( diffFile, validateScopes )
 
 #include "HsVersions.h"
@@ -434,7 +434,7 @@ extract_renamed_stuff mod_summary tc_result = do
               -- Roundtrip testing
               nc <- readIORef $ hsc_NC hs_env
               (file', _) <- readHieFile nc out_file
-              case diffFile hieFile file' of
+              case diffFile hieFile (hie_file_result file') of
                 [] ->
                   putMsg dflags $ text "Got no roundtrip errors"
                 xs -> do


=====================================
utils/haddock
=====================================
@@ -1 +1 @@
-Subproject commit f01473ed28e7c2700ff8e87b00ab87a802c9edd9
+Subproject commit 83bb9870a117f9426e6f6cff6fec3bb6e93a7c18



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/commit/0e0d87da2fd25e2fb255417fcb15f93f508c1250

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/commit/0e0d87da2fd25e2fb255417fcb15f93f508c1250
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20190531/f9fb9102/attachment-0001.html>


More information about the ghc-commits mailing list