[Git][ghc/ghc][master] Remove source location information from interface files

Marge Bot (@marge-bot) gitlab at gitlab.haskell.org
Thu Oct 27 17:54:56 UTC 2022



Marge Bot pushed to branch master at Glasgow Haskell Compiler / GHC


Commits:
223e159d by Owen Shepherd at 2022-10-27T13:54:33-04:00
Remove source location information from interface files

This change aims to minimize source location information leaking
into interface files, which makes ABI hashes dependent on the
build location.

The `Binary (Located a)` instance has been removed completely.

It seems that the HIE interface still needs the ability to
serialize SrcSpans, but by wrapping the instances, it should
be a lot more difficult to inadvertently add source location
information.

- - - - -


8 changed files:

- compiler/GHC/Data/BooleanFormula.hs
- compiler/GHC/Hs/Doc.hs
- compiler/GHC/Hs/DocString.hs
- compiler/GHC/Iface/Ext/Binary.hs
- compiler/GHC/Iface/Ext/Types.hs
- compiler/GHC/Parser/Annotation.hs
- compiler/GHC/Unit/Module/Warnings.hs
- compiler/GHC/Utils/Binary.hs


Changes:

=====================================
compiler/GHC/Data/BooleanFormula.hs
=====================================
@@ -25,7 +25,7 @@ import Data.Data
 import GHC.Utils.Monad
 import GHC.Utils.Outputable
 import GHC.Utils.Binary
-import GHC.Parser.Annotation ( LocatedL )
+import GHC.Parser.Annotation ( LocatedL, noLocA )
 import GHC.Types.SrcLoc
 import GHC.Types.Unique
 import GHC.Types.Unique.Set
@@ -251,14 +251,14 @@ pprBooleanFormulaNormal = go
 
 instance Binary a => Binary (BooleanFormula a) where
   put_ bh (Var x)    = putByte bh 0 >> put_ bh x
-  put_ bh (And xs)   = putByte bh 1 >> put_ bh xs
-  put_ bh (Or  xs)   = putByte bh 2 >> put_ bh xs
-  put_ bh (Parens x) = putByte bh 3 >> put_ bh x
+  put_ bh (And xs)   = putByte bh 1 >> put_ bh (unLoc <$> xs)
+  put_ bh (Or  xs)   = putByte bh 2 >> put_ bh (unLoc <$> xs)
+  put_ bh (Parens x) = putByte bh 3 >> put_ bh (unLoc x)
 
   get bh = do
     h <- getByte bh
     case h of
-      0 -> Var    <$> get bh
-      1 -> And    <$> get bh
-      2 -> Or     <$> get bh
-      _ -> Parens <$> get bh
+      0 -> Var                  <$> get bh
+      1 -> And    . fmap noLocA <$> get bh
+      2 -> Or     . fmap noLocA <$> get bh
+      _ -> Parens . noLocA      <$> get bh


=====================================
compiler/GHC/Hs/Doc.hs
=====================================
@@ -85,9 +85,9 @@ instance Outputable a => Outputable (WithHsDocIdentifiers a pass) where
 instance Binary a => Binary (WithHsDocIdentifiers a GhcRn) where
   put_ bh (WithHsDocIdentifiers s ids) = do
     put_ bh s
-    put_ bh ids
+    put_ bh $ BinLocated <$> ids
   get bh =
-    liftA2 WithHsDocIdentifiers (get bh) (get bh)
+    liftA2 WithHsDocIdentifiers (get bh) (fmap unBinLocated <$> get bh)
 
 -- | Extract a mapping from the lexed identifiers to the names they may
 -- correspond to.


=====================================
compiler/GHC/Hs/DocString.hs
=====================================
@@ -75,19 +75,19 @@ instance Binary HsDocString where
     MultiLineDocString dec xs -> do
       putByte bh 0
       put_ bh dec
-      put_ bh xs
+      put_ bh $ BinLocated <$> xs
     NestedDocString dec x -> do
       putByte bh 1
       put_ bh dec
-      put_ bh x
+      put_ bh $ BinLocated x
     GeneratedDocString x -> do
       putByte bh 2
       put_ bh x
   get bh = do
     tag <- getByte bh
     case tag of
-      0 -> MultiLineDocString <$> get bh <*> get bh
-      1 -> NestedDocString <$> get bh <*> get bh
+      0 -> MultiLineDocString <$> get bh <*> (fmap unBinLocated <$> get bh)
+      1 -> NestedDocString <$> get bh <*> (unBinLocated <$> get bh)
       2 -> GeneratedDocString <$> get bh
       t -> fail $ "HsDocString: invalid tag " ++ show t
 


=====================================
compiler/GHC/Iface/Ext/Binary.hs
=====================================
@@ -339,10 +339,10 @@ fromHieName nc hie_name = do
 putHieName :: BinHandle -> HieName -> IO ()
 putHieName bh (ExternalName mod occ span) = do
   putByte bh 0
-  put_ bh (mod, occ, span)
+  put_ bh (mod, occ, BinSrcSpan span)
 putHieName bh (LocalName occName span) = do
   putByte bh 1
-  put_ bh (occName, span)
+  put_ bh (occName, BinSrcSpan span)
 putHieName bh (KnownKeyName uniq) = do
   putByte bh 2
   put_ bh $ unpkUnique uniq
@@ -353,10 +353,10 @@ getHieName bh = do
   case t of
     0 -> do
       (modu, occ, span) <- get bh
-      return $ ExternalName modu occ span
+      return $ ExternalName modu occ $ unBinSrcSpan span
     1 -> do
       (occ, span) <- get bh
-      return $ LocalName occ span
+      return $ LocalName occ $ unBinSrcSpan span
     2 -> do
       (c,i) <- get bh
       return $ KnownKeyName $ mkUnique c i


=====================================
compiler/GHC/Iface/Ext/Types.hs
=====================================
@@ -251,12 +251,12 @@ data HieAST a =
 instance Binary (HieAST TypeIndex) where
   put_ bh ast = do
     put_ bh $ sourcedNodeInfo ast
-    put_ bh $ nodeSpan ast
+    put_ bh $ BinSpan $ nodeSpan ast
     put_ bh $ nodeChildren ast
 
   get bh = Node
     <$> get bh
-    <*> get bh
+    <*> (unBinSpan <$> get bh)
     <*> get bh
 
 instance Outputable a => Outputable (HieAST a) where
@@ -486,19 +486,19 @@ instance Binary ContextInfo where
     putByte bh 3
     put_ bh bt
     put_ bh sc
-    put_ bh msp
+    put_ bh $ BinSpan <$> msp
   put_ bh (PatternBind a b c) = do
     putByte bh 4
     put_ bh a
     put_ bh b
-    put_ bh c
+    put_ bh $ BinSpan <$> c
   put_ bh (ClassTyDecl sp) = do
     putByte bh 5
-    put_ bh sp
+    put_ bh $ BinSpan <$> sp
   put_ bh (Decl a b) = do
     putByte bh 6
     put_ bh a
-    put_ bh b
+    put_ bh $ BinSpan <$> b
   put_ bh (TyVarBind a b) = do
     putByte bh 7
     put_ bh a
@@ -506,13 +506,13 @@ instance Binary ContextInfo where
   put_ bh (RecField a b) = do
     putByte bh 8
     put_ bh a
-    put_ bh b
+    put_ bh $ BinSpan <$> b
   put_ bh MatchBind = putByte bh 9
   put_ bh (EvidenceVarBind a b c) = do
     putByte bh 10
     put_ bh a
     put_ bh b
-    put_ bh c
+    put_ bh $ BinSpan <$> c
   put_ bh EvidenceVarUse = putByte bh 11
 
   get bh = do
@@ -521,14 +521,14 @@ instance Binary ContextInfo where
       0 -> return Use
       1 -> IEThing <$> get bh
       2 -> return TyDecl
-      3 -> ValBind <$> get bh <*> get bh <*> get bh
-      4 -> PatternBind <$> get bh <*> get bh <*> get bh
-      5 -> ClassTyDecl <$> get bh
-      6 -> Decl <$> get bh <*> get bh
+      3 -> ValBind <$> get bh <*> get bh <*> (fmap unBinSpan <$> get bh)
+      4 -> PatternBind <$> get bh <*> get bh <*> (fmap unBinSpan <$> get bh)
+      5 -> ClassTyDecl <$> (fmap unBinSpan <$> get bh)
+      6 -> Decl <$> get bh <*> (fmap unBinSpan <$> get bh)
       7 -> TyVarBind <$> get bh <*> get bh
-      8 -> RecField <$> get bh <*> get bh
+      8 -> RecField <$> get bh <*> (fmap unBinSpan <$> get bh)
       9 -> return MatchBind
-      10 -> EvidenceVarBind <$> get bh <*> get bh <*> get bh
+      10 -> EvidenceVarBind <$> get bh <*> get bh <*> (fmap unBinSpan <$> get bh)
       11 -> return EvidenceVarUse
       _ -> panic "Binary ContextInfo: invalid tag"
 
@@ -679,14 +679,14 @@ instance Binary Scope where
   put_ bh NoScope = putByte bh 0
   put_ bh (LocalScope span) = do
     putByte bh 1
-    put_ bh span
+    put_ bh $ BinSpan span
   put_ bh ModuleScope = putByte bh 2
 
   get bh = do
     (t :: Word8) <- get bh
     case t of
       0 -> return NoScope
-      1 -> LocalScope <$> get bh
+      1 -> LocalScope . unBinSpan <$> get bh
       2 -> return ModuleScope
       _ -> panic "Binary Scope: invalid tag"
 
@@ -732,13 +732,13 @@ instance Binary TyVarScope where
   put_ bh (UnresolvedScope ns span) = do
     putByte bh 1
     put_ bh ns
-    put_ bh span
+    put_ bh (BinSpan <$> span)
 
   get bh = do
     (t :: Word8) <- get bh
     case t of
       0 -> ResolvedScopes <$> get bh
-      1 -> UnresolvedScope <$> get bh <*> get bh
+      1 -> UnresolvedScope <$> get bh <*> (fmap unBinSpan <$> get bh)
       _ -> panic "Binary TyVarScope: invalid tag"
 
 -- | `Name`'s get converted into `HieName`'s before being written into @.hie@


=====================================
compiler/GHC/Parser/Annotation.hs
=====================================
@@ -95,7 +95,6 @@ import GHC.Data.FastString
 import GHC.Types.Name
 import GHC.Types.SrcLoc
 import GHC.Hs.DocString
-import GHC.Utils.Binary
 import GHC.Utils.Outputable hiding ( (<>) )
 import GHC.Utils.Panic
 import qualified GHC.Data.Strict as Strict
@@ -1249,17 +1248,6 @@ instance Outputable AnnSortKey where
 instance Outputable IsUnicodeSyntax where
   ppr = text . show
 
-instance Binary a => Binary (LocatedL a) where
-  -- We do not serialise the annotations
-    put_ bh (L l x) = do
-            put_ bh (locA l)
-            put_ bh x
-
-    get bh = do
-            l <- get bh
-            x <- get bh
-            return (L (noAnnSrcSpan l) x)
-
 instance (Outputable a) => Outputable (SrcSpanAnn' a) where
   ppr (SrcSpanAnn a l) = text "SrcSpanAnn" <+> ppr a <+> ppr l
 


=====================================
compiler/GHC/Unit/Module/Warnings.hs
=====================================
@@ -61,21 +61,21 @@ instance Outputable (WarningTxt pass) where
 instance Binary (WarningTxt GhcRn) where
     put_ bh (WarningTxt s w) = do
             putByte bh 0
-            put_ bh s
-            put_ bh w
+            put_ bh $ unLoc s
+            put_ bh $ unLoc <$> w
     put_ bh (DeprecatedTxt s d) = do
             putByte bh 1
-            put_ bh s
-            put_ bh d
+            put_ bh $ unLoc s
+            put_ bh $ unLoc <$> d
 
     get bh = do
             h <- getByte bh
             case h of
-              0 -> do s <- get bh
-                      w <- get bh
+              0 -> do s <- noLoc <$> get bh
+                      w <- fmap noLoc  <$> get bh
                       return (WarningTxt s w)
-              _ -> do s <- get bh
-                      d <- get bh
+              _ -> do s <- noLoc <$> get bh
+                      d <- fmap noLoc <$> get bh
                       return (DeprecatedTxt s d)
 
 


=====================================
compiler/GHC/Utils/Binary.hs
=====================================
@@ -73,6 +73,9 @@ module GHC.Utils.Binary
    UserData(..), getUserData, setUserData,
    newReadState, newWriteState,
    putDictionary, getDictionary, putFS,
+
+   -- * Newtype wrappers
+   BinSpan(..), BinSrcSpan(..), BinLocated(..)
   ) where
 
 import GHC.Prelude
@@ -1285,18 +1288,23 @@ instance Binary ModuleName where
 --             fs <- get bh
 --             return (StringLiteral st fs Nothing)
 
-instance Binary a => Binary (Located a) where
-    put_ bh (L l x) = do
-            put_ bh l
+newtype BinLocated a = BinLocated { unBinLocated :: Located a }
+
+instance Binary a => Binary (BinLocated a) where
+    put_ bh (BinLocated (L l x)) = do
+            put_ bh $ BinSrcSpan l
             put_ bh x
 
     get bh = do
-            l <- get bh
+            l <- unBinSrcSpan <$> get bh
             x <- get bh
-            return (L l x)
+            return $ BinLocated (L l x)
+
+newtype BinSpan = BinSpan { unBinSpan :: RealSrcSpan }
 
-instance Binary RealSrcSpan where
-  put_ bh ss = do
+-- See Note [Source Location Wrappers]
+instance Binary BinSpan where
+  put_ bh (BinSpan ss) = do
             put_ bh (srcSpanFile ss)
             put_ bh (srcSpanStartLine ss)
             put_ bh (srcSpanStartCol ss)
@@ -1309,8 +1317,8 @@ instance Binary RealSrcSpan where
             sc <- get bh
             el <- get bh
             ec <- get bh
-            return (mkRealSrcSpan (mkRealSrcLoc f sl sc)
-                                  (mkRealSrcLoc f el ec))
+            return $ BinSpan (mkRealSrcSpan (mkRealSrcLoc f sl sc)
+                                            (mkRealSrcLoc f el ec))
 
 instance Binary UnhelpfulSpanReason where
   put_ bh r = case r of
@@ -1329,24 +1337,44 @@ instance Binary UnhelpfulSpanReason where
       3 -> return UnhelpfulGenerated
       _ -> UnhelpfulOther <$> get bh
 
-instance Binary SrcSpan where
-  put_ bh (RealSrcSpan ss _sb) = do
+newtype BinSrcSpan = BinSrcSpan { unBinSrcSpan :: SrcSpan }
+
+-- See Note [Source Location Wrappers]
+instance Binary BinSrcSpan where
+  put_ bh (BinSrcSpan (RealSrcSpan ss _sb)) = do
           putByte bh 0
           -- BufSpan doesn't ever get serialised because the positions depend
           -- on build location.
-          put_ bh ss
+          put_ bh $ BinSpan ss
 
-  put_ bh (UnhelpfulSpan s) = do
+  put_ bh (BinSrcSpan (UnhelpfulSpan s)) = do
           putByte bh 1
           put_ bh s
 
   get bh = do
           h <- getByte bh
           case h of
-            0 -> do ss <- get bh
-                    return (RealSrcSpan ss Strict.Nothing)
+            0 -> do BinSpan ss <- get bh
+                    return $ BinSrcSpan (RealSrcSpan ss Strict.Nothing)
             _ -> do s <- get bh
-                    return (UnhelpfulSpan s)
+                    return $ BinSrcSpan (UnhelpfulSpan s)
+
+
+{-
+Note [Source Location Wrappers]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Source locations are banned from interface files, to
+prevent filepaths affecting interface hashes.
+
+Unfortunately, we can't remove all binary instances,
+as they're used to serialise .hie files, and we don't
+want to break binary compatibility.
+
+To this end, the Bin[Src]Span newtypes wrappers were
+introduced to prevent accidentally serialising a
+source location as part of a larger structure.
+-}
 
 --------------------------------------------------------------------------------
 -- Instances for the containers package



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/223e159d7af546a7176eef073e6e599b3c261c9c

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/223e159d7af546a7176eef073e6e599b3c261c9c
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20221027/1540f430/attachment-0001.html>


More information about the ghc-commits mailing list