[Git][ghc/ghc][wip/az/ghc-cpp] Process nested include files

Alan Zimmerman (@alanz) gitlab at gitlab.haskell.org
Wed Oct 4 22:28:34 UTC 2023



Alan Zimmerman pushed to branch wip/az/ghc-cpp at Glasgow Haskell Compiler / GHC


Commits:
4ba68570 by Alan Zimmerman at 2023-10-04T23:27:25+01:00
Process nested include files

Also move PpState out of Lexer.x, so it is easy to evolve it in a ghci
session, loading utils/check-cpp/Main.hs

- - - - -


8 changed files:

- compiler/GHC.hs
- compiler/GHC/Cmm/Lexer.x
- compiler/GHC/Driver/Main.hs
- compiler/GHC/Parser/HaddockLex.x
- compiler/GHC/Parser/Lexer.x
- compiler/GHC/Parser/PreProcess.hs
- hadrian/src/Settings/Default.hs
- utils/check-cpp/Main.hs


Changes:

=====================================
compiler/GHC.hs
=====================================
@@ -334,7 +334,7 @@ import GHCi.RemoteTypes
 
 import qualified GHC.Parser as Parser
 import GHC.Parser.Lexer hiding (initParserState)
-import GHC.Parser.PreProcess   (initParserState)
+import GHC.Parser.PreProcess   (initParserState, initPpState)
 import GHC.Parser.Annotation
 import GHC.Parser.Utils
 


=====================================
compiler/GHC/Cmm/Lexer.x
=====================================
@@ -19,7 +19,7 @@ import GHC.Prelude
 
 import GHC.Cmm.Expr
 
-import GHC.Parser.Lexer hiding (getInput, setInput)
+import GHC.Parser.Lexer hiding (getInput, setInput, AlexInput)
 import GHC.Cmm.Parser.Monad
 import GHC.Types.SrcLoc
 import GHC.Types.Unique.FM


=====================================
compiler/GHC/Driver/Main.hs
=====================================
@@ -193,8 +193,8 @@ import GHC.CoreToStg    ( coreToStg )
 
 import GHC.Parser.Errors.Types
 import GHC.Parser
-import GHC.Parser.Lexer as Lexer hiding (initPpState, initParserState)
-import GHC.Parser.PreProcess (initParserState)
+import GHC.Parser.Lexer as Lexer hiding (initParserState)
+import GHC.Parser.PreProcess (initParserState, PpState)
 
 import GHC.Tc.Module
 import GHC.Tc.Utils.Monad


=====================================
compiler/GHC/Parser/HaddockLex.x
=====================================
@@ -9,7 +9,7 @@ import GHC.Prelude
 
 import GHC.Data.FastString
 import GHC.Hs.Doc
-import GHC.Parser.Lexer
+import GHC.Parser.Lexer hiding (AlexInput)
 import GHC.Parser.Annotation
 import GHC.Types.SrcLoc
 import GHC.Types.SourceText


=====================================
compiler/GHC/Parser/Lexer.x
=====================================
@@ -58,7 +58,7 @@ module GHC.Parser.Lexer (
    Token(..), lexer, lexerDbg,
    ParserOpts(..), mkParserOpts,
    PState (..), initParserState, initPragState,
-   PpState(..), initPpState, PpContext(..),
+   -- PpState(..), initPpState, PpContext(..),
    P(..), ParseResult(POk, PFailed),
    allocateComments, allocatePriorComments, allocateFinalComments,
    MonadP(..),
@@ -82,7 +82,7 @@ module GHC.Parser.Lexer (
    addPsMessage,
    -- * for integration with the preprocessor
    queueIgnoredToken,
-   getInput, setInput
+   getInput, setInput, AlexInput(..)
   ) where
 
 import GHC.Prelude
@@ -2785,28 +2785,28 @@ data PState a = PState {
         -- of the action, it is the *current* token.  Do I understand
         -- correctly?
 
--- | Use for emulating (limited) CPP preprocessing in GHC.
--- TODO: move this into PreProcess, and make a param on PState
-data PpState = PpState {
-        pp_defines :: !(Map String [String]),
-        pp_continuation :: ![Located Token],
-        -- pp_context :: ![PpContext],
-        pp_context :: ![Token], -- What preprocessor directive we are currently processing
-        pp_accepting :: !Bool
-     }
-    deriving (Show)
+-- -- | Use for emulating (limited) CPP preprocessing in GHC.
+-- -- TODO: move this into PreProcess, and make a param on PState
+-- data PpState = PpState {
+--         pp_defines :: !(Map String [String]),
+--         pp_continuation :: ![Located Token],
+--         -- pp_context :: ![PpContext],
+--         pp_context :: ![Token], -- What preprocessor directive we are currently processing
+--         pp_accepting :: !Bool
+--      }
+--     deriving (Show)
 
 data PpContext = PpContextIf [Located Token]
     deriving (Show)
 
 -- TODO: delete
-initPpState :: PpState
-initPpState = PpState
-   { pp_defines = Map.empty
-   , pp_continuation = []
-   , pp_context = []
-   , pp_accepting = True
-   }
+-- initPpState :: PpState
+-- initPpState = PpState
+--    { pp_defines = Map.empty
+--    , pp_continuation = []
+--    , pp_context = []
+--    , pp_accepting = True
+--    }
 
 data ALRContext = ALRNoLayout Bool{- does it contain commas? -}
                               Bool{- is it a 'let' block? -}
@@ -2904,6 +2904,7 @@ getLastLoc :: P p PsSpan
 getLastLoc = P $ \s@(PState { last_loc = last_loc }) -> POk s last_loc
 
 data AlexInput = AI !PsLoc !StringBuffer
+                deriving (Show)
 
 {-
 Note [Unicode in Alex]


=====================================
compiler/GHC/Parser/PreProcess.hs
=====================================
@@ -15,6 +15,7 @@ module GHC.Parser.PreProcess (
 ) where
 
 import Data.Char
+import Data.Map (Map)
 import qualified Data.Map as Map
 import qualified Data.Set as Set
 import Debug.Trace (trace)
@@ -22,7 +23,7 @@ import GHC.Data.FastString
 import qualified GHC.Data.Strict as Strict
 import GHC.Data.StringBuffer
 import GHC.Parser.Errors.Ppr ()
-import GHC.Parser.Lexer (P (..), PState (..), ParseResult (..), PpState (..), Token (..))
+import GHC.Parser.Lexer (P (..), PState (..), ParseResult (..), Token (..))
 import qualified GHC.Parser.Lexer as Lexer
 import GHC.Prelude
 import GHC.Types.SrcLoc
@@ -46,6 +47,15 @@ initPpState =
         , pp_accepting = True
         }
 
+data PpState = PpState {
+        pp_defines :: !(Map String [String]),
+        pp_continuation :: ![Located Token],
+        -- pp_context :: ![PpContext],
+        pp_context :: ![Token], -- What preprocessor directive we are currently processing
+        pp_accepting :: !Bool
+     }
+    deriving (Show)
+
 -- ---------------------------------------------------------------------
 
 lexer, lexerDbg :: Bool -> (Located Token -> P PpState a) -> P PpState a


=====================================
hadrian/src/Settings/Default.hs
=====================================
@@ -161,8 +161,10 @@ stage1Packages = do
         , if winTarget then win32 else unix
         ]
       , when (not cross)
-        [ haddock
-        , hpcBin
+        [
+        --   haddock
+        -- ,
+          hpcBin
         , iserv
         , runGhc
         , ghcToolchainBin


=====================================
utils/check-cpp/Main.hs
=====================================
@@ -5,6 +5,7 @@ import Control.Monad.IO.Class
 import Data.Char
 import Data.Data hiding (Fixity)
 import Data.List
+import Data.Map (Map)
 import qualified Data.Map as Map
 import Debug.Trace
 import GHC
@@ -19,21 +20,17 @@ import qualified GHC.Driver.Session as GHC
 import GHC.Hs.Dump
 import qualified GHC.LanguageExtensions as LangExt
 import GHC.Parser.Errors.Ppr ()
-import GHC.Parser.Lexer (P (..), PState (..), ParseResult (..), PpState (..), Token (..))
+import GHC.Parser.Lexer (P (..), PState (..), ParseResult (..), Token (..))
 import qualified GHC.Parser.Lexer as GHC
-import qualified GHC.Parser.Lexer as Lexer hiding (initParserState)
-import qualified GHC.Parser.PreProcess as Lexer  (initParserState)
+import qualified GHC.Parser.Lexer as Lexer
 import GHC.Types.Error
 import GHC.Types.SrcLoc
 import GHC.Utils.Error
 import GHC.Utils.Outputable
-
 import qualified Text.Parsec as Parsec
 import Text.Parsec.Char as PS
 import Text.Parsec.Combinator as PS
 import Text.Parsec.Prim as PS
-
--- import qualified Text.Parsec as Parsec
 import Text.Parsec.String (Parser)
 
 -- import Text.Parsec.Char
@@ -52,6 +49,28 @@ showAst ast =
 
 type PP = P PpState
 
+initPpState :: PpState
+initPpState =
+    PpState
+        { pp_defines = Map.empty
+        , pp_includes = Map.empty
+        , pp_include_stack = []
+        , pp_continuation = []
+        , pp_context = []
+        , pp_accepting = True
+        }
+
+data PpState = PpState
+    { pp_defines :: !(Map String [String])
+    , pp_includes :: !(Map String StringBuffer)
+    , pp_include_stack :: ![Lexer.AlexInput]
+    , pp_continuation :: ![Located Token]
+    , pp_context :: ![Token] -- What preprocessor directive we are currently processing
+    , pp_accepting :: !Bool
+    }
+    deriving (Show)
+-- deriving instance Show Lexer.AlexInput
+
 -- =====================================================================
 
 ppLexer, ppLexerDbg :: Bool -> (Located Token -> PP a) -> PP a
@@ -74,7 +93,13 @@ ppLexer queueComments cont =
              in
                 -- case tk of
                 case (trace ("M.ppLexer:tk=" ++ show (unLoc tk)) tk) of
-                    L _ ITeof -> contInner tk
+                    L _ ITeof -> do
+                      mInp <- popIncludeLoc
+                      case mInp of
+                        Nothing -> contInner tk
+                        Just inp -> do
+                          Lexer.setInput inp
+                          ppLexer queueComments cont
                     L _ (ITcpp continuation s) -> do
                         if continuation
                             then pushContinuation tk
@@ -82,7 +107,8 @@ ppLexer queueComments cont =
                         contIgnoreTok tk
                     _ -> do
                         state <- getCppState
-                        case (trace ("CPP state:" ++ show state) state) of
+                        -- case (trace ("CPP state:" ++ show state) state) of
+                        case state of
                             CppIgnoring -> contIgnoreTok tk
                             _ -> contInner tk
         )
@@ -113,25 +139,28 @@ processCpp fs = do
     -- let s = cppInitial fs
     let s = cppInitial fs
     case regularParse cppDirective s of
-      Left err -> error $ show err
-      Right (CppDefine name def) -> do
-        ppDefine name def
-      Right (CppIfdef name) -> do
-        defined <- ppIsDefined name
-        setAccepting defined
-      Right (CppIfndef name) -> do
-        defined <- ppIsDefined name
-        setAccepting (not defined)
-      Right CppElse -> do
-        accepting <- getAccepting
-        setAccepting (not accepting)
-        return ()
-      Right CppEndif -> do
-        -- TODO: nested states
-        setAccepting True
-        return ()
-
-    return (trace ("processCpp:s=" ++ show s) ())
+        Left err -> error $ show err
+        Right (CppInclude filename) -> do
+            ppInclude filename
+        Right (CppDefine name def) -> do
+            ppDefine name def
+        Right (CppIfdef name) -> do
+            defined <- ppIsDefined name
+            setAccepting defined
+        Right (CppIfndef name) -> do
+            defined <- ppIsDefined name
+            setAccepting (not defined)
+        Right CppElse -> do
+            accepting <- getAccepting
+            setAccepting (not accepting)
+            return ()
+        Right CppEndif -> do
+            -- TODO: nested states
+            setAccepting True
+            return ()
+
+    -- return (trace ("processCpp:s=" ++ show s) ())
+    return ()
 
 -- ---------------------------------------------------------------------
 -- Preprocessor state functions
@@ -193,8 +222,42 @@ popContinuation =
 
 -- pp_context stack end -------------------
 
+-- pp_include start -----------------------
+
+getInclude :: String -> PP (Maybe StringBuffer)
+getInclude filename = P $ \s -> POk s (Map.lookup filename (pp_includes (pp s)))
+
+pushIncludeLoc :: Lexer.AlexInput -> PP ()
+pushIncludeLoc pos
+  = P $ \s -> POk s {pp = (pp s){ pp_include_stack = pos: pp_include_stack (pp s)}} ()
+
+popIncludeLoc :: PP (Maybe Lexer.AlexInput)
+popIncludeLoc =
+    P $ \s ->
+          let
+            (new_st,r) = case pp_include_stack (pp s) of
+              [] ->([], Nothing)
+              (h:t) -> (t, Just h)
+          in
+            POk s{pp = (pp s){pp_include_stack = new_st }} r
+
+-- pp_include end -------------------------
+
 -- definitions start --------------------
 
+ppInclude :: String -> PP ()
+ppInclude filename = do
+    mSrc <- getInclude filename
+    case mSrc of
+      Nothing -> return ()
+      Just src -> do
+        origInput <- Lexer.getInput
+        pushIncludeLoc origInput
+        let loc =  PsLoc (mkRealSrcLoc (mkFastString filename) 1 1) (BufPos 0)
+        Lexer.setInput (Lexer.AI  loc src)
+    return $ trace ("ppInclude:mSrc=[" ++ show mSrc ++ "]") ()
+    -- return $ trace ("ppInclude:filename=[" ++ filename ++ "]") ()
+
 ppDefine :: String -> [String] -> PP ()
 ppDefine name val = P $ \s ->
     -- POk s{pp = (pp s){pp_defines = Set.insert (cleanTokenString def) (pp_defines (pp s))}} ()
@@ -227,7 +290,6 @@ type CppParser = Parsec String ()
 regularParse :: Parser a -> String -> Either Parsec.ParseError a
 regularParse p = PS.parse p ""
 
-
 -- TODO: delete this
 cppDefinition :: CppParser (String, [String])
 cppDefinition = do
@@ -239,11 +301,12 @@ cppDefinition = do
     return (name, definition)
 
 data CppDirective
-  = CppDefine String [String]
-  | CppIfdef String
-  | CppIfndef String
-  | CppElse
-  | CppEndif
+    = CppInclude String
+    | CppDefine String [String]
+    | CppIfdef String
+    | CppIfndef String
+    | CppElse
+    | CppEndif
     deriving (Show, Eq)
 
 cppDirective :: CppParser CppDirective
@@ -252,32 +315,39 @@ cppDirective = do
     _ <- whiteSpace
     choice
         [ cppKw "define" >> cmdDefinition
-        -- , cppKw "include" CppIncludeKw
-        -- , cppKw "undef" CppUndefKw
-        -- , cppKw "error" CppErrorKw
-        , try$ cppKw "ifdef" >> cmdIfdef
+        , try $ cppKw "include" >> cmdInclude
+        , try $ cppKw "ifdef" >> cmdIfdef
         , cppKw "ifndef" >> cmdIfndef
-        -- , cppKw "if" CppIfKw
-        -- , cppKw "elif" CppElifKw
         , try $ cppKw "else" >> return CppElse
         , cppKw "endif" >> return CppEndif
+        -- , cppKw "if" CppIfKw
+        -- , cppKw "elif" CppElifKw
+        -- , cppKw "undef" CppUndefKw
+        -- , cppKw "error" CppErrorKw
         ]
 
+cmdInclude :: CppParser CppDirective
+cmdInclude = do
+    _ <- string "\""
+    filename <- many1 (satisfy (\c -> not (isSpace c || c == '"')))
+    _ <- string "\""
+    return $ CppInclude filename
+
 cmdDefinition :: CppParser CppDirective
 cmdDefinition = do
-      name <- cppToken
-      definition <- cppTokens
-      return $ CppDefine name definition
+    name <- cppToken
+    definition <- cppTokens
+    return $ CppDefine name definition
 
 cmdIfdef :: CppParser CppDirective
 cmdIfdef = do
-      name <- cppToken
-      return $ CppIfdef name
+    name <- cppToken
+    return $ CppIfdef name
 
 cmdIfndef :: CppParser CppDirective
 cmdIfndef = do
-      name <- cppToken
-      return $ CppIfndef name
+    name <- cppToken
+    return $ CppIfndef name
 
 cppKw :: String -> CppParser ()
 cppKw kw = do
@@ -334,10 +404,11 @@ directive.
 -- Emulate the parser
 
 type LibDir = FilePath
+type Includes = [(String, [String])]
 
 -- parseString :: LibDir -> String -> IO (WarningMessages, Either ErrorMessages [Located Token])
-parseString :: LibDir -> String -> IO [Located Token]
-parseString libdir str = ghcWrapper libdir $ do
+parseString :: LibDir -> Includes -> String -> IO [Located Token]
+parseString libdir includes str = ghcWrapper libdir $ do
     dflags0 <- initDynFlags
     let dflags = dflags0{extensionFlags = EnumSet.insert LangExt.GhcCpp (extensionFlags dflags0)}
     let pflags = initParserOpts dflags
@@ -345,12 +416,15 @@ parseString libdir str = ghcWrapper libdir $ do
     liftIO $ putStrLn "-- parsing ----------"
     liftIO $ putStrLn str
     liftIO $ putStrLn "---------------------"
-    return $ strGetToks pflags "fake_test_file.hs" str
+    return $ strGetToks includes pflags "fake_test_file.hs" str
 
-strGetToks :: Lexer.ParserOpts -> FilePath -> String -> [Located Token]
-strGetToks popts filename str = reverse $ lexAll pstate
+strGetToks :: Includes -> Lexer.ParserOpts -> FilePath -> String -> [Located Token]
+-- strGetToks includes popts filename str = reverse $ lexAll pstate
+strGetToks includes popts filename str = reverse $ lexAll (trace ("pstate=" ++ show initState) pstate)
   where
-    pstate = Lexer.initParserState popts buf loc
+    includeMap = Map.fromList $ map (\(k,v) -> (k, stringToStringBuffer (intercalate "\n" v))) includes
+    initState = initPpState { pp_includes = includeMap }
+    pstate = Lexer.initParserState initState popts buf loc
     loc = mkRealSrcLoc (mkFastString filename) 1 1
     buf = stringToStringBuffer str
     -- cpp_enabled = Lexer.GhcCppBit `Lexer.xtest` Lexer.pExtsBitmap popts
@@ -387,7 +461,7 @@ strParser str dflags filename =
         loc = mkRealSrcLoc (mkFastString filename) 1 1
         buf = stringToStringBuffer str
      in
-        case unP parseModuleNoHaddock (Lexer.initParserState (initParserOpts dflags) buf loc) of
+        case unP parseModuleNoHaddock (Lexer.initParserState initPpState (initParserOpts dflags) buf loc) of
             PFailed pst ->
                 let (warns, errs) = Lexer.getPsMessages pst
                  in (GhcPsMessage <$> warns, Left $ GhcPsMessage <$> errs)
@@ -506,9 +580,12 @@ libdirNow :: LibDir
 libdirNow = "/home/alanz/mysrc/git.haskell.org/worktree/bisect/_build/stage1/lib"
 
 doTest :: [String] -> IO ()
-doTest strings = do
+doTest strings = doTestWithIncludes [] strings
+
+doTestWithIncludes :: Includes -> [String] -> IO ()
+doTestWithIncludes includes strings = do
     let test = intercalate "\n" strings
-    !tks <- parseString libdirNow test
+    !tks <- parseString libdirNow includes test
     putStrLn "-----------------------------------------"
     printToks (reverse tks)
 
@@ -630,3 +707,26 @@ t8 = parseDefine (mkFastString "#define MIN_VERSION_ghc_exactprint(major1,major2
 
 t9 :: Either Parsec.ParseError CppDirective
 t9 = regularParse cppDirective "#define VERSION_ghc_exactprint \"1.7.0.1\""
+
+t10 :: IO ()
+t10 = do
+    doTestWithIncludes testIncludes
+        [ "#include \"bar.h\""
+        , ""
+        , "#ifdef FOO"
+        , "x = 1"
+        , "#else"
+        , "x = 2"
+        , "#endif"
+        ]
+
+testIncludes :: Includes
+testIncludes =
+    [
+        ( "bar.h"
+        , ["#include \"sub.h\""]
+        ),
+        ( "sub.h"
+        , ["#define FOO"]
+        )
+    ]



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/4ba68570d8f07162efc641474133bd0695e85a39

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/4ba68570d8f07162efc641474133bd0695e85a39
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20231004/2d634d32/attachment-0001.html>


More information about the ghc-commits mailing list