[Git][ghc/ghc][wip/az/ghc-cpp] Process nested include files
Alan Zimmerman (@alanz)
gitlab at gitlab.haskell.org
Wed Oct 4 22:28:34 UTC 2023
Alan Zimmerman pushed to branch wip/az/ghc-cpp at Glasgow Haskell Compiler / GHC
Commits:
4ba68570 by Alan Zimmerman at 2023-10-04T23:27:25+01:00
Process nested include files
Also move PpState out of Lexer.x, so it is easy to evolve it in a ghci
session, loading utils/check-cpp/Main.hs
- - - - -
8 changed files:
- compiler/GHC.hs
- compiler/GHC/Cmm/Lexer.x
- compiler/GHC/Driver/Main.hs
- compiler/GHC/Parser/HaddockLex.x
- compiler/GHC/Parser/Lexer.x
- compiler/GHC/Parser/PreProcess.hs
- hadrian/src/Settings/Default.hs
- utils/check-cpp/Main.hs
Changes:
=====================================
compiler/GHC.hs
=====================================
@@ -334,7 +334,7 @@ import GHCi.RemoteTypes
import qualified GHC.Parser as Parser
import GHC.Parser.Lexer hiding (initParserState)
-import GHC.Parser.PreProcess (initParserState)
+import GHC.Parser.PreProcess (initParserState, initPpState)
import GHC.Parser.Annotation
import GHC.Parser.Utils
=====================================
compiler/GHC/Cmm/Lexer.x
=====================================
@@ -19,7 +19,7 @@ import GHC.Prelude
import GHC.Cmm.Expr
-import GHC.Parser.Lexer hiding (getInput, setInput)
+import GHC.Parser.Lexer hiding (getInput, setInput, AlexInput)
import GHC.Cmm.Parser.Monad
import GHC.Types.SrcLoc
import GHC.Types.Unique.FM
=====================================
compiler/GHC/Driver/Main.hs
=====================================
@@ -193,8 +193,8 @@ import GHC.CoreToStg ( coreToStg )
import GHC.Parser.Errors.Types
import GHC.Parser
-import GHC.Parser.Lexer as Lexer hiding (initPpState, initParserState)
-import GHC.Parser.PreProcess (initParserState)
+import GHC.Parser.Lexer as Lexer hiding (initParserState)
+import GHC.Parser.PreProcess (initParserState, PpState)
import GHC.Tc.Module
import GHC.Tc.Utils.Monad
=====================================
compiler/GHC/Parser/HaddockLex.x
=====================================
@@ -9,7 +9,7 @@ import GHC.Prelude
import GHC.Data.FastString
import GHC.Hs.Doc
-import GHC.Parser.Lexer
+import GHC.Parser.Lexer hiding (AlexInput)
import GHC.Parser.Annotation
import GHC.Types.SrcLoc
import GHC.Types.SourceText
=====================================
compiler/GHC/Parser/Lexer.x
=====================================
@@ -58,7 +58,7 @@ module GHC.Parser.Lexer (
Token(..), lexer, lexerDbg,
ParserOpts(..), mkParserOpts,
PState (..), initParserState, initPragState,
- PpState(..), initPpState, PpContext(..),
+ -- PpState(..), initPpState, PpContext(..),
P(..), ParseResult(POk, PFailed),
allocateComments, allocatePriorComments, allocateFinalComments,
MonadP(..),
@@ -82,7 +82,7 @@ module GHC.Parser.Lexer (
addPsMessage,
-- * for integration with the preprocessor
queueIgnoredToken,
- getInput, setInput
+ getInput, setInput, AlexInput(..)
) where
import GHC.Prelude
@@ -2785,28 +2785,28 @@ data PState a = PState {
-- of the action, it is the *current* token. Do I understand
-- correctly?
--- | Use for emulating (limited) CPP preprocessing in GHC.
--- TODO: move this into PreProcess, and make a param on PState
-data PpState = PpState {
- pp_defines :: !(Map String [String]),
- pp_continuation :: ![Located Token],
- -- pp_context :: ![PpContext],
- pp_context :: ![Token], -- What preprocessor directive we are currently processing
- pp_accepting :: !Bool
- }
- deriving (Show)
+-- -- | Use for emulating (limited) CPP preprocessing in GHC.
+-- -- TODO: move this into PreProcess, and make a param on PState
+-- data PpState = PpState {
+-- pp_defines :: !(Map String [String]),
+-- pp_continuation :: ![Located Token],
+-- -- pp_context :: ![PpContext],
+-- pp_context :: ![Token], -- What preprocessor directive we are currently processing
+-- pp_accepting :: !Bool
+-- }
+-- deriving (Show)
data PpContext = PpContextIf [Located Token]
deriving (Show)
-- TODO: delete
-initPpState :: PpState
-initPpState = PpState
- { pp_defines = Map.empty
- , pp_continuation = []
- , pp_context = []
- , pp_accepting = True
- }
+-- initPpState :: PpState
+-- initPpState = PpState
+-- { pp_defines = Map.empty
+-- , pp_continuation = []
+-- , pp_context = []
+-- , pp_accepting = True
+-- }
data ALRContext = ALRNoLayout Bool{- does it contain commas? -}
Bool{- is it a 'let' block? -}
@@ -2904,6 +2904,7 @@ getLastLoc :: P p PsSpan
getLastLoc = P $ \s@(PState { last_loc = last_loc }) -> POk s last_loc
data AlexInput = AI !PsLoc !StringBuffer
+ deriving (Show)
{-
Note [Unicode in Alex]
=====================================
compiler/GHC/Parser/PreProcess.hs
=====================================
@@ -15,6 +15,7 @@ module GHC.Parser.PreProcess (
) where
import Data.Char
+import Data.Map (Map)
import qualified Data.Map as Map
import qualified Data.Set as Set
import Debug.Trace (trace)
@@ -22,7 +23,7 @@ import GHC.Data.FastString
import qualified GHC.Data.Strict as Strict
import GHC.Data.StringBuffer
import GHC.Parser.Errors.Ppr ()
-import GHC.Parser.Lexer (P (..), PState (..), ParseResult (..), PpState (..), Token (..))
+import GHC.Parser.Lexer (P (..), PState (..), ParseResult (..), Token (..))
import qualified GHC.Parser.Lexer as Lexer
import GHC.Prelude
import GHC.Types.SrcLoc
@@ -46,6 +47,15 @@ initPpState =
, pp_accepting = True
}
+data PpState = PpState {
+ pp_defines :: !(Map String [String]),
+ pp_continuation :: ![Located Token],
+ -- pp_context :: ![PpContext],
+ pp_context :: ![Token], -- What preprocessor directive we are currently processing
+ pp_accepting :: !Bool
+ }
+ deriving (Show)
+
-- ---------------------------------------------------------------------
lexer, lexerDbg :: Bool -> (Located Token -> P PpState a) -> P PpState a
=====================================
hadrian/src/Settings/Default.hs
=====================================
@@ -161,8 +161,10 @@ stage1Packages = do
, if winTarget then win32 else unix
]
, when (not cross)
- [ haddock
- , hpcBin
+ [
+ -- haddock
+ -- ,
+ hpcBin
, iserv
, runGhc
, ghcToolchainBin
=====================================
utils/check-cpp/Main.hs
=====================================
@@ -5,6 +5,7 @@ import Control.Monad.IO.Class
import Data.Char
import Data.Data hiding (Fixity)
import Data.List
+import Data.Map (Map)
import qualified Data.Map as Map
import Debug.Trace
import GHC
@@ -19,21 +20,17 @@ import qualified GHC.Driver.Session as GHC
import GHC.Hs.Dump
import qualified GHC.LanguageExtensions as LangExt
import GHC.Parser.Errors.Ppr ()
-import GHC.Parser.Lexer (P (..), PState (..), ParseResult (..), PpState (..), Token (..))
+import GHC.Parser.Lexer (P (..), PState (..), ParseResult (..), Token (..))
import qualified GHC.Parser.Lexer as GHC
-import qualified GHC.Parser.Lexer as Lexer hiding (initParserState)
-import qualified GHC.Parser.PreProcess as Lexer (initParserState)
+import qualified GHC.Parser.Lexer as Lexer
import GHC.Types.Error
import GHC.Types.SrcLoc
import GHC.Utils.Error
import GHC.Utils.Outputable
-
import qualified Text.Parsec as Parsec
import Text.Parsec.Char as PS
import Text.Parsec.Combinator as PS
import Text.Parsec.Prim as PS
-
--- import qualified Text.Parsec as Parsec
import Text.Parsec.String (Parser)
-- import Text.Parsec.Char
@@ -52,6 +49,28 @@ showAst ast =
type PP = P PpState
+initPpState :: PpState
+initPpState =
+ PpState
+ { pp_defines = Map.empty
+ , pp_includes = Map.empty
+ , pp_include_stack = []
+ , pp_continuation = []
+ , pp_context = []
+ , pp_accepting = True
+ }
+
+data PpState = PpState
+ { pp_defines :: !(Map String [String])
+ , pp_includes :: !(Map String StringBuffer)
+ , pp_include_stack :: ![Lexer.AlexInput]
+ , pp_continuation :: ![Located Token]
+ , pp_context :: ![Token] -- What preprocessor directive we are currently processing
+ , pp_accepting :: !Bool
+ }
+ deriving (Show)
+-- deriving instance Show Lexer.AlexInput
+
-- =====================================================================
ppLexer, ppLexerDbg :: Bool -> (Located Token -> PP a) -> PP a
@@ -74,7 +93,13 @@ ppLexer queueComments cont =
in
-- case tk of
case (trace ("M.ppLexer:tk=" ++ show (unLoc tk)) tk) of
- L _ ITeof -> contInner tk
+ L _ ITeof -> do
+ mInp <- popIncludeLoc
+ case mInp of
+ Nothing -> contInner tk
+ Just inp -> do
+ Lexer.setInput inp
+ ppLexer queueComments cont
L _ (ITcpp continuation s) -> do
if continuation
then pushContinuation tk
@@ -82,7 +107,8 @@ ppLexer queueComments cont =
contIgnoreTok tk
_ -> do
state <- getCppState
- case (trace ("CPP state:" ++ show state) state) of
+ -- case (trace ("CPP state:" ++ show state) state) of
+ case state of
CppIgnoring -> contIgnoreTok tk
_ -> contInner tk
)
@@ -113,25 +139,28 @@ processCpp fs = do
-- let s = cppInitial fs
let s = cppInitial fs
case regularParse cppDirective s of
- Left err -> error $ show err
- Right (CppDefine name def) -> do
- ppDefine name def
- Right (CppIfdef name) -> do
- defined <- ppIsDefined name
- setAccepting defined
- Right (CppIfndef name) -> do
- defined <- ppIsDefined name
- setAccepting (not defined)
- Right CppElse -> do
- accepting <- getAccepting
- setAccepting (not accepting)
- return ()
- Right CppEndif -> do
- -- TODO: nested states
- setAccepting True
- return ()
-
- return (trace ("processCpp:s=" ++ show s) ())
+ Left err -> error $ show err
+ Right (CppInclude filename) -> do
+ ppInclude filename
+ Right (CppDefine name def) -> do
+ ppDefine name def
+ Right (CppIfdef name) -> do
+ defined <- ppIsDefined name
+ setAccepting defined
+ Right (CppIfndef name) -> do
+ defined <- ppIsDefined name
+ setAccepting (not defined)
+ Right CppElse -> do
+ accepting <- getAccepting
+ setAccepting (not accepting)
+ return ()
+ Right CppEndif -> do
+ -- TODO: nested states
+ setAccepting True
+ return ()
+
+ -- return (trace ("processCpp:s=" ++ show s) ())
+ return ()
-- ---------------------------------------------------------------------
-- Preprocessor state functions
@@ -193,8 +222,42 @@ popContinuation =
-- pp_context stack end -------------------
+-- pp_include start -----------------------
+
+getInclude :: String -> PP (Maybe StringBuffer)
+getInclude filename = P $ \s -> POk s (Map.lookup filename (pp_includes (pp s)))
+
+pushIncludeLoc :: Lexer.AlexInput -> PP ()
+pushIncludeLoc pos
+ = P $ \s -> POk s {pp = (pp s){ pp_include_stack = pos: pp_include_stack (pp s)}} ()
+
+popIncludeLoc :: PP (Maybe Lexer.AlexInput)
+popIncludeLoc =
+ P $ \s ->
+ let
+ (new_st,r) = case pp_include_stack (pp s) of
+ [] ->([], Nothing)
+ (h:t) -> (t, Just h)
+ in
+ POk s{pp = (pp s){pp_include_stack = new_st }} r
+
+-- pp_include end -------------------------
+
-- definitions start --------------------
+ppInclude :: String -> PP ()
+ppInclude filename = do
+ mSrc <- getInclude filename
+ case mSrc of
+ Nothing -> return ()
+ Just src -> do
+ origInput <- Lexer.getInput
+ pushIncludeLoc origInput
+ let loc = PsLoc (mkRealSrcLoc (mkFastString filename) 1 1) (BufPos 0)
+ Lexer.setInput (Lexer.AI loc src)
+ return $ trace ("ppInclude:mSrc=[" ++ show mSrc ++ "]") ()
+ -- return $ trace ("ppInclude:filename=[" ++ filename ++ "]") ()
+
ppDefine :: String -> [String] -> PP ()
ppDefine name val = P $ \s ->
-- POk s{pp = (pp s){pp_defines = Set.insert (cleanTokenString def) (pp_defines (pp s))}} ()
@@ -227,7 +290,6 @@ type CppParser = Parsec String ()
regularParse :: Parser a -> String -> Either Parsec.ParseError a
regularParse p = PS.parse p ""
-
-- TODO: delete this
cppDefinition :: CppParser (String, [String])
cppDefinition = do
@@ -239,11 +301,12 @@ cppDefinition = do
return (name, definition)
data CppDirective
- = CppDefine String [String]
- | CppIfdef String
- | CppIfndef String
- | CppElse
- | CppEndif
+ = CppInclude String
+ | CppDefine String [String]
+ | CppIfdef String
+ | CppIfndef String
+ | CppElse
+ | CppEndif
deriving (Show, Eq)
cppDirective :: CppParser CppDirective
@@ -252,32 +315,39 @@ cppDirective = do
_ <- whiteSpace
choice
[ cppKw "define" >> cmdDefinition
- -- , cppKw "include" CppIncludeKw
- -- , cppKw "undef" CppUndefKw
- -- , cppKw "error" CppErrorKw
- , try$ cppKw "ifdef" >> cmdIfdef
+ , try $ cppKw "include" >> cmdInclude
+ , try $ cppKw "ifdef" >> cmdIfdef
, cppKw "ifndef" >> cmdIfndef
- -- , cppKw "if" CppIfKw
- -- , cppKw "elif" CppElifKw
, try $ cppKw "else" >> return CppElse
, cppKw "endif" >> return CppEndif
+ -- , cppKw "if" CppIfKw
+ -- , cppKw "elif" CppElifKw
+ -- , cppKw "undef" CppUndefKw
+ -- , cppKw "error" CppErrorKw
]
+cmdInclude :: CppParser CppDirective
+cmdInclude = do
+ _ <- string "\""
+ filename <- many1 (satisfy (\c -> not (isSpace c || c == '"')))
+ _ <- string "\""
+ return $ CppInclude filename
+
cmdDefinition :: CppParser CppDirective
cmdDefinition = do
- name <- cppToken
- definition <- cppTokens
- return $ CppDefine name definition
+ name <- cppToken
+ definition <- cppTokens
+ return $ CppDefine name definition
cmdIfdef :: CppParser CppDirective
cmdIfdef = do
- name <- cppToken
- return $ CppIfdef name
+ name <- cppToken
+ return $ CppIfdef name
cmdIfndef :: CppParser CppDirective
cmdIfndef = do
- name <- cppToken
- return $ CppIfndef name
+ name <- cppToken
+ return $ CppIfndef name
cppKw :: String -> CppParser ()
cppKw kw = do
@@ -334,10 +404,11 @@ directive.
-- Emulate the parser
type LibDir = FilePath
+type Includes = [(String, [String])]
-- parseString :: LibDir -> String -> IO (WarningMessages, Either ErrorMessages [Located Token])
-parseString :: LibDir -> String -> IO [Located Token]
-parseString libdir str = ghcWrapper libdir $ do
+parseString :: LibDir -> Includes -> String -> IO [Located Token]
+parseString libdir includes str = ghcWrapper libdir $ do
dflags0 <- initDynFlags
let dflags = dflags0{extensionFlags = EnumSet.insert LangExt.GhcCpp (extensionFlags dflags0)}
let pflags = initParserOpts dflags
@@ -345,12 +416,15 @@ parseString libdir str = ghcWrapper libdir $ do
liftIO $ putStrLn "-- parsing ----------"
liftIO $ putStrLn str
liftIO $ putStrLn "---------------------"
- return $ strGetToks pflags "fake_test_file.hs" str
+ return $ strGetToks includes pflags "fake_test_file.hs" str
-strGetToks :: Lexer.ParserOpts -> FilePath -> String -> [Located Token]
-strGetToks popts filename str = reverse $ lexAll pstate
+strGetToks :: Includes -> Lexer.ParserOpts -> FilePath -> String -> [Located Token]
+-- strGetToks includes popts filename str = reverse $ lexAll pstate
+strGetToks includes popts filename str = reverse $ lexAll (trace ("pstate=" ++ show initState) pstate)
where
- pstate = Lexer.initParserState popts buf loc
+ includeMap = Map.fromList $ map (\(k,v) -> (k, stringToStringBuffer (intercalate "\n" v))) includes
+ initState = initPpState { pp_includes = includeMap }
+ pstate = Lexer.initParserState initState popts buf loc
loc = mkRealSrcLoc (mkFastString filename) 1 1
buf = stringToStringBuffer str
-- cpp_enabled = Lexer.GhcCppBit `Lexer.xtest` Lexer.pExtsBitmap popts
@@ -387,7 +461,7 @@ strParser str dflags filename =
loc = mkRealSrcLoc (mkFastString filename) 1 1
buf = stringToStringBuffer str
in
- case unP parseModuleNoHaddock (Lexer.initParserState (initParserOpts dflags) buf loc) of
+ case unP parseModuleNoHaddock (Lexer.initParserState initPpState (initParserOpts dflags) buf loc) of
PFailed pst ->
let (warns, errs) = Lexer.getPsMessages pst
in (GhcPsMessage <$> warns, Left $ GhcPsMessage <$> errs)
@@ -506,9 +580,12 @@ libdirNow :: LibDir
libdirNow = "/home/alanz/mysrc/git.haskell.org/worktree/bisect/_build/stage1/lib"
doTest :: [String] -> IO ()
-doTest strings = do
+doTest strings = doTestWithIncludes [] strings
+
+doTestWithIncludes :: Includes -> [String] -> IO ()
+doTestWithIncludes includes strings = do
let test = intercalate "\n" strings
- !tks <- parseString libdirNow test
+ !tks <- parseString libdirNow includes test
putStrLn "-----------------------------------------"
printToks (reverse tks)
@@ -630,3 +707,26 @@ t8 = parseDefine (mkFastString "#define MIN_VERSION_ghc_exactprint(major1,major2
t9 :: Either Parsec.ParseError CppDirective
t9 = regularParse cppDirective "#define VERSION_ghc_exactprint \"1.7.0.1\""
+
+t10 :: IO ()
+t10 = do
+ doTestWithIncludes testIncludes
+ [ "#include \"bar.h\""
+ , ""
+ , "#ifdef FOO"
+ , "x = 1"
+ , "#else"
+ , "x = 2"
+ , "#endif"
+ ]
+
+testIncludes :: Includes
+testIncludes =
+ [
+ ( "bar.h"
+ , ["#include \"sub.h\""]
+ ),
+ ( "sub.h"
+ , ["#define FOO"]
+ )
+ ]
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/4ba68570d8f07162efc641474133bd0695e85a39
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/4ba68570d8f07162efc641474133bd0695e85a39
You're receiving this email because of your account on gitlab.haskell.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20231004/2d634d32/attachment-0001.html>
More information about the ghc-commits
mailing list