[Git][ghc/ghc][wip/az/ghc-cpp] 3 commits: Simplify Lexer interface. Only ITcpp
Alan Zimmerman (@alanz)
gitlab at gitlab.haskell.org
Mon Oct 2 20:29:12 UTC 2023
Alan Zimmerman pushed to branch wip/az/ghc-cpp at Glasgow Haskell Compiler / GHC
Commits:
347968a7 by Alan Zimmerman at 2023-10-01T22:49:36+01:00
Simplify Lexer interface. Only ITcpp
We transfer directive lines through it, then parse them from scratch
in the preprocessor.
- - - - -
e75306cd by Alan Zimmerman at 2023-10-02T21:28:04+01:00
Deal with directive on last line, with no trailing \n
- - - - -
54c33324 by Alan Zimmerman at 2023-10-02T21:28:42+01:00
Start parsing and processing the directives
- - - - -
3 changed files:
- compiler/GHC/Parser/Lexer.x
- compiler/GHC/Parser/PreProcess.hs
- utils/check-cpp/Main.hs
Changes:
=====================================
compiler/GHC/Parser/Lexer.x
=====================================
@@ -303,6 +303,7 @@ $unigraphic / { isSmartQuote } { smart_quote_error }
\n ;
-- Ghc CPP symbols
^\# \ * @cppkeyword .* \n / { ifExtension GhcCppBit } { cppToken cpp_prag }
+ ^\# \ * @cppkeyword .* / { ifExtension GhcCppBit } { cppToken cpp_prag }
^\# line { begin line_prag1 }
^\# / { followedByDigit } { begin line_prag1 }
@@ -320,6 +321,7 @@ $unigraphic / { isSmartQuote } { smart_quote_error }
-- we might encounter {-# here, but {- has been handled already
\n ;
^\# \ * @cppkeyword .* \n / { ifExtension GhcCppBit } { cppToken cpp_prag }
+ ^\# \ * @cppkeyword .* / { ifExtension GhcCppBit } { cppToken cpp_prag }
^\# (line)? { begin line_prag1 }
}
@@ -346,8 +348,10 @@ $unigraphic / { isSmartQuote } { smart_quote_error }
-- CPP continuation lines. Keep concatenating, or exit
<cpp_prag> {
- .* \\ \n { cppTokenCont (ITcppContinue True) }
- .* \n { cppTokenPop (ITcppContinue False) }
+ .* \\ \n { cppTokenCont (ITcpp True) }
+ .* \\ { cppTokenCont (ITcpp True) }
+ -- .* \n { cppTokenPop (ITcpp False) }
+ .* { cppTokenPop (ITcpp False) }
-- () { popCpp }
}
@@ -1029,16 +1033,27 @@ data Token
| ITlineComment String PsSpan -- ^ comment starting by "--"
| ITblockComment String PsSpan -- ^ comment in {- -}
- -- GHC CPP extension. Each contains an entire line of source code,
- -- possibly joining up ones ending in backslash
- | ITcppStart Bool FastString -- ^ Start of a CPP #-prefixed line. Flag for continuation
- | ITcppContinue Bool FastString -- ^ Continuation after a trailing backslash. Flag for continuation
-
+ -- GHC CPP extension. See Note [GhcCPP Token]
+ | ITcpp Bool FastString -- ^ CPP #-prefixed line, or continuation.
deriving Show
instance Outputable Token where
ppr x = text (show x)
+{- Note [GhcCPP Token]
+~~~~~~~~~~~~~~~~~~~~~~
+We only invoke the Ghc CPP processing on lines beginning with a '#'
+and one of the keywords in @cppkeyword.
+
+These directives can finish on a trailing slash, which signals a
+continuation onto the next line.
+
+When lexing, we detect the start of the directive, and put the line
+into a ITcpp token, with a flag indicating if it ends with a
+continuation. Subsequent continued lines are treated the same way,
+until the final ITcpp token with the flag set False.
+-}
+
{- Note [PsSpan in Comments]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
When using the Api Annotations to exact print a modified AST, managing
@@ -1273,20 +1288,30 @@ cppToken code span buf len _buf2 =
let tokStr = lexemeToFastString buf len
-- check if the string ends with backslash and newline
-- NOTE: performance likely sucks, make it work for now
- continue <- case (reverse $ unpackFS tokStr) of
- -- ('\n':'\\':_) -> pushLexState code >> return True
- ('\n':'\\':_) -> pushLexState (trace ("cppToken: push state") code) >> return True
- _ -> return False
- return (L span (ITcppStart continue $! tokStr))
+ (len0, continue) <- case (reverse $ unpackFS tokStr) of
+ -- ('\n':'\\':_) -> pushLexState code >> return (len -2, True)
+ ('\n':'\\':_) -> pushLexState (trace ("cppToken: push state") code) >> return (len - 2, True)
+ ('\n':_) -> return (len - 1, False)
+ _ -> return (len, False)
+ return (L span (ITcpp continue $! lexemeToFastString buf len0))
-- trace ("cppToken:" ++ show (code, t)) $ do return (L span t)
cppTokenCont :: (FastString -> Token)-> Action
-cppTokenCont t span buf len _buf2 = return (L span (t $! lexemeToFastString buf len))
+cppTokenCont code span buf len _buf2 =
+ do
+ let tokStr = lexemeToFastString buf len
+ -- check if the string ends with backslash and newline
+ -- NOTE: performance likely sucks, make it work for now
+ (len0, continue) <- case (reverse $ unpackFS tokStr) of
+ ('\n':'\\':_) -> return (len - 2, True)
+ ('\n':_) -> return (len - 1, False)
+ _ -> return (len, False)
+ return (L span (ITcpp continue $! lexemeToFastString buf len0))
cppTokenPop :: (FastString -> Token)-> Action
cppTokenPop t span buf len _buf2 =
do _ <- popLexState
- -- return (L span (t $! lexemeToFastString buf len))
+ -- return (L span (t $! lexemeToFastString buf (len - 1)))
return (L span (t $! lexemeToFastString buf (trace "cppTokenPop" len)))
popCpp :: Action
@@ -2761,7 +2786,7 @@ data PState = PState {
-- | Use for emulating (limited) CPP preprocessing in GHC.
-- TODO: move this into PreProcess, and make a param on PState
data PpState = PpState {
- pp_defines :: !(Set String),
+ pp_defines :: !(Map String [String]),
pp_continuation :: ![Located Token],
-- pp_context :: ![PpContext],
pp_context :: ![Token], -- What preprocessor directive we are currently processing
@@ -2774,7 +2799,7 @@ data PpContext = PpContextIf [Located Token]
initPpState :: PpState
initPpState = PpState
- { pp_defines = Set.empty
+ { pp_defines = Map.empty
, pp_continuation = []
, pp_context = []
, pp_accepting = True
=====================================
compiler/GHC/Parser/PreProcess.hs
=====================================
@@ -152,15 +152,15 @@ getAccepting = P $ \s -> POk s (pp_accepting (pp s))
-- definitions start --------------------
-ppDefine :: FastString -> P ()
-ppDefine def = P $ \s ->
- -- POk s{pp = (pp s){pp_defines = Set.insert (cleanTokenString def) (pp_defines (pp s))}} ()
- POk s{pp = (pp s){pp_defines = Set.insert (trace ("ppDefine:def=[" ++ show (cleanTokenString def) ++ "]") (cleanTokenString def)) (pp_defines (pp s))}} ()
-
-ppIsDefined :: FastString -> P Bool
-ppIsDefined def = P $ \s ->
- -- POk s (Set.member def (pp_defines (pp s)))
- POk s (Set.member (trace ("ppIsDefined:def=[" ++ show (cleanTokenString def) ++ "]") (cleanTokenString def)) (pp_defines (pp s)))
+-- ppDefine :: FastString -> P ()
+-- ppDefine def = P $ \s ->
+-- -- POk s{pp = (pp s){pp_defines = Set.insert (cleanTokenString def) (pp_defines (pp s))}} ()
+-- POk s{pp = (pp s){pp_defines = Set.insert (trace ("ppDefine:def=[" ++ show (cleanTokenString def) ++ "]") (cleanTokenString def)) (pp_defines (pp s))}} ()
+
+-- ppIsDefined :: FastString -> P Bool
+-- ppIsDefined def = P $ \s ->
+-- -- POk s (Set.member def (pp_defines (pp s)))
+-- POk s (Set.member (trace ("ppIsDefined:def=[" ++ show (cleanTokenString def) ++ "]") (cleanTokenString def)) (pp_defines (pp s)))
-- | Take a @FastString@ of the form "#define FOO\n" and strip off all but "FOO"
cleanTokenString :: FastString -> String
=====================================
utils/check-cpp/Main.hs
=====================================
@@ -5,7 +5,7 @@ import Control.Monad.IO.Class
import Data.Char
import Data.Data hiding (Fixity)
import Data.List
-import qualified Data.Set as Set
+import qualified Data.Map as Map
import Debug.Trace
import GHC
import qualified GHC.Data.EnumSet as EnumSet
@@ -60,7 +60,7 @@ ppLexer queueComments cont =
( \tk ->
let
contInner t = (trace ("ppLexer: tk=" ++ show (unLoc tk, unLoc t)) cont) t
- contPush = pushContext (unLoc tk) >> contIgnoreTok tk
+ -- contPush = pushContext (unLoc tk) >> contIgnoreTok tk
contIgnoreTok (L l tok) = do
case l of
RealSrcSpan r (Strict.Just b) -> Lexer.queueIgnoredToken (L (PsSpan r b) tok)
@@ -69,44 +69,12 @@ ppLexer queueComments cont =
in
-- case tk of
case (trace ("M.ppLexer:tk=" ++ show (unLoc tk)) tk) of
- L _ (ITcppStart continuation s) -> do
+ L _ ITeof -> contInner tk
+ L _ (ITcpp continuation s) -> do
if continuation
- then do
- pushContinuation tk
- contIgnoreTok tk
- else do
- processCppToks s
- contIgnoreTok tk
- L _ (ITcppContinue continuation s) -> do
- if continuation
- then do
- pushContinuation tk
- contIgnoreTok tk
- else do
- processCppToks s
- contIgnoreTok tk
- -- L _ (ITcppDefine s) -> do
- -- -- ppDefine (trace ("ppDefine:" ++ show s) s)
- -- ppDefine s
- -- popContext
- -- contIgnoreTok tk
- -- L _ (ITcppIf _) -> contPush
- -- L _ (ITcppIfdef s) -> do
- -- defined <- ppIsDefined s
- -- -- setAccepting defined
- -- setAccepting (trace ("ifdef:" ++ show (s, defined)) defined)
- -- contIgnoreTok tk
- -- L _ (ITcppIfndef s) -> do
- -- defined <- ppIsDefined s
- -- -- setAccepting (not defined)
- -- setAccepting (trace ("ifdef:" ++ show (s, defined)) (not defined))
- -- contIgnoreTok tk
- -- L _ ITcppElse -> do
- -- preprocessElse
- -- contIgnoreTok tk
- -- L _ ITcppEndif -> do
- -- preprocessEnd
- -- contIgnoreTok tk
+ then pushContinuation tk
+ else processCppToks s
+ contIgnoreTok tk
_ -> do
state <- getCppState
case (trace ("CPP state:" ++ show state) state) of
@@ -126,10 +94,8 @@ preprocessEnd = do
processCppToks :: FastString -> P ()
processCppToks fs = do
- let str = unpackFS fs
let
- get (L _ (ITcppStart _ s)) = s
- get (L _ (ITcppContinue _ s)) = s
+ get (L _ (ITcpp _ s)) = s
get _ = error "should not"
-- Combine any prior continuation tokens
cs <- popContinuation
@@ -138,32 +104,44 @@ processCppToks fs = do
processCpp :: [FastString] -> P ()
processCpp fs = do
- traceM $ "processCpp: fs=" ++ show fs
- return ()
+ -- traceM $ "processCpp: fs=" ++ show fs
+ -- let s = cppInitial fs
+ let s = cppInitial fs
+ case regularParse cppDirective s of
+ Left err -> error $ show err
+ Right (CppDefine name def) -> do
+ ppDefine name def
+ Right (CppIfdef name) -> do
+ defined <- ppIsDefined name
+ setAccepting defined
+ Right (CppIfndef name) -> do
+ defined <- ppIsDefined name
+ setAccepting (not defined)
+ Right CppElse -> do
+ accepting <- getAccepting
+ setAccepting (not accepting)
+ return ()
+ Right CppEndif -> do
+ -- TODO: nested states
+ setAccepting True
+ return ()
+
+ return (trace ("processCpp:s=" ++ show s) ())
-- ---------------------------------------------------------------------
-- Preprocessor state functions
data CppState
= CppIgnoring
- | CppInDefine
- | CppInIfdef
- | CppInIfndef
| CppNormal
deriving (Show)
getCppState :: P CppState
getCppState = do
- context <- peekContext
accepting <- getAccepting
- case context of
- -- ITcppDefine _ -> return CppInDefine
- -- ITcppIfdef _ -> return CppInIfdef
- -- ITcppIfndef _ -> return CppInIfndef
- _ ->
- if accepting
- then return CppNormal
- else return CppIgnoring
+ if accepting
+ then return CppNormal
+ else return CppIgnoring
-- pp_context stack start -----------------
@@ -212,15 +190,15 @@ popContinuation =
-- definitions start --------------------
-ppDefine :: FastString -> P ()
-ppDefine def = P $ \s ->
+ppDefine :: String -> [String] -> P ()
+ppDefine name val = P $ \s ->
-- POk s{pp = (pp s){pp_defines = Set.insert (cleanTokenString def) (pp_defines (pp s))}} ()
- POk s{pp = (pp s){pp_defines = Set.insert (trace ("ppDefine:def=[" ++ show (cleanTokenString def) ++ "]") (cleanTokenString def)) (pp_defines (pp s))}} ()
+ POk s{pp = (pp s){pp_defines = Map.insert (trace ("ppDefine:def=[" ++ name ++ "]") name) val (pp_defines (pp s))}} ()
-ppIsDefined :: FastString -> P Bool
+ppIsDefined :: String -> P Bool
ppIsDefined def = P $ \s ->
- -- POk s (Set.member (cleanTokenString def) (pp_defines (pp s)))
- POk s (Set.member (trace ("ppIsDefined:def=[" ++ show (cleanTokenString def) ++ "]") (cleanTokenString def)) (pp_defines (pp s)))
+ -- POk s (Map.member def (pp_defines (pp s)))
+ POk s (Map.member (trace ("ppIsDefined:def=[" ++ def ++ "]") def) (pp_defines (pp s)))
-- | Take a @FastString@ of the form "#define FOO\n" and strip off all but "FOO"
cleanTokenString :: FastString -> String
@@ -229,10 +207,13 @@ cleanTokenString fs = r
ss = dropWhile (\c -> not $ isSpace c) (unpackFS fs)
r = init ss
-parseDefine :: FastString -> Maybe (String, String)
-parseDefine s = r
+parseDefine :: FastString -> Maybe (String, [String])
+parseDefine fs = r
where
- r = Just (cleanTokenString s, "")
+ -- r = Just (cleanTokenString s, "")
+ r = case regularParse cppDefinition (unpackFS fs) of
+ Left _ -> Nothing
+ Right v -> Just v
-- =====================================================================
-- Parsec parsing
@@ -241,6 +222,63 @@ type CppParser = Parsec String ()
regularParse :: Parser a -> String -> Either Parsec.ParseError a
regularParse p = PS.parse p ""
+
+-- TODO: delete this
+cppDefinition :: CppParser (String, [String])
+cppDefinition = do
+ _ <- PS.char '#'
+ _ <- whiteSpace
+ _ <- lexeme (PS.string "define")
+ name <- cppToken
+ definition <- cppTokens
+ return (name, definition)
+
+data CppDirective
+ = CppDefine String [String]
+ | CppIfdef String
+ | CppIfndef String
+ | CppElse
+ | CppEndif
+ deriving (Show, Eq)
+
+cppDirective :: CppParser CppDirective
+cppDirective = do
+ _ <- PS.char '#'
+ _ <- whiteSpace
+ choice
+ [ cppKw "define" >> cmdDefinition
+ -- , cppKw "include" CppIncludeKw
+ -- , cppKw "undef" CppUndefKw
+ -- , cppKw "error" CppErrorKw
+ , try$ cppKw "ifdef" >> cmdIfdef
+ , cppKw "ifndef" >> cmdIfndef
+ -- , cppKw "if" CppIfKw
+ -- , cppKw "elif" CppElifKw
+ , try $ cppKw "else" >> return CppElse
+ , cppKw "endif" >> return CppEndif
+ ]
+
+cmdDefinition :: CppParser CppDirective
+cmdDefinition = do
+ name <- cppToken
+ definition <- cppTokens
+ return $ CppDefine name definition
+
+cmdIfdef :: CppParser CppDirective
+cmdIfdef = do
+ name <- cppToken
+ return $ CppIfdef name
+
+cmdIfndef :: CppParser CppDirective
+cmdIfndef = do
+ name <- cppToken
+ return $ CppIfndef name
+
+cppKw :: String -> CppParser ()
+cppKw kw = do
+ _ <- lexeme (PS.string kw)
+ return ()
+
cppComment :: CppParser ()
cppComment = do
_ <- PS.string "/*"
@@ -258,13 +296,17 @@ lexeme p = p <* whiteSpace
cppToken :: CppParser String
cppToken = lexeme (PS.many1 (PS.satisfy (\c -> not (isSpace c))))
+cppTokens :: CppParser [String]
+cppTokens = PS.many cppToken
+
{- | Do cpp initial processing, as per https://gcc.gnu.org/onlinedocs/cpp/Initial-processing.html
See Note [GhcCPP Initial Processing]
-}
-cppInitial :: FastString -> String
+cppInitial :: [FastString] -> String
cppInitial fs = r
where
- r = unpackFS fs
+ -- go fs' = reverse $ drop 2 $ reverse $ unpackFS fs'
+ r = concatMap unpackFS fs
{-
Note [GhcCPP Initial Processing]
@@ -491,7 +533,6 @@ t2 = do
, "#else"
, "x = 5"
, "#endif"
- , ""
]
t3 :: IO ()
@@ -573,7 +614,14 @@ t6 = do
, "#else"
, "x = \"no version\""
, "#endif"
+ , ""
]
-t7 :: Maybe (String, String)
-t7 = parseDefine (mkFastString "#define VERSION_ghc_exactprint \"1.7.0.1\"\n")
+t7 :: Maybe (String, [String])
+t7 = parseDefine (mkFastString "#define VERSION_ghc_exactprint \"1.7.0.1\"")
+
+t8 :: Maybe (String, [String])
+t8 = parseDefine (mkFastString "#define MIN_VERSION_ghc_exactprint(major1,major2,minor) ( (major1) < 1 || (major1) == 1 && (major2) < 7 || (major1) == 1 && (major2) == 7 && (minor) <= 0)")
+
+t9 :: Either Parsec.ParseError CppDirective
+t9 = regularParse cppDirective "#define VERSION_ghc_exactprint \"1.7.0.1\""
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/89aaf2869980802922f676c0d3f391d0db568b17...54c33324972d6bc3ccd48426a40f5492809d3c1b
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/89aaf2869980802922f676c0d3f391d0db568b17...54c33324972d6bc3ccd48426a40f5492809d3c1b
You're receiving this email because of your account on gitlab.haskell.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20231002/48e34a61/attachment-0001.html>
More information about the ghc-commits
mailing list