[Git][ghc/ghc][wip/az/ghc-cpp] 5 commits: Make cppTokens extend to end of line, and process CPP comments
Alan Zimmerman (@alanz)
gitlab at gitlab.haskell.org
Sun Oct 1 18:51:00 UTC 2023
Alan Zimmerman pushed to branch wip/az/ghc-cpp at Glasgow Haskell Compiler / GHC
Commits:
d26981dd by Alan Zimmerman at 2023-09-30T18:06:07+01:00
Make cppTokens extend to end of line, and process CPP comments
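
As an illustration of the comment handling this enables (a minimal sketch in plain Haskell, not the lexer code in this commit), cpp-style initial processing replaces a /* ... */ comment on a directive line with a single space:

-- Minimal sketch only: collapse a C-style block comment on a CPP
-- directive line to a single space, as cpp's initial processing does.
-- An unterminated comment simply swallows the rest of the line here.
stripCppComments :: String -> String
stripCppComments ('/' : '*' : rest) = ' ' : stripCppComments (dropComment rest)
  where
    dropComment ('*' : '/' : xs) = xs
    dropComment (_ : xs)         = dropComment xs
    dropComment []               = []
stripCppComments (c : rest) = c : stripCppComments rest
stripCppComments []         = []

-- e.g. stripCppComments "#define FOO 1 /* a comment */" == "#define FOO 1  "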
- - - - -
6bdc7e7e by Alan Zimmerman at 2023-10-01T15:57:56+01:00
Remove unused ITcppDefined
- - - - -
8a6c7c89 by Alan Zimmerman at 2023-10-01T16:11:31+01:00
Allow spaces between # and keyword for preprocessor directive
- - - - -
9cd8aa1b by Alan Zimmerman at 2023-10-01T17:49:19+01:00
Process CPP continuation lines
They are emitted as separate ITcppContinue tokens.
Perhaps the processing should be more like a comment, and keep going
to the end. But the last line needs to be slurped as a whole.
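
The continuation check amounts to asking whether the lexed line ends in a backslash followed by a newline. A minimal standalone sketch, using plain String rather than the lexer's FastString, with an illustrative name:

-- Sketch: does this CPP line continue onto the next one?
endsWithBackslashNewline :: String -> Bool
endsWithBackslashNewline s = case reverse s of
  ('\n' : '\\' : _) -> True
  _                 -> False

-- endsWithBackslashNewline "#define FOO(x) \\\n" == True
-- endsWithBackslashNewline "#define FOO 1\n"     == False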
- - - - -
89aaf286 by Alan Zimmerman at 2023-10-01T19:50:06+01:00
Accumulate CPP continuations, process when ready
This can be simplified further; we only need one CPP token.
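
A minimal sketch of the accumulate-then-process idea, written outside the parser monad with illustrative names only:

-- Sketch: gather the pieces of a backslash-continued directive and
-- join them into one logical line once the final piece arrives.
joinContinuations :: [String] -> [String]
joinContinuations = go []
  where
    go acc (l : ls)
      | continues l = go (dropCont l : acc) ls
      | otherwise   = concat (reverse (l : acc)) : go [] ls
    go []  []       = []
    go acc []       = [concat (reverse acc)]

    continues l = case reverse l of
      ('\n' : '\\' : _) -> True
      _                 -> False

    -- drop the trailing "\\\n" so the pieces join into one logical line
    dropCont l = take (length l - 2) l

-- joinContinuations ["#define M(x) \\\n", " (x)\n"] == ["#define M(x)  (x)\n"]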
- - - - -
5 changed files:
- compiler/GHC/Cmm/Lexer.x
- compiler/GHC/Parser.y
- compiler/GHC/Parser/Lexer.x
- compiler/GHC/Parser/PreProcess.hs
- utils/check-cpp/Main.hs
Changes:
=====================================
compiler/GHC/Cmm/Lexer.x
=====================================
@@ -19,7 +19,7 @@ import GHC.Prelude
import GHC.Cmm.Expr
-import GHC.Parser.Lexer hiding (lexToken)
+import GHC.Parser.Lexer
import GHC.Cmm.Parser.Monad
import GHC.Types.SrcLoc
import GHC.Types.Unique.FM
=====================================
compiler/GHC/Parser.y
=====================================
@@ -745,17 +745,16 @@ TH_QUASIQUOTE { L _ (ITquasiQuote _) }
TH_QQUASIQUOTE { L _ (ITqQuasiQuote _) }
-- Ghc CPP
-'#define' { L _ ITcppDefine }
-'#include' { L _ ITcppInclude }
-'#undef' { L _ ITcppUndef }
-'#error' { L _ ITcppError }
-'#if' { L _ ITcppIf }
-'#ifdef' { L _ ITcppIfdef }
-'#ifndef' { L _ ITcppIfndef }
-'#elif' { L _ ITcppElif }
-'#else' { L _ ITcppElse }
-'#endif' { L _ ITcppEndif }
-'defined' { L _ ITcppDefined }
+-- '#define' { L _ (ITcppDefine _) }
+-- '#include' { L _ (ITcppInclude _) }
+-- '#undef' { L _ (ITcppUndef _) }
+-- '#error' { L _ (ITcppError _) }
+-- '#if' { L _ (ITcppIf _) }
+-- '#ifdef' { L _ (ITcppIfdef _) }
+-- '#ifndef' { L _ (ITcppIfndef _) }
+-- '#elif' { L _ (ITcppElif _) }
+-- '#else' { L _ ITcppElse }
+-- '#endif' { L _ ITcppEndif }
%monad { P } { >>= } { return }
%lexer { (lexer True) } { L _ ITeof }
=====================================
compiler/GHC/Parser/Lexer.x
=====================================
@@ -222,6 +222,9 @@ $docsym = [\| \^ \* \$]
-- not explicitly positive (contrast @exponent)
@negative = \-
+-- recognise any of the GhcCPP keywords introduced by a leading #
+@cppkeyword = "define" | "include" | "undef" | "error" | "ifdef"
+ | "ifndef" | "if" | "elif" | "else" | "endif"
-- -----------------------------------------------------------------------------
-- Alex "Identifier"
@@ -243,7 +246,8 @@ $tab { warnTab }
-- are). We also rule out nested Haddock comments, if the -haddock flag is
-- set.
-"{-" / { isNormalComment } { nested_comment }
+"{-" / { isNormalComment } { nested_comment }
+"/*" / { ifExtension GhcCppBit } { nested_comment }
-- Single-line comments are a bit tricky. Haskell 98 says that two or
-- more dashes followed by a symbol should be parsed as a varsym, so we
@@ -298,29 +302,7 @@ $unigraphic / { isSmartQuote } { smart_quote_error }
<bol> {
\n ;
-- Ghc CPP symbols
- ^"#define" / { ifExtension GhcCppBit } { cppToken cpp_prag (ITcppDefine) }
- ^"#include" / { ifExtension GhcCppBit } { cppToken cpp_prag (ITcppInclude) }
- ^"#undef" / { ifExtension GhcCppBit } { cppToken cpp_prag (ITcppUndef) }
- ^"#error" / { ifExtension GhcCppBit } { cppToken cpp_prag (ITcppError) }
- ^"#if" / { ifExtension GhcCppBit } { cppToken cpp_prag (ITcppIf) }
- ^"#ifdef" / { ifExtension GhcCppBit } { cppToken cpp_prag (ITcppIfdef) }
- ^"#ifndef" / { ifExtension GhcCppBit } { cppToken cpp_prag (ITcppIfndef) }
- ^"#elif" / { ifExtension GhcCppBit } { cppToken cpp_prag (ITcppElif) }
- ^"#else" / { ifExtension GhcCppBit } { cppToken cpp_prag (ITcppElse) }
- ^"#endif" / { ifExtension GhcCppBit } { cppToken cpp_prag (ITcppEndif) }
- -- "defined" { token (ITcppDefined) }
-
- -- ^\# "define" / { ifExtension GhcCppBit } { cppToken cpp_prag (ITcppDefine) }
- -- ^\# "include" / { ifExtension GhcCppBit } { cppToken cpp_prag (ITcppInclude) }
- -- ^\# "undef" / { ifExtension GhcCppBit } { cppToken cpp_prag (ITcppUndef) }
- -- ^\# "error" / { ifExtension GhcCppBit } { cppToken cpp_prag (ITcppError) }
- -- ^\# "if" / { ifExtension GhcCppBit } { cppToken cpp_prag (ITcppIf) }
- -- ^\# "ifdef" / { ifExtension GhcCppBit } { cppToken cpp_prag (ITcppIfdef) }
- -- ^\# "ifndef" / { ifExtension GhcCppBit } { cppToken cpp_prag (ITcppIfndef) }
- -- ^\# "elif" / { ifExtension GhcCppBit } { cppToken cpp_prag (ITcppElif) }
- -- ^\# "else" / { ifExtension GhcCppBit } { cppToken cpp_prag (ITcppElse) }
- -- ^\# "endif" / { ifExtension GhcCppBit } { cppToken cpp_prag (ITcppEndif) }
- -- -- "defined" { token (ITcppDefined) }
+ ^\# \ * @cppkeyword .* \n / { ifExtension GhcCppBit } { cppToken cpp_prag }
^\# line { begin line_prag1 }
^\# / { followedByDigit } { begin line_prag1 }
@@ -337,6 +319,8 @@ $unigraphic / { isSmartQuote } { smart_quote_error }
\{ / { notFollowedBy '-' } { hopefully_open_brace }
-- we might encounter {-# here, but {- has been handled already
\n ;
+ ^\# \ * @cppkeyword .* \n / { ifExtension GhcCppBit } { cppToken cpp_prag }
+
^\# (line)? { begin line_prag1 }
}
@@ -360,9 +344,11 @@ $unigraphic / { isSmartQuote } { smart_quote_error }
"{-#" $whitechar* $pragmachar+ / { known_pragma linePrags }
{ dispatch_pragmas linePrags }
--- CPP pragmas
+-- CPP continuation lines. Keep concatenating, or exit
<cpp_prag> {
- () { pop }
+ .* \\ \n { cppTokenCont (ITcppContinue True) }
+ .* \n { cppTokenPop (ITcppContinue False) }
+ -- () { popCpp }
}
-- single-line line pragmas, of the form
@@ -1043,20 +1029,10 @@ data Token
| ITlineComment String PsSpan -- ^ comment starting by "--"
| ITblockComment String PsSpan -- ^ comment in {- -}
- -- GHC CPP extension
- | ITcppDefine -- ^ #define
- | ITcppInclude -- ^ #include
- | ITcppUndef -- ^ #undef
- | ITcppError -- ^ #error
- | ITcppIf -- ^ #if
- | ITcppIfdef -- ^ #ifdef
- | ITcppIfndef -- ^ #ifndef
- | ITcppElif -- ^ #elif
- | ITcppElse -- ^ #else
- | ITcppEndif -- ^ #endif
- | ITcppDefined -- ^ defined (in conditional)
- | ITcppIgnored [Located Token] -- TODO: push into comments instead
-
+ -- GHC CPP extension. Each contains an entire line of source code,
+ -- possibly joining up ones ending in backslash
+ | ITcppStart Bool FastString -- ^ Start of a CPP #-prefixed line. Flag for continuation
+ | ITcppContinue Bool FastString -- ^ Continuation after a trailing backslash. Flag for continuation
deriving Show
@@ -1291,12 +1267,34 @@ pop _span _buf _len _buf2 =
lexToken
-- trace "pop" $ do lexToken
-cppToken :: Int -> Token -> Action
-cppToken code t span _buf _len _buf2 =
- do pushLexState code
- return (L span t)
+cppToken :: Int -> Action
+cppToken code span buf len _buf2 =
+ do
+ let tokStr = lexemeToFastString buf len
+ -- check if the string ends with backslash and newline
+ -- NOTE: performance likely sucks, make it work for now
+ continue <- case (reverse $ unpackFS tokStr) of
+ -- ('\n':'\\':_) -> pushLexState code >> return True
+ ('\n':'\\':_) -> pushLexState (trace ("cppToken: push state") code) >> return True
+ _ -> return False
+ return (L span (ITcppStart continue $! tokStr))
-- trace ("cppToken:" ++ show (code, t)) $ do return (L span t)
+cppTokenCont :: (FastString -> Token)-> Action
+cppTokenCont t span buf len _buf2 = return (L span (t $! lexemeToFastString buf len))
+
+cppTokenPop :: (FastString -> Token)-> Action
+cppTokenPop t span buf len _buf2 =
+ do _ <- popLexState
+ -- return (L span (t $! lexemeToFastString buf len))
+ return (L span (t $! lexemeToFastString buf (trace "cppTokenPop" len)))
+
+popCpp :: Action
+popCpp _span _buf _len _buf2 =
+ do _ <- popLexState
+ -- lexToken
+ trace "pop" $ do lexToken
+
-- See Note [Nested comment line pragmas]
failLinePrag1 :: Action
failLinePrag1 span _buf _len _buf2 = do
@@ -1468,6 +1466,9 @@ It holds simply because we immediately lex a literal after the minus.
ifExtension :: ExtBits -> AlexAccPred ExtsBitmap
ifExtension extBits bits _ _ _ = extBits `xtest` bits
+ifNotExtension :: ExtBits -> AlexAccPred ExtsBitmap
+ifNotExtension extBits bits _ _ _ = not (extBits `xtest` bits)
+
alexNotPred p userState in1 len in2
= not (p userState in1 len in2)
@@ -1570,23 +1571,36 @@ nested_comment_logic endComment commentAcc input span = go commentAcc (1::Int) input
cspan = mkSrcSpanPs $ mkPsSpan (psSpanStart span) end_loc
lcomment = L cspan comment
endComment input lcomment
- go commentAcc n input = case alexGetChar' input of
- Nothing -> errBrace input (psRealSpan span)
- Just ('-',input) -> case alexGetChar' input of
+ go commentAcc n input = ghcCppSet >>= \ghcCppSet -> case (ghcCppSet, alexGetChar' input) of
+ (_, Nothing) -> errBrace input (psRealSpan span)
+ (_, Just ('-',input)) -> case alexGetChar' input of
Nothing -> errBrace input (psRealSpan span)
Just ('\125',input) -> go ('\125':'-':commentAcc) (n-1) input -- '}'
Just (_,_) -> go ('-':commentAcc) n input
- Just ('\123',input) -> case alexGetChar' input of -- '{' char
+ (_, Just ('\123',input)) -> case alexGetChar' input of -- '{' char
Nothing -> errBrace input (psRealSpan span)
Just ('-',input) -> go ('-':'\123':commentAcc) (n+1) input
Just (_,_) -> go ('\123':commentAcc) n input
+ (True, Just ('*',input)) -> case alexGetChar' input of
+ Nothing -> errBrace input (psRealSpan span)
+ Just ('/',input) -> go ('/':'*':commentAcc) (n-1) input -- '/'
+ Just (_,_) -> go ('-':commentAcc) n input
+ (True, Just ('/',input)) -> case alexGetChar' input of -- '/' char
+ Nothing -> errBrace input (psRealSpan span)
+ Just ('*',input) -> go ('*':'/':commentAcc) (n+1) input
+ Just (_,_) -> go ('/':commentAcc) n input
-- See Note [Nested comment line pragmas]
- Just ('\n',input) -> case alexGetChar' input of
+ (_, Just ('\n',input)) -> case alexGetChar' input of
Nothing -> errBrace input (psRealSpan span)
Just ('#',_) -> do (parsedAcc,input) <- parseNestedPragma input
go (parsedAcc ++ '\n':commentAcc) n input
Just (_,_) -> go ('\n':commentAcc) n input
- Just (c,input) -> go (c:commentAcc) n input
+ (_, Just (c,input)) -> go (c:commentAcc) n input
+
+ghcCppSet :: P Bool
+ghcCppSet = do
+ exts <- getExts
+ return $ xtest GhcCppBit exts
-- See Note [Nested comment line pragmas]
parseNestedPragma :: AlexInput -> P (String,AlexInput)
@@ -2745,9 +2759,10 @@ data PState = PState {
-- correctly?
-- | Use for emulating (limited) CPP preprocessing in GHC.
+-- TODO: move this into PreProcess, and make a param on PState
data PpState = PpState {
pp_defines :: !(Set String),
- pp_pushed_back :: !(Maybe (Located Token)),
+ pp_continuation :: ![Located Token],
-- pp_context :: ![PpContext],
pp_context :: ![Token], -- What preprocessor directive we are currently processing
pp_accepting :: !Bool
@@ -2760,7 +2775,7 @@ data PpContext = PpContextIf [Located Token]
initPpState :: PpState
initPpState = PpState
{ pp_defines = Set.empty
- , pp_pushed_back = Nothing
+ , pp_continuation = []
, pp_context = []
, pp_accepting = True
}
@@ -3206,6 +3221,8 @@ mkParserOpts extensionFlags diag_opts supported
.|. OverloadedRecordUpdateBit `xoptBit` LangExt.OverloadedRecordUpdate -- Enable testing via 'getBit OverloadedRecordUpdateBit' in the parser (RecordDotSyntax parsing uses that information).
.|. ExtendedLiteralsBit `xoptBit` LangExt.ExtendedLiterals
.|. GhcCppBit `xoptBit` LangExt.GhcCpp
+ -- .|. (trace ("GhcCppBit:" ++ show (GhcCppBit `xoptBit` LangExt.GhcCpp))
+ -- GhcCppBit `xoptBit` LangExt.GhcCpp)
optBits =
HaddockBit `setBitIf` isHaddock
.|. RawTokenStreamBit `setBitIf` rawTokStream
@@ -3426,7 +3443,7 @@ getOffside = P $ \s@PState{last_loc=loc, context=stk} ->
let offs = srcSpanStartCol (psRealSpan loc) in
let ord = case stk of
Layout n gen_semic : _ ->
- --trace ("layout: " ++ show n ++ ", offs: " ++ show offs) $
+ -- trace ("layout: " ++ show n ++ ", offs: " ++ show offs) $
(compare offs n, gen_semic)
_ ->
(GT, dontGenerateSemic)
@@ -3967,7 +3984,6 @@ commentToAnnotation (L l (ITdocComment s ll)) = mkLEpaComment l ll (EpaDocComment s)
commentToAnnotation (L l (ITdocOptions s ll)) = mkLEpaComment l ll (EpaDocOptions s)
commentToAnnotation (L l (ITlineComment s ll)) = mkLEpaComment l ll (EpaLineComment s)
commentToAnnotation (L l (ITblockComment s ll)) = mkLEpaComment l ll (EpaBlockComment s)
-commentToAnnotation (L l (ITblockComment s ll)) = mkLEpaComment l ll (EpaBlockComment s)
commentToAnnotation _ = panic "commentToAnnotation"
-- see Note [PsSpan in Comments]
=====================================
compiler/GHC/Parser/PreProcess.hs
=====================================
@@ -4,15 +4,16 @@
{-# LANGUAGE BangPatterns #-}
module GHC.Parser.PreProcess (
- ppLexer,
- ppLexerDbg,
+ -- ppLexer,
+ -- ppLexerDbg,
lexer,
lexerDbg,
) where
--- import Data.List ()
+import Data.Char
import qualified Data.Set as Set
import Debug.Trace (trace)
+import GHC.Data.FastString
import qualified GHC.Data.Strict as Strict
import GHC.Parser.Errors.Ppr ()
import GHC.Parser.Lexer (P (..), PState (..), ParseResult (..), PpState (..), Token (..))
@@ -23,119 +24,122 @@ import GHC.Types.SrcLoc
-- ---------------------------------------------------------------------
lexer, lexerDbg :: Bool -> (Located Token -> P a) -> P a
-lexer = ppLexer
-lexerDbg = ppLexerDbg
-
-ppLexer, ppLexerDbg :: Bool -> (Located Token -> P a) -> P a
--- Use this instead of 'lexer' in GHC.Parser to dump the tokens for debugging.
-ppLexerDbg queueComments cont = ppLexer queueComments contDbg
- where
- contDbg tok = trace ("pptoken: " ++ show (unLoc tok)) (cont tok)
-ppLexer queueComments cont =
- Lexer.lexer
- queueComments
- ( \tk ->
- let
- contInner t = (trace ("ppLexer: tk=" ++ show (unLoc tk, unLoc t)) cont) t
- -- contPush = pushContext (unLoc tk) >> contInner (L lt (ITcppIgnored [tk]))
- contPush = pushContext (unLoc tk) >> contIgnoreTok tk
- contIgnoreTok (L l tok) = do
- case l of
- RealSrcSpan r (Strict.Just b) -> Lexer.queueIgnoredToken (L (PsSpan r b) tok)
- _ -> return ()
- ppLexer queueComments cont
- in
- case tk of
- L _ ITcppDefine -> contPush
- L _ ITcppIf -> contPush
- L _ ITcppIfdef -> contPush
- L _ ITcppIfndef -> contPush
- L _ ITcppElse -> do
- preprocessElse
- contIgnoreTok tk
- L _ ITcppEndif -> do
- preprocessEnd
- contIgnoreTok tk
- L _ tok -> do
- state <- getCppState
- case (trace ("CPP state:" ++ show state) state) of
- CppIgnoring -> contIgnoreTok tk
- CppInDefine -> do
- ppDefine (trace ("ppDefine:" ++ show tok) (show tok))
- popContext
- contIgnoreTok tk
- CppInIfdef -> do
- defined <- ppIsDefined (show tok)
- setAccepting defined
- popContext
- contIgnoreTok tk
- CppInIfndef -> do
- defined <- ppIsDefined (show tok)
- setAccepting (not defined)
- popContext
- contIgnoreTok tk
- _ -> contInner tk
- )
-
-preprocessElse :: P ()
-preprocessElse = do
- accepting <- getAccepting
- setAccepting (not accepting)
-
-preprocessEnd :: P ()
-preprocessEnd = do
- -- TODO: nested context
- setAccepting True
-
--- ---------------------------------------------------------------------
--- Preprocessor state functions
-
-data CppState
- = CppIgnoring
- | CppInDefine
- | CppInIfdef
- | CppInIfndef
- | CppNormal
- deriving (Show)
-
-getCppState :: P CppState
-getCppState = do
- context <- peekContext
- accepting <- getAccepting
- case context of
- ITcppDefine -> return CppInDefine
- ITcppIfdef -> return CppInIfdef
- ITcppIfndef -> return CppInIfndef
- _ ->
- if accepting
- then return CppNormal
- else return CppIgnoring
+-- bypass for now, work in ghci
+lexer = Lexer.lexer
+lexerDbg = Lexer.lexerDbg
+
+-- lexer = ppLexer
+-- -- lexer = ppLexerDbg
+-- lexerDbg = ppLexerDbg
+
+-- ppLexer, ppLexerDbg :: Bool -> (Located Token -> P a) -> P a
+-- -- Use this instead of 'lexer' in GHC.Parser to dump the tokens for debugging.
+-- ppLexerDbg queueComments cont = ppLexer queueComments contDbg
+-- where
+-- contDbg tok = trace ("pptoken: " ++ show (unLoc tok)) (cont tok)
+-- ppLexer queueComments cont =
+-- Lexer.lexer
+-- queueComments
+-- ( \tk ->
+-- let
+-- -- contInner t = (trace ("ppLexer: tk=" ++ show (unLoc tk, unLoc t)) cont) t
+-- contInner t = cont t
+-- contPush = pushContext (unLoc tk) >> contIgnoreTok tk
+-- contIgnoreTok (L l tok) = do
+-- case l of
+-- RealSrcSpan r (Strict.Just b) -> Lexer.queueIgnoredToken (L (PsSpan r b) tok)
+-- _ -> return ()
+-- ppLexer queueComments cont
+-- in
+-- case tk of
+-- L _ (ITcppDefine s) -> do
+-- ppDefine (trace ("ppDefine:" ++ show s) s)
+-- popContext
+-- contIgnoreTok tk
+-- L _ (ITcppIf _) -> contPush
+-- L _ (ITcppIfdef s) -> do
+-- defined <- ppIsDefined s
+-- -- setAccepting defined
+-- setAccepting (trace ("ifdef:" ++ show (s, defined)) defined)
+-- contIgnoreTok tk
+-- L _ (ITcppIfndef s) -> do
+-- defined <- ppIsDefined s
+-- -- setAccepting (not defined)
+-- setAccepting (trace ("ifdef:" ++ show (s, defined)) (not defined))
+-- contIgnoreTok tk
+-- L _ ITcppElse -> do
+-- preprocessElse
+-- contIgnoreTok tk
+-- L _ ITcppEndif -> do
+-- preprocessEnd
+-- contIgnoreTok tk
+-- _ -> do
+-- state <- getCppState
+-- -- case (trace ("CPP state:" ++ show state) state) of
+-- case state of
+-- CppIgnoring -> contIgnoreTok tk
+-- _ -> contInner tk
+-- )
+
+-- preprocessElse :: P ()
+-- preprocessElse = do
+-- accepting <- getAccepting
+-- setAccepting (not accepting)
+
+-- preprocessEnd :: P ()
+-- preprocessEnd = do
+-- -- TODO: nested context
+-- setAccepting True
+
+-- -- ---------------------------------------------------------------------
+-- -- Preprocessor state functions
+
+-- data CppState
+-- = CppIgnoring
+-- | CppInDefine
+-- | CppInIfdef
+-- | CppInIfndef
+-- | CppNormal
+-- deriving (Show)
+
+-- getCppState :: P CppState
+-- getCppState = do
+-- context <- peekContext
+-- accepting <- getAccepting
+-- case context of
+-- ITcppDefine _ -> return CppInDefine
+-- ITcppIfndef _ -> return CppInIfndef
+-- ITcppIfdef _ -> return CppInIfdef
+-- _ ->
+-- if accepting
+-- then return CppNormal
+-- else return CppIgnoring
-- pp_context stack start -----------------
-pushContext :: Token -> P ()
-pushContext new =
- P $ \s -> POk s{pp = (pp s){pp_context = new : pp_context (pp s)}} ()
-
-popContext :: P ()
-popContext =
- P $ \s ->
- let
- new_context = case pp_context (pp s) of
- [] -> []
- (_ : t) -> t
- in
- POk s{pp = (pp s){pp_context = new_context}} ()
-
-peekContext :: P Token
-peekContext =
- P $ \s ->
- let
- r = case pp_context (pp s) of
- [] -> ITeof -- Anthing really, for now, except a CPP one
- (h : _) -> h
- in
- POk s r
+-- pushContext :: Token -> P ()
+-- pushContext new =
+-- P $ \s -> POk s{pp = (pp s){pp_context = new : pp_context (pp s)}} ()
+
+-- popContext :: P ()
+-- popContext =
+-- P $ \s ->
+-- let
+-- new_context = case pp_context (pp s) of
+-- [] -> []
+-- (_ : t) -> t
+-- in
+-- POk s{pp = (pp s){pp_context = (trace ("pop:new_context:" ++ show new_context) new_context)}} ()
+
+-- peekContext :: P Token
+-- peekContext =
+-- P $ \s ->
+-- let
+-- r = case pp_context (pp s) of
+-- [] -> ITeof -- Anthing really, for now, except a CPP one
+-- (h : _) -> h
+-- in
+-- POk s r
setAccepting :: Bool -> P ()
setAccepting on =
@@ -148,12 +152,21 @@ getAccepting = P $ \s -> POk s (pp_accepting (pp s))
-- definitions start --------------------
-ppDefine :: String -> P ()
+ppDefine :: FastString -> P ()
ppDefine def = P $ \s ->
- POk s{pp = (pp s){pp_defines = Set.insert def (pp_defines (pp s))}} ()
+ -- POk s{pp = (pp s){pp_defines = Set.insert (cleanTokenString def) (pp_defines (pp s))}} ()
+ POk s{pp = (pp s){pp_defines = Set.insert (trace ("ppDefine:def=[" ++ show (cleanTokenString def) ++ "]") (cleanTokenString def)) (pp_defines (pp s))}} ()
-ppIsDefined :: String -> P Bool
+ppIsDefined :: FastString -> P Bool
ppIsDefined def = P $ \s ->
- POk s (Set.member def (pp_defines (pp s)))
+ -- POk s (Set.member def (pp_defines (pp s)))
+ POk s (Set.member (trace ("ppIsDefined:def=[" ++ show (cleanTokenString def) ++ "]") (cleanTokenString def)) (pp_defines (pp s)))
+
+-- | Take a @FastString@ of the form "#define FOO\n" and strip off all but "FOO"
+cleanTokenString :: FastString -> String
+cleanTokenString fs = r
+ where
+ ss = dropWhile (\c -> not $ isSpace c) (unpackFS fs)
+ r = init ss
-- definitions end --------------------
=====================================
utils/check-cpp/Main.hs
=====================================
@@ -2,10 +2,11 @@
{-# LANGUAGE BangPatterns #-}
import Control.Monad.IO.Class
+import Data.Char
import Data.Data hiding (Fixity)
import Data.List
import qualified Data.Set as Set
-import Debug.Trace (trace)
+import Debug.Trace
import GHC
import qualified GHC.Data.EnumSet as EnumSet
import GHC.Data.FastString
@@ -26,6 +27,19 @@ import GHC.Types.SrcLoc
import GHC.Utils.Error
import GHC.Utils.Outputable
+import qualified Text.Parsec as Parsec
+import Text.Parsec.Char as PS
+import Text.Parsec.Combinator as PS
+import Text.Parsec.Prim as PS
+
+-- import qualified Text.Parsec as Parsec
+import Text.Parsec.String (Parser)
+
+-- import Text.Parsec.Char
+-- import FunctionsAndTypesForParsing (regularParse, parseWithEof, parseWithLeftOver)
+-- import Text.Parsec.String.Combinator (many1)
+-- import Text.Parsec.Combinator (many1)
+
-- ---------------------------------------------------------------------
showAst :: (Data a) => a -> String
@@ -46,7 +60,6 @@ ppLexer queueComments cont =
( \tk ->
let
contInner t = (trace ("ppLexer: tk=" ++ show (unLoc tk, unLoc t)) cont) t
- -- contPush = pushContext (unLoc tk) >> contInner (L lt (ITcppIgnored [tk]))
contPush = pushContext (unLoc tk) >> contIgnoreTok tk
contIgnoreTok (L l tok) = do
case l of
@@ -54,35 +67,50 @@ ppLexer queueComments cont =
_ -> return ()
ppLexer queueComments cont
in
- case tk of
- L _ ITcppDefine -> contPush
- L _ ITcppIf -> contPush
- L _ ITcppIfdef -> contPush
- L _ ITcppIfndef -> contPush
- L _ ITcppElse -> do
- preprocessElse
- contIgnoreTok tk
- L _ ITcppEndif -> do
- preprocessEnd
- contIgnoreTok tk
- L _ tok -> do
- state <- getCppState
- case (trace ("CPP state:" ++ show state) state) of
- CppIgnoring -> contIgnoreTok tk
- CppInDefine -> do
- ppDefine (trace ("ppDefine:" ++ show tok) (show tok))
- popContext
+ -- case tk of
+ case (trace ("M.ppLexer:tk=" ++ show (unLoc tk)) tk) of
+ L _ (ITcppStart continuation s) -> do
+ if continuation
+ then do
+ pushContinuation tk
+ contIgnoreTok tk
+ else do
+ processCppToks s
contIgnoreTok tk
- CppInIfdef -> do
- defined <- ppIsDefined (show tok)
- setAccepting defined
- popContext
+ L _ (ITcppContinue continuation s) -> do
+ if continuation
+ then do
+ pushContinuation tk
contIgnoreTok tk
- CppInIfndef -> do
- defined <- ppIsDefined (show tok)
- setAccepting (not defined)
- popContext
+ else do
+ processCppToks s
contIgnoreTok tk
+ -- L _ (ITcppDefine s) -> do
+ -- -- ppDefine (trace ("ppDefine:" ++ show s) s)
+ -- ppDefine s
+ -- popContext
+ -- contIgnoreTok tk
+ -- L _ (ITcppIf _) -> contPush
+ -- L _ (ITcppIfdef s) -> do
+ -- defined <- ppIsDefined s
+ -- -- setAccepting defined
+ -- setAccepting (trace ("ifdef:" ++ show (s, defined)) defined)
+ -- contIgnoreTok tk
+ -- L _ (ITcppIfndef s) -> do
+ -- defined <- ppIsDefined s
+ -- -- setAccepting (not defined)
+ -- setAccepting (trace ("ifdef:" ++ show (s, defined)) (not defined))
+ -- contIgnoreTok tk
+ -- L _ ITcppElse -> do
+ -- preprocessElse
+ -- contIgnoreTok tk
+ -- L _ ITcppEndif -> do
+ -- preprocessEnd
+ -- contIgnoreTok tk
+ _ -> do
+ state <- getCppState
+ case (trace ("CPP state:" ++ show state) state) of
+ CppIgnoring -> contIgnoreTok tk
_ -> contInner tk
)
@@ -96,6 +124,23 @@ preprocessEnd = do
-- TODO: nested context
setAccepting True
+processCppToks :: FastString -> P ()
+processCppToks fs = do
+ let str = unpackFS fs
+ let
+ get (L _ (ITcppStart _ s)) = s
+ get (L _ (ITcppContinue _ s)) = s
+ get _ = error "should not"
+ -- Combine any prior continuation tokens
+ cs <- popContinuation
+ processCpp (reverse $ fs : map get cs)
+ return ()
+
+processCpp :: [FastString] -> P ()
+processCpp fs = do
+ traceM $ "processCpp: fs=" ++ show fs
+ return ()
+
-- ---------------------------------------------------------------------
-- Preprocessor state functions
@@ -112,9 +157,9 @@ getCppState = do
context <- peekContext
accepting <- getAccepting
case context of
- ITcppDefine -> return CppInDefine
- ITcppIfdef -> return CppInIfdef
- ITcppIfndef -> return CppInIfndef
+ -- ITcppDefine _ -> return CppInDefine
+ -- ITcppIfdef _ -> return CppInIfdef
+ -- ITcppIfndef _ -> return CppInIfndef
_ ->
if accepting
then return CppNormal
@@ -153,17 +198,90 @@ setAccepting on =
getAccepting :: P Bool
getAccepting = P $ \s -> POk s (pp_accepting (pp s))
+-- -------------------------------------
+
+pushContinuation :: Located Token -> P ()
+pushContinuation new =
+ P $ \s -> POk s{pp = (pp s){pp_continuation = new : pp_continuation (pp s)}} ()
+
+popContinuation :: P [Located Token]
+popContinuation =
+ P $ \s -> POk s{pp = (pp s){pp_continuation = []}} (pp_continuation (pp s))
+
-- pp_context stack end -------------------
-- definitions start --------------------
-ppDefine :: String -> P ()
+ppDefine :: FastString -> P ()
ppDefine def = P $ \s ->
- POk s{pp = (pp s){pp_defines = Set.insert def (pp_defines (pp s))}} ()
+ -- POk s{pp = (pp s){pp_defines = Set.insert (cleanTokenString def) (pp_defines (pp s))}} ()
+ POk s{pp = (pp s){pp_defines = Set.insert (trace ("ppDefine:def=[" ++ show (cleanTokenString def) ++ "]") (cleanTokenString def)) (pp_defines (pp s))}} ()
-ppIsDefined :: String -> P Bool
+ppIsDefined :: FastString -> P Bool
ppIsDefined def = P $ \s ->
- POk s (Set.member def (pp_defines (pp s)))
+ -- POk s (Set.member (cleanTokenString def) (pp_defines (pp s)))
+ POk s (Set.member (trace ("ppIsDefined:def=[" ++ show (cleanTokenString def) ++ "]") (cleanTokenString def)) (pp_defines (pp s)))
+
+-- | Take a @FastString@ of the form "#define FOO\n" and strip off all but "FOO"
+cleanTokenString :: FastString -> String
+cleanTokenString fs = r
+ where
+ ss = dropWhile (\c -> not $ isSpace c) (unpackFS fs)
+ r = init ss
+
+parseDefine :: FastString -> Maybe (String, String)
+parseDefine s = r
+ where
+ r = Just (cleanTokenString s, "")
+
+-- =====================================================================
+-- Parsec parsing
+type CppParser = Parsec String ()
+
+regularParse :: Parser a -> String -> Either Parsec.ParseError a
+regularParse p = PS.parse p ""
+
+cppComment :: CppParser ()
+cppComment = do
+ _ <- PS.string "/*"
+ _ <- PS.manyTill PS.anyChar (PS.try (PS.string "*/"))
+ return ()
+
+whiteSpace :: CppParser ()
+whiteSpace = do
+ _ <- PS.many (PS.choice [cppComment, PS.space >> return ()])
+ return ()
+
+lexeme :: CppParser a -> CppParser a
+lexeme p = p <* whiteSpace
+
+cppToken :: CppParser String
+cppToken = lexeme (PS.many1 (PS.satisfy (\c -> not (isSpace c))))
+
+{- | Do cpp initial processing, as per https://gcc.gnu.org/onlinedocs/cpp/Initial-processing.html
+See Note [GhcCPP Initial Processing]
+-}
+cppInitial :: FastString -> String
+cppInitial fs = r
+ where
+ r = unpackFS fs
+
+{-
+Note [GhcCPP Initial Processing]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+This processing is based on the description at
+https://gcc.gnu.org/onlinedocs/cpp/Initial-processing.html
+
+It is only done for lines starting with a preprocessor
+directive.
+
+1. Broken into lines. We rely on the GHC Lexer to do this
+2. Trigraphs are not processed
+3. Continued lines are merged into a single line
+ and is handled in the Lexer.
+4. All comments are replaced with a single space
+
+-}
-- =====================================================================
-- Emulate the parser
@@ -330,13 +448,9 @@ happyError = Lexer.srcParseFail
-- =====================================================================
-- ---------------------------------------------------------------------
-printToks :: Int -> [Located Token] -> IO ()
-printToks indent toks = mapM_ go toks
+printToks :: [Located Token] -> IO ()
+printToks toks = mapM_ go toks
where
- go (L _ (ITcppIgnored ts)) = do
- putStr "ITcppIgnored ["
- printToks (indent + 4) ts
- putStrLn "]"
go (L _ tk) = putStrLn (show tk)
-- Testing
@@ -349,13 +463,13 @@ doTest strings = do
let test = intercalate "\n" strings
!tks <- parseString libdirNow test
putStrLn "-----------------------------------------"
- printToks 0 (reverse tks)
+ printToks (reverse tks)
t0 :: IO ()
t0 = do
doTest
- [ "#define FOO"
- , "#ifdef FOO"
+ [ "# define FOO"
+ , "# ifdef FOO"
, "x = 1"
, "#endif"
, ""
@@ -379,3 +493,87 @@ t2 = do
, "#endif"
, ""
]
+
+t3 :: IO ()
+t3 = do
+ doTest
+ [ "{-# LANGUAGE GhcCPP #-}"
+ , "module Example1 where"
+ , ""
+ , "y = 1"
+ , ""
+ , "#define FOO"
+ , ""
+ , "x ="
+ , "#ifdef FOO"
+ , " \" hello \""
+ , "#else"
+ , " \" bye now \""
+ , "#endif"
+ , ""
+ , "foo = putStrLn x"
+ ]
+
+t3a :: IO ()
+t3a = do
+ doTest
+ [ "{-# LANGUAGE GhcCPP #-}"
+ , "module Example1 where"
+ , ""
+ , "#define FOO"
+ , ""
+ , "x ="
+ , "#ifdef FOO"
+ , " \" hello \""
+ , "#else"
+ , " \" bye now \""
+ , "#endif"
+ , ""
+ , "foo = putStrLn x"
+ ]
+
+t4 :: IO ()
+t4 = do
+ doTest
+ [ "/* package ghc-exactprint-1.7.0.1 */"
+ , "#ifndef VERSION_ghc_exactprint"
+ , "#define VERSION_ghc_exactprint \"1.7.0.1\""
+ , "#endif /* VERSION_ghc_exactprint */"
+ , "#ifndef MIN_VERSION_ghc_exactprint"
+ , -- , "#define MIN_VERSION_ghc_exactprint(major1,major2,minor) (\\"
+ -- , " (major1) < 1 || \\"
+ -- , " (major1) == 1 && (major2) < 7 || \\"
+ -- , " (major1) == 1 && (major2) == 7 && (minor) <= 0)"
+ "#endif /* MIN_VERSION_ghc_exactprint */"
+ , ""
+ , "#ifdef VERSION_ghc_exactprint"
+ , "x = \"got version\""
+ , "#else"
+ , "x = \"no version\""
+ , "#endif"
+ ]
+
+t5 :: IO ()
+t5 = do
+ doTest
+ [ "#define MIN_VERSION_ghc_exactprint(major1,major2,minor) (\\"
+ , " (major1) < 1 || \\"
+ , " (major1) == 1 && (major2) < 7 || \\"
+ , " (major1) == 1 && (major2) == 7 && (minor) <= 0)"
+ , "x = x"
+ ]
+
+t6 :: IO ()
+t6 = do
+ doTest
+ [ "#define VERSION_ghc_exactprint \"1.7.0.1\""
+ , ""
+ , "#ifdef VERSION_ghc_exactprint"
+ , "x = \"got version\""
+ , "#else"
+ , "x = \"no version\""
+ , "#endif"
+ ]
+
+t7 :: Maybe (String, String)
+t7 = parseDefine (mkFastString "#define VERSION_ghc_exactprint \"1.7.0.1\"\n")
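
As a companion to the cleanTokenString and parseDefine helpers above, recovering a macro name from a directive line could look like the following sketch (a hypothetical helper written independently of the patch; the in-tree code may differ in edge cases):

import Data.Char (isSpace)

-- Sketch of the intent behind cleanTokenString: given a directive such
-- as "#define FOO\n", recover just the macro name.
macroName :: String -> String
macroName =
  takeWhile (not . isSpace)      -- keep the name itself
    . dropWhile isSpace          -- skip the space after the keyword
    . dropWhile (not . isSpace)  -- skip the leading "#define"

-- macroName "#define FOO\n"      == "FOO"
-- macroName "#define FOO(x) x\n" == "FOO(x)"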
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/8134209e4092556edb7b0d324157db0ea5a468af...89aaf2869980802922f676c0d3f391d0db568b17