[Git][ghc/ghc][wip/az/ghc-cpp] Split into separate files
Alan Zimmerman (@alanz)
gitlab at gitlab.haskell.org
Thu Oct 5 19:34:09 UTC 2023
Alan Zimmerman pushed to branch wip/az/ghc-cpp at Glasgow Haskell Compiler / GHC
Commits:
9c7e0f69 by Alan Zimmerman at 2023-10-05T20:33:42+01:00
Split into separate files
- - - - -
4 changed files:
- utils/check-cpp/Main.hs
- + utils/check-cpp/Parse.hs
- + utils/check-cpp/ParseSimulate.hs
- + utils/check-cpp/PreProcess.hs
Changes:
=====================================
utils/check-cpp/Main.hs
=====================================
@@ -2,16 +2,13 @@
{-# LANGUAGE BangPatterns #-}
import Control.Monad.IO.Class
-import Data.Char
import Data.Data hiding (Fixity)
import Data.List
-import Data.Map (Map)
import qualified Data.Map as Map
import Debug.Trace
import GHC
import qualified GHC.Data.EnumSet as EnumSet
import GHC.Data.FastString
-import qualified GHC.Data.Strict as Strict
import GHC.Data.StringBuffer
import GHC.Driver.Config.Parser
import GHC.Driver.Errors.Types
@@ -20,7 +17,7 @@ import qualified GHC.Driver.Session as GHC
import GHC.Hs.Dump
import qualified GHC.LanguageExtensions as LangExt
import GHC.Parser.Errors.Ppr ()
-import GHC.Parser.Lexer (P (..), PState (..), ParseResult (..), Token (..))
+import GHC.Parser.Lexer (P (..), ParseResult (..), Token (..))
import qualified GHC.Parser.Lexer as GHC
import qualified GHC.Parser.Lexer as Lexer
import GHC.Types.Error
@@ -28,15 +25,10 @@ import GHC.Types.SrcLoc
import GHC.Utils.Error
import GHC.Utils.Outputable
import qualified Text.Parsec as Parsec
-import Text.Parsec.Char as PS
-import Text.Parsec.Combinator as PS
-import Text.Parsec.Prim as PS
-import Text.Parsec.String (Parser)
--- import Text.Parsec.Char
--- import FunctionsAndTypesForParsing (regularParse, parseWithEof, parseWithLeftOver)
--- import Text.Parsec.String.Combinator (many1)
--- import Text.Parsec.Combinator (many1)
+import Parse
+import ParseSimulate
+import PreProcess
-- ---------------------------------------------------------------------
@@ -47,359 +39,6 @@ showAst ast =
-- =====================================================================
-type PP = P PpState
-
-initPpState :: PpState
-initPpState =
- PpState
- { pp_defines = Map.empty
- , pp_includes = Map.empty
- , pp_include_stack = []
- , pp_continuation = []
- , pp_context = []
- , pp_accepting = True
- }
-
-data PpState = PpState
- { pp_defines :: !(Map String [String])
- , pp_includes :: !(Map String StringBuffer)
- , pp_include_stack :: ![Lexer.AlexInput]
- , pp_continuation :: ![Located Token]
- , pp_context :: ![Token] -- What preprocessor directive we are currently processing
- , pp_accepting :: !Bool
- }
- deriving (Show)
--- deriving instance Show Lexer.AlexInput
-
--- =====================================================================
-
-ppLexer, ppLexerDbg :: Bool -> (Located Token -> PP a) -> PP a
--- Use this instead of 'lexer' in GHC.Parser to dump the tokens for debugging.
-ppLexerDbg queueComments cont = ppLexer queueComments contDbg
- where
- contDbg tok = trace ("pptoken: " ++ show (unLoc tok)) (cont tok)
-ppLexer queueComments cont =
- Lexer.lexer
- queueComments
- ( \tk ->
- let
- contInner t = (trace ("ppLexer: tk=" ++ show (unLoc tk, unLoc t)) cont) t
- -- contPush = pushContext (unLoc tk) >> contIgnoreTok tk
- contIgnoreTok (L l tok) = do
- case l of
- RealSrcSpan r (Strict.Just b) -> Lexer.queueIgnoredToken (L (PsSpan r b) tok)
- _ -> return ()
- ppLexer queueComments cont
- in
- -- case tk of
- case (trace ("M.ppLexer:tk=" ++ show (unLoc tk)) tk) of
- L _ ITeof -> do
- mInp <- popIncludeLoc
- case mInp of
- Nothing -> contInner tk
- Just inp -> do
- Lexer.setInput inp
- ppLexer queueComments cont
- L _ (ITcpp continuation s) -> do
- if continuation
- then pushContinuation tk
- else processCppToks s
- contIgnoreTok tk
- _ -> do
- state <- getCppState
- -- case (trace ("CPP state:" ++ show state) state) of
- case state of
- CppIgnoring -> contIgnoreTok tk
- _ -> contInner tk
- )
-
-preprocessElse :: PP ()
-preprocessElse = do
- accepting <- getAccepting
- setAccepting (not accepting)
-
-preprocessEnd :: PP ()
-preprocessEnd = do
- -- TODO: nested context
- setAccepting True
-
-processCppToks :: FastString -> PP ()
-processCppToks fs = do
- let
- get (L _ (ITcpp _ s)) = s
- get _ = error "should not"
- -- Combine any prior continuation tokens
- cs <- popContinuation
- processCpp (reverse $ fs : map get cs)
- return ()
-
-processCpp :: [FastString] -> PP ()
-processCpp fs = do
- -- traceM $ "processCpp: fs=" ++ show fs
- -- let s = cppInitial fs
- let s = cppInitial fs
- case regularParse cppDirective s of
- Left err -> error $ show err
- Right (CppInclude filename) -> do
- ppInclude filename
- Right (CppDefine name def) -> do
- ppDefine name def
- Right (CppIfdef name) -> do
- defined <- ppIsDefined name
- setAccepting defined
- Right (CppIfndef name) -> do
- defined <- ppIsDefined name
- setAccepting (not defined)
- Right CppElse -> do
- accepting <- getAccepting
- setAccepting (not accepting)
- return ()
- Right CppEndif -> do
- -- TODO: nested states
- setAccepting True
- return ()
-
- -- return (trace ("processCpp:s=" ++ show s) ())
- return ()
-
--- ---------------------------------------------------------------------
--- Preprocessor state functions
-
-data CppState
- = CppIgnoring
- | CppNormal
- deriving (Show)
-
-getCppState :: PP CppState
-getCppState = do
- accepting <- getAccepting
- if accepting
- then return CppNormal
- else return CppIgnoring
-
--- pp_context stack start -----------------
-
-pushContext :: Token -> PP ()
-pushContext new =
- P $ \s -> POk s{pp = (pp s){pp_context = new : pp_context (pp s)}} ()
-
-popContext :: PP ()
-popContext =
- P $ \s ->
- let
- new_context = case pp_context (pp s) of
- [] -> []
- (_ : t) -> t
- in
- POk s{pp = (pp s){pp_context = new_context}} ()
-
-peekContext :: PP Token
-peekContext =
- P $ \s ->
- let
- r = case pp_context (pp s) of
- [] -> ITeof -- Anything really, for now, except a CPP one
- (h : _) -> h
- in
- POk s r
-
-setAccepting :: Bool -> PP ()
-setAccepting on =
- P $ \s -> POk s{pp = (pp s){pp_accepting = on}} ()
-
-getAccepting :: PP Bool
-getAccepting = P $ \s -> POk s (pp_accepting (pp s))
-
--- -------------------------------------
-
-pushContinuation :: Located Token -> PP ()
-pushContinuation new =
- P $ \s -> POk s{pp = (pp s){pp_continuation = new : pp_continuation (pp s)}} ()
-
-popContinuation :: PP [Located Token]
-popContinuation =
- P $ \s -> POk s{pp = (pp s){pp_continuation = []}} (pp_continuation (pp s))
-
--- pp_context stack end -------------------
-
--- pp_include start -----------------------
-
-getInclude :: String -> PP (Maybe StringBuffer)
-getInclude filename = P $ \s -> POk s (Map.lookup filename (pp_includes (pp s)))
-
-pushIncludeLoc :: Lexer.AlexInput -> PP ()
-pushIncludeLoc pos
- = P $ \s -> POk s {pp = (pp s){ pp_include_stack = pos: pp_include_stack (pp s)}} ()
-
-popIncludeLoc :: PP (Maybe Lexer.AlexInput)
-popIncludeLoc =
- P $ \s ->
- let
- (new_st,r) = case pp_include_stack (pp s) of
- [] ->([], Nothing)
- (h:t) -> (t, Just h)
- in
- POk s{pp = (pp s){pp_include_stack = new_st }} r
-
--- pp_include end -------------------------
-
--- definitions start --------------------
-
-ppInclude :: String -> PP ()
-ppInclude filename = do
- mSrc <- getInclude filename
- case mSrc of
- Nothing -> return ()
- Just src -> do
- origInput <- Lexer.getInput
- pushIncludeLoc origInput
- let loc = PsLoc (mkRealSrcLoc (mkFastString filename) 1 1) (BufPos 0)
- Lexer.setInput (Lexer.AI loc src)
- return $ trace ("ppInclude:mSrc=[" ++ show mSrc ++ "]") ()
- -- return $ trace ("ppInclude:filename=[" ++ filename ++ "]") ()
-
-ppDefine :: String -> [String] -> PP ()
-ppDefine name val = P $ \s ->
- -- POk s{pp = (pp s){pp_defines = Set.insert (cleanTokenString def) (pp_defines (pp s))}} ()
- POk s{pp = (pp s){pp_defines = Map.insert (trace ("ppDefine:def=[" ++ name ++ "]") name) val (pp_defines (pp s))}} ()
-
-ppIsDefined :: String -> PP Bool
-ppIsDefined def = P $ \s ->
- -- POk s (Map.member def (pp_defines (pp s)))
- POk s (Map.member (trace ("ppIsDefined:def=[" ++ def ++ "]") def) (pp_defines (pp s)))
-
--- | Take a @FastString@ of the form "#define FOO\n" and strip off all but "FOO"
-cleanTokenString :: FastString -> String
-cleanTokenString fs = r
- where
- ss = dropWhile (\c -> not $ isSpace c) (unpackFS fs)
- r = init ss
-
-parseDefine :: FastString -> Maybe (String, [String])
-parseDefine fs = r
- where
- -- r = Just (cleanTokenString s, "")
- r = case regularParse cppDefinition (unpackFS fs) of
- Left _ -> Nothing
- Right v -> Just v
-
--- =====================================================================
--- Parsec parsing
-type CppParser = Parsec String ()
-
-regularParse :: Parser a -> String -> Either Parsec.ParseError a
-regularParse p = PS.parse p ""
-
--- TODO: delete this
-cppDefinition :: CppParser (String, [String])
-cppDefinition = do
- _ <- PS.char '#'
- _ <- whiteSpace
- _ <- lexeme (PS.string "define")
- name <- cppToken
- definition <- cppTokens
- return (name, definition)
-
-data CppDirective
- = CppInclude String
- | CppDefine String [String]
- | CppIfdef String
- | CppIfndef String
- | CppElse
- | CppEndif
- deriving (Show, Eq)
-
-cppDirective :: CppParser CppDirective
-cppDirective = do
- _ <- PS.char '#'
- _ <- whiteSpace
- choice
- [ cppKw "define" >> cmdDefinition
- , try $ cppKw "include" >> cmdInclude
- , try $ cppKw "ifdef" >> cmdIfdef
- , cppKw "ifndef" >> cmdIfndef
- , try $ cppKw "else" >> return CppElse
- , cppKw "endif" >> return CppEndif
- -- , cppKw "if" CppIfKw
- -- , cppKw "elif" CppElifKw
- -- , cppKw "undef" CppUndefKw
- -- , cppKw "error" CppErrorKw
- ]
-
-cmdInclude :: CppParser CppDirective
-cmdInclude = do
- _ <- string "\""
- filename <- many1 (satisfy (\c -> not (isSpace c || c == '"')))
- _ <- string "\""
- return $ CppInclude filename
-
-cmdDefinition :: CppParser CppDirective
-cmdDefinition = do
- name <- cppToken
- definition <- cppTokens
- return $ CppDefine name definition
-
-cmdIfdef :: CppParser CppDirective
-cmdIfdef = do
- name <- cppToken
- return $ CppIfdef name
-
-cmdIfndef :: CppParser CppDirective
-cmdIfndef = do
- name <- cppToken
- return $ CppIfndef name
-
-cppKw :: String -> CppParser ()
-cppKw kw = do
- _ <- lexeme (PS.string kw)
- return ()
-
-cppComment :: CppParser ()
-cppComment = do
- _ <- PS.string "/*"
- _ <- PS.manyTill PS.anyChar (PS.try (PS.string "*/"))
- return ()
-
-whiteSpace :: CppParser ()
-whiteSpace = do
- _ <- PS.many (PS.choice [cppComment, PS.space >> return ()])
- return ()
-
-lexeme :: CppParser a -> CppParser a
-lexeme p = p <* whiteSpace
-
-cppToken :: CppParser String
-cppToken = lexeme (PS.many1 (PS.satisfy (\c -> not (isSpace c))))
-
-cppTokens :: CppParser [String]
-cppTokens = PS.many cppToken
-
-{- | Do cpp initial processing, as per https://gcc.gnu.org/onlinedocs/cpp/Initial-processing.html
-See Note [GhcCPP Initial Processing]
--}
-cppInitial :: [FastString] -> String
-cppInitial fs = r
- where
- -- go fs' = reverse $ drop 2 $ reverse $ unpackFS fs'
- r = concatMap unpackFS fs
-
-{-
-Note [GhcCPP Initial Processing]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-This processing is based on the description at
-https://gcc.gnu.org/onlinedocs/cpp/Initial-processing.html
-
-It is only done for lines starting with a preprocessor
-directive.
-
-1. Broken into lines. We rely on the GHC Lexer to do this
-2. Trigraphs are not processed
-3. Continued lines are merged into a single line
- and is handled in the Lexer.
-4. All comments are replaced with a single space
-
--}
-
-- =====================================================================
-- Emulate the parser
@@ -422,8 +61,8 @@ strGetToks :: Includes -> Lexer.ParserOpts -> FilePath -> String -> [Located Tok
-- strGetToks includes popts filename str = reverse $ lexAll pstate
strGetToks includes popts filename str = reverse $ lexAll (trace ("pstate=" ++ show initState) pstate)
where
- includeMap = Map.fromList $ map (\(k,v) -> (k, stringToStringBuffer (intercalate "\n" v))) includes
- initState = initPpState { pp_includes = includeMap }
+ includeMap = Map.fromList $ map (\(k, v) -> (k, stringToStringBuffer (intercalate "\n" v))) includes
+ initState = initPpState{pp_includes = includeMap}
pstate = Lexer.initParserState initState popts buf loc
loc = mkRealSrcLoc (mkFastString filename) 1 1
buf = stringToStringBuffer str
@@ -494,78 +133,6 @@ ghcWrapper libdir a =
-- ---------------------------------------------------------------------
-parseModuleNoHaddock :: PP [Located Token]
-parseModuleNoHaddock = happySomeParser
- where
- -- happySomeParser = happyThen (happyParse 0#) (\x -> happyReturn (let {(HappyWrap35 x') = happyOut35 x} in x'))
- happySomeParser = (>>=) (happyParse 0) (\x -> return x)
-
-happyParse :: Int -> PP [Located Token]
-happyParse start_state = happyNewToken start_state [] []
-
-happyNewToken :: Int -> [Int] -> [Located Token] -> PP [Located Token]
-happyNewToken action sts stk =
- -- lexer
- ppLexerDbg
- True
- ( \tk ->
- let cont i =
- trace ("happyNewToken:tk=" ++ show tk)
- $ happyDoAction i tk action sts stk
- in case tk of
- L _ ITeof -> happyDoAction 169 tk action sts stk
- _ -> cont 5
- -- _ -> happyError' (tk, [])
- )
-
-happyDoAction :: Int -> Located Token -> Int -> [Int] -> [Located Token] -> PP [Located Token]
--- happyDoAction num tk action sts stk = P $ \s -> POk s tk
-happyDoAction num tk action sts stk =
- case num of
- 1 -> happyShift 2 num tk action sts stk
- 2 -> happyShift 5 num tk action sts stk
- 3 -> happyShift 5 num tk action sts stk
- 4 -> happyShift 5 num tk action sts stk
- 5 -> happyShift 5 num tk action sts stk
- 50 -> happyAccept num tk action sts stk
- 169 -> happyAccept num tk action sts stk
- i -> happyFail ["failing:" ++ show i] i tk action sts stk
-
--- happyAccept j tk st sts (HappyStk ans _) =
--- (happyTcHack j (happyTcHack st)) (happyReturn1 ans)
-
-happyAccept :: Int -> Located Token -> Int -> [Int] -> [Located Token] -> PP [Located Token]
-happyAccept _j tk _st _sts stk =
- trace ("happyAccept:" ++ show tk)
- $ return stk
-
--- happyReturn1 :: a -> P a
--- happyReturn1 = return
-
-happyShift :: Int -> Int -> Located Token -> Int -> [Int] -> [Located Token] -> PP [Located Token]
-happyShift new_state _i tk st sts stk = do
- happyNewToken new_state (st : sts) (tk : stk)
-
--- happyShift new_state i tk st sts stk =
--- happyNewToken new_state (HappyCons (st) (sts)) ((happyInTok (tk))`HappyStk`stk)
-
-happyFail :: [String] -> Int -> Located Token -> p2 -> p3 -> p4 -> PP a
-happyFail explist i tk _old_st _ _stk =
- trace ("failing" ++ show explist)
- $ happyError_ explist i tk
-
-happyError_ :: [String] -> p1 -> Located Token -> PP a
-happyError_ explist _ tk = happyError' (tk, explist)
-
-notHappyAtAll :: a
-notHappyAtAll = Prelude.error "Internal Happy error\n"
-
-happyError' :: (Located Token, [String]) -> PP a
-happyError' tk = (\(_tokens, _explist) -> happyError) tk
-
-happyError :: PP a
-happyError = Lexer.srcParseFail
-
-- =====================================================================
-- ---------------------------------------------------------------------
@@ -710,7 +277,8 @@ t9 = regularParse cppDirective "#define VERSION_ghc_exactprint \"1.7.0.1\""
t10 :: IO ()
t10 = do
- doTestWithIncludes testIncludes
+ doTestWithIncludes
+ testIncludes
[ "#include \"bar.h\""
, ""
, "#ifdef FOO"
@@ -725,7 +293,8 @@ testIncludes =
[
( "bar.h"
, ["#include \"sub.h\""]
- ),
+ )
+ ,
( "sub.h"
, ["#define FOO"]
)
=====================================
utils/check-cpp/Parse.hs
=====================================
@@ -0,0 +1,194 @@
+module Parse where
+
+import Data.Char
+import GHC.Parser.Errors.Ppr ()
+import qualified Text.Parsec as Parsec
+import Text.Parsec.Char as PS
+import Text.Parsec.Combinator as PS
+import Text.Parsec.Prim as PS
+import Text.Parsec.String (Parser)
+
+-- import Debug.Trace
+
+-- ---------------------------------------------------------------------
+
+type CppParser = Parsec String ()
+
+regularParse :: Parser a -> String -> Either Parsec.ParseError a
+regularParse p = PS.parse p ""
+
+-- TODO: delete this
+cppDefinition :: CppParser (String, [String])
+cppDefinition = do
+ _ <- PS.char '#'
+ _ <- whiteSpace
+ _ <- lexeme (PS.string "define")
+ name <- cppToken
+ definition <- cppTokens
+ return (name, definition)
+
+data CppDirective
+ = CppInclude String
+ | CppDefine String [String]
+ | CppIfdef String
+ | CppIfndef String
+ | CppElse
+ | CppEndif
+ deriving (Show, Eq)
+
+cppDirective :: CppParser CppDirective
+cppDirective = do
+ _ <- PS.char '#'
+ _ <- whiteSpace
+ choice
+ [ cppKw "define" >> cmdDefinition
+ , try $ cppKw "include" >> cmdInclude
+ , try $ cppKw "ifdef" >> cmdIfdef
+ , cppKw "ifndef" >> cmdIfndef
+ , try $ cppKw "else" >> return CppElse
+ , cppKw "endif" >> return CppEndif
+ -- , cppKw "if" CppIfKw
+ -- , cppKw "elif" CppElifKw
+ -- , cppKw "undef" CppUndefKw
+ -- , cppKw "error" CppErrorKw
+ ]
+
+cmdInclude :: CppParser CppDirective
+cmdInclude = do
+ _ <- string "\""
+ filename <- many1 (satisfy (\c -> not (isSpace c || c == '"')))
+ _ <- string "\""
+ return $ CppInclude filename
+
+cmdDefinition :: CppParser CppDirective
+cmdDefinition = do
+ name <- cppToken
+ definition <- cppTokens
+ return $ CppDefine name definition
+
+cmdIfdef :: CppParser CppDirective
+cmdIfdef = do
+ name <- cppToken
+ return $ CppIfdef name
+
+cmdIfndef :: CppParser CppDirective
+cmdIfndef = do
+ name <- cppToken
+ return $ CppIfndef name
+
+cppKw :: String -> CppParser ()
+cppKw kw = do
+ _ <- lexeme (PS.string kw)
+ return ()
+
+cppComment :: CppParser ()
+cppComment = do
+ _ <- PS.string "/*"
+ _ <- PS.manyTill PS.anyChar (PS.try (PS.string "*/"))
+ return ()
+
+whiteSpace :: CppParser ()
+whiteSpace = do
+ _ <- PS.many (PS.choice [cppComment, PS.space >> return ()])
+ return ()
+
+lexeme :: CppParser a -> CppParser a
+lexeme p = p <* whiteSpace
+
+cppToken :: CppParser String
+cppToken = lexeme (PS.many1 (PS.satisfy (\c -> not (isSpace c))))
+
+cppTokens :: CppParser [String]
+cppTokens = PS.many cppToken
+
+-- ---------------------------------------------------------------------
+-- import GHC.S.Types
+
+-- parseS :: String -> Either ParseError S
+-- parseS = parse expr ""
+
+-- lexer :: P.TokenParser ()
+-- lexer =
+-- P.makeTokenParser
+-- ( emptyDef
+-- { P.reservedNames =
+-- [ "define"
+-- , "include"
+-- , "undef"
+-- , "error"
+-- , "ifdef"
+-- , "ifndef"
+-- , "if"
+-- , "elif"
+-- , "else"
+-- , "endif"
+-- ]
+-- }
+-- )
+
+-- slam :: Parser S
+-- slam = P.parens lexer $ do
+-- P.reserved lexer "lam"
+-- ident <- P.identifier lexer
+-- body <- expr
+-- return (SLam (Atom ident) body)
+
+-- slet :: Parser S
+-- slet = P.parens lexer $ do
+-- P.reserved lexer "let"
+-- (ident, e1) <- P.parens lexer $ do
+-- idnt <- P.identifier lexer
+-- expr1 <- expr
+-- return (idnt, expr1)
+-- e2 <- expr
+-- return (SLet (Atom ident) e1 e2)
+
+-- sletrec :: Parser S
+-- sletrec = P.parens lexer $ do
+-- P.reserved lexer "letrec"
+-- ls <- P.parens lexer $ many1 $ P.parens lexer $ do
+-- idnt <- P.identifier lexer
+-- expr1 <- expr
+-- return (Atom idnt, expr1)
+-- e2 <- expr
+-- return (SLetRec ls e2)
+
+-- scase :: Parser S
+-- scase = P.parens lexer $ do
+-- P.reserved lexer "case"
+-- e <- expr
+-- alt <- optionMaybe (P.identifier lexer)
+-- alts <- P.parens lexer $ many1 $ P.parens lexer $ do
+-- pat <- expr
+-- ex <- expr
+-- return (pat, ex)
+-- case alt of
+-- Just alt -> return (SCase (Atom alt) e alts)
+-- Nothing -> return (SCase (Atom "_") e alts)
+
+-- swild :: Parser S
+-- swild = do
+-- P.symbol lexer "_"
+-- return SWild
+
+-- sbinop :: Parser S
+-- sbinop = P.parens lexer $ do
+-- e1 <- expr
+-- op <- P.operator lexer
+-- e2 <- expr
+-- return (SBinOp (Atom op) e1 e2)
+
+-- expr :: Parser S
+-- expr =
+-- choice
+-- [ try slam
+-- , try sbinop
+-- , try slet
+-- , try sletrec
+-- , try scase
+-- , STuple <$> P.parens lexer (many expr)
+-- , swild
+-- , SAtom <$> (Atom <$> (P.identifier lexer))
+-- , SString <$> P.stringLiteral lexer
+-- , SInt <$> P.integer lexer
+-- ]
=====================================
utils/check-cpp/ParseSimulate.hs
=====================================
@@ -0,0 +1,83 @@
+module ParseSimulate where
+
+import Debug.Trace
+import GHC
+import GHC.Parser.Errors.Ppr ()
+import GHC.Parser.Lexer (Token (..))
+import qualified GHC.Parser.Lexer as Lexer
+
+import PreProcess
+
+-- ---------------------------------------------------------------------
+
+parseModuleNoHaddock :: PP [Located Token]
+parseModuleNoHaddock = happySomeParser
+ where
+ -- happySomeParser = happyThen (happyParse 0#) (\x -> happyReturn (let {(HappyWrap35 x') = happyOut35 x} in x'))
+ happySomeParser = (>>=) (happyParse 0) (\x -> return x)
+
+happyParse :: Int -> PP [Located Token]
+happyParse start_state = happyNewToken start_state [] []
+
+happyNewToken :: Int -> [Int] -> [Located Token] -> PP [Located Token]
+happyNewToken action sts stk =
+ -- lexer
+ ppLexerDbg
+ True
+ ( \tk ->
+ let cont i =
+ trace ("happyNewToken:tk=" ++ show tk)
+ $ happyDoAction i tk action sts stk
+ in case tk of
+ L _ ITeof -> happyDoAction 169 tk action sts stk
+ _ -> cont 5
+ -- _ -> happyError' (tk, [])
+ )
+
+happyDoAction :: Int -> Located Token -> Int -> [Int] -> [Located Token] -> PP [Located Token]
+-- happyDoAction num tk action sts stk = P $ \s -> POk s tk
+happyDoAction num tk action sts stk =
+ case num of
+ 1 -> happyShift 2 num tk action sts stk
+ 2 -> happyShift 5 num tk action sts stk
+ 3 -> happyShift 5 num tk action sts stk
+ 4 -> happyShift 5 num tk action sts stk
+ 5 -> happyShift 5 num tk action sts stk
+ 50 -> happyAccept num tk action sts stk
+ 169 -> happyAccept num tk action sts stk
+ i -> happyFail ["failing:" ++ show i] i tk action sts stk
+
+-- happyAccept j tk st sts (HappyStk ans _) =
+-- (happyTcHack j (happyTcHack st)) (happyReturn1 ans)
+
+happyAccept :: Int -> Located Token -> Int -> [Int] -> [Located Token] -> PP [Located Token]
+happyAccept _j tk _st _sts stk =
+ trace ("happyAccept:" ++ show tk)
+ $ return stk
+
+-- happyReturn1 :: a -> P a
+-- happyReturn1 = return
+
+happyShift :: Int -> Int -> Located Token -> Int -> [Int] -> [Located Token] -> PP [Located Token]
+happyShift new_state _i tk st sts stk = do
+ happyNewToken new_state (st : sts) (tk : stk)
+
+-- happyShift new_state i tk st sts stk =
+-- happyNewToken new_state (HappyCons (st) (sts)) ((happyInTok (tk))`HappyStk`stk)
+
+happyFail :: [String] -> Int -> Located Token -> p2 -> p3 -> p4 -> PP a
+happyFail explist i tk _old_st _ _stk =
+ trace ("failing" ++ show explist)
+ $ happyError_ explist i tk
+
+happyError_ :: [String] -> p1 -> Located Token -> PP a
+happyError_ explist _ tk = happyError' (tk, explist)
+
+notHappyAtAll :: a
+notHappyAtAll = Prelude.error "Internal Happy error\n"
+
+happyError' :: (Located Token, [String]) -> PP a
+happyError' tk = (\(_tokens, _explist) -> happyError) tk
+
+happyError :: PP a
+happyError = Lexer.srcParseFail
=====================================
utils/check-cpp/PreProcess.hs
=====================================
@@ -0,0 +1,288 @@
+module PreProcess where
+
+import Data.Char
+import Data.Map (Map)
+import qualified Data.Map as Map
+import GHC
+import GHC.Data.FastString
+import qualified GHC.Data.Strict as Strict
+import GHC.Data.StringBuffer
+import GHC.Parser.Errors.Ppr ()
+import GHC.Parser.Lexer (P (..), PState (..), ParseResult (..), Token (..))
+import qualified GHC.Parser.Lexer as Lexer
+import GHC.Types.SrcLoc
+
+import Debug.Trace
+
+import Parse
+
+-- ---------------------------------------------------------------------
+
+type PP = P PpState
+
+initPpState :: PpState
+initPpState =
+ PpState
+ { pp_defines = Map.empty
+ , pp_includes = Map.empty
+ , pp_include_stack = []
+ , pp_continuation = []
+ , pp_context = []
+ , pp_accepting = True
+ }
+
+data PpState = PpState
+ { pp_defines :: !(Map String [String])
+ , pp_includes :: !(Map String StringBuffer)
+ , pp_include_stack :: ![Lexer.AlexInput]
+ , pp_continuation :: ![Located Token]
+ , pp_context :: ![Token] -- What preprocessor directive we are currently processing
+ , pp_accepting :: !Bool
+ }
+ deriving (Show)
+
+-- deriving instance Show Lexer.AlexInput
+-- ---------------------------------------------------------------------
+
+data CppState
+ = CppIgnoring
+ | CppNormal
+ deriving (Show)
+
+-- ---------------------------------------------------------------------
+
+ppLexer, ppLexerDbg :: Bool -> (Located Token -> PP a) -> PP a
+-- Use this instead of 'lexer' in GHC.Parser to dump the tokens for debugging.
+ppLexerDbg queueComments cont = ppLexer queueComments contDbg
+ where
+ contDbg tok = trace ("pptoken: " ++ show (unLoc tok)) (cont tok)
+ppLexer queueComments cont =
+ Lexer.lexer
+ queueComments
+ ( \tk ->
+ let
+ contInner t = (trace ("ppLexer: tk=" ++ show (unLoc tk, unLoc t)) cont) t
+ -- contPush = pushContext (unLoc tk) >> contIgnoreTok tk
+ contIgnoreTok (L l tok) = do
+ case l of
+ RealSrcSpan r (Strict.Just b) -> Lexer.queueIgnoredToken (L (PsSpan r b) tok)
+ _ -> return ()
+ ppLexer queueComments cont
+ in
+ -- case tk of
+ case (trace ("M.ppLexer:tk=" ++ show (unLoc tk)) tk) of
+ L _ ITeof -> do
+ mInp <- popIncludeLoc
+ case mInp of
+ Nothing -> contInner tk
+ Just inp -> do
+ Lexer.setInput inp
+ ppLexer queueComments cont
+ L _ (ITcpp continuation s) -> do
+ if continuation
+ then pushContinuation tk
+ else processCppToks s
+ contIgnoreTok tk
+ _ -> do
+ state <- getCppState
+ -- case (trace ("CPP state:" ++ show state) state) of
+ case state of
+ CppIgnoring -> contIgnoreTok tk
+ _ -> contInner tk
+ )
+
+-- ---------------------------------------------------------------------
+
+preprocessElse :: PP ()
+preprocessElse = do
+ accepting <- getAccepting
+ setAccepting (not accepting)
+
+preprocessEnd :: PP ()
+preprocessEnd = do
+ -- TODO: nested context
+ setAccepting True
+
+processCppToks :: FastString -> PP ()
+processCppToks fs = do
+ let
+ get (L _ (ITcpp _ s)) = s
+ get _ = error "should not"
+ -- Combine any prior continuation tokens
+ cs <- popContinuation
+ processCpp (reverse $ fs : map get cs)
+ return ()
+
+processCpp :: [FastString] -> PP ()
+processCpp fs = do
+ -- traceM $ "processCpp: fs=" ++ show fs
+ -- let s = cppInitial fs
+ let s = cppInitial fs
+ case regularParse cppDirective s of
+ Left err -> error $ show err
+ Right (CppInclude filename) -> do
+ ppInclude filename
+ Right (CppDefine name def) -> do
+ ppDefine name def
+ Right (CppIfdef name) -> do
+ defined <- ppIsDefined name
+ setAccepting defined
+ Right (CppIfndef name) -> do
+ defined <- ppIsDefined name
+ setAccepting (not defined)
+ Right CppElse -> do
+ accepting <- getAccepting
+ setAccepting (not accepting)
+ return ()
+ Right CppEndif -> do
+ -- TODO: nested states
+ setAccepting True
+ return ()
+
+ -- return (trace ("processCpp:s=" ++ show s) ())
+ return ()
+
+-- ---------------------------------------------------------------------
+-- Preprocessor state functions
+
+getCppState :: PP CppState
+getCppState = do
+ accepting <- getAccepting
+ if accepting
+ then return CppNormal
+ else return CppIgnoring
+
+-- pp_context stack start -----------------
+
+pushContext :: Token -> PP ()
+pushContext new =
+ P $ \s -> POk s{pp = (pp s){pp_context = new : pp_context (pp s)}} ()
+
+popContext :: PP ()
+popContext =
+ P $ \s ->
+ let
+ new_context = case pp_context (pp s) of
+ [] -> []
+ (_ : t) -> t
+ in
+ POk s{pp = (pp s){pp_context = new_context}} ()
+
+peekContext :: PP Token
+peekContext =
+ P $ \s ->
+ let
+ r = case pp_context (pp s) of
+ [] -> ITeof -- Anything really, for now, except a CPP one
+ (h : _) -> h
+ in
+ POk s r
+
+setAccepting :: Bool -> PP ()
+setAccepting on =
+ P $ \s -> POk s{pp = (pp s){pp_accepting = on}} ()
+
+getAccepting :: PP Bool
+getAccepting = P $ \s -> POk s (pp_accepting (pp s))
+
+-- -------------------------------------
+
+pushContinuation :: Located Token -> PP ()
+pushContinuation new =
+ P $ \s -> POk s{pp = (pp s){pp_continuation = new : pp_continuation (pp s)}} ()
+
+popContinuation :: PP [Located Token]
+popContinuation =
+ P $ \s -> POk s{pp = (pp s){pp_continuation = []}} (pp_continuation (pp s))
+
+-- pp_context stack end -------------------
+
+-- pp_include start -----------------------
+
+getInclude :: String -> PP (Maybe StringBuffer)
+getInclude filename = P $ \s -> POk s (Map.lookup filename (pp_includes (pp s)))
+
+pushIncludeLoc :: Lexer.AlexInput -> PP ()
+pushIncludeLoc pos =
+ P $ \s -> POk s{pp = (pp s){pp_include_stack = pos : pp_include_stack (pp s)}} ()
+
+popIncludeLoc :: PP (Maybe Lexer.AlexInput)
+popIncludeLoc =
+ P $ \s ->
+ let
+ (new_st, r) = case pp_include_stack (pp s) of
+ [] -> ([], Nothing)
+ (h : t) -> (t, Just h)
+ in
+ POk s{pp = (pp s){pp_include_stack = new_st}} r
+
+-- pp_include end -------------------------
+
+-- definitions start --------------------
+
+ppInclude :: String -> PP ()
+ppInclude filename = do
+ mSrc <- getInclude filename
+ case mSrc of
+ Nothing -> return ()
+ Just src -> do
+ origInput <- Lexer.getInput
+ pushIncludeLoc origInput
+ let loc = PsLoc (mkRealSrcLoc (mkFastString filename) 1 1) (BufPos 0)
+ Lexer.setInput (Lexer.AI loc src)
+ return $ trace ("ppInclude:mSrc=[" ++ show mSrc ++ "]") ()
+
+-- return $ trace ("ppInclude:filename=[" ++ filename ++ "]") ()
+
+ppDefine :: String -> [String] -> PP ()
+ppDefine name val = P $ \s ->
+ -- POk s{pp = (pp s){pp_defines = Set.insert (cleanTokenString def) (pp_defines (pp s))}} ()
+ POk s{pp = (pp s){pp_defines = Map.insert (trace ("ppDefine:def=[" ++ name ++ "]") name) val (pp_defines (pp s))}} ()
+
+ppIsDefined :: String -> PP Bool
+ppIsDefined def = P $ \s ->
+ -- POk s (Map.member def (pp_defines (pp s)))
+ POk s (Map.member (trace ("ppIsDefined:def=[" ++ def ++ "]") def) (pp_defines (pp s)))
+
+-- | Take a @FastString@ of the form "#define FOO\n" and strip off all but "FOO"
+cleanTokenString :: FastString -> String
+cleanTokenString fs = r
+ where
+ ss = dropWhile (\c -> not $ isSpace c) (unpackFS fs)
+ r = init ss
+
+parseDefine :: FastString -> Maybe (String, [String])
+parseDefine fs = r
+ where
+ -- r = Just (cleanTokenString s, "")
+ r = case regularParse cppDefinition (unpackFS fs) of
+ Left _ -> Nothing
+ Right v -> Just v
+
+-- =====================================================================
+
+{- | Do cpp initial processing, as per https://gcc.gnu.org/onlinedocs/cpp/Initial-processing.html
+See Note [GhcCPP Initial Processing]
+-}
+cppInitial :: [FastString] -> String
+cppInitial fs = r
+ where
+ -- go fs' = reverse $ drop 2 $ reverse $ unpackFS fs'
+ r = concatMap unpackFS fs
+
+{-
+Note [GhcCPP Initial Processing]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+This processing is based on the description at
+https://gcc.gnu.org/onlinedocs/cpp/Initial-processing.html
+
+It is only done for lines starting with a preprocessor
+directive.
+
+1. Broken into lines. We rely on the GHC Lexer to do this
+2. Trigraphs are not processed
+3. Continued lines are merged into a single line
+ and is handled in the Lexer.
+4. All comments are replaced with a single space
+
+-}
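For anyone who wants to exercise the split-out parser on its own, here is a minimal sketch (not part of this commit) that drives regularParse and cppDirective from the new Parse module by hand. The module name DirectiveDemo is invented for illustration, and it assumes utils/check-cpp/Parse.hs is on the load path (e.g. ghci -iutils/check-cpp):

module DirectiveDemo where

import Parse

-- Each call is expected to yield Right <directive> for well-formed
-- input, or Left <ParseError> otherwise.
demo :: IO ()
demo = do
  print (regularParse cppDirective "#ifdef FOO")          -- Right (CppIfdef "FOO")
  print (regularParse cppDirective "#define FOO 1")       -- Right (CppDefine "FOO" ["1"])
  print (regularParse cppDirective "#include \"bar.h\"")  -- Right (CppInclude "bar.h")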
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/9c7e0f6989772855c535829f761e60ae396843dd