[Git][ghc/ghc][wip/az/ghc-cpp] Split into separate files

Alan Zimmerman (@alanz) gitlab at gitlab.haskell.org
Thu Oct 5 19:34:09 UTC 2023



Alan Zimmerman pushed to branch wip/az/ghc-cpp at Glasgow Haskell Compiler / GHC


Commits:
9c7e0f69 by Alan Zimmerman at 2023-10-05T20:33:42+01:00
Split into separate files

- - - - -


4 changed files:

- utils/check-cpp/Main.hs
- + utils/check-cpp/Parse.hs
- + utils/check-cpp/ParseSimulate.hs
- + utils/check-cpp/PreProcess.hs


Changes:

=====================================
utils/check-cpp/Main.hs
=====================================
@@ -2,16 +2,13 @@
 {-# LANGUAGE BangPatterns #-}
 
 import Control.Monad.IO.Class
-import Data.Char
 import Data.Data hiding (Fixity)
 import Data.List
-import Data.Map (Map)
 import qualified Data.Map as Map
 import Debug.Trace
 import GHC
 import qualified GHC.Data.EnumSet as EnumSet
 import GHC.Data.FastString
-import qualified GHC.Data.Strict as Strict
 import GHC.Data.StringBuffer
 import GHC.Driver.Config.Parser
 import GHC.Driver.Errors.Types
@@ -20,7 +17,7 @@ import qualified GHC.Driver.Session as GHC
 import GHC.Hs.Dump
 import qualified GHC.LanguageExtensions as LangExt
 import GHC.Parser.Errors.Ppr ()
-import GHC.Parser.Lexer (P (..), PState (..), ParseResult (..), Token (..))
+import GHC.Parser.Lexer (P (..), ParseResult (..), Token (..))
 import qualified GHC.Parser.Lexer as GHC
 import qualified GHC.Parser.Lexer as Lexer
 import GHC.Types.Error
@@ -28,15 +25,10 @@ import GHC.Types.SrcLoc
 import GHC.Utils.Error
 import GHC.Utils.Outputable
 import qualified Text.Parsec as Parsec
-import Text.Parsec.Char as PS
-import Text.Parsec.Combinator as PS
-import Text.Parsec.Prim as PS
-import Text.Parsec.String (Parser)
 
--- import Text.Parsec.Char
--- import FunctionsAndTypesForParsing (regularParse, parseWithEof, parseWithLeftOver)
--- import Text.Parsec.String.Combinator (many1)
--- import Text.Parsec.Combinator (many1)
+import Parse
+import ParseSimulate
+import PreProcess
 
 -- ---------------------------------------------------------------------
 
@@ -47,359 +39,6 @@ showAst ast =
 
 -- =====================================================================
 
-type PP = P PpState
-
-initPpState :: PpState
-initPpState =
-    PpState
-        { pp_defines = Map.empty
-        , pp_includes = Map.empty
-        , pp_include_stack = []
-        , pp_continuation = []
-        , pp_context = []
-        , pp_accepting = True
-        }
-
-data PpState = PpState
-    { pp_defines :: !(Map String [String])
-    , pp_includes :: !(Map String StringBuffer)
-    , pp_include_stack :: ![Lexer.AlexInput]
-    , pp_continuation :: ![Located Token]
-    , pp_context :: ![Token] -- What preprocessor directive we are currently processing
-    , pp_accepting :: !Bool
-    }
-    deriving (Show)
--- deriving instance Show Lexer.AlexInput
-
--- =====================================================================
-
-ppLexer, ppLexerDbg :: Bool -> (Located Token -> PP a) -> PP a
--- Use this instead of 'lexer' in GHC.Parser to dump the tokens for debugging.
-ppLexerDbg queueComments cont = ppLexer queueComments contDbg
-  where
-    contDbg tok = trace ("pptoken: " ++ show (unLoc tok)) (cont tok)
-ppLexer queueComments cont =
-    Lexer.lexer
-        queueComments
-        ( \tk ->
-            let
-                contInner t = (trace ("ppLexer: tk=" ++ show (unLoc tk, unLoc t)) cont) t
-                -- contPush = pushContext (unLoc tk) >> contIgnoreTok tk
-                contIgnoreTok (L l tok) = do
-                    case l of
-                        RealSrcSpan r (Strict.Just b) -> Lexer.queueIgnoredToken (L (PsSpan r b) tok)
-                        _ -> return ()
-                    ppLexer queueComments cont
-             in
-                -- case tk of
-                case (trace ("M.ppLexer:tk=" ++ show (unLoc tk)) tk) of
-                    L _ ITeof -> do
-                      mInp <- popIncludeLoc
-                      case mInp of
-                        Nothing -> contInner tk
-                        Just inp -> do
-                          Lexer.setInput inp
-                          ppLexer queueComments cont
-                    L _ (ITcpp continuation s) -> do
-                        if continuation
-                            then pushContinuation tk
-                            else processCppToks s
-                        contIgnoreTok tk
-                    _ -> do
-                        state <- getCppState
-                        -- case (trace ("CPP state:" ++ show state) state) of
-                        case state of
-                            CppIgnoring -> contIgnoreTok tk
-                            _ -> contInner tk
-        )
-
-preprocessElse :: PP ()
-preprocessElse = do
-    accepting <- getAccepting
-    setAccepting (not accepting)
-
-preprocessEnd :: PP ()
-preprocessEnd = do
-    -- TODO: nested context
-    setAccepting True
-
-processCppToks :: FastString -> PP ()
-processCppToks fs = do
-    let
-        get (L _ (ITcpp _ s)) = s
-        get _ = error "should not"
-    -- Combine any prior continuation tokens
-    cs <- popContinuation
-    processCpp (reverse $ fs : map get cs)
-    return ()
-
-processCpp :: [FastString] -> PP ()
-processCpp fs = do
-    -- traceM $ "processCpp: fs=" ++ show fs
-    -- let s = cppInitial fs
-    let s = cppInitial fs
-    case regularParse cppDirective s of
-        Left err -> error $ show err
-        Right (CppInclude filename) -> do
-            ppInclude filename
-        Right (CppDefine name def) -> do
-            ppDefine name def
-        Right (CppIfdef name) -> do
-            defined <- ppIsDefined name
-            setAccepting defined
-        Right (CppIfndef name) -> do
-            defined <- ppIsDefined name
-            setAccepting (not defined)
-        Right CppElse -> do
-            accepting <- getAccepting
-            setAccepting (not accepting)
-            return ()
-        Right CppEndif -> do
-            -- TODO: nested states
-            setAccepting True
-            return ()
-
-    -- return (trace ("processCpp:s=" ++ show s) ())
-    return ()
-
--- ---------------------------------------------------------------------
--- Preprocessor state functions
-
-data CppState
-    = CppIgnoring
-    | CppNormal
-    deriving (Show)
-
-getCppState :: PP CppState
-getCppState = do
-    accepting <- getAccepting
-    if accepting
-        then return CppNormal
-        else return CppIgnoring
-
--- pp_context stack start -----------------
-
-pushContext :: Token -> PP ()
-pushContext new =
-    P $ \s -> POk s{pp = (pp s){pp_context = new : pp_context (pp s)}} ()
-
-popContext :: PP ()
-popContext =
-    P $ \s ->
-        let
-            new_context = case pp_context (pp s) of
-                [] -> []
-                (_ : t) -> t
-         in
-            POk s{pp = (pp s){pp_context = new_context}} ()
-
-peekContext :: PP Token
-peekContext =
-    P $ \s ->
-        let
-            r = case pp_context (pp s) of
-                [] -> ITeof -- Anything really, for now, except a CPP one
-                (h : _) -> h
-         in
-            POk s r
-
-setAccepting :: Bool -> PP ()
-setAccepting on =
-    P $ \s -> POk s{pp = (pp s){pp_accepting = on}} ()
-
-getAccepting :: PP Bool
-getAccepting = P $ \s -> POk s (pp_accepting (pp s))
-
--- -------------------------------------
-
-pushContinuation :: Located Token -> PP ()
-pushContinuation new =
-    P $ \s -> POk s{pp = (pp s){pp_continuation = new : pp_continuation (pp s)}} ()
-
-popContinuation :: PP [Located Token]
-popContinuation =
-    P $ \s -> POk s{pp = (pp s){pp_continuation = []}} (pp_continuation (pp s))
-
--- pp_context stack end -------------------
-
--- pp_include start -----------------------
-
-getInclude :: String -> PP (Maybe StringBuffer)
-getInclude filename = P $ \s -> POk s (Map.lookup filename (pp_includes (pp s)))
-
-pushIncludeLoc :: Lexer.AlexInput -> PP ()
-pushIncludeLoc pos
-  = P $ \s -> POk s {pp = (pp s){ pp_include_stack = pos: pp_include_stack (pp s)}} ()
-
-popIncludeLoc :: PP (Maybe Lexer.AlexInput)
-popIncludeLoc =
-    P $ \s ->
-          let
-            (new_st,r) = case pp_include_stack (pp s) of
-              [] ->([], Nothing)
-              (h:t) -> (t, Just h)
-          in
-            POk s{pp = (pp s){pp_include_stack = new_st }} r
-
--- pp_include end -------------------------
-
--- definitions start --------------------
-
-ppInclude :: String -> PP ()
-ppInclude filename = do
-    mSrc <- getInclude filename
-    case mSrc of
-      Nothing -> return ()
-      Just src -> do
-        origInput <- Lexer.getInput
-        pushIncludeLoc origInput
-        let loc =  PsLoc (mkRealSrcLoc (mkFastString filename) 1 1) (BufPos 0)
-        Lexer.setInput (Lexer.AI  loc src)
-    return $ trace ("ppInclude:mSrc=[" ++ show mSrc ++ "]") ()
-    -- return $ trace ("ppInclude:filename=[" ++ filename ++ "]") ()
-
-ppDefine :: String -> [String] -> PP ()
-ppDefine name val = P $ \s ->
-    -- POk s{pp = (pp s){pp_defines = Set.insert (cleanTokenString def) (pp_defines (pp s))}} ()
-    POk s{pp = (pp s){pp_defines = Map.insert (trace ("ppDefine:def=[" ++ name ++ "]") name) val (pp_defines (pp s))}} ()
-
-ppIsDefined :: String -> PP Bool
-ppIsDefined def = P $ \s ->
-    -- POk s (Map.member def (pp_defines (pp s)))
-    POk s (Map.member (trace ("ppIsDefined:def=[" ++ def ++ "]") def) (pp_defines (pp s)))
-
--- | Take a @FastString@ of the form "#define FOO\n" and strip off all but "FOO"
-cleanTokenString :: FastString -> String
-cleanTokenString fs = r
-  where
-    ss = dropWhile (\c -> not $ isSpace c) (unpackFS fs)
-    r = init ss
-
-parseDefine :: FastString -> Maybe (String, [String])
-parseDefine fs = r
-  where
-    -- r = Just (cleanTokenString s, "")
-    r = case regularParse cppDefinition (unpackFS fs) of
-        Left _ -> Nothing
-        Right v -> Just v
-
--- =====================================================================
--- Parsec parsing
-type CppParser = Parsec String ()
-
-regularParse :: Parser a -> String -> Either Parsec.ParseError a
-regularParse p = PS.parse p ""
-
--- TODO: delete this
-cppDefinition :: CppParser (String, [String])
-cppDefinition = do
-    _ <- PS.char '#'
-    _ <- whiteSpace
-    _ <- lexeme (PS.string "define")
-    name <- cppToken
-    definition <- cppTokens
-    return (name, definition)
-
-data CppDirective
-    = CppInclude String
-    | CppDefine String [String]
-    | CppIfdef String
-    | CppIfndef String
-    | CppElse
-    | CppEndif
-    deriving (Show, Eq)
-
-cppDirective :: CppParser CppDirective
-cppDirective = do
-    _ <- PS.char '#'
-    _ <- whiteSpace
-    choice
-        [ cppKw "define" >> cmdDefinition
-        , try $ cppKw "include" >> cmdInclude
-        , try $ cppKw "ifdef" >> cmdIfdef
-        , cppKw "ifndef" >> cmdIfndef
-        , try $ cppKw "else" >> return CppElse
-        , cppKw "endif" >> return CppEndif
-        -- , cppKw "if" CppIfKw
-        -- , cppKw "elif" CppElifKw
-        -- , cppKw "undef" CppUndefKw
-        -- , cppKw "error" CppErrorKw
-        ]
-
-cmdInclude :: CppParser CppDirective
-cmdInclude = do
-    _ <- string "\""
-    filename <- many1 (satisfy (\c -> not (isSpace c || c == '"')))
-    _ <- string "\""
-    return $ CppInclude filename
-
-cmdDefinition :: CppParser CppDirective
-cmdDefinition = do
-    name <- cppToken
-    definition <- cppTokens
-    return $ CppDefine name definition
-
-cmdIfdef :: CppParser CppDirective
-cmdIfdef = do
-    name <- cppToken
-    return $ CppIfdef name
-
-cmdIfndef :: CppParser CppDirective
-cmdIfndef = do
-    name <- cppToken
-    return $ CppIfndef name
-
-cppKw :: String -> CppParser ()
-cppKw kw = do
-    _ <- lexeme (PS.string kw)
-    return ()
-
-cppComment :: CppParser ()
-cppComment = do
-    _ <- PS.string "/*"
-    _ <- PS.manyTill PS.anyChar (PS.try (PS.string "*/"))
-    return ()
-
-whiteSpace :: CppParser ()
-whiteSpace = do
-    _ <- PS.many (PS.choice [cppComment, PS.space >> return ()])
-    return ()
-
-lexeme :: CppParser a -> CppParser a
-lexeme p = p <* whiteSpace
-
-cppToken :: CppParser String
-cppToken = lexeme (PS.many1 (PS.satisfy (\c -> not (isSpace c))))
-
-cppTokens :: CppParser [String]
-cppTokens = PS.many cppToken
-
-{- | Do cpp initial processing, as per https://gcc.gnu.org/onlinedocs/cpp/Initial-processing.html
-See Note [GhcCPP Initial Processing]
--}
-cppInitial :: [FastString] -> String
-cppInitial fs = r
-  where
-    -- go fs' = reverse $ drop 2 $ reverse $ unpackFS fs'
-    r = concatMap unpackFS fs
-
-{-
-Note [GhcCPP Initial Processing]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-This processing is based on the description at
-https://gcc.gnu.org/onlinedocs/cpp/Initial-processing.html
-
-It is only done for lines starting with a preprocessor
-directive.
-
-1. Broken into lines.  We rely on the GHC Lexer to do this
-2. Trigraphs are not processed
-3. Continued lines are merged into a single line
-   and this is handled in the Lexer.
-4. All comments are replaced with a single space
-
--}
-
 -- =====================================================================
 -- Emulate the parser
 
@@ -422,8 +61,8 @@ strGetToks :: Includes -> Lexer.ParserOpts -> FilePath -> String -> [Located Tok
 -- strGetToks includes popts filename str = reverse $ lexAll pstate
 strGetToks includes popts filename str = reverse $ lexAll (trace ("pstate=" ++ show initState) pstate)
   where
-    includeMap = Map.fromList $ map (\(k,v) -> (k, stringToStringBuffer (intercalate "\n" v))) includes
-    initState = initPpState { pp_includes = includeMap }
+    includeMap = Map.fromList $ map (\(k, v) -> (k, stringToStringBuffer (intercalate "\n" v))) includes
+    initState = initPpState{pp_includes = includeMap}
     pstate = Lexer.initParserState initState popts buf loc
     loc = mkRealSrcLoc (mkFastString filename) 1 1
     buf = stringToStringBuffer str
@@ -494,78 +133,6 @@ ghcWrapper libdir a =
 
 -- ---------------------------------------------------------------------
 
-parseModuleNoHaddock :: PP [Located Token]
-parseModuleNoHaddock = happySomeParser
-  where
-    -- happySomeParser = happyThen (happyParse 0#) (\x -> happyReturn (let {(HappyWrap35 x') = happyOut35 x} in x'))
-    happySomeParser = (>>=) (happyParse 0) (\x -> return x)
-
-happyParse :: Int -> PP [Located Token]
-happyParse start_state = happyNewToken start_state [] []
-
-happyNewToken :: Int -> [Int] -> [Located Token] -> PP [Located Token]
-happyNewToken action sts stk =
-    -- lexer
-    ppLexerDbg
-        True
-        ( \tk ->
-            let cont i =
-                    trace ("happyNewToken:tk=" ++ show tk)
-                        $ happyDoAction i tk action sts stk
-             in case tk of
-                    L _ ITeof -> happyDoAction 169 tk action sts stk
-                    _ -> cont 5
-                    -- _ -> happyError' (tk, [])
-        )
-
-happyDoAction :: Int -> Located Token -> Int -> [Int] -> [Located Token] -> PP [Located Token]
--- happyDoAction num tk action sts stk = P $ \s -> POk s tk
-happyDoAction num tk action sts stk =
-    case num of
-        1 -> happyShift 2 num tk action sts stk
-        2 -> happyShift 5 num tk action sts stk
-        3 -> happyShift 5 num tk action sts stk
-        4 -> happyShift 5 num tk action sts stk
-        5 -> happyShift 5 num tk action sts stk
-        50 -> happyAccept num tk action sts stk
-        169 -> happyAccept num tk action sts stk
-        i -> happyFail ["failing:" ++ show i] i tk action sts stk
-
--- happyAccept j tk st sts (HappyStk ans _) =
---         (happyTcHack j (happyTcHack st)) (happyReturn1 ans)
-
-happyAccept :: Int -> Located Token -> Int -> [Int] -> [Located Token] -> PP [Located Token]
-happyAccept _j tk _st _sts stk =
-    trace ("happyAccept:" ++ show tk)
-        $ return stk
-
--- happyReturn1 :: a -> P a
--- happyReturn1 = return
-
-happyShift :: Int -> Int -> Located Token -> Int -> [Int] -> [Located Token] -> PP [Located Token]
-happyShift new_state _i tk st sts stk = do
-    happyNewToken new_state (st : sts) (tk : stk)
-
--- happyShift new_state i tk st sts stk =
---      happyNewToken new_state (HappyCons (st) (sts)) ((happyInTok (tk))`HappyStk`stk)
-
-happyFail :: [String] -> Int -> Located Token -> p2 -> p3 -> p4 -> PP a
-happyFail explist i tk _old_st _ _stk =
-    trace ("failing" ++ show explist)
-        $ happyError_ explist i tk
-
-happyError_ :: [String] -> p1 -> Located Token -> PP a
-happyError_ explist _ tk = happyError' (tk, explist)
-
-notHappyAtAll :: a
-notHappyAtAll = Prelude.error "Internal Happy error\n"
-
-happyError' :: (Located Token, [String]) -> PP a
-happyError' tk = (\(_tokens, _explist) -> happyError) tk
-
-happyError :: PP a
-happyError = Lexer.srcParseFail
-
 -- =====================================================================
 -- ---------------------------------------------------------------------
 
@@ -710,7 +277,8 @@ t9 = regularParse cppDirective "#define VERSION_ghc_exactprint \"1.7.0.1\""
 
 t10 :: IO ()
 t10 = do
-    doTestWithIncludes testIncludes
+    doTestWithIncludes
+        testIncludes
         [ "#include \"bar.h\""
         , ""
         , "#ifdef FOO"
@@ -725,7 +293,8 @@ testIncludes =
     [
         ( "bar.h"
         , ["#include \"sub.h\""]
-        ),
+        )
+    ,
         ( "sub.h"
         , ["#define FOO"]
         )


=====================================
utils/check-cpp/Parse.hs
=====================================
@@ -0,0 +1,194 @@
+module Parse where
+
+import Data.Char
+import GHC.Parser.Errors.Ppr ()
+import qualified Text.Parsec as Parsec
+import Text.Parsec.Char as PS
+import Text.Parsec.Combinator as PS
+import Text.Parsec.Prim as PS
+import Text.Parsec.String (Parser)
+
+-- import Debug.Trace
+
+-- ---------------------------------------------------------------------
+
+type CppParser = Parsec String ()
+
+regularParse :: Parser a -> String -> Either Parsec.ParseError a
+regularParse p = PS.parse p ""
+
+-- TODO: delete this
+cppDefinition :: CppParser (String, [String])
+cppDefinition = do
+    _ <- PS.char '#'
+    _ <- whiteSpace
+    _ <- lexeme (PS.string "define")
+    name <- cppToken
+    definition <- cppTokens
+    return (name, definition)
+
+data CppDirective
+    = CppInclude String
+    | CppDefine String [String]
+    | CppIfdef String
+    | CppIfndef String
+    | CppElse
+    | CppEndif
+    deriving (Show, Eq)
+
+cppDirective :: CppParser CppDirective
+cppDirective = do
+    _ <- PS.char '#'
+    _ <- whiteSpace
+    choice
+        [ cppKw "define" >> cmdDefinition
+        , try $ cppKw "include" >> cmdInclude
+        , try $ cppKw "ifdef" >> cmdIfdef
+        , cppKw "ifndef" >> cmdIfndef
+        , try $ cppKw "else" >> return CppElse
+        , cppKw "endif" >> return CppEndif
+        -- , cppKw "if" CppIfKw
+        -- , cppKw "elif" CppElifKw
+        -- , cppKw "undef" CppUndefKw
+        -- , cppKw "error" CppErrorKw
+        ]
+
+cmdInclude :: CppParser CppDirective
+cmdInclude = do
+    _ <- string "\""
+    filename <- many1 (satisfy (\c -> not (isSpace c || c == '"')))
+    _ <- string "\""
+    return $ CppInclude filename
+
+cmdDefinition :: CppParser CppDirective
+cmdDefinition = do
+    name <- cppToken
+    definition <- cppTokens
+    return $ CppDefine name definition
+
+cmdIfdef :: CppParser CppDirective
+cmdIfdef = do
+    name <- cppToken
+    return $ CppIfdef name
+
+cmdIfndef :: CppParser CppDirective
+cmdIfndef = do
+    name <- cppToken
+    return $ CppIfndef name
+
+cppKw :: String -> CppParser ()
+cppKw kw = do
+    _ <- lexeme (PS.string kw)
+    return ()
+
+cppComment :: CppParser ()
+cppComment = do
+    _ <- PS.string "/*"
+    _ <- PS.manyTill PS.anyChar (PS.try (PS.string "*/"))
+    return ()
+
+whiteSpace :: CppParser ()
+whiteSpace = do
+    _ <- PS.many (PS.choice [cppComment, PS.space >> return ()])
+    return ()
+
+lexeme :: CppParser a -> CppParser a
+lexeme p = p <* whiteSpace
+
+cppToken :: CppParser String
+cppToken = lexeme (PS.many1 (PS.satisfy (\c -> not (isSpace c))))
+
+cppTokens :: CppParser [String]
+cppTokens = PS.many cppToken
+
+-- ---------------------------------------------------------------------
+-- import GHC.S.Types
+
+-- parseS :: String -> Either ParseError S
+-- parseS = parse expr ""
+
+-- lexer :: P.TokenParser ()
+-- lexer =
+--     P.makeTokenParser
+--         ( emptyDef
+--             { P.reservedNames =
+--                 [ "define"
+--                 , "include"
+--                 , "undef"
+--                 , "error"
+--                 , "ifdef"
+--                 , "ifndef"
+--                 , "if"
+--                 , "elif"
+--                 , "else"
+--                 , "endif"
+--                 ]
+--             }
+--         )
+
+-- slam :: Parser S
+-- slam = P.parens lexer $ do
+--     P.reserved lexer "lam"
+--     ident <- P.identifier lexer
+--     body <- expr
+--     return (SLam (Atom ident) body)
+
+-- slet :: Parser S
+-- slet = P.parens lexer $ do
+--     P.reserved lexer "let"
+--     (ident, e1) <- P.parens lexer $ do
+--         idnt <- P.identifier lexer
+--         expr1 <- expr
+--         return (idnt, expr1)
+--     e2 <- expr
+--     return (SLet (Atom ident) e1 e2)
+
+-- sletrec :: Parser S
+-- sletrec = P.parens lexer $ do
+--     P.reserved lexer "letrec"
+--     ls <- P.parens lexer $ many1 $ P.parens lexer $ do
+--         idnt <- P.identifier lexer
+--         expr1 <- expr
+--         return (Atom idnt, expr1)
+--     e2 <- expr
+--     return (SLetRec ls e2)
+
+-- scase :: Parser S
+-- scase = P.parens lexer $ do
+--     P.reserved lexer "case"
+--     e <- expr
+--     alt <- optionMaybe (P.identifier lexer)
+--     alts <- P.parens lexer $ many1 $ P.parens lexer $ do
+--         pat <- expr
+--         ex <- expr
+--         return (pat, ex)
+--     case alt of
+--         Just alt -> return (SCase (Atom alt) e alts)
+--         Nothing -> return (SCase (Atom "_") e alts)
+
+-- swild :: Parser S
+-- swild = do
+--     P.symbol lexer "_"
+--     return SWild
+
+-- sbinop :: Parser S
+-- sbinop = P.parens lexer $ do
+--     e1 <- expr
+--     op <- P.operator lexer
+--     e2 <- expr
+--     return (SBinOp (Atom op) e1 e2)
+
+-- expr :: Parser S
+-- expr =
+--     choice
+--         [ try slam
+--         , try sbinop
+--         , try slet
+--         , try sletrec
+--         , try scase
+--         , STuple <$> P.parens lexer (many expr)
+--         , swild
+--         , SAtom <$> (Atom <$> (P.identifier lexer))
+--         , SString <$> P.stringLiteral lexer
+--         , SInt <$> P.integer lexer
+--         ]


=====================================
utils/check-cpp/ParseSimulate.hs
=====================================
@@ -0,0 +1,83 @@
+module ParseSimulate where
+
+import Debug.Trace
+import GHC
+import GHC.Parser.Errors.Ppr ()
+import GHC.Parser.Lexer (Token (..))
+import qualified GHC.Parser.Lexer as Lexer
+
+import PreProcess
+
+-- ---------------------------------------------------------------------
+
+parseModuleNoHaddock :: PP [Located Token]
+parseModuleNoHaddock = happySomeParser
+  where
+    -- happySomeParser = happyThen (happyParse 0#) (\x -> happyReturn (let {(HappyWrap35 x') = happyOut35 x} in x'))
+    happySomeParser = (>>=) (happyParse 0) (\x -> return x)
+
+happyParse :: Int -> PP [Located Token]
+happyParse start_state = happyNewToken start_state [] []
+
+happyNewToken :: Int -> [Int] -> [Located Token] -> PP [Located Token]
+happyNewToken action sts stk =
+    -- lexer
+    ppLexerDbg
+        True
+        ( \tk ->
+            let cont i =
+                    trace ("happyNewToken:tk=" ++ show tk)
+                        $ happyDoAction i tk action sts stk
+             in case tk of
+                    L _ ITeof -> happyDoAction 169 tk action sts stk
+                    _ -> cont 5
+                    -- _ -> happyError' (tk, [])
+        )
+
+happyDoAction :: Int -> Located Token -> Int -> [Int] -> [Located Token] -> PP [Located Token]
+-- happyDoAction num tk action sts stk = P $ \s -> POk s tk
+happyDoAction num tk action sts stk =
+    case num of
+        1 -> happyShift 2 num tk action sts stk
+        2 -> happyShift 5 num tk action sts stk
+        3 -> happyShift 5 num tk action sts stk
+        4 -> happyShift 5 num tk action sts stk
+        5 -> happyShift 5 num tk action sts stk
+        50 -> happyAccept num tk action sts stk
+        169 -> happyAccept num tk action sts stk
+        i -> happyFail ["failing:" ++ show i] i tk action sts stk
+
+-- happyAccept j tk st sts (HappyStk ans _) =
+--         (happyTcHack j (happyTcHack st)) (happyReturn1 ans)
+
+happyAccept :: Int -> Located Token -> Int -> [Int] -> [Located Token] -> PP [Located Token]
+happyAccept _j tk _st _sts stk =
+    trace ("happyAccept:" ++ show tk)
+        $ return stk
+
+-- happyReturn1 :: a -> P a
+-- happyReturn1 = return
+
+happyShift :: Int -> Int -> Located Token -> Int -> [Int] -> [Located Token] -> PP [Located Token]
+happyShift new_state _i tk st sts stk = do
+    happyNewToken new_state (st : sts) (tk : stk)
+
+-- happyShift new_state i tk st sts stk =
+--      happyNewToken new_state (HappyCons (st) (sts)) ((happyInTok (tk))`HappyStk`stk)
+
+happyFail :: [String] -> Int -> Located Token -> p2 -> p3 -> p4 -> PP a
+happyFail explist i tk _old_st _ _stk =
+    trace ("failing" ++ show explist)
+        $ happyError_ explist i tk
+
+happyError_ :: [String] -> p1 -> Located Token -> PP a
+happyError_ explist _ tk = happyError' (tk, explist)
+
+notHappyAtAll :: a
+notHappyAtAll = Prelude.error "Internal Happy error\n"
+
+happyError' :: (Located Token, [String]) -> PP a
+happyError' tk = (\(_tokens, _explist) -> happyError) tk
+
+happyError :: PP a
+happyError = Lexer.srcParseFail


=====================================
utils/check-cpp/PreProcess.hs
=====================================
@@ -0,0 +1,288 @@
+module PreProcess where
+
+import Data.Char
+import Data.Map (Map)
+import qualified Data.Map as Map
+import GHC
+import GHC.Data.FastString
+import qualified GHC.Data.Strict as Strict
+import GHC.Data.StringBuffer
+import GHC.Parser.Errors.Ppr ()
+import GHC.Parser.Lexer (P (..), PState (..), ParseResult (..), Token (..))
+import qualified GHC.Parser.Lexer as Lexer
+import GHC.Types.SrcLoc
+
+import Debug.Trace
+
+import Parse
+
+-- ---------------------------------------------------------------------
+
+type PP = P PpState
+
+initPpState :: PpState
+initPpState =
+    PpState
+        { pp_defines = Map.empty
+        , pp_includes = Map.empty
+        , pp_include_stack = []
+        , pp_continuation = []
+        , pp_context = []
+        , pp_accepting = True
+        }
+
+data PpState = PpState
+    { pp_defines :: !(Map String [String])
+    , pp_includes :: !(Map String StringBuffer)
+    , pp_include_stack :: ![Lexer.AlexInput]
+    , pp_continuation :: ![Located Token]
+    , pp_context :: ![Token] -- What preprocessor directive we are currently processing
+    , pp_accepting :: !Bool
+    }
+    deriving (Show)
+
+-- deriving instance Show Lexer.AlexInput
+-- ---------------------------------------------------------------------
+
+data CppState
+    = CppIgnoring
+    | CppNormal
+    deriving (Show)
+
+-- ---------------------------------------------------------------------
+
+ppLexer, ppLexerDbg :: Bool -> (Located Token -> PP a) -> PP a
+-- Use this instead of 'lexer' in GHC.Parser to dump the tokens for debugging.
+ppLexerDbg queueComments cont = ppLexer queueComments contDbg
+  where
+    contDbg tok = trace ("pptoken: " ++ show (unLoc tok)) (cont tok)
+ppLexer queueComments cont =
+    Lexer.lexer
+        queueComments
+        ( \tk ->
+            let
+                contInner t = (trace ("ppLexer: tk=" ++ show (unLoc tk, unLoc t)) cont) t
+                -- contPush = pushContext (unLoc tk) >> contIgnoreTok tk
+                contIgnoreTok (L l tok) = do
+                    case l of
+                        RealSrcSpan r (Strict.Just b) -> Lexer.queueIgnoredToken (L (PsSpan r b) tok)
+                        _ -> return ()
+                    ppLexer queueComments cont
+             in
+                -- case tk of
+                case (trace ("M.ppLexer:tk=" ++ show (unLoc tk)) tk) of
+                    L _ ITeof -> do
+                        mInp <- popIncludeLoc
+                        case mInp of
+                            Nothing -> contInner tk
+                            Just inp -> do
+                                Lexer.setInput inp
+                                ppLexer queueComments cont
+                    L _ (ITcpp continuation s) -> do
+                        if continuation
+                            then pushContinuation tk
+                            else processCppToks s
+                        contIgnoreTok tk
+                    _ -> do
+                        state <- getCppState
+                        -- case (trace ("CPP state:" ++ show state) state) of
+                        case state of
+                            CppIgnoring -> contIgnoreTok tk
+                            _ -> contInner tk
+        )
+
+-- ---------------------------------------------------------------------
+
+preprocessElse :: PP ()
+preprocessElse = do
+    accepting <- getAccepting
+    setAccepting (not accepting)
+
+preprocessEnd :: PP ()
+preprocessEnd = do
+    -- TODO: nested context
+    setAccepting True
+
+processCppToks :: FastString -> PP ()
+processCppToks fs = do
+    let
+        get (L _ (ITcpp _ s)) = s
+        get _ = error "should not"
+    -- Combine any prior continuation tokens
+    cs <- popContinuation
+    processCpp (reverse $ fs : map get cs)
+    return ()
+
+processCpp :: [FastString] -> PP ()
+processCpp fs = do
+    -- traceM $ "processCpp: fs=" ++ show fs
+    -- let s = cppInitial fs
+    let s = cppInitial fs
+    case regularParse cppDirective s of
+        Left err -> error $ show err
+        Right (CppInclude filename) -> do
+            ppInclude filename
+        Right (CppDefine name def) -> do
+            ppDefine name def
+        Right (CppIfdef name) -> do
+            defined <- ppIsDefined name
+            setAccepting defined
+        Right (CppIfndef name) -> do
+            defined <- ppIsDefined name
+            setAccepting (not defined)
+        Right CppElse -> do
+            accepting <- getAccepting
+            setAccepting (not accepting)
+            return ()
+        Right CppEndif -> do
+            -- TODO: nested states
+            setAccepting True
+            return ()
+
+    -- return (trace ("processCpp:s=" ++ show s) ())
+    return ()
+
+-- ---------------------------------------------------------------------
+-- Preprocessor state functions
+
+getCppState :: PP CppState
+getCppState = do
+    accepting <- getAccepting
+    if accepting
+        then return CppNormal
+        else return CppIgnoring
+
+-- pp_context stack start -----------------
+
+pushContext :: Token -> PP ()
+pushContext new =
+    P $ \s -> POk s{pp = (pp s){pp_context = new : pp_context (pp s)}} ()
+
+popContext :: PP ()
+popContext =
+    P $ \s ->
+        let
+            new_context = case pp_context (pp s) of
+                [] -> []
+                (_ : t) -> t
+         in
+            POk s{pp = (pp s){pp_context = new_context}} ()
+
+peekContext :: PP Token
+peekContext =
+    P $ \s ->
+        let
+            r = case pp_context (pp s) of
+                [] -> ITeof -- Anything really, for now, except a CPP one
+                (h : _) -> h
+         in
+            POk s r
+
+setAccepting :: Bool -> PP ()
+setAccepting on =
+    P $ \s -> POk s{pp = (pp s){pp_accepting = on}} ()
+
+getAccepting :: PP Bool
+getAccepting = P $ \s -> POk s (pp_accepting (pp s))
+
+-- -------------------------------------
+
+pushContinuation :: Located Token -> PP ()
+pushContinuation new =
+    P $ \s -> POk s{pp = (pp s){pp_continuation = new : pp_continuation (pp s)}} ()
+
+popContinuation :: PP [Located Token]
+popContinuation =
+    P $ \s -> POk s{pp = (pp s){pp_continuation = []}} (pp_continuation (pp s))
+
+-- pp_context stack end -------------------
+
+-- pp_include start -----------------------
+
+getInclude :: String -> PP (Maybe StringBuffer)
+getInclude filename = P $ \s -> POk s (Map.lookup filename (pp_includes (pp s)))
+
+pushIncludeLoc :: Lexer.AlexInput -> PP ()
+pushIncludeLoc pos =
+    P $ \s -> POk s{pp = (pp s){pp_include_stack = pos : pp_include_stack (pp s)}} ()
+
+popIncludeLoc :: PP (Maybe Lexer.AlexInput)
+popIncludeLoc =
+    P $ \s ->
+        let
+            (new_st, r) = case pp_include_stack (pp s) of
+                [] -> ([], Nothing)
+                (h : t) -> (t, Just h)
+         in
+            POk s{pp = (pp s){pp_include_stack = new_st}} r
+
+-- pp_include end -------------------------
+
+-- definitions start --------------------
+
+ppInclude :: String -> PP ()
+ppInclude filename = do
+    mSrc <- getInclude filename
+    case mSrc of
+        Nothing -> return ()
+        Just src -> do
+            origInput <- Lexer.getInput
+            pushIncludeLoc origInput
+            let loc = PsLoc (mkRealSrcLoc (mkFastString filename) 1 1) (BufPos 0)
+            Lexer.setInput (Lexer.AI loc src)
+    return $ trace ("ppInclude:mSrc=[" ++ show mSrc ++ "]") ()
+
+-- return $ trace ("ppInclude:filename=[" ++ filename ++ "]") ()
+
+ppDefine :: String -> [String] -> PP ()
+ppDefine name val = P $ \s ->
+    -- POk s{pp = (pp s){pp_defines = Set.insert (cleanTokenString def) (pp_defines (pp s))}} ()
+    POk s{pp = (pp s){pp_defines = Map.insert (trace ("ppDefine:def=[" ++ name ++ "]") name) val (pp_defines (pp s))}} ()
+
+ppIsDefined :: String -> PP Bool
+ppIsDefined def = P $ \s ->
+    -- POk s (Map.member def (pp_defines (pp s)))
+    POk s (Map.member (trace ("ppIsDefined:def=[" ++ def ++ "]") def) (pp_defines (pp s)))
+
+-- | Take a @FastString@ of the form "#define FOO\n" and strip off all but "FOO"
+cleanTokenString :: FastString -> String
+cleanTokenString fs = r
+  where
+    ss = dropWhile (\c -> not $ isSpace c) (unpackFS fs)
+    r = init ss
+
+parseDefine :: FastString -> Maybe (String, [String])
+parseDefine fs = r
+  where
+    -- r = Just (cleanTokenString s, "")
+    r = case regularParse cppDefinition (unpackFS fs) of
+        Left _ -> Nothing
+        Right v -> Just v
+
+-- =====================================================================
+
+{- | Do cpp initial processing, as per https://gcc.gnu.org/onlinedocs/cpp/Initial-processing.html
+See Note [GhcCPP Initial Processing]
+-}
+cppInitial :: [FastString] -> String
+cppInitial fs = r
+  where
+    -- go fs' = reverse $ drop 2 $ reverse $ unpackFS fs'
+    r = concatMap unpackFS fs
+
+{-
+Note [GhcCPP Initial Processing]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+This processing is based on the description at
+https://gcc.gnu.org/onlinedocs/cpp/Initial-processing.html
+
+It is only done for lines starting with a preprocessor
+directive.
+
+1. Broken into lines.  We rely on the GHC Lexer to do this
+2. Trigraphs are not processed
+3. Continued lines are merged into a single line
+   and this is handled in the Lexer.
+4. All comments are replaced with a single space
+
+-}
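
For a quick smoke test of the split-out Parsec layer, a minimal illustrative driver (the inputs mirror the t9/t10 examples kept in Main.hs; the standalone main wrapper and the assumption that Parse.hs is on the load path are illustrative, not part of this commit):

    -- Sketch only: drives the directive parser that now lives in
    -- utils/check-cpp/Parse.hs (assumes that module is on the load path).
    import Parse (cppDirective, regularParse)

    main :: IO ()
    main = do
        print (regularParse cppDirective "#define FOO 1")      -- Right (CppDefine "FOO" ["1"])
        print (regularParse cppDirective "#include \"bar.h\"") -- Right (CppInclude "bar.h")
        print (regularParse cppDirective "#ifdef FOO")         -- Right (CppIfdef "FOO")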



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/9c7e0f6989772855c535829f761e60ae396843dd
