[Git][ghc/ghc][wip/az/ghc-cpp] First example working

Alan Zimmerman (@alanz) gitlab at gitlab.haskell.org
Mon Feb 3 22:55:34 UTC 2025



Alan Zimmerman pushed to branch wip/az/ghc-cpp at Glasgow Haskell Compiler / GHC


Commits:
23754809 by Alan Zimmerman at 2025-02-03T22:54:04+00:00
First example working

Loading Example1.hs into ghci, getting the right results

```
{-# LANGUAGE GHC_CPP #-}
module Example1 where

y = 3

x =
  "hello"
  "bye now"

foo = putStrLn x
```

- - - - -


19 changed files:

- compiler/GHC/Driver/Session.hs
- compiler/GHC/Parser.hs-boot
- compiler/GHC/Parser.y
- compiler/GHC/Parser/HaddockLex.x
- compiler/GHC/Parser/Lexer.x
- compiler/GHC/Parser/PreProcess.hs
- + compiler/GHC/Parser/PreProcess/Eval.hs
- + compiler/GHC/Parser/PreProcess/Lexer.x
- + compiler/GHC/Parser/PreProcess/Macro.hs
- + compiler/GHC/Parser/PreProcess/ParsePP.hs
- + compiler/GHC/Parser/PreProcess/Parser.y
- + compiler/GHC/Parser/PreProcess/ParserM.hs
- + compiler/GHC/Parser/PreProcess/Types.hs
- compiler/ghc.cabal.in
- hadrian/src/Rules/SourceDist.hs
- utils/check-cpp/ParsePP.hs
- utils/check-cpp/PreProcess.hs
- utils/check-cpp/Types.hs
- utils/haddock/haddock-api/src/Haddock/Parser.hs


Changes:

=====================================
compiler/GHC/Driver/Session.hs
=====================================
@@ -295,6 +295,7 @@ import Text.ParserCombinators.ReadP as R
 import qualified GHC.Data.EnumSet as EnumSet
 
 import qualified GHC.LanguageExtensions as LangExt
+import GHC.Parser.PreProcess (initPpState)
 
 
 -- Note [Updating flag description in the User's Guide]
@@ -3168,7 +3169,7 @@ setMainIs arg = parse parse_main_f arg
       POk _ (L _ re) -> callback re
 
     -- dummy parser state.
-    p_state str = initParserState ()
+    p_state str = initParserState initPpState
               (mkParserOpts mempty emptyDiagOpts False False False True)
               (stringToStringBuffer str)
               (mkRealSrcLoc (mkFastString []) 1 1)


=====================================
compiler/GHC/Parser.hs-boot
=====================================
@@ -3,5 +3,6 @@ module GHC.Parser where
 import GHC.Types.Name.Reader (RdrName)
 import GHC.Parser.Lexer (P)
 import GHC.Parser.Annotation (LocatedN)
+import GHC.Parser.PreProcess (PpState)
 
-parseIdentifier :: P p (LocatedN RdrName)
+parseIdentifier :: P PpState (LocatedN RdrName)


=====================================
compiler/GHC/Parser.y
=====================================
@@ -83,9 +83,10 @@ import GHC.Core.DataCon ( DataCon, dataConName )
 
 import GHC.Parser.PostProcess
 import GHC.Parser.PostProcess.Haddock
--- import GHC.Parser.Lexer hiding (lexer, lexerDbg)
-import GHC.Parser.Lexer
-import GHC.Parser.PreProcess hiding (lexer, lexerDbg)
+import GHC.Parser.Lexer hiding (lexer, lexerDbg)
+-- import GHC.Parser.Lexer
+-- import GHC.Parser.PreProcess hiding (lexer, lexerDbg)
+import GHC.Parser.PreProcess
 import GHC.Parser.HaddockLex
 import GHC.Parser.Annotation
 import GHC.Parser.Errors.Types
@@ -775,7 +776,8 @@ TH_QQUASIQUOTE  { L _ (ITqQuasiQuote _) }
 -- '#else'         { L _ ITcppElse }
 -- '#endif'        { L _ ITcppEndif }
 
-%monad { P p } { >>= } { return }
+-- %monad { P p } { >>= } { return }
+%monad { P PpState } { >>= } { return }
 %lexer { (lexer True) } { L _ ITeof }
 -- %lexer { (lexerDbg True) } { L _ ITeof }
   -- Replace 'lexer' above with 'lexerDbg'
@@ -4185,7 +4187,7 @@ bars :: { ([EpToken "|"],Int) }     -- One or more bars
         | '|'                    { ([epTok $1],1) }
 
 {
-happyError :: P p a
+happyError :: P PpState a
 happyError = srcParseFail
 
 getVARID          (L _ (ITvarid    x)) = x
@@ -4290,7 +4292,7 @@ hasE (L _ (ITopenExpQuote HasE _)) = True
 hasE (L _ (ITopenTExpQuote HasE))  = True
 hasE _                             = False
 
-getSCC :: Located Token -> P p FastString
+getSCC :: Located Token -> P PpState FastString
 getSCC lt = do let s = getSTRING lt
                -- We probably actually want to be more restrictive than this
                if ' ' `elem` unpackFS s
@@ -4410,7 +4412,7 @@ incorrect.
 -- Make a source location for the file.  We're a bit lazy here and just
 -- make a point SrcSpan at line 1, column 0.  Strictly speaking we should
 -- try to find the span of the whole file (ToDo).
-fileSrcSpan :: P p SrcSpan
+fileSrcSpan :: P PpState SrcSpan
 fileSrcSpan = do
   l <- getRealSrcLoc;
   let loc = mkSrcLoc (srcLocFile l) 1 1;
@@ -4443,13 +4445,13 @@ hintOrPats pat = do
   return pat
 
 -- Hint about the MultiWayIf extension
-hintMultiWayIf :: SrcSpan -> P p ()
+hintMultiWayIf :: SrcSpan -> P PpState ()
 hintMultiWayIf span = do
   mwiEnabled <- getBit MultiWayIfBit
   unless mwiEnabled $ addError $ mkPlainErrorMsgEnvelope span PsErrMultiWayIf
 
 -- Hint about explicit-forall
-hintExplicitForall :: Located Token -> P p ()
+hintExplicitForall :: Located Token -> P PpState ()
 hintExplicitForall tok = do
     explicit_forall_enabled <- getBit ExplicitForallBit
     in_rule_prag <- getBit InRulePragBit
@@ -4458,7 +4460,7 @@ hintExplicitForall tok = do
         PsErrExplicitForall (isUnicode tok)
 
 -- Hint about qualified-do
-hintQualifiedDo :: Located Token -> P p ()
+hintQualifiedDo :: Located Token -> P PpState ()
 hintQualifiedDo tok = do
     qualifiedDo   <- getBit QualifiedDoBit
     case maybeQDoDoc of
@@ -4475,7 +4477,7 @@ hintQualifiedDo tok = do
 -- When two single quotes don't followed by tyvar or gtycon, we report the
 -- error as empty character literal, or TH quote that missing proper type
 -- variable or constructor. See #13450.
-reportEmptyDoubleQuotes :: SrcSpan -> P p a
+reportEmptyDoubleQuotes :: SrcSpan -> P PpState a
 reportEmptyDoubleQuotes span = do
     thQuotes <- getBit ThQuotesBit
     addFatalError $ mkPlainErrorMsgEnvelope span $ PsErrEmptyDoubleQuotes thQuotes
@@ -4530,7 +4532,7 @@ n2l :: LocatedN a -> LocatedA a
 n2l (L !la !a) = L (l2l la) a
 
 -- Called at the very end to pick up the EOF position, as well as any comments not allocated yet.
-acsFinal :: (EpAnnComments -> Maybe (RealSrcSpan, RealSrcSpan) -> Located a) -> P p (Located a)
+acsFinal :: (EpAnnComments -> Maybe (RealSrcSpan, RealSrcSpan) -> Located a) -> P PpState (Located a)
 acsFinal a = do
   let (L l _) = a emptyComments Nothing
   !cs <- getCommentsFor l
@@ -4561,7 +4563,7 @@ amsA' (L l a) = do
   !cs <- getCommentsFor l
   return (L (EpAnn (spanAsAnchor l) noAnn cs) a)
 
--- acsExpr :: (EpAnnComments -> LHsExpr GhcPs) -> P p ECP
+-- acsExpr :: (EpAnnComments -> LHsExpr GhcPs) -> P PpState ECP
 -- acsExpr a = do { expr :: (LHsExpr GhcPs) <- runPV $ acsa a
 --                ; return (ecpFromExp $ expr) }
 
@@ -4588,7 +4590,7 @@ amsr (L l a) an = do
 -- This and the signature module parser are the only parser entry points that
 -- deal with Haddock comments. The other entry points ('parseDeclaration',
 -- 'parseExpression', etc) do not insert them into the AST.
-parseModule :: P p (Located (HsModule GhcPs))
+parseModule :: P PpState (Located (HsModule GhcPs))
 parseModule = parseModuleNoHaddock >>= addHaddockToModule
 
 -- | Parse a Haskell signature module with Haddock comments. This is done in two
@@ -4600,7 +4602,7 @@ parseModule = parseModuleNoHaddock >>= addHaddockToModule
 -- This and the module parser are the only parser entry points that deal with
 -- Haddock comments. The other entry points ('parseDeclaration',
 -- 'parseExpression', etc) do not insert them into the AST.
-parseSignature :: P p (Located (HsModule GhcPs))
+parseSignature :: P PpState (Located (HsModule GhcPs))
 parseSignature = parseSignatureNoHaddock >>= addHaddockToModule
 
 commentsA :: (NoAnn ann) => SrcSpan -> EpAnnComments -> EpAnn ann
@@ -4614,7 +4616,7 @@ spanWithComments l = do
 -- | Instead of getting the *enclosed* comments, this includes the
 -- *preceding* ones.  It is used at the top level to get comments
 -- between top level declarations.
-commentsPA :: (NoAnn ann) => LocatedAn ann a -> P p (LocatedAn ann a)
+commentsPA :: (NoAnn ann) => LocatedAn ann a -> P PpState (LocatedAn ann a)
 commentsPA la@(L l a) = do
   !cs <- getPriorCommentsFor (getLocA la)
   return (L (addCommentsToEpAnn l cs) a)


=====================================
compiler/GHC/Parser/HaddockLex.x
=====================================
@@ -10,6 +10,7 @@ import GHC.Hs.Doc
 import GHC.Parser.Lexer hiding (AlexInput)
 import GHC.Parser.Lexer.Interface (adjustChar)
 import GHC.Parser.Annotation
+import GHC.Parser.PreProcess.Types (PpState(..), initPpState)
 import GHC.Types.SrcLoc
 import GHC.Types.SourceText
 import GHC.Data.StringBuffer
@@ -131,7 +132,7 @@ advanceSrcLocBS !loc bs = case utf8UnconsByteString bs of
   Just (c, bs') -> advanceSrcLocBS (advanceSrcLoc loc c) bs'
 
 -- | Lex 'StringLiteral' for warning messages
-lexStringLiteral :: P () (LocatedN RdrName) -- ^ A precise identifier parser
+lexStringLiteral :: P PpState (LocatedN RdrName) -- ^ A precise identifier parser
                  -> Located StringLiteral
                  -> Located (WithHsDocIdentifiers StringLiteral GhcPs)
 lexStringLiteral identParser (L l sl@(StringLiteral _ fs _))
@@ -149,7 +150,7 @@ lexStringLiteral identParser (L l sl@(StringLiteral _ fs _))
     fakeLoc = mkRealSrcLoc nilFS 0 0
 
 -- | Lex identifiers from a docstring.
-lexHsDoc :: P () (LocatedN RdrName)      -- ^ A precise identifier parser
+lexHsDoc :: P PpState (LocatedN RdrName)      -- ^ A precise identifier parser
          -> HsDocString
          -> HsDoc GhcPs
 lexHsDoc identParser doc =
@@ -169,7 +170,7 @@ lexHsDoc identParser doc =
 
     fakeLoc = mkRealSrcLoc nilFS 0 0
 
-validateIdentWith :: P () (LocatedN RdrName) -> SrcSpan -> ByteString -> Maybe (Located RdrName)
+validateIdentWith :: P PpState (LocatedN RdrName) -> SrcSpan -> ByteString -> Maybe (Located RdrName)
 validateIdentWith identParser mloc str0 =
   let -- These ParserFlags should be as "inclusive" as possible, allowing
       -- identifiers defined with any language extension.
@@ -182,7 +183,7 @@ validateIdentWith identParser mloc str0 =
       realSrcLc = case mloc of
         RealSrcSpan loc _ -> realSrcSpanStart loc
         UnhelpfulSpan _ -> mkRealSrcLoc nilFS 0 0
-      pstate = initParserState () pflags buffer realSrcLc
+      pstate = initParserState initPpState pflags buffer realSrcLc
   in case unP identParser pstate of
     POk _ name -> Just $ case mloc of
        RealSrcSpan _ _ -> reLoc name


=====================================
compiler/GHC/Parser/Lexer.x
=====================================
@@ -1259,8 +1259,8 @@ cppToken code span buf len _buf2 =
      -- check if the string ends with backslash and newline
      -- NOTE: performance likely sucks, make it work for now
      (len0, continue) <- case (reverse $ unpackFS tokStr) of
-        -- ('\n':'\\':_) -> pushLexState code >> return (len -2, True)
-        ('\n':'\\':_) -> pushLexState (trace ("cppToken: push state") code) >> return (len - 2, True)
+        ('\n':'\\':_) -> pushLexState code >> return (len -2, True)
+        -- ('\n':'\\':_) -> pushLexState (trace ("cppToken: push state") code) >> return (len - 2, True)
         ('\n':_) -> return (len - 1, False)
         _ -> return (len, False)
      return (L span (ITcpp continue $! lexemeToFastString buf len0))
@@ -1281,8 +1281,8 @@ cppTokenCont code span buf len _buf2 =
 cppTokenPop :: (FastString -> Token)-> Action p
 cppTokenPop t span buf len _buf2 =
   do _ <- popLexState
-     -- return (L span (t $! lexemeToFastString buf (len - 1)))
-     return (L span (t $! lexemeToFastString buf (trace "cppTokenPop" len)))
+     -- return (L span (t $! lexemeToFastString buf (trace "cppTokenPop" len)))
+     return (L span (t $! lexemeToFastString buf len))
 
 popCpp :: Action p
 popCpp _span _buf _len _buf2 =


=====================================
compiler/GHC/Parser/PreProcess.hs
=====================================
@@ -25,6 +25,9 @@ import GHC.Data.StringBuffer
 import GHC.Parser.Errors.Ppr ()
 import GHC.Parser.Lexer (P (..), PState (..), ParseResult (..), Token (..))
 import GHC.Parser.Lexer qualified as Lexer
+import GHC.Parser.PreProcess.Macro
+import GHC.Parser.PreProcess.ParsePP
+import GHC.Parser.PreProcess.Types
 import GHC.Prelude
 import GHC.Types.SrcLoc
 
@@ -38,46 +41,6 @@ initPragState = Lexer.initPragState initPpState
 initParserState :: Lexer.ParserOpts -> StringBuffer -> RealSrcLoc -> PState PpState
 initParserState = Lexer.initParserState initPpState
 
-initPpState :: PpState
-initPpState =
-    PpState
-        { pp_defines = Map.empty
-        , pp_includes = Map.empty
-        , pp_include_stack = []
-        , pp_continuation = []
-        , pp_context = []
-        , pp_accepting = True
-        }
-
-data PpState = PpState
-    { pp_defines :: !(Map MacroName MacroDef)
-    , pp_includes :: !(Map String StringBuffer)
-    , pp_include_stack :: ![Lexer.AlexInput]
-    , pp_continuation :: ![Located Token]
-    , pp_context :: ![Token] -- ^ What preprocessor directive we are currently processing
-    , pp_accepting :: !Bool
-    }
-    deriving (Show)
-
--- ---------------------------------------------------------------------
-
-data CppDirective
-    = CppInclude String
-    | CppDefine String [String]
-    | CppIfdef String
-    | CppIfndef String
-    | CppIf [String]
-    | CppElse
-    | CppEndif
-    deriving (Show, Eq)
-
--- ---------------------------------------------------------------------
-
-type MacroArgs = [String]
-data MacroName = MacroName String (Maybe MacroArgs)
-    deriving (Show, Eq, Ord)
-type MacroDef = String
-
 -- ---------------------------------------------------------------------
 
 data CppState
@@ -87,21 +50,8 @@ data CppState
 
 -- ---------------------------------------------------------------------
 
-{-
-lexer, lexerDbg :: Bool -> (Located Token -> P PpState a) -> P PpState a
--- bypass for now, work in ghci
-lexer = Lexer.lexer
-lexerDbg = Lexer.lexerDbg
-
-setAccepting :: Bool -> P PpState ()
-setAccepting on =
-    P $ \s -> POk s{pp = (pp s){pp_accepting = on}} ()
-
-getAccepting :: P PpState Bool
-getAccepting = P $ \s -> POk s (pp_accepting (pp s))
--}
-
--- ---------------------------------------------------------------------
+lexer = ppLexer
+lexerDbg = ppLexerDbg
 
 ppLexer, ppLexerDbg :: Bool -> (Located Token -> PP a) -> PP a
 -- Use this instead of 'lexer' in GHC.Parser to dump the tokens for debugging.
@@ -113,16 +63,16 @@ ppLexer queueComments cont =
         queueComments
         ( \tk ->
             let
-                contInner t = (trace ("ppLexer: tk=" ++ show (unLoc tk, unLoc t)) cont) t
-                -- contPush = pushContext (unLoc tk) >> contIgnoreTok tk
+                -- contInner t = (trace ("ppLexer: tk=" ++ show (unLoc tk, unLoc t)) cont) t
+                contInner t = cont t
                 contIgnoreTok (L l tok) = do
                     case l of
                         RealSrcSpan r (Strict.Just b) -> Lexer.queueIgnoredToken (L (PsSpan r b) tok)
                         _ -> return ()
                     ppLexer queueComments cont
              in
-                -- case tk of
-                case (trace ("M.ppLexer:tk=" ++ show (unLoc tk)) tk) of
+                case tk of
+                -- case (trace ("M.ppLexer:tk=" ++ show (unLoc tk)) tk) of
                     L _ ITeof -> do
                         mInp <- popIncludeLoc
                         case mInp of
@@ -173,7 +123,7 @@ processCpp fs = do
     -- let s = cppInitial fs
     let s = cppInitial fs
     case parseDirective s of
-        Left err -> error $ show err
+        Left err -> error $ show (err, s)
         Right (CppInclude filename) -> do
             ppInclude filename
         Right (CppDefine name def) -> do
@@ -287,26 +237,26 @@ ppInclude filename = do
             pushIncludeLoc origInput
             let loc = PsLoc (mkRealSrcLoc (mkFastString filename) 1 1) (BufPos 0)
             Lexer.setInput (Lexer.AI loc src)
-    return $ trace ("ppInclude:mSrc=[" ++ show mSrc ++ "]") ()
+    -- return $ trace ("ppInclude:mSrc=[" ++ show mSrc ++ "]") ()
 
 -- return $ trace ("ppInclude:filename=[" ++ filename ++ "]") ()
 
-ppDefine :: String -> [String] -> PP ()
+ppDefine :: String -> String -> PP ()
 ppDefine name val = P $ \s ->
-    -- POk s{pp = (pp s){pp_defines = Set.insert (cleanTokenString def) (pp_defines (pp s))}} ()
-    POk s{pp = (pp s){pp_defines = Map.insert (trace ("ppDefine:def=[" ++ name ++ "]") (MacroName name Nothing)) val (pp_defines (pp s))}} ()
+    -- POk s{pp = (pp s){pp_defines = Map.insert (trace ("ppDefine:def=[" ++ name ++ "]") (MacroName name Nothing)) val (pp_defines (pp s))}} ()
+    POk s{pp = (pp s){pp_defines = Map.insert (MacroName name Nothing) val (pp_defines (pp s))}} ()
 
 ppIsDefined :: String -> PP Bool
 ppIsDefined def = P $ \s ->
-    -- POk s (Map.member def (pp_defines (pp s)))
-    POk s (Map.member (trace ("ppIsDefined:def=[" ++ def ++ "]") (MacroName def Nothing)) (pp_defines (pp s)))
+    POk s (Map.member (MacroName def Nothing) (pp_defines (pp s)))
+    -- POk s (Map.member (trace ("ppIsDefined:def=[" ++ def ++ "]") (MacroName def Nothing)) (pp_defines (pp s)))
 
-ppIf :: [String] -> PP Bool
-ppIf toks = P $ \s ->
+ppIf :: String -> PP Bool
+ppIf str = P $ \s ->
     -- -- POk s (Map.member def (pp_defines (pp s)))
     -- POk s (Map.member (trace ("ppIsDefined:def=[" ++ def ++ "]") def) (pp_defines (pp s)))
     let
-        s' = cppIf (pp s) toks
+        s' = cppIf (pp s) str
      in
         POk s{pp = s'} (pp_accepting s')
 
@@ -317,13 +267,13 @@ cleanTokenString fs = r
     ss = dropWhile (\c -> not $ isSpace c) (unpackFS fs)
     r = init ss
 
-parseDefine :: FastString -> Maybe (String, [String])
-parseDefine fs = r
-  where
-    -- r = Just (cleanTokenString s, "")
-    r = case parseCppParser cppDefinition (unpackFS fs) of
-        Left _ -> Nothing
-        Right v -> Just v
+-- parseDefine :: FastString -> Maybe (String, [String])
+-- parseDefine fs = r
+--   where
+--     -- r = Just (cleanTokenString s, "")
+--     r = case parseCppParser cppDefinition (unpackFS fs) of
+--         Left _ -> Nothing
+--         Right v -> Just v
 
 -- =====================================================================
 
@@ -331,9 +281,7 @@ parseDefine fs = r
 See Note [GhcCPP Initial Processing]
 -}
 cppInitial :: [FastString] -> String
-cppInitial fs = r
-  where
-    r = concatMap unpackFS fs
+cppInitial fs = concatMap unpackFS fs
 
 {-
 Note [GhcCPP Initial Processing]


=====================================
compiler/GHC/Parser/PreProcess/Eval.hs
=====================================
@@ -0,0 +1,43 @@
+module GHC.Parser.PreProcess.Eval where
+
+import GHC.Parser.PreProcess.Types
+import GHC.Prelude
+
+-- ---------------------------------------------------------------------
+
+eval :: Expr -> Int
+eval (Parens e) = eval e
+eval (Var v) = error $ "need to look up :" ++ v
+eval (IntVal i) = i
+eval (Plus e1 e2) = (eval e1) + (eval e2)
+eval (Times e1 e2) = (eval e1) * (eval e2)
+eval (Logic op e1 e2) = evalLogicOp op (eval e1) (eval e2)
+eval (Comp op e1 e2) = evalCompOp op (eval e1) (eval e2)
+
+evalLogicOp :: LogicOp -> Int -> Int -> Int
+evalLogicOp LogicalOr e1 e2 = fromBool $ (toBool e1) || (toBool e2)
+evalLogicOp LogicalAnd e1 e2 = fromBool $ (toBool e1) || (toBool e2)
+
+
+evalCompOp :: CompOp -> Int -> Int -> Int
+evalCompOp CmpEqual e1 e2 = fromBool $ e1 == e2
+evalCompOp CmpGt e1 e2 = fromBool $ e1 > e2
+evalCompOp CmpGtE e1 e2 = fromBool $ e1 >= e2
+evalCompOp CmpLt e1 e2 = fromBool $ e1 < e2
+evalCompOp CmpLtE e1 e2 = fromBool $ e1 <= e2
+
+toBool :: Int -> Bool
+toBool 0 = False
+toBool _ = True
+
+fromBool :: Bool -> Int
+fromBool False = 0
+fromBool True = 1
+
+-- ---------------------------------------------------------------------
+
+v0 :: Int
+v0 = eval (Plus (IntVal 1) (IntVal 3))
+
+v1 :: Int
+v1 = eval (Comp CmpGt (IntVal 4) (IntVal 3))


=====================================
compiler/GHC/Parser/PreProcess/Lexer.x
=====================================
@@ -0,0 +1,132 @@
+{
+module GHC.Parser.PreProcess.Lexer (lex_tok, lexCppTokenStream ) where
+
+import GHC.Parser.PreProcess.ParserM (
+                St, init_pos,
+                ParserM (..), Action, mkTv, Token(..), start_code,
+                setStartCode,
+                show_pos, position,
+                AlexInput(..), alexGetByte)
+import qualified GHC.Parser.PreProcess.ParserM as ParserM (input)
+import Control.Monad
+import GHC.Prelude
+
+
+-- The lexer is based on
+-- https://timsong-cpp.github.io/cppwp/n4140/lex.pptoken
+}
+
+words :-
+
+    <0> $white+             ;
+---------------------------------------
+
+    <0>         "{"                  { mkTv TOpenBrace }
+    <0>         "}"                  { mkTv TCloseBrace }
+    <0>         "["                  { mkTv TOpenBracket }
+    <0>         "]"                  { mkTv TCloseBracket }
+    <0>         "#"                  { mkTv THash }
+    <0>         "##"                 { mkTv THashHash }
+    <0>         "("                  { mkTv TOpenParen }
+    <0>         ")"                  { mkTv TCloseParen }
+    <0>         "<:"                 { mkTv TLtColon }
+    <0>         ":>"                 { mkTv TColonGt}
+    <0>         "<%"                 { mkTv TLtPercent }
+    <0>         "%>"                 { mkTv TPercentGt }
+    <0>         "%:"                 { mkTv TPercentColon }
+    <0>         "%:%:"               { mkTv TPercentColonTwice }
+    <0>         ";"                  { mkTv TSemi }
+    <0>         ":"                  { mkTv TColon }
+    <0>         "..."                { mkTv TDotDotDot }
+    <0>         "new"                { mkTv TNew }
+    <0>         "delete"             { mkTv TDelete }
+    <0>         "?"                  { mkTv TQuestion }
+    <0>         "::"                 { mkTv TColonColon}
+    <0>         "."                  { mkTv TDot }
+    <0>         ".*"                 { mkTv TDotStar }
+    <0>         "+"                  { mkTv TPlus }
+    <0>         "-"                  { mkTv TMinus }
+    <0>         "*"                  { mkTv TStar }
+    <0>         "/"                  { mkTv TSlash }
+    <0>         "%"                  { mkTv TPercent }
+    <0>         "^"                  { mkTv TUpArrow }
+    <0>         "&"                  { mkTv TAmpersand }
+    <0>         "|"                  { mkTv TPipe }
+    <0>         "~"                  { mkTv TTilde }
+    <0>         "!"                  { mkTv TExclamation }
+    <0>         "="                  { mkTv TEqual }
+    <0>         "<"                  { mkTv TOpenAngle }
+    <0>         ">"                  { mkTv TCloseAngle }
+    <0>         "+="                 { mkTv TPlusEqual }
+    <0>         "-="                 { mkTv TMinusEqual }
+    <0>         "*="                 { mkTv TStarEqual }
+    <0>         "/="                 { mkTv TSlashEqual }
+    <0>         "%="                 { mkTv TPercentEqual }
+    <0>         "^="                 { mkTv TUpEqual }
+    <0>         "&="                 { mkTv TAmpersandEqual }
+    <0>         "|="                 { mkTv TPipeEqual }
+    <0>         "<<"                 { mkTv TLtLt }
+    <0>         ">>"                 { mkTv TGtGt }
+    <0>         ">>="                { mkTv TGtGtEqual }
+    <0>         "<<="                { mkTv TLtLtEqual }
+    <0>         "=="                 { mkTv TEqualEqual }
+    <0>         "!="                 { mkTv TExclaimEqual }
+    <0>         "<="                 { mkTv TLtEqual }
+    <0>         ">="                 { mkTv TGtEqual }
+    <0>         "&&"                 { mkTv TAmpersandTwice }
+    <0>         "||"                 { mkTv TPipePipe }
+    <0>         "++"                 { mkTv TPlusPlus }
+    <0>         "--"                 { mkTv TMinusMinus }
+    <0>         ","                  { mkTv TComma }
+    <0>         "->*"                { mkTv TMinusGtStar }
+    <0>         "->"                 { mkTv TMinusGt }
+    <0>         "and"                { mkTv TAnd }
+    <0>         "and_eq"             { mkTv TAndEq }
+    <0>         "bitand"             { mkTv TBitand }
+    <0>         "bitor"              { mkTv TBitor }
+    <0>         "compl"              { mkTv TCompl }
+    <0>         "not"                { mkTv TNot }
+    <0>         "not_eq"             { mkTv TNotEq }
+    <0>         "or"                 { mkTv TOr }
+    <0>         "or_eq"              { mkTv TOrEq }
+    <0>         "xor"                { mkTv TXor }
+    <0>         "xor_eq"             { mkTv TXorEq }
+----------------------------------------
+    <0>         [a-z][a-zA-Z0-9\#_]* { mkTv TLowerName }
+    <0>         [A-Z][a-zA-Z0-9\#_]* { mkTv TUpperName }
+    <0>         \-? [0-9][0-9]*      { mkTv TInteger  }
+    <0>         \" [^\"]* \"         { mkTv (TString . tail . init) }
+    <0>         ()                   { begin other }
+
+    <other>     .+                   { \i -> do {setStartCode 0;
+                                                 mkTv TOther i} }
+
+{
+
+begin :: Int -> Action
+begin sc _str =
+  do setStartCode sc
+     get_tok
+
+get_tok :: ParserM Token
+get_tok = ParserM $ \i st ->
+   case alexScan i (start_code st) of
+       AlexEOF -> Right (i, st, TEOF "")
+       AlexError _ -> Left ("Lexical error at " ++ show_pos (position i))
+       AlexSkip i' _ -> case get_tok of
+                            ParserM f -> f i' st
+       AlexToken i' l a -> case a $ take l $ ParserM.input i of
+                               ParserM f -> f i' st
+
+lex_tok :: (Token -> ParserM a) -> ParserM a
+lex_tok cont = get_tok >>= cont
+
+lexCppTokenStream :: String -> St -> Either String (AlexInput, St, [Token])
+lexCppTokenStream s = unParserM go (AlexInput init_pos [] s)
+    where
+    go = do
+      ltok <- lex_tok return
+      case ltok of
+        TEOF _ -> return []
+        _ -> liftM (ltok:) go
+}


=====================================
compiler/GHC/Parser/PreProcess/Macro.hs
=====================================
@@ -0,0 +1,134 @@
+module GHC.Parser.PreProcess.Macro where
+
+-- From https://gcc.gnu.org/onlinedocs/cpp/Macros.html
+
+{-
+
+A macro is a fragment of code which has been given a name. Whenever
+the name is used, it is replaced by the contents of the macro. There
+are two kinds of macros. They differ mostly in what they look like
+when they are used. Object-like macros resemble data objects when
+used, function-like macros resemble function calls.
+
+... the preprocessor operator `defined` can never be defined as a macro
+
+If the expansion of a macro contains its own name, either directly or
+via intermediate macros, it is not expanded again when the expansion
+is examined for more macros. See
+https://gcc.gnu.org/onlinedocs/cpp/Self-Referential-Macros.html for
+details
+
+-}
+
+-- TODO: Parse tokens with original locations in them.
+
+import Data.Map qualified as Map
+import Data.Maybe
+
+import GHC.Parser.PreProcess.Eval
+import GHC.Parser.PreProcess.ParsePP
+import GHC.Parser.PreProcess.Parser qualified as Parser
+import GHC.Parser.PreProcess.ParserM
+import GHC.Parser.PreProcess.Types
+import GHC.Prelude
+
+-- ---------------------------------------------------------------------
+
+process :: PpState -> Input -> (PpState, Output)
+process s str = (s0, o)
+  where
+    o = case parseDirective str of
+        Left _ -> undefined
+        Right r -> r
+    s0 = case o of
+        CppDefine name rhs -> define s name rhs
+        CppInclude _ -> undefined
+        CppIfdef name -> ifdef s name
+        CppIf ifstr -> cppIf s ifstr
+        CppIfndef name -> ifndef s name
+        CppElse -> undefined
+        CppEndif -> undefined
+
+-- ---------------------------------------------------------------------
+
+define :: PpState -> String -> MacroDef -> PpState
+define s name toks = s{pp_defines = Map.insert (MacroName name Nothing) toks (pp_defines s)}
+
+ifdef :: PpState -> String -> PpState
+ifdef s name =
+    case Map.lookup (MacroName name Nothing) (pp_defines s) of
+        Just _ -> s{pp_accepting = True}
+        _ -> s{pp_accepting = False}
+
+ifndef :: PpState -> String -> PpState
+ifndef s name =
+    case Map.lookup (MacroName name Nothing) (pp_defines s) of
+        Just _ -> s{pp_accepting = False}
+        _ -> s{pp_accepting = True}
+
+cppIf :: PpState -> String -> PpState
+cppIf s str = r
+  where
+    expanded = expand s str
+    -- toks0 = cppLex expanded
+    -- r = error (show toks0)
+    v = case Parser.parseExpr expanded of
+        Left err -> error $ show err
+        Right tree -> eval tree
+    --    We evaluate to an Int, which we convert to a bool
+    r = s{pp_accepting = toBool v}
+
+-- ---------------------------------------------------------------------
+
+expand :: PpState -> String -> String
+expand s str = expanded
+  where
+    -- TODO: repeat until re-expand or fixpoint
+    toks = case cppLex str of
+        Left err -> error err
+        Right tks -> tks
+    expanded = concatMap (expandOne s) toks
+
+expandOne :: PpState -> Token -> String
+expandOne s tok = r
+  where
+    -- TODO: protect against looking up `define`
+    r =
+        fromMaybe
+            (t_str tok)
+            (Map.lookup (MacroName (t_str tok) Nothing) (pp_defines s))
+
+-- ---------------------------------------------------------------------
+
+m0 :: (PpState, Output)
+m0 = do
+    let (s0, _) = process initPpState "#define FOO 3"
+    let (s1, _) = process s0 "#ifdef FOO"
+    process s1 "# if FOO == 4"
+
+-- ---------------------------------------------------------------------
+
+m1 :: Either String [Token]
+m1 = cppLex "`"
+
+m2 :: Either String [Token]
+m2 = cppLex "hello(5)"
+
+m3 :: Either String [Token]
+m3 = cppLex "#define FOO(m1,m2,m) ((m1) <  1 || (m1) == 1 && (m2) <  7 || (m1) == 1 && (m2) == 7 && (m) <= 0)"
+
+-- Right [THash {t_str = "#"}
+--       ,TDefine {t_str = "define"}
+--       ,TUpperName {t_str = "FOO"}
+--       ,TOpenParen {t_str = "("}
+--       ,TLowerName {t_str = "m1"}
+--       ,TComma {t_str = ","}
+--       ,TLowerName {t_str = "m2"}
+--       ,TComma {t_str = ","}
+--       ,TLowerName {t_str = "m"}
+--       ,TCloseParen {t_str = ")"}
+--       ,TOther {t_str = " ((m1) <  1 || (m1) == 1 && (m2) <  7 || (m1) == 1 && (m2) == 7 && (m) <= 0)"}
+--       ]
+
+m4 :: Either String [Token]
+m4 = cppLex "#if (m < 1)"


=====================================
compiler/GHC/Parser/PreProcess/ParsePP.hs
=====================================
@@ -0,0 +1,95 @@
+module GHC.Parser.PreProcess.ParsePP (
+    cppLex,
+    parseDirective,
+) where
+
+import Data.List
+import GHC.Parser.Errors.Ppr ()
+
+import GHC.Parser.PreProcess.Lexer
+import GHC.Parser.PreProcess.ParserM (Token (..), init_state)
+import GHC.Parser.PreProcess.Types
+import GHC.Prelude
+
+-- =====================================================================
+-- First parse to CPP tokens, using a C++-like language spec
+-- https://gcc.gnu.org/onlinedocs/cpp/Tokenization.html
+
+-- Parse a CPP directive, using tokens from the CPP lexer
+parseDirective :: String -> Either String CppDirective
+parseDirective s =
+    case cppLex s of
+        Left e -> Left e
+        Right toks ->
+            case map t_str toks of
+                ("#" : "define" : ts) -> cppDefine ts
+                ("#" : "include" : ts) -> Right $ cppInclude ts
+                ("#" : "if" : ts) -> Right $ cppIf ts
+                ("#" : "ifndef" : ts) -> Right $ cppIfndef ts
+                ("#" : "ifdef" : ts) -> Right $ cppIfdef ts
+                ("#" : "else" : ts) -> Right $ cppElse ts
+                ("#" : "endif" : ts) -> Right $ cppEndif ts
+                other -> Left ("unexpected directive: " ++ (intercalate " " other))
+
+cppDefine :: [String] -> Either String CppDirective
+cppDefine [] = Left "error:empty #define directive"
+cppDefine (n : ts) = Right $ CppDefine n (intercalate " " ts)
+
+cppInclude :: [String] -> CppDirective
+cppInclude ts = CppInclude (intercalate " " ts)
+
+cppIf :: [String] -> CppDirective
+cppIf ts = CppIf (intercalate " " ts)
+
+cppIfdef :: [String] -> CppDirective
+cppIfdef ts = CppIfdef (intercalate " " ts)
+
+cppIfndef :: [String] -> CppDirective
+cppIfndef ts = CppIfndef (intercalate " " ts)
+
+cppElse :: [String] -> CppDirective
+cppElse _ts = CppElse
+
+cppEndif :: [String] -> CppDirective
+cppEndif _ts = CppEndif
+
+-- ---------------------------------------------------------------------
+
+cppLex :: String -> Either String [Token]
+cppLex s = case lexCppTokenStream s init_state of
+    Left err -> Left err
+    Right (_inp, _st, toks) -> Right toks
+
+-- ---------------------------------------------------------------------
+
+doATest :: String -> Either String CppDirective
+doATest str = parseDirective str
+
+-- doATest str = parseDirectiveOld str
+
+t0 :: Either String CppDirective
+t0 = doATest "#define FOO(m1,m2,m) ((m1) <  1 || (m1) == 1 && (m2) <  7 || (m1) == 1 && (m2) == 7 && (m) <= 0)"
+
+-- Right (CppDefine "FOO(m1,m2,m)" ["((m1)","<","1","||","(m1)","==","1","&&","(m2)","<","7","||","(m1)","==","1","&&","(m2)","==","7","&&","(m)","<=","0)"])
+
+t1 :: Either String CppDirective
+t1 = doATest "#if (m < 1)"
+
+t2 :: Either String CppDirective
+t2 = doATest "# if ((m1) <  1 || (m1) == 1 && (m2) <  7 || (m1) == 1 && (m2) == 7 && (m) <= 0)"
+
+-- Right (CppIf
+--        (Logic LogicalOr
+--         (Logic LogicalOr
+--          (Comp CmpLt (Var "m1") (IntVal 1))
+--          (Logic LogicalAnd
+--           (Comp CmpEqual (Var "m1") (IntVal 1))
+--           (Comp CmpLt (Var "m2") (IntVal 7))))
+--         (Logic LogicalAnd
+--           (Logic LogicalAnd
+--            (Comp CmpEqual (Var "m1") (IntVal 1))
+--            (Comp CmpEqual (Var "m2") (IntVal 7)))
+--           (Comp CmpLtE (Var "m") (IntVal 0)))))
+
+t3 :: Either String CppDirective
+t3 = parseDirective "# if FOO == 4"


=====================================
compiler/GHC/Parser/PreProcess/Parser.y
=====================================
@@ -0,0 +1,131 @@
+{
+module GHC.Parser.PreProcess.Parser (parseExpr) where
+
+import GHC.Parser.PreProcess.Lexer (lex_tok)
+import GHC.Parser.PreProcess.ParserM (Token(..), ParserM, run_parser, get_pos, show_pos,
+                happyError)
+import GHC.Parser.PreProcess.Types
+import GHC.Prelude
+-- Needed when invoking happy -ad
+-- import qualified GHC.Internal.Data.Tuple as Happy_Prelude
+}
+
+%name      expr
+%expect    0
+%tokentype { Token }
+%monad     { ParserM }
+%lexer     { lex_tok } { TEOF "" }
+
+%token
+
+    '{'                { TOpenBrace {} }
+    '}'                { TCloseBrace {} }
+    '['                { TOpenBracket {} }
+    ']'                { TCloseBracket {} }
+    '#'                { THash {} }
+    '##'               { THashHash {} }
+    '('                { TOpenParen {} }
+    ')'                { TCloseParen {} }
+    '<:'               { TLtColon {} }
+    ':>'               { TColonGt{} }
+    '<%'               { TLtPercent {} }
+    '%>'               { TPercentGt {} }
+    '%:'               { TPercentColon {} }
+    '%:%:'             { TPercentColonTwice {} }
+    ';'                { TSemi {} }
+    ':'                { TColon {} }
+    '...'              { TDotDotDot {} }
+    'new'              { TNew {} }
+    'delete'           { TDelete {} }
+    '?'                { TQuestion {} }
+    '::'               { TColonColon{} }
+    '.'                { TDot {} }
+    '.*'               { TDotStar {} }
+    '+'                { TPlus {} }
+    '-'                { TMinus {} }
+    '*'                { TStar {} }
+    '/'                { TSlash {} }
+    '%'                { TPercent {} }
+    '^'                { TUpArrow {} }
+    '&'                { TAmpersand {} }
+    '|'                { TPipe {} }
+    '~'                { TTilde {} }
+    '!'                { TExclamation {} }
+    '='                { TEqual {} }
+    '<'                { TOpenAngle {} }
+    '>'                { TCloseAngle {} }
+    '+='               { TPlusEqual {} }
+    '-='               { TMinusEqual {} }
+    '*='               { TStarEqual {} }
+    '/='               { TSlashEqual {} }
+    '%='               { TPercentEqual {} }
+    '^='               { TUpEqual {} }
+    '&='               { TAmpersandEqual {} }
+    '|='               { TPipeEqual {} }
+    '<<'               { TLtLt {} }
+    '>>'               { TGtGt {} }
+    '>>='              { TGtGtEqual {} }
+    '<<='              { TLtLtEqual {} }
+    '=='               { TEqualEqual {} }
+    '!='               { TExclaimEqual {} }
+    '<='               { TLtEqual {} }
+    '>='               { TGtEqual {} }
+    '&&'               { TAmpersandTwice {} }
+    '||'               { TPipePipe {} }
+    '++'               { TPlusPlus {} }
+    '--'               { TMinusMinus {} }
+    ','                { TComma {} }
+    '->*'              { TMinusGtStar {} }
+    '->'               { TMinusGt {} }
+    'and'              { TAnd {} }
+    'and_eq'           { TAndEq {} }
+    'bitand'           { TBitand {} }
+    'bitor'            { TBitor {} }
+    'compl'            { TCompl {} }
+    'not'              { TNot {} }
+    'not_eq'           { TNotEq {} }
+    'or'               { TOr {} }
+    'or_eq'            { TOrEq {} }
+    'xor'              { TXor {} }
+    'xor_eq'           { TXorEq {} }
+
+    lower_name         { TLowerName {} }
+    upper_name         { TUpperName {} }
+    integer            { TInteger {} }
+    string             { TString {} }
+    other              { TOther {} }
+
+-- Operator precedence. Earlier in the table is lower
+-- Note: this seems to require all the operators to appear in the same
+-- rule.
+%left '||'
+%left '&&'
+%left '=='
+%left '>' '>=' '<' '<='
+%left '+' '-'
+%left '*' '/'
+
+%%
+
+expr :: { Expr }
+expr : variable           { $1 }
+     | integer            { IntVal (read $ t_str $1) }
+     | '(' expr ')'       { $2 }
+     | expr '||' expr     { Logic LogicalOr $1 $3 }
+     | expr '&&' expr     { Logic LogicalAnd $1 $3 }
+     | expr '==' expr     { Comp CmpEqual $1 $3 }
+     | expr '>'  expr     { Comp CmpGt $1 $3 }
+     | expr '>=' expr     { Comp CmpGtE $1 $3 }
+     | expr '<'  expr     { Comp CmpLt $1 $3 }
+     | expr '<=' expr     { Comp CmpLtE $1 $3 }
+
+variable :: {Expr}
+variable : name { Var $1 }
+
+name : lower_name { t_str $1 }
+     | upper_name { t_str $1 }
+
+{
+-- parseExpr :: String -> Either String Expr
+parseExpr = run_parser expr
+}


=====================================
compiler/GHC/Parser/PreProcess/ParserM.hs
=====================================
@@ -0,0 +1,256 @@
+{-# LANGUAGE BinaryLiterals #-}
+
+-- Support functions for the CPP Lexer.x
+
+module GHC.Parser.PreProcess.ParserM (
+    -- Parser Monad
+    ParserM (..),
+    AlexInput (..),
+    run_parser,
+    -- Parser state
+    St,
+    init_state,
+    StartCode,
+    start_code,
+    setStartCode,
+    -- Tokens
+    Token (..),
+    -- Actions
+    Action,
+    andBegin,
+    mkT,
+    mkTv,
+    -- Positions
+    init_pos,
+    get_pos,
+    show_pos,
+    -- Input
+    alexGetByte,
+    alexInputPrevChar,
+    -- Other
+    happyError,
+) where
+
+import Control.Applicative
+
+import Control.Monad.Fail (MonadFail (..))
+import Prelude hiding (fail)
+
+import Control.Monad (ap, liftM)
+import Data.Bits (shiftR, (.&.), (.|.))
+import Data.Char (ord)
+import Data.Word (Word8)
+
+-- Parser Monad
+newtype ParserM a = ParserM {unParserM :: AlexInput -> St -> Either String (AlexInput, St, a)}
+
+-- newtype P a       = P     { unP :: PState -> ParseResult a }
+
+instance Functor ParserM where
+    fmap = liftM
+
+instance Applicative ParserM where
+    pure a = ParserM $ \i s -> Right (i, s, a)
+    (<*>) = ap
+
+instance Monad ParserM where
+    ParserM m >>= k = ParserM $ \i s -> case m i s of
+        Right (i', s', x) ->
+            case k x of
+                ParserM y -> y i' s'
+        Left err ->
+            Left err
+
+instance MonadFail ParserM where
+    fail err = ParserM $ \_ _ -> Left err
+
+run_parser :: ParserM a -> (String -> Either String a)
+run_parser (ParserM f) =
+    \s -> case f (AlexInput init_pos [] s) init_state of
+        Left es -> Left es
+        Right (_, _, x) -> Right x
+
+-- Parser state
+
+data St = St
+    { start_code :: !StartCode
+    , brace_depth :: !Int
+    }
+    deriving (Show)
+type StartCode = Int
+
+init_state :: St
+init_state =
+    St
+        { start_code = 0
+        , brace_depth = 0
+        }
+
+-- Tokens
+
+data Token
+    = TEOF {t_str :: String}
+    | TOpenBrace {t_str :: String}
+    | TCloseBrace {t_str :: String}
+    | TOpenBracket {t_str :: String}
+    | TCloseBracket {t_str :: String}
+    | THash {t_str :: String}
+    | THashHash {t_str :: String}
+    | TOpenParen {t_str :: String}
+    | TCloseParen {t_str :: String}
+    | TLtColon {t_str :: String}
+    | TColonGt {t_str :: String}
+    | TLtPercent {t_str :: String}
+    | TPercentGt {t_str :: String}
+    | TPercentColon {t_str :: String}
+    | TPercentColonTwice {t_str :: String}
+    | TSemi {t_str :: String}
+    | TColon {t_str :: String}
+    | TDotDotDot {t_str :: String}
+    | TNew {t_str :: String}
+    | TDelete {t_str :: String}
+    | TQuestion {t_str :: String}
+    | TColonColon {t_str :: String}
+    | TDot {t_str :: String}
+    | TDotStar {t_str :: String}
+    | TPlus {t_str :: String}
+    | TMinus {t_str :: String}
+    | TStar {t_str :: String}
+    | TSlash {t_str :: String}
+    | TPercent {t_str :: String}
+    | TUpArrow {t_str :: String}
+    | TAmpersand {t_str :: String}
+    | TPipe {t_str :: String}
+    | TTilde {t_str :: String}
+    | TExclamation {t_str :: String}
+    | TEqual {t_str :: String}
+    | TOpenAngle {t_str :: String}
+    | TCloseAngle {t_str :: String}
+    | TPlusEqual {t_str :: String}
+    | TMinusEqual {t_str :: String}
+    | TStarEqual {t_str :: String}
+    | TSlashEqual {t_str :: String}
+    | TPercentEqual {t_str :: String}
+    | TUpEqual {t_str :: String}
+    | TAmpersandEqual {t_str :: String}
+    | TPipeEqual {t_str :: String}
+    | TLtLt {t_str :: String}
+    | TGtGt {t_str :: String}
+    | TGtGtEqual {t_str :: String}
+    | TLtLtEqual {t_str :: String}
+    | TEqualEqual {t_str :: String}
+    | TExclaimEqual {t_str :: String}
+    | TLtEqual {t_str :: String}
+    | TGtEqual {t_str :: String}
+    | TAmpersandTwice {t_str :: String}
+    | TPipePipe {t_str :: String}
+    | TPlusPlus {t_str :: String}
+    | TMinusMinus {t_str :: String}
+    | TComma {t_str :: String}
+    | TMinusGtStar {t_str :: String}
+    | TMinusGt {t_str :: String}
+    | TAnd {t_str :: String}
+    | TAndEq {t_str :: String}
+    | TBitand {t_str :: String}
+    | TBitor {t_str :: String}
+    | TCompl {t_str :: String}
+    | TNot {t_str :: String}
+    | TNotEq {t_str :: String}
+    | TOr {t_str :: String}
+    | TOrEq {t_str :: String}
+    | TXor {t_str :: String}
+    | TXorEq {t_str :: String}
+    | TLowerName {t_str :: String}
+    | TUpperName {t_str :: String}
+    | TString {t_str :: String}
+    | TInteger {t_str :: String}
+    | TOther {t_str :: String}
+    deriving (Show)
+
+-- Actions
+
+type Action = String -> ParserM Token
+
+setStartCode :: StartCode -> ParserM ()
+setStartCode sc = ParserM $ \i st -> Right (i, st{start_code = sc}, ())
+
+andBegin :: Action -> StartCode -> Action
+(act `andBegin` sc) x = do
+    setStartCode sc
+    act x
+
+mkT :: Token -> Action
+mkT t = mkTv (const t)
+
+mkTv :: (String -> Token) -> Action
+mkTv f str = ParserM (\i st -> Right (i, st, f str))
+
+-- begin :: Int -> Action
+-- begin sc _span _buf _len _buf2 =
+--   do setStartCode sc
+--      lex_tok
+
+-- Positions
+
+data Pos = Pos !Int {- Line -} !Int {- Column -}
+    deriving (Show)
+
+get_pos :: ParserM Pos
+get_pos = ParserM $ \i@(AlexInput p _ _) st -> Right (i, st, p)
+
+alexMove :: Pos -> Char -> Pos
+alexMove (Pos l _) '\n' = Pos (l + 1) 1
+alexMove (Pos l c) '\t' = Pos l ((c + 8) `div` 8 * 8)
+alexMove (Pos l c) _ = Pos l (c + 1)
+
+init_pos :: Pos
+init_pos = Pos 1 0
+
+show_pos :: Pos -> String
+show_pos (Pos l c) = "line " ++ show l ++ ", column " ++ show c
+
+-- Input
+
+data AlexInput = AlexInput
+    { position :: !Pos
+    , char_bytes :: [Word8]
+    , input :: String
+    }
+    deriving (Show)
+
+alexGetByte :: AlexInput -> Maybe (Word8, AlexInput)
+alexGetByte (AlexInput p (w : ws) cs) =
+    Just (w, AlexInput p ws cs)
+alexGetByte (AlexInput p [] (c : cs)) =
+    alexGetByte (AlexInput (alexMove p c) (utf8_encode c) cs)
+alexGetByte (AlexInput _ [] []) =
+    Nothing
+
+-- annoyingly, this doesn't seem to exist anywhere else as a standalone function
+utf8_encode :: Char -> [Word8]
+utf8_encode c = case ord c of
+    n
+        | n < 0x80 -> [fromIntegral n]
+        | n < 0x800 ->
+            [ fromIntegral $ 0b11000000 .|. (n `shiftR` 6)
+            , fromIntegral $ 0b10000000 .|. (n .&. 0b111111)
+            ]
+        | n < 0x10000 ->
+            [ fromIntegral $ 0b11100000 .|. (n `shiftR` 12)
+            , fromIntegral $ 0b10000000 .|. ((n `shiftR` 6) .&. 0b111111)
+            , fromIntegral $ 0b10000000 .|. (n .&. 0b111111)
+            ]
+        | otherwise ->
+            [ fromIntegral $ 0b11110000 .|. (n `shiftR` 18)
+            , fromIntegral $ 0b10000000 .|. ((n `shiftR` 12) .&. 0b111111)
+            , fromIntegral $ 0b10000000 .|. ((n `shiftR` 6) .&. 0b111111)
+            , fromIntegral $ 0b10000000 .|. (n .&. 0b111111)
+            ]
+
+alexInputPrevChar :: AlexInput -> Char
+alexInputPrevChar _ = error "Lexer doesn't implement alexInputPrevChar"
+
+happyError :: ParserM a
+happyError = do
+    p <- get_pos
+    fail $ "Parse error at " ++ show_pos p


=====================================
compiler/GHC/Parser/PreProcess/Types.hs
=====================================
@@ -0,0 +1,85 @@
+module GHC.Parser.PreProcess.Types where
+
+import GHC.Data.StringBuffer
+import GHC.Parser.Lexer (Token (..))
+import qualified GHC.Parser.Lexer as Lexer
+import GHC.Types.SrcLoc
+
+import Data.Map (Map)
+import qualified Data.Map as Map
+import GHC.Prelude
+
+-- ---------------------------------------------------------------------
+
+initPpState :: PpState
+initPpState =
+    PpState
+        { pp_defines = Map.empty
+        , pp_includes = Map.empty
+        , pp_include_stack = []
+        , pp_continuation = []
+        , pp_context = []
+        , pp_accepting = True
+        }
+
+data PpState = PpState
+    { pp_defines :: !(Map MacroName MacroDef)
+    , pp_includes :: !(Map String StringBuffer)
+    , pp_include_stack :: ![Lexer.AlexInput]
+    , pp_continuation :: ![Located Token]
+    , pp_context :: ![Token]
+    -- ^ What preprocessor directive we are currently processing
+    , pp_accepting :: !Bool
+    }
+    deriving (Show)
+
+-- ---------------------------------------------------------------------
+
+data CppDirective
+    = CppInclude String
+    | CppDefine String String
+    | CppIfdef String
+    | CppIfndef String
+    | CppIf String
+    | CppElse
+    | CppEndif
+    deriving (Show, Eq)
+
+-- ---------------------------------------------------------------------
+
+type MacroArgs = [String]
+data MacroName = MacroName String (Maybe MacroArgs)
+    deriving (Show, Eq, Ord)
+type MacroDef = String
+
+type Input = String
+type Output = CppDirective
+
+-- ---------------------------------------------------------------------
+-- Expression language
+-- NOTE: need to take care of macro expansion while parsing. Or perhaps before?
+
+data Expr
+    = Parens Expr
+    | Var String
+    | IntVal Int
+    | Plus Expr Expr
+    | Times Expr Expr
+    | Logic LogicOp Expr Expr
+    | Comp CompOp Expr Expr
+    deriving (Show, Eq)
+
+data LogicOp
+    = LogicalOr
+    | LogicalAnd
+    deriving (Show, Eq)
+
+data CompOp
+    = CmpEqual
+    | CmpGt
+    | CmpGtE
+    | CmpLt
+    | CmpLtE
+    deriving (Show, Eq)
+
+-- ---------------------------------------------------------------------


=====================================
compiler/ghc.cabal.in
=====================================
@@ -649,9 +649,16 @@ Library
         GHC.Parser.Lexer.Interface
         GHC.Parser.Lexer.String
         GHC.Parser.HaddockLex
-        GHC.Parser.PreProcess
         GHC.Parser.PostProcess
         GHC.Parser.PostProcess.Haddock
+        GHC.Parser.PreProcess
+        GHC.Parser.PreProcess.Eval
+        GHC.Parser.PreProcess.Lexer
+        GHC.Parser.PreProcess.Macro
+        GHC.Parser.PreProcess.ParsePP
+        GHC.Parser.PreProcess.ParserM
+        GHC.Parser.PreProcess.Parser
+        GHC.Parser.PreProcess.Types
         GHC.Parser.String
         GHC.Parser.Types
         GHC.Parser.Utils


=====================================
hadrian/src/Rules/SourceDist.hs
=====================================
@@ -187,6 +187,8 @@ prepareTree dest = do
         , (stage0InTree , compiler,      "GHC/Parser/Lexer.x", "GHC/Parser/Lexer.hs")
         , (stage0InTree , compiler,      "GHC/Parser/Lexer/String.x", "GHC/Parser/Lexer/String.hs")
         , (stage0InTree , compiler,      "GHC/Parser/HaddockLex.x", "GHC/Parser/HaddockLex.hs")
+        , (stage0InTree , compiler,      "GHC/Parser/PreProcess/Parser.y", "GHC/Parser/PreProcess/Parser.hs")
+        , (stage0InTree , compiler,      "GHC/Parser/PreProcess/Lexer.x",  "GHC/Parser/PreProcess/Lexer.hs")
         , (stage0InTree , hpcBin,        "src/Trace/Hpc/Parser.y", "src/Trace/Hpc/Parser.hs")
         , (stage0InTree , genprimopcode, "Parser.y",           "Parser.hs")
         , (stage0InTree , genprimopcode, "Lexer.x",            "Lexer.hs")


=====================================
utils/check-cpp/ParsePP.hs
=====================================
@@ -1,10 +1,6 @@
 module ParsePP (
-    -- parseCppParser,
-    -- plusTimesExpr,
     cppLex,
     parseDirective,
-    -- testing, delete
-    -- cppDefinition,
 ) where
 
 import Data.Char
@@ -13,14 +9,6 @@ import Control.Monad (void)
 import Data.Functor.Identity
 import Debug.Trace
 import GHC.Parser.Errors.Ppr ()
-import Text.Parsec
-import qualified Text.Parsec as Parsec
-import Text.Parsec.Char as PS
-import Text.Parsec.Combinator as PS
-import qualified Text.Parsec.Expr as E
-import Text.Parsec.Language (emptyDef)
-import Text.Parsec.Prim as PS hiding (token)
-import qualified Text.Parsec.Token as P
 import Data.List
 
 import qualified Parser
@@ -32,31 +20,6 @@ import Lexer
 -- First parse to CPP tokens, using a C++-like language spec
 -- https://gcc.gnu.org/onlinedocs/cpp/Tokenization.html
 
-lexer :: P.TokenParser ()
-lexer = P.makeTokenParser exprDef
-
-exprDef :: P.LanguageDef st
-exprDef =
-    emptyDef
-        { P.commentStart = "/*"
-        , P.commentEnd = "*/"
-        , P.commentLine = "//"
-        , P.nestedComments = False
-        , P.identStart = letter <|> char '_'
-        , P.identLetter = alphaNum <|> oneOf "_'"
-        , P.opStart = P.opLetter exprDef
-        , P.opLetter = oneOf ":!#$%&*+./<=>?@\\^|-~"
-        , P.reservedOpNames = []
-        , P.reservedNames = []
-        , P.caseSensitive = True
-        }
-
--- =====================================================================
--- ---------------------------------------------------------------------
-
-type CppParser = Parsec String ()
-
-
 -- Parse a CPP directive, using tokens from the CPP lexer
 parseDirective :: String -> Either String CppDirective
 parseDirective s =
@@ -94,158 +57,6 @@ cppLex s = case lexCppTokenStream s init_state of
 
 -- ---------------------------------------------------------------------
 
--- parseDirectiveOld :: String -> Either Parsec.ParseError CppDirective
--- parseDirectiveOld = parseCppParser cppDirective
-
--- parseCppParser :: CppParser a -> String -> Either Parsec.ParseError a
--- parseCppParser p = PS.parse p ""
-
--- -- TODO: delete this
--- cppDefinition :: CppParser (String, [String])
--- cppDefinition = do
---     _ <- PS.char '#'
---     _ <- whiteSpace
---     eToken "define"
---     name <- cppToken
---     definition <- cppTokens
---     return (name, definition)
-
--- cppDirective :: CppParser CppDirective
--- cppDirective = do
---     _ <- PS.char '#'
---     _ <- whiteSpace
---     choice
---         [ cppKw "define" >> cmdDefinition
---         , try $ cppKw "include" >> cmdInclude
---         , try $ cppKw "ifdef" >> cmdIfdef
---         , try $ cppKw "ifndef" >> cmdIfndef
---         -- , try $ cppKw "if" >> cmdIf
---         , try $ cppKw "else" >> return CppElse
---         , cppKw "endif" >> return CppEndif
---         -- , cppKw "elif" CppElifKw
---         -- , cppKw "undef" CppUndefKw
---         -- , cppKw "error" CppErrorKw
---         ]
-
--- cmdInclude :: CppParser CppDirective
--- cmdInclude = do
---     _ <- string "\""
---     filename <- many1 (satisfy (\c -> not (isSpace c || c == '"')))
---     _ <- string "\""
---     return $ CppInclude filename
-
--- cmdDefinition :: CppParser CppDirective
--- cmdDefinition = do
---     name <- cppToken
---     CppDefine name <$> cppTokens
-
--- cmdIfdef :: CppParser CppDirective
--- cmdIfdef = CppIfdef <$> cppToken
-
--- cmdIfndef :: CppParser CppDirective
--- cmdIfndef = CppIfndef <$> cppToken
-
--- cmdIf :: CppParser CppDirective
--- cmdIf = CppIf <$> cppTokens
-
--- cppKw :: String -> CppParser ()
--- cppKw kw = void $ lexeme (PS.string kw)
-
--- cppComment :: CppParser ()
--- cppComment = do
---     _ <- PS.string "/*"
---     _ <- PS.manyTill PS.anyChar (PS.try (PS.string "*/"))
---     return ()
-
--- whiteSpace :: CppParser ()
--- whiteSpace = do
---     _ <- PS.many (PS.choice [cppComment, void PS.space])
---     return ()
-
--- lexeme :: CppParser a -> CppParser a
--- lexeme p = p <* whiteSpace
-
--- cppToken :: CppParser String
--- cppToken = lexeme (PS.many1 (PS.satisfy (not . isSpace)))
-
--- cppTokens :: CppParser [String]
--- cppTokens = PS.many cppToken
-
--- -- -------------------------------------
-
--- plusTimesExpr :: CppParser Expr
--- plusTimesExpr = E.buildExpressionParser eTable eTerm
-
--- eTable :: [[E.Operator String () Data.Functor.Identity.Identity Expr]]
--- eTable =
---   []
-    -- -- Via https://learn.microsoft.com/en-us/cpp/cpp/cpp-built-in-operators-precedence-and-associativity?view=msvc-170
-    -- [ [E.Infix (Times <$ symbol "*") E.AssocLeft]
-    -- , [E.Infix (Plus <$ symbol "+") E.AssocLeft]
-    -- ,
-    --     [ E.Infix (try $ BinOp CmpLtE <$ symbol "<=") E.AssocLeft
-    --     , E.Infix (try $ BinOp CmpGtE <$ symbol ">=") E.AssocLeft
-    --     , E.Infix (BinOp CmpLt <$ symbol "<") E.AssocLeft
-    --     , E.Infix (BinOp CmpGt <$ symbol ">") E.AssocLeft
-    --     ]
-    -- , [E.Infix (BinOp CmpEqual <$ symbol "==") E.AssocLeft]
-    -- , [E.Infix (BinOp LogicalAnd <$ symbol "&&") E.AssocLeft]
-    -- , [E.Infix (BinOp LogicalOr <$ symbol "||") E.AssocLeft]
-    -- ]
-
--- eTerm :: CppParser Expr
--- eTerm =
---     eVariable -- <|> pteNum
---         <|> pteParens
---         <|> eInteger
-
--- pteParens :: CppParser Expr
--- pteParens = Parens <$> between (symbol "(") (symbol ")") plusTimesExpr
-
--- symbol :: String -> CppParser String
--- symbol s = lexeme $ string s
-
--- -- -------------------------------------
-
--- eExpr :: CppParser Expr
--- eExpr = choice [eParens, eBinOp, eVariable]
-
--- eParens :: CppParser Expr
--- eParens = P.parens lexer $ do
---     Parens <$> eExpr
-
--- eBinOp :: CppParser Expr
--- eBinOp = do
---     e1 <- eExpr
---     op <- eOp
---     -- _ <- cppToken
---     -- let op = Or
---     BinOp op e1 <$> eExpr
-
--- eOp :: CppParser Op
--- eOp = do
---     -- op <- P.operator lexer
---     op <- P.operator (trace "foo" lexer)
---     return $ trace ("op=" ++ show op) LogicalOr
-
--- -- TODO: Do we need this? the expression should be fully expanded by
--- -- the time we get it
--- eVariable :: CppParser Expr
--- eVariable = do
---     v <- P.identifier lexer
---     return $ Var v
-
--- eToken :: String -> CppParser ()
--- eToken = P.reserved lexer
-
--- eInteger :: CppParser Expr
--- eInteger = IntVal <$> integer
-
--- integer :: CppParser Int
--- integer = read <$> lexeme (many1 digit)
-
--- ---------------------------------------------------------------------
-
 doATest :: String -> Either String CppDirective
 doATest str = parseDirective str
 -- doATest str = parseDirectiveOld str


=====================================
utils/check-cpp/PreProcess.hs
=====================================
@@ -21,7 +21,6 @@ import Types
 
 -- ---------------------------------------------------------------------
 
-type PP = P PpState
 
 -- initPpState :: PpState
 -- initPpState =
@@ -96,6 +95,8 @@ ppLexer queueComments cont =
 
 -- ---------------------------------------------------------------------
 
+type PP = P PpState
+
 preprocessElse :: PP ()
 preprocessElse = do
     accepting <- getAccepting
@@ -280,10 +281,7 @@ cleanTokenString fs = r
 See Note [GhcCPP Initial Processing]
 -}
 cppInitial :: [FastString] -> String
-cppInitial fs = r
-  where
-    -- go fs' = reverse $ drop 2 $ reverse $ unpackFS fs'
-    r = concatMap unpackFS fs
+cppInitial fs = concatMap unpackFS fs
 
 {-
 Note [GhcCPP Initial Processing]


=====================================
utils/check-cpp/Types.hs
=====================================
@@ -51,15 +51,6 @@ data MacroName = MacroName String (Maybe MacroArgs)
     deriving (Show, Eq, Ord)
 type MacroDef = String
 
--- data PpState = PpState
---     { pp_defines :: !(Map MacroName MacroDef)
---     , pp_accepting :: !Bool
---     }
---     deriving (Show, Eq)
-
--- initPpState :: PpState
--- initPpState = PpState{pp_defines = Map.empty, pp_accepting = True}
-
 type Input = String
 type Output = CppDirective
 


=====================================
utils/haddock/haddock-api/src/Haddock/Parser.hs
=====================================
@@ -17,6 +17,7 @@ import GHC.Data.FastString (fsLit)
 import GHC.Data.StringBuffer (stringToStringBuffer)
 import GHC.Parser (parseIdentifier)
 import GHC.Parser.Lexer (ParseResult (PFailed, POk), ParserOpts, initParserState, unP)
+import GHC.Parser.PreProcess (initPpState)
 import GHC.Types.Name.Occurrence (occNameString)
 import GHC.Types.Name.Reader (RdrName (..))
 import GHC.Types.SrcLoc (GenLocated (..), mkRealSrcLoc)
@@ -45,7 +46,7 @@ parseIdent parserOpts ns str0 =
     PFailed{} -> Nothing
   where
     realSrcLc = mkRealSrcLoc (fsLit "<unknown file>") 0 0
-    pstate str = initParserState () parserOpts (stringToStringBuffer str) realSrcLc
+    pstate str = initParserState initPpState parserOpts (stringToStringBuffer str) realSrcLc
     (wrap, str1) = case str0 of
       '(' : s@(c : _)
         | c /= ','



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/237548092a8cedcc244412f1bf10a6a08f982de1

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/237548092a8cedcc244412f1bf10a6a08f982de1
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20250203/dd1fade3/attachment-0001.html>


More information about the ghc-commits mailing list