[Git][ghc/ghc][wip/az/ghc-cpp] First example working
Alan Zimmerman (@alanz)
gitlab at gitlab.haskell.org
Mon Feb 3 22:55:34 UTC 2025
Alan Zimmerman pushed to branch wip/az/ghc-cpp at Glasgow Haskell Compiler / GHC
Commits:
23754809 by Alan Zimmerman at 2025-02-03T22:54:04+00:00
First example working
Loading Example1.hs into ghci, getting the right results
```
{-# LANGUAGE GHC_CPP #-}
module Example1 where
y = 3
x =
"hello"
"bye now"
foo = putStrLn x
```
- - - - -
19 changed files:
- compiler/GHC/Driver/Session.hs
- compiler/GHC/Parser.hs-boot
- compiler/GHC/Parser.y
- compiler/GHC/Parser/HaddockLex.x
- compiler/GHC/Parser/Lexer.x
- compiler/GHC/Parser/PreProcess.hs
- + compiler/GHC/Parser/PreProcess/Eval.hs
- + compiler/GHC/Parser/PreProcess/Lexer.x
- + compiler/GHC/Parser/PreProcess/Macro.hs
- + compiler/GHC/Parser/PreProcess/ParsePP.hs
- + compiler/GHC/Parser/PreProcess/Parser.y
- + compiler/GHC/Parser/PreProcess/ParserM.hs
- + compiler/GHC/Parser/PreProcess/Types.hs
- compiler/ghc.cabal.in
- hadrian/src/Rules/SourceDist.hs
- utils/check-cpp/ParsePP.hs
- utils/check-cpp/PreProcess.hs
- utils/check-cpp/Types.hs
- utils/haddock/haddock-api/src/Haddock/Parser.hs
Changes:
=====================================
compiler/GHC/Driver/Session.hs
=====================================
@@ -295,6 +295,7 @@ import Text.ParserCombinators.ReadP as R
import qualified GHC.Data.EnumSet as EnumSet
import qualified GHC.LanguageExtensions as LangExt
+import GHC.Parser.PreProcess (initPpState)
-- Note [Updating flag description in the User's Guide]
@@ -3168,7 +3169,7 @@ setMainIs arg = parse parse_main_f arg
POk _ (L _ re) -> callback re
-- dummy parser state.
- p_state str = initParserState ()
+ p_state str = initParserState initPpState
(mkParserOpts mempty emptyDiagOpts False False False True)
(stringToStringBuffer str)
(mkRealSrcLoc (mkFastString []) 1 1)
=====================================
compiler/GHC/Parser.hs-boot
=====================================
@@ -3,5 +3,6 @@ module GHC.Parser where
import GHC.Types.Name.Reader (RdrName)
import GHC.Parser.Lexer (P)
import GHC.Parser.Annotation (LocatedN)
+import GHC.Parser.PreProcess (PpState)
-parseIdentifier :: P p (LocatedN RdrName)
+parseIdentifier :: P PpState (LocatedN RdrName)
=====================================
compiler/GHC/Parser.y
=====================================
@@ -83,9 +83,10 @@ import GHC.Core.DataCon ( DataCon, dataConName )
import GHC.Parser.PostProcess
import GHC.Parser.PostProcess.Haddock
--- import GHC.Parser.Lexer hiding (lexer, lexerDbg)
-import GHC.Parser.Lexer
-import GHC.Parser.PreProcess hiding (lexer, lexerDbg)
+import GHC.Parser.Lexer hiding (lexer, lexerDbg)
+-- import GHC.Parser.Lexer
+-- import GHC.Parser.PreProcess hiding (lexer, lexerDbg)
+import GHC.Parser.PreProcess
import GHC.Parser.HaddockLex
import GHC.Parser.Annotation
import GHC.Parser.Errors.Types
@@ -775,7 +776,8 @@ TH_QQUASIQUOTE { L _ (ITqQuasiQuote _) }
-- '#else' { L _ ITcppElse }
-- '#endif' { L _ ITcppEndif }
-%monad { P p } { >>= } { return }
+-- %monad { P p } { >>= } { return }
+%monad { P PpState } { >>= } { return }
%lexer { (lexer True) } { L _ ITeof }
-- %lexer { (lexerDbg True) } { L _ ITeof }
-- Replace 'lexer' above with 'lexerDbg'
@@ -4185,7 +4187,7 @@ bars :: { ([EpToken "|"],Int) } -- One or more bars
| '|' { ([epTok $1],1) }
{
-happyError :: P p a
+happyError :: P PpState a
happyError = srcParseFail
getVARID (L _ (ITvarid x)) = x
@@ -4290,7 +4292,7 @@ hasE (L _ (ITopenExpQuote HasE _)) = True
hasE (L _ (ITopenTExpQuote HasE)) = True
hasE _ = False
-getSCC :: Located Token -> P p FastString
+getSCC :: Located Token -> P PpState FastString
getSCC lt = do let s = getSTRING lt
-- We probably actually want to be more restrictive than this
if ' ' `elem` unpackFS s
@@ -4410,7 +4412,7 @@ incorrect.
-- Make a source location for the file. We're a bit lazy here and just
-- make a point SrcSpan at line 1, column 0. Strictly speaking we should
-- try to find the span of the whole file (ToDo).
-fileSrcSpan :: P p SrcSpan
+fileSrcSpan :: P PpState SrcSpan
fileSrcSpan = do
l <- getRealSrcLoc;
let loc = mkSrcLoc (srcLocFile l) 1 1;
@@ -4443,13 +4445,13 @@ hintOrPats pat = do
return pat
-- Hint about the MultiWayIf extension
-hintMultiWayIf :: SrcSpan -> P p ()
+hintMultiWayIf :: SrcSpan -> P PpState ()
hintMultiWayIf span = do
mwiEnabled <- getBit MultiWayIfBit
unless mwiEnabled $ addError $ mkPlainErrorMsgEnvelope span PsErrMultiWayIf
-- Hint about explicit-forall
-hintExplicitForall :: Located Token -> P p ()
+hintExplicitForall :: Located Token -> P PpState ()
hintExplicitForall tok = do
explicit_forall_enabled <- getBit ExplicitForallBit
in_rule_prag <- getBit InRulePragBit
@@ -4458,7 +4460,7 @@ hintExplicitForall tok = do
PsErrExplicitForall (isUnicode tok)
-- Hint about qualified-do
-hintQualifiedDo :: Located Token -> P p ()
+hintQualifiedDo :: Located Token -> P PpState ()
hintQualifiedDo tok = do
qualifiedDo <- getBit QualifiedDoBit
case maybeQDoDoc of
@@ -4475,7 +4477,7 @@ hintQualifiedDo tok = do
-- When two single quotes don't followed by tyvar or gtycon, we report the
-- error as empty character literal, or TH quote that missing proper type
-- variable or constructor. See #13450.
-reportEmptyDoubleQuotes :: SrcSpan -> P p a
+reportEmptyDoubleQuotes :: SrcSpan -> P PpState a
reportEmptyDoubleQuotes span = do
thQuotes <- getBit ThQuotesBit
addFatalError $ mkPlainErrorMsgEnvelope span $ PsErrEmptyDoubleQuotes thQuotes
@@ -4530,7 +4532,7 @@ n2l :: LocatedN a -> LocatedA a
n2l (L !la !a) = L (l2l la) a
-- Called at the very end to pick up the EOF position, as well as any comments not allocated yet.
-acsFinal :: (EpAnnComments -> Maybe (RealSrcSpan, RealSrcSpan) -> Located a) -> P p (Located a)
+acsFinal :: (EpAnnComments -> Maybe (RealSrcSpan, RealSrcSpan) -> Located a) -> P PpState (Located a)
acsFinal a = do
let (L l _) = a emptyComments Nothing
!cs <- getCommentsFor l
@@ -4561,7 +4563,7 @@ amsA' (L l a) = do
!cs <- getCommentsFor l
return (L (EpAnn (spanAsAnchor l) noAnn cs) a)
--- acsExpr :: (EpAnnComments -> LHsExpr GhcPs) -> P p ECP
+-- acsExpr :: (EpAnnComments -> LHsExpr GhcPs) -> P PpState ECP
-- acsExpr a = do { expr :: (LHsExpr GhcPs) <- runPV $ acsa a
-- ; return (ecpFromExp $ expr) }
@@ -4588,7 +4590,7 @@ amsr (L l a) an = do
-- This and the signature module parser are the only parser entry points that
-- deal with Haddock comments. The other entry points ('parseDeclaration',
-- 'parseExpression', etc) do not insert them into the AST.
-parseModule :: P p (Located (HsModule GhcPs))
+parseModule :: P PpState (Located (HsModule GhcPs))
parseModule = parseModuleNoHaddock >>= addHaddockToModule
-- | Parse a Haskell signature module with Haddock comments. This is done in two
@@ -4600,7 +4602,7 @@ parseModule = parseModuleNoHaddock >>= addHaddockToModule
-- This and the module parser are the only parser entry points that deal with
-- Haddock comments. The other entry points ('parseDeclaration',
-- 'parseExpression', etc) do not insert them into the AST.
-parseSignature :: P p (Located (HsModule GhcPs))
+parseSignature :: P PpState (Located (HsModule GhcPs))
parseSignature = parseSignatureNoHaddock >>= addHaddockToModule
commentsA :: (NoAnn ann) => SrcSpan -> EpAnnComments -> EpAnn ann
@@ -4614,7 +4616,7 @@ spanWithComments l = do
-- | Instead of getting the *enclosed* comments, this includes the
-- *preceding* ones. It is used at the top level to get comments
-- between top level declarations.
-commentsPA :: (NoAnn ann) => LocatedAn ann a -> P p (LocatedAn ann a)
+commentsPA :: (NoAnn ann) => LocatedAn ann a -> P PpState (LocatedAn ann a)
commentsPA la@(L l a) = do
!cs <- getPriorCommentsFor (getLocA la)
return (L (addCommentsToEpAnn l cs) a)
=====================================
compiler/GHC/Parser/HaddockLex.x
=====================================
@@ -10,6 +10,7 @@ import GHC.Hs.Doc
import GHC.Parser.Lexer hiding (AlexInput)
import GHC.Parser.Lexer.Interface (adjustChar)
import GHC.Parser.Annotation
+import GHC.Parser.PreProcess.Types (PpState(..), initPpState)
import GHC.Types.SrcLoc
import GHC.Types.SourceText
import GHC.Data.StringBuffer
@@ -131,7 +132,7 @@ advanceSrcLocBS !loc bs = case utf8UnconsByteString bs of
Just (c, bs') -> advanceSrcLocBS (advanceSrcLoc loc c) bs'
-- | Lex 'StringLiteral' for warning messages
-lexStringLiteral :: P () (LocatedN RdrName) -- ^ A precise identifier parser
+lexStringLiteral :: P PpState (LocatedN RdrName) -- ^ A precise identifier parser
-> Located StringLiteral
-> Located (WithHsDocIdentifiers StringLiteral GhcPs)
lexStringLiteral identParser (L l sl@(StringLiteral _ fs _))
@@ -149,7 +150,7 @@ lexStringLiteral identParser (L l sl@(StringLiteral _ fs _))
fakeLoc = mkRealSrcLoc nilFS 0 0
-- | Lex identifiers from a docstring.
-lexHsDoc :: P () (LocatedN RdrName) -- ^ A precise identifier parser
+lexHsDoc :: P PpState (LocatedN RdrName) -- ^ A precise identifier parser
-> HsDocString
-> HsDoc GhcPs
lexHsDoc identParser doc =
@@ -169,7 +170,7 @@ lexHsDoc identParser doc =
fakeLoc = mkRealSrcLoc nilFS 0 0
-validateIdentWith :: P () (LocatedN RdrName) -> SrcSpan -> ByteString -> Maybe (Located RdrName)
+validateIdentWith :: P PpState (LocatedN RdrName) -> SrcSpan -> ByteString -> Maybe (Located RdrName)
validateIdentWith identParser mloc str0 =
let -- These ParserFlags should be as "inclusive" as possible, allowing
-- identifiers defined with any language extension.
@@ -182,7 +183,7 @@ validateIdentWith identParser mloc str0 =
realSrcLc = case mloc of
RealSrcSpan loc _ -> realSrcSpanStart loc
UnhelpfulSpan _ -> mkRealSrcLoc nilFS 0 0
- pstate = initParserState () pflags buffer realSrcLc
+ pstate = initParserState initPpState pflags buffer realSrcLc
in case unP identParser pstate of
POk _ name -> Just $ case mloc of
RealSrcSpan _ _ -> reLoc name
=====================================
compiler/GHC/Parser/Lexer.x
=====================================
@@ -1259,8 +1259,8 @@ cppToken code span buf len _buf2 =
-- check if the string ends with backslash and newline
-- NOTE: performance likely sucks, make it work for now
(len0, continue) <- case (reverse $ unpackFS tokStr) of
- -- ('\n':'\\':_) -> pushLexState code >> return (len -2, True)
- ('\n':'\\':_) -> pushLexState (trace ("cppToken: push state") code) >> return (len - 2, True)
+ ('\n':'\\':_) -> pushLexState code >> return (len -2, True)
+ -- ('\n':'\\':_) -> pushLexState (trace ("cppToken: push state") code) >> return (len - 2, True)
('\n':_) -> return (len - 1, False)
_ -> return (len, False)
return (L span (ITcpp continue $! lexemeToFastString buf len0))
@@ -1281,8 +1281,8 @@ cppTokenCont code span buf len _buf2 =
cppTokenPop :: (FastString -> Token)-> Action p
cppTokenPop t span buf len _buf2 =
do _ <- popLexState
- -- return (L span (t $! lexemeToFastString buf (len - 1)))
- return (L span (t $! lexemeToFastString buf (trace "cppTokenPop" len)))
+ -- return (L span (t $! lexemeToFastString buf (trace "cppTokenPop" len)))
+ return (L span (t $! lexemeToFastString buf len))
popCpp :: Action p
popCpp _span _buf _len _buf2 =
=====================================
compiler/GHC/Parser/PreProcess.hs
=====================================
@@ -25,6 +25,9 @@ import GHC.Data.StringBuffer
import GHC.Parser.Errors.Ppr ()
import GHC.Parser.Lexer (P (..), PState (..), ParseResult (..), Token (..))
import GHC.Parser.Lexer qualified as Lexer
+import GHC.Parser.PreProcess.Macro
+import GHC.Parser.PreProcess.ParsePP
+import GHC.Parser.PreProcess.Types
import GHC.Prelude
import GHC.Types.SrcLoc
@@ -38,46 +41,6 @@ initPragState = Lexer.initPragState initPpState
initParserState :: Lexer.ParserOpts -> StringBuffer -> RealSrcLoc -> PState PpState
initParserState = Lexer.initParserState initPpState
-initPpState :: PpState
-initPpState =
- PpState
- { pp_defines = Map.empty
- , pp_includes = Map.empty
- , pp_include_stack = []
- , pp_continuation = []
- , pp_context = []
- , pp_accepting = True
- }
-
-data PpState = PpState
- { pp_defines :: !(Map MacroName MacroDef)
- , pp_includes :: !(Map String StringBuffer)
- , pp_include_stack :: ![Lexer.AlexInput]
- , pp_continuation :: ![Located Token]
- , pp_context :: ![Token] -- ^ What preprocessor directive we are currently processing
- , pp_accepting :: !Bool
- }
- deriving (Show)
-
--- ---------------------------------------------------------------------
-
-data CppDirective
- = CppInclude String
- | CppDefine String [String]
- | CppIfdef String
- | CppIfndef String
- | CppIf [String]
- | CppElse
- | CppEndif
- deriving (Show, Eq)
-
--- ---------------------------------------------------------------------
-
-type MacroArgs = [String]
-data MacroName = MacroName String (Maybe MacroArgs)
- deriving (Show, Eq, Ord)
-type MacroDef = String
-
-- ---------------------------------------------------------------------
data CppState
@@ -87,21 +50,8 @@ data CppState
-- ---------------------------------------------------------------------
-{-
-lexer, lexerDbg :: Bool -> (Located Token -> P PpState a) -> P PpState a
--- bypass for now, work in ghci
-lexer = Lexer.lexer
-lexerDbg = Lexer.lexerDbg
-
-setAccepting :: Bool -> P PpState ()
-setAccepting on =
- P $ \s -> POk s{pp = (pp s){pp_accepting = on}} ()
-
-getAccepting :: P PpState Bool
-getAccepting = P $ \s -> POk s (pp_accepting (pp s))
--}
-
--- ---------------------------------------------------------------------
+lexer = ppLexer
+lexerDbg = ppLexerDbg
ppLexer, ppLexerDbg :: Bool -> (Located Token -> PP a) -> PP a
-- Use this instead of 'lexer' in GHC.Parser to dump the tokens for debugging.
@@ -113,16 +63,16 @@ ppLexer queueComments cont =
queueComments
( \tk ->
let
- contInner t = (trace ("ppLexer: tk=" ++ show (unLoc tk, unLoc t)) cont) t
- -- contPush = pushContext (unLoc tk) >> contIgnoreTok tk
+ -- contInner t = (trace ("ppLexer: tk=" ++ show (unLoc tk, unLoc t)) cont) t
+ contInner t = cont t
contIgnoreTok (L l tok) = do
case l of
RealSrcSpan r (Strict.Just b) -> Lexer.queueIgnoredToken (L (PsSpan r b) tok)
_ -> return ()
ppLexer queueComments cont
in
- -- case tk of
- case (trace ("M.ppLexer:tk=" ++ show (unLoc tk)) tk) of
+ case tk of
+ -- case (trace ("M.ppLexer:tk=" ++ show (unLoc tk)) tk) of
L _ ITeof -> do
mInp <- popIncludeLoc
case mInp of
@@ -173,7 +123,7 @@ processCpp fs = do
-- let s = cppInitial fs
let s = cppInitial fs
case parseDirective s of
- Left err -> error $ show err
+ Left err -> error $ show (err, s)
Right (CppInclude filename) -> do
ppInclude filename
Right (CppDefine name def) -> do
@@ -287,26 +237,26 @@ ppInclude filename = do
pushIncludeLoc origInput
let loc = PsLoc (mkRealSrcLoc (mkFastString filename) 1 1) (BufPos 0)
Lexer.setInput (Lexer.AI loc src)
- return $ trace ("ppInclude:mSrc=[" ++ show mSrc ++ "]") ()
+ -- return $ trace ("ppInclude:mSrc=[" ++ show mSrc ++ "]") ()
-- return $ trace ("ppInclude:filename=[" ++ filename ++ "]") ()
-ppDefine :: String -> [String] -> PP ()
+ppDefine :: String -> String -> PP ()
ppDefine name val = P $ \s ->
- -- POk s{pp = (pp s){pp_defines = Set.insert (cleanTokenString def) (pp_defines (pp s))}} ()
- POk s{pp = (pp s){pp_defines = Map.insert (trace ("ppDefine:def=[" ++ name ++ "]") (MacroName name Nothing)) val (pp_defines (pp s))}} ()
+ -- POk s{pp = (pp s){pp_defines = Map.insert (trace ("ppDefine:def=[" ++ name ++ "]") (MacroName name Nothing)) val (pp_defines (pp s))}} ()
+ POk s{pp = (pp s){pp_defines = Map.insert (MacroName name Nothing) val (pp_defines (pp s))}} ()
ppIsDefined :: String -> PP Bool
ppIsDefined def = P $ \s ->
- -- POk s (Map.member def (pp_defines (pp s)))
- POk s (Map.member (trace ("ppIsDefined:def=[" ++ def ++ "]") (MacroName def Nothing)) (pp_defines (pp s)))
+ POk s (Map.member (MacroName def Nothing) (pp_defines (pp s)))
+ -- POk s (Map.member (trace ("ppIsDefined:def=[" ++ def ++ "]") (MacroName def Nothing)) (pp_defines (pp s)))
-ppIf :: [String] -> PP Bool
-ppIf toks = P $ \s ->
+ppIf :: String -> PP Bool
+ppIf str = P $ \s ->
-- -- POk s (Map.member def (pp_defines (pp s)))
-- POk s (Map.member (trace ("ppIsDefined:def=[" ++ def ++ "]") def) (pp_defines (pp s)))
let
- s' = cppIf (pp s) toks
+ s' = cppIf (pp s) str
in
POk s{pp = s'} (pp_accepting s')
@@ -317,13 +267,13 @@ cleanTokenString fs = r
ss = dropWhile (\c -> not $ isSpace c) (unpackFS fs)
r = init ss
-parseDefine :: FastString -> Maybe (String, [String])
-parseDefine fs = r
- where
- -- r = Just (cleanTokenString s, "")
- r = case parseCppParser cppDefinition (unpackFS fs) of
- Left _ -> Nothing
- Right v -> Just v
+-- parseDefine :: FastString -> Maybe (String, [String])
+-- parseDefine fs = r
+-- where
+-- -- r = Just (cleanTokenString s, "")
+-- r = case parseCppParser cppDefinition (unpackFS fs) of
+-- Left _ -> Nothing
+-- Right v -> Just v
-- =====================================================================
@@ -331,9 +281,7 @@ parseDefine fs = r
See Note [GhcCPP Initial Processing]
-}
cppInitial :: [FastString] -> String
-cppInitial fs = r
- where
- r = concatMap unpackFS fs
+cppInitial fs = concatMap unpackFS fs
{-
Note [GhcCPP Initial Processing]
=====================================
compiler/GHC/Parser/PreProcess/Eval.hs
=====================================
@@ -0,0 +1,43 @@
+module GHC.Parser.PreProcess.Eval where
+
+import GHC.Parser.PreProcess.Types
+import GHC.Prelude
+
+-- ---------------------------------------------------------------------
+
+eval :: Expr -> Int
+eval (Parens e) = eval e
+eval (Var v) = error $ "need to look up :" ++ v
+eval (IntVal i) = i
+eval (Plus e1 e2) = (eval e1) + (eval e2)
+eval (Times e1 e2) = (eval e1) * (eval e2)
+eval (Logic op e1 e2) = evalLogicOp op (eval e1) (eval e2)
+eval (Comp op e1 e2) = evalCompOp op (eval e1) (eval e2)
+
+evalLogicOp :: LogicOp -> Int -> Int -> Int
+evalLogicOp LogicalOr e1 e2 = fromBool $ (toBool e1) || (toBool e2)
+evalLogicOp LogicalAnd e1 e2 = fromBool $ (toBool e1) || (toBool e2)
+
+
+evalCompOp :: CompOp -> Int -> Int -> Int
+evalCompOp CmpEqual e1 e2 = fromBool $ e1 == e2
+evalCompOp CmpGt e1 e2 = fromBool $ e1 > e2
+evalCompOp CmpGtE e1 e2 = fromBool $ e1 >= e2
+evalCompOp CmpLt e1 e2 = fromBool $ e1 < e2
+evalCompOp CmpLtE e1 e2 = fromBool $ e1 <= e2
+
+toBool :: Int -> Bool
+toBool 0 = False
+toBool _ = True
+
+fromBool :: Bool -> Int
+fromBool False = 0
+fromBool True = 1
+
+-- ---------------------------------------------------------------------
+
+v0 :: Int
+v0 = eval (Plus (IntVal 1) (IntVal 3))
+
+v1 :: Int
+v1 = eval (Comp CmpGt (IntVal 4) (IntVal 3))
=====================================
compiler/GHC/Parser/PreProcess/Lexer.x
=====================================
@@ -0,0 +1,132 @@
+{
+module GHC.Parser.PreProcess.Lexer (lex_tok, lexCppTokenStream ) where
+
+import GHC.Parser.PreProcess.ParserM (
+ St, init_pos,
+ ParserM (..), Action, mkTv, Token(..), start_code,
+ setStartCode,
+ show_pos, position,
+ AlexInput(..), alexGetByte)
+import qualified GHC.Parser.PreProcess.ParserM as ParserM (input)
+import Control.Monad
+import GHC.Prelude
+
+
+-- The lexer is based on
+-- https://timsong-cpp.github.io/cppwp/n4140/lex.pptoken
+}
+
+words :-
+
+ <0> $white+ ;
+---------------------------------------
+
+ <0> "{" { mkTv TOpenBrace }
+ <0> "}" { mkTv TCloseBrace }
+ <0> "[" { mkTv TOpenBracket }
+ <0> "]" { mkTv TCloseBracket }
+ <0> "#" { mkTv THash }
+ <0> "##" { mkTv THashHash }
+ <0> "(" { mkTv TOpenParen }
+ <0> ")" { mkTv TCloseParen }
+ <0> "<:" { mkTv TLtColon }
+ <0> ":>" { mkTv TColonGt}
+ <0> "<%" { mkTv TLtPercent }
+ <0> "%>" { mkTv TPercentGt }
+ <0> "%:" { mkTv TPercentColon }
+ <0> "%:%:" { mkTv TPercentColonTwice }
+ <0> ";" { mkTv TSemi }
+ <0> ":" { mkTv TColon }
+ <0> "..." { mkTv TDotDotDot }
+ <0> "new" { mkTv TNew }
+ <0> "delete" { mkTv TDelete }
+ <0> "?" { mkTv TQuestion }
+ <0> "::" { mkTv TColonColon}
+ <0> "." { mkTv TDot }
+ <0> ".*" { mkTv TDotStar }
+ <0> "+" { mkTv TPlus }
+ <0> "-" { mkTv TMinus }
+ <0> "*" { mkTv TStar }
+ <0> "/" { mkTv TSlash }
+ <0> "%" { mkTv TPercent }
+ <0> "^" { mkTv TUpArrow }
+ <0> "&" { mkTv TAmpersand }
+ <0> "|" { mkTv TPipe }
+ <0> "~" { mkTv TTilde }
+ <0> "!" { mkTv TExclamation }
+ <0> "=" { mkTv TEqual }
+ <0> "<" { mkTv TOpenAngle }
+ <0> ">" { mkTv TCloseAngle }
+ <0> "+=" { mkTv TPlusEqual }
+ <0> "-=" { mkTv TMinusEqual }
+ <0> "*=" { mkTv TStarEqual }
+ <0> "/=" { mkTv TSlashEqual }
+ <0> "%=" { mkTv TPercentEqual }
+ <0> "^=" { mkTv TUpEqual }
+ <0> "&=" { mkTv TAmpersandEqual }
+ <0> "|=" { mkTv TPipeEqual }
+ <0> "<<" { mkTv TLtLt }
+ <0> ">>" { mkTv TGtGt }
+ <0> ">>=" { mkTv TGtGtEqual }
+ <0> "<<=" { mkTv TLtLtEqual }
+ <0> "==" { mkTv TEqualEqual }
+ <0> "!=" { mkTv TExclaimEqual }
+ <0> "<=" { mkTv TLtEqual }
+ <0> ">=" { mkTv TGtEqual }
+ <0> "&&" { mkTv TAmpersandTwice }
+ <0> "||" { mkTv TPipePipe }
+ <0> "++" { mkTv TPlusPlus }
+ <0> "--" { mkTv TMinusMinus }
+ <0> "," { mkTv TComma }
+ <0> "->*" { mkTv TMinusGtStar }
+ <0> "->" { mkTv TMinusGt }
+ <0> "and" { mkTv TAnd }
+ <0> "and_eq" { mkTv TAndEq }
+ <0> "bitand" { mkTv TBitand }
+ <0> "bitor" { mkTv TBitor }
+ <0> "compl" { mkTv TCompl }
+ <0> "not" { mkTv TNot }
+ <0> "not_eq" { mkTv TNotEq }
+ <0> "or" { mkTv TOr }
+ <0> "or_eq" { mkTv TOrEq }
+ <0> "xor" { mkTv TXor }
+ <0> "xor_eq" { mkTv TXorEq }
+----------------------------------------
+ <0> [a-z][a-zA-Z0-9\#_]* { mkTv TLowerName }
+ <0> [A-Z][a-zA-Z0-9\#_]* { mkTv TUpperName }
+ <0> \-? [0-9][0-9]* { mkTv TInteger }
+ <0> \" [^\"]* \" { mkTv (TString . tail . init) }
+ <0> () { begin other }
+
+ <other> .+ { \i -> do {setStartCode 0;
+ mkTv TOther i} }
+
+{
+
+begin :: Int -> Action
+begin sc _str =
+ do setStartCode sc
+ get_tok
+
+get_tok :: ParserM Token
+get_tok = ParserM $ \i st ->
+ case alexScan i (start_code st) of
+ AlexEOF -> Right (i, st, TEOF "")
+ AlexError _ -> Left ("Lexical error at " ++ show_pos (position i))
+ AlexSkip i' _ -> case get_tok of
+ ParserM f -> f i' st
+ AlexToken i' l a -> case a $ take l $ ParserM.input i of
+ ParserM f -> f i' st
+
+lex_tok :: (Token -> ParserM a) -> ParserM a
+lex_tok cont = get_tok >>= cont
+
+lexCppTokenStream :: String -> St -> Either String (AlexInput, St, [Token])
+lexCppTokenStream s = unParserM go (AlexInput init_pos [] s)
+ where
+ go = do
+ ltok <- lex_tok return
+ case ltok of
+ TEOF _ -> return []
+ _ -> liftM (ltok:) go
+}
=====================================
compiler/GHC/Parser/PreProcess/Macro.hs
=====================================
@@ -0,0 +1,134 @@
+module GHC.Parser.PreProcess.Macro where
+
+-- From https://gcc.gnu.org/onlinedocs/cpp/Macros.html
+
+{-
+
+A macro is a fragment of code which has been given a name. Whenever
+the name is used, it is replaced by the contents of the macro. There
+are two kinds of macros. They differ mostly in what they look like
+when they are used. Object-like macros resemble data objects when
+used, function-like macros resemble function calls.
+
+... the preprocessor operator `defined` can never be defined as a macro
+
+If the expansion of a macro contains its own name, either directly or
+via intermediate macros, it is not expanded again when the expansion
+is examined for more macros. See
+https://gcc.gnu.org/onlinedocs/cpp/Self-Referential-Macros.html for
+details
+
+-}
+
+-- TODO: Parse tokens with original locations in them.
+
+import Data.Map qualified as Map
+import Data.Maybe
+
+import GHC.Parser.PreProcess.Eval
+import GHC.Parser.PreProcess.ParsePP
+import GHC.Parser.PreProcess.Parser qualified as Parser
+import GHC.Parser.PreProcess.ParserM
+import GHC.Parser.PreProcess.Types
+import GHC.Prelude
+
+-- ---------------------------------------------------------------------
+
+process :: PpState -> Input -> (PpState, Output)
+process s str = (s0, o)
+ where
+ o = case parseDirective str of
+ Left _ -> undefined
+ Right r -> r
+ s0 = case o of
+ CppDefine name rhs -> define s name rhs
+ CppInclude _ -> undefined
+ CppIfdef name -> ifdef s name
+ CppIf ifstr -> cppIf s ifstr
+ CppIfndef name -> ifndef s name
+ CppElse -> undefined
+ CppEndif -> undefined
+
+-- ---------------------------------------------------------------------
+
+define :: PpState -> String -> MacroDef -> PpState
+define s name toks = s{pp_defines = Map.insert (MacroName name Nothing) toks (pp_defines s)}
+
+ifdef :: PpState -> String -> PpState
+ifdef s name =
+ case Map.lookup (MacroName name Nothing) (pp_defines s) of
+ Just _ -> s{pp_accepting = True}
+ _ -> s{pp_accepting = False}
+
+ifndef :: PpState -> String -> PpState
+ifndef s name =
+ case Map.lookup (MacroName name Nothing) (pp_defines s) of
+ Just _ -> s{pp_accepting = False}
+ _ -> s{pp_accepting = True}
+
+cppIf :: PpState -> String -> PpState
+cppIf s str = r
+ where
+ expanded = expand s str
+ -- toks0 = cppLex expanded
+ -- r = error (show toks0)
+ v = case Parser.parseExpr expanded of
+ Left err -> error $ show err
+ Right tree -> eval tree
+ -- We evaluate to an Int, which we convert to a bool
+ r = s{pp_accepting = toBool v}
+
+-- ---------------------------------------------------------------------
+
+expand :: PpState -> String -> String
+expand s str = expanded
+ where
+ -- TODO: repeat until re-expand or fixpoint
+ toks = case cppLex str of
+ Left err -> error err
+ Right tks -> tks
+ expanded = concatMap (expandOne s) toks
+
+expandOne :: PpState -> Token -> String
+expandOne s tok = r
+ where
+ -- TODO: protect against looking up `define`
+ r =
+ fromMaybe
+ (t_str tok)
+ (Map.lookup (MacroName (t_str tok) Nothing) (pp_defines s))
+
+-- ---------------------------------------------------------------------
+
+m0 :: (PpState, Output)
+m0 = do
+ let (s0, _) = process initPpState "#define FOO 3"
+ let (s1, _) = process s0 "#ifdef FOO"
+ process s1 "# if FOO == 4"
+
+-- ---------------------------------------------------------------------
+
+m1 :: Either String [Token]
+m1 = cppLex "`"
+
+m2 :: Either String [Token]
+m2 = cppLex "hello(5)"
+
+m3 :: Either String [Token]
+m3 = cppLex "#define FOO(m1,m2,m) ((m1) < 1 || (m1) == 1 && (m2) < 7 || (m1) == 1 && (m2) == 7 && (m) <= 0)"
+
+-- Right [THash {t_str = "#"}
+-- ,TDefine {t_str = "define"}
+-- ,TUpperName {t_str = "FOO"}
+-- ,TOpenParen {t_str = "("}
+-- ,TLowerName {t_str = "m1"}
+-- ,TComma {t_str = ","}
+-- ,TLowerName {t_str = "m2"}
+-- ,TComma {t_str = ","}
+-- ,TLowerName {t_str = "m"}
+-- ,TCloseParen {t_str = ")"}
+-- ,TOther {t_str = " ((m1) < 1 || (m1) == 1 && (m2) < 7 || (m1) == 1 && (m2) == 7 && (m) <= 0)"}
+-- ]
+
+m4 :: Either String [Token]
+m4 = cppLex "#if (m < 1)"
=====================================
compiler/GHC/Parser/PreProcess/ParsePP.hs
=====================================
@@ -0,0 +1,95 @@
+module GHC.Parser.PreProcess.ParsePP (
+ cppLex,
+ parseDirective,
+) where
+
+import Data.List
+import GHC.Parser.Errors.Ppr ()
+
+import GHC.Parser.PreProcess.Lexer
+import GHC.Parser.PreProcess.ParserM (Token (..), init_state)
+import GHC.Parser.PreProcess.Types
+import GHC.Prelude
+
+-- =====================================================================
+-- First parse to CPP tokens, using a C++-like language spec
+-- https://gcc.gnu.org/onlinedocs/cpp/Tokenization.html
+
+-- Parse a CPP directive, using tokens from the CPP lexer
+parseDirective :: String -> Either String CppDirective
+parseDirective s =
+ case cppLex s of
+ Left e -> Left e
+ Right toks ->
+ case map t_str toks of
+ ("#" : "define" : ts) -> cppDefine ts
+ ("#" : "include" : ts) -> Right $ cppInclude ts
+ ("#" : "if" : ts) -> Right $ cppIf ts
+ ("#" : "ifndef" : ts) -> Right $ cppIfndef ts
+ ("#" : "ifdef" : ts) -> Right $ cppIfdef ts
+ ("#" : "else" : ts) -> Right $ cppElse ts
+ ("#" : "endif" : ts) -> Right $ cppEndif ts
+ other -> Left ("unexpected directive: " ++ (intercalate " " other))
+
+cppDefine :: [String] -> Either String CppDirective
+cppDefine [] = Left "error:empty #define directive"
+cppDefine (n : ts) = Right $ CppDefine n (intercalate " " ts)
+
+cppInclude :: [String] -> CppDirective
+cppInclude ts = CppInclude (intercalate " " ts)
+
+cppIf :: [String] -> CppDirective
+cppIf ts = CppIf (intercalate " " ts)
+
+cppIfdef :: [String] -> CppDirective
+cppIfdef ts = CppIfdef (intercalate " " ts)
+
+cppIfndef :: [String] -> CppDirective
+cppIfndef ts = CppIfndef (intercalate " " ts)
+
+cppElse :: [String] -> CppDirective
+cppElse _ts = CppElse
+
+cppEndif :: [String] -> CppDirective
+cppEndif _ts = CppEndif
+
+-- ---------------------------------------------------------------------
+
+cppLex :: String -> Either String [Token]
+cppLex s = case lexCppTokenStream s init_state of
+ Left err -> Left err
+ Right (_inp, _st, toks) -> Right toks
+
+-- ---------------------------------------------------------------------
+
+doATest :: String -> Either String CppDirective
+doATest str = parseDirective str
+
+-- doATest str = parseDirectiveOld str
+
+t0 :: Either String CppDirective
+t0 = doATest "#define FOO(m1,m2,m) ((m1) < 1 || (m1) == 1 && (m2) < 7 || (m1) == 1 && (m2) == 7 && (m) <= 0)"
+
+-- Right (CppDefine "FOO(m1,m2,m)" ["((m1)","<","1","||","(m1)","==","1","&&","(m2)","<","7","||","(m1)","==","1","&&","(m2)","==","7","&&","(m)","<=","0)"])
+
+t1 :: Either String CppDirective
+t1 = doATest "#if (m < 1)"
+
+t2 :: Either String CppDirective
+t2 = doATest "# if ((m1) < 1 || (m1) == 1 && (m2) < 7 || (m1) == 1 && (m2) == 7 && (m) <= 0)"
+
+-- Right (CppIf
+-- (Logic LogicalOr
+-- (Logic LogicalOr
+-- (Comp CmpLt (Var "m1") (IntVal 1))
+-- (Logic LogicalAnd
+-- (Comp CmpEqual (Var "m1") (IntVal 1))
+-- (Comp CmpLt (Var "m2") (IntVal 7))))
+-- (Logic LogicalAnd
+-- (Logic LogicalAnd
+-- (Comp CmpEqual (Var "m1") (IntVal 1))
+-- (Comp CmpEqual (Var "m2") (IntVal 7)))
+-- (Comp CmpLtE (Var "m") (IntVal 0)))))
+
+t3 :: Either String CppDirective
+t3 = parseDirective "# if FOO == 4"
=====================================
compiler/GHC/Parser/PreProcess/Parser.y
=====================================
@@ -0,0 +1,131 @@
+{
+module GHC.Parser.PreProcess.Parser (parseExpr) where
+
+import GHC.Parser.PreProcess.Lexer (lex_tok)
+import GHC.Parser.PreProcess.ParserM (Token(..), ParserM, run_parser, get_pos, show_pos,
+ happyError)
+import GHC.Parser.PreProcess.Types
+import GHC.Prelude
+-- Needed when invoking happy -ad
+-- import qualified GHC.Internal.Data.Tuple as Happy_Prelude
+}
+
+%name expr
+%expect 0
+%tokentype { Token }
+%monad { ParserM }
+%lexer { lex_tok } { TEOF "" }
+
+%token
+
+ '{' { TOpenBrace {} }
+ '}' { TCloseBrace {} }
+ '[' { TOpenBracket {} }
+ ']' { TCloseBracket {} }
+ '#' { THash {} }
+ '##' { THashHash {} }
+ '(' { TOpenParen {} }
+ ')' { TCloseParen {} }
+ '<:' { TLtColon {} }
+ ':>' { TColonGt{} }
+ '<%' { TLtPercent {} }
+ '%>' { TPercentGt {} }
+ '%:' { TPercentColon {} }
+ '%:%:' { TPercentColonTwice {} }
+ ';' { TSemi {} }
+ ':' { TColon {} }
+ '...' { TDotDotDot {} }
+ 'new' { TNew {} }
+ 'delete' { TDelete {} }
+ '?' { TQuestion {} }
+ '::' { TColonColon{} }
+ '.' { TDot {} }
+ '.*' { TDotStar {} }
+ '+' { TPlus {} }
+ '-' { TMinus {} }
+ '*' { TStar {} }
+ '/' { TSlash {} }
+ '%' { TPercent {} }
+ '^' { TUpArrow {} }
+ '&' { TAmpersand {} }
+ '|' { TPipe {} }
+ '~' { TTilde {} }
+ '!' { TExclamation {} }
+ '=' { TEqual {} }
+ '<' { TOpenAngle {} }
+ '>' { TCloseAngle {} }
+ '+=' { TPlusEqual {} }
+ '-=' { TMinusEqual {} }
+ '*=' { TStarEqual {} }
+ '/=' { TSlashEqual {} }
+ '%=' { TPercentEqual {} }
+ '^=' { TUpEqual {} }
+ '&=' { TAmpersandEqual {} }
+ '|=' { TPipeEqual {} }
+ '<<' { TLtLt {} }
+ '>>' { TGtGt {} }
+ '>>=' { TGtGtEqual {} }
+ '<<=' { TLtLtEqual {} }
+ '==' { TEqualEqual {} }
+ '!=' { TExclaimEqual {} }
+ '<=' { TLtEqual {} }
+ '>=' { TGtEqual {} }
+ '&&' { TAmpersandTwice {} }
+ '||' { TPipePipe {} }
+ '++' { TPlusPlus {} }
+ '--' { TMinusMinus {} }
+ ',' { TComma {} }
+ '->*' { TMinusGtStar {} }
+ '->' { TMinusGt {} }
+ 'and' { TAnd {} }
+ 'and_eq' { TAndEq {} }
+ 'bitand' { TBitand {} }
+ 'bitor' { TBitor {} }
+ 'compl' { TCompl {} }
+ 'not' { TNot {} }
+ 'not_eq' { TNotEq {} }
+ 'or' { TOr {} }
+ 'or_eq' { TOrEq {} }
+ 'xor' { TXor {} }
+ 'xor_eq' { TXorEq {} }
+
+ lower_name { TLowerName {} }
+ upper_name { TUpperName {} }
+ integer { TInteger {} }
+ string { TString {} }
+ other { TOther {} }
+
+-- Operator precedence. Earlier in the table is lower
+-- Note: this seems to require all the operators to appear in the same
+-- rule.
+%left '||'
+%left '&&'
+%left '=='
+%left '>' '>=' '<' '<='
+%left '+' '-'
+%left '*' '/'
+
+%%
+
+expr :: { Expr }
+expr : variable { $1 }
+ | integer { IntVal (read $ t_str $1) }
+ | '(' expr ')' { $2 }
+ | expr '||' expr { Logic LogicalOr $1 $3 }
+ | expr '&&' expr { Logic LogicalAnd $1 $3 }
+ | expr '==' expr { Comp CmpEqual $1 $3 }
+ | expr '>' expr { Comp CmpGt $1 $3 }
+ | expr '>=' expr { Comp CmpGtE $1 $3 }
+ | expr '<' expr { Comp CmpLt $1 $3 }
+ | expr '<=' expr { Comp CmpLtE $1 $3 }
+
+variable :: {Expr}
+variable : name { Var $1 }
+
+name : lower_name { t_str $1 }
+ | upper_name { t_str $1 }
+
+{
+-- parseExpr :: String -> Either String Expr
+parseExpr = run_parser expr
+}
=====================================
compiler/GHC/Parser/PreProcess/ParserM.hs
=====================================
@@ -0,0 +1,256 @@
+{-# LANGUAGE BinaryLiterals #-}
+
+-- Support functions for the CPP Lexer.x
+
+module GHC.Parser.PreProcess.ParserM (
+ -- Parser Monad
+ ParserM (..),
+ AlexInput (..),
+ run_parser,
+ -- Parser state
+ St,
+ init_state,
+ StartCode,
+ start_code,
+ setStartCode,
+ -- Tokens
+ Token (..),
+ -- Actions
+ Action,
+ andBegin,
+ mkT,
+ mkTv,
+ -- Positions
+ init_pos,
+ get_pos,
+ show_pos,
+ -- Input
+ alexGetByte,
+ alexInputPrevChar,
+ -- Other
+ happyError,
+) where
+
+import Control.Applicative
+
+import Control.Monad.Fail (MonadFail (..))
+import Prelude hiding (fail)
+
+import Control.Monad (ap, liftM)
+import Data.Bits (shiftR, (.&.), (.|.))
+import Data.Char (ord)
+import Data.Word (Word8)
+
+-- Parser Monad
+newtype ParserM a = ParserM {unParserM :: AlexInput -> St -> Either String (AlexInput, St, a)}
+
+-- newtype P a = P { unP :: PState -> ParseResult a }
+
+instance Functor ParserM where
+ fmap = liftM
+
+instance Applicative ParserM where
+ pure a = ParserM $ \i s -> Right (i, s, a)
+ (<*>) = ap
+
+instance Monad ParserM where
+ ParserM m >>= k = ParserM $ \i s -> case m i s of
+ Right (i', s', x) ->
+ case k x of
+ ParserM y -> y i' s'
+ Left err ->
+ Left err
+
+instance MonadFail ParserM where
+ fail err = ParserM $ \_ _ -> Left err
+
+run_parser :: ParserM a -> (String -> Either String a)
+run_parser (ParserM f) =
+ \s -> case f (AlexInput init_pos [] s) init_state of
+ Left es -> Left es
+ Right (_, _, x) -> Right x
+
+-- Parser state
+
+data St = St
+ { start_code :: !StartCode
+ , brace_depth :: !Int
+ }
+ deriving (Show)
+type StartCode = Int
+
+init_state :: St
+init_state =
+ St
+ { start_code = 0
+ , brace_depth = 0
+ }
+
+-- Tokens
+
+data Token
+ = TEOF {t_str :: String}
+ | TOpenBrace {t_str :: String}
+ | TCloseBrace {t_str :: String}
+ | TOpenBracket {t_str :: String}
+ | TCloseBracket {t_str :: String}
+ | THash {t_str :: String}
+ | THashHash {t_str :: String}
+ | TOpenParen {t_str :: String}
+ | TCloseParen {t_str :: String}
+ | TLtColon {t_str :: String}
+ | TColonGt {t_str :: String}
+ | TLtPercent {t_str :: String}
+ | TPercentGt {t_str :: String}
+ | TPercentColon {t_str :: String}
+ | TPercentColonTwice {t_str :: String}
+ | TSemi {t_str :: String}
+ | TColon {t_str :: String}
+ | TDotDotDot {t_str :: String}
+ | TNew {t_str :: String}
+ | TDelete {t_str :: String}
+ | TQuestion {t_str :: String}
+ | TColonColon {t_str :: String}
+ | TDot {t_str :: String}
+ | TDotStar {t_str :: String}
+ | TPlus {t_str :: String}
+ | TMinus {t_str :: String}
+ | TStar {t_str :: String}
+ | TSlash {t_str :: String}
+ | TPercent {t_str :: String}
+ | TUpArrow {t_str :: String}
+ | TAmpersand {t_str :: String}
+ | TPipe {t_str :: String}
+ | TTilde {t_str :: String}
+ | TExclamation {t_str :: String}
+ | TEqual {t_str :: String}
+ | TOpenAngle {t_str :: String}
+ | TCloseAngle {t_str :: String}
+ | TPlusEqual {t_str :: String}
+ | TMinusEqual {t_str :: String}
+ | TStarEqual {t_str :: String}
+ | TSlashEqual {t_str :: String}
+ | TPercentEqual {t_str :: String}
+ | TUpEqual {t_str :: String}
+ | TAmpersandEqual {t_str :: String}
+ | TPipeEqual {t_str :: String}
+ | TLtLt {t_str :: String}
+ | TGtGt {t_str :: String}
+ | TGtGtEqual {t_str :: String}
+ | TLtLtEqual {t_str :: String}
+ | TEqualEqual {t_str :: String}
+ | TExclaimEqual {t_str :: String}
+ | TLtEqual {t_str :: String}
+ | TGtEqual {t_str :: String}
+ | TAmpersandTwice {t_str :: String}
+ | TPipePipe {t_str :: String}
+ | TPlusPlus {t_str :: String}
+ | TMinusMinus {t_str :: String}
+ | TComma {t_str :: String}
+ | TMinusGtStar {t_str :: String}
+ | TMinusGt {t_str :: String}
+ | TAnd {t_str :: String}
+ | TAndEq {t_str :: String}
+ | TBitand {t_str :: String}
+ | TBitor {t_str :: String}
+ | TCompl {t_str :: String}
+ | TNot {t_str :: String}
+ | TNotEq {t_str :: String}
+ | TOr {t_str :: String}
+ | TOrEq {t_str :: String}
+ | TXor {t_str :: String}
+ | TXorEq {t_str :: String}
+ | TLowerName {t_str :: String}
+ | TUpperName {t_str :: String}
+ | TString {t_str :: String}
+ | TInteger {t_str :: String}
+ | TOther {t_str :: String}
+ deriving (Show)
+
+-- Actions
+
+type Action = String -> ParserM Token
+
+setStartCode :: StartCode -> ParserM ()
+setStartCode sc = ParserM $ \i st -> Right (i, st{start_code = sc}, ())
+
+andBegin :: Action -> StartCode -> Action
+(act `andBegin` sc) x = do
+ setStartCode sc
+ act x
+
+mkT :: Token -> Action
+mkT t = mkTv (const t)
+
+mkTv :: (String -> Token) -> Action
+mkTv f str = ParserM (\i st -> Right (i, st, f str))
+
+-- begin :: Int -> Action
+-- begin sc _span _buf _len _buf2 =
+-- do setStartCode sc
+-- lex_tok
+
+-- Positions
+
+data Pos = Pos !Int {- Line -} !Int {- Column -}
+ deriving (Show)
+
+get_pos :: ParserM Pos
+get_pos = ParserM $ \i@(AlexInput p _ _) st -> Right (i, st, p)
+
+alexMove :: Pos -> Char -> Pos
+alexMove (Pos l _) '\n' = Pos (l + 1) 1
+alexMove (Pos l c) '\t' = Pos l ((c + 8) `div` 8 * 8)
+alexMove (Pos l c) _ = Pos l (c + 1)
+
+init_pos :: Pos
+init_pos = Pos 1 0
+
+show_pos :: Pos -> String
+show_pos (Pos l c) = "line " ++ show l ++ ", column " ++ show c
+
+-- Input
+
+data AlexInput = AlexInput
+ { position :: !Pos
+ , char_bytes :: [Word8]
+ , input :: String
+ }
+ deriving (Show)
+
+alexGetByte :: AlexInput -> Maybe (Word8, AlexInput)
+alexGetByte (AlexInput p (w : ws) cs) =
+ Just (w, AlexInput p ws cs)
+alexGetByte (AlexInput p [] (c : cs)) =
+ alexGetByte (AlexInput (alexMove p c) (utf8_encode c) cs)
+alexGetByte (AlexInput _ [] []) =
+ Nothing
+
+-- annoyingly, this doesn't seem to exist anywhere else as a standalone function
+utf8_encode :: Char -> [Word8]
+utf8_encode c = case ord c of
+ n
+ | n < 0x80 -> [fromIntegral n]
+ | n < 0x800 ->
+ [ fromIntegral $ 0b11000000 .|. (n `shiftR` 6)
+ , fromIntegral $ 0b10000000 .|. (n .&. 0b111111)
+ ]
+ | n < 0x10000 ->
+ [ fromIntegral $ 0b11100000 .|. (n `shiftR` 12)
+ , fromIntegral $ 0b10000000 .|. ((n `shiftR` 6) .&. 0b111111)
+ , fromIntegral $ 0b10000000 .|. (n .&. 0b111111)
+ ]
+ | otherwise ->
+ [ fromIntegral $ 0b11110000 .|. (n `shiftR` 18)
+ , fromIntegral $ 0b10000000 .|. ((n `shiftR` 12) .&. 0b111111)
+ , fromIntegral $ 0b10000000 .|. ((n `shiftR` 6) .&. 0b111111)
+ , fromIntegral $ 0b10000000 .|. (n .&. 0b111111)
+ ]
+
+alexInputPrevChar :: AlexInput -> Char
+alexInputPrevChar _ = error "Lexer doesn't implement alexInputPrevChar"
+
+happyError :: ParserM a
+happyError = do
+ p <- get_pos
+ fail $ "Parse error at " ++ show_pos p
=====================================
compiler/GHC/Parser/PreProcess/Types.hs
=====================================
@@ -0,0 +1,85 @@
+module GHC.Parser.PreProcess.Types where
+
+import GHC.Data.StringBuffer
+import GHC.Parser.Lexer (Token (..))
+import qualified GHC.Parser.Lexer as Lexer
+import GHC.Types.SrcLoc
+
+import Data.Map (Map)
+import qualified Data.Map as Map
+import GHC.Prelude
+
+-- ---------------------------------------------------------------------
+
+initPpState :: PpState
+initPpState =
+ PpState
+ { pp_defines = Map.empty
+ , pp_includes = Map.empty
+ , pp_include_stack = []
+ , pp_continuation = []
+ , pp_context = []
+ , pp_accepting = True
+ }
+
+data PpState = PpState
+ { pp_defines :: !(Map MacroName MacroDef)
+ , pp_includes :: !(Map String StringBuffer)
+ , pp_include_stack :: ![Lexer.AlexInput]
+ , pp_continuation :: ![Located Token]
+ , pp_context :: ![Token]
+ -- ^ What preprocessor directive we are currently processing
+ , pp_accepting :: !Bool
+ }
+ deriving (Show)
+
+-- ---------------------------------------------------------------------
+
+data CppDirective
+ = CppInclude String
+ | CppDefine String String
+ | CppIfdef String
+ | CppIfndef String
+ | CppIf String
+ | CppElse
+ | CppEndif
+ deriving (Show, Eq)
+
+-- ---------------------------------------------------------------------
+
+type MacroArgs = [String]
+data MacroName = MacroName String (Maybe MacroArgs)
+ deriving (Show, Eq, Ord)
+type MacroDef = String
+
+type Input = String
+type Output = CppDirective
+
+-- ---------------------------------------------------------------------
+-- Expression language
+-- NOTE: need to take care of macro expansion while parsing. Or perhaps before?
+
+data Expr
+ = Parens Expr
+ | Var String
+ | IntVal Int
+ | Plus Expr Expr
+ | Times Expr Expr
+ | Logic LogicOp Expr Expr
+ | Comp CompOp Expr Expr
+ deriving (Show, Eq)
+
+data LogicOp
+ = LogicalOr
+ | LogicalAnd
+ deriving (Show, Eq)
+
+data CompOp
+ = CmpEqual
+ | CmpGt
+ | CmpGtE
+ | CmpLt
+ | CmpLtE
+ deriving (Show, Eq)
+
+-- ---------------------------------------------------------------------
=====================================
compiler/ghc.cabal.in
=====================================
@@ -649,9 +649,16 @@ Library
GHC.Parser.Lexer.Interface
GHC.Parser.Lexer.String
GHC.Parser.HaddockLex
- GHC.Parser.PreProcess
GHC.Parser.PostProcess
GHC.Parser.PostProcess.Haddock
+ GHC.Parser.PreProcess
+ GHC.Parser.PreProcess.Eval
+ GHC.Parser.PreProcess.Lexer
+ GHC.Parser.PreProcess.Macro
+ GHC.Parser.PreProcess.ParsePP
+ GHC.Parser.PreProcess.ParserM
+ GHC.Parser.PreProcess.Parser
+ GHC.Parser.PreProcess.Types
GHC.Parser.String
GHC.Parser.Types
GHC.Parser.Utils
=====================================
hadrian/src/Rules/SourceDist.hs
=====================================
@@ -187,6 +187,8 @@ prepareTree dest = do
, (stage0InTree , compiler, "GHC/Parser/Lexer.x", "GHC/Parser/Lexer.hs")
, (stage0InTree , compiler, "GHC/Parser/Lexer/String.x", "GHC/Parser/Lexer/String.hs")
, (stage0InTree , compiler, "GHC/Parser/HaddockLex.x", "GHC/Parser/HaddockLex.hs")
+ , (stage0InTree , compiler, "GHC/Parser/PreProcess/Parser.y", "GHC/Parser/PreProcess/Parser.hs")
+ , (stage0InTree , compiler, "GHC/Parser/PreProcess/Lexer.x", "GHC/Parser/PreProcess/Lexer.hs")
, (stage0InTree , hpcBin, "src/Trace/Hpc/Parser.y", "src/Trace/Hpc/Parser.hs")
, (stage0InTree , genprimopcode, "Parser.y", "Parser.hs")
, (stage0InTree , genprimopcode, "Lexer.x", "Lexer.hs")
=====================================
utils/check-cpp/ParsePP.hs
=====================================
@@ -1,10 +1,6 @@
module ParsePP (
- -- parseCppParser,
- -- plusTimesExpr,
cppLex,
parseDirective,
- -- testing, delete
- -- cppDefinition,
) where
import Data.Char
@@ -13,14 +9,6 @@ import Control.Monad (void)
import Data.Functor.Identity
import Debug.Trace
import GHC.Parser.Errors.Ppr ()
-import Text.Parsec
-import qualified Text.Parsec as Parsec
-import Text.Parsec.Char as PS
-import Text.Parsec.Combinator as PS
-import qualified Text.Parsec.Expr as E
-import Text.Parsec.Language (emptyDef)
-import Text.Parsec.Prim as PS hiding (token)
-import qualified Text.Parsec.Token as P
import Data.List
import qualified Parser
@@ -32,31 +20,6 @@ import Lexer
-- First parse to CPP tokens, using a C++-like language spec
-- https://gcc.gnu.org/onlinedocs/cpp/Tokenization.html
-lexer :: P.TokenParser ()
-lexer = P.makeTokenParser exprDef
-
-exprDef :: P.LanguageDef st
-exprDef =
- emptyDef
- { P.commentStart = "/*"
- , P.commentEnd = "*/"
- , P.commentLine = "//"
- , P.nestedComments = False
- , P.identStart = letter <|> char '_'
- , P.identLetter = alphaNum <|> oneOf "_'"
- , P.opStart = P.opLetter exprDef
- , P.opLetter = oneOf ":!#$%&*+./<=>?@\\^|-~"
- , P.reservedOpNames = []
- , P.reservedNames = []
- , P.caseSensitive = True
- }
-
--- =====================================================================
--- ---------------------------------------------------------------------
-
-type CppParser = Parsec String ()
-
-
-- Parse a CPP directive, using tokens from the CPP lexer
parseDirective :: String -> Either String CppDirective
parseDirective s =
@@ -94,158 +57,6 @@ cppLex s = case lexCppTokenStream s init_state of
-- ---------------------------------------------------------------------
--- parseDirectiveOld :: String -> Either Parsec.ParseError CppDirective
--- parseDirectiveOld = parseCppParser cppDirective
-
--- parseCppParser :: CppParser a -> String -> Either Parsec.ParseError a
--- parseCppParser p = PS.parse p ""
-
--- -- TODO: delete this
--- cppDefinition :: CppParser (String, [String])
--- cppDefinition = do
--- _ <- PS.char '#'
--- _ <- whiteSpace
--- eToken "define"
--- name <- cppToken
--- definition <- cppTokens
--- return (name, definition)
-
--- cppDirective :: CppParser CppDirective
--- cppDirective = do
--- _ <- PS.char '#'
--- _ <- whiteSpace
--- choice
--- [ cppKw "define" >> cmdDefinition
--- , try $ cppKw "include" >> cmdInclude
--- , try $ cppKw "ifdef" >> cmdIfdef
--- , try $ cppKw "ifndef" >> cmdIfndef
--- -- , try $ cppKw "if" >> cmdIf
--- , try $ cppKw "else" >> return CppElse
--- , cppKw "endif" >> return CppEndif
--- -- , cppKw "elif" CppElifKw
--- -- , cppKw "undef" CppUndefKw
--- -- , cppKw "error" CppErrorKw
--- ]
-
--- cmdInclude :: CppParser CppDirective
--- cmdInclude = do
--- _ <- string "\""
--- filename <- many1 (satisfy (\c -> not (isSpace c || c == '"')))
--- _ <- string "\""
--- return $ CppInclude filename
-
--- cmdDefinition :: CppParser CppDirective
--- cmdDefinition = do
--- name <- cppToken
--- CppDefine name <$> cppTokens
-
--- cmdIfdef :: CppParser CppDirective
--- cmdIfdef = CppIfdef <$> cppToken
-
--- cmdIfndef :: CppParser CppDirective
--- cmdIfndef = CppIfndef <$> cppToken
-
--- cmdIf :: CppParser CppDirective
--- cmdIf = CppIf <$> cppTokens
-
--- cppKw :: String -> CppParser ()
--- cppKw kw = void $ lexeme (PS.string kw)
-
--- cppComment :: CppParser ()
--- cppComment = do
--- _ <- PS.string "/*"
--- _ <- PS.manyTill PS.anyChar (PS.try (PS.string "*/"))
--- return ()
-
--- whiteSpace :: CppParser ()
--- whiteSpace = do
--- _ <- PS.many (PS.choice [cppComment, void PS.space])
--- return ()
-
--- lexeme :: CppParser a -> CppParser a
--- lexeme p = p <* whiteSpace
-
--- cppToken :: CppParser String
--- cppToken = lexeme (PS.many1 (PS.satisfy (not . isSpace)))
-
--- cppTokens :: CppParser [String]
--- cppTokens = PS.many cppToken
-
--- -- -------------------------------------
-
--- plusTimesExpr :: CppParser Expr
--- plusTimesExpr = E.buildExpressionParser eTable eTerm
-
--- eTable :: [[E.Operator String () Data.Functor.Identity.Identity Expr]]
--- eTable =
--- []
- -- -- Via https://learn.microsoft.com/en-us/cpp/cpp/cpp-built-in-operators-precedence-and-associativity?view=msvc-170
- -- [ [E.Infix (Times <$ symbol "*") E.AssocLeft]
- -- , [E.Infix (Plus <$ symbol "+") E.AssocLeft]
- -- ,
- -- [ E.Infix (try $ BinOp CmpLtE <$ symbol "<=") E.AssocLeft
- -- , E.Infix (try $ BinOp CmpGtE <$ symbol ">=") E.AssocLeft
- -- , E.Infix (BinOp CmpLt <$ symbol "<") E.AssocLeft
- -- , E.Infix (BinOp CmpGt <$ symbol ">") E.AssocLeft
- -- ]
- -- , [E.Infix (BinOp CmpEqual <$ symbol "==") E.AssocLeft]
- -- , [E.Infix (BinOp LogicalAnd <$ symbol "&&") E.AssocLeft]
- -- , [E.Infix (BinOp LogicalOr <$ symbol "||") E.AssocLeft]
- -- ]
-
--- eTerm :: CppParser Expr
--- eTerm =
--- eVariable -- <|> pteNum
--- <|> pteParens
--- <|> eInteger
-
--- pteParens :: CppParser Expr
--- pteParens = Parens <$> between (symbol "(") (symbol ")") plusTimesExpr
-
--- symbol :: String -> CppParser String
--- symbol s = lexeme $ string s
-
--- -- -------------------------------------
-
--- eExpr :: CppParser Expr
--- eExpr = choice [eParens, eBinOp, eVariable]
-
--- eParens :: CppParser Expr
--- eParens = P.parens lexer $ do
--- Parens <$> eExpr
-
--- eBinOp :: CppParser Expr
--- eBinOp = do
--- e1 <- eExpr
--- op <- eOp
--- -- _ <- cppToken
--- -- let op = Or
--- BinOp op e1 <$> eExpr
-
--- eOp :: CppParser Op
--- eOp = do
--- -- op <- P.operator lexer
--- op <- P.operator (trace "foo" lexer)
--- return $ trace ("op=" ++ show op) LogicalOr
-
--- -- TODO: Do we need this? the expression should be fully expanded by
--- -- the time we get it
--- eVariable :: CppParser Expr
--- eVariable = do
--- v <- P.identifier lexer
--- return $ Var v
-
--- eToken :: String -> CppParser ()
--- eToken = P.reserved lexer
-
--- eInteger :: CppParser Expr
--- eInteger = IntVal <$> integer
-
--- integer :: CppParser Int
--- integer = read <$> lexeme (many1 digit)
-
--- ---------------------------------------------------------------------
-
doATest :: String -> Either String CppDirective
doATest str = parseDirective str
-- doATest str = parseDirectiveOld str
=====================================
utils/check-cpp/PreProcess.hs
=====================================
@@ -21,7 +21,6 @@ import Types
-- ---------------------------------------------------------------------
-type PP = P PpState
-- initPpState :: PpState
-- initPpState =
@@ -96,6 +95,8 @@ ppLexer queueComments cont =
-- ---------------------------------------------------------------------
+type PP = P PpState
+
preprocessElse :: PP ()
preprocessElse = do
accepting <- getAccepting
@@ -280,10 +281,7 @@ cleanTokenString fs = r
See Note [GhcCPP Initial Processing]
-}
cppInitial :: [FastString] -> String
-cppInitial fs = r
- where
- -- go fs' = reverse $ drop 2 $ reverse $ unpackFS fs'
- r = concatMap unpackFS fs
+cppInitial fs = concatMap unpackFS fs
{-
Note [GhcCPP Initial Processing]
=====================================
utils/check-cpp/Types.hs
=====================================
@@ -51,15 +51,6 @@ data MacroName = MacroName String (Maybe MacroArgs)
deriving (Show, Eq, Ord)
type MacroDef = String
--- data PpState = PpState
--- { pp_defines :: !(Map MacroName MacroDef)
--- , pp_accepting :: !Bool
--- }
--- deriving (Show, Eq)
-
--- initPpState :: PpState
--- initPpState = PpState{pp_defines = Map.empty, pp_accepting = True}
-
type Input = String
type Output = CppDirective
=====================================
utils/haddock/haddock-api/src/Haddock/Parser.hs
=====================================
@@ -17,6 +17,7 @@ import GHC.Data.FastString (fsLit)
import GHC.Data.StringBuffer (stringToStringBuffer)
import GHC.Parser (parseIdentifier)
import GHC.Parser.Lexer (ParseResult (PFailed, POk), ParserOpts, initParserState, unP)
+import GHC.Parser.PreProcess (initPpState)
import GHC.Types.Name.Occurrence (occNameString)
import GHC.Types.Name.Reader (RdrName (..))
import GHC.Types.SrcLoc (GenLocated (..), mkRealSrcLoc)
@@ -45,7 +46,7 @@ parseIdent parserOpts ns str0 =
PFailed{} -> Nothing
where
realSrcLc = mkRealSrcLoc (fsLit "<unknown file>") 0 0
- pstate str = initParserState () parserOpts (stringToStringBuffer str) realSrcLc
+ pstate str = initParserState initPpState parserOpts (stringToStringBuffer str) realSrcLc
(wrap, str1) = case str0 of
'(' : s@(c : _)
| c /= ','
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/237548092a8cedcc244412f1bf10a6a08f982de1
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/237548092a8cedcc244412f1bf10a6a08f982de1
You're receiving this email because of your account on gitlab.haskell.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20250203/dd1fade3/attachment-0001.html>
More information about the ghc-commits
mailing list