[Git][ghc/ghc][wip/az/ghc-cpp] Sync the playground to compiler

Alan Zimmerman (@alanz) gitlab at gitlab.haskell.org
Sun Feb 16 20:54:56 UTC 2025



Alan Zimmerman pushed to branch wip/az/ghc-cpp at Glasgow Haskell Compiler / GHC


Commits:
34185e27 by Alan Zimmerman at 2025-02-16T20:54:07+00:00
Sync the playground to compiler

- - - - -


13 changed files:

- compiler/GHC/Driver/Config/Parser.hs
- compiler/GHC/Driver/Main.hs
- compiler/GHC/Parser/Header.hs
- compiler/GHC/Parser/PreProcess.hs
- compiler/GHC/Parser/PreProcess/Eval.hs
- compiler/GHC/Parser/PreProcess/Lexer.x
- compiler/GHC/Parser/PreProcess/Macro.hs
- compiler/GHC/Parser/PreProcess/ParsePP.hs
- compiler/GHC/Parser/PreProcess/Parser.y
- compiler/GHC/Parser/PreProcess/ParserM.hs
- compiler/GHC/Parser/PreProcess/State.hs
- utils/check-cpp/ParsePP.hs
- utils/check-cpp/PreProcess.hs


Changes:

=====================================
compiler/GHC/Driver/Config/Parser.hs
=====================================
@@ -16,7 +16,8 @@ import GHC.Driver.Session
 import GHC.Driver.Config.Diagnostic
 
 import GHC.Parser.Lexer
-import GHC.Parser.PreProcess.State (MacroName (..), MacroDef)
+import qualified GHC.Parser.PreProcess.ParserM as PM
+import GHC.Parser.PreProcess.State ( MacroDefines)
 
 -- | Extracts the flags needed for parsing
 initParserOpts :: DynFlags -> ParserOpts
@@ -36,20 +37,20 @@ supportedLanguagePragmas = supportedLanguagesAndExtensions . platformArchOS . ta
 -- Predefined macros, for use in GHC_CPP @PpState@
 -- Derived from the GHC source file `ghcversion.h.in`
 
-predefinedMacros :: DynFlags -> Map.Map MacroName MacroDef
+predefinedMacros :: DynFlags -> MacroDefines
 predefinedMacros df = Map.fromList
         [
-            ( MacroName "__GLASGOW_HASKELL__" Nothing
-            , projectVersionInt
+            ( "__GLASGOW_HASKELL__"
+            , Map.singleton Nothing (Nothing, [PM.TInteger projectVersionInt])
             ),
-            ( MacroName "__GLASGOW_HASKELL_FULL_VERSION__" Nothing
-            , projectVersion
+            ( "__GLASGOW_HASKELL_FULL_VERSION__"
+            , Map.singleton Nothing (Nothing, [PM.TOther projectVersion])
             ),
-            ( MacroName  "__GLASGOW_HASKELL_PATCHLEVEL1__" Nothing
-            , projectPatchLevel1
+            ( "__GLASGOW_HASKELL_PATCHLEVEL1__"
+            , Map.singleton Nothing (Nothing, [PM.TOther projectPatchLevel1])
             ),
-            ( MacroName  "__GLASGOW_HASKELL_PATCHLEVEL2__" Nothing
-            , projectPatchLevel2
+            ( "__GLASGOW_HASKELL_PATCHLEVEL2__"
+            , Map.singleton Nothing (Nothing, [PM.TOther projectPatchLevel2])
             )
         ]
   where


=====================================
compiler/GHC/Driver/Main.hs
=====================================
@@ -2953,4 +2953,5 @@ writeInterfaceOnlyMode dflags =
 
 initParserStateWithMacros :: DynFlags -> ParserOpts -> StringBuffer -> RealSrcLoc -> PState PpState
 initParserStateWithMacros df
-  = Lexer.initParserState (initPpState { pp_scope = (PpScope (predefinedMacros df) True) :| [] })
+  = Lexer.initParserState (initPpState { pp_defines = predefinedMacros df
+                                       , pp_scope = (PpScope True) :| [] })


=====================================
compiler/GHC/Parser/Header.hs
=====================================
@@ -118,7 +118,8 @@ getImports dflags popts implicit_prelude buf filename source_filename = do
 
 initParserStateWithMacros :: DynFlags -> ParserOpts -> StringBuffer -> RealSrcLoc -> PState PpState
 initParserStateWithMacros df
-  = Lexer.initParserState (initPpState { pp_scope = (PpScope (predefinedMacros df) True) NE.:| [] })
+  = Lexer.initParserState (initPpState { pp_defines = predefinedMacros df
+                                       , pp_scope = (PpScope  True) NE.:| [] })
 
 mkPrelImports :: ModuleName
               -> SrcSpan    -- Attribute the "import Prelude" to this location


=====================================
compiler/GHC/Parser/PreProcess.hs
=====================================
@@ -115,8 +115,8 @@ processCpp fs = do
                 Left err -> error $ show (err, s)
                 Right (CppInclude filename) -> do
                     ppInclude filename
-                Right (CppDefine name def) -> do
-                    ppDefine (MacroName name Nothing) def
+                Right (CppDefine name args def) -> do
+                    ppDefine (MacroName name args) def
                 Right (CppIf cond) -> do
                     ppIf cond
                 Right (CppIfdef name) -> do


=====================================
compiler/GHC/Parser/PreProcess/Eval.hs
=====================================
@@ -10,6 +10,7 @@ eval (Parens e) = eval e
 eval (Var v) = error $ "need to look up :" ++ v
 eval (IntVal i) = i
 eval (Plus e1 e2) = (eval e1) + (eval e2)
+eval (Minus e1 e2) = (eval e1) - (eval e2)
 eval (Times e1 e2) = (eval e1) * (eval e2)
 eval (Logic op e1 e2) = evalLogicOp op (eval e1) (eval e2)
 eval (Comp op e1 e2) = evalCompOp op (eval e1) (eval e2)
@@ -18,7 +19,6 @@ evalLogicOp :: LogicOp -> Int -> Int -> Int
 evalLogicOp LogicalOr e1 e2 = fromBool $ (toBool e1) || (toBool e2)
 evalLogicOp LogicalAnd e1 e2 = fromBool $ (toBool e1) || (toBool e2)
 
-
 evalCompOp :: CompOp -> Int -> Int -> Int
 evalCompOp CmpEqual e1 e2 = fromBool $ e1 == e2
 evalCompOp CmpNotEqual e1 e2 = fromBool $ e1 /= e2


=====================================
compiler/GHC/Parser/PreProcess/Lexer.x
=====================================
@@ -92,8 +92,7 @@ words :-
     <0>         "xor"                { mkTv TXor }
     <0>         "xor_eq"             { mkTv TXorEq }
 ----------------------------------------
-    <0>         [_]*[a-z][a-zA-Z0-9\#_]* { mkTv TLowerName }
-    <0>         [_]*[A-Z][a-zA-Z0-9\#_]* { mkTv TUpperName }
+    <0>         [a-zA-Z_][a-zA-Z0-9_]* { mkTv TIdentifier }
     <0>         \-? [0-9][0-9]*          { mkTv TInteger  }
     <0>         \" [^\"]* \"             { mkTv (TString . tail . init) }
     <0>         ()                       { begin other }


=====================================
compiler/GHC/Parser/PreProcess/Macro.hs
=====================================
@@ -1,4 +1,15 @@
-module GHC.Parser.PreProcess.Macro where
+module GHC.Parser.PreProcess.Macro (
+    process,
+    cppIf,
+    -- get rid of warnings for tests
+    m0,
+    m1,
+    m2,
+    m3,
+    m4,
+    m5,
+    tt,
+) where
 
 -- From https://gcc.gnu.org/onlinedocs/cpp/Macros.html
 
@@ -22,6 +33,7 @@ details
 
 -- TODO: Parse tokens with original locations in them.
 
+import Data.Map qualified as Map
 import Data.Maybe
 
 import GHC.Parser.PreProcess.Eval
@@ -29,6 +41,7 @@ import GHC.Parser.PreProcess.ParsePP
 import GHC.Parser.PreProcess.Parser qualified as Parser
 import GHC.Parser.PreProcess.ParserM
 import GHC.Parser.PreProcess.State
+import qualified Data.Semigroup as S
 import GHC.Prelude
 
 -- ---------------------------------------------------------------------
@@ -40,7 +53,7 @@ process s str = (s0, o)
         Left _ -> undefined
         Right r -> r
     s0 = case o of
-        CppDefine name rhs -> define s name rhs
+        CppDefine name args rhs -> define s name args rhs
         CppInclude _ -> undefined
         CppIfdef name -> ifdef s name
         CppIf ifstr -> cppIf s ifstr
@@ -51,8 +64,8 @@ process s str = (s0, o)
 
 -- ---------------------------------------------------------------------
 
-define :: PpState -> String -> MacroDef -> PpState
-define s name toks = addDefine' s (MacroName name Nothing) toks
+define :: PpState -> String -> Maybe ([String]) -> MacroDef -> PpState
+define s name args toks = addDefine' s (MacroName name args) toks
 
 ifdef :: PpState -> String -> PpState
 ifdef s name = pushAccepting' s (ppIsDefined' s (MacroName name Nothing))
@@ -64,7 +77,7 @@ ifndef s name = pushAccepting' s (not $ ppIsDefined' s (MacroName name Nothing))
 cppIf :: PpState -> String -> PpState
 cppIf s str = pushAccepting' s (toBool v)
   where
-    expanded = expand s str
+    expanded = expand (pp_defines s) str
     v = case Parser.parseExpr expanded of
         Left err -> error $ "parseExpr:" ++ show (err, expanded)
         Right tree -> eval tree
@@ -74,23 +87,163 @@ cppElse s = setAccepting' s (not $ getAccepting' s)
 
 -- ---------------------------------------------------------------------
 
-expand :: PpState -> String -> String
+expand :: MacroDefines -> String -> String
 expand s str = expanded
   where
     -- TODO: repeat until re-expand or fixpoint
     toks = case cppLex str of
         Left err -> error $ "expand:" ++ show (err, str)
         Right tks -> tks
-    expanded = concatMap (expandOne s) toks
+    expanded = combineToks $ map t_str $ expandToks s toks
 
-expandOne :: PpState -> Token -> String
-expandOne s tok = r
+expandToks :: MacroDefines -> [Token] -> [Token]
+expandToks s ts =
+    let
+        (expansionDone, r) = doExpandToks False s ts
+     in
+        if expansionDone
+            then expandToks s r
+            else r
+
+doExpandToks :: Bool -> MacroDefines -> [Token] -> (Bool, [Token])
+doExpandToks ed _ [] = (ed, [])
+doExpandToks ed s (TIdentifier n : ts) = (ed'', expanded ++ rest)
+  where
+    (ed', expanded, ts') = case Map.lookup n s of
+        Nothing -> (ed, [TIdentifier n], ts)
+        Just defs -> (ed0, r, rest0)
+          where
+            (args, rest0) = getExpandArgs ts
+            (m_args, rhs) = fromMaybe (Nothing, [TIdentifier n]) (Map.lookup (arg_arity args) defs)
+            (ed0, r) = case m_args of
+                Nothing -> (True, rhs)
+                Just _ -> (True, replace_args args m_args rhs)
+    (ed'', rest) = doExpandToks ed' s ts'
+doExpandToks ed s (t : ts) = (ed', t : r)
   where
-    -- TODO: protect against looking up `define`
-    r =
-        fromMaybe
-            (t_str tok)
-            (ppDefinition' s (MacroName (t_str tok) Nothing))
+    (ed', r) = doExpandToks ed s ts
+
+-- ---------------------------------------------------------------------
+
+replace_args ::
+    Maybe [[Token]] ->
+    Maybe [String] ->
+    [Token] ->
+    [Token]
+replace_args Nothing Nothing rhs = rhs
+replace_args (Just args) (Just m_args) rhs = rhs'
+  where
+    -- At this point, the surrounding context should guarantee that the
+    -- args and m_args have the same arity
+    rhs' = foldl' (\acc (arg, m_arg) -> replace_arg arg m_arg acc) rhs (zip args m_args)
+replace_args args margs _ = error $ "replace_args: impossible, mismatch between: " ++ show (args, margs)
+
+-- The spec (https://timsong-cpp.github.io/cppwp/n4140/cpp#replace-10)
+-- says an arg can only be an identifier
+-- replace_arg arg m_arg acc = acc
+replace_arg :: [Token] -> String -> [Token] -> [Token]
+replace_arg _ _ [] = []
+replace_arg a ma (TIdentifier t : ts)
+    | ma == t = a ++ replace_arg a ma ts
+replace_arg a ma (t : ts) = t : replace_arg a ma ts
+
+-- ---------------------------------------------------------------------
+
+{-
+https://timsong-cpp.github.io/cppwp/n4140/cpp#replace-11
+
+The sequence of preprocessing tokens bounded by the outside-most
+matching parentheses forms the list of arguments for the function-like
+macro. The individual arguments within the list are separated by comma
+preprocessing tokens, but comma preprocessing tokens between matching
+inner parentheses do not separate arguments.
+-}
+
+{- | Look for possible arguments to a macro expansion.
+The only thing we look for are commas, open parens, and close parens.
+-}
+getExpandArgs :: [Token] -> (Maybe [[Token]], [Token])
+getExpandArgs ts =
+    case pArgs ts of
+        Left err -> error $ err
+        Right r -> r
+
+pArgs :: [Token] -> Either String (Maybe [[Token]], [Token])
+pArgs (TOpenParen _ : ts) = do
+    (args, rest) <- pArgsList ts
+    case rest of
+        [] -> return (args, rest)
+        TCloseParen _ : rest' -> return (args, rest')
+        _ -> Left $ "expected ')', got: " ++ show rest
+pArgs ts = Right (Nothing, ts)
+
+pArgsList :: [Token] -> Either String (Maybe [[Token]], [Token])
+pArgsList ts = do
+    (arg, rest) <- pArg ts
+    case rest of
+        [] -> return (Just [arg], rest)
+        TCloseParen _ : _ -> return (Just [arg], rest)
+        TComma _ : rest1 -> do
+            (args, rest2) <- pArgsList rest1
+            return (Just [arg] S.<> args, rest2)
+        _ -> Left $ "expected ',' or ')', got: " ++ show rest
+
+-- An arg is
+--  sequence of non-comma tokens, ending with ',' or ')'
+--  within that, (', anything, ')', possibly nested
+pArg :: [Token] -> Either String ([Token], [Token])
+pArg ts = do
+    (frag, rest) <- pa_frag ts
+    case rest of
+        [] -> return (frag, rest)
+        TCloseParen _ : _ -> return (frag, rest)
+        TComma _ : _ -> return (frag, rest)
+        (t at TOpenParen{}) : ts' -> do
+            (frag', rest') <- inside_parens 1 [t] ts'
+            return (frag ++ frag', rest')
+        _ -> do
+            (frag', rest') <- pa_frag rest
+            return (frag ++ frag', rest')
+
+pa_frag :: [Token] -> Either String ([Token], [Token])
+pa_frag [] = return ([], [])
+pa_frag (t : ts)
+    | isOther t = return $ pOtherRest [t] ts
+pa_frag (t : ts) =
+    case t of
+        TOpenParen _ -> do
+            inside_parens 1 [t] ts
+        _ -> return ([], (t : ts))
+
+-- Process the part in an argument starting with parens
+inside_parens :: Int -> [Token] -> [Token] -> Either String ([Token], [Token])
+inside_parens pc acc [] =
+    if pc == 0
+        then return (reverse acc, [])
+        else Left $ "Unexpected end of input in arg, at: " ++ show (map t_str $ reverse acc)
+inside_parens pc acc (t : ts) =
+    case t of
+        TComma _ ->
+            if pc == 0
+                then return (reverse acc, t : ts)
+                else inside_parens pc (t : acc) ts
+        TOpenParen _ -> inside_parens (pc + 1) (t : acc) ts
+        TCloseParen _ ->
+            if pc > 0
+                then inside_parens (pc - 1) (t : acc) ts
+                else return (reverse acc, t : ts)
+        _ -> inside_parens pc (t : acc) ts
+
+pOtherRest :: [Token] -> [Token] -> ([Token], [Token])
+pOtherRest acc (t : ts)
+    | isOther t = pOtherRest (t : acc) ts
+pOtherRest acc ts = (reverse acc, ts)
+
+isOther :: Token -> Bool
+isOther (TComma _) = False
+isOther (TOpenParen _) = False
+isOther (TCloseParen _) = False
+isOther _ = True
 
 -- ---------------------------------------------------------------------
 
@@ -126,3 +279,15 @@ m3 = cppLex "#define FOO(m1,m2,m) ((m1) <  1 || (m1) == 1 && (m2) <  7 || (m1) =
 
 m4 :: Either String [Token]
 m4 = cppLex "#if (m < 1)"
+
+m5 :: Either String (Maybe [[Token]], [Token])
+m5 = do
+    -- toks <- cppLex "(43,foo(a)) some other stuff"
+    toks <- cppLex "( ff(bar(),baz), 4 )"
+    return $ getExpandArgs toks
+
+tt :: Either String ([[Char]], [Char])
+tt = case m5 of
+    Left err -> Left err
+    Right (Just a, b) -> Right (map (\k -> concatMap t_str k) a, concatMap t_str b)
+    Right (Nothing, _) -> error "oops"


=====================================
compiler/GHC/Parser/PreProcess/ParsePP.hs
=====================================
@@ -1,6 +1,12 @@
 module GHC.Parser.PreProcess.ParsePP (
     parseDirective,
     cppLex,
+    combineToks,
+    -- Reduce warnings so long
+    t0,
+    t1,
+    t2,
+    t3,
 ) where
 
 import Data.List (intercalate)
@@ -20,16 +26,16 @@ parseDirective s =
     case cppLex s of
         Left e -> Left e
         Right toks ->
-            case map t_str toks of
-                ("#" : "define" : ts) -> cppDefine ts
-                ("#" : "include" : ts) -> Right $ cppInclude ts
-                ("#" : "if" : ts) -> Right $ cppIf ts
-                ("#" : "ifndef" : ts) -> Right $ cppIfndef ts
-                ("#" : "ifdef" : ts) -> Right $ cppIfdef ts
-                ("#" : "else" : ts) -> Right $ cppElse ts
-                ("#" : "endif" : ts) -> Right $ cppEndif ts
-                ("#" : "dumpghccpp" : ts) -> Right $ cppDumpState ts
-                other -> Left ("unexpected directive: " ++ (combineToks other))
+            case toks of
+                (THash "#" : TIdentifier "define" : ts) -> cppDefine ts
+                (THash "#" : TIdentifier "include" : ts) -> Right $ cppInclude (map t_str ts)
+                (THash "#" : TIdentifier "if" : ts) -> Right $ cppIf (map t_str ts)
+                (THash "#" : TIdentifier "ifndef" : ts) -> Right $ cppIfndef (map t_str ts)
+                (THash "#" : TIdentifier "ifdef" : ts) -> Right $ cppIfdef (map t_str ts)
+                (THash "#" : TIdentifier "else" : ts) -> Right $ cppElse (map t_str ts)
+                (THash "#" : TIdentifier "endif" : ts) -> Right $ cppEndif (map t_str ts)
+                (THash "#" : TIdentifier "dumpghccpp" : ts) -> Right $ cppDumpState (map t_str ts)
+                _ -> Left ("unexpected directive: " ++ (show toks))
 
 {- | Comply with the CPP requirement to not combine adjacent tokens.
 This will automatically insert a space in place of a comment, as
@@ -38,9 +44,12 @@ comments cannot occur within a token.
 combineToks :: [String] -> String
 combineToks ss = intercalate " " ss
 
-cppDefine :: [String] -> Either String CppDirective
+cppDefine :: [Token] -> Either String CppDirective
 cppDefine [] = Left "error:empty #define directive"
-cppDefine (n : ts) = Right $ CppDefine n (combineToks ts)
+cppDefine (TIdentifier n : ts) = Right $ CppDefine n args def
+  where
+    (args, def) = getArgs ts
+cppDefine (t : _) = Left $ "#define: expecting an identifier, got :" ++ show t
 
 cppInclude :: [String] -> CppDirective
 cppInclude ts = CppInclude (combineToks ts)
@@ -65,6 +74,28 @@ cppDumpState _ts = CppDumpState
 
 -- ---------------------------------------------------------------------
 
+-- Crack out the arguments to a #define. This is of the form of
+-- comma-separated identifiers between parens
+getArgs :: [Token] -> (Maybe [String], [Token])
+getArgs [] = (Nothing, [])
+getArgs (TOpenParen _ : ts) =
+    case parseDefineArgs [] ts of
+        Left err -> error err
+        Right (args, rest) -> (Just (reverse args), rest)
+getArgs ts = (Nothing, ts)
+
+parseDefineArgs ::
+    [String] ->
+    [Token] ->
+    Either String ([String], [Token])
+parseDefineArgs acc [] = Right (acc, [])
+parseDefineArgs acc (TCloseParen _ : ts) = Right (acc, ts)
+parseDefineArgs acc (TIdentifier n : TCloseParen _ : ts) = Right (n : acc, ts)
+parseDefineArgs acc (TIdentifier n : TComma _ : ts) = parseDefineArgs (n : acc) ts
+parseDefineArgs acc ts = Left $ "malformed macro args, expecting identifier followed by comma or ')', got:" ++ show ts ++ " after getting " ++ show (reverse acc)
+
+-- ---------------------------------------------------------------------
+
 cppLex :: String -> Either String [Token]
 cppLex s = case lexCppTokenStream s init_state of
     Left err -> Left err


=====================================
compiler/GHC/Parser/PreProcess/Parser.y
=====================================
@@ -11,6 +11,8 @@ import GHC.Prelude
 }
 
 %name      expr
+%name      args
+%name      parameters
 %expect    0
 %tokentype { Token }
 %monad     { ParserM }
@@ -89,8 +91,7 @@ import GHC.Prelude
     'xor'              { TXor {} }
     'xor_eq'           { TXorEq {} }
 
-    lower_name         { TLowerName {} }
-    upper_name         { TUpperName {} }
+    identifier         { TIdentifier {} }
     integer            { TInteger {} }
     string             { TString {} }
     other              { TOther {} }
@@ -120,12 +121,35 @@ expr : variable           { $1 }
      | expr '>=' expr     { Comp CmpGtE $1 $3 }
      | expr '<'  expr     { Comp CmpLt $1 $3 }
      | expr '<=' expr     { Comp CmpLtE $1 $3 }
+     | expr '+' expr      { Plus $1 $3 }
+     | expr '-' expr      { Minus $1 $3 }
+     | expr '*' expr      { Times $1 $3 }
 
 variable :: {Expr}
 variable : name { Var $1 }
 
-name : lower_name { t_str $1 }
-     | upper_name { t_str $1 }
+name :: { String }
+name : identifier { t_str $1 }
+
+------------------------------------------------------------------------
+
+-- function-like macro args, in definition
+parameters :: { [String] }
+parameters : '(' param_list ')' { reverse $2 }
+
+param_list :: { [String] }
+param_list : param_list ',' name  { $3 : $1 }
+           | name                 { [$1] }
+
+-- function-like macro args, in call
+-- NOTE: according to https://timsong-cpp.github.io/cppwp/n4140/cpp#replace
+--       we should only be paying attention to parens and commas.
+args :: { [Expr] }
+args : '(' arg_list ')' { reverse $2 }
+
+arg_list :: { [Expr] }
+arg_list : arg_list ',' expr  { $3 : $1 }
+         | expr               { [$1] }
 
 {
 -- parseExpr :: String -> Either String Expr


=====================================
compiler/GHC/Parser/PreProcess/ParserM.hs
=====================================
@@ -90,82 +90,84 @@ init_state =
 
 data Token
     = TEOF {t_str :: String}
-    | TOpenBrace {t_str :: String}
-    | TCloseBrace {t_str :: String}
-    | TOpenBracket {t_str :: String}
-    | TCloseBracket {t_str :: String}
-    | THash {t_str :: String}
-    | THashHash {t_str :: String}
-    | TOpenParen {t_str :: String}
-    | TCloseParen {t_str :: String}
-    | TLtColon {t_str :: String}
-    | TColonGt {t_str :: String}
-    | TLtPercent {t_str :: String}
-    | TPercentGt {t_str :: String}
-    | TPercentColon {t_str :: String}
-    | TPercentColonTwice {t_str :: String}
-    | TSemi {t_str :: String}
-    | TColon {t_str :: String}
-    | TDotDotDot {t_str :: String}
-    | TNew {t_str :: String}
-    | TDelete {t_str :: String}
-    | TQuestion {t_str :: String}
-    | TColonColon {t_str :: String}
-    | TDot {t_str :: String}
-    | TDotStar {t_str :: String}
-    | TPlus {t_str :: String}
-    | TMinus {t_str :: String}
-    | TStar {t_str :: String}
-    | TSlash {t_str :: String}
-    | TPercent {t_str :: String}
-    | TUpArrow {t_str :: String}
-    | TAmpersand {t_str :: String}
-    | TPipe {t_str :: String}
-    | TTilde {t_str :: String}
-    | TExclamation {t_str :: String}
-    | TEqual {t_str :: String}
-    | TOpenAngle {t_str :: String}
-    | TCloseAngle {t_str :: String}
-    | TPlusEqual {t_str :: String}
-    | TMinusEqual {t_str :: String}
-    | TStarEqual {t_str :: String}
-    | TSlashEqual {t_str :: String}
-    | TPercentEqual {t_str :: String}
-    | TUpEqual {t_str :: String}
-    | TAmpersandEqual {t_str :: String}
-    | TPipeEqual {t_str :: String}
-    | TLtLt {t_str :: String}
-    | TGtGt {t_str :: String}
-    | TGtGtEqual {t_str :: String}
-    | TLtLtEqual {t_str :: String}
-    | TEqualEqual {t_str :: String}
-    | TExclaimEqual {t_str :: String}
-    | TLtEqual {t_str :: String}
-    | TGtEqual {t_str :: String}
-    | TAmpersandTwice {t_str :: String}
-    | TPipePipe {t_str :: String}
-    | TPlusPlus {t_str :: String}
-    | TMinusMinus {t_str :: String}
-    | TComma {t_str :: String}
-    | TMinusGtStar {t_str :: String}
-    | TMinusGt {t_str :: String}
-    | TAnd {t_str :: String}
-    | TAndEq {t_str :: String}
-    | TBitand {t_str :: String}
-    | TBitor {t_str :: String}
-    | TCompl {t_str :: String}
-    | TNot {t_str :: String}
-    | TNotEq {t_str :: String}
-    | TOr {t_str :: String}
-    | TOrEq {t_str :: String}
-    | TXor {t_str :: String}
-    | TXorEq {t_str :: String}
-    | TLowerName {t_str :: String}
-    | TUpperName {t_str :: String}
-    | TString {t_str :: String}
+    | TIdentifier {t_str :: String}
     | TInteger {t_str :: String}
+    | -- preprocessing-op-or-punc
+      -- https://timsong-cpp.github.io/cppwp/n4140/lex.operators#nt:preprocessing-op-or-punc
+      TOpenBrace {t_str :: String} -- '{'
+    | TCloseBrace {t_str :: String} -- '}'
+    | TOpenBracket {t_str :: String} -- '['
+    | TCloseBracket {t_str :: String} -- ']'
+    | THash {t_str :: String} -- '#'
+    | THashHash {t_str :: String} -- '##'
+    | TOpenParen {t_str :: String} -- '('
+    | TCloseParen {t_str :: String} -- ')'
+    | TLtColon {t_str :: String} -- '<:'
+    | TColonGt {t_str :: String} -- ':>'
+    | TLtPercent {t_str :: String} -- '<%'
+    | TPercentGt {t_str :: String} -- '%>'
+    | TPercentColon {t_str :: String} -- '%:'
+    | TPercentColonTwice {t_str :: String} -- '%:%:'
+    | TSemi {t_str :: String} -- ';'
+    | TColon {t_str :: String} -- ':'
+    | TDotDotDot {t_str :: String} -- '...'
+    | TNew {t_str :: String} -- 'new'
+    | TDelete {t_str :: String} -- 'delete'
+    | TQuestion {t_str :: String} -- '?'
+    | TColonColon {t_str :: String} -- '::'
+    | TDot {t_str :: String} -- '.'
+    | TDotStar {t_str :: String} -- '.*'
+    | TPlus {t_str :: String} -- '+'
+    | TMinus {t_str :: String} -- '-'
+    | TStar {t_str :: String} -- '*'
+    | TSlash {t_str :: String} -- '/'
+    | TPercent {t_str :: String} -- '%'
+    | TUpArrow {t_str :: String} -- '^'
+    | TAmpersand {t_str :: String} -- '&'
+    | TPipe {t_str :: String} -- '|'
+    | TTilde {t_str :: String} -- '~'
+    | TExclamation {t_str :: String} -- '!'
+    | TEqual {t_str :: String} -- '='
+    | TOpenAngle {t_str :: String} -- '<'
+    | TCloseAngle {t_str :: String} -- '>'
+    | TPlusEqual {t_str :: String} -- '+='
+    | TMinusEqual {t_str :: String} -- '-='
+    | TStarEqual {t_str :: String} -- '*='
+    | TSlashEqual {t_str :: String} -- '/='
+    | TPercentEqual {t_str :: String} -- '%='
+    | TUpEqual {t_str :: String} -- '^='
+    | TAmpersandEqual {t_str :: String} -- '&='
+    | TPipeEqual {t_str :: String} -- '|='
+    | TLtLt {t_str :: String} -- '<<'
+    | TGtGt {t_str :: String} -- '>>'
+    | TGtGtEqual {t_str :: String} -- '>>='
+    | TLtLtEqual {t_str :: String} -- '<<='
+    | TEqualEqual {t_str :: String} -- '=='
+    | TExclaimEqual {t_str :: String} -- '!='
+    | TLtEqual {t_str :: String} -- '<='
+    | TGtEqual {t_str :: String} -- '>-'
+    | TAmpersandTwice {t_str :: String} -- '&&'
+    | TPipePipe {t_str :: String} -- '||'
+    | TPlusPlus {t_str :: String} -- '++'
+    | TMinusMinus {t_str :: String} -- '--'
+    | TComma {t_str :: String} -- ','
+    | TMinusGtStar {t_str :: String} -- '->*'
+    | TMinusGt {t_str :: String} -- '->'
+    | TAnd {t_str :: String} -- 'and'
+    | TAndEq {t_str :: String} -- 'and_eq'
+    | TBitand {t_str :: String} -- 'bitand'
+    | TBitor {t_str :: String} -- 'bitor'
+    | TCompl {t_str :: String} -- 'compl'
+    | TNot {t_str :: String} -- 'not'
+    | TNotEq {t_str :: String} -- 'not_eq'
+    | TOr {t_str :: String} -- 'or'
+    | TOrEq {t_str :: String} -- 'or_eq'
+    | TXor {t_str :: String} -- 'xor'
+    | TXorEq {t_str :: String} -- 'xor_eq'
+    -- end of preprocessing-op-or-punc
+    | TString {t_str :: String}
     | TOther {t_str :: String}
-    deriving (Show)
+    deriving (Show, Eq)
 
 -- Actions
 


=====================================
compiler/GHC/Parser/PreProcess/State.hs
=====================================
@@ -1,15 +1,18 @@
 module GHC.Parser.PreProcess.State where
 
-import Data.List.NonEmpty (NonEmpty (..), (<|))
-import Data.List.NonEmpty qualified as NonEmpty
+import Data.List.NonEmpty ((<|))
+import qualified Data.List.NonEmpty as NonEmpty
 import Data.Map (Map)
-import Data.Maybe
-import Data.Map qualified as Map
+import Data.Maybe (isJust)
+import qualified Data.Map as Map
+import GHC.Base
 import GHC.Data.StringBuffer
-import GHC.Parser.Lexer (P (..), PState (..), ParseResult (..), Token (..))
-import GHC.Parser.Lexer qualified as Lexer
-import GHC.Prelude
+import GHC.Parser.Lexer (P (..), PState (..), ParseResult (..))
+import qualified GHC.Parser.Lexer as Lexer
 import GHC.Types.SrcLoc
+import GHC.Parser.PreProcess.ParserM (Token (..))
+
+import GHC.Prelude
 
 -- ---------------------------------------------------------------------
 
@@ -28,20 +31,21 @@ initPpState =
         { pp_includes = Map.empty
         , pp_include_stack = []
         , pp_continuation = []
-        , pp_scope = (PpScope Map.empty True) :| []
+        , pp_defines = Map.empty
+        , pp_scope = (PpScope True) :| []
         }
 
 data PpState = PpState
     { pp_includes :: !(Map String StringBuffer)
     , pp_include_stack :: ![Lexer.AlexInput]
-    , pp_continuation :: ![Located Token]
+    , pp_continuation :: ![Located Lexer.Token]
+    , pp_defines :: !MacroDefines
     , pp_scope :: !(NonEmpty PpScope)
     }
     deriving (Show)
 
 data PpScope = PpScope
-    { pp_defines :: !(Map MacroName MacroDef)
-    , pp_accepting :: !Bool
+    { pp_accepting :: !Bool
     }
     deriving (Show)
 
@@ -49,7 +53,8 @@ data PpScope = PpScope
 
 data CppDirective
     = CppInclude String
-    | CppDefine String String
+    | -- | name, optional args, replacement
+      CppDefine String (Maybe [String]) MacroDef
     | CppIfdef String
     | CppIfndef String
     | CppIf String
@@ -63,7 +68,10 @@ data CppDirective
 type MacroArgs = [String]
 data MacroName = MacroName String (Maybe MacroArgs)
     deriving (Show, Eq, Ord)
-type MacroDef = String
+type MacroDef = [Token]
+
+-- Indexed by name, and then arity
+type MacroDefines = Map String (Map (Maybe Int) ((Maybe MacroArgs), MacroDef))
 
 type Input = String
 type Output = CppDirective
@@ -77,6 +85,7 @@ data Expr
     | Var String
     | IntVal Int
     | Plus Expr Expr
+    | Minus Expr Expr
     | Times Expr Expr
     | Logic LogicOp Expr Expr
     | Comp CompOp Expr Expr
@@ -164,10 +173,10 @@ setAccepting on = do
     setScope (scope{pp_accepting = on})
 
 pushAccepting :: Bool -> PP ()
-pushAccepting on = pushScope (PpScope Map.empty on)
+pushAccepting on = pushScope (PpScope on)
 
 pushAccepting' :: PpState -> Bool -> PpState
-pushAccepting' s on = pushScope' s (PpScope Map.empty on)
+pushAccepting' s on = pushScope' s (PpScope on)
 
 setAccepting' :: PpState -> Bool -> PpState
 setAccepting' s on = setScope' s (scope{pp_accepting = on})
@@ -182,64 +191,59 @@ getAccepting' s = pp_accepting (NonEmpty.head $ pp_scope s)
 
 addDefine :: MacroName -> MacroDef -> PP ()
 addDefine name def = do
-    scope <- getScope
-    setScope (scope{pp_defines = Map.insert name def (pp_defines scope)})
+    accepting <- getAccepting
+    when accepting $ do
+        s <- getPpState
+        setPpState $ addDefine' s name def
 
 addDefine' :: PpState -> MacroName -> MacroDef -> PpState
-addDefine' s name def = r
-  where
-    scope = getScope' s
-    r = setScope' s (scope{pp_defines = Map.insert name def (pp_defines scope)})
+addDefine' s name def =
+    s{pp_defines = insertMacroDef name def (pp_defines s)}
 
 ppDefine :: MacroName -> MacroDef -> PP ()
 ppDefine name val = addDefine name val
 
 ppIsDefined :: MacroName -> PP Bool
 ppIsDefined name = do
-    -- Look up the chain of scopes, until we find one that works, or end
-    let
-        lookup [] = False
-        lookup (h : t) =
-            if Map.member name (pp_defines h)
-                then True
-                else lookup t
-    pp <- getPpState
-    let scopes = NonEmpty.toList (pp_scope pp)
-    return $ lookup scopes
+    s <- getPpState
+    return $ ppIsDefined' s name
 
 ppIsDefined' :: PpState -> MacroName -> Bool
-ppIsDefined' s name = lookup scopes
-  where
-    -- Look up the chain of scopes, until we find one that works, or end
-    lookup [] = False
-    lookup (h : t) =
-        if Map.member name (pp_defines h)
-            then True
-            else lookup t
-    scopes = NonEmpty.toList (pp_scope s)
-
-ppDefinition' :: PpState -> MacroName -> Maybe MacroDef
-ppDefinition' s name = lookup scopes
-  where
-    -- Look up the chain of scopes, until we find one that works, or end
-    lookup [] = Nothing
-    lookup (h : t) =
-        if Map.member name (pp_defines h)
-            then Map.lookup name (pp_defines h)
-            else lookup t
-    scopes = NonEmpty.toList (pp_scope s)
+ppIsDefined' s (MacroName name _args) =
+    isJust $ Map.lookup name (pp_defines s)
+
+ppDefinition' :: PpState -> String -> Maybe (Map (Maybe Int) ((Maybe MacroArgs), MacroDef))
+ppDefinition' s name = Map.lookup name (pp_defines s)
 
 getPpState :: PP PpState
 getPpState = P $ \s -> POk s (pp s)
 
+setPpState :: PpState -> PP ()
+setPpState pp' = P $ \s -> POk s{pp = pp'} ()
+
 -- ---------------------------------------------------------------------
 
-pushContinuation :: Located Token -> PP ()
+pushContinuation :: Located Lexer.Token -> PP ()
 pushContinuation new =
     P $ \s -> POk s{pp = (pp s){pp_continuation = new : pp_continuation (pp s)}} ()
 
-popContinuation :: PP [Located Token]
+popContinuation :: PP [Located Lexer.Token]
 popContinuation =
     P $ \s -> POk s{pp = (pp s){pp_continuation = []}} (pp_continuation (pp s))
 
 -- ---------------------------------------------------------------------
+-- Dealing with MacroDefines
+
+arg_arity :: Maybe [t] -> Maybe Int
+arg_arity args = case args of
+    Nothing -> Nothing
+    Just as -> Just (length as)
+
+insertMacroDef :: MacroName -> MacroDef -> MacroDefines -> MacroDefines
+insertMacroDef (MacroName name args) def md =
+    let arity = arg_arity args
+     in case Map.lookup name md of
+            Nothing -> Map.insert name (Map.singleton arity (args, def)) md
+            Just dm -> Map.insert name (Map.insert arity (args, def) dm) md
+
+-- ---------------------------------------------------------------------


=====================================
utils/check-cpp/ParsePP.hs
=====================================
@@ -21,7 +21,7 @@ import State
 -- First parse to CPP tokens, using a C++-like language spec
 -- https://gcc.gnu.org/onlinedocs/cpp/Tokenization.html
 
--- Parse a CPP directive, using tokens from the CPP lexer
+-- | Parse a CPP directive, using tokens from the CPP lexer
 parseDirective :: String -> Either String CppDirective
 parseDirective s =
     case cppLex s of


=====================================
utils/check-cpp/PreProcess.hs
=====================================
@@ -9,7 +9,7 @@ import GHC.Parser.Errors.Ppr ()
 import qualified GHC.Parser.Lexer as Lexer
 import GHC.Types.SrcLoc
 
-import GHC.Parser.Lexer (P (..), PState (..), ParseResult (..), Token(..))
+import GHC.Parser.Lexer (P (..), PState (..), ParseResult (..), Token (..))
 
 import Macro
 import ParsePP



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/34185e2710b8503ffe53130caa6fc37279d85bf4

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/34185e2710b8503ffe53130caa6fc37279d85bf4
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20250216/36d41540/attachment-0001.html>


More information about the ghc-commits mailing list