[Git][ghc/ghc][wip/az/ghc-cpp] 3 commits: Simplify Lexer interface. Only ITcpp

Alan Zimmerman (@alanz) gitlab at gitlab.haskell.org
Mon Oct 2 20:29:12 UTC 2023



Alan Zimmerman pushed to branch wip/az/ghc-cpp at Glasgow Haskell Compiler / GHC


Commits:
347968a7 by Alan Zimmerman at 2023-10-01T22:49:36+01:00
Simplify Lexer interface. Only ITcpp

We transfer directive lines through it, then parse them from scratch
in the preprocessor.

- - - - -
e75306cd by Alan Zimmerman at 2023-10-02T21:28:04+01:00
Deal with directive on last line, with no trailing \n

- - - - -
54c33324 by Alan Zimmerman at 2023-10-02T21:28:42+01:00
Start parsing and processing the directives

- - - - -


3 changed files:

- compiler/GHC/Parser/Lexer.x
- compiler/GHC/Parser/PreProcess.hs
- utils/check-cpp/Main.hs


Changes:

=====================================
compiler/GHC/Parser/Lexer.x
=====================================
@@ -303,6 +303,7 @@ $unigraphic / { isSmartQuote } { smart_quote_error }
   \n                                    ;
   -- Ghc CPP symbols
   ^\# \ * @cppkeyword  .* \n / { ifExtension GhcCppBit } { cppToken cpp_prag }
+  ^\# \ * @cppkeyword  .*    / { ifExtension GhcCppBit } { cppToken cpp_prag }
 
   ^\# line                              { begin line_prag1 }
   ^\# / { followedByDigit }             { begin line_prag1 }
@@ -320,6 +321,7 @@ $unigraphic / { isSmartQuote } { smart_quote_error }
         -- we might encounter {-# here, but {- has been handled already
   \n                                    ;
   ^\# \ * @cppkeyword  .* \n / { ifExtension GhcCppBit } { cppToken cpp_prag }
+  ^\# \ * @cppkeyword  .*    / { ifExtension GhcCppBit } { cppToken cpp_prag }
 
   ^\# (line)?                           { begin line_prag1 }
 }
@@ -346,8 +348,10 @@ $unigraphic / { isSmartQuote } { smart_quote_error }
 
 -- CPP continuation lines. Keep concatenating, or exit
 <cpp_prag> {
-  .* \\ \n                   { cppTokenCont (ITcppContinue True) }
-  .* \n                      { cppTokenPop  (ITcppContinue False) }
+  .* \\ \n                   { cppTokenCont (ITcpp True) }
+  .* \\                      { cppTokenCont (ITcpp True) }
+  -- .* \n                      { cppTokenPop  (ITcpp False) }
+  .*                       { cppTokenPop  (ITcpp False) }
   -- () { popCpp }
 }
 
@@ -1029,16 +1033,27 @@ data Token
   | ITlineComment  String      PsSpan -- ^ comment starting by "--"
   | ITblockComment String      PsSpan -- ^ comment in {- -}
 
-  -- GHC CPP extension. Each contains an entire line of source code,
-  -- possibly joining up ones ending in backslash
-  | ITcppStart    Bool FastString   -- ^ Start of a CPP #-prefixed line. Flag for continuation
-  | ITcppContinue Bool FastString   -- ^ Continuation after a trailing backslash. Flag for continuation
-
+  -- GHC CPP extension. See Note [GhcCPP Token]
+  | ITcpp Bool FastString   -- ^ CPP #-prefixed line, or continuation.
   deriving Show
 
 instance Outputable Token where
   ppr x = text (show x)
 
+{- Note [GhcCPP Token]
+~~~~~~~~~~~~~~~~~~~~~~
+We only invoke the Ghc CPP processing on lines beginning with a '#'
+and one of the keywords in @cppkeyword.
+
+These directives can finish on a trailing slash, which signals a
+continuation onto the next line.
+
+When lexing, we detect the start of the directive, and put the line
+into a ITcpp token, with a flag indicating if it ends with a
+continuation. Subsequent continued lines are treated the same way,
+until the final ITcpp token with the flag set False.
+-}
+
 {- Note [PsSpan in Comments]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 When using the Api Annotations to exact print a modified AST, managing
@@ -1273,20 +1288,30 @@ cppToken code span buf len _buf2 =
      let tokStr = lexemeToFastString buf len
      -- check if the string ends with backslash and newline
      -- NOTE: performance likely sucks, make it work for now
-     continue <- case (reverse $ unpackFS tokStr) of
-        -- ('\n':'\\':_) -> pushLexState code >> return True
-        ('\n':'\\':_) -> pushLexState (trace ("cppToken: push state") code) >> return True
-        _ -> return False
-     return (L span (ITcppStart continue $! tokStr))
+     (len0, continue) <- case (reverse $ unpackFS tokStr) of
+        -- ('\n':'\\':_) -> pushLexState code >> return (len -2, True)
+        ('\n':'\\':_) -> pushLexState (trace ("cppToken: push state") code) >> return (len - 2, True)
+        ('\n':_) -> return (len - 1, False)
+        _ -> return (len, False)
+     return (L span (ITcpp continue $! lexemeToFastString buf len0))
      -- trace ("cppToken:" ++ show (code, t)) $ do return (L span t)
 
 cppTokenCont :: (FastString -> Token)-> Action
-cppTokenCont t span buf len _buf2 = return (L span (t $! lexemeToFastString buf len))
+cppTokenCont code span buf len _buf2 =
+  do
+     let tokStr = lexemeToFastString buf len
+     -- check if the string ends with backslash and newline
+     -- NOTE: performance likely sucks, make it work for now
+     (len0, continue) <- case (reverse $ unpackFS tokStr) of
+        ('\n':'\\':_) -> return (len - 2, True)
+        ('\n':_) -> return (len - 1, False)
+        _ -> return (len, False)
+     return (L span (ITcpp continue $! lexemeToFastString buf len0))
 
 cppTokenPop :: (FastString -> Token)-> Action
 cppTokenPop t span buf len _buf2 =
   do _ <- popLexState
-     -- return (L span (t $! lexemeToFastString buf len))
+     -- return (L span (t $! lexemeToFastString buf (len - 1)))
      return (L span (t $! lexemeToFastString buf (trace "cppTokenPop" len)))
 
 popCpp :: Action
@@ -2761,7 +2786,7 @@ data PState = PState {
 -- | Use for emulating (limited) CPP preprocessing in GHC.
 -- TODO: move this into PreProcess, and make a param on PState
 data PpState = PpState {
-        pp_defines :: !(Set String),
+        pp_defines :: !(Map String [String]),
         pp_continuation :: ![Located Token],
         -- pp_context :: ![PpContext],
         pp_context :: ![Token], -- What preprocessor directive we are currently processing
@@ -2774,7 +2799,7 @@ data PpContext = PpContextIf [Located Token]
 
 initPpState :: PpState
 initPpState = PpState
-   { pp_defines = Set.empty
+   { pp_defines = Map.empty
    , pp_continuation = []
    , pp_context = []
    , pp_accepting = True


=====================================
compiler/GHC/Parser/PreProcess.hs
=====================================
@@ -152,15 +152,15 @@ getAccepting = P $ \s -> POk s (pp_accepting (pp s))
 
 -- definitions start --------------------
 
-ppDefine :: FastString -> P ()
-ppDefine def = P $ \s ->
-    -- POk s{pp = (pp s){pp_defines = Set.insert (cleanTokenString def) (pp_defines (pp s))}} ()
-    POk s{pp = (pp s){pp_defines = Set.insert (trace ("ppDefine:def=[" ++ show (cleanTokenString def) ++ "]") (cleanTokenString def)) (pp_defines (pp s))}} ()
-
-ppIsDefined :: FastString -> P Bool
-ppIsDefined def = P $ \s ->
-    -- POk s (Set.member def (pp_defines (pp s)))
-    POk s (Set.member (trace ("ppIsDefined:def=[" ++ show (cleanTokenString def) ++ "]") (cleanTokenString def)) (pp_defines (pp s)))
+-- ppDefine :: FastString -> P ()
+-- ppDefine def = P $ \s ->
+--     -- POk s{pp = (pp s){pp_defines = Set.insert (cleanTokenString def) (pp_defines (pp s))}} ()
+--     POk s{pp = (pp s){pp_defines = Set.insert (trace ("ppDefine:def=[" ++ show (cleanTokenString def) ++ "]") (cleanTokenString def)) (pp_defines (pp s))}} ()
+
+-- ppIsDefined :: FastString -> P Bool
+-- ppIsDefined def = P $ \s ->
+--     -- POk s (Set.member def (pp_defines (pp s)))
+--     POk s (Set.member (trace ("ppIsDefined:def=[" ++ show (cleanTokenString def) ++ "]") (cleanTokenString def)) (pp_defines (pp s)))
 
 -- | Take a @FastString@ of the form "#define FOO\n" and strip off all but "FOO"
 cleanTokenString :: FastString -> String


=====================================
utils/check-cpp/Main.hs
=====================================
@@ -5,7 +5,7 @@ import Control.Monad.IO.Class
 import Data.Char
 import Data.Data hiding (Fixity)
 import Data.List
-import qualified Data.Set as Set
+import qualified Data.Map as Map
 import Debug.Trace
 import GHC
 import qualified GHC.Data.EnumSet as EnumSet
@@ -60,7 +60,7 @@ ppLexer queueComments cont =
         ( \tk ->
             let
                 contInner t = (trace ("ppLexer: tk=" ++ show (unLoc tk, unLoc t)) cont) t
-                contPush = pushContext (unLoc tk) >> contIgnoreTok tk
+                -- contPush = pushContext (unLoc tk) >> contIgnoreTok tk
                 contIgnoreTok (L l tok) = do
                     case l of
                         RealSrcSpan r (Strict.Just b) -> Lexer.queueIgnoredToken (L (PsSpan r b) tok)
@@ -69,44 +69,12 @@ ppLexer queueComments cont =
              in
                 -- case tk of
                 case (trace ("M.ppLexer:tk=" ++ show (unLoc tk)) tk) of
-                    L _ (ITcppStart continuation s) -> do
+                    L _ ITeof -> contInner tk
+                    L _ (ITcpp continuation s) -> do
                         if continuation
-                            then do
-                                pushContinuation tk
-                                contIgnoreTok tk
-                            else do
-                                processCppToks s
-                                contIgnoreTok tk
-                    L _ (ITcppContinue continuation s) -> do
-                        if continuation
-                            then do
-                                pushContinuation tk
-                                contIgnoreTok tk
-                            else do
-                                processCppToks s
-                                contIgnoreTok tk
-                    -- L _ (ITcppDefine s) -> do
-                    --     -- ppDefine (trace ("ppDefine:" ++ show s) s)
-                    --     ppDefine s
-                    --     popContext
-                    --     contIgnoreTok tk
-                    -- L _ (ITcppIf _) -> contPush
-                    -- L _ (ITcppIfdef s) -> do
-                    --     defined <- ppIsDefined s
-                    --     -- setAccepting defined
-                    --     setAccepting (trace ("ifdef:" ++ show (s, defined)) defined)
-                    --     contIgnoreTok tk
-                    -- L _ (ITcppIfndef s) -> do
-                    --     defined <- ppIsDefined s
-                    --     -- setAccepting (not defined)
-                    --     setAccepting (trace ("ifdef:" ++ show (s, defined)) (not defined))
-                    --     contIgnoreTok tk
-                    -- L _ ITcppElse -> do
-                    --     preprocessElse
-                    --     contIgnoreTok tk
-                    -- L _ ITcppEndif -> do
-                    --     preprocessEnd
-                    --     contIgnoreTok tk
+                            then pushContinuation tk
+                            else processCppToks s
+                        contIgnoreTok tk
                     _ -> do
                         state <- getCppState
                         case (trace ("CPP state:" ++ show state) state) of
@@ -126,10 +94,8 @@ preprocessEnd = do
 
 processCppToks :: FastString -> P ()
 processCppToks fs = do
-    let str = unpackFS fs
     let
-        get (L _ (ITcppStart _ s)) = s
-        get (L _ (ITcppContinue _ s)) = s
+        get (L _ (ITcpp _ s)) = s
         get _ = error "should not"
     -- Combine any prior continuation tokens
     cs <- popContinuation
@@ -138,32 +104,44 @@ processCppToks fs = do
 
 processCpp :: [FastString] -> P ()
 processCpp fs = do
-    traceM $ "processCpp: fs=" ++ show fs
-    return ()
+    -- traceM $ "processCpp: fs=" ++ show fs
+    -- let s = cppInitial fs
+    let s = cppInitial fs
+    case regularParse cppDirective s of
+      Left err -> error $ show err
+      Right (CppDefine name def) -> do
+        ppDefine name def
+      Right (CppIfdef name) -> do
+        defined <- ppIsDefined name
+        setAccepting defined
+      Right (CppIfndef name) -> do
+        defined <- ppIsDefined name
+        setAccepting (not defined)
+      Right CppElse -> do
+        accepting <- getAccepting
+        setAccepting (not accepting)
+        return ()
+      Right CppEndif -> do
+        -- TODO: nested states
+        setAccepting True
+        return ()
+
+    return (trace ("processCpp:s=" ++ show s) ())
 
 -- ---------------------------------------------------------------------
 -- Preprocessor state functions
 
 data CppState
     = CppIgnoring
-    | CppInDefine
-    | CppInIfdef
-    | CppInIfndef
     | CppNormal
     deriving (Show)
 
 getCppState :: P CppState
 getCppState = do
-    context <- peekContext
     accepting <- getAccepting
-    case context of
-        -- ITcppDefine _ -> return CppInDefine
-        -- ITcppIfdef _ -> return CppInIfdef
-        -- ITcppIfndef _ -> return CppInIfndef
-        _ ->
-            if accepting
-                then return CppNormal
-                else return CppIgnoring
+    if accepting
+        then return CppNormal
+        else return CppIgnoring
 
 -- pp_context stack start -----------------
 
@@ -212,15 +190,15 @@ popContinuation =
 
 -- definitions start --------------------
 
-ppDefine :: FastString -> P ()
-ppDefine def = P $ \s ->
+ppDefine :: String -> [String] -> P ()
+ppDefine name val = P $ \s ->
     -- POk s{pp = (pp s){pp_defines = Set.insert (cleanTokenString def) (pp_defines (pp s))}} ()
-    POk s{pp = (pp s){pp_defines = Set.insert (trace ("ppDefine:def=[" ++ show (cleanTokenString def) ++ "]") (cleanTokenString def)) (pp_defines (pp s))}} ()
+    POk s{pp = (pp s){pp_defines = Map.insert (trace ("ppDefine:def=[" ++ name ++ "]") name) val (pp_defines (pp s))}} ()
 
-ppIsDefined :: FastString -> P Bool
+ppIsDefined :: String -> P Bool
 ppIsDefined def = P $ \s ->
-    -- POk s (Set.member (cleanTokenString def) (pp_defines (pp s)))
-    POk s (Set.member (trace ("ppIsDefined:def=[" ++ show (cleanTokenString def) ++ "]") (cleanTokenString def)) (pp_defines (pp s)))
+    -- POk s (Map.member def (pp_defines (pp s)))
+    POk s (Map.member (trace ("ppIsDefined:def=[" ++ def ++ "]") def) (pp_defines (pp s)))
 
 -- | Take a @FastString@ of the form "#define FOO\n" and strip off all but "FOO"
 cleanTokenString :: FastString -> String
@@ -229,10 +207,13 @@ cleanTokenString fs = r
     ss = dropWhile (\c -> not $ isSpace c) (unpackFS fs)
     r = init ss
 
-parseDefine :: FastString -> Maybe (String, String)
-parseDefine s = r
+parseDefine :: FastString -> Maybe (String, [String])
+parseDefine fs = r
   where
-    r = Just (cleanTokenString s, "")
+    -- r = Just (cleanTokenString s, "")
+    r = case regularParse cppDefinition (unpackFS fs) of
+        Left _ -> Nothing
+        Right v -> Just v
 
 -- =====================================================================
 -- Parsec parsing
@@ -241,6 +222,63 @@ type CppParser = Parsec String ()
 regularParse :: Parser a -> String -> Either Parsec.ParseError a
 regularParse p = PS.parse p ""
 
+
+-- TODO: delete this
+cppDefinition :: CppParser (String, [String])
+cppDefinition = do
+    _ <- PS.char '#'
+    _ <- whiteSpace
+    _ <- lexeme (PS.string "define")
+    name <- cppToken
+    definition <- cppTokens
+    return (name, definition)
+
+data CppDirective
+  = CppDefine String [String]
+  | CppIfdef String
+  | CppIfndef String
+  | CppElse
+  | CppEndif
+    deriving (Show, Eq)
+
+cppDirective :: CppParser CppDirective
+cppDirective = do
+    _ <- PS.char '#'
+    _ <- whiteSpace
+    choice
+        [ cppKw "define" >> cmdDefinition
+        -- , cppKw "include" CppIncludeKw
+        -- , cppKw "undef" CppUndefKw
+        -- , cppKw "error" CppErrorKw
+        , try$ cppKw "ifdef" >> cmdIfdef
+        , cppKw "ifndef" >> cmdIfndef
+        -- , cppKw "if" CppIfKw
+        -- , cppKw "elif" CppElifKw
+        , try $ cppKw "else" >> return CppElse
+        , cppKw "endif" >> return CppEndif
+        ]
+
+cmdDefinition :: CppParser CppDirective
+cmdDefinition = do
+      name <- cppToken
+      definition <- cppTokens
+      return $ CppDefine name definition
+
+cmdIfdef :: CppParser CppDirective
+cmdIfdef = do
+      name <- cppToken
+      return $ CppIfdef name
+
+cmdIfndef :: CppParser CppDirective
+cmdIfndef = do
+      name <- cppToken
+      return $ CppIfndef name
+
+cppKw :: String -> CppParser ()
+cppKw kw = do
+    _ <- lexeme (PS.string kw)
+    return ()
+
 cppComment :: CppParser ()
 cppComment = do
     _ <- PS.string "/*"
@@ -258,13 +296,17 @@ lexeme p = p <* whiteSpace
 cppToken :: CppParser String
 cppToken = lexeme (PS.many1 (PS.satisfy (\c -> not (isSpace c))))
 
+cppTokens :: CppParser [String]
+cppTokens = PS.many cppToken
+
 {- | Do cpp initial processing, as per https://gcc.gnu.org/onlinedocs/cpp/Initial-processing.html
 See Note [GhcCPP Initial Processing]
 -}
-cppInitial :: FastString -> String
+cppInitial :: [FastString] -> String
 cppInitial fs = r
   where
-    r = unpackFS fs
+    -- go fs' = reverse $ drop 2 $ reverse $ unpackFS fs'
+    r = concatMap unpackFS fs
 
 {-
 Note [GhcCPP Initial Processing]
@@ -491,7 +533,6 @@ t2 = do
         , "#else"
         , "x = 5"
         , "#endif"
-        , ""
         ]
 
 t3 :: IO ()
@@ -573,7 +614,14 @@ t6 = do
         , "#else"
         , "x = \"no version\""
         , "#endif"
+        , ""
         ]
 
-t7 :: Maybe (String, String)
-t7 = parseDefine (mkFastString "#define VERSION_ghc_exactprint \"1.7.0.1\"\n")
+t7 :: Maybe (String, [String])
+t7 = parseDefine (mkFastString "#define VERSION_ghc_exactprint \"1.7.0.1\"")
+
+t8 :: Maybe (String, [String])
+t8 = parseDefine (mkFastString "#define MIN_VERSION_ghc_exactprint(major1,major2,minor) (  (major1) <  1 ||   (major1) == 1 && (major2) <  7 ||   (major1) == 1 && (major2) == 7 && (minor) <= 0)")
+
+t9 :: Either Parsec.ParseError CppDirective
+t9 = regularParse cppDirective "#define VERSION_ghc_exactprint \"1.7.0.1\""



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/89aaf2869980802922f676c0d3f391d0db568b17...54c33324972d6bc3ccd48426a40f5492809d3c1b

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/89aaf2869980802922f676c0d3f391d0db568b17...54c33324972d6bc3ccd48426a40f5492809d3c1b
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20231002/48e34a61/attachment-0001.html>


More information about the ghc-commits mailing list