[Git][ghc/ghc][wip/az/ghc-cpp] Starting to integrate.

Alan Zimmerman (@alanz) gitlab at gitlab.haskell.org
Wed Sep 27 22:36:17 UTC 2023



Alan Zimmerman pushed to branch wip/az/ghc-cpp at Glasgow Haskell Compiler / GHC


Commits:
8134209e by Alan Zimmerman at 2023-09-27T23:35:41+01:00
Starting to integrate.

Need to get the pragma recognised and set

- - - - -


7 changed files:

- compiler/GHC/Parser/Annotation.hs
- compiler/GHC/Parser/Lexer.x
- compiler/GHC/Parser/PreProcess.hs
- utils/check-cpp/Main.hs
- utils/check-exact/Transform.hs
- utils/check-exact/Utils.hs
- utils/haddock


Changes:

=====================================
compiler/GHC/Parser/Annotation.hs
=====================================
@@ -370,13 +370,7 @@ data EpaCommentTok =
   | EpaDocOptions      String     -- ^ doc options (prune, ignore-exports, etc)
   | EpaLineComment     String     -- ^ comment starting by "--"
   | EpaBlockComment    String     -- ^ comment in {- -}
-  | EpaEofComment                 -- ^ empty comment, capturing
-                                  -- location of EOF
-
-  -- See #19697 for a discussion of EpaEofComment's use and how it
-  -- should be removed in favour of capturing it in the location for
-  -- 'Located HsModule' in the parser.
-
+  | EpaCppIgnored      [PsLocated String] -- ^ Token ignored by the GHC preprocessor
     deriving (Eq, Data, Show)
 -- Note: these are based on the Token versions, but the Token type is
 -- defined in GHC.Parser.Lexer and bringing it in here would create a loop


=====================================
compiler/GHC/Parser/Lexer.x
=====================================
@@ -81,7 +81,7 @@ module GHC.Parser.Lexer (
    adjustChar,
    addPsMessage,
    -- * for integration with the preprocessor
-   lexToken
+   queueIgnoredToken
   ) where
 
 import GHC.Prelude
@@ -3885,6 +3885,18 @@ queueComment c = P $ \s -> POk s {
   comment_q = commentToAnnotation c : comment_q s
   } ()
 
+queueIgnoredToken :: PsLocated Token -> P()
+queueIgnoredToken (L l tok) = do
+  ll <- getLastLocIncludingComments
+  let
+     -- TODO:AZ: make the tok the right type
+     comment = mkLEpaComment (psRealSpan l) ll (EpaCppIgnored [L l (show tok)])
+     push c = P $ \s  -> POk s {
+          comment_q = c : comment_q s
+          } ()
+  push comment
+
+
 allocateComments
   :: RealSrcSpan
   -> [LEpaComment]
@@ -3955,6 +3967,7 @@ commentToAnnotation (L l (ITdocComment s ll))   = mkLEpaComment l ll (EpaDocComm
 commentToAnnotation (L l (ITdocOptions s ll))   = mkLEpaComment l ll (EpaDocOptions s)
 commentToAnnotation (L l (ITlineComment s ll))  = mkLEpaComment l ll (EpaLineComment s)
 commentToAnnotation (L l (ITblockComment s ll)) = mkLEpaComment l ll (EpaBlockComment s)
+commentToAnnotation (L l (ITblockComment s ll)) = mkLEpaComment l ll (EpaBlockComment s)
 commentToAnnotation _                           = panic "commentToAnnotation"
 
 -- see Note [PsSpan in Comments]


=====================================
compiler/GHC/Parser/PreProcess.hs
=====================================
@@ -1,48 +1,159 @@
 -- Implement a subset of CPP, sufficient for conditional compilation
 -- (only)
-
 -- Note: this file formatted with fourmolu
+{-# LANGUAGE BangPatterns #-}
 
 module GHC.Parser.PreProcess (
+    ppLexer,
+    ppLexerDbg,
     lexer,
     lexerDbg,
 ) where
 
-import GHC.Data.FastString
-import GHC.Data.Maybe
-import GHC.Data.OrdList
-import GHC.Data.StringBuffer
-import GHC.Types.Error
-import GHC.Types.Unique.FM
-import GHC.Utils.Error
-import GHC.Utils.Misc (readHexSignificandExponentPair, readSignificandExponentPair)
-import GHC.Utils.Outputable
-import GHC.Utils.Panic
-
-import GHC.Hs.Doc
-import GHC.Types.Basic (InlineSpec (..), RuleMatchInfo (..))
-import GHC.Types.SourceText
-import GHC.Types.SrcLoc
-
-import GHC.Parser.CharClass
-
+-- import Data.List ()
+import qualified Data.Set as Set
 import Debug.Trace (trace)
-import GHC.Driver.Flags
-import GHC.Parser.Annotation
-import GHC.Parser.Errors.Basic
+import qualified GHC.Data.Strict as Strict
 import GHC.Parser.Errors.Ppr ()
-import GHC.Parser.Errors.Types
-import GHC.Parser.Lexer (P (..), Token (..))
+import GHC.Parser.Lexer (P (..), PState (..), ParseResult (..), PpState (..), Token (..))
 import qualified GHC.Parser.Lexer as Lexer
 import GHC.Prelude
+import GHC.Types.SrcLoc
 
 -- ---------------------------------------------------------------------
 
 lexer, lexerDbg :: Bool -> (Located Token -> P a) -> P a
-lexer queueComments cont = do
-    Lexer.lexer queueComments cont
+lexer = ppLexer
+lexerDbg = ppLexerDbg
 
+ppLexer, ppLexerDbg :: Bool -> (Located Token -> P a) -> P a
 -- Use this instead of 'lexer' in GHC.Parser to dump the tokens for debugging.
-lexerDbg queueComments cont = lexer queueComments contDbg
+ppLexerDbg queueComments cont = ppLexer queueComments contDbg
   where
-    contDbg tok = trace ("ptoken: " ++ show (unLoc tok)) (cont tok)
+    contDbg tok = trace ("pptoken: " ++ show (unLoc tok)) (cont tok)
+ppLexer queueComments cont =
+    Lexer.lexer
+        queueComments
+        ( \tk ->
+            let
+                contInner t = (trace ("ppLexer: tk=" ++ show (unLoc tk, unLoc t)) cont) t
+                -- contPush = pushContext (unLoc tk) >> contInner (L lt (ITcppIgnored [tk]))
+                contPush = pushContext (unLoc tk) >> contIgnoreTok tk
+                contIgnoreTok (L l tok) = do
+                    case l of
+                        RealSrcSpan r (Strict.Just b) -> Lexer.queueIgnoredToken (L (PsSpan r b) tok)
+                        _ -> return ()
+                    ppLexer queueComments cont
+             in
+                case tk of
+                    L _ ITcppDefine -> contPush
+                    L _ ITcppIf -> contPush
+                    L _ ITcppIfdef -> contPush
+                    L _ ITcppIfndef -> contPush
+                    L _ ITcppElse -> do
+                        preprocessElse
+                        contIgnoreTok tk
+                    L _ ITcppEndif -> do
+                        preprocessEnd
+                        contIgnoreTok tk
+                    L _ tok -> do
+                        state <- getCppState
+                        case (trace ("CPP state:" ++ show state) state) of
+                            CppIgnoring -> contIgnoreTok tk
+                            CppInDefine -> do
+                                ppDefine (trace ("ppDefine:" ++ show tok) (show tok))
+                                popContext
+                                contIgnoreTok tk
+                            CppInIfdef -> do
+                                defined <- ppIsDefined (show tok)
+                                setAccepting defined
+                                popContext
+                                contIgnoreTok tk
+                            CppInIfndef -> do
+                                defined <- ppIsDefined (show tok)
+                                setAccepting (not defined)
+                                popContext
+                                contIgnoreTok tk
+                            _ -> contInner tk
+        )
+
+preprocessElse :: P ()
+preprocessElse = do
+    accepting <- getAccepting
+    setAccepting (not accepting)
+
+preprocessEnd :: P ()
+preprocessEnd = do
+    -- TODO: nested context
+    setAccepting True
+
+-- ---------------------------------------------------------------------
+-- Preprocessor state functions
+
+data CppState
+    = CppIgnoring
+    | CppInDefine
+    | CppInIfdef
+    | CppInIfndef
+    | CppNormal
+    deriving (Show)
+
+getCppState :: P CppState
+getCppState = do
+    context <- peekContext
+    accepting <- getAccepting
+    case context of
+        ITcppDefine -> return CppInDefine
+        ITcppIfdef -> return CppInIfdef
+        ITcppIfndef -> return CppInIfndef
+        _ ->
+            if accepting
+                then return CppNormal
+                else return CppIgnoring
+
+-- pp_context stack start -----------------
+
+pushContext :: Token -> P ()
+pushContext new =
+    P $ \s -> POk s{pp = (pp s){pp_context = new : pp_context (pp s)}} ()
+
+popContext :: P ()
+popContext =
+    P $ \s ->
+        let
+            new_context = case pp_context (pp s) of
+                [] -> []
+                (_ : t) -> t
+         in
+            POk s{pp = (pp s){pp_context = new_context}} ()
+
+peekContext :: P Token
+peekContext =
+    P $ \s ->
+        let
+            r = case pp_context (pp s) of
+                [] -> ITeof -- Anthing really, for now, except a CPP one
+                (h : _) -> h
+         in
+            POk s r
+
+setAccepting :: Bool -> P ()
+setAccepting on =
+    P $ \s -> POk s{pp = (pp s){pp_accepting = on}} ()
+
+getAccepting :: P Bool
+getAccepting = P $ \s -> POk s (pp_accepting (pp s))
+
+-- pp_context stack end -------------------
+
+-- definitions start --------------------
+
+ppDefine :: String -> P ()
+ppDefine def = P $ \s ->
+    POk s{pp = (pp s){pp_defines = Set.insert def (pp_defines (pp s))}} ()
+
+ppIsDefined :: String -> P Bool
+ppIsDefined def = P $ \s ->
+    POk s (Set.member def (pp_defines (pp s)))
+
+-- definitions end --------------------


=====================================
utils/check-cpp/Main.hs
=====================================
@@ -9,7 +9,7 @@ import Debug.Trace (trace)
 import GHC
 import qualified GHC.Data.EnumSet as EnumSet
 import GHC.Data.FastString
-import GHC.Data.Maybe
+import qualified GHC.Data.Strict as Strict
 import GHC.Data.StringBuffer
 import GHC.Driver.Config.Parser
 import GHC.Driver.Errors.Types
@@ -40,55 +40,61 @@ ppLexer, ppLexerDbg :: Bool -> (Located Token -> P a) -> P a
 ppLexerDbg queueComments cont = ppLexer queueComments contDbg
   where
     contDbg tok = trace ("pptoken: " ++ show (unLoc tok)) (cont tok)
-
 ppLexer queueComments cont =
     Lexer.lexer
         queueComments
-        ( \tk@(L lt _) ->
+        ( \tk ->
             let
                 contInner t = (trace ("ppLexer: tk=" ++ show (unLoc tk, unLoc t)) cont) t
-                contPush = pushContext (unLoc tk) >> contInner (L lt (ITcppIgnored [tk]))
+                -- contPush = pushContext (unLoc tk) >> contInner (L lt (ITcppIgnored [tk]))
+                contPush = pushContext (unLoc tk) >> contIgnoreTok tk
+                contIgnoreTok (L l tok) = do
+                    case l of
+                        RealSrcSpan r (Strict.Just b) -> Lexer.queueIgnoredToken (L (PsSpan r b) tok)
+                        _ -> return ()
+                    ppLexer queueComments cont
              in
                 case tk of
                     L _ ITcppDefine -> contPush
                     L _ ITcppIf -> contPush
                     L _ ITcppIfdef -> contPush
+                    L _ ITcppIfndef -> contPush
                     L _ ITcppElse -> do
-                        tk' <- preprocessElse tk
-                        contInner tk'
+                        preprocessElse
+                        contIgnoreTok tk
                     L _ ITcppEndif -> do
-                        tk' <- preprocessEnd tk
-                        contInner tk'
-                    L l tok -> do
+                        preprocessEnd
+                        contIgnoreTok tk
+                    L _ tok -> do
                         state <- getCppState
                         case (trace ("CPP state:" ++ show state) state) of
-                            CppIgnoring -> contInner (L l (ITcppIgnored [tk]))
+                            CppIgnoring -> contIgnoreTok tk
                             CppInDefine -> do
                                 ppDefine (trace ("ppDefine:" ++ show tok) (show tok))
                                 popContext
-                                contInner (L l (ITcppIgnored [tk]))
+                                contIgnoreTok tk
                             CppInIfdef -> do
                                 defined <- ppIsDefined (show tok)
-                                if defined
-                                    then setAccepting True
-                                    else setAccepting False
+                                setAccepting defined
                                 popContext
-                                contInner (L l (ITcppIgnored [tk]))
+                                contIgnoreTok tk
+                            CppInIfndef -> do
+                                defined <- ppIsDefined (show tok)
+                                setAccepting (not defined)
+                                popContext
+                                contIgnoreTok tk
                             _ -> contInner tk
         )
 
-
-preprocessElse :: Located Token -> P (Located Token)
-preprocessElse tok@(L l _) = do
+preprocessElse :: P ()
+preprocessElse = do
     accepting <- getAccepting
     setAccepting (not accepting)
-    return (L l (ITcppIgnored [tok]))
 
-preprocessEnd :: Located Token -> P (Located Token)
-preprocessEnd tok@(L l _) = do
+preprocessEnd :: P ()
+preprocessEnd = do
     -- TODO: nested context
     setAccepting True
-    return (L l (ITcppIgnored [tok]))
 
 -- ---------------------------------------------------------------------
 -- Preprocessor state functions
@@ -97,6 +103,7 @@ data CppState
     = CppIgnoring
     | CppInDefine
     | CppInIfdef
+    | CppInIfndef
     | CppNormal
     deriving (Show)
 
@@ -107,6 +114,7 @@ getCppState = do
     case context of
         ITcppDefine -> return CppInDefine
         ITcppIfdef -> return CppInIfdef
+        ITcppIfndef -> return CppInIfndef
         _ ->
             if accepting
                 then return CppNormal
@@ -147,34 +155,6 @@ getAccepting = P $ \s -> POk s (pp_accepting (pp s))
 
 -- pp_context stack end -------------------
 
--- pp_pushed_back token start --------------
-
-pushBack :: Located Token -> P ()
-pushBack tok = P $ \s ->
-    if isJust (pp_pushed_back (pp s))
-        then
-            PFailed
-                $ s
-        else -- { errors =
-        --     ("pushBack: " ++ show tok ++ ", we already have a token:" ++ show (pp_pushed_back (pp s)))
-        --         : errors s
-        -- }
-
-            let
-                ppVal = pp s
-                pp' = ppVal{pp_pushed_back = Just tok}
-                s' = s{pp = pp'}
-             in
-                POk s' ()
-
--- | Destructive read of the pp_pushed back token (if any)
-getPushBack :: P (Maybe (Located Token))
-getPushBack = P $ \s ->
-    POk s{pp = (pp s){pp_pushed_back = Nothing}} (pp_pushed_back (pp s))
-
-
--- pp_pushed_back token end ----------------
-
 -- definitions start --------------------
 
 ppDefine :: String -> P ()
@@ -387,3 +367,15 @@ t1 = do
         [ "data X = X"
         , ""
         ]
+
+t2 :: IO ()
+t2 = do
+    doTest
+        [ "#define FOO"
+        , "#ifndef FOO"
+        , "x = 1"
+        , "#else"
+        , "x = 5"
+        , "#endif"
+        , ""
+        ]


=====================================
utils/check-exact/Transform.hs
=====================================
@@ -676,9 +676,10 @@ commentOrigDelta (L (GHC.Anchor la _) (GHC.EpaComment t pp))
                -- then MovedAnchor (ss2delta (r,c+0) la)
                -- else MovedAnchor (ss2delta (r,c)   la)
                else MovedAnchor (tweakDelta $ ss2delta (r,c)   la)
-        op = if t == EpaEofComment && op' == MovedAnchor (SameLine 0)
-               then MovedAnchor (DifferentLine 1 0)
-               else op'
+        -- op = if t == EpaEofComment && op' == MovedAnchor (SameLine 0)
+        --        then MovedAnchor (DifferentLine 1 0)
+        --        else op'
+        op = op'
 
 -- ---------------------------------------------------------------------
 


=====================================
utils/check-exact/Utils.hs
=====================================
@@ -235,7 +235,8 @@ ghcCommentText (L _ (GHC.EpaComment (EpaDocComment s) _))      = exactPrintHsDoc
 ghcCommentText (L _ (GHC.EpaComment (EpaDocOptions s) _))      = s
 ghcCommentText (L _ (GHC.EpaComment (EpaLineComment s) _))     = s
 ghcCommentText (L _ (GHC.EpaComment (EpaBlockComment s) _))    = s
-ghcCommentText (L _ (GHC.EpaComment (EpaEofComment) _))        = ""
+ghcCommentText (L _ (GHC.EpaComment (EpaCppIgnored [L _ s]) _))= s
+ghcCommentText (L _ (GHC.EpaComment (EpaCppIgnored _) _))      = ""
 
 tokComment :: LEpaComment -> Comment
 tokComment t@(L lt c) = mkComment (normaliseCommentText $ ghcCommentText t) lt (ac_prior_tok c)
@@ -250,7 +251,7 @@ comment2LEpaComment :: Comment -> LEpaComment
 comment2LEpaComment (Comment s anc r _mk) = mkLEpaComment s anc r
 
 mkLEpaComment :: String -> Anchor -> RealSrcSpan -> LEpaComment
-mkLEpaComment "" anc r = (L anc (GHC.EpaComment (EpaEofComment) r))
+mkLEpaComment "" anc r = (L anc (GHC.EpaComment (EpaCppIgnored []) r))
 mkLEpaComment s anc r = (L anc (GHC.EpaComment (EpaLineComment s) r))
 
 mkComment :: String -> Anchor -> RealSrcSpan -> Comment


=====================================
utils/haddock
=====================================
@@ -1 +1 @@
-Subproject commit 1130973f07aecc37a37943f4b1cc529aabd15e61
+Subproject commit 267207c66495388c76297f9bd3f57454c021b9a9



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/8134209e4092556edb7b0d324157db0ea5a468af

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/8134209e4092556edb7b0d324157db0ea5a468af
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20230927/bd76afc6/attachment-0001.html>


More information about the ghc-commits mailing list