[Git][ghc/ghc][wip/az/ghc-cpp] First implementation of dumpGhcCpp.

Alan Zimmerman (@alanz) gitlab at gitlab.haskell.org
Thu Feb 20 20:49:13 UTC 2025



Alan Zimmerman pushed to branch wip/az/ghc-cpp at Glasgow Haskell Compiler / GHC


Commits:
8ea872af by Alan Zimmerman at 2025-02-20T20:47:18+00:00
First implementation of dumpGhcCpp.

Example output

First dumps all macros in the state, then the source, showing which
lines are in and which are out

------------------------------

- |#define FOO(A,B) A + B
- |#define FOO(A,B,C) A + B + C
- |#if FOO(1,FOO(3,4)) == 8
- |-- a comment
  |x = 1
- |#else
- |x = 5
- |#endif

- - - - -


3 changed files:

- compiler/GHC/Parser/Lexer.x
- utils/check-cpp/Main.hs
- utils/check-cpp/PreProcess.hs


Changes:

=====================================
compiler/GHC/Parser/Lexer.x
=====================================
@@ -325,7 +325,6 @@ $unigraphic / { isSmartQuote } { smart_quote_error }
 <bol> {
   \n                                    ;
   -- Ghc CPP symbols
-  ^\# \ * @cppkeyword  .* \n / { ifExtension GhcCppBit } { cppToken cpp_prag }
   ^\# \ * @cppkeyword  .*    / { ifExtension GhcCppBit } { cppToken cpp_prag }
 
   ^\# line                              { begin line_prag1 }
@@ -343,7 +342,6 @@ $unigraphic / { isSmartQuote } { smart_quote_error }
   \{ / { notFollowedBy '-' }            { hopefully_open_brace }
         -- we might encounter {-# here, but {- has been handled already
   \n                                    ;
-  ^\# \ * @cppkeyword  .* \n / { ifExtension GhcCppBit } { cppToken cpp_prag }
   ^\# \ * @cppkeyword  .*    / { ifExtension GhcCppBit } { cppToken cpp_prag }
 
   ^\# (line)?                           { begin line_prag1 }
@@ -371,9 +369,7 @@ $unigraphic / { isSmartQuote } { smart_quote_error }
 
 -- CPP continuation lines. Keep concatenating, or exit
 <cpp_prag> {
-  .* \\ \n                   { cppTokenCont (ITcpp True) }
   .* \\                      { cppTokenCont (ITcpp True) }
-  -- .* \n                      { cppTokenPop  (ITcpp False) }
   .*                       { cppTokenPop  (ITcpp False) }
   -- () { popCpp }
 }


=====================================
utils/check-cpp/Main.hs
=====================================
@@ -7,7 +7,6 @@ import Data.List
 import Data.List.NonEmpty (NonEmpty (..))
 import qualified Data.Map as Map
 import Data.Maybe
-import Debug.Trace
 import GHC
 import qualified GHC.Data.EnumSet as EnumSet
 import GHC.Data.FastString
@@ -20,15 +19,13 @@ import qualified GHC.Driver.Session as GHC
 import GHC.Hs.Dump
 import qualified GHC.LanguageExtensions as LangExt
 import GHC.Parser.Errors.Ppr ()
-import GHC.Parser.Lexer (P (..), ParseResult (..), Token (..))
+import GHC.Parser.Lexer (P (..), ParseResult (..))
 import qualified GHC.Parser.Lexer as GHC
 import qualified GHC.Parser.Lexer as Lexer
 import GHC.SysTools.Cpp
-import GHC.Types.Error
 import GHC.Types.SrcLoc
 import GHC.Unit.Env
 import GHC.Unit.State
-import GHC.Utils.Error
 import GHC.Utils.Outputable
 
 -- Local simulation -----------------
@@ -74,12 +71,12 @@ parseString libdir includes str = ghcWrapper libdir $ do
     -- return $ strParserWrapper str dflags "fake_test_file.hs"
     return $ snd $ strGetToks dflags includes pflags "fake_test_file.hs" str
 
--- doDump :: LibDir -> String -> IO [Located Token]
+doDump :: LibDir -> String -> IO ()
 doDump libdir str = ghcWrapper libdir $ do
     dflags0 <- initDynFlags
     let dflags = dflags0{extensionFlags = EnumSet.insert LangExt.GhcCpp (extensionFlags dflags0)}
     let pflags = initParserOpts dflags
-    hsc <- getSession
+    -- hsc <- getSession
     liftIO $ putStrLn "-- parsing ----------"
     liftIO $ putStrLn str
     liftIO $ putStrLn "---------------------"
@@ -89,7 +86,6 @@ doDump libdir str = ghcWrapper libdir $ do
     liftIO $ putStrLn $ showPprUnsafe $ dumpGhcCpp pst
     liftIO $ putStrLn "---------------------"
 
-
 -- return $ strGetToks dflags includes pflags "fake_test_file.hs" str
 
 unitPackages :: UnitEnv -> [UnitInfo]
@@ -108,25 +104,19 @@ strGetToks ::
     (Lexer.PState PpState, [Located Token])
 strGetToks dflags includes popts filename str = (final, reverse toks)
   where
-    includeMap = Map.fromList $ map (\(k, v) -> (k, stringToStringBuffer (intercalate "\n" v))) includes
-    initState =
-        initPpState
-            { pp_includes = includeMap
-            , pp_defines = predefinedMacros dflags
-            , pp_scope = (PpScope True) :| []
-            }
-    pstate = Lexer.initParserState initState popts buf loc
-    loc = mkRealSrcLoc (mkFastString filename) 1 1
-    buf = stringToStringBuffer str
-    -- cpp_enabled = Lexer.GhcCppBit `Lexer.xtest` Lexer.pExtsBitmap popts
-
-    lexAll :: Lexer.PState PpState -> (Lexer.PState PpState, [Located Token])
-    lexAll state = case unP (PP.lexerDbg True return) state of
-        POk s t@(L _ ITeof) -> (s, [t])
-        POk state' t -> (ss, t : rest)
-          where
-            (ss, rest) = lexAll state'
-        PFailed pst -> error $ "failed" ++ showErrorMessages (GHC.GhcPsMessage <$> GHC.getPsErrorMessages pst)
+    -- includeMap = Map.fromList $ map (\(k, v) -> (k, stringToStringBuffer (intercalate "\n" v))) includes
+    -- initState =
+    --     initPpState
+    --         { pp_includes = includeMap
+    --         , pp_defines = predefinedMacros dflags
+    --         , pp_scope = (PpScope True) :| []
+    --         }
+    -- pstate = Lexer.initParserState initState popts buf loc
+    -- loc = mkRealSrcLoc (mkFastString filename) 1 1
+    -- buf = stringToStringBuffer str
+    -- -- cpp_enabled = Lexer.GhcCppBit `Lexer.xtest` Lexer.pExtsBitmap popts
+
+    pstate = getPState dflags includes popts filename str
     (final, toks) = lexAll pstate
 
 getPState ::
@@ -148,16 +138,6 @@ getPState dflags includes popts filename str = pstate
     pstate = Lexer.initParserState initState popts buf loc
     loc = mkRealSrcLoc (mkFastString filename) 1 1
     buf = stringToStringBuffer str
-    -- cpp_enabled = Lexer.GhcCppBit `Lexer.xtest` Lexer.pExtsBitmap popts
-
-    -- lexAll :: Lexer.PState PpState -> (Lexer.PState PpState, [Located Token])
-    -- lexAll state = case unP (PP.lexerDbg True return) state of
-    --     POk s t@(L _ ITeof) -> (s, [t])
-    --     POk state' t -> (ss, t : rest)
-    --       where
-    --         (ss, rest) = lexAll state'
-    --     PFailed pst -> error $ "failed" ++ showErrorMessages (GHC.GhcPsMessage <$> GHC.getPsErrorMessages pst)
-    -- (final, toks) = lexAll pstate
 
 -- ---------------------------------------------------------------------
 
@@ -167,15 +147,15 @@ parseWith ::
     P PpState w ->
     String ->
     Either GHC.ErrorMessages (Lexer.PState PpState, w)
-parseWith dflags fileName parser s =
-    case runParser parser dflags fileName s of
+parseWith dflags fileName aParser s =
+    case runParser aParser dflags fileName s of
         PFailed pst ->
             Left (GhcPsMessage <$> GHC.getPsErrorMessages pst)
         POk pst pmod ->
             Right (pst, pmod)
 
 runParser :: P PpState a -> DynFlags -> FilePath -> String -> ParseResult PpState a
-runParser parser flags filename str = unP parser parseState
+runParser aParser flags filename str = unP aParser parseState
   where
     location = mkRealSrcLoc (mkFastString filename) 1 1
     buffer = stringToStringBuffer str
@@ -183,51 +163,6 @@ runParser parser flags filename str = unP parser parseState
 
 -- ---------------------------------------------------------------------
 
--- showErrorMessages :: Messages GhcMessage -> String
--- showErrorMessages msgs =
---     renderWithContext defaultSDocContext $
---         vcat $
---             pprMsgEnvelopeBagWithLocDefault $
---                 getMessages $
---                     msgs
-
--- strParserWrapper ::
---     -- | Haskell module source text (full Unicode is supported)
---     String ->
---     -- | the flags
---     DynFlags ->
---     -- | the filename (for source locations)
---     FilePath ->
---     [Located Token]
--- strParserWrapper str dflags filename =
---     case strParser str dflags filename of
---         (_, Left _err) -> error "oops"
---         (_, Right toks) -> toks
-
--- {- | Parse a file, using the emulated haskell parser, returning the
--- resulting tokens only
--- -}
--- strParser ::
---     -- | Haskell module source text (full Unicode is supported)
---     String ->
---     -- | the flags
---     DynFlags ->
---     -- | the filename (for source locations)
---     FilePath ->
---     (WarningMessages, Either ErrorMessages [Located Token])
--- strParser str dflags filename =
---     let
---         loc = mkRealSrcLoc (mkFastString filename) 1 1
---         buf = stringToStringBuffer str
---      in
---         case unP parseModuleNoHaddock (Lexer.initParserState initPpState (initParserOpts dflags) buf loc) of
---             PFailed pst ->
---                 let (warns, errs) = Lexer.getPsMessages pst
---                  in (GhcPsMessage <$> warns, Left $ GhcPsMessage <$> errs)
---             POk pst rdr_module ->
---                 let (warns, _) = Lexer.getPsMessages pst
---                  in (GhcPsMessage <$> warns, Right rdr_module)
-
 initDynFlags :: (GHC.GhcMonad m) => m GHC.DynFlags
 initDynFlags = do
     -- Based on GHC backpack driver doBackPack


=====================================
utils/check-cpp/PreProcess.hs
=====================================
@@ -1,75 +1,100 @@
 module PreProcess where
 
-import Control.Monad.IO.Class
-import Data.Data hiding (Fixity)
 import Data.List
-import Data.List.NonEmpty (NonEmpty (..))
 import qualified Data.Map as Map
-import Data.Maybe
 import Debug.Trace
 import GHC
-import qualified GHC.Data.EnumSet as EnumSet
 import GHC.Data.FastString
 import qualified GHC.Data.Strict as Strict
 import GHC.Data.StringBuffer
-import GHC.Driver.Config.Parser hiding (predefinedMacros)
-import GHC.Driver.Env.Types
 import GHC.Driver.Errors.Types
 import qualified GHC.Driver.Errors.Types as GHC
-import qualified GHC.Driver.Session as GHC
-import GHC.Hs.Dump
-import qualified GHC.LanguageExtensions as LangExt
 import GHC.Parser.Errors.Ppr ()
 import GHC.Parser.Lexer (P (..), PState (..), ParseResult (..), Token (..))
 import qualified GHC.Parser.Lexer as GHC
 import qualified GHC.Parser.Lexer as Lexer
-import GHC.SysTools.Cpp
 import GHC.Types.Error
 import GHC.Types.SrcLoc
-import GHC.Unit.Env
-import GHC.Unit.State
 import GHC.Utils.Error
 import GHC.Utils.Outputable
+import GHC.Utils.Panic.Plain
 
 import Macro
 import ParsePP
 import qualified ParserM as PM
 import State
 
-import Debug.Trace
-
 -- ---------------------------------------------------------------------
 
 dumpGhcCpp :: PState PpState -> SDoc
-dumpGhcCpp pst = text $ sep ++ defines ++ sep ++ comments ++ sep ++ orig ++ sep ++ final ++ sep
-  -- ++ show startLoc ++ sep ++ show bare_toks ++ sep
+dumpGhcCpp pst = text $ sepa ++ defines ++ sepa ++ final ++ sepa
   where
     -- Note: pst is the state /before/ the parser runs, so we can use it to lex.
     (pst_final, bare_toks) = lexAll pst
+    comments = reverse (Lexer.comment_q pst_final)
+    -- We are going to addSourceToTokens, only need the location
+    to_tok (L (EpaSpan l) _) = L l (ITunknown "-")
+    to_tok (L (EpaDelta l _ _) _) = L l (ITunknown "-")
+    comments_as_toks = map to_tok comments
     defines = showDefines (pp_defines (pp pst_final))
-    sep = "\n------------------------------\n"
-    comments = showPprUnsafe (Lexer.comment_q pst_final)
-    buf = (buffer pst){cur = 0}
-    orig = lexemeToString buf (len buf)
+    sepa = "\n------------------------------\n"
+    -- buf = (buffer pst){cur = 0}
+    -- orig = lexemeToString buf (len buf)
     startLoc = mkRealSrcLoc (srcLocFile (psRealLoc $ loc pst)) 1 1
     buf1 = (buffer pst){cur = 0}
-    toks = GHC.addSourceToTokens startLoc buf1 bare_toks
-    final = renderCombinedToks toks (Lexer.comment_q pst_final)
-
-renderCombinedToks :: [(Located Token, String)] -> [LEpaComment] -> String
-renderCombinedToks toks ctoks = show toks1 ++ show ctoks
+    all_toks = sortBy cmpBs (bare_toks ++ comments_as_toks)
+    toks = GHC.addSourceToTokens startLoc buf1 all_toks
+    final = renderCombinedToks toks
+
+cmpBs :: Located Token -> Located Token -> Ordering
+cmpBs (L (RealSrcSpan _ (Strict.Just bs1)) _) (L (RealSrcSpan _ (Strict.Just bs2)) _) =
+    compare bs1 bs2
+cmpBs (L (RealSrcSpan r1 _) _) (L (RealSrcSpan r2 _) _) =
+    compare r1 r2
+cmpBs _ _ = EQ
+
+renderCombinedToks :: [(Located Token, String)] -> String
+renderCombinedToks toks = showCppTokenStream toks
+
+-- Tweaked from showRichTokenStream
+showCppTokenStream :: [(Located Token, String)] -> String
+showCppTokenStream ts0 = go startLoc ts0 ""
   where
-    toks1 = map (\(L l _, s) -> (l,s)) toks
-    ctoks1 = map (\(L l t) -> (l, ghcCommentText t)) ctoks
-
+    sourceFile = getFile $ map (getLoc . fst) ts0
+    getFile [] = panic "showCppTokenStream: No source file found"
+    getFile (UnhelpfulSpan _ : xs) = getFile xs
+    getFile (RealSrcSpan s _ : _) = srcSpanFile s
+    startLoc = mkRealSrcLoc sourceFile 0 1
+    go _ [] = id
+    go loc ((L span' tok, str) : ts) =
+        case span' of
+            UnhelpfulSpan _ -> go loc ts
+            RealSrcSpan s _
+                | locLine == tokLine ->
+                    ((replicate (tokCol - locCol) ' ') ++)
+                        . (str ++)
+                        . go tokEnd ts
+                | otherwise ->
+                    ((replicate (tokLine - locLine) '\n') ++)
+                        . (extra ++)
+                        . ((replicate (tokCol - 1) ' ') ++)
+                        . (str ++)
+                        . go tokEnd ts
+              where
+                (locLine, locCol) = (srcLocLine loc, srcLocCol loc)
+                (tokLine, tokCol) = (srcSpanStartLine s, srcSpanStartCol s)
+                tokEnd = realSrcSpanEnd s
+                extra = case tok of
+                    ITunknown _ -> "- |"
+                    _ -> "  |"
 
 ghcCommentText :: EpaComment -> String
-ghcCommentText (GHC.EpaComment (EpaDocComment s) _)      = exactPrintHsDocString s
-ghcCommentText (GHC.EpaComment (EpaDocOptions s) _)      = s
-ghcCommentText (GHC.EpaComment (EpaLineComment s) _)     = s
-ghcCommentText (GHC.EpaComment (EpaBlockComment s) _)    = s
-ghcCommentText (GHC.EpaComment (EpaCppIgnored [L _ s]) _)= s
-ghcCommentText (GHC.EpaComment (EpaCppIgnored _) _)      = ""
+ghcCommentText (GHC.EpaComment (EpaDocComment s) _) = exactPrintHsDocString s
+ghcCommentText (GHC.EpaComment (EpaDocOptions s) _) = s
+ghcCommentText (GHC.EpaComment (EpaLineComment s) _) = s
+ghcCommentText (GHC.EpaComment (EpaBlockComment s) _) = s
+ghcCommentText (GHC.EpaComment (EpaCppIgnored [L _ s]) _) = s
+ghcCommentText (GHC.EpaComment (EpaCppIgnored _) _) = ""
 
 showDefines :: MacroDefines -> String
 showDefines defines = Map.foldlWithKey' (\acc k d -> acc ++ "\n" ++ renderDefine k d) "" defines



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/8ea872af5f98061309d4991587f8d9cd9b74e198

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/8ea872af5f98061309d4991587f8d9cd9b74e198
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20250220/3fc49d1c/attachment-0001.html>


More information about the ghc-commits mailing list