[Git][ghc/ghc][wip/az/ghc-cpp] First implementation of dumpGhcCpp.
Alan Zimmerman (@alanz)
gitlab at gitlab.haskell.org
Thu Feb 20 20:49:13 UTC 2025
Alan Zimmerman pushed to branch wip/az/ghc-cpp at Glasgow Haskell Compiler / GHC
Commits:
8ea872af by Alan Zimmerman at 2025-02-20T20:47:18+00:00
First implementation of dumpGhcCpp.
Example output
First dumps all macros in the state, then the source, showing which
lines are in and which are out
------------------------------
- |#define FOO(A,B) A + B
- |#define FOO(A,B,C) A + B + C
- |#if FOO(1,FOO(3,4)) == 8
- |-- a comment
|x = 1
- |#else
- |x = 5
- |#endif
- - - - -
3 changed files:
- compiler/GHC/Parser/Lexer.x
- utils/check-cpp/Main.hs
- utils/check-cpp/PreProcess.hs
Changes:
=====================================
compiler/GHC/Parser/Lexer.x
=====================================
@@ -325,7 +325,6 @@ $unigraphic / { isSmartQuote } { smart_quote_error }
<bol> {
\n ;
-- Ghc CPP symbols
- ^\# \ * @cppkeyword .* \n / { ifExtension GhcCppBit } { cppToken cpp_prag }
^\# \ * @cppkeyword .* / { ifExtension GhcCppBit } { cppToken cpp_prag }
^\# line { begin line_prag1 }
@@ -343,7 +342,6 @@ $unigraphic / { isSmartQuote } { smart_quote_error }
\{ / { notFollowedBy '-' } { hopefully_open_brace }
-- we might encounter {-# here, but {- has been handled already
\n ;
- ^\# \ * @cppkeyword .* \n / { ifExtension GhcCppBit } { cppToken cpp_prag }
^\# \ * @cppkeyword .* / { ifExtension GhcCppBit } { cppToken cpp_prag }
^\# (line)? { begin line_prag1 }
@@ -371,9 +369,7 @@ $unigraphic / { isSmartQuote } { smart_quote_error }
-- CPP continuation lines. Keep concatenating, or exit
<cpp_prag> {
- .* \\ \n { cppTokenCont (ITcpp True) }
.* \\ { cppTokenCont (ITcpp True) }
- -- .* \n { cppTokenPop (ITcpp False) }
.* { cppTokenPop (ITcpp False) }
-- () { popCpp }
}
=====================================
utils/check-cpp/Main.hs
=====================================
@@ -7,7 +7,6 @@ import Data.List
import Data.List.NonEmpty (NonEmpty (..))
import qualified Data.Map as Map
import Data.Maybe
-import Debug.Trace
import GHC
import qualified GHC.Data.EnumSet as EnumSet
import GHC.Data.FastString
@@ -20,15 +19,13 @@ import qualified GHC.Driver.Session as GHC
import GHC.Hs.Dump
import qualified GHC.LanguageExtensions as LangExt
import GHC.Parser.Errors.Ppr ()
-import GHC.Parser.Lexer (P (..), ParseResult (..), Token (..))
+import GHC.Parser.Lexer (P (..), ParseResult (..))
import qualified GHC.Parser.Lexer as GHC
import qualified GHC.Parser.Lexer as Lexer
import GHC.SysTools.Cpp
-import GHC.Types.Error
import GHC.Types.SrcLoc
import GHC.Unit.Env
import GHC.Unit.State
-import GHC.Utils.Error
import GHC.Utils.Outputable
-- Local simulation -----------------
@@ -74,12 +71,12 @@ parseString libdir includes str = ghcWrapper libdir $ do
-- return $ strParserWrapper str dflags "fake_test_file.hs"
return $ snd $ strGetToks dflags includes pflags "fake_test_file.hs" str
--- doDump :: LibDir -> String -> IO [Located Token]
+doDump :: LibDir -> String -> IO ()
doDump libdir str = ghcWrapper libdir $ do
dflags0 <- initDynFlags
let dflags = dflags0{extensionFlags = EnumSet.insert LangExt.GhcCpp (extensionFlags dflags0)}
let pflags = initParserOpts dflags
- hsc <- getSession
+ -- hsc <- getSession
liftIO $ putStrLn "-- parsing ----------"
liftIO $ putStrLn str
liftIO $ putStrLn "---------------------"
@@ -89,7 +86,6 @@ doDump libdir str = ghcWrapper libdir $ do
liftIO $ putStrLn $ showPprUnsafe $ dumpGhcCpp pst
liftIO $ putStrLn "---------------------"
-
-- return $ strGetToks dflags includes pflags "fake_test_file.hs" str
unitPackages :: UnitEnv -> [UnitInfo]
@@ -108,25 +104,19 @@ strGetToks ::
(Lexer.PState PpState, [Located Token])
strGetToks dflags includes popts filename str = (final, reverse toks)
where
- includeMap = Map.fromList $ map (\(k, v) -> (k, stringToStringBuffer (intercalate "\n" v))) includes
- initState =
- initPpState
- { pp_includes = includeMap
- , pp_defines = predefinedMacros dflags
- , pp_scope = (PpScope True) :| []
- }
- pstate = Lexer.initParserState initState popts buf loc
- loc = mkRealSrcLoc (mkFastString filename) 1 1
- buf = stringToStringBuffer str
- -- cpp_enabled = Lexer.GhcCppBit `Lexer.xtest` Lexer.pExtsBitmap popts
-
- lexAll :: Lexer.PState PpState -> (Lexer.PState PpState, [Located Token])
- lexAll state = case unP (PP.lexerDbg True return) state of
- POk s t@(L _ ITeof) -> (s, [t])
- POk state' t -> (ss, t : rest)
- where
- (ss, rest) = lexAll state'
- PFailed pst -> error $ "failed" ++ showErrorMessages (GHC.GhcPsMessage <$> GHC.getPsErrorMessages pst)
+ -- includeMap = Map.fromList $ map (\(k, v) -> (k, stringToStringBuffer (intercalate "\n" v))) includes
+ -- initState =
+ -- initPpState
+ -- { pp_includes = includeMap
+ -- , pp_defines = predefinedMacros dflags
+ -- , pp_scope = (PpScope True) :| []
+ -- }
+ -- pstate = Lexer.initParserState initState popts buf loc
+ -- loc = mkRealSrcLoc (mkFastString filename) 1 1
+ -- buf = stringToStringBuffer str
+ -- -- cpp_enabled = Lexer.GhcCppBit `Lexer.xtest` Lexer.pExtsBitmap popts
+
+ pstate = getPState dflags includes popts filename str
(final, toks) = lexAll pstate
getPState ::
@@ -148,16 +138,6 @@ getPState dflags includes popts filename str = pstate
pstate = Lexer.initParserState initState popts buf loc
loc = mkRealSrcLoc (mkFastString filename) 1 1
buf = stringToStringBuffer str
- -- cpp_enabled = Lexer.GhcCppBit `Lexer.xtest` Lexer.pExtsBitmap popts
-
- -- lexAll :: Lexer.PState PpState -> (Lexer.PState PpState, [Located Token])
- -- lexAll state = case unP (PP.lexerDbg True return) state of
- -- POk s t@(L _ ITeof) -> (s, [t])
- -- POk state' t -> (ss, t : rest)
- -- where
- -- (ss, rest) = lexAll state'
- -- PFailed pst -> error $ "failed" ++ showErrorMessages (GHC.GhcPsMessage <$> GHC.getPsErrorMessages pst)
- -- (final, toks) = lexAll pstate
-- ---------------------------------------------------------------------
@@ -167,15 +147,15 @@ parseWith ::
P PpState w ->
String ->
Either GHC.ErrorMessages (Lexer.PState PpState, w)
-parseWith dflags fileName parser s =
- case runParser parser dflags fileName s of
+parseWith dflags fileName aParser s =
+ case runParser aParser dflags fileName s of
PFailed pst ->
Left (GhcPsMessage <$> GHC.getPsErrorMessages pst)
POk pst pmod ->
Right (pst, pmod)
runParser :: P PpState a -> DynFlags -> FilePath -> String -> ParseResult PpState a
-runParser parser flags filename str = unP parser parseState
+runParser aParser flags filename str = unP aParser parseState
where
location = mkRealSrcLoc (mkFastString filename) 1 1
buffer = stringToStringBuffer str
@@ -183,51 +163,6 @@ runParser parser flags filename str = unP parser parseState
-- ---------------------------------------------------------------------
--- showErrorMessages :: Messages GhcMessage -> String
--- showErrorMessages msgs =
--- renderWithContext defaultSDocContext $
--- vcat $
--- pprMsgEnvelopeBagWithLocDefault $
--- getMessages $
--- msgs
-
--- strParserWrapper ::
--- -- | Haskell module source text (full Unicode is supported)
--- String ->
--- -- | the flags
--- DynFlags ->
--- -- | the filename (for source locations)
--- FilePath ->
--- [Located Token]
--- strParserWrapper str dflags filename =
--- case strParser str dflags filename of
--- (_, Left _err) -> error "oops"
--- (_, Right toks) -> toks
-
--- {- | Parse a file, using the emulated haskell parser, returning the
--- resulting tokens only
--- -}
--- strParser ::
--- -- | Haskell module source text (full Unicode is supported)
--- String ->
--- -- | the flags
--- DynFlags ->
--- -- | the filename (for source locations)
--- FilePath ->
--- (WarningMessages, Either ErrorMessages [Located Token])
--- strParser str dflags filename =
--- let
--- loc = mkRealSrcLoc (mkFastString filename) 1 1
--- buf = stringToStringBuffer str
--- in
--- case unP parseModuleNoHaddock (Lexer.initParserState initPpState (initParserOpts dflags) buf loc) of
--- PFailed pst ->
--- let (warns, errs) = Lexer.getPsMessages pst
--- in (GhcPsMessage <$> warns, Left $ GhcPsMessage <$> errs)
--- POk pst rdr_module ->
--- let (warns, _) = Lexer.getPsMessages pst
--- in (GhcPsMessage <$> warns, Right rdr_module)
-
initDynFlags :: (GHC.GhcMonad m) => m GHC.DynFlags
initDynFlags = do
-- Based on GHC backpack driver doBackPack
=====================================
utils/check-cpp/PreProcess.hs
=====================================
@@ -1,75 +1,100 @@
module PreProcess where
-import Control.Monad.IO.Class
-import Data.Data hiding (Fixity)
import Data.List
-import Data.List.NonEmpty (NonEmpty (..))
import qualified Data.Map as Map
-import Data.Maybe
import Debug.Trace
import GHC
-import qualified GHC.Data.EnumSet as EnumSet
import GHC.Data.FastString
import qualified GHC.Data.Strict as Strict
import GHC.Data.StringBuffer
-import GHC.Driver.Config.Parser hiding (predefinedMacros)
-import GHC.Driver.Env.Types
import GHC.Driver.Errors.Types
import qualified GHC.Driver.Errors.Types as GHC
-import qualified GHC.Driver.Session as GHC
-import GHC.Hs.Dump
-import qualified GHC.LanguageExtensions as LangExt
import GHC.Parser.Errors.Ppr ()
import GHC.Parser.Lexer (P (..), PState (..), ParseResult (..), Token (..))
import qualified GHC.Parser.Lexer as GHC
import qualified GHC.Parser.Lexer as Lexer
-import GHC.SysTools.Cpp
import GHC.Types.Error
import GHC.Types.SrcLoc
-import GHC.Unit.Env
-import GHC.Unit.State
import GHC.Utils.Error
import GHC.Utils.Outputable
+import GHC.Utils.Panic.Plain
import Macro
import ParsePP
import qualified ParserM as PM
import State
-import Debug.Trace
-
-- ---------------------------------------------------------------------
dumpGhcCpp :: PState PpState -> SDoc
-dumpGhcCpp pst = text $ sep ++ defines ++ sep ++ comments ++ sep ++ orig ++ sep ++ final ++ sep
- -- ++ show startLoc ++ sep ++ show bare_toks ++ sep
+dumpGhcCpp pst = text $ sepa ++ defines ++ sepa ++ final ++ sepa
where
-- Note: pst is the state /before/ the parser runs, so we can use it to lex.
(pst_final, bare_toks) = lexAll pst
+ comments = reverse (Lexer.comment_q pst_final)
+ -- We are going to addSourceToTokens, only need the location
+ to_tok (L (EpaSpan l) _) = L l (ITunknown "-")
+ to_tok (L (EpaDelta l _ _) _) = L l (ITunknown "-")
+ comments_as_toks = map to_tok comments
defines = showDefines (pp_defines (pp pst_final))
- sep = "\n------------------------------\n"
- comments = showPprUnsafe (Lexer.comment_q pst_final)
- buf = (buffer pst){cur = 0}
- orig = lexemeToString buf (len buf)
+ sepa = "\n------------------------------\n"
+ -- buf = (buffer pst){cur = 0}
+ -- orig = lexemeToString buf (len buf)
startLoc = mkRealSrcLoc (srcLocFile (psRealLoc $ loc pst)) 1 1
buf1 = (buffer pst){cur = 0}
- toks = GHC.addSourceToTokens startLoc buf1 bare_toks
- final = renderCombinedToks toks (Lexer.comment_q pst_final)
-
-renderCombinedToks :: [(Located Token, String)] -> [LEpaComment] -> String
-renderCombinedToks toks ctoks = show toks1 ++ show ctoks
+ all_toks = sortBy cmpBs (bare_toks ++ comments_as_toks)
+ toks = GHC.addSourceToTokens startLoc buf1 all_toks
+ final = renderCombinedToks toks
+
+cmpBs :: Located Token -> Located Token -> Ordering
+cmpBs (L (RealSrcSpan _ (Strict.Just bs1)) _) (L (RealSrcSpan _ (Strict.Just bs2)) _) =
+ compare bs1 bs2
+cmpBs (L (RealSrcSpan r1 _) _) (L (RealSrcSpan r2 _) _) =
+ compare r1 r2
+cmpBs _ _ = EQ
+
+renderCombinedToks :: [(Located Token, String)] -> String
+renderCombinedToks toks = showCppTokenStream toks
+
+-- Tweaked from showRichTokenStream
+showCppTokenStream :: [(Located Token, String)] -> String
+showCppTokenStream ts0 = go startLoc ts0 ""
where
- toks1 = map (\(L l _, s) -> (l,s)) toks
- ctoks1 = map (\(L l t) -> (l, ghcCommentText t)) ctoks
-
+ sourceFile = getFile $ map (getLoc . fst) ts0
+ getFile [] = panic "showCppTokenStream: No source file found"
+ getFile (UnhelpfulSpan _ : xs) = getFile xs
+ getFile (RealSrcSpan s _ : _) = srcSpanFile s
+ startLoc = mkRealSrcLoc sourceFile 0 1
+ go _ [] = id
+ go loc ((L span' tok, str) : ts) =
+ case span' of
+ UnhelpfulSpan _ -> go loc ts
+ RealSrcSpan s _
+ | locLine == tokLine ->
+ ((replicate (tokCol - locCol) ' ') ++)
+ . (str ++)
+ . go tokEnd ts
+ | otherwise ->
+ ((replicate (tokLine - locLine) '\n') ++)
+ . (extra ++)
+ . ((replicate (tokCol - 1) ' ') ++)
+ . (str ++)
+ . go tokEnd ts
+ where
+ (locLine, locCol) = (srcLocLine loc, srcLocCol loc)
+ (tokLine, tokCol) = (srcSpanStartLine s, srcSpanStartCol s)
+ tokEnd = realSrcSpanEnd s
+ extra = case tok of
+ ITunknown _ -> "- |"
+ _ -> " |"
ghcCommentText :: EpaComment -> String
-ghcCommentText (GHC.EpaComment (EpaDocComment s) _) = exactPrintHsDocString s
-ghcCommentText (GHC.EpaComment (EpaDocOptions s) _) = s
-ghcCommentText (GHC.EpaComment (EpaLineComment s) _) = s
-ghcCommentText (GHC.EpaComment (EpaBlockComment s) _) = s
-ghcCommentText (GHC.EpaComment (EpaCppIgnored [L _ s]) _)= s
-ghcCommentText (GHC.EpaComment (EpaCppIgnored _) _) = ""
+ghcCommentText (GHC.EpaComment (EpaDocComment s) _) = exactPrintHsDocString s
+ghcCommentText (GHC.EpaComment (EpaDocOptions s) _) = s
+ghcCommentText (GHC.EpaComment (EpaLineComment s) _) = s
+ghcCommentText (GHC.EpaComment (EpaBlockComment s) _) = s
+ghcCommentText (GHC.EpaComment (EpaCppIgnored [L _ s]) _) = s
+ghcCommentText (GHC.EpaComment (EpaCppIgnored _) _) = ""
showDefines :: MacroDefines -> String
showDefines defines = Map.foldlWithKey' (\acc k d -> acc ++ "\n" ++ renderDefine k d) "" defines
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/8ea872af5f98061309d4991587f8d9cd9b74e198
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/8ea872af5f98061309d4991587f8d9cd9b74e198
You're receiving this email because of your account on gitlab.haskell.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20250220/3fc49d1c/attachment-0001.html>
More information about the ghc-commits
mailing list