[Git][ghc/ghc][wip/az/ghc-cpp] 2 commits: Clean up a bit
Alan Zimmerman (@alanz)
gitlab at gitlab.haskell.org
Thu Feb 20 21:51:21 UTC 2025
Alan Zimmerman pushed to branch wip/az/ghc-cpp at Glasgow Haskell Compiler / GHC
Commits:
13c33bbf by Alan Zimmerman at 2025-02-20T20:52:25+00:00
Clean up a bit
- - - - -
f79d3f62 by Alan Zimmerman at 2025-02-20T21:50:40+00:00
Add -ddump-ghc-cpp option and a test based on it
- - - - -
10 changed files:
- compiler/GHC/Parser/Lexer.x
- compiler/GHC/Parser/PreProcess.hs
- docs/users_guide/debugging.rst
- + testsuite/tests/ghc-cpp/GhcCpp01.hs
- + testsuite/tests/ghc-cpp/GhcCpp01.stderr
- + testsuite/tests/ghc-cpp/all.T
- utils/check-cpp/Example1.hs
- utils/check-cpp/Macro.hs
- utils/check-cpp/Main.hs
- utils/check-cpp/PreProcess.hs
Changes:
=====================================
compiler/GHC/Parser/Lexer.x
=====================================
@@ -1264,7 +1264,7 @@ cppToken code span buf len _buf2 =
-- trace ("cppToken:" ++ show (code, t)) $ do return (L span t)
cppTokenCont :: (FastString -> Token)-> Action p
-cppTokenCont code span buf len _buf2 =
+cppTokenCont _code span buf len _buf2 =
do
let tokStr = lexemeToFastString buf len
-- check if the string ends with backslash and newline
=====================================
compiler/GHC/Parser/PreProcess.hs
=====================================
@@ -13,6 +13,7 @@ module GHC.Parser.PreProcess (
dumpGhcCpp,
) where
+import Data.List (intercalate, sortBy)
import Data.Map qualified as Map
import Debug.Trace (trace)
import GHC.Data.FastString
@@ -23,15 +24,126 @@ import GHC.Parser.Lexer (P (..), PState (..), ParseResult (..), Token (..))
import GHC.Parser.Lexer qualified as Lexer
import GHC.Parser.PreProcess.Macro
import GHC.Parser.PreProcess.ParsePP
+import GHC.Parser.PreProcess.ParserM qualified as PM
import GHC.Parser.PreProcess.State
import GHC.Prelude
import GHC.Types.SrcLoc
-import GHC.Utils.Outputable (SDoc)
+import GHC.Utils.Outputable (SDoc, text)
+import GHC.Utils.Panic.Plain (panic)
-- ---------------------------------------------------------------------
dumpGhcCpp :: PState PpState -> SDoc
-dumpGhcCpp pst = undefined
+dumpGhcCpp pst = text $ sepa ++ defines ++ sepa ++ final ++ sepa
+ where
+ -- Note: pst is the state /before/ the parser runs, so we can use it to lex.
+ (pst_final, bare_toks) = lexAll pst
+ comments = reverse (Lexer.comment_q pst_final)
+ -- We are going to addSourceToTokens, only need the location
+ to_tok (L (EpaSpan l) _) = L l (ITunknown "-")
+ to_tok (L (EpaDelta l _ _) _) = L l (ITunknown "-")
+ comments_as_toks = map to_tok comments
+ defines = showDefines (pp_defines (pp pst_final))
+ sepa = "\n------------------------------\n"
+ startLoc = mkRealSrcLoc (srcLocFile (psRealLoc $ loc pst)) 1 1
+ buf1 = (buffer pst){cur = 0}
+ all_toks = sortBy cmpBs (bare_toks ++ comments_as_toks)
+ toks = addSourceToTokens startLoc buf1 all_toks
+ final = renderCombinedToks toks
+
+cmpBs :: Located Token -> Located Token -> Ordering
+cmpBs (L (RealSrcSpan _ (Strict.Just bs1)) _) (L (RealSrcSpan _ (Strict.Just bs2)) _) =
+ compare bs1 bs2
+cmpBs (L (RealSrcSpan r1 _) _) (L (RealSrcSpan r2 _) _) =
+ compare r1 r2
+cmpBs _ _ = EQ
+
+renderCombinedToks :: [(Located Token, String)] -> String
+renderCombinedToks toks = showCppTokenStream toks
+
+-- ---------------------------------------------------------------------
+-- addSourceToTokens copied here to unbreak an import loop.
+-- It should probably move somewhere else
+
+{- | Given a source location and a StringBuffer corresponding to this
+location, return a rich token stream with the source associated to the
+tokens.
+-}
+addSourceToTokens ::
+ RealSrcLoc ->
+ StringBuffer ->
+ [Located Token] ->
+ [(Located Token, String)]
+addSourceToTokens _ _ [] = []
+addSourceToTokens loc buf (t@(L span _) : ts) =
+ case span of
+ UnhelpfulSpan _ -> (t, "") : addSourceToTokens loc buf ts
+ RealSrcSpan s _ -> (t, str) : addSourceToTokens newLoc newBuf ts
+ where
+ (newLoc, newBuf, str) = go "" loc buf
+ start = realSrcSpanStart s
+ end = realSrcSpanEnd s
+ go acc loc buf
+ | loc < start = go acc nLoc nBuf
+ | start <= loc && loc < end = go (ch : acc) nLoc nBuf
+ | otherwise = (loc, buf, reverse acc)
+ where
+ (ch, nBuf) = nextChar buf
+ nLoc = advanceSrcLoc loc ch
+
+-- ---------------------------------------------------------------------
+
+-- Tweaked from showRichTokenStream
+showCppTokenStream :: [(Located Token, String)] -> String
+showCppTokenStream ts0 = go startLoc ts0 ""
+ where
+ sourceFile = getFile $ map (getLoc . fst) ts0
+ getFile [] = panic "showCppTokenStream: No source file found"
+ getFile (UnhelpfulSpan _ : xs) = getFile xs
+ getFile (RealSrcSpan s _ : _) = srcSpanFile s
+ startLoc = mkRealSrcLoc sourceFile 0 1
+ go _ [] = id
+ go loc ((L span' tok, str) : ts) =
+ case span' of
+ UnhelpfulSpan _ -> go loc ts
+ RealSrcSpan s _
+ | locLine == tokLine ->
+ ((replicate (tokCol - locCol) ' ') ++)
+ . (str ++)
+ . go tokEnd ts
+ | otherwise ->
+ ((replicate (tokLine - locLine) '\n') ++)
+ . (extra ++)
+ . ((replicate (tokCol - 1) ' ') ++)
+ . (str ++)
+ . go tokEnd ts
+ where
+ (locLine, locCol) = (srcLocLine loc, srcLocCol loc)
+ (tokLine, tokCol) = (srcSpanStartLine s, srcSpanStartCol s)
+ tokEnd = realSrcSpanEnd s
+ extra = case tok of
+ ITunknown _ -> "- |"
+ _ -> " |"
+
+showDefines :: MacroDefines -> String
+showDefines defines = Map.foldlWithKey' (\acc k d -> acc ++ "\n" ++ renderDefine k d) "" defines
+ where
+ renderDefine :: String -> (Map.Map (Maybe Int) ((Maybe MacroArgs), MacroDef)) -> String
+ renderDefine k defs = Map.foldl' (\acc d -> acc ++ "\n" ++ renderArity k d) "" defs
+
+ renderArity :: String -> ((Maybe MacroArgs), MacroDef) -> String
+ renderArity n (Nothing, rhs) =
+ "#define " ++ n ++ " " ++ (intercalate " " (map PM.t_str rhs))
+ renderArity n (Just args, rhs) =
+ "#define " ++ n ++ "(" ++ (intercalate "," args) ++ ") " ++ (intercalate " " (map PM.t_str rhs))
+
+lexAll :: Lexer.PState PpState -> (Lexer.PState PpState, [Located Token])
+lexAll state = case unP (lexer True return) state of
+ POk s t@(L _ ITeof) -> (s, [t])
+ POk state' t -> (ss, t : rest)
+ where
+ (ss, rest) = lexAll state'
+ PFailed _pst -> panic $ "GHC.Parser.PreProcess.lexAll failed"
-- ---------------------------------------------------------------------
=====================================
docs/users_guide/debugging.rst
=====================================
@@ -149,6 +149,13 @@ parser and interface file reader.
Include comments in the parser. Useful in combination with :ghc-flag:`-ddump-parsed-ast`.
+.. ghc-flag:: -ddump-ghc-cpp
+ :shortdesc: Dump GHC_CPP output
+ :type: dynamic
+
+ Dump a representation of the output of the preprocessor when using the GHC_CPP option
+ Dump parser output as a syntax tree
+
.. ghc-flag:: -ddump-if-trace
:shortdesc: Trace interface files
=====================================
testsuite/tests/ghc-cpp/GhcCpp01.hs
=====================================
@@ -0,0 +1,12 @@
+{-# LANGUAGE GHC_CPP #-}
+module GhcCpp01 where
+
+#define FOO(A,B) A + B
+#define FOO(A,B,C) A + B + C
+#if FOO(1,FOO(3,4)) == 8
+
+-- a comment
+x = 1
+#else
+x = 5
+#endif
=====================================
testsuite/tests/ghc-cpp/GhcCpp01.stderr
=====================================
@@ -0,0 +1,35 @@
+
+==================== After GHC_CPP ====================
+
+------------------------------
+
+
+#define FOO(A,B) A + B
+#define FOO(A,B,C) A + B + C
+
+#define __GLASGOW_HASKELL_FULL_VERSION__ 9.13.20250216
+
+#define __GLASGOW_HASKELL_PATCHLEVEL1__ 20250216
+
+#define __GLASGOW_HASKELL_PATCHLEVEL2__ 0
+
+#define __GLASGOW_HASKELL__ 913
+------------------------------
+
+- |{-# LANGUAGE GHC_CPP #-}
+ |module GhcCpp01 where
+
+- |#define FOO(A,B) A + B
+- |#define FOO(A,B,C) A + B + C
+- |#if FOO(1,FOO(3,4)) == 8
+
+- |-- a comment
+ |x = 1
+- |#else
+- |x = 5
+- |#endif
+ |
+------------------------------
+
+
+
=====================================
testsuite/tests/ghc-cpp/all.T
=====================================
@@ -0,0 +1 @@
+test('GhcCpp01', normal, compile, ['-ddump-ghc-cpp -dkeep-comments'])
\ No newline at end of file
=====================================
utils/check-cpp/Example1.hs
=====================================
@@ -1,6 +1,8 @@
{-# LANGUAGE GHC_CPP #-}
+{-# OPTIONS -ddump-ghc-cpp -dkeep-comments #-}
module Example1 where
+-- A comment
y = 3
#define FOO
=====================================
utils/check-cpp/Macro.hs
=====================================
@@ -142,7 +142,7 @@ replace_args args margs _ = error $ "replace_args: impossible, mismatch between:
replace_arg :: [Token] -> String -> [Token] -> [Token]
replace_arg _ _ [] = []
replace_arg a ma (TIdentifier t : ts)
- | ma == t = a <> replace_arg a ma ts
+ | ma == t = a ++ replace_arg a ma ts
replace_arg a ma (t : ts) = t : replace_arg a ma ts
-- ---------------------------------------------------------------------
@@ -198,10 +198,10 @@ pArg ts = do
TComma _ : _ -> return (frag, rest)
(t at TOpenParen{}) : ts' -> do
(frag', rest') <- inside_parens 1 [t] ts'
- return (frag <> frag', rest')
+ return (frag ++ frag', rest')
_ -> do
(frag', rest') <- pa_frag rest
- return (frag <> frag', rest')
+ return (frag ++ frag', rest')
pa_frag :: [Token] -> Either String ([Token], [Token])
pa_frag [] = return ([], [])
=====================================
utils/check-cpp/Main.hs
=====================================
@@ -511,6 +511,7 @@ t18 = do
[ "#define FOO(A,B) A + B"
, "#define FOO(A,B,C) A + B + C"
, "#if FOO(1,FOO(3,4)) == 8"
+ , ""
, "-- a comment"
, "x = 1"
, "#else"
=====================================
utils/check-cpp/PreProcess.hs
=====================================
@@ -38,8 +38,6 @@ dumpGhcCpp pst = text $ sepa ++ defines ++ sepa ++ final ++ sepa
comments_as_toks = map to_tok comments
defines = showDefines (pp_defines (pp pst_final))
sepa = "\n------------------------------\n"
- -- buf = (buffer pst){cur = 0}
- -- orig = lexemeToString buf (len buf)
startLoc = mkRealSrcLoc (srcLocFile (psRealLoc $ loc pst)) 1 1
buf1 = (buffer pst){cur = 0}
all_toks = sortBy cmpBs (bare_toks ++ comments_as_toks)
@@ -75,7 +73,7 @@ showCppTokenStream ts0 = go startLoc ts0 ""
. (str ++)
. go tokEnd ts
| otherwise ->
- ((replicate (tokLine - locLine) '\n') ++)
+ ((replicateStr (tokLine - locLine) " |\n") ++)
. (extra ++)
. ((replicate (tokCol - 1) ' ') ++)
. (str ++)
@@ -88,13 +86,8 @@ showCppTokenStream ts0 = go startLoc ts0 ""
ITunknown _ -> "- |"
_ -> " |"
-ghcCommentText :: EpaComment -> String
-ghcCommentText (GHC.EpaComment (EpaDocComment s) _) = exactPrintHsDocString s
-ghcCommentText (GHC.EpaComment (EpaDocOptions s) _) = s
-ghcCommentText (GHC.EpaComment (EpaLineComment s) _) = s
-ghcCommentText (GHC.EpaComment (EpaBlockComment s) _) = s
-ghcCommentText (GHC.EpaComment (EpaCppIgnored [L _ s]) _) = s
-ghcCommentText (GHC.EpaComment (EpaCppIgnored _) _) = ""
+replicateStr :: Int -> String -> String
+replicateStr n s = concat (replicate n s)
showDefines :: MacroDefines -> String
showDefines defines = Map.foldlWithKey' (\acc k d -> acc ++ "\n" ++ renderDefine k d) "" defines
@@ -109,7 +102,7 @@ showDefines defines = Map.foldlWithKey' (\acc k d -> acc ++ "\n" ++ renderDefine
"#define " ++ n ++ "(" ++ (intercalate "," args) ++ ") " ++ (intercalate " " (map PM.t_str rhs))
lexAll :: Lexer.PState PpState -> (Lexer.PState PpState, [Located Token])
-lexAll state = case unP (lexerDbg True return) state of
+lexAll state = case unP (lexer True return) state of
POk s t@(L _ ITeof) -> (s, [t])
POk state' t -> (ss, t : rest)
where
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/8ea872af5f98061309d4991587f8d9cd9b74e198...f79d3f62fee919e7f495affd4dec5e02f174a2c9
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/8ea872af5f98061309d4991587f8d9cd9b74e198...f79d3f62fee919e7f495affd4dec5e02f174a2c9
You're receiving this email because of your account on gitlab.haskell.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20250220/a4b881ce/attachment-0001.html>
More information about the ghc-commits
mailing list