[Git][ghc/ghc][wip/az/ghc-cpp] 2 commits: Clean up a bit

Alan Zimmerman (@alanz) gitlab at gitlab.haskell.org
Thu Feb 20 21:51:21 UTC 2025



Alan Zimmerman pushed to branch wip/az/ghc-cpp at Glasgow Haskell Compiler / GHC


Commits:
13c33bbf by Alan Zimmerman at 2025-02-20T20:52:25+00:00
Clean up a bit

- - - - -
f79d3f62 by Alan Zimmerman at 2025-02-20T21:50:40+00:00
Add -ddump-ghc-cpp option and a test based on it

- - - - -


10 changed files:

- compiler/GHC/Parser/Lexer.x
- compiler/GHC/Parser/PreProcess.hs
- docs/users_guide/debugging.rst
- + testsuite/tests/ghc-cpp/GhcCpp01.hs
- + testsuite/tests/ghc-cpp/GhcCpp01.stderr
- + testsuite/tests/ghc-cpp/all.T
- utils/check-cpp/Example1.hs
- utils/check-cpp/Macro.hs
- utils/check-cpp/Main.hs
- utils/check-cpp/PreProcess.hs


Changes:

=====================================
compiler/GHC/Parser/Lexer.x
=====================================
@@ -1264,7 +1264,7 @@ cppToken code span buf len _buf2 =
      -- trace ("cppToken:" ++ show (code, t)) $ do return (L span t)
 
 cppTokenCont :: (FastString -> Token)-> Action p
-cppTokenCont code span buf len _buf2 =
+cppTokenCont _code span buf len _buf2 =
   do
      let tokStr = lexemeToFastString buf len
      -- check if the string ends with backslash and newline


=====================================
compiler/GHC/Parser/PreProcess.hs
=====================================
@@ -13,6 +13,7 @@ module GHC.Parser.PreProcess (
     dumpGhcCpp,
 ) where
 
+import Data.List (intercalate, sortBy)
 import Data.Map qualified as Map
 import Debug.Trace (trace)
 import GHC.Data.FastString
@@ -23,15 +24,126 @@ import GHC.Parser.Lexer (P (..), PState (..), ParseResult (..), Token (..))
 import GHC.Parser.Lexer qualified as Lexer
 import GHC.Parser.PreProcess.Macro
 import GHC.Parser.PreProcess.ParsePP
+import GHC.Parser.PreProcess.ParserM qualified as PM
 import GHC.Parser.PreProcess.State
 import GHC.Prelude
 import GHC.Types.SrcLoc
-import GHC.Utils.Outputable (SDoc)
+import GHC.Utils.Outputable (SDoc, text)
+import GHC.Utils.Panic.Plain (panic)
 
 -- ---------------------------------------------------------------------
 
 dumpGhcCpp :: PState PpState -> SDoc
-dumpGhcCpp pst = undefined
+dumpGhcCpp pst = text $ sepa ++ defines ++ sepa ++ final ++ sepa
+  where
+    -- Note: pst is the state /before/ the parser runs, so we can use it to lex.
+    (pst_final, bare_toks) = lexAll pst
+    comments = reverse (Lexer.comment_q pst_final)
+    -- We are going to addSourceToTokens, only need the location
+    to_tok (L (EpaSpan l) _) = L l (ITunknown "-")
+    to_tok (L (EpaDelta l _ _) _) = L l (ITunknown "-")
+    comments_as_toks = map to_tok comments
+    defines = showDefines (pp_defines (pp pst_final))
+    sepa = "\n------------------------------\n"
+    startLoc = mkRealSrcLoc (srcLocFile (psRealLoc $ loc pst)) 1 1
+    buf1 = (buffer pst){cur = 0}
+    all_toks = sortBy cmpBs (bare_toks ++ comments_as_toks)
+    toks = addSourceToTokens startLoc buf1 all_toks
+    final = renderCombinedToks toks
+
+cmpBs :: Located Token -> Located Token -> Ordering
+cmpBs (L (RealSrcSpan _ (Strict.Just bs1)) _) (L (RealSrcSpan _ (Strict.Just bs2)) _) =
+    compare bs1 bs2
+cmpBs (L (RealSrcSpan r1 _) _) (L (RealSrcSpan r2 _) _) =
+    compare r1 r2
+cmpBs _ _ = EQ
+
+renderCombinedToks :: [(Located Token, String)] -> String
+renderCombinedToks toks = showCppTokenStream toks
+
+-- ---------------------------------------------------------------------
+-- addSourceToTokens copied here to unbreak an import loop.
+-- It should probably move somewhere else
+
+{- | Given a source location and a StringBuffer corresponding to this
+location, return a rich token stream with the source associated to the
+tokens.
+-}
+addSourceToTokens ::
+    RealSrcLoc ->
+    StringBuffer ->
+    [Located Token] ->
+    [(Located Token, String)]
+addSourceToTokens _ _ [] = []
+addSourceToTokens loc buf (t@(L span _) : ts) =
+    case span of
+        UnhelpfulSpan _ -> (t, "") : addSourceToTokens loc buf ts
+        RealSrcSpan s _ -> (t, str) : addSourceToTokens newLoc newBuf ts
+          where
+            (newLoc, newBuf, str) = go "" loc buf
+            start = realSrcSpanStart s
+            end = realSrcSpanEnd s
+            go acc loc buf
+                | loc < start = go acc nLoc nBuf
+                | start <= loc && loc < end = go (ch : acc) nLoc nBuf
+                | otherwise = (loc, buf, reverse acc)
+              where
+                (ch, nBuf) = nextChar buf
+                nLoc = advanceSrcLoc loc ch
+
+-- ---------------------------------------------------------------------
+
+-- Tweaked from showRichTokenStream
+showCppTokenStream :: [(Located Token, String)] -> String
+showCppTokenStream ts0 = go startLoc ts0 ""
+  where
+    sourceFile = getFile $ map (getLoc . fst) ts0
+    getFile [] = panic "showCppTokenStream: No source file found"
+    getFile (UnhelpfulSpan _ : xs) = getFile xs
+    getFile (RealSrcSpan s _ : _) = srcSpanFile s
+    startLoc = mkRealSrcLoc sourceFile 0 1
+    go _ [] = id
+    go loc ((L span' tok, str) : ts) =
+        case span' of
+            UnhelpfulSpan _ -> go loc ts
+            RealSrcSpan s _
+                | locLine == tokLine ->
+                    ((replicate (tokCol - locCol) ' ') ++)
+                        . (str ++)
+                        . go tokEnd ts
+                | otherwise ->
+                    ((replicate (tokLine - locLine) '\n') ++)
+                        . (extra ++)
+                        . ((replicate (tokCol - 1) ' ') ++)
+                        . (str ++)
+                        . go tokEnd ts
+              where
+                (locLine, locCol) = (srcLocLine loc, srcLocCol loc)
+                (tokLine, tokCol) = (srcSpanStartLine s, srcSpanStartCol s)
+                tokEnd = realSrcSpanEnd s
+                extra = case tok of
+                    ITunknown _ -> "- |"
+                    _ -> "  |"
+
+showDefines :: MacroDefines -> String
+showDefines defines = Map.foldlWithKey' (\acc k d -> acc ++ "\n" ++ renderDefine k d) "" defines
+  where
+    renderDefine :: String -> (Map.Map (Maybe Int) ((Maybe MacroArgs), MacroDef)) -> String
+    renderDefine k defs = Map.foldl' (\acc d -> acc ++ "\n" ++ renderArity k d) "" defs
+
+    renderArity :: String -> ((Maybe MacroArgs), MacroDef) -> String
+    renderArity n (Nothing, rhs) =
+        "#define " ++ n ++ " " ++ (intercalate " " (map PM.t_str rhs))
+    renderArity n (Just args, rhs) =
+        "#define " ++ n ++ "(" ++ (intercalate "," args) ++ ") " ++ (intercalate " " (map PM.t_str rhs))
+
+lexAll :: Lexer.PState PpState -> (Lexer.PState PpState, [Located Token])
+lexAll state = case unP (lexer True return) state of
+    POk s t@(L _ ITeof) -> (s, [t])
+    POk state' t -> (ss, t : rest)
+      where
+        (ss, rest) = lexAll state'
+    PFailed _pst -> panic $ "GHC.Parser.PreProcess.lexAll failed"
 
 -- ---------------------------------------------------------------------
 


=====================================
docs/users_guide/debugging.rst
=====================================
@@ -149,6 +149,13 @@ parser and interface file reader.
 
     Include comments in the parser.  Useful in combination with :ghc-flag:`-ddump-parsed-ast`.
 
+.. ghc-flag:: -ddump-ghc-cpp
+    :shortdesc: Dump GHC_CPP output
+    :type: dynamic
+
+    Dump a representation of the output of the preprocessor when using the GHC_CPP option
+    Dump parser output as a syntax tree
+
 
 .. ghc-flag:: -ddump-if-trace
     :shortdesc: Trace interface files


=====================================
testsuite/tests/ghc-cpp/GhcCpp01.hs
=====================================
@@ -0,0 +1,12 @@
+{-# LANGUAGE GHC_CPP #-}
+module GhcCpp01 where
+
+#define FOO(A,B) A + B
+#define FOO(A,B,C) A + B + C
+#if FOO(1,FOO(3,4)) == 8
+
+-- a comment
+x = 1
+#else
+x = 5
+#endif


=====================================
testsuite/tests/ghc-cpp/GhcCpp01.stderr
=====================================
@@ -0,0 +1,35 @@
+
+==================== After GHC_CPP ====================
+
+------------------------------
+
+
+#define FOO(A,B) A + B
+#define FOO(A,B,C) A + B + C
+
+#define __GLASGOW_HASKELL_FULL_VERSION__ 9.13.20250216
+
+#define __GLASGOW_HASKELL_PATCHLEVEL1__ 20250216
+
+#define __GLASGOW_HASKELL_PATCHLEVEL2__ 0
+
+#define __GLASGOW_HASKELL__ 913
+------------------------------
+
+- |{-# LANGUAGE GHC_CPP #-}
+  |module GhcCpp01 where
+
+- |#define FOO(A,B) A + B
+- |#define FOO(A,B,C) A + B + C
+- |#if FOO(1,FOO(3,4)) == 8
+
+- |-- a comment
+  |x = 1
+- |#else
+- |x = 5
+- |#endif
+  |
+------------------------------
+
+
+


=====================================
testsuite/tests/ghc-cpp/all.T
=====================================
@@ -0,0 +1 @@
+test('GhcCpp01',      normal, compile, ['-ddump-ghc-cpp -dkeep-comments'])
\ No newline at end of file


=====================================
utils/check-cpp/Example1.hs
=====================================
@@ -1,6 +1,8 @@
 {-# LANGUAGE GHC_CPP #-}
+{-# OPTIONS -ddump-ghc-cpp -dkeep-comments #-}
 module Example1 where
 
+-- A comment
 y = 3
 
 #define FOO


=====================================
utils/check-cpp/Macro.hs
=====================================
@@ -142,7 +142,7 @@ replace_args args margs _ = error $ "replace_args: impossible, mismatch between:
 replace_arg :: [Token] -> String -> [Token] -> [Token]
 replace_arg _ _ [] = []
 replace_arg a ma (TIdentifier t : ts)
-    | ma == t = a <> replace_arg a ma ts
+    | ma == t = a ++ replace_arg a ma ts
 replace_arg a ma (t : ts) = t : replace_arg a ma ts
 
 -- ---------------------------------------------------------------------
@@ -198,10 +198,10 @@ pArg ts = do
         TComma _ : _ -> return (frag, rest)
         (t at TOpenParen{}) : ts' -> do
             (frag', rest') <- inside_parens 1 [t] ts'
-            return (frag <> frag', rest')
+            return (frag ++ frag', rest')
         _ -> do
             (frag', rest') <- pa_frag rest
-            return (frag <> frag', rest')
+            return (frag ++ frag', rest')
 
 pa_frag :: [Token] -> Either String ([Token], [Token])
 pa_frag [] = return ([], [])


=====================================
utils/check-cpp/Main.hs
=====================================
@@ -511,6 +511,7 @@ t18 = do
         [ "#define FOO(A,B) A + B"
         , "#define FOO(A,B,C) A + B + C"
         , "#if FOO(1,FOO(3,4)) == 8"
+        , ""
         , "-- a comment"
         , "x = 1"
         , "#else"


=====================================
utils/check-cpp/PreProcess.hs
=====================================
@@ -38,8 +38,6 @@ dumpGhcCpp pst = text $ sepa ++ defines ++ sepa ++ final ++ sepa
     comments_as_toks = map to_tok comments
     defines = showDefines (pp_defines (pp pst_final))
     sepa = "\n------------------------------\n"
-    -- buf = (buffer pst){cur = 0}
-    -- orig = lexemeToString buf (len buf)
     startLoc = mkRealSrcLoc (srcLocFile (psRealLoc $ loc pst)) 1 1
     buf1 = (buffer pst){cur = 0}
     all_toks = sortBy cmpBs (bare_toks ++ comments_as_toks)
@@ -75,7 +73,7 @@ showCppTokenStream ts0 = go startLoc ts0 ""
                         . (str ++)
                         . go tokEnd ts
                 | otherwise ->
-                    ((replicate (tokLine - locLine) '\n') ++)
+                    ((replicateStr (tokLine - locLine) "  |\n") ++)
                         . (extra ++)
                         . ((replicate (tokCol - 1) ' ') ++)
                         . (str ++)
@@ -88,13 +86,8 @@ showCppTokenStream ts0 = go startLoc ts0 ""
                     ITunknown _ -> "- |"
                     _ -> "  |"
 
-ghcCommentText :: EpaComment -> String
-ghcCommentText (GHC.EpaComment (EpaDocComment s) _) = exactPrintHsDocString s
-ghcCommentText (GHC.EpaComment (EpaDocOptions s) _) = s
-ghcCommentText (GHC.EpaComment (EpaLineComment s) _) = s
-ghcCommentText (GHC.EpaComment (EpaBlockComment s) _) = s
-ghcCommentText (GHC.EpaComment (EpaCppIgnored [L _ s]) _) = s
-ghcCommentText (GHC.EpaComment (EpaCppIgnored _) _) = ""
+replicateStr :: Int -> String -> String
+replicateStr n s = concat (replicate n s)
 
 showDefines :: MacroDefines -> String
 showDefines defines = Map.foldlWithKey' (\acc k d -> acc ++ "\n" ++ renderDefine k d) "" defines
@@ -109,7 +102,7 @@ showDefines defines = Map.foldlWithKey' (\acc k d -> acc ++ "\n" ++ renderDefine
         "#define " ++ n ++ "(" ++ (intercalate "," args) ++ ") " ++ (intercalate " " (map PM.t_str rhs))
 
 lexAll :: Lexer.PState PpState -> (Lexer.PState PpState, [Located Token])
-lexAll state = case unP (lexerDbg True return) state of
+lexAll state = case unP (lexer True return) state of
     POk s t@(L _ ITeof) -> (s, [t])
     POk state' t -> (ss, t : rest)
       where



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/8ea872af5f98061309d4991587f8d9cd9b74e198...f79d3f62fee919e7f495affd4dec5e02f174a2c9

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/8ea872af5f98061309d4991587f8d9cd9b74e198...f79d3f62fee919e7f495affd4dec5e02f174a2c9
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20250220/a4b881ce/attachment-0001.html>


More information about the ghc-commits mailing list