[Git][ghc/ghc][wip/az/ghc-cpp] Working on getting check-exact to work properly

Alan Zimmerman (@alanz) gitlab at gitlab.haskell.org
Thu Mar 13 22:38:17 UTC 2025



Alan Zimmerman pushed to branch wip/az/ghc-cpp at Glasgow Haskell Compiler / GHC


Commits:
3a1904ad by Alan Zimmerman at 2025-03-13T22:37:47+00:00
Working on getting check-exact to work properly

- - - - -


5 changed files:

- compiler/GHC/Parser/Lexer.x
- compiler/GHC/Parser/PreProcess.hs
- utils/check-cpp/PreProcess.hs
- utils/check-exact/Main.hs
- utils/check-exact/Preprocess.hs


Changes:

=====================================
compiler/GHC/Parser/Lexer.x
=====================================
@@ -1006,7 +1006,7 @@ data Token
   | ITblockComment String      PsSpan -- ^ comment in {- -}
 
   -- GHC CPP extension. See Note [GhcCPP Token]
-  | ITcpp Bool FastString   -- ^ CPP #-prefixed line, or continuation.
+  | ITcpp Bool FastString      PsSpan -- ^ CPP #-prefixed line, or continuation.
   deriving Show
 
 instance Outputable Token where
@@ -1262,6 +1262,7 @@ cppTokenCont = doCppToken Nothing
 doCppToken :: (Maybe Int) -> Action p
 doCppToken code span buf len _buf2 =
   do
+    lt <- getLastLocIncludingComments
     let
       pushLexStateMaybe Nothing = return ()
       pushLexStateMaybe (Just code) = pushLexState code
@@ -1275,7 +1276,8 @@ doCppToken code span buf len _buf2 =
        ('\n':_) -> return (len - 1, False)
        _ -> return (len, False)
     let span' = cppSpan span len0
-    return (L span' (ITcpp continue $! lexemeToFastString buf len0))
+    let !s = lexemeToFastString buf len0
+    return (L span' (ITcpp continue s lt))
 
 
 -- cppToken :: Int -> Action p
@@ -1313,10 +1315,12 @@ cppSpan span len = mkPsSpan start_loc end_loc
      BufPos sb = psBufPos start_loc
      end_loc = PsLoc real_loc (BufPos (sb + len + 1))
 
-cppTokenPop :: (FastString -> Token)-> Action p
+cppTokenPop :: (FastString -> PsSpan -> Token)-> Action p
 cppTokenPop t span buf len _buf2 =
   do _ <- popLexState
-     return (L span (t $! lexemeToFastString buf len))
+     lt <- getLastLocIncludingComments
+     let !s = lexemeToFastString buf len
+     return (L span (t s lt))
 
 -- See Note [Nested comment line pragmas]
 failLinePrag1 :: Action p
@@ -3821,6 +3825,7 @@ commentToAnnotation (L l (ITdocComment s ll))   = mkLEpaComment l ll (EpaDocComm
 commentToAnnotation (L l (ITdocOptions s ll))   = mkLEpaComment l ll (EpaDocOptions s)
 commentToAnnotation (L l (ITlineComment s ll))  = mkLEpaComment l ll (EpaLineComment s)
 commentToAnnotation (L l (ITblockComment s ll)) = mkLEpaComment l ll (EpaBlockComment s)
+commentToAnnotation (L l (ITcpp _ s ll))        = mkLEpaComment l ll (EpaLineComment (unpackFS s))
 commentToAnnotation _                           = panic "commentToAnnotation"
 
 -- see Note [PsSpan in Comments]


=====================================
compiler/GHC/Parser/PreProcess.hs
=====================================
@@ -201,7 +201,7 @@ ppLexer queueComments cont =
                             Just inp -> do
                                 Lexer.setInput inp
                                 ppLexer queueComments cont
-                    L l (ITcpp continuation s) -> do
+                    L l (ITcpp continuation s sp) -> do
                         ghcpp <- ghcCppEnabled
                         -- Only process the directive if GhcCpp is explicitly enabled.
                         -- Otherwise we are scanning for pragmas
@@ -216,7 +216,7 @@ ppLexer queueComments cont =
                                         case mdump of
                                             Just dump ->
                                                 -- We have a dump of the state, put it into an ignored token
-                                                contIgnoreTok (L l (ITcpp continuation (appendFS s (fsLit dump))))
+                                                contIgnoreTok (L l (ITcpp continuation (appendFS s (fsLit dump)) sp))
                                             Nothing -> contIgnoreTok tk
                             else contInner tk
                     _ -> do
@@ -232,7 +232,7 @@ ppLexer queueComments cont =
 processCppToks :: FastString -> PP (Maybe String)
 processCppToks fs = do
     let
-        get (L _ (ITcpp _ s)) = s
+        get (L _ (ITcpp _ s _)) = s
         get _ = error "should not"
     -- Combine any prior continuation tokens
     cs <- popContinuation


=====================================
utils/check-cpp/PreProcess.hs
=====================================
@@ -49,6 +49,8 @@ dumpGhcCpp dflags pst = output
                         ++ sepa
                         ++ show bare_toks
                         ++ sepa
+                        ++ show lll
+                        ++ sepa
     -- ++ show all_toks ++ sepa
     -- Note: pst is the state /before/ the parser runs, so we can use it to lex.
     (pst_final, bare_toks) = lexAll pst
@@ -67,6 +69,9 @@ dumpGhcCpp dflags pst = output
     toks =
         addSourceToTokens startLoc buf1 all_toks
     final = renderCombinedToks toks
+    lll = case Lexer.lexTokenStream () (options pst) (buffer pst) startLoc of
+      POk _ x -> x
+      _ -> error $ "wtf"
 
 cmpBs :: Located Token -> Located Token -> Ordering
 cmpBs (L (RealSrcSpan _ (Strict.Just bs1)) _) (L (RealSrcSpan _ (Strict.Just bs2)) _) =
@@ -158,8 +163,9 @@ showDefines defines = Map.foldlWithKey' (\acc k d -> acc ++ "\n" ++ renderDefine
         "#define " ++ n ++ "(" ++ (intercalate "," args) ++ ") " ++ (intercalate " " (map PM.t_str rhs))
 
 lexAll :: Lexer.PState PpState -> (Lexer.PState PpState, [Located Token])
--- lexAll state = case unP (lexer True return) state of
-lexAll state = case unP (lexerDbg True return) state of
+lexAll state = case unP (lexer True return) state of
+-- lexAll state = case unP (lexerDbg True return) state of
+-- lexAll state = case unP (Lexer.lexerDbg True return) state of
     POk s t@(L _ ITeof) -> (s, [t])
     -- POk state' t -> (ss, t : rest)
     POk state' t -> (ss, trace ("lexAll:" ++ show t) t : rest)


=====================================
utils/check-exact/Main.hs
=====================================
@@ -218,7 +218,8 @@ _tt = testOneFile changers "/home/alanz/mysrc/git.haskell.org/worktree/bisect/_b
  -- "../../testsuite/tests/printer/Test22771.hs" Nothing
  -- "../../testsuite/tests/printer/Test23465.hs" Nothing
  -- "../../testsuite/tests/printer/Test25454.hs" Nothing
- "../../testsuite/tests/printer/Test25467.hs" Nothing
+ -- "../../testsuite/tests/printer/Test25467.hs" Nothing
+ "../../testsuite/tests/printer/CppCommentPlacement.hs" Nothing
 
 -- cloneT does not need a test, function can be retired
 


=====================================
utils/check-exact/Preprocess.hs
=====================================
@@ -20,11 +20,13 @@ import qualified Control.Monad.IO.Class as GHC
 import qualified GHC.Data.FastString   as GHC
 import qualified GHC.Data.StringBuffer as GHC
 import qualified GHC.Driver.Config.Parser as GHC
+import qualified GHC.Driver.DynFlags   as GHC
 import qualified GHC.Driver.Env        as GHC
 import qualified GHC.Driver.Errors.Types as GHC
 import qualified GHC.Driver.Phases     as GHC
 import qualified GHC.Driver.Pipeline   as GHC
 import qualified GHC.Parser.Lexer      as GHC
+import qualified GHC.Parser.PreProcess.State as GHC
 import qualified GHC.Settings          as GHC
 import qualified GHC.Types.Error       as GHC
 import qualified GHC.Types.SourceError as GHC
@@ -37,7 +39,7 @@ import qualified GHC.Utils.Panic.Plain as GHC
 import GHC.Types.SrcLoc (mkSrcSpan, mkSrcLoc)
 import GHC.Data.FastString (mkFastString)
 
-import Data.List (isPrefixOf)
+import Data.List (isPrefixOf, partition)
 import Data.Maybe
 import Types
 import Utils
@@ -45,7 +47,8 @@ import qualified Data.Set as Set
 import qualified GHC.Data.Strict as Strict
 
 
--- import Debug.Trace
+import Debug.Trace
+import qualified GHC.LanguageExtensions as LangExt
 --
 
 -- ---------------------------------------------------------------------
@@ -106,16 +109,27 @@ getCppTokensAsComments cppOptions sourceFile = do
   source <- GHC.liftIO $ GHC.hGetStringBuffer sourceFile
   let startLoc = GHC.mkRealSrcLoc (GHC.mkFastString sourceFile) 1 1
   (_txt,strSrcBuf,flags2') <- getPreprocessedSrcDirectPrim cppOptions sourceFile
+
+  let flags2g  = GHC.xopt_set flags2' LangExt.GhcCpp
   let flags2 = GHC.initParserOpts flags2'
+  let flags2'' = GHC.initParserOpts flags2g
+  -- let flags2'' = flags2 { GHC.pExtsBitmap = GHC.xset GHC.GhcCppBit (GHC.pExtsBitmap flags2)}
   -- hash-ifdef tokens
-  directiveToks <- GHC.liftIO $ getPreprocessorAsComments sourceFile
+  -- directiveToks <- GHC.liftIO $ getPreprocessorAsComments sourceFile
   -- Tokens without hash-ifdef
   nonDirectiveToks <- tokeniseOriginalSrc startLoc flags2 source
-  case GHC.lexTokenStream () flags2 strSrcBuf startLoc of
+  case GHC.lexTokenStream () (GHC.enableGhcCpp flags2) source startLoc of
         GHC.POk _ ts ->
                do
-                  let toks = GHC.addSourceToTokens startLoc source ts
-                      cppCommentToks = getCppTokens directiveToks nonDirectiveToks toks
+                  let
+                      isCppTok (GHC.L _ (GHC.ITcpp _ _ _)) = True
+                      isCppTok _                           = False
+                      toks = GHC.addSourceToTokens startLoc source
+                                (trace ("bitmap:" ++ show (GHC.pExtsBitmap flags2)) ts)
+                      (directiveToks, toks') = partition (\(t,_) -> isCppTok t) toks
+                      -- (directiveToks, toks') = partition (\(t,_) -> isCppTok t)
+                      --                 (trace ("toks:" ++ show toks) toks)
+                      cppCommentToks = getCppTokens directiveToks nonDirectiveToks toks'
                   return $ filter goodComment
                          $  map (GHC.commentToAnnotation . toRealLocated . fst) cppCommentToks
         GHC.PFailed pst -> parseError pst
@@ -131,7 +145,7 @@ goodComment c = isGoodComment (tokComment c)
 
 toRealLocated :: GHC.Located a -> GHC.PsLocated a
 toRealLocated (GHC.L (GHC.RealSrcSpan s (Strict.Just b)) x) = GHC.L (GHC.PsSpan s b) x
-toRealLocated (GHC.L _ _)                     = GHC.panic "toRealLocated"
+toRealLocated (GHC.L l _)                     = GHC.panic $ "toRealLocated:" ++ show l
 
 -- ---------------------------------------------------------------------
 
@@ -150,8 +164,9 @@ getCppTokens ::
   -> [(GHC.Located GHC.Token, String)]
   -> [(GHC.Located GHC.Token, String)]
   -> [(GHC.Located GHC.Token, String)]
-getCppTokens directiveToks origSrcToks postCppToks = toks
+getCppTokens directiveToks' origSrcToks postCppToks = toks
   where
+    directiveToks = trace ("directiveToks: " ++ show directiveToks') directiveToks'
     locFn (GHC.L l1 _,_) (GHC.L l2 _,_) = compare (rs l1) (rs l2)
     m1Toks = mergeBy locFn postCppToks directiveToks
 
@@ -262,6 +277,23 @@ alterToolSettings f dynFlags = dynFlags { GHC.toolSettings = f (GHC.toolSettings
 
 -- ---------------------------------------------------------------------
 
+-- | Get the preprocessor directives as comment tokens from the
+-- source.
+getPreprocessorAsComments' :: FilePath -> IO [(GHC.Located GHC.Token, String)]
+getPreprocessorAsComments' srcFile = do
+  fcontents <- readFileGhc srcFile
+  let directives = filter (\(_lineNum,line) -> case line of '#' : _ -> True; _ -> False)
+                    $ zip [1..] (lines fcontents)
+
+  let mkTok (lineNum,line) = (GHC.L l (GHC.ITlineComment line (makeBufSpan l)),line)
+       where
+         start = GHC.mkSrcLoc (GHC.mkFastString srcFile) lineNum 1
+         end   = GHC.mkSrcLoc (GHC.mkFastString srcFile) lineNum (length line)
+         l = GHC.mkSrcSpan start end
+
+  let toks = map mkTok directives
+  return toks
+
 -- | Get the preprocessor directives as comment tokens from the
 -- source.
 getPreprocessorAsComments :: FilePath -> IO [(GHC.Located GHC.Token, String)]
@@ -279,7 +311,22 @@ getPreprocessorAsComments srcFile = do
   let toks = map mkTok directives
   return toks
 
+-- TODO: possibly use the one from GHC.Parser.PreProcess, depending on
+-- which lexer it ends up using. Or have specific versions of lexAll
+-- for the two lexers.
+lexAll :: GHC.PState GHC.PpState -> (GHC.PState GHC.PpState, [GHC.Located GHC.Token])
+lexAll state = case GHC.unP (GHC.lexer True return) state of
+-- lexAll state = case unP (lexerDbg True return) state of
+-- lexAll state = case unP (Lexer.lexerDbg True return) state of
+    GHC.POk s t@(GHC.L _ GHC.ITeof) -> (s, [t])
+    -- POk state' t -> (ss, t : rest)
+    GHC.POk state' t -> (ss, trace ("lexAll:" ++ show t) t : rest)
+      where
+        (ss, rest) = lexAll state'
+    GHC.PFailed _pst -> GHC.panic $ "GHC.Parser.PreProcess.lexAll failed"
+
 makeBufSpan :: GHC.SrcSpan -> GHC.PsSpan
+makeBufSpan (GHC.RealSrcSpan s (Strict.Just bs)) = GHC.PsSpan s bs
 makeBufSpan ss = pspan
   where
     bl = GHC.BufPos 0



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/3a1904adb2edd1efd1f70707e7317a47d32dc4f0

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/3a1904adb2edd1efd1f70707e7317a47d32dc4f0
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20250313/f615e60b/attachment-0001.html>


More information about the ghc-commits mailing list