[Git][ghc/ghc][wip/az/ghc-cpp] 2 commits: Working on getting check-exact to work properly

Alan Zimmerman (@alanz) gitlab at gitlab.haskell.org
Sun Mar 16 14:40:38 UTC 2025



Alan Zimmerman pushed to branch wip/az/ghc-cpp at Glasgow Haskell Compiler / GHC


Commits:
7c43e50c by Alan Zimmerman at 2025-03-16T13:04:52+00:00
Working on getting check-exact to work properly

- - - - -
90e47997 by Alan Zimmerman at 2025-03-16T14:40:15+00:00
Passes CppCommentPlacement test

- - - - -


6 changed files:

- compiler/GHC/Parser/Lexer.x
- compiler/GHC/Parser/PreProcess.hs
- + testsuite/tests/printer/CppCommentPlacement.hs
- utils/check-cpp/PreProcess.hs
- utils/check-exact/Main.hs
- utils/check-exact/Preprocess.hs


Changes:

=====================================
compiler/GHC/Parser/Lexer.x
=====================================
@@ -1006,7 +1006,7 @@ data Token
   | ITblockComment String      PsSpan -- ^ comment in {- -}
 
   -- GHC CPP extension. See Note [GhcCPP Token]
-  | ITcpp Bool FastString   -- ^ CPP #-prefixed line, or continuation.
+  | ITcpp Bool FastString      PsSpan -- ^ CPP #-prefixed line, or continuation.
   deriving Show
 
 instance Outputable Token where
@@ -1262,6 +1262,7 @@ cppTokenCont = doCppToken Nothing
 doCppToken :: (Maybe Int) -> Action p
 doCppToken code span buf len _buf2 =
   do
+    lt <- getLastLocIncludingComments
     let
       pushLexStateMaybe Nothing = return ()
       pushLexStateMaybe (Just code) = pushLexState code
@@ -1275,7 +1276,8 @@ doCppToken code span buf len _buf2 =
        ('\n':_) -> return (len - 1, False)
        _ -> return (len, False)
     let span' = cppSpan span len0
-    return (L span' (ITcpp continue $! lexemeToFastString buf len0))
+    let !s = lexemeToFastString buf len0
+    return (L span' (ITcpp continue s lt))
 
 
 -- cppToken :: Int -> Action p
@@ -1313,10 +1315,12 @@ cppSpan span len = mkPsSpan start_loc end_loc
      BufPos sb = psBufPos start_loc
      end_loc = PsLoc real_loc (BufPos (sb + len + 1))
 
-cppTokenPop :: (FastString -> Token)-> Action p
+cppTokenPop :: (FastString -> PsSpan -> Token)-> Action p
 cppTokenPop t span buf len _buf2 =
   do _ <- popLexState
-     return (L span (t $! lexemeToFastString buf len))
+     lt <- getLastLocIncludingComments
+     let !s = lexemeToFastString buf len
+     return (L span (t s lt))
 
 -- See Note [Nested comment line pragmas]
 failLinePrag1 :: Action p
@@ -3821,6 +3825,7 @@ commentToAnnotation (L l (ITdocComment s ll))   = mkLEpaComment l ll (EpaDocComm
 commentToAnnotation (L l (ITdocOptions s ll))   = mkLEpaComment l ll (EpaDocOptions s)
 commentToAnnotation (L l (ITlineComment s ll))  = mkLEpaComment l ll (EpaLineComment s)
 commentToAnnotation (L l (ITblockComment s ll)) = mkLEpaComment l ll (EpaBlockComment s)
+commentToAnnotation (L l (ITcpp _ s ll))        = mkLEpaComment l ll (EpaLineComment (unpackFS s))
 commentToAnnotation _                           = panic "commentToAnnotation"
 
 -- see Note [PsSpan in Comments]


=====================================
compiler/GHC/Parser/PreProcess.hs
=====================================
@@ -201,7 +201,7 @@ ppLexer queueComments cont =
                             Just inp -> do
                                 Lexer.setInput inp
                                 ppLexer queueComments cont
-                    L l (ITcpp continuation s) -> do
+                    L l (ITcpp continuation s sp) -> do
                         ghcpp <- ghcCppEnabled
                         -- Only process the directive if GhcCpp is explicitly enabled.
                         -- Otherwise we are scanning for pragmas
@@ -216,7 +216,7 @@ ppLexer queueComments cont =
                                         case mdump of
                                             Just dump ->
                                                 -- We have a dump of the state, put it into an ignored token
-                                                contIgnoreTok (L l (ITcpp continuation (appendFS s (fsLit dump))))
+                                                contIgnoreTok (L l (ITcpp continuation (appendFS s (fsLit dump)) sp))
                                             Nothing -> contIgnoreTok tk
                             else contInner tk
                     _ -> do
@@ -232,7 +232,7 @@ ppLexer queueComments cont =
 processCppToks :: FastString -> PP (Maybe String)
 processCppToks fs = do
     let
-        get (L _ (ITcpp _ s)) = s
+        get (L _ (ITcpp _ s _)) = s
         get _ = error "should not"
     -- Combine any prior continuation tokens
     cs <- popContinuation


=====================================
testsuite/tests/printer/CppCommentPlacement.hs
=====================================
@@ -0,0 +1,26 @@
+-- | Top comment
+
+{-# LANGUAGE CPP #-}
+module CppCommentPlacement where
+
+#ifndef CONDITION
+
+--8-----------------------------------------------------------------------------
+-- * comment1
+
+fn :: Integer
+fn = 1
+
+--14----------------------------------------------------------------------------
+
+#else
+
+--18----------------------------------------------------------------------------
+-- * comment2
+
+fn :: Integer
+fn = 2
+
+#endif
+
+--26----------------------------------------------------------------------------


=====================================
utils/check-cpp/PreProcess.hs
=====================================
@@ -49,6 +49,8 @@ dumpGhcCpp dflags pst = output
                         ++ sepa
                         ++ show bare_toks
                         ++ sepa
+                        ++ show lll
+                        ++ sepa
     -- ++ show all_toks ++ sepa
     -- Note: pst is the state /before/ the parser runs, so we can use it to lex.
     (pst_final, bare_toks) = lexAll pst
@@ -67,6 +69,9 @@ dumpGhcCpp dflags pst = output
     toks =
         addSourceToTokens startLoc buf1 all_toks
     final = renderCombinedToks toks
+    lll = case Lexer.lexTokenStream () (options pst) (buffer pst) startLoc of
+      POk _ x -> x
+      _ -> error $ "wtf"
 
 cmpBs :: Located Token -> Located Token -> Ordering
 cmpBs (L (RealSrcSpan _ (Strict.Just bs1)) _) (L (RealSrcSpan _ (Strict.Just bs2)) _) =
@@ -158,8 +163,9 @@ showDefines defines = Map.foldlWithKey' (\acc k d -> acc ++ "\n" ++ renderDefine
         "#define " ++ n ++ "(" ++ (intercalate "," args) ++ ") " ++ (intercalate " " (map PM.t_str rhs))
 
 lexAll :: Lexer.PState PpState -> (Lexer.PState PpState, [Located Token])
--- lexAll state = case unP (lexer True return) state of
-lexAll state = case unP (lexerDbg True return) state of
+lexAll state = case unP (lexer True return) state of
+-- lexAll state = case unP (lexerDbg True return) state of
+-- lexAll state = case unP (Lexer.lexerDbg True return) state of
     POk s t@(L _ ITeof) -> (s, [t])
     -- POk state' t -> (ss, t : rest)
     POk state' t -> (ss, trace ("lexAll:" ++ show t) t : rest)


=====================================
utils/check-exact/Main.hs
=====================================
@@ -218,7 +218,8 @@ _tt = testOneFile changers "/home/alanz/mysrc/git.haskell.org/worktree/bisect/_b
  -- "../../testsuite/tests/printer/Test22771.hs" Nothing
  -- "../../testsuite/tests/printer/Test23465.hs" Nothing
  -- "../../testsuite/tests/printer/Test25454.hs" Nothing
- "../../testsuite/tests/printer/Test25467.hs" Nothing
+ -- "../../testsuite/tests/printer/Test25467.hs" Nothing
+ "../../testsuite/tests/printer/CppCommentPlacement.hs" Nothing
 
 -- cloneT does not need a test, function can be retired
 


=====================================
utils/check-exact/Preprocess.hs
=====================================
@@ -44,10 +44,6 @@ import Utils
 import qualified Data.Set as Set
 import qualified GHC.Data.Strict as Strict
 
-
--- import Debug.Trace
---
-
 -- ---------------------------------------------------------------------
 
 data CppOptions = CppOptions
@@ -106,9 +102,10 @@ getCppTokensAsComments cppOptions sourceFile = do
   source <- GHC.liftIO $ GHC.hGetStringBuffer sourceFile
   let startLoc = GHC.mkRealSrcLoc (GHC.mkFastString sourceFile) 1 1
   (_txt,strSrcBuf,flags2') <- getPreprocessedSrcDirectPrim cppOptions sourceFile
+
   let flags2 = GHC.initParserOpts flags2'
   -- hash-ifdef tokens
-  directiveToks <- GHC.liftIO $ getPreprocessorAsComments sourceFile
+  directiveToks <- GHC.liftIO $ getPreprocessorAsComments (GHC.enableGhcCpp flags2) source startLoc
   -- Tokens without hash-ifdef
   nonDirectiveToks <- tokeniseOriginalSrc startLoc flags2 source
   case GHC.lexTokenStream () flags2 strSrcBuf startLoc of
@@ -131,7 +128,7 @@ goodComment c = isGoodComment (tokComment c)
 
 toRealLocated :: GHC.Located a -> GHC.PsLocated a
 toRealLocated (GHC.L (GHC.RealSrcSpan s (Strict.Just b)) x) = GHC.L (GHC.PsSpan s b) x
-toRealLocated (GHC.L _ _)                     = GHC.panic "toRealLocated"
+toRealLocated (GHC.L l _)                     = GHC.panic $ "toRealLocated:" ++ show l
 
 -- ---------------------------------------------------------------------
 
@@ -264,22 +261,21 @@ alterToolSettings f dynFlags = dynFlags { GHC.toolSettings = f (GHC.toolSettings
 
 -- | Get the preprocessor directives as comment tokens from the
 -- source.
-getPreprocessorAsComments :: FilePath -> IO [(GHC.Located GHC.Token, String)]
-getPreprocessorAsComments srcFile = do
-  fcontents <- readFileGhc srcFile
-  let directives = filter (\(_lineNum,line) -> case line of '#' : _ -> True; _ -> False)
-                    $ zip [1..] (lines fcontents)
-
-  let mkTok (lineNum,line) = (GHC.L l (GHC.ITlineComment line (makeBufSpan l)),line)
-       where
-         start = GHC.mkSrcLoc (GHC.mkFastString srcFile) lineNum 1
-         end   = GHC.mkSrcLoc (GHC.mkFastString srcFile) lineNum (length line)
-         l = GHC.mkSrcSpan start end
-
-  let toks = map mkTok directives
-  return toks
+getPreprocessorAsComments :: GHC.ParserOpts -> GHC.StringBuffer -> GHC.RealSrcLoc -> IO [(GHC.Located GHC.Token, String)]
+getPreprocessorAsComments opts source startLoc = do
+  case GHC.lexTokenStream () opts source startLoc of
+        GHC.POk _ ts ->
+               do
+                  let
+                      isCppTok (GHC.L _ (GHC.ITcpp _ _ _)) = True
+                      isCppTok _                           = False
+                      toks = GHC.addSourceToTokens startLoc source ts
+                      directiveToks = filter (\(t,_) -> isCppTok t) toks
+                  return directiveToks
+        GHC.PFailed pst -> parseError pst
 
 makeBufSpan :: GHC.SrcSpan -> GHC.PsSpan
+makeBufSpan (GHC.RealSrcSpan s (Strict.Just bs)) = GHC.PsSpan s bs
 makeBufSpan ss = pspan
   where
     bl = GHC.BufPos 0



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/3a1904adb2edd1efd1f70707e7317a47d32dc4f0...90e47997dca940879720928e75c5f361ac2b5f29

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/3a1904adb2edd1efd1f70707e7317a47d32dc4f0...90e47997dca940879720928e75c5f361ac2b5f29
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20250316/f7bc5064/attachment-0001.html>


More information about the ghc-commits mailing list