[Git][ghc/ghc][wip/az/ghc-cpp] Working on getting check-exact to work properly
Alan Zimmerman (@alanz)
gitlab at gitlab.haskell.org
Thu Mar 13 22:38:17 UTC 2025
Alan Zimmerman pushed to branch wip/az/ghc-cpp at Glasgow Haskell Compiler / GHC
Commits:
3a1904ad by Alan Zimmerman at 2025-03-13T22:37:47+00:00
Working on getting check-exact to work properly
- - - - -
5 changed files:
- compiler/GHC/Parser/Lexer.x
- compiler/GHC/Parser/PreProcess.hs
- utils/check-cpp/PreProcess.hs
- utils/check-exact/Main.hs
- utils/check-exact/Preprocess.hs
Changes:
=====================================
compiler/GHC/Parser/Lexer.x
=====================================
@@ -1006,7 +1006,7 @@ data Token
| ITblockComment String PsSpan -- ^ comment in {- -}
-- GHC CPP extension. See Note [GhcCPP Token]
- | ITcpp Bool FastString -- ^ CPP #-prefixed line, or continuation.
+ | ITcpp Bool FastString PsSpan -- ^ CPP #-prefixed line, or continuation.
deriving Show
instance Outputable Token where
@@ -1262,6 +1262,7 @@ cppTokenCont = doCppToken Nothing
doCppToken :: (Maybe Int) -> Action p
doCppToken code span buf len _buf2 =
do
+ lt <- getLastLocIncludingComments
let
pushLexStateMaybe Nothing = return ()
pushLexStateMaybe (Just code) = pushLexState code
@@ -1275,7 +1276,8 @@ doCppToken code span buf len _buf2 =
('\n':_) -> return (len - 1, False)
_ -> return (len, False)
let span' = cppSpan span len0
- return (L span' (ITcpp continue $! lexemeToFastString buf len0))
+ let !s = lexemeToFastString buf len0
+ return (L span' (ITcpp continue s lt))
-- cppToken :: Int -> Action p
@@ -1313,10 +1315,12 @@ cppSpan span len = mkPsSpan start_loc end_loc
BufPos sb = psBufPos start_loc
end_loc = PsLoc real_loc (BufPos (sb + len + 1))
-cppTokenPop :: (FastString -> Token)-> Action p
+cppTokenPop :: (FastString -> PsSpan -> Token)-> Action p
cppTokenPop t span buf len _buf2 =
do _ <- popLexState
- return (L span (t $! lexemeToFastString buf len))
+ lt <- getLastLocIncludingComments
+ let !s = lexemeToFastString buf len
+ return (L span (t s lt))
-- See Note [Nested comment line pragmas]
failLinePrag1 :: Action p
@@ -3821,6 +3825,7 @@ commentToAnnotation (L l (ITdocComment s ll)) = mkLEpaComment l ll (EpaDocComm
commentToAnnotation (L l (ITdocOptions s ll)) = mkLEpaComment l ll (EpaDocOptions s)
commentToAnnotation (L l (ITlineComment s ll)) = mkLEpaComment l ll (EpaLineComment s)
commentToAnnotation (L l (ITblockComment s ll)) = mkLEpaComment l ll (EpaBlockComment s)
+commentToAnnotation (L l (ITcpp _ s ll)) = mkLEpaComment l ll (EpaLineComment (unpackFS s))
commentToAnnotation _ = panic "commentToAnnotation"
-- see Note [PsSpan in Comments]
=====================================
compiler/GHC/Parser/PreProcess.hs
=====================================
@@ -201,7 +201,7 @@ ppLexer queueComments cont =
Just inp -> do
Lexer.setInput inp
ppLexer queueComments cont
- L l (ITcpp continuation s) -> do
+ L l (ITcpp continuation s sp) -> do
ghcpp <- ghcCppEnabled
-- Only process the directive if GhcCpp is explicitly enabled.
-- Otherwise we are scanning for pragmas
@@ -216,7 +216,7 @@ ppLexer queueComments cont =
case mdump of
Just dump ->
-- We have a dump of the state, put it into an ignored token
- contIgnoreTok (L l (ITcpp continuation (appendFS s (fsLit dump))))
+ contIgnoreTok (L l (ITcpp continuation (appendFS s (fsLit dump)) sp))
Nothing -> contIgnoreTok tk
else contInner tk
_ -> do
@@ -232,7 +232,7 @@ ppLexer queueComments cont =
processCppToks :: FastString -> PP (Maybe String)
processCppToks fs = do
let
- get (L _ (ITcpp _ s)) = s
+ get (L _ (ITcpp _ s _)) = s
get _ = error "should not"
-- Combine any prior continuation tokens
cs <- popContinuation
=====================================
utils/check-cpp/PreProcess.hs
=====================================
@@ -49,6 +49,8 @@ dumpGhcCpp dflags pst = output
++ sepa
++ show bare_toks
++ sepa
+ ++ show lll
+ ++ sepa
-- ++ show all_toks ++ sepa
-- Note: pst is the state /before/ the parser runs, so we can use it to lex.
(pst_final, bare_toks) = lexAll pst
@@ -67,6 +69,9 @@ dumpGhcCpp dflags pst = output
toks =
addSourceToTokens startLoc buf1 all_toks
final = renderCombinedToks toks
+ lll = case Lexer.lexTokenStream () (options pst) (buffer pst) startLoc of
+ POk _ x -> x
+ _ -> error $ "wtf"
cmpBs :: Located Token -> Located Token -> Ordering
cmpBs (L (RealSrcSpan _ (Strict.Just bs1)) _) (L (RealSrcSpan _ (Strict.Just bs2)) _) =
@@ -158,8 +163,9 @@ showDefines defines = Map.foldlWithKey' (\acc k d -> acc ++ "\n" ++ renderDefine
"#define " ++ n ++ "(" ++ (intercalate "," args) ++ ") " ++ (intercalate " " (map PM.t_str rhs))
lexAll :: Lexer.PState PpState -> (Lexer.PState PpState, [Located Token])
--- lexAll state = case unP (lexer True return) state of
-lexAll state = case unP (lexerDbg True return) state of
+lexAll state = case unP (lexer True return) state of
+-- lexAll state = case unP (lexerDbg True return) state of
+-- lexAll state = case unP (Lexer.lexerDbg True return) state of
POk s t@(L _ ITeof) -> (s, [t])
-- POk state' t -> (ss, t : rest)
POk state' t -> (ss, trace ("lexAll:" ++ show t) t : rest)
=====================================
utils/check-exact/Main.hs
=====================================
@@ -218,7 +218,8 @@ _tt = testOneFile changers "/home/alanz/mysrc/git.haskell.org/worktree/bisect/_b
-- "../../testsuite/tests/printer/Test22771.hs" Nothing
-- "../../testsuite/tests/printer/Test23465.hs" Nothing
-- "../../testsuite/tests/printer/Test25454.hs" Nothing
- "../../testsuite/tests/printer/Test25467.hs" Nothing
+ -- "../../testsuite/tests/printer/Test25467.hs" Nothing
+ "../../testsuite/tests/printer/CppCommentPlacement.hs" Nothing
-- cloneT does not need a test, function can be retired
=====================================
utils/check-exact/Preprocess.hs
=====================================
@@ -20,11 +20,13 @@ import qualified Control.Monad.IO.Class as GHC
import qualified GHC.Data.FastString as GHC
import qualified GHC.Data.StringBuffer as GHC
import qualified GHC.Driver.Config.Parser as GHC
+import qualified GHC.Driver.DynFlags as GHC
import qualified GHC.Driver.Env as GHC
import qualified GHC.Driver.Errors.Types as GHC
import qualified GHC.Driver.Phases as GHC
import qualified GHC.Driver.Pipeline as GHC
import qualified GHC.Parser.Lexer as GHC
+import qualified GHC.Parser.PreProcess.State as GHC
import qualified GHC.Settings as GHC
import qualified GHC.Types.Error as GHC
import qualified GHC.Types.SourceError as GHC
@@ -37,7 +39,7 @@ import qualified GHC.Utils.Panic.Plain as GHC
import GHC.Types.SrcLoc (mkSrcSpan, mkSrcLoc)
import GHC.Data.FastString (mkFastString)
-import Data.List (isPrefixOf)
+import Data.List (isPrefixOf, partition)
import Data.Maybe
import Types
import Utils
@@ -45,7 +47,8 @@ import qualified Data.Set as Set
import qualified GHC.Data.Strict as Strict
--- import Debug.Trace
+import Debug.Trace
+import qualified GHC.LanguageExtensions as LangExt
--
-- ---------------------------------------------------------------------
@@ -106,16 +109,27 @@ getCppTokensAsComments cppOptions sourceFile = do
source <- GHC.liftIO $ GHC.hGetStringBuffer sourceFile
let startLoc = GHC.mkRealSrcLoc (GHC.mkFastString sourceFile) 1 1
(_txt,strSrcBuf,flags2') <- getPreprocessedSrcDirectPrim cppOptions sourceFile
+
+ let flags2g = GHC.xopt_set flags2' LangExt.GhcCpp
let flags2 = GHC.initParserOpts flags2'
+ let flags2'' = GHC.initParserOpts flags2g
+ -- let flags2'' = flags2 { GHC.pExtsBitmap = GHC.xset GHC.GhcCppBit (GHC.pExtsBitmap flags2)}
-- hash-ifdef tokens
- directiveToks <- GHC.liftIO $ getPreprocessorAsComments sourceFile
+ -- directiveToks <- GHC.liftIO $ getPreprocessorAsComments sourceFile
-- Tokens without hash-ifdef
nonDirectiveToks <- tokeniseOriginalSrc startLoc flags2 source
- case GHC.lexTokenStream () flags2 strSrcBuf startLoc of
+ case GHC.lexTokenStream () (GHC.enableGhcCpp flags2) source startLoc of
GHC.POk _ ts ->
do
- let toks = GHC.addSourceToTokens startLoc source ts
- cppCommentToks = getCppTokens directiveToks nonDirectiveToks toks
+ let
+ isCppTok (GHC.L _ (GHC.ITcpp _ _ _)) = True
+ isCppTok _ = False
+ toks = GHC.addSourceToTokens startLoc source
+ (trace ("bitmap:" ++ show (GHC.pExtsBitmap flags2)) ts)
+ (directiveToks, toks') = partition (\(t,_) -> isCppTok t) toks
+ -- (directiveToks, toks') = partition (\(t,_) -> isCppTok t)
+ -- (trace ("toks:" ++ show toks) toks)
+ cppCommentToks = getCppTokens directiveToks nonDirectiveToks toks'
return $ filter goodComment
$ map (GHC.commentToAnnotation . toRealLocated . fst) cppCommentToks
GHC.PFailed pst -> parseError pst
@@ -131,7 +145,7 @@ goodComment c = isGoodComment (tokComment c)
toRealLocated :: GHC.Located a -> GHC.PsLocated a
toRealLocated (GHC.L (GHC.RealSrcSpan s (Strict.Just b)) x) = GHC.L (GHC.PsSpan s b) x
-toRealLocated (GHC.L _ _) = GHC.panic "toRealLocated"
+toRealLocated (GHC.L l _) = GHC.panic $ "toRealLocated:" ++ show l
-- ---------------------------------------------------------------------
@@ -150,8 +164,9 @@ getCppTokens ::
-> [(GHC.Located GHC.Token, String)]
-> [(GHC.Located GHC.Token, String)]
-> [(GHC.Located GHC.Token, String)]
-getCppTokens directiveToks origSrcToks postCppToks = toks
+getCppTokens directiveToks' origSrcToks postCppToks = toks
where
+ directiveToks = trace ("directiveToks: " ++ show directiveToks') directiveToks'
locFn (GHC.L l1 _,_) (GHC.L l2 _,_) = compare (rs l1) (rs l2)
m1Toks = mergeBy locFn postCppToks directiveToks
@@ -262,6 +277,23 @@ alterToolSettings f dynFlags = dynFlags { GHC.toolSettings = f (GHC.toolSettings
-- ---------------------------------------------------------------------
+-- | Get the preprocessor directives as comment tokens from the
+-- source.
+getPreprocessorAsComments' :: FilePath -> IO [(GHC.Located GHC.Token, String)]
+getPreprocessorAsComments' srcFile = do
+ fcontents <- readFileGhc srcFile
+ let directives = filter (\(_lineNum,line) -> case line of '#' : _ -> True; _ -> False)
+ $ zip [1..] (lines fcontents)
+
+ let mkTok (lineNum,line) = (GHC.L l (GHC.ITlineComment line (makeBufSpan l)),line)
+ where
+ start = GHC.mkSrcLoc (GHC.mkFastString srcFile) lineNum 1
+ end = GHC.mkSrcLoc (GHC.mkFastString srcFile) lineNum (length line)
+ l = GHC.mkSrcSpan start end
+
+ let toks = map mkTok directives
+ return toks
+
-- | Get the preprocessor directives as comment tokens from the
-- source.
getPreprocessorAsComments :: FilePath -> IO [(GHC.Located GHC.Token, String)]
@@ -279,7 +311,22 @@ getPreprocessorAsComments srcFile = do
let toks = map mkTok directives
return toks
+-- TODO: possibly use the one from GHC.Parser.PreProcess, depending on
+-- which lexer it ends up using. Or have specific versions of lexAll
+-- for the two lexers.
+lexAll :: GHC.PState GHC.PpState -> (GHC.PState GHC.PpState, [GHC.Located GHC.Token])
+lexAll state = case GHC.unP (GHC.lexer True return) state of
+-- lexAll state = case unP (lexerDbg True return) state of
+-- lexAll state = case unP (Lexer.lexerDbg True return) state of
+ GHC.POk s t@(GHC.L _ GHC.ITeof) -> (s, [t])
+ -- POk state' t -> (ss, t : rest)
+ GHC.POk state' t -> (ss, trace ("lexAll:" ++ show t) t : rest)
+ where
+ (ss, rest) = lexAll state'
+ GHC.PFailed _pst -> GHC.panic $ "GHC.Parser.PreProcess.lexAll failed"
+
makeBufSpan :: GHC.SrcSpan -> GHC.PsSpan
+makeBufSpan (GHC.RealSrcSpan s (Strict.Just bs)) = GHC.PsSpan s bs
makeBufSpan ss = pspan
where
bl = GHC.BufPos 0
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/3a1904adb2edd1efd1f70707e7317a47d32dc4f0
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/3a1904adb2edd1efd1f70707e7317a47d32dc4f0
You're receiving this email because of your account on gitlab.haskell.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20250313/f615e60b/attachment-0001.html>
More information about the ghc-commits
mailing list