[Git][ghc/ghc][wip/az/ghc-cpp] 3 commits: Remove unnecessary LExer rules

Alan Zimmerman (@alanz) gitlab at gitlab.haskell.org
Sun Feb 23 15:17:20 UTC 2025



Alan Zimmerman pushed to branch wip/az/ghc-cpp at Glasgow Haskell Compiler / GHC


Commits:
15d69429 by Alan Zimmerman at 2025-02-23T11:25:02+00:00
Remove unnecessary LExer rules

We *need* the ones that explicitly match to the end of the line.

- - - - -
026516cc by Alan Zimmerman at 2025-02-23T14:54:17+00:00
Generate correct span for ITcpp

Dump now works, except we do not render trailing `\` for continuation
lines. This is good enough for use in test output.

- - - - -
fc86f7be by Alan Zimmerman at 2025-02-23T15:17:00+00:00
Reduce duplication in lexer

- - - - -


1 changed file:

- compiler/GHC/Parser/Lexer.x


Changes:

=====================================
compiler/GHC/Parser/Lexer.x
=====================================
@@ -326,7 +326,6 @@ $unigraphic / { isSmartQuote } { smart_quote_error }
   \n                                    ;
   -- Ghc CPP symbols
   ^\# \ * @cppkeyword  .* \n / { ifExtension GhcCppBit } { cppToken cpp_prag }
-  ^\# \ * @cppkeyword  .*    / { ifExtension GhcCppBit } { cppToken cpp_prag }
 
   ^\# line                              { begin line_prag1 }
   ^\# / { followedByDigit }             { begin line_prag1 }
@@ -344,7 +343,6 @@ $unigraphic / { isSmartQuote } { smart_quote_error }
         -- we might encounter {-# here, but {- has been handled already
   \n                                    ;
   ^\# \ * @cppkeyword  .* \n / { ifExtension GhcCppBit } { cppToken cpp_prag }
-  ^\# \ * @cppkeyword  .*    / { ifExtension GhcCppBit } { cppToken cpp_prag }
 
   ^\# (line)?                           { begin line_prag1 }
 }
@@ -372,10 +370,7 @@ $unigraphic / { isSmartQuote } { smart_quote_error }
 -- CPP continuation lines. Keep concatenating, or exit
 <cpp_prag> {
   .* \\ \n                   { cppTokenCont }
-  .* \\                      { cppTokenCont }
-  .* \n                    { cppTokenPop (ITcpp False) }
-  .*                       { cppTokenPop (ITcpp False)}
-  -- () { popCpp }
+  .* \n                      { cppTokenPop (ITcpp False) }
 }
 
 -- single-line line pragmas, of the form
@@ -1251,48 +1246,72 @@ pop :: Action p
 pop _span _buf _len _buf2 =
   do _ <- popLexState
      lexToken
-     -- trace "pop" $ do lexToken
 
 cppToken :: Int -> Action p
-cppToken code span buf len _buf2 =
-  do
-     let tokStr = lexemeToFastString buf len
-     -- check if the string ends with backslash and newline
-     -- NOTE: performance likely sucks, make it work for now
-     (len0, continue) <- case (reverse $ unpackFS tokStr) of
-        ('\\':_) -> pushLexState code >> return (len, True)
-        ('\n':'\\':_) -> pushLexState code >> return (len -2, True) -- TODO remove
-        ('\n':_) -> return (len - 1, False) -- TODO remove
-        _ -> return (len, False)
-     return (L span (ITcpp continue $! lexemeToFastString buf len0))
-     -- trace ("cppToken:" ++ show (code, t)) $ do return (L span t)
+cppToken code = doCppToken (Just code)
 
 cppTokenCont :: Action p
-cppTokenCont span buf len _buf2 =
-  do
-     let tokStr = lexemeToFastString buf len
-     -- check if the string ends with backslash and newline
-     -- NOTE: performance likely sucks, make it work for now
-     (len0, continue) <- case (reverse $ unpackFS tokStr) of
-        ('\\':_) -> return (len - 1, True)
-        ('\n':'\\':_) -> return (len - 2, True) -- TODO: remove
-        ('\n':_) -> return (len - 1, False) -- TODO: remove
-        _ -> return (len, False)
-     return (L span (ITcpp continue $! lexemeToFastString buf len0))
+cppTokenCont = doCppToken Nothing
 
+doCppToken :: (Maybe Int) -> Action p
+doCppToken code span buf len _buf2 =
+  do
+    let
+      pushLexStateMaybe Nothing = return ()
+      pushLexStateMaybe (Just code) = pushLexState code
+
+      tokStr = lexemeToFastString buf len
+
+    -- check if the string ends with backslash and newline
+    -- NOTE: performance likely sucks, make it work for now
+    (len0, continue) <- case (reverse $ unpackFS tokStr) of
+       ('\n':'\\':_) -> pushLexStateMaybe code >> return (len -2, True)
+       ('\n':_) -> return (len - 1, False)
+       _ -> return (len, False)
+    let span' = cppSpan span len0
+    return (L span' (ITcpp continue $! lexemeToFastString buf len0))
+
+
+-- cppToken :: Int -> Action p
+-- cppToken code span buf len _buf2 =
+--   do
+--      let tokStr = lexemeToFastString buf len
+--      -- check if the string ends with backslash and newline
+--      -- NOTE: performance likely sucks, make it work for now
+--      (len0, continue) <- case (reverse $ unpackFS tokStr) of
+--         ('\n':'\\':_) -> pushLexState code >> return (len -2, True)
+--         ('\n':_) -> return (len - 1, False)
+--         _ -> return (len, False)
+--      let span' = cppSpan span len0
+--      return (L span' (ITcpp continue $! lexemeToFastString buf len0))
+
+-- cppTokenCont :: Action p
+-- cppTokenCont span buf len _buf2 =
+--   do
+--      let tokStr = lexemeToFastString buf len
+--      -- check if the string ends with backslash and newline
+--      -- NOTE: performance likely sucks, make it work for now
+--      (len0, continue) <- case (reverse $ unpackFS tokStr) of
+--         ('\n':'\\':_) -> return (len - 2, True)
+--         ('\n':_) -> return (len - 1, False)
+--         _ -> return (len, False)
+--      let span' = cppSpan span len0
+--      return (L span' (ITcpp continue $! lexemeToFastString buf len0))
+
+cppSpan :: PsSpan -> Int -> PsSpan
+cppSpan span len = mkPsSpan start_loc end_loc
+  where
+     start_loc = psSpanStart span
+     file = srcLocFile (psRealLoc start_loc)
+     real_loc = mkRealSrcLoc file (srcLocLine (psRealLoc start_loc)) (len + 1)
+     BufPos sb = psBufPos start_loc
+     end_loc = PsLoc real_loc (BufPos (sb + len + 1))
 
 cppTokenPop :: (FastString -> Token)-> Action p
 cppTokenPop t span buf len _buf2 =
   do _ <- popLexState
-     -- return (L span (t $! lexemeToFastString buf (trace "cppTokenPop" len)))
      return (L span (t $! lexemeToFastString buf len))
 
-popCpp :: Action p
-popCpp _span _buf _len _buf2 =
-  do _ <- popLexState
-     -- lexToken
-     trace "pop" $ do lexToken
-
 -- See Note [Nested comment line pragmas]
 failLinePrag1 :: Action p
 failLinePrag1 span _buf _len _buf2 = do



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/d3bc176bb1765473511f3465430264ec494805e8...fc86f7bec83fd2da6794b2d0938f4b37697b6527

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/d3bc176bb1765473511f3465430264ec494805e8...fc86f7bec83fd2da6794b2d0938f4b37697b6527
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20250223/e370f926/attachment-0001.html>


More information about the ghc-commits mailing list