[Git][ghc/ghc][wip/strings] Simplify multiline string lexing

Brandon Chinn (@brandonchinn178) gitlab at gitlab.haskell.org
Thu Sep 19 02:13:10 UTC 2024



Brandon Chinn pushed to branch wip/strings at Glasgow Haskell Compiler / GHC


Commits:
da077a6d by Brandon Chinn at 2024-09-18T19:12:58-07:00
Simplify multiline string lexing

- - - - -


1 changed file:

- compiler/GHC/Parser/Lexer.x


Changes:

=====================================
compiler/GHC/Parser/Lexer.x
=====================================
@@ -692,17 +692,11 @@ $unigraphic / { isSmartQuote } { smart_quote_error }
   \" @stringchar*    $unigraphic / { isSmartQuote } { smart_quote_error }
 }
 
-<string_multi_bol> {
-  ([\  $tab] | @gap)* { tok_string_multi_content }
-}
-
 <string_multi_content> {
-  @stringchar*               { tok_string_multi_content }
-  $nl                        { tok_string_multi_content }
+  -- Parse as much of the multiline string as possible, except for quotes
+  @stringchar* ($nl ([\  $tab] | @gap)* @stringchar*)* { tok_string_multi_content }
   -- Allow bare quotes if it's not a triple quote
-  -- N.B. We need to explicitly check for \n in the right context because
-  -- the character set [^...] doesn't include newlines
-  (\" | \"\") / (\n | [^\"]) { tok_string_multi_content }
+  (\" | \"\") / ([\n .] # \") { tok_string_multi_content }
 }
 
 <0> {
@@ -2277,8 +2271,6 @@ tok_string_multi startSpan startBuf _len _buf2 = do
       case alexScan i0 string_multi_content of
         AlexToken i1 len _
           | Just i2 <- lexDelim i1 -> pure (i1, i2)
-          | -- did we just lex a newline?
-            Just ('\n', _) <- alexGetChar' i0 -> goBOL i1
           | -- is the next token a tab character?
             -- need this explicitly because there's a global rule matching $tab
             Just ('\t', _) <- alexGetChar' i1 -> setInput i1 >> lexError LexError
@@ -2288,12 +2280,6 @@ tok_string_multi startSpan startBuf _len _buf2 = do
         AlexSkip i1 _ -> goContent i1
         _ -> lexError LexError
 
-    goBOL i0 =
-      case alexScan i0 string_multi_bol of
-        AlexToken i1 _ _ -> goContent i1
-        AlexSkip i1 _ -> goBOL i1
-        _ -> lexError LexError
-
     lexDelim =
       let go 0 i = Just i
           go n i =



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/da077a6d96cd08c140c0ff2d3b81348fd5be07e1

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/da077a6d96cd08c140c0ff2d3b81348fd5be07e1
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20240918/0896d2f0/attachment-0001.html>


More information about the ghc-commits mailing list