[Git][ghc/ghc][wip/juhp-hp2ps-fixup2] 4 commits: Break out GHC.Parser.Lexer.Interface

Fri Jan 24 21:59:33 UTC 2025

Ben Gamari pushed to branch wip/juhp-hp2ps-fixup2 at Glasgow Haskell Compiler / GHC


Commits:
4495e48f by Brandon Chinn at 2025-01-24T11:54:24-05:00
Break out GHC.Parser.Lexer.Interface

- - - - -
4f8fc11e by Brandon Chinn at 2025-01-24T11:54:24-05:00
Fix lexing comments in multiline strings (#25609)

Metric Decrease:
    MultiLayerModulesRecomp
    parsing001

- - - - -
e7ab778f by Matthew Pickering at 2025-01-24T11:55:01-05:00
testsuite: Pass TEST_HC_OPTS to many more tests

This passes `-dno-debug-output` to the test and `-dlint.

- - - - -
cfd0b338 by Jens Petersen at 2025-01-24T16:59:00-05:00
hp2ps/Utilities.c: add extern parameter types for malloc and realloc for C23

use portable C types!

- - - - -


20 changed files:

- compiler/GHC/Parser/HaddockLex.x
- compiler/GHC/Parser/Lexer.x
- + compiler/GHC/Parser/Lexer/Interface.hs
- + compiler/GHC/Parser/Lexer/String.x
- compiler/ghc.cabal.in
- hadrian/src/Rules/SourceDist.hs
- testsuite/tests/bytecode/T24634/Makefile
- testsuite/tests/codeGen/should_compile/Makefile
- testsuite/tests/count-deps/CountDepsParser.stdout
- testsuite/tests/driver/j-space/jspace.hs
- testsuite/tests/parser/should_run/NumericUnderscores0.hs
- testsuite/tests/parser/should_run/NumericUnderscores0.stdout
- + testsuite/tests/parser/should_run/T25609.hs
- + testsuite/tests/parser/should_run/T25609.stdout
- testsuite/tests/parser/should_run/all.T
- testsuite/tests/patsyn/should_compile/T13350/Makefile
- testsuite/tests/rts/Makefile
- testsuite/tests/rts/T1791/Makefile
- testsuite/tests/wasm/should_run/control-flow/WasmControlFlow.hs
- utils/hp2ps/Utilities.c


Changes:

=====================================
compiler/GHC/Parser/HaddockLex.x
=====================================
@@ -8,6 +8,7 @@ import GHC.Prelude
 import GHC.Data.FastString
 import GHC.Hs.Doc
 import GHC.Parser.Lexer
+import GHC.Parser.Lexer.Interface (adjustChar)
 import GHC.Parser.Annotation
 import GHC.Types.SrcLoc
 import GHC.Types.SourceText


=====================================
compiler/GHC/Parser/Lexer.x
=====================================
@@ -78,7 +78,6 @@ module GHC.Parser.Lexer (
    commentToAnnotation,
    HdkComment(..),
    warnopt,
-   adjustChar,
    addPsMessage
   ) where
 
@@ -132,6 +131,8 @@ import GHC.Driver.Flags
 import GHC.Parser.Errors.Basic
 import GHC.Parser.Errors.Types
 import GHC.Parser.Errors.Ppr ()
+import GHC.Parser.Lexer.Interface
+import qualified GHC.Parser.Lexer.String as Lexer.String
 import GHC.Parser.String
 }
 
@@ -622,13 +623,6 @@ $unigraphic / { isSmartQuote } { smart_quote_error }
   \" @stringchar*    $unigraphic / { isSmartQuote } { smart_quote_error }
 }
 
-<string_multi_content> {
-  -- Parse as much of the multiline string as possible, except for quotes
-  @stringchar* ($nl ([\  $tab] | @gap)* @stringchar*)* { tok_string_multi_content }
-  -- Allow bare quotes if it's not a triple quote
-  (\" | \"\") / ([\n .] # \") { tok_string_multi_content }
-}
-
 <0> {
   \'\' { token ITtyQuote }
 
@@ -2171,11 +2165,23 @@ tok_string span buf len _buf2 = do
     src = SourceText $ lexemeToFastString buf len
     endsInHash = currentChar (offsetBytes (len - 1) buf) == '#'

--- | Ideally, we would define this completely with Alex syntax, like normal strings.
--- Instead, this is defined as a hybrid solution by manually invoking lex states, which
--- we're doing for two reasons:
---   1. The multiline string should all be one lexical token, not multiple
---   2. We need to allow bare quotes, which can't be done with one regex
+{- Note [Lexing multiline strings]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Ideally, we would lex multiline strings completely with Alex syntax, like
+normal strings. However, we can't because:
+
+    1. The multiline string should all be one lexical token, not multiple
+    2. We need to allow bare quotes, which can't be done with one regex
+
+Instead, we'll lex them with a hybrid solution in tok_string_multi by manually
+invoking lex states. This allows us to get the performance of native Alex
+syntax as much as possible, and just gluing the pieces together outside of
+Alex.
+
+Implemented in string_multi_content in GHC/Parser/Lexer/String.x
+-}
+
+-- | See Note [Lexing multiline strings]
 tok_string_multi :: Action
 tok_string_multi startSpan startBuf _len _buf2 = do
   -- advance to the end of the multiline string
@@ -2201,17 +2207,14 @@ tok_string_multi startSpan startBuf _len _buf2 = do
   pure $ L span $ ITstringMulti src (mkFastString s)
   where
     goContent i0 =
-      case alexScan i0 string_multi_content of
-        AlexToken i1 len _
+      case Lexer.String.alexScan i0 Lexer.String.string_multi_content of
+        Lexer.String.AlexToken i1 len _
           | Just i2 <- lexDelim i1 -> pure (i1, i2)
           | isEOF i1 -> checkSmartQuotes >> setInput i1 >> lexError LexError
-          -- is the next token a tab character?
-          -- need this explicitly because there's a global rule matching $tab
-          | Just ('\t', _) <- alexGetChar' i1 -> setInput i1 >> lexError LexError
           -- Can happen if no patterns match, e.g. an unterminated gap
           | len == 0  -> setInput i1 >> lexError LexError
           | otherwise -> goContent i1
-        AlexSkip i1 _ -> goContent i1
+        Lexer.String.AlexSkip i1 _ -> goContent i1
         _ -> setInput i0 >> lexError LexError
 
     lexDelim =
@@ -2235,11 +2238,6 @@ tok_string_multi startSpan startBuf _len _buf2 = do
         Just (c, loc) -> throwSmartQuoteError c loc
         Nothing -> pure ()
 
--- | Dummy action that should never be called. Should only be used in lex states
--- that are manually lexed in tok_string_multi.
-tok_string_multi_content :: Action
-tok_string_multi_content = panic "tok_string_multi_content unexpectedly invoked"
-
 lex_chars :: (String, String) -> PsSpan -> StringBuffer -> Int -> P String
 lex_chars (startDelim, endDelim) span buf len =
   either (throwStringLexError i0) pure $
@@ -2591,105 +2589,6 @@ getLastLocIncludingComments = P $ \s@(PState { prev_loc = prev_loc }) -> POk s p
 getLastLoc :: P PsSpan
 getLastLoc = P $ \s@(PState { last_loc = last_loc }) -> POk s last_loc
 
-data AlexInput = AI !PsLoc !StringBuffer deriving (Show)
-
-{-
-Note [Unicode in Alex]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Although newer versions of Alex support unicode, this grammar is processed with
-the old style '--latin1' behaviour. This means that when implementing the
-functions
-
-    alexGetByte       :: AlexInput -> Maybe (Word8,AlexInput)
-    alexInputPrevChar :: AlexInput -> Char
-
-which Alex uses to take apart our 'AlexInput', we must
-
-  * return a latin1 character in the 'Word8' that 'alexGetByte' expects
-  * return a latin1 character in 'alexInputPrevChar'.
-
-We handle this in 'adjustChar' by squishing entire classes of unicode
-characters into single bytes.
--}
-
-{-# INLINE adjustChar #-}
-adjustChar :: Char -> Word8
-adjustChar c = adj_c
-  where non_graphic     = 0x00
-        upper           = 0x01
-        lower           = 0x02
-        digit           = 0x03
-        symbol          = 0x04
-        space           = 0x05
-        other_graphic   = 0x06
-        uniidchar       = 0x07
-
-        adj_c
-          | c <= '\x07' = non_graphic
-          | c <= '\x7f' = fromIntegral (ord c)
-          -- Alex doesn't handle Unicode, so when Unicode
-          -- character is encountered we output these values
-          -- with the actual character value hidden in the state.
-          | otherwise =
-                -- NB: The logic behind these definitions is also reflected
-                -- in "GHC.Utils.Lexeme"
-                -- Any changes here should likely be reflected there.
-
-                case generalCategory c of
-                  UppercaseLetter       -> upper
-                  LowercaseLetter       -> lower
-                  TitlecaseLetter       -> upper
-                  ModifierLetter        -> uniidchar -- see #10196
-                  OtherLetter           -> lower -- see #1103
-                  NonSpacingMark        -> uniidchar -- see #7650
-                  SpacingCombiningMark  -> other_graphic
-                  EnclosingMark         -> other_graphic
-                  DecimalNumber         -> digit
-                  LetterNumber          -> digit
-                  OtherNumber           -> digit -- see #4373
-                  ConnectorPunctuation  -> symbol
-                  DashPunctuation       -> symbol
-                  OpenPunctuation       -> other_graphic
-                  ClosePunctuation      -> other_graphic
-                  InitialQuote          -> other_graphic
-                  FinalQuote            -> other_graphic
-                  OtherPunctuation      -> symbol
-                  MathSymbol            -> symbol
-                  CurrencySymbol        -> symbol
-                  ModifierSymbol        -> symbol
-                  OtherSymbol           -> symbol
-                  Space                 -> space
-                  _other                -> non_graphic
-
--- Getting the previous 'Char' isn't enough here - we need to convert it into
--- the same format that 'alexGetByte' would have produced.
---
--- See Note [Unicode in Alex] and #13986.
-alexInputPrevChar :: AlexInput -> Char
-alexInputPrevChar (AI _ buf) = unsafeChr (fromIntegral (adjustChar pc))
-  where pc = prevChar buf '\n'
-
-unsafeChr :: Int -> Char
-unsafeChr (I# c) = GHC.Exts.C# (GHC.Exts.chr# c)
-
--- backwards compatibility for Alex 2.x
-alexGetChar :: AlexInput -> Maybe (Char,AlexInput)
-alexGetChar inp = case alexGetByte inp of
-                    Nothing    -> Nothing
-                    Just (b,i) -> c `seq` Just (c,i)
-                       where c = unsafeChr $ fromIntegral b
-
--- See Note [Unicode in Alex]
-alexGetByte :: AlexInput -> Maybe (Word8,AlexInput)
-alexGetByte (AI loc s)
-  | atEnd s   = Nothing
-  | otherwise = byte `seq` loc' `seq` s' `seq`
-                --trace (show (ord c)) $
-                Just (byte, (AI loc' s'))
-  where (c,s') = nextChar s
-        loc'   = advancePsLoc loc c
-        byte   = adjustChar c
-
 {-# INLINE alexGetChar' #-}
 -- This version does not squash unicode characters, it is used when
 -- lexing strings.
@@ -3470,6 +3369,7 @@ topNoLayoutContainsCommas (ALRNoLayout b _ : _) = b
 -- If the generated alexScan/alexScanUser functions are called multiple times
 -- in this file, alexScanUser gets broken out into a separate function and
 -- increases memory usage. Make sure GHC inlines this function and optimizes it.
+-- https://github.com/haskell/alex/pull/262
 {-# INLINE alexScanUser #-}
 
 lexToken :: P (PsLocated Token)


=====================================
compiler/GHC/Parser/Lexer/Interface.hs
=====================================
@@ -0,0 +1,124 @@
+{-# LANGUAGE MagicHash #-}
+
+{- |
+This module defines the types and functions necessary for an Alex-generated
+lexer.
+
+https://haskell-alex.readthedocs.io/en/latest/api.html#
+-}
+module GHC.Parser.Lexer.Interface (
+  AlexInput (..),
+  alexGetByte,
+  alexInputPrevChar,
+
+  -- * Helpers
+  alexGetChar,
+  adjustChar,
+) where
+
+import GHC.Prelude
+
+import Data.Char (GeneralCategory (..), generalCategory, ord)
+import Data.Word (Word8)
+import GHC.Data.StringBuffer (StringBuffer, atEnd, nextChar, prevChar)
+import GHC.Exts
+import GHC.Types.SrcLoc (PsLoc, advancePsLoc)
+
+data AlexInput = AI !PsLoc !StringBuffer deriving (Show)
+
+-- See Note [Unicode in Alex]
+alexGetByte :: AlexInput -> Maybe (Word8,AlexInput)
+alexGetByte (AI loc s)
+  | atEnd s   = Nothing
+  | otherwise = byte `seq` loc' `seq` s' `seq`
+                --trace (show (ord c)) $
+                Just (byte, (AI loc' s'))
+  where (c,s') = nextChar s
+        loc'   = advancePsLoc loc c
+        byte   = adjustChar c
+
+-- Getting the previous 'Char' isn't enough here - we need to convert it into
+-- the same format that 'alexGetByte' would have produced.
+--
+-- See Note [Unicode in Alex] and #13986.
+alexInputPrevChar :: AlexInput -> Char
+alexInputPrevChar (AI _ buf) = unsafeChr (fromIntegral (adjustChar pc))
+  where pc = prevChar buf '\n'
+
+-- backwards compatibility for Alex 2.x
+alexGetChar :: AlexInput -> Maybe (Char,AlexInput)
+alexGetChar inp = case alexGetByte inp of
+                    Nothing    -> Nothing
+                    Just (b,i) -> c `seq` Just (c,i)
+                       where c = unsafeChr $ fromIntegral b
+
+unsafeChr :: Int -> Char
+unsafeChr (I# c) = GHC.Exts.C# (GHC.Exts.chr# c)
+
+{-
+Note [Unicode in Alex]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Although newer versions of Alex support unicode, this grammar is processed with
+the old style '--latin1' behaviour. This means that when implementing the
+functions
+
+    alexGetByte       :: AlexInput -> Maybe (Word8,AlexInput)
+    alexInputPrevChar :: AlexInput -> Char
+
+which Alex uses to take apart our 'AlexInput', we must
+
+  * return a latin1 character in the 'Word8' that 'alexGetByte' expects
+  * return a latin1 character in 'alexInputPrevChar'.
+
+We handle this in 'adjustChar' by squishing entire classes of unicode
+characters into single bytes.
+-}
+
+{-# INLINE adjustChar #-}
+adjustChar :: Char -> Word8
+adjustChar c = adj_c
+  where non_graphic     = 0x00
+        upper           = 0x01
+        lower           = 0x02
+        digit           = 0x03
+        symbol          = 0x04
+        space           = 0x05
+        other_graphic   = 0x06
+        uniidchar       = 0x07
+
+        adj_c
+          | c <= '\x07' = non_graphic
+          | c <= '\x7f' = fromIntegral (ord c)
+          -- Alex doesn't handle Unicode, so when Unicode
+          -- character is encountered we output these values
+          -- with the actual character value hidden in the state.
+          | otherwise =
+                -- NB: The logic behind these definitions is also reflected
+                -- in "GHC.Utils.Lexeme"
+                -- Any changes here should likely be reflected there.
+
+                case generalCategory c of
+                  UppercaseLetter       -> upper
+                  LowercaseLetter       -> lower
+                  TitlecaseLetter       -> upper
+                  ModifierLetter        -> uniidchar -- see #10196
+                  OtherLetter           -> lower -- see #1103
+                  NonSpacingMark        -> uniidchar -- see #7650
+                  SpacingCombiningMark  -> other_graphic
+                  EnclosingMark         -> other_graphic
+                  DecimalNumber         -> digit
+                  LetterNumber          -> digit
+                  OtherNumber           -> digit -- see #4373
+                  ConnectorPunctuation  -> symbol
+                  DashPunctuation       -> symbol
+                  OpenPunctuation       -> other_graphic
+                  ClosePunctuation      -> other_graphic
+                  InitialQuote          -> other_graphic
+                  FinalQuote            -> other_graphic
+                  OtherPunctuation      -> symbol
+                  MathSymbol            -> symbol
+                  CurrencySymbol        -> symbol
+                  ModifierSymbol        -> symbol
+                  OtherSymbol           -> symbol
+                  Space                 -> space
+                  _other                -> non_graphic


=====================================
compiler/GHC/Parser/Lexer/String.x
=====================================
@@ -0,0 +1,96 @@
+{
+{- |
+This module defines lex states for strings.
+
+This needs to be separate from the normal lexer because the normal lexer
+automatically includes rules like skipping whitespace or lexing comments,
+which we don't want in these contexts.
+-}
+module GHC.Parser.Lexer.String (
+  AlexReturn (..),
+  alexScan,
+  string_multi_content,
+) where
+
+import GHC.Prelude
+
+import GHC.Parser.Lexer.Interface
+import GHC.Utils.Panic (panic)
+}
+
+-- -----------------------------------------------------------------------------
+-- Alex "Character set macros"
+-- Copied from GHC/Parser/Lexer.x
+
+$unispace    = \x05 -- Trick Alex into handling Unicode. See Note [Unicode in Alex].
+$nl          = [\n\r\f]
+$space       = [\ $unispace]
+$whitechar   = [$nl \v $space]
+$tab         = \t
+
+$ascdigit  = 0-9
+$unidigit  = \x03 -- Trick Alex into handling Unicode. See Note [Unicode in Alex].
+$decdigit  = $ascdigit -- exactly $ascdigit, no more no less.
+$digit     = [$ascdigit $unidigit]
+
+$special   = [\(\)\,\;\[\]\`\{\}]
+$ascsymbol = [\!\#\$\%\&\*\+\.\/\<\=\>\?\@\\\^\|\-\~\:]
+$unisymbol = \x04 -- Trick Alex into handling Unicode. See Note [Unicode in Alex].
+$symbol    = [$ascsymbol $unisymbol] # [$special \_\"\']
+
+$unilarge  = \x01 -- Trick Alex into handling Unicode. See Note [Unicode in Alex].
+$asclarge  = [A-Z]
+$large     = [$asclarge $unilarge]
+
+$unismall  = \x02 -- Trick Alex into handling Unicode. See Note [Unicode in Alex].
+$ascsmall  = [a-z]
+$small     = [$ascsmall $unismall \_]
+
+$uniidchar = \x07 -- Trick Alex into handling Unicode. See Note [Unicode in Alex].
+$idchar    = [$small $large $digit $uniidchar \']
+
+$unigraphic = \x06 -- Trick Alex into handling Unicode. See Note [Unicode in Alex].
+$graphic   = [$small $large $symbol $digit $idchar $special $unigraphic \"\']
+$charesc   = [a b f n r t v \\ \" \' \&]
+
+$octit     = 0-7
+$hexit     = [$decdigit A-F a-f]
+
+-- -----------------------------------------------------------------------------
+-- Alex "Regular expression macros"
+-- Copied from GHC/Parser/Lexer.x
+
+ at numspc       = _*                   -- numeric spacer (#14473)
+ at decimal      = $decdigit(@numspc $decdigit)*
+ at octal        = $octit(@numspc $octit)*
+ at hexadecimal  = $hexit(@numspc $hexit)*
+ at gap = \\ $whitechar+ \\
+ at cntrl = $asclarge | \@ | \[ | \\ | \] | \^ | \_
+ at ascii = \^ @cntrl | "NUL" | "SOH" | "STX" | "ETX" | "EOT" | "ENQ" | "ACK"
+       | "BEL" | "BS" | "HT" | "LF" | "VT" | "FF" | "CR" | "SO" | "SI" | "DLE"
+       | "DC1" | "DC2" | "DC3" | "DC4" | "NAK" | "SYN" | "ETB" | "CAN"
+       | "EM" | "SUB" | "ESC" | "FS" | "GS" | "RS" | "US" | "SP" | "DEL"
+ at escape     = \\ ( $charesc      | @ascii | @decimal | o @octal | x @hexadecimal )
+ at stringchar = ($graphic # [\\ \"]) | $space | @escape     | @gap
+
+:-
+
+-- Define an empty rule so it compiles; callers should always explicitly specify a startcode
+<0> () ;
+
+-- See Note [Lexing multiline strings]
+<string_multi_content> {
+  -- Parse as much of the multiline string as possible, except for quotes
+  @stringchar* ($nl ([\  $tab] | @gap)* @stringchar*)* { string_multi_content_action }
+  -- Allow bare quotes if it's not a triple quote
+  (\" | \"\") / ([\n .] # \") { string_multi_content_action }
+}
+
+-- -----------------------------------------------------------------------------
+-- Haskell actions
+{
+-- | Dummy action that should never be called. Should only be used in lex states
+-- that are manually lexed in tok_string_multi.
+string_multi_content_action :: a
+string_multi_content_action = panic "string_multi_content_action unexpectedly invoked"
+}


=====================================
compiler/ghc.cabal.in
=====================================
@@ -646,6 +646,8 @@ Library
         GHC.Parser.Errors.Types
         GHC.Parser.Header
         GHC.Parser.Lexer
+        GHC.Parser.Lexer.Interface
+        GHC.Parser.Lexer.String
         GHC.Parser.HaddockLex
         GHC.Parser.PostProcess
         GHC.Parser.PostProcess.Haddock


=====================================
hadrian/src/Rules/SourceDist.hs
=====================================
@@ -185,6 +185,7 @@ prepareTree dest = do
         , (stage0InTree , compiler,      "GHC/Cmm/Lexer.x",    "GHC/Cmm/Lexer.hs")
         , (stage0InTree , compiler,      "GHC/Parser.y",       "GHC/Parser.hs")
         , (stage0InTree , compiler,      "GHC/Parser/Lexer.x", "GHC/Parser/Lexer.hs")
+        , (stage0InTree , compiler,      "GHC/Parser/Lexer/String.x", "GHC/Parser/Lexer/String.hs")
         , (stage0InTree , compiler,      "GHC/Parser/HaddockLex.x", "GHC/Parser/HaddockLex.hs")
         , (stage0InTree , hpcBin,        "src/Trace/Hpc/Parser.y", "src/Trace/Hpc/Parser.hs")
         , (stage0InTree , genprimopcode, "Parser.y",           "Parser.hs")


=====================================
testsuite/tests/bytecode/T24634/Makefile
=====================================
@@ -4,14 +4,14 @@ include $(TOP)/mk/test.mk
 
 # This case loads bytecode from the interface file written in the second invocation.
 T24634a:
-	'$(TEST_HC)' -c hello_c.c -o hello_c.o
-	'$(TEST_HC)' -c -fbyte-code-and-object-code -fno-omit-interface-pragmas Hello.hs
-	'$(TEST_HC)' -fprefer-byte-code -fbyte-code-and-object-code -fno-ignore-interface-pragmas hello_c.o Main.hs
+	'$(TEST_HC)' $(TEST_HC_OPTS) -c hello_c.c -o hello_c.o
+	'$(TEST_HC)' $(TEST_HC_OPTS) -c -fbyte-code-and-object-code -fno-omit-interface-pragmas Hello.hs
+	'$(TEST_HC)' $(TEST_HC_OPTS) -fprefer-byte-code -fbyte-code-and-object-code -fno-ignore-interface-pragmas hello_c.o Main.hs
 	./Main
 
 # This case uses the bytecode generated in 'runHscBackendPhase', not involving the interface, since 'Hello' is compiled
 # in the same invocation as 'Main'.
 T24634b:
-	'$(TEST_HC)' -c hello_c.c -o hello_c.o
-	'$(TEST_HC)' -fprefer-byte-code -fbyte-code-and-object-code -fno-ignore-interface-pragmas hello_c.o Hello.hs Main.hs
+	'$(TEST_HC)' $(TEST_HC_OPTS) -c hello_c.c -o hello_c.o
+	'$(TEST_HC)' $(TEST_HC_OPTS) -fprefer-byte-code -fbyte-code-and-object-code -fno-ignore-interface-pragmas hello_c.o Hello.hs Main.hs
 	./Main


=====================================
testsuite/tests/codeGen/should_compile/Makefile
=====================================
@@ -79,4 +79,4 @@ T17648:
 		grep -F 'f :: T GHC.Types.Int -> ()  [TagSig' >/dev/null
 
 T25166:
-	'$(TEST_HC)' -O2 -dno-typeable-binds -ddump-cmm T25166.hs | awk '/foo_closure/{flag=1}/}]/{flag=0}flag'
+	'$(TEST_HC)' $(TEST_HC_OPTS) -O2 -dno-typeable-binds -ddump-cmm T25166.hs | awk '/foo_closure/{flag=1}/}]/{flag=0}flag'


=====================================
testsuite/tests/count-deps/CountDepsParser.stdout
=====================================
@@ -127,6 +127,8 @@ GHC.Parser.Errors.Ppr
 GHC.Parser.Errors.Types
 GHC.Parser.HaddockLex
 GHC.Parser.Lexer
+GHC.Parser.Lexer.Interface
+GHC.Parser.Lexer.String
 GHC.Parser.PostProcess
 GHC.Parser.PostProcess.Haddock
 GHC.Parser.String


=====================================
testsuite/tests/driver/j-space/jspace.hs
=====================================
@@ -23,7 +23,7 @@ initGhcM :: [String] -> Ghc ()
 initGhcM xs = do
     session <- getSession
     df1 <- getSessionDynFlags
-    let cmdOpts = ["-fforce-recomp"] ++ xs
+    let cmdOpts = ["-fforce-recomp", "-dno-debug-output"] ++ xs
     (df2, leftovers, _) <- parseDynamicFlags (hsc_logger session) df1 (map noLoc cmdOpts)
     setSessionDynFlags df2
     ghcUnitId <- case lookup "Project Unit Id" (compilerInfo df2) of


=====================================
testsuite/tests/parser/should_run/NumericUnderscores0.hs
=====================================
@@ -99,3 +99,6 @@ main = do
             0x_ff == 0xff,
             0x__ff == 0xff
           ]
+
+    -- ensure that strings are unaffected
+    print ["\o16_000", "\16_000", "\x16_000"]


=====================================
testsuite/tests/parser/should_run/NumericUnderscores0.stdout
=====================================
@@ -11,3 +11,4 @@
 [True,True,True]
 [True,True,True]
 [True,True,True,True,True,True,True,True,True,True,True,True,True,True,True]
+["\SO_000","\DLE_000","\SYN_000"]


=====================================
testsuite/tests/parser/should_run/T25609.hs
=====================================
@@ -0,0 +1,34 @@
+{-# LANGUAGE MultilineStrings #-}
+
+main :: IO ()
+main = do
+  -- strings with comment tokens
+  print """{- asdf -}"""
+  print """a {- asdf -} b"""
+  print """-- asdf"""
+  print """{-"""
+
+  -- strings with haddock comments
+  print """{- | test -}"""
+  print """{- * test -}"""
+  print """{- ^ test -}"""
+  print """{- $ test -}"""
+  print """-- | test"""
+  print """-- * test"""
+  print """-- ^ test"""
+  print """-- $ test"""
+
+  -- strings with only whitespace
+  print """    """
+  print """
+
+
+        """
+
+  -- strings with unicode
+  print """
+          ★
+        ★
+          ★
+        ★
+        """


=====================================
testsuite/tests/parser/should_run/T25609.stdout
=====================================
@@ -0,0 +1,15 @@
+"{- asdf -}"
+"a {- asdf -} b"
+"-- asdf"
+"{-"
+"{- | test -}"
+"{- * test -}"
+"{- ^ test -}"
+"{- $ test -}"
+"-- | test"
+"-- * test"
+"-- ^ test"
+"-- $ test"
+"    "
+"\n"
+"  \9733\n\9733\n  \9733\n\9733"


=====================================
testsuite/tests/parser/should_run/all.T
=====================================
@@ -21,6 +21,9 @@ test('RecordDotSyntax3', [extra_files(['RecordDotSyntaxA.hs'])], multimod_compil
 test('RecordDotSyntax4', [extra_files(['RecordDotSyntaxA.hs'])], multimod_compile_and_run, ['RecordDotSyntax4', ''])
 test('RecordDotSyntax5', normal, compile_and_run, [''])
 test('ListTuplePunsConstraints', extra_files(['ListTuplePunsConstraints.hs']), ghci_script, ['ListTuplePunsConstraints.script'])
+
+# Multiline strings
 test('MultilineStrings', normal, compile_and_run, [''])
 test('MultilineStringsOverloaded', normal, compile_and_run, [''])
 test('T25375', normal, compile_and_run, [''])
+test('T25609', normal, compile_and_run, [''])


=====================================
testsuite/tests/patsyn/should_compile/T13350/Makefile
=====================================
@@ -7,7 +7,7 @@ LOCAL_PKGCONF=local.package.conf
 T13350:
 	"$(GHC_PKG)" init $(LOCAL_PKGCONF)
 	cd boolean && "$(TEST_HC)" $(TEST_HC_OPTS) -v0 --make Setup.hs
-	cd boolean && ./Setup configure -v0 --with-compiler="$(TEST_HC)" --with-hc-pkg="$(GHC_PKG)" --package-db=../$(LOCAL_PKGCONF)
+	cd boolean && ./Setup configure -v0 --with-compiler="$(TEST_HC)" --ghc-options='$(filter-out -rtsopts,$(TEST_HC_OPTS))' --with-hc-pkg="$(GHC_PKG)" --package-db=../$(LOCAL_PKGCONF)
 	cd boolean && ./Setup build -v0
 	cd boolean && ./Setup register -v0 --inplace
 	"$(TEST_HC)" $(TEST_HC_OPTS) -c T13350.hs -package-db $(LOCAL_PKGCONF)


=====================================
testsuite/tests/rts/Makefile
=====================================
@@ -82,7 +82,7 @@ T10296a:
 
  .PHONY: T11788
 T11788:
-	"$(TEST_HC)" -c T11788.c -o T11788_obj.o
+	"$(TEST_HC)" $(TEST_HC_OPTS) -c T11788.c -o T11788_obj.o
 	"$(AR)" rsT libT11788.a T11788_obj.o 2> /dev/null
 	echo main | "$(TEST_HC)" $(filter-out -rtsopts, $(TEST_HC_OPTS_INTERACTIVE)) T11788.hs -lT11788 -L"$(PWD)"
 
@@ -101,13 +101,13 @@ T14695:
 
 .PHONY: InternalCounters
 InternalCounters:
-	"$(TEST_HC)" +RTS -s --internal-counters -RTS 2>&1 | grep "Internal Counters"
-	-"$(TEST_HC)" +RTS -s -RTS 2>&1 | grep "Internal Counters"
+	"$(TEST_HC)" $(TEST_HC_OPTS) +RTS -s --internal-counters -RTS 2>&1 | grep "Internal Counters"
+	-"$(TEST_HC)" $(TEST_HC_OPTS) +RTS -s -RTS 2>&1 | grep "Internal Counters"
 
 .PHONY: KeepCafsFail
 KeepCafsFail:
-	"$(TEST_HC)" -c -g -v0 KeepCafsBase.hs KeepCafs1.hs KeepCafs2.hs
-	"$(TEST_HC)" -g -v0 KeepCafsMain.hs KeepCafsBase.o -debug -rdynamic -fwhole-archive-hs-libs $(KEEPCAFS)
+	"$(TEST_HC)" $(TEST_HC_OPTS) -c -g -v0 KeepCafsBase.hs KeepCafs1.hs KeepCafs2.hs
+	"$(TEST_HC)" $(TEST_HC_OPTS) -g -v0 KeepCafsMain.hs KeepCafsBase.o -debug -rdynamic -fwhole-archive-hs-libs $(KEEPCAFS)
 	./KeepCafsMain 2>&1 || echo "exit($$?)"
 
 .PHONY: KeepCafs
@@ -116,37 +116,37 @@ KeepCafs:
 
 .PHONY: EventlogOutput1
 EventlogOutput1:
-	"$(TEST_HC)" -v0 EventlogOutput.hs
+	"$(TEST_HC)" $(TEST_HC_OPTS) -v0 EventlogOutput.hs
 	./EventlogOutput +RTS -l -olhello.eventlog
 	ls hello.eventlog >/dev/null
 
 .PHONY: EventlogOutput2
 EventlogOutput2:
-	"$(TEST_HC)" -v0 EventlogOutput.hs
+	"$(TEST_HC)" $(TEST_HC_OPTS) -v0 EventlogOutput.hs
 	./EventlogOutput +RTS -l
 	ls EventlogOutput.eventlog >/dev/null
 
 .PHONY: EventlogOutputNull
 EventlogOutputNull:
-	"$(TEST_HC)" -rtsopts -v0 EventlogOutput.hs
+	"$(TEST_HC)" $(TEST_HC_OPTS) -rtsopts -v0 EventlogOutput.hs
 	./EventlogOutput +RTS -l --null-eventlog-writer
 	test ! -e EventlogOutput.eventlog
 
 .PHONY: T20199
 T20199:
-	"$(TEST_HC)" -no-hs-main -optcxx-std=c++11 -v0 T20199.cpp -o T20199
+	"$(TEST_HC)" $(TEST_HC_OPTS) -no-hs-main -optcxx-std=c++11 -v0 T20199.cpp -o T20199
 	./T20199
 
 .PHONY: EventlogOutput_IPE
 EventlogOutput_IPE:
-	"$(TEST_HC)" -debug -finfo-table-map -v0 EventlogOutput.hs
+	"$(TEST_HC)" $(TEST_HC_OPTS) -debug -finfo-table-map -v0 EventlogOutput.hs
 	./EventlogOutput +RTS -va 2> EventlogOutput_IPE.stderr.log
 	grep "IPE:" EventlogOutput_IPE.stderr.log
 
 .PHONY: T23142
 T23142:
 	# Test that the -Di output contains different frames
-	"$(TEST_HC)" --run -ignore-dot-ghci T23142.hs +RTS -Di -RTS 2> T23142.log
+	"$(TEST_HC)" $(TEST_HC_OPTS) --run -ignore-dot-ghci T23142.hs +RTS -Di -RTS 2> T23142.log
 	grep -m1 -c "ATOMICALLY_FRAME" T23142.log
 	grep -m1 -c "CATCH_RETRY_FRAME" T23142.log
 	grep -m1 -c "CATCH_STM_FRAME" T23142.log


=====================================
testsuite/tests/rts/T1791/Makefile
=====================================
@@ -3,4 +3,4 @@ include $(TOP)/mk/boilerplate.mk
 include $(TOP)/mk/test.mk
 
 T1791:
-	'$(TEST_HC)' T1791.hs -o T1791 -O -rtsopts
+	'$(TEST_HC)' $(TEST_HC_OPTS) T1791.hs -o T1791 -O -rtsopts


=====================================
testsuite/tests/wasm/should_run/control-flow/WasmControlFlow.hs
=====================================
@@ -41,6 +41,7 @@ main = do
                          `xopt_set` LangExt.StandaloneKindSignatures
                          `xopt_set` LangExt.UnliftedDatatypes
                          `xopt_set` LangExt.DataKinds
+                         `dopt_set` Opt_D_no_debug_output
         setSessionDynFlags dflags
         groups <- mapM loadPath files
         liftIO $ do


=====================================
utils/hp2ps/Utilities.c
=====================================
@@ -3,7 +3,7 @@
 #include <string.h>
 #include "Error.h"
 
-extern void* malloc(long unsigned int);
+extern void* malloc(size_t);
 
 char*
 Basename(char *name)
@@ -89,7 +89,7 @@ void *
 xrealloc(void *p, size_t n)
 {
     void *r;
-    extern void *realloc(void *, long unsigned int);
+    extern void *realloc(void *, size_t);
 
     r = realloc(p, n);
     if (!r) {



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/7b8e8664bfa45fdc351de23f2277262487635647...cfd0b33860039d4b60693a73fb189440bcc10bd8

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/7b8e8664bfa45fdc351de23f2277262487635647...cfd0b33860039d4b60693a73fb189440bcc10bd8
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20250124/ff7fb77a/attachment-0001.html>