[Git][ghc/ghc][wip/multiline-strings] 5 commits: Add test cases for MultilineStrings
Brandon Chinn (@brandonchinn178)
gitlab at gitlab.haskell.org
Sat Feb 17 23:35:29 UTC 2024
Brandon Chinn pushed to branch wip/multiline-strings at Glasgow Haskell Compiler / GHC
Commits:
d826077d by Brandon Chinn at 2024-02-17T15:35:04-08:00
Add test cases for MultilineStrings
- - - - -
6b5b43a4 by Brandon Chinn at 2024-02-17T15:35:04-08:00
Break out common lex_magic_hash logic for strings and chars
- - - - -
6c676cb9 by Brandon Chinn at 2024-02-17T15:35:04-08:00
Factor out string processing functions
- - - - -
405a0d7f by Brandon Chinn at 2024-02-17T15:35:04-08:00
Implement MultilineStrings (#24390)
- - - - -
e0fce55b by Brandon Chinn at 2024-02-17T15:35:04-08:00
Add docs for MultilineStrings
- - - - -
28 changed files:
- compiler/GHC/Hs/Lit.hs
- compiler/GHC/Hs/Syn/Type.hs
- compiler/GHC/HsToCore/Match/Literal.hs
- compiler/GHC/Parser.y
- compiler/GHC/Parser/Lexer.x
- + compiler/GHC/Parser/String.hs
- compiler/GHC/Rename/Expr.hs
- compiler/Language/Haskell/Syntax/Extension.hs
- compiler/Language/Haskell/Syntax/Lit.hs
- compiler/ghc.cabal.in
- + docs/users_guide/9.12.1-notes.rst
- docs/users_guide/exts/literals.rst
- + docs/users_guide/exts/multiline_strings.rst
- testsuite/tests/count-deps/CountDepsParser.stdout
- + testsuite/tests/parser/should_fail/MultilineStringsError.hs
- + testsuite/tests/parser/should_fail/MultilineStringsError.stderr
- + testsuite/tests/parser/should_fail/MultilineStringsInnerTab.hs
- + testsuite/tests/parser/should_fail/MultilineStringsInnerTab.stderr
- + testsuite/tests/parser/should_fail/MultilineStringsSmartQuotes.hs
- + testsuite/tests/parser/should_fail/MultilineStringsSmartQuotes.stderr
- testsuite/tests/parser/should_fail/all.T
- + testsuite/tests/parser/should_run/MultilineStrings.hs
- + testsuite/tests/parser/should_run/MultilineStrings.stdout
- + testsuite/tests/parser/should_run/MultilineStringsOverloaded.hs
- + testsuite/tests/parser/should_run/MultilineStringsOverloaded.stdout
- testsuite/tests/parser/should_run/all.T
- utils/check-exact/ExactPrint.hs
- utils/haddock
Changes:
=====================================
compiler/GHC/Hs/Lit.hs
=====================================
@@ -25,6 +25,7 @@ import GHC.Prelude
import {-# SOURCE #-} GHC.Hs.Expr( pprExpr )
+import GHC.Data.FastString (unpackFS)
import GHC.Types.Basic (PprPrec(..), topPrec )
import GHC.Core.Ppr ( {- instance OutputableBndr TyVar -} )
import GHC.Types.SourceText
@@ -46,6 +47,7 @@ import Language.Haskell.Syntax.Lit
type instance XHsChar (GhcPass _) = SourceText
type instance XHsCharPrim (GhcPass _) = SourceText
type instance XHsString (GhcPass _) = SourceText
+type instance XHsMultilineString (GhcPass _) = SourceText
type instance XHsStringPrim (GhcPass _) = SourceText
type instance XHsInt (GhcPass _) = NoExtField
type instance XHsIntPrim (GhcPass _) = SourceText
@@ -132,6 +134,7 @@ hsLitNeedsParens p = go
go (HsChar {}) = False
go (HsCharPrim {}) = False
go (HsString {}) = False
+ go (HsMultilineString {}) = False
go (HsStringPrim {}) = False
go (HsInt _ x) = p > topPrec && il_neg x
go (HsInteger _ x _) = p > topPrec && x < 0
@@ -155,6 +158,7 @@ convertLit :: HsLit (GhcPass p1) -> HsLit (GhcPass p2)
convertLit (HsChar a x) = HsChar a x
convertLit (HsCharPrim a x) = HsCharPrim a x
convertLit (HsString a x) = HsString a x
+convertLit (HsMultilineString a x) = HsMultilineString a x
convertLit (HsStringPrim a x) = HsStringPrim a x
convertLit (HsInt a x) = HsInt a x
convertLit (HsIntPrim a x) = HsIntPrim a x
@@ -192,6 +196,17 @@ instance Outputable (HsLit (GhcPass p)) where
ppr (HsChar st c) = pprWithSourceText st (pprHsChar c)
ppr (HsCharPrim st c) = pprWithSourceText st (pprPrimChar c)
ppr (HsString st s) = pprWithSourceText st (pprHsString s)
+ ppr (HsMultilineString st s) =
+ case st of
+ NoSourceText -> pprHsString s
+ SourceText src ->
+ vcat $ map text $ splitOn '\n' (unpackFS src)
+ where
+ splitOn c s =
+ let (firstLine, rest) = break (== c) s
+ in case rest of
+ "" -> [firstLine]
+ _ : rest -> firstLine : splitOn c rest
ppr (HsStringPrim st s) = pprWithSourceText st (pprHsBytes s)
ppr (HsInt _ i)
= pprWithSourceText (il_text i) (integer (il_value i))
@@ -231,6 +246,7 @@ pmPprHsLit :: HsLit (GhcPass x) -> SDoc
pmPprHsLit (HsChar _ c) = pprHsChar c
pmPprHsLit (HsCharPrim _ c) = pprHsChar c
pmPprHsLit (HsString st s) = pprWithSourceText st (pprHsString s)
+pmPprHsLit (HsMultilineString st s) = pprWithSourceText st (pprHsString s)
pmPprHsLit (HsStringPrim _ s) = pprHsBytes s
pmPprHsLit (HsInt _ i) = integer (il_value i)
pmPprHsLit (HsIntPrim _ i) = integer i
=====================================
compiler/GHC/Hs/Syn/Type.hs
=====================================
@@ -74,6 +74,7 @@ hsLitType :: HsLit (GhcPass p) -> Type
hsLitType (HsChar _ _) = charTy
hsLitType (HsCharPrim _ _) = charPrimTy
hsLitType (HsString _ _) = stringTy
+hsLitType (HsMultilineString _ _) = stringTy
hsLitType (HsStringPrim _ _) = addrPrimTy
hsLitType (HsInt _ _) = intTy
hsLitType (HsIntPrim _ _) = intPrimTy
=====================================
compiler/GHC/HsToCore/Match/Literal.hs
=====================================
@@ -121,6 +121,7 @@ dsLit l = do
HsDoublePrim _ fl -> return (Lit (LitDouble (rationalFromFractionalLit fl)))
HsChar _ c -> return (mkCharExpr c)
HsString _ str -> mkStringExprFS str
+ HsMultilineString _ str -> mkStringExprFS str
HsInteger _ i _ -> return (mkIntegerExpr platform i)
HsInt _ i -> return (mkIntExpr platform (il_value i))
HsRat _ fl ty -> dsFractionalLitToRational fl ty
@@ -474,6 +475,7 @@ getSimpleIntegralLit (HsInteger _ i ty) = Just (i, ty)
getSimpleIntegralLit HsChar{} = Nothing
getSimpleIntegralLit HsCharPrim{} = Nothing
getSimpleIntegralLit HsString{} = Nothing
+getSimpleIntegralLit HsMultilineString{} = Nothing
getSimpleIntegralLit HsStringPrim{} = Nothing
getSimpleIntegralLit HsRat{} = Nothing
getSimpleIntegralLit HsFloatPrim{} = Nothing
=====================================
compiler/GHC/Parser.y
=====================================
@@ -697,6 +697,7 @@ are the most common patterns, rewritten as regular expressions for clarity:
CHAR { L _ (ITchar _ _) }
STRING { L _ (ITstring _ _) }
+ MULTILINESTRING { L _ (ITmultilinestring _ _) }
INTEGER { L _ (ITinteger _) }
RATIONAL { L _ (ITrational _) }
@@ -3905,6 +3906,8 @@ literal :: { Located (HsLit GhcPs) }
: CHAR { sL1 $1 $ HsChar (getCHARs $1) $ getCHAR $1 }
| STRING { sL1 $1 $ HsString (getSTRINGs $1)
$ getSTRING $1 }
+ | MULTILINESTRING { sL1 $1 $ HsMultilineString (getMULTILINESTRINGs $1)
+ $ getMULTILINESTRING $1 }
| PRIMINTEGER { sL1 $1 $ HsIntPrim (getPRIMINTEGERs $1)
$ getPRIMINTEGER $1 }
| PRIMWORD { sL1 $1 $ HsWordPrim (getPRIMWORDs $1)
@@ -4010,6 +4013,7 @@ getIPDUPVARID (L _ (ITdupipvarid x)) = x
getLABELVARID (L _ (ITlabelvarid _ x)) = x
getCHAR (L _ (ITchar _ x)) = x
getSTRING (L _ (ITstring _ x)) = x
+getMULTILINESTRING (L _ (ITmultilinestring _ x)) = x
getINTEGER (L _ (ITinteger x)) = x
getRATIONAL (L _ (ITrational x)) = x
getPRIMCHAR (L _ (ITprimchar _ x)) = x
@@ -4035,6 +4039,7 @@ getVOCURLY (L (RealSrcSpan l _) ITvocurly) = srcSpanStartCol l
getINTEGERs (L _ (ITinteger (IL src _ _))) = src
getCHARs (L _ (ITchar src _)) = src
getSTRINGs (L _ (ITstring src _)) = src
+getMULTILINESTRINGs (L _ (ITmultilinestring src _)) = src
getPRIMCHARs (L _ (ITprimchar src _)) = src
getPRIMSTRINGs (L _ (ITprimstring src _)) = src
getPRIMINTEGERs (L _ (ITprimint src _)) = src
=====================================
compiler/GHC/Parser/Lexer.x
=====================================
@@ -87,7 +87,7 @@ import qualified GHC.Data.Strict as Strict
import Control.Monad
import Control.Applicative
import Data.Char
-import Data.List (stripPrefix, isInfixOf, partition)
+import Data.List (stripPrefix, isInfixOf, partition, unfoldr)
import Data.List.NonEmpty ( NonEmpty(..) )
import qualified Data.List.NonEmpty as NE
import Data.Maybe
@@ -130,6 +130,7 @@ import GHC.Driver.Flags
import GHC.Parser.Errors.Basic
import GHC.Parser.Errors.Types
import GHC.Parser.Errors.Ppr ()
+import GHC.Parser.String
}
-- -----------------------------------------------------------------------------
@@ -662,7 +663,8 @@ $unigraphic / { isSmartQuote } { smart_quote_error }
-- to convert it to a String.
<0> {
\' { lex_char_tok }
- \" { lex_string_tok }
+ \"\"\" / { ifExtension MultilineStringsBit} { lex_string_tok StringTypeMulti }
+ \" { lex_string_tok StringTypeSingle }
}
-- Note [Whitespace-sensitive operator parsing]
@@ -948,6 +950,7 @@ data Token
| ITchar SourceText Char -- Note [Literal source text] in "GHC.Types.SourceText"
| ITstring SourceText FastString -- Note [Literal source text] in "GHC.Types.SourceText"
+ | ITmultilinestring SourceText FastString -- Note [Literal source text] in "GHC.Types.SourceText"
| ITinteger IntegralLit -- Note [Literal source text] in "GHC.Types.SourceText"
| ITrational FractionalLit
@@ -2175,22 +2178,37 @@ lex_string_prag_comment mkTok span _buf _len _buf2
-- This stuff is horrible. I hates it.
-lex_string_tok :: Action
-lex_string_tok span buf _len _buf2 = do
- lexed <- lex_string
- (AI end bufEnd) <- getInput
- let
- tok = case lexed of
- LexedPrimString s -> ITprimstring (SourceText src) (unsafeMkByteString s)
- LexedRegularString s -> ITstring (SourceText src) (mkFastString s)
- src = lexemeToFastString buf (cur bufEnd - cur buf)
- return $ L (mkPsSpan (psSpanStart span) end) tok
+lex_string_tok :: LexStringType -> Action
+lex_string_tok strType span buf _len _buf2 = do
+ s <- lex_string strType
+
+ i <- getInput
+ case strType of
+ StringTypeSingle ->
+ lex_magic_hash i >>= \case
+ Just i' -> do
+ when (any (> '\xFF') s) $ do
+ pState <- getPState
+ let msg = PsErrPrimStringInvalidChar
+ let err = mkPlainErrorMsgEnvelope (mkSrcSpanPs (last_loc pState)) msg
+ addError err
+
+ setInput i'
+ let (psSpan, src) = getStringLoc (buf, locStart) i'
+ pure $ L psSpan (ITprimstring src (unsafeMkByteString s))
+ Nothing -> do
+ let (psSpan, src) = getStringLoc (buf, locStart) i
+ pure $ L psSpan (ITstring src (mkFastString s))
+ StringTypeMulti -> do
+ let (psSpan, src) = getStringLoc (buf, locStart) i
+ pure $ L psSpan (ITmultilinestring src (mkFastString s))
+ where
+ locStart = psSpanStart span
lex_quoted_label :: Action
lex_quoted_label span buf _len _buf2 = do
- start <- getInput
- s <- lex_string_helper "" start
+ s <- lex_string StringTypeSingle
(AI end bufEnd) <- getInput
let
token = ITlabelvarid (SourceText src) (mkFastString s)
@@ -2200,75 +2218,69 @@ lex_quoted_label span buf _len _buf2 = do
return $ L (mkPsSpan start end) token
-data LexedString = LexedRegularString String | LexedPrimString String
-
-lex_string :: P LexedString
-lex_string = do
+lex_string :: LexStringType -> P String
+lex_string strType = do
start <- getInput
- s <- lex_string_helper "" start
- magicHash <- getBit MagicHashBit
- if magicHash
- then do
- i <- getInput
- case alexGetChar' i of
- Just ('#',i) -> do
- setInput i
- when (any (> '\xFF') s) $ do
- pState <- getPState
- let msg = PsErrPrimStringInvalidChar
- let err = mkPlainErrorMsgEnvelope (mkSrcSpanPs (last_loc pState)) msg
- addError err
- return $ LexedPrimString s
- _other ->
- return $ LexedRegularString s
- else
- return $ LexedRegularString s
-
-
-lex_string_helper :: String -> AlexInput -> P String
-lex_string_helper s start = do
- i <- getInput
- case alexGetChar' i of
- Nothing -> lit_error i
-
- Just ('"',i) -> do
- setInput i
- return (reverse s)
-
- Just ('\\',i)
- | Just ('&',i) <- next -> do
- setInput i; lex_string_helper s start
- | Just (c,i) <- next, c <= '\x7f' && is_space c -> do
- -- is_space only works for <= '\x7f' (#3751, #5425)
- setInput i; lex_stringgap s start
- where next = alexGetChar' i
-
- Just (c, i1) -> do
- case c of
- '\\' -> do setInput i1; c' <- lex_escape; lex_string_helper (c':s) start
- c | isAny c -> do setInput i1; lex_string_helper (c:s) start
- _other | any isDoubleSmartQuote s -> do
- -- if the built-up string s contains a smart double quote character, it was
- -- likely the reason why the string literal was not lexed correctly
- setInput start -- rewind to the first character in the string literal
- -- so we can find the smart quote character's location
- advance_to_smart_quote_character
- i2@(AI loc _) <- getInput
- case alexGetChar' i2 of
- Just (c, _) -> do add_nonfatal_smart_quote_error c loc; lit_error i
- Nothing -> lit_error i -- should never get here
- _other -> lit_error i
-
-
-lex_stringgap :: String -> AlexInput -> P String
-lex_stringgap s start = do
- i <- getInput
- c <- getCharOrFail i
- case c of
- '\\' -> lex_string_helper s start
- c | c <= '\x7f' && is_space c -> lex_stringgap s start
- -- is_space only works for <= '\x7f' (#3751, #5425)
- _other -> lit_error i
+ case lexString [] start of
+ Right (lexedStr, next) -> do
+ setInput next
+ either fromStringLexError pure $ resolveLexedString strType lexedStr
+ Left (e, s, i) -> do
+ -- see if we can find a smart quote in the string we've found so far.
+ -- if the built-up string s contains a smart double quote character, it was
+ -- likely the reason why the string literal was not lexed correctly
+ case filter (\(LexedChar c _) -> isDoubleSmartQuote c) s of
+ LexedChar c (AI loc _) : _ -> add_nonfatal_smart_quote_error c loc
+ _ -> pure ()
+
+ -- regardless whether we found a smart quote, throw a lexical error
+ setInput i >> lexError e
+ where
+ -- Given the (reversed) string we've seen so far and the current location,
+ -- return Right with the fully lexed string and the subsequent location,
+ -- or Left with the string we've seen so far and the location where lexing
+ -- failed.
+ lexString acc0 i0 = do
+ let acc = reverse acc0
+ case alexGetChar' i0 of
+ _ | Just i1 <- lexDelimiter i0 -> Right (acc, i1)
+
+ Just (c0, i1) -> do
+ let acc1 = LexedChar c0 i0 : acc0
+ case c0 of
+ '\\' -> do
+ case alexGetChar' i1 of
+ Just (c1, i2)
+ | is_space' c1 -> lexStringGap (LexedChar c1 i1 : acc1) i2
+ | otherwise -> lexString (LexedChar c1 i1 : acc1) i2
+ Nothing -> Left (LexStringCharLit, acc, i1)
+ _ | isAny c0 -> lexString acc1 i1
+ _ | strType == StringTypeMulti && c0 `elem` ['\n', '\t'] -> lexString acc1 i1
+ _ -> Left (LexStringCharLit, acc, i0)
+
+ Nothing -> Left (LexStringCharLit, acc, i0)
+
+ lexDelimiter i0 =
+ case strType of
+ StringTypeSingle -> do
+ ('"', i1) <- alexGetChar' i0
+ Just i1
+ StringTypeMulti -> do
+ ('"', i1) <- alexGetChar' i0
+ ('"', i2) <- alexGetChar' i1
+ ('"', i3) <- alexGetChar' i2
+ Just i3
+
+ lexStringGap acc0 i0 = do
+ let acc = reverse acc0
+ case alexGetChar' i0 of
+ Just (c0, i1) -> do
+ let acc1 = LexedChar c0 i0 : acc0
+ case c0 of
+ '\\' -> lexString acc1 i1
+ _ | is_space' c0 -> lexStringGap acc1 i1
+ _ -> Left (LexStringCharLit, acc, i0)
+ Nothing -> Left (LexStringCharLitEOF, acc, i0)
lex_char_tok :: Action
@@ -2289,13 +2301,19 @@ lex_char_tok span buf _len _buf2 = do -- We've seen '
return (L (mkPsSpan loc end2) ITtyQuote)
Just ('\\', i2@(AI end2 _)) -> do -- We've seen 'backslash
- setInput i2
- lit_ch <- lex_escape
- i3 <- getInput
- mc <- getCharOrFail i3 -- Trailing quote
- if mc == '\'' then finish_char_tok buf loc lit_ch
- else if isSingleSmartQuote mc then add_smart_quote_error mc end2
- else lit_error i3
+ (LexedChar lit_ch _, rest) <-
+ either fromStringLexError pure $
+ resolveEscapeCharacter (LexedChar '\\' i1) (asLexedString i2)
+ i3 <-
+ case rest of
+ LexedChar _ i3 : _ -> pure i3
+ [] -> lexError LexStringCharLitEOF
+ case alexGetChar' i3 of
+ Just ('\'', i4) -> do
+ setInput i4
+ finish_char_tok buf loc lit_ch
+ Just (mc, _) | isSingleSmartQuote mc -> add_smart_quote_error mc end2
+ _ -> lit_error i3
Just (c, i2@(AI end2 _))
| not (isAny c) -> lit_error i1
@@ -2314,139 +2332,66 @@ lex_char_tok span buf _len _buf2 = do -- We've seen '
let (AI end _) = i1
return (L (mkPsSpan loc end) ITsimpleQuote)
+-- We've already seen the closing quote
+-- Just need to check for trailing #
finish_char_tok :: StringBuffer -> PsLoc -> Char -> P (PsLocated Token)
-finish_char_tok buf loc ch -- We've already seen the closing quote
- -- Just need to check for trailing #
- = do magicHash <- getBit MagicHashBit
- i@(AI end bufEnd) <- getInput
- let src = lexemeToFastString buf (cur bufEnd - cur buf)
- if magicHash then do
- case alexGetChar' i of
- Just ('#',i@(AI end bufEnd')) -> do
- setInput i
- -- Include the trailing # in SourceText
- let src' = lexemeToFastString buf (cur bufEnd' - cur buf)
- return (L (mkPsSpan loc end)
- (ITprimchar (SourceText src') ch))
- _other ->
- return (L (mkPsSpan loc end)
- (ITchar (SourceText src) ch))
- else do
- return (L (mkPsSpan loc end) (ITchar (SourceText src) ch))
+finish_char_tok buf loc ch = do
+ i <- getInput
+ lex_magic_hash i >>= \case
+ Just i' -> do
+ setInput i'
+ -- Include the trailing # in SourceText
+ let (psSpan, src) = getStringLoc (buf, loc) i'
+ pure $ L psSpan (ITprimchar src ch)
+ Nothing -> do
+ let (psSpan, src) = getStringLoc (buf, loc) i
+ pure $ L psSpan (ITchar src ch)
+
+
+-- | Get the span and source text for a string from the given start to the given end.
+getStringLoc :: (StringBuffer, PsLoc) -> AlexInput -> (PsSpan, SourceText)
+getStringLoc (bufStart, locStart) (AI locEnd bufEnd) = (psSpan, SourceText src)
+ where
+ psSpan = mkPsSpan locStart locEnd
+ src = lexemeToFastString bufStart (cur bufEnd - cur bufStart)
+
+
+-- Return Just if we found the magic hash, with the next input.
+lex_magic_hash :: AlexInput -> P (Maybe AlexInput)
+lex_magic_hash i = do
+ magicHash <- getBit MagicHashBit
+ if magicHash
+ then
+ case alexGetChar' i of
+ Just ('#', i') -> pure (Just i')
+ _other -> pure Nothing
+ else pure Nothing
isAny :: Char -> Bool
isAny c | c > '\x7f' = isPrint c
| otherwise = is_any c
-lex_escape :: P Char
-lex_escape = do
- i0@(AI loc _) <- getInput
- c <- getCharOrFail i0
- case c of
- 'a' -> return '\a'
- 'b' -> return '\b'
- 'f' -> return '\f'
- 'n' -> return '\n'
- 'r' -> return '\r'
- 't' -> return '\t'
- 'v' -> return '\v'
- '\\' -> return '\\'
- '"' -> return '\"'
- '\'' -> return '\''
- -- the next two patterns build up a Unicode smart quote error (#21843)
- smart_double_quote | isDoubleSmartQuote smart_double_quote ->
- add_smart_quote_error smart_double_quote loc
- smart_single_quote | isSingleSmartQuote smart_single_quote ->
- add_smart_quote_error smart_single_quote loc
- '^' -> do i1 <- getInput
- c <- getCharOrFail i1
- if c >= '@' && c <= '_'
- then return (chr (ord c - ord '@'))
- else lit_error i1
-
- 'x' -> readNum is_hexdigit 16 hexDigit
- 'o' -> readNum is_octdigit 8 octDecDigit
- x | is_decdigit x -> readNum2 is_decdigit 10 octDecDigit (octDecDigit x)
-
- c1 -> do
- i <- getInput
- case alexGetChar' i of
- Nothing -> lit_error i0
- Just (c2,i2) ->
- case alexGetChar' i2 of
- Nothing -> do lit_error i0
- Just (c3,i3) ->
- let str = [c1,c2,c3] in
- case [ (c,rest) | (p,c) <- silly_escape_chars,
- Just rest <- [stripPrefix p str] ] of
- (escape_char,[]):_ -> do
- setInput i3
- return escape_char
- (escape_char,_:_):_ -> do
- setInput i2
- return escape_char
- [] -> lit_error i0
-
-readNum :: (Char -> Bool) -> Int -> (Char -> Int) -> P Char
-readNum is_digit base conv = do
- i <- getInput
- c <- getCharOrFail i
- if is_digit c
- then readNum2 is_digit base conv (conv c)
- else lit_error i
+-- is_space only works for <= '\x7f' (#3751, #5425)
+--
+-- TODO: why not put this logic in is_space directly?
+is_space' :: Char -> Bool
+is_space' c | c > '\x7f' = False
+ | otherwise = is_space c
+
+-- | Returns a LexedString that, when iterated, lazily streams
+-- successive characters from the AlexInput.
+asLexedString :: AlexInput -> LexedString AlexInput
+asLexedString = unfoldr toLexedChar
+ where
+ toLexedChar i =
+ case alexGetChar' i of
+ Just (c, i') -> Just (LexedChar c i, i')
+ Nothing -> Nothing
-readNum2 :: (Char -> Bool) -> Int -> (Char -> Int) -> Int -> P Char
-readNum2 is_digit base conv i = do
- input <- getInput
- read i input
- where read i input = do
- case alexGetChar' input of
- Just (c,input') | is_digit c -> do
- let i' = i*base + conv c
- if i' > 0x10ffff
- then setInput input >> lexError LexNumEscapeRange
- else read i' input'
- _other -> do
- setInput input; return (chr i)
-
-
-silly_escape_chars :: [(String, Char)]
-silly_escape_chars = [
- ("NUL", '\NUL'),
- ("SOH", '\SOH'),
- ("STX", '\STX'),
- ("ETX", '\ETX'),
- ("EOT", '\EOT'),
- ("ENQ", '\ENQ'),
- ("ACK", '\ACK'),
- ("BEL", '\BEL'),
- ("BS", '\BS'),
- ("HT", '\HT'),
- ("LF", '\LF'),
- ("VT", '\VT'),
- ("FF", '\FF'),
- ("CR", '\CR'),
- ("SO", '\SO'),
- ("SI", '\SI'),
- ("DLE", '\DLE'),
- ("DC1", '\DC1'),
- ("DC2", '\DC2'),
- ("DC3", '\DC3'),
- ("DC4", '\DC4'),
- ("NAK", '\NAK'),
- ("SYN", '\SYN'),
- ("ETB", '\ETB'),
- ("CAN", '\CAN'),
- ("EM", '\EM'),
- ("SUB", '\SUB'),
- ("ESC", '\ESC'),
- ("FS", '\FS'),
- ("GS", '\GS'),
- ("RS", '\RS'),
- ("US", '\US'),
- ("SP", '\SP'),
- ("DEL", '\DEL')
- ]
+fromStringLexError :: StringLexError AlexInput -> P a
+fromStringLexError = \case
+ SmartQuoteError c (AI loc _) -> add_smart_quote_error c loc
+ StringLexError _ i e -> setInput i >> lexError e
-- before calling lit_error, ensure that the current input is pointing to
-- the position of the error in the buffer. This is so that we can report
@@ -2515,16 +2460,6 @@ quasiquote_error start = do
-- -----------------------------------------------------------------------------
-- Unicode Smart Quote detection (#21843)
-isDoubleSmartQuote :: Char -> Bool
-isDoubleSmartQuote '“' = True
-isDoubleSmartQuote '”' = True
-isDoubleSmartQuote _ = False
-
-isSingleSmartQuote :: Char -> Bool
-isSingleSmartQuote '‘' = True
-isSingleSmartQuote '’' = True
-isSingleSmartQuote _ = False
-
isSmartQuote :: AlexAccPred ExtsBitmap
isSmartQuote _ _ _ (AI _ buf) = let c = prevChar buf ' ' in isSingleSmartQuote c || isDoubleSmartQuote c
@@ -3053,6 +2988,7 @@ data ExtBits
| OverloadedRecordDotBit
| OverloadedRecordUpdateBit
| ExtendedLiteralsBit
+ | MultilineStringsBit
-- Flags that are updated once parsing starts
| InRulePragBit
@@ -3133,6 +3069,7 @@ mkParserOpts extensionFlags diag_opts supported
.|. OverloadedRecordDotBit `xoptBit` LangExt.OverloadedRecordDot
.|. OverloadedRecordUpdateBit `xoptBit` LangExt.OverloadedRecordUpdate -- Enable testing via 'getBit OverloadedRecordUpdateBit' in the parser (RecordDotSyntax parsing uses that information).
.|. ExtendedLiteralsBit `xoptBit` LangExt.ExtendedLiterals
+ .|. MultilineStringsBit `xoptBit` LangExt.MultilineStrings
optBits =
HaddockBit `setBitIf` isHaddock
.|. RawTokenStreamBit `setBitIf` rawTokStream
=====================================
compiler/GHC/Parser/String.hs
=====================================
@@ -0,0 +1,357 @@
+{-# LANGUAGE LambdaCase #-}
+
+module GHC.Parser.String (
+ LexedString,
+ LexedChar (..),
+ StringLexError (..),
+ LexStringType (..),
+ resolveLexedString,
+ resolveEscapeCharacter,
+
+ -- * Unicode smart quote helpers
+ isDoubleSmartQuote,
+ isSingleSmartQuote,
+) where
+
+import GHC.Prelude
+
+import Control.Monad (forM_, guard, unless, when, (>=>))
+import Data.Char (chr, isSpace, ord)
+import qualified Data.Foldable1 as Foldable1
+import qualified Data.List.NonEmpty as NonEmpty
+import Data.Maybe (listToMaybe, mapMaybe, maybeToList)
+import GHC.Parser.CharClass (
+ hexDigit,
+ is_decdigit,
+ is_hexdigit,
+ is_octdigit,
+ octDecDigit,
+ )
+import GHC.Parser.Errors.Types (LexErr (..))
+import GHC.Utils.Panic (panic)
+
+data LexStringType = StringTypeSingle | StringTypeMulti deriving (Eq)
+
+data LexedChar loc = LexedChar !Char !loc
+type LexedString loc = [LexedChar loc]
+
+unLexedChar :: LexedChar loc -> Char
+unLexedChar (LexedChar c _) = c
+
+unLexedString :: LexedString loc -> String
+unLexedString = map unLexedChar
+
+-- | Apply the given StringProcessors to the given LexedString left-to-right,
+-- and return the processed string.
+resolveLexedString ::
+ LexStringType ->
+ LexedString loc ->
+ Either (StringLexError loc) String
+resolveLexedString strType = fmap unLexedString . foldr (>=>) pure processString
+ where
+ processString =
+ case strType of
+ StringTypeSingle ->
+ [ collapseStringGaps
+ , resolveEscapeCharacters
+ ]
+ StringTypeMulti ->
+ [ collapseStringGaps
+ , resolveMultilineString
+ , checkInnerTabs
+ , resolveEscapeCharacters
+ ]
+
+data StringLexError loc
+ = SmartQuoteError !Char !loc
+ | StringLexError !Char !loc !LexErr
+
+type StringProcessor loc = LexedString loc -> Either (StringLexError loc) (LexedString loc)
+
+collapseStringGaps :: StringProcessor loc
+collapseStringGaps s0 = pure (go s0)
+ where
+ go = \case
+ [] -> []
+
+ backslash@(LexedChar '\\' _) : c : s
+ | isLexedSpace c ->
+ -- lexer should have validated that this is a valid gap,
+ -- so we'll panic if we find any invalid characters
+ case dropWhile isLexedSpace s of
+ LexedChar '\\' _ : s -> go s
+ _ -> panic $ "Invalid string gap in " ++ show (unLexedString s0)
+ | otherwise ->
+ backslash : c : go s
+
+ c : s -> c : go s
+
+resolveEscapeCharacters :: StringProcessor loc
+resolveEscapeCharacters = go
+ where
+ go = \case
+ [] -> pure []
+ LexedChar '\\' _ : LexedChar '&' _ : s -> go s
+ backslashChar@(LexedChar '\\' _) : s -> do
+ (c, s') <- resolveEscapeCharacter backslashChar s
+ (c :) <$> go s'
+ c : s ->
+ (c :) <$> go s
+
+-- | After finding a backslash, parse the rest of the escape character.
+resolveEscapeCharacter ::
+ LexedChar loc -> -- the backslash character
+ LexedString loc -> -- the rest of the string to parse
+ Either
+ (StringLexError loc)
+ (LexedChar loc, LexedString loc) -- the resolved escape character and the rest of the string
+resolveEscapeCharacter backslashChar s0 = do
+ (firstChar@(LexedChar c loc), s1) <- expectNext backslashChar s0
+ let rewrap c' = pure (LexedChar c' loc, s1)
+ case c of
+ 'a' -> rewrap '\a'
+ 'b' -> rewrap '\b'
+ 'f' -> rewrap '\f'
+ 'n' -> rewrap '\n'
+ 'r' -> rewrap '\r'
+ 't' -> rewrap '\t'
+ 'v' -> rewrap '\v'
+ '\\' -> rewrap '\\'
+ '"' -> rewrap '\"'
+ '\'' -> rewrap '\''
+ -- escape codes
+ 'x' -> expectNum is_hexdigit 16 hexDigit (firstChar, s1)
+ 'o' -> expectNum is_octdigit 8 octDecDigit (firstChar, s1)
+ _ | is_decdigit c -> expectNum is_decdigit 10 octDecDigit (backslashChar, s0)
+ -- control characters (e.g. '\^M')
+ '^' -> do
+ (LexedChar c1 loc1, s2) <- expectNext firstChar s1
+ unless (c1 >= '@' && c1 <= '_') $
+ Left $ StringLexError c1 loc1 LexStringCharLit
+ let c' = chr $ ord c1 - ord '@'
+ pure (LexedChar c' loc, s2)
+ -- long form escapes (e.g. '\NUL')
+ _ | Just (c', s2) <- parseLongEscape firstChar s1 -> pure (LexedChar c' loc, s2)
+ -- check unicode smart quotes (#21843)
+ _ | isDoubleSmartQuote c -> Left $ SmartQuoteError c loc
+ _ | isSingleSmartQuote c -> Left $ SmartQuoteError c loc
+ -- unknown escape
+ _ -> Left $ StringLexError c loc LexStringCharLit
+ where
+ expectNext lastChar = \case
+ [] -> do
+ let LexedChar c loc = lastChar
+ Left $ StringLexError c loc LexStringCharLitEOF
+ c : cs -> pure (c, cs)
+
+ expectNum isDigit base toDigit (lastChar, s0) = do
+ (LexedChar c loc, s1) <- expectNext lastChar s0
+ unless (isDigit c) $ Left $ StringLexError c loc LexStringCharLit
+ let parseNum x = \case
+ LexedChar c' loc' : s' | isDigit c' -> do
+ let x' = x * base + toDigit c'
+ when (x' > 0x10ffff) $ Left $ StringLexError c' loc' LexNumEscapeRange
+ parseNum x' s'
+ s ->
+ pure (LexedChar (chr x) loc, s)
+ parseNum (toDigit c) s1
+
+-- | Check if the escape characters match a long escape code.
+--
+-- >>> parseLongEscape 'C' [LexedChar 'R', LexedChar 'X', ...s] = Just ('\CR', [LexedChar 'X', ...s])
+-- >>> parseLongEscape 'X' [LexedChar 'X', LexedChar 'X', ...s] = Nothing
+parseLongEscape :: LexedChar loc -> LexedString loc -> Maybe (Char, LexedString loc)
+parseLongEscape (LexedChar c _) s = listToMaybe $ mapMaybe tryParse longEscapeCodes
+ where
+ tryParse (prefix, c') = do
+ p0 : p <- pure prefix
+ guard (p0 == c) -- see if the first character matches
+ s' <- parsePrefix p s -- see if the rest of the prefix matches
+ pure (c', s')
+
+ parsePrefix (p : ps) (LexedChar t _ : ts) | p == t = parsePrefix ps ts
+ parsePrefix [] s' = Just s' -- we've matched the whole prefix, return the rest
+ parsePrefix _ _ = Nothing
+
+ longEscapeCodes =
+ [ ("NUL", '\NUL')
+ , ("SOH", '\SOH')
+ , ("STX", '\STX')
+ , ("ETX", '\ETX')
+ , ("EOT", '\EOT')
+ , ("ENQ", '\ENQ')
+ , ("ACK", '\ACK')
+ , ("BEL", '\BEL')
+ , ("BS", '\BS')
+ , ("HT", '\HT')
+ , ("LF", '\LF')
+ , ("VT", '\VT')
+ , ("FF", '\FF')
+ , ("CR", '\CR')
+ , ("SO", '\SO')
+ , ("SI", '\SI')
+ , ("DLE", '\DLE')
+ , ("DC1", '\DC1')
+ , ("DC2", '\DC2')
+ , ("DC3", '\DC3')
+ , ("DC4", '\DC4')
+ , ("NAK", '\NAK')
+ , ("SYN", '\SYN')
+ , ("ETB", '\ETB')
+ , ("CAN", '\CAN')
+ , ("EM", '\EM')
+ , ("SUB", '\SUB')
+ , ("ESC", '\ESC')
+ , ("FS", '\FS')
+ , ("GS", '\GS')
+ , ("RS", '\RS')
+ , ("US", '\US')
+ , ("SP", '\SP')
+ , ("DEL", '\DEL')
+ ]
+
+-- | Error if string contains any tab characters.
+--
+-- Normal strings don't lex tab characters in the first place, but we
+-- have to allow them in multiline strings for leading indentation. So
+-- we allow them in the initial lexing pass, then check for any remaining
+-- tabs after replacing leading tabs in resolveMultilineString.
+checkInnerTabs :: StringProcessor loc
+checkInnerTabs s = do
+ forM_ s $ \(LexedChar c loc) ->
+ when (c == '\t') $ Left $ StringLexError c loc LexStringCharLit
+ pure s
+
+-- -----------------------------------------------------------------------------
+-- Unicode Smart Quote detection (#21843)
+
+isDoubleSmartQuote :: Char -> Bool
+isDoubleSmartQuote = \case
+ '“' -> True
+ '”' -> True
+ _ -> False
+
+isSingleSmartQuote :: Char -> Bool
+isSingleSmartQuote = \case
+ '‘' -> True
+ '’' -> True
+ _ -> False
+
+{-
+Note [Multiline string literals]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Multiline string literals were added following the acceptance of the
+proposal: https://github.com/ghc-proposals/ghc-proposals/pull/569
+
+Multiline string literals are syntax sugar for normal string literals,
+with an extra post processing step. This all happens in the Lexer; that
+is, HsMultilineString will contain the post-processed string. This matches
+the same behavior as HsString, which contains the normalized string
+(see Note [Literal source text]).
+
+The string is post-processed with the following steps:
+1. Collapse string gaps
+2. Split the string by newlines
+3. Convert leading tabs into spaces
+ * In each line, any tabs preceding non-whitespace characters are replaced with spaces up to the next tab stop
+4. Remove common whitespace prefix in every line (see below)
+5. If a line contains only whitespace, remove all of the whitespace
+6. Join the string back with `\n` delimiters
+7. If the first character of the string is a newline, remove it
+8. Interpret escaped characters
+
+The common whitespace prefix can be informally defined as "The longest
+prefix of whitespace shared by all lines in the string, excluding the
+first line and any whitespace-only lines".
+
+It's more precisely defined with the following algorithm:
+
+1. Take a list representing the lines in the string
+2. Ignore the following elements in the list:
+ * The first line (we want to ignore everything before the first newline)
+ * Empty lines
+ * Lines with only whitespace characters
+3. Calculate the longest prefix of whitespace shared by all lines in the remaining list
+-}
+
+-- | A lexed line, with the string and the location info of the ending newline
+-- character, if one exists
+data LexedLine loc = LexedLine !(LexedString loc) (Maybe loc)
+
+mapLine :: (LexedString loc -> LexedString loc) -> LexedLine loc -> LexedLine loc
+mapLine f (LexedLine line nl) = LexedLine (f line) nl
+
+mapLines :: (LexedString loc -> LexedString loc) -> [LexedLine loc] -> [LexedLine loc]
+mapLines f = map (mapLine f)
+
+filterLines :: (LexedString loc -> Bool) -> [LexedLine loc] -> [LexedLine loc]
+filterLines f = filter (\(LexedLine line _) -> f line)
+
+splitLines :: LexedString loc -> [LexedLine loc]
+splitLines =
+ foldr
+ ( curry $ \case
+ (LexedChar '\n' loc, ls) -> LexedLine [] (Just loc) : ls
+ (c, l : ls) -> mapLine (c :) l : ls
+ (c, []) -> LexedLine [c] Nothing : [] -- should not happen
+ )
+ [emptyLine]
+ where
+ emptyLine = LexedLine [] Nothing
+
+joinLines :: [LexedLine loc] -> LexedString loc
+joinLines = concatMap (\(LexedLine line nl) -> line ++ maybeToList (LexedChar '\n' <$> nl))
+
+-- | See Note [Multiline string literals]
+resolveMultilineString :: StringProcessor loc
+resolveMultilineString = pure . process
+ where
+ (.>) :: (a -> b) -> (b -> c) -> (a -> c)
+ (.>) = flip (.)
+
+ process =
+ splitLines
+ .> convertLeadingTabs
+ .> rmCommonWhitespacePrefix
+ .> stripOnlyWhitespace
+ .> joinLines
+ .> rmFirstNewline
+
+ convertLeadingTabs =
+ let convertLine col = \case
+ [] -> []
+ c@(LexedChar ' ' _) : cs -> c : convertLine (col + 1) cs
+ LexedChar '\t' loc : cs ->
+ let fill = 8 - (col `mod` 8)
+ in replicate fill (LexedChar ' ' loc) ++ convertLine (col + fill) cs
+ c : cs -> c : cs
+ in mapLines (convertLine 0)
+
+ rmCommonWhitespacePrefix = \case
+ [] -> []
+ -- exclude the first line from this calculation
+ firstLine : strLines ->
+ let excludeWsOnlyLines = filterLines (not . all isLexedSpace)
+ commonWSPrefix =
+ case NonEmpty.nonEmpty (excludeWsOnlyLines strLines) of
+ Nothing -> 0
+ Just strLines' ->
+ Foldable1.minimum $
+ flip NonEmpty.map strLines' $ \(LexedLine line _) ->
+ length $ takeWhile isLexedSpace line
+ in firstLine : mapLines (drop commonWSPrefix) strLines
+
+ stripOnlyWhitespace =
+ let stripWsOnlyLine line = if all isLexedSpace line then [] else line
+ in mapLines stripWsOnlyLine
+
+ rmFirstNewline = \case
+ LexedChar '\n' _ : s -> s
+ s -> s
+
+-- -----------------------------------------------------------------------------
+-- Helpers
+
+isLexedSpace :: LexedChar loc -> Bool
+isLexedSpace = isSpace . unLexedChar
=====================================
compiler/GHC/Rename/Expr.hs
=====================================
@@ -2,6 +2,7 @@
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE MultiWayIf #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}
@@ -366,13 +367,18 @@ rnExpr (HsOverLabel _ src v)
hs_ty_arg = mkEmptyWildCardBndrs $ wrapGenSpan $
HsTyLit noExtField (HsStrTy NoSourceText v)
-rnExpr (HsLit x lit@(HsString src s))
+rnExpr (HsLit x lit) | Just (src, s) <- stringLike lit
= do { opt_OverloadedStrings <- xoptM LangExt.OverloadedStrings
; if opt_OverloadedStrings then
rnExpr (HsOverLit x (mkHsIsString src s))
else do {
; rnLit lit
; return (HsLit x (convertLit lit), emptyFVs) } }
+ where
+ stringLike = \case
+ HsString src s -> Just (src, s)
+ HsMultilineString src s -> Just (src, s)
+ _ -> Nothing
rnExpr (HsLit x lit)
= do { rnLit lit
=====================================
compiler/Language/Haskell/Syntax/Extension.hs
=====================================
@@ -565,6 +565,7 @@ type family XXApplicativeArg x
type family XHsChar x
type family XHsCharPrim x
type family XHsString x
+type family XHsMultilineString x
type family XHsStringPrim x
type family XHsInt x
type family XHsIntPrim x
=====================================
compiler/Language/Haskell/Syntax/Lit.hs
=====================================
@@ -54,6 +54,8 @@ data HsLit x
-- ^ Unboxed character
| HsString (XHsString x) {- SourceText -} FastString
-- ^ String
+ | HsMultilineString (XHsMultilineString x) {- SourceText -} FastString
+ -- ^ String
| HsStringPrim (XHsStringPrim x) {- SourceText -} !ByteString
-- ^ Packed bytes
| HsInt (XHsInt x) IntegralLit
=====================================
compiler/ghc.cabal.in
=====================================
@@ -613,6 +613,7 @@ Library
GHC.Parser.HaddockLex
GHC.Parser.PostProcess
GHC.Parser.PostProcess.Haddock
+ GHC.Parser.String
GHC.Parser.Types
GHC.Parser.Utils
GHC.Platform
=====================================
docs/users_guide/9.12.1-notes.rst
=====================================
@@ -0,0 +1,103 @@
+.. _release-9-12-1:
+
+Version 9.12.1
+==============
+
+Language
+~~~~~~~~
+
+- GHC Proposal `#569 <https://github.com/ghc-proposals/ghc-proposals/blob/master/proposals/0569-multiline-strings.rst>`_
+ "Multiline string literals" has been implemented.
+ The following code is now accepted by GHC::
+
+ {-# LANGUAGE MultilineStrings #-}
+
+ x :: String
+ x =
+ """
+ This is a
+ multiline
+
+ string
+
+ literal
+ """
+
+ This feature is guarded behind :extension:`MultilineStrings`.
+
+Compiler
+~~~~~~~~
+
+JavaScript backend
+~~~~~~~~~~~~~~~~~~
+
+WebAssembly backend
+~~~~~~~~~~~~~~~~~~~
+
+GHCi
+~~~~
+
+Runtime system
+~~~~~~~~~~~~~~
+
+``base`` library
+~~~~~~~~~~~~~~~~
+
+``ghc-prim`` library
+~~~~~~~~~~~~~~~~~~~~
+
+``ghc`` library
+~~~~~~~~~~~~~~~
+
+``ghc-heap`` library
+~~~~~~~~~~~~~~~~~~~~
+
+``ghc-experimental`` library
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+``template-haskell`` library
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Included libraries
+~~~~~~~~~~~~~~~~~~
+
+The package database provided with this distribution also contains a number of
+packages other than GHC itself. See the changelogs provided with these packages
+for further change information.
+
+.. ghc-package-list::
+
+ libraries/array/array.cabal: Dependency of ``ghc`` library
+ libraries/base/base.cabal: Core library
+ libraries/binary/binary.cabal: Dependency of ``ghc`` library
+ libraries/bytestring/bytestring.cabal: Dependency of ``ghc`` library
+ libraries/Cabal/Cabal/Cabal.cabal: Dependency of ``ghc-pkg`` utility
+ libraries/Cabal/Cabal-syntax/Cabal-syntax.cabal: Dependency of ``ghc-pkg`` utility
+ libraries/containers/containers/containers.cabal: Dependency of ``ghc`` library
+ libraries/deepseq/deepseq.cabal: Dependency of ``ghc`` library
+ libraries/directory/directory.cabal: Dependency of ``ghc`` library
+ libraries/exceptions/exceptions.cabal: Dependency of ``ghc`` and ``haskeline`` library
+ libraries/filepath/filepath.cabal: Dependency of ``ghc`` library
+ compiler/ghc.cabal: The compiler itself
+ libraries/ghci/ghci.cabal: The REPL interface
+ libraries/ghc-boot/ghc-boot.cabal: Internal compiler library
+ libraries/ghc-boot-th/ghc-boot-th.cabal: Internal compiler library
+ libraries/ghc-compact/ghc-compact.cabal: Core library
+ libraries/ghc-heap/ghc-heap.cabal: GHC heap-walking library
+ libraries/ghc-prim/ghc-prim.cabal: Core library
+ libraries/haskeline/haskeline.cabal: Dependency of ``ghci`` executable
+ libraries/hpc/hpc.cabal: Dependency of ``hpc`` executable
+ libraries/integer-gmp/integer-gmp.cabal: Core library
+ libraries/mtl/mtl.cabal: Dependency of ``Cabal`` library
+ libraries/parsec/parsec.cabal: Dependency of ``Cabal`` library
+ libraries/pretty/pretty.cabal: Dependency of ``ghc`` library
+ libraries/process/process.cabal: Dependency of ``ghc`` library
+ libraries/stm/stm.cabal: Dependency of ``haskeline`` library
+ libraries/template-haskell/template-haskell.cabal: Core library
+ libraries/terminfo/terminfo.cabal: Dependency of ``haskeline`` library
+ libraries/text/text.cabal: Dependency of ``Cabal`` library
+ libraries/time/time.cabal: Dependency of ``ghc`` library
+ libraries/transformers/transformers.cabal: Dependency of ``ghc`` library
+ libraries/unix/unix.cabal: Dependency of ``ghc`` library
+ libraries/Win32/Win32.cabal: Dependency of ``ghc`` library
+ libraries/xhtml/xhtml.cabal: Dependency of ``haddock`` executable
=====================================
docs/users_guide/exts/literals.rst
=====================================
@@ -14,3 +14,4 @@ Literals
numeric_underscores
overloaded_strings
overloaded_labels
+ multiline_strings
=====================================
docs/users_guide/exts/multiline_strings.rst
=====================================
@@ -0,0 +1,86 @@
+.. _multiline-strings:
+
+Multiline string literals
+-------------------------
+
+.. extension:: MultilineStrings
+ :shortdesc: Enable multiline string literals.
+
+ :since: 9.12.1
+
+ Enable multiline string literals.
+
+With this extension, GHC now recognizes multiline string literals with ``"""`` delimiters. Indentation is automatically stripped, and gets desugared to normal string literals, so it works as expected for ``OverloadedStrings`` and any other functionality. The indentation that is stripped can be informally defined as "The longest prefix of whitespace shared by all lines in the string, excluding the first line and any whitespace-only lines".
+
+Normal string literals are lexed, then string gaps are collapsed, then escape characters are resolved. Multiline string literals add the following post-processing steps between collapsing string gaps and resolving escape characters:
+
+#. Split the string by newlines
+
+#. Replace leading tabs with spaces up to the next tab stop
+
+#. Remove common whitespace prefix in every line
+
+#. If a line only contains whitespace, remove all of the whitespace
+
+#. Join the string back with ``\n`` delimiters
+
+#. If the first character of the string is a newline, remove it
+
+Examples
+~~~~~~~~
+
+.. code-blocks use plain text because the Haskell syntax for pygments doesn't
+ support multiline strings yet. Remove if/when pygments adds multiline
+ strings to Haskell
+
++-----------------------+------------------------+---------------------------+
+| Expression | Output | Notes |
++=======================+========================+===========================+
+| .. code-block:: text | .. code-block:: | |
+| | | |
+| """ | "Line 1\n" | |
+| Line 1 | ++ "Line 2\n" | |
+| Line 2 | ++ "Line 3\n" | |
+| Line 3 | | |
+| """ | | |
++-----------------------+------------------------+---------------------------+
+| .. code-block:: text | .. code-block:: | |
+| | | Characters on the same |
+| """Test | "Test\n" | line as the delimiter are |
+| Line 1 | ++ "Line 1\n" | still included |
+| Line 2 | ++ "Line 2\n" | |
+| Line 3 | ++ "Line 3\n" | |
+| """ | | |
++-----------------------+------------------------+---------------------------+
+| .. code-block:: text | .. code-block:: | |
+| | | Omit the trailing newline |
+| """ | "Line 1\n" | with string gaps |
+| Line 1 | ++ "Line 2\n" | |
+| Line 2 | ++ "Line 3" | |
+| Line 3\ | | |
+| \""" | | |
++-----------------------+------------------------+---------------------------+
+| .. code-block:: text | .. code-block:: | |
+| | | Double quotes don't need |
+| """ | "\"Hello\"\n" | to be escaped unless |
+| "Hello" | ++ "\"\"\"\n" | they're triple quoted |
+| \"\"\" | ++ "\"\"\"\n" | |
+| \""" | | |
+| """ | | |
++-----------------------+------------------------+---------------------------+
+| .. code-block:: text | .. code-block:: | |
+| | | Only common indentation |
+| """ | "<div>\n" | is stripped |
+| <div> | ++ " <p>ABC</p>\n" | |
+| <p>ABC</p> | ++ "</div>\n" | |
+| </div> | | |
+| """ | | |
++-----------------------+------------------------+---------------------------+
+| .. code-block:: text | .. code-block:: | |
+| | | Use ``\&`` to keep |
+| """ | " Line 1\n" | leading indentation for |
+| \& Line 1 | ++ " Line 2\n" | each line |
+| \& Line 2 | ++ " Line 3\n" | |
+| \& Line 3 | | |
+| """ | | |
++-----------------------+------------------------+---------------------------+
=====================================
testsuite/tests/count-deps/CountDepsParser.stdout
=====================================
@@ -121,6 +121,7 @@ GHC.Parser.HaddockLex
GHC.Parser.Lexer
GHC.Parser.PostProcess
GHC.Parser.PostProcess.Haddock
+GHC.Parser.String
GHC.Parser.Types
GHC.Platform
GHC.Platform.Constants
=====================================
testsuite/tests/parser/should_fail/MultilineStringsError.hs
=====================================
@@ -0,0 +1,9 @@
+{-# LANGUAGE MultilineStrings #-}
+
+-- Test that the error message containing multiline strings is well-formatted.
+x :: Int
+x =
+ """
+ this is
+ a test
+ """
=====================================
testsuite/tests/parser/should_fail/MultilineStringsError.stderr
=====================================
@@ -0,0 +1,15 @@
+
+MultilineStringsError.hs:6:3: [GHC-83865]
+ Couldn't match type ‘[Char]’ with ‘Int’
+ Expected: Int
+ Actual: String
+ In the expression:
+ """
+ this is
+ a test
+ """
+ In an equation for ‘x’:
+ x = """
+ this is
+ a test
+ """
=====================================
testsuite/tests/parser/should_fail/MultilineStringsInnerTab.hs
=====================================
@@ -0,0 +1,9 @@
+{-# LANGUAGE MultilineStrings #-}
+
+-- Test that multiline strings disallow tabs in the middle
+-- of the string, like normal strings
+x :: String
+x =
+ """
+ ab sadf
+ """
=====================================
testsuite/tests/parser/should_fail/MultilineStringsInnerTab.stderr
=====================================
@@ -0,0 +1,3 @@
+
+MultilineStringsInnerTab.hs:8:5: error: [GHC-21231]
+ lexical error in string/character literal at character '\t'
=====================================
testsuite/tests/parser/should_fail/MultilineStringsSmartQuotes.hs
=====================================
@@ -0,0 +1,9 @@
+{-# LANGUAGE MultilineStrings #-}
+
+-- Test that multiline strings disallow smart quotes and show
+-- a helpful error message, like normal strings
+x :: String
+x =
+ """
+ a
+ ”””
=====================================
testsuite/tests/parser/should_fail/MultilineStringsSmartQuotes.stderr
=====================================
@@ -0,0 +1,6 @@
+
+MultilineStringsSmartQuotes.hs:9:3: [GHC-31623]
+ Unicode character '”' ('/8221') looks like '"' (Quotation Mark), but it is not
+
+MultilineStringsSmartQuotes.hs:10:1: [GHC-21231]
+ lexical error in string/character literal at end of input
=====================================
testsuite/tests/parser/should_fail/all.T
=====================================
@@ -221,3 +221,6 @@ test('T20609b', normal, compile_fail, [''])
test('T20609c', normal, compile_fail, [''])
test('T20609d', normal, compile_fail, [''])
test('SuffixAtFail', normal, compile_fail, ['-fdiagnostics-show-caret'])
+test('MultilineStringsError', [normalise_whitespace_fun(lambda s: s)], compile_fail, [''])
+test('MultilineStringsSmartQuotes', normal, compile_fail, [''])
+test('MultilineStringsInnerTab', normal, compile_fail, [''])
=====================================
testsuite/tests/parser/should_run/MultilineStrings.hs
=====================================
@@ -0,0 +1,214 @@
+{-# LANGUAGE MultilineStrings #-}
+{-# OPTIONS_GHC -Wno-tabs #-}
+
+import Text.Printf (printf)
+
+{-
+Test the MultilineStrings proposal
+https://github.com/ghc-proposals/ghc-proposals/blob/master/proposals/0569-multiline-strings.rst
+-}
+
+main :: IO ()
+main = do
+ putStrLn "-- 1"
+ prints example_1
+ putStrLn "\n-- 2"
+ prints example_2a
+ prints example_2b
+ prints example_2c
+ putStrLn "\n-- 3"
+ prints example_3
+ putStrLn "\n-- 4"
+ prints example_4
+ putStrLn "\n-- 5"
+ prints example_5
+ putStrLn "\n-- 6"
+ prints example_6a
+ prints example_6b
+ putStrLn "\n-- 7"
+ prints example_7a
+ prints example_7b_1
+ prints example_7b_2
+ putStrLn "\n-- 8"
+ prints example_8
+ putStrLn "\n-- 9"
+ prints example_9
+ putStrLn "\n-- 10"
+ prints example_10a
+ prints example_10b
+ putStrLn "\n-- 11"
+ prints example_11
+
+ putStrLn "\n-- extra"
+ prints """"""
+ prints
+ """
+ """
+ prints
+ """
+ a"""
+ prints
+ """a
+ """
+ prints
+ """
+ \n
+ """
+ prints
+ """
+ \\n
+ """
+ prints
+ """
+ a
+
+ b
+ """
+ where
+ prints :: String -> IO ()
+ prints = print
+
+example_1 =
+ """
+ abc
+
+ def
+
+ ghi
+ \njkl
+ """
+
+example_2a =
+ """Line 1
+ Line 2
+ Line 3
+ """
+
+example_2b =
+ """\
+ \Line 1
+ Line 2
+ Line 3
+ """
+
+example_2c = """hello world"""
+
+example_3 =
+ """
+ a b\
+ \ c d e
+ f g
+ """
+
+example_4 =
+ """
+ a
+ b
+ c
+ """
+
+example_5 =
+ """
+
+ a
+ b
+ c
+ """
+
+example_6a =
+ """
+ a
+ b
+ c"""
+
+example_6b =
+ """
+ a
+ b
+ c\
+ \"""
+
+example_7a =
+ """
+ a
+ b
+ c
+ """
+
+example_7b_1 =
+ """
+ \& a
+ b
+ c
+ """
+
+example_7b_2 =
+ """
+ \& a
+ \& b
+ \& c
+ """
+
+example_8 =
+ """
+ This is a literal multiline string:
+ \"\"\"
+ Hello
+ world!
+ \"""
+ """
+
+example_9 =
+ """
+ name\tage
+ Alice\t20
+ Bob\t30
+ \t40
+ """
+
+example_10a =
+ """
+ \\v -> case v of
+ Aeson.Null -> pure PrintStyleInherit
+ Aeson.String "" -> pure PrintStyleInherit
+ _ -> PrintStyleOverride <$> Aeson.parseJSON v
+ """
+
+example_10b =
+ """
+ \\s -> case s of
+ "" -> pure PrintStyleInherit
+ _ -> PrintStyleOverride <$> parsePrinterOptType s
+ """
+
+example_11 =
+ printf
+ """
+ instance Aeson.FromJSON %s where
+ parseJSON =
+ Aeson.withText "%s" $ \\s ->
+ either Aeson.parseFail pure $
+ parsePrinterOptType (Text.unpack s)
+
+ instance PrinterOptsFieldType %s where
+ parsePrinterOptType s =
+ case s of
+ %s
+ _ ->
+ Left . unlines $
+ [ "unknown value: " <> show s
+ , "Valid values are: %s"
+ ]
+ """
+ fieldTypeName
+ fieldTypeName
+ fieldTypeName
+ ( unlines
+ [ printf " \"%s\" -> Right %s" val con
+ | (con, val) <- enumOptions
+ ]
+ )
+ (unwords $ map snd enumOptions)
+ where
+ fieldTypeName = "MyEnum"
+ enumOptions = [("Foo", "foo"), ("BarBaz", "bar-baz")]
=====================================
testsuite/tests/parser/should_run/MultilineStrings.stdout
=====================================
@@ -0,0 +1,47 @@
+-- 1
+" abc\n\n def\n\nghi\n \njkl\n"
+
+-- 2
+"Line 1\n Line 2\nLine 3\n"
+"Line 1\n Line 2\nLine 3\n"
+"hello world"
+
+-- 3
+"a b c d e\nf g\n"
+
+-- 4
+"a\nb\nc\n"
+
+-- 5
+"\na\nb\nc\n"
+
+-- 6
+"a\nb\nc"
+"a\nb\nc"
+
+-- 7
+"a\nb\nc\n"
+" a\n b\n c\n"
+" a\n b\n c\n"
+
+-- 8
+"This is a literal multiline string:\n\"\"\"\nHello\n world!\n\"\"\"\n"
+
+-- 9
+" name\tage\n Alice\t20\n Bob\t30\n\t40\n"
+
+-- 10
+"\\v -> case v of\n Aeson.Null -> pure PrintStyleInherit\n Aeson.String \"\" -> pure PrintStyleInherit\n _ -> PrintStyleOverride <$> Aeson.parseJSON v\n"
+"\\s -> case s of\n \"\" -> pure PrintStyleInherit\n _ -> PrintStyleOverride <$> parsePrinterOptType s\n"
+
+-- 11
+"instance Aeson.FromJSON MyEnum where\n parseJSON =\n Aeson.withText \"MyEnum\" $ \\s ->\n either Aeson.parseFail pure $\n parsePrinterOptType (Text.unpack s)\n\ninstance PrinterOptsFieldType MyEnum where\n parsePrinterOptType s =\n case s of\n \"foo\" -> Right Foo\n \"bar-baz\" -> Right BarBaz\n\n _ ->\n Left . unlines $\n [ \"unknown value: \" <> show s\n , \"Valid values are: foo bar-baz\"\n ]\n"
+
+-- extra
+""
+""
+"a"
+"a\n"
+"\n\n"
+"\\n\n"
+"a\n\n b\n"
\ No newline at end of file
=====================================
testsuite/tests/parser/should_run/MultilineStringsOverloaded.hs
=====================================
@@ -0,0 +1,22 @@
+{-# LANGUAGE MultilineStrings #-}
+{-# LANGUAGE OverloadedStrings #-}
+
+import Data.String (IsString (..))
+import Data.Text (Text)
+
+newtype Lines s = Lines [s]
+ deriving (Show)
+
+instance IsString s => IsString (Lines s) where
+ fromString = Lines . map fromString . lines
+
+lines0 :: Lines Text
+lines0 =
+ """
+ this is
+ a test
+ with multiple lines
+ """
+
+main :: IO ()
+main = print lines0
=====================================
testsuite/tests/parser/should_run/MultilineStringsOverloaded.stdout
=====================================
@@ -0,0 +1 @@
+Lines ["this is","a test","with multiple lines"]
=====================================
testsuite/tests/parser/should_run/all.T
=====================================
@@ -20,3 +20,5 @@ test('RecordDotSyntax2', normal, compile_and_run, [''])
test('RecordDotSyntax3', [extra_files(['RecordDotSyntaxA.hs'])], multimod_compile_and_run, ['RecordDotSyntax3', ''])
test('RecordDotSyntax4', [extra_files(['RecordDotSyntaxA.hs'])], multimod_compile_and_run, ['RecordDotSyntax4', ''])
test('RecordDotSyntax5', normal, compile_and_run, [''])
+test('MultilineStrings', normal, compile_and_run, [''])
+test('MultilineStringsOverloaded', normal, compile_and_run, [''])
=====================================
utils/check-exact/ExactPrint.hs
=====================================
@@ -4773,6 +4773,7 @@ hsLit2String lit =
HsChar src v -> toSourceTextWithSuffix src v ""
HsCharPrim src p -> toSourceTextWithSuffix src p ""
HsString src v -> toSourceTextWithSuffix src v ""
+ HsMultilineString src v -> toSourceTextWithSuffix src v ""
HsStringPrim src v -> toSourceTextWithSuffix src v ""
HsInt _ (IL src _ v) -> toSourceTextWithSuffix src v ""
HsIntPrim src v -> toSourceTextWithSuffix src v ""
=====================================
utils/haddock
=====================================
@@ -1 +1 @@
-Subproject commit 9fcf5cf499102baf9e00986bb8b54b80ec5ffc81
+Subproject commit 980facc88c8f321dce624945502402ad502093b8
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/7ca74556a90108a3891c3d0fd854291d5e04f202...e0fce55bf025603417c7c8c875ac9bc137c3629b
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/7ca74556a90108a3891c3d0fd854291d5e04f202...e0fce55bf025603417c7c8c875ac9bc137c3629b
You're receiving this email because of your account on gitlab.haskell.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20240217/5ca2d7d6/attachment-0001.html>
More information about the ghc-commits
mailing list