[Git][ghc/ghc][wip/interpolated-strings] 5 commits: Merge branch 'wip/strings' into HEAD
Brandon Chinn (@brandonchinn178)
gitlab at gitlab.haskell.org
Wed Sep 25 04:34:58 UTC 2024
Brandon Chinn pushed to branch wip/interpolated-strings at Glasgow Haskell Compiler / GHC
Commits:
0b20a4a8 by Brandon Chinn at 2024-09-24T21:04:45-07:00
Merge branch 'wip/strings' into HEAD
- - - - -
f46ed78e by Brandon Chinn at 2024-09-24T21:04:45-07:00
Unify ITstring + ITstringMulti
- - - - -
34df2e36 by Brandon Chinn at 2024-09-24T21:04:46-07:00
Unify HsString + HsMultilineString
- - - - -
5851b8bf by Brandon Chinn at 2024-09-24T21:07:12-07:00
Move multiline string processing functions to top-level
- - - - -
cc7d07b3 by Brandon Chinn at 2024-09-24T21:34:40-07:00
Implement interpolated strings
- - - - -
15 changed files:
- compiler/GHC/Hs/Expr.hs
- compiler/GHC/Hs/Lit.hs
- compiler/GHC/Hs/Syn/Type.hs
- compiler/GHC/HsToCore/Match/Literal.hs
- compiler/GHC/HsToCore/Quote.hs
- compiler/GHC/Parser.y
- compiler/GHC/Parser/Lexer.x
- compiler/GHC/Parser/String.hs
- compiler/GHC/Rename/Expr.hs
- compiler/GHC/Tc/Gen/HsType.hs
- compiler/Language/Haskell/Syntax/Expr.hs
- compiler/Language/Haskell/Syntax/Extension.hs
- compiler/Language/Haskell/Syntax/Lit.hs
- utils/check-exact/ExactPrint.hs
- utils/haddock/haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs
Changes:
=====================================
compiler/GHC/Hs/Expr.hs
=====================================
@@ -242,6 +242,9 @@ type instance XIPVar GhcRn = NoExtField
type instance XIPVar GhcTc = DataConCantHappen
type instance XOverLitE (GhcPass _) = NoExtField
type instance XLitE (GhcPass _) = NoExtField
+type instance XInterString (GhcPass _) = NoExtField
+type instance XInterStringRaw (GhcPass _) = NoExtField
+type instance XInterStringExp (GhcPass _) = NoExtField
type instance XLam (GhcPass _) = [AddEpAnn]
type instance XApp (GhcPass _) = NoExtField
=====================================
compiler/GHC/Hs/Lit.hs
=====================================
@@ -49,7 +49,6 @@ import Language.Haskell.Syntax.Lit
type instance XHsChar (GhcPass _) = SourceText
type instance XHsCharPrim (GhcPass _) = SourceText
type instance XHsString (GhcPass _) = SourceText
-type instance XHsMultilineString (GhcPass _) = SourceText
type instance XHsStringPrim (GhcPass _) = SourceText
type instance XHsInt (GhcPass _) = NoExtField
type instance XHsIntPrim (GhcPass _) = SourceText
@@ -136,7 +135,6 @@ hsLitNeedsParens p = go
go (HsChar {}) = False
go (HsCharPrim {}) = False
go (HsString {}) = False
- go (HsMultilineString {}) = False
go (HsStringPrim {}) = False
go (HsInt _ x) = p > topPrec && il_neg x
go (HsInteger _ x _) = p > topPrec && x < 0
@@ -160,7 +158,6 @@ convertLit :: HsLit (GhcPass p1) -> HsLit (GhcPass p2)
convertLit (HsChar a x) = HsChar a x
convertLit (HsCharPrim a x) = HsCharPrim a x
convertLit (HsString a x) = HsString a x
-convertLit (HsMultilineString a x) = HsMultilineString a x
convertLit (HsStringPrim a x) = HsStringPrim a x
convertLit (HsInt a x) = HsInt a x
convertLit (HsIntPrim a x) = HsIntPrim a x
@@ -197,11 +194,11 @@ Equivalently it's True if
instance Outputable (HsLit (GhcPass p)) where
ppr (HsChar st c) = pprWithSourceText st (pprHsChar c)
ppr (HsCharPrim st c) = pprWithSourceText st (pprPrimChar c)
- ppr (HsString st s) = pprWithSourceText st (pprHsString s)
- ppr (HsMultilineString st s) =
- case st of
- NoSourceText -> pprHsString s
- SourceText src -> vcat $ map text $ split '\n' (unpackFS src)
+ ppr (HsString st ty s) =
+ case (ty, st) of
+ (HsStringTypeSingle, _) -> pprWithSourceText st (pprHsString s)
+ (HsStringTypeMulti, NoSourceText) -> pprHsString s
+ (HsStringTypeMulti, SourceText src) -> vcat $ map text $ split '\n' (unpackFS src)
ppr (HsStringPrim st s) = pprWithSourceText st (pprHsBytes s)
ppr (HsInt _ i)
= pprWithSourceText (il_text i) (integer (il_value i))
@@ -241,7 +238,6 @@ pmPprHsLit :: HsLit (GhcPass x) -> SDoc
pmPprHsLit (HsChar _ c) = pprHsChar c
pmPprHsLit (HsCharPrim _ c) = pprHsChar c
pmPprHsLit (HsString st s) = pprWithSourceText st (pprHsString s)
-pmPprHsLit (HsMultilineString st s) = pprWithSourceText st (pprHsString s)
pmPprHsLit (HsStringPrim _ s) = pprHsBytes s
pmPprHsLit (HsInt _ i) = integer (il_value i)
pmPprHsLit (HsIntPrim _ i) = integer i
=====================================
compiler/GHC/Hs/Syn/Type.hs
=====================================
@@ -75,8 +75,7 @@ hsPatType (SplicePat v _) = dataConCantHappen v
hsLitType :: HsLit (GhcPass p) -> Type
hsLitType (HsChar _ _) = charTy
hsLitType (HsCharPrim _ _) = charPrimTy
-hsLitType (HsString _ _) = stringTy
-hsLitType (HsMultilineString _ _) = stringTy
+hsLitType (HsString _ _ _) = stringTy
hsLitType (HsStringPrim _ _) = addrPrimTy
hsLitType (HsInt _ _) = intTy
hsLitType (HsIntPrim _ _) = intPrimTy
=====================================
compiler/GHC/HsToCore/Match/Literal.hs
=====================================
@@ -121,7 +121,6 @@ dsLit l = do
HsDoublePrim _ fl -> return (Lit (LitDouble (rationalFromFractionalLit fl)))
HsChar _ c -> return (mkCharExpr c)
HsString _ str -> mkStringExprFS str
- HsMultilineString _ str -> mkStringExprFS str
HsInteger _ i _ -> return (mkIntegerExpr platform i)
HsInt _ i -> return (mkIntExpr platform (il_value i))
HsRat _ fl ty -> dsFractionalLitToRational fl ty
@@ -475,7 +474,6 @@ getSimpleIntegralLit (HsInteger _ i ty) = Just (i, ty)
getSimpleIntegralLit HsChar{} = Nothing
getSimpleIntegralLit HsCharPrim{} = Nothing
getSimpleIntegralLit HsString{} = Nothing
-getSimpleIntegralLit HsMultilineString{} = Nothing
getSimpleIntegralLit HsStringPrim{} = Nothing
getSimpleIntegralLit HsRat{} = Nothing
getSimpleIntegralLit HsFloatPrim{} = Nothing
=====================================
compiler/GHC/HsToCore/Quote.hs
=====================================
@@ -3025,8 +3025,7 @@ repLiteral lit
HsDoublePrim _ _ -> Just doublePrimLName
HsChar _ _ -> Just charLName
HsCharPrim _ _ -> Just charPrimLName
- HsString _ _ -> Just stringLName
- HsMultilineString _ _ -> Just stringLName
+ HsString _ _ _ -> Just stringLName
HsRat _ _ _ -> Just rationalLName
_ -> Nothing
=====================================
compiler/GHC/Parser.y
=====================================
@@ -727,8 +727,13 @@ are the most common patterns, rewritten as regular expressions for clarity:
LABELVARID { L _ (ITlabelvarid _ _) }
CHAR { L _ (ITchar _ _) }
- STRING { L _ (ITstring _ _) }
- STRING_MULTI { L _ (ITstringMulti _ _) }
+ STRING { L _ (ITstring _ _ StringTypeSingle) }
+ STRING_MULTI { L _ (ITstring _ _ StringTypeMulti) }
+ STRING_INTER_BEGIN { L _ (ITstringInterBegin _) }
+ STRING_INTER_RAW { L _ (ITstringInterRaw _ _) }
+ STRING_INTER_EXP_OPEN { L _ ITstringInterExpOpen }
+ STRING_INTER_EXP_CLOSE { L _ ITstringInterExpClose }
+ STRING_INTER_END { L _ (ITstringInterEnd _) }
INTEGER { L _ (ITinteger _) }
RATIONAL { L _ (ITrational _) }
@@ -3080,6 +3085,8 @@ aexp2 :: { ECP }
-- into HsOverLit when -XOverloadedStrings is on.
-- | STRING { sL (getLoc $1) (HsOverLit $! mkHsIsString (getSTRINGs $1)
-- (getSTRING $1) noExtField) }
+ | stringInter {% fmap ecpFromExp
+ (ams1 $1 (HsInterString NoExtField $! unLoc $1)) }
| INTEGER { ECP $ mkHsOverLitPV (sL1a $1 $ mkHsIntegral (getINTEGER $1)) }
| RATIONAL { ECP $ mkHsOverLitPV (sL1a $1 $ mkHsFractional (getRATIONAL $1)) }
@@ -3677,6 +3684,14 @@ ipvar :: { Located HsIPName }
overloaded_label :: { Located (SourceText, FastString) }
: LABELVARID { sL1 $1 (getLABELVARIDs $1, getLABELVARID $1) }
+-----------------------------------------------------------------------------
+-- Interpolated strings
+-- See Note [Interpolated strings] in GHC.Parser.String
+
+stringInter :: { [Either FastString (LHsExpr GhcPs)] }
+ -- TODO(bchinn): break out recursive stringInterPart rule
+ : STRING_INTER_BEGIN (STRING_INTER_RAW | STRING_INTER_EXP_OPEN exp STRING_INTER_EXP_CLOSE)* STRING_INTER_END { undefined }
+
-----------------------------------------------------------------------------
-- Warnings and deprecations
@@ -4153,8 +4168,8 @@ getQCONSYM (L _ (ITqconsym x)) = x
getIPDUPVARID (L _ (ITdupipvarid x)) = x
getLABELVARID (L _ (ITlabelvarid _ x)) = x
getCHAR (L _ (ITchar _ x)) = x
-getSTRING (L _ (ITstring _ x)) = x
-getSTRINGMULTI (L _ (ITstringMulti _ x)) = x
+getSTRING (L _ (ITstring _ x StringTypeSingle)) = x
+getSTRINGMULTI (L _ (ITstring _ x StringTypeMulti)) = x
getINTEGER (L _ (ITinteger x)) = x
getRATIONAL (L _ (ITrational x)) = x
getPRIMCHAR (L _ (ITprimchar _ x)) = x
@@ -4179,8 +4194,8 @@ getVOCURLY (L (RealSrcSpan l _) ITvocurly) = srcSpanStartCol l
getINTEGERs (L _ (ITinteger (IL src _ _))) = src
getCHARs (L _ (ITchar src _)) = src
-getSTRINGs (L _ (ITstring src _)) = src
-getSTRINGMULTIs (L _ (ITstringMulti src _)) = src
+getSTRINGs (L _ (ITstring src _ StringTypeSingle)) = src
+getSTRINGMULTIs (L _ (ITstring src _ StringTypeMulti)) = src
getPRIMCHARs (L _ (ITprimchar src _)) = src
getPRIMSTRINGs (L _ (ITprimstring src _)) = src
getPRIMINTEGERs (L _ (ITprimint src _)) = src
=====================================
compiler/GHC/Parser/Lexer.x
=====================================
@@ -168,7 +168,7 @@ $idchar = [$small $large $digit $uniidchar \']
$unigraphic = \x06 -- Trick Alex into handling Unicode. See Note [Unicode in Alex].
$graphic = [$small $large $symbol $digit $idchar $special $unigraphic \"\']
-$charesc = [a b f n r t v \\ \" \' \&]
+$charesc = [a b f n r t v \\ \" \' \& \$]
$binit = 0-1
$octit = 0-7
@@ -226,8 +226,9 @@ $docsym = [\| \^ \* \$]
-- character sets can be subtracted, not strings
@escape = \\ ( $charesc | @ascii | @decimal | o @octal | x @hexadecimal )
@escapechar = \\ ( $charesc # \& | @ascii | @decimal | o @octal | x @hexadecimal )
- at stringchar = ($graphic # [\\ \"]) | $space | @escape | @gap
- at char = ($graphic # [\\ \']) | $space | @escapechar
+ at stringchar = ($graphic # [\\ \"]) | $space | @escape | @gap
+ at char = ($graphic # [\\ \']) | $space | @escapechar
+ at stringinterchar = ($graphic # [\\ \" \$]) | $space | @escape | @gap
-- normal signed numerical literals can only be explicitly negative,
-- not explicitly positive (contrast @exponent)
@@ -700,6 +701,21 @@ $unigraphic / { isSmartQuote } { smart_quote_error }
(\" | \"\") / ([\n .] # \") { tok_string_multi_content }
}
+-- See Note [Interpolated strings]
+<0> {
+ s \" { \span _ _ _ -> pushLexState string_inter_content >> pure (L span (ITstringInterBegin StringTypeSingle)) }
+ -- TODO(bchinn): interpolated multiline strings
+}
+
+-- TODO(bchinn): add string_inter state to all <0> states that can be in an interpolated string
+<string_inter_content> {
+ @stringinterchar* { tok_string_inter_raw }
+ \$ \{ { \span _ _ _ -> pushLexState string_inter >> pure (L span ITstringInterExpOpen) }
+ \" { \span _ _ _ -> popLexState >> pure (L span (ITstringInterEnd StringTypeSingle)) }
+
+ -- TODO(bchinn): check for smart quotes
+}
+
<0> {
\'\' { token ITtyQuote }
@@ -991,9 +1007,16 @@ data Token
-- have a string literal as a label
-- Note [Literal source text] in "GHC.Types.SourceText"
- | ITchar SourceText Char -- Note [Literal source text] in "GHC.Types.SourceText"
- | ITstring SourceText FastString -- Note [Literal source text] in "GHC.Types.SourceText"
- | ITstringMulti SourceText FastString -- Note [Literal source text] in "GHC.Types.SourceText"
+ | ITchar SourceText Char -- Note [Literal source text] in "GHC.Types.SourceText"
+ | ITstring SourceText FastString StringType -- Note [Literal source text] in "GHC.Types.SourceText"
+
+ -- See Note [Interpolated strings]
+ | ITstringInterBegin StringType
+ | ITstringInterRaw SourceText RawLexedString -- Note [Literal source text] in "GHC.Types.SourceText"
+ | ITstringInterExpOpen
+ | ITstringInterExpClose
+ | ITstringInterEnd StringType
+
| ITinteger IntegralLit -- Note [Literal source text] in "GHC.Types.SourceText"
| ITrational FractionalLit
@@ -1743,8 +1766,11 @@ open_brace span _str _len _buf2 = do
setContext (NoLayout:ctx)
return (L span ITocurly)
close_brace span _str _len _buf2 = do
- popContext
- return (L span ITccurly)
+ ctx <- getContext
+ sc <- getLexState
+ if null ctx && sc == string_inter
+ then popLexState >> pure (L span ITstringInterExpClose)
+ else popContext >> pure (L span ITccurly)
qvarid, qconid :: StringBuffer -> Int -> Token
qvarid buf len = ITqvarid $! splitQualName buf len False
@@ -2234,7 +2260,7 @@ tok_string span buf len _buf2 = do
addError err
pure $ L span (ITprimstring src (unsafeMkByteString s))
else
- pure $ L span (ITstring src (mkFastString s))
+ pure $ L span (ITstring src (mkFastString s) StringTypeSingle)
where
src = SourceText $ lexemeToFastString buf len
endsInHash = currentChar (offsetBytes (len - 1) buf) == '#'
@@ -2263,10 +2289,10 @@ tok_string_multi startSpan startBuf _len _buf2 = do
let contentLen = byteDiff contentStartBuf contentEndBuf
s <-
either (throwStringLexError (AI startLoc startBuf)) pure $
- lexMultilineString contentLen contentStartBuf
+ lexString StringTypeMulti contentLen contentStartBuf
setInput i'
- pure $ L span $ ITstringMulti src (mkFastString s)
+ pure $ L span $ ITstring src (mkFastString s) StringTypeMulti
where
goContent i0 =
case alexScan i0 string_multi_content of
@@ -2310,7 +2336,7 @@ tok_string_multi_content = panic "tok_string_multi_content unexpectedly invoked"
lex_chars :: (String, String) -> PsSpan -> StringBuffer -> Int -> P String
lex_chars (startDelim, endDelim) span buf len =
either (throwStringLexError i0) pure $
- lexString contentLen contentBuf
+ lexString StringTypeSingle contentLen contentBuf
where
i0@(AI _ contentBuf) = advanceInputBytes (length startDelim) $ AI (psSpanStart span) buf
=====================================
compiler/GHC/Parser/String.hs
=====================================
@@ -5,8 +5,12 @@
module GHC.Parser.String (
StringLexError (..),
+ StringType (..),
lexString,
- lexMultilineString,
+
+ -- * Raw strings
+ RawLexedString,
+ lexStringRaw,
-- * Unicode smart quote helpers
isDoubleSmartQuote,
@@ -17,6 +21,7 @@ import GHC.Prelude hiding (getChar)
import Control.Arrow ((>>>))
import Control.Monad (when)
+import Data.Bifunctor (first)
import Data.Char (chr, ord)
import qualified Data.Foldable1 as Foldable1
import qualified Data.List.NonEmpty as NonEmpty
@@ -37,13 +42,16 @@ import GHC.Utils.Panic (panic)
type BufPos = Int
data StringLexError = StringLexError LexErr BufPos
-lexString :: Int -> StringBuffer -> Either StringLexError String
-lexString = lexStringWith processChars processChars
+data StringType = StringTypeSingle | StringTypeMulti deriving (Show)
+
+lexString :: StringType -> Int -> StringBuffer -> Either StringLexError String
+lexString strType = lexStringWith processChars processChars
where
processChars :: HasChar c => [c] -> Either (c, LexErr) [c]
processChars =
- collapseGaps
- >>> resolveEscapes
+ case strType of
+ StringTypeSingle -> processCharsSingle
+ StringTypeMulti -> processCharsMulti
-- -----------------------------------------------------------------------------
-- Lexing interface
@@ -122,6 +130,11 @@ bufferLocatedChars initialBuf len = go initialBuf
-- -----------------------------------------------------------------------------
-- Lexing phases
+processCharsSingle :: HasChar c => [c] -> Either (c, LexErr) [c]
+processCharsSingle =
+ collapseGaps
+ >>> resolveEscapes
+
collapseGaps :: HasChar c => [c] -> [c]
collapseGaps = go
where
@@ -255,75 +268,71 @@ isSingleSmartQuote = \case
-- Assumes string is lexically valid. Skips the steps about splitting
-- and rejoining lines, and instead manually find newline characters,
-- for performance.
-lexMultilineString :: Int -> StringBuffer -> Either StringLexError String
-lexMultilineString = lexStringWith processChars processChars
+processCharsMulti :: HasChar c => [c] -> Either (c, LexErr) [c]
+processCharsMulti = fmap from . processCharsMulti' . to
where
- processChars :: HasChar c => [c] -> Either (c, LexErr) [c]
- processChars =
- collapseGaps -- Step 1
- >>> expandLeadingTabs -- Step 3
- >>> rmCommonWhitespacePrefix -- Step 4
- >>> collapseOnlyWsLines -- Step 5
- >>> rmFirstNewline -- Step 7a
- >>> rmLastNewline -- Step 7b
- >>> resolveEscapes -- Step 8
-
- -- expands all tabs, since the lexer will verify that tabs can only appear
- -- as leading indentation
- expandLeadingTabs :: HasChar c => [c] -> [c]
- expandLeadingTabs =
- let go !col = \case
- c@(Char '\t') : cs ->
- let fill = 8 - (col `mod` 8)
- in replicate fill (setChar ' ' c) ++ go (col + fill) cs
- c : cs -> c : go (if getChar c == '\n' then 0 else col + 1) cs
- [] -> []
- in go 0
+ -- Convert a normal multiline string to/from an interpolated multiline string
+ -- with no interpolated expressions.
+ to s = [Left s]
+ from = \case
+ [Left s] -> s
+ _ -> panic "Got unexpected result when processing characters in multiline string"
+
+-- | Process multiline characters generally, for both normal multiline strings and interpolated
+-- multiline strings.
+processCharsMulti' :: HasChar c => [Either [c] x] -> Either (c, LexErr) [Either [c] x]
+processCharsMulti' =
+ overRaw collapseGaps -- Step 1
+ >>> expandLeadingTabs -- Step 3
+ >>> rmCommonWhitespacePrefix -- Step 4
+ >>> collapseOnlyWsLines -- Step 5
+ >>> rmFirstNewline -- Step 7a
+ >>> rmLastNewline -- Step 7b
+ >>> overRaw resolveEscapes -- Step 8
+
+-- Run the given function over all raw strings, ignoring expressions
+overRaw :: (s -> s) -> [Either s x] -> [Either s x]
+overRaw f = map $ \case
+ Left s -> Left $ f s
+ Right x -> Right x
+
+-- | Expands all tabs blindly, since the lexer will verify that tabs can only appear
+-- as leading indentation
+expandLeadingTabs :: HasChar c => [Either [c] x] -> [Either [c] x]
+expandLeadingTabs =
+ -- we can expand each raw string part independently, because leading
+ -- indentation will never contain an interpolated expression
+ overRaw $ go 0
+ where
+ go !col = \case
+ c@(Char '\t') : cs ->
+ let fill = 8 - (col `mod` 8)
+ in replicate fill (setChar ' ' c) ++ go (col + fill) cs
+ c : cs -> c : go (if getChar c == '\n' then 0 else col + 1) cs
+ [] -> []
- rmCommonWhitespacePrefix :: HasChar c => [c] -> [c]
- rmCommonWhitespacePrefix cs0 =
- let commonWSPrefix = getCommonWsPrefix (map getChar cs0)
- go = \case
- c@(Char '\n') : cs -> c : go (dropLine commonWSPrefix cs)
- c : cs -> c : go cs
- [] -> []
- -- drop x characters from the string, or up to a newline, whichever
- -- comes first
- dropLine !x = \case
- cs | x <= 0 -> cs
- cs@(Char '\n' : _) -> cs
- _ : cs -> dropLine (x - 1) cs
- [] -> []
- in go cs0
-
- collapseOnlyWsLines :: HasChar c => [c] -> [c]
- collapseOnlyWsLines =
- let go = \case
- c@(Char '\n') : cs | Just cs' <- checkAllWs cs -> c : go cs'
- c : cs -> c : go cs
- [] -> []
- checkAllWs = \case
- -- got all the way to a newline or the end of the string, return
- cs@(Char '\n' : _) -> Just cs
- cs@[] -> Just cs
- -- found whitespace, continue
- Char c : cs | is_space c -> checkAllWs cs
- -- anything else, stop
- _ -> Nothing
- in go
-
- rmFirstNewline :: HasChar c => [c] -> [c]
- rmFirstNewline = \case
- Char '\n' : cs -> cs
- cs -> cs
-
- rmLastNewline :: HasChar c => [c] -> [c]
- rmLastNewline =
- let go = \case
- [] -> []
- [Char '\n'] -> []
- c : cs -> c : go cs
- in go
+rmCommonWhitespacePrefix :: HasChar c => [Either [c] x] -> [Either [c] x]
+rmCommonWhitespacePrefix parts =
+ -- Whitespace prefix, by definition, only comes after newline characters, and there can
+ -- never be an interpolated expr within a whitespace prefix (since the expr would end
+ -- the prefix). So we can use a plain `map` to just process the string parts, because
+ -- the "drop prefix" logic will never span over multiple parts. TODO(bchinn): what about two Strings next to each other?
+ map (first go) parts
+ where
+ -- treat interpolated exprs as a single, non-space character string
+ commonWSPrefix = getCommonWsPrefix $ concatMap (either (map getChar) (const "x")) parts
+
+ go = \case
+ c@(Char '\n') : cs -> c : go (dropPrefix commonWSPrefix cs)
+ c : cs -> c : go cs
+ [] -> []
+
+ -- drop x characters from the string, or up to a newline, whichever comes first
+ dropPrefix !x = \case
+ cs | x <= 0 -> cs
+ cs@(Char '\n' : _) -> cs
+ _ : cs -> dropPrefix (x - 1) cs
+ [] -> []
-- | See step 4 in Note [Multiline string literals]
--
@@ -339,6 +348,57 @@ getCommonWsPrefix s =
. drop 1 -- ignore first line in calculation
$ lines s
+-- TODO(bchinn): treat interpolated exprs as one character non-space string
+collapseOnlyWsLines :: HasChar c => [Either [c] x] -> [Either [c] x]
+collapseOnlyWsLines = goParts
+ where
+ goParts = \case
+ Right x : parts -> Right x : goParts parts
+ [Left s] -> _
+ Left s : _ -> _
+ [] -> []
+
+ go = \case
+ c@(Char '\n') : cs | Just cs' <- checkAllWs cs -> c : go cs'
+ c : cs -> c : go cs
+ [] -> []
+
+ -- Return (String, Bool, String), where:
+ -- * String - The line that was just parsed, ending with a newline character, unless
+ -- we reached the end of the string
+ -- * Bool - Whether the line that was just parsed was all whitespace characters
+ -- * String - The rest of the string
+ takeLine =
+ let go acc allWS = \case
+ -- reached a newline or the end of the string
+ c@(Char '\n') : cs -> (dlistToList (acc `dlistSnoc` c), allWS, cs)
+ [] -> (dlistToList acc, allWS, [])
+ -- found whitespace
+ c : cs | is_space (getChar c) -> go (acc `dlistSnoc` c) allWS
+ -- found non-whitespace
+ c : cs -> go (acc `dlistSnoc` c) False
+ in go dlistEmpty True
+
+rmFirstNewline :: HasChar c => [Either [c] x] -> [Either [c] x]
+rmFirstNewline = \case
+ Left (Char '\n' : cs) : parts -> Left cs : parts
+ parts -> parts
+
+rmLastNewline :: HasChar c => [Either [c] x] -> [Either [c] x]
+rmLastNewline = goParts
+ where
+ goParts = \case
+ [] -> []
+ [Left s] ->
+ case goLastLine s of
+ [] -> []
+ s' -> [Left s']
+ c : cs -> c : goParts cs
+ goLastLine = \case
+ [] -> []
+ [Char '\n'] -> []
+ c : cs -> c : goLastLine cs
+
{-
Note [Multiline string literals]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -348,8 +408,8 @@ proposal: https://github.com/ghc-proposals/ghc-proposals/pull/569
Multiline string literals are syntax sugar for normal string literals,
with an extra post processing step. This all happens in the Lexer; that
-is, HsMultilineString will contain the post-processed string. This matches
-the same behavior as HsString, which contains the normalized string
+is, HsString will contain the post-processed string. This matches the same
+behavior as single-line HsString, which contains the normalized string
(see Note [Literal source text]).
The canonical steps for post processing a multiline string are:
@@ -378,6 +438,76 @@ It's more precisely defined with the following algorithm:
3. Calculate the longest prefix of whitespace shared by all lines in the remaining list
-}
+-- -----------------------------------------------------------------------------
+-- Interpolated strings
+
+-- | A string that's been validated to be lexically correct, but still
+-- contains the raw string lexed, without anything resolved.
+newtype RawLexedString = RawLexedString {unRawLexedString :: String}
+
+-- | Load and validate the string in the given StringBuffer.
+--
+-- e.g. Lexing "a\nb" will return RawLexedString ['a', '\\', 'n', 'b'].
+lexStringRaw :: Int -> StringBuffer -> Either StringLexError RawLexedString
+lexStringRaw len buf = RawLexedString (bufferChars len buf) <$ lexString strType len buf
+ where
+ -- Always lex as a single-line string, because single-line and multi-line strings
+ -- have the same validation logic, so we can skip the multi-line processing steps
+ strType = StringTypeSingle
+
+fromRawLexedStringSingle :: RawLexedString -> String
+fromRawLexedStringSingle (RawLexedString s) =
+ case processCharsSingle s of
+ Right s' -> s'
+ Left _ -> panic "Unexpectedly got an error when re-lexing the string"
+
+fromRawLexedStringMulti :: [Either RawLexedString x] -> [Either String x]
+fromRawLexedStringMulti parts =
+ case processCharsMulti' $ map (first unRawLexedString) parts of
+ Right parts' -> parts'
+ Left _ -> panic "Unexpectedly got an error when re-lexing the string"
+
+{-
+Note [Interpolated strings]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Interpolated string syntax was accepted in this proposal:
+https://github.com/ghc-proposals/ghc-proposals/pull/570
+
+Interpolated strings are syntax sugar for <TODO(bchinn)>
+
+Interpolated strings are implemented in the following manner:
+
+1. Lexer takes the string as input:
+
+ s"Hello ${Text.toUpper name}!"
+
+ and outputs the following tokens:
+
+ [ ITstringInterBegin src StringTypeSingle
+ , ITstringInterRaw src "Hello "
+ , ITstringInterExpOpen src
+ , ITqvarid ("Text.toUpper", "name")
+ , ITvarid "name"
+ , ITstringInterExpClose src
+ , ITstringInterRaw src "!"
+ , ITstringInterEnd src StringTypeSingle
+ ]
+
+2. The parser will then parse the tokens into the following HsExpr:
+
+ HsInterString ext
+ [ HsInterRaw ext "Hello "
+ , HsInterExp ext $
+ HsApp ext
+ (HsVar ext 'Text.toUpper)
+ (HsVar ext 'name)
+ , HsInterRaw ext "!"
+ ]
+
+3. This will then be desugared into <TODO(bchinn)>
+-}
+
-- -----------------------------------------------------------------------------
-- DList
=====================================
compiler/GHC/Rename/Expr.hs
=====================================
@@ -377,8 +377,7 @@ rnExpr (HsLit x lit) | Just (src, s) <- stringLike lit
; return (HsLit x (convertLit lit), emptyFVs) } }
where
stringLike = \case
- HsString src s -> Just (src, s)
- HsMultilineString src s -> Just (src, s)
+ HsString src _ s -> Just (src, s)
_ -> Nothing
rnExpr (HsLit x lit)
@@ -393,6 +392,9 @@ rnExpr (HsOverLit x lit)
return (HsApp noExtField (noLocA neg) (noLocA (HsOverLit x lit'))
, fvs ) }
+rnExpr (HsInterString x ty parts)
+ = undefined -- TODO(bchinn)
+
rnExpr (HsApp x fun arg)
= do { (fun',fvFun) <- rnLExpr fun
; (arg',fvArg) <- rnLExpr arg
=====================================
compiler/GHC/Tc/Gen/HsType.hs
=====================================
@@ -4725,8 +4725,7 @@ addTyConFlavCtxt name flav
tyLitFromLit :: HsLit GhcRn -> Maybe (HsTyLit GhcRn)
-tyLitFromLit (HsString x str) = Just (HsStrTy x str)
-tyLitFromLit (HsMultilineString x str) = Just (HsStrTy x str)
+tyLitFromLit (HsString x _ str) = Just (HsStrTy x str)
tyLitFromLit (HsChar x char) = Just (HsCharTy x char)
tyLitFromLit _ = Nothing
=====================================
compiler/Language/Haskell/Syntax/Expr.hs
=====================================
@@ -350,7 +350,6 @@ data HsExpr p
-- erroring expression will be written after
-- solving. See Note [Holes] in GHC.Tc.Types.Constraint.
-
| HsRecSel (XRecSel p)
(FieldOcc p) -- ^ Variable pointing to record selector
-- See Note [Non-overloaded record field selectors] and
@@ -361,12 +360,19 @@ data HsExpr p
| HsIPVar (XIPVar p)
HsIPName -- ^ Implicit parameter (not in use after typechecking)
+
| HsOverLit (XOverLitE p)
(HsOverLit p) -- ^ Overloaded literals
| HsLit (XLitE p)
(HsLit p) -- ^ Simple (non-overloaded) literals
+ | -- | See Note [Interpolated strings]
+ HsInterString
+ (XInterString p)
+ HsStringType
+ [Either FastString (LHsExpr p)]
+
-- | Lambda, Lambda-case, and Lambda-cases
--
-- - 'GHC.Parser.Annotation.AnnKeywordId' : 'GHC.Parser.Annotation.AnnLam',
=====================================
compiler/Language/Haskell/Syntax/Extension.hs
=====================================
@@ -413,6 +413,9 @@ type family XOverLabel x
type family XIPVar x
type family XOverLitE x
type family XLitE x
+type family XInterString x
+type family XInterStringRaw x
+type family XInterStringExp x
type family XLam x
type family XLamCase x
type family XApp x
@@ -556,7 +559,6 @@ type family XXParStmtBlock x x'
type family XHsChar x
type family XHsCharPrim x
type family XHsString x
-type family XHsMultilineString x
type family XHsStringPrim x
type family XHsInt x
type family XHsIntPrim x
=====================================
compiler/Language/Haskell/Syntax/Lit.hs
=====================================
@@ -51,9 +51,7 @@ data HsLit x
-- ^ Character
| HsCharPrim (XHsCharPrim x) {- SourceText -} Char
-- ^ Unboxed character
- | HsString (XHsString x) {- SourceText -} FastString
- -- ^ String
- | HsMultilineString (XHsMultilineString x) {- SourceText -} FastString
+ | HsString (XHsString x) {- SourceText -} HsStringType FastString
-- ^ String
| HsStringPrim (XHsStringPrim x) {- SourceText -} !ByteString
-- ^ Packed bytes
@@ -145,3 +143,5 @@ instance Ord OverLitVal where
compare (HsIsString _ s1) (HsIsString _ s2) = s1 `lexicalCompareFS` s2
compare (HsIsString _ _) (HsIntegral _) = GT
compare (HsIsString _ _) (HsFractional _) = GT
+
+data HsStringType = HsStringTypeSingle | HsStringTypeMulti
=====================================
utils/check-exact/ExactPrint.hs
=====================================
@@ -4923,8 +4923,7 @@ hsLit2String lit =
case lit of
HsChar src v -> toSourceTextWithSuffix src v ""
HsCharPrim src p -> toSourceTextWithSuffix src p ""
- HsString src v -> toSourceTextWithSuffix src v ""
- HsMultilineString src v -> toSourceTextWithSuffix src v ""
+ HsString src _ v -> toSourceTextWithSuffix src v ""
HsStringPrim src v -> toSourceTextWithSuffix src v ""
HsInt _ (IL src _ v) -> toSourceTextWithSuffix src v ""
HsIntPrim src v -> toSourceTextWithSuffix src v ""
=====================================
utils/haddock/haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs
=====================================
@@ -320,7 +320,6 @@ classify tok =
ITlabelvarid{} -> TkUnknown
ITchar{} -> TkChar
ITstring{} -> TkString
- ITstringMulti{} -> TkString
ITinteger{} -> TkNumber
ITrational{} -> TkNumber
ITprimchar{} -> TkChar
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/65d1c307e0dbf367ccc3555a1e6411b45c8efd09...cc7d07b3b8a0eb678af4eeaed2726c1cc8c79ab3
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/65d1c307e0dbf367ccc3555a1e6411b45c8efd09...cc7d07b3b8a0eb678af4eeaed2726c1cc8c79ab3
You're receiving this email because of your account on gitlab.haskell.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20240925/a6a41621/attachment-0001.html>
More information about the ghc-commits
mailing list