[Git][ghc/ghc][wip/interpolated-strings] Implement interpolated strings
Brandon Chinn (@brandonchinn178)
gitlab at gitlab.haskell.org
Sat Sep 21 19:18:31 UTC 2024
Brandon Chinn pushed to branch wip/interpolated-strings at Glasgow Haskell Compiler / GHC
Commits:
e0692156 by Brandon Chinn at 2024-09-21T12:18:23-07:00
Implement interpolated strings
- - - - -
8 changed files:
- compiler/GHC/Hs/Expr.hs
- compiler/GHC/Parser/Lexer.x
- compiler/GHC/Parser/String.hs
- compiler/Language/Haskell/Syntax/Expr.hs
- compiler/Language/Haskell/Syntax/Extension.hs
- libraries/array
- libraries/deepseq
- libraries/directory
Changes:
=====================================
compiler/GHC/Hs/Expr.hs
=====================================
@@ -242,6 +242,9 @@ type instance XIPVar GhcRn = NoExtField
type instance XIPVar GhcTc = DataConCantHappen
type instance XOverLitE (GhcPass _) = NoExtField
type instance XLitE (GhcPass _) = NoExtField
+type instance XInterString (GhcPass _) = NoExtField
+type instance XInterStringRaw (GhcPass _) = NoExtField
+type instance XInterStringExp (GhcPass _) = NoExtField
type instance XLam (GhcPass _) = [AddEpAnn]
type instance XApp (GhcPass _) = NoExtField
=====================================
compiler/GHC/Parser/Lexer.x
=====================================
@@ -167,7 +167,7 @@ $idchar = [$small $large $digit $uniidchar \']
$unigraphic = \x06 -- Trick Alex into handling Unicode. See Note [Unicode in Alex].
$graphic = [$small $large $symbol $digit $idchar $special $unigraphic \"\']
-$charesc = [a b f n r t v \\ \" \' \&]
+$charesc = [a b f n r t v \\ \" \' \& \$]
$binit = 0-1
$octit = 0-7
@@ -223,10 +223,11 @@ $docsym = [\| \^ \* \$]
-- N.B. ideally, we would do `@escape # \\ \&` instead of duplicating in @escapechar,
-- which is what the Haskell Report says, but this isn't valid Alex syntax, as only
-- character sets can be subtracted, not strings
- at escape = \\ ( $charesc | @ascii | @decimal | o @octal | x @hexadecimal )
- at escapechar = \\ ( $charesc # \& | @ascii | @decimal | o @octal | x @hexadecimal )
- at stringchar = ($graphic # [\\ \"]) | " " | @escape | @gap
- at char = ($graphic # [\\ \']) | " " | @escapechar
+ at escape = \\ ( $charesc | @ascii | @decimal | o @octal | x @hexadecimal )
+ at escapechar = \\ ( $charesc # \& | @ascii | @decimal | o @octal | x @hexadecimal )
+ at stringchar = ($graphic # [\\ \"]) | " " | @escape | @gap
+ at char = ($graphic # [\\ \']) | " " | @escapechar
+ at stringinterchar = ($graphic # [\\ \" \$]) | " " | @escape | @gap
-- normal signed numerical literals can only be explicitly negative,
-- not explicitly positive (contrast @exponent)
@@ -699,6 +700,25 @@ $unigraphic / { isSmartQuote } { smart_quote_error }
(\" | \"\") / ([\n .] # \") { tok_string_multi_content }
}
+-- See Note [Interpolated strings]
+<0> {
+ s \" { \span _ _ _ -> pushLexState string_inter_content >> pure (L span (ITstringInterBegin src StringTypeSingle)) }
+ -- TODO: interpolated multiline strings
+}
+
+<string_inter_content> {
+ @stringinterchar* { tok_string_inter_raw }
+ \$ \{ { \span _ _ _ -> pushLexState string_inter >> pure (L span (ITstringInterExpOpen src)) }
+ \" { \span _ _ _ -> popLexState >> pure (L span (ITstringInterEnd src StringTypeSingle)) }
+
+ -- TODO: check for smart quotes
+}
+
+-- TODO: add string_inter state to all <0> states that can be in an interpolated string
+<string_inter> {
+ \} { \span _ _ _ -> popLexState >> pure (L span (ITstringInterExpClose src)) }
+}
+
<0> {
\'\' { token ITtyQuote }
@@ -993,6 +1013,13 @@ data Token
| ITchar SourceText Char -- Note [Literal source text] in "GHC.Types.SourceText"
| ITstring SourceText FastString StringType -- Note [Literal source text] in "GHC.Types.SourceText"
+ -- See Note [Interpolated strings]
+ | ITstringInterBegin SourceText StringType -- Note [Literal source text] in "GHC.Types.SourceText"
+ | ITstringInterRaw SourceText FastString -- Note [Literal source text] in "GHC.Types.SourceText"
+ | ITstringInterExpOpen SourceText -- Note [Literal source text] in "GHC.Types.SourceText"
+ | ITstringInterExpClose SourceText -- Note [Literal source text] in "GHC.Types.SourceText"
+ | ITstringInterEnd SourceText StringType -- Note [Literal source text] in "GHC.Types.SourceText"
+
| ITinteger IntegralLit -- Note [Literal source text] in "GHC.Types.SourceText"
| ITrational FractionalLit
=====================================
compiler/GHC/Parser/String.hs
=====================================
@@ -378,6 +378,47 @@ It's more precisely defined with the following algorithm:
3. Calculate the longest prefix of whitespace shared by all lines in the remaining list
-}
+{-
+Note [Interpolated strings]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Interpolated string syntax was accepted in this proposal:
+https://github.com/ghc-proposals/ghc-proposals/pull/570
+
+Interpolated strings are syntax sugar for <TODO>
+
+Interpolated strings are implemented in the following manner:
+
+1. Lexer takes the string as input:
+
+ s"Hello ${Text.toUpper name}!"
+
+ and outputs the following tokens:
+
+ [ ITstringInterBegin src StringTypeSingle
+ , ITstringInterRaw src "Hello "
+ , ITstringInterExpOpen src
+ , ITqvarid ("Text.toUpper", "name")
+ , ITvarid "name"
+ , ITstringInterExpClose src
+ , ITstringInterRaw src "!"
+ , ITstringInterEnd src StringTypeSingle
+ ]
+
+2. The parser will then parse the tokens into the following HsExpr:
+
+ HsInterString ext
+ [ HsInterRaw ext "Hello "
+ , HsInterExp ext $
+ HsApp ext
+ (HsVar ext 'Text.toUpper)
+ (HsVar ext 'name)
+ , HsInterRaw ext "!"
+ ]
+
+3. This will then be desugared into <TODO>
+-}
+
-- -----------------------------------------------------------------------------
-- DList
=====================================
compiler/Language/Haskell/Syntax/Expr.hs
=====================================
@@ -350,7 +350,6 @@ data HsExpr p
-- erroring expression will be written after
-- solving. See Note [Holes] in GHC.Tc.Types.Constraint.
-
| HsRecSel (XRecSel p)
(FieldOcc p) -- ^ Variable pointing to record selector
-- See Note [Non-overloaded record field selectors] and
@@ -361,12 +360,18 @@ data HsExpr p
| HsIPVar (XIPVar p)
HsIPName -- ^ Implicit parameter (not in use after typechecking)
+
| HsOverLit (XOverLitE p)
(HsOverLit p) -- ^ Overloaded literals
| HsLit (XLitE p)
(HsLit p) -- ^ Simple (non-overloaded) literals
+ | -- | See Note [Interpolated strings]
+ HsInterString
+ (XInterString p)
+ [Either FastString (HsExpr p)]
+
-- | Lambda, Lambda-case, and Lambda-cases
--
-- - 'GHC.Parser.Annotation.AnnKeywordId' : 'GHC.Parser.Annotation.AnnLam',
=====================================
compiler/Language/Haskell/Syntax/Extension.hs
=====================================
@@ -413,6 +413,9 @@ type family XOverLabel x
type family XIPVar x
type family XOverLitE x
type family XLitE x
+type family XInterString x
+type family XInterStringRaw x
+type family XInterStringExp x
type family XLam x
type family XLamCase x
type family XApp x
=====================================
libraries/array
=====================================
@@ -1 +1 @@
-Subproject commit c9cb2c1e8762aa83b6e77af82c87a55e03e990e4
+Subproject commit ba5e9dcf1370190239395b8361b1c92ea9fc7632
=====================================
libraries/deepseq
=====================================
@@ -1 +1 @@
-Subproject commit 7ce6e2d3760b23336fd5f9a36f50df6571606947
+Subproject commit 09aed1bf774f2f05c8b390539ce35adf5cd68c30
=====================================
libraries/directory
=====================================
@@ -1 +1 @@
-Subproject commit 6045b93c4ef7a713c8f3d6837ca69f8e96b12bf1
+Subproject commit a97a8a8f30d652f972192122fd5f459a147c13e5
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/e069215631ede6dc7c4ee35fab30dbe049ee856d
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/e069215631ede6dc7c4ee35fab30dbe049ee856d
You're receiving this email because of your account on gitlab.haskell.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20240921/094251ec/attachment-0001.html>
More information about the ghc-commits
mailing list