[Git][ghc/ghc][wip/interpolated-strings] Implement interpolated strings

Brandon Chinn (@brandonchinn178) gitlab at gitlab.haskell.org
Sat Sep 21 19:18:31 UTC 2024



Brandon Chinn pushed to branch wip/interpolated-strings at Glasgow Haskell Compiler / GHC


Commits:
e0692156 by Brandon Chinn at 2024-09-21T12:18:23-07:00
Implement interpolated strings

- - - - -


8 changed files:

- compiler/GHC/Hs/Expr.hs
- compiler/GHC/Parser/Lexer.x
- compiler/GHC/Parser/String.hs
- compiler/Language/Haskell/Syntax/Expr.hs
- compiler/Language/Haskell/Syntax/Extension.hs
- libraries/array
- libraries/deepseq
- libraries/directory


Changes:

=====================================
compiler/GHC/Hs/Expr.hs
=====================================
@@ -242,6 +242,9 @@ type instance XIPVar         GhcRn = NoExtField
 type instance XIPVar         GhcTc = DataConCantHappen
 type instance XOverLitE      (GhcPass _) = NoExtField
 type instance XLitE          (GhcPass _) = NoExtField
+type instance XInterString   (GhcPass _) = NoExtField
+type instance XInterStringRaw (GhcPass _) = NoExtField
+type instance XInterStringExp (GhcPass _) = NoExtField
 type instance XLam           (GhcPass _) = [AddEpAnn]
 type instance XApp           (GhcPass _) = NoExtField
 


=====================================
compiler/GHC/Parser/Lexer.x
=====================================
@@ -167,7 +167,7 @@ $idchar    = [$small $large $digit $uniidchar \']
 
 $unigraphic = \x06 -- Trick Alex into handling Unicode. See Note [Unicode in Alex].
 $graphic   = [$small $large $symbol $digit $idchar $special $unigraphic \"\']
-$charesc   = [a b f n r t v \\ \" \' \&]
+$charesc   = [a b f n r t v \\ \" \' \& \$]
 
 $binit     = 0-1
 $octit     = 0-7
@@ -223,10 +223,11 @@ $docsym    = [\| \^ \* \$]
 -- N.B. ideally, we would do `@escape # \\ \&` instead of duplicating in @escapechar,
 -- which is what the Haskell Report says, but this isn't valid Alex syntax, as only
 -- character sets can be subtracted, not strings
- at escape     = \\ ( $charesc      | @ascii | @decimal | o @octal | x @hexadecimal )
- at escapechar = \\ ( $charesc # \& | @ascii | @decimal | o @octal | x @hexadecimal )
- at stringchar = ($graphic # [\\ \"]) | " " | @escape     | @gap
- at char       = ($graphic # [\\ \']) | " " | @escapechar
+ at escape          = \\ ( $charesc      | @ascii | @decimal | o @octal | x @hexadecimal )
+ at escapechar      = \\ ( $charesc # \& | @ascii | @decimal | o @octal | x @hexadecimal )
+ at stringchar      = ($graphic # [\\ \"])    | " " | @escape     | @gap
+ at char            = ($graphic # [\\ \'])    | " " | @escapechar
+ at stringinterchar = ($graphic # [\\ \" \$]) | " " | @escape     | @gap
 
 -- normal signed numerical literals can only be explicitly negative,
 -- not explicitly positive (contrast @exponent)
@@ -699,6 +700,25 @@ $unigraphic / { isSmartQuote } { smart_quote_error }
   (\" | \"\") / ([\n .] # \") { tok_string_multi_content }
 }
 
+-- See Note [Interpolated strings]
+<0> {
+  s \" { \span _ _ _ -> pushLexState string_inter_content >> pure (L span (ITstringInterBegin src StringTypeSingle)) }
+  -- TODO: interpolated multiline strings
+}
+
+<string_inter_content> {
+  @stringinterchar* { tok_string_inter_raw }
+  \$ \{             { \span _ _ _ -> pushLexState string_inter >> pure (L span (ITstringInterExpOpen src)) }
+  \"                { \span _ _ _ -> popLexState >> pure (L span (ITstringInterEnd src StringTypeSingle)) }
+
+  -- TODO: check for smart quotes
+}
+
+-- TODO: add string_inter state to all <0> states that can be in an interpolated string
+<string_inter> {
+  \} { \span _ _ _ -> popLexState >> pure (L span (ITstringInterExpClose src)) }
+}
+
 <0> {
   \'\' { token ITtyQuote }
 
@@ -993,6 +1013,13 @@ data Token
   | ITchar   SourceText Char                  -- Note [Literal source text] in "GHC.Types.SourceText"
   | ITstring SourceText FastString StringType -- Note [Literal source text] in "GHC.Types.SourceText"
 
+  -- See Note [Interpolated strings]
+  | ITstringInterBegin    SourceText StringType -- Note [Literal source text] in "GHC.Types.SourceText"
+  | ITstringInterRaw      SourceText FastString -- Note [Literal source text] in "GHC.Types.SourceText"
+  | ITstringInterExpOpen  SourceText            -- Note [Literal source text] in "GHC.Types.SourceText"
+  | ITstringInterExpClose SourceText            -- Note [Literal source text] in "GHC.Types.SourceText"
+  | ITstringInterEnd      SourceText StringType -- Note [Literal source text] in "GHC.Types.SourceText"
+
   | ITinteger  IntegralLit           -- Note [Literal source text] in "GHC.Types.SourceText"
   | ITrational FractionalLit
 


=====================================
compiler/GHC/Parser/String.hs
=====================================
@@ -378,6 +378,47 @@ It's more precisely defined with the following algorithm:
 3. Calculate the longest prefix of whitespace shared by all lines in the remaining list
 -}
 
+{-
+Note [Interpolated strings]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Interpolated string syntax was accepted in this proposal:
+https://github.com/ghc-proposals/ghc-proposals/pull/570
+
+Interpolated strings are syntax sugar for <TODO>
+
+Interpolated strings are implemented in the following manner:
+
+1. Lexer takes the string as input:
+
+    s"Hello ${Text.toUpper name}!"
+
+  and outputs the following tokens:
+
+    [ ITstringInterBegin    src StringTypeSingle
+    , ITstringInterRaw      src "Hello "
+    , ITstringInterExpOpen  src
+    , ITqvarid                  ("Text.toUpper", "name")
+    , ITvarid                   "name"
+    , ITstringInterExpClose src
+    , ITstringInterRaw      src "!"
+    , ITstringInterEnd      src StringTypeSingle
+    ]
+
+2. The parser will then parse the tokens into the following HsExpr:
+
+    HsInterString ext
+      [ HsInterRaw ext "Hello "
+      , HsInterExp ext $
+          HsApp ext
+            (HsVar ext 'Text.toUpper)
+            (HsVar ext 'name)
+      , HsInterRaw ext "!"
+      ]
+
+3. This will then be desugared into <TODO>
+-}
+
 -- -----------------------------------------------------------------------------
 -- DList
 


=====================================
compiler/Language/Haskell/Syntax/Expr.hs
=====================================
@@ -350,7 +350,6 @@ data HsExpr p
                              --   erroring expression will be written after
                              --   solving. See Note [Holes] in GHC.Tc.Types.Constraint.
 
-
   | HsRecSel  (XRecSel p)
               (FieldOcc p) -- ^ Variable pointing to record selector
                            -- See Note [Non-overloaded record field selectors] and
@@ -361,12 +360,18 @@ data HsExpr p
 
   | HsIPVar   (XIPVar p)
               HsIPName   -- ^ Implicit parameter (not in use after typechecking)
+
   | HsOverLit (XOverLitE p)
               (HsOverLit p)  -- ^ Overloaded literals
 
   | HsLit     (XLitE p)
               (HsLit p)      -- ^ Simple (non-overloaded) literals
 
+  | -- | See Note [Interpolated strings]
+    HsInterString
+      (XInterString p)
+      [Either FastString (HsExpr p)]
+
   -- | Lambda, Lambda-case, and Lambda-cases
   --
   -- - 'GHC.Parser.Annotation.AnnKeywordId' : 'GHC.Parser.Annotation.AnnLam',


=====================================
compiler/Language/Haskell/Syntax/Extension.hs
=====================================
@@ -413,6 +413,9 @@ type family XOverLabel      x
 type family XIPVar          x
 type family XOverLitE       x
 type family XLitE           x
+type family XInterString    x
+type family XInterStringRaw x
+type family XInterStringExp x
 type family XLam            x
 type family XLamCase        x
 type family XApp            x


=====================================
libraries/array
=====================================
@@ -1 +1 @@
-Subproject commit c9cb2c1e8762aa83b6e77af82c87a55e03e990e4
+Subproject commit ba5e9dcf1370190239395b8361b1c92ea9fc7632


=====================================
libraries/deepseq
=====================================
@@ -1 +1 @@
-Subproject commit 7ce6e2d3760b23336fd5f9a36f50df6571606947
+Subproject commit 09aed1bf774f2f05c8b390539ce35adf5cd68c30


=====================================
libraries/directory
=====================================
@@ -1 +1 @@
-Subproject commit 6045b93c4ef7a713c8f3d6837ca69f8e96b12bf1
+Subproject commit a97a8a8f30d652f972192122fd5f459a147c13e5



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/e069215631ede6dc7c4ee35fab30dbe049ee856d

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/e069215631ede6dc7c4ee35fab30dbe049ee856d
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20240921/094251ec/attachment-0001.html>


More information about the ghc-commits mailing list