[Git][ghc/ghc][wip/interpolated-strings] Implement interpolated strings

Brandon Chinn (@brandonchinn178) gitlab at gitlab.haskell.org
Sun Dec 22 06:35:08 UTC 2024



Brandon Chinn pushed to branch wip/interpolated-strings at Glasgow Haskell Compiler / GHC


Commits:
0af9f775 by Brandon Chinn at 2024-12-21T22:34:55-08:00
Implement interpolated strings

- - - - -


13 changed files:

- compiler/GHC/Hs/Expr.hs
- compiler/GHC/Hs/Instances.hs
- compiler/GHC/Hs/Lit.hs
- compiler/GHC/Hs/Syn/Type.hs
- compiler/GHC/HsToCore/Ticks.hs
- compiler/GHC/Parser.y
- compiler/GHC/Parser/Lexer.x
- compiler/GHC/Parser/String.hs
- compiler/GHC/Rename/Expr.hs
- compiler/GHC/Tc/Types/Origin.hs
- compiler/Language/Haskell/Syntax/Expr.hs
- compiler/Language/Haskell/Syntax/Extension.hs
- testsuite/tests/parser/should_run/all.T


Changes:

=====================================
compiler/GHC/Hs/Expr.hs
=====================================
@@ -254,6 +254,10 @@ type instance XIPVar         GhcRn = NoExtField
 type instance XIPVar         GhcTc = DataConCantHappen
 type instance XOverLitE      (GhcPass _) = NoExtField
 type instance XLitE          (GhcPass _) = NoExtField
+type instance XInterString   (GhcPass _) = NoExtField
+-- | Note: does not contain any delimiters
+type instance XInterStringRaw (GhcPass _) = SourceText
+type instance XInterStringExp (GhcPass _) = NoExtField
 type instance XLam           (GhcPass _) = EpAnnLam
 type instance XApp           (GhcPass _) = NoExtField
 
@@ -710,6 +714,24 @@ ppr_expr (HsOverLabel s l) = case ghcPass @p of
                           SourceText src -> ftext src
 ppr_expr (HsLit _ lit)       = ppr lit
 ppr_expr (HsOverLit _ lit)   = ppr lit
+
+ppr_expr (HsInterString _ strType parts) =
+  char 's' <> delim <> hcat (map pprInterPart parts) <> delim
+  where
+    pprInterPart = \case
+      HsInterStringRaw st s ->
+        case (strType, st) of
+          (HsStringTypeSingle, SourceText src) -> ftext src
+          (HsStringTypeSingle, NoSourceText) -> pprHsString' (unpackFS s)
+          (HsStringTypeMulti, SourceText src) -> vcat $ map text $ split '\n' (unpackFS src)
+          (HsStringTypeMulti, NoSourceText) -> pprHsStringMulti' (unpackFS s)
+      HsInterStringExpr _ expr -> text "${" <> ppr_lexpr expr <> text "}"
+
+    delim =
+      case strType of
+        HsStringTypeSingle -> char '"'
+        HsStringTypeMulti -> text "\"\"\""
+
 ppr_expr (HsPar _ e)         = parens (ppr_lexpr e)
 
 ppr_expr (HsPragE _ prag e) = sep [ppr prag, ppr_lexpr e]
@@ -1028,6 +1050,7 @@ hsExprNeedsParens prec = go
     go (HsOverLabel{})                = False
     go (HsLit _ l)                    = hsLitNeedsParens prec l
     go (HsOverLit _ ol)               = hsOverLitNeedsParens prec ol
+    go (HsInterString{})              = False
     go (HsPar{})                      = False
     go (HsApp{})                      = prec >= appPrec
     go (HsAppType {})                 = prec >= appPrec


=====================================
compiler/GHC/Hs/Instances.hs
=====================================
@@ -319,6 +319,11 @@ deriving instance Data (HsTupArg GhcPs)
 deriving instance Data (HsTupArg GhcRn)
 deriving instance Data (HsTupArg GhcTc)
 
+-- deriving instance (DataIdLR p p) => Data (HsInterStringPart p)
+deriving instance Data (HsInterStringPart GhcPs)
+deriving instance Data (HsInterStringPart GhcRn)
+deriving instance Data (HsInterStringPart GhcTc)
+
 -- deriving instance (DataIdLR p p) => Data (HsCmd p)
 deriving instance Data (HsCmd GhcPs)
 deriving instance Data (HsCmd GhcRn)


=====================================
compiler/GHC/Hs/Lit.hs
=====================================
@@ -48,6 +48,7 @@ import Language.Haskell.Syntax.Lit
 
 type instance XHsChar       (GhcPass _) = SourceText
 type instance XHsCharPrim   (GhcPass _) = SourceText
+-- | Note: contains quote delimiters
 type instance XHsString     (GhcPass _) = SourceText
 type instance XHsStringPrim (GhcPass _) = SourceText
 type instance XHsInt        (GhcPass _) = NoExtField


=====================================
compiler/GHC/Hs/Syn/Type.hs
=====================================
@@ -107,6 +107,7 @@ hsExprType (HsOverLabel v _) = dataConCantHappen v
 hsExprType (HsIPVar v _) = dataConCantHappen v
 hsExprType (HsOverLit _ lit) = overLitType lit
 hsExprType (HsLit _ lit) = hsLitType lit
+hsExprType (HsInterString _ _ _) = stringTy -- TODO: handle OverloadedStrings
 hsExprType (HsLam _ _ (MG { mg_ext = match_group })) = matchGroupTcType match_group
 hsExprType (HsApp _ f _) = funResultTy $ lhsExprType f
 hsExprType (HsAppType x f _) = piResultTy (lhsExprType f) x


=====================================
compiler/GHC/HsToCore/Ticks.hs
=====================================
@@ -1,4 +1,5 @@
 {-# LANGUAGE DeriveFunctor            #-}
+{-# LANGUAGE LambdaCase               #-}
 {-# LANGUAGE NondecreasingIndentation #-}
 {-# LANGUAGE TypeFamilies             #-}
 
@@ -476,6 +477,11 @@ addTickHsExpr e@(HsIPVar {})            = return e
 addTickHsExpr e@(HsOverLit {})          = return e
 addTickHsExpr e@(HsOverLabel{})         = return e
 addTickHsExpr e@(HsLit {})              = return e
+addTickHsExpr (HsInterString x ty parts) = do
+  parts' <- forM parts $ \case
+    part@(HsInterStringRaw {}) -> return part
+    HsInterStringExpr x e -> HsInterStringExpr x <$> addTickLHsExpr e
+  return $ HsInterString x ty parts'
 addTickHsExpr e@(HsEmbTy {})            = return e
 addTickHsExpr e@(HsQual {})             = return e
 addTickHsExpr e@(HsForAll {})           = return e


=====================================
compiler/GHC/Parser.y
=====================================
@@ -730,6 +730,13 @@ are the most common patterns, rewritten as regular expressions for clarity:
  CHAR           { L _ (ITchar   _ _) }
  STRING         { L _ (ITstring _ StringTypeSingle _) }
  STRING_MULTI   { L _ (ITstring _ StringTypeMulti _) }
+ STRING_INTER_BEGIN       { L _ (ITstringInterBegin StringTypeSingle) }
+ STRING_INTER_END         { L _ (ITstringInterEnd   StringTypeSingle) }
+ STRING_INTER_MULTI_BEGIN { L _ (ITstringInterBegin StringTypeMulti) }
+ STRING_INTER_MULTI_END   { L _ (ITstringInterEnd   StringTypeMulti) }
+ STRING_INTER_RAW         { L _ (ITstringInterRaw _ _) }
+ STRING_INTER_EXP_OPEN    { L _ ITstringInterExpOpen }
+ STRING_INTER_EXP_CLOSE   { L _ ITstringInterExpClose }
  INTEGER        { L _ (ITinteger _) }
  RATIONAL       { L _ (ITrational _) }
 
@@ -3114,6 +3121,7 @@ aexp2   :: { ECP }
 -- into HsOverLit when -XOverloadedStrings is on.
 --      | STRING    { sL (getLoc $1) (HsOverLit $! mkHsIsString (getSTRINGs $1)
 --                                       (getSTRING $1) noExtField) }
+        | stringInter                   { ecpFromExp $1 }
         | INTEGER   { ECP $ mkHsOverLitPV (sL1a $1 $ mkHsIntegral   (getINTEGER  $1)) }
         | RATIONAL  { ECP $ mkHsOverLitPV (sL1a $1 $ mkHsFractional (getRATIONAL $1)) }
 
@@ -3708,6 +3716,22 @@ ipvar   :: { Located HsIPName }
 overloaded_label :: { Located (SourceText, FastString) }
         : LABELVARID          { sL1 $1 (getLABELVARIDs $1, getLABELVARID $1) }
 
+-----------------------------------------------------------------------------
+-- Interpolated strings
+-- See Note [Interpolated strings] in GHC.Parser.String
+
+stringInter :: { LHsExpr GhcPs }
+        : STRING_INTER_BEGIN       stringInterParts STRING_INTER_END       { processStringInter StringTypeSingle $1 $2 $3 }
+        | STRING_INTER_MULTI_BEGIN stringInterParts STRING_INTER_MULTI_END { processStringInter StringTypeMulti  $1 $2 $3 }
+
+stringInterParts :: { [Either (SourceText, RawLexedString) (LHsExpr GhcPs)] }
+        : stringInterPart                  { [$1] }
+        | stringInterPart stringInterParts { $1 : $2 }
+
+stringInterPart :: { Either (SourceText, RawLexedString) (LHsExpr GhcPs) }
+        : STRING_INTER_RAW                                 { Left (getStringInterRaw $1) }
+        | STRING_INTER_EXP_OPEN exp STRING_INTER_EXP_CLOSE { Right $2 }
+
 -----------------------------------------------------------------------------
 -- Warnings and deprecations
 
@@ -4284,6 +4308,70 @@ getSCC lt = do let s = getSTRING lt
 stringLiteralToHsDocWst :: Located StringLiteral -> LocatedE (WithHsDocIdentifiers StringLiteral GhcPs)
 stringLiteralToHsDocWst  sl = reLoc $ lexStringLiteral parseIdentifier sl
 
+getStringInterRaw :: Located Token -> (SourceText, RawLexedString)
+getStringInterRaw (L _ (ITstringInterRaw src s)) = (src, s)
+
+processStringInter ::
+     StringType
+  -> Located Token
+  -> Located Token
+  -> [Either (SourceText, RawLexedString) (LHsExpr GhcPs)]
+  -> LHsExpr GhcPs
+processStringInter strType tokBegin tokEnd parts =
+  L (comb2 tokBegin tokEnd) $
+    HsInterString noExtField strType $ processParts parts
+  where
+    processParts =
+      map toInterStringPart $
+        case strType of
+          StringTypeSingle -> map (first (fmap fromRawLexedStringSingle))
+          StringTypeMulti  -> fromAlt . withoutSrcText fromRawLexedStringMulti . toAlt
+
+    toInterStringPart = \case
+      Left (src, s) -> HsInterStringRaw src (fsLit s)
+      Right e -> HsInterStringExpr noExtField e
+
+    -- Strip SourceText annotations, run the given function, and
+    -- reapply SourceText annotations. Assumes the function does
+    -- not change the order or number of elements, which is true
+    -- for fromRawLexedStringMulti.
+    withoutSrcText ::
+      ((s, [(x, s)]) -> (s, [(x, s)])) ->
+      ((SourceText, s), [(x, (SourceText, s))]) ->
+      ((SourceText, s), [(x, (SourceText, s))])
+    withoutSrcText f vals =
+      let
+        -- extract out (SourceText, [SourceText]) from the parts
+        unannotate ((src, s), parts) = ((src, map (fst . snd) parts), (s, map (fmap snd) parts))
+        -- reapply SourceTexts
+        reannotate ((src0, srcs), (s, parts)) =
+          ( (src0, s)
+          , zipWith (\src (x, s) -> (x, (src, s))) srcs parts
+          )
+      in
+        reannotate . f . unannotate
+
+    toAlt :: Monoid s => [Either s x] -> (s, [(x, s)])
+    toAlt =
+      let go = \case
+            [] -> (mempty, [])
+            Left s : [] -> (s, [])
+            Left s1 : Left s2 : rest -> go $ Left (s1 <> s2) : rest
+            Left s : Right x : rest ->
+              let (s', rest') = go rest
+               in (s, (x, s') : rest')
+            Right x : rest ->
+              let (s, rest') = go rest
+               in (mempty, (x, s) : rest')
+       in go
+
+    fromAlt :: Foldable s => (s, [(x, s)]) -> [Either s x]
+    fromAlt (s, xs) =
+      let notEmpty = \case
+            Left s -> null s
+            Right _ -> True
+      in filter notEmpty $ Left s : concatMap (\(x, s') -> [Right x, Left s']) xs
+
 -- Utilities for combining source spans
 comb2 :: (HasLoc a, HasLoc b) => a -> b -> SrcSpan
 comb2 !a !b = combineHasLocs a b


=====================================
compiler/GHC/Parser/Lexer.x
=====================================
@@ -170,7 +170,7 @@ $idchar    = [$small $large $digit $uniidchar \']
 
 $unigraphic = \x06 -- Trick Alex into handling Unicode. See Note [Unicode in Alex].
 $graphic   = [$small $large $symbol $digit $idchar $special $unigraphic \"\']
-$charesc   = [a b f n r t v \\ \" \' \&]
+$charesc   = [a b f n r t v \\ \" \' \& \$]
 
 $binit     = 0-1
 $octit     = 0-7
@@ -228,8 +228,9 @@ $docsym    = [\| \^ \* \$]
 -- character sets can be subtracted, not strings
 @escape     = \\ ( $charesc      | @ascii | @decimal | o @octal | x @hexadecimal )
 @escapechar = \\ ( $charesc # \& | @ascii | @decimal | o @octal | x @hexadecimal )
- at stringchar = ($graphic # [\\ \"]) | $space | @escape     | @gap
- at char       = ($graphic # [\\ \']) | $space | @escapechar
+ at stringchar = ($graphic # [\\ \"])         | $space | @escape     | @gap
+ at char       = ($graphic # [\\ \'])         | $space | @escapechar
+ at stringinterchar = ($graphic # [\\ \" \$]) | $space | @escape     | @gap
 
 -- normal signed numerical literals can only be explicitly negative,
 -- not explicitly positive (contrast @exponent)
@@ -629,6 +630,21 @@ $unigraphic / { isSmartQuote } { smart_quote_error }
   (\" | \"\") / ([\n .] # \") { tok_string_multi_content }
 }
 
+-- See Note [Interpolated strings]
+<0> {
+  s \" { \span _ _ _ -> pushLexState string_inter_content >> pure (L span (ITstringInterBegin StringTypeSingle)) }
+  -- TODO(bchinn): interpolated multiline strings
+}
+
+-- TODO(bchinn): add string_inter state to all <0> states that can be in an interpolated string
+<string_inter_content> {
+  @stringinterchar* { tok_string_inter_raw }
+  \$ \{             { \span _ _ _ -> pushLexState string_inter >> pure (L span ITstringInterExpOpen) }
+  \"                { \span _ _ _ -> popLexState >> pure (L span (ITstringInterEnd StringTypeSingle)) }
+
+  -- TODO(bchinn): check for smart quotes
+}
+
 <0> {
   \'\' { token ITtyQuote }
 
@@ -923,6 +939,13 @@ data Token
   | ITchar   SourceText Char                  -- Note [Literal source text] in "GHC.Types.SourceText"
   | ITstring SourceText StringType FastString -- Note [Literal source text] in "GHC.Types.SourceText"
 
+  -- See Note [Interpolated strings]
+  | ITstringInterBegin    StringType
+  | ITstringInterRaw      SourceText RawLexedString -- Note [Literal source text] in "GHC.Types.SourceText"
+  | ITstringInterExpOpen
+  | ITstringInterExpClose
+  | ITstringInterEnd      StringType
+
   | ITinteger  IntegralLit           -- Note [Literal source text] in "GHC.Types.SourceText"
   | ITrational FractionalLit
 
@@ -1672,8 +1695,11 @@ open_brace span _str _len _buf2 = do
   setContext (NoLayout:ctx)
   return (L span ITocurly)
 close_brace span _str _len _buf2 = do
-  popContext
-  return (L span ITccurly)
+  ctx <- getContext
+  sc <- getLexState
+  if null ctx && sc == string_inter
+    then popLexState >> pure (L span ITstringInterExpClose)
+    else popContext >> pure (L span ITccurly)
 
 qvarid, qconid :: StringBuffer -> Int -> Token
 qvarid buf len = ITqvarid $! splitQualName buf len False
@@ -2171,6 +2197,14 @@ tok_string span buf len _buf2 = do
     src = SourceText $ lexemeToFastString buf len
     endsInHash = currentChar (offsetBytes (len - 1) buf) == '#'
 
+tok_string_inter_raw :: Action
+tok_string_inter_raw span buf len _ = do
+  s <- either (throwStringLexError i0) pure $ lexStringRaw len buf
+  pure $ L span (ITstringInterRaw src s)
+  where
+    i0 = AI (psSpanStart span) buf
+    src = SourceText $ lexemeToFastString buf len
+
 -- | Ideally, we would define this completely with Alex syntax, like normal strings.
 -- Instead, this is defined as a hybrid solution by manually invoking lex states, which
 -- we're doing for two reasons:


=====================================
compiler/GHC/Parser/String.hs
=====================================
@@ -8,6 +8,10 @@ module GHC.Parser.String (
   StringType (..),
   lexString,
 
+  -- * Raw strings
+  RawLexedString,
+  lexStringRaw,
+
   -- * Unicode smart quote helpers
   isDoubleSmartQuote,
   isSingleSmartQuote,
@@ -17,8 +21,11 @@ import GHC.Prelude hiding (getChar)
 
 import Control.Arrow ((>>>))
 import Control.Monad (when)
+import Data.Bifunctor (first)
 import Data.Char (chr, ord)
 import qualified Data.Foldable1 as Foldable1
+import Data.Functor.Identity (Identity (..))
+import Data.List (unsnoc)
 import qualified Data.List.NonEmpty as NonEmpty
 import Data.Maybe (listToMaybe, mapMaybe)
 import GHC.Data.StringBuffer (StringBuffer)
@@ -259,6 +266,76 @@ isSingleSmartQuote = \case
   '’' -> True
   _ -> False
 
+-- -----------------------------------------------------------------------------
+-- Interpolated strings
+
+-- | A string that's been validated to be lexically correct, but still
+-- contains the raw string lexed, without anything resolved.
+newtype RawLexedString = RawLexedString {unRawLexedString :: String}
+  deriving (Foldable, Semigroup, Monoid)
+
+-- | Load and validate the string in the given StringBuffer.
+--
+-- e.g. Lexing "a\nb" will return RawLexedString ['a', '\\', 'n', 'b'].
+lexStringRaw :: Int -> StringBuffer -> Either StringLexError RawLexedString
+lexStringRaw len buf = RawLexedString (bufferChars len buf) <$ validateString len buf
+
+fromRawLexedStringSingle :: RawLexedString -> String
+fromRawLexedStringSingle (RawLexedString s) =
+  case processCharsSingle s of
+    Right s' -> s'
+    Left _ -> panic "Unexpectedly got an error when re-lexing the string"
+
+fromRawLexedStringMulti :: (RawLexedString, [(x, RawLexedString)]) -> (String, [(x, String)])
+fromRawLexedStringMulti s =
+  case processCharsMulti' (to s) of
+    Just s' -> from s'
+    Nothing -> panic "Unexpectedly got an error when re-lexing the string"
+  where
+    to (pre, parts) = InterMultiString pre parts
+    from (InterMultiString pre parts) = (pre, parts)
+
+{-
+Note [Interpolated strings]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Interpolated string syntax was accepted in this proposal:
+https://github.com/ghc-proposals/ghc-proposals/pull/570
+
+Interpolated strings are syntax sugar for <TODO(bchinn)>
+
+Interpolated strings are implemented in the following manner:
+
+1. Lexer takes the string as input:
+
+    s"Hello ${Text.toUpper name}!"
+
+  and outputs the following tokens:
+
+    [ ITstringInterBegin    src StringTypeSingle
+    , ITstringInterRaw      src "Hello "
+    , ITstringInterExpOpen  src
+    , ITqvarid                  ("Text.toUpper", "name")
+    , ITvarid                   "name"
+    , ITstringInterExpClose src
+    , ITstringInterRaw      src "!"
+    , ITstringInterEnd      src StringTypeSingle
+    ]
+
+2. The parser will then parse the tokens into the following HsExpr:
+
+    HsInterString ext
+      [ HsInterRaw ext "Hello "
+      , HsInterExp ext $
+          HsApp ext
+            (HsVar ext 'Text.toUpper)
+            (HsVar ext 'name)
+      , HsInterRaw ext "!"
+      ]
+
+3. This will then be desugared into <TODO(bchinn)>
+-}
+
 -- -----------------------------------------------------------------------------
 -- Multiline strings
 
@@ -268,20 +345,54 @@ isSingleSmartQuote = \case
 -- and rejoining lines, and instead manually find newline characters,
 -- for performance.
 processCharsMulti :: String -> Maybe String
-processCharsMulti =
-      collapseGaps             -- Step 1
-  >>> normalizeEOL
-  >>> expandLeadingTabs        -- Step 3
-  >>> rmCommonWhitespacePrefix -- Step 4
-  >>> collapseOnlyWsLines      -- Step 5
-  >>> rmFirstNewline           -- Step 7a
-  >>> rmLastNewline            -- Step 7b
-  >>> resolveEscapesMaybe      -- Step 8
+processCharsMulti = fmap from . processCharsMulti' . to
+  where
+    -- Convert a normal multiline string to/from an interpolated multiline string
+    -- with no interpolated expressions.
+    to s = InterMultiString s []
+    from = \case
+      InterMultiString s [] -> s
+      _ -> panic "Got unexpected result when processing characters in multiline string"
+
+-- | An interpolated, multiline string to be processed.
+--
+-- `x` here will only ever be instantiated as `HsExpr`, but we'll leave it general to ensure
+-- we never modify it, we only ever propagate it.
+--
+-- We represent this as a list of (x, String) tuples instead of [Either x String] to guarantee
+-- that we don't have to handle two raw Strings next to each other.
+data InterMultiString x =
+  InterMultiString
+    String        -- ^ beginning of the string before the first interpolated expr
+    [(x, String)] -- ^ (expr, raw string) interleaved groups
+
+-- Run the given function over all raw strings, ignoring expressions
+overRaw :: (String -> String) -> InterMultiString x -> InterMultiString x
+overRaw f = runIdentity . overRawM (Identity . f)
+
+overRawM :: Monad m => (String -> m String) -> InterMultiString x -> m (InterMultiString x)
+overRawM f (InterMultiString pre parts) = InterMultiString <$> f pre <*> (traverse . traverse) f parts
+
+-- | Process multiline characters generally, for both normal multiline strings and interpolated
+-- multiline strings.
+processCharsMulti' :: InterMultiString x -> Maybe (InterMultiString x)
+processCharsMulti' =
+      overRaw collapseGaps         -- Step 1
+  >>> overRaw normalizeEOL
+  >>> expandLeadingTabs            -- Step 3
+  >>> rmCommonWhitespacePrefix     -- Step 4
+  >>> collapseOnlyWsLines          -- Step 5
+  >>> rmFirstNewline               -- Step 7a
+  >>> rmLastNewline                -- Step 7b
+  >>> overRawM resolveEscapesMaybe -- Step 8
 
 -- | Expands all tabs blindly, since the lexer will verify that tabs can only appear
 -- as leading indentation
-expandLeadingTabs :: String -> String
-expandLeadingTabs = go 0
+expandLeadingTabs :: InterMultiString x -> InterMultiString x
+expandLeadingTabs =
+  -- we can expand each raw string part independently, because leading
+  -- indentation will never contain an interpolated expression
+  overRaw $ go 0
   where
     go !col = \case
       c@'\t' : cs ->
@@ -303,21 +414,27 @@ normalizeEOL = go
       c : cs -> c : go cs
       [] -> []
 
-rmCommonWhitespacePrefix :: String -> String
-rmCommonWhitespacePrefix cs0 = go cs0
+rmCommonWhitespacePrefix :: InterMultiString x -> InterMultiString x
+rmCommonWhitespacePrefix s0 =
+  -- Whitespace prefix, by definition, only comes after newline characters, and there can
+  -- never be an interpolated expr within a whitespace prefix (since the expr would end
+  -- the prefix). So we can use a plain `map` to just process the string parts, because
+  -- the "drop prefix" logic will never span over multiple parts.
+  map (first go) parts
   where
-    commonWSPrefix = getCommonWsPrefix cs0
+    -- treat interpolated exprs as a single, non-space character string
+    commonWSPrefix = getCommonWsPrefix $ case s0 of InterMultiString pre parts -> pre ++ concatMap snd parts
 
     go = \case
-      c@'\n' : cs -> c : go (dropLine commonWSPrefix cs)
+      c@'\n' : cs -> c : go (dropPrefix commonWSPrefix cs)
       c : cs -> c : go cs
       [] -> []
 
     -- drop x characters from the string, or up to a newline, whichever comes first
-    dropLine !x = \case
+    dropPrefix !x = \case
       cs | x <= 0 -> cs
       cs@('\n' : _) -> cs
-      _ : cs -> dropLine (x - 1) cs
+      _ : cs -> dropPrefix (x - 1) cs
       [] -> []
 
 -- | See step 4 in Note [Multiline string literals]
@@ -334,30 +451,42 @@ getCommonWsPrefix s =
       . drop 1                      -- ignore first line in calculation
       $ lines s
 
-collapseOnlyWsLines :: String -> String
-collapseOnlyWsLines = go
+collapseOnlyWsLines :: InterMultiString x -> InterMultiString x
+collapseOnlyWsLines (InterMultiString pre parts) =
+  let pre' = go (null parts) pre
+      parts' = [(expr, go isLast s) | ((expr, s), isLast) <- withIsLast parts]
+   in InterMultiString pre' parts'
   where
-    go = \case
-      c@'\n' : cs | Just cs' <- checkAllWs cs -> c : go cs'
+    go isLast = \case
+      c@'\n' : cs | Just cs' <- checkAllWs isLast cs -> c : go cs'
       c : cs -> c : go cs
       [] -> []
 
-    checkAllWs = \case
+    checkAllWs isLast = \case
       -- got all the way to a newline or the end of the string, return
       cs@('\n' : _) -> Just cs
-      cs@[] -> Just cs
+      cs@[] | isLast -> Just cs
       -- found whitespace, continue
       c : cs | is_space c -> checkAllWs cs
       -- anything else, stop
       _ -> Nothing
 
-rmFirstNewline :: String -> String
-rmFirstNewline = \case
-  '\n' : cs -> cs
-  cs -> cs
+    -- annotate every element with a Bool indicating if it's the last element
+    withIsLast :: [a] -> [(a, Bool)]
+    withIsLast xs = zip xs $ (False <$ init xs) ++ [True]
 
-rmLastNewline :: String -> String
-rmLastNewline = go
+rmFirstNewline :: InterMultiString x -> InterMultiString x
+rmFirstNewline = \case
+  InterMultiString ('\n' : pre) parts -> InterMultiString pre parts
+  s -> s
+
+rmLastNewline :: InterMultiString x -> InterMultiString x
+rmLastNewline (InterMultiString pre parts) =
+  case unsnoc parts of
+    Nothing ->
+      InterMultiString (go pre) parts
+    Just (parts0, (x, lastLine)) ->
+      InterMultiString pre (parts0 ++ [(x, go lastLine)])
   where
     go = \case
       [] -> []


=====================================
compiler/GHC/Rename/Expr.hs
=====================================
@@ -390,6 +390,9 @@ rnExpr (HsOverLit x lit)
                  return (HsApp noExtField (noLocA neg) (noLocA (HsOverLit x lit'))
                         , fvs ) }
 
+rnExpr (HsInterString x ty parts)
+  = undefined -- TODO(bchinn)
+
 rnExpr (HsApp x fun arg)
   = do { (fun',fvFun) <- rnLExpr fun
        ; (arg',fvArg) <- rnLExpr arg


=====================================
compiler/GHC/Tc/Types/Origin.hs
=====================================
@@ -537,6 +537,7 @@ data CtOrigin
 
   | LiteralOrigin (HsOverLit GhcRn)     -- Occurrence of a literal
   | NegateOrigin                        -- Occurrence of syntactic negation
+  | InterStringOrigin                   -- Occurrence of an interpolated string
 
   | ArithSeqOrigin (ArithSeqInfo GhcRn) -- [x..], [x..y] etc
   | AssocFamPatOrigin   -- When matching the patterns of an associated
@@ -723,6 +724,7 @@ exprCtOrigin (ExplicitList {})    = ListOrigin
 exprCtOrigin (HsIPVar _ ip)       = IPOccOrigin ip
 exprCtOrigin (HsOverLit _ lit)    = LiteralOrigin lit
 exprCtOrigin (HsLit {})           = Shouldn'tHappenOrigin "concrete literal"
+exprCtOrigin (HsInterString _ _ _) = InterStringOrigin
 exprCtOrigin (HsLam _ _ ms)       = matchesCtOrigin ms
 exprCtOrigin (HsApp _ e1 _)       = lexprCtOrigin e1
 exprCtOrigin (HsAppType _ e1 _)   = lexprCtOrigin e1
@@ -913,6 +915,7 @@ pprCtO PatSigOrigin          = text "a pattern type signature"
 pprCtO PatOrigin             = text "a pattern"
 pprCtO ViewPatOrigin         = text "a view pattern"
 pprCtO (LiteralOrigin lit)   = hsep [text "the literal", quotes (ppr lit)]
+pprCtO InterStringOrigin     = text "an interpolated string"
 pprCtO (ArithSeqOrigin seq)  = hsep [text "the arithmetic sequence", quotes (ppr seq)]
 pprCtO SectionOrigin         = text "an operator section"
 pprCtO (GetFieldOrigin f)    = hsep [text "selecting the field", quotes (ppr f)]


=====================================
compiler/Language/Haskell/Syntax/Expr.hs
=====================================
@@ -346,19 +346,24 @@ data HsExpr p
                              --   erroring expression will be written after
                              --   solving. See Note [Holes] in GHC.Tc.Types.Constraint.
 
-
-
   | HsOverLabel (XOverLabel p) FastString
      -- ^ Overloaded label (Note [Overloaded labels] in GHC.OverloadedLabels)
 
   | HsIPVar   (XIPVar p)
               HsIPName   -- ^ Implicit parameter (not in use after typechecking)
+
   | HsOverLit (XOverLitE p)
               (HsOverLit p)  -- ^ Overloaded literals
 
   | HsLit     (XLitE p)
               (HsLit p)      -- ^ Simple (non-overloaded) literals
 
+  | -- | See Note [Interpolated strings]
+    HsInterString
+      (XInterString p)
+      HsStringType
+      [HsInterStringPart p]
+
   -- | Lambda, Lambda-case, and Lambda-cases
   | HsLam     (XLam p)
               HsLamVariant -- ^ Tells whether this is for lambda, \case, or \cases
@@ -589,6 +594,10 @@ data HsLamVariant
   | LamCases   -- ^ `\cases psi -> ei`
   deriving (Data, Eq)
 
+data HsInterStringPart p
+  = HsInterStringRaw (XInterStringRaw p) FastString
+  | HsInterStringExpr (XInterStringExp p) (LHsExpr p)
+
 {-
 Note [Parens in HsSyn]
 ~~~~~~~~~~~~~~~~~~~~~~


=====================================
compiler/Language/Haskell/Syntax/Extension.hs
=====================================
@@ -410,6 +410,9 @@ type family XOverLabel      x
 type family XIPVar          x
 type family XOverLitE       x
 type family XLitE           x
+type family XInterString    x
+type family XInterStringRaw x
+type family XInterStringExp x
 type family XLam            x
 type family XLamCase        x
 type family XApp            x


=====================================
testsuite/tests/parser/should_run/all.T
=====================================
@@ -24,3 +24,5 @@ test('ListTuplePunsConstraints', extra_files(['ListTuplePunsConstraints.hs']), g
 test('MultilineStrings', normal, compile_and_run, [''])
 test('MultilineStringsOverloaded', normal, compile_and_run, [''])
 test('T25375', normal, compile_and_run, [''])
+# TODO(bchinn): interpolated strings
+# TODO(bchinn): interpolated multiline strings



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/0af9f7757c7c20638c61e626807aee5dc72dce94

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/0af9f7757c7c20638c61e626807aee5dc72dce94
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20241222/94f92d64/attachment-0001.html>


More information about the ghc-commits mailing list