[Git][ghc/ghc][wip/interpolated-strings] 5 commits: Merge branch 'wip/strings' into HEAD

Brandon Chinn (@brandonchinn178) gitlab at gitlab.haskell.org
Wed Sep 25 04:34:58 UTC 2024



Brandon Chinn pushed to branch wip/interpolated-strings at Glasgow Haskell Compiler / GHC


Commits:
0b20a4a8 by Brandon Chinn at 2024-09-24T21:04:45-07:00
Merge branch 'wip/strings' into HEAD

- - - - -
f46ed78e by Brandon Chinn at 2024-09-24T21:04:45-07:00
Unify ITstring + ITstringMulti

- - - - -
34df2e36 by Brandon Chinn at 2024-09-24T21:04:46-07:00
Unify HsString + HsMultilineString

- - - - -
5851b8bf by Brandon Chinn at 2024-09-24T21:07:12-07:00
Move multiline string processing functions to top-level

- - - - -
cc7d07b3 by Brandon Chinn at 2024-09-24T21:34:40-07:00
Implement interpolated strings

- - - - -


15 changed files:

- compiler/GHC/Hs/Expr.hs
- compiler/GHC/Hs/Lit.hs
- compiler/GHC/Hs/Syn/Type.hs
- compiler/GHC/HsToCore/Match/Literal.hs
- compiler/GHC/HsToCore/Quote.hs
- compiler/GHC/Parser.y
- compiler/GHC/Parser/Lexer.x
- compiler/GHC/Parser/String.hs
- compiler/GHC/Rename/Expr.hs
- compiler/GHC/Tc/Gen/HsType.hs
- compiler/Language/Haskell/Syntax/Expr.hs
- compiler/Language/Haskell/Syntax/Extension.hs
- compiler/Language/Haskell/Syntax/Lit.hs
- utils/check-exact/ExactPrint.hs
- utils/haddock/haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs


Changes:

=====================================
compiler/GHC/Hs/Expr.hs
=====================================
@@ -242,6 +242,9 @@ type instance XIPVar         GhcRn = NoExtField
 type instance XIPVar         GhcTc = DataConCantHappen
 type instance XOverLitE      (GhcPass _) = NoExtField
 type instance XLitE          (GhcPass _) = NoExtField
+type instance XInterString   (GhcPass _) = NoExtField
+type instance XInterStringRaw (GhcPass _) = NoExtField
+type instance XInterStringExp (GhcPass _) = NoExtField
 type instance XLam           (GhcPass _) = [AddEpAnn]
 type instance XApp           (GhcPass _) = NoExtField
 


=====================================
compiler/GHC/Hs/Lit.hs
=====================================
@@ -49,7 +49,6 @@ import Language.Haskell.Syntax.Lit
 type instance XHsChar       (GhcPass _) = SourceText
 type instance XHsCharPrim   (GhcPass _) = SourceText
 type instance XHsString     (GhcPass _) = SourceText
-type instance XHsMultilineString (GhcPass _) = SourceText
 type instance XHsStringPrim (GhcPass _) = SourceText
 type instance XHsInt        (GhcPass _) = NoExtField
 type instance XHsIntPrim    (GhcPass _) = SourceText
@@ -136,7 +135,6 @@ hsLitNeedsParens p = go
     go (HsChar {})        = False
     go (HsCharPrim {})    = False
     go (HsString {})      = False
-    go (HsMultilineString {}) = False
     go (HsStringPrim {})  = False
     go (HsInt _ x)        = p > topPrec && il_neg x
     go (HsInteger _ x _)  = p > topPrec && x < 0
@@ -160,7 +158,6 @@ convertLit :: HsLit (GhcPass p1) -> HsLit (GhcPass p2)
 convertLit (HsChar a x)       = HsChar a x
 convertLit (HsCharPrim a x)   = HsCharPrim a x
 convertLit (HsString a x)     = HsString a x
-convertLit (HsMultilineString a x) = HsMultilineString a x
 convertLit (HsStringPrim a x) = HsStringPrim a x
 convertLit (HsInt a x)        = HsInt a x
 convertLit (HsIntPrim a x)    = HsIntPrim a x
@@ -197,11 +194,11 @@ Equivalently it's True if
 instance Outputable (HsLit (GhcPass p)) where
     ppr (HsChar st c)       = pprWithSourceText st (pprHsChar c)
     ppr (HsCharPrim st c)   = pprWithSourceText st (pprPrimChar c)
-    ppr (HsString st s)     = pprWithSourceText st (pprHsString s)
-    ppr (HsMultilineString st s) =
-      case st of
-        NoSourceText -> pprHsString s
-        SourceText src -> vcat $ map text $ split '\n' (unpackFS src)
+    ppr (HsString st ty s)  =
+      case (ty, st) of
+        (HsStringTypeSingle, _) -> pprWithSourceText st (pprHsString s)
+        (HsStringTypeMulti, NoSourceText) -> pprHsString s
+        (HsStringTypeMulti, SourceText src) -> vcat $ map text $ split '\n' (unpackFS src)
     ppr (HsStringPrim st s) = pprWithSourceText st (pprHsBytes s)
     ppr (HsInt _ i)
       = pprWithSourceText (il_text i) (integer (il_value i))
@@ -241,7 +238,6 @@ pmPprHsLit :: HsLit (GhcPass x) -> SDoc
 pmPprHsLit (HsChar _ c)       = pprHsChar c
 pmPprHsLit (HsCharPrim _ c)   = pprHsChar c
 pmPprHsLit (HsString st s)    = pprWithSourceText st (pprHsString s)
-pmPprHsLit (HsMultilineString st s) = pprWithSourceText st (pprHsString s)
 pmPprHsLit (HsStringPrim _ s) = pprHsBytes s
 pmPprHsLit (HsInt _ i)        = integer (il_value i)
 pmPprHsLit (HsIntPrim _ i)    = integer i


=====================================
compiler/GHC/Hs/Syn/Type.hs
=====================================
@@ -75,8 +75,7 @@ hsPatType (SplicePat v _)               = dataConCantHappen v
 hsLitType :: HsLit (GhcPass p) -> Type
 hsLitType (HsChar _ _)       = charTy
 hsLitType (HsCharPrim _ _)   = charPrimTy
-hsLitType (HsString _ _)     = stringTy
-hsLitType (HsMultilineString _ _) = stringTy
+hsLitType (HsString _ _ _)   = stringTy
 hsLitType (HsStringPrim _ _) = addrPrimTy
 hsLitType (HsInt _ _)        = intTy
 hsLitType (HsIntPrim _ _)    = intPrimTy


=====================================
compiler/GHC/HsToCore/Match/Literal.hs
=====================================
@@ -121,7 +121,6 @@ dsLit l = do
     HsDoublePrim _ fl -> return (Lit (LitDouble (rationalFromFractionalLit fl)))
     HsChar _ c       -> return (mkCharExpr c)
     HsString _ str   -> mkStringExprFS str
-    HsMultilineString _ str -> mkStringExprFS str
     HsInteger _ i _  -> return (mkIntegerExpr platform i)
     HsInt _ i        -> return (mkIntExpr platform (il_value i))
     HsRat _ fl ty    -> dsFractionalLitToRational fl ty
@@ -475,7 +474,6 @@ getSimpleIntegralLit (HsInteger _ i ty) = Just (i, ty)
 getSimpleIntegralLit HsChar{}           = Nothing
 getSimpleIntegralLit HsCharPrim{}       = Nothing
 getSimpleIntegralLit HsString{}         = Nothing
-getSimpleIntegralLit HsMultilineString{} = Nothing
 getSimpleIntegralLit HsStringPrim{}     = Nothing
 getSimpleIntegralLit HsRat{}            = Nothing
 getSimpleIntegralLit HsFloatPrim{}      = Nothing


=====================================
compiler/GHC/HsToCore/Quote.hs
=====================================
@@ -3025,8 +3025,7 @@ repLiteral lit
                  HsDoublePrim _ _ -> Just doublePrimLName
                  HsChar _ _       -> Just charLName
                  HsCharPrim _ _   -> Just charPrimLName
-                 HsString _ _     -> Just stringLName
-                 HsMultilineString _ _ -> Just stringLName
+                 HsString _ _ _   -> Just stringLName
                  HsRat _ _ _      -> Just rationalLName
                  _                -> Nothing
 


=====================================
compiler/GHC/Parser.y
=====================================
@@ -727,8 +727,13 @@ are the most common patterns, rewritten as regular expressions for clarity:
  LABELVARID     { L _ (ITlabelvarid _ _) }
 
  CHAR           { L _ (ITchar   _ _) }
- STRING         { L _ (ITstring _ _) }
- STRING_MULTI   { L _ (ITstringMulti _ _) }
+ STRING         { L _ (ITstring _ _ StringTypeSingle) }
+ STRING_MULTI   { L _ (ITstring _ _ StringTypeMulti) }
+ STRING_INTER_BEGIN     { L _ (ITstringInterBegin _) }
+ STRING_INTER_RAW       { L _ (ITstringInterRaw _ _) }
+ STRING_INTER_EXP_OPEN  { L _ ITstringInterExpOpen }
+ STRING_INTER_EXP_CLOSE { L _ ITstringInterExpClose }
+ STRING_INTER_END       { L _ (ITstringInterEnd _) }
  INTEGER        { L _ (ITinteger _) }
  RATIONAL       { L _ (ITrational _) }
 
@@ -3080,6 +3085,8 @@ aexp2   :: { ECP }
 -- into HsOverLit when -XOverloadedStrings is on.
 --      | STRING    { sL (getLoc $1) (HsOverLit $! mkHsIsString (getSTRINGs $1)
 --                                       (getSTRING $1) noExtField) }
+        | stringInter                   {% fmap ecpFromExp
+                                           (ams1 $1 (HsInterString NoExtField $! unLoc $1)) }
         | INTEGER   { ECP $ mkHsOverLitPV (sL1a $1 $ mkHsIntegral   (getINTEGER  $1)) }
         | RATIONAL  { ECP $ mkHsOverLitPV (sL1a $1 $ mkHsFractional (getRATIONAL $1)) }
 
@@ -3677,6 +3684,14 @@ ipvar   :: { Located HsIPName }
 overloaded_label :: { Located (SourceText, FastString) }
         : LABELVARID          { sL1 $1 (getLABELVARIDs $1, getLABELVARID $1) }
 
+-----------------------------------------------------------------------------
+-- Interpolated strings
+-- See Note [Interpolated strings] in GHC.Parser.String
+
+stringInter :: { [Either FastString (LHsExpr GhcPs)] }
+        -- TODO(bchinn): break out recursive stringInterPart rule
+        : STRING_INTER_BEGIN (STRING_INTER_RAW | STRING_INTER_EXP_OPEN exp STRING_INTER_EXP_CLOSE)* STRING_INTER_END { undefined }
+
 -----------------------------------------------------------------------------
 -- Warnings and deprecations
 
@@ -4153,8 +4168,8 @@ getQCONSYM        (L _ (ITqconsym  x)) = x
 getIPDUPVARID     (L _ (ITdupipvarid   x)) = x
 getLABELVARID     (L _ (ITlabelvarid _ x)) = x
 getCHAR           (L _ (ITchar   _ x)) = x
-getSTRING         (L _ (ITstring _ x)) = x
-getSTRINGMULTI    (L _ (ITstringMulti _ x)) = x
+getSTRING         (L _ (ITstring _ x StringTypeSingle)) = x
+getSTRINGMULTI    (L _ (ITstring _ x StringTypeMulti)) = x
 getINTEGER        (L _ (ITinteger x))  = x
 getRATIONAL       (L _ (ITrational x)) = x
 getPRIMCHAR       (L _ (ITprimchar _ x)) = x
@@ -4179,8 +4194,8 @@ getVOCURLY        (L (RealSrcSpan l _) ITvocurly) = srcSpanStartCol l
 
 getINTEGERs       (L _ (ITinteger (IL src _ _))) = src
 getCHARs          (L _ (ITchar       src _)) = src
-getSTRINGs        (L _ (ITstring     src _)) = src
-getSTRINGMULTIs   (L _ (ITstringMulti src _)) = src
+getSTRINGs        (L _ (ITstring     src _ StringTypeSingle)) = src
+getSTRINGMULTIs   (L _ (ITstring     src _ StringTypeMulti)) = src
 getPRIMCHARs      (L _ (ITprimchar   src _)) = src
 getPRIMSTRINGs    (L _ (ITprimstring src _)) = src
 getPRIMINTEGERs   (L _ (ITprimint    src _)) = src


=====================================
compiler/GHC/Parser/Lexer.x
=====================================
@@ -168,7 +168,7 @@ $idchar    = [$small $large $digit $uniidchar \']
 
 $unigraphic = \x06 -- Trick Alex into handling Unicode. See Note [Unicode in Alex].
 $graphic   = [$small $large $symbol $digit $idchar $special $unigraphic \"\']
-$charesc   = [a b f n r t v \\ \" \' \&]
+$charesc   = [a b f n r t v \\ \" \' \& \$]
 
 $binit     = 0-1
 $octit     = 0-7
@@ -226,8 +226,9 @@ $docsym    = [\| \^ \* \$]
 -- character sets can be subtracted, not strings
 @escape     = \\ ( $charesc      | @ascii | @decimal | o @octal | x @hexadecimal )
 @escapechar = \\ ( $charesc # \& | @ascii | @decimal | o @octal | x @hexadecimal )
- at stringchar = ($graphic # [\\ \"]) | $space | @escape     | @gap
- at char       = ($graphic # [\\ \']) | $space | @escapechar
+ at stringchar = ($graphic # [\\ \"])         | $space | @escape     | @gap
+ at char       = ($graphic # [\\ \'])         | $space | @escapechar
+ at stringinterchar = ($graphic # [\\ \" \$]) | $space | @escape     | @gap
 
 -- normal signed numerical literals can only be explicitly negative,
 -- not explicitly positive (contrast @exponent)
@@ -700,6 +701,21 @@ $unigraphic / { isSmartQuote } { smart_quote_error }
   (\" | \"\") / ([\n .] # \") { tok_string_multi_content }
 }
 
+-- See Note [Interpolated strings]
+<0> {
+  s \" { \span _ _ _ -> pushLexState string_inter_content >> pure (L span (ITstringInterBegin StringTypeSingle)) }
+  -- TODO(bchinn): interpolated multiline strings
+}
+
+-- TODO(bchinn): add string_inter state to all <0> states that can be in an interpolated string
+<string_inter_content> {
+  @stringinterchar* { tok_string_inter_raw }
+  \$ \{             { \span _ _ _ -> pushLexState string_inter >> pure (L span ITstringInterExpOpen) }
+  \"                { \span _ _ _ -> popLexState >> pure (L span (ITstringInterEnd StringTypeSingle)) }
+
+  -- TODO(bchinn): check for smart quotes
+}
+
 <0> {
   \'\' { token ITtyQuote }
 
@@ -991,9 +1007,16 @@ data Token
                                          -- have a string literal as a label
                                          -- Note [Literal source text] in "GHC.Types.SourceText"
 
-  | ITchar     SourceText Char       -- Note [Literal source text] in "GHC.Types.SourceText"
-  | ITstring   SourceText FastString -- Note [Literal source text] in "GHC.Types.SourceText"
-  | ITstringMulti SourceText FastString -- Note [Literal source text] in "GHC.Types.SourceText"
+  | ITchar   SourceText Char                  -- Note [Literal source text] in "GHC.Types.SourceText"
+  | ITstring SourceText FastString StringType -- Note [Literal source text] in "GHC.Types.SourceText"
+
+  -- See Note [Interpolated strings]
+  | ITstringInterBegin    StringType
+  | ITstringInterRaw      SourceText RawLexedString -- Note [Literal source text] in "GHC.Types.SourceText"
+  | ITstringInterExpOpen
+  | ITstringInterExpClose
+  | ITstringInterEnd      StringType
+
   | ITinteger  IntegralLit           -- Note [Literal source text] in "GHC.Types.SourceText"
   | ITrational FractionalLit
 
@@ -1743,8 +1766,11 @@ open_brace span _str _len _buf2 = do
   setContext (NoLayout:ctx)
   return (L span ITocurly)
 close_brace span _str _len _buf2 = do
-  popContext
-  return (L span ITccurly)
+  ctx <- getContext
+  sc <- getLexState
+  if null ctx && sc == string_inter
+    then popLexState >> pure (L span ITstringInterExpClose)
+    else popContext >> pure (L span ITccurly)
 
 qvarid, qconid :: StringBuffer -> Int -> Token
 qvarid buf len = ITqvarid $! splitQualName buf len False
@@ -2234,7 +2260,7 @@ tok_string span buf len _buf2 = do
         addError err
       pure $ L span (ITprimstring src (unsafeMkByteString s))
     else
-      pure $ L span (ITstring src (mkFastString s))
+      pure $ L span (ITstring src (mkFastString s) StringTypeSingle)
   where
     src = SourceText $ lexemeToFastString buf len
     endsInHash = currentChar (offsetBytes (len - 1) buf) == '#'
@@ -2263,10 +2289,10 @@ tok_string_multi startSpan startBuf _len _buf2 = do
   let contentLen = byteDiff contentStartBuf contentEndBuf
   s <-
     either (throwStringLexError (AI startLoc startBuf)) pure $
-      lexMultilineString contentLen contentStartBuf
+      lexString StringTypeMulti contentLen contentStartBuf
 
   setInput i'
-  pure $ L span $ ITstringMulti src (mkFastString s)
+  pure $ L span $ ITstring src (mkFastString s) StringTypeMulti
   where
     goContent i0 =
       case alexScan i0 string_multi_content of
@@ -2310,7 +2336,7 @@ tok_string_multi_content = panic "tok_string_multi_content unexpectedly invoked"
 lex_chars :: (String, String) -> PsSpan -> StringBuffer -> Int -> P String
 lex_chars (startDelim, endDelim) span buf len =
   either (throwStringLexError i0) pure $
-    lexString contentLen contentBuf
+    lexString StringTypeSingle contentLen contentBuf
   where
     i0@(AI _ contentBuf) = advanceInputBytes (length startDelim) $ AI (psSpanStart span) buf
 


=====================================
compiler/GHC/Parser/String.hs
=====================================
@@ -5,8 +5,12 @@
 
 module GHC.Parser.String (
   StringLexError (..),
+  StringType (..),
   lexString,
-  lexMultilineString,
+
+  -- * Raw strings
+  RawLexedString,
+  lexStringRaw,
 
   -- * Unicode smart quote helpers
   isDoubleSmartQuote,
@@ -17,6 +21,7 @@ import GHC.Prelude hiding (getChar)
 
 import Control.Arrow ((>>>))
 import Control.Monad (when)
+import Data.Bifunctor (first)
 import Data.Char (chr, ord)
 import qualified Data.Foldable1 as Foldable1
 import qualified Data.List.NonEmpty as NonEmpty
@@ -37,13 +42,16 @@ import GHC.Utils.Panic (panic)
 type BufPos = Int
 data StringLexError = StringLexError LexErr BufPos
 
-lexString :: Int -> StringBuffer -> Either StringLexError String
-lexString = lexStringWith processChars processChars
+data StringType = StringTypeSingle | StringTypeMulti deriving (Show)
+
+lexString :: StringType -> Int -> StringBuffer -> Either StringLexError String
+lexString strType = lexStringWith processChars processChars
   where
     processChars :: HasChar c => [c] -> Either (c, LexErr) [c]
     processChars =
-          collapseGaps
-      >>> resolveEscapes
+      case strType of
+        StringTypeSingle -> processCharsSingle
+        StringTypeMulti -> processCharsMulti
 
 -- -----------------------------------------------------------------------------
 -- Lexing interface
@@ -122,6 +130,11 @@ bufferLocatedChars initialBuf len = go initialBuf
 -- -----------------------------------------------------------------------------
 -- Lexing phases
 
+processCharsSingle :: HasChar c => [c] -> Either (c, LexErr) [c]
+processCharsSingle =
+      collapseGaps
+  >>> resolveEscapes
+
 collapseGaps :: HasChar c => [c] -> [c]
 collapseGaps = go
   where
@@ -255,75 +268,71 @@ isSingleSmartQuote = \case
 -- Assumes string is lexically valid. Skips the steps about splitting
 -- and rejoining lines, and instead manually find newline characters,
 -- for performance.
-lexMultilineString :: Int -> StringBuffer -> Either StringLexError String
-lexMultilineString = lexStringWith processChars processChars
+processCharsMulti :: HasChar c => [c] -> Either (c, LexErr) [c]
+processCharsMulti = fmap from . processCharsMulti' . to
   where
-    processChars :: HasChar c => [c] -> Either (c, LexErr) [c]
-    processChars =
-          collapseGaps             -- Step 1
-      >>> expandLeadingTabs        -- Step 3
-      >>> rmCommonWhitespacePrefix -- Step 4
-      >>> collapseOnlyWsLines      -- Step 5
-      >>> rmFirstNewline           -- Step 7a
-      >>> rmLastNewline            -- Step 7b
-      >>> resolveEscapes           -- Step 8
-
-    -- expands all tabs, since the lexer will verify that tabs can only appear
-    -- as leading indentation
-    expandLeadingTabs :: HasChar c => [c] -> [c]
-    expandLeadingTabs =
-      let go !col = \case
-            c@(Char '\t') : cs ->
-              let fill = 8 - (col `mod` 8)
-               in replicate fill (setChar ' ' c) ++ go (col + fill) cs
-            c : cs -> c : go (if getChar c == '\n' then 0 else col + 1) cs
-            [] -> []
-       in go 0
+    -- Convert a normal multiline string to/from an interpolated multiline string
+    -- with no interpolated expressions.
+    to s = [Left s]
+    from = \case
+      [Left s] -> s
+      _ -> panic "Got unexpected result when processing characters in multiline string"
+
+-- | Process multiline characters generally, for both normal multiline strings and interpolated
+-- multiline strings.
+processCharsMulti' :: HasChar c => [Either [c] x] -> Either (c, LexErr) [Either [c] x]
+processCharsMulti' =
+      overRaw collapseGaps     -- Step 1
+  >>> expandLeadingTabs        -- Step 3
+  >>> rmCommonWhitespacePrefix -- Step 4
+  >>> collapseOnlyWsLines      -- Step 5
+  >>> rmFirstNewline           -- Step 7a
+  >>> rmLastNewline            -- Step 7b
+  >>> overRaw resolveEscapes   -- Step 8
+
+-- Run the given function over all raw strings, ignoring expressions
+overRaw :: (s -> s) -> [Either s x] -> [Either s x]
+overRaw f = map $ \case
+  Left s -> Left $ f s
+  Right x -> Right x
+
+-- | Expands all tabs blindly, since the lexer will verify that tabs can only appear
+-- as leading indentation
+expandLeadingTabs :: HasChar c => [Either [c] x] -> [Either [c] x]
+expandLeadingTabs =
+  -- we can expand each raw string part independently, because leading
+  -- indentation will never contain an interpolated expression
+  overRaw $ go 0
+  where
+    go !col = \case
+      c@(Char '\t') : cs ->
+        let fill = 8 - (col `mod` 8)
+         in replicate fill (setChar ' ' c) ++ go (col + fill) cs
+      c : cs -> c : go (if getChar c == '\n' then 0 else col + 1) cs
+      [] -> []
 
-    rmCommonWhitespacePrefix :: HasChar c => [c] -> [c]
-    rmCommonWhitespacePrefix cs0 =
-      let commonWSPrefix = getCommonWsPrefix (map getChar cs0)
-          go = \case
-            c@(Char '\n') : cs -> c : go (dropLine commonWSPrefix cs)
-            c : cs -> c : go cs
-            [] -> []
-          -- drop x characters from the string, or up to a newline, whichever
-          -- comes first
-          dropLine !x = \case
-            cs | x <= 0 -> cs
-            cs@(Char '\n' : _) -> cs
-            _ : cs -> dropLine (x - 1) cs
-            [] -> []
-       in go cs0
-
-    collapseOnlyWsLines :: HasChar c => [c] -> [c]
-    collapseOnlyWsLines =
-      let go = \case
-            c@(Char '\n') : cs | Just cs' <- checkAllWs cs -> c : go cs'
-            c : cs -> c : go cs
-            [] -> []
-          checkAllWs = \case
-            -- got all the way to a newline or the end of the string, return
-            cs@(Char '\n' : _) -> Just cs
-            cs@[] -> Just cs
-            -- found whitespace, continue
-            Char c : cs | is_space c -> checkAllWs cs
-            -- anything else, stop
-            _ -> Nothing
-       in go
-
-    rmFirstNewline :: HasChar c => [c] -> [c]
-    rmFirstNewline = \case
-      Char '\n' : cs -> cs
-      cs -> cs
-
-    rmLastNewline :: HasChar c => [c] -> [c]
-    rmLastNewline =
-      let go = \case
-            [] -> []
-            [Char '\n'] -> []
-            c : cs -> c : go cs
-       in go
+rmCommonWhitespacePrefix :: HasChar c => [Either [c] x] -> [Either [c] x]
+rmCommonWhitespacePrefix parts =
+  -- Whitespace prefix, by definition, only comes after newline characters, and there can
+  -- never be an interpolated expr within a whitespace prefix (since the expr would end
+  -- the prefix). So we can use a plain `map` to just process the string parts, because
+  -- the "drop prefix" logic will never span over multiple parts. TODO(bchinn): what about two Strings next to each other?
+  map (first go) parts
+  where
+    -- treat interpolated exprs as a single, non-space character string
+    commonWSPrefix = getCommonWsPrefix $ concatMap (either (map getChar) (const "x")) parts
+
+    go = \case
+      c@(Char '\n') : cs -> c : go (dropPrefix commonWSPrefix cs)
+      c : cs -> c : go cs
+      [] -> []
+
+    -- drop x characters from the string, or up to a newline, whichever comes first
+    dropPrefix !x = \case
+      cs | x <= 0 -> cs
+      cs@(Char '\n' : _) -> cs
+      _ : cs -> dropPrefix (x - 1) cs
+      [] -> []
 
 -- | See step 4 in Note [Multiline string literals]
 --
@@ -339,6 +348,57 @@ getCommonWsPrefix s =
       . drop 1                      -- ignore first line in calculation
       $ lines s
 
+-- TODO(bchinn): treat interpolated exprs as one character non-space string
+collapseOnlyWsLines :: HasChar c => [Either [c] x] -> [Either [c] x]
+collapseOnlyWsLines = goParts
+  where
+    goParts = \case
+      Right x : parts -> Right x : goParts parts
+      [Left s] -> _
+      Left s : _ -> _
+      [] -> []
+
+    go = \case
+      c@(Char '\n') : cs | Just cs' <- checkAllWs cs -> c : go cs'
+      c : cs -> c : go cs
+      [] -> []
+
+    -- Return (String, Bool, String), where:
+    --  * String - The line that was just parsed, ending with a newline character, unless
+    --             we reached the end of the string
+    --  * Bool   - Whether the line that was just parsed was all whitespace characters
+    --  * String - The rest of the string
+    takeLine =
+      let go acc allWS = \case
+            -- reached a newline or the end of the string
+            c@(Char '\n') : cs -> (dlistToList (acc `dlistSnoc` c), allWS, cs)
+            [] -> (dlistToList acc, allWS, [])
+            -- found whitespace
+            c : cs | is_space (getChar c) -> go (acc `dlistSnoc` c) allWS
+            -- found non-whitespace
+            c : cs -> go (acc `dlistSnoc` c) False
+       in go dlistEmpty True
+
+rmFirstNewline :: HasChar c => [Either [c] x] -> [Either [c] x]
+rmFirstNewline = \case
+  Left (Char '\n' : cs) : parts -> Left cs : parts
+  parts -> parts
+
+rmLastNewline :: HasChar c => [Either [c] x] -> [Either [c] x]
+rmLastNewline = goParts
+  where
+    goParts = \case
+      [] -> []
+      [Left s] ->
+        case goLastLine s of
+          [] -> []
+          s' -> [Left s']
+      c : cs -> c : goParts cs
+    goLastLine = \case
+      [] -> []
+      [Char '\n'] -> []
+      c : cs -> c : goLastLine cs
+
 {-
 Note [Multiline string literals]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -348,8 +408,8 @@ proposal: https://github.com/ghc-proposals/ghc-proposals/pull/569
 
 Multiline string literals are syntax sugar for normal string literals,
 with an extra post processing step. This all happens in the Lexer; that
-is, HsMultilineString will contain the post-processed string. This matches
-the same behavior as HsString, which contains the normalized string
+is, HsString will contain the post-processed string. This matches the same
+behavior as single-line HsString, which contains the normalized string
 (see Note [Literal source text]).
 
 The canonical steps for post processing a multiline string are:
@@ -378,6 +438,76 @@ It's more precisely defined with the following algorithm:
 3. Calculate the longest prefix of whitespace shared by all lines in the remaining list
 -}
 
+-- -----------------------------------------------------------------------------
+-- Interpolated strings
+
+-- | A string that's been validated to be lexically correct, but still
+-- contains the raw string lexed, without anything resolved.
+newtype RawLexedString = RawLexedString {unRawLexedString :: String}
+
+-- | Load and validate the string in the given StringBuffer.
+--
+-- e.g. Lexing "a\nb" will return RawLexedString ['a', '\\', 'n', 'b'].
+lexStringRaw :: Int -> StringBuffer -> Either StringLexError RawLexedString
+lexStringRaw len buf = RawLexedString (bufferChars len buf) <$ lexString strType len buf
+  where
+    -- Always lex as a single-line string, because single-line and multi-line strings
+    -- have the same validation logic, so we can skip the multi-line processing steps
+    strType = StringTypeSingle
+
+fromRawLexedStringSingle :: RawLexedString -> String
+fromRawLexedStringSingle (RawLexedString s) =
+  case processCharsSingle s of
+    Right s' -> s'
+    Left _ -> panic "Unexpectedly got an error when re-lexing the string"
+
+fromRawLexedStringMulti :: [Either RawLexedString x] -> [Either String x]
+fromRawLexedStringMulti parts =
+  case processCharsMulti' $ map (first unRawLexedString) parts of
+    Right parts' -> parts'
+    Left _ -> panic "Unexpectedly got an error when re-lexing the string"
+
+{-
+Note [Interpolated strings]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Interpolated string syntax was accepted in this proposal:
+https://github.com/ghc-proposals/ghc-proposals/pull/570
+
+Interpolated strings are syntax sugar for <TODO(bchinn)>
+
+Interpolated strings are implemented in the following manner:
+
+1. Lexer takes the string as input:
+
+    s"Hello ${Text.toUpper name}!"
+
+  and outputs the following tokens:
+
+    [ ITstringInterBegin    src StringTypeSingle
+    , ITstringInterRaw      src "Hello "
+    , ITstringInterExpOpen  src
+    , ITqvarid                  ("Text.toUpper", "name")
+    , ITvarid                   "name"
+    , ITstringInterExpClose src
+    , ITstringInterRaw      src "!"
+    , ITstringInterEnd      src StringTypeSingle
+    ]
+
+2. The parser will then parse the tokens into the following HsExpr:
+
+    HsInterString ext
+      [ HsInterRaw ext "Hello "
+      , HsInterExp ext $
+          HsApp ext
+            (HsVar ext 'Text.toUpper)
+            (HsVar ext 'name)
+      , HsInterRaw ext "!"
+      ]
+
+3. This will then be desugared into <TODO(bchinn)>
+-}
+
 -- -----------------------------------------------------------------------------
 -- DList
 


=====================================
compiler/GHC/Rename/Expr.hs
=====================================
@@ -377,8 +377,7 @@ rnExpr (HsLit x lit) | Just (src, s) <- stringLike lit
             ; return (HsLit x (convertLit lit), emptyFVs) } }
   where
     stringLike = \case
-      HsString src s -> Just (src, s)
-      HsMultilineString src s -> Just (src, s)
+      HsString src _ s -> Just (src, s)
       _ -> Nothing
 
 rnExpr (HsLit x lit)
@@ -393,6 +392,9 @@ rnExpr (HsOverLit x lit)
                  return (HsApp noExtField (noLocA neg) (noLocA (HsOverLit x lit'))
                         , fvs ) }
 
+rnExpr (HsInterString x ty parts)
+  = undefined -- TODO(bchinn)
+
 rnExpr (HsApp x fun arg)
   = do { (fun',fvFun) <- rnLExpr fun
        ; (arg',fvArg) <- rnLExpr arg


=====================================
compiler/GHC/Tc/Gen/HsType.hs
=====================================
@@ -4725,8 +4725,7 @@ addTyConFlavCtxt name flav
 
 
 tyLitFromLit :: HsLit GhcRn -> Maybe (HsTyLit GhcRn)
-tyLitFromLit (HsString x str) = Just (HsStrTy x str)
-tyLitFromLit (HsMultilineString x str) = Just (HsStrTy x str)
+tyLitFromLit (HsString x _ str) = Just (HsStrTy x str)
 tyLitFromLit (HsChar x char) = Just (HsCharTy x char)
 tyLitFromLit _ = Nothing
 


=====================================
compiler/Language/Haskell/Syntax/Expr.hs
=====================================
@@ -350,7 +350,6 @@ data HsExpr p
                              --   erroring expression will be written after
                              --   solving. See Note [Holes] in GHC.Tc.Types.Constraint.
 
-
   | HsRecSel  (XRecSel p)
               (FieldOcc p) -- ^ Variable pointing to record selector
                            -- See Note [Non-overloaded record field selectors] and
@@ -361,12 +360,19 @@ data HsExpr p
 
   | HsIPVar   (XIPVar p)
               HsIPName   -- ^ Implicit parameter (not in use after typechecking)
+
   | HsOverLit (XOverLitE p)
               (HsOverLit p)  -- ^ Overloaded literals
 
   | HsLit     (XLitE p)
               (HsLit p)      -- ^ Simple (non-overloaded) literals
 
+  | -- | See Note [Interpolated strings]
+    HsInterString
+      (XInterString p)
+      HsStringType
+      [Either FastString (LHsExpr p)]
+
   -- | Lambda, Lambda-case, and Lambda-cases
   --
   -- - 'GHC.Parser.Annotation.AnnKeywordId' : 'GHC.Parser.Annotation.AnnLam',


=====================================
compiler/Language/Haskell/Syntax/Extension.hs
=====================================
@@ -413,6 +413,9 @@ type family XOverLabel      x
 type family XIPVar          x
 type family XOverLitE       x
 type family XLitE           x
+type family XInterString    x
+type family XInterStringRaw x
+type family XInterStringExp x
 type family XLam            x
 type family XLamCase        x
 type family XApp            x
@@ -556,7 +559,6 @@ type family XXParStmtBlock x x'
 type family XHsChar x
 type family XHsCharPrim x
 type family XHsString x
-type family XHsMultilineString x
 type family XHsStringPrim x
 type family XHsInt x
 type family XHsIntPrim x


=====================================
compiler/Language/Haskell/Syntax/Lit.hs
=====================================
@@ -51,9 +51,7 @@ data HsLit x
       -- ^ Character
   | HsCharPrim (XHsCharPrim x) {- SourceText -} Char
       -- ^ Unboxed character
-  | HsString (XHsString x) {- SourceText -} FastString
-      -- ^ String
-  | HsMultilineString (XHsMultilineString x) {- SourceText -} FastString
+  | HsString (XHsString x) {- SourceText -} HsStringType FastString
       -- ^ String
   | HsStringPrim (XHsStringPrim x) {- SourceText -} !ByteString
       -- ^ Packed bytes
@@ -145,3 +143,5 @@ instance Ord OverLitVal where
   compare (HsIsString _ s1)   (HsIsString _ s2)   = s1 `lexicalCompareFS` s2
   compare (HsIsString _ _)    (HsIntegral   _)    = GT
   compare (HsIsString _ _)    (HsFractional _)    = GT
+
+data HsStringType = HsStringTypeSingle | HsStringTypeMulti


=====================================
utils/check-exact/ExactPrint.hs
=====================================
@@ -4923,8 +4923,7 @@ hsLit2String lit =
   case lit of
     HsChar       src v   -> toSourceTextWithSuffix src v ""
     HsCharPrim   src p   -> toSourceTextWithSuffix src p ""
-    HsString     src v   -> toSourceTextWithSuffix src v ""
-    HsMultilineString src v -> toSourceTextWithSuffix src v ""
+    HsString     src _ v -> toSourceTextWithSuffix src v ""
     HsStringPrim src v   -> toSourceTextWithSuffix src v ""
     HsInt        _ (IL src _ v)   -> toSourceTextWithSuffix src v ""
     HsIntPrim    src v   -> toSourceTextWithSuffix src v ""


=====================================
utils/haddock/haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs
=====================================
@@ -320,7 +320,6 @@ classify tok =
     ITlabelvarid{} -> TkUnknown
     ITchar{} -> TkChar
     ITstring{} -> TkString
-    ITstringMulti{} -> TkString
     ITinteger{} -> TkNumber
     ITrational{} -> TkNumber
     ITprimchar{} -> TkChar



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/65d1c307e0dbf367ccc3555a1e6411b45c8efd09...cc7d07b3b8a0eb678af4eeaed2726c1cc8c79ab3

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/65d1c307e0dbf367ccc3555a1e6411b45c8efd09...cc7d07b3b8a0eb678af4eeaed2726c1cc8c79ab3
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20240925/a6a41621/attachment-0001.html>


More information about the ghc-commits mailing list