[Git][ghc/ghc][wip/multiline-strings] 2 commits: Implement MultilineStrings (#24390)

Brandon Chinn (@brandonchinn178) gitlab at gitlab.haskell.org
Sun Feb 18 04:58:37 UTC 2024



Brandon Chinn pushed to branch wip/multiline-strings at Glasgow Haskell Compiler / GHC


Commits:
a75756eb by Brandon Chinn at 2024-02-17T20:58:23-08:00
Implement MultilineStrings (#24390)

Updates haddock submodule for new ITmultilinestring constructor

- - - - -
f840b011 by Brandon Chinn at 2024-02-17T20:58:24-08:00
Add docs for MultilineStrings

- - - - -


14 changed files:

- compiler/GHC/Hs/Lit.hs
- compiler/GHC/Hs/Syn/Type.hs
- compiler/GHC/HsToCore/Match/Literal.hs
- compiler/GHC/Parser.y
- compiler/GHC/Parser/Lexer.x
- compiler/GHC/Parser/String.hs
- compiler/GHC/Rename/Expr.hs
- compiler/Language/Haskell/Syntax/Extension.hs
- compiler/Language/Haskell/Syntax/Lit.hs
- + docs/users_guide/9.12.1-notes.rst
- docs/users_guide/exts/literals.rst
- + docs/users_guide/exts/multiline_strings.rst
- utils/check-exact/ExactPrint.hs
- utils/haddock


Changes:

=====================================
compiler/GHC/Hs/Lit.hs
=====================================
@@ -25,6 +25,7 @@ import GHC.Prelude
 
 import {-# SOURCE #-} GHC.Hs.Expr( pprExpr )
 
+import GHC.Data.FastString (unpackFS)
 import GHC.Types.Basic (PprPrec(..), topPrec )
 import GHC.Core.Ppr ( {- instance OutputableBndr TyVar -} )
 import GHC.Types.SourceText
@@ -46,6 +47,7 @@ import Language.Haskell.Syntax.Lit
 type instance XHsChar       (GhcPass _) = SourceText
 type instance XHsCharPrim   (GhcPass _) = SourceText
 type instance XHsString     (GhcPass _) = SourceText
+type instance XHsMultilineString (GhcPass _) = SourceText
 type instance XHsStringPrim (GhcPass _) = SourceText
 type instance XHsInt        (GhcPass _) = NoExtField
 type instance XHsIntPrim    (GhcPass _) = SourceText
@@ -132,6 +134,7 @@ hsLitNeedsParens p = go
     go (HsChar {})        = False
     go (HsCharPrim {})    = False
     go (HsString {})      = False
+    go (HsMultilineString {}) = False
     go (HsStringPrim {})  = False
     go (HsInt _ x)        = p > topPrec && il_neg x
     go (HsInteger _ x _)  = p > topPrec && x < 0
@@ -155,6 +158,7 @@ convertLit :: HsLit (GhcPass p1) -> HsLit (GhcPass p2)
 convertLit (HsChar a x)       = HsChar a x
 convertLit (HsCharPrim a x)   = HsCharPrim a x
 convertLit (HsString a x)     = HsString a x
+convertLit (HsMultilineString a x) = HsMultilineString a x
 convertLit (HsStringPrim a x) = HsStringPrim a x
 convertLit (HsInt a x)        = HsInt a x
 convertLit (HsIntPrim a x)    = HsIntPrim a x
@@ -192,6 +196,17 @@ instance Outputable (HsLit (GhcPass p)) where
     ppr (HsChar st c)       = pprWithSourceText st (pprHsChar c)
     ppr (HsCharPrim st c)   = pprWithSourceText st (pprPrimChar c)
     ppr (HsString st s)     = pprWithSourceText st (pprHsString s)
+    ppr (HsMultilineString st s) =
+      case st of
+        NoSourceText -> pprHsString s
+        SourceText src ->
+          vcat $ map text $ splitOn '\n' (unpackFS src)
+      where
+        splitOn c s =
+          let (firstLine, rest) = break (== c) s
+           in case rest of
+                "" -> [firstLine]
+                _ : rest -> firstLine : splitOn c rest
     ppr (HsStringPrim st s) = pprWithSourceText st (pprHsBytes s)
     ppr (HsInt _ i)
       = pprWithSourceText (il_text i) (integer (il_value i))
@@ -231,6 +246,7 @@ pmPprHsLit :: HsLit (GhcPass x) -> SDoc
 pmPprHsLit (HsChar _ c)       = pprHsChar c
 pmPprHsLit (HsCharPrim _ c)   = pprHsChar c
 pmPprHsLit (HsString st s)    = pprWithSourceText st (pprHsString s)
+pmPprHsLit (HsMultilineString st s) = pprWithSourceText st (pprHsString s)
 pmPprHsLit (HsStringPrim _ s) = pprHsBytes s
 pmPprHsLit (HsInt _ i)        = integer (il_value i)
 pmPprHsLit (HsIntPrim _ i)    = integer i


=====================================
compiler/GHC/Hs/Syn/Type.hs
=====================================
@@ -74,6 +74,7 @@ hsLitType :: HsLit (GhcPass p) -> Type
 hsLitType (HsChar _ _)       = charTy
 hsLitType (HsCharPrim _ _)   = charPrimTy
 hsLitType (HsString _ _)     = stringTy
+hsLitType (HsMultilineString _ _) = stringTy
 hsLitType (HsStringPrim _ _) = addrPrimTy
 hsLitType (HsInt _ _)        = intTy
 hsLitType (HsIntPrim _ _)    = intPrimTy


=====================================
compiler/GHC/HsToCore/Match/Literal.hs
=====================================
@@ -121,6 +121,7 @@ dsLit l = do
     HsDoublePrim _ fl -> return (Lit (LitDouble (rationalFromFractionalLit fl)))
     HsChar _ c       -> return (mkCharExpr c)
     HsString _ str   -> mkStringExprFS str
+    HsMultilineString _ str -> mkStringExprFS str
     HsInteger _ i _  -> return (mkIntegerExpr platform i)
     HsInt _ i        -> return (mkIntExpr platform (il_value i))
     HsRat _ fl ty    -> dsFractionalLitToRational fl ty
@@ -474,6 +475,7 @@ getSimpleIntegralLit (HsInteger _ i ty) = Just (i, ty)
 getSimpleIntegralLit HsChar{}           = Nothing
 getSimpleIntegralLit HsCharPrim{}       = Nothing
 getSimpleIntegralLit HsString{}         = Nothing
+getSimpleIntegralLit HsMultilineString{} = Nothing
 getSimpleIntegralLit HsStringPrim{}     = Nothing
 getSimpleIntegralLit HsRat{}            = Nothing
 getSimpleIntegralLit HsFloatPrim{}      = Nothing


=====================================
compiler/GHC/Parser.y
=====================================
@@ -697,6 +697,7 @@ are the most common patterns, rewritten as regular expressions for clarity:
 
  CHAR           { L _ (ITchar   _ _) }
  STRING         { L _ (ITstring _ _) }
+ MULTILINESTRING { L _ (ITmultilinestring _ _) }
  INTEGER        { L _ (ITinteger _) }
  RATIONAL       { L _ (ITrational _) }
 
@@ -3905,6 +3906,8 @@ literal :: { Located (HsLit GhcPs) }
         : CHAR              { sL1 $1 $ HsChar       (getCHARs $1) $ getCHAR $1 }
         | STRING            { sL1 $1 $ HsString     (getSTRINGs $1)
                                                     $ getSTRING $1 }
+        | MULTILINESTRING   { sL1 $1 $ HsMultilineString (getMULTILINESTRINGs $1)
+                                                    $ getMULTILINESTRING $1 }
         | PRIMINTEGER       { sL1 $1 $ HsIntPrim    (getPRIMINTEGERs $1)
                                                     $ getPRIMINTEGER $1 }
         | PRIMWORD          { sL1 $1 $ HsWordPrim   (getPRIMWORDs $1)
@@ -4010,6 +4013,7 @@ getIPDUPVARID     (L _ (ITdupipvarid   x)) = x
 getLABELVARID     (L _ (ITlabelvarid _ x)) = x
 getCHAR           (L _ (ITchar   _ x)) = x
 getSTRING         (L _ (ITstring _ x)) = x
+getMULTILINESTRING (L _ (ITmultilinestring _ x)) = x
 getINTEGER        (L _ (ITinteger x))  = x
 getRATIONAL       (L _ (ITrational x)) = x
 getPRIMCHAR       (L _ (ITprimchar _ x)) = x
@@ -4035,6 +4039,7 @@ getVOCURLY        (L (RealSrcSpan l _) ITvocurly) = srcSpanStartCol l
 getINTEGERs       (L _ (ITinteger (IL src _ _))) = src
 getCHARs          (L _ (ITchar       src _)) = src
 getSTRINGs        (L _ (ITstring     src _)) = src
+getMULTILINESTRINGs (L _ (ITmultilinestring src _)) = src
 getPRIMCHARs      (L _ (ITprimchar   src _)) = src
 getPRIMSTRINGs    (L _ (ITprimstring src _)) = src
 getPRIMINTEGERs   (L _ (ITprimint    src _)) = src


=====================================
compiler/GHC/Parser/Lexer.x
=====================================
@@ -663,7 +663,8 @@ $unigraphic / { isSmartQuote } { smart_quote_error }
 -- to convert it to a String.
 <0> {
   \'                            { lex_char_tok }
-  \"                            { lex_string_tok }
+  \"\"\" / { ifExtension MultilineStringsBit} { lex_string_tok StringTypeMulti }
+  \"                            { lex_string_tok StringTypeSingle }
 }
 
 -- Note [Whitespace-sensitive operator parsing]
@@ -949,6 +950,7 @@ data Token
 
   | ITchar     SourceText Char       -- Note [Literal source text] in "GHC.Types.SourceText"
   | ITstring   SourceText FastString -- Note [Literal source text] in "GHC.Types.SourceText"
+  | ITmultilinestring SourceText FastString -- Note [Literal source text] in "GHC.Types.SourceText"
   | ITinteger  IntegralLit           -- Note [Literal source text] in "GHC.Types.SourceText"
   | ITrational FractionalLit
 
@@ -2176,32 +2178,37 @@ lex_string_prag_comment mkTok span _buf _len _buf2
 
 -- This stuff is horrible.  I hates it.
 
-lex_string_tok :: Action
-lex_string_tok span buf _len _buf2 = do
-  s <- lex_string
+lex_string_tok :: LexStringType -> Action
+lex_string_tok strType span buf _len _buf2 = do
+  s <- lex_string strType
 
   i <- getInput
-  lex_magic_hash i >>= \case
-    Just i' -> do
-      when (any (> '\xFF') s) $ do
-        pState <- getPState
-        let msg = PsErrPrimStringInvalidChar
-        let err = mkPlainErrorMsgEnvelope (mkSrcSpanPs (last_loc pState)) msg
-        addError err
-
-      setInput i'
-      let (psSpan, src) = getStringLoc (buf, locStart) i'
-      pure $ L psSpan (ITprimstring src (unsafeMkByteString s))
-    Nothing -> do
+  case strType of
+    StringTypeSingle ->
+      lex_magic_hash i >>= \case
+        Just i' -> do
+          when (any (> '\xFF') s) $ do
+            pState <- getPState
+            let msg = PsErrPrimStringInvalidChar
+            let err = mkPlainErrorMsgEnvelope (mkSrcSpanPs (last_loc pState)) msg
+            addError err
+
+          setInput i'
+          let (psSpan, src) = getStringLoc (buf, locStart) i'
+          pure $ L psSpan (ITprimstring src (unsafeMkByteString s))
+        Nothing -> do
+          let (psSpan, src) = getStringLoc (buf, locStart) i
+          pure $ L psSpan (ITstring src (mkFastString s))
+    StringTypeMulti -> do
       let (psSpan, src) = getStringLoc (buf, locStart) i
-      pure $ L psSpan (ITstring src (mkFastString s))
+      pure $ L psSpan (ITmultilinestring src (mkFastString s))
   where
     locStart = psSpanStart span
 
 
 lex_quoted_label :: Action
 lex_quoted_label span buf _len _buf2 = do
-  s <- lex_string
+  s <- lex_string StringTypeSingle
   (AI end bufEnd) <- getInput
   let
     token = ITlabelvarid (SourceText src) (mkFastString s)
@@ -2211,13 +2218,13 @@ lex_quoted_label span buf _len _buf2 = do
   return $ L (mkPsSpan start end) token
 
 
-lex_string :: P String
-lex_string = do
+lex_string :: LexStringType -> P String
+lex_string strType = do
   start <- getInput
   case lexString [] start of
     Right (lexedStr, next) -> do
       setInput next
-      either fromStringLexError pure $ resolveLexedString lexedStr
+      either fromStringLexError pure $ resolveLexedString strType lexedStr
     Left (e, s, i) -> do
       -- see if we can find a smart quote in the string we've found so far.
       -- if the built-up string s contains a smart double quote character, it was
@@ -2236,7 +2243,7 @@ lex_string = do
     lexString acc0 i0 = do
       let acc = reverse acc0
       case alexGetChar' i0 of
-        Just ('"', i1) -> Right (acc, i1)
+        _ | Just i1 <- lexDelimiter i0 -> Right (acc, i1)
 
         Just (c0, i1) -> do
           let acc1 = LexedChar c0 i0 : acc0
@@ -2248,10 +2255,22 @@ lex_string = do
                   | otherwise -> lexString (LexedChar c1 i1 : acc1) i2
                 Nothing -> Left (LexStringCharLit, acc, i1)
             _ | isAny c0 -> lexString acc1 i1
+            _ | strType == StringTypeMulti && c0 `elem` ['\n', '\t'] -> lexString acc1 i1
             _ -> Left (LexStringCharLit, acc, i0)
 
         Nothing -> Left (LexStringCharLit, acc, i0)
 
+    lexDelimiter i0 =
+      case strType of
+        StringTypeSingle -> do
+          ('"', i1) <- alexGetChar' i0
+          Just i1
+        StringTypeMulti -> do
+          ('"', i1) <- alexGetChar' i0
+          ('"', i2) <- alexGetChar' i1
+          ('"', i3) <- alexGetChar' i2
+          Just i3
+
     lexStringGap acc0 i0 = do
       let acc = reverse acc0
       case alexGetChar' i0 of
@@ -2969,6 +2988,7 @@ data ExtBits
   | OverloadedRecordDotBit
   | OverloadedRecordUpdateBit
   | ExtendedLiteralsBit
+  | MultilineStringsBit
 
   -- Flags that are updated once parsing starts
   | InRulePragBit
@@ -3049,6 +3069,7 @@ mkParserOpts extensionFlags diag_opts supported
       .|. OverloadedRecordDotBit      `xoptBit` LangExt.OverloadedRecordDot
       .|. OverloadedRecordUpdateBit   `xoptBit` LangExt.OverloadedRecordUpdate  -- Enable testing via 'getBit OverloadedRecordUpdateBit' in the parser (RecordDotSyntax parsing uses that information).
       .|. ExtendedLiteralsBit         `xoptBit` LangExt.ExtendedLiterals
+      .|. MultilineStringsBit         `xoptBit` LangExt.MultilineStrings
     optBits =
           HaddockBit        `setBitIf` isHaddock
       .|. RawTokenStreamBit `setBitIf` rawTokStream


=====================================
compiler/GHC/Parser/String.hs
=====================================
@@ -4,6 +4,7 @@ module GHC.Parser.String (
   LexedString,
   LexedChar (..),
   StringLexError (..),
+  LexStringType (..),
   resolveLexedString,
   resolveEscapeCharacter,
 
@@ -14,9 +15,11 @@ module GHC.Parser.String (
 
 import GHC.Prelude
 
-import Control.Monad (guard, unless, when, (>=>))
+import Control.Monad (forM_, guard, unless, when, (>=>))
 import Data.Char (chr, isSpace, ord)
-import Data.Maybe (listToMaybe, mapMaybe)
+import Data.List.NonEmpty (NonEmpty)
+import qualified Data.List.NonEmpty as NonEmpty
+import Data.Maybe (listToMaybe, mapMaybe, maybeToList)
 import GHC.Parser.CharClass (
   hexDigit,
   is_decdigit,
@@ -27,6 +30,8 @@ import GHC.Parser.CharClass (
 import GHC.Parser.Errors.Types (LexErr (..))
 import GHC.Utils.Panic (panic)
 
+data LexStringType = StringTypeSingle | StringTypeMulti deriving (Eq)
+
 data LexedChar loc = LexedChar !Char !loc
 type LexedString loc = [LexedChar loc]
 
@@ -39,14 +44,23 @@ unLexedString = map unLexedChar
 -- | Apply the given StringProcessors to the given LexedString left-to-right,
 -- and return the processed string.
 resolveLexedString ::
+  LexStringType ->
   LexedString loc ->
   Either (StringLexError loc) String
-resolveLexedString = fmap unLexedString . foldr (>=>) pure processString
+resolveLexedString strType = fmap unLexedString . foldr (>=>) pure processString
   where
     processString =
-      [ collapseStringGaps
-      , resolveEscapeCharacters
-      ]
+      case strType of
+        StringTypeSingle ->
+          [ collapseStringGaps
+          , resolveEscapeCharacters
+          ]
+        StringTypeMulti ->
+          [ collapseStringGaps
+          , resolveMultilineString
+          , checkInnerTabs
+          , resolveEscapeCharacters
+          ]
 
 data StringLexError loc
   = SmartQuoteError !Char !loc
@@ -72,8 +86,6 @@ collapseStringGaps s0 = pure (go s0)
 
       c : s -> c : go s
 
-    isLexedSpace = isSpace . unLexedChar
-
 resolveEscapeCharacters :: StringProcessor loc
 resolveEscapeCharacters = go
   where
@@ -198,6 +210,18 @@ parseLongEscape (LexedChar c _) s = listToMaybe $ mapMaybe tryParse longEscapeCo
       , ("DEL", '\DEL')
       ]
 
+-- | Error if string contains any tab characters.
+--
+-- Normal strings don't lex tab characters in the first place, but we
+-- have to allow them in multiline strings for leading indentation. So
+-- we allow them in the initial lexing pass, then check for any remaining
+-- tabs after replacing leading tabs in resolveMultilineString.
+checkInnerTabs :: StringProcessor loc
+checkInnerTabs s = do
+  forM_ s $ \(LexedChar c loc) ->
+    when (c == '\t') $ Left $ StringLexError c loc LexStringCharLit
+  pure s
+
 -- -----------------------------------------------------------------------------
 -- Unicode Smart Quote detection (#21843)
 
@@ -212,3 +236,126 @@ isSingleSmartQuote = \case
   '‘' -> True
   '’' -> True
   _ -> False
+
+{-
+Note [Multiline string literals]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Multiline string literals were added following the acceptance of the
+proposal: https://github.com/ghc-proposals/ghc-proposals/pull/569
+
+Multiline string literals are syntax sugar for normal string literals,
+with an extra post processing step. This all happens in the Lexer; that
+is, HsMultilineString will contain the post-processed string. This matches
+the behavior of HsString, which contains the normalized string
+(see Note [Literal source text]).
+
+The string is post-processed with the following steps:
+1. Collapse string gaps
+2. Split the string by newlines
+3. Convert leading tabs into spaces
+    * In each line, any tabs preceding non-whitespace characters are replaced with spaces up to the next tab stop
+4. Remove common whitespace prefix in every line (see below)
+5. If a line contains only whitespace, remove all of the whitespace
+6. Join the string back with `\n` delimiters
+7. If the first character of the string is a newline, remove it
+8. Interpret escaped characters
+
+The common whitespace prefix can be informally defined as "The longest
+prefix of whitespace shared by all lines in the string, excluding the
+first line and any whitespace-only lines".
+
+It's more precisely defined with the following algorithm:
+
+1. Take a list representing the lines in the string
+2. Ignore the following elements in the list:
+    * The first line (we want to ignore everything before the first newline)
+    * Empty lines
+    * Lines with only whitespace characters
+3. Calculate the longest prefix of whitespace shared by all lines in the remaining list
+-}
+
+-- | A lexed line, with the string and the location info of the ending newline
+-- character, if one exists
+data LexedLine loc = LexedLine !(LexedString loc) (Maybe loc)
+
+mapLine :: (LexedString loc -> LexedString loc) -> LexedLine loc -> LexedLine loc
+mapLine f (LexedLine line nl) = LexedLine (f line) nl
+
+mapLines :: (LexedString loc -> LexedString loc) -> [LexedLine loc] -> [LexedLine loc]
+mapLines f = map (mapLine f)
+
+filterLines :: (LexedString loc -> Bool) -> [LexedLine loc] -> [LexedLine loc]
+filterLines f = filter (\(LexedLine line _) -> f line)
+
+splitLines :: LexedString loc -> [LexedLine loc]
+splitLines =
+  foldr
+    ( curry $ \case
+        (LexedChar '\n' loc, ls) -> LexedLine [] (Just loc) : ls
+        (c, l : ls) -> mapLine (c :) l : ls
+        (c, []) -> LexedLine [c] Nothing : [] -- should not happen
+    )
+    [emptyLine]
+  where
+    emptyLine = LexedLine [] Nothing
+
+joinLines :: [LexedLine loc] -> LexedString loc
+joinLines = concatMap (\(LexedLine line nl) -> line ++ maybeToList (LexedChar '\n' <$> nl))
+
+-- | See Note [Multiline string literals]
+resolveMultilineString :: StringProcessor loc
+resolveMultilineString = pure . process
+  where
+    (.>) :: (a -> b) -> (b -> c) -> (a -> c)
+    (.>) = flip (.)
+
+    process =
+         splitLines
+      .> convertLeadingTabs
+      .> rmCommonWhitespacePrefix
+      .> stripOnlyWhitespace
+      .> joinLines
+      .> rmFirstNewline
+
+    convertLeadingTabs =
+      let convertLine col = \case
+            [] -> []
+            c@(LexedChar ' ' _) : cs -> c : convertLine (col + 1) cs
+            LexedChar '\t' loc : cs ->
+              let fill = 8 - (col `mod` 8)
+               in replicate fill (LexedChar ' ' loc) ++ convertLine (col + fill) cs
+            c : cs -> c : cs
+       in mapLines (convertLine 0)
+
+    rmCommonWhitespacePrefix = \case
+      [] -> []
+      -- exclude the first line from this calculation
+      firstLine : strLines ->
+        let excludeWsOnlyLines = filterLines (not . all isLexedSpace)
+            commonWSPrefix =
+              case NonEmpty.nonEmpty (excludeWsOnlyLines strLines) of
+                Nothing -> 0
+                Just strLines' ->
+                  minimum1 $
+                    flip NonEmpty.map strLines' $ \(LexedLine line _) ->
+                      length $ takeWhile isLexedSpace line
+         in firstLine : mapLines (drop commonWSPrefix) strLines
+
+    stripOnlyWhitespace =
+      let stripWsOnlyLine line = if all isLexedSpace line then [] else line
+       in mapLines stripWsOnlyLine
+
+    rmFirstNewline = \case
+      LexedChar '\n' _ : s -> s
+      s -> s
+
+    -- TODO: replace with Foldable1.minimum when GHC 9.6+ required to build
+    minimum1 :: Ord a => NonEmpty a -> a
+    minimum1 = minimum
+
+-- -----------------------------------------------------------------------------
+-- Helpers
+
+isLexedSpace :: LexedChar loc -> Bool
+isLexedSpace = isSpace . unLexedChar


=====================================
compiler/GHC/Rename/Expr.hs
=====================================
@@ -2,6 +2,7 @@
 {-# LANGUAGE ConstraintKinds     #-}
 {-# LANGUAGE CPP                 #-}
 {-# LANGUAGE FlexibleContexts    #-}
+{-# LANGUAGE LambdaCase          #-}
 {-# LANGUAGE MultiWayIf          #-}
 {-# LANGUAGE ScopedTypeVariables #-}
 {-# LANGUAGE TypeApplications    #-}
@@ -366,13 +367,18 @@ rnExpr (HsOverLabel _ src v)
     hs_ty_arg = mkEmptyWildCardBndrs $ wrapGenSpan $
                 HsTyLit noExtField (HsStrTy NoSourceText v)
 
-rnExpr (HsLit x lit@(HsString src s))
+rnExpr (HsLit x lit) | Just (src, s) <- stringLike lit
   = do { opt_OverloadedStrings <- xoptM LangExt.OverloadedStrings
        ; if opt_OverloadedStrings then
             rnExpr (HsOverLit x (mkHsIsString src s))
          else do {
             ; rnLit lit
             ; return (HsLit x (convertLit lit), emptyFVs) } }
+  where
+    stringLike = \case
+      HsString src s -> Just (src, s)
+      HsMultilineString src s -> Just (src, s)
+      _ -> Nothing
 
 rnExpr (HsLit x lit)
   = do { rnLit lit


=====================================
compiler/Language/Haskell/Syntax/Extension.hs
=====================================
@@ -565,6 +565,7 @@ type family XXApplicativeArg     x
 type family XHsChar x
 type family XHsCharPrim x
 type family XHsString x
+type family XHsMultilineString x
 type family XHsStringPrim x
 type family XHsInt x
 type family XHsIntPrim x


=====================================
compiler/Language/Haskell/Syntax/Lit.hs
=====================================
@@ -54,6 +54,8 @@ data HsLit x
       -- ^ Unboxed character
   | HsString (XHsString x) {- SourceText -} FastString
       -- ^ String
+  | HsMultilineString (XHsMultilineString x) {- SourceText -} FastString
+      -- ^ Multiline string
   | HsStringPrim (XHsStringPrim x) {- SourceText -} !ByteString
       -- ^ Packed bytes
   | HsInt (XHsInt x)  IntegralLit


=====================================
docs/users_guide/9.12.1-notes.rst
=====================================
@@ -0,0 +1,103 @@
+.. _release-9-12-1:
+
+Version 9.12.1
+==============
+
+Language
+~~~~~~~~
+
+- GHC Proposal `#569 <https://github.com/ghc-proposals/ghc-proposals/blob/master/proposals/0569-multiline-strings.rst>`_
+  "Multiline string literals" has been implemented.
+  The following code is now accepted by GHC::
+
+    {-# LANGUAGE MultilineStrings #-}
+
+    x :: String
+    x =
+      """
+      This is a
+      multiline
+
+          string
+
+      literal
+      """
+
+  This feature is guarded behind :extension:`MultilineStrings`.
+
+Compiler
+~~~~~~~~
+
+JavaScript backend
+~~~~~~~~~~~~~~~~~~
+
+WebAssembly backend
+~~~~~~~~~~~~~~~~~~~
+
+GHCi
+~~~~
+
+Runtime system
+~~~~~~~~~~~~~~
+
+``base`` library
+~~~~~~~~~~~~~~~~
+
+``ghc-prim`` library
+~~~~~~~~~~~~~~~~~~~~
+
+``ghc`` library
+~~~~~~~~~~~~~~~
+
+``ghc-heap`` library
+~~~~~~~~~~~~~~~~~~~~
+
+``ghc-experimental`` library
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+``template-haskell`` library
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Included libraries
+~~~~~~~~~~~~~~~~~~
+
+The package database provided with this distribution also contains a number of
+packages other than GHC itself. See the changelogs provided with these packages
+for further change information.
+
+.. ghc-package-list::
+
+    libraries/array/array.cabal:             Dependency of ``ghc`` library
+    libraries/base/base.cabal:               Core library
+    libraries/binary/binary.cabal:           Dependency of ``ghc`` library
+    libraries/bytestring/bytestring.cabal:   Dependency of ``ghc`` library
+    libraries/Cabal/Cabal/Cabal.cabal:       Dependency of ``ghc-pkg`` utility
+    libraries/Cabal/Cabal-syntax/Cabal-syntax.cabal:  Dependency of ``ghc-pkg`` utility
+    libraries/containers/containers/containers.cabal: Dependency of ``ghc`` library
+    libraries/deepseq/deepseq.cabal:         Dependency of ``ghc`` library
+    libraries/directory/directory.cabal:     Dependency of ``ghc`` library
+    libraries/exceptions/exceptions.cabal:   Dependency of ``ghc`` and ``haskeline`` library
+    libraries/filepath/filepath.cabal:       Dependency of ``ghc`` library
+    compiler/ghc.cabal:                      The compiler itself
+    libraries/ghci/ghci.cabal:               The REPL interface
+    libraries/ghc-boot/ghc-boot.cabal:       Internal compiler library
+    libraries/ghc-boot-th/ghc-boot-th.cabal: Internal compiler library
+    libraries/ghc-compact/ghc-compact.cabal: Core library
+    libraries/ghc-heap/ghc-heap.cabal:       GHC heap-walking library
+    libraries/ghc-prim/ghc-prim.cabal:       Core library
+    libraries/haskeline/haskeline.cabal:     Dependency of ``ghci`` executable
+    libraries/hpc/hpc.cabal:                 Dependency of ``hpc`` executable
+    libraries/integer-gmp/integer-gmp.cabal: Core library
+    libraries/mtl/mtl.cabal:                 Dependency of ``Cabal`` library
+    libraries/parsec/parsec.cabal:           Dependency of ``Cabal`` library
+    libraries/pretty/pretty.cabal:           Dependency of ``ghc`` library
+    libraries/process/process.cabal:         Dependency of ``ghc`` library
+    libraries/stm/stm.cabal:                 Dependency of ``haskeline`` library
+    libraries/template-haskell/template-haskell.cabal: Core library
+    libraries/terminfo/terminfo.cabal:       Dependency of ``haskeline`` library
+    libraries/text/text.cabal:               Dependency of ``Cabal`` library
+    libraries/time/time.cabal:               Dependency of ``ghc`` library
+    libraries/transformers/transformers.cabal: Dependency of ``ghc`` library
+    libraries/unix/unix.cabal:               Dependency of ``ghc`` library
+    libraries/Win32/Win32.cabal:             Dependency of ``ghc`` library
+    libraries/xhtml/xhtml.cabal:             Dependency of ``haddock`` executable


=====================================
docs/users_guide/exts/literals.rst
=====================================
@@ -14,3 +14,4 @@ Literals
     numeric_underscores
     overloaded_strings
     overloaded_labels
+    multiline_strings


=====================================
docs/users_guide/exts/multiline_strings.rst
=====================================
@@ -0,0 +1,86 @@
+.. _multiline-strings:
+
+Multiline string literals
+-------------------------
+
+.. extension:: MultilineStrings
+    :shortdesc: Enable multiline string literals.
+
+    :since: 9.12.1
+
+    Enable multiline string literals.
+
+With this extension, GHC recognizes multiline string literals delimited by ``"""``. Indentation is automatically stripped, and the literal is desugared to a normal string literal, so it works as expected with ``OverloadedStrings`` and any other functionality that applies to string literals. The indentation that is stripped can be informally defined as "the longest prefix of whitespace shared by all lines in the string, excluding the first line and any whitespace-only lines".
+
+Normal string literals are lexed, then string gaps are collapsed, then escape characters are resolved. Multiline string literals add the following post-processing steps between collapsing string gaps and resolving escape characters:
+
+#. Split the string by newlines
+
+#. Replace leading tabs with spaces up to the next tab stop
+
+#. Remove common whitespace prefix in every line
+
+#. If a line only contains whitespace, remove all of the whitespace
+
+#. Join the string back with ``\n`` delimiters
+
+#. If the first character of the string is a newline, remove it
+
+Examples
+~~~~~~~~
+
+.. code-blocks use plain text because the Haskell syntax for pygments doesn't
+   support multiline strings yet. Remove if/when pygments adds multiline
+   strings to Haskell
+
++-----------------------+------------------------+---------------------------+
+| Expression            | Output                 | Notes                     |
++=======================+========================+===========================+
+| .. code-block:: text  | .. code-block::        |                           |
+|                       |                        |                           |
+|    """                |       "Line 1\n"       |                           |
+|    Line 1             |    ++ "Line 2\n"       |                           |
+|    Line 2             |    ++ "Line 3\n"       |                           |
+|    Line 3             |                        |                           |
+|    """                |                        |                           |
++-----------------------+------------------------+---------------------------+
+| .. code-block:: text  | .. code-block::        |                           |
+|                       |                        | Characters on the same    |
+|    """Test            |       "Test\n"         | line as the delimiter are |
+|    Line 1             |    ++ "Line 1\n"       | still included            |
+|    Line 2             |    ++ "Line 2\n"       |                           |
+|    Line 3             |    ++ "Line 3\n"       |                           |
+|    """                |                        |                           |
++-----------------------+------------------------+---------------------------+
+| .. code-block:: text  | .. code-block::        |                           |
+|                       |                        | Omit the trailing newline |
+|    """                |       "Line 1\n"       | with string gaps          |
+|    Line 1             |    ++ "Line 2\n"       |                           |
+|    Line 2             |    ++ "Line 3"         |                           |
+|    Line 3\            |                        |                           |
+|    \"""               |                        |                           |
++-----------------------+------------------------+---------------------------+
+| .. code-block:: text  | .. code-block::        |                           |
+|                       |                        | Double quotes don't need  |
+|    """                |       "\"Hello\"\n"    | to be escaped unless      |
+|    "Hello"            |    ++ "\"\"\"\n"       | they're triple quoted     |
+|    \"\"\"             |    ++ "\"\"\"\n"       |                           |
+|    \"""               |                        |                           |
+|    """                |                        |                           |
++-----------------------+------------------------+---------------------------+
+| .. code-block:: text  | .. code-block::        |                           |
+|                       |                        | Only common indentation   |
+|    """                |       "<div>\n"        | is stripped               |
+|      <div>            |    ++ "  <p>ABC</p>\n" |                           |
+|        <p>ABC</p>     |    ++ "</div>\n"       |                           |
+|      </div>           |                        |                           |
+|    """                |                        |                           |
++-----------------------+------------------------+---------------------------+
+| .. code-block:: text  | .. code-block::        |                           |
+|                       |                        | Use ``\&`` to keep        |
+|    """                |       "  Line 1\n"     | leading indentation for   |
+|      \&  Line 1       |    ++ "  Line 2\n"     | each line                 |
+|      \&  Line 2       |    ++ "  Line 3\n"     |                           |
+|      \&  Line 3       |                        |                           |
+|    """                |                        |                           |
++-----------------------+------------------------+---------------------------+


=====================================
utils/check-exact/ExactPrint.hs
=====================================
@@ -4773,6 +4773,7 @@ hsLit2String lit =
     HsChar       src v   -> toSourceTextWithSuffix src v ""
     HsCharPrim   src p   -> toSourceTextWithSuffix src p ""
     HsString     src v   -> toSourceTextWithSuffix src v ""
+    HsMultilineString src v -> toSourceTextWithSuffix src v ""
     HsStringPrim src v   -> toSourceTextWithSuffix src v ""
     HsInt        _ (IL src _ v)   -> toSourceTextWithSuffix src v ""
     HsIntPrim    src v   -> toSourceTextWithSuffix src v ""


=====================================
utils/haddock
=====================================
@@ -1 +1 @@
-Subproject commit 9fcf5cf499102baf9e00986bb8b54b80ec5ffc81
+Subproject commit 980facc88c8f321dce624945502402ad502093b8



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/61d7581f70801e2bdfafb6f1f4ef3f97c1042b22...f840b011609b18e3fb2b6b7f7eeab406a0212b7b

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/61d7581f70801e2bdfafb6f1f4ef3f97c1042b22...f840b011609b18e3fb2b6b7f7eeab406a0212b7b
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20240217/e9b19932/attachment-0001.html>


More information about the ghc-commits mailing list