[Git][ghc/ghc][wip/multiline-strings] [ci skip] wip
Brandon Chinn (@brandonchinn178)
gitlab at gitlab.haskell.org
Sun Feb 4 05:34:11 UTC 2024
Brandon Chinn pushed to branch wip/multiline-strings at Glasgow Haskell Compiler / GHC
Commits:
c658ce3d by Brandon Chinn at 2024-02-03T21:33:24-08:00
[ci skip] wip
- - - - -
5 changed files:
- compiler/GHC/Hs/Lit.hs
- compiler/GHC/HsToCore/Match/Literal.hs
- compiler/GHC/Parser/Lexer.x
- compiler/GHC/Rename/Expr.hs
- testsuite/tests/parser/should_run/MultilineStrings.hs
Changes:
=====================================
compiler/GHC/Hs/Lit.hs
=====================================
@@ -240,7 +240,7 @@ pmPprHsLit :: HsLit (GhcPass x) -> SDoc
pmPprHsLit (HsChar _ c) = pprHsChar c
pmPprHsLit (HsCharPrim _ c) = pprHsChar c
pmPprHsLit (HsString st s) = pprWithSourceText st (pprHsString s)
-pmPprHsLit (HsMultilineString st s) = pprWithSourceText st (pprHsString $ processMultilineStringLiteral s)
+pmPprHsLit (HsMultilineString st s) = pprWithSourceText st (pprHsString $ processMultilineStringLiteral st s)
pmPprHsLit (HsStringPrim _ s) = pprHsBytes s
pmPprHsLit (HsInt _ i) = integer (il_value i)
pmPprHsLit (HsIntPrim _ i) = integer i
@@ -266,15 +266,20 @@ Multiline string literals were added following the acceptance of the
proposal: https://github.com/ghc-proposals/ghc-proposals/pull/569
Multiline string literals are syntax sugar for normal string literals,
-post processed with the following steps:
-
-1. Split the string by newlines
-2. Convert leading tabs into spaces
+with an extra post processing step on the SourceText. We do this on
+the SourceText instead of the parsed output because the lexer resolves
+escaped characters, but we need the actual escaped characters here.
+
+The string is post-process with the following steps:
+1. Collapse string gaps
+2. Split the string by newlines
+3. Convert leading tabs into spaces
* In each line, any tabs preceding non-whitespace characters are replaced with spaces up to the next tab stop
-3. Remove common whitespace prefix in every line
+4. Remove common whitespace prefix in every line
* See below
-4. Join the string back with `\n` delimiters
-5. If the first character of the string is a newline, remove it
+5. Join the string back with `\n` delimiters
+6. If the first character of the string is a newline, remove it
+7. Interpret escaped characters
The common whitespace prefix can be informally defined as "The longest
prefix of whitespace shared by all lines in the string, excluding the
@@ -291,14 +296,22 @@ It's more precisely defined with the following algorithm:
-}
-- | See Note [Multiline string literals]
-processMultilineStringLiteral :: FastString -> FastString
-processMultilineStringLiteral = mkFastString . process . unpackFS
+processMultilineStringLiteral :: SourceText -> FastString -> FastString
+processMultilineStringLiteral = \case
+ SourceText s | Just s' <- fromSourceText s -> \_ -> mkFastString $ process s'
+ -- if we don't get a valid SourceText, be safe and don't post-process
+ _ -> id
where
(.>) :: (a -> b) -> (b -> c) -> (a -> c)
(.>) = flip (.)
+ fromSourceText s =
+ let stripSuffix x = fmap reverse . stripPrefix x . reverse
+ in stripSuffix "\"\"\"" =<< stripPrefix "\"\"\"" (unpackFS s)
+
process =
- splitLines
+ collapseStringGaps
+ .> splitLines
.> convertLeadingTabs
.> rmCommonWhitespacePrefix
.> joinLines
@@ -323,6 +336,7 @@ processMultilineStringLiteral = mkFastString . process . unpackFS
in replicate fill ' ' ++ convertLine (col + fill) cs
c : cs -> c : cs
in map (convertLine 0)
+
rmCommonWhitespacePrefix strLines =
let
excludeLines =
@@ -334,6 +348,7 @@ processMultilineStringLiteral = mkFastString . process . unpackFS
Just strLines' -> Foldable1.minimum $ NonEmpty.map (length . takeWhile (== ' ')) strLines'
in
map (drop commonWSPrefix) strLines
+ -- map (drop commonWSPrefix) . (\s -> traceShow ("rmCommonWhitespacePrefix", commonWSPrefix, excludeLines strLines, s) s) $ strLines
joinLines = intercalate "\n"
=====================================
compiler/GHC/HsToCore/Match/Literal.hs
=====================================
@@ -73,6 +73,7 @@ import Data.List.NonEmpty (NonEmpty(..))
import qualified Data.List.NonEmpty as NEL
import Data.Word
import GHC.Real ( Ratio(..), numerator, denominator )
+import Debug.Trace
{-
************************************************************************
@@ -121,7 +122,7 @@ dsLit l = do
HsDoublePrim _ fl -> return (Lit (LitDouble (rationalFromFractionalLit fl)))
HsChar _ c -> return (mkCharExpr c)
HsString _ str -> mkStringExprFS str
- HsMultilineString _ str -> mkStringExprFS $ processMultilineStringLiteral str
+ HsMultilineString st str -> mkStringExprFS $ processMultilineStringLiteral st str
HsInteger _ i _ -> return (mkIntegerExpr platform i)
HsInt _ i -> return (mkIntExpr platform (il_value i))
HsRat _ fl ty -> dsFractionalLitToRational fl ty
=====================================
compiler/GHC/Parser/Lexer.x
=====================================
@@ -949,6 +949,7 @@ data Token
| ITchar SourceText Char -- Note [Literal source text] in "GHC.Types.SourceText"
| ITstring SourceText FastString -- Note [Literal source text] in "GHC.Types.SourceText"
+ -- TODO: change haddock-api?
| ITmultilinestring SourceText FastString -- Note [Literal source text] in "GHC.Types.SourceText"
| ITinteger IntegralLit -- Note [Literal source text] in "GHC.Types.SourceText"
| ITrational FractionalLit
=====================================
compiler/GHC/Rename/Expr.hs
=====================================
@@ -299,6 +299,7 @@ rnExpr (HsOverLabel _ src v)
hs_ty_arg = mkEmptyWildCardBndrs $ wrapGenSpan $
HsTyLit noExtField (HsStrTy NoSourceText v)
+-- TODO: how to handle multiline strings? Do I need to add HsIsStringMultiline?
rnExpr (HsLit x lit@(HsString src s))
= do { opt_OverloadedStrings <- xoptM LangExt.OverloadedStrings
; if opt_OverloadedStrings then
=====================================
testsuite/tests/parser/should_run/MultilineStrings.hs
=====================================
@@ -8,6 +8,7 @@ Test the MultilineStrings proposal
https://github.com/ghc-proposals/ghc-proposals/blob/master/proposals/0569-multiline-strings.rst
-}
+-- TODO: add failing test case for unicode smart quotes
main :: IO ()
main = do
putStrLn "-- 1"
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/c658ce3db460fdac1d9c4668de9b7d5a6b85e192
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/c658ce3db460fdac1d9c4668de9b7d5a6b85e192
You're receiving this email because of your account on gitlab.haskell.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20240204/b407c293/attachment-0001.html>
More information about the ghc-commits
mailing list