[Git][ghc/ghc][wip/multiline-strings] [ci skip] wip

Brandon Chinn (@brandonchinn178) gitlab at gitlab.haskell.org
Sun Feb 4 05:34:11 UTC 2024



Brandon Chinn pushed to branch wip/multiline-strings at Glasgow Haskell Compiler / GHC


Commits:
c658ce3d by Brandon Chinn at 2024-02-03T21:33:24-08:00
[ci skip] wip

- - - - -


5 changed files:

- compiler/GHC/Hs/Lit.hs
- compiler/GHC/HsToCore/Match/Literal.hs
- compiler/GHC/Parser/Lexer.x
- compiler/GHC/Rename/Expr.hs
- testsuite/tests/parser/should_run/MultilineStrings.hs


Changes:

=====================================
compiler/GHC/Hs/Lit.hs
=====================================
@@ -240,7 +240,7 @@ pmPprHsLit :: HsLit (GhcPass x) -> SDoc
 pmPprHsLit (HsChar _ c)       = pprHsChar c
 pmPprHsLit (HsCharPrim _ c)   = pprHsChar c
 pmPprHsLit (HsString st s)    = pprWithSourceText st (pprHsString s)
-pmPprHsLit (HsMultilineString st s) = pprWithSourceText st (pprHsString $ processMultilineStringLiteral s)
+pmPprHsLit (HsMultilineString st s) = pprWithSourceText st (pprHsString $ processMultilineStringLiteral st s)
 pmPprHsLit (HsStringPrim _ s) = pprHsBytes s
 pmPprHsLit (HsInt _ i)        = integer (il_value i)
 pmPprHsLit (HsIntPrim _ i)    = integer i
@@ -266,15 +266,20 @@ Multiline string literals were added following the acceptance of the
 proposal: https://github.com/ghc-proposals/ghc-proposals/pull/569
 
 Multiline string literals are syntax sugar for normal string literals,
-post processed with the following steps:
-
-1. Split the string by newlines
-2. Convert leading tabs into spaces
+with an extra post processing step on the SourceText. We do this on
+the SourceText instead of the parsed output because the lexer resolves
+escaped characters, but we need the actual escaped characters here.
+
+The string is post-process with the following steps:
+1. Collapse string gaps
+2. Split the string by newlines
+3. Convert leading tabs into spaces
     * In each line, any tabs preceding non-whitespace characters are replaced with spaces up to the next tab stop
-3. Remove common whitespace prefix in every line
+4. Remove common whitespace prefix in every line
     * See below
-4. Join the string back with `\n` delimiters
-5. If the first character of the string is a newline, remove it
+5. Join the string back with `\n` delimiters
+6. If the first character of the string is a newline, remove it
+7. Interpret escaped characters
 
 The common whitespace prefix can be informally defined as "The longest
 prefix of whitespace shared by all lines in the string, excluding the
@@ -291,14 +296,22 @@ It's more precisely defined with the following algorithm:
 -}
 
 -- | See Note [Multiline string literals]
-processMultilineStringLiteral :: FastString -> FastString
-processMultilineStringLiteral = mkFastString . process . unpackFS
+processMultilineStringLiteral :: SourceText -> FastString -> FastString
+processMultilineStringLiteral = \case
+  SourceText s | Just s' <- fromSourceText s -> \_ -> mkFastString $ process s'
+  -- if we don't get a valid SourceText, be safe and don't post-process
+  _ -> id
   where
     (.>) :: (a -> b) -> (b -> c) -> (a -> c)
     (.>) = flip (.)
 
+    fromSourceText s =
+      let stripSuffix x = fmap reverse . stripPrefix x . reverse
+       in stripSuffix "\"\"\"" =<< stripPrefix "\"\"\"" (unpackFS s)
+
     process =
-         splitLines
+         collapseStringGaps
+      .> splitLines
       .> convertLeadingTabs
       .> rmCommonWhitespacePrefix
       .> joinLines
@@ -323,6 +336,7 @@ processMultilineStringLiteral = mkFastString . process . unpackFS
                in replicate fill ' ' ++ convertLine (col + fill) cs
             c : cs -> c : cs
        in map (convertLine 0)
+
     rmCommonWhitespacePrefix strLines =
       let
         excludeLines =
@@ -334,6 +348,7 @@ processMultilineStringLiteral = mkFastString . process . unpackFS
             Just strLines' -> Foldable1.minimum $ NonEmpty.map (length . takeWhile (== ' ')) strLines'
       in
         map (drop commonWSPrefix) strLines
+        -- map (drop commonWSPrefix) . (\s -> traceShow ("rmCommonWhitespacePrefix", commonWSPrefix, excludeLines strLines, s) s) $ strLines
 
     joinLines = intercalate "\n"
 


=====================================
compiler/GHC/HsToCore/Match/Literal.hs
=====================================
@@ -73,6 +73,7 @@ import Data.List.NonEmpty (NonEmpty(..))
 import qualified Data.List.NonEmpty as NEL
 import Data.Word
 import GHC.Real ( Ratio(..), numerator, denominator )
+import Debug.Trace
 
 {-
 ************************************************************************
@@ -121,7 +122,7 @@ dsLit l = do
     HsDoublePrim _ fl -> return (Lit (LitDouble (rationalFromFractionalLit fl)))
     HsChar _ c       -> return (mkCharExpr c)
     HsString _ str   -> mkStringExprFS str
-    HsMultilineString _ str -> mkStringExprFS $ processMultilineStringLiteral str
+    HsMultilineString st str -> mkStringExprFS $ processMultilineStringLiteral st str
     HsInteger _ i _  -> return (mkIntegerExpr platform i)
     HsInt _ i        -> return (mkIntExpr platform (il_value i))
     HsRat _ fl ty    -> dsFractionalLitToRational fl ty


=====================================
compiler/GHC/Parser/Lexer.x
=====================================
@@ -949,6 +949,7 @@ data Token
 
   | ITchar     SourceText Char       -- Note [Literal source text] in "GHC.Types.SourceText"
   | ITstring   SourceText FastString -- Note [Literal source text] in "GHC.Types.SourceText"
+  -- TODO: change haddock-api?
   | ITmultilinestring SourceText FastString -- Note [Literal source text] in "GHC.Types.SourceText"
   | ITinteger  IntegralLit           -- Note [Literal source text] in "GHC.Types.SourceText"
   | ITrational FractionalLit


=====================================
compiler/GHC/Rename/Expr.hs
=====================================
@@ -299,6 +299,7 @@ rnExpr (HsOverLabel _ src v)
     hs_ty_arg = mkEmptyWildCardBndrs $ wrapGenSpan $
                 HsTyLit noExtField (HsStrTy NoSourceText v)
 
+-- TODO: how to handle multiline strings? Do I need to add HsIsStringMultiline?
 rnExpr (HsLit x lit@(HsString src s))
   = do { opt_OverloadedStrings <- xoptM LangExt.OverloadedStrings
        ; if opt_OverloadedStrings then


=====================================
testsuite/tests/parser/should_run/MultilineStrings.hs
=====================================
@@ -8,6 +8,7 @@ Test the MultilineStrings proposal
 https://github.com/ghc-proposals/ghc-proposals/blob/master/proposals/0569-multiline-strings.rst
 -}
 
+-- TODO: add failing test case for unicode smart quotes
 main :: IO ()
 main = do
   putStrLn "-- 1"



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/c658ce3db460fdac1d9c4668de9b7d5a6b85e192

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/c658ce3db460fdac1d9c4668de9b7d5a6b85e192
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20240204/b407c293/attachment-0001.html>


More information about the ghc-commits mailing list