[Git][ghc/ghc][wip/strings] 3 commits: Inline alexScanUser
Brandon Chinn (@brandonchinn178)
gitlab at gitlab.haskell.org
Wed Sep 18 05:07:36 UTC 2024
Brandon Chinn pushed to branch wip/strings at Glasgow Haskell Compiler / GHC
Commits:
fb6c36bd by Brandon Chinn at 2024-09-17T21:17:20-07:00
Inline alexScanUser
- - - - -
c3780088 by Brandon Chinn at 2024-09-17T21:17:23-07:00
Use dlist to reduce allocations
- - - - -
4607c624 by Brandon Chinn at 2024-09-17T21:17:23-07:00
Duplicate processChars for specialization
- - - - -
2 changed files:
- compiler/GHC/Parser/Lexer.x
- compiler/GHC/Parser/String.hs
Changes:
=====================================
compiler/GHC/Parser/Lexer.x
=====================================
@@ -3551,6 +3551,11 @@ topNoLayoutContainsCommas [] = False
topNoLayoutContainsCommas (ALRLayout _ _ : ls) = topNoLayoutContainsCommas ls
topNoLayoutContainsCommas (ALRNoLayout b _ : _) = b
+-- If the generated alexScan/alexScanUser functions are called multiple times
+-- in this file, alexScanUser gets broken out into a separate function and
+-- increases memory usage. Make sure GHC inlines this function and optimizes it.
+{-# INLINE alexScanUser #-}
+
lexToken :: P (PsLocated Token)
lexToken = do
inp@(AI loc1 buf) <- getInput
=====================================
compiler/GHC/Parser/String.hs
=====================================
@@ -18,11 +18,9 @@ import GHC.Prelude hiding (getChar)
import Control.Arrow ((>>>))
import Control.Monad (when)
import Data.Char (chr, ord)
-import qualified Data.Foldable as Seq (toList)
import qualified Data.Foldable1 as Foldable1
import qualified Data.List.NonEmpty as NonEmpty
import Data.Maybe (listToMaybe, mapMaybe)
-import qualified Data.Sequence as Seq
import GHC.Data.StringBuffer (StringBuffer)
import qualified GHC.Data.StringBuffer as StringBuffer
import GHC.Parser.CharClass (
@@ -40,7 +38,7 @@ type BufPos = Int
data StringLexError = StringLexError LexErr BufPos
lexString :: Int -> StringBuffer -> Either StringLexError String
-lexString = lexStringWith processChars
+lexString = lexStringWith processChars processChars
where
processChars :: HasChar c => [c] -> Either (c, LexErr) [c]
processChars =
@@ -68,19 +66,24 @@ So what we'll do is do two passes. The first pass is optimistic; just convert
to a plain String and process it. If this results in an error, we do a second
pass, this time where each character is annotated with its position. Now, the
error has all the information it needs.
+
+Ideally, lexStringWith would take a single (forall c. HasChar c => ...) function,
+but to help the specializer, we pass it in twice to concretize it for the two
+types we actually use.
-}
-- | See Note [Lexing strings]
lexStringWith ::
- (forall c. HasChar c => [c] -> Either (c, LexErr) [c])
+ ([Char] -> Either (Char, LexErr) [Char])
+ -> ([CharPos] -> Either (CharPos, LexErr) [CharPos])
-> Int
-> StringBuffer
-> Either StringLexError String
-lexStringWith processChars len buf =
+lexStringWith processChars processCharsPos len buf =
case processChars $ bufferChars buf len of
Right s -> Right s
Left _ ->
- case processChars $ bufferLocatedChars buf len of
+ case processCharsPos $ bufferLocatedChars buf len of
Right _ -> panic "expected lex error on second pass"
Left ((_, pos), e) -> Left $ StringLexError e pos
@@ -103,7 +106,9 @@ pattern Char c <- (getChar -> c)
bufferChars :: StringBuffer -> Int -> [Char]
bufferChars = StringBuffer.lexemeToString
-bufferLocatedChars :: StringBuffer -> Int -> [(Char, BufPos)]
+type CharPos = (Char, BufPos)
+
+bufferLocatedChars :: StringBuffer -> Int -> [CharPos]
bufferLocatedChars initialBuf len = go initialBuf
where
go buf
@@ -133,17 +138,16 @@ collapseGaps = go
[] -> panic "gap unexpectedly ended"
resolveEscapes :: HasChar c => [c] -> Either (c, LexErr) [c]
-resolveEscapes = go Seq.empty
+resolveEscapes = go dlistEmpty
where
- -- FIXME.bchinn: see if dlist/reverselist improves performance
go !acc = \case
- [] -> pure $ Seq.toList acc
+ [] -> pure $ dlistToList acc
Char '\\' : Char '&' : cs -> go acc cs
backslash@(Char '\\') : cs ->
case resolveEscapeChar cs of
- Right (esc, cs') -> go (acc Seq.|> setChar esc backslash) cs'
+ Right (esc, cs') -> go (acc `dlistSnoc` setChar esc backslash) cs'
Left (c, e) -> Left (c, e)
- c : cs -> go (acc Seq.|> c) cs
+ c : cs -> go (acc `dlistSnoc` c) cs
-- -----------------------------------------------------------------------------
-- Escape characters
@@ -252,7 +256,7 @@ isSingleSmartQuote = \case
-- and rejoining lines, and instead manually find newline characters,
-- for performance.
lexMultilineString :: Int -> StringBuffer -> Either StringLexError String
-lexMultilineString = lexStringWith processChars
+lexMultilineString = lexStringWith processChars processChars
where
processChars :: HasChar c => [c] -> Either (c, LexErr) [c]
processChars =
@@ -373,3 +377,17 @@ It's more precisely defined with the following algorithm:
* Lines with only whitespace characters
3. Calculate the longest prefix of whitespace shared by all lines in the remaining list
-}
+
+-- -----------------------------------------------------------------------------
+-- DList
+
+newtype DList a = DList ([a] -> [a])
+
+dlistEmpty :: DList a
+dlistEmpty = DList id
+
+dlistToList :: DList a -> [a]
+dlistToList (DList f) = f []
+
+dlistSnoc :: DList a -> a -> DList a
+dlistSnoc (DList f) x = DList (f . (x :))
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/d70e41ae439b6a6f2967b43d977b8af6a8febe63...4607c624e4cf4cf27ba9a9c443b2cd203a43b883
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/d70e41ae439b6a6f2967b43d977b8af6a8febe63...4607c624e4cf4cf27ba9a9c443b2cd203a43b883
You're receiving this email because of your account on gitlab.haskell.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20240918/00d15070/attachment-0001.html>
More information about the ghc-commits
mailing list