[Git][ghc/ghc][wip/T25753] compiler: Eliminate quadratic behavior of command-line parsing
Ben Gamari (@bgamari)
gitlab at gitlab.haskell.org
Tue Feb 18 22:40:38 UTC 2025
Ben Gamari pushed to branch wip/T25753 at Glasgow Haskell Compiler / GHC
Commits:
796c5a95 by Ben Gamari at 2025-02-18T17:40:31-05:00
compiler: Eliminate quadratic behavior of command-line parsing
Previously `GHC.Driver.CmdLine.findArg` would be called for each
command-line argument, checking each against the all of the known flags
with `Data.List.stripSuffix`. Fix this by instead building a trie of
known flags for matching.
Fixes #25763.
- - - - -
3 changed files:
- + compiler/GHC/Data/StringTrie.hs
- compiler/GHC/Driver/CmdLine.hs
- compiler/ghc.cabal.in
Changes:
=====================================
compiler/GHC/Data/StringTrie.hs
=====================================
@@ -0,0 +1,62 @@
+module GHC.Data.StringTrie
+ ( StringTrie
+ , empty
+ , insert
+ , fromList
+ , lookup
+ , lookup'
+ ) where
+
+import GHC.Prelude hiding (lookup)
+
+import Data.Maybe (fromMaybe)
+import qualified Data.Map.Strict as M
+
+data StringTrie a
+ = STNode !(Maybe a) !(M.Map Char (StringTrie a))
+
+empty :: StringTrie a
+empty = STNode Nothing M.empty
+
+insert :: String -> a -> StringTrie a -> StringTrie a
+insert k0 v = go k0
+ where
+ --go :: String -> StringTrie a -> StringTrie a
+ go [] (STNode _ children) = STNode (Just v) children
+ go (x:xs) (STNode end children) =
+ STNode end (M.insert x child children)
+ where
+ child = go xs (fromMaybe empty $ M.lookup x children)
+
+lookup :: String -> StringTrie a -> Maybe (a, String)
+lookup = lookup' (\_ _ -> True)
+
+-- | Returns the value associated with the longest match prefix as well
+-- as the non-matching suffix.
+lookup'
+ :: (a -> String -> Bool)
+ -- ^ a predicate on the value and non-matching suffix
+ -- filtering acceptable matches
+ -> String
+ -- ^ key to llookup
+ -> StringTrie a
+ -> Maybe (a, String)
+lookup' pred = go Nothing
+ where
+ --go :: Maybe (a, String) -> String -> StringTrie a -> Maybe (a, String)
+ go prev [] (STNode (Just v) _) = Just (v, "")
+ go prev [] (STNode Nothing _) = prev
+ go prev match@(c:rest) (STNode end children) =
+ case M.lookup c children of
+ Nothing -> prev'
+ Just child -> go prev' rest child
+ where
+ prev' = case end of
+ Just v | pred v match -> Just (v, match)
+ _ -> prev
+
+fromList :: [(String, a)] -> StringTrie a
+fromList = foldl' (\trie (k,v) -> insert k v trie) empty
+
+x :: StringTrie String
+x = fromList [ (k,k) | k <- ["hello", "hell", "world"] ]
=====================================
compiler/GHC/Driver/CmdLine.hs
=====================================
@@ -1,3 +1,4 @@
+{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE DeriveFunctor #-}
{-# LANGUAGE RankNTypes #-}
@@ -27,6 +28,7 @@ import GHC.Prelude
import GHC.Utils.Misc
import GHC.Utils.Panic
import GHC.Data.Bag
+import qualified GHC.Data.StringTrie as StringTrie
import GHC.Types.SrcLoc
import GHC.Types.Error
import GHC.Utils.Error
@@ -34,8 +36,7 @@ import GHC.Driver.Errors.Types
import GHC.Driver.Errors.Ppr () -- instance Diagnostic DriverMessage
import GHC.Utils.Outputable (text)
-import Data.Function
-import Data.List (sortBy, intercalate, stripPrefix)
+import Data.List (intercalate)
import Data.Word
import GHC.ResponseFile
@@ -182,6 +183,8 @@ processArgs spec args handleRespFile = do
(errs, warns, spare) <- runEwM action
return (spare, bagToList errs, warns)
where
+ !trie = StringTrie.fromList [ (flagName flag, flag) | flag <- spec ]
+
action = process args []
-- process :: [Located String] -> [Located String] -> EwM m [Located String]
@@ -192,7 +195,7 @@ processArgs spec args handleRespFile = do
process (resp_args ++ args) spare
process (locArg@(L _ ('-' : arg)) : args) spare =
- case findArg spec arg of
+ case findArg trie arg of
Just (rest, opt_kind) ->
case processOneArg opt_kind rest arg args of
Left err ->
@@ -249,31 +252,27 @@ processOneArg opt_kind rest arg args
OptPrefix f -> Right (f rest_no_eq, args)
AnySuffix f -> Right (f dash_arg, args)
-findArg :: [Flag m] -> String -> Maybe (String, OptKind m)
-findArg spec arg =
- case sortBy (compare `on` (length . fst)) -- prefer longest matching flag
- [ (removeSpaces rest, optKind)
- | flag <- spec,
- let optKind = flagOptKind flag,
- Just rest <- [stripPrefix (flagName flag) arg],
- arg_ok optKind rest arg ]
- of
- [] -> Nothing
- (one:_) -> Just one
-
-arg_ok :: OptKind t -> [Char] -> String -> Bool
-arg_ok (NoArg _) rest _ = null rest
-arg_ok (HasArg _) _ _ = True
-arg_ok (SepArg _) rest _ = null rest
-arg_ok (Prefix _) _ _ = True -- Missing argument checked for in processOneArg t
- -- to improve error message (#12625)
-arg_ok (OptIntSuffix _) _ _ = True
-arg_ok (IntSuffix _) _ _ = True
-arg_ok (Word64Suffix _) _ _ = True
-arg_ok (FloatSuffix _) _ _ = True
-arg_ok (OptPrefix _) _ _ = True
-arg_ok (PassFlag _) rest _ = null rest
-arg_ok (AnySuffix _) _ _ = True
+findArg :: StringTrie.StringTrie (Flag m) -> String -> Maybe (String, OptKind m)
+findArg trie arg =
+ case StringTrie.lookup' okay arg trie of
+ Nothing -> Nothing
+ Just (flag, rest) -> Just (removeSpaces rest, flagOptKind flag)
+ where
+ okay :: Flag m -> String -> Bool
+ okay flag rest =
+ case flagOptKind flag of
+ NoArg _ -> null rest
+ HasArg _ -> True
+ SepArg _ -> null rest
+ Prefix _ -> True -- Missing argument checked for in processOneArg t
+ -- to improve error message (#12625)
+ OptIntSuffix _ -> True
+ IntSuffix _ -> True
+ Word64Suffix _ -> True
+ FloatSuffix _ -> True
+ OptPrefix _ -> True
+ PassFlag _ -> null rest
+ AnySuffix _ -> True
-- | Parse an Int
--
=====================================
compiler/ghc.cabal.in
=====================================
@@ -464,6 +464,7 @@ Library
GHC.Data.Stream
GHC.Data.Strict
GHC.Data.StringBuffer
+ GHC.Data.StringTrie
GHC.Data.TrieMap
GHC.Data.Unboxed
GHC.Data.UnionFind
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/796c5a95d40a62ddcc749364f8af31d72ed7c2bf
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/796c5a95d40a62ddcc749364f8af31d72ed7c2bf
You're receiving this email because of your account on gitlab.haskell.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20250218/4578e3a3/attachment-0001.html>
More information about the ghc-commits
mailing list