[Git][ghc/ghc][wip/T25753] compiler: Eliminate quadratic behavior of command-line parsing

Ben Gamari (@bgamari) gitlab at gitlab.haskell.org
Tue Feb 18 22:40:38 UTC 2025



Ben Gamari pushed to branch wip/T25753 at Glasgow Haskell Compiler / GHC


Commits:
796c5a95 by Ben Gamari at 2025-02-18T17:40:31-05:00
compiler: Eliminate quadratic behavior of command-line parsing

Previously `GHC.Driver.CmdLine.findArg` would be called for each
command-line argument, checking each against the all of the known flags
with `Data.List.stripSuffix`. Fix this by instead building a trie of
known flags for matching.

Fixes #25763.

- - - - -


3 changed files:

- + compiler/GHC/Data/StringTrie.hs
- compiler/GHC/Driver/CmdLine.hs
- compiler/ghc.cabal.in


Changes:

=====================================
compiler/GHC/Data/StringTrie.hs
=====================================
@@ -0,0 +1,62 @@
+module GHC.Data.StringTrie
+  ( StringTrie
+  , empty
+  , insert
+  , fromList
+  , lookup
+  , lookup'
+  ) where
+
+import GHC.Prelude hiding (lookup)
+
+import Data.Maybe (fromMaybe)
+import qualified Data.Map.Strict as M
+
+data StringTrie a
+    = STNode !(Maybe a) !(M.Map Char (StringTrie a))
+
+empty :: StringTrie a
+empty = STNode Nothing M.empty
+
+insert :: String -> a -> StringTrie a -> StringTrie a
+insert k0 v = go k0
+  where
+    --go :: String -> StringTrie a -> StringTrie a
+    go [] (STNode _ children) = STNode (Just v) children
+    go (x:xs) (STNode end children) =
+        STNode end (M.insert x child children)
+      where
+        child = go xs (fromMaybe empty $ M.lookup x children)
+
+lookup :: String -> StringTrie a -> Maybe (a, String)
+lookup = lookup' (\_ _ -> True)
+
+-- | Returns the value associated with the longest match prefix as well
+-- as the non-matching suffix.
+lookup'
+    :: (a -> String -> Bool)
+       -- ^ a predicate on the value and non-matching suffix
+       -- filtering acceptable matches
+    -> String
+       -- ^ key to llookup
+    -> StringTrie a
+    -> Maybe (a, String)
+lookup' pred = go Nothing
+  where
+    --go :: Maybe (a, String) -> String -> StringTrie a -> Maybe (a, String)
+    go prev []       (STNode (Just v) _)   = Just (v, "")
+    go prev []       (STNode Nothing  _)   = prev
+    go prev match@(c:rest) (STNode end children) =
+      case M.lookup c children of
+        Nothing    -> prev'
+        Just child -> go prev' rest child
+      where
+        prev' = case end of
+                  Just v | pred v match -> Just (v, match)
+                  _ -> prev
+
+fromList :: [(String, a)] -> StringTrie a
+fromList = foldl' (\trie (k,v) -> insert k v trie) empty
+
+x :: StringTrie String
+x = fromList [ (k,k) | k <- ["hello", "hell", "world"] ]


=====================================
compiler/GHC/Driver/CmdLine.hs
=====================================
@@ -1,3 +1,4 @@
+{-# LANGUAGE BangPatterns #-}
 {-# LANGUAGE DeriveFunctor #-}
 {-# LANGUAGE RankNTypes #-}
 
@@ -27,6 +28,7 @@ import GHC.Prelude
 import GHC.Utils.Misc
 import GHC.Utils.Panic
 import GHC.Data.Bag
+import qualified GHC.Data.StringTrie as StringTrie
 import GHC.Types.SrcLoc
 import GHC.Types.Error
 import GHC.Utils.Error
@@ -34,8 +36,7 @@ import GHC.Driver.Errors.Types
 import GHC.Driver.Errors.Ppr () -- instance Diagnostic DriverMessage
 import GHC.Utils.Outputable (text)
 
-import Data.Function
-import Data.List (sortBy, intercalate, stripPrefix)
+import Data.List (intercalate)
 import Data.Word
 
 import GHC.ResponseFile
@@ -182,6 +183,8 @@ processArgs spec args handleRespFile = do
     (errs, warns, spare) <- runEwM action
     return (spare, bagToList errs, warns)
   where
+    !trie = StringTrie.fromList [ (flagName flag, flag) | flag <- spec ]
+
     action = process args []
 
     -- process :: [Located String] -> [Located String] -> EwM m [Located String]
@@ -192,7 +195,7 @@ processArgs spec args handleRespFile = do
         process (resp_args ++ args) spare
 
     process (locArg@(L _ ('-' : arg)) : args) spare =
-        case findArg spec arg of
+        case findArg trie arg of
             Just (rest, opt_kind) ->
                 case processOneArg opt_kind rest arg args of
                     Left err ->
@@ -249,31 +252,27 @@ processOneArg opt_kind rest arg args
         OptPrefix f       -> Right (f rest_no_eq, args)
         AnySuffix f       -> Right (f dash_arg, args)
 
-findArg :: [Flag m] -> String -> Maybe (String, OptKind m)
-findArg spec arg =
-    case sortBy (compare `on` (length . fst)) -- prefer longest matching flag
-           [ (removeSpaces rest, optKind)
-           | flag <- spec,
-             let optKind  = flagOptKind flag,
-             Just rest <- [stripPrefix (flagName flag) arg],
-             arg_ok optKind rest arg ]
-    of
-        []      -> Nothing
-        (one:_) -> Just one
-
-arg_ok :: OptKind t -> [Char] -> String -> Bool
-arg_ok (NoArg           _)  rest _   = null rest
-arg_ok (HasArg          _)  _    _   = True
-arg_ok (SepArg          _)  rest _   = null rest
-arg_ok (Prefix          _)  _    _   = True -- Missing argument checked for in processOneArg t
-                                            -- to improve error message (#12625)
-arg_ok (OptIntSuffix    _)  _    _   = True
-arg_ok (IntSuffix       _)  _    _   = True
-arg_ok (Word64Suffix    _)  _    _   = True
-arg_ok (FloatSuffix     _)  _    _   = True
-arg_ok (OptPrefix       _)  _    _   = True
-arg_ok (PassFlag        _)  rest _   = null rest
-arg_ok (AnySuffix       _)  _    _   = True
+findArg :: StringTrie.StringTrie (Flag m) -> String -> Maybe (String, OptKind m)
+findArg trie arg =
+    case StringTrie.lookup' okay arg trie of
+        Nothing           -> Nothing
+        Just (flag, rest) -> Just (removeSpaces rest, flagOptKind flag)
+  where
+    okay :: Flag m -> String -> Bool
+    okay flag rest =
+      case flagOptKind flag of
+        NoArg _        -> null rest
+        HasArg _       -> True
+        SepArg _       -> null rest
+        Prefix _       -> True -- Missing argument checked for in processOneArg t
+                               -- to improve error message (#12625)
+        OptIntSuffix _ -> True
+        IntSuffix _    -> True
+        Word64Suffix _ -> True
+        FloatSuffix _  -> True
+        OptPrefix _    -> True
+        PassFlag _     -> null rest
+        AnySuffix _    -> True
 
 -- | Parse an Int
 --


=====================================
compiler/ghc.cabal.in
=====================================
@@ -464,6 +464,7 @@ Library
         GHC.Data.Stream
         GHC.Data.Strict
         GHC.Data.StringBuffer
+        GHC.Data.StringTrie
         GHC.Data.TrieMap
         GHC.Data.Unboxed
         GHC.Data.UnionFind



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/796c5a95d40a62ddcc749364f8af31d72ed7c2bf

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/796c5a95d40a62ddcc749364f8af31d72ed7c2bf
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20250218/4578e3a3/attachment-0001.html>


More information about the ghc-commits mailing list