[commit: ghc] master: Reimplement minusList using Set (5d9378e)
git at git.haskell.org
git at git.haskell.org
Tue Mar 14 23:54:28 UTC 2017
Repository : ssh://git@git.haskell.org/ghc
On branch : master
Link : http://ghc.haskell.org/trac/ghc/changeset/5d9378efdaa9aafaac2c9180c7156bca31297c30/ghc
>---------------------------------------------------------------
commit 5d9378efdaa9aafaac2c9180c7156bca31297c30
Author: David Feuer <david.feuer at gmail.com>
Date: Tue Mar 14 19:53:39 2017 -0400
Reimplement minusList using Set
`minusList ms ns` was `O(m*n)`. Now it's `O((m + n) log n)`, which
should be a bit better.
Reviewers: austin, bgamari, mpickering
Reviewed By: mpickering
Subscribers: mpickering, rwbarton, thomie
Differential Revision: https://phabricator.haskell.org/D3341
>---------------------------------------------------------------
5d9378efdaa9aafaac2c9180c7156bca31297c30
compiler/coreSyn/CoreSyn.hs | 14 ++++++++++++++
compiler/utils/ListSetOps.hs | 30 +++++++++++++++++++++++++++---
2 files changed, 41 insertions(+), 3 deletions(-)
diff --git a/compiler/coreSyn/CoreSyn.hs b/compiler/coreSyn/CoreSyn.hs
index a51ec69..ad504ac 100644
--- a/compiler/coreSyn/CoreSyn.hs
+++ b/compiler/coreSyn/CoreSyn.hs
@@ -307,6 +307,20 @@ data AltCon
| DEFAULT -- ^ Trivial alternative: @case e of { _ -> ... }@
deriving (Eq, Data)
+-- This instance is a bit shady. It can only be used to compare AltCons for
+-- a single type constructor. Fortunately, it seems quite unlikely that we'll
+-- ever need to compare AltCons for different type constructors.
+instance Ord AltCon where
+ compare (DataAlt con1) (DataAlt con2) =
+ ASSERT( dataConTyCon con1 == dataConTyCon con2 )
+ compare (dataConTag con1) (dataConTag con2)
+ compare (DataAlt _) _ = LT
+ compare _ (DataAlt _) = GT
+ compare (LitAlt l1) (LitAlt l2) = compare l1 l2
+ compare (LitAlt _) DEFAULT = LT
+ compare DEFAULT DEFAULT = EQ
+ compare DEFAULT _ = GT
+
-- | Binding, used for top level bindings in a module and local bindings in a @let at .
-- If you edit this type, you may need to update the GHC formalism
diff --git a/compiler/utils/ListSetOps.hs b/compiler/utils/ListSetOps.hs
index e5315dd..88af48e 100644
--- a/compiler/utils/ListSetOps.hs
+++ b/compiler/utils/ListSetOps.hs
@@ -27,6 +27,7 @@ import Outputable
import Util
import Data.List
+import qualified Data.Set as S
getNth :: Outputable a => [a] -> Int -> a
getNth xs n = ASSERT2( xs `lengthExceeds` n, ppr n $$ ppr xs )
@@ -48,9 +49,32 @@ unionLists xs ys
= WARN(length xs > 100 || length ys > 100, ppr xs $$ ppr ys)
[x | x <- xs, isn'tIn "unionLists" x ys] ++ ys
-minusList :: (Eq a) => [a] -> [a] -> [a]
--- Everything in the first list that is not in the second list:
-minusList xs ys = [ x | x <- xs, isn'tIn "minusList" x ys]
+-- | Calculate the set difference of two lists. This is
+-- /O((m + n) log n)/, where we subtract a list of /n/ elements
+-- from a list of /m/ elements.
+--
+-- Extremely short cases are handled specially:
+-- When /m/ or /n/ is 0, this takes /O(1)/ time. When /m/ is 1,
+-- it takes /O(n)/ time.
+minusList :: Ord a => [a] -> [a] -> [a]
+-- There's no point building a set to perform just one lookup, so we handle
+-- extremely short lists specially. It might actually be better to use
+-- an O(m*n) algorithm when m is a little longer (perhaps up to 4 or even 5).
+-- The tipping point will be somewhere in the area of where /m/ and /log n/
+-- become comparable, but we probably don't want to work too hard on this.
+minusList [] _ = []
+minusList xs@[x] ys
+ | x `elem` ys = []
+ | otherwise = xs
+-- Using an empty set or a singleton would also be silly, so let's not.
+minusList xs [] = xs
+minusList xs [y] = filter (/= y) xs
+-- When each list has at least two elements, we build a set from the
+-- second argument, allowing us to filter the first argument fairly
+-- efficiently.
+minusList xs ys = filter (`S.notMember` yss) xs
+ where
+ yss = S.fromList ys
{-
************************************************************************
More information about the ghc-commits
mailing list