[commit: ghc] master: Reimplement minusList using Set (5d9378e)

git at git.haskell.org git at git.haskell.org
Tue Mar 14 23:54:28 UTC 2017


Repository : ssh://git@git.haskell.org/ghc

On branch  : master
Link       : http://ghc.haskell.org/trac/ghc/changeset/5d9378efdaa9aafaac2c9180c7156bca31297c30/ghc

>---------------------------------------------------------------

commit 5d9378efdaa9aafaac2c9180c7156bca31297c30
Author: David Feuer <david.feuer at gmail.com>
Date:   Tue Mar 14 19:53:39 2017 -0400

    Reimplement minusList using Set
    
    `minusList ms ns` was `O(m*n)`. Now it's `O((m + n) log n)`, which
    should be a bit better.
    
    Reviewers: austin, bgamari, mpickering
    
    Reviewed By: mpickering
    
    Subscribers: mpickering, rwbarton, thomie
    
    Differential Revision: https://phabricator.haskell.org/D3341


>---------------------------------------------------------------

5d9378efdaa9aafaac2c9180c7156bca31297c30
 compiler/coreSyn/CoreSyn.hs  | 14 ++++++++++++++
 compiler/utils/ListSetOps.hs | 30 +++++++++++++++++++++++++++---
 2 files changed, 41 insertions(+), 3 deletions(-)

diff --git a/compiler/coreSyn/CoreSyn.hs b/compiler/coreSyn/CoreSyn.hs
index a51ec69..ad504ac 100644
--- a/compiler/coreSyn/CoreSyn.hs
+++ b/compiler/coreSyn/CoreSyn.hs
@@ -307,6 +307,20 @@ data AltCon
   | DEFAULT           -- ^ Trivial alternative: @case e of { _ -> ... }@
    deriving (Eq, Data)
 
+-- This instance is a bit shady. It can only be used to compare AltCons for
+-- a single type constructor. Fortunately, it seems quite unlikely that we'll
+-- ever need to compare AltCons for different type constructors.
+instance Ord AltCon where
+  compare (DataAlt con1) (DataAlt con2) =
+    ASSERT( dataConTyCon con1 == dataConTyCon con2 )
+    compare (dataConTag con1) (dataConTag con2)
+  compare (DataAlt _) _ = LT
+  compare _ (DataAlt _) = GT
+  compare (LitAlt l1) (LitAlt l2) = compare l1 l2
+  compare (LitAlt _) DEFAULT = LT
+  compare DEFAULT DEFAULT = EQ
+  compare DEFAULT _ = GT
+
 -- | Binding, used for top level bindings in a module and local bindings in a @let at .
 
 -- If you edit this type, you may need to update the GHC formalism
diff --git a/compiler/utils/ListSetOps.hs b/compiler/utils/ListSetOps.hs
index e5315dd..88af48e 100644
--- a/compiler/utils/ListSetOps.hs
+++ b/compiler/utils/ListSetOps.hs
@@ -27,6 +27,7 @@ import Outputable
 import Util
 
 import Data.List
+import qualified Data.Set as S
 
 getNth :: Outputable a => [a] -> Int -> a
 getNth xs n = ASSERT2( xs `lengthExceeds` n, ppr n $$ ppr xs )
@@ -48,9 +49,32 @@ unionLists xs ys
   = WARN(length xs > 100 || length ys > 100, ppr xs $$ ppr ys)
     [x | x <- xs, isn'tIn "unionLists" x ys] ++ ys
 
-minusList :: (Eq a) => [a] -> [a] -> [a]
--- Everything in the first list that is not in the second list:
-minusList xs ys = [ x | x <- xs, isn'tIn "minusList" x ys]
+-- | Calculate the set difference of two lists. This is
+-- /O((m + n) log n)/, where we subtract a list of /n/ elements
+-- from a list of /m/ elements.
+--
+-- Extremely short cases are handled specially:
+-- When /m/ or /n/ is 0, this takes /O(1)/ time. When /m/ is 1,
+-- it takes /O(n)/ time.
+minusList :: Ord a => [a] -> [a] -> [a]
+-- There's no point building a set to perform just one lookup, so we handle
+-- extremely short lists specially. It might actually be better to use
+-- an O(m*n) algorithm when m is a little longer (perhaps up to 4 or even 5).
+-- The tipping point will be somewhere in the area of where /m/ and /log n/
+-- become comparable, but we probably don't want to work too hard on this.
+minusList [] _ = []
+minusList xs@[x] ys
+  | x `elem` ys = []
+  | otherwise = xs
+-- Using an empty set or a singleton would also be silly, so let's not.
+minusList xs [] = xs
+minusList xs [y] = filter (/= y) xs
+-- When each list has at least two elements, we build a set from the
+-- second argument, allowing us to filter the first argument fairly
+-- efficiently.
+minusList xs ys = filter (`S.notMember` yss) xs
+  where
+    yss = S.fromList ys
 
 {-
 ************************************************************************



More information about the ghc-commits mailing list