[Git][ghc/ghc][wip/three-way-merge-sort] Improve performance of Data.List.sort(By)
Jade (@Jade)
gitlab at gitlab.haskell.org
Thu May 9 20:59:02 UTC 2024
Jade pushed to branch wip/three-way-merge-sort at Glasgow Haskell Compiler / GHC
Commits:
d9c303e3 by Jade at 2024-05-09T23:04:02+02:00
Improve performance of Data.List.sort(By)
This patch improves the algorithm to sort lists in base.
It does so using two strategies:
1) Use a four-way-merge instead of the 'default' two-way-merge.
This is able to save comparisons and allocations.
2) Use `(>) a b` over `compare a b == GT` and allow inlining and specialization.
This mainly benefits types with a fast (>).
Note that this *may* break instances with a *malformed* Ord instance
where `a > b` is *not* equal to `compare a b == GT`.
CLC proposal: https://github.com/haskell/core-libraries-committee/issues/236
Fixes #24280
-------------------------
Metric Decrease:
MultiLayerModulesTH_Make
T10421
T13719
T15164
T18698a
T18698b
T1969
T9872a
T9961
T18730
WWRec
-------------------------
- - - - -
5 changed files:
- libraries/base/changelog.md
- libraries/ghc-internal/src/GHC/Internal/Data/OldList.hs
- + testsuite/tests/lib/base/Sort.hs
- + testsuite/tests/lib/base/Sort.stdout
- testsuite/tests/lib/base/all.T
Changes:
=====================================
libraries/base/changelog.md
=====================================
@@ -4,6 +4,7 @@
* Add the `MonadFix` instance for `(,) a`, similar to the one for `Writer a` ([CLC proposal #238](https://github.com/haskell/core-libraries-committee/issues/238))
* Improve `toInteger :: Word32 -> Integer` on 64-bit platforms ([CLC proposal #259](https://github.com/haskell/core-libraries-committee/issues/259))
* Make `read` accept binary integer notation ([CLC proposal #177](https://github.com/haskell/core-libraries-committee/issues/177))
+ * Improve the performance of `Data.List.sort` using an improved merging strategy. Instead of `compare`, `sort` now uses `(>)` which may break *malformed* `Ord` instances ([CLC proposal #236](https://github.com/haskell/core-libraries-committee/issues/236))
## 4.20.0.0 *TBA*
* Deprecate `GHC.Pack` ([#21461](https://gitlab.haskell.org/ghc/ghc/-/issues/21461))
=====================================
libraries/ghc-internal/src/GHC/Internal/Data/OldList.hs
=====================================
@@ -1640,37 +1640,98 @@ and possibly to bear similarities to a 1982 paper by Richard O'Keefe:
Benchmarks show it to be often 2x the speed of the previous implementation.
Fixes ticket https://gitlab.haskell.org/ghc/ghc/issues/2143
+
+Further improved using a four-way merge, with an additional performance increase of ~20%
+https://gitlab.haskell.org/ghc/ghc/issues/24280
-}
-sort = sortBy compare
-sortBy cmp = mergeAll . sequences
+{-# INLINEABLE sort #-} -- allows specialization for the ord instance
+sort = actualSort (>)
+
+{-# INLINEABLE sortBy #-}
+sortBy cmp = actualSort (\x y -> cmp x y == GT)
+
+actualSort :: (a -> a -> Bool) -> [a] -> [a]
+actualSort gt ns
+ | [] <- ns = []
+ | [a] <- ns = [a]
+ | [a,b] <- ns = merge [a] [b]
+ | [a,b,c] <- ns = merge3 [a] [b] [c]
+ | [a,b,c,d] <- ns = merge4 [a] [b] [c] [d]
+ | otherwise = merge_all (sequences ns)
where
sequences (a:b:xs)
- | a `cmp` b == GT = descending b [a] xs
- | otherwise = ascending b (a:) xs
+ | a `gt` b = descending b [a] xs
+ | otherwise = ascending b (a:) xs
sequences xs = [xs]
descending a as (b:bs)
- | a `cmp` b == GT = descending b (a:as) bs
- descending a as bs = (a:as): sequences bs
+ | a `gt` b = descending b (a:as) bs
+ descending a as bs = (a:as): sequences bs
ascending a as (b:bs)
- | a `cmp` b /= GT = ascending b (\ys -> as (a:ys)) bs
- ascending a as bs = let !x = as [a]
- in x : sequences bs
-
- mergeAll [x] = x
- mergeAll xs = mergeAll (mergePairs xs)
-
- mergePairs (a:b:xs) = let !x = merge a b
- in x : mergePairs xs
- mergePairs xs = xs
+ | not (a `gt` b) = ascending b (\ys -> as (a:ys)) bs
+ ascending a as bs = let !x = as [a]
+ in x : sequences bs
+
+ merge_all [x] = x
+ merge_all xs = merge_all (reduce_once xs)
+
+ reduce_once [] = []
+ reduce_once [a] = [a]
+ reduce_once [a,b] = [merge a b]
+ reduce_once [a,b,c] = [merge3 a b c]
+ reduce_once [a,b,c,d,e] = [merge a b, merge3 c d e]
+ reduce_once [a,b,c,d,e,f] = [merge3 a b c, merge3 d e f]
+ reduce_once (a:b:c:d:xs) = let !x = merge4 a b c d
+ in x : reduce_once xs
merge as@(a:as') bs@(b:bs')
- | a `cmp` b == GT = b:merge as bs'
- | otherwise = a:merge as' bs
- merge [] bs = bs
- merge as [] = as
+ | a `gt` b = b : merge as bs'
+ | otherwise = a : merge as' bs
+ merge [] bs = bs
+ merge as [] = as
+
+ -- `merge3` is a manually fused version of `merge (merge as bs) cs`
+ merge3 as@(a:as') bs@(b:bs') cs
+ | a `gt` b = merge3X b as bs' cs
+ | otherwise = merge3X a as' bs cs
+ merge3 [] bs cs = merge bs cs
+ merge3 as [] cs = merge as cs
+
+ merge3X x as bs cs@(c:cs')
+ | x `gt` c = c : merge3X x as bs cs'
+ | otherwise = x : merge3 as bs cs
+ merge3X x as bs [] = x : merge as bs
+
+ merge3Y as@(a:as') y bs cs
+ | a `gt` y = y : merge3 as bs cs
+ | otherwise = a : merge3Y as' y bs cs
+ merge3Y [] x bs cs = x : merge bs cs
+
+ -- `merge4 as bs cs ds` is (essentially) a manually fused version of
+ -- `merge (merge as bs) (merge cs ds)`
+ merge4 as@(a:as') bs@(b:bs') cs ds
+ | a `gt` b = merge4X b as bs' cs ds
+ | otherwise = merge4X a as' bs cs ds
+ merge4 [] bs cs ds = merge3 bs cs ds
+ merge4 as [] cs ds = merge3 as cs ds
+
+ merge4X x as bs cs@(c:cs') ds@(d:ds')
+ | c `gt` d = merge4XY x as bs d cs ds'
+ | otherwise = merge4XY x as bs c cs' ds
+ merge4X x as bs [] ds = merge3X x as bs ds
+ merge4X x as bs cs [] = merge3X x as bs cs
+
+ merge4Y as@(a:as') bs@(b:bs') y cs ds
+ | a `gt` b = merge4XY b as bs' y cs ds
+ | otherwise = merge4XY a as' bs y cs ds
+ merge4Y as [] y cs ds = merge3Y as y cs ds
+ merge4Y [] bs y cs ds = merge3Y bs y cs ds
+
+ merge4XY x as bs y cs ds
+ | x `gt` y = y : merge4X x as bs cs ds
+ | otherwise = x : merge4Y as bs y cs ds
{-
sortBy cmp l = mergesort cmp l
=====================================
testsuite/tests/lib/base/Sort.hs
=====================================
@@ -0,0 +1,18 @@
+module Main where
+
+import Data.List (sort)
+import Data.Semigroup (Arg(..))
+
+main :: IO ()
+main = do
+ -- correctness
+ test @Int []
+ test [0]
+ test [8, 0, 2, 3, 6, 1, 5, 10, 4, 7, 9]
+
+ -- stability
+ test [Arg 1 0, Arg 0 0, Arg 0 1, Arg 1 1, Arg 0 2]
+ test [Arg 0 0, Arg 0 1, Arg 0 2]
+
+test :: (Ord a, Show a) => [a] -> IO ()
+test = print . sort
=====================================
testsuite/tests/lib/base/Sort.stdout
=====================================
@@ -0,0 +1,5 @@
+[]
+[0]
+[0,1,2,3,4,5,6,7,8,9,10]
+[Arg 0 0,Arg 0 1,Arg 0 2,Arg 1 0,Arg 1 1]
+[Arg 0 0,Arg 0 1,Arg 0 2]
=====================================
testsuite/tests/lib/base/all.T
=====================================
@@ -11,3 +11,4 @@ test('Monoid_ByteArray', normal, compile_and_run, [''])
test('Unsnoc', normal, compile_and_run, [''])
test('First-Semigroup-sconcat', normal, compile_and_run, [''])
test('First-Monoid-sconcat', normal, compile_and_run, [''])
+test('Sort', normal, compile_and_run, [''])
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/d9c303e3d0122b44ed5143ac2253535cc0b662b9
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/d9c303e3d0122b44ed5143ac2253535cc0b662b9
You're receiving this email because of your account on gitlab.haskell.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20240509/74bbf163/attachment-0001.html>
More information about the ghc-commits
mailing list