[Git][ghc/ghc][wip/marge_bot_batch_merge_job] 2 commits: Improve performance of Data.List.sort(By)

Marge Bot (@marge-bot) gitlab at gitlab.haskell.org
Fri May 10 21:17:17 UTC 2024



Marge Bot pushed to branch wip/marge_bot_batch_merge_job at Glasgow Haskell Compiler / GHC


Commits:
36679493 by Jade at 2024-05-10T17:17:08-04:00
Improve performance of Data.List.sort(By)

This patch improves the algorithm to sort lists in base.
It does so using two strategies:

1) Use a four-way-merge instead of the 'default' two-way-merge.
This is able to save comparisons and allocations.

2) Use `(>) a b` over `compare a b == GT` and allow inlining and specialization.
This mainly benefits types with a fast (>).

Note that this *may* break instances with a *malformed* Ord instance
where `a > b` is *not* equal to `compare a b == GT`.

CLC proposal: https://github.com/haskell/core-libraries-committee/issues/236

Fixes #24280

-------------------------
Metric Decrease:
    MultiLayerModulesTH_Make
    T10421
    T13719
    T15164
    T18698a
    T18698b
    T1969
    T9872a
    T9961
    T18730
    WWRec
    T12425
    T15703
-------------------------

- - - - -
6a9e55d1 by Matthew Pickering at 2024-05-10T17:17:10-04:00
Revert "ghcup-metadata: Drop output_name field"

This reverts commit ecbf22a6ac397a791204590f94c0afa82e29e79f.

This breaks the ghcup metadata generation on the nightly jobs.

- - - - -


6 changed files:

- .gitlab/rel_eng/mk-ghcup-metadata/mk_ghcup_metadata.py
- libraries/base/changelog.md
- libraries/ghc-internal/src/GHC/Internal/Data/OldList.hs
- + testsuite/tests/lib/base/Sort.hs
- + testsuite/tests/lib/base/Sort.stdout
- testsuite/tests/lib/base/all.T


Changes:

=====================================
.gitlab/rel_eng/mk-ghcup-metadata/mk_ghcup_metadata.py
=====================================
@@ -65,6 +65,7 @@ eprint(f"Supported platforms: {job_mapping.keys()}")
 class Artifact(NamedTuple):
     job_name: str
     download_name: str
+    output_name: str
     subdir: str
 
 # Platform spec provides a specification which is agnostic to Job
@@ -74,9 +75,11 @@ class PlatformSpec(NamedTuple):
     subdir: str
 
 source_artifact = Artifact('source-tarball'
+                          , 'ghc-{version}-src.tar.xz'
                           , 'ghc-{version}-src.tar.xz'
                           , 'ghc-{version}' )
 test_artifact = Artifact('source-tarball'
+                        , 'ghc-{version}-testsuite.tar.xz'
                         , 'ghc-{version}-testsuite.tar.xz'
                         , 'ghc-{version}/testsuite' )
 
@@ -161,6 +164,11 @@ def mk_one_metadata(release_mode, version, job_map, artifact):
           , "dlSubdir": artifact.subdir.format(version=version)
           , "dlHash" : h }
 
+    # Only add dlOutput if it is inconsistent with the filename inferred from the URL
+    output = artifact.output_name.format(version=version)
+    if Path(urlparse(final_url).path).name != output:
+        res["dlOutput"] = output
+
     eprint(res)
     return res
 


=====================================
libraries/base/changelog.md
=====================================
@@ -4,6 +4,7 @@
   * Add the `MonadFix` instance for `(,) a`, similar to the one for `Writer a` ([CLC proposal #238](https://github.com/haskell/core-libraries-committee/issues/238))
   * Improve `toInteger :: Word32 -> Integer` on 64-bit platforms ([CLC proposal #259](https://github.com/haskell/core-libraries-committee/issues/259))
   * Make `read` accept binary integer notation ([CLC proposal #177](https://github.com/haskell/core-libraries-committee/issues/177))
+  * Improve the performance of `Data.List.sort` using an improved merging strategy. Instead of `compare`, `sort` now uses `(>)` which may break *malformed* `Ord` instances ([CLC proposal #236](https://github.com/haskell/core-libraries-committee/issues/236))
 
 ## 4.20.0.0 *TBA*
   * Deprecate `GHC.Pack` ([#21461](https://gitlab.haskell.org/ghc/ghc/-/issues/21461))


=====================================
libraries/ghc-internal/src/GHC/Internal/Data/OldList.hs
=====================================
@@ -1640,37 +1640,98 @@ and possibly to bear similarities to a 1982 paper by Richard O'Keefe:
 
 Benchmarks show it to be often 2x the speed of the previous implementation.
 Fixes ticket https://gitlab.haskell.org/ghc/ghc/issues/2143
+
+Further improved using a four-way merge, with an additional performance increase of ~20%
+https://gitlab.haskell.org/ghc/ghc/issues/24280
 -}
 
-sort = sortBy compare
-sortBy cmp = mergeAll . sequences
+{-# INLINEABLE sort #-} -- allows specialization for the ord instance
+sort = actualSort (>)
+
+{-# INLINEABLE sortBy #-}
+sortBy cmp = actualSort (\x y -> cmp x y == GT)
+
+actualSort :: (a -> a -> Bool) -> [a] -> [a]
+actualSort gt ns
+  | []        <- ns = []
+  | [a]       <- ns = [a]
+  | [a,b]     <- ns = merge [a] [b]
+  | [a,b,c]   <- ns = merge3 [a] [b] [c]
+  | [a,b,c,d] <- ns = merge4 [a] [b] [c] [d]
+  | otherwise       = merge_all (sequences ns)
   where
     sequences (a:b:xs)
-      | a `cmp` b == GT = descending b [a]  xs
-      | otherwise       = ascending  b (a:) xs
+      | a `gt` b  = descending b [a]  xs
+      | otherwise = ascending  b (a:) xs
     sequences xs = [xs]
 
     descending a as (b:bs)
-      | a `cmp` b == GT = descending b (a:as) bs
-    descending a as bs  = (a:as): sequences bs
+      | a `gt` b       = descending b (a:as) bs
+    descending a as bs = (a:as): sequences bs
 
     ascending a as (b:bs)
-      | a `cmp` b /= GT = ascending b (\ys -> as (a:ys)) bs
-    ascending a as bs   = let !x = as [a]
-                          in x : sequences bs
-
-    mergeAll [x] = x
-    mergeAll xs  = mergeAll (mergePairs xs)
-
-    mergePairs (a:b:xs) = let !x = merge a b
-                          in x : mergePairs xs
-    mergePairs xs       = xs
+      | not (a `gt` b) = ascending b (\ys -> as (a:ys)) bs
+    ascending a as bs  = let !x = as [a]
+                         in x : sequences bs
+
+    merge_all [x] = x
+    merge_all xs  = merge_all (reduce_once xs)
+
+    reduce_once []            = []
+    reduce_once [a]           = [a]
+    reduce_once [a,b]         = [merge a b]
+    reduce_once [a,b,c]       = [merge3 a b c]
+    reduce_once [a,b,c,d,e]   = [merge a b, merge3 c d e]
+    reduce_once [a,b,c,d,e,f] = [merge3 a b c, merge3 d e f]
+    reduce_once (a:b:c:d:xs)  = let !x = merge4 a b c d
+                                in x : reduce_once xs
 
     merge as@(a:as') bs@(b:bs')
-      | a `cmp` b == GT = b:merge as  bs'
-      | otherwise       = a:merge as' bs
-    merge [] bs         = bs
-    merge as []         = as
+      | a `gt` b  = b : merge as  bs'
+      | otherwise = a : merge as' bs
+    merge [] bs   = bs
+    merge as []   = as
+
+    -- `merge3` is a manually fused version of `merge (merge as bs) cs`
+    merge3 as@(a:as') bs@(b:bs') cs
+      | a `gt` b  = merge3X b as  bs' cs
+      | otherwise = merge3X a as' bs  cs
+    merge3 [] bs cs = merge bs cs
+    merge3 as [] cs = merge as cs
+
+    merge3X x as bs cs@(c:cs')
+      | x `gt` c  = c : merge3X x as bs cs'
+      | otherwise = x : merge3    as bs cs
+    merge3X x as bs [] = x : merge as bs
+
+    merge3Y as@(a:as') y bs cs
+      | a `gt` y  = y : merge3  as    bs cs
+      | otherwise = a : merge3Y as' y bs cs
+    merge3Y [] x bs cs = x : merge bs cs
+
+    -- `merge4 as bs cs ds` is (essentially) a manually fused version of
+    -- `merge (merge as bs) (merge cs ds)`
+    merge4 as@(a:as') bs@(b:bs') cs ds
+      | a `gt` b  = merge4X b as  bs' cs ds
+      | otherwise = merge4X a as' bs  cs ds
+    merge4 [] bs cs ds = merge3 bs cs ds
+    merge4 as [] cs ds = merge3 as cs ds
+
+    merge4X x as bs cs@(c:cs') ds@(d:ds')
+      | c `gt` d  = merge4XY x as bs d cs  ds'
+      | otherwise = merge4XY x as bs c cs' ds
+    merge4X x as bs [] ds = merge3X x as bs ds
+    merge4X x as bs cs [] = merge3X x as bs cs
+
+    merge4Y as@(a:as') bs@(b:bs') y cs ds
+      | a `gt` b  = merge4XY b as  bs' y cs ds
+      | otherwise = merge4XY a as' bs  y cs ds
+    merge4Y as [] y cs ds = merge3Y as y cs ds
+    merge4Y [] bs y cs ds = merge3Y bs y cs ds
+
+    merge4XY x as bs y cs ds
+      | x `gt` y  = y : merge4X x as bs   cs ds
+      | otherwise = x : merge4Y   as bs y cs ds
 
 {-
 sortBy cmp l = mergesort cmp l


=====================================
testsuite/tests/lib/base/Sort.hs
=====================================
@@ -0,0 +1,18 @@
+module Main where
+
+import Data.List (sort)
+import Data.Semigroup (Arg(..))
+
+main :: IO ()
+main = do
+  -- correctness
+  test @Int []
+  test [0]
+  test [8, 0, 2, 3, 6, 1, 5, 10, 4, 7, 9]
+
+  -- stability
+  test [Arg 1 0, Arg 0 0, Arg 0 1, Arg 1 1, Arg 0 2]
+  test [Arg 0 0, Arg 0 1, Arg 0 2]
+
+test :: (Ord a, Show a) => [a] -> IO ()
+test = print . sort


=====================================
testsuite/tests/lib/base/Sort.stdout
=====================================
@@ -0,0 +1,5 @@
+[]
+[0]
+[0,1,2,3,4,5,6,7,8,9,10]
+[Arg 0 0,Arg 0 1,Arg 0 2,Arg 1 0,Arg 1 1]
+[Arg 0 0,Arg 0 1,Arg 0 2]


=====================================
testsuite/tests/lib/base/all.T
=====================================
@@ -11,3 +11,4 @@ test('Monoid_ByteArray', normal, compile_and_run, [''])
 test('Unsnoc', normal, compile_and_run, [''])
 test('First-Semigroup-sconcat', normal, compile_and_run, [''])
 test('First-Monoid-sconcat', normal, compile_and_run, [''])
+test('Sort', normal, compile_and_run, [''])



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/3335cc0bef6c495453e63d33f01f6ca40836da08...6a9e55d1c5f06863d86b4a24bf035c386d8f7d95

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/3335cc0bef6c495453e63d33f01f6ca40836da08...6a9e55d1c5f06863d86b4a24bf035c386d8f7d95
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20240510/e329c480/attachment-0001.html>


More information about the ghc-commits mailing list