improvement for Data.List.sort
Frieder Kalisch
f.kalisch@physik.uni-heidelberg.de
Wed, 19 Feb 2003 14:35:36 +0100
--SUOF0GtieIMvvwua
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline
>From libraries/base/Data/List.hs:
> If I heap profile the random_list case with only 10000 then I see
> random_list peaks at using about 2.5M of memory, whereas in the same
> program using List.sort it uses only 100k.
This is because the random number generator is evaluated too lazy.
We
can make mergesort more eager by replacing it with natural mergesort.
i.e.
mergesort :: (a -> a -> Ordering) -> [a] -> [a]
mergesort cmp = mergesort' cmp . map wrap
gets replaced by
mergesort :: (a -> a -> Ordering) -> [a] -> [a]
mergesort cmp = mergesort' cmp . map runner
-- | decomposes list into monotonic runs
runner :: (a -> a -> Ordering) -> [a] -> [[a]]
runner _ [] = []
runner cmp l = runner' l
where
-- | increasing runs
runner' xss@(x:xs) = case findrun (\a b->cmp a b/=GT) [x] xs of
(run, []) -> [reverse run]
([_], _) -> runner'' xss
(run, rest) -> reverse run : runner'' rest
-- | decreasing runs.
-- | We consider (x>y) instead of (x>=y) to ensure stability.
runner'' xss@(x:xs) = case findrun (\a b->cmp a b==GT) [x] xs of
(run, []) -> [run]
([_], _) -> runner' xss
(run, rest) -> run : runner' rest
-- | Do the work.
findrun _ a [] = (a, [])
findrun less a xss@(x:xs)
| (head a) `less` x = findrun less (x:a) xs
| otherwise = (a, xss)
For the Demo attatched, the speedup is more than threefold for N=10^5
and the stack usage drops from 7Mb to below 1M (with flag -O1).
Additionally, natural msort has the additional advantage, that it is
O(N) for (almost) (anti-)sorted lists. It is also a littlebit faster
on average, because runner actually does some work whereas wrap does
not.
Greetings,
--
Frieder Kalisch
f.kalisch@tphys.uni-heidelberg.de
--SUOF0GtieIMvvwua
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="Demo.hs"
module Main where
import List
import Random
import System
msort :: (Ord a) => [a] -> [a]
msort = msortBy compare
msortBy :: (a -> a -> Ordering) -> [a] -> [a]
msortBy cmp = msort' cmp . runner cmp
-- | mergesort on a list of runs, i.e.
msort' :: (a->a->Ordering) -> [[a]] -> [a]
msort' _ [] = []
msort' _ [x] = x
msort' cmp list = msort' cmp $ merge' list
where
merge' [] = []
merge' [x] = [x]
merge' (x1:x2:xs) = merge x1 x2 : merge' xs
-- | merge two runs
merge x [] = x
merge [] y = y
merge xss@(x:xs) yss@(y:ys)
| cmp x y==GT = y: merge xss ys
| otherwise = x: merge xs yss
-- | Decomposes list into monotonic runs. This turns mergesort into natural mergesort.
runner :: (a -> a -> Ordering) -> [a] -> [[a]]
runner _ [] = []
runner cmp l = runner' l
where
-- | increasing runs
runner' xss@(x:xs) = case findrun (\a b->cmp a b/=GT) [x] xs of
(run, []) -> [reverse run]
([_], _) -> runner'' xss
(run, rest) -> reverse run : runner'' rest
-- | decreasing runs.
-- | We consider (x>y) instead of (x>=y) to ensure stability.
runner'' xss@(x:xs) = case findrun (\a b->cmp a b==GT) [x] xs of
(run, []) -> [run]
([_], _) -> runner' xss
(run, rest) -> run : runner' rest
-- | Do the work.
findrun _ a [] = (a, [])
findrun less a xss@(x:xs)
| (head a) `less` x = findrun less (x:a) xs
| otherwise = (a, xss)
force :: [a] -> [a]
force [] = []
force (x:xs) = x `seq` x:force xs
demo0 = sort
demo1 = sort .force
demo2 = msort
demo :: [[Int]->[Int]]
demo = [demo0, demo1, demo2]
main = do
args <- getArgs
let demonum = read $args!!0
sortlength = read $args!!1 in
print .last .(demo!!demonum) .take sortlength .randoms
$mkStdGen 123456789
-- Usage : Demo demonum sortlength
-- sortlength = 10^5 is a suitable value.
--SUOF0GtieIMvvwua--