improvement for Data.List.sort

Frieder Kalisch f.kalisch@physik.uni-heidelberg.de
Wed, 19 Feb 2003 14:35:36 +0100


--SUOF0GtieIMvvwua
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline


>From libraries/base/Data/List.hs:
> If I heap profile the random_list case with only 10000 then I see
> random_list peaks at using about 2.5M of memory, whereas in the same
> program using List.sort it uses only 100k.

This is because the random number generator is evaluated too lazy.
 We
can make mergesort more eager by replacing it with natural mergesort.
i.e. 

mergesort :: (a -> a -> Ordering) -> [a] -> [a]
mergesort cmp = mergesort' cmp . map wrap

gets replaced by 

mergesort :: (a -> a -> Ordering) -> [a] -> [a]
mergesort cmp = mergesort' cmp . map runner

-- | decomposes list into monotonic runs
runner :: (a -> a -> Ordering) -> [a] -> [[a]]
runner _  [] = []
runner cmp l = runner' l
  where
-- | increasing runs
    runner' xss@(x:xs) = case findrun (\a b->cmp a b/=GT) [x] xs of
      (run, [])   -> [reverse run]
      ([_], _)    -> runner'' xss
      (run, rest) -> reverse run : runner'' rest
-- | decreasing runs. 
-- | We consider (x>y) instead of (x>=y) to ensure stability.
    runner'' xss@(x:xs) = case findrun (\a b->cmp a b==GT) [x] xs of
      (run, [])   -> [run]
      ([_], _)    -> runner' xss
      (run, rest) -> run : runner' rest
-- | Do the work.
    findrun _ a [] = (a, [])
    findrun less a xss@(x:xs)
      | (head a) `less` x = findrun less (x:a)  xs
      | otherwise  = (a, xss)


 For the Demo attatched, the speedup is more than threefold for N=10^5
and the stack usage drops from 7Mb to below 1M (with flag -O1).
Additionally, natural msort has the additional advantage, that it is
O(N) for (almost) (anti-)sorted lists. It is also a littlebit faster
on average, because runner actually does some work whereas wrap does
not.

Greetings,

-- 
Frieder Kalisch
f.kalisch@tphys.uni-heidelberg.de


--SUOF0GtieIMvvwua
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="Demo.hs"

module Main where

import List
import Random
import System

msort :: (Ord a) => [a] -> [a]
msort = msortBy compare

msortBy :: (a -> a -> Ordering) -> [a] -> [a]
msortBy cmp = msort' cmp . runner cmp

-- | mergesort on a list of runs, i.e.
msort' :: (a->a->Ordering) -> [[a]] -> [a]
msort' _ [] = []
msort' _ [x] = x
msort' cmp list = msort' cmp $ merge' list
 where 
   merge' []  = []
   merge' [x] = [x]
   merge' (x1:x2:xs) = merge x1 x2 : merge' xs
-- | merge two runs
   merge x [] = x
   merge [] y = y
   merge  xss@(x:xs) yss@(y:ys) 
     | cmp x y==GT = y: merge xss ys
     | otherwise   = x: merge xs yss

-- | Decomposes list into monotonic runs. This turns mergesort into natural mergesort.
runner :: (a -> a -> Ordering) -> [a] -> [[a]]
runner _  [] = []
runner cmp l = runner' l
  where
-- | increasing runs
    runner' xss@(x:xs) = case findrun (\a b->cmp a b/=GT) [x] xs of
      (run, [])   -> [reverse run]
      ([_], _)    -> runner'' xss
      (run, rest) -> reverse run : runner'' rest
-- | decreasing runs. 
-- | We consider (x>y) instead of (x>=y) to ensure stability.
    runner'' xss@(x:xs) = case findrun (\a b->cmp a b==GT) [x] xs of
      (run, [])   -> [run]
      ([_], _)    -> runner' xss
      (run, rest) -> run : runner' rest
-- | Do the work.
    findrun _ a [] = (a, [])
    findrun less a xss@(x:xs)
      | (head a) `less` x = findrun less (x:a)  xs
      | otherwise  = (a, xss)



force :: [a] -> [a]
force [] = []
force (x:xs) = x `seq` x:force xs


demo0 = sort
demo1 = sort .force
demo2 = msort


demo :: [[Int]->[Int]]
demo = [demo0, demo1, demo2]

main = do 
  args <- getArgs
  let demonum = read $args!!0
      sortlength = read $args!!1 in
    print .last .(demo!!demonum) .take sortlength .randoms 
	  $mkStdGen 123456789

-- Usage : Demo demonum sortlength
-- sortlength = 10^5 is a suitable value.

--SUOF0GtieIMvvwua--