List.sort

Ian Lynagh igloo@earth.li
Fri, 10 May 2002 13:26:58 +0100


--X1bOJ3K7DJ5YkBrT
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline


Hi all

I am curious as to why the List.sort implementation in GHC is a
quicksort algorithm rather than an algorithm that guarantees n log n
time in the worst case? I have attached a mergesort implementation along
with a few scripts to time it's performance, the results of which are
shown below (* means it didn't finish successfully - in all cases this
was due to a stack overflow).

I am also curious as to the cause of the last failure (this is using the
attached mergesort to sort
random_list = take 100000 $ randoms $ mkStdGen 100) as it manages
with sorted = [1 .. 100000] and revsorted = reverse [1 .. 100000]. Is
this because of some optimisations being done for the (rev)sorted cases
or is it a problem with randoms?

If I heap profile the random_list case with only 10000 then I see
random_list peaks at using about 2.5M of memory, whereas in the same
program using List.sort it uses only 100k.

Input style     Input length     Sort data     Sort alg    User time
stdin           10000            random_list   sort        2.82
stdin           10000            random_list   mergesort   2.96
stdin           10000            sorted        sort        31.37
stdin           10000            sorted        mergesort   1.90
stdin           10000            revsorted     sort        31.21
stdin           10000            revsorted     mergesort   1.88
stdin           100000           random_list   sort        *
stdin           100000           random_list   mergesort   *
stdin           100000           sorted        sort        *
stdin           100000           sorted        mergesort   *
stdin           100000           revsorted     sort        *
stdin           100000           revsorted     mergesort   *
func            10000            random_list   sort        0.31
func            10000            random_list   mergesort   0.91
func            10000            sorted        sort        19.09
func            10000            sorted        mergesort   0.15
func            10000            revsorted     sort        19.17
func            10000            revsorted     mergesort   0.16
func            100000           random_list   sort        3.85
func            100000           random_list   mergesort   *
func            100000           sorted        sort        5831.47
func            100000           sorted        mergesort   2.23
func            100000           revsorted     sort        5872.34
func            100000           revsorted     mergesort   2.24


Any comments gratefully received

Thanks
Ian


--X1bOJ3K7DJ5YkBrT
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="MergeSort.lhs"


> module MergeSort (mergesort) where

> mergesort :: (Ord a) => [a] -> [a]
> mergesort = mergesort' . map wrap

> mergesort' :: (Ord a) => [[a]] -> [a]
> mergesort' [] = []
> mergesort' [xs] = xs
> mergesort' xss = mergesort' (merge_pairs xss)

> merge_pairs :: (Ord a) => [[a]] -> [[a]]
> merge_pairs [] = []
> merge_pairs [xs] = [xs]
> merge_pairs (xs:ys:xss) = merge xs ys:merge_pairs xss

> merge :: (Ord a) => [a] -> [a] -> [a]
> merge xs [] = xs
> merge [] ys = ys
> merge (x:xs) (y:ys)
>  | x <= y    = x:merge    xs (y:ys)
>  | otherwise = y:merge (x:xs)   ys

> wrap :: a -> [a]
> wrap x = [x]

--X1bOJ3K7DJ5YkBrT
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="gen_lists.lhs"


> module Main where

> import Random

> main :: IO()
> main = do write_files 10000
>           write_files 100000

> write_files :: Int -> IO()
> write_files i
>   = do writeFile ("input-random_list-" ++ s) $ show $ random_list i
>        writeFile ("input-sorted-" ++ s)      $ show $ sorted i
>        writeFile ("input-revsorted-" ++ s)   $ show $ revsorted i
>   where s = show i

> random_list :: Int -> [Int]
> random_list i = take i $ randoms $ mkStdGen 100

> sorted :: Int -> [Int]
> sorted i = [1..i]

> revsorted :: Int -> [Int]
> revsorted i = reverse [1..i]


--X1bOJ3K7DJ5YkBrT
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="sort_func.lhs"


> module Main where

> import List
> import Random
> import MergeSort

> main :: IO()
> main = putStrLn $ show $ SORT DATA

> random_list :: [Int]
> random_list = take COUNT $ randoms $ mkStdGen 100

> sorted :: [Int]
> sorted = [1 .. COUNT]

> revsorted :: [Int]
> revsorted = reverse [1 .. COUNT]


--X1bOJ3K7DJ5YkBrT
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="sort_stdin.lhs"


> module Main where

> import List
> import Random
> import MergeSort

> main :: IO()
> main = getContents >>= putStrLn . show . SORT . (read :: String -> [Int])


--X1bOJ3K7DJ5YkBrT
Content-Type: text/x-sh; charset=us-ascii
Content-Disposition: attachment; filename="time.sh"

#!/bin/sh

function write_what() {
    printf "%-15s %-16s %-13s %-12s" "$@" >> results
}

rm -f *.hi *.o
hmake gen_lists -ghc -O2
./gen_lists

for SORT in sort mergesort
do
    rm -f *.hi *.o
    HFLAGS="-cpp -DSORT=$SORT" hmake sort_stdin -ghc -O2
    mv sort_stdin sort_stdin-$SORT
done

rm results
write_what "Input style" "Input length" "Sort data" "Sort alg"
echo "User time" >> results
for COUNT in 10000 100000
do
    for DATA in random_list sorted revsorted
    do
        for SORT in sort mergesort
        do
            write_what stdin $COUNT $DATA $SORT
            /usr/bin/time -o this.result -f "%U" ./sort_stdin-$SORT \
                < input-$DATA-$COUNT > /dev/null
            if [ $? -eq 0 ]
            then
                cat this.result >> results
            else
                echo "*" >> results
            fi
        done
    done
done

for COUNT in 10000 100000
do
    for DATA in random_list sorted revsorted
    do
        for SORT in sort mergesort
        do
            rm -f *.hi *.o sort_func
            HFLAGS="-cpp -DCOUNT=$COUNT -DDATA=$DATA -DSORT=$SORT" \
                hmake sort_func -ghc -O2

            write_what func $COUNT $DATA $SORT
            /usr/bin/time -o this.result -f "%U" ./sort_func > /dev/null
            if [ $? -eq 0 ]
            then
                cat this.result >> results
            else
                echo "*" >> results
            fi
        done
    done
done


--X1bOJ3K7DJ5YkBrT--