List.sort
Ian Lynagh
igloo@earth.li
Fri, 10 May 2002 13:26:58 +0100
--X1bOJ3K7DJ5YkBrT
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline
Hi all
I am curious as to why the List.sort implementation in GHC is a
quicksort algorithm rather than an algorithm that guarantees n log n
time in the worst case? I have attached a mergesort implementation along
with a few scripts to time it's performance, the results of which are
shown below (* means it didn't finish successfully - in all cases this
was due to a stack overflow).
I am also curious as to the cause of the last failure (this is using the
attached mergesort to sort
random_list = take 100000 $ randoms $ mkStdGen 100) as it manages
with sorted = [1 .. 100000] and revsorted = reverse [1 .. 100000]. Is
this because of some optimisations being done for the (rev)sorted cases
or is it a problem with randoms?
If I heap profile the random_list case with only 10000 then I see
random_list peaks at using about 2.5M of memory, whereas in the same
program using List.sort it uses only 100k.
Input style Input length Sort data Sort alg User time
stdin 10000 random_list sort 2.82
stdin 10000 random_list mergesort 2.96
stdin 10000 sorted sort 31.37
stdin 10000 sorted mergesort 1.90
stdin 10000 revsorted sort 31.21
stdin 10000 revsorted mergesort 1.88
stdin 100000 random_list sort *
stdin 100000 random_list mergesort *
stdin 100000 sorted sort *
stdin 100000 sorted mergesort *
stdin 100000 revsorted sort *
stdin 100000 revsorted mergesort *
func 10000 random_list sort 0.31
func 10000 random_list mergesort 0.91
func 10000 sorted sort 19.09
func 10000 sorted mergesort 0.15
func 10000 revsorted sort 19.17
func 10000 revsorted mergesort 0.16
func 100000 random_list sort 3.85
func 100000 random_list mergesort *
func 100000 sorted sort 5831.47
func 100000 sorted mergesort 2.23
func 100000 revsorted sort 5872.34
func 100000 revsorted mergesort 2.24
Any comments gratefully received
Thanks
Ian
--X1bOJ3K7DJ5YkBrT
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="MergeSort.lhs"
> module MergeSort (mergesort) where
> mergesort :: (Ord a) => [a] -> [a]
> mergesort = mergesort' . map wrap
> mergesort' :: (Ord a) => [[a]] -> [a]
> mergesort' [] = []
> mergesort' [xs] = xs
> mergesort' xss = mergesort' (merge_pairs xss)
> merge_pairs :: (Ord a) => [[a]] -> [[a]]
> merge_pairs [] = []
> merge_pairs [xs] = [xs]
> merge_pairs (xs:ys:xss) = merge xs ys:merge_pairs xss
> merge :: (Ord a) => [a] -> [a] -> [a]
> merge xs [] = xs
> merge [] ys = ys
> merge (x:xs) (y:ys)
> | x <= y = x:merge xs (y:ys)
> | otherwise = y:merge (x:xs) ys
> wrap :: a -> [a]
> wrap x = [x]
--X1bOJ3K7DJ5YkBrT
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="gen_lists.lhs"
> module Main where
> import Random
> main :: IO()
> main = do write_files 10000
> write_files 100000
> write_files :: Int -> IO()
> write_files i
> = do writeFile ("input-random_list-" ++ s) $ show $ random_list i
> writeFile ("input-sorted-" ++ s) $ show $ sorted i
> writeFile ("input-revsorted-" ++ s) $ show $ revsorted i
> where s = show i
> random_list :: Int -> [Int]
> random_list i = take i $ randoms $ mkStdGen 100
> sorted :: Int -> [Int]
> sorted i = [1..i]
> revsorted :: Int -> [Int]
> revsorted i = reverse [1..i]
--X1bOJ3K7DJ5YkBrT
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="sort_func.lhs"
> module Main where
> import List
> import Random
> import MergeSort
> main :: IO()
> main = putStrLn $ show $ SORT DATA
> random_list :: [Int]
> random_list = take COUNT $ randoms $ mkStdGen 100
> sorted :: [Int]
> sorted = [1 .. COUNT]
> revsorted :: [Int]
> revsorted = reverse [1 .. COUNT]
--X1bOJ3K7DJ5YkBrT
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="sort_stdin.lhs"
> module Main where
> import List
> import Random
> import MergeSort
> main :: IO()
> main = getContents >>= putStrLn . show . SORT . (read :: String -> [Int])
--X1bOJ3K7DJ5YkBrT
Content-Type: text/x-sh; charset=us-ascii
Content-Disposition: attachment; filename="time.sh"
#!/bin/sh
function write_what() {
printf "%-15s %-16s %-13s %-12s" "$@" >> results
}
rm -f *.hi *.o
hmake gen_lists -ghc -O2
./gen_lists
for SORT in sort mergesort
do
rm -f *.hi *.o
HFLAGS="-cpp -DSORT=$SORT" hmake sort_stdin -ghc -O2
mv sort_stdin sort_stdin-$SORT
done
rm results
write_what "Input style" "Input length" "Sort data" "Sort alg"
echo "User time" >> results
for COUNT in 10000 100000
do
for DATA in random_list sorted revsorted
do
for SORT in sort mergesort
do
write_what stdin $COUNT $DATA $SORT
/usr/bin/time -o this.result -f "%U" ./sort_stdin-$SORT \
< input-$DATA-$COUNT > /dev/null
if [ $? -eq 0 ]
then
cat this.result >> results
else
echo "*" >> results
fi
done
done
done
for COUNT in 10000 100000
do
for DATA in random_list sorted revsorted
do
for SORT in sort mergesort
do
rm -f *.hi *.o sort_func
HFLAGS="-cpp -DCOUNT=$COUNT -DDATA=$DATA -DSORT=$SORT" \
hmake sort_func -ghc -O2
write_what func $COUNT $DATA $SORT
/usr/bin/time -o this.result -f "%U" ./sort_func > /dev/null
if [ $? -eq 0 ]
then
cat this.result >> results
else
echo "*" >> results
fi
done
done
done
--X1bOJ3K7DJ5YkBrT--