paolo.veronelli at gmail.com paolo.veronelli at gmail.com
Mon Jan 1 12:30:04 EST 2007

```Good year everyone.

I'm timing the following script.I'm not expert to evaluate th O'ness of this code, I hope someone can do it.
The program clusters n integers in m buckets based on their distance.
Anyway I thing should be linear.So I timed som executions changing the first arg.

First argument n is the number of integers to be clustered.Their are choosen randomly between 0 and 100.
Second argument m is the number of buckets.For them I choose a value choosen like before.
In the timings second argument is always 10.

Points   Time (ms)
100      40
200      93
400      269
800      831
1600     3063
3200     12693
6400     54708

I'd like to know if the algorithm is the cause for this timings (it's not linear) or if I need to use other kinds of Arrays

Thanks

Paolino

----------------------------------------------------------------
import Data.Maybe
import Data.List
import Data.Array.Diff

import System.Environment
import Control.Arrow

import Random

inc l i = l // [(i,l!i + 1)]
switch l i = l // [(i,not (l!i))]
constArray n v = listArray (0,n-1) (repeat v)

data CState = CState
{clusters :: [(Int,Int)],remi :: Int,colsHeap ::DiffArray Int Int ,rowsFlag :: DiffArray Int Bool}

devil [] _ = return ()

devil (l@(row,col):ls) d = do
s@(CState cs r hs fs) <- get
let ns = s { clusters = (l:cs), rowsFlag = switch fs row, colsHeap = inc hs col }
update  | c < d               = put ns
| (c == d) && (r > 0) = put ns { remi = r - 1 }
| otherwise           = return ()
where c = hs ! col
when (not (fs ! row)) update
devil ls d

comp f g x y = (f x) `g` (f y)
swap = snd &&& fst
collapse = (head &&& unzip) >>> (fst *** snd)

mcluster :: [(Int,Int)] -> [(Int,[Int])]
mcluster ls = let
(lr,lc) = (f *** f) (unzip ls) where f = length.nub  -- coo space width
(k,r) = divMod lr lc
CState cs _ _ _ = execState (devil ls k)
CState{clusters = [],remi = r,colsHeap = constArray lc 0,rowsFlag = constArray lr False }
in map collapse . groupBy (comp fst (==)) . sort . map swap \$ cs

coupage ls = zip [0..] ls
delta fxy xs ys = [(abs(x-y),(n,m))|(n,x) <- coupage xs, (m,y) <- coupage ys]
decoupage ls n = fromJust \$ lookup n (coupage ls)

test xs ys =
let d = snd.unzip.sort \$ delta (\x y -> abs (x -y)) xs ys
in
map (decoupage ys *** map (decoupage xs)) (mcluster d)

-- call it with 2 args, the number ov values and the number of clusters
-- <prog> 101 10   will cluster 101 values in 10 clusters
main = do args <- getArgs
gen <- getStdGen
let [n,m] = map read args
let (ps,bs) = splitAt n (take (m + n) (randomRs (0,100) gen))
print \$ test ps bs

```