[Haskell-cafe] Arrays performance

Paolo Veronelli paolo.veronelli at gmail.com
Thu Jan 4 08:25:16 EST 2007


Quoting Paolo Veronelli <paolo.veronelli at gmail.com>:
> I paste new version in case you care give me some moe suggestion.



import Data.Maybe
import Data.List
import Data.Array.Diff

import System.Environment
import Control.Arrow 
import Control.Monad

import Random


inc l i = l // [(i,l!i + 1)]
switch l i = l // [(i,not (l!i))]
constArray n v = listArray (0,n-1) (repeat v)

data Folding = Folding 
  {clusters :: [(Int,Int)], remi :: Int, colsCount :: DiffArray Int Int ,rowsCheck :: DiffArray Int Bool}

result (Folding cs _ _ _) = cs

rcluster ls d s = let 
  devil s@(Folding cs r hs fs) l@(row,col) = let 
    ns = s { clusters = (l:cs), rowsCheck = switch fs row, colsCount = inc hs col } 
    rowtest  | c < d               = ns                                                                        
             | (c == d) && (r > 0) = ns { remi = r - 1 }                                         
             | otherwise           = s
      where c = hs ! col
    in if (not (fs ! row)) then rowtest else s                                                                    
  in foldl devil s ls
                     
mcluster :: (Int,Int) -> [(Int,Int)] ->  [(Int,[Int])]
mcluster (lr,lc) ls = let 
  (k,r) = divMod lr lc
  start = Folding{clusters = [],remi = r,colsCount = constArray lc 0,rowsCheck = constArray lr False } 
  cs = result $ rcluster ls k start
  in map collapse . groupBy (comp fst (==)) . sort . map swap $ cs 
      where             
    comp f g x y = (f x) `g` (f y) 
    swap = snd &&& fst 
    collapse = (head &&& unzip) >>> (fst *** snd)


cluster :: (Ord b) => (a -> a -> b) -> [a] -> [a] -> [(a,[a])]
cluster fxy xs ys =  let 
  mkArray (l,xs) = (listArray :: (Int,Int) -> [e] -> DiffArray Int e)  (0,l-1)  xs 
  xls = mkArray (lc,xs)
  yls = mkArray (rc,ys)
  (lc,rc) =  (length xs,length ys)
  in
  map ((yls !) *** map (xls !)) (mcluster (lc,rc) (snd.unzip.sort $ delta))
    where
  delta  = [(fxy x y,(n,m))|(n,x) <- zip [0..] xs, (m,y) <- zip [0..] ys]

 
-- call it with 2 args, the number ov values and the number of clusters
-- <prog> 101 10   will cluster 101 values in 10 clusters

points m n = do gen <- getStdGen
                return $ splitAt n (take (m + n) (randomRs (0,100::Int) gen))

test1 = do args <- getArgs
           return $ map read args :: IO [Int] 

main = do 
        [m,n] <- test1
        --let [m,n] = [10,3200]
        (ps,bs) <- points m n
        print $ cluster (\x y -> abs (x - y)) ps bs



More information about the Haskell-Cafe mailing list