[Haskell] Re: performance tuning Data.FiniteMap
oleg at pobox.com
oleg at pobox.com
Fri Feb 27 16:12:27 EST 2004
Hello!
If indeed the read performance is at premium and updates are
infrequent, by bother with ternary etc. trees -- why not to use just a
single, one-level array. Given a reasonable hash function, the
retrieval performance is O(1). And still, no IO/ST are necessary.
{-# OPTIONS -fglasgow-exts #-}
module Foo where
import Data.Array
import Data.List
import Data.HashTable (hashString)
import Data.Int (Int32)
class Hashy a where
hash:: a -> Int
data MyFM key val = MyFM { base:: Int
, purgatory:: [(key,val)]
, store:: Array Int [(key,val)]
} deriving Show
empty = MyFM {base = 41, purgatory = [],
store = listArray (0,base(empty)-1) $ repeat []}
lkup fm key = case lookup key (purgatory fm) of
t@(Just _) -> t
_ -> lookup key item
where item = (store fm)! hashv
hashv = (hash key) `mod` (base fm)
count = length . concat . elems . store
purgatory_limit = 10
ins fm key val
= rebuild_perhaps $ fm {purgatory = add_uniq (purgatory fm) key val}
where
rebuild_perhaps fm | length (purgatory fm) > purgatory_limit
= rebuild fm
rebuild_perhaps fm = fm
rebuild fm | 2*(count fm) > base fm = major_rebuild fm
rebuild fm = fm{purgatory = [], store = (store fm) // updates}
where
updates = map (retr . merge) $ groupBy gfirs $
sortBy sfirs $ map (\p@(k,v) -> (hashk k,p)) $ purgatory fm
hashk k = (hash k) `mod` (base fm)
gfirs (k1,_) (k2,_) = k1 == k2
sfirs (k1,_) (k2,_) = compare k1 k2
merge x = (fst$ head x, map snd x)
retr (h,v) = (h, unionBy gfirs v ((store fm)!h))
-- reallocate the hash table to the bigger size
major_rebuild fm = undefined -- exercise for the reader
-- add association (key,val) to the list, replacing an old association
-- with the same key, if any. At most one such association could have
-- existed
add_uniq [] key val = [(key,val)]
add_uniq ((hkey,_):t) key val | hkey == key = (key,val):t
add_uniq (h:t) key val = h: add_uniq t key val
instance Hashy String where
hash = fromInteger . toInteger . hashString
test1 = foldl (\fm v -> ins fm v v) empty $ map (:[]) ['a'..'h']
test2 = foldl (\fm v -> ins fm v v) test1 $ map (:[]) ['a'..'o']
test3 = foldl (\fm v -> ins fm v v) test2 $ map (:[]) ['a'..'o']
More information about the Haskell
mailing list