Data.HashTable bug
Josef Svenningsson
josefs at cs.chalmers.se
Fri Mar 19 12:13:23 EST 2004
On Fri, 19 Mar 2004, Simon Marlow wrote:
> I did post a hash table version of this program a while back, that I
> claimed was a factor of 3 faster at the time. Unfortunately the
> attachment in the archive is in base64 and I can't read it, and I've
> lost the code :-( If you can decode the attachment, it's here:
>
> http://www.haskell.org/pipermail/haskell-cafe/2001-July/002061.html
>
Does the code below look familiar?
/Josef
\begin{code}
-- compile with: ghc -O -package lang
-- run with: ./a.out +RTS -H10m -K4m <Input.txt
import MArray
import PackedString
import IOExts
import IO
import Char
import Monad
arr_size = 20000
main = do
h <- openFile "Usr.Dict.Words" ReadMode
sz <- hFileSize h
ps <- hGetPS h (fromIntegral sz)
tbl <- newArray (0,arr_size) []
mapM (addToHashTable tbl) (linesPS ps)
h <- openFile "Input.txt" ReadMode
sz <- hFileSize h
ps <- hGetPS h (fromIntegral sz)
let test s = do b <- elemHashTable s tbl
when (not b) (putStrLn (unpackPS s))
mapM test (linesPS ps)
type HashTable = IOArray Int [PackedString]
-- Looks bad, but GHC does a great job of optimising it:
hashPS :: PackedString -> Int
hashPS ps = foldr f 0 (map (ord.indexPS ps) [1..lengthPS ps])
where f n m = n + m * 128 `mod` 1048583
addToHashTable :: HashTable -> PackedString -> IO ()
addToHashTable tbl s = do
let h = hashPS s
index = h `mod` arr_size
r <- readArray tbl index
if (s `elem` r) then return () else do
writeArray tbl index (s : r)
elemHashTable :: PackedString -> HashTable -> IO Bool
elemHashTable s tbl = do
let h = hashPS s
index = h `mod` arr_size
r <- readArray tbl index
return (s `elem` r)
\end{code}
More information about the Glasgow-haskell-users
mailing list