Data.HashTable bug

Josef Svenningsson josefs at cs.chalmers.se
Fri Mar 19 12:13:23 EST 2004


On Fri, 19 Mar 2004, Simon Marlow wrote:

> I did post a hash table version of this program a while back, that I
> claimed was a factor of 3 faster at the time.  Unfortunately the
> attachment in the archive is in base64 and I can't read it, and I've
> lost the code :-(  If you can decode the attachment, it's here:
>
> http://www.haskell.org/pipermail/haskell-cafe/2001-July/002061.html
>
Does the code below look familiar?

	/Josef

\begin{code}
-- compile with: ghc -O -package lang
-- run with:     ./a.out +RTS -H10m -K4m <Input.txt

import MArray
import PackedString
import IOExts
import IO
import Char
import Monad

arr_size = 20000

main = do
  h <- openFile "Usr.Dict.Words" ReadMode
  sz <- hFileSize h
  ps <- hGetPS h (fromIntegral sz)

  tbl <- newArray (0,arr_size) []
  mapM (addToHashTable tbl) (linesPS ps)

  h <- openFile "Input.txt" ReadMode
  sz <- hFileSize h
  ps <- hGetPS h (fromIntegral sz)

  let test s = do b <- elemHashTable s tbl
		  when (not b) (putStrLn (unpackPS s))
  mapM test (linesPS ps)

type HashTable = IOArray Int [PackedString]

-- Looks bad, but GHC does a great job of optimising it:
hashPS :: PackedString -> Int
hashPS ps = foldr f 0 (map (ord.indexPS ps) [1..lengthPS ps])
  where f n m = n + m * 128 `mod` 1048583

addToHashTable :: HashTable -> PackedString -> IO ()
addToHashTable tbl s = do
  let h = hashPS s
      index = h `mod` arr_size
  r <- readArray tbl index
  if (s `elem` r) then return () else do
  writeArray tbl index (s : r)

elemHashTable :: PackedString -> HashTable -> IO Bool
elemHashTable s tbl = do
  let h = hashPS s
      index = h `mod` arr_size
  r <- readArray tbl index
  return (s `elem` r)
\end{code}


More information about the Glasgow-haskell-users mailing list