[Haskell-cafe] Data.Binary.encode slower than show
Grzegorz Chrupała
pitekus at gmail.com
Sun Jul 26 16:27:41 EDT 2009
Hi all,
I have a piece of code where I'm serializing a datastructure with the
following type [(Int, (Map DType (IntMap Int)))], using Binary.encode
The thing is it is very slow: actually quite a bit slower than just using
show.
This seems rather suspicious. Any idea what could be going on?
import qualified Data.Map as Map
import qualified Data.IntMap as IntMap
import Data.IntMap ((!))
import Vector
import Atom
import Control.Monad.State
import Indexing (indexBy)
import Data.List (foldl')
import System
import System.IO
import Debug.Trace
import qualified Data.Binary as Binary
import qualified Data.ByteString.Lazy as BS
data DType = Fol | For | Sol | Sor deriving (Eq,Enum,Ord,Show,Read)
instance Binary.Binary DType where
get = do x <- Binary.get
return (toEnum x)
put x = Binary.put (fromEnum x)
type Word = Int
type Count = Int
type WordMap = IntMap.IntMap
find = IntMap.findWithDefault IntMap.empty
distReps :: Map.Map (Word,Word) Count
-> [(Int, (Map.Map DType (WordMap Count)))]
distReps bidict =
let bigrams = Map.toList bidict
for = indexBy id bigrams
fol = indexBy swap bigrams
ws = map (fst . fst) bigrams
r = flip map ws
$ \w -> (w,Map.fromList [ (Fol , find w fol)
, (For , find w for)
, (Sol , sox for fol w )
, (Sor , sox fol for w ) ])
in trace (show $ r == r) r
sox :: WordMap (WordMap Count)
-> WordMap (WordMap Count)
-> Word
-> WordMap Count
sox fox foy w =
let xs = IntMap.keys (find w fox)
f !z x = let xv = find x foy
in xv == xv `seq` z `plus` xv
in foldl' f IntMap.empty xs
swap (!a,!b) = (b,a)
readBigrams = fmap (map (\ [w,w',c] -> ((w,w'),read c))
. map words
. lines )
getContents
main = do
[f] <- getArgs
bigrams <- readBigrams
let (bigrams',as) = flip runState empty (atomize bigrams)
dr = distReps (Map.fromList bigrams')
write f dr
hPutStrLn stderr "Done this"
write (f++".atom") as
--write f d = writeFile f (show d)
write f d = BS.writeFile f (Binary.encode d)
atomize xs = mapM f xs
where f ((w,w'),!i) = do
!i_w <- toAtom w'
!i_w' <- toAtom w
let r = ((i_w,i_w'),i)
r == r `seq` return r
--
Grzegorz
More information about the Haskell-Cafe
mailing list