[Haskell-cafe] More binary IO, compression, bytestrings and FFI fun

Donald Bruce Stewart dons at cse.unsw.edu.au
Mon Jul 9 00:42:49 EDT 2007


Processing larger amounts of data, compression, serialisation and calling C.

An elaboration of the previous example:

    * Build a largish structure in Haskell
    * Compress it in memory
    * Serialise it to disk
    * Deserialise it
    * Decompress
    * Pass it to C
    * Display the result

Pretty common pattern for low level stuff. We use zlib + lazy
bytestrings for streaming decompression, and Data.Binary for the
serialisation.

We will use

    * Foreign.* to generate the data
    * Wrap it as a lazy bytestring
    * Data.Binary to serialise it
    * Code.Compression.Gzip to compress/uncompress
    * Pass it to C and make a simple FFI call on the result
    * Display the result

Running:

    $ ghc -O2 A.hs --make

    $ time ./A                
    Built table
    Compressed      25600000 bytes
    Compressed size  2231545 bytes (91.28%)
    Decompressed    25600000 bytes
    Calling into C ...
    -8.742278e-8
    -0.6865875
    -0.7207948
    -0.1401903
    0.63918984
    0.7437966
    0.27236375
    -0.5763547
    -0.75708854
    -0.39026973
    ./A  2.98s user 0.11s system 94% cpu 3.275 total

The code:

    {-# OPTIONS -fglasgow-exts #-}

    -- 
    -- Some imports
    --
    import Foreign
    import Foreign.C.Types
    import Data.Int

    import qualified Data.ByteString.Lazy as L
    import qualified Data.ByteString.Base as S
    import qualified Data.ByteString      as S

    import Data.Binary
    import Codec.Compression.GZip

    import System.IO
    import Text.Printf
    import Control.Monad

    ------------------------------------------------------------------------
    -- Foreign Ptrs
    --
    -- A simple wrapper type
    --
    data Table = Table { floats :: ForeignPtr CFloat
                       , ints   :: ForeignPtr Int    }

    -- Statically fixed sizes
    floatSize = 4800000
    intSize   = 1600000

    totalBytes = sizeOf (undefined :: CFloat) * floatSize
               + sizeOf (undefined :: Int)    * intSize

    --
    -- Build a table populated with some defaults
    -- Float table filled with 'pi' , ints numbered consecutively
    --
    newTable :: IO Table
    newTable = do
        fp <- S.mallocByteString (floatSize * sizeOf (undefined :: CFloat))
        ip <- S.mallocByteString (intSize   * sizeOf (undefined :: Int   ))
        withForeignPtr fp $ \p ->
            forM_ [0..floatSize-1] $ \n ->
                pokeElemOff p n pi
        withForeignPtr ip $ \p ->
            forM_ [0..intSize-1]   $ \n ->
                pokeElemOff p n n
        return (Table fp ip)

    ------------------------------------------------------------------------
    -- Lazy ByteStrings
    --
    -- Convert ForeignPtr a to and from a lazy ByteString
    --
    toByteString   :: Storable a => ForeignPtr a -> Int -> L.ByteString
    toByteString (fp :: ForeignPtr a) n =
        L.fromChunks . (:[]) $ S.fromForeignPtr (castForeignPtr fp)
                                                (n * sizeOf (undefined :: a))

    --
    -- Flatten a lazy bytestring back to a ForeignPtr.
    --
    fromByteString :: Storable a => L.ByteString -> ForeignPtr a
    fromByteString lbs = castForeignPtr fp
       where (fp,_,n) = S.toForeignPtr . S.concat $ L.toChunks lbs

    ------------------------------------------------------------------------
    -- GZip and Data.Binary
    --
    -- Serialise a Table, compressing with gzip it as we go:
    --
    instance Binary Table where
        put (Table f i) = do
            put . compress . toByteString f $ floatSize
            put . compress . toByteString i $ intSize

        get = do
            fs <- liftM decompress get
            is <- liftM decompress get

            -- check we read the correct amount:
            if L.length fs + L.length is == fromIntegral totalBytes
                then return $ Table (fromByteString fs) (fromByteString is)
                else error "Partial read"

    ------------------------------------------------------------------------
    -- FFI
    --
    -- Example call to process the data using C functions.
    --
    rounded :: Int -> ForeignPtr CFloat -> IO [CFloat]
    rounded l fp = withForeignPtr fp $ \p -> go p
        where
            go p = forM [0..l-1] $ \n -> do
                        v <- peekElemOff p n
                        return $ c_tanhf (c_sinf (v + fromIntegral n))

    -- A random C function to use:    
    foreign import ccall unsafe "math.h sinf"  c_sinf  :: CFloat -> CFloat
    foreign import ccall unsafe "math.h tanhf" c_tanhf :: CFloat -> CFloat


    ------------------------------------------------------------------------
    --
    -- Now glue it all together
    -- 
    main = do
        table <- newTable
        putStrLn "Built table"

        -- write the data to disk, compressed with gzip as we go.
        encodeFile "/tmp/table.gz" table
        printf "Compressed      %d bytes\n" totalBytes

        -- how good was the compression?
        h <- openFile "/tmp/table.gz" ReadMode
        n <- hFileSize h
        hClose h
        printf "Compressed size  %d bytes (%0.2f%%)\n" n
                    (100 - (fromIntegral n/fromIntegral totalBytes*100) :: Double)

        -- load it back in, decompressing on the fly
        table' <- decodeFile "/tmp/table.gz"
        printf "Decompressed    %d bytes\n" totalBytes

        -- now process the floats with C
        printf "Calling into C ...\n"
        ps <- rounded 10 (floats table')
        forM_ ps print


More information about the Haskell-Cafe mailing list