[Haskell-cafe] Bit streams programs in Haskell

Donald Bruce Stewart dons at cse.unsw.edu.au
Wed Mar 22 22:15:24 EST 2006


per.gustafsson:
> 
> Haskell gurus,
> 
> We have made a proposal to extend the Erlang `binary' data type from
> being a sequence of bytes (a byte stream) to being a sequence of bits (a
> bitstream) with the ability to do pattern matching at the bit level.
> 
> Our experience in writing efficient (and beautiful) Haskell programs is
> close to (if not below) zero. Also, perhaps our mind might be suffering
> from severe case of strictness and might be completely unable to `think
> lazily'. So, we request your help in noticing obvious NO-NOs and stupid
> mistakes that we might have made. We even welcome completely different
> Haskell programs provided they adhere to the constraint mentioned
> above -- no mutation.

Ok, I rewrote the drop3 program to use packed, unboxed arrays as the
OCaml version does, instead of lazy boxed lists.

It now runs in 3.5s on my linux box, which is around which is around
what the OCaml version does.

    $ ghc B.hs
    $ ./a.out testdata.drop3
    3.617
    $ cmp testdata.drop3.haskell testdata.drop3.out

Comparing lazy list IO against packed array IO is a bit silly, so I suggest you
use the same buffer types in your Haskell code as you do in the OCaml code.
Otherwise the comparisons aren't very meaningful.  The problem is not so much
laziness, as you suggest, but that you're using a completely unsuitable data
type: lists, instead of (packed) strings.

You can most likely just translate your other OCaml programs into Haskell as I
have done here, which would be a good basis for a reasonable comparison.

You may also find the Haskell performance resource useful, 
    http://www.haskell.org/haskellwiki/Performance

Cheers,
  Don
-------------- next part --------------
{-# OPTIONS -O2 #-}
--
-- Translated from the OCaml version.
--

import Control.Monad
import Data.Char
import Data.Array.IO
import Data.Array.Base
import Data.Bits
import Data.Word
import System
import System.CPUTime
import System.IO
import Text.Printf

iter :: Int
iter = 10
    
main = do
    f         <- getArgs >>= return . head
    (arr,l)   <- slurp f 
    t0        <- getCPUTime
    (arr',l') <- replicateM iter (drop0xx arr (l*8)) >>= return . head
    t1        <- getCPUTime
    printf "%.3f\n" $ (fromInteger (t1 - t0) :: Float) / (fromInteger 10 ^ 12 :: Float)
    dump f arr' (1 + (snd . bounds) arr')

drop0xx = drop0xx' 0 0 0 []

drop0xx' :: Int -> Int -> Int -> [Int] -> Buffer -> Int -> IO (Buffer,Int)
drop0xx' inoff reg shifts acc str len
    | inoff `seq` reg `seq` shifts `seq` acc `seq` str `seq` len `seq` False = undefined
    | inoff' > len  = makeResult (reverse acc) reg shifts
    | otherwise     = do
        triple <- getTriple str inoff
        if triple >= 4 
            then let reg' = (reg `shiftL` 3) .|. triple 
                 in if shifts == 7
                    then drop0xx' inoff' 0    0          (reg':acc) str len
                    else drop0xx' inoff' reg' (shifts+1) acc        str len
            else drop0xx' inoff' reg shifts acc str len

    where inoff' = inoff + 3

getTriple :: Buffer -> Int -> IO Int
getTriple str inoff | str `seq` inoff `seq` False = undefined 
getTriple str inoff = do
    b0 <- str `unsafeRead` bitind     >>= return . fromIntegral
    b1 <- str `unsafeRead` (bitind+1) >>= return . fromIntegral
    return $! (if bitoff < 6
              then  b0 `shiftR` (5-bitoff)
              else (b0 `shiftL` (bitoff-5)) .|. (b1 `shiftR` (13-bitoff)))
          .&. 7

      where bitoff = inoff .&. 7
            bitind = inoff `shiftR` 3

makeResult :: [Int] -> Int -> Int -> IO (Buffer,Int)
makeResult list0 endpiece shifts = do
    arr <- newArray_ (0,triplebytesize + endpiecesize-1) :: IO Buffer

    let packList (triple:rest) ind = do 
             unsafeWrite arr ind     $ fromIntegral $ (triple `shiftR` 16) .&. 255
             unsafeWrite arr (ind+1) $ fromIntegral $ (triple `shiftR`  8) .&. 255
             unsafeWrite arr (ind+2) $ fromIntegral $ triple               .&. 255
             packList rest (ind+3)

        packList [] ind = 
            let c1 = endpiece `shiftL` ((shifts*3 - 8) .&. 255)
                s0 = shifts * 3 - 8
            in case endpiecesize of
                0 -> return ()
                1 -> do unsafeWrite arr ind $ fromIntegral $
                          endpiece `shiftL` (s0 .&. 255)
                2 -> do unsafeWrite arr ind $ fromIntegral $
                          endpiece `shiftL` (s0 .&. 255)
                        unsafeWrite arr (ind+1) $ fromIntegral $
                          endpiece `shiftL` ((s0-8) .&. 255)
    packList list0 0
    return (arr, triplebytesize * 8 + shifts * 3)

    where endpiecesize   = getNeededBytes shifts 
          triplebytesize = 3 * length list0

getNeededBytes shifts | shifts < 3 = 0
                      | shifts < 6 = 1
                      | otherwise  = 2
                      
------------------------------------------------------------------------

type Buffer = IOUArray Int Word8

slurp :: FilePath -> IO (Buffer, Int)
slurp f = do
    h   <- openBinaryFile f ReadMode
    l   <- hFileSize h
    arr <- newArray_ (0,fromIntegral l-1) :: IO Buffer
    hGetArray h arr (fromIntegral l)
    hClose h
    return (arr,fromIntegral l)

dump :: FilePath -> Buffer -> Int -> IO ()
dump f arr l = do
    h <- openBinaryFile (f ++ ".haskell") WriteMode
    hPutArray h arr l
    hClose h


More information about the Haskell-Cafe mailing list