[Haskell-cafe] Re: OCaml list sees abysmal Language Shootout results

Peter Simons simons at cryp.to
Thu Oct 7 07:01:19 EDT 2004


Keith Wansbrough writes:

 > Count me as a vote for the better-but-slightly-slower wc.

How about the attached program? On my machine it faster than
Tomasz's version, and I think it's still a fairly clean
source code. Using some random large file for input, I got
these results with time(1):

  real    0m33.883s             -- getarray/unsafeRead
  user    0m22.594s
  sys     0m2.493s

  real    0m30.435s             -- hgetbuf/peek
  user    0m13.958s
  sys     0m2.814s

Peter


module Main ( main ) where

import System.IO
import Foreign

bufsize :: Int                  -- our I/O buffer size
bufsize = 4096

type Count = Int32
data CountingState = ST !Bool !Count !Count !Count
                     deriving (Show)

initCST :: CountingState
initCST = ST True 0 0 0

wc :: Char -> CountingState -> CountingState
wc '\n' (ST _     l w c) = (ST True (l+1)  w   (c+1))
wc ' '  (ST _     l w c) = (ST True   l    w   (c+1))
wc '\t' (ST _     l w c) = (ST True   l    w   (c+1))
wc  _   (ST True  l w c) = (ST False  l  (w+1) (c+1))
wc  _   (ST False l w c) = (ST False  l    w   (c+1))

countBuf :: Ptr Word8 -> Int -> CountingState -> IO CountingState
countBuf _   0 st@(ST _ _ _ _) = return st
countBuf ptr n st@(ST _ _ _ _) = do
  c <- fmap (toEnum . fromEnum) (peek ptr)
  countBuf (ptr `plusPtr` 1) (n - 1) (wc c st)

loop :: Handle -> Ptr Word8 -> CountingState -> IO CountingState
loop h ptr st@(ST _ _ _ _) = do
  rc <- hGetBuf h ptr bufsize
  if rc == 0
     then return st
     else countBuf ptr rc st >>= (loop h ptr $!)

main :: IO ()
main = do
  allocaArray bufsize $ \ptr -> do
    ST _ l w c <- loop stdin ptr initCST
    putStrLn . shows l . (' ':) . shows w . (' ':) . shows c $ ""



More information about the Haskell-Cafe mailing list