[Haskell-cafe] Iteratee wrapper for attoparsec

Gregory Collins greg at gregorycollins.net
Mon Jan 18 18:22:52 EST 2010


John Lato <jwlato at gmail.com> writes:

> I don't know if I'd call it a hybrid, however there is a way to embed
> Parsec parsers (v.3 only) in iteratee.  The necessary code is
> available at:
>
> http://inmachina.net/~jwlato/haskell/ParsecIteratee.hs

This post inspired me to write an iteratee wrapper for attoparsec. The
attoparsec library has an incremental parser; this means that we don't
have to use John's lookahead buffer trick, restoring the iteratee
constant-space guarantee. The downside: I don't think the attoparsec
incremental parser is capable of maintaining the source position for
error reporting.

------------------------------------------------------------------------------
{-# LANGUAGE OverloadedStrings #-}

module Data.Attoparsec.Iteratee (parserToIteratee) where

------------------------------------------------------------------------------
import qualified Data.Attoparsec.Incremental as Atto
import           Data.Attoparsec.Incremental hiding (Result(..))
import qualified Data.ByteString as S
import qualified Data.ByteString.Lazy as L
import           Data.Iteratee
import           Data.Iteratee.WrappedByteString
import           Data.Monoid
import           Data.Word (Word8)
import           Prelude hiding (takeWhile)

-- for the examples at the bottom only
import           Control.Monad.Identity
import           Data.Char
import           Data.ByteString.Internal (w2c)

-- The principle is general enough to work for any 'StreamChunk' type (with
-- appropriate wrapping/unwrapping inserted), but I'm working with
-- "WrappedByteString Word8", sorry
type Stream         = StreamG WrappedByteString Word8
type Iteratee   m a = IterateeG WrappedByteString Word8 m a
type Enumerator m a = Iteratee m a -> m (Iteratee m a)

-- | Convert an attoparsec 'Parser' into an 'Iteratee'.
parserToIteratee :: (Monad m) =>
                    Parser a a
                 -> Iteratee m a
parserToIteratee p =
    IterateeG $ \s ->
        let r = case s of
                  (EOF Nothing)        -> Atto.parse p ""
                  (EOF (Just (Err e))) -> Atto.Failed e
                  (EOF (Just _))       -> Atto.Failed "seek not permitted"
                  (Chunk s')           -> Atto.parse p $ fromWrap s'
        in return $ Cont (runChunk r) Nothing

  where
    runChunk (Atto.Failed m) = throwErr $ Err m

    runChunk (Atto.Done rest r) =
        IterateeG $ \s -> return $ Done r (addToChunk rest s)

    runChunk oldr@(Atto.Partial f) =
        IterateeG $ \s ->
            case s of
              (EOF Nothing)  -> enforceDone f
              (EOF (Just e)) -> return $ Cont (throwErr e) (Just e)
              (Chunk s')     -> let x = fromWrap s'
                                    k = if L.null x then oldr else f x
                                in return $ Cont (runChunk k) Nothing

    -- you end an incremental parser by passing it the empty string
    enforceDone f =
        return $ case f "" of
          (Atto.Failed m   ) -> Cont (throwErr $ Err m) (Just $ Err m)
          (Atto.Done rest r) -> Done r $ Chunk (toWrap rest)
          (Atto.Partial _  ) -> Cont (throwErr eoi) (Just eoi)
      where
        eoi = Err "premature end of input"

-- | lazy bytestring -> wrapped bytestring
toWrap :: L.ByteString -> WrappedByteString Word8
toWrap = WrapBS . S.concat . L.toChunks

-- | wrapped bytestring -> lazy bytestring
fromWrap :: WrappedByteString Word8 -> L.ByteString
fromWrap = L.fromChunks . (:[]) . unWrap

-- | tack a lazy bytestring onto the front of an iteratee 'Stream'
addToChunk :: L.ByteString -> Stream -> Stream
addToChunk s (EOF Nothing) = Chunk $ toWrap s
addToChunk _ x@(EOF _)     = x
addToChunk s (Chunk w)     = Chunk $ toWrap s `mappend` w


------------------------------------------------------------------------------
-- And a quick example

sp :: Parser r ()
sp = () <$ takeWhile (isSpace . w2c)

digits :: Parser r String
digits = many1 (w2c <$> satisfy (isDigit . w2c))

number :: Parser r Int
number = read <$> digits

numberList :: Parser r [Int]
numberList = liftA2 (:) number (many (sp *> number))

ensureEOF :: Parser r ()
ensureEOF = endOfInput <|> reportError
  where
    reportError = do
        ch <- anyWord8
        let msg = concat [ "unexpected character '"
                         , [w2c ch]
                         , "'" ]
        fail msg

numberListIter :: (Monad m) => Iteratee m [Int]
numberListIter = parserToIteratee $ numberList <* ensureEOF

-- | Turn a strict bytestring into an enumerator
enumBS :: (Monad m) => S.ByteString -> Enumerator m a
enumBS bs = enumPure1Chunk $ WrapBS bs

_example :: [Int]
_example = runIdentity (enumerate numberListIter >>= run)
  where
    -- example, the source could be any enumerator
    enumerate = enumBS "1000 2000 3000 4000 5000 6000 7000"

_exampleWithError :: Either ErrMsg [Int]
_exampleWithError = runIdentity (enumerate numberListIter >>= run . checkErr)
  where
    enumerate = enumBS "1000 2000 3000 4000 5000 6000 7000q"

-- > *Data.Attoparsec.Iteratee> _example
-- > [1000,2000,3000,4000,5000,6000,7000]

-- > *Data.Attoparsec.Iteratee> _exampleWithError
-- > Left (Err "unexpected character 'q'")
------------------------------------------------------------------------------

G.
-- 
Gregory Collins <greg at gregorycollins.net>


More information about the Haskell-Cafe mailing list