[Haskell-cafe] Iteratee wrapper for attoparsec

Gregory Collins greg at gregorycollins.net
Thu Jan 21 12:27:56 EST 2010


Gregory Collins <greg at gregorycollins.net> writes:

> John Lato <jwlato at gmail.com> writes:
>
>> I don't know if I'd call it a hybrid, however there is a way to embed
>> Parsec parsers (v.3 only) in iteratee.  The necessary code is
>> available at:
>>
>> http://inmachina.net/~jwlato/haskell/ParsecIteratee.hs
>
> This post inspired me to write an iteratee wrapper for attoparsec. The
> attoparsec library has an incremental parser; this means that we don't
> have to use John's lookahead buffer trick, restoring the iteratee
> constant-space guarantee. The downside: I don't think the attoparsec
> incremental parser is capable of maintaining the source position for
> error reporting.

....and here's a version that might actually work (mea culpa)

------------------------------------------------------------------------------
{-# LANGUAGE OverloadedStrings #-}

module Data.Attoparsec.Iteratee (parserToIteratee) where

------------------------------------------------------------------------------
import qualified Data.Attoparsec.Incremental as Atto
import           Data.Attoparsec.Incremental hiding (Result(..))
import qualified Data.ByteString as S
import qualified Data.ByteString.Lazy as L
import           Data.Iteratee
import           Data.Iteratee.WrappedByteString
import           Data.Word (Word8)
import           Prelude hiding (takeWhile)

-- for the examples at the bottom only
import           Control.Monad.Identity
import           Data.Char
import           Data.ByteString.Internal (w2c)

-- The principle is general enough to work for any 'StreamChunk' type (with
-- appropriate wrapping/unwrapping inserted), but I'm working with
-- "WrappedByteString Word8", sorry
type Stream         = StreamG WrappedByteString Word8
type Iteratee   m   = IterateeG WrappedByteString Word8 m
type IterV      m   = IterGV WrappedByteString Word8 m
type Enumerator m a = Iteratee m a -> m (Iteratee m a)

parserToIteratee :: (Monad m) =>
                    Parser a a
                 -> Iteratee m a
parserToIteratee p = IterateeG $ f (\s -> parse p s)
  where
    f :: (Monad m) =>
         (L.ByteString -> Atto.Result a)
      -> Stream
      -> m (IterV m a)

    f k (EOF Nothing)  = finalChunk $ k ""
    f _ (EOF (Just e)) = reportError e
    f k (Chunk s)      = chunk (fromWrap s) k


    finalChunk :: (Monad m) => Atto.Result a -> m (IterV m a)
    finalChunk (Atto.Failed m) =
        return $ Cont (error $ show m)
                      (Just $ Err m)
    finalChunk (Atto.Done rest r) = return $ Done r (Chunk $ toWrap rest)
    finalChunk (Atto.Partial _) =
        return $ Cont (error "parser did not consume all input")
                      (Just $ Err "parser did not consume all input")

    reportError e = return $ Cont (error $ show e) (Just e)

    chunk :: (Monad m) =>
             L.ByteString
          -> (L.ByteString -> Atto.Result a)
          -> m (IterV m a)
    chunk s k = do
        let r = k s
        case r of
          (Atto.Failed m) -> return $
                             Cont (throwErr (Err m)) (Just $ Err m)
          (Atto.Done rest x) -> return $ Done x (Chunk $ toWrap rest)
          (Atto.Partial z) -> return $
                              Cont (IterateeG $ f z) Nothing

-- | lazy bytestring -> wrapped bytestring
toWrap :: L.ByteString -> WrappedByteString Word8
toWrap = WrapBS . S.concat . L.toChunks

-- | wrapped bytestring -> lazy bytestring
fromWrap :: WrappedByteString Word8 -> L.ByteString
fromWrap = L.fromChunks . (:[]) . unWrap


------------------------------------------------------------------------------
-- And a quick example

sp :: Parser r ()
sp = () <$ takeWhile (isSpace . w2c)

digits :: Parser r String
digits = many1 (w2c <$> satisfy (isDigit . w2c))

number :: Parser r Int
number = read <$> digits

numberList :: Parser r [Int]
numberList = liftA2 (:) number (many (sp *> number))

ensureEOF :: Parser r ()
ensureEOF = endOfInput <|> reportError
  where
    reportError = do
        ch <- anyWord8
        let msg = concat [ "unexpected character '"
                         , [w2c ch]
                         , "'" ]
        fail msg

numberListIter :: (Monad m) => Iteratee m [Int]
numberListIter = parserToIteratee $ numberList <* ensureEOF

-- | Turn a strict bytestring into an enumerator
enumBS :: (Monad m) => S.ByteString -> Enumerator m a
enumBS bs = enumPure1Chunk $ WrapBS bs

_example :: [Int]
_example = runIdentity (enumerate numberListIter >>= run)
  where
    -- example, the source could be any enumerator
    enumerate = enumBS "1000 2000 3000 4000 5000 6000 7000"

_exampleWithError :: Either ErrMsg [Int]
_exampleWithError = runIdentity (enumerate numberListIter >>= run . checkErr)
  where
    enumerate = enumBS "1000 2000 3000 4000 5000 6000 7000q"

-- > *Data.Attoparsec.Iteratee> _example
-- > [1000,2000,3000,4000,5000,6000,7000]

-- > *Data.Attoparsec.Iteratee> _exampleWithError
-- > Left (Err "unexpected character 'q'")
------------------------------------------------------------------------------

G.

-- 
Gregory Collins <greg at gregorycollins.net>


More information about the Haskell-Cafe mailing list