[Haskell-cafe] Iteratee wrapper for attoparsec
Gregory Collins
greg at gregorycollins.net
Mon Jan 18 18:22:52 EST 2010
John Lato <jwlato at gmail.com> writes:
> I don't know if I'd call it a hybrid, however there is a way to embed
> Parsec parsers (v.3 only) in iteratee. The necessary code is
> available at:
>
> http://inmachina.net/~jwlato/haskell/ParsecIteratee.hs
This post inspired me to write an iteratee wrapper for attoparsec. The
attoparsec library has an incremental parser; this means that we don't
have to use John's lookahead buffer trick, restoring the iteratee
constant-space guarantee. The downside: I don't think the attoparsec
incremental parser is capable of maintaining the source position for
error reporting.
------------------------------------------------------------------------------
{-# LANGUAGE OverloadedStrings #-}
module Data.Attoparsec.Iteratee (parserToIteratee) where
------------------------------------------------------------------------------
import qualified Data.Attoparsec.Incremental as Atto
import Data.Attoparsec.Incremental hiding (Result(..))
import qualified Data.ByteString as S
import qualified Data.ByteString.Lazy as L
import Data.Iteratee
import Data.Iteratee.WrappedByteString
import Data.Monoid
import Data.Word (Word8)
import Prelude hiding (takeWhile)
-- for the examples at the bottom only
import Control.Monad.Identity
import Data.Char
import Data.ByteString.Internal (w2c)
-- The principle is general enough to work for any 'StreamChunk' type (with
-- appropriate wrapping/unwrapping inserted), but I'm working with
-- "WrappedByteString Word8", sorry
type Stream = StreamG WrappedByteString Word8
type Iteratee m a = IterateeG WrappedByteString Word8 m a
type Enumerator m a = Iteratee m a -> m (Iteratee m a)
-- | Convert an attoparsec 'Parser' into an 'Iteratee'.
parserToIteratee :: (Monad m) =>
Parser a a
-> Iteratee m a
parserToIteratee p =
IterateeG $ \s ->
let r = case s of
(EOF Nothing) -> Atto.parse p ""
(EOF (Just (Err e))) -> Atto.Failed e
(EOF (Just _)) -> Atto.Failed "seek not permitted"
(Chunk s') -> Atto.parse p $ fromWrap s'
in return $ Cont (runChunk r) Nothing
where
runChunk (Atto.Failed m) = throwErr $ Err m
runChunk (Atto.Done rest r) =
IterateeG $ \s -> return $ Done r (addToChunk rest s)
runChunk oldr@(Atto.Partial f) =
IterateeG $ \s ->
case s of
(EOF Nothing) -> enforceDone f
(EOF (Just e)) -> return $ Cont (throwErr e) (Just e)
(Chunk s') -> let x = fromWrap s'
k = if L.null x then oldr else f x
in return $ Cont (runChunk k) Nothing
-- you end an incremental parser by passing it the empty string
enforceDone f =
return $ case f "" of
(Atto.Failed m ) -> Cont (throwErr $ Err m) (Just $ Err m)
(Atto.Done rest r) -> Done r $ Chunk (toWrap rest)
(Atto.Partial _ ) -> Cont (throwErr eoi) (Just eoi)
where
eoi = Err "premature end of input"
-- | lazy bytestring -> wrapped bytestring
toWrap :: L.ByteString -> WrappedByteString Word8
toWrap = WrapBS . S.concat . L.toChunks
-- | wrapped bytestring -> lazy bytestring
fromWrap :: WrappedByteString Word8 -> L.ByteString
fromWrap = L.fromChunks . (:[]) . unWrap
-- | tack a lazy bytestring onto the front of an iteratee 'Stream'
addToChunk :: L.ByteString -> Stream -> Stream
addToChunk s (EOF Nothing) = Chunk $ toWrap s
addToChunk _ x@(EOF _) = x
addToChunk s (Chunk w) = Chunk $ toWrap s `mappend` w
------------------------------------------------------------------------------
-- And a quick example
sp :: Parser r ()
sp = () <$ takeWhile (isSpace . w2c)
digits :: Parser r String
digits = many1 (w2c <$> satisfy (isDigit . w2c))
number :: Parser r Int
number = read <$> digits
numberList :: Parser r [Int]
numberList = liftA2 (:) number (many (sp *> number))
ensureEOF :: Parser r ()
ensureEOF = endOfInput <|> reportError
where
reportError = do
ch <- anyWord8
let msg = concat [ "unexpected character '"
, [w2c ch]
, "'" ]
fail msg
numberListIter :: (Monad m) => Iteratee m [Int]
numberListIter = parserToIteratee $ numberList <* ensureEOF
-- | Turn a strict bytestring into an enumerator
enumBS :: (Monad m) => S.ByteString -> Enumerator m a
enumBS bs = enumPure1Chunk $ WrapBS bs
_example :: [Int]
_example = runIdentity (enumerate numberListIter >>= run)
where
-- example, the source could be any enumerator
enumerate = enumBS "1000 2000 3000 4000 5000 6000 7000"
_exampleWithError :: Either ErrMsg [Int]
_exampleWithError = runIdentity (enumerate numberListIter >>= run . checkErr)
where
enumerate = enumBS "1000 2000 3000 4000 5000 6000 7000q"
-- > *Data.Attoparsec.Iteratee> _example
-- > [1000,2000,3000,4000,5000,6000,7000]
-- > *Data.Attoparsec.Iteratee> _exampleWithError
-- > Left (Err "unexpected character 'q'")
------------------------------------------------------------------------------
G.
--
Gregory Collins <greg at gregorycollins.net>
More information about the Haskell-Cafe
mailing list