[Haskell-cafe] Iteratee wrapper for attoparsec
Gregory Collins
greg at gregorycollins.net
Thu Jan 21 12:27:56 EST 2010
Gregory Collins <greg at gregorycollins.net> writes:
> John Lato <jwlato at gmail.com> writes:
>
>> I don't know if I'd call it a hybrid, however there is a way to embed
>> Parsec parsers (v.3 only) in iteratee. The necessary code is
>> available at:
>>
>> http://inmachina.net/~jwlato/haskell/ParsecIteratee.hs
>
> This post inspired me to write an iteratee wrapper for attoparsec. The
> attoparsec library has an incremental parser; this means that we don't
> have to use John's lookahead buffer trick, restoring the iteratee
> constant-space guarantee. The downside: I don't think the attoparsec
> incremental parser is capable of maintaining the source position for
> error reporting.
....and here's a version that might actually work (mea culpa)
------------------------------------------------------------------------------
{-# LANGUAGE OverloadedStrings #-}
module Data.Attoparsec.Iteratee (parserToIteratee) where
------------------------------------------------------------------------------
import qualified Data.Attoparsec.Incremental as Atto
import Data.Attoparsec.Incremental hiding (Result(..))
import qualified Data.ByteString as S
import qualified Data.ByteString.Lazy as L
import Data.Iteratee
import Data.Iteratee.WrappedByteString
import Data.Word (Word8)
import Prelude hiding (takeWhile)
-- for the examples at the bottom only
import Control.Monad.Identity
import Data.Char
import Data.ByteString.Internal (w2c)
-- The principle is general enough to work for any 'StreamChunk' type (with
-- appropriate wrapping/unwrapping inserted), but I'm working with
-- "WrappedByteString Word8", sorry
type Stream = StreamG WrappedByteString Word8
type Iteratee m = IterateeG WrappedByteString Word8 m
type IterV m = IterGV WrappedByteString Word8 m
type Enumerator m a = Iteratee m a -> m (Iteratee m a)
parserToIteratee :: (Monad m) =>
Parser a a
-> Iteratee m a
parserToIteratee p = IterateeG $ f (\s -> parse p s)
where
f :: (Monad m) =>
(L.ByteString -> Atto.Result a)
-> Stream
-> m (IterV m a)
f k (EOF Nothing) = finalChunk $ k ""
f _ (EOF (Just e)) = reportError e
f k (Chunk s) = chunk (fromWrap s) k
finalChunk :: (Monad m) => Atto.Result a -> m (IterV m a)
finalChunk (Atto.Failed m) =
return $ Cont (error $ show m)
(Just $ Err m)
finalChunk (Atto.Done rest r) = return $ Done r (Chunk $ toWrap rest)
finalChunk (Atto.Partial _) =
return $ Cont (error "parser did not consume all input")
(Just $ Err "parser did not consume all input")
reportError e = return $ Cont (error $ show e) (Just e)
chunk :: (Monad m) =>
L.ByteString
-> (L.ByteString -> Atto.Result a)
-> m (IterV m a)
chunk s k = do
let r = k s
case r of
(Atto.Failed m) -> return $
Cont (throwErr (Err m)) (Just $ Err m)
(Atto.Done rest x) -> return $ Done x (Chunk $ toWrap rest)
(Atto.Partial z) -> return $
Cont (IterateeG $ f z) Nothing
-- | lazy bytestring -> wrapped bytestring
toWrap :: L.ByteString -> WrappedByteString Word8
toWrap = WrapBS . S.concat . L.toChunks
-- | wrapped bytestring -> lazy bytestring
fromWrap :: WrappedByteString Word8 -> L.ByteString
fromWrap = L.fromChunks . (:[]) . unWrap
------------------------------------------------------------------------------
-- And a quick example
sp :: Parser r ()
sp = () <$ takeWhile (isSpace . w2c)
digits :: Parser r String
digits = many1 (w2c <$> satisfy (isDigit . w2c))
number :: Parser r Int
number = read <$> digits
numberList :: Parser r [Int]
numberList = liftA2 (:) number (many (sp *> number))
ensureEOF :: Parser r ()
ensureEOF = endOfInput <|> reportError
where
reportError = do
ch <- anyWord8
let msg = concat [ "unexpected character '"
, [w2c ch]
, "'" ]
fail msg
numberListIter :: (Monad m) => Iteratee m [Int]
numberListIter = parserToIteratee $ numberList <* ensureEOF
-- | Turn a strict bytestring into an enumerator
enumBS :: (Monad m) => S.ByteString -> Enumerator m a
enumBS bs = enumPure1Chunk $ WrapBS bs
_example :: [Int]
_example = runIdentity (enumerate numberListIter >>= run)
where
-- example, the source could be any enumerator
enumerate = enumBS "1000 2000 3000 4000 5000 6000 7000"
_exampleWithError :: Either ErrMsg [Int]
_exampleWithError = runIdentity (enumerate numberListIter >>= run . checkErr)
where
enumerate = enumBS "1000 2000 3000 4000 5000 6000 7000q"
-- > *Data.Attoparsec.Iteratee> _example
-- > [1000,2000,3000,4000,5000,6000,7000]
-- > *Data.Attoparsec.Iteratee> _exampleWithError
-- > Left (Err "unexpected character 'q'")
------------------------------------------------------------------------------
G.
--
Gregory Collins <greg at gregorycollins.net>
More information about the Haskell-Cafe
mailing list