[Haskell-cafe] Difficulties implementing an incremental parser
using Oleg-style left fold enumerator
Johan Tibell
johan.tibell at gmail.com
Sat Mar 8 03:56:36 EST 2008
Dear haskell-cafe,
I'm trying to write a parser combinator library with the following contraints:
* Parses LL(1) grammars.
* Is incremental i.e. it uses an Oleg style left fold enumerator to
receive its input.
* Is applicative but not monadic.
The problem -- maybe there are others too -- is that when a parser such as
many (byte 65)
is run it will always return a 'Partial' result waiting for more input
even though the enumerator is exhausted. In other words, there's no
way to detect end of input.
My current implementation of the parser type is
newtype Parser r a = Parser
{ unParser :: S -> (a -> S -> Result r) -> (S -> Result r) -> Result r }
where the first parameter is the parse state, the second a success
continuation, and the third a failure continuation. The only tricky
part (except for the above mentioned problem) is to implement the
choice operator. I implement mine as
instance Applicative (Parser r) where
pure a = ...
p <*> p' = Parser $ \s succ fail ->
flip (unParser p s) fail $ \f s' ->
unParser p' s' (succ . f) fail
which I think is correct.
Here follows my code. I hope someone has some idea how I could handle
the end of input problem correctly.
Thanks.
-- Johan
{-# LANGUAGE DeriveDataTypeable, Rank2Types #-}
-----------------------------------------------------------------------------
-- |
-- Module : Parsing.IParse
-- Copyright : (c) Johan Tibell 2008
-- License : BSD3-style (see LICENSE)
--
-- Maintainer : johan.tibell at gmail.com
-- Stability : experimental
-- Portability : portable
--
-- An incremental LL(1) parser combinator library.
--
-----------------------------------------------------------------------------
module Parsing.IParse
(
-- * The 'Parser' type
Parser,
Enumerator,
parse,
-- * Primitive parsers
satisfy,
byte,
module Control.Applicative
) where
import Control.Applicative (Alternative(..), Applicative(..))
import Control.Monad (Functor(..))
import qualified Data.ByteString as S
import Data.Int (Int64)
import Data.Typeable (Typeable, showsTypeRep, typeOf)
import Data.Word (Word8)
import Prelude hiding (fail, succ)
-- ---------------------------------------------------------------------
-- The Parser type
-- | The parse state.
data S = S {-# UNPACK #-} !S.ByteString {-# UNPACK #-} !Int64
deriving Show
-- | A parse either succeeds, fails or returns a suspension with which
-- the parsing can be resumed.
data Result a = Finished a S
| Failed Int64
| Partial (S.ByteString -> Result a)
deriving Typeable
-- | For debug output.
instance (Show a, Typeable a) => Show (Result a) where
showsPrec d (Finished a s) = showParen (d > 10) showStr
where showStr = showString "Finished " . showsPrec 11 a
. showString " " . showsPrec 11 s
showsPrec d (Failed pos) = showParen (d > 10) showStr
where showStr = showString "Failed " . showsPrec 11 pos
showsPrec d (Partial k) = showParen (d > 10) showStr
where showStr = showString "Partial " . showsTypeRep (typeOf k)
-- | A parser takes a parse state, a success continuation and a
-- failure continuation and returns a 'Result'.
newtype Parser r a = Parser
{ unParser :: S -> (a -> S -> Result r) -> (S -> Result r) -> Result r }
-- ---------------------------------------------------------------------
-- Instances
instance Functor (Parser r) where
fmap f p = Parser $ \s succ fail -> unParser p s (succ . f) fail
instance Applicative (Parser r) where
pure a = Parser $ \s succ _ -> succ a s
p <*> p' = Parser $ \s succ fail ->
flip (unParser p s) fail $ \f s' ->
unParser p' s' (succ . f) fail
instance Alternative (Parser r) where
empty = Parser $ \s _ fail -> fail s
p <|> p' = Parser $ \s@(S _ pos) succ fail ->
unParser p s succ $ \s'@(S _ pos') ->
if pos == pos'
then unParser p' s' succ fail
else fail s'
-- ---------------------------------------------------------------------
-- Running a parser
-- | The initial, empty parse state.
initState :: S
initState = S S.empty 0
-- | This is the final continuation that turns a successful parse into
-- a 'Result'.
finishedCont :: a -> S -> Result a
finishedCont v s = Finished v s
-- | This is the final continuation that turns an unsuccessful parse
-- into a 'Result'.
failedCont :: S -> Result a
failedCont (S _ pos) = Failed pos
-- | A enumerator is a partially applied left fold over some
-- 'S.ByteString' input. The caller supplies an initial seed and an
-- iteratee function. The iteratee function returns @Left seed@ if it
-- want to terminate the iteration early, otherwise @Right seed at .
type Enumerator m s = (s -> S.ByteString -> Either s s) -> s -> m s
-- | @parse p enumerator@ runs the parser @p@, pulling in new data
-- using @enumerator@ when necessary, and return @Left pos@ on failure
-- and @Right val remaining@ on success.
parse :: Monad m => Parser r r
-> (forall s. Enumerator m s)
-> m (Either Int64 (r, S.ByteString))
parse p enumerator =
-- First test if the parser can succeed without consuming any
-- input.
let seed = (unParser p) initState finishedCont failedCont
in case seed of
Failed pos -> return $ Left pos
Finished x (S s _) -> return $ Right (x, s)
_ ->
-- Otherwise, use the enumerator to feed the parser some
-- input.
do (result, pos) <- enumerator iter (seed, 0)
return $ case result of
Failed pos' -> Left pos'
Finished x (S s _) -> Right (x, s)
Partial _ -> Left pos
where
iter (Partial k, pos) chunk =
let pos' = pos + fromIntegral (S.length chunk)
in case k chunk of
partial@(Partial _) -> Right (partial, pos')
result -> Left (result, pos')
iter _ _ = error "Should be partial."
-- ---------------------------------------------------------------------
-- Primitive parsers
-- | The parser @satisfy p@ succeeds for any character for which the
-- supplied function @p@ returns 'True'. Returns the character that
-- is actually parsed.
satisfy :: (Word8 -> Bool) -> Parser r Word8
satisfy p =
Parser $ \st@(S s pos) succ fail ->
case S.uncons s of
Just (b, bs) -> if p b
then succ b (S bs (pos + 1))
else fail st
Nothing -> Partial $ \s' ->
unParser (satisfy p) (S s' pos) succ fail
-- | @byte b@ parses a single byte @b at . Returns the parsed byte
-- (i.e. @b@).
byte :: Word8 -> Parser r Word8
byte b = satisfy (== b)
More information about the Haskell-Cafe
mailing list