[Haskell-cafe] Difficulties implementing an incremental parser using Oleg-style left fold enumerator

Johan Tibell johan.tibell at gmail.com
Sat Mar 8 03:56:36 EST 2008


Dear haskell-cafe,

I'm trying to write a parser combinator library with the following contraints:

* Parses LL(1) grammars.

* Is incremental i.e. it uses an Oleg style left fold enumerator to
receive its input.

* Is applicative but not monadic.

The problem -- maybe there are others too -- is that when a parser such as

many (byte 65)

is run it will always return a 'Partial' result waiting for more input
even though the enumerator is exhausted. In other words, there's no
way to detect end of input.

My current implementation of the parser type is

newtype Parser r a = Parser
    { unParser :: S -> (a -> S -> Result r) -> (S -> Result r) -> Result r }

where the first parameter is the parse state, the second a success
continuation, and the third a failure continuation. The only tricky
part (except for the above mentioned problem) is to implement the
choice operator. I implement mine as

instance Applicative (Parser r) where
    pure a = ...
    p <*> p' = Parser $ \s succ fail ->
               flip (unParser p s) fail $ \f s' ->
                   unParser p' s' (succ . f) fail

which I think is correct.

Here follows my code. I hope someone has some idea how I could handle
the end of input problem correctly.

Thanks.

-- Johan

{-# LANGUAGE DeriveDataTypeable, Rank2Types #-}

-----------------------------------------------------------------------------
-- |
-- Module      :  Parsing.IParse
-- Copyright   :  (c) Johan Tibell 2008
-- License     :  BSD3-style (see LICENSE)
--
-- Maintainer  :  johan.tibell at gmail.com
-- Stability   :  experimental
-- Portability :  portable
--
-- An incremental LL(1) parser combinator library.
--
-----------------------------------------------------------------------------

module Parsing.IParse
    (
      -- * The 'Parser' type
      Parser,
      Enumerator,
      parse,

      -- * Primitive parsers
      satisfy,
      byte,

      module Control.Applicative
    ) where

import Control.Applicative (Alternative(..), Applicative(..))
import Control.Monad (Functor(..))
import qualified Data.ByteString as S
import Data.Int (Int64)
import Data.Typeable (Typeable, showsTypeRep, typeOf)
import Data.Word (Word8)
import Prelude hiding (fail, succ)

-- ---------------------------------------------------------------------
-- The Parser type

-- | The parse state.
data S = S {-# UNPACK #-} !S.ByteString {-# UNPACK #-} !Int64
         deriving Show

-- | A parse either succeeds, fails or returns a suspension with which
-- the parsing can be resumed.
data Result a = Finished a S
              | Failed Int64
              | Partial (S.ByteString -> Result a)
                deriving Typeable

-- | For debug output.
instance (Show a, Typeable a) => Show (Result a) where
    showsPrec d (Finished a s) = showParen (d > 10) showStr
        where showStr = showString "Finished " . showsPrec 11 a
                        . showString " " . showsPrec 11 s
    showsPrec d (Failed pos)   = showParen (d > 10) showStr
        where showStr = showString "Failed " . showsPrec 11 pos
    showsPrec d (Partial k)    = showParen (d > 10) showStr
        where showStr = showString "Partial " . showsTypeRep (typeOf k)

-- | A parser takes a parse state, a success continuation and a
-- failure continuation and returns a 'Result'.
newtype Parser r a = Parser
    { unParser :: S -> (a -> S -> Result r) -> (S -> Result r) -> Result r }

-- ---------------------------------------------------------------------
-- Instances

instance Functor (Parser r) where
    fmap f p = Parser $ \s succ fail -> unParser p s (succ . f) fail

instance Applicative (Parser r) where
    pure a = Parser $ \s succ _ -> succ a s
    p <*> p' = Parser $ \s succ fail ->
               flip (unParser p s) fail $ \f s' ->
                   unParser p' s' (succ . f) fail

instance Alternative (Parser r) where
    empty = Parser $ \s _ fail -> fail s
    p <|> p' = Parser $ \s@(S _ pos) succ fail ->
               unParser p s succ $ \s'@(S _ pos') ->
                   if pos == pos'
                   then unParser p' s' succ fail
                   else fail s'

-- ---------------------------------------------------------------------
-- Running a parser

-- | The initial, empty parse state.
initState :: S
initState = S S.empty 0

-- | This is the final continuation that turns a successful parse into
-- a 'Result'.
finishedCont :: a -> S -> Result a
finishedCont v s = Finished v s

-- | This is the final continuation that turns an unsuccessful parse
-- into a 'Result'.
failedCont :: S -> Result a
failedCont (S _ pos) = Failed pos

-- | A enumerator is a partially applied left fold over some
-- 'S.ByteString' input.  The caller supplies an initial seed and an
-- iteratee function.  The iteratee function returns @Left seed@ if it
-- want to terminate the iteration early, otherwise @Right seed at .
type Enumerator m s = (s -> S.ByteString -> Either s s) -> s -> m s

-- | @parse p enumerator@ runs the parser @p@, pulling in new data
-- using @enumerator@ when necessary, and return @Left pos@ on failure
-- and @Right val remaining@ on success.
parse :: Monad m => Parser r r
      -> (forall s. Enumerator m s)
      -> m (Either Int64 (r, S.ByteString))
parse p enumerator =
    -- First test if the parser can succeed without consuming any
    -- input.
    let seed = (unParser p) initState finishedCont failedCont
    in case seed of
         Failed pos         -> return $ Left pos
         Finished x (S s _) -> return $ Right (x, s)
         _                  ->
             -- Otherwise, use the enumerator to feed the parser some
             -- input.
             do (result, pos) <- enumerator iter (seed, 0)
                return $ case result of
                           Failed pos'        -> Left pos'
                           Finished x (S s _) -> Right (x, s)
                           Partial _          -> Left pos
    where
      iter (Partial k, pos) chunk =
          let pos' = pos + fromIntegral (S.length chunk)
          in case k chunk of
               partial@(Partial _) -> Right (partial, pos')
               result              -> Left (result, pos')
      iter _ _ = error "Should be partial."

-- ---------------------------------------------------------------------
-- Primitive parsers

-- | The parser @satisfy p@ succeeds for any character for which the
-- supplied function @p@ returns 'True'.  Returns the character that
-- is actually parsed.
satisfy :: (Word8 -> Bool) -> Parser r Word8
satisfy p =
    Parser $ \st@(S s pos) succ fail ->
        case S.uncons s of
          Just (b, bs) -> if p b
                          then succ b (S bs (pos + 1))
                          else fail st
          Nothing      -> Partial $ \s' ->
                          unParser (satisfy p) (S s' pos) succ fail

-- | @byte b@ parses a single byte @b at .  Returns the parsed byte
-- (i.e. @b@).
byte :: Word8 -> Parser r Word8
byte b = satisfy (== b)


More information about the Haskell-Cafe mailing list