[Haskell-cafe] a boring parser

Anatoly Yakovenko aeyakovenko at gmail.com
Wed Sep 30 23:04:23 EDT 2009


i got annoyed with Parsec and wrote a much more boring parser which
allows me to parse anything with any kind of matching i want.  Its
basically a combination of State and Error monads.

So i can use a grep like parser that matches via a regular expression
over a list of lines

grep re = do
  vv::B.ByteString <- any
  let (_,_,_,rv) = (vv =~
re)::(B.ByteString,B.ByteString,B.ByteString,[B.ByteString])
  case (rv) of
     [] -> throwError "no match"
     _ -> return $ rv

> run (grep $ C.pack "(hello)") $ [C.pack "hello world"]
Right (["hello"],[])

or use the same library to scan over a string by combining regular expressions

regex re = do
  make $ \ st -> do
     case (B.null st) of
        True -> throwError "eos"
        _ -> do
           let (_,_,after,rv) = (st =~
re)::(B.ByteString,B.ByteString,B.ByteString,[B.ByteString])
           case (rv) of
              [] -> throwError "no match"
              _ -> return $ (rv,after)



>  run (do aa <- regex $ C.pack "(hello)"; bb <- regex $ C.pack " (world)"; return (aa,bb) ) $ C.pack "hello world"
Right ((["hello"],["world"]),"")

or simply match integers in a list, or anything that is of type Eq

> run (many1 $ eq 1) [1,1,1,2,3,4]
Right ([1,1,1],[2,3,4])

i can define lt

lt cc = do
  vv <- any
  case (vv < cc) of
     True -> return $ vv
     _ -> throwError "no match"

and do

> run (many1 $ lt 5 <|> eq 5) [1..10]
Right ([1,2,3,4,5],[6,7,8,9,10])

here is the implementation

module Parser( ParserM     --type alias for the parser ParserM a b is
over "stream" a and returns b
            , make        --makes a parser from a matching function of
type :: stream -> m (match_data,stream)
                          --for example any is implemented via:
                          --any :: ParserM [a] a
                          --any = make $ \ ll ->
                          --   case (ll) of
                          --         (hh:tt) -> return $ (hh,tt)
                          --               _ -> throwError "eos
                          --matches and returns an element from a
list, which makes any of type :: ParserM [a] a
            , any         --matches any element from [a] type stream
            , eq          --matches an equal element from [Eq] stream,
trivialy implemented in terms of any
                          --eq :: Eq a => a -> ParserM [a] a
                          --eq cc = do
                          --   vv <- any
                          --   case (vv == cc) of
                          --      True -> return $ vv
                          --         _ -> throwError "no match
            , (<|>)       --or operator, tries the left one then the right one
            , manyTill    --collects the results of parser 1 until
parser 2 succeeds
            , many1       --collects the results of the parser, must
succeed at least once
            , many        --collects the results of a parser
            , run         --runs the parser
            ) where

import Control.Monad.State.Lazy
import Control.Monad.Error
import Test.QuickCheck
import Control.Monad.Identity
import Prelude hiding (any)

type ParserM a c = StateT a (ErrorT [Char] Identity) c

make pp = do
  st <- get
  (rv,nst) <- pp $ st
  put $ nst
  return $ rv

aa <|> bb = aa `catchError` \ _ -> bb

manyTill :: ParserM a c -> ParserM a d -> ParserM a ([c],d)
manyTill pp ee = do
  do dd <- ee
     return $ ([],dd)
  `catchError` \ _ -> do
     cc <- pp
     (ccs,dd) <- manyTill pp ee
     return $ (cc:ccs,dd)

many1 pp = do
  rv <- pp
  rest <- many1 pp `catchError` \ _ -> return $ []
  return $ rv : rest

many pp = do many1 pp
        <|> return []


any :: ParserM [a] a
any = make $ \ ll ->
  case (ll) of
     (hh:tt) -> return $ (hh,tt)
     _ -> throwError "eos"

eq :: Eq a => a -> ParserM [a] a
eq cc = do
  vv <- any
  case (vv == cc) of
     True -> return $ vv
     _ -> throwError "no match"

lt cc = do
  vv <- any
  case (vv < cc) of
     True -> return $ vv
     _ -> throwError "no match"

run pp dd = runIdentity $ runErrorT $ runStateT pp dd
run' = flip run


prop_MatchA = (Right ('a',"bc")) == (run' "abc" $ eq 'a')
prop_MatchEOS = (Left "eos") == (run' ""  $ eq 'a')
prop_MatchNoMatch = (Left "no match") == (run' ("bcd") $ eq 'a')

prop_MatchABC =(Right ('c',""))== (run' "abc" $ do  eq 'a'
                                                   eq 'b'
                                                   eq 'c')

prop_MatchA_C = (run' "abc" $ do eq 'a'
                                eq 'd' <|> eq 'b' <|> any
                                eq 'c') == (Right ('c',""))

prop_Or =      (run' "abc" $ do { eq 'a'
                               ; do     { eq 'b'
                                        ; eq 'd'
                                        }
                                 <|> do { eq 'b'
                                        ; eq 'c'
                                        }
                               }) == (Right ('c',""))

prop_UntilC = (Right (("",'c'),"")) == (run' ("c") $ manyTill any $ eq 'c')

prop_Until1 ls =
  let rv = run' (ls ++ [1]) $ manyTill any $ eq 1
  in case (rv) of
     Right ((ls,1),rest) -> (elem 1 ls) == False
     _ -> False

prop_all1 ls =
  let rv = run' ([1,1,1] ++ ls) $ many1 $ eq 1
  in case (rv) of
     Right (_,(1:_)) -> False
     Right ((1:1:1:_),_) -> True
     _ -> False


More information about the Haskell-Cafe mailing list