[Haskell-cafe] a boring parser
Anatoly Yakovenko
aeyakovenko at gmail.com
Wed Sep 30 23:04:23 EDT 2009
i got annoyed with Parsec and wrote a much more boring parser which
allows me to parse anything with any kind of matching i want. Its
basically a combination of State and Error monads.
So i can use a grep like parser that matches via a regular expression
over a list of lines
grep re = do
vv::B.ByteString <- any
let (_,_,_,rv) = (vv =~
re)::(B.ByteString,B.ByteString,B.ByteString,[B.ByteString])
case (rv) of
[] -> throwError "no match"
_ -> return $ rv
> run (grep $ C.pack "(hello)") $ [C.pack "hello world"]
Right (["hello"],[])
or use the same library to scan over a string by combining regular expressions
regex re = do
make $ \ st -> do
case (B.null st) of
True -> throwError "eos"
_ -> do
let (_,_,after,rv) = (st =~
re)::(B.ByteString,B.ByteString,B.ByteString,[B.ByteString])
case (rv) of
[] -> throwError "no match"
_ -> return $ (rv,after)
> run (do aa <- regex $ C.pack "(hello)"; bb <- regex $ C.pack " (world)"; return (aa,bb) ) $ C.pack "hello world"
Right ((["hello"],["world"]),"")
or simply match integers in a list, or anything that is of type Eq
> run (many1 $ eq 1) [1,1,1,2,3,4]
Right ([1,1,1],[2,3,4])
i can define lt
lt cc = do
vv <- any
case (vv < cc) of
True -> return $ vv
_ -> throwError "no match"
and do
> run (many1 $ lt 5 <|> eq 5) [1..10]
Right ([1,2,3,4,5],[6,7,8,9,10])
here is the implementation
module Parser( ParserM --type alias for the parser ParserM a b is
over "stream" a and returns b
, make --makes a parser from a matching function of
type :: stream -> m (match_data,stream)
--for example any is implemented via:
--any :: ParserM [a] a
--any = make $ \ ll ->
-- case (ll) of
-- (hh:tt) -> return $ (hh,tt)
-- _ -> throwError "eos
--matches and returns an element from a
list, which makes any of type :: ParserM [a] a
, any --matches any element from [a] type stream
, eq --matches an equal element from [Eq] stream,
trivialy implemented in terms of any
--eq :: Eq a => a -> ParserM [a] a
--eq cc = do
-- vv <- any
-- case (vv == cc) of
-- True -> return $ vv
-- _ -> throwError "no match
, (<|>) --or operator, tries the left one then the right one
, manyTill --collects the results of parser 1 until
parser 2 succeeds
, many1 --collects the results of the parser, must
succeed at least once
, many --collects the results of a parser
, run --runs the parser
) where
import Control.Monad.State.Lazy
import Control.Monad.Error
import Test.QuickCheck
import Control.Monad.Identity
import Prelude hiding (any)
type ParserM a c = StateT a (ErrorT [Char] Identity) c
make pp = do
st <- get
(rv,nst) <- pp $ st
put $ nst
return $ rv
aa <|> bb = aa `catchError` \ _ -> bb
manyTill :: ParserM a c -> ParserM a d -> ParserM a ([c],d)
manyTill pp ee = do
do dd <- ee
return $ ([],dd)
`catchError` \ _ -> do
cc <- pp
(ccs,dd) <- manyTill pp ee
return $ (cc:ccs,dd)
many1 pp = do
rv <- pp
rest <- many1 pp `catchError` \ _ -> return $ []
return $ rv : rest
many pp = do many1 pp
<|> return []
any :: ParserM [a] a
any = make $ \ ll ->
case (ll) of
(hh:tt) -> return $ (hh,tt)
_ -> throwError "eos"
eq :: Eq a => a -> ParserM [a] a
eq cc = do
vv <- any
case (vv == cc) of
True -> return $ vv
_ -> throwError "no match"
lt cc = do
vv <- any
case (vv < cc) of
True -> return $ vv
_ -> throwError "no match"
run pp dd = runIdentity $ runErrorT $ runStateT pp dd
run' = flip run
prop_MatchA = (Right ('a',"bc")) == (run' "abc" $ eq 'a')
prop_MatchEOS = (Left "eos") == (run' "" $ eq 'a')
prop_MatchNoMatch = (Left "no match") == (run' ("bcd") $ eq 'a')
prop_MatchABC =(Right ('c',""))== (run' "abc" $ do eq 'a'
eq 'b'
eq 'c')
prop_MatchA_C = (run' "abc" $ do eq 'a'
eq 'd' <|> eq 'b' <|> any
eq 'c') == (Right ('c',""))
prop_Or = (run' "abc" $ do { eq 'a'
; do { eq 'b'
; eq 'd'
}
<|> do { eq 'b'
; eq 'c'
}
}) == (Right ('c',""))
prop_UntilC = (Right (("",'c'),"")) == (run' ("c") $ manyTill any $ eq 'c')
prop_Until1 ls =
let rv = run' (ls ++ [1]) $ manyTill any $ eq 1
in case (rv) of
Right ((ls,1),rest) -> (elem 1 ls) == False
_ -> False
prop_all1 ls =
let rv = run' ([1,1,1] ++ ls) $ many1 $ eq 1
in case (rv) of
Right (_,(1:_)) -> False
Right ((1:1:1:_),_) -> True
_ -> False
More information about the Haskell-Cafe
mailing list