[Haskell-cafe] ReadP question

Chris Kuklewicz haskell at list.mightyreason.com
Thu Aug 31 11:46:14 EDT 2006


Tom Phoenix wrote:
> On 8/30/06, Chris Kuklewicz <haskell at list.mightyreason.com> wrote:
> 
>> > -- Simulate "(a?|b+|c*)*d" regular expression
> 
>> But 'go' seems to not terminate with the leading 'star'
> 
> Unless I'm missing something... The part of the pattern inside the
> parentheses should successfully match at least the empty string at the
> beginning of the string. Since it's regulated by the second (outer)
> 'star', it will keep matching as long as it keeps succeeding; since it
> keeps matching the empty string, it keeps matching forever in the same
> spot.
> 
> To solve this problem, your implementation of 'star' could perhaps be
> changed to answer "no more matches" rather than "infinitely many
> matches" once the body fails to consume any characters.
> 
> Hope this helps!
> 
> --Tom Phoenix

And that is indeed the solution.  But then I wanted $ end-of-line anchors (easy) 
and ^ begin-of-line anchors (annoying).  But it works now:

-- | Using ReadP to simulate regular expressions, finding the longest match
-- by Chris Kuklewicz, public domain
import Control.Monad
import Data.Set(Set,member)
import Data.Maybe(maybe)
import Text.ParserCombinators.ReadP

type R = Char -> ReadP (Int,Char)

dot :: R
dot _ = do x <- get
            return (1,x)

anyOf :: Set Char -> R
anyOf s _ = do
   x <- satisfy (`member` s)
   return (1,x)

noneOf :: Set Char -> R
noneOf s _ = do
   x <- satisfy (not.(`member` s))
   return (1,x)

c :: Char -> R
c x _ = char x >> return (1,x)

cs :: String -> R
cs [] prev = return (0,prev)
cs xs _ = string xs >> return (length xs,last xs)

atBOL prev =
   case prev of '\n' -> return (0,prev)
                _ -> pfail

atEOL prev = do
   rest <- look
   case rest of [] -> return (0,prev)
                ('\n':_) -> return (0,prev)
                _ -> pfail

-- Consume like x? x+ x* and return the length
quest,plus,star :: R -> R
quest x = x <|> (\prev -> return (0,prev))
plus  x = x +> star x
star  x prev = until0 0 prev
   where until0 t prev' = do
           (len,prev'') <- quest x prev'
           if (0==len)
             then return (t,prev'')
             else let tot = t + len
                  in seq tot (until0 tot prev'')

upToN :: Int -> R -> R
upToN n x = helper n
   where helper 0 prev t = return (t,prev)
         helper i prev t = do
           (len,prev') <- x prev
           if 0==len
             then return (t,prev')
             else helper (pred i) prev' $! t+len

ranged 0 Nothing x = star x
ranged 0 (Just n) x | n>0 = upToN n x
ranged m n x | (m>=0) && maybe True (\n'->n'>=m) n =
   doSeq (replicate m x) +> (ranged 0 (fmap (subtract m) n) x)
              | otherwise = (\prev -> return (0,prev))

infixr 6 +>
infixr 5 <|>
(+>),(<|>) :: R -> R -> R
(+>) x y = (\prev -> do
   (lenX,prev') <- x prev
   (lenY,prev'') <- y prev'
   let tot = lenX + lenY
   seq tot (return (tot,prev''))
  )

(<|>) x y = (\prev -> (x prev) +++ (y prev))

orSeq,doSeq :: [R] -> R
orSeq [] prev = return (0,prev)
orSeq xs prev = foldr1 (<|>) xs $ prev

doSeq [] prev = return (0,prev)
doSeq xs prev = foldr1 (+>) xs $ prev


-- Simulate "(^a|b+|c*|^.)*(d|_rest_)$" regular expression
test = star (orSeq [quest (c 'a')
                    ,plus (c 'b')
                    ,star (c 'c')
                    ,atBOL +> dot ]) +> doSeq [c 'd' <|> cs "_rest_",atEOL]

go foo = case readP_to_S (gather (test '\n')) foo of
            [] -> Nothing
            xs -> Just (last xs)





More information about the Haskell-Cafe mailing list