[Haskell-cafe] Re: Efficient parallel regular expressions

ajb at spamcop.net ajb at spamcop.net
Tue Nov 4 21:52:20 EST 2008


G'day all.

Quoting Achim Schneider <barsoap at web.de>:

> Considering that he's talking about a mud, I figure the grammar is a
> quite straightforward
>
> command = l[eft] | r[ight] | ... | t[ake] <item> | c[ast] <spell>
>
> That is, I'd be very surprised if you even need more than two or three
> characters lookahead, much less backtracking.

In the case of a command followed by arguments, it would make more
sense to use a keyword recogniser followed by a command-specific parser.

One suggestion follows.

Cheers,
Andrew Bromage
--------8<---CUT HERE---8<--------
module KeywordMatch (keywordMatch) where

import Data.List
import Data.Function
import Control.Arrow

-- Exercise: Why would it be wrong to curry this function?
keywordMatch :: (Ord k) => [([k],v)] -> [k] -> Maybe v
keywordMatch kvs
     = compileTrie . generateTrie . sortBy (compare `on` fst) $ kvs

data Trie k v
     = Trie (Maybe v) (Trie' k v)

data Trie' k v
     = Node0
     | Node1 k (Trie k v)
     | Node2 k (Trie k v) k (Trie k v)
     | Branch k (Trie' k v) (Trie k v) (Trie' k v)

generateTrie :: (Ord k) => [([k],v)] -> Trie k v
generateTrie (([],v):rest)
     = Trie (Just v) (generateTrie' rest)
generateTrie rest
     = Trie Nothing (generateTrie' rest)

generateTrie' :: (Ord k) => [([k],v)] -> Trie' k v
generateTrie' []
     = Node0
generateTrie' [(k:ks,v)]
     = Node1 k $ foldr (\k -> Trie Nothing . Node1 k) (Trie (Just v) Node0) ks
generateTrie' [(k1:ks1,v1),(k2:ks2,v2)]
     = Node2 k1 (generateTrie [(ks1,v1)]) k2 (generateTrie [(ks2,v2)])
generateTrie' kvs
     = gt . map (head.fst.head &&& map (first tail))
             . groupBy ((==) `on` head.fst) $ kvs
     where
         gt [] = Node0
         gt [(k,kvs)] = Node1 k (generateTrie kvs)
         gt [(k1,kvs1),(k2,kvs2)] = Node2 k1 (generateTrie kvs1)
                                          k2 (generateTrie kvs2)
         gt kvs
             = let (l,(k,m):r) = splitAt (length kvs `div` 2) kvs
               in Branch k (gt l) (generateTrie m) (gt r)

compileTrie :: (Ord k) => Trie k v -> [k] -> Maybe v
compileTrie (Trie emptyCase trie')
     = let ctrie' = compileTrie' trie'
       in \key -> case key of
                     [] -> emptyCase
                     (k:ks) -> ctrie' k ks

compileTrie' :: (Ord k) => Trie' k v -> k -> [k] -> Maybe v
compileTrie' Node0
     = \k ks -> Nothing
compileTrie' (Node1 k' t)
     = let t' = compileTrie t
       in \k ks -> if k == k' then t' ks else Nothing
compileTrie' (Node2 k1 t1 k2 t2)
     = let t1' = compileTrie t1
           t2' = compileTrie t2
       in \k ks -> if k == k1 then t1' ks
                   else if k == k2 then t2' ks
                   else Nothing
compileTrie' (Branch k' l m r)
     = let
         cl = compileTrie' l
         cm = compileTrie m
         cr = compileTrie' r
       in
         \k ks -> case compare k k' of
                     LT -> cl k ks
                     EQ -> cm ks
                     GT -> cr k ks

-- vim: ts=4:sts=4:expandtab


More information about the Haskell-Cafe mailing list