[Haskell-cafe] Re: Knuth Morris Pratt for Lazy Bytestrings implementation

Daniel Fischer daniel.is.fischer at web.de
Sun Aug 5 11:59:18 EDT 2007


Oooops, stupid error before, fixed below.
Missed it due to too few and simple tests, should've quickchecked :-/
>
> Cheers,
> Daniel
>
> -- KMP algorithm for lazy ByteStrings
> {-# OPTIONS_GHC -fbang-patterns #-}
> module KMP (kmpMatch) where
>
> import qualified Data.Array.Base as Base (unsafeAt)
> import Data.Array.Unboxed (UArray, listArray)
> import qualified Data.Array as A (listArray, (!), elems)
>
> import qualified Data.ByteString.Lazy as B
> import qualified Data.ByteString as S
> import qualified Data.ByteString.Base as B (unsafeHead, unsafeTail,
> unsafeDrop, unsafeIndex)
> import Data.Int (Int64)
> import Data.Word (Word8)
>
> kmpMatch :: B.ByteString -> B.ByteString -> Int64
> kmpMatch patLazy search
>
>     | B.null patLazy    = 0
>     | otherwise         = kmp 0 0 search
>
>       where
>         pat :: S.ByteString
>         pat = S.concat (B.toChunks patLazy)
>         patLen = S.length pat
>         sym :: Int -> Word8
>         sym = B.unsafeIndex pat
>         bord = A.listArray (0,patLen) $
>                     (-1):0:[getS (sym i) i + 1 | i <- [1 .. patLen - 1]]
>                where
>                 getS s n
>
>                     | m < 0 || s == sym m   = m
>                     | otherwise             = getS s m
>
>                       where
>                         m = bord A.! n
>         borders :: UArray Int Int
>         borders = listArray (0,patLen) $ A.elems bord
>         (?) :: UArray Int Int -> Int -> Int
>         (?) = Base.unsafeAt
>         getShift :: Word8 -> Int -> Int
>         getShift s n = help n
>             where
>               help k
>
>                 | m < 0 || sym m == s   = m
>                 | otherwise             = help m
>
>                   where
>                     m = borders ? k
>         kmp :: Int64 -> Int -> B.ByteString -> Int64
>         kmp !idx !match !srch
>
>             | patLen == match           = idx - fromIntegral match
>             | B.null srch               = -1
>             | sym match == B.head srch  = kmp (idx+1) (match+1) (B.tail
>             | srch) match == 0                = kmp (idx+1) 0 (B.tail srch)
>             | otherwise                 =
>               case getShift (B.head srch) match of
>                 -1      -> kmp idx 0 srch
>                 shft    -> kmp (idx+1) (shft+1) (B.tail srch)



More information about the Haskell-Cafe mailing list