[Haskell-cafe] Re: Knuth Morris Pratt for Lazy Bytestrings
implementation
Daniel Fischer
daniel.is.fischer at web.de
Sun Aug 5 11:59:18 EDT 2007
Oooops, stupid error before, fixed below.
Missed it due to too few and simple tests, should've quickchecked :-/
>
> Cheers,
> Daniel
>
> -- KMP algorithm for lazy ByteStrings
> {-# OPTIONS_GHC -fbang-patterns #-}
> module KMP (kmpMatch) where
>
> import qualified Data.Array.Base as Base (unsafeAt)
> import Data.Array.Unboxed (UArray, listArray)
> import qualified Data.Array as A (listArray, (!), elems)
>
> import qualified Data.ByteString.Lazy as B
> import qualified Data.ByteString as S
> import qualified Data.ByteString.Base as B (unsafeHead, unsafeTail,
> unsafeDrop, unsafeIndex)
> import Data.Int (Int64)
> import Data.Word (Word8)
>
> kmpMatch :: B.ByteString -> B.ByteString -> Int64
> kmpMatch patLazy search
>
> | B.null patLazy = 0
> | otherwise = kmp 0 0 search
>
> where
> pat :: S.ByteString
> pat = S.concat (B.toChunks patLazy)
> patLen = S.length pat
> sym :: Int -> Word8
> sym = B.unsafeIndex pat
> bord = A.listArray (0,patLen) $
> (-1):0:[getS (sym i) i + 1 | i <- [1 .. patLen - 1]]
> where
> getS s n
>
> | m < 0 || s == sym m = m
> | otherwise = getS s m
>
> where
> m = bord A.! n
> borders :: UArray Int Int
> borders = listArray (0,patLen) $ A.elems bord
> (?) :: UArray Int Int -> Int -> Int
> (?) = Base.unsafeAt
> getShift :: Word8 -> Int -> Int
> getShift s n = help n
> where
> help k
>
> | m < 0 || sym m == s = m
> | otherwise = help m
>
> where
> m = borders ? k
> kmp :: Int64 -> Int -> B.ByteString -> Int64
> kmp !idx !match !srch
>
> | patLen == match = idx - fromIntegral match
> | B.null srch = -1
> | sym match == B.head srch = kmp (idx+1) (match+1) (B.tail
> | srch) match == 0 = kmp (idx+1) 0 (B.tail srch)
> | otherwise =
> case getShift (B.head srch) match of
> -1 -> kmp idx 0 srch
> shft -> kmp (idx+1) (shft+1) (B.tail srch)
More information about the Haskell-Cafe
mailing list