[commit: packages/bytestring] master, revert-46-patch-1, wip/nix-local-build: Implement Rabin-Karp substring search. (2160e09)
git at git.haskell.org
git at git.haskell.org
Tue May 3 22:42:47 UTC 2016
Repository : ssh://git@git.haskell.org/bytestring
On branches: master,revert-46-patch-1,wip/nix-local-build
Link : http://git.haskell.org/packages/bytestring.git/commitdiff/2160e091e215fecc9177d55a37cd50fc253ba86a
>---------------------------------------------------------------
commit 2160e091e215fecc9177d55a37cd50fc253ba86a
Author: Sean <burton.seanr at gmail.com>
Date: Fri Sep 18 09:14:01 2015 +0100
Implement Rabin-Karp substring search.
Conflicts:
Data/ByteString.hs
>---------------------------------------------------------------
2160e091e215fecc9177d55a37cd50fc253ba86a
Data/ByteString.hs | 58 ++++++++++++++++++++++++++++++++++++++++--------------
1 file changed, 43 insertions(+), 15 deletions(-)
diff --git a/Data/ByteString.hs b/Data/ByteString.hs
index 42263a6..90eb301 100644
--- a/Data/ByteString.hs
+++ b/Data/ByteString.hs
@@ -1327,16 +1327,38 @@ isInfixOf p s = isJust (findSubstring p s)
--
-- > fst (breakSubstring x y)
--
+-- Note that calling `breakSubstring x` does some preprocessing work, so
+-- you should avoid uneccesarily duplicating breakSubstring calls with the same
+-- pattern.
+
breakSubstring :: ByteString -- ^ String to search for
-> ByteString -- ^ String to search in
-> (ByteString,ByteString) -- ^ Head and tail of string broken at substring
-
-breakSubstring pat src = search 0 src
+breakSubstring pat =
+ case lp of
+ 0 -> \src -> (empty,src)
+ 1 -> breakByte (unsafeHead pat)
+ _ -> karpRabin
where
- search !n !s
- | null s = (src,empty) -- not found
- | pat `isPrefixOf` s = (take n src,s)
- | otherwise = search (n+1) (unsafeTail s)
+ lp = length pat
+ k = 2891336453 :: Word32
+ rollingHash = foldl' (\h b -> h * k + fromIntegral b) 0
+ hp = rollingHash pat
+ m = k ^ lp
+ karpRabin src
+ | length src < lp = (src,empty)
+ | otherwise = search (rollingHash $ unsafeTake lp src) 0
+ where
+ search !hs !n
+ | hp == hs && pat `isPrefixOf` s = (unsafeTake n src,s)
+ | length src - n <= lp = (src,empty) -- not found
+ | otherwise = search hs' (n+1)
+ where
+ get = fromIntegral . unsafeIndex src
+ s = unsafeDrop n src
+ hs' = hs * k +
+ get (n + lp) -
+ m * get n
-- | Get the first index of a substring in another string,
-- or 'Nothing' if the string is not found.
@@ -1344,7 +1366,11 @@ breakSubstring pat src = search 0 src
findSubstring :: ByteString -- ^ String to search for.
-> ByteString -- ^ String to seach in.
-> Maybe Int
-findSubstring f i = listToMaybe (findSubstrings f i)
+findSubstring pat src
+ | null pat && null src = Just 0
+ | null b = Nothing
+ | otherwise = Just (length a)
+ where (a, b) = breakSubstring pat src
{-# DEPRECATED findSubstring "findSubstring is deprecated in favour of breakSubstring." #-}
@@ -1354,14 +1380,16 @@ findSubstring f i = listToMaybe (findSubstrings f i)
findSubstrings :: ByteString -- ^ String to search for.
-> ByteString -- ^ String to seach in.
-> [Int]
-findSubstrings pat str
- | null pat = [0 .. length str]
- | otherwise = search 0 str
- where
- search !n !s
- | null s = []
- | pat `isPrefixOf` s = n : search (n+1) (unsafeTail s)
- | otherwise = search (n+1) (unsafeTail s)
+findSubstrings pat src
+ | null pat = [0 .. ls]
+ | otherwise = search 0
+ where lp = length pat
+ ls = length src
+ search !n
+ | (n > ls - lp) || null b = []
+ | otherwise = let k = n + length a
+ in k : search (k + lp)
+ where (a, b) = breakSubstring pat (unsafeDrop n src)
{-# DEPRECATED findSubstrings "findSubstrings is deprecated in favour of breakSubstring." #-}
More information about the ghc-commits
mailing list