[commit: ghc] master: Improve performance of isSuffixOf (#9676) (49b05d6)

git at git.haskell.org git at git.haskell.org
Mon Oct 27 21:15:10 UTC 2014


Repository : ssh://git@git.haskell.org/ghc

On branch  : master
Link       : http://ghc.haskell.org/trac/ghc/changeset/49b05d6935b6677443a970a45138def66c6f8cee/ghc

>---------------------------------------------------------------

commit 49b05d6935b6677443a970a45138def66c6f8cee
Author: David Feuer <David.Feuer at gmail.com>
Date:   Mon Oct 27 22:12:07 2014 +0100

    Improve performance of isSuffixOf (#9676)
    
    The new implementation avoids reversing the "haystack" list, which can be
    very expensive.
    
    Reviewed By: ekmett
    
    Differential Revision: https://phabricator.haskell.org/D330


>---------------------------------------------------------------

49b05d6935b6677443a970a45138def66c6f8cee
 libraries/base/Data/OldList.hs         | 31 +++++++++++--
 libraries/base/tests/all.T             |  1 +
 libraries/base/tests/isSuffixOf.hs     | 10 ++++
 libraries/base/tests/isSuffixOf.stdout | 84 ++++++++++++++++++++++++++++++++++
 4 files changed, 122 insertions(+), 4 deletions(-)

diff --git a/libraries/base/Data/OldList.hs b/libraries/base/Data/OldList.hs
index 0e6709e..53685d8 100644
--- a/libraries/base/Data/OldList.hs
+++ b/libraries/base/Data/OldList.hs
@@ -292,11 +292,34 @@ isPrefixOf [] _         =  True
 isPrefixOf _  []        =  False
 isPrefixOf (x:xs) (y:ys)=  x == y && isPrefixOf xs ys
 
--- | The 'isSuffixOf' function takes two lists and returns 'True'
--- iff the first list is a suffix of the second.
--- Both lists must be finite.
+-- | The 'isSuffixOf' function takes two lists and returns 'True' iff
+-- the first list is a suffix of the second. The second list must be
+-- finite.
 isSuffixOf              :: (Eq a) => [a] -> [a] -> Bool
-isSuffixOf x y          =  reverse x `isPrefixOf` reverse y
+ns `isSuffixOf` hs      = maybe False id $ do
+      delta <- dropLengthMaybe ns hs
+      return $ ns == dropLength delta hs
+      -- Since dropLengthMaybe ns hs succeeded, we know that (if hs is finite)
+      -- length ns + length delta = length hs
+      -- so dropping the length of delta from hs will yield a suffix exactly
+      -- the length of ns.
+
+-- A version of drop that drops the length of the first argument from the
+-- second argument. If xs is longer than ys, xs will not be traversed in its
+-- entirety.  dropLength is also generally faster than (drop . length)
+-- Both this and dropLengthMaybe could be written as folds over their first
+-- arguments, but this reduces clarity with no benefit to isSuffixOf.
+dropLength :: [a] -> [b] -> [b]
+dropLength [] y = y
+dropLength _ [] = []
+dropLength (_:x') (_:y') = dropLength x' y'
+
+-- A version of dropLength that returns Nothing if the second list runs out of
+-- elements before the first.
+dropLengthMaybe :: [a] -> [b] -> Maybe [b]
+dropLengthMaybe [] y = Just y
+dropLengthMaybe _ [] = Nothing
+dropLengthMaybe (_:x') (_:y') = dropLengthMaybe x' y'
 
 -- | The 'isInfixOf' function takes two lists and returns 'True'
 -- iff the first list is contained, wholly and intact,
diff --git a/libraries/base/tests/all.T b/libraries/base/tests/all.T
index f80f542..edb5fc3 100644
--- a/libraries/base/tests/all.T
+++ b/libraries/base/tests/all.T
@@ -86,6 +86,7 @@ test('exceptionsrun002', 	normal, compile_and_run, [''])
 test('list001'	,	when(fast(), skip), compile_and_run, [''])
 test('list002', when(fast(), skip), compile_and_run, [''])
 test('list003', when(fast(), skip), compile_and_run, [''])
+test('isSuffixOf', normal, compile_and_run, [''])
 
 test('memo001',
      [extra_run_opts('+RTS -A10k -RTS'),
diff --git a/libraries/base/tests/isSuffixOf.hs b/libraries/base/tests/isSuffixOf.hs
new file mode 100644
index 0000000..bcbb77f
--- /dev/null
+++ b/libraries/base/tests/isSuffixOf.hs
@@ -0,0 +1,10 @@
+module Main (main) where
+import Data.List
+
+needles = ["","1","2","12","123","1234"]
+haystacks = ["","a","ab","abc","1","2","3","a1","1a",
+             "23","123","a123","ab123","abc123"]
+
+main :: IO()
+main = mapM_ print $ [needle `isSuffixOf` haystack
+                       | needle <- needles, haystack <- haystacks]
diff --git a/libraries/base/tests/isSuffixOf.stdout b/libraries/base/tests/isSuffixOf.stdout
new file mode 100644
index 0000000..adba395
--- /dev/null
+++ b/libraries/base/tests/isSuffixOf.stdout
@@ -0,0 +1,84 @@
+True
+True
+True
+True
+True
+True
+True
+True
+True
+True
+True
+True
+True
+True
+False
+False
+False
+False
+True
+False
+False
+True
+False
+False
+False
+False
+False
+False
+False
+False
+False
+False
+False
+True
+False
+False
+False
+False
+False
+False
+False
+False
+False
+False
+False
+False
+False
+False
+False
+False
+False
+False
+False
+False
+False
+False
+False
+False
+False
+False
+False
+False
+False
+False
+False
+False
+True
+True
+True
+True
+False
+False
+False
+False
+False
+False
+False
+False
+False
+False
+False
+False
+False
+False



More information about the ghc-commits mailing list