[commit: ghc] master: Make unwords and words fuse somewhat (e73ab54)

git at git.haskell.org git at git.haskell.org
Thu Nov 13 08:01:08 UTC 2014


Repository : ssh://git@git.haskell.org/ghc

On branch  : master
Link       : http://ghc.haskell.org/trac/ghc/changeset/e73ab5412935392c03ce736ebee2b1282932c2ff/ghc

>---------------------------------------------------------------

commit e73ab5412935392c03ce736ebee2b1282932c2ff
Author: David Feuer <David.Feuer at gmail.com>
Date:   Thu Nov 13 08:59:14 2014 +0100

    Make unwords and words fuse somewhat
    
    Make `words` a good producer and `unwords` a good consumer for list
    fusion. Thus `unwords . words` will avoid producing an intermediate list
    of words, although it will produce each individual word.
    
    Make `unwords` slightly lazier, so that
    `unwords (s : undefined) = s ++ undefined` instead of `= undefined`.
    
    Reviewed By: austin
    
    Differential Revision: https://phabricator.haskell.org/D375


>---------------------------------------------------------------

e73ab5412935392c03ce736ebee2b1282932c2ff
 libraries/base/Data/OldList.hs | 47 ++++++++++++++++++++++++++++++++++++++----
 1 file changed, 43 insertions(+), 4 deletions(-)

diff --git a/libraries/base/Data/OldList.hs b/libraries/base/Data/OldList.hs
index caad044..551b8be 100644
--- a/libraries/base/Data/OldList.hs
+++ b/libraries/base/Data/OldList.hs
@@ -754,6 +754,7 @@ groupBy eq (x:xs)       =  (x:ys) : groupBy eq zs
 inits                   :: [a] -> [[a]]
 inits                   = map toListSB . scanl' snocSB emptySB
 {-# NOINLINE inits #-}
+
 -- We do not allow inits to inline, because it plays havoc with Call Arity
 -- if it fuses with a consumer, and it would generally lead to serious
 -- loss of sharing if allowed to fuse with a producer.
@@ -1066,12 +1067,26 @@ unlines (l:ls) = l ++ '\n' : unlines ls
 -- | 'words' breaks a string up into a list of words, which were delimited
 -- by white space.
 words                   :: String -> [String]
+{-# NOINLINE [1] words #-}
 words s                 =  case dropWhile {-partain:Char.-}isSpace s of
                                 "" -> []
                                 s' -> w : words s''
                                       where (w, s'') =
                                              break {-partain:Char.-}isSpace s'
 
+{-# RULES
+"words" [~1] forall s . words s = build (\c n -> wordsFB c n s)
+"wordsList" [1] wordsFB (:) [] = words
+ #-}
+wordsFB :: ([Char] -> b -> b) -> b -> String -> b
+{-# NOINLINE [0] wordsFB #-}
+wordsFB c n = go
+  where
+    go s = case dropWhile isSpace s of
+             "" -> n
+             s' -> w `c` go s''
+                   where (w, s'') = break isSpace s'
+
 -- | 'unwords' is an inverse operation to 'words'.
 -- It joins words with separating spaces.
 unwords                 :: [String] -> String
@@ -1079,11 +1094,35 @@ unwords                 :: [String] -> String
 unwords []              =  ""
 unwords ws              =  foldr1 (\w s -> w ++ ' ':s) ws
 #else
--- HBC version (stolen)
--- here's a more efficient version
+-- Here's a lazier version that can get the last element of a
+-- _|_-terminated list.
+{-# NOINLINE [1] unwords #-}
 unwords []              =  ""
-unwords [w]             = w
-unwords (w:ws)          = w ++ ' ' : unwords ws
+unwords (w:ws)          = w ++ go ws
+  where
+    go []     = ""
+    go (v:vs) = ' ' : (v ++ go vs)
+
+-- In general, the foldr-based version is probably slightly worse
+-- than the HBC version, because it adds an extra space and then takes
+-- it back off again. But when it fuses, it reduces allocation. How much
+-- depends entirely on the average word length--it's most effective when
+-- the words are on the short side.
+{-# RULES
+"unwords" [~1] forall ws .
+   unwords ws = tailUnwords (foldr unwordsFB "" ws)
+"unwordsList" [1] forall ws .
+   tailUnwords (foldr unwordsFB "" ws) = unwords ws
+ #-}
+
+{-# INLINE [0] tailUnwords #-}
+tailUnwords           :: String -> String
+tailUnwords []        = []
+tailUnwords (_:xs)    = xs
+
+{-# INLINE [0] unwordsFB #-}
+unwordsFB               :: String -> String -> String
+unwordsFB w r           = ' ' : w ++ r
 #endif
 
 {- A "SnocBuilder" is a version of Chris Okasaki's banker's queue that supports



More information about the ghc-commits mailing list