[GHC] #11284: Lambda-lifting fails in simple Text example

GHC ghc-devs at haskell.org
Fri Sep 7 08:11:15 UTC 2018


#11284: Lambda-lifting fails in simple Text example
-------------------------------------+-------------------------------------
        Reporter:  bgamari           |                Owner:  (none)
            Type:  bug               |               Status:  closed
        Priority:  normal            |            Milestone:
       Component:  Compiler          |              Version:  7.10.3
      Resolution:  duplicate         |             Keywords:  LateLamLift
Operating System:  Unknown/Multiple  |         Architecture:
 Type of failure:  Runtime           |  Unknown/Multiple
  performance bug                    |            Test Case:
      Blocked By:                    |             Blocking:
 Related Tickets:  #5945, #11318     |  Differential Rev(s):
       Wiki Page:                    |
-------------------------------------+-------------------------------------
Description changed by sgraf:

Old description:

> Consider the example (which uses `Text`; I'm working on finding a more
> minimal example),
> {{{#!hs
> import Data.Char (isSpace)
> import Data.List (foldl')
> import GHC.Exts (build)
> import qualified  Data.Text as T
>
> longestWord :: T.Text -> Int
> longestWord t = foldl' max 0 $ map T.length $ fusedWords t
>
> fusedWords :: T.Text -> [T.Text]
> fusedWords t0 = build $ \cons nil ->
>   let go !t
>         | T.null t  = nil
>         | otherwise = let (w, rest) = T.span (not . isSpace) t
>                       in cons w (go $ T.dropWhile isSpace rest)
>   in go t0
>
> -- For reference
> data Text = Text
>     {-# UNPACK #-} !A.Array          -- payload (Word16 elements)
>     {-# UNPACK #-} !Int              -- offset (units of Word16, not
> Char)
>     {-# UNPACK #-} !Int              -- length (units of Word16, not
> Char)
> }}}
>
> `longestWord` here produces the simplified Core,
>
> {{{#!hs
> Ticket.$wgo :: GHC.Prim.ByteArray# -> GHC.Prim.Int# -> GHC.Prim.Int# ->
> [T.Text]
> Ticket.$wgo = ...
>
> -- > $wgo1 xs n = foldl' (\a b -> max a $ T.length b) n xs
> Ticket.$wgo1 :: [T.Text] -> GHC.Prim.Int# -> GHC.Prim.Int#
> Ticket.$wgo1 =
>   \ (w_s4GJ :: [T.Text]) (ww_s4GN :: GHC.Prim.Int#) ->
>     case w_s4GJ of _ {
>       [] -> ww_s4GN;
>       : y_a4vC ys_a4vD ->
>         case y_a4vC
>         of _ { Data.Text.Internal.Text dt_a4jP dt1_a4jQ dt2_a4jR ->
>         let {
>           a_a4jO :: GHC.Prim.Int#
>           a_a4jO = GHC.Prim.+# dt1_a4jQ dt2_a4jR } in
>         letrec {
>           -- For the love of all that is good, why must you allocate?
>           --
>           -- This loop is essentially `T.length`, the first argument
> being
>           -- the length accumulator and the second being an index into
> the
>           -- ByteArray#
>           $wloop_length_s4GI :: GHC.Prim.Int# -> GHC.Prim.Int# ->
> GHC.Prim.Int#
>           $wloop_length_s4GI =
>             \ (ww1_s4Gz :: GHC.Prim.Int#) (ww2_s4GD :: GHC.Prim.Int#) ->
>               -- Have we reached the end of the Text?
>               case GHC.Prim.tagToEnum# @ Bool (GHC.Prim.>=# ww2_s4GD
> a_a4jO)
>               of _ {
>                 False -> {
>                   ...
>                   -- in this body there are few cases analyses which
>                   -- classify the code-points we encounter. The branches
>                   -- are recursive calls of the form
>                   $wloop_length_s4GI (GHC.Prim.+# ww1_s4Gz 1)
> (GHC.Prim.+# ww2_s4GD 1)
>                   ...
>                 True -> ww1_s4Gz
>               }; } in
>         case $wloop_length_s4GI 0 dt1_a4jQ of ww1_s4GH { __DEFAULT ->
>         case GHC.Prim.tagToEnum# @ Bool (GHC.Prim.<=# ww_s4GN ww1_s4GH)
>         of _ {
>           False -> Ticket.$wgo1 ys_a4vD ww_s4GN;
>           True -> Ticket.$wgo1 ys_a4vD ww1_s4GH
>         }
>         }
>         }
>     }
>
> longestWord :: T.Text -> Int
> longestWord =
>   \ (w_s4GT :: T.Text) ->
>     case w_s4GT
>     of _ { Data.Text.Internal.Text ww1_s4GW ww2_s4GX ww3_s4GY ->
>     case Ticket.$wgo1 (Ticket.$wgo ww1_s4GW ww2_s4GX ww3_s4GY) 0
>     of ww4_s4H2 { __DEFAULT ->
>     GHC.Types.I# ww4_s4H2
>     }
>     }
> }}}
>
> Notice `$wloop_length_s4GI`: It should be a nice tight loop counting
> Unicode characters in the array `dt_a4jP` until it arrives at its end.
> However, GHC fails to lambda-lift this closure, thereby turning it into
> an allocating operation! Oh no!

New description:

 Consider the example (which uses `Text`; I'm working on finding a more
 minimal example),
 {{{#!hs
 {-# LANGUAGE BangPatterns #-}
 module T11284 where

 import Data.Char (isSpace)
 import Data.List (foldl')
 import GHC.Exts (build)
 import qualified  Data.Text as T
 import qualified  Data.Text.Array as A

 longestWord :: T.Text -> Int
 longestWord t = foldl' max 0 $ map T.length $ fusedWords t

 fusedWords :: T.Text -> [T.Text]
 fusedWords t0 = build $ \cons nil ->
   let go !t
         | T.null t  = nil
         | otherwise = let (w, rest) = T.span (not . isSpace) t
                       in cons w (go $ T.dropWhile isSpace rest)
   in go t0

 -- For reference
 data Text = Text
     {-# UNPACK #-} !A.Array          -- payload (Word16 elements)
     {-# UNPACK #-} !Int              -- offset (units of Word16, not Char)
     {-# UNPACK #-} !Int              -- length (units of Word16, not Char)
 }}}

 `longestWord` here produces the simplified Core,

 {{{#!hs
 Ticket.$wgo :: GHC.Prim.ByteArray# -> GHC.Prim.Int# -> GHC.Prim.Int# ->
 [T.Text]
 Ticket.$wgo = ...

 -- > $wgo1 xs n = foldl' (\a b -> max a $ T.length b) n xs
 Ticket.$wgo1 :: [T.Text] -> GHC.Prim.Int# -> GHC.Prim.Int#
 Ticket.$wgo1 =
   \ (w_s4GJ :: [T.Text]) (ww_s4GN :: GHC.Prim.Int#) ->
     case w_s4GJ of _ {
       [] -> ww_s4GN;
       : y_a4vC ys_a4vD ->
         case y_a4vC
         of _ { Data.Text.Internal.Text dt_a4jP dt1_a4jQ dt2_a4jR ->
         let {
           a_a4jO :: GHC.Prim.Int#
           a_a4jO = GHC.Prim.+# dt1_a4jQ dt2_a4jR } in
         letrec {
           -- For the love of all that is good, why must you allocate?
           --
           -- This loop is essentially `T.length`, the first argument being
           -- the length accumulator and the second being an index into the
           -- ByteArray#
           $wloop_length_s4GI :: GHC.Prim.Int# -> GHC.Prim.Int# ->
 GHC.Prim.Int#
           $wloop_length_s4GI =
             \ (ww1_s4Gz :: GHC.Prim.Int#) (ww2_s4GD :: GHC.Prim.Int#) ->
               -- Have we reached the end of the Text?
               case GHC.Prim.tagToEnum# @ Bool (GHC.Prim.>=# ww2_s4GD
 a_a4jO)
               of _ {
                 False -> {
                   ...
                   -- in this body there are few cases analyses which
                   -- classify the code-points we encounter. The branches
                   -- are recursive calls of the form
                   $wloop_length_s4GI (GHC.Prim.+# ww1_s4Gz 1) (GHC.Prim.+#
 ww2_s4GD 1)
                   ...
                 True -> ww1_s4Gz
               }; } in
         case $wloop_length_s4GI 0 dt1_a4jQ of ww1_s4GH { __DEFAULT ->
         case GHC.Prim.tagToEnum# @ Bool (GHC.Prim.<=# ww_s4GN ww1_s4GH)
         of _ {
           False -> Ticket.$wgo1 ys_a4vD ww_s4GN;
           True -> Ticket.$wgo1 ys_a4vD ww1_s4GH
         }
         }
         }
     }

 longestWord :: T.Text -> Int
 longestWord =
   \ (w_s4GT :: T.Text) ->
     case w_s4GT
     of _ { Data.Text.Internal.Text ww1_s4GW ww2_s4GX ww3_s4GY ->
     case Ticket.$wgo1 (Ticket.$wgo ww1_s4GW ww2_s4GX ww3_s4GY) 0
     of ww4_s4H2 { __DEFAULT ->
     GHC.Types.I# ww4_s4H2
     }
     }
 }}}

 Notice `$wloop_length_s4GI`: It should be a nice tight loop counting
 Unicode characters in the array `dt_a4jP` until it arrives at its end.
 However, GHC fails to lambda-lift this closure, thereby turning it into an
 allocating operation! Oh no!

--

-- 
Ticket URL: <http://ghc.haskell.org/trac/ghc/ticket/11284#comment:18>
GHC <http://www.haskell.org/ghc/>
The Glasgow Haskell Compiler


More information about the ghc-tickets mailing list