[GHC] #14936: GHC 8.4 performance regressions when using newtypes

GHC ghc-devs at haskell.org
Thu Mar 29 15:50:30 UTC 2018


#14936: GHC 8.4 performance regressions when using newtypes
-------------------------------------+-------------------------------------
        Reporter:  danilo2           |                Owner:  (none)
            Type:  bug               |               Status:  new
        Priority:  highest           |            Milestone:  8.4.2
       Component:  Compiler          |              Version:  8.4.1
      Resolution:                    |             Keywords:  SpecConstr
Operating System:  Unknown/Multiple  |         Architecture:
                                     |  Unknown/Multiple
 Type of failure:  None/Unknown      |            Test Case:
      Blocked By:                    |             Blocking:
 Related Tickets:                    |  Differential Rev(s):
       Wiki Page:                    |
-------------------------------------+-------------------------------------
Description changed by bgamari:

Old description:

> Here is some serious performance regression in the following code:
>
> {{{
>
> {-# LANGUAGE BangPatterns #-}
> {-# LANGUAGE ScopedTypeVariables #-}
> {-# LANGUAGE FlexibleContexts #-}
>
> module Main where
>
> import Prelude
> import qualified Foreign.Storable as Storable
> import qualified Control.Monad.State.Strict as S
> import Control.Monad.IO.Class
> import Foreign.Marshal.Alloc       (mallocBytes)
> import Criterion.Main
>
> newtype Foo a = Foo a
>
> intSize :: Int
> intSize = Storable.sizeOf (undefined :: Int)
>
> slow :: Int -> IO ()
> slow i = do
>     ptr <- mallocBytes $ 2 * intSize
>     Storable.pokeByteOff ptr intSize (0 :: Int)
>     let go 0 = pure ()
>         go j = do
>             Foo (!_, !off) <- S.get
>             !(x :: Int) <- liftIO $ Storable.peekByteOff ptr off
>             liftIO $ Storable.pokeByteOff ptr off $! (x + 1)
>             go (j - 1)
>     S.evalStateT (go i) (Foo ((0::Int),(intSize::Int)))
>

> fast :: Int -> IO ()
> fast i = do
>     ptr <- mallocBytes $ 2 * intSize
>     Storable.pokeByteOff ptr intSize (0 :: Int)
>     let go 0 = pure ()
>         go j = do
>             (!_, !off) <- S.get
>             !(x :: Int) <- liftIO $ Storable.peekByteOff ptr off
>             liftIO $ Storable.pokeByteOff ptr off $! (x + 1)
>             go (j - 1)
>     S.evalStateT (go i) ((0::Int),(intSize::Int))
>

> main :: IO ()
> main = defaultMain
>     [ bgroup "slow"
>         $ (\(i :: Int) -> bench ("10e" <> show i)
>         $ perRunEnv (return ())
>         $ \v -> slow (10 ^ i))  <$> [7..8]
>
>     , bgroup "fast"
>         $ (\(i :: Int) -> bench ("10e" <> show i)
>         $ perRunEnv (return ())
>         $ \v -> fast (10 ^ i))  <$> [7..8]
>     ]
> }}}
>
> Compiled with flags:
> `-threaded -funbox-strict-fields -O2 -fconstraint-solver-iterations=100
> -fexcess-precision -fexpose-all-unfoldings -flate-dmd-anal -fspec-constr-
> keen -fspecialise-aggressively -fstatic-argument-transformation -fmax-
> worker-args=200`
>
> The `slow` function executes 2 times slower than the `fast` one. The only
> difference is that the state is wrapped in a newtype. It was working
> properly in GHC 8.2 (both functions were equally fast - as fast as the
> current `fast` function).

New description:

 Here is some serious performance regression in the following code:

 {{{#!hs

 {-# LANGUAGE BangPatterns #-}
 {-# LANGUAGE ScopedTypeVariables #-}
 {-# LANGUAGE FlexibleContexts #-}

 module Main where

 import Prelude
 import qualified Foreign.Storable as Storable
 import qualified Control.Monad.State.Strict as S
 import Control.Monad.IO.Class
 import Foreign.Marshal.Alloc       (mallocBytes)
 import Criterion.Main

 newtype Foo a = Foo a

 intSize :: Int
 intSize = Storable.sizeOf (undefined :: Int)

 slow :: Int -> IO ()
 slow i = do
     ptr <- mallocBytes $ 2 * intSize
     Storable.pokeByteOff ptr intSize (0 :: Int)
     let go 0 = pure ()
         go j = do
             Foo (!_, !off) <- S.get
             !(x :: Int) <- liftIO $ Storable.peekByteOff ptr off
             liftIO $ Storable.pokeByteOff ptr off $! (x + 1)
             go (j - 1)
     S.evalStateT (go i) (Foo ((0::Int),(intSize::Int)))


 fast :: Int -> IO ()
 fast i = do
     ptr <- mallocBytes $ 2 * intSize
     Storable.pokeByteOff ptr intSize (0 :: Int)
     let go 0 = pure ()
         go j = do
             (!_, !off) <- S.get
             !(x :: Int) <- liftIO $ Storable.peekByteOff ptr off
             liftIO $ Storable.pokeByteOff ptr off $! (x + 1)
             go (j - 1)
     S.evalStateT (go i) ((0::Int),(intSize::Int))


 main :: IO ()
 main = defaultMain
     [ bgroup "slow"
         $ (\(i :: Int) -> bench ("10e" <> show i)
         $ perRunEnv (return ())
         $ \v -> slow (10 ^ i))  <$> [7..8]

     , bgroup "fast"
         $ (\(i :: Int) -> bench ("10e" <> show i)
         $ perRunEnv (return ())
         $ \v -> fast (10 ^ i))  <$> [7..8]
     ]
 }}}

 Compiled with flags:
 `-threaded -funbox-strict-fields -O2 -fconstraint-solver-iterations=100
 -fexcess-precision -fexpose-all-unfoldings -flate-dmd-anal -fspec-constr-
 keen -fspecialise-aggressively -fstatic-argument-transformation -fmax-
 worker-args=200`

 The `slow` function executes 2 times slower than the `fast` one. The only
 difference is that the state is wrapped in a newtype. It was working
 properly in GHC 8.2 (both functions were equally fast - as fast as the
 current `fast` function).

--

-- 
Ticket URL: <http://ghc.haskell.org/trac/ghc/ticket/14936#comment:6>
GHC <http://www.haskell.org/ghc/>
The Glasgow Haskell Compiler


More information about the ghc-tickets mailing list