[GHC] #13763: Performance regression (~34%) in 8.2.1, poor register allocation(?) in an inner loop over an array

GHC ghc-devs at haskell.org
Sun May 28 17:21:32 UTC 2017


#13763: Performance regression (~34%) in 8.2.1, poor register allocation(?) in an
inner loop over an array
-------------------------------------+-------------------------------------
        Reporter:  jberryman         |                Owner:  (none)
            Type:  bug               |               Status:  new
        Priority:  normal            |            Milestone:
       Component:  Compiler (NCG)    |              Version:  8.2.1-rc2
      Resolution:                    |             Keywords:  JoinPoints
Operating System:  Unknown/Multiple  |         Architecture:
 Type of failure:  Runtime           |  Unknown/Multiple
  performance bug                    |            Test Case:
      Blocked By:                    |             Blocking:
 Related Tickets:                    |  Differential Rev(s):
       Wiki Page:                    |
-------------------------------------+-------------------------------------

Comment (by RyanGlScott):

 Looking at the `-ddump-simpl` output, there are some noticeable
 differences in the Core emitted by 8.0.2 and 8.2.1-rc2.

 From 8.0.2, we have:

 {{{
 -- RHS size: {terms: 137, types: 72, coercions: 5}
 main1 :: State# RealWorld -> (# State# RealWorld, () #)
 main1 =
   \ (s :: State# RealWorld) ->
     case newAlignedPinnedByteArray# 8000000# 8# (s `cast` ...)
     of _ { (# ipv, ipv1 #) ->
     case unsafeFreezeByteArray# ipv1 ipv of _ { (# ipv2, ipv3 #) ->
     (# ipv2 `cast` ...,
        let {
          ipv4 :: Int#
          ipv4 = uncheckedIShiftRA# (sizeofByteArray# ipv3) 1# } in
        let {
          ipv5 :: Int#
          ipv5 = -# ipv4 (andI# ipv4 3#) } in
        let {
          ipv6 :: Int#
          ipv6 = uncheckedIShiftRA# ipv5 2# } in
        let {
          ipv7 :: Int#
          ipv7 = -# ipv4 1# } in
        letrec {
          $whashRemainingWord16s
            :: Word# -> Word# -> Word# -> Word# -> Int# -> Word#
          $whashRemainingWord16s =
            \ (ww :: Word#)
              (ww1 :: Word#)
              (ww2 :: Word#)
              (ww3 :: Word#)
              (ww4 :: Int#) ->
              case tagToEnum# (># ww4 ipv7) of _ {
                False ->
                  case indexWord16Array# ipv3 ww4 of wild1 { __DEFAULT ->
                  let {
                    v0 :: Word#
                    v0 = xor# ww wild1 } in
                  $whashRemainingWord16s
                    (xor# ww3 v0)
                    (xor# v0 ww1)
                    (xor# ww1 ww2)
                    (xor# ww2 ww3)
                    (+# ww4 1#)
                  };
                True -> ww
              }; } in
        letrec {
          $whash4Word16sLoop
            :: Word# -> Word# -> Word# -> Word# -> Int# -> Word#
          $whash4Word16sLoop =
            \ (ww :: Word#)
              (ww1 :: Word#)
              (ww2 :: Word#)
              (ww3 :: Word#)
              (ww4 :: Int#) ->
              case tagToEnum# (==# ww4 ipv6) of _ {
                False ->
                  case indexWord64Array# ipv3 ww4 of wild1 { __DEFAULT ->
                  let {
                    v0 :: Word#
                    v0 =
                      xor#
                        ww
                        (or#
                           (and# (uncheckedShiftRL# (byteSwap# wild1) 8#)
 71777214294589695##)
                           (and#
                              (uncheckedShiftL# (byteSwap# wild1) 8#)
                              18374966859414961920##)) } in
                  $whash4Word16sLoop
                    (xor# ww3 v0)
                    (xor# v0 ww1)
                    (xor# ww1 ww2)
                    (xor# ww2 ww3)
                    (+# ww4 1#)
                  };
                True -> $whashRemainingWord16s ww ww1 ww2 ww3 ipv5
              }; } in
        case $whash4Word16sLoop 99## 1## 2## 3## 0# of _ { __DEFAULT ->
        ()
        } #)
     }
     }

 -- RHS size: {terms: 1, types: 0, coercions: 3}
 main :: IO ()
 main = main1 `cast` ...
 }}}

 And from 8.2.1-rc2, we have:

 {{{
 -- RHS size: {terms: 134, types: 77, coercions: 72, joins: 2/8}
 main1 :: State# RealWorld -> (# State# RealWorld, () #)
 main1
   = \ (s :: State# RealWorld) ->
       case newAlignedPinnedByteArray# 8000000# 8# (s `cast` <Co:41>) of
       { (# ipv, ipv1 #) ->
       case unsafeFreezeByteArray# ipv1 ipv of { (# ipv2, ipv3 #) ->
       (# ipv2 `cast` <Co:31>,
          let {
            ipv4 :: Int#
            ipv4 = uncheckedIShiftRA# (sizeofByteArray# ipv3) 1# } in
          let {
            ixFinal :: Int#
            ixFinal = -# ipv4 1# } in
          let {
            word16sIx :: Int#
            word16sIx = -# ipv4 (andI# ipv4 3#) } in
          let {
            word16sIxWd :: Int#
            word16sIxWd = uncheckedIShiftRA# word16sIx 2# } in
          joinrec {
            $whashRemainingWord16s
              :: Word# -> Word# -> Word# -> Word# -> Int# -> ()
            $whashRemainingWord16s (ww :: Word#)
                                   (ww1 :: Word#)
                                   (ww2 :: Word#)
                                   (ww3 :: Word#)
                                   (ww4 :: Int#)
              = case tagToEnum# (># ww4 ixFinal) of {
                  False ->
                    case indexWord16Array# ipv3 ww4 of wild1 { __DEFAULT ->
                    let {
                      v0 :: Word#
                      v0 = xor# ww wild1 } in
                    jump $whashRemainingWord16s
                      (xor# ww3 v0)
                      (xor# v0 ww1)
                      (xor# ww1 ww2)
                      (xor# ww2 ww3)
                      (+# ww4 1#)
                    };
                  True -> ()
                }; } in
          joinrec {
            $whash4Word16sLoop
              :: Word# -> Word# -> Word# -> Word# -> Int# -> ()
            $whash4Word16sLoop (ww :: Word#)
                               (ww1 :: Word#)
                               (ww2 :: Word#)
                               (ww3 :: Word#)
                               (ww4 :: Int#)
              = case tagToEnum# (==# ww4 word16sIxWd) of {
                  False ->
                    case indexWord64Array# ipv3 ww4 of wild1 { __DEFAULT ->
                    let {
                      v0 :: Word#
                      v0
                        = xor#
                            ww
                            (or#
                               (and# (uncheckedShiftRL# (byteSwap# wild1)
 8#) 71777214294589695##)
                               (and#
                                  (uncheckedShiftL# (byteSwap# wild1) 8#)
                                  18374966859414961920##)) } in
                    jump $whash4Word16sLoop
                      (xor# ww3 v0)
                      (xor# v0 ww1)
                      (xor# ww1 ww2)
                      (xor# ww2 ww3)
                      (+# ww4 1#)
                    };
                  True -> jump $whashRemainingWord16s ww ww1 ww2 ww3
 word16sIx
                }; } in
          jump $whash4Word16sLoop 99## 1## 2## 3## 0# #)
       }
       }

 -- RHS size: {terms: 1, types: 0, coercions: 3, joins: 0/0}
 main :: IO ()
 main = main1 `cast` <Co:3>
 }}}

--
Ticket URL: <http://ghc.haskell.org/trac/ghc/ticket/13763#comment:3>
GHC <http://www.haskell.org/ghc/>
The Glasgow Haskell Compiler


More information about the ghc-tickets mailing list