[GHC] #13763: Performance regression (~34%) in 8.2.1, poor register allocation(?) in an inner loop over an array
GHC
ghc-devs at haskell.org
Sun May 28 17:21:32 UTC 2017
#13763: Performance regression (~34%) in 8.2.1, poor register allocation(?) in an
inner loop over an array
-------------------------------------+-------------------------------------
Reporter: jberryman | Owner: (none)
Type: bug | Status: new
Priority: normal | Milestone:
Component: Compiler (NCG) | Version: 8.2.1-rc2
Resolution: | Keywords: JoinPoints
Operating System: Unknown/Multiple | Architecture:
Type of failure: Runtime | Unknown/Multiple
performance bug | Test Case:
Blocked By: | Blocking:
Related Tickets: | Differential Rev(s):
Wiki Page: |
-------------------------------------+-------------------------------------
Comment (by RyanGlScott):
Looking at the `-ddump-simpl` output, there are some noticeable
differences in the Core emitted by 8.0.2 and 8.2.1-rc2.
From 8.0.2, we have:
{{{
-- RHS size: {terms: 137, types: 72, coercions: 5}
main1 :: State# RealWorld -> (# State# RealWorld, () #)
main1 =
\ (s :: State# RealWorld) ->
case newAlignedPinnedByteArray# 8000000# 8# (s `cast` ...)
of _ { (# ipv, ipv1 #) ->
case unsafeFreezeByteArray# ipv1 ipv of _ { (# ipv2, ipv3 #) ->
(# ipv2 `cast` ...,
let {
ipv4 :: Int#
ipv4 = uncheckedIShiftRA# (sizeofByteArray# ipv3) 1# } in
let {
ipv5 :: Int#
ipv5 = -# ipv4 (andI# ipv4 3#) } in
let {
ipv6 :: Int#
ipv6 = uncheckedIShiftRA# ipv5 2# } in
let {
ipv7 :: Int#
ipv7 = -# ipv4 1# } in
letrec {
$whashRemainingWord16s
:: Word# -> Word# -> Word# -> Word# -> Int# -> Word#
$whashRemainingWord16s =
\ (ww :: Word#)
(ww1 :: Word#)
(ww2 :: Word#)
(ww3 :: Word#)
(ww4 :: Int#) ->
case tagToEnum# (># ww4 ipv7) of _ {
False ->
case indexWord16Array# ipv3 ww4 of wild1 { __DEFAULT ->
let {
v0 :: Word#
v0 = xor# ww wild1 } in
$whashRemainingWord16s
(xor# ww3 v0)
(xor# v0 ww1)
(xor# ww1 ww2)
(xor# ww2 ww3)
(+# ww4 1#)
};
True -> ww
}; } in
letrec {
$whash4Word16sLoop
:: Word# -> Word# -> Word# -> Word# -> Int# -> Word#
$whash4Word16sLoop =
\ (ww :: Word#)
(ww1 :: Word#)
(ww2 :: Word#)
(ww3 :: Word#)
(ww4 :: Int#) ->
case tagToEnum# (==# ww4 ipv6) of _ {
False ->
case indexWord64Array# ipv3 ww4 of wild1 { __DEFAULT ->
let {
v0 :: Word#
v0 =
xor#
ww
(or#
(and# (uncheckedShiftRL# (byteSwap# wild1) 8#)
71777214294589695##)
(and#
(uncheckedShiftL# (byteSwap# wild1) 8#)
18374966859414961920##)) } in
$whash4Word16sLoop
(xor# ww3 v0)
(xor# v0 ww1)
(xor# ww1 ww2)
(xor# ww2 ww3)
(+# ww4 1#)
};
True -> $whashRemainingWord16s ww ww1 ww2 ww3 ipv5
}; } in
case $whash4Word16sLoop 99## 1## 2## 3## 0# of _ { __DEFAULT ->
()
} #)
}
}
-- RHS size: {terms: 1, types: 0, coercions: 3}
main :: IO ()
main = main1 `cast` ...
}}}
And from 8.2.1-rc2, we have:
{{{
-- RHS size: {terms: 134, types: 77, coercions: 72, joins: 2/8}
main1 :: State# RealWorld -> (# State# RealWorld, () #)
main1
= \ (s :: State# RealWorld) ->
case newAlignedPinnedByteArray# 8000000# 8# (s `cast` <Co:41>) of
{ (# ipv, ipv1 #) ->
case unsafeFreezeByteArray# ipv1 ipv of { (# ipv2, ipv3 #) ->
(# ipv2 `cast` <Co:31>,
let {
ipv4 :: Int#
ipv4 = uncheckedIShiftRA# (sizeofByteArray# ipv3) 1# } in
let {
ixFinal :: Int#
ixFinal = -# ipv4 1# } in
let {
word16sIx :: Int#
word16sIx = -# ipv4 (andI# ipv4 3#) } in
let {
word16sIxWd :: Int#
word16sIxWd = uncheckedIShiftRA# word16sIx 2# } in
joinrec {
$whashRemainingWord16s
:: Word# -> Word# -> Word# -> Word# -> Int# -> ()
$whashRemainingWord16s (ww :: Word#)
(ww1 :: Word#)
(ww2 :: Word#)
(ww3 :: Word#)
(ww4 :: Int#)
= case tagToEnum# (># ww4 ixFinal) of {
False ->
case indexWord16Array# ipv3 ww4 of wild1 { __DEFAULT ->
let {
v0 :: Word#
v0 = xor# ww wild1 } in
jump $whashRemainingWord16s
(xor# ww3 v0)
(xor# v0 ww1)
(xor# ww1 ww2)
(xor# ww2 ww3)
(+# ww4 1#)
};
True -> ()
}; } in
joinrec {
$whash4Word16sLoop
:: Word# -> Word# -> Word# -> Word# -> Int# -> ()
$whash4Word16sLoop (ww :: Word#)
(ww1 :: Word#)
(ww2 :: Word#)
(ww3 :: Word#)
(ww4 :: Int#)
= case tagToEnum# (==# ww4 word16sIxWd) of {
False ->
case indexWord64Array# ipv3 ww4 of wild1 { __DEFAULT ->
let {
v0 :: Word#
v0
= xor#
ww
(or#
(and# (uncheckedShiftRL# (byteSwap# wild1)
8#) 71777214294589695##)
(and#
(uncheckedShiftL# (byteSwap# wild1) 8#)
18374966859414961920##)) } in
jump $whash4Word16sLoop
(xor# ww3 v0)
(xor# v0 ww1)
(xor# ww1 ww2)
(xor# ww2 ww3)
(+# ww4 1#)
};
True -> jump $whashRemainingWord16s ww ww1 ww2 ww3
word16sIx
}; } in
jump $whash4Word16sLoop 99## 1## 2## 3## 0# #)
}
}
-- RHS size: {terms: 1, types: 0, coercions: 3, joins: 0/0}
main :: IO ()
main = main1 `cast` <Co:3>
}}}
--
Ticket URL: <http://ghc.haskell.org/trac/ghc/ticket/13763#comment:3>
GHC <http://www.haskell.org/ghc/>
The Glasgow Haskell Compiler
More information about the ghc-tickets
mailing list