[Haskell-cafe] GHC optimization issue
Joel Reymont
joelr1 at gmail.com
Thu Dec 22 09:08:44 EST 2005
Folks,
I have been trying to improve my byte swapping routines as part of my
effort to speed up serialization. I then tried to look at the core
output from GHC to see what it was converting my code into. Brandon
(skew on #haskell) helped me code a TH version but then I went with a
regular CPP version instead.
The point of contention is that the logical approach to optimization
does not produce expected results. The only difference between the
following two core outputs is the presence or absence of specialize
and inline pragmas. The code is the same.
The version without pragmas inlines foo1 into
Main.foo1 :: GHC.Word.Word16
[GlobalId]
[Str: DmdType]
Main.foo1 = case GHC.Word.Word16 GHC.Word.$wshift3 __word 3855 (-8)
of ww1_a2eo { __DEFAULT ->
case GHC.Word.Word16 GHC.Word.$wshift3 __word 15 8 of
ww_a2f4 { __DEFAULT ->
GHC.Word.W16# (GHC.Prim.narrow16Word#
(GHC.Prim.plusWord# ww1_a2eo ww_a2f4))
}
}
Whereas the version _with_ pragmas produces a function call:
Main.foo1 :: GHC.Word.Word16
[GlobalId]
[Str: DmdType]
Main.foo1 = case GHC.Word.Word16 Swap1.$w$sswap16 __word 3855
of ww1_s1Ia { __DEFAULT ->
GHC.Word.W16# ww1_s1Ia
}
Is there a reasonable explanation?
Both versions compiled thusly: ghc --make Foo.hs -O -ddump-simpl > foo
Swap:
{-# OPTIONS_GHC -fglasgow-exts -cpp #-}
module Swap1
(
swap16
)
where
import Data.Word
import Data.Int
import Data.Bits
#define BIG_ENDIAN 1
{-# SPECIALIZE swap16 :: Word16 -> Word16 #-}
{-# SPECIALIZE swap16 :: Int16 -> Int16 #-}
{-# INLINE swap16 #-}
swap16 :: Bits a => a -> a
#ifdef BIG_ENDIAN
swap16 v = (v `shiftR` 8) + ((v .&. 0xFF) `shiftL` 8)
#else
swap16 v = v
#endif
Foo:
module Main where
import Data.Word
import Swap1
foo1 :: Word16
foo1 = swap16 0x0f0f
main = putStrLn $ show foo1
Core output for the version WITH pragmas, see the version w/o pragmas
way below
==================== Tidy Core Rules ====================
"SPEC Swap1.swap16" __forall {$dBits_X1Co :: {Data.Bits.Bits
GHC.Int.Int16}}
Swap1.swap16 @ GHC.Int.Int16 $dBits_X1Co
= Swap1.$sswap16 ;
"SPEC Swap1.swap16" __forall {$dBits_X1Cv :: {Data.Bits.Bits
GHC.Word.Word16}}
Swap1.swap16 @ GHC.Word.Word16 $dBits_X1Cv
= Swap1.$sswap161 ;
==================== Tidy Core ====================
Main.foo1 :: GHC.Word.Word16
[GlobalId]
[Str: DmdType]
Main.foo1 = case GHC.Word.Word16 Swap1.$w$sswap16 __word 3855
of ww1_s1Ia { __DEFAULT ->
GHC.Word.W16# ww1_s1Ia
}
==================== Tidy Core ====================
Swap1.$w$sswap16 :: GHC.Prim.Word# -> GHC.Prim.Word#
[GlobalId]
[Arity 1
NoCafRefs
Str: DmdType L]
Swap1.$w$sswap16 = \ (ww_s1I5 :: GHC.Prim.Word#) ->
case GHC.Prim.Word# GHC.Word.$wshift3 ww_s1I5
(-8) of ww1_a1Gx { __DEFAULT ->
case GHC.Prim.Word# GHC.Word.$wshift3
(GHC.Prim.and# ww_s1I5 __word 255) 8
of ww11_a1Hd { __DEFAULT ->
GHC.Prim.narrow16Word# (GHC.Prim.plusWord#
ww1_a1Gx ww11_a1Hd)
}
}
Swap1.$w$sswap161 :: GHC.Prim.Int# -> GHC.Prim.Int#
[GlobalId]
[Arity 1
NoCafRefs
Str: DmdType L]
Swap1.$w$sswap161 = \ (ww_s1HV :: GHC.Prim.Int#) ->
case GHC.Prim.Int# GHC.Int.$wshift2 ww_s1HV
(-8) of ww1_a1Fk { __DEFAULT ->
case GHC.Prim.Int# GHC.Int.$wshift2
(GHC.Prim.word2Int#
(GHC.Prim.and# (GHC.Prim.int2Word# ww_s1HV) __word 255))
8
of ww11_a1G5 { __DEFAULT ->
GHC.Prim.narrow16Int# (GHC.Prim.+# ww1_a1Fk
ww11_a1G5)
}
}
Swap1.$sswap16 :: GHC.Int.Int16 -> GHC.Int.Int16
[GlobalId]
[Arity 1
Worker Swap1.$w$sswap161
NoCafRefs
Str: DmdType U(L)m]
Swap1.$sswap16 = __inline_me (\ (w_s1HT :: GHC.Int.Int16) ->
case GHC.Int.Int16 w_s1HT of w1_X1I5
{ GHC.Int.I16# ww_s1HV ->
case GHC.Int.Int16 Swap1.$w$sswap161
ww_s1HV of ww1_s1I0 { __DEFAULT ->
GHC.Int.I16# ww1_s1I0
}
})
Swap1.$sswap161 :: GHC.Word.Word16 -> GHC.Word.Word16
[GlobalId]
[Arity 1
Worker Swap1.$w$sswap16
NoCafRefs
Str: DmdType U(L)m]
Swap1.$sswap161 = __inline_me (\ (w_s1I3 :: GHC.Word.Word16) ->
case GHC.Word.Word16 w_s1I3 of
w1_X1Ih { GHC.Word.W16# ww_s1I5 ->
case GHC.Word.Word16 Swap1.$w
$sswap16 ww_s1I5 of ww1_s1Ia { __DEFAULT ->
GHC.Word.W16# ww1_s1Ia
}
})
Swap1.swap16 :: forall a_a1aQ. (Data.Bits.Bits a_a1aQ) => a_a1aQ ->
a_a1aQ
[GlobalId]
[Arity 1
NoCafRefs
Str: DmdType L]
Swap1.swap16 = __inline_me (\ (@ a_a1dB)
($dBits_a1ip :: {Data.Bits.Bits
a_a1dB}) ->
let {
$dNum_s1BO :: {GHC.Num.Num a_a1dB}
[Str: DmdType {a1ip->U
(SAAAAAAAAAAAAAAAAA)}]
$dNum_s1BO = Data.Bits.$p1Bits @
a_a1dB $dBits_a1ip } in
let {
lit_s1BN :: a_a1dB
[Str: DmdType {a1ip->U
(SAAAAAAAAAAAAAAAAA) s1BO->U(AAAAAAAAS)}]
lit_s1BN = GHC.Num.fromInteger @
a_a1dB $dNum_s1BO (GHC.Num.S# 255)
} in
\ (v_a1aW :: a_a1dB) ->
GHC.Num.+
@ a_a1dB
$dNum_s1BO
(Data.Bits.shiftR @ a_a1dB
$dBits_a1ip v_a1aW (GHC.Base.I# 8))
(Data.Bits.shiftL
@ a_a1dB
$dBits_a1ip
(Data.Bits..&. @ a_a1dB
$dBits_a1ip v_a1aW lit_s1BN)
(GHC.Base.I# 8)))
Core output without pragmas:
Main.foo1 :: GHC.Word.Word16
[GlobalId]
[Str: DmdType]
Main.foo1 = case GHC.Word.Word16 GHC.Word.$wshift3 __word 3855 (-8)
of ww1_a2eo { __DEFAULT ->
case GHC.Word.Word16 GHC.Word.$wshift3 __word 15 8 of
ww_a2f4 { __DEFAULT ->
GHC.Word.W16# (GHC.Prim.narrow16Word#
(GHC.Prim.plusWord# ww1_a2eo ww_a2f4))
}
}
---
Swap1.swap16 :: forall a_a1aQ. (Data.Bits.Bits a_a1aQ) => a_a1aQ ->
a_a1aQ
[GlobalId]
[Arity 1
NoCafRefs
Str: DmdType L]
Swap1.swap16 = \ (@ a_a1dB) ($dBits_a1ip :: {Data.Bits.Bits a_a1dB}) ->
let {
$dNum_s1BK :: {GHC.Num.Num a_a1dB}
[Str: DmdType]
$dNum_s1BK = case {GHC.Num.Num a_a1dB} $dBits_a1ip
of tpl_B1 { Data.Bits.:DBits tpl1_B2
tpl2_B3 tpl3_B4 tpl4_B5 tpl5_B6 tpl6_B7 tpl7_B8 tpl8_B9 tpl9_Ba
tpl10_Bb tpl11_Bc tpl12_Bd tpl13_Be tpl14_Bf tpl15_Bg tpl16_Bh
tpl17_Bi tpl18_Bj ->
tpl1_B2
} } in
let {
lit_s1BM :: a_a1dB
[Str: DmdType]
lit_s1BM = case a_a1dB $dNum_s1BK
of tpl_B1 { GHC.Num.:DNum tpl1_B2
tpl2_B3 tpl3_B4 tpl4_B5 tpl5_B6 tpl6_B7 tpl7_B8 tpl8_B9 tpl9_Ba ->
tpl9_Ba Swap1.lvl1
}
} in
\ (v_a1aW :: a_a1dB) ->
tpl3_B4
(case a_a1dB $dBits_a1ip
of tpl10_Xy { Data.Bits.:DBits tpl11_XB
tpl12_XD tpl13_XF tpl14_XH tpl15_XJ tpl16_XL tpl17_XN tpl18_XP
tpl19_XR tpl20_Bb tpl21_Bc tpl22_Bd tpl23_Be tpl24_Bf tpl25_Bg
tpl26_Bh tpl27_Bi tpl28_Bj ->
tpl26_Bh v_a1aW Swap1.lvl
})
(case a_a1dB $dBits_a1ip
of tpl10_Xy { Data.Bits.:DBits tpl11_XB
tpl12_XD tpl13_XF tpl14_XH tpl15_XJ tpl16_XL tpl17_XN tpl18_XP
tpl19_XR tpl20_Bb tpl21_Bc tpl22_Bd tpl23_Be tpl24_Bf tpl25_Bg
tpl26_Bh tpl27_Bi tpl28_Bj ->
tpl25_Bg (tpl12_XD v_a1aW lit_s1BM) Swap1.lvl
})
--
http://wagerlabs.com/
More information about the Haskell-Cafe
mailing list