[Haskell-cafe] GHC optimization issue

Joel Reymont joelr1 at gmail.com
Thu Dec 22 09:08:44 EST 2005


Folks,

I have been trying to improve my byte swapping routines as part of my  
effort to speed up serialization. I then tried to look at the core  
output from GHC to see what it was converting my code into. Brandon  
(skew on #haskell) helped me code a TH version but then I went with a  
regular CPP version instead.

The point of contention is that the logical approach to optimization  
does not produce expected results. The only difference between the  
following two core outputs is the presence or absence of specialize  
and inline pragmas. The code is the same.

The version without pragmas inlines foo1 into

Main.foo1 :: GHC.Word.Word16
[GlobalId]
[Str: DmdType]
Main.foo1 = case GHC.Word.Word16 GHC.Word.$wshift3 __word 3855 (-8)
             of ww1_a2eo { __DEFAULT ->
             case GHC.Word.Word16 GHC.Word.$wshift3 __word 15 8 of  
ww_a2f4 { __DEFAULT ->
             GHC.Word.W16# (GHC.Prim.narrow16Word#  
(GHC.Prim.plusWord# ww1_a2eo ww_a2f4))
             }
             }

Whereas the version _with_ pragmas produces a function call:

Main.foo1 :: GHC.Word.Word16
[GlobalId]
[Str: DmdType]
Main.foo1 = case GHC.Word.Word16 Swap1.$w$sswap16 __word 3855
             of ww1_s1Ia { __DEFAULT ->
             GHC.Word.W16# ww1_s1Ia
             }

Is there a reasonable explanation?

Both versions compiled thusly: ghc --make Foo.hs -O -ddump-simpl > foo

Swap:

{-# OPTIONS_GHC -fglasgow-exts -cpp #-}
module Swap1
(
swap16
)
where

import Data.Word
import Data.Int
import Data.Bits

#define BIG_ENDIAN 1

{-# SPECIALIZE swap16 :: Word16 -> Word16 #-}
{-# SPECIALIZE swap16 :: Int16 -> Int16 #-}
{-# INLINE swap16 #-}

swap16 :: Bits a => a -> a
#ifdef BIG_ENDIAN
swap16 v = (v `shiftR` 8) + ((v .&. 0xFF) `shiftL` 8)
#else
swap16 v = v
#endif

Foo:

module Main where

import Data.Word
import Swap1

foo1 :: Word16
foo1 = swap16 0x0f0f

main = putStrLn $ show foo1

Core output for the version WITH pragmas, see the version w/o pragmas  
way below

==================== Tidy Core Rules ====================
"SPEC Swap1.swap16" __forall {$dBits_X1Co :: {Data.Bits.Bits  
GHC.Int.Int16}}
                       Swap1.swap16 @ GHC.Int.Int16 $dBits_X1Co
                       = Swap1.$sswap16 ;
"SPEC Swap1.swap16" __forall {$dBits_X1Cv :: {Data.Bits.Bits  
GHC.Word.Word16}}
                       Swap1.swap16 @ GHC.Word.Word16 $dBits_X1Cv
                       = Swap1.$sswap161 ;


==================== Tidy Core ====================
Main.foo1 :: GHC.Word.Word16
[GlobalId]
[Str: DmdType]
Main.foo1 = case GHC.Word.Word16 Swap1.$w$sswap16 __word 3855
             of ww1_s1Ia { __DEFAULT ->
             GHC.Word.W16# ww1_s1Ia
             }

==================== Tidy Core ====================
Swap1.$w$sswap16 :: GHC.Prim.Word# -> GHC.Prim.Word#
[GlobalId]
[Arity 1
NoCafRefs
Str: DmdType L]
Swap1.$w$sswap16 = \ (ww_s1I5 :: GHC.Prim.Word#) ->
                      case GHC.Prim.Word# GHC.Word.$wshift3 ww_s1I5  
(-8) of ww1_a1Gx { __DEFAULT ->
                      case GHC.Prim.Word# GHC.Word.$wshift3  
(GHC.Prim.and# ww_s1I5 __word 255) 8
                      of ww11_a1Hd { __DEFAULT ->
                      GHC.Prim.narrow16Word# (GHC.Prim.plusWord#  
ww1_a1Gx ww11_a1Hd)
                      }
                      }

Swap1.$w$sswap161 :: GHC.Prim.Int# -> GHC.Prim.Int#
[GlobalId]
[Arity 1
NoCafRefs
Str: DmdType L]
Swap1.$w$sswap161 = \ (ww_s1HV :: GHC.Prim.Int#) ->
                       case GHC.Prim.Int# GHC.Int.$wshift2 ww_s1HV  
(-8) of ww1_a1Fk { __DEFAULT ->
                       case GHC.Prim.Int# GHC.Int.$wshift2
                                            (GHC.Prim.word2Int#  
(GHC.Prim.and# (GHC.Prim.int2Word# ww_s1HV) __word 255))
                                            8
                       of ww11_a1G5 { __DEFAULT ->
                       GHC.Prim.narrow16Int# (GHC.Prim.+# ww1_a1Fk  
ww11_a1G5)
                       }
                       }

Swap1.$sswap16 :: GHC.Int.Int16 -> GHC.Int.Int16
[GlobalId]
[Arity 1
Worker Swap1.$w$sswap161
NoCafRefs
Str: DmdType U(L)m]
Swap1.$sswap16 = __inline_me (\ (w_s1HT :: GHC.Int.Int16) ->
                                 case GHC.Int.Int16 w_s1HT of w1_X1I5  
{ GHC.Int.I16# ww_s1HV ->
                                 case GHC.Int.Int16 Swap1.$w$sswap161  
ww_s1HV of ww1_s1I0 { __DEFAULT ->
                                 GHC.Int.I16# ww1_s1I0
                                 }
                                 })

Swap1.$sswap161 :: GHC.Word.Word16 -> GHC.Word.Word16
[GlobalId]
[Arity 1
Worker Swap1.$w$sswap16
NoCafRefs
Str: DmdType U(L)m]
Swap1.$sswap161 = __inline_me (\ (w_s1I3 :: GHC.Word.Word16) ->
                                  case GHC.Word.Word16 w_s1I3 of  
w1_X1Ih { GHC.Word.W16# ww_s1I5 ->
                                  case GHC.Word.Word16 Swap1.$w 
$sswap16 ww_s1I5 of ww1_s1Ia { __DEFAULT ->
                                  GHC.Word.W16# ww1_s1Ia
                                  }
                                  })

Swap1.swap16 :: forall a_a1aQ. (Data.Bits.Bits a_a1aQ) => a_a1aQ ->  
a_a1aQ
[GlobalId]
[Arity 1
NoCafRefs
Str: DmdType L]
Swap1.swap16 = __inline_me (\ (@ a_a1dB)
                               ($dBits_a1ip :: {Data.Bits.Bits  
a_a1dB}) ->
                               let {
                                 $dNum_s1BO :: {GHC.Num.Num a_a1dB}
                                 [Str: DmdType {a1ip->U 
(SAAAAAAAAAAAAAAAAA)}]
                                 $dNum_s1BO = Data.Bits.$p1Bits @  
a_a1dB $dBits_a1ip } in
                               let {
                                 lit_s1BN :: a_a1dB
                                 [Str: DmdType {a1ip->U 
(SAAAAAAAAAAAAAAAAA) s1BO->U(AAAAAAAAS)}]
                                 lit_s1BN = GHC.Num.fromInteger @  
a_a1dB $dNum_s1BO (GHC.Num.S# 255)
                               } in
                                 \ (v_a1aW :: a_a1dB) ->
                                   GHC.Num.+
                                     @ a_a1dB
                                     $dNum_s1BO
                                     (Data.Bits.shiftR @ a_a1dB  
$dBits_a1ip v_a1aW (GHC.Base.I# 8))
                                     (Data.Bits.shiftL
                                        @ a_a1dB
                                        $dBits_a1ip
                                        (Data.Bits..&. @ a_a1dB  
$dBits_a1ip v_a1aW lit_s1BN)
                                        (GHC.Base.I# 8)))


Core output without pragmas:

Main.foo1 :: GHC.Word.Word16
[GlobalId]
[Str: DmdType]
Main.foo1 = case GHC.Word.Word16 GHC.Word.$wshift3 __word 3855 (-8)
             of ww1_a2eo { __DEFAULT ->
             case GHC.Word.Word16 GHC.Word.$wshift3 __word 15 8 of  
ww_a2f4 { __DEFAULT ->
             GHC.Word.W16# (GHC.Prim.narrow16Word#  
(GHC.Prim.plusWord# ww1_a2eo ww_a2f4))
             }
             }

---
Swap1.swap16 :: forall a_a1aQ. (Data.Bits.Bits a_a1aQ) => a_a1aQ ->  
a_a1aQ
[GlobalId]
[Arity 1
NoCafRefs
Str: DmdType L]
Swap1.swap16 = \ (@ a_a1dB) ($dBits_a1ip :: {Data.Bits.Bits a_a1dB}) ->
                  let {
                    $dNum_s1BK :: {GHC.Num.Num a_a1dB}
                    [Str: DmdType]
                    $dNum_s1BK = case {GHC.Num.Num a_a1dB} $dBits_a1ip
                                 of tpl_B1 { Data.Bits.:DBits tpl1_B2  
tpl2_B3 tpl3_B4 tpl4_B5 tpl5_B6 tpl6_B7 tpl7_B8 tpl8_B9 tpl9_Ba  
tpl10_Bb tpl11_Bc tpl12_Bd tpl13_Be tpl14_Bf tpl15_Bg tpl16_Bh  
tpl17_Bi tpl18_Bj ->
                                 tpl1_B2
                                 } } in
                  let {
                    lit_s1BM :: a_a1dB
                    [Str: DmdType]
                    lit_s1BM = case a_a1dB $dNum_s1BK
                               of tpl_B1 { GHC.Num.:DNum tpl1_B2  
tpl2_B3 tpl3_B4 tpl4_B5 tpl5_B6 tpl6_B7 tpl7_B8 tpl8_B9 tpl9_Ba ->
                               tpl9_Ba Swap1.lvl1
                               }
                  } in
                    \ (v_a1aW :: a_a1dB) ->
                      tpl3_B4
                        (case a_a1dB $dBits_a1ip
                         of tpl10_Xy { Data.Bits.:DBits tpl11_XB  
tpl12_XD tpl13_XF tpl14_XH tpl15_XJ tpl16_XL tpl17_XN tpl18_XP  
tpl19_XR tpl20_Bb tpl21_Bc tpl22_Bd tpl23_Be tpl24_Bf tpl25_Bg  
tpl26_Bh tpl27_Bi tpl28_Bj ->
                         tpl26_Bh v_a1aW Swap1.lvl
                         })
                        (case a_a1dB $dBits_a1ip
                         of tpl10_Xy { Data.Bits.:DBits tpl11_XB  
tpl12_XD tpl13_XF tpl14_XH tpl15_XJ tpl16_XL tpl17_XN tpl18_XP  
tpl19_XR tpl20_Bb tpl21_Bc tpl22_Bd tpl23_Be tpl24_Bf tpl25_Bg  
tpl26_Bh tpl27_Bi tpl28_Bj ->
                         tpl25_Bg (tpl12_XD v_a1aW lit_s1BM) Swap1.lvl
                         })

--
http://wagerlabs.com/







More information about the Haskell-Cafe mailing list