[Haskell-cafe] RE: GHC optimization issue

Simon Peyton-Jones simonpj at microsoft.com
Thu Dec 22 17:06:17 EST 2005


(Brief because I'm at home.)

Use INLINE (alone) to include swap at its call sites.  (That gives
perfect per-call-site specialisation.)

Use SPECIALISE (alone) to make a specialised copy of swap, which is then
not duplicated at each call site, but instead is called.

No need to use them both.

The fact that GHC inlines the (big) unspecialised version and not the
(apparently smaller) specialised version, is something we should look
into.

Simon

| -----Original Message-----
| From: Joel Reymont [mailto:joelr1 at gmail.com]
| Sent: 22 December 2005 14:09
| To: Haskell-Cafe Cafe
| Cc: Simon Peyton-Jones; Simon Marlow
| Subject: GHC optimization issue
| 
| Folks,
| 
| I have been trying to improve my byte swapping routines as part of my
| effort to speed up serialization. I then tried to look at the core
| output from GHC to see what it was converting my code into. Brandon
| (skew on #haskell) helped me code a TH version but then I went with a
| regular CPP version instead.
| 
| The point of contention is that the logical approach to optimization
| does not produce expected results. The only difference between the
| following two core outputs is the presence or absence of specialize
| and inline pragmas. The code is the same.
| 
| The version without pragmas inlines foo1 into
| 
| Main.foo1 :: GHC.Word.Word16
| [GlobalId]
| [Str: DmdType]
| Main.foo1 = case GHC.Word.Word16 GHC.Word.$wshift3 __word 3855 (-8)
|              of ww1_a2eo { __DEFAULT ->
|              case GHC.Word.Word16 GHC.Word.$wshift3 __word 15 8 of
| ww_a2f4 { __DEFAULT ->
|              GHC.Word.W16# (GHC.Prim.narrow16Word#
| (GHC.Prim.plusWord# ww1_a2eo ww_a2f4))
|              }
|              }
| 
| Whereas the version _with_ pragmas produces a function call:
| 
| Main.foo1 :: GHC.Word.Word16
| [GlobalId]
| [Str: DmdType]
| Main.foo1 = case GHC.Word.Word16 Swap1.$w$sswap16 __word 3855
|              of ww1_s1Ia { __DEFAULT ->
|              GHC.Word.W16# ww1_s1Ia
|              }
| 
| Is there a reasonable explanation?
| 
| Both versions compiled thusly: ghc --make Foo.hs -O -ddump-simpl > foo
| 
| Swap:
| 
| {-# OPTIONS_GHC -fglasgow-exts -cpp #-}
| module Swap1
| (
| swap16
| )
| where
| 
| import Data.Word
| import Data.Int
| import Data.Bits
| 
| #define BIG_ENDIAN 1
| 
| {-# SPECIALIZE swap16 :: Word16 -> Word16 #-}
| {-# SPECIALIZE swap16 :: Int16 -> Int16 #-}
| {-# INLINE swap16 #-}
| 
| swap16 :: Bits a => a -> a
| #ifdef BIG_ENDIAN
| swap16 v = (v `shiftR` 8) + ((v .&. 0xFF) `shiftL` 8)
| #else
| swap16 v = v
| #endif
| 
| Foo:
| 
| module Main where
| 
| import Data.Word
| import Swap1
| 
| foo1 :: Word16
| foo1 = swap16 0x0f0f
| 
| main = putStrLn $ show foo1
| 
| Core output for the version WITH pragmas, see the version w/o pragmas
| way below
| 
| ==================== Tidy Core Rules ====================
| "SPEC Swap1.swap16" __forall {$dBits_X1Co :: {Data.Bits.Bits
| GHC.Int.Int16}}
|                        Swap1.swap16 @ GHC.Int.Int16 $dBits_X1Co
|                        = Swap1.$sswap16 ;
| "SPEC Swap1.swap16" __forall {$dBits_X1Cv :: {Data.Bits.Bits
| GHC.Word.Word16}}
|                        Swap1.swap16 @ GHC.Word.Word16 $dBits_X1Cv
|                        = Swap1.$sswap161 ;
| 
| 
| ==================== Tidy Core ====================
| Main.foo1 :: GHC.Word.Word16
| [GlobalId]
| [Str: DmdType]
| Main.foo1 = case GHC.Word.Word16 Swap1.$w$sswap16 __word 3855
|              of ww1_s1Ia { __DEFAULT ->
|              GHC.Word.W16# ww1_s1Ia
|              }
| 
| ==================== Tidy Core ====================
| Swap1.$w$sswap16 :: GHC.Prim.Word# -> GHC.Prim.Word#
| [GlobalId]
| [Arity 1
| NoCafRefs
| Str: DmdType L]
| Swap1.$w$sswap16 = \ (ww_s1I5 :: GHC.Prim.Word#) ->
|                       case GHC.Prim.Word# GHC.Word.$wshift3 ww_s1I5
| (-8) of ww1_a1Gx { __DEFAULT ->
|                       case GHC.Prim.Word# GHC.Word.$wshift3
| (GHC.Prim.and# ww_s1I5 __word 255) 8
|                       of ww11_a1Hd { __DEFAULT ->
|                       GHC.Prim.narrow16Word# (GHC.Prim.plusWord#
| ww1_a1Gx ww11_a1Hd)
|                       }
|                       }
| 
| Swap1.$w$sswap161 :: GHC.Prim.Int# -> GHC.Prim.Int#
| [GlobalId]
| [Arity 1
| NoCafRefs
| Str: DmdType L]
| Swap1.$w$sswap161 = \ (ww_s1HV :: GHC.Prim.Int#) ->
|                        case GHC.Prim.Int# GHC.Int.$wshift2 ww_s1HV
| (-8) of ww1_a1Fk { __DEFAULT ->
|                        case GHC.Prim.Int# GHC.Int.$wshift2
|                                             (GHC.Prim.word2Int#
| (GHC.Prim.and# (GHC.Prim.int2Word# ww_s1HV) __word 255))
|                                             8
|                        of ww11_a1G5 { __DEFAULT ->
|                        GHC.Prim.narrow16Int# (GHC.Prim.+# ww1_a1Fk
| ww11_a1G5)
|                        }
|                        }
| 
| Swap1.$sswap16 :: GHC.Int.Int16 -> GHC.Int.Int16
| [GlobalId]
| [Arity 1
| Worker Swap1.$w$sswap161
| NoCafRefs
| Str: DmdType U(L)m]
| Swap1.$sswap16 = __inline_me (\ (w_s1HT :: GHC.Int.Int16) ->
|                                  case GHC.Int.Int16 w_s1HT of w1_X1I5
| { GHC.Int.I16# ww_s1HV ->
|                                  case GHC.Int.Int16 Swap1.$w$sswap161
| ww_s1HV of ww1_s1I0 { __DEFAULT ->
|                                  GHC.Int.I16# ww1_s1I0
|                                  }
|                                  })
| 
| Swap1.$sswap161 :: GHC.Word.Word16 -> GHC.Word.Word16
| [GlobalId]
| [Arity 1
| Worker Swap1.$w$sswap16
| NoCafRefs
| Str: DmdType U(L)m]
| Swap1.$sswap161 = __inline_me (\ (w_s1I3 :: GHC.Word.Word16) ->
|                                   case GHC.Word.Word16 w_s1I3 of
| w1_X1Ih { GHC.Word.W16# ww_s1I5 ->
|                                   case GHC.Word.Word16 Swap1.$w
| $sswap16 ww_s1I5 of ww1_s1Ia { __DEFAULT ->
|                                   GHC.Word.W16# ww1_s1Ia
|                                   }
|                                   })
| 
| Swap1.swap16 :: forall a_a1aQ. (Data.Bits.Bits a_a1aQ) => a_a1aQ ->
| a_a1aQ
| [GlobalId]
| [Arity 1
| NoCafRefs
| Str: DmdType L]
| Swap1.swap16 = __inline_me (\ (@ a_a1dB)
|                                ($dBits_a1ip :: {Data.Bits.Bits
| a_a1dB}) ->
|                                let {
|                                  $dNum_s1BO :: {GHC.Num.Num a_a1dB}
|                                  [Str: DmdType {a1ip->U
| (SAAAAAAAAAAAAAAAAA)}]
|                                  $dNum_s1BO = Data.Bits.$p1Bits @
| a_a1dB $dBits_a1ip } in
|                                let {
|                                  lit_s1BN :: a_a1dB
|                                  [Str: DmdType {a1ip->U
| (SAAAAAAAAAAAAAAAAA) s1BO->U(AAAAAAAAS)}]
|                                  lit_s1BN = GHC.Num.fromInteger @
| a_a1dB $dNum_s1BO (GHC.Num.S# 255)
|                                } in
|                                  \ (v_a1aW :: a_a1dB) ->
|                                    GHC.Num.+
|                                      @ a_a1dB
|                                      $dNum_s1BO
|                                      (Data.Bits.shiftR @ a_a1dB
| $dBits_a1ip v_a1aW (GHC.Base.I# 8))
|                                      (Data.Bits.shiftL
|                                         @ a_a1dB
|                                         $dBits_a1ip
|                                         (Data.Bits..&. @ a_a1dB
| $dBits_a1ip v_a1aW lit_s1BN)
|                                         (GHC.Base.I# 8)))
| 
| 
| Core output without pragmas:
| 
| Main.foo1 :: GHC.Word.Word16
| [GlobalId]
| [Str: DmdType]
| Main.foo1 = case GHC.Word.Word16 GHC.Word.$wshift3 __word 3855 (-8)
|              of ww1_a2eo { __DEFAULT ->
|              case GHC.Word.Word16 GHC.Word.$wshift3 __word 15 8 of
| ww_a2f4 { __DEFAULT ->
|              GHC.Word.W16# (GHC.Prim.narrow16Word#
| (GHC.Prim.plusWord# ww1_a2eo ww_a2f4))
|              }
|              }
| 
| ---
| Swap1.swap16 :: forall a_a1aQ. (Data.Bits.Bits a_a1aQ) => a_a1aQ ->
| a_a1aQ
| [GlobalId]
| [Arity 1
| NoCafRefs
| Str: DmdType L]
| Swap1.swap16 = \ (@ a_a1dB) ($dBits_a1ip :: {Data.Bits.Bits a_a1dB})
->
|                   let {
|                     $dNum_s1BK :: {GHC.Num.Num a_a1dB}
|                     [Str: DmdType]
|                     $dNum_s1BK = case {GHC.Num.Num a_a1dB} $dBits_a1ip
|                                  of tpl_B1 { Data.Bits.:DBits tpl1_B2
| tpl2_B3 tpl3_B4 tpl4_B5 tpl5_B6 tpl6_B7 tpl7_B8 tpl8_B9 tpl9_Ba
| tpl10_Bb tpl11_Bc tpl12_Bd tpl13_Be tpl14_Bf tpl15_Bg tpl16_Bh
| tpl17_Bi tpl18_Bj ->
|                                  tpl1_B2
|                                  } } in
|                   let {
|                     lit_s1BM :: a_a1dB
|                     [Str: DmdType]
|                     lit_s1BM = case a_a1dB $dNum_s1BK
|                                of tpl_B1 { GHC.Num.:DNum tpl1_B2
| tpl2_B3 tpl3_B4 tpl4_B5 tpl5_B6 tpl6_B7 tpl7_B8 tpl8_B9 tpl9_Ba ->
|                                tpl9_Ba Swap1.lvl1
|                                }
|                   } in
|                     \ (v_a1aW :: a_a1dB) ->
|                       tpl3_B4
|                         (case a_a1dB $dBits_a1ip
|                          of tpl10_Xy { Data.Bits.:DBits tpl11_XB
| tpl12_XD tpl13_XF tpl14_XH tpl15_XJ tpl16_XL tpl17_XN tpl18_XP
| tpl19_XR tpl20_Bb tpl21_Bc tpl22_Bd tpl23_Be tpl24_Bf tpl25_Bg
| tpl26_Bh tpl27_Bi tpl28_Bj ->
|                          tpl26_Bh v_a1aW Swap1.lvl
|                          })
|                         (case a_a1dB $dBits_a1ip
|                          of tpl10_Xy { Data.Bits.:DBits tpl11_XB
| tpl12_XD tpl13_XF tpl14_XH tpl15_XJ tpl16_XL tpl17_XN tpl18_XP
| tpl19_XR tpl20_Bb tpl21_Bc tpl22_Bd tpl23_Be tpl24_Bf tpl25_Bg
| tpl26_Bh tpl27_Bi tpl28_Bj ->
|                          tpl25_Bg (tpl12_XD v_a1aW lit_s1BM) Swap1.lvl
|                          })
| 
| --
| http://wagerlabs.com/
| 
| 
| 
| 



More information about the Haskell-Cafe mailing list