[Haskell-cafe] 64bit code output is less optimized than 32bit in ghc?

isto isto.aho at dnainternet.net
Sun Nov 5 15:47:34 EST 2006


Hi again,

Still playing with the Mersenne Twister; here is the
updated 64-bit version, changed so that there are not so many
constructor calls in next64 (together with updated
compiler flags).

I was wondering why different runs can have such different
run times, and the cause turned out to be my system: the running
times of the C version vary as well (usually 0.65 but sometimes 0.3).

The 64-bit version usually took about 1.1 or 1.2 seconds, while the
32-bit version required only 0.78 (against 0.65 with C for both the
32- and 64-bit versions).

Since the real workhorse here is the next64 function, I took a
look at the Core.  There seems to be an extra case statement in the
64-bit version, and this might explain the performance drop (about
6 or 7 lines below the __DEFAULT text in both versions below).
Relevant parts of the Core are below; the code is attached.  It is very
possible that I'm missing something obvious here.

So what is happening here?  :)  

Thanks again for any comments!  

br, Isto

-------------------------------------------- Core (32 and 64 nexts)
Rec {
Mersenne.$wnext64 :: Data.Array.IO.Internals.IOUArray GHC.Base.Int
						      GHC.Word.Word64
		     -> GHC.Prim.Int#
		     -> GHC.Prim.State# GHC.Prim.RealWorld
		     -> (# GHC.Prim.State# GHC.Prim.RealWorld, (GHC.Word.Word64,
GHC.Base.Int) #)
[GlobalId]
[Arity 3
 Str: DmdType LLL]
Mersenne.$wnext64 =
  \ (w_s2Zq :: Data.Array.IO.Internals.IOUArray GHC.Base.Int
GHC.Word.Word64)
    (ww_s2Zt :: GHC.Prim.Int#)
    (w1_s2Zv :: GHC.Prim.State# GHC.Prim.RealWorld) ->
    case ww_s2Zt of ds_X2F1 {
      __DEFAULT ->
	case w_s2Zq
	of wild_a2Pp { Data.Array.Base.STUArray ds2_a2Pr ds3_a2Ps marr#_a2Pt ->
	case GHC.Prim.readWord64Array# @ GHC.Prim.RealWorld marr#_a2Pt ds_X2F1
w1_s2Zv
	of wild2_a2PA { (# s2#_a2PC, e#_a2PD #) ->
	(# s2#_a2PC,
	   ((case lit_r34C of wild1_a2Ol { GHC.Word.W64# y#_a2On ->
	     let {
	       ww1_a2NY [Just L] :: GHC.Prim.Word#
	       [Str: DmdType]
	       ww1_a2NY =
		 GHC.Prim.xor#
		   e#_a2PD
		   (GHC.Prim.and#
		      (GHC.Prim.uncheckedShiftRL# e#_a2PD 29) __word
6148914691236517205) } in
	     let {
	       ww2_X2Q0 [Just L] :: GHC.Prim.Word#
	       [Str: DmdType]
	       ww2_X2Q0 =
		 GHC.Prim.xor#
		   ww1_a2NY
		   (GHC.Prim.and#
		      (GHC.Prim.uncheckedShiftL# ww1_a2NY 17) __word
8202884508482404352) } in
	     let {
	       ww3_X2QE [Just L] :: GHC.Prim.Word#
	       [Str: DmdType]
	       ww3_X2QE =
		 GHC.Prim.xor#
		   ww2_X2Q0 (GHC.Prim.and# (GHC.Prim.uncheckedShiftL# ww2_X2Q0 37)
y#_a2On)
	     } in 
	       GHC.Word.W64# (GHC.Prim.xor# ww3_X2QE
(GHC.Prim.uncheckedShiftRL# ww3_X2QE 43))
	     }),
	    (GHC.Base.I# (GHC.Prim.+# ds_X2F1 1))) #)
	}
	};
      312 ->
	case Mersenne.generateNumbers64 w_s2Zq w1_s2Zv
	of wild_a2DL { (# new_s_a2DN, a87_a2DO #) ->
	case Mersenne.$wnext64 w_s2Zq 0 new_s_a2DN
	of wild1_X2Fy { (# new_s1_X2FB, a871_X2FD #) ->
	case a871_X2FD of wild2_Xar { (w2_aU2, iN_aU3) -> (# new_s1_X2FB,
wild2_Xar #) }
	}
	}
    }
end Rec }

Mersenne.next64 :: Data.Array.IO.Internals.IOUArray GHC.Base.Int
GHC.Word.Word64
		   -> GHC.Base.Int
		   -> GHC.IOBase.IO (GHC.Word.Word64, GHC.Base.Int)
[GlobalId]
[Arity 3
 Worker Mersenne.$wnext64
 Str: DmdType LU(L)L]
Mersenne.next64 =
  __inline_me (\ (w_s2Zq :: Data.Array.IO.Internals.IOUArray
GHC.Base.Int
							     GHC.Word.Word64)
		 (w1_s2Zr :: GHC.Base.Int)
		 (w2_s2Zv :: GHC.Prim.State# GHC.Prim.RealWorld) ->
		 case w1_s2Zr of w3_X30R { GHC.Base.I# ww_s2Zt ->
		 Mersenne.$wnext64 w_s2Zq ww_s2Zt w2_s2Zv
		 })





Rec {
Mersenne.$wnext32 :: Data.Array.IO.Internals.IOUArray GHC.Base.Int
						      GHC.Word.Word32
		     -> GHC.Prim.Int#
		     -> GHC.Prim.State# GHC.Prim.RealWorld
		     -> (# GHC.Prim.State# GHC.Prim.RealWorld, (GHC.Word.Word32,
GHC.Base.Int) #)
[GlobalId]
[Arity 3
 NoCafRefs
 Str: DmdType LLL]
Mersenne.$wnext32 =
  \ (w_s2YJ :: Data.Array.IO.Internals.IOUArray GHC.Base.Int
GHC.Word.Word32)
    (ww_s2YM :: GHC.Prim.Int#)
    (w1_s2YO :: GHC.Prim.State# GHC.Prim.RealWorld) ->
    case ww_s2YM of ds_X2CS {
      __DEFAULT ->
	case w_s2YJ
	of wild_a2Hd { Data.Array.Base.STUArray ds2_a2Hf ds3_a2Hj marr#_a2Hk ->
	case GHC.Prim.readWord32Array# @ GHC.Prim.RealWorld marr#_a2Hk ds_X2CS
w1_s2YO
	of wild2_a2Hr { (# s2#_a2Ht, e#_a2Hu #) ->
	(# s2#_a2Ht,
	   ((let {
	       ww1_a2Fr [Just L] :: GHC.Prim.Word#
	       [Str: DmdType]
	       ww1_a2Fr = GHC.Prim.xor# e#_a2Hu (GHC.Prim.uncheckedShiftRL#
e#_a2Hu 11) } in
	     let {
	       ww2_X2GX [Just L] :: GHC.Prim.Word#
	       [Str: DmdType]
	       ww2_X2GX =
		 GHC.Prim.xor#
		   ww1_a2Fr
		   (GHC.Prim.and#
		      (GHC.Prim.narrow32Word# (GHC.Prim.uncheckedShiftL# ww1_a2Fr 7))
		      __word 2636928640) } in
	     let {
	       ww3_X2Hp [Just L] :: GHC.Prim.Word#
	       [Str: DmdType]
	       ww3_X2Hp =
		 GHC.Prim.xor#
		   ww2_X2GX
		   (GHC.Prim.and#
		      (GHC.Prim.narrow32Word# (GHC.Prim.uncheckedShiftL# ww2_X2GX 15))
		      __word 4022730752)
	     } in 
	       GHC.Word.W32#
		 (GHC.Prim.xor# ww3_X2Hp (GHC.Prim.uncheckedShiftRL# ww3_X2Hp 18))),
	    (GHC.Base.I# (GHC.Prim.+# ds_X2CS 1))) #)
	}
	};
      624 ->
	case Mersenne.generateNumbers32 w_s2YJ w1_s2YO
	of wild_a2DL { (# new_s_a2DN, a87_a2DO #) ->
	case Mersenne.$wnext32 w_s2YJ 0 new_s_a2DN
	of wild1_X2F2 { (# new_s1_X2F5, a871_X2F7 #) ->
	case a871_X2F7 of wild2_X80 { (w2_aSH, iN_aSI) -> (# new_s1_X2F5,
wild2_X80 #) }
	}
	}
    }
end Rec }

Mersenne.next32 :: Data.Array.IO.Internals.IOUArray GHC.Base.Int
GHC.Word.Word32
		   -> GHC.Base.Int
		   -> GHC.IOBase.IO (GHC.Word.Word32, GHC.Base.Int)
[GlobalId]
[Arity 3
 Worker Mersenne.$wnext32
 NoCafRefs
 Str: DmdType LU(L)L]
Mersenne.next32 =
  __inline_me (\ (w_s2YJ :: Data.Array.IO.Internals.IOUArray
GHC.Base.Int
							     GHC.Word.Word32)
		 (w1_s2YK :: GHC.Base.Int)
		 (w2_s2YO :: GHC.Prim.State# GHC.Prim.RealWorld) ->
		 case w1_s2YK of w3_X2ZO { GHC.Base.I# ww_s2YM ->
		 Mersenne.$wnext32 w_s2YJ ww_s2YM w2_s2YO
		 })


-------------- next part --------------
A non-text attachment was scrubbed...
Name: Mersenne.hs
Type: text/x-haskell
Size: 4696 bytes
Desc: not available
Url : http://www.haskell.org/pipermail/haskell-cafe/attachments/20061105/a7f9291f/Mersenne-0001.bin
-------------- next part --------------
A non-text attachment was scrubbed...
Name: testMT.hs
Type: text/x-haskell
Size: 993 bytes
Desc: not available
Url : http://www.haskell.org/pipermail/haskell-cafe/attachments/20061105/a7f9291f/testMT-0001.bin


More information about the Haskell-Cafe mailing list