[Haskell-cafe] redundant loads and saves in code generated for recursive functions?
Jyotirmoy Bhattacharya
jyotirmoy at jyotirmoy.net
Wed Jul 30 12:10:56 UTC 2014
Hi,
It doesn't seem to be the same issue to me.
Unlike the bug you point to, the C-- does not have any extra stores; the
stores and loads first appear in the LLVM IR. I am attaching the C--, LLVM
and assembly code for the function.
The real missed opportunity, it seems to me, is that there is no
recognition that we are in fact making a tail call to ourselves.
Recognizing that might allow jumping to some point after the initial
stores.
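For reference, the loop in question appears to be roughly of the following
shape (a reconstruction from the attached C--; the names and the use of the
vector package are my guesses, not the original program's). GHC compiles go
into a single self tail call, which is exactly where the prologue stores
could be skipped:

{-# LANGUAGE BangPatterns #-}
import qualified Data.Vector.Unboxed as U

-- Hypothetical reconstruction: walk two arrays and accumulate the
-- maximum absolute pointwise difference, as iter_s5BH_info appears to
-- (the real code also does quot/rem index arithmetic, elided here).
maxAbsDiff :: U.Vector Double -> U.Vector Double -> Double
maxAbsDiff a b = go 0 0
  where
    n = min (U.length a) (U.length b)
    -- Strict counter and accumulator, so the loop stays a tail call
    -- on unboxed values rather than building thunks.
    go :: Int -> Double -> Double
    go !i !acc
      | i >= n    = acc
      | otherwise = go (i + 1) (max acc (abs (a U.! i - b U.! i)))

main :: IO ()
main = print (maxAbsDiff (U.fromList [1, 2, 3]) (U.fromList [1.5, 2, 2]))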
Jyotirmoy Bhattacharya
On Wed, Jul 30, 2014 at 4:14 PM, Johan Tibell <johan.tibell at gmail.com>
wrote:
> Hi Jyotirmoy,
>
> I didn't read your assembly carefully, but it sounds similar to
> https://ghc.haskell.org/trac/ghc/ticket/8905, which is not fixed yet.
>
> On Wed, Jul 30, 2014 at 12:03 PM, Jyotirmoy Bhattacharya
> <jyotirmoy at jyotirmoy.net> wrote:
> > On reading this again I realise that I got the order of loads and stores
> > wrong. The arguments are being stored on entering the function and loaded
> > before the call. But still, is there a chance of eliminating this
> > redundancy?
> >
> > Jyotirmoy
> >
> >
> > On Wed, Jul 30, 2014 at 1:54 PM, Jyotirmoy Bhattacharya
> > <jyotirmoy at jyotirmoy.net> wrote:
> >>
> >> Dear All,
> >>
> >> I am new to Haskell, so please forgive me if I am asking about
> >> something already well understood.
> >>
> >> I was trying to understand the performance of my Haskell program
> >> compiled with the LLVM backend. I used -ddump-llvm to dump the LLVM
> >> assembly and then ran llc -O3 on the resulting file to look at the
> >> native assembly.
> >>
> >> One of the generated functions starts off with:
> >> s5BH_info: # @s5BH_info
> >> # BB#0:
> >> subq $208, %rsp
> >> movq %r13, 200(%rsp)
> >> movq %rbp, 192(%rsp)
> >> movq %r12, 184(%rsp)
> >> movq %rbx, 176(%rsp)
> >> movq %r14, 168(%rsp)
> >> movq %rsi, 160(%rsp)
> >> movq %rdi, 152(%rsp)
> >> movq %r8, 144(%rsp)
> >> movq %r9, 136(%rsp)
> >> movq %r15, 128(%rsp)
> >> movss %xmm1, 124(%rsp)
> >> movss %xmm2, 120(%rsp)
> >> movss %xmm3, 116(%rsp)
> >> movss %xmm4, 112(%rsp)
> >> movsd %xmm5, 104(%rsp)
> >> movsd %xmm6, 96(%rsp)
> >>
> >> At some point down the line the function makes a tail call to itself,
> >> and this is the code generated:
> >> movq %r14, 168(%rsp)
> >> movq 200(%rsp), %r13
> >> movq 192(%rsp), %rbp
> >> movq 184(%rsp), %r12
> >> movq 176(%rsp), %rbx
> >> movq 128(%rsp), %r15
> >> movsd 104(%rsp), %xmm5
> >> addq $208, %rsp
> >> jmp s5BH_info
> >>
> >> So it looks like some values are being moved from registers to the stack
> >> only to be immediately moved from the stack back into registers on entry
> >> to the function. It should be possible to eliminate both the loads and
> >> the stores.
> >>
> >> Is this behaviour due to LLVM or GHC? If it is GHC, is this an
> >> optimization a newcomer can attempt to implement, or are there deep
> >> issues here?
> >>
> >> Jyotirmoy Bhattacharya
-------------- next part --------------
define internal cc 10 void @s5BH_info(i64* noalias nocapture %Base_Arg, i64* noalias nocapture %Sp_Arg, i64* noalias nocapture %Hp_Arg, i64 %R1_Arg, i64 %R2_Arg, i64 %R3_Arg, i64 %R4_Arg, i64 %R5_Arg, i64 %R6_Arg, i64 %SpLim_Arg, float %F1_Arg, float %F2_Arg, float %F3_Arg, float %F4_Arg, double %D1_Arg, double %D2_Arg) align 8 nounwind section "X98A__STRIP,__me1"
{
c6f9:
%Base_Var = alloca i64*, i32 1
store i64* %Base_Arg, i64** %Base_Var
%Sp_Var = alloca i64*, i32 1
store i64* %Sp_Arg, i64** %Sp_Var
%Hp_Var = alloca i64*, i32 1
store i64* %Hp_Arg, i64** %Hp_Var
%R1_Var = alloca i64, i32 1
store i64 %R1_Arg, i64* %R1_Var
%R2_Var = alloca i64, i32 1
store i64 %R2_Arg, i64* %R2_Var
%R3_Var = alloca i64, i32 1
store i64 %R3_Arg, i64* %R3_Var
%R4_Var = alloca i64, i32 1
store i64 %R4_Arg, i64* %R4_Var
%R5_Var = alloca i64, i32 1
store i64 %R5_Arg, i64* %R5_Var
%R6_Var = alloca i64, i32 1
store i64 %R6_Arg, i64* %R6_Var
%SpLim_Var = alloca i64, i32 1
store i64 %SpLim_Arg, i64* %SpLim_Var
%F1_Var = alloca float, i32 1
store float %F1_Arg, float* %F1_Var
%F2_Var = alloca float, i32 1
store float %F2_Arg, float* %F2_Var
%F3_Var = alloca float, i32 1
store float %F3_Arg, float* %F3_Var
%F4_Var = alloca float, i32 1
store float %F4_Arg, float* %F4_Var
%D1_Var = alloca double, i32 1
store double %D1_Arg, double* %D1_Var
%D2_Var = alloca double, i32 1
store double %D2_Arg, double* %D2_Var
%lc6ea = alloca i64, i32 1
%ls5B7 = alloca i64, i32 1
%ls5B9 = alloca i64, i32 1
%ls5Bb = alloca i64, i32 1
%ls5Ba = alloca i64, i32 1
%ls5Bf = alloca i64, i32 1
%ls5Bh = alloca i64, i32 1
%ls5Bj = alloca i64, i32 1
%ls5Bi = alloca i64, i32 1
%ls5Bm = alloca i64, i32 1
%ls5Bo = alloca i64, i32 1
%ls5FB = alloca i64, i32 1
%ls5Bq = alloca i64, i32 1
%ls5FA = alloca i64, i32 1
%ls5Bz = alloca double, i32 1
%ls5Fz = alloca i64, i32 1
%ls5Bv = alloca i64, i32 1
%ls5Fy = alloca i64, i32 1
%ls5BA = alloca double, i32 1
%ls5BB = alloca double, i32 1
%lc6eQ = alloca i64, i32 1
%ls5BE = alloca double, i32 1
%lc6eW = alloca i64, i32 1
%ls5Fx = alloca i64, i32 1
%lc6eZ = alloca i64, i32 1
%ls5FK = alloca i64, i32 1
%ls5FL = alloca i64, i32 1
%ls5FM = alloca i64, i32 1
%ln9iC = load i64* %R2_Var
%ln9iD = load i64* %R1_Var
%ln9iE = add i64 %ln9iD, 30
%ln9iF = inttoptr i64 %ln9iE to i64*
%ln9iG = load i64* %ln9iF, !tbaa !3
%ln9iH = icmp sge i64 %ln9iC, %ln9iG
%ln9iI = zext i1 %ln9iH to i64
store i64 %ln9iI, i64* %lc6ea
%ln9iJ = load i64* %lc6ea
%ln9iK = icmp uge i64 %ln9iJ, 1
br i1 %ln9iK, label %c6fb, label %n9iL
n9iL:
%ln9iM = load i64* %R2_Var
%ln9iN = load i64* %R1_Var
%ln9iO = add i64 %ln9iN, 22
%ln9iP = inttoptr i64 %ln9iO to i64*
%ln9iQ = load i64* %ln9iP, !tbaa !3
%ln9iR = sdiv i64 %ln9iM, %ln9iQ
store i64 %ln9iR, i64* %ls5B7
%ln9iS = load i64* %R2_Var
%ln9iT = load i64* %R1_Var
%ln9iU = add i64 %ln9iT, 22
%ln9iV = inttoptr i64 %ln9iU to i64*
%ln9iW = load i64* %ln9iV, !tbaa !3
%ln9iX = srem i64 %ln9iS, %ln9iW
store i64 %ln9iX, i64* %ls5B9
%ln9iY = load i64* %ls5B7
%ln9iZ = load i64* %R1_Var
%ln9j0 = add i64 %ln9iZ, 22
%ln9j1 = inttoptr i64 %ln9j0 to i64*
%ln9j2 = load i64* %ln9j1, !tbaa !3
%ln9j3 = mul i64 %ln9iY, %ln9j2
store i64 %ln9j3, i64* %ls5Bb
%ln9j4 = load i64* %ls5Bb
%ln9j5 = load i64* %ls5B9
%ln9j6 = add i64 %ln9j4, %ln9j5
store i64 %ln9j6, i64* %ls5Ba
%ln9j7 = load i64* %ls5Ba
%ln9j8 = load i64* %R1_Var
%ln9j9 = add i64 %ln9j8, 22
%ln9ja = inttoptr i64 %ln9j9 to i64*
%ln9jb = load i64* %ln9ja, !tbaa !3
%ln9jc = sdiv i64 %ln9j7, %ln9jb
store i64 %ln9jc, i64* %ls5Bf
%ln9jd = load i64* %ls5Ba
%ln9je = load i64* %R1_Var
%ln9jf = add i64 %ln9je, 22
%ln9jg = inttoptr i64 %ln9jf to i64*
%ln9jh = load i64* %ln9jg, !tbaa !3
%ln9ji = srem i64 %ln9jd, %ln9jh
store i64 %ln9ji, i64* %ls5Bh
%ln9jj = load i64* %ls5Bf
%ln9jk = load i64* %R1_Var
%ln9jl = add i64 %ln9jk, 22
%ln9jm = inttoptr i64 %ln9jl to i64*
%ln9jn = load i64* %ln9jm, !tbaa !3
%ln9jo = mul i64 %ln9jj, %ln9jn
store i64 %ln9jo, i64* %ls5Bj
%ln9jp = load i64* %ls5Bj
%ln9jq = load i64* %ls5Bh
%ln9jr = add i64 %ln9jp, %ln9jq
store i64 %ln9jr, i64* %ls5Bi
%ln9js = load i64* %ls5Bi
%ln9jt = load i64* %R1_Var
%ln9ju = add i64 %ln9jt, 22
%ln9jv = inttoptr i64 %ln9ju to i64*
%ln9jw = load i64* %ln9jv, !tbaa !3
%ln9jx = sdiv i64 %ln9js, %ln9jw
store i64 %ln9jx, i64* %ls5Bm
%ln9jy = load i64* %ls5Bi
%ln9jz = load i64* %R1_Var
%ln9jA = add i64 %ln9jz, 22
%ln9jB = inttoptr i64 %ln9jA to i64*
%ln9jC = load i64* %ln9jB, !tbaa !3
%ln9jD = srem i64 %ln9jy, %ln9jC
store i64 %ln9jD, i64* %ls5Bo
%ln9jE = load i64* %ls5Bm
%ln9jF = load i64* %R1_Var
%ln9jG = add i64 %ln9jF, 46
%ln9jH = inttoptr i64 %ln9jG to i64*
%ln9jI = load i64* %ln9jH, !tbaa !3
%ln9jJ = mul i64 %ln9jE, %ln9jI
store i64 %ln9jJ, i64* %ls5FB
%ln9jK = load i64* %ls5FB
%ln9jL = load i64* %ls5Bo
%ln9jM = add i64 %ln9jK, %ln9jL
store i64 %ln9jM, i64* %ls5Bq
%ln9jN = load i64* %R1_Var
%ln9jO = add i64 %ln9jN, 38
%ln9jP = inttoptr i64 %ln9jO to i64*
%ln9jQ = load i64* %ln9jP, !tbaa !3
%ln9jR = load i64* %ls5Bq
%ln9jS = add i64 %ln9jQ, %ln9jR
store i64 %ln9jS, i64* %ls5FA
%ln9jT = load i64* %R1_Var
%ln9jU = add i64 %ln9jT, 6
%ln9jV = inttoptr i64 %ln9jU to i64*
%ln9jW = load i64* %ln9jV, !tbaa !3
%ln9jX = add i64 %ln9jW, 16
%ln9jY = load i64* %ls5FA
%ln9jZ = shl i64 %ln9jY, 3
%ln9k0 = add i64 %ln9jX, %ln9jZ
%ln9k1 = inttoptr i64 %ln9k0 to double*
%ln9k2 = load double* %ln9k1, !tbaa !5
store double %ln9k2, double* %ls5Bz
%ln9k3 = load i64* %ls5Bm
%ln9k4 = load i64* %R1_Var
%ln9k5 = add i64 %ln9k4, 62
%ln9k6 = inttoptr i64 %ln9k5 to i64*
%ln9k7 = load i64* %ln9k6, !tbaa !3
%ln9k8 = mul i64 %ln9k3, %ln9k7
store i64 %ln9k8, i64* %ls5Fz
%ln9k9 = load i64* %ls5Fz
%ln9ka = load i64* %ls5Bo
%ln9kb = add i64 %ln9k9, %ln9ka
store i64 %ln9kb, i64* %ls5Bv
%ln9kc = load i64* %R1_Var
%ln9kd = add i64 %ln9kc, 54
%ln9ke = inttoptr i64 %ln9kd to i64*
%ln9kf = load i64* %ln9ke, !tbaa !3
%ln9kg = load i64* %ls5Bv
%ln9kh = add i64 %ln9kf, %ln9kg
store i64 %ln9kh, i64* %ls5Fy
%ln9ki = load i64* %R1_Var
%ln9kj = add i64 %ln9ki, 14
%ln9kk = inttoptr i64 %ln9kj to i64*
%ln9kl = load i64* %ln9kk, !tbaa !3
%ln9km = add i64 %ln9kl, 16
%ln9kn = load i64* %ls5Fy
%ln9ko = shl i64 %ln9kn, 3
%ln9kp = add i64 %ln9km, %ln9ko
%ln9kq = inttoptr i64 %ln9kp to double*
%ln9kr = load double* %ln9kq, !tbaa !5
store double %ln9kr, double* %ls5BA
%ln9ks = load double* %ls5Bz
%ln9kt = load double* %ls5BA
%ln9ku = fsub double %ln9ks, %ln9kt
store double %ln9ku, double* %ls5BB
%ln9kv = load double* %ls5BB
%ln9kw = fcmp oge double %ln9kv, 0x0000000000000000
%ln9kx = zext i1 %ln9kw to i64
store i64 %ln9kx, i64* %lc6eQ
%ln9ky = load i64* %lc6eQ
%ln9kz = icmp uge i64 %ln9ky, 1
br i1 %ln9kz, label %c6fd, label %n9kA
n9kA:
%ln9kB = load double* %ls5BB
%ln9kC = fsub double 0x8000000000000000, %ln9kB
store double %ln9kC, double* %ls5BE
%ln9kD = load double* %ls5BE
%ln9kE = load double* %D1_Var
%ln9kF = fcmp ole double %ln9kD, %ln9kE
%ln9kG = zext i1 %ln9kF to i64
store i64 %ln9kG, i64* %lc6eW
%ln9kH = load i64* %lc6eW
%ln9kI = icmp uge i64 %ln9kH, 1
br i1 %ln9kI, label %c6fg, label %n9kJ
n9kJ:
%ln9kK = load i64* %R2_Var
%ln9kL = add i64 %ln9kK, 1
store i64 %ln9kL, i64* %ls5Fx
%ln9kM = load i64* %ls5Fx
store i64 %ln9kM, i64* %R2_Var
%ln9kN = load double* %ls5BE
store double %ln9kN, double* %D1_Var
%ln9kO = load i64** %Base_Var
%ln9kP = load i64** %Sp_Var
%ln9kQ = load i64** %Hp_Var
%ln9kR = load i64* %R1_Var
%ln9kS = load i64* %R2_Var
%ln9kT = load i64* %SpLim_Var
%ln9kU = load double* %D1_Var
tail call cc 10 void (i64*,i64*,i64*,i64,i64,i64,i64,i64,i64,i64,float,float,float,float,double,double)* @s5BH_info( i64* %ln9kO, i64* %ln9kP, i64* %ln9kQ, i64 %ln9kR, i64 %ln9kS, i64 undef, i64 undef, i64 undef, i64 undef, i64 %ln9kT, float undef, float undef, float undef, float undef, double %ln9kU, double undef ) nounwind
ret void
c6fb:
%ln9kV = load double* %D1_Var
store double %ln9kV, double* %D1_Var
%ln9kW = load i64** %Sp_Var
%ln9kX = getelementptr inbounds i64* %ln9kW, i32 0
%ln9kY = bitcast i64* %ln9kX to i64*
%ln9kZ = load i64* %ln9kY, !tbaa !1
%ln9l0 = inttoptr i64 %ln9kZ to void (i64*, i64*, i64*, i64, i64, i64, i64, i64, i64, i64, float, float, float, float, double, double)*
%ln9l1 = load i64** %Base_Var
%ln9l2 = load i64** %Sp_Var
%ln9l3 = load i64** %Hp_Var
%ln9l4 = load i64* %R1_Var
%ln9l5 = load i64* %SpLim_Var
%ln9l6 = load double* %D1_Var
tail call cc 10 void (i64*,i64*,i64*,i64,i64,i64,i64,i64,i64,i64,float,float,float,float,double,double)* %ln9l0( i64* %ln9l1, i64* %ln9l2, i64* %ln9l3, i64 %ln9l4, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 %ln9l5, float undef, float undef, float undef, float undef, double %ln9l6, double undef ) nounwind
ret void
c6fd:
%ln9l7 = load double* %ls5BB
%ln9l8 = load double* %D1_Var
%ln9l9 = fcmp ole double %ln9l7, %ln9l8
%ln9la = zext i1 %ln9l9 to i64
store i64 %ln9la, i64* %lc6eZ
%ln9lb = load i64* %lc6eZ
%ln9lc = icmp uge i64 %ln9lb, 1
br i1 %ln9lc, label %c6fj, label %n9ld
n9ld:
%ln9le = load i64* %R2_Var
%ln9lf = add i64 %ln9le, 1
store i64 %ln9lf, i64* %ls5FK
%ln9lg = load i64* %ls5FK
store i64 %ln9lg, i64* %R2_Var
%ln9lh = load double* %ls5BB
store double %ln9lh, double* %D1_Var
%ln9li = load i64** %Base_Var
%ln9lj = load i64** %Sp_Var
%ln9lk = load i64** %Hp_Var
%ln9ll = load i64* %R1_Var
%ln9lm = load i64* %R2_Var
%ln9ln = load i64* %SpLim_Var
%ln9lo = load double* %D1_Var
tail call cc 10 void (i64*,i64*,i64*,i64,i64,i64,i64,i64,i64,i64,float,float,float,float,double,double)* @s5BH_info( i64* %ln9li, i64* %ln9lj, i64* %ln9lk, i64 %ln9ll, i64 %ln9lm, i64 undef, i64 undef, i64 undef, i64 undef, i64 %ln9ln, float undef, float undef, float undef, float undef, double %ln9lo, double undef ) nounwind
ret void
c6fj:
%ln9lp = load i64* %R2_Var
%ln9lq = add i64 %ln9lp, 1
store i64 %ln9lq, i64* %ls5FL
%ln9lr = load i64* %ls5FL
store i64 %ln9lr, i64* %R2_Var
%ln9ls = load i64** %Base_Var
%ln9lt = load i64** %Sp_Var
%ln9lu = load i64** %Hp_Var
%ln9lv = load i64* %R1_Var
%ln9lw = load i64* %R2_Var
%ln9lx = load i64* %SpLim_Var
%ln9ly = load double* %D1_Var
tail call cc 10 void (i64*,i64*,i64*,i64,i64,i64,i64,i64,i64,i64,float,float,float,float,double,double)* @s5BH_info( i64* %ln9ls, i64* %ln9lt, i64* %ln9lu, i64 %ln9lv, i64 %ln9lw, i64 undef, i64 undef, i64 undef, i64 undef, i64 %ln9lx, float undef, float undef, float undef, float undef, double %ln9ly, double undef ) nounwind
ret void
c6fg:
%ln9lz = load i64* %R2_Var
%ln9lA = add i64 %ln9lz, 1
store i64 %ln9lA, i64* %ls5FM
%ln9lB = load i64* %ls5FM
store i64 %ln9lB, i64* %R2_Var
%ln9lC = load i64** %Base_Var
%ln9lD = load i64** %Sp_Var
%ln9lE = load i64** %Hp_Var
%ln9lF = load i64* %R1_Var
%ln9lG = load i64* %R2_Var
%ln9lH = load i64* %SpLim_Var
%ln9lI = load double* %D1_Var
tail call cc 10 void (i64*,i64*,i64*,i64,i64,i64,i64,i64,i64,i64,float,float,float,float,double,double)* @s5BH_info( i64* %ln9lC, i64* %ln9lD, i64* %ln9lE, i64 %ln9lF, i64 %ln9lG, i64 undef, i64 undef, i64 undef, i64 undef, i64 %ln9lH, float undef, float undef, float undef, float undef, double %ln9lI, double undef ) nounwind
ret void
}
-------------- next part --------------
s5BH_info: # @s5BH_info
# BB#0: # %c6f9
subq $208, %rsp
movq %r13, 200(%rsp)
movq %rbp, 192(%rsp)
movq %r12, 184(%rsp)
movq %rbx, 176(%rsp)
movq %r14, 168(%rsp)
movq %rsi, 160(%rsp)
movq %rdi, 152(%rsp)
movq %r8, 144(%rsp)
movq %r9, 136(%rsp)
movq %r15, 128(%rsp)
movss %xmm1, 124(%rsp)
movss %xmm2, 120(%rsp)
movss %xmm3, 116(%rsp)
movss %xmm4, 112(%rsp)
movsd %xmm5, 104(%rsp)
movsd %xmm6, 96(%rsp)
movq 168(%rsp), %rax
movq 176(%rsp), %rcx
cmpq 30(%rcx), %rax
setge %al
movzbl %al, %eax
movq %rax, 88(%rsp)
jl .LBB99_1
# BB#5: # %c6fb
movsd 104(%rsp), %xmm5
movq 192(%rsp), %rbp
movq (%rbp), %rax
movq 200(%rsp), %r13
movq 184(%rsp), %r12
movq 176(%rsp), %rbx
movq 128(%rsp), %r15
addq $208, %rsp
jmpq *%rax # TAILCALL
.LBB99_1: # %n9iL
movq 168(%rsp), %rax
movq 176(%rsp), %rcx
cqto
idivq 22(%rcx)
movq %rax, 80(%rsp)
movq 168(%rsp), %rax
movq 176(%rsp), %rcx
cqto
idivq 22(%rcx)
movq %rdx, 72(%rsp)
movq 80(%rsp), %rax
movq 176(%rsp), %rcx
imulq 22(%rcx), %rax
movq %rax, 64(%rsp)
addq 72(%rsp), %rax
movq %rax, 56(%rsp)
movq 176(%rsp), %rcx
cqto
idivq 22(%rcx)
movq %rax, 48(%rsp)
movq 56(%rsp), %rax
movq 176(%rsp), %rcx
cqto
idivq 22(%rcx)
movq %rdx, 40(%rsp)
movq 48(%rsp), %rax
movq 176(%rsp), %rcx
imulq 22(%rcx), %rax
movq %rax, 32(%rsp)
addq 40(%rsp), %rax
movq %rax, 24(%rsp)
movq 176(%rsp), %rcx
cqto
idivq 22(%rcx)
movq %rax, 16(%rsp)
movq 24(%rsp), %rax
movq 176(%rsp), %rcx
cqto
idivq 22(%rcx)
movq %rdx, 8(%rsp)
movq 16(%rsp), %rax
movq 176(%rsp), %rcx
imulq 46(%rcx), %rax
movq %rax, (%rsp)
addq 8(%rsp), %rax
movq %rax, -8(%rsp)
movq 176(%rsp), %rcx
addq 38(%rcx), %rax
movq %rax, -16(%rsp)
movq 176(%rsp), %rcx
movq 6(%rcx), %rcx
movsd 16(%rcx,%rax,8), %xmm0
movsd %xmm0, -24(%rsp)
movq 16(%rsp), %rax
movq 176(%rsp), %rcx
imulq 62(%rcx), %rax
movq %rax, -32(%rsp)
addq 8(%rsp), %rax
movq %rax, -40(%rsp)
movq 176(%rsp), %rcx
addq 54(%rcx), %rax
movq %rax, -48(%rsp)
movq 176(%rsp), %rcx
movq 14(%rcx), %rcx
movsd 16(%rcx,%rax,8), %xmm0
movsd %xmm0, -56(%rsp)
movsd -24(%rsp), %xmm1
subsd %xmm0, %xmm1
xorps %xmm0, %xmm0
ucomisd %xmm0, %xmm1
movsd %xmm1, -64(%rsp)
setae %al
movzbl %al, %eax
movq %rax, -72(%rsp)
jb .LBB99_2
# BB#6: # %c6fd
movsd 104(%rsp), %xmm0
ucomisd -64(%rsp), %xmm0
setae %al
movzbl %al, %eax
movq %rax, -104(%rsp)
jb .LBB99_7
# BB#8: # %c6fj
movq 168(%rsp), %r14
incq %r14
movq %r14, -120(%rsp)
jmp .LBB99_9
.LBB99_2: # %n9kA
movsd -64(%rsp), %xmm0
xorpd .LCPI99_0(%rip), %xmm0
movsd %xmm0, -80(%rsp)
movsd 104(%rsp), %xmm1
ucomisd %xmm0, %xmm1
setae %al
movzbl %al, %eax
movq %rax, -88(%rsp)
jb .LBB99_3
# BB#10: # %c6fg
movq 168(%rsp), %r14
incq %r14
movq %r14, -128(%rsp)
.LBB99_9: # %c6fj
movq %r14, 168(%rsp)
movq 200(%rsp), %r13
movq 192(%rsp), %rbp
movq 184(%rsp), %r12
movq 176(%rsp), %rbx
movq 128(%rsp), %r15
movsd 104(%rsp), %xmm5
addq $208, %rsp
jmp s5BH_info # TAILCALL
.LBB99_7: # %n9ld
movq 168(%rsp), %rax
incq %rax
movq %rax, -112(%rsp)
movq %rax, 168(%rsp)
movsd -64(%rsp), %xmm5
jmp .LBB99_4
.LBB99_3: # %n9kJ
movq 168(%rsp), %rax
incq %rax
movq %rax, -96(%rsp)
movq %rax, 168(%rsp)
movsd -80(%rsp), %xmm5
.LBB99_4: # %n9kJ
movsd %xmm5, 104(%rsp)
movq 200(%rsp), %r13
movq 192(%rsp), %rbp
movq 184(%rsp), %r12
movq 176(%rsp), %rbx
movq 168(%rsp), %r14
movq 128(%rsp), %r15
addq $208, %rsp
jmp s5BH_info # TAILCALL
-------------- next part --------------
iter_s5BH_info()
{ label: iter_s5BH_info
rep:HeapRep 2 ptrs 6 nonptrs {
Fun {arity: 2 fun_type: ArgGen [True, True]} }
}
c6f9:
_c6ea::I64 = %MO_S_Ge_W64(R2, I64[R1 + 30]);
;
if (_c6ea::I64 >= 1) goto c6fb;
_s5B7::I64 = %MO_S_Quot_W64(R2, I64[R1 + 22]);
_s5B9::I64 = %MO_S_Rem_W64(R2, I64[R1 + 22]);
_s5Bb::I64 = _s5B7::I64 * I64[R1 + 22];
_s5Ba::I64 = _s5Bb::I64 + _s5B9::I64;
_s5Bf::I64 = %MO_S_Quot_W64(_s5Ba::I64, I64[R1 + 22]);
_s5Bh::I64 = %MO_S_Rem_W64(_s5Ba::I64, I64[R1 + 22]);
_s5Bj::I64 = _s5Bf::I64 * I64[R1 + 22];
_s5Bi::I64 = _s5Bj::I64 + _s5Bh::I64;
_s5Bm::I64 = %MO_S_Quot_W64(_s5Bi::I64, I64[R1 + 22]);
_s5Bo::I64 = %MO_S_Rem_W64(_s5Bi::I64, I64[R1 + 22]);
_s5FB::I64 = _s5Bm::I64 * I64[R1 + 46];
_s5Bq::I64 = _s5FB::I64 + _s5Bo::I64;
_s5FA::I64 = I64[R1 + 38] + _s5Bq::I64;
_s5Bz::F64 = F64[I64[R1 + 6] + 16 + (_s5FA::I64 << 3)];
_s5Fz::I64 = _s5Bm::I64 * I64[R1 + 62];
_s5Bv::I64 = _s5Fz::I64 + _s5Bo::I64;
_s5Fy::I64 = I64[R1 + 54] + _s5Bv::I64;
_s5BA::F64 = F64[I64[R1 + 14] + 16 + (_s5Fy::I64 << 3)];
_s5BB::F64 = %MO_F_Sub_W64(_s5Bz::F64, _s5BA::F64);
_c6eQ::I64 = %MO_F_Ge_W64(_s5BB::F64, 0.0 :: W64);
;
if (_c6eQ::I64 >= 1) goto c6fd;
_s5BE::F64 = %MO_F_Neg_W64(_s5BB::F64);
_c6eW::I64 = %MO_F_Le_W64(_s5BE::F64, D1);
;
if (_c6eW::I64 >= 1) goto c6fg;
_s5Fx::I64 = R2 + 1;
R2 = _s5Fx::I64;
D1 = _s5BE::F64;
jump iter_s5BH_info; // [R1, D1, R2]
c6fb:
D1 = D1;
jump (I64[Sp + 0]); // [D1]
c6fd:
_c6eZ::I64 = %MO_F_Le_W64(_s5BB::F64, D1);
;
if (_c6eZ::I64 >= 1) goto c6fj;
_s5FK::I64 = R2 + 1;
R2 = _s5FK::I64;
D1 = _s5BB::F64;
jump iter_s5BH_info; // [R1, D1, R2]
c6fj:
_s5FL::I64 = R2 + 1;
R2 = _s5FL::I64;
jump iter_s5BH_info; // [R1, D1, R2]
c6fg:
_s5FM::I64 = R2 + 1;
R2 = _s5FM::I64;
jump iter_s5BH_info; // [R1, D1, R2]
},