[GHC] #8860: Optimized Cmm isn't
GHC
ghc-devs at haskell.org
Sat Mar 8 08:33:06 UTC 2014
#8860: Optimized Cmm isn't
------------------------------------+-------------------------------------
Reporter: tibbe | Owner:
Type: bug | Status: new
Priority: normal | Milestone:
Component: Compiler | Version: 7.6.3
Keywords: | Operating System: Unknown/Multiple
Architecture: Unknown/Multiple | Type of failure: None/Unknown
Difficulty: Unknown | Test Case:
Blocked By: | Blocking:
Related Tickets: |
------------------------------------+-------------------------------------
The optimizer seems to miss very basic optimizations. For example, look at
this segment (full source below):
{{{
_c1Cd::I64 = 0;
_c1Ce::I64 = 0;
_c1Cf::I64 = 8;
call MO_Memset(_c1Cc::I64, _c1Cd::I64, _c1Ce::I64, _c1Cf::I64);
}}}
I'd expect all these temporaries (that are only used once) to be inlined,
but they are not and thus we fail to see the static arguments to the
`MO_Memset` call, which leads to further missed optimizations.
This used to work in the old codegen.
Furthermore, there are useless basic blocks in the output:
{{{
c1Cm:
goto c1C4;
c1C4:
}}}
I'd expect them to be eliminated.
.dump-cmm file:
{{{
==================== Cmm produced by new codegen ====================
2014-03-08 08:25:31.728672 UTC
[section "data" {
a_r1za_closure:
const a_r1za_info;
},
a_r1za_entry() // []
{ info_tbl: [(c1Cm,
label: a_r1za_info
rep:HeapRep static { Fun {arity: 1 fun_type:
ArgSpec 3} })]
stack_info: arg_space: 8 updfr_space: Just 8
}
{offset
c1Cm:
goto c1C4;
c1C4:
if ((old + 0) - <highSp> < SpLim) goto c1Cp; else goto c1Cq;
c1Cq:
goto c1C3;
c1C3:
Hp = Hp + 152;
if (Hp > HpLim) goto c1Cs; else goto c1Cr;
c1Cs:
HpAlloc = 152;
goto c1Cp;
c1Cp:
R1 = a_r1za_closure;
call (stg_gc_fun)(R1) args: 8, res: 0, upd: 8;
c1Cr:
I64[Hp - 144] = stg_MUT_ARR_PTRS_DIRTY_info;
I64[Hp - 136] = 16;
I64[Hp - 128] = 16;
_c1C6::I64 = Hp - 144;
_c1C7::I64 = _c1C6::I64 + 24;
goto c1C8;
c1C8:
if (_c1C7::I64 < (_c1C6::I64 + 128)) goto c1Ca; else goto c1C9;
c1Ca:
P64[_c1C7::I64] = ()_closure+1;
_c1C7::I64 = _c1C7::I64 + 8;
goto c1C8;
c1C9:
_c1Cb::I64 = _c1C6::I64 + 24;
_c1Cc::I64 = _c1Cb::I64 + 128;
_c1Cd::I64 = 0;
_c1Ce::I64 = 0;
_c1Cf::I64 = 8;
call MO_Memset(_c1Cc::I64, _c1Cd::I64, _c1Ce::I64, _c1Cf::I64);
_s1BY::P64 = _c1C6::I64;
_s1BY::P64 = _s1BY::P64;
goto c1Ch;
c1Ch:
_s1C1::P64 = _s1BY::P64;
goto c1Ck;
c1Ck:
R1 = ()_closure+1;
call (P64[(old + 8)])(R1) args: 8, res: 0, upd: 8;
}
}]
==================== Post control-flow optimisations ====================
2014-03-08 08:25:31.729764 UTC
{offset
c1Cm:
if ((old + 0) - <highSp> < SpLim) goto c1Cp; else goto c1Cq;
c1Cq:
Hp = Hp + 152;
if (Hp > HpLim) goto c1Cs; else goto c1Cr;
c1Cs:
HpAlloc = 152;
goto c1Cp;
c1Cp:
R1 = a_r1za_closure;
call (stg_gc_fun)(R1) args: 8, res: 0, upd: 8;
c1Cr:
I64[Hp - 144] = stg_MUT_ARR_PTRS_DIRTY_info;
I64[Hp - 136] = 16;
I64[Hp - 128] = 16;
_c1C6::I64 = Hp - 144;
_c1C7::I64 = _c1C6::I64 + 24;
goto c1C8;
c1C8:
if (_c1C7::I64 < (_c1C6::I64 + 128)) goto c1Ca; else goto c1C9;
c1Ca:
P64[_c1C7::I64] = ()_closure+1;
_c1C7::I64 = _c1C7::I64 + 8;
goto c1C8;
c1C9:
_c1Cb::I64 = _c1C6::I64 + 24;
_c1Cc::I64 = _c1Cb::I64 + 128;
_c1Cd::I64 = 0;
_c1Ce::I64 = 0;
_c1Cf::I64 = 8;
call MO_Memset(_c1Cc::I64, _c1Cd::I64, _c1Ce::I64, _c1Cf::I64);
_s1BY::P64 = _c1C6::I64;
_s1BY::P64 = _s1BY::P64;
_s1C1::P64 = _s1BY::P64;
R1 = ()_closure+1;
call (P64[(old + 8)])(R1) args: 8, res: 0, upd: 8;
}
==================== Layout Stack ====================
2014-03-08 08:25:31.730253 UTC
{offset
c1Cm:
goto c1Cq;
c1Cq:
Hp = Hp + 152;
if (Hp > HpLim) goto c1Cs; else goto c1Cr;
c1Cs:
HpAlloc = 152;
goto c1Cp;
c1Cp:
R1 = a_r1za_closure;
call (stg_gc_fun)(R1) args: 8, res: 0, upd: 8;
c1Cr:
I64[Hp - 144] = stg_MUT_ARR_PTRS_DIRTY_info;
I64[Hp - 136] = 16;
I64[Hp - 128] = 16;
_c1C6::I64 = Hp - 144;
_c1C7::I64 = _c1C6::I64 + 24;
goto c1C8;
c1C8:
if (_c1C7::I64 < (_c1C6::I64 + 128)) goto c1Ca; else goto c1C9;
c1Ca:
P64[_c1C7::I64] = ()_closure+1;
_c1C7::I64 = _c1C7::I64 + 8;
goto c1C8;
c1C9:
_c1Cb::I64 = _c1C6::I64 + 24;
_c1Cc::I64 = _c1Cb::I64 + 128;
_c1Cd::I64 = 0;
_c1Ce::I64 = 0;
_c1Cf::I64 = 8;
call MO_Memset(_c1Cc::I64, _c1Cd::I64, _c1Ce::I64, _c1Cf::I64);
_s1BY::P64 = _c1C6::I64;
_s1BY::P64 = _s1BY::P64;
_s1C1::P64 = _s1BY::P64;
R1 = ()_closure+1;
call (P64[Sp])(R1) args: 8, res: 0, upd: 8;
}
==================== CAFEnv ====================
2014-03-08 08:25:31.73074 UTC
[(c1C8, {}), (c1C9, {}), (c1Ca, {}), (c1Cm, {}), (c1Cp, {}),
(c1Cq, {}), (c1Cr, {}), (c1Cs, {})]
==================== after setInfoTableStackMap ====================
2014-03-08 08:25:31.730895 UTC
a_r1za_entry() // []
{ info_tbl: [(c1Cm,
label: a_r1za_info
rep:HeapRep static { Fun {arity: 1 fun_type: ArgSpec
3} })]
stack_info: arg_space: 8 updfr_space: Just 8
}
{offset
c1Cm:
goto c1Cq;
c1Cq:
Hp = Hp + 152;
if (Hp > HpLim) goto c1Cs; else goto c1Cr;
c1Cs:
HpAlloc = 152;
goto c1Cp;
c1Cp:
R1 = a_r1za_closure;
call (stg_gc_fun)(R1) args: 8, res: 0, upd: 8;
c1Cr:
I64[Hp - 144] = stg_MUT_ARR_PTRS_DIRTY_info;
I64[Hp - 136] = 16;
I64[Hp - 128] = 16;
_c1C6::I64 = Hp - 144;
_c1C7::I64 = _c1C6::I64 + 24;
goto c1C8;
c1C8:
if (_c1C7::I64 < (_c1C6::I64 + 128)) goto c1Ca; else goto c1C9;
c1Ca:
P64[_c1C7::I64] = ()_closure+1;
_c1C7::I64 = _c1C7::I64 + 8;
goto c1C8;
c1C9:
_c1Cb::I64 = _c1C6::I64 + 24;
_c1Cc::I64 = _c1Cb::I64 + 128;
_c1Cd::I64 = 0;
_c1Ce::I64 = 0;
_c1Cf::I64 = 8;
call MO_Memset(_c1Cc::I64, _c1Cd::I64, _c1Ce::I64, _c1Cf::I64);
_s1BY::P64 = _c1C6::I64;
_s1BY::P64 = _s1BY::P64;
_s1C1::P64 = _s1BY::P64;
R1 = ()_closure+1;
call (P64[Sp])(R1) args: 8, res: 0, upd: 8;
}
}
==================== Post control-flow optimisations ====================
2014-03-08 08:25:31.731383 UTC
a_r1za_entry() // []
{ info_tbl: [(c1Cm,
label: a_r1za_info
rep:HeapRep static { Fun {arity: 1 fun_type: ArgSpec
3} })]
stack_info: arg_space: 8 updfr_space: Just 8
}
{offset
c1Cm:
goto c1Cq;
c1Cq:
Hp = Hp + 152;
if (Hp > HpLim) goto c1Cs; else goto c1Cr;
c1Cs:
HpAlloc = 152;
goto c1Cp;
c1Cp:
R1 = a_r1za_closure;
call (stg_gc_fun)(R1) args: 8, res: 0, upd: 8;
c1Cr:
I64[Hp - 144] = stg_MUT_ARR_PTRS_DIRTY_info;
I64[Hp - 136] = 16;
I64[Hp - 128] = 16;
_c1C6::I64 = Hp - 144;
_c1C7::I64 = _c1C6::I64 + 24;
goto c1C8;
c1C8:
if (_c1C7::I64 < (_c1C6::I64 + 128)) goto c1Ca; else goto c1C9;
c1Ca:
P64[_c1C7::I64] = ()_closure+1;
_c1C7::I64 = _c1C7::I64 + 8;
goto c1C8;
c1C9:
_c1Cb::I64 = _c1C6::I64 + 24;
_c1Cc::I64 = _c1Cb::I64 + 128;
_c1Cd::I64 = 0;
_c1Ce::I64 = 0;
_c1Cf::I64 = 8;
call MO_Memset(_c1Cc::I64, _c1Cd::I64, _c1Ce::I64, _c1Cf::I64);
_s1BY::P64 = _c1C6::I64;
_s1BY::P64 = _s1BY::P64;
_s1C1::P64 = _s1BY::P64;
R1 = ()_closure+1;
call (P64[Sp])(R1) args: 8, res: 0, upd: 8;
}
}
==================== Post CPS Cmm ====================
2014-03-08 08:25:31.731772 UTC
[section "data" {
a_r1za_closure:
const a_r1za_info;
},
a_r1za_entry() // []
{ info_tbl: [(c1Cm,
label: a_r1za_info
rep:HeapRep static { Fun {arity: 1 fun_type:
ArgSpec 3} })]
stack_info: arg_space: 8 updfr_space: Just 8
}
{offset
c1Cm:
goto c1Cq;
c1Cq:
Hp = Hp + 152;
if (Hp > HpLim) goto c1Cs; else goto c1Cr;
c1Cs:
HpAlloc = 152;
goto c1Cp;
c1Cp:
R1 = a_r1za_closure;
call (stg_gc_fun)(R1) args: 8, res: 0, upd: 8;
c1Cr:
I64[Hp - 144] = stg_MUT_ARR_PTRS_DIRTY_info;
I64[Hp - 136] = 16;
I64[Hp - 128] = 16;
_c1C6::I64 = Hp - 144;
_c1C7::I64 = _c1C6::I64 + 24;
goto c1C8;
c1C8:
if (_c1C7::I64 < (_c1C6::I64 + 128)) goto c1Ca; else goto c1C9;
c1Ca:
P64[_c1C7::I64] = ()_closure+1;
_c1C7::I64 = _c1C7::I64 + 8;
goto c1C8;
c1C9:
_c1Cb::I64 = _c1C6::I64 + 24;
_c1Cc::I64 = _c1Cb::I64 + 128;
_c1Cd::I64 = 0;
_c1Ce::I64 = 0;
_c1Cf::I64 = 8;
call MO_Memset(_c1Cc::I64, _c1Cd::I64, _c1Ce::I64, _c1Cf::I64);
_s1BY::P64 = _c1C6::I64;
_s1BY::P64 = _s1BY::P64;
_s1C1::P64 = _s1BY::P64;
R1 = ()_closure+1;
call (P64[Sp])(R1) args: 8, res: 0, upd: 8;
}
}]
==================== Output Cmm ====================
2014-03-08 08:25:31.73228 UTC
[section "data" {
a_r1za_closure:
const a_r1za_info;
},
a_r1za_entry() // []
{ info_tbl: [(c1Cm,
label: a_r1za_info
rep:HeapRep static { Fun {arity: 1 fun_type:
ArgSpec 3} })]
stack_info: arg_space: 8 updfr_space: Just 8
}
{offset
c1Cm:
goto c1Cq;
c1Cq:
Hp = Hp + 152;
if (Hp > HpLim) goto c1Cs; else goto c1Cr;
c1Cs:
HpAlloc = 152;
goto c1Cp;
c1Cp:
R1 = a_r1za_closure;
call (stg_gc_fun)(R1) args: 8, res: 0, upd: 8;
c1Cr:
I64[Hp - 144] = stg_MUT_ARR_PTRS_DIRTY_info;
I64[Hp - 136] = 16;
I64[Hp - 128] = 16;
_c1C6::I64 = Hp - 144;
_c1C7::I64 = _c1C6::I64 + 24;
goto c1C8;
c1C8:
if (_c1C7::I64 < (_c1C6::I64 + 128)) goto c1Ca; else goto c1C9;
c1Ca:
P64[_c1C7::I64] = ()_closure+1;
_c1C7::I64 = _c1C7::I64 + 8;
goto c1C8;
c1C9:
_c1Cb::I64 = _c1C6::I64 + 24;
_c1Cc::I64 = _c1Cb::I64 + 128;
_c1Cd::I64 = 0;
_c1Ce::I64 = 0;
_c1Cf::I64 = 8;
call MO_Memset(_c1Cc::I64, _c1Cd::I64, _c1Ce::I64, _c1Cf::I64);
_s1BY::P64 = _c1C6::I64;
_s1BY::P64 = _s1BY::P64;
_s1C1::P64 = _s1BY::P64;
R1 = ()_closure+1;
call (P64[Sp])(R1) args: 8, res: 0, upd: 8;
}
}]
}}}
.dump-opt-cmm file:
{{{
==================== Optimised Cmm ====================
2014-03-08 08:25:31.732971 UTC
a_r1za_entry() // []
{ [(c1Cm,
a_r1za_info:
const 4294967299;
const 0;
const 15;)]
}
{offset
c1Cm:
goto c1Cq;
c1Cq:
Hp = Hp + 152;
if (Hp > I64[BaseReg + 856]) goto c1Cs; else goto c1Cr;
c1Cs:
I64[BaseReg + 904] = 152;
goto c1Cp;
c1Cp:
R1 = PicBaseReg + a_r1za_closure;
call (I64[BaseReg - 8])(R1) args: 8, res: 0, upd: 8;
c1Cr:
I64[Hp - 144] = I64[PicBaseReg +
stg_MUT_ARR_PTRS_DIRTY_info@GOTPCREL];
I64[Hp - 136] = 16;
I64[Hp - 128] = 16;
_c1C6::I64 = Hp - 144;
_c1C7::I64 = _c1C6::I64 + 24;
goto c1C8;
c1C8:
if (_c1C7::I64 < (_c1C6::I64 + 128)) goto c1Ca; else goto c1C9;
c1Ca:
I64[_c1C7::I64] = PicBaseReg + (()_closure+1);
_c1C7::I64 = _c1C7::I64 + 8;
goto c1C8;
c1C9:
_c1Cb::I64 = _c1C6::I64 + 24;
_c1Cc::I64 = _c1Cb::I64 + 128;
_c1Cd::I64 = 0;
_c1Ce::I64 = 0;
_c1Cf::I64 = 8;
call MO_Memset(_c1Cc::I64, _c1Cd::I64, _c1Ce::I64, _c1Cf::I64);
_s1BY::P64 = _c1C6::I64;
// nop
_s1C1::P64 = _s1BY::P64;
R1 = PicBaseReg + (()_closure+1);
call (P64[Sp])(R1) args: 8, res: 0, upd: 8;
}
}
}}}
--
Ticket URL: <http://ghc.haskell.org/trac/ghc/ticket/8860>
GHC <http://www.haskell.org/ghc/>
The Glasgow Haskell Compiler
More information about the ghc-tickets
mailing list