[GHC] #8860: Optimized Cmm isn't

GHC ghc-devs at haskell.org
Sat Mar 8 08:33:06 UTC 2014


#8860: Optimized Cmm isn't
------------------------------------+-------------------------------------
       Reporter:  tibbe             |             Owner:
           Type:  bug               |            Status:  new
       Priority:  normal            |         Milestone:
      Component:  Compiler          |           Version:  7.6.3
       Keywords:                    |  Operating System:  Unknown/Multiple
   Architecture:  Unknown/Multiple  |   Type of failure:  None/Unknown
     Difficulty:  Unknown           |         Test Case:
     Blocked By:                    |          Blocking:
Related Tickets:                    |
------------------------------------+-------------------------------------
 The optimizer seems to miss very basic optimizations. For example, look at
 this segment (full source below):

 {{{
            _c1Cd::I64 = 0;
            _c1Ce::I64 = 0;
            _c1Cf::I64 = 8;
            call MO_Memset(_c1Cc::I64, _c1Cd::I64, _c1Ce::I64, _c1Cf::I64);
 }}}

 I'd expect all these temporaries (which are each used only once) to be inlined,
 but they are not and thus we fail to see the static arguments to the
 `MO_Memset` call, which leads to further missed optimizations.

 This used to work in the old codegen.

 Furthermore, there are useless basic blocks in the output:

 {{{
        c1Cm:
            goto c1C4;
        c1C4:
 }}}

 I'd expect them to be eliminated.

 .dump-cmm file:

 {{{
 ==================== Cmm produced by new codegen ====================
 2014-03-08 08:25:31.728672 UTC

 [section "data" {
      a_r1za_closure:
          const a_r1za_info;
  },
  a_r1za_entry() //  []
          { info_tbl: [(c1Cm,
                        label: a_r1za_info
                        rep:HeapRep static { Fun {arity: 1 fun_type:
 ArgSpec 3} })]
            stack_info: arg_space: 8 updfr_space: Just 8
          }
      {offset
        c1Cm:
            goto c1C4;
        c1C4:
            if ((old + 0) - <highSp> < SpLim) goto c1Cp; else goto c1Cq;
        c1Cq:
            goto c1C3;
        c1C3:
            Hp = Hp + 152;
            if (Hp > HpLim) goto c1Cs; else goto c1Cr;
        c1Cs:
            HpAlloc = 152;
            goto c1Cp;
        c1Cp:
            R1 = a_r1za_closure;
            call (stg_gc_fun)(R1) args: 8, res: 0, upd: 8;
        c1Cr:
            I64[Hp - 144] = stg_MUT_ARR_PTRS_DIRTY_info;
            I64[Hp - 136] = 16;
            I64[Hp - 128] = 16;
            _c1C6::I64 = Hp - 144;
            _c1C7::I64 = _c1C6::I64 + 24;
            goto c1C8;
        c1C8:
            if (_c1C7::I64 < (_c1C6::I64 + 128)) goto c1Ca; else goto c1C9;
        c1Ca:
            P64[_c1C7::I64] = ()_closure+1;
            _c1C7::I64 = _c1C7::I64 + 8;
            goto c1C8;
        c1C9:
            _c1Cb::I64 = _c1C6::I64 + 24;
            _c1Cc::I64 = _c1Cb::I64 + 128;
            _c1Cd::I64 = 0;
            _c1Ce::I64 = 0;
            _c1Cf::I64 = 8;
            call MO_Memset(_c1Cc::I64, _c1Cd::I64, _c1Ce::I64, _c1Cf::I64);
            _s1BY::P64 = _c1C6::I64;
            _s1BY::P64 = _s1BY::P64;
            goto c1Ch;
        c1Ch:
            _s1C1::P64 = _s1BY::P64;
            goto c1Ck;
        c1Ck:
            R1 = ()_closure+1;
            call (P64[(old + 8)])(R1) args: 8, res: 0, upd: 8;
      }
  }]


 ==================== Post control-flow optimisations ====================
 2014-03-08 08:25:31.729764 UTC

 {offset
   c1Cm:
       if ((old + 0) - <highSp> < SpLim) goto c1Cp; else goto c1Cq;
   c1Cq:
       Hp = Hp + 152;
       if (Hp > HpLim) goto c1Cs; else goto c1Cr;
   c1Cs:
       HpAlloc = 152;
       goto c1Cp;
   c1Cp:
       R1 = a_r1za_closure;
       call (stg_gc_fun)(R1) args: 8, res: 0, upd: 8;
   c1Cr:
       I64[Hp - 144] = stg_MUT_ARR_PTRS_DIRTY_info;
       I64[Hp - 136] = 16;
       I64[Hp - 128] = 16;
       _c1C6::I64 = Hp - 144;
       _c1C7::I64 = _c1C6::I64 + 24;
       goto c1C8;
   c1C8:
       if (_c1C7::I64 < (_c1C6::I64 + 128)) goto c1Ca; else goto c1C9;
   c1Ca:
       P64[_c1C7::I64] = ()_closure+1;
       _c1C7::I64 = _c1C7::I64 + 8;
       goto c1C8;
   c1C9:
       _c1Cb::I64 = _c1C6::I64 + 24;
       _c1Cc::I64 = _c1Cb::I64 + 128;
       _c1Cd::I64 = 0;
       _c1Ce::I64 = 0;
       _c1Cf::I64 = 8;
       call MO_Memset(_c1Cc::I64, _c1Cd::I64, _c1Ce::I64, _c1Cf::I64);
       _s1BY::P64 = _c1C6::I64;
       _s1BY::P64 = _s1BY::P64;
       _s1C1::P64 = _s1BY::P64;
       R1 = ()_closure+1;
       call (P64[(old + 8)])(R1) args: 8, res: 0, upd: 8;
 }


 ==================== Layout Stack ====================
 2014-03-08 08:25:31.730253 UTC

 {offset
   c1Cm:
       goto c1Cq;
   c1Cq:
       Hp = Hp + 152;
       if (Hp > HpLim) goto c1Cs; else goto c1Cr;
   c1Cs:
       HpAlloc = 152;
       goto c1Cp;
   c1Cp:
       R1 = a_r1za_closure;
       call (stg_gc_fun)(R1) args: 8, res: 0, upd: 8;
   c1Cr:
       I64[Hp - 144] = stg_MUT_ARR_PTRS_DIRTY_info;
       I64[Hp - 136] = 16;
       I64[Hp - 128] = 16;
       _c1C6::I64 = Hp - 144;
       _c1C7::I64 = _c1C6::I64 + 24;
       goto c1C8;
   c1C8:
       if (_c1C7::I64 < (_c1C6::I64 + 128)) goto c1Ca; else goto c1C9;
   c1Ca:
       P64[_c1C7::I64] = ()_closure+1;
       _c1C7::I64 = _c1C7::I64 + 8;
       goto c1C8;
   c1C9:
       _c1Cb::I64 = _c1C6::I64 + 24;
       _c1Cc::I64 = _c1Cb::I64 + 128;
       _c1Cd::I64 = 0;
       _c1Ce::I64 = 0;
       _c1Cf::I64 = 8;
       call MO_Memset(_c1Cc::I64, _c1Cd::I64, _c1Ce::I64, _c1Cf::I64);
       _s1BY::P64 = _c1C6::I64;
       _s1BY::P64 = _s1BY::P64;
       _s1C1::P64 = _s1BY::P64;
       R1 = ()_closure+1;
       call (P64[Sp])(R1) args: 8, res: 0, upd: 8;
 }


 ==================== CAFEnv ====================
 2014-03-08 08:25:31.73074 UTC

 [(c1C8, {}), (c1C9, {}), (c1Ca, {}), (c1Cm, {}), (c1Cp, {}),
  (c1Cq, {}), (c1Cr, {}), (c1Cs, {})]


 ==================== after setInfoTableStackMap ====================
 2014-03-08 08:25:31.730895 UTC

 a_r1za_entry() //  []
         { info_tbl: [(c1Cm,
                       label: a_r1za_info
                       rep:HeapRep static { Fun {arity: 1 fun_type: ArgSpec
 3} })]
           stack_info: arg_space: 8 updfr_space: Just 8
         }
     {offset
       c1Cm:
           goto c1Cq;
       c1Cq:
           Hp = Hp + 152;
           if (Hp > HpLim) goto c1Cs; else goto c1Cr;
       c1Cs:
           HpAlloc = 152;
           goto c1Cp;
       c1Cp:
           R1 = a_r1za_closure;
           call (stg_gc_fun)(R1) args: 8, res: 0, upd: 8;
       c1Cr:
           I64[Hp - 144] = stg_MUT_ARR_PTRS_DIRTY_info;
           I64[Hp - 136] = 16;
           I64[Hp - 128] = 16;
           _c1C6::I64 = Hp - 144;
           _c1C7::I64 = _c1C6::I64 + 24;
           goto c1C8;
       c1C8:
           if (_c1C7::I64 < (_c1C6::I64 + 128)) goto c1Ca; else goto c1C9;
       c1Ca:
           P64[_c1C7::I64] = ()_closure+1;
           _c1C7::I64 = _c1C7::I64 + 8;
           goto c1C8;
       c1C9:
           _c1Cb::I64 = _c1C6::I64 + 24;
           _c1Cc::I64 = _c1Cb::I64 + 128;
           _c1Cd::I64 = 0;
           _c1Ce::I64 = 0;
           _c1Cf::I64 = 8;
           call MO_Memset(_c1Cc::I64, _c1Cd::I64, _c1Ce::I64, _c1Cf::I64);
           _s1BY::P64 = _c1C6::I64;
           _s1BY::P64 = _s1BY::P64;
           _s1C1::P64 = _s1BY::P64;
           R1 = ()_closure+1;
           call (P64[Sp])(R1) args: 8, res: 0, upd: 8;
     }
 }


 ==================== Post control-flow optimisations ====================
 2014-03-08 08:25:31.731383 UTC

 a_r1za_entry() //  []
         { info_tbl: [(c1Cm,
                       label: a_r1za_info
                       rep:HeapRep static { Fun {arity: 1 fun_type: ArgSpec
 3} })]
           stack_info: arg_space: 8 updfr_space: Just 8
         }
     {offset
       c1Cm:
           goto c1Cq;
       c1Cq:
           Hp = Hp + 152;
           if (Hp > HpLim) goto c1Cs; else goto c1Cr;
       c1Cs:
           HpAlloc = 152;
           goto c1Cp;
       c1Cp:
           R1 = a_r1za_closure;
           call (stg_gc_fun)(R1) args: 8, res: 0, upd: 8;
       c1Cr:
           I64[Hp - 144] = stg_MUT_ARR_PTRS_DIRTY_info;
           I64[Hp - 136] = 16;
           I64[Hp - 128] = 16;
           _c1C6::I64 = Hp - 144;
           _c1C7::I64 = _c1C6::I64 + 24;
           goto c1C8;
       c1C8:
           if (_c1C7::I64 < (_c1C6::I64 + 128)) goto c1Ca; else goto c1C9;
       c1Ca:
           P64[_c1C7::I64] = ()_closure+1;
           _c1C7::I64 = _c1C7::I64 + 8;
           goto c1C8;
       c1C9:
           _c1Cb::I64 = _c1C6::I64 + 24;
           _c1Cc::I64 = _c1Cb::I64 + 128;
           _c1Cd::I64 = 0;
           _c1Ce::I64 = 0;
           _c1Cf::I64 = 8;
           call MO_Memset(_c1Cc::I64, _c1Cd::I64, _c1Ce::I64, _c1Cf::I64);
           _s1BY::P64 = _c1C6::I64;
           _s1BY::P64 = _s1BY::P64;
           _s1C1::P64 = _s1BY::P64;
           R1 = ()_closure+1;
           call (P64[Sp])(R1) args: 8, res: 0, upd: 8;
     }
 }


 ==================== Post CPS Cmm ====================
 2014-03-08 08:25:31.731772 UTC

 [section "data" {
      a_r1za_closure:
          const a_r1za_info;
  },
  a_r1za_entry() //  []
          { info_tbl: [(c1Cm,
                        label: a_r1za_info
                        rep:HeapRep static { Fun {arity: 1 fun_type:
 ArgSpec 3} })]
            stack_info: arg_space: 8 updfr_space: Just 8
          }
      {offset
        c1Cm:
            goto c1Cq;
        c1Cq:
            Hp = Hp + 152;
            if (Hp > HpLim) goto c1Cs; else goto c1Cr;
        c1Cs:
            HpAlloc = 152;
            goto c1Cp;
        c1Cp:
            R1 = a_r1za_closure;
            call (stg_gc_fun)(R1) args: 8, res: 0, upd: 8;
        c1Cr:
            I64[Hp - 144] = stg_MUT_ARR_PTRS_DIRTY_info;
            I64[Hp - 136] = 16;
            I64[Hp - 128] = 16;
            _c1C6::I64 = Hp - 144;
            _c1C7::I64 = _c1C6::I64 + 24;
            goto c1C8;
        c1C8:
            if (_c1C7::I64 < (_c1C6::I64 + 128)) goto c1Ca; else goto c1C9;
        c1Ca:
            P64[_c1C7::I64] = ()_closure+1;
            _c1C7::I64 = _c1C7::I64 + 8;
            goto c1C8;
        c1C9:
            _c1Cb::I64 = _c1C6::I64 + 24;
            _c1Cc::I64 = _c1Cb::I64 + 128;
            _c1Cd::I64 = 0;
            _c1Ce::I64 = 0;
            _c1Cf::I64 = 8;
            call MO_Memset(_c1Cc::I64, _c1Cd::I64, _c1Ce::I64, _c1Cf::I64);
            _s1BY::P64 = _c1C6::I64;
            _s1BY::P64 = _s1BY::P64;
            _s1C1::P64 = _s1BY::P64;
            R1 = ()_closure+1;
            call (P64[Sp])(R1) args: 8, res: 0, upd: 8;
      }
  }]


 ==================== Output Cmm ====================
 2014-03-08 08:25:31.73228 UTC

 [section "data" {
      a_r1za_closure:
          const a_r1za_info;
  },
  a_r1za_entry() //  []
          { info_tbl: [(c1Cm,
                        label: a_r1za_info
                        rep:HeapRep static { Fun {arity: 1 fun_type:
 ArgSpec 3} })]
            stack_info: arg_space: 8 updfr_space: Just 8
          }
      {offset
        c1Cm:
            goto c1Cq;
        c1Cq:
            Hp = Hp + 152;
            if (Hp > HpLim) goto c1Cs; else goto c1Cr;
        c1Cs:
            HpAlloc = 152;
            goto c1Cp;
        c1Cp:
            R1 = a_r1za_closure;
            call (stg_gc_fun)(R1) args: 8, res: 0, upd: 8;
        c1Cr:
            I64[Hp - 144] = stg_MUT_ARR_PTRS_DIRTY_info;
            I64[Hp - 136] = 16;
            I64[Hp - 128] = 16;
            _c1C6::I64 = Hp - 144;
            _c1C7::I64 = _c1C6::I64 + 24;
            goto c1C8;
        c1C8:
            if (_c1C7::I64 < (_c1C6::I64 + 128)) goto c1Ca; else goto c1C9;
        c1Ca:
            P64[_c1C7::I64] = ()_closure+1;
            _c1C7::I64 = _c1C7::I64 + 8;
            goto c1C8;
        c1C9:
            _c1Cb::I64 = _c1C6::I64 + 24;
            _c1Cc::I64 = _c1Cb::I64 + 128;
            _c1Cd::I64 = 0;
            _c1Ce::I64 = 0;
            _c1Cf::I64 = 8;
            call MO_Memset(_c1Cc::I64, _c1Cd::I64, _c1Ce::I64, _c1Cf::I64);
            _s1BY::P64 = _c1C6::I64;
            _s1BY::P64 = _s1BY::P64;
            _s1C1::P64 = _s1BY::P64;
            R1 = ()_closure+1;
            call (P64[Sp])(R1) args: 8, res: 0, upd: 8;
      }
  }]
 }}}

 .dump-opt-cmm file:
 {{{
 ==================== Optimised Cmm ====================
 2014-03-08 08:25:31.732971 UTC

 a_r1za_entry() //  []
         { [(c1Cm,
             a_r1za_info:
                 const 4294967299;
                 const 0;
                 const 15;)]
         }
     {offset
       c1Cm:
           goto c1Cq;
       c1Cq:
           Hp = Hp + 152;
           if (Hp > I64[BaseReg + 856]) goto c1Cs; else goto c1Cr;
       c1Cs:
           I64[BaseReg + 904] = 152;
           goto c1Cp;
       c1Cp:
           R1 = PicBaseReg + a_r1za_closure;
           call (I64[BaseReg - 8])(R1) args: 8, res: 0, upd: 8;
       c1Cr:
           I64[Hp - 144] = I64[PicBaseReg +
 stg_MUT_ARR_PTRS_DIRTY_info@GOTPCREL];
           I64[Hp - 136] = 16;
           I64[Hp - 128] = 16;
           _c1C6::I64 = Hp - 144;
           _c1C7::I64 = _c1C6::I64 + 24;
           goto c1C8;
       c1C8:
           if (_c1C7::I64 < (_c1C6::I64 + 128)) goto c1Ca; else goto c1C9;
       c1Ca:
           I64[_c1C7::I64] = PicBaseReg + (()_closure+1);
           _c1C7::I64 = _c1C7::I64 + 8;
           goto c1C8;
       c1C9:
           _c1Cb::I64 = _c1C6::I64 + 24;
           _c1Cc::I64 = _c1Cb::I64 + 128;
           _c1Cd::I64 = 0;
           _c1Ce::I64 = 0;
           _c1Cf::I64 = 8;
           call MO_Memset(_c1Cc::I64, _c1Cd::I64, _c1Ce::I64, _c1Cf::I64);
           _s1BY::P64 = _c1C6::I64;
           // nop
           _s1C1::P64 = _s1BY::P64;
           R1 = PicBaseReg + (()_closure+1);
           call (P64[Sp])(R1) args: 8, res: 0, upd: 8;
     }
 }
 }}}

--
Ticket URL: <http://ghc.haskell.org/trac/ghc/ticket/8860>
GHC <http://www.haskell.org/ghc/>
The Glasgow Haskell Compiler


More information about the ghc-tickets mailing list