[Haskell-cafe] C-like Haskell

John Meacham john at repetae.net
Wed Feb 18 00:01:44 EST 2009


On Wed, Jan 28, 2009 at 04:42:49PM -0800, drblanco wrote:
> Here's my attempt, which takes about 75s for r=10^8.
> 
> circ2 r = (1+4*r) + 4 * (circ2' (rs+1) r 1 0)
>           where
>             rs = r^2
>             circ2' rad x y sum
>                 | x<y =	sum
>                 | rad<=rs = circ2' (rad+1+2*y) x (y+1) (sum+1+2*(x-y))
>                 | otherwise = circ2' (rad+1-2*x) (x-1) y sum
> 
> For comparison, the C code below runs in <1 second.
> 
> typedef unsigned long long bigint;
> 
> bigint gausscount(bigint r) {
>         bigint sum=0;
>         bigint x, y;
>         bigint rs=r*r;
>         bigint rad=rs+1;
>         x=r; y=1;
> 
>         while (y<x) {
>                 while (rad>rs) {
>                         rad-=2*x;
>                         rad++;
>                         x--;
>                 }
>                 sum+=1+2*(x-y);
>                 rad+=2*y+1;
>                 y++;
> 
>         }
>         sum*=4;
>         sum++;
> 
>         return sum;
> }


for the curious, here is the C output of jhc when fed circ2
annotated with the obvious bang-patterns (the strictness analyzer still
needs some help):

other than giving every intermediate value a name, and only performing
one operation a line (which doesn't bother gcc at all), it is pretty
darn close to the hand optimized C code...

variable guide: 
x -> v86 
y -> v88
rs -> v4146
rad -> v84
and sum is accumulated in v90


> static uint64_t
> circ2(uint64_t v12)
> {
>         uint64_t v212;
>         uint64_t v4146 = (v12 * v12);
>         uint64_t v4758 = (4 * v12);
>         uint64_t v4786 = (1 + v4758);
>         uint64_t v4160 = (1 + v4146);
>         uint64_t v84 = v4160;
>         uint64_t v86 = v12;
>         uint64_t v88 = 1;
>         uint64_t v90 = 0;
>         fW_a__fMain__3_ucirc2_t_u2:;
>         {   uint16_t v100000 = (((int64_t)v86) < ((int64_t)v88));
>             if (0 == v100000) {
>                 uint16_t v100002 = (((int64_t)v84) <= ((int64_t)v4146));
>                 if (0 == v100002) {
>                     uint64_t v4338 = (1 + v84);
>                     uint64_t v4352 = (2 * v86);
>                     uint64_t v4366 = (v4338 - v4352);
>                     uint64_t v4380 = (v86 - 1);
>                     v84 = v4366;
>                     v86 = v4380;
>                     v88 = v88;
>                     v90 = v90;
>                     goto fW_a__fMain__3_ucirc2_t_u2;
>                 } else {
>                     /* 1 */
>                     uint64_t v4226 = (1 + v90);
>                     uint64_t v4772 = (v86 - v88);
>                     uint64_t v4282 = (2 * v4772);
>                     uint64_t v4296 = (v4226 + v4282);
>                     uint64_t v4310 = (1 + v88);
>                     uint64_t v4240 = (1 + v84);
>                     uint64_t v4254 = (2 * v88);
>                     uint64_t v4324 = (v4240 + v4254);
>                     v84 = v4324;
>                     v86 = v86;
>                     v88 = v4310;
>                     v90 = v4296;
>                     goto fW_a__fMain__3_ucirc2_t_u2;
>                 }
>             } else {
>                 /* 1 */
>                 v212 = v90;
>             }
>         }
>         uint64_t v4174 = (4 * v212);
>         return v4786 + v4174;
> }


        John

-- 
John Meacham - ⑆repetae.net⑆john⑈


More information about the Haskell-Cafe mailing list