[Haskell-cafe] how to make haskell faster than python at finding primes?

Mon Aug 6 07:30:43 EDT 2007

Paulo Tanimoto wrote:
>> The challenge was the implement the modcount algorithm not to calculate
>> primes per se.
>> (see e.g. http://jjinux.blogspot.com/2005/11/io-comparison.html).
> 
> Can you show us the Python code?

Note this is python for the naive, accumulate and do modulus version. 
Not for modcount.  See below for ocaml version of modcount.

Having slept a few hours, I still think the modcount version should be 
faster than the naive version because you don't have to recalculate a 
full modulues operator for each new value.  You just increment and check 
equality.  So would love to get short fast haskell modcount.

-Alex-

---

#!/usr/bin/env python -OO

"""Find prime numbers.  See usage() for more information.

Author: JJ Behrens
Date: Sun Dec 30 03:36:58 PST 2001
Copyright: (c) JJ Behrens
Description:

Find prime numbers.  See usage() for more information.  The algorithm used
to determine if a given number, n, is prime is to keep a list of prime
numbers, p's, less than n and check if any p is a factor of n.

"""

import sys

"""Output usage information to the user.

mesg -- If this is not NULL, it will be output first as an error message.

"""
def usage(mesg):
   if mesg: sys.stderr.write("Error: %s\n" % mesg)
   sys.stderr.write("Usage: %s NUMBER_OF_PRIMES\n" % sys.argv[0])
   sys.stderr.write("Print out the first NUMBER_OF_PRIMES primes.\n")
   if mesg: sys.exit(1)
   else: sys.exit(0)

"""Output a prime number in a nice manner."""
def printPrime(p): print p

"""Is numCurr prime?

primeRecList -- This is the list of primes less than num_curr.

"""
def isPrime(numCurr, primeRecList):
   for p in primeRecList:
     if not numCurr % p: return 0
   else: return 1

"""Print out the first numPrimes primes.

numPrimes must be positive, of course.

"""
FIRST_PRIME = 2
def findPrimes(numPrimes):
   numCurr = FIRST_PRIME - 1
   primeRecList = []
   while numPrimes > 0:
     numCurr += 1
     if isPrime(numCurr, primeRecList):
         numPrimes -= 1
         printPrime(numCurr)
         primeRecList.append(numCurr)

if len(sys.argv) != 2: usage("missing NUMBER_OF_PRIMES")
try:
   numPrimes = int(sys.argv[1])
   if numPrimes < 1: raise ValueError
except ValueError: usage("NUMBER_OF_PRIMES must be a positive integer")
findPrimes(numPrimes)

------------
(* Author: JJ Behrens
    Date: Sun Nov  4 02:42:42 PST 2001
    Copyright: (c) JJ Behrens
    Description:

    Find prime numbers.  See usage() for more information.  The algorithm
    used to determine if a given number, n, is prime is to keep a list of
    tuples (p, mc) where each p is a prime less than n and each mc is
    n % p.  If n is prime, then no mc is 0.  The effeciency of this
    algorithm is wholly determined by how efficiently one can maintain this
    list.  mc does not need to be recalculated using a full % operation
    when moving from n to n + 1 (increment and then reset to 0 if mc = p).
    Furthermore, another performance enhancement is to use lazy evaluation
    of mc (i.e. collect multiple increments into one addition and one
    modulo--this avoids a traversal of the entire list for values of n that
    are easy to factor).  As far as I know, I'm the inventor of this
    algorithm. *)

(* We'll contain a list of [prime_rec]'s that replace the simple list of
    primes that are used in simple algorithms.

    [prime] This is the prime, as before.

    [count] Given [n], [count] = [n] % [prime].

    [updated] One way to keep [count] up to date is to update it for each
      new [n].  However, this would traversing the entire list of
      [prime_rec]'s for each new value of [n].  Hence, we'll only update
      [count] each time that [prime] is considered as a possible factor
      of [n].  When we do update [count], we'll set [updated] to [n].
      E.g., if [count] has not been updated since [n1] and [n] is now [n2],
      then [updated] will be [n1].  If [prime] is now considered as a
      factor of [n2], then we'll set [updated] to [n2] and [count] to
      [count] + [n2] - [n1] % [prime].  If [count] is now 0, [prime] is
      indeed a factor of [n2].
*)
type prime_rec =
   { prime : int;
     mutable count: int;
     mutable updated: int }

(* Output usage information to the user.  If [mesg] is provided, it will
    be output first as an error message. *)
let usage ?(mesg = "") () =
   if not (mesg = "") then Printf.fprintf stderr "Error: %s\n" mesg;
   Printf.fprintf stderr "Usage: %s NUMBER_OF_PRIMES\n" Sys.argv.(0);
   prerr_string "Print out the first NUMBER_OF_PRIMES primes.\n";
   if mesg = "" then exit 0 else exit 1

(* Output a prime number in a nice manner. *)
let print_prime p =
   Printf.printf "%d\n" p

(* Find [numerator] % [divisor] quickly by assuming that [numerator] will
    usually be less than [opt_tries] * [divisor].  Just leave [opt_tries]
    to its default value unless you plan on doing some tuning. *)
let rec fast_mod ?(opt_tries = 2) numerator divisor =
   match opt_tries with
     0 -> numerator mod divisor
   | _ -> begin
     if numerator < divisor then numerator
     else fast_mod ~opt_tries:(opt_tries - 1) (numerator - divisor) divisor
   end

(* Loop over the [prime_rec_list] and look for a factor of [num_curr].  Do
    updates to the [prime_rec]'s as described in the definition of
    [prime_rec].  If we find a factor, immediately return false.  Otherwise,
    continue until we prove that no prime in [prime_rec_list] is a factor
    of [num_curr], at which time we can return true. *)
let rec is_prime num_curr prime_rec_list =
   match prime_rec_list with
     [] -> true
   | head :: tail -> begin
     let overflowed = head.count + num_curr - head.updated in
     head.count <- (fast_mod overflowed head.prime);
     head.updated <- num_curr;
     if head.count = 0 then false
     else is_prime num_curr tail
   end

(* Print out the first [num_primes] primes.  [num_primes] must be positive.
    Leave everything else to its default value. *)
let rec find_primes ?(num_curr = 2) ?(prime_rec_list = []) num_primes =
   match num_primes with
     0 -> ()
   | _ -> begin
     if is_prime num_curr prime_rec_list then begin
       print_prime num_curr;
       let to_append = { prime = num_curr;
                         count = 0;
                         updated = num_curr } in
       find_primes ~num_curr:(num_curr + 1)
                   (* Smallest numbers first for performance. *)
                   ~prime_rec_list:(List.append prime_rec_list [to_append])
                   (num_primes - 1)
     end else find_primes ~num_curr:(num_curr + 1)
                          ~prime_rec_list:prime_rec_list
                          num_primes
   end

let main () =
   try
     if (Array.length Sys.argv) != 2
     then usage ~mesg:"missing NUMBER_OF_PRIMES" ();
     let num_primes = int_of_string Sys.argv.(1) in
     if num_primes < 1 then usage ~mesg:"NUMBER_OF_PRIMES must be 
positive" ();
     find_primes num_primes;
     exit 0
   with Failure why -> usage ~mesg:"NUMBER_OF_PRIMES must be an integer" 
();;

main ()