[Haskell-cafe] how to make haskell faster than python at finding
primes?
Alex Jacobson
alex at alexjacobson.com
Mon Aug 6 07:30:43 EDT 2007
Paulo Tanimoto wrote:
>> The challenge was the implement the modcount algorithm not to calculate
>> primes per se.
>> (see e.g. http://jjinux.blogspot.com/2005/11/io-comparison.html).
>
> Can you show us the Python code?
Note this is python for the naive, accumulate and do modulus version.
Not for modcount. See below for ocaml version of modcount.
Having slept a few hours, I still think the modcount version should be
faster than the naive version because you don't have to recalculate a
full modulues operator for each new value. You just increment and check
equality. So would love to get short fast haskell modcount.
-Alex-
---
#!/usr/bin/env python -OO
"""Find prime numbers. See usage() for more information.
Author: JJ Behrens
Date: Sun Dec 30 03:36:58 PST 2001
Copyright: (c) JJ Behrens
Description:
Find prime numbers. See usage() for more information. The algorithm used
to determine if a given number, n, is prime is to keep a list of prime
numbers, p's, less than n and check if any p is a factor of n.
"""
import sys
"""Output usage information to the user.
mesg -- If this is not NULL, it will be output first as an error message.
"""
def usage(mesg):
if mesg: sys.stderr.write("Error: %s\n" % mesg)
sys.stderr.write("Usage: %s NUMBER_OF_PRIMES\n" % sys.argv[0])
sys.stderr.write("Print out the first NUMBER_OF_PRIMES primes.\n")
if mesg: sys.exit(1)
else: sys.exit(0)
"""Output a prime number in a nice manner."""
def printPrime(p): print p
"""Is numCurr prime?
primeRecList -- This is the list of primes less than num_curr.
"""
def isPrime(numCurr, primeRecList):
for p in primeRecList:
if not numCurr % p: return 0
else: return 1
"""Print out the first numPrimes primes.
numPrimes must be positive, of course.
"""
FIRST_PRIME = 2
def findPrimes(numPrimes):
numCurr = FIRST_PRIME - 1
primeRecList = []
while numPrimes > 0:
numCurr += 1
if isPrime(numCurr, primeRecList):
numPrimes -= 1
printPrime(numCurr)
primeRecList.append(numCurr)
if len(sys.argv) != 2: usage("missing NUMBER_OF_PRIMES")
try:
numPrimes = int(sys.argv[1])
if numPrimes < 1: raise ValueError
except ValueError: usage("NUMBER_OF_PRIMES must be a positive integer")
findPrimes(numPrimes)
------------
(* Author: JJ Behrens
Date: Sun Nov 4 02:42:42 PST 2001
Copyright: (c) JJ Behrens
Description:
Find prime numbers. See usage() for more information. The algorithm
used to determine if a given number, n, is prime is to keep a list of
tuples (p, mc) where each p is a prime less than n and each mc is
n % p. If n is prime, then no mc is 0. The effeciency of this
algorithm is wholly determined by how efficiently one can maintain this
list. mc does not need to be recalculated using a full % operation
when moving from n to n + 1 (increment and then reset to 0 if mc = p).
Furthermore, another performance enhancement is to use lazy evaluation
of mc (i.e. collect multiple increments into one addition and one
modulo--this avoids a traversal of the entire list for values of n that
are easy to factor). As far as I know, I'm the inventor of this
algorithm. *)
(* We'll contain a list of [prime_rec]'s that replace the simple list of
primes that are used in simple algorithms.
[prime] This is the prime, as before.
[count] Given [n], [count] = [n] % [prime].
[updated] One way to keep [count] up to date is to update it for each
new [n]. However, this would traversing the entire list of
[prime_rec]'s for each new value of [n]. Hence, we'll only update
[count] each time that [prime] is considered as a possible factor
of [n]. When we do update [count], we'll set [updated] to [n].
E.g., if [count] has not been updated since [n1] and [n] is now [n2],
then [updated] will be [n1]. If [prime] is now considered as a
factor of [n2], then we'll set [updated] to [n2] and [count] to
[count] + [n2] - [n1] % [prime]. If [count] is now 0, [prime] is
indeed a factor of [n2].
*)
type prime_rec =
{ prime : int;
mutable count: int;
mutable updated: int }
(* Output usage information to the user. If [mesg] is provided, it will
be output first as an error message. *)
let usage ?(mesg = "") () =
if not (mesg = "") then Printf.fprintf stderr "Error: %s\n" mesg;
Printf.fprintf stderr "Usage: %s NUMBER_OF_PRIMES\n" Sys.argv.(0);
prerr_string "Print out the first NUMBER_OF_PRIMES primes.\n";
if mesg = "" then exit 0 else exit 1
(* Output a prime number in a nice manner. *)
let print_prime p =
Printf.printf "%d\n" p
(* Find [numerator] % [divisor] quickly by assuming that [numerator] will
usually be less than [opt_tries] * [divisor]. Just leave [opt_tries]
to its default value unless you plan on doing some tuning. *)
let rec fast_mod ?(opt_tries = 2) numerator divisor =
match opt_tries with
0 -> numerator mod divisor
| _ -> begin
if numerator < divisor then numerator
else fast_mod ~opt_tries:(opt_tries - 1) (numerator - divisor) divisor
end
(* Loop over the [prime_rec_list] and look for a factor of [num_curr]. Do
updates to the [prime_rec]'s as described in the definition of
[prime_rec]. If we find a factor, immediately return false. Otherwise,
continue until we prove that no prime in [prime_rec_list] is a factor
of [num_curr], at which time we can return true. *)
let rec is_prime num_curr prime_rec_list =
match prime_rec_list with
[] -> true
| head :: tail -> begin
let overflowed = head.count + num_curr - head.updated in
head.count <- (fast_mod overflowed head.prime);
head.updated <- num_curr;
if head.count = 0 then false
else is_prime num_curr tail
end
(* Print out the first [num_primes] primes. [num_primes] must be positive.
Leave everything else to its default value. *)
let rec find_primes ?(num_curr = 2) ?(prime_rec_list = []) num_primes =
match num_primes with
0 -> ()
| _ -> begin
if is_prime num_curr prime_rec_list then begin
print_prime num_curr;
let to_append = { prime = num_curr;
count = 0;
updated = num_curr } in
find_primes ~num_curr:(num_curr + 1)
(* Smallest numbers first for performance. *)
~prime_rec_list:(List.append prime_rec_list [to_append])
(num_primes - 1)
end else find_primes ~num_curr:(num_curr + 1)
~prime_rec_list:prime_rec_list
num_primes
end
let main () =
try
if (Array.length Sys.argv) != 2
then usage ~mesg:"missing NUMBER_OF_PRIMES" ();
let num_primes = int_of_string Sys.argv.(1) in
if num_primes < 1 then usage ~mesg:"NUMBER_OF_PRIMES must be
positive" ();
find_primes num_primes;
exit 0
with Failure why -> usage ~mesg:"NUMBER_OF_PRIMES must be an integer"
();;
main ()
More information about the Haskell-Cafe
mailing list