[commit: nofib] master: Add the fasta shootout benchmark (3f1d50e)

Johan Tibell johan.tibell at gmail.com
Wed Feb 6 19:12:11 CET 2013


Repository : ssh://darcs.haskell.org//srv/darcs/nofib

On branch  : master

http://hackage.haskell.org/trac/ghc/changeset/3f1d50e465cf5c575836c4777e4385b58d952b87

>---------------------------------------------------------------

commit 3f1d50e465cf5c575836c4777e4385b58d952b87
Author: Johan Tibell <johan.tibell at gmail.com>
Date:   Wed Feb 6 09:46:52 2013 -0800

    Add the fasta shootout benchmark
    
    The benchmark is not enabled by default as the runstdtest.prl script
    doesn't find the automated stdout files on the first run, thus requiring
    make to be run twice.

>---------------------------------------------------------------

 .gitignore               |    1 +
 shootout/fasta/Main.hs   |   58 +++++++++++++++++++
 shootout/fasta/Makefile  |   38 +++++++++++++
 shootout/fasta/fasta-c.c |  137 ++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 234 insertions(+), 0 deletions(-)

diff --git a/.gitignore b/.gitignore
index a778995..c3908ab 100644
--- a/.gitignore
+++ b/.gitignore
@@ -53,6 +53,7 @@ real/veritas/veritas
 
 shootout/binary-trees/binary-trees
 shootout/fannkuch-redux/fannkuch-redux
+shootout/fasta/fasta
 shootout/n-body/n-body
 shootout/pidigits/pidigits
 shootout/spectral-norm/spectral-norm
diff --git a/shootout/fasta/Main.hs b/shootout/fasta/Main.hs
new file mode 100644
index 0000000..4bd0849
--- /dev/null
+++ b/shootout/fasta/Main.hs
@@ -0,0 +1,58 @@
+{-  The Computer Language Benchmarks Game 
+
+    http://benchmarkgame.alioth.debian.org/
+
+    contributed by Bryan O'Sullivan
+-}
+
+import Control.Monad
+import Data.ByteString.Unsafe
+import Foreign.Ptr
+import Foreign.Storable
+import System.Environment
+import qualified Data.ByteString.Char8 as B
+import qualified Data.ByteString.Lazy.Char8 as L
+
+main = do
+    n <- getArgs >>= readIO.head
+    writeAlu ">ONE Homo sapiens alu" (L.take (fromIntegral n*2) (L.cycle alu))
+    make ">TWO IUB ambiguity codes" (n*3) iub 42 >>=
+      void . make ">THREE Homo sapiens frequency" (n*5) homosapiens
+
+writeAlu name s0 = B.putStrLn name >> go s0
+ where go s = L.putStrLn h >> unless (L.null t) (go t)
+         where (h,t) = L.splitAt 60 s
+
+make name n0 tbl seed0 = do
+  B.putStrLn name
+  let modulus = 139968
+      fill ((c,p):cps) j =
+	let !k = min modulus (floor (fromIntegral modulus * (p::Float) + 1))
+	in B.replicate (k - j) c : fill cps k
+      fill _ _ = []
+      lookupTable = B.concat $ fill (scanl1 (\(_,p) (c,q) -> (c,p+q)) tbl) 0
+      line = B.replicate 60 '\0'
+  unsafeUseAsCString line $ \ptr -> do
+    let make' n !i seed
+	    | n > (0::Int) = do
+		let newseed = rem (seed * 3877 + 29573) modulus
+		plusPtr ptr i `poke` unsafeIndex lookupTable newseed
+		if i+1 >= 60
+		    then puts line 60 >> make' (n-1) 0 newseed
+		    else make' (n-1) (i+1) newseed
+	    | otherwise = when (i > 0) (puts line i) >> return seed
+    make' n0 0 seed0
+
+alu = "GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGG\
+    \TCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGG\
+    \CGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGC\
+    \GGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA"
+
+iub = [('a',0.27),('c',0.12),('g',0.12),('t',0.27),('B',0.02)
+      ,('D',0.02),('H',0.02),('K',0.02),('M',0.02),('N',0.02)
+      ,('R',0.02),('S',0.02),('V',0.02),('W',0.02),('Y',0.02)]
+
+homosapiens = [('a',0.3029549426680),('c',0.1979883004921)
+              ,('g',0.1975473066391),('t',0.3015094502008)]
+
+puts bs n = B.putStrLn (B.take n bs)
diff --git a/shootout/fasta/Makefile b/shootout/fasta/Makefile
new file mode 100644
index 0000000..fe28652
--- /dev/null
+++ b/shootout/fasta/Makefile
@@ -0,0 +1,38 @@
+TOP = ../..
+include $(TOP)/mk/boilerplate.mk
+
+# Override default SRCS; the default is all source files, but
+# we don't want to include fasta-c.c
+SRCS = Main.hs
+
+FAST_OPTS = 250000
+NORM_OPTS = 2500000
+SLOW_OPTS = 25000000  # official shootout setting
+
+# The benchmark game also uses -fllvm, which we can't since it might
+# not be available on the developer's machine.
+HC_OPTS += -O2 -XBangPatterns -XOverloadedStrings -package bytestring
+
+#------------------------------------------------------------------
+# Create output to validate against
+
+# FIXME: You have to run make twice for the runstdtest.prl script to
+# find the various stdout files correctly.
+
+fasta-c : fasta-c.o
+	gcc $< -o $@
+
+fasta.faststdout : fasta-c
+	./fasta-c $(FAST_OPTS) > $@
+
+fasta.stdout : fasta-c
+	./fasta-c $(NORM_OPTS) > $@
+
+fasta.slowstdout : fasta-c
+	./fasta-c $(SLOW_OPTS) > $@
+
+STDOUT_FILES = fasta.faststdout fasta.stdout fasta.slowstdout
+
+all boot :: $(STDOUT_FILES)
+
+include $(TOP)/mk/target.mk
diff --git a/shootout/fasta/fasta-c.c b/shootout/fasta/fasta-c.c
new file mode 100644
index 0000000..5779316
--- /dev/null
+++ b/shootout/fasta/fasta-c.c
@@ -0,0 +1,137 @@
+/* The Computer Language Benchmarks Game
+ * http://benchmarksgame.alioth.debian.org/
+ *
+ *  contributed by Mr Ledrug
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+typedef struct {
+   float p;
+   char c;
+} amino;
+
+amino iub[] = {
+   { 0.27, 'a' }, { 0.12, 'c' }, { 0.12, 'g' },
+   { 0.27, 't' }, { 0.02, 'B' }, { 0.02, 'D' },
+   { 0.02, 'H' }, { 0.02, 'K' }, { 0.02, 'M' },
+   { 0.02, 'N' }, { 0.02, 'R' }, { 0.02, 'S' },
+   { 0.02, 'V' }, { 0.02, 'W' }, { 0.02, 'Y' },
+   { 0, 0 }
+};
+
+amino homosapiens[] = {
+   {0.3029549426680, 'a'},
+   {0.1979883004921, 'c'},
+   {0.1975473066391, 'g'},
+   {0.3015094502008, 't'},
+   {0, 0}
+};
+
+#define RMAX 139968U
+#define RA 3877U
+#define RC 29573U
+#define WIDTH 60
+#define LENGTH(a) (sizeof(a)/sizeof(a[0]))
+
+inline void str_write(char *s) {
+   write(fileno(stdout), s, strlen(s));
+}
+
+void str_repeat(char *s, int outlen) {
+   int len = strlen(s) * (1 + WIDTH);
+   outlen += outlen / WIDTH;
+
+   char *ss = s;
+   char *buf = malloc(len);
+   int pos = 0;
+
+   while (pos < len) {
+      if (!*ss) ss = s;
+      buf[pos++] = *ss++;
+      if (pos >= len) break;
+      if (pos % (WIDTH + 1) == WIDTH)
+         buf[pos++] = '\n';
+   }
+
+   int fd = fileno(stdout);
+   int l = 0;
+   while (outlen > 0) {
+      l = outlen > len ? len : outlen;
+      write(fd, buf, l);
+      outlen -= len;
+   }
+   if (buf[l-1] != '\n') str_write("\n");
+
+   free(buf);
+}
+
+static char *alu =
+   "GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG"
+   "GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA"
+   "CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT"
+   "ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA"
+   "GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG"
+   "AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC"
+   "AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA";
+
+inline unsigned int rnd(void) {
+   static unsigned rseed = 42;
+   return rseed = (rseed * RA + RC) % RMAX;
+}
+
+char lookup[RMAX];
+void rand_fasta(amino *s, size_t outlen) {
+   int fd = fileno(stdout);
+   char buf[WIDTH+1];
+
+   int i, j, k;
+   float sum = 0;
+   for (i = j = k = 0; s[i].p && k < RMAX; i++) {
+      if (s[i].p) {
+         sum += s[i].p;
+         k = RMAX * sum + 1;
+      }
+      else
+         k = RMAX;
+      if (k > RMAX) k = RMAX;
+      memset(lookup + j, s[i].c, k - j);
+      j = k;
+   }
+
+   i = 0;
+   buf[WIDTH] = '\n';
+   while (outlen--) {
+      buf[i++] = lookup[rnd()];
+      if (i == WIDTH) {
+         write(fd, buf, WIDTH + 1);
+         i = 0;
+      }
+   }
+   if (i) {
+      buf[i] = '\n';
+      write(fd, buf, i + 1);
+   }
+}
+
+int main(int argc, char **argv) {
+   int n;
+   if (argc < 2 || (n = atoi(argv[1])) <= 0) {
+      printf("usage: %s length\n", argv[0]);
+      return 0;
+   }
+
+   str_write(">ONE Homo sapiens alu\n");
+   str_repeat(alu, n * 2);
+
+   str_write(">TWO IUB ambiguity codes\n");
+   rand_fasta(iub, n * 3);
+
+   str_write(">THREE Homo sapiens frequency\n");
+   rand_fasta(homosapiens, n * 5);
+
+   return 0;
+}





More information about the ghc-commits mailing list