[commit: nofib] master: Add the reverse-complement shootout benchmark (3f5152b)

Johan Tibell johan.tibell at gmail.com
Wed Feb 6 20:02:43 CET 2013


Repository : ssh://darcs.haskell.org//srv/darcs/nofib

On branch  : master

http://hackage.haskell.org/trac/ghc/changeset/3f5152b0ce77947e14e61be456065495120feda8

>---------------------------------------------------------------

commit 3f5152b0ce77947e14e61be456065495120feda8
Author: Johan Tibell <johan.tibell at gmail.com>
Date:   Wed Feb 6 11:02:32 2013 -0800

    Add the reverse-complement shootout benchmark

>---------------------------------------------------------------

 .gitignore                              |   13 ++++
 shootout/reverse-complement/Main.hs     |   89 ++++++++++++++++++++++++++++
 shootout/reverse-complement/Makefile    |   80 +++++++++++++++++++++++++
 shootout/reverse-complement/revcomp-c.c |   96 +++++++++++++++++++++++++++++++
 4 files changed, 278 insertions(+), 0 deletions(-)

diff --git a/.gitignore b/.gitignore
index c3908ab..8b1066b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -54,8 +54,21 @@ real/veritas/veritas
 shootout/binary-trees/binary-trees
 shootout/fannkuch-redux/fannkuch-redux
 shootout/fasta/fasta
+shootout/fasta/fasta-c
+shootout/fasta/fasta.faststdout
+shootout/fasta/fasta.slowstdout
+shootout/fasta/fasta.stdout
 shootout/n-body/n-body
 shootout/pidigits/pidigits
+shootout/reverse-complement/fasta-c
+shootout/reverse-complement/revcomp-c
+shootout/reverse-complement/revcomp-input250000.txt
+shootout/reverse-complement/revcomp-input2500000.txt
+shootout/reverse-complement/revcomp-input25000000.txt
+shootout/reverse-complement/reverse-complement
+shootout/reverse-complement/reverse-complement.faststdout
+shootout/reverse-complement/reverse-complement.slowstdout
+shootout/reverse-complement/reverse-complement.stdout
 shootout/spectral-norm/spectral-norm
 
 spectral/ansi/ansi
diff --git a/shootout/reverse-complement/Main.hs b/shootout/reverse-complement/Main.hs
new file mode 100644
index 0000000..8c29474
--- /dev/null
+++ b/shootout/reverse-complement/Main.hs
@@ -0,0 +1,89 @@
+{-
+The Computer Language Benchmarks Game
+http://benchmarksgame.alioth.debian.org/
+
+contributed by Louis Wasserman
+-}
+
+import Control.Monad
+import Foreign
+import Data.ByteString.Internal
+import System.IO
+
+data Buf = Buf !Int !Int !(Ptr Word8) 
+
+withBuf run = run . Buf 0 ini =<< mallocBytes ini
+  where ini = 1024
+
+newSize len sz
+  | len <= sz  = sz
+  | otherwise  = newSize len (2 * sz)
+
+{-# INLINE putBuf #-}
+putBuf pS lS (Buf lD szD pD) run
+  | lD' > szD  = do
+    let szD' = newSize lD' szD
+    pD' <- reallocBytes pD szD'
+    copyArray (pD' +* lD) pS lS
+    run (Buf lD' szD' pD')
+  | otherwise  = do
+    copyArray (pD +* lD) pS lS
+    run (Buf lD' szD pD)
+  where lD' = lD + lS
+
+findChar p n c zero one = do
+    q <- memchr p c (fromIntegral (n :: Int))
+    if q == nullPtr then zero else one $! q `minusPtr` p
+
+clearBuf (Buf _ lB pB) = Buf 0 lB pB
+
+main = allocaArray 82 $ \ line ->
+  let go !buf = do
+      !m <- hGetBuf stdin line 82
+      if m == 0 then revcomp buf else do
+        findChar line m (c2w '>') 
+          (putBuf line m buf go)
+          (\ end -> do
+            putBuf line end buf revcomp
+            putBuf (line +* end) (m - end) (clearBuf buf)
+              go)
+    in withBuf go
+
+(+*) = advancePtr
+
+{-# INLINE comps #-}
+comps = Prelude.zipWith (\ a b -> (fromEnum a, c2w b)) "AaCcGgTtUuMmRrYyKkVvHhDdBb" 
+  "TTGGCCAAAAKKYYRRMMBBDDHHVV"
+
+ca :: Ptr Word8
+ca = inlinePerformIO $ do
+       !a <- mallocArray 200
+       mapM_ (\ i -> pokeByteOff a (fromIntegral i) i ) [0..199::Word8]
+       mapM_ (uncurry (pokeByteOff a)) comps
+       return a
+
+revcomp (Buf lBuf _ pBuf) = when (lBuf > 0) $ ca `seq`
+  findChar pBuf lBuf (c2w '\n') undefined $ \ begin -> let
+    begin' = begin + 1
+    rc :: Ptr Word8 -> Ptr Word8 -> IO ()
+    rc !i !j | i < j = do
+      x <- peek i
+      if x == c2w '\n' then let !i' = i +* 1 in rc1 j i' =<< peek i'
+        else rc1 j i x
+    rc i j = when (i == j) (poke i =<< comp =<< peek i)
+    
+    rc1 !j !i !xi = do
+      y <- peek j
+      if y == c2w '\n' then let !j' = j +* (-1) in rc2 i xi j' =<< peek j'
+        else rc2 i xi j y
+    
+    comp = peekElemOff ca . fromIntegral
+    
+    rc2 !i !xi !j !xj = do
+      poke j =<< comp xi
+      poke i =<< comp xj
+      rc (i +* 1) (j +* (-1))
+    in do
+      hPutBuf stdout pBuf begin'
+      rc (pBuf +* begin') (pBuf +* (lBuf - 1))
+      hPutBuf stdout (pBuf +* begin') (lBuf - begin - 1)
diff --git a/shootout/reverse-complement/Makefile b/shootout/reverse-complement/Makefile
new file mode 100644
index 0000000..96d7e87
--- /dev/null
+++ b/shootout/reverse-complement/Makefile
@@ -0,0 +1,80 @@
+TOP = ../..
+include $(TOP)/mk/boilerplate.mk
+
+# Override default SRCS; the default is all source files, but
+# we don't want to include revcomp-c.c
+SRCS = Main.hs
+
+# These values are only used in this file. They are ignored by the
+# executable itself.
+FAST_OPTS = 250000
+NORM_OPTS = 2500000
+SLOW_OPTS = 25000000  # official shootout setting
+
+# The benchmark game also uses -fllvm, which we can't since it might
+# not be available on the developer's machine.
+HC_OPTS += -O2 -XBangPatterns -funfolding-use-threshold=32 -XMagicHash \
+	-XUnboxedTuples
+
+#------------------------------------------------------------------
+# Create input
+
+fasta-c : ../fasta/fasta-c.o
+	gcc $< -o $@
+
+revcomp-input250000.txt : fasta-c
+	./fasta-c $(FAST_OPTS) > $@
+
+revcomp-input2500000.txt : fasta-c
+	./fasta-c $(NORM_OPTS) > $@
+
+revcomp-input25000000.txt : fasta-c
+	./fasta-c $(SLOW_OPTS) > $@
+
+ifeq "$(mode)" "slow"
+ INPUT_FILE = revcomp-input25000000.txt
+else
+ ifeq "$(mode)" "fast"
+  INPUT_FILE = revcomp-input250000.txt
+ else
+  INPUT_FILE = revcomp-input2500000.txt
+ endif
+endif
+
+SRC_RUNTEST_OPTS += -i $(INPUT_FILE)
+
+all boot :: $(INPUT_FILE)
+
+#------------------------------------------------------------------
+# Create output to validate against
+
+revcomp-c : revcomp-c.o
+	gcc $< -o $@
+
+reverse-complement.faststdout : revcomp-c
+	./revcomp-c < $(INPUT_FILE) > $@
+
+reverse-complement.stdout : revcomp-c
+	./revcomp-c < $(INPUT_FILE) > $@
+
+reverse-complement.slowstdout : revcomp-c
+	./revcomp-c < $(INPUT_FILE) > $@
+
+# Since the stdout files are created during the run the runstdtest
+# script doesn't correctly pick them up, so we have to specify them
+# explicitly here.
+ifeq "$(mode)" "slow"
+ STDOUT_FILE = reverse-complement.slowstdout
+else
+ ifeq "$(mode)" "fast"
+  STDOUT_FILE = reverse-complement.faststdout
+ else
+  STDOUT_FILE = reverse-complement.stdout
+ endif
+endif
+
+SRC_RUNTEST_OPTS += -o1 $(STDOUT_FILE) 
+
+all boot :: $(STDOUT_FILE)
+
+include $(TOP)/mk/target.mk
diff --git a/shootout/reverse-complement/revcomp-c.c b/shootout/reverse-complement/revcomp-c.c
new file mode 100644
index 0000000..610c799
--- /dev/null
+++ b/shootout/reverse-complement/revcomp-c.c
@@ -0,0 +1,96 @@
+/* The Computer Language Benchmarks Game
+ * http://benchmarksgame.alioth.debian.org/
+
+   contributed by Mr Ledrug
+*/
+
+#define _GNU_SOURCE
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <string.h>
+
+char *pairs = "ATCGGCTAUAMKRYWWSSYRKMVBHDDHBVNN\n\n";
+char tbl[128];
+
+typedef struct work_s work_t;
+struct work_s {
+   pthread_t id;
+   work_t *next;
+   char *begin, *end;
+};
+
+void *process(void *ww) {
+   work_t *w = ww;
+   char *from = w->begin, *to = w->end;
+   while (*from++ != '\n');
+
+   size_t len = to - from;
+   size_t off = 60 - (len % 61);
+
+   if (off) {
+      char *m;
+      for (m = from + 60 - off; m < to; m += 61) {
+         memmove(m + 1, m, off);
+         *m = '\n';
+      }
+   }
+
+   char c;
+   for (to--; from <= to; from++, to--)
+      c = tbl[(int)*from], *from = tbl[(int)*to], *to = c;
+
+   return 0;
+}
+
+int main() {
+   char *s;
+   for (s = pairs; *s; s += 2) {
+      tbl[toupper(s[0])] = s[1];
+      tbl[tolower(s[0])] = s[1];
+   }
+
+
+   size_t buflen = 1024, len, end = 0;
+   char *buf = malloc(1024);
+
+   int in = fileno(stdin);
+   while ((len = read(in, buf + end, buflen - 256 - end))) {
+      end += len;
+      if (end < buflen - 256) break;
+      buf = realloc(buf, buflen *= 2);
+   }
+   buf[end] = '>';
+
+   work_t *work = 0;
+   char *from, *to = buf + end - 1;
+   while (1) {
+      for (from = to; *from != '>'; from--);
+
+      work_t *w = malloc(sizeof(work_t));
+      w->begin = from;
+      w->end = to;
+      w->next = work;
+      work = w;
+
+      pthread_create(&w->id, 0, process, w);
+
+      to = from - 1;
+      if (to < buf) break;
+   }
+
+   while (work) {
+      work_t *w = work;
+      work = work->next;
+      pthread_join(w->id, 0);
+      free(w);
+   }
+
+   write(fileno(stdout), buf, end);
+   free(buf);
+
+   return 0;
+}





More information about the ghc-commits mailing list