[commit: nofib] master: Add the reverse-complement shootout benchmark (3f5152b)
Johan Tibell
johan.tibell at gmail.com
Wed Feb 6 20:02:43 CET 2013
Repository : ssh://darcs.haskell.org//srv/darcs/nofib
On branch : master
http://hackage.haskell.org/trac/ghc/changeset/3f5152b0ce77947e14e61be456065495120feda8
>---------------------------------------------------------------
commit 3f5152b0ce77947e14e61be456065495120feda8
Author: Johan Tibell <johan.tibell at gmail.com>
Date: Wed Feb 6 11:02:32 2013 -0800
Add the reverse-complement shootout benchmark
>---------------------------------------------------------------
.gitignore | 13 ++++
shootout/reverse-complement/Main.hs | 89 ++++++++++++++++++++++++++++
shootout/reverse-complement/Makefile | 80 +++++++++++++++++++++++++
shootout/reverse-complement/revcomp-c.c | 96 +++++++++++++++++++++++++++++++
4 files changed, 278 insertions(+), 0 deletions(-)
diff --git a/.gitignore b/.gitignore
index c3908ab..8b1066b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -54,8 +54,21 @@ real/veritas/veritas
shootout/binary-trees/binary-trees
shootout/fannkuch-redux/fannkuch-redux
shootout/fasta/fasta
+shootout/fasta/fasta-c
+shootout/fasta/fasta.faststdout
+shootout/fasta/fasta.slowstdout
+shootout/fasta/fasta.stdout
shootout/n-body/n-body
shootout/pidigits/pidigits
+shootout/reverse-complement/fasta-c
+shootout/reverse-complement/revcomp-c
+shootout/reverse-complement/revcomp-input250000.txt
+shootout/reverse-complement/revcomp-input2500000.txt
+shootout/reverse-complement/revcomp-input25000000.txt
+shootout/reverse-complement/reverse-complement
+shootout/reverse-complement/reverse-complement.faststdout
+shootout/reverse-complement/reverse-complement.slowstdout
+shootout/reverse-complement/reverse-complement.stdout
shootout/spectral-norm/spectral-norm
spectral/ansi/ansi
diff --git a/shootout/reverse-complement/Main.hs b/shootout/reverse-complement/Main.hs
new file mode 100644
index 0000000..8c29474
--- /dev/null
+++ b/shootout/reverse-complement/Main.hs
@@ -0,0 +1,89 @@
+{-
+The Computer Language Benchmarks Game
+http://benchmarksgame.alioth.debian.org/
+
+contributed by Louis Wasserman
+-}
+
+import Control.Monad
+import Foreign
+import Data.ByteString.Internal
+import System.IO
+
+data Buf = Buf !Int !Int !(Ptr Word8)
+
+withBuf run = run . Buf 0 ini =<< mallocBytes ini
+ where ini = 1024
+
+newSize len sz
+ | len <= sz = sz
+ | otherwise = newSize len (2 * sz)
+
+{-# INLINE putBuf #-}
+putBuf pS lS (Buf lD szD pD) run
+ | lD' > szD = do
+ let szD' = newSize lD' szD
+ pD' <- reallocBytes pD szD'
+ copyArray (pD' +* lD) pS lS
+ run (Buf lD' szD' pD')
+ | otherwise = do
+ copyArray (pD +* lD) pS lS
+ run (Buf lD' szD pD)
+ where lD' = lD + lS
+
+findChar p n c zero one = do
+ q <- memchr p c (fromIntegral (n :: Int))
+ if q == nullPtr then zero else one $! q `minusPtr` p
+
+clearBuf (Buf _ lB pB) = Buf 0 lB pB
+
+main = allocaArray 82 $ \ line ->
+ let go !buf = do
+ !m <- hGetBuf stdin line 82
+ if m == 0 then revcomp buf else do
+ findChar line m (c2w '>')
+ (putBuf line m buf go)
+ (\ end -> do
+ putBuf line end buf revcomp
+ putBuf (line +* end) (m - end) (clearBuf buf)
+ go)
+ in withBuf go
+
+(+*) = advancePtr
+
+{-# INLINE comps #-}
+comps = Prelude.zipWith (\ a b -> (fromEnum a, c2w b)) "AaCcGgTtUuMmRrYyKkVvHhDdBb"
+ "TTGGCCAAAAKKYYRRMMBBDDHHVV"
+
+ca :: Ptr Word8
+ca = inlinePerformIO $ do
+ !a <- mallocArray 200
+ mapM_ (\ i -> pokeByteOff a (fromIntegral i) i ) [0..199::Word8]
+ mapM_ (uncurry (pokeByteOff a)) comps
+ return a
+
+revcomp (Buf lBuf _ pBuf) = when (lBuf > 0) $ ca `seq`
+ findChar pBuf lBuf (c2w '\n') undefined $ \ begin -> let
+ begin' = begin + 1
+ rc :: Ptr Word8 -> Ptr Word8 -> IO ()
+ rc !i !j | i < j = do
+ x <- peek i
+ if x == c2w '\n' then let !i' = i +* 1 in rc1 j i' =<< peek i'
+ else rc1 j i x
+ rc i j = when (i == j) (poke i =<< comp =<< peek i)
+
+ rc1 !j !i !xi = do
+ y <- peek j
+ if y == c2w '\n' then let !j' = j +* (-1) in rc2 i xi j' =<< peek j'
+ else rc2 i xi j y
+
+ comp = peekElemOff ca . fromIntegral
+
+ rc2 !i !xi !j !xj = do
+ poke j =<< comp xi
+ poke i =<< comp xj
+ rc (i +* 1) (j +* (-1))
+ in do
+ hPutBuf stdout pBuf begin'
+ rc (pBuf +* begin') (pBuf +* (lBuf - 1))
+ hPutBuf stdout (pBuf +* begin') (lBuf - begin - 1)
diff --git a/shootout/reverse-complement/Makefile b/shootout/reverse-complement/Makefile
new file mode 100644
index 0000000..96d7e87
--- /dev/null
+++ b/shootout/reverse-complement/Makefile
@@ -0,0 +1,80 @@
+TOP = ../..
+include $(TOP)/mk/boilerplate.mk
+
+# Override default SRCS; the default is all source files, but
+# we don't want to include revcomp-c.c
+SRCS = Main.hs
+
+# These values are only used in this file. They are ignored by the
+# executable itself.
+FAST_OPTS = 250000
+NORM_OPTS = 2500000
+SLOW_OPTS = 25000000 # official shootout setting
+
+# The benchmark game also uses -fllvm, which we can't since it might
+# not be available on the developer's machine.
+HC_OPTS += -O2 -XBangPatterns -funfolding-use-threshold=32 -XMagicHash \
+ -XUnboxedTuples
+
+#------------------------------------------------------------------
+# Create input
+
+fasta-c : ../fasta/fasta-c.o
+ gcc $< -o $@
+
+revcomp-input250000.txt : fasta-c
+ ./fasta-c $(FAST_OPTS) > $@
+
+revcomp-input2500000.txt : fasta-c
+ ./fasta-c $(NORM_OPTS) > $@
+
+revcomp-input25000000.txt : fasta-c
+ ./fasta-c $(SLOW_OPTS) > $@
+
+ifeq "$(mode)" "slow"
+ INPUT_FILE = revcomp-input25000000.txt
+else
+ ifeq "$(mode)" "fast"
+ INPUT_FILE = revcomp-input250000.txt
+ else
+ INPUT_FILE = revcomp-input2500000.txt
+ endif
+endif
+
+SRC_RUNTEST_OPTS += -i $(INPUT_FILE)
+
+all boot :: $(INPUT_FILE)
+
+#------------------------------------------------------------------
+# Create output to validate against
+
+revcomp-c : revcomp-c.o
+ gcc $< -o $@
+
+reverse-complement.faststdout : revcomp-c
+ ./revcomp-c < $(INPUT_FILE) > $@
+
+reverse-complement.stdout : revcomp-c
+ ./revcomp-c < $(INPUT_FILE) > $@
+
+reverse-complement.slowstdout : revcomp-c
+ ./revcomp-c < $(INPUT_FILE) > $@
+
+# Since the stdout files are created during the run the runstdtest
+# script doesn't correctly pick them up, so we have to specify them
+# explicitly here.
+ifeq "$(mode)" "slow"
+ STDOUT_FILE = reverse-complement.slowstdout
+else
+ ifeq "$(mode)" "fast"
+ STDOUT_FILE = reverse-complement.faststdout
+ else
+ STDOUT_FILE = reverse-complement.stdout
+ endif
+endif
+
+SRC_RUNTEST_OPTS += -o1 $(STDOUT_FILE)
+
+all boot :: $(STDOUT_FILE)
+
+include $(TOP)/mk/target.mk
diff --git a/shootout/reverse-complement/revcomp-c.c b/shootout/reverse-complement/revcomp-c.c
new file mode 100644
index 0000000..610c799
--- /dev/null
+++ b/shootout/reverse-complement/revcomp-c.c
@@ -0,0 +1,96 @@
+/* The Computer Language Benchmarks Game
+ * http://benchmarksgame.alioth.debian.org/
+
+ contributed by Mr Ledrug
+*/
+
+#define _GNU_SOURCE
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <string.h>
+
+char *pairs = "ATCGGCTAUAMKRYWWSSYRKMVBHDDHBVNN\n\n";
+char tbl[128];
+
+typedef struct work_s work_t;
+struct work_s {
+ pthread_t id;
+ work_t *next;
+ char *begin, *end;
+};
+
+void *process(void *ww) {
+ work_t *w = ww;
+ char *from = w->begin, *to = w->end;
+ while (*from++ != '\n');
+
+ size_t len = to - from;
+ size_t off = 60 - (len % 61);
+
+ if (off) {
+ char *m;
+ for (m = from + 60 - off; m < to; m += 61) {
+ memmove(m + 1, m, off);
+ *m = '\n';
+ }
+ }
+
+ char c;
+ for (to--; from <= to; from++, to--)
+ c = tbl[(int)*from], *from = tbl[(int)*to], *to = c;
+
+ return 0;
+}
+
+int main() {
+ char *s;
+ for (s = pairs; *s; s += 2) {
+ tbl[toupper(s[0])] = s[1];
+ tbl[tolower(s[0])] = s[1];
+ }
+
+
+ size_t buflen = 1024, len, end = 0;
+ char *buf = malloc(1024);
+
+ int in = fileno(stdin);
+ while ((len = read(in, buf + end, buflen - 256 - end))) {
+ end += len;
+ if (end < buflen - 256) break;
+ buf = realloc(buf, buflen *= 2);
+ }
+ buf[end] = '>';
+
+ work_t *work = 0;
+ char *from, *to = buf + end - 1;
+ while (1) {
+ for (from = to; *from != '>'; from--);
+
+ work_t *w = malloc(sizeof(work_t));
+ w->begin = from;
+ w->end = to;
+ w->next = work;
+ work = w;
+
+ pthread_create(&w->id, 0, process, w);
+
+ to = from - 1;
+ if (to < buf) break;
+ }
+
+ while (work) {
+ work_t *w = work;
+ work = work->next;
+ pthread_join(w->id, 0);
+ free(w);
+ }
+
+ write(fileno(stdout), buf, end);
+ free(buf);
+
+ return 0;
+}
More information about the ghc-commits
mailing list