[commit: ghc] wip/improve-pext-pdep: Efficient pdep implementation (513aa2b)

git at git.haskell.org git at git.haskell.org
Sun Jan 6 09:26:45 UTC 2019


Repository : ssh://git@git.haskell.org/ghc

On branch  : wip/improve-pext-pdep
Link       : http://ghc.haskell.org/trac/ghc/changeset/513aa2b8f06ea8499392e60408bd0753e018d9ed/ghc

>---------------------------------------------------------------

commit 513aa2b8f06ea8499392e60408bd0753e018d9ed
Author: Peter Trommler <ptrommler at acm.org>
Date:   Sat Oct 27 22:46:17 2018 +0200

    Efficient pdep implementation


>---------------------------------------------------------------

513aa2b8f06ea8499392e60408bd0753e018d9ed
 libraries/ghc-prim/cbits/pdep.c | 107 +++++++++++++++++++++++++++++++++-------
 1 file changed, 90 insertions(+), 17 deletions(-)

diff --git a/libraries/ghc-prim/cbits/pdep.c b/libraries/ghc-prim/cbits/pdep.c
index 58e8611..2769008 100644
--- a/libraries/ghc-prim/cbits/pdep.c
+++ b/libraries/ghc-prim/cbits/pdep.c
@@ -4,40 +4,113 @@
 StgWord64
 hs_pdep64(StgWord64 src, StgWord64 mask)
 {
-  uint64_t result = 0;
+  uint64_t m0, mk, mp, mv, t;  // parallel bit deposit: scatter the low-order bits of src to the set-bit positions of mask, in 6 = log2(64) rounds of masked shifts
+  uint64_t array[6];  // array[i]: the bits chosen in round i to be moved by (1 << i) positions
 
-  while (1) {
-    // Mask out all but the lowest bit
-    const uint64_t lowest = (-mask & mask);
+  m0 = mask;  // keep the original mask for the final AND
+  mk = ~mask << 1;  // bit j of mk is set iff bit j-1 of mask is clear
 
-    if (lowest == 0) {
-      break;
-    }
-
-    const uint64_t lsb = (uint64_t)((int64_t)(src << 63) >> 63);
-
-    result |= lsb & lowest;
-    mask &= ~lowest;
-    src >>= 1;
+  for (int i = 0; i < 6 ; i++) {  // one round per power-of-two shift distance (1 << i)
+    mp = mk ^ (mk << 1);  // start of a prefix XOR over mk, built by doubling the shift
+    mp = mp ^ (mp << 2);
+    mp = mp ^ (mp << 4);
+    mp = mp ^ (mp << 8);
+    mp = mp ^ (mp << 16);
+    mp = mp ^ (mp << 32);  // now bit j of mp is the XOR of bits 0..j of mk
+    mv = mp & mask;  // the mask bits selected for this round
+    array[i] = mv;  // remembered for the second loop
+    mask = (mask ^ mv) | (mv >> (1 << i));  // drop the selected bits from mask and re-insert them (1 << i) positions lower
+    mk = mk & ~mp;  // clear from mk every bit that is set in mp
   }
 
-  return result;
+  for (int i = 5; i >= 0; i--) {  // replay the recorded rounds in reverse order
+    mv = array[i];
+    t = src << (1 << i);  // copy of src moved up by this round's distance
+    src = (src & ~ mv) | (t & mv);  // bits under mv come from the shifted copy, all others stay
+  }
+  return src & m0;  // keep only the positions that are set in the original mask
 }
 
 StgWord
 hs_pdep32(StgWord src, StgWord mask)
 {
-  return hs_pdep64(src, mask);
+  uint32_t m0, mk, mp, mv, t;  // 32-bit parallel bit deposit in 5 = log2(32) rounds; values taken from the StgWord arguments are converted to uint32_t when assigned to these locals
+  uint32_t array[5];  // array[i]: the bits chosen in round i to be moved by (1 << i) positions
+
+  m0 = mask;  // original mask (as uint32_t) for the final AND
+  mk = ~mask << 1;  // bit j of mk is set iff bit j-1 of mask is clear
+
+  for (int i = 0; i < 5 ; i++) {  // one round per power-of-two shift distance (1 << i)
+    mp = mk ^ (mk << 1);  // start of a prefix XOR over mk, built by doubling the shift
+    mp = mp ^ (mp << 2);
+    mp = mp ^ (mp << 4);
+    mp = mp ^ (mp << 8);
+    mp = mp ^ (mp << 16);  // now bit j of mp is the XOR of bits 0..j of mk
+    mv = mp & mask;  // the mask bits selected for this round
+    array[i] = mv;  // remembered for the second loop
+    mask = (mask ^ mv) | (mv >> (1 << i));  // drop the selected bits from mask and re-insert them (1 << i) positions lower
+    mk = mk & ~mp;  // clear from mk every bit that is set in mp
+  }
+
+  for (int i = 4; i >= 0; i--) {  // replay the recorded rounds in reverse order
+    mv = array[i];
+    t = src << (1 << i);  // shifted copy of src, converted to uint32_t on assignment
+    src = (src & ~ mv) | (t & mv);  // bits under mv come from the shifted copy
+  }
+  return src & m0;  // m0 is uint32_t, so no bit above bit 31 can be set in the result
 }
 
 StgWord
 hs_pdep16(StgWord src, StgWord mask)
 {
-  return hs_pdep64(src, mask);
+  uint16_t m0, mk, mp, mv, t;  // 16-bit parallel bit deposit in 4 = log2(16) rounds; values taken from the StgWord arguments are converted to uint16_t when assigned to these locals
+  uint16_t array[4];  // array[i]: the bits chosen in round i to be moved by (1 << i) positions
+
+  m0 = mask;  // original mask (as uint16_t) for the final AND
+  mk = ~mask << 1;  // bit j of mk is set iff bit j-1 of mask is clear
+
+  for (int i = 0; i < 4 ; i++) {  // one round per power-of-two shift distance (1 << i)
+    mp = mk ^ (mk << 1);  // start of a prefix XOR over mk, built by doubling the shift
+    mp = mp ^ (mp << 2);
+    mp = mp ^ (mp << 4);
+    mp = mp ^ (mp << 8);  // now bit j of mp is the XOR of bits 0..j of mk
+    mv = mp & mask;  // the mask bits selected for this round
+    array[i] = mv;  // remembered for the second loop
+    mask = (mask ^ mv) | (mv >> (1 << i));  // drop the selected bits from mask and re-insert them (1 << i) positions lower
+    mk = mk & ~mp;  // clear from mk every bit that is set in mp
+  }
+
+  for (int i = 3; i >= 0; i--) {  // replay the recorded rounds in reverse order
+    mv = array[i];
+    t = src << (1 << i);  // shifted copy of src, converted to uint16_t on assignment
+    src = (src & ~ mv) | (t & mv);  // bits under mv come from the shifted copy
+  }
+  return src & m0;  // m0 is uint16_t, so no bit above bit 15 can be set in the result
 }
 
 StgWord
 hs_pdep8(StgWord src, StgWord mask)
 {
-  return hs_pdep64(src, mask);
+  uint8_t m0, mk, mp, mv, t;  // 8-bit parallel bit deposit in 3 = log2(8) rounds; values taken from the StgWord arguments are converted to uint8_t when assigned to these locals
+  uint8_t array[3];  // array[i]: the bits chosen in round i to be moved by (1 << i) positions
+
+  m0 = mask;  // original mask (as uint8_t) for the final AND
+  mk = ~mask << 1;  // bit j of mk is set iff bit j-1 of mask is clear
+
+  for (int i = 0; i < 3 ; i++) {  // one round per power-of-two shift distance (1 << i)
+    mp = mk ^ (mk << 1);  // start of a prefix XOR over mk, built by doubling the shift
+    mp = mp ^ (mp << 2);
+    mp = mp ^ (mp << 4);  // now bit j of mp is the XOR of bits 0..j of mk
+    mv = mp & mask;  // the mask bits selected for this round
+    array[i] = mv;  // remembered for the second loop
+    mask = (mask ^ mv) | (mv >> (1 << i));  // drop the selected bits from mask and re-insert them (1 << i) positions lower
+    mk = mk & ~mp;  // clear from mk every bit that is set in mp
+  }
+
+  for (int i = 2; i >= 0; i--) {  // replay the recorded rounds in reverse order
+    mv = array[i];
+    t = src << (1 << i);  // shifted copy of src, converted to uint8_t on assignment
+    src = (src & ~ mv) | (t & mv);  // bits under mv come from the shifted copy
+  }
+  return src & m0;  // m0 is uint8_t, so no bit above bit 7 can be set in the result
 }



More information about the ghc-commits mailing list