[commit: ghc] improve-pext-pdep: Efficient pdep implementation (eec2317)
git at git.haskell.org
git at git.haskell.org
Thu Dec 27 16:59:21 UTC 2018
Repository : ssh://git@git.haskell.org/ghc
On branch : improve-pext-pdep
Link : http://ghc.haskell.org/trac/ghc/changeset/eec2317e56333b52388037da9c49fe1af86663cc/ghc
>---------------------------------------------------------------
commit eec2317e56333b52388037da9c49fe1af86663cc
Author: Peter Trommler <ptrommler at acm.org>
Date: Sat Oct 27 22:46:17 2018 +0200
Efficient pdep implementation
>---------------------------------------------------------------
eec2317e56333b52388037da9c49fe1af86663cc
libraries/ghc-prim/cbits/pdep.c | 107 +++++++++++++++++++++++++++++++++-------
1 file changed, 90 insertions(+), 17 deletions(-)
diff --git a/libraries/ghc-prim/cbits/pdep.c b/libraries/ghc-prim/cbits/pdep.c
index 58e8611..2769008 100644
--- a/libraries/ghc-prim/cbits/pdep.c
+++ b/libraries/ghc-prim/cbits/pdep.c
@@ -4,40 +4,113 @@
StgWord64
hs_pdep64(StgWord64 src, StgWord64 mask)
{
- uint64_t result = 0;
+ uint64_t m0, mk, mp, mv, t;
+ uint64_t array[6];
- while (1) {
- // Mask out all but the lowest bit
- const uint64_t lowest = (-mask & mask);
+ m0 = mask;
+ mk = ~mask << 1;
- if (lowest == 0) {
- break;
- }
-
- const uint64_t lsb = (uint64_t)((int64_t)(src << 63) >> 63);
-
- result |= lsb & lowest;
- mask &= ~lowest;
- src >>= 1;
+ for (int i = 0; i < 6 ; i++) {
+ mp = mk ^ (mk << 1);
+ mp = mp ^ (mp << 2);
+ mp = mp ^ (mp << 4);
+ mp = mp ^ (mp << 8);
+ mp = mp ^ (mp << 16);
+ mp = mp ^ (mp << 32);
+ mv = mp & mask;
+ array[i] = mv;
+ mask = (mask ^ mv) | (mv >> (1 << i));
+ mk = mk & ~mp;
}
- return result;
+ for (int i = 5; i >= 0; i--) {
+ mv = array[i];
+ t = src << (1 << i);
+ src = (src & ~ mv) | (t & mv);
+ }
+ return src & m0;
}
StgWord
hs_pdep32(StgWord src, StgWord mask)
{
- return hs_pdep64(src, mask);
+ uint32_t m0, mk, mp, mv, t;
+ uint32_t array[5];
+
+ m0 = mask;
+ mk = ~mask << 1;
+
+ for (int i = 0; i < 5 ; i++) {
+ mp = mk ^ (mk << 1);
+ mp = mp ^ (mp << 2);
+ mp = mp ^ (mp << 4);
+ mp = mp ^ (mp << 8);
+ mp = mp ^ (mp << 16);
+ mv = mp & mask;
+ array[i] = mv;
+ mask = (mask ^ mv) | (mv >> (1 << i));
+ mk = mk & ~mp;
+ }
+
+ for (int i = 4; i >= 0; i--) {
+ mv = array[i];
+ t = src << (1 << i);
+ src = (src & ~ mv) | (t & mv);
+ }
+ return src & m0;
}
StgWord
hs_pdep16(StgWord src, StgWord mask)
{
- return hs_pdep64(src, mask);
+ uint16_t m0, mk, mp, mv, t;
+ uint16_t array[4];
+
+ m0 = mask;
+ mk = ~mask << 1;
+
+ for (int i = 0; i < 4 ; i++) {
+ mp = mk ^ (mk << 1);
+ mp = mp ^ (mp << 2);
+ mp = mp ^ (mp << 4);
+ mp = mp ^ (mp << 8);
+ mv = mp & mask;
+ array[i] = mv;
+ mask = (mask ^ mv) | (mv >> (1 << i));
+ mk = mk & ~mp;
+ }
+
+ for (int i = 3; i >= 0; i--) {
+ mv = array[i];
+ t = src << (1 << i);
+ src = (src & ~ mv) | (t & mv);
+ }
+ return src & m0;
}
StgWord
hs_pdep8(StgWord src, StgWord mask)
{
- return hs_pdep64(src, mask);
+ uint8_t m0, mk, mp, mv, t;
+ uint8_t array[3];
+
+ m0 = mask;
+ mk = ~mask << 1;
+
+ for (int i = 0; i < 3 ; i++) {
+ mp = mk ^ (mk << 1);
+ mp = mp ^ (mp << 2);
+ mp = mp ^ (mp << 4);
+ mv = mp & mask;
+ array[i] = mv;
+ mask = (mask ^ mv) | (mv >> (1 << i));
+ mk = mk & ~mp;
+ }
+
+ for (int i = 2; i >= 0; i--) {
+ mv = array[i];
+ t = src << (1 << i);
+ src = (src & ~ mv) | (t & mv);
+ }
+ return src & m0;
}
More information about the ghc-commits mailing list