Commit 134e99ba authored by Adam P. Goucher

More correct

parent 02cb35a9
Pipeline #49156681 passed with stages
in 7 minutes and 37 seconds
......@@ -25,7 +25,7 @@
constexpr static uint32_t middle = ((1u << (32 - K)) - (1u << K)); // e.g. 0x3ffffffcu
constexpr static uint32_t left = - (1u << K) ; // e.g. 0xfffffffcu
for (int i = K; i < H + K; i++) {
d[i] = ((n_d[i] & middle) >> 28) | (d[i] & left);
d[i] = ((n_d[i] & middle) >> (32 - 2*K)) | (d[i] & left);
}
}
......@@ -34,49 +34,49 @@
constexpr static uint32_t middle = ((1u << (32 - K)) - (1u << K)); // e.g. 0x3ffffffcu
constexpr static uint32_t right = ((1u << (32 - K)) - 1u); // e.g. 0x3fffffffu
for (int i = K; i < H + K; i++) {
d[i] = ((n_d[i] & middle) << 28) | (d[i] & right);
d[i] = ((n_d[i] & middle) << (32 - 2*K)) | (d[i] & right);
}
}
/// Overwrites the first K words of `d` with data taken from two other buffers.
///
/// For each i in [0, K):
///   - the upper 16 bits of d[i] come from n1_d[H+i] shifted left  by (16 - K);
///   - the lower 16 bits of d[i] come from n2_d[H+i] shifted right by (16 - K).
/// The previous contents of d[i] are discarded entirely.
///
/// @tparam H  word offset added to the index when reading the source buffers.
/// @tparam K  number of words written; also determines the shift (16 - K).
/// @param d     destination buffer; at least K words are written.
/// @param n1_d  source of the upper halves; words H .. H+K-1 are read.
/// @param n2_d  source of the lower halves; words H .. H+K-1 are read.
///
/// The pointers are declared __restrict__, so the three buffers must not overlap.
/// NOTE(review): presumably n1_d / n2_d are adjacent tiles and K is a border
/// width — confirm against the callers.
template<int H, int K>
inline void _copyBoundary12(uint32_t * __restrict__ d, uint32_t * __restrict__ n1_d, uint32_t * __restrict__ n2_d) {
    // A shift by a negative amount is undefined behaviour, so reject K > 16 at compile time.
    static_assert(K >= 0 && K <= 16, "K must lie in [0, 16] so that the shift (16 - K) is valid");
    constexpr int shift = 16 - K;
    for (int i = 0; i < K; i++) {
        d[i] = ((n1_d[H+i] << shift) & 0xffff0000u) | ((n2_d[H+i] >> shift) & 0x0000ffffu);
    }
}
/// Replaces the upper 16 bits of the first K words of `d`, keeping the lower 16 bits.
///
/// For each i in [0, K), the upper half of d[i] becomes the upper half of
/// (n_d[H+i] << (16 - K)); the lower half of d[i] is left as it was.
///
/// @tparam H  word offset added to the index when reading `n_d`.
/// @tparam K  number of words updated; also determines the shift (16 - K).
/// @param d    buffer updated in place (words 0 .. K-1).
/// @param n_d  source buffer; words H .. H+K-1 are read.
///
/// The pointers are declared __restrict__, so `d` and `n_d` must not overlap.
template<int H, int K>
inline void _copyBoundary1(uint32_t * __restrict__ d, uint32_t * __restrict__ n_d) {
    // A shift by a negative amount is undefined behaviour, so reject K > 16 at compile time.
    static_assert(K >= 0 && K <= 16, "K must lie in [0, 16] so that the shift (16 - K) is valid");
    constexpr int shift = 16 - K;
    for (int i = 0; i < K; i++) {
        d[i] = ((n_d[H+i] << shift) & 0xffff0000u) | (d[i] & 0x0000ffffu);
    }
}
/// Replaces the lower 16 bits of the first K words of `d`, keeping the upper 16 bits.
///
/// For each i in [0, K), the lower half of d[i] becomes the lower half of
/// (n_d[H+i] >> (16 - K)); the upper half of d[i] is left as it was.
///
/// @tparam H  word offset added to the index when reading `n_d`.
/// @tparam K  number of words updated; also determines the shift (16 - K).
/// @param d    buffer updated in place (words 0 .. K-1).
/// @param n_d  source buffer; words H .. H+K-1 are read.
///
/// The pointers are declared __restrict__, so `d` and `n_d` must not overlap.
template<int H, int K>
inline void _copyBoundary2(uint32_t * __restrict__ d, uint32_t * __restrict__ n_d) {
    // A shift by a negative amount is undefined behaviour, so reject K > 16 at compile time.
    static_assert(K >= 0 && K <= 16, "K must lie in [0, 16] so that the shift (16 - K) is valid");
    constexpr int shift = 16 - K;
    for (int i = 0; i < K; i++) {
        d[i] = ((n_d[H+i] >> shift) & 0x0000ffffu) | (d[i] & 0xffff0000u);
    }
}
/// Overwrites words H+K .. H+2K-1 of `d` with data taken from two other buffers.
///
/// For each i in [K, 2K):
///   - the upper 16 bits of d[H+i] come from n5_d[i] shifted left  by (16 - K);
///   - the lower 16 bits of d[H+i] come from n4_d[i] shifted right by (16 - K).
/// The previous contents of d[H+i] are discarded entirely.
///
/// @tparam H  word offset added to the index when writing `d`.
/// @tparam K  number of words written; also determines the shift (16 - K).
/// @param d     destination buffer; words H+K .. H+2K-1 are written.
/// @param n4_d  source of the lower halves; words K .. 2K-1 are read.
/// @param n5_d  source of the upper halves; words K .. 2K-1 are read.
///
/// The pointers are declared __restrict__, so the three buffers must not overlap.
template<int H, int K>
inline void _copyBoundary45(uint32_t * __restrict__ d, uint32_t * __restrict__ n4_d, uint32_t * __restrict__ n5_d) {
    // A shift by a negative amount is undefined behaviour, so reject K > 16 at compile time.
    static_assert(K >= 0 && K <= 16, "K must lie in [0, 16] so that the shift (16 - K) is valid");
    constexpr int shift = 16 - K;
    for (int i = K; i < 2*K; i++) {
        d[H+i] = ((n5_d[i] << shift) & 0xffff0000u) | ((n4_d[i] >> shift) & 0x0000ffffu);
    }
}
/// Replaces the lower 16 bits of words H+K .. H+2K-1 of `d`, keeping the upper 16 bits.
///
/// For each i in [K, 2K), the lower half of d[H+i] becomes the lower half of
/// (n_d[i] >> (16 - K)); the upper half of d[H+i] is left as it was.
///
/// @tparam H  word offset added to the index when accessing `d`.
/// @tparam K  number of words updated; also determines the shift (16 - K).
/// @param d    buffer updated in place (words H+K .. H+2K-1).
/// @param n_d  source buffer; words K .. 2K-1 are read.
///
/// The pointers are declared __restrict__, so `d` and `n_d` must not overlap.
template<int H, int K>
inline void _copyBoundary4(uint32_t * __restrict__ d, uint32_t * __restrict__ n_d) {
    // A shift by a negative amount is undefined behaviour, so reject K > 16 at compile time.
    static_assert(K >= 0 && K <= 16, "K must lie in [0, 16] so that the shift (16 - K) is valid");
    constexpr int shift = 16 - K;
    for (int i = K; i < 2*K; i++) {
        d[H+i] = ((n_d[i] >> shift) & 0x0000ffffu) | (d[H+i] & 0xffff0000u);
    }
}
/// Replaces the upper 16 bits of words H+K .. H+2K-1 of `d`, keeping the lower 16 bits.
///
/// For each i in [K, 2K), the upper half of d[H+i] becomes the upper half of
/// (n_d[i] << (16 - K)); the lower half of d[H+i] is left as it was.
///
/// @tparam H  word offset added to the index when accessing `d`.
/// @tparam K  number of words updated; also determines the shift (16 - K).
/// @param d    buffer updated in place (words H+K .. H+2K-1).
/// @param n_d  source buffer; words K .. 2K-1 are read.
///
/// The pointers are declared __restrict__, so `d` and `n_d` must not overlap.
template<int H, int K>
inline void _copyBoundary5(uint32_t * __restrict__ d, uint32_t * __restrict__ n_d) {
    // A shift by a negative amount is undefined behaviour, so reject K > 16 at compile time.
    static_assert(K >= 0 && K <= 16, "K must lie in [0, 16] so that the shift (16 - K) is valid");
    constexpr int shift = 16 - K;
    for (int i = K; i < 2*K; i++) {
        d[H+i] = ((n_d[i] << shift) & 0xffff0000u) | (d[H+i] & 0x0000ffffu);
    }
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment