Commit cde068d4 authored by Adam P. Goucher

Considerable refactoring for cleanliness and efficiency

parent c66abc0b
......@@ -7,31 +7,43 @@
namespace apg {
uint64_t determine_direction(uint64_t * inleaf) {
uint64_t centres[10];
iterate_var_leaf(-4, inleaf, centres);
uint64_t z64_centre_to_u64(uint64_t * inleaf, int x, int y) {
    /*
     * Extract an 8-by-8 window, packed into a single uint64_t with one
     * byte per row, from a 16-by-16 leaf stored as four 8-by-8 quadrants.
     *
     * As the shifts below imply, inleaf[0] and inleaf[1] are treated as
     * the left and right quadrants of the first eight rows, and inleaf[2]
     * and inleaf[3] as the left and right quadrants of the last eight.
     * The window starts (4 + y) rows down and (4 + x) bits along each
     * row, so x = y = 0 selects the central 8-by-8 square.
     *
     * Both x and y must lie in [-4, 4]; anything outside that range
     * yields a negative or oversized shift count.
     *
     * Provided this is inlined and x, y are compile-time constants,
     * this should just involve 6 shifts, 3 ORs, and 2 ANDs:
     */
    int xs = 4 + x;
    int ys = (4 + y) << 3;

    // The low xs bits of every byte; all-zero for xs == 0 and all-one
    // for xs == 8 (the subtraction wraps), so both extremes work.
    uint64_t bitmask = (0x0101010101010101ull << xs) - 0x0101010101010101ull;

    uint64_t left;
    uint64_t right;
    if (ys == 0) {
        // y == -4: the window is exactly the first eight rows. Shifting
        // by (64 - ys) == 64 would be undefined behaviour, so skip it.
        left = inleaf[0];
        right = inleaf[1];
    } else if (ys == 64) {
        // y == 4: the window is exactly the last eight rows. Shifting
        // by ys == 64 would be undefined behaviour, so skip it.
        left = inleaf[2];
        right = inleaf[3];
    } else {
        // Drop the first ys/8 rows of the upper quadrants and pull the
        // same number of rows up from the lower quadrants.
        left = (inleaf[0] >> ys) | (inleaf[2] << (64 - ys));
        right = (inleaf[1] >> ys) | (inleaf[3] << (64 - ys));
    }

    // Per row: the upper (8 - xs) bits of the left byte move down, and
    // the lower xs bits of the right byte move up to fill the gap.
    uint64_t result = ((right & bitmask) << (8 - xs)) | ((left & (~bitmask)) >> xs);
    return result;
}
uint64_t determine_direction(int rule, uint64_t * inleaf) {
uint64_t centre;
iterate_var_leaf(rule, -4, inleaf, &centre);
uint64_t dmap = 0;
for (uint64_t i = 0; i < 8; i++) {
dmap |= (((centres[0] == centres[i+2]) ? 1 : 0) << i);
}
dmap |= ((centre == z64_centre_to_u64(inleaf, -1, -1)) ? 1 : 0); // SE
dmap |= ((centre == z64_centre_to_u64(inleaf, 0, -2)) ? 2 : 0); // S
dmap |= ((centre == z64_centre_to_u64(inleaf, 1, -1)) ? 4 : 0); // SW
dmap |= ((centre == z64_centre_to_u64(inleaf, 2, 0)) ? 8 : 0); // W
dmap |= ((centre == z64_centre_to_u64(inleaf, 1, 1)) ? 16 : 0); // NW
dmap |= ((centre == z64_centre_to_u64(inleaf, 0, 2)) ? 32 : 0); // N
dmap |= ((centre == z64_centre_to_u64(inleaf, -1, 1)) ? 64 : 0); // NE
dmap |= ((centre == z64_centre_to_u64(inleaf, -2, 0)) ? 128 : 0); // E
uint64_t lmask = 0;
if (centres[1]) {
if (centre) {
if (dmap & 170) {
lmask |= 1;
}
if (dmap & 17) {
if (centres[1] & 0xff7f3f1f0f070301ull) { lmask |= 1; }
if (centres[1] & 0x80c0e0f0f8fcfeffull) { lmask |= 2; }
lmask |= 3;
}
if (dmap & 68) {
if (centres[1] & 0x0103070f1f3f7fffull) { lmask |= 1; }
if (centres[1] & 0xfffefcf8f0e0c080ull) { lmask |= 2; }
if (dmap & 85) {
lmask |= 7;
}
}
......
......@@ -15,43 +15,22 @@
void iter4_var_leaf(uint64_t * inleaf, uint64_t * centres) {
/*
* Find the 8-by-8 centre after iterating a 16-by-16 leaf for a
* further 4 iterations in the rule. This returns both the live
* cells and the history envelope.
*
* inleaf:  the input leaf, handed to z64_to_r32_centre_avx/_ssse3.
* centres: output array. Elements 0 through 9 are all written, so the
*          caller must provide room for at least 10 values:
*            centres[0]    - r32_centre_to_u64(d, 0, 0) after both passes
*            centres[1]    - the same extraction taken from the buffer h
*            centres[2..9] - eight offset extractions from d, taken
*                            between the first and second pass
*/
// Select the SIMD code path: values of 9 or more use the AVX routines,
// anything lower falls back to the SSE2/SSSE3 routines.
int bis = apg::best_instruction_set();
uint32_t d[16];
uint32_t e[16];
uint32_t h[16];
// Zero all of h (16 * sizeof(uint32_t) == 64 bytes) before it is used.
std::memset(h, 0, 64);
// First pass: convert the leaf into d, then run the 16->12 and 12->8
// steps. The third argument is 0 here, whereas the second pass below
// supplies h (presumably no history is recorded in this pass; confirm
// against the iterate_* implementations).
if (bis >= 9) {
apg::z64_to_r32_centre_avx(inleaf, d);
iterate_avx_16_12(d, e, 0, 0, 0, false);
iterate_avx_12_8(d+2, e+2, 0, 0, 0, false);
} else {
apg::z64_to_r32_centre_ssse3(inleaf, d);
iterate_sse2_16_12(d, e, 0, 0, 0, false);
iterate_sse2_12_8(d+2, e+2, 0, 0, 0, false);
}
// Snapshot eight offset windows of d between the two passes.
// BESZEL:
centres[2] = r32_centre_to_u64(d, -1, -1);
centres[3] = r32_centre_to_u64(d, 0, -2);
centres[4] = r32_centre_to_u64(d, 1, -1);
centres[5] = r32_centre_to_u64(d, 2, 0);
// ULQOMA:
centres[6] = r32_centre_to_u64(d, 1, 1);
centres[7] = r32_centre_to_u64(d, 0, 2);
centres[8] = r32_centre_to_u64(d, -1, 1);
centres[9] = r32_centre_to_u64(d, -2, 0);
// Second pass: the same two steps again, this time passing h (and h+2)
// as the third argument.
if (bis >= 9) {
iterate_avx_16_12(d, e, h, 0, 0, false);
iterate_avx_12_8(d+2, e+2, h+2, 0, 0, false);
} else {
iterate_sse2_16_12(d, e, h, 0, 0, false);
iterate_sse2_12_8(d+2, e+2, h+2, 0, 0, false);
}
// Unshifted (0, 0) windows of the final state d and of h.
centres[0] = r32_centre_to_u64(d, 0, 0);
centres[1] = r32_centre_to_u64(h, 0, 0);
}
bool iterate_var_leaf(int n, uint64_t * inleaves, uint64_t * outleaf) {
......@@ -64,9 +43,6 @@
*
* iterate_var_leaf( 4, ...) <-- run a 32-by-32 tile 4 gens;
* iterate_var_leaf(-4, ...) <-- run a 16-by-16 tile 4 gens;
*
* Oh, and it also stores the history layer because ... erm ...
* negative timedeltas refer to history?
*/
iter4_var_leaf(inleaves, outleaf);
return false;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment