Commit c7619cfe authored by Adam P. Goucher

Faster population determination in upattern

parent c22a0818
Pipeline #45954139 passed with stages
in 19 minutes and 17 seconds
#ifndef LIFELIB_VERSION /*
__version__=[x.replace('"', '') for x in '''
*/
#define LIFELIB_VERSION "ll2.1.11"
#define LIFELIB_VERSION "ll2.1.12"
// '''.split() if ('ll' in x)][0][2:]
#endif
......@@ -23,7 +23,7 @@
int32_t population; // 4 bytes
uint16_t updateflags; // 2 bytes
bool populationCurrent; // 1 byte
bool populationOld; // 1 byte
bool hashCurrent; // 1 byte
// sizeof(UTile<N, M>) == 128N + 72 bytes (no alignment space wasted)
......@@ -104,7 +104,8 @@
diff[0] |= (outleafx[3 + 4*i] ^ d[0 + 4*i]);
}
if (diff[0] | diff[1] | diff[2] | diff[3]) {
populationCurrent = false;
if (!populationOld) { owner->popchanged.push_back(this); }
populationOld = true;
hashCurrent = false;
if (updateflags == 0) { owner->modified.push_back(this); }
updateflags |= 64;
......@@ -136,8 +137,9 @@
return (a[4*i+3] | b[4*i+2] | c[4*i+1] | d[4*i]);
}
// countPopulation: returns this tile's live-cell count, recounting only when
// the cached value is flagged stale (populationOld), and keeps the owner's
// running total (owner->population) in step with the cached per-tile count.
//
// owner  - the upattern whose aggregate `population` field is adjusted.
// return - the tile's population (cached value if populationOld is false).
//
// NOTE(review): this span is a rendered diff hunk. The zero-argument signature
// and the `populationCurrent` lines appear to be the pre-commit versions shown
// next to their replacements, and the `......@@` line marks elided source.
int countPopulation() {
if (populationCurrent) { return population; }
int countPopulation(upattern<UTile<N, M>, 16>* owner) {
if (!populationOld) { return population; }
// Remove the stale contribution before recounting.
owner->population -= population;
int pop = 0;
uint64_t diff[4] = {0ull};
for (int i = 0; i < M; i++) {
......@@ -149,8 +151,9 @@
for (int i = 0; i < 4; i++) {
pop += __builtin_popcountll(diff[i]);
}
// Store the fresh count BEFORE adding it to the owner. Adding first would
// re-add the stale value just subtracted, leaving owner->population unchanged.
population = pop;
owner->population += population;
populationCurrent = true;
populationOld = false;
return pop;
}
......@@ -166,6 +169,8 @@
void eu64(upattern<UTile<N, M>, 16>* owner, int z, uint8_t dx, uint8_t dy, uint64_t v) {
for (int i = 0; i < 6; i++) { owner->updateNeighbour(this, i); }
if (!populationOld) { owner->popchanged.push_back(this); }
populationOld = true;
if (updateflags == 0) { owner->modified.push_back(this); }
updateflags |= 64;
......
......@@ -72,7 +72,7 @@
// ^^^ we really do need both the prefix 'inline' and the attribute 'always_inline' for this to work ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
// hashTile: returns the tile's hash, reusing the stored `hash` when the flag
// bit with value 2 in `currentflags` says it is still valid, otherwise
// recomputing it into `partialhash`, storing it and updating the flag.
//
// NOTE(review): this span is a rendered diff hunk, so two versions of the
// flag test and of the flag update appear back to back. The first of each pair
// appears to be the pre-commit line (bit set = hash valid) and the second the
// post-commit line (bit set = hash stale, cleared after recomputation).
// The `......@@` line marks elided source (the loop body is not visible here).
uint32_t hashTile() {
if (currentflags & 2) { return hash; }
if (!(currentflags & 2)) { return hash; }
uint32_t partialhash = 0;
for (int i = K; i < H + K; i++) {
......@@ -80,7 +80,7 @@
}
hash = partialhash;
currentflags |= 2;
currentflags &= (~2);
return partialhash;
}
......@@ -103,16 +103,18 @@
}
// countPopulation: returns this tile's population, memoised in `population`.
// In the owner-taking version, the lowest bit of `currentflags` being clear
// means the cached value is returned as-is; otherwise the old count is
// subtracted from owner->population, the tile is recounted with countpop64,
// the new count is added back to owner->population, and the bit is cleared.
//
// NOTE(review): this span is a rendered diff hunk, so the signature, the
// memoisation test and the flag update each appear twice; the first of each
// pair appears to be the pre-commit line and the second its replacement.
int countPopulation() {
int countPopulation(upattern<VTile<H,K>, 32 - 2*K, H>* owner) {
// Check memoized value:
if (currentflags & 1) { return population; }
if (!(currentflags & 1)) { return population; }
// Drop the stale per-tile count from the owner's running total.
owner->population -= population;
// Casting to 64-bit values so we can halve the number of calls
// to the POPCNT instruction. If H = 44, for example, we only
// need to perform 22 copies of the instruction.
population = countpop64((uint64_t *) (d + K));
currentflags |= 1;
// Re-add using the freshly stored count, so the owner's total reflects it.
owner->population += population;
currentflags &= (~1);
return population;
}
......@@ -143,6 +145,8 @@
if ((v == 0) || (z >= 2)) { return; }
for (int i = 0; i < 6; i++) { owner->updateNeighbour(this, i); }
if (!(currentflags & 1)) { owner->popchanged.push_back(this); }
currentflags = 3;
if (updateflags == 0) { owner->modified.push_back(this); }
updateflags |= 64;
......
......@@ -76,7 +76,8 @@
}
if ((r != 1) && (diffs[0] & 0x3ffffffcu)) {
currentflags = 0;
if (!(currentflags & 1)) { owner->popchanged.push_back(this); }
currentflags = 3;
if (updateflags == 0) { owner->modified.push_back(this); }
updateflags |= 64;
if (diffs[0] & 0x30000000u) { owner->updateNeighbour(this, 0); }
......
......@@ -90,7 +90,8 @@
}
if ((r != 1) && (diffs[0] & 0x3ffffffcu)) {
currentflags = 0;
if (!(currentflags & 1)) { owner->popchanged.push_back(this); }
currentflags = 3;
if (updateflags == 0) { owner->modified.push_back(this); }
updateflags |= 64;
if (diffs[0] & 0x30000000u) { owner->updateNeighbour(this, 0); }
......
......@@ -51,9 +51,11 @@ namespace apg {
indirected_map<uint64_t, T> tiles;
std::vector<T*> modified;
std::vector<T*> temp_modified;
std::vector<T*> popchanged;
uint64_t tilesProcessed;
uint64_t gensElapsed;
int population;
T* coords2ptr(int64_t x, int64_t w) {
// Returns a pointer to tile x + omega*w.
......@@ -78,6 +80,7 @@ namespace apg {
torus_width = 0;
torus_height = 0;
lastmant = 1;
population = 0;
}
upattern(int width, int height) {
......@@ -87,6 +90,7 @@ namespace apg {
torus_width = width / W;
torus_height = height / H;
lastmant = 1;
population = 0;
if ((width == 0) || (height == 0)) { return; }
......@@ -199,14 +203,12 @@ namespace apg {
// totalPopulation: returns the population of the whole pattern.
//
// NOTE(review): this span is a rendered diff hunk with two loop versions
// interleaved. The first loop (local `population` accumulator, walk over every
// node in tiles.elements) appears to be the pre-commit code. The second loop
// only revisits the tiles queued in `popchanged`, calling countPopulation(this)
// on each, after which the queue is cleared. With the local declaration
// removed, `return population;` presumably yields the member `population`
// that countPopulation adjusts through its owner pointer; confirm.
int totalPopulation() {
int population = 0;
uint32_t totalnodes = tiles.elements.totalnodes;
for (uint32_t it = 0; it != totalnodes; ++it) {
T* sqt = tiles.elements.ind2ptr(it);
population += sqt->countPopulation();
for (auto it = popchanged.begin(); it != popchanged.end(); ++it) {
(*it)->countPopulation(this);
}
// Every queued tile has been recounted, so the queue can be emptied.
popchanged.clear();
return population;
}
......@@ -298,7 +300,7 @@ namespace apg {
for (uint32_t it = 0; it != totalnodes; ++it) {
VTile<H> *sqt = curralgo->tiles.elements.ind2ptr(it);
if (sqt->countPopulation() == 0) { continue; }
if (sqt->countPopulation(curralgo) == 0) { continue; }
int64_t tx = (sqt->coords & 0xffffffffu) - 0x80000000u;
int64_t tw = (sqt->coords >> 32) - 0x80000000u;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment