Commit 9f4d4493 by Adam P. Goucher

### Initial streamlife code

parent 1a22cf2d
 ... ... @@ -12,8 +12,8 @@ namespace apg { template > class lifetree : public lifetree_abstract { template class lifetree_generic : public lifetree_abstract { public: hypertree, J > htree; ... ... @@ -92,124 +92,7 @@ namespace apg { if (part < 4*N) { return pptr->key.x[part]; } else { return 0; } } hypernode iterate_recurse(hypernode hnode, uint64_t mantissa, uint64_t exponent, int rule, int history) { /* * Given a 2^n-by-2^n square represented by a hypernode, return the * central 2^(n-1)-by-2^(n-1) subsquare advanced by M * (2 ** E) * generations. * * This uses Gosper's HashLife algorithm down to a base-case where * n = 5 (i.e. computing the 16-by-16 interior of a 32-by-32 grid) * is performed by vectorised bitsliced assembly code. */ // std::cerr << "Calling iterate_recurse((" << hnode.index << ", " << hnode.depth << "), "; // std::cerr << mantissa << ", " << exponent << ", " << rule << ", " << history << ")" << std::endl; if (hnode.index == 0) { // Node is empty; return an empty node of the next size down: return hypernode(0, hnode.depth - 1); } // Extract the pointer to the node: kiventry, I, J >* pptr = ind2ptr_nonleaf(hnode.depth, hnode.index); // Determine whether 1 or 2 stages are necessary: bool bothstages = (hnode.depth <= (1 + exponent)); // Return the result if we've previously cached it: uint64_t gcdesc = pptr->gcflags >> 9; uint64_t hrule = (rule << 1) + (history & 1); if ((gcdesc & 7) == (mantissa - 1) && (hrule == ((gcdesc >> 3) & 15))) { uint64_t gcexp = gcdesc >> 7; if (gcexp == (1 + exponent) || (bothstages && (gcexp >= hnode.depth))) { // The exponent and mantissa are compatible with their desired values: return hypernode(pptr->value.res, hnode.depth - 1); } } if (hnode.depth == 1) { // Set up the memory locations: nicearray outleaf = {0ull}; uint64_t* inleafxs[4]; for (int i = 0; i < 4; i++) { inleafxs[i] = ind2ptr_leaf(pptr->key.x[i])->key.x; } universal_leaf_iterator(rule, history, mantissa, inleafxs, outleaf.x); I finalnode = make_leaf(outleaf); if (mantissa != 0) { // Cache the result to save additional recomputation: pptr->value.res = finalnode; uint64_t new_gcdesc = ((1 + exponent) << 7) | (hrule << 3) | (mantissa - 1); pptr->gcflags = (pptr->gcflags & 511) | (new_gcdesc << 9); } // Return the result: return hypernode(finalnode, 0); } else { // Extract the pointers for the children: kiventry, I, J >* pptr_tl = ind2ptr_nonleaf(hnode.depth-1, pptr->key.x[0]); kiventry, I, J >* pptr_tr = ind2ptr_nonleaf(hnode.depth-1, pptr->key.x[1]); kiventry, I, J >* pptr_bl = ind2ptr_nonleaf(hnode.depth-1, pptr->key.x[2]); kiventry, I, J >* pptr_br = ind2ptr_nonleaf(hnode.depth-1, pptr->key.x[3]); // Determine the centre square and return if (mantissa == 0): nicearray cc = {pptr_tl->key.x[3], pptr_tr->key.x[2], pptr_bl->key.x[1], pptr_br->key.x[0]}; hypernode hncc = make_nonleaf_hn(hnode.depth-1, cc); if (mantissa == 0) { return hncc; } // Actual HashLife algorithm begins here: nicearray tc = {pptr_tl->key.x[1], pptr_tr->key.x[0], pptr_tl->key.x[3], pptr_tr->key.x[2]}; nicearray bc = {pptr_bl->key.x[1], pptr_br->key.x[0], pptr_bl->key.x[3], pptr_br->key.x[2]}; nicearray cl = {pptr_tl->key.x[2], pptr_tl->key.x[3], pptr_bl->key.x[0], pptr_bl->key.x[1]}; nicearray cr = {pptr_tr->key.x[2], pptr_tr->key.x[3], pptr_br->key.x[0], pptr_br->key.x[1]}; // Compute the nine subnodes after the first stage: uint64_t newmant = bothstages ? mantissa : 0; hypernode xcc = iterate_recurse(hncc, newmant, exponent, rule, history); hypernode xtc = iterate_recurse(make_nonleaf_hn(hnode.depth - 1, tc), newmant, exponent, rule, history); hypernode xbc = iterate_recurse(make_nonleaf_hn(hnode.depth - 1, bc), newmant, exponent, rule, history); hypernode xcl = iterate_recurse(make_nonleaf_hn(hnode.depth - 1, cl), newmant, exponent, rule, history); hypernode xcr = iterate_recurse(make_nonleaf_hn(hnode.depth - 1, cr), newmant, exponent, rule, history); hypernode xtl = iterate_recurse(hypernode(pptr->key.x[0], hnode.depth - 1), newmant, exponent, rule, history); hypernode xtr = iterate_recurse(hypernode(pptr->key.x[1], hnode.depth - 1), newmant, exponent, rule, history); hypernode xbl = iterate_recurse(hypernode(pptr->key.x[2], hnode.depth - 1), newmant, exponent, rule, history); hypernode xbr = iterate_recurse(hypernode(pptr->key.x[3], hnode.depth - 1), newmant, exponent, rule, history); // Compute the four subnodes after the second stage: nicearray tl = {xtl.index, xtc.index, xcl.index, xcc.index}; nicearray tr = {xtc.index, xtr.index, xcc.index, xcr.index}; nicearray bl = {xcl.index, xcc.index, xbl.index, xbc.index}; nicearray br = {xcc.index, xcr.index, xbc.index, xbr.index}; hypernode ytl = iterate_recurse(make_nonleaf_hn(hnode.depth - 1, tl), mantissa, exponent, rule, history); hypernode ytr = iterate_recurse(make_nonleaf_hn(hnode.depth - 1, tr), mantissa, exponent, rule, history); hypernode ybl = iterate_recurse(make_nonleaf_hn(hnode.depth - 1, bl), mantissa, exponent, rule, history); hypernode ybr = iterate_recurse(make_nonleaf_hn(hnode.depth - 1, br), mantissa, exponent, rule, history); // Assemble the four subnodes and calculate the result: nicearray y = {ytl.index, ytr.index, ybl.index, ybr.index}; I finalnode = make_nonleaf(hnode.depth - 1, y); // Cache the result to save additional recomputation: pptr->value.res = finalnode; uint64_t new_gcdesc = ((1 + exponent) << 7) | (hrule << 3) | (mantissa - 1); pptr->gcflags = (pptr->gcflags & 511) | (new_gcdesc << 9); // Return the result: return hypernode(finalnode, hnode.depth - 1); } } virtual hypernode iterate_recurse(hypernode hnode, uint64_t mantissa, uint64_t exponent, int rule, int history) = 0; uint64_t write_macrocell_leaf(std::ostream &outstream, uint64_t leaf, std::map *subleaf2int, ... ... @@ -1266,11 +1149,6 @@ namespace apg { } } lifetree(uint64_t maxmem) { // maxmem is specified in MiB, so we left-shift by 20: this->gc_threshold = maxmem << 20; } hypernode pyramid_up(hypernode hnode) { I z = 0; ... ... @@ -1320,6 +1198,161 @@ namespace apg { } } nicearray ninechildren(hypernode hnode) { // Extract the pointer to the node: auto pptr = ind2ptr_nonleaf(hnode.depth, hnode.index); // Extract the pointers for the children: kiventry, I, J >* pptr_tl = ind2ptr_nonleaf(hnode.depth-1, pptr->key.x[0]); kiventry, I, J >* pptr_tr = ind2ptr_nonleaf(hnode.depth-1, pptr->key.x[1]); kiventry, I, J >* pptr_bl = ind2ptr_nonleaf(hnode.depth-1, pptr->key.x[2]); kiventry, I, J >* pptr_br = ind2ptr_nonleaf(hnode.depth-1, pptr->key.x[3]); nicearray cc = {pptr_tl->key.x[3], pptr_tr->key.x[2], pptr_bl->key.x[1], pptr_br->key.x[0]}; nicearray tc = {pptr_tl->key.x[1], pptr_tr->key.x[0], pptr_tl->key.x[3], pptr_tr->key.x[2]}; nicearray bc = {pptr_bl->key.x[1], pptr_br->key.x[0], pptr_bl->key.x[3], pptr_br->key.x[2]}; nicearray cl = {pptr_tl->key.x[2], pptr_tl->key.x[3], pptr_bl->key.x[0], pptr_bl->key.x[1]}; nicearray cr = {pptr_tr->key.x[2], pptr_tr->key.x[3], pptr_br->key.x[0], pptr_br->key.x[1]}; nicearray res = {pptr->key.x[0], make_nonleaf(hnode.depth - 1, tc), pptr->key.x[1], make_nonleaf(hnode.depth - 1, cl), make_nonleaf(hnode.depth - 1, cc), make_nonleaf(hnode.depth - 1, cr), pptr->key.x[2], make_nonleaf(hnode.depth - 1, bc), pptr->key.x[3]}; return res; } nicearray fourchildren(hypernode hnode, nicearray frags) { auto fragments = frags.x; nicearray tl = {fragments[0], fragments[1], fragments[3], fragments[4]}; nicearray tr = {fragments[1], fragments[2], fragments[4], fragments[5]}; nicearray bl = {fragments[3], fragments[4], fragments[6], fragments[7]}; nicearray br = {fragments[4], fragments[5], fragments[7], fragments[8]}; nicearray res = {make_nonleaf(hnode.depth - 1, tl), make_nonleaf(hnode.depth - 1, tr), make_nonleaf(hnode.depth - 1, bl), make_nonleaf(hnode.depth - 1, br)}; return res; } hypernode iterate_recurse1(hypernode hnode, uint64_t mantissa, uint64_t exponent, int rule, int history) { /* * Given a 2^n-by-2^n square represented by a hypernode, return the * central 2^(n-1)-by-2^(n-1) subsquare advanced by M * (2 ** E) * generations. * * This uses Gosper's HashLife algorithm down to a base-case where * n = 5 (i.e. computing the 16-by-16 interior of a 32-by-32 grid) * is performed by vectorised bitsliced assembly code. */ // std::cerr << "Calling iterate_recurse((" << hnode.index << ", " << hnode.depth << "), "; // std::cerr << mantissa << ", " << exponent << ", " << rule << ", " << history << ")" << std::endl; if (hnode.index == 0) { // Node is empty; return an empty node of the next size down: return hypernode(0, hnode.depth - 1); } // Extract the pointer to the node: kiventry, I, J >* pptr = ind2ptr_nonleaf(hnode.depth, hnode.index); // Determine whether 1 or 2 stages are necessary: bool bothstages = (hnode.depth <= (1 + exponent)); // Return the result if we've previously cached it: uint64_t gcdesc = pptr->gcflags >> 9; uint64_t hrule = (rule << 1) + (history & 1); if ((gcdesc & 7) == (mantissa - 1) && (hrule == ((gcdesc >> 3) & 15))) { uint64_t gcexp = gcdesc >> 7; if (gcexp == (1 + exponent) || (bothstages && (gcexp >= hnode.depth))) { // The exponent and mantissa are compatible with their desired values: return hypernode(pptr->value.res, hnode.depth - 1); } } if (hnode.depth == 1) { // Set up the memory locations: nicearray outleaf = {0ull}; uint64_t* inleafxs[4]; for (int i = 0; i < 4; i++) { inleafxs[i] = ind2ptr_leaf(pptr->key.x[i])->key.x; } universal_leaf_iterator(rule, history, mantissa, inleafxs, outleaf.x); I finalnode = make_leaf(outleaf); if (mantissa != 0) { // Cache the result to save additional recomputation: pptr->value.res = finalnode; uint64_t new_gcdesc = ((1 + exponent) << 7) | (hrule << 3) | (mantissa - 1); pptr->gcflags = (pptr->gcflags & 511) | (new_gcdesc << 9); } // Return the result: return hypernode(finalnode, 0); } else { auto ch9 = ninechildren(hnode); if (mantissa == 0) { return hypernode(ch9.x[4], hnode.depth - 1); } uint64_t newmant = bothstages ? mantissa : 0; for (uint64_t i = 0; i < 9; i++) { auto fh = iterate_recurse1(hypernode(ch9.x[i], hnode.depth - 1), newmant, exponent, rule, history); ch9.x[i] = fh.index; } auto ch4 = fourchildren(hnode, ch9); for (uint64_t i = 0; i < 4; i++) { auto fh = iterate_recurse1(hypernode(ch4.x[i], hnode.depth - 1), mantissa, exponent, rule, history); ch4.x[i] = fh.index; } I finalnode = make_nonleaf(hnode.depth - 1, ch4); // Cache the result to save additional recomputation: pptr->value.res = finalnode; uint64_t new_gcdesc = ((1 + exponent) << 7) | (hrule << 3) | (mantissa - 1); pptr->gcflags = (pptr->gcflags & 511) | (new_gcdesc << 9); // Return the result: return hypernode(finalnode, hnode.depth - 1); } } }; template > class lifetree : public lifetree_generic { public: using lifetree_generic::iterate_recurse; using lifetree_generic::iterate_recurse1; lifetree(uint64_t maxmem) { // maxmem is specified in MiB, so we left-shift by 20: this->gc_threshold = maxmem << 20; } hypernode iterate_recurse(hypernode hnode, uint64_t mantissa, uint64_t exponent, int rule, int history) { return iterate_recurse1(hnode, mantissa, exponent, rule, history); } }; }
streamlife.h 0 → 100644