Commit 9f4d4493 authored by Adam P. Goucher
Browse files

Initial streamlife code

parent 1a22cf2d
......@@ -12,8 +12,8 @@
namespace apg {
template<typename I, int N, typename J = lifemeta<I> >
class lifetree : public lifetree_abstract<I> {
template<typename I, int N, typename J>
class lifetree_generic : public lifetree_abstract<I> {
public:
hypertree<I, 4, J, nicearray<uint64_t, 4*N>, J > htree;
......@@ -92,124 +92,7 @@ namespace apg {
if (part < 4*N) { return pptr->key.x[part]; } else { return 0; }
}
// Advance the central half-size subsquare of `hnode` by
// mantissa * (2 ** exponent) generations, memoising the result on the node.
//   hnode    - node to iterate; index 0 is treated as an empty node
//   mantissa - generation multiplier; at non-leaf depth a value of 0
//              returns the centre square without advancing it
//   exponent - power-of-two exponent of the generation count
//   rule     - rule identifier, forwarded to universal_leaf_iterator
//   history  - history flag; only its lowest bit enters the cache descriptor
// Returns a hypernode whose depth is one less than hnode.depth.
hypernode<I> iterate_recurse(hypernode<I> hnode, uint64_t mantissa, uint64_t exponent, int rule, int history) {
/*
* Given a 2^n-by-2^n square represented by a hypernode, return the
* central 2^(n-1)-by-2^(n-1) subsquare advanced by M * (2 ** E)
* generations.
*
* This uses Gosper's HashLife algorithm down to a base-case where
* n = 5 (i.e. computing the 16-by-16 interior of a 32-by-32 grid)
* is performed by vectorised bitsliced assembly code.
*/
// std::cerr << "Calling iterate_recurse((" << hnode.index << ", " << hnode.depth << "), ";
// std::cerr << mantissa << ", " << exponent << ", " << rule << ", " << history << ")" << std::endl;
if (hnode.index == 0) {
// Node is empty; return an empty node of the next size down:
return hypernode<I>(0, hnode.depth - 1);
}
// Extract the pointer to the node:
kiventry<nicearray<I, 4>, I, J >* pptr = ind2ptr_nonleaf(hnode.depth, hnode.index);
// Determine whether 1 or 2 stages are necessary:
bool bothstages = (hnode.depth <= (1 + exponent));
// Return the result if we've previously cached it:
// (the cache descriptor lives in gcflags above the low 9 bits; within the
// descriptor, bits 0-2 hold mantissa - 1, bits 3-6 hold hrule and the
// remaining bits hold 1 + exponent)
uint64_t gcdesc = pptr->gcflags >> 9;
uint64_t hrule = (rule << 1) + (history & 1);
if ((gcdesc & 7) == (mantissa - 1) && (hrule == ((gcdesc >> 3) & 15))) {
uint64_t gcexp = gcdesc >> 7;
if (gcexp == (1 + exponent) || (bothstages && (gcexp >= hnode.depth))) {
// The exponent and mantissa are compatible with their desired values:
return hypernode<I>(pptr->value.res, hnode.depth - 1);
}
}
if (hnode.depth == 1) {
// Base case: the four children are leaves, so hand them to the leaf iterator.
// Set up the memory locations:
nicearray<uint64_t, 4*N> outleaf = {0ull};
uint64_t* inleafxs[4];
for (int i = 0; i < 4; i++) {
inleafxs[i] = ind2ptr_leaf(pptr->key.x[i])->key.x;
}
universal_leaf_iterator<N>(rule, history, mantissa, inleafxs, outleaf.x);
I finalnode = make_leaf(outleaf);
if (mantissa != 0) {
// Cache the result to save additional recomputation:
pptr->value.res = finalnode;
uint64_t new_gcdesc = ((1 + exponent) << 7) | (hrule << 3) | (mantissa - 1);
// Keep the low 9 bits of gcflags and overwrite only the descriptor above them:
pptr->gcflags = (pptr->gcflags & 511) | (new_gcdesc << 9);
}
// Return the result:
return hypernode<I>(finalnode, 0);
} else {
// Extract the pointers for the children:
kiventry<nicearray<I, 4>, I, J >* pptr_tl = ind2ptr_nonleaf(hnode.depth-1, pptr->key.x[0]);
kiventry<nicearray<I, 4>, I, J >* pptr_tr = ind2ptr_nonleaf(hnode.depth-1, pptr->key.x[1]);
kiventry<nicearray<I, 4>, I, J >* pptr_bl = ind2ptr_nonleaf(hnode.depth-1, pptr->key.x[2]);
kiventry<nicearray<I, 4>, I, J >* pptr_br = ind2ptr_nonleaf(hnode.depth-1, pptr->key.x[3]);
// Determine the centre square and return if (mantissa == 0):
nicearray<I, 4> cc = {pptr_tl->key.x[3], pptr_tr->key.x[2], pptr_bl->key.x[1], pptr_br->key.x[0]};
hypernode<I> hncc = make_nonleaf_hn(hnode.depth-1, cc);
if (mantissa == 0) { return hncc; }
// Actual HashLife algorithm begins here:
// (tc/bc/cl/cr are the top-centre, bottom-centre, centre-left and
// centre-right squares assembled from grandchildren of adjacent children)
nicearray<I, 4> tc = {pptr_tl->key.x[1], pptr_tr->key.x[0], pptr_tl->key.x[3], pptr_tr->key.x[2]};
nicearray<I, 4> bc = {pptr_bl->key.x[1], pptr_br->key.x[0], pptr_bl->key.x[3], pptr_br->key.x[2]};
nicearray<I, 4> cl = {pptr_tl->key.x[2], pptr_tl->key.x[3], pptr_bl->key.x[0], pptr_bl->key.x[1]};
nicearray<I, 4> cr = {pptr_tr->key.x[2], pptr_tr->key.x[3], pptr_br->key.x[0], pptr_br->key.x[1]};
// Compute the nine subnodes after the first stage:
// (when only one stage is needed the first stage runs with mantissa 0,
// i.e. it merely extracts centres)
uint64_t newmant = bothstages ? mantissa : 0;
hypernode<I> xcc = iterate_recurse(hncc, newmant, exponent, rule, history);
hypernode<I> xtc = iterate_recurse(make_nonleaf_hn(hnode.depth - 1, tc), newmant, exponent, rule, history);
hypernode<I> xbc = iterate_recurse(make_nonleaf_hn(hnode.depth - 1, bc), newmant, exponent, rule, history);
hypernode<I> xcl = iterate_recurse(make_nonleaf_hn(hnode.depth - 1, cl), newmant, exponent, rule, history);
hypernode<I> xcr = iterate_recurse(make_nonleaf_hn(hnode.depth - 1, cr), newmant, exponent, rule, history);
hypernode<I> xtl = iterate_recurse(hypernode<I>(pptr->key.x[0], hnode.depth - 1), newmant, exponent, rule, history);
hypernode<I> xtr = iterate_recurse(hypernode<I>(pptr->key.x[1], hnode.depth - 1), newmant, exponent, rule, history);
hypernode<I> xbl = iterate_recurse(hypernode<I>(pptr->key.x[2], hnode.depth - 1), newmant, exponent, rule, history);
hypernode<I> xbr = iterate_recurse(hypernode<I>(pptr->key.x[3], hnode.depth - 1), newmant, exponent, rule, history);
// Compute the four subnodes after the second stage:
nicearray<I, 4> tl = {xtl.index, xtc.index, xcl.index, xcc.index};
nicearray<I, 4> tr = {xtc.index, xtr.index, xcc.index, xcr.index};
nicearray<I, 4> bl = {xcl.index, xcc.index, xbl.index, xbc.index};
nicearray<I, 4> br = {xcc.index, xcr.index, xbc.index, xbr.index};
hypernode<I> ytl = iterate_recurse(make_nonleaf_hn(hnode.depth - 1, tl), mantissa, exponent, rule, history);
hypernode<I> ytr = iterate_recurse(make_nonleaf_hn(hnode.depth - 1, tr), mantissa, exponent, rule, history);
hypernode<I> ybl = iterate_recurse(make_nonleaf_hn(hnode.depth - 1, bl), mantissa, exponent, rule, history);
hypernode<I> ybr = iterate_recurse(make_nonleaf_hn(hnode.depth - 1, br), mantissa, exponent, rule, history);
// Assemble the four subnodes and calculate the result:
nicearray<I, 4> y = {ytl.index, ytr.index, ybl.index, ybr.index};
I finalnode = make_nonleaf(hnode.depth - 1, y);
// Cache the result to save additional recomputation:
pptr->value.res = finalnode;
uint64_t new_gcdesc = ((1 + exponent) << 7) | (hrule << 3) | (mantissa - 1);
// Keep the low 9 bits of gcflags and overwrite only the descriptor above them:
pptr->gcflags = (pptr->gcflags & 511) | (new_gcdesc << 9);
// Return the result:
return hypernode<I>(finalnode, hnode.depth - 1);
}
}
// Iteration entry point. Pure virtual here: each concrete tree supplies its
// own strategy (lifetree forwards to iterate_recurse1, streamtree iterates
// pairs of tiles). Parameters have the same meaning as for iterate_recurse1.
virtual hypernode<I> iterate_recurse(hypernode<I> hnode, uint64_t mantissa, uint64_t exponent, int rule, int history) = 0;
uint64_t write_macrocell_leaf(std::ostream &outstream, uint64_t leaf,
std::map<uint64_t, uint64_t> *subleaf2int,
......@@ -1266,11 +1149,6 @@ namespace apg {
}
}
lifetree(uint64_t maxmem) {
    // The caller passes the memory budget in MiB; gc_threshold is kept in
    // bytes, and 1 MiB is 2^20 bytes.
    const uint64_t mib_shift = 20;
    this->gc_threshold = (maxmem << mib_shift);
}
hypernode<I> pyramid_up(hypernode<I> hnode) {
I z = 0;
......@@ -1320,6 +1198,161 @@ namespace apg {
}
}
nicearray<I, 9> ninechildren(hypernode<I> hnode) {
    /*
     * Split a node into its 3-by-3 grid of overlapping half-size subnodes.
     * The result is in row-major order:
     *
     *     0 = top-left     1 = top-centre     2 = top-right
     *     3 = centre-left  4 = centre         5 = centre-right
     *     6 = bottom-left  7 = bottom-centre  8 = bottom-right
     *
     * The corner entries are the node's own children; the other five are
     * assembled from grandchildren via make_nonleaf. hnode's children must
     * themselves be non-leaf nodes, as they are looked up with
     * ind2ptr_nonleaf.
     */
    const auto subdepth = hnode.depth - 1;

    // The four direct children of the node:
    auto parent = ind2ptr_nonleaf(hnode.depth, hnode.index);
    const I idx_nw = parent->key.x[0];
    const I idx_ne = parent->key.x[1];
    const I idx_sw = parent->key.x[2];
    const I idx_se = parent->key.x[3];

    auto nw = ind2ptr_nonleaf(subdepth, idx_nw);
    auto ne = ind2ptr_nonleaf(subdepth, idx_ne);
    auto sw = ind2ptr_nonleaf(subdepth, idx_sw);
    auto se = ind2ptr_nonleaf(subdepth, idx_se);

    // Grandchildren regrouped into the five overlapping squares:
    nicearray<I, 4> north  = {nw->key.x[1], ne->key.x[0], nw->key.x[3], ne->key.x[2]};
    nicearray<I, 4> west   = {nw->key.x[2], nw->key.x[3], sw->key.x[0], sw->key.x[1]};
    nicearray<I, 4> middle = {nw->key.x[3], ne->key.x[2], sw->key.x[1], se->key.x[0]};
    nicearray<I, 4> east   = {ne->key.x[2], ne->key.x[3], se->key.x[0], se->key.x[1]};
    nicearray<I, 4> south  = {sw->key.x[1], se->key.x[0], sw->key.x[3], se->key.x[2]};

    // Nodes are created in row-major order of the output grid:
    const I idx_n = make_nonleaf(subdepth, north);
    const I idx_w = make_nonleaf(subdepth, west);
    const I idx_c = make_nonleaf(subdepth, middle);
    const I idx_e = make_nonleaf(subdepth, east);
    const I idx_s = make_nonleaf(subdepth, south);

    nicearray<I, 9> grid = {idx_nw, idx_n, idx_ne,
                            idx_w,  idx_c, idx_e,
                            idx_sw, idx_s, idx_se};
    return grid;
}
nicearray<I, 4> fourchildren(hypernode<I> hnode, nicearray<I, 9> frags) {
    /*
     * Given a row-major 3-by-3 grid of node indices, build the four
     * overlapping 2-by-2 quadrants (top-left, top-right, bottom-left,
     * bottom-right) as nodes of depth hnode.depth - 1 and return their
     * indices in that order.
     */

    // Position in the 3-by-3 grid of each quadrant's top-left fragment:
    const int origin[4] = {0, 1, 3, 4};

    I quadrant[4];
    for (int q = 0; q < 4; q++) {
        const int o = origin[q];
        nicearray<I, 4> block = {frags.x[o], frags.x[o + 1], frags.x[o + 3], frags.x[o + 4]};
        quadrant[q] = make_nonleaf(hnode.depth - 1, block);
    }

    nicearray<I, 4> res = {quadrant[0], quadrant[1], quadrant[2], quadrant[3]};
    return res;
}
hypernode<I> iterate_recurse1(hypernode<I> hnode, uint64_t mantissa, uint64_t exponent, int rule, int history) {
    /*
     * HashLife step on a single tile: take the 2^n-by-2^n square held by
     * hnode and return its central 2^(n-1)-by-2^(n-1) subsquare after
     * mantissa * (2 ** exponent) generations.
     *
     * Recursion bottoms out at depth 1, where the four leaf children are
     * passed to universal_leaf_iterator. Results are memoised on the node
     * itself: value.res holds the result index and the bits of gcflags
     * above the low nine hold a descriptor of (exponent, rule, mantissa).
     */

    // An empty square stays empty; just drop down one level.
    if (hnode.index == 0) {
        return hypernode<I>(0, hnode.depth - 1);
    }

    kiventry<nicearray<I, 4>, I, J >* entry = ind2ptr_nonleaf(hnode.depth, hnode.index);

    // Both recursion stages advance time only for sufficiently shallow nodes.
    const bool twostage = (hnode.depth <= (1 + exponent));

    // Descriptor layout: bits 0-2 = mantissa - 1, bits 3-6 = rule/history
    // code, remaining bits = 1 + exponent.
    const uint64_t rulecode = (rule << 1) + (history & 1);
    const uint64_t descriptor = ((1 + exponent) << 7) | (rulecode << 3) | (mantissa - 1);

    // Reuse a memoised result when its descriptor is compatible.
    const uint64_t stored = entry->gcflags >> 9;
    const bool mantissa_matches = ((stored & 7) == (mantissa - 1));
    const bool rule_matches = (rulecode == ((stored >> 3) & 15));
    if (mantissa_matches && rule_matches) {
        const uint64_t stored_exp = stored >> 7;
        if (stored_exp == (1 + exponent) || (twostage && (stored_exp >= hnode.depth))) {
            return hypernode<I>(entry->value.res, hnode.depth - 1);
        }
    }

    if (hnode.depth == 1) {
        // Base case: gather the four leaves and run the leaf iterator.
        nicearray<uint64_t, 4*N> outleaf = {0ull};
        uint64_t* leafdata[4];
        for (int c = 0; c < 4; c++) {
            leafdata[c] = ind2ptr_leaf(entry->key.x[c])->key.x;
        }
        universal_leaf_iterator<N>(rule, history, mantissa, leafdata, outleaf.x);
        I leafresult = make_leaf(outleaf);

        // A zero mantissa is never memoised.
        if (mantissa != 0) {
            entry->value.res = leafresult;
            entry->gcflags = (entry->gcflags & 511) | (descriptor << 9);
        }
        return hypernode<I>(leafresult, 0);
    }

    // Stage 1: the nine overlapping half-size squares.
    auto grid = ninechildren(hnode);
    if (mantissa == 0) {
        // Nothing to advance: the answer is simply the centre square.
        return hypernode<I>(grid.x[4], hnode.depth - 1);
    }

    const uint64_t stage1_mantissa = twostage ? mantissa : 0;
    for (uint64_t c = 0; c < 9; c++) {
        grid.x[c] = iterate_recurse1(hypernode<I>(grid.x[c], hnode.depth - 1),
                                     stage1_mantissa, exponent, rule, history).index;
    }

    // Stage 2: the four quadrants built from the stage-1 results.
    auto quads = fourchildren(hnode, grid);
    for (uint64_t c = 0; c < 4; c++) {
        quads.x[c] = iterate_recurse1(hypernode<I>(quads.x[c], hnode.depth - 1),
                                      mantissa, exponent, rule, history).index;
    }

    I result = make_nonleaf(hnode.depth - 1, quads);

    // Memoise, preserving the low nine bits of gcflags.
    entry->value.res = result;
    entry->gcflags = (entry->gcflags & 511) | (descriptor << 9);

    return hypernode<I>(result, hnode.depth - 1);
}
};
/*
 * Concrete single-tile life tree: supplies the iterate_recurse entry point
 * by forwarding to the generic HashLife recursion, iterate_recurse1.
 */
template<typename I, int N, typename J = lifemeta<I> >
class lifetree : public lifetree_generic<I, N, J> {

    typedef lifetree_generic<I, N, J> generic_type;

    public:

    using generic_type::iterate_recurse;
    using generic_type::iterate_recurse1;

    lifetree(uint64_t maxmem) {
        // The memory budget arrives in MiB; gc_threshold is kept in bytes
        // (1 MiB = 2^20 bytes).
        const uint64_t mib_shift = 20;
        this->gc_threshold = (maxmem << mib_shift);
    }

    hypernode<I> iterate_recurse(hypernode<I> hnode, uint64_t mantissa, uint64_t exponent, int rule, int history) {
        // Single-tile trees need nothing beyond the generic recursion.
        hypernode<I> advanced = iterate_recurse1(hnode, mantissa, exponent, rule, history);
        return advanced;
    }

};
}
#pragma once
#include "lifetree.h"
#include "avxlife/directions.h"
#define BESZEL 0
#define ULQOMA 1
namespace apg {
// Per-node metadata used as the value type of a streamtree's entries
// (streamtree instantiates lifetree_generic with J = streammeta<I>).
template<typename I>
struct streammeta {
// Index of the memoised iteration result (read and written as value.res
// by iterate_recurse1).
I res;
// Not referenced by any code visible in this file.
I aux;
// Cache filled by node2lanes: bits 0-15 hold the flag word, bits 16-31 hold
// (rule + 1) to record which rule the entry was computed for, and
// bits 32-63 hold a 32-bit lane mask.
uint64_t lanes;
};
template<typename I>
class streamtree : public lifetree_generic<I, 1, streammeta<I> > {
public:
using lifetree_generic<I, 1, streammeta<I> >::htree;
using lifetree_generic<I, 1, streammeta<I> >::iterate_recurse;
using lifetree_generic<I, 1, streammeta<I> >::iterate_recurse1;
using lifetree_generic<I, 1, streammeta<I> >::ninechildren;
streamtree(uint64_t maxmem) {
    // The caller passes the memory budget in MiB; gc_threshold is kept in
    // bytes, and 1 MiB is 2^20 bytes.
    const uint64_t mib_shift = 20;
    this->gc_threshold = (maxmem << mib_shift);
}
/*
* Streamlife operates on pairs of hashtiles instead of individual
* hashtiles. We want to memoize the results of these computations.
*
* Key: nicearray<I, 3> = (beszel_tile, ulqoma_tile, gencount)
* Value: nicearray<I, 2> = (beszel_tile', ulqoma_tile')
*/
std::vector<kivtable<nicearray<I, 3>, I, nicearray<I, 2> >* > biresults;
uint64_t node2lanes(int rule, I depth, I index) {
    /*
     * Return the lane summary of the node (depth, index) under the given
     * rule, computing and caching it in value.lanes on first use.
     *
     * Cached word layout:
     *   bits  0-15 : flag word (presumably the admissible travel
     *                directions produced by determine_direction -- confirm)
     *   bits 16-31 : (rule + 1), marking which rule the cache is valid for
     *   bits 32-63 : 32-bit lane mask
     *
     * The returned value is the cached word with the rule tag cleared.
     * The empty node (index 0) yields 0xffff: every flag set, no lanes.
     *
     * Members inherited from the dependent base class are reached through
     * this->, because unqualified lookup does not search dependent bases.
     */
    if (index == 0) {
        return 0xffff;
    }

    // Tag stored in bits 16-31 to identify the rule a cached entry belongs to.
    const uint64_t ruletag = static_cast<uint64_t>(rule + 1) << 16;

    if (depth == 0) {
        auto pptr = this->ind2ptr_leaf(index);
        if ((pptr->value.lanes & 0xffff0000ull) != ruletag) {
            pptr->value.lanes = determine_direction(rule, pptr->key.x) | ruletag;
        }
        return pptr->value.lanes & 0xffffffff0000ffffull;
    } else {
        auto pptr = this->ind2ptr_nonleaf(depth, index);
        if ((pptr->value.lanes & 0xffff0000ull) != ruletag) {

            // Lane words of the 3-by-3 grid of overlapping subnodes, in
            // row-major order (0, 2, 6, 8 are the node's own children).
            uint64_t childlanes[9];
            uint64_t adml = 0xff;

            /*
             * Short-circuit evaluation using the corner children.
             * This will handle the vast majority of random tiles.
             */
            if (adml != 0) { childlanes[0] = node2lanes(rule, depth - 1, pptr->key.x[0]); adml &= childlanes[0]; }
            if (adml != 0) { childlanes[2] = node2lanes(rule, depth - 1, pptr->key.x[1]); adml &= childlanes[2]; }
            if (adml != 0) { childlanes[6] = node2lanes(rule, depth - 1, pptr->key.x[2]); adml &= childlanes[6]; }
            if (adml != 0) { childlanes[8] = node2lanes(rule, depth - 1, pptr->key.x[3]); adml &= childlanes[8]; }

            // No flag survives all four corners: cache an all-zero summary.
            if (adml == 0) { pptr->value.lanes = ruletag; return 0; }

            if (depth == 1) {
                // Children are leaves: the five remaining subnodes are
                // assembled from 64-bit leaf words.
                auto pptr_tl = this->ind2ptr_leaf(pptr->key.x[0]);
                auto pptr_tr = this->ind2ptr_leaf(pptr->key.x[1]);
                auto pptr_bl = this->ind2ptr_leaf(pptr->key.x[2]);
                auto pptr_br = this->ind2ptr_leaf(pptr->key.x[3]);
                nicearray<uint64_t, 4> cc = {pptr_tl->key.x[3], pptr_tr->key.x[2], pptr_bl->key.x[1], pptr_br->key.x[0]};
                nicearray<uint64_t, 4> tc = {pptr_tl->key.x[1], pptr_tr->key.x[0], pptr_tl->key.x[3], pptr_tr->key.x[2]};
                nicearray<uint64_t, 4> bc = {pptr_bl->key.x[1], pptr_br->key.x[0], pptr_bl->key.x[3], pptr_br->key.x[2]};
                nicearray<uint64_t, 4> cl = {pptr_tl->key.x[2], pptr_tl->key.x[3], pptr_bl->key.x[0], pptr_bl->key.x[1]};
                nicearray<uint64_t, 4> cr = {pptr_tr->key.x[2], pptr_tr->key.x[3], pptr_br->key.x[0], pptr_br->key.x[1]};
                childlanes[1] = node2lanes(rule, depth - 1, this->make_leaf(tc));
                childlanes[3] = node2lanes(rule, depth - 1, this->make_leaf(cl));
                childlanes[4] = node2lanes(rule, depth - 1, this->make_leaf(cc));
                childlanes[5] = node2lanes(rule, depth - 1, this->make_leaf(cr));
                childlanes[7] = node2lanes(rule, depth - 1, this->make_leaf(bc));
            } else {
                // Children are non-leaf nodes: assemble from grandchild indices.
                auto pptr_tl = this->ind2ptr_nonleaf(depth - 1, pptr->key.x[0]);
                auto pptr_tr = this->ind2ptr_nonleaf(depth - 1, pptr->key.x[1]);
                auto pptr_bl = this->ind2ptr_nonleaf(depth - 1, pptr->key.x[2]);
                auto pptr_br = this->ind2ptr_nonleaf(depth - 1, pptr->key.x[3]);
                nicearray<I, 4> cc = {pptr_tl->key.x[3], pptr_tr->key.x[2], pptr_bl->key.x[1], pptr_br->key.x[0]};
                nicearray<I, 4> tc = {pptr_tl->key.x[1], pptr_tr->key.x[0], pptr_tl->key.x[3], pptr_tr->key.x[2]};
                nicearray<I, 4> bc = {pptr_bl->key.x[1], pptr_br->key.x[0], pptr_bl->key.x[3], pptr_br->key.x[2]};
                nicearray<I, 4> cl = {pptr_tl->key.x[2], pptr_tl->key.x[3], pptr_bl->key.x[0], pptr_bl->key.x[1]};
                nicearray<I, 4> cr = {pptr_tr->key.x[2], pptr_tr->key.x[3], pptr_br->key.x[0], pptr_br->key.x[1]};
                childlanes[1] = node2lanes(rule, depth - 1, this->make_nonleaf(depth - 1, tc));
                childlanes[3] = node2lanes(rule, depth - 1, this->make_nonleaf(depth - 1, cl));
                childlanes[4] = node2lanes(rule, depth - 1, this->make_nonleaf(depth - 1, cc));
                childlanes[5] = node2lanes(rule, depth - 1, this->make_nonleaf(depth - 1, cr));
                childlanes[7] = node2lanes(rule, depth - 1, this->make_nonleaf(depth - 1, bc));
            }

            adml &= (childlanes[1] & childlanes[3] & childlanes[4] & childlanes[5] & childlanes[7]);

            // From here on only the lane masks (upper 32 bits) are needed.
            for (uint64_t i = 0; i < 9; i++) {
                childlanes[i] >>= 32;
            }

            uint64_t lanes = 0;

            // Rotations within the low 32 bits; any bits spilling above
            // bit 31 are discarded by the final (lanes << 32).
            #define ROTR32(X, Y) (((X) >> (Y)) | ((X) << (32 - (Y))))
            #define ROTL32(X, Y) (((X) << (Y)) | ((X) >> (32 - (Y))))

            /*
             * Lane numbers are modulo 32, with each lane being either
             * 8 rows, 8 columns, or 8hd (in either diagonal direction)
             */
            // a is the lane offset between adjacent subnodes; it is zero
            // once that offset is a multiple of 32 (depth >= 6).
            uint64_t a = (depth < 6) ? (1 << (depth - 1)) : 0;
            uint64_t a2 = (2 * a) & 31;

            if (adml & 0x88) {
                // Horizontal lanes
                lanes |= ROTL32(childlanes[0] | childlanes[1] | childlanes[2], a);
                lanes |= (childlanes[3] | childlanes[4] | childlanes[5]);
                lanes |= ROTR32(childlanes[6] | childlanes[7] | childlanes[8], a);
            }

            if (adml & 0x44) {
                // Diagonal lanes (subnodes grouped along anti-diagonals of the 3-by-3 grid)
                lanes |= ROTL32(childlanes[0], a2);
                lanes |= ROTL32(childlanes[3] | childlanes[1], a);
                lanes |= (childlanes[6] | childlanes[4] | childlanes[2]);
                lanes |= ROTR32(childlanes[7] | childlanes[5], a);
                lanes |= ROTR32(childlanes[8], a2);
            }

            if (adml & 0x22) {
                // Vertical lanes
                lanes |= ROTL32(childlanes[0] | childlanes[3] | childlanes[6], a);
                lanes |= (childlanes[1] | childlanes[4] | childlanes[7]);
                lanes |= ROTR32(childlanes[2] | childlanes[5] | childlanes[8], a);
            }

            if (adml & 0x11) {
                // Diagonal lanes (subnodes grouped along main diagonals of the 3-by-3 grid)
                lanes |= ROTL32(childlanes[2], a2);
                lanes |= ROTL32(childlanes[1] | childlanes[5], a);
                lanes |= (childlanes[0] | childlanes[4] | childlanes[8]);
                lanes |= ROTR32(childlanes[3] | childlanes[7], a);
                lanes |= ROTR32(childlanes[6], a2);
            }

            pptr->value.lanes = adml | ruletag | (lanes << 32);
        }
        return pptr->value.lanes & 0xffffffff0000ffffull;
    }
}
uint64_t is_solitonic(hypernode<I> hnode, int rule) {
    /*
     * Compare the lane summaries of the two tiles held in hnode
     * (index and index2). Returns 0 when their lane masks overlap.
     * Otherwise returns the 4-bit mask of positions where the upper flag
     * nibble (bits 4-7) of one tile coincides with the lower flag nibble
     * (bits 0-3) of the other. The caller treats a nonzero result as
     * "the two tiles provably do not interact".
     */
    const uint64_t first = node2lanes(rule, hnode.depth, hnode.index);
    const uint64_t second = node2lanes(rule, hnode.depth, hnode.index2);

    // Any shared lane rules out independence.
    if (((first & second) >> 32) != 0) {
        return 0;
    }

    const uint64_t crossed = ((first >> 4) & second) | ((second >> 4) & first);
    return (crossed & 15);
}
hypernode<I> iterate_recurse(hypernode<I> hnode, uint64_t mantissa, uint64_t exponent,
                                int rule, int history) {
    /*
     * Advance a pair of tiles (hnode.index, hnode.index2) by
     * mantissa * (2 ** exponent) generations and return the pair of
     * central half-size results at depth hnode.depth - 1.
     *
     * If is_solitonic() reports that the two tiles cannot interact, each
     * is advanced independently with iterate_recurse1. Otherwise the pair
     * is looked up in biresults and, on a miss, computed by the two-stage
     * HashLife recursion applied to both tiles in lockstep.
     *
     * Members inherited from the dependent base class are reached through
     * this->, because unqualified lookup does not search dependent bases.
     */
    hypernode<I> part1(hnode.index, hnode.depth);
    hypernode<I> part2(hnode.index2, hnode.depth);

    if (is_solitonic(hnode, rule)) {
        // BESZEL and ULQOMA tiles are provably non-interacting:
        I i1 = iterate_recurse1(part1, mantissa, exponent, rule, history).index;
        I i2 = iterate_recurse1(part2, mantissa, exponent, rule, history).index;
        return hypernode<I>(i1, i2, hnode.depth - 1);
    } else {
        // Memoisation key: both tile indices, the depth, and the same
        // (exponent, rule, mantissa) descriptor used by iterate_recurse1.
        // NOTE(review): for mantissa == 0 the term (mantissa - 1) wraps to
        // all ones, so the descriptor no longer distinguishes rule or
        // exponent -- confirm this is intended.
        uint64_t hrule = (rule << 1) + (history & 1);
        I gcdesc = ((1 + exponent) << 7) | (hrule << 3) | (mantissa - 1);
        nicearray<I, 4> k = {hnode.index, hnode.index2, static_cast<I>(hnode.depth), gcdesc};

        // NOTE(review): this requires biresults to be a single table keyed
        // on nicearray<I, 4> that offers getnode/setnode/ind2ptr; confirm
        // the member declaration matches.
        I p = biresults.getnode(k, false);

        if (p == ((I) -1)) {
            // Cache miss: compute the pair of results.
            hypernode<I> res(0, 0, hnode.depth - 1);

            if (hnode.depth == 1) {
                // Base case: combine the two tiles, advance the combined
                // tile, and file the result under one of the two slots
                // according to the flag bits 4-7 of the combined tile.
                hypernode<I> hnode2 = this->boolean_recurse(part1, part2, 1);
                I i3 = iterate_recurse1(hnode2, mantissa, exponent, rule, history).index;
                if (i3 != 0) {
                    uint64_t lanes = node2lanes(rule, hnode2.depth, hnode2.index);
                    if (lanes & 240) {
                        res.index2 = i3;
                    } else {
                        res.index = i3;
                    }
                }
            } else {
                auto ch91 = ninechildren(part1);
                auto ch92 = ninechildren(part2);

                if (mantissa == 0) {
                    // Nothing to advance: return the two centre squares.
                    res.index = ch91.x[4];
                    res.index2 = ch92.x[4];
                } else {
                    // Stage 1: the nine overlapping subsquares of each tile.
                    bool bothstages = (hnode.depth <= (1 + exponent));
                    uint64_t newmant = bothstages ? mantissa : 0;
                    for (uint64_t i = 0; i < 9; i++) {
                        auto fh = iterate_recurse(hypernode<I>(ch91.x[i], ch92.x[i], hnode.depth - 1), newmant, exponent, rule, history);
                        ch91.x[i] = fh.index; ch92.x[i] = fh.index2;
                    }

                    // Stage 2: the four quadrants built from stage 1.
                    auto ch41 = this->fourchildren(part1, ch91);
                    auto ch42 = this->fourchildren(part2, ch92);
                    for (uint64_t i = 0; i < 4; i++) {
                        auto fh = iterate_recurse(hypernode<I>(ch41.x[i], ch42.x[i], hnode.depth - 1), mantissa, exponent, rule, history);
                        ch41.x[i] = fh.index; ch42.x[i] = fh.index2;
                    }

                    res.index = this->make_nonleaf(hnode.depth - 1, ch41);
                    res.index2 = this->make_nonleaf(hnode.depth - 1, ch42);
                }
            }

            nicearray<I, 2> v = {res.index, res.index2};
            p = biresults.setnode(k, v);
        }

        // Return the memoised pair; previously control fell off the end of
        // the function here without returning a value.
        auto xptr = biresults.ind2ptr(p);
        return hypernode<I>(xptr->value.x[0], xptr->value.x[1], hnode.depth - 1);
    }
}
};
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment