Commit d0b9b542 authored by Adam P. Goucher

Higher-range outer-totalistic rules

parent 0866a3ce
#ifndef LIFELIB_VERSION /*
__version__=[x.replace('"', '') for x in '''
*/
#define LIFELIB_VERSION "ll2.0.16"
#define LIFELIB_VERSION "ll2.0.17"
// '''.split() if ('ll' in x)][0][2:]
#endif
......@@ -441,6 +441,85 @@ namespace apg {
: "xmm0", "xmm1", "xmm2", "memory" );
}
/*
 * Table-driven, per-byte update of 64 bytes of `nc`, performed in place.
 *
 * The routine reads 64 bytes from `cc` and 64 bytes from `nc`, handles them
 * as four 16-byte chunks (offsets 0, 16, 32, 48), and overwrites each chunk
 * of `nc` with bytes that are either 0xFF or 0x00.
 *
 * `ruledata` is read as four 16-byte rows:
 *   bytes  0..15  pshufb lookup table, ANDed with the "cc byte is zero" mask
 *   bytes 16..31  pshufb lookup table, XORed into the result
 *   bytes 32..47  AND mask applied to the shifted `nc` bytes
 *   bytes 48..63  pshufb lookup table indexed directly by the `nc` bytes
 *
 * For every byte position the stored value is 0xFF when
 *   ((cc == 0 ? row0[idx] : 0) ^ row1[idx]) & row3[nc] == 0
 * and 0x00 otherwise, where idx is the masked, shifted `nc` byte.
 *
 * NOTE(review): presumably `nc` holds neighbourhood counts, `cc` the centre
 * cell bytes, and the rows are (birth ^ survival), ~survival, 0x0F and
 * single-bit masks, so the output is 0xFF exactly where the next-generation
 * cell is alive -- confirm against the code that generates `ruledata`.
 *
 * All loads and stores use movups, so no alignment is required here.
 * Requires SSSE3 (pshufb).
 */
void hrot_ssse3(uint64_t *cc, uint64_t *nc, const uint8_t *ruledata) {
asm (
// xmm2 = all zeros (comparison constant); xmm5 / xmm6 = ruledata rows 2 / 3.
// These three registers stay constant for all four chunks.
"pxor %%xmm2, %%xmm2 \n\t"
"movups 32(%2), %%xmm5 \n\t"
"movups 48(%2), %%xmm6 \n\t"
// ---- chunk 0: bytes 0..15 ----
// pshufb overwrites its destination, so rows 0 and 1 are reloaded per chunk.
"movups (%0), %%xmm0 \n\t"
"movups (%1), %%xmm1 \n\t"
"movups (%2), %%xmm3 \n\t"
"movups 16(%2), %%xmm4 \n\t"
// xmm0 = 0xFF where the cc byte is zero, 0x00 elsewhere.
"pcmpeqb %%xmm2, %%xmm0 \n\t"
// xmm7 = row 3 looked up by the nc bytes (pshufb uses the low 4 index bits;
// an index byte with its top bit set yields 0).
"movdqa %%xmm6, %%xmm7 \n\t"
"pshufb %%xmm1, %%xmm7 \n\t"
// xmm1 = (nc shifted right by 3 within each 32-bit lane) & row 2; the mask
// must discard the bits that cross over from the neighbouring byte.
"psrld $3, %%xmm1 \n\t"
"pand %%xmm5, %%xmm1 \n\t"
// Look up rows 0 and 1 with the reduced index.
"pshufb %%xmm1, %%xmm3 \n\t"
"pshufb %%xmm1, %%xmm4 \n\t"
// Combine: ((zero-mask & row0[idx]) ^ row1[idx]) & row3[nc].
"pand %%xmm3, %%xmm0 \n\t"
"pxor %%xmm4, %%xmm0 \n\t"
"pand %%xmm7, %%xmm0 \n\t"
// Result byte = 0xFF where the combination is zero, 0x00 otherwise.
"pcmpeqb %%xmm2, %%xmm0 \n\t"
"movups %%xmm0, (%1) \n\t"
// ---- chunk 1: bytes 16..31 (same sequence as chunk 0) ----
"movups 16(%0), %%xmm0 \n\t"
"movups 16(%1), %%xmm1 \n\t"
"movups (%2), %%xmm3 \n\t"
"movups 16(%2), %%xmm4 \n\t"
"pcmpeqb %%xmm2, %%xmm0 \n\t"
"movdqa %%xmm6, %%xmm7 \n\t"
"pshufb %%xmm1, %%xmm7 \n\t"
"psrld $3, %%xmm1 \n\t"
"pand %%xmm5, %%xmm1 \n\t"
"pshufb %%xmm1, %%xmm3 \n\t"
"pshufb %%xmm1, %%xmm4 \n\t"
"pand %%xmm3, %%xmm0 \n\t"
"pxor %%xmm4, %%xmm0 \n\t"
"pand %%xmm7, %%xmm0 \n\t"
"pcmpeqb %%xmm2, %%xmm0 \n\t"
"movups %%xmm0, 16(%1) \n\t"
// ---- chunk 2: bytes 32..47 (same sequence as chunk 0) ----
"movups 32(%0), %%xmm0 \n\t"
"movups 32(%1), %%xmm1 \n\t"
"movups (%2), %%xmm3 \n\t"
"movups 16(%2), %%xmm4 \n\t"
"pcmpeqb %%xmm2, %%xmm0 \n\t"
"movdqa %%xmm6, %%xmm7 \n\t"
"pshufb %%xmm1, %%xmm7 \n\t"
"psrld $3, %%xmm1 \n\t"
"pand %%xmm5, %%xmm1 \n\t"
"pshufb %%xmm1, %%xmm3 \n\t"
"pshufb %%xmm1, %%xmm4 \n\t"
"pand %%xmm3, %%xmm0 \n\t"
"pxor %%xmm4, %%xmm0 \n\t"
"pand %%xmm7, %%xmm0 \n\t"
"pcmpeqb %%xmm2, %%xmm0 \n\t"
"movups %%xmm0, 32(%1) \n\t"
// ---- chunk 3: bytes 48..63 (same sequence as chunk 0) ----
"movups 48(%0), %%xmm0 \n\t"
"movups 48(%1), %%xmm1 \n\t"
"movups (%2), %%xmm3 \n\t"
"movups 16(%2), %%xmm4 \n\t"
"pcmpeqb %%xmm2, %%xmm0 \n\t"
"movdqa %%xmm6, %%xmm7 \n\t"
"pshufb %%xmm1, %%xmm7 \n\t"
"psrld $3, %%xmm1 \n\t"
"pand %%xmm5, %%xmm1 \n\t"
"pshufb %%xmm1, %%xmm3 \n\t"
"pshufb %%xmm1, %%xmm4 \n\t"
"pand %%xmm3, %%xmm0 \n\t"
"pxor %%xmm4, %%xmm0 \n\t"
"pand %%xmm7, %%xmm0 \n\t"
"pcmpeqb %%xmm2, %%xmm0 \n\t"
"movups %%xmm0, 48(%1) \n\t"
: /* no output operands -- implicitly volatile */
// Operands: %0 = cc (read only), %1 = nc (read and written), %2 = ruledata.
: "r" (cc), "r" (nc), "r" (ruledata)
// xmm0..xmm7 are all used as scratch; "memory" covers the stores through %1.
: "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "memory" );
}
void ltl_kernel(uint64_t *inleaves, uint64_t *outleafx, int range, uint8_t bmin, uint8_t bmax, uint8_t smin, uint8_t smax) {
uint64_t lb1 = (bmin ^ smin);
......@@ -472,4 +551,22 @@ namespace apg {
bytes2bits(b, outleafx);
}
/*
 * Run one HROT update step on a block of leaves.
 *
 * The four 4-word groups of `inleaves` are expanded with bits2bytes_sse2
 * into one contiguous byte buffer, and words 3, 6, 9 and 12 are expanded
 * separately. convolve2d() turns the large buffer into a 32-word buffer
 * for the given `range`; hrot_ssse3() then rewrites that buffer in place,
 * 64 bytes at a time, using the separately expanded words and `ruledata`.
 * Finally bytes2bits() packs the result into `outleafx`.
 *
 * NOTE(review): presumably words 3, 6, 9, 12 are the centre region whose
 * next state is being computed and convolve2d yields neighbourhood counts
 * -- confirm against those helpers.
 */
void hrot_kernel(uint64_t *inleaves, uint64_t *outleafx, int range, const uint8_t *ruledata) {

    uint64_t expanded[128];   // byte expansion of inleaves[0..15]
    uint64_t result[32];      // convolve2d output, updated in place by hrot_ssse3
    uint64_t picked_bytes[32];

    // Words 3, 6, 9 and 12 are gathered so they can be expanded in one call.
    uint64_t picked[4] = {inleaves[3], inleaves[6], inleaves[9], inleaves[12]};

    // Each group of 4 input words fills 32 words of the expanded buffer.
    for (int group = 0; group < 4; group++) {
        bits2bytes_sse2(inleaves + 4 * group, expanded + 32 * group);
    }
    bits2bytes_sse2(picked, picked_bytes);

    convolve2d(expanded, result, range);

    // hrot_ssse3 handles 64 bytes (8 words) per call; 4 calls cover 32 words.
    for (int part = 0; part < 4; part++) {
        hrot_ssse3(picked_bytes + 8 * part, result + 8 * part, ruledata);
    }

    bytes2bits(result, outleafx);
}
}
......@@ -12,3 +12,4 @@ genus_list.append({'name': 'ltl', 'regex': 'r[234567]b[1-9][0-9]*t[1-9][0-9]*s[1
# Register further rule genera: each entry pairs a genus name with the
# regular expression stored under its 'regex' key.
genus_list.extend(
    {'name': genus_name, 'regex': genus_regex}
    for genus_name, genus_regex in (
        ('gltl', 'g[1-9][0-9]*r[234567]b[1-9][0-9]*t[1-9][0-9]*s[1-9][0-9]*t[1-9][0-9]*'),
        ('isogeny', 'g[1-9][0-9]*b[1-9ceaiknjqrytwz-]*s[0-9ceaiknjqrytwz-]*'),
        ('bsfkl', 'b1?2?3?4?5?6?7?8?s0?1?2?3?4?5?6?7?8?f0?1?2?3?4?5?6?7?8?k0?1?2?3?4?5?6?7?8?l0?1?2?3?4?5?6?7?8?'),
        ('hrot', 'r[2345]b[0-9a-f]*s[0-9a-f]*z?'),
    )
)
from .ltl import mantissa, family, bitplanes
def get_hrot_params(rulestring):
    """Parse an HROT rulestring into ``(range, birth_int, survival_int)``.

    The rulestring is expected to look like ``r<R>b<hex>s<hex>[z]``: the
    character at index 1 is the range digit, the birth and survival
    conditions are hexadecimal strings separated by ``s``, and any ``z``
    characters are ignored here.

    Args:
        rulestring: the rule name, e.g. ``'r2b00001cs00000f'``.

    Returns:
        A tuple ``(rulerange, b_int, s_int)`` where the last two items are
        the birth and survival hex strings converted to integers.

    Raises:
        ValueError: if the range character is not a digit, if the birth and
            survival parts are not separated by exactly one ``s``, or if
            either part does not contain exactly ``R * (R + 1)`` hex
            characters.
    """
    rulerange = int(rulestring[1])
    # Required number of hex characters in each condition string.
    dhexchars = rulerange * (rulerange + 1)

    # Skip the 'r<R>b' prefix and drop the 'z' flag before splitting.
    gparams = rulestring[3:].replace('z', '').split('s')

    # A missing separator used to surface as a bare IndexError and an extra
    # one as an unrelated int() failure; report both as a malformed rule.
    if len(gparams) != 2:
        raise ValueError("Rulestring %r must contain exactly one 's' separating birth and survival conditions" % (rulestring,))

    if len(gparams[0]) != dhexchars:
        raise ValueError("Birth conditions for range-%d rules must have exactly %d hex characters, not %d" % (rulerange, dhexchars, len(gparams[0])))

    if len(gparams[1]) != dhexchars:
        raise ValueError("Survival conditions for range-%d rules must have exactly %d hex characters, not %d" % (rulerange, dhexchars, len(gparams[1])))

    return tuple([rulerange] + [int(x, 16) for x in gparams])
def create_rule(rulestring):
    """Generate ``iterators_<rulestring>.h`` in the current directory.

    The header declares a namespace named after the rulestring (with ``-``
    replaced by ``_``) containing a 64-entry ``ruledata`` byte table and an
    ``iterate_var_leaf`` function that forwards to ``apg::hrot_kernel``
    with the rule's range and that table.

    Table layout (16 entries each):
        0..15   birth byte XOR survival byte
        16..31  255 XOR survival byte
        32..47  the constant 15
        48..63  the powers of two 1..128, twice

    Before being split into 16 little-endian bytes, the birth integer is
    shifted left by one bit and the survival integer by two bits; bit 1 of
    the survival value is set when the rulestring contains ``z``.
    NOTE(review): presumably the shifts line the bit index up with a
    neighbourhood count that includes the centre cell -- confirm.
    """
    rulerange, b_int, s_int = get_hrot_params(rulestring)

    birth_bits = b_int << 1
    survival_bits = (s_int << 2) + (2 if 'z' in rulestring else 0)

    # Slice each 128-bit value into 16 bytes, least significant first.
    birth_bytes = [(birth_bits >> shift) & 255 for shift in range(0, 128, 8)]
    survival_bytes = [(survival_bits >> shift) & 255 for shift in range(0, 128, 8)]

    table = [bb ^ sb for bb, sb in zip(birth_bytes, survival_bytes)]
    table.extend(255 ^ sb for sb in survival_bytes)
    table.extend([15] * 16)
    table.extend([1 << k for k in range(8)] * 2)

    # Entry 0 sits on the declaration line; entries 1..63 follow in three
    # rows of 21, with the last entry closing the initialiser.
    cells = [' %3d,' % value for value in table[1:63]]
    cells.append(' %3d};' % table[63])

    pieces = [
        '#pragma once\n',
        '#include <stdint.h>\n',
        '#include "../ltl.h"\n',
        'namespace %s {\n\n' % rulestring.replace('-', '_'),
        'const static uint8_t ruledata[] __attribute__((aligned(64))) = {%d,\n' % table[0],
    ]
    for row_start in range(0, 63, 21):
        pieces.append(' ' + ''.join(cells[row_start:row_start + 21]) + '\n')
    pieces.extend([
        ' bool iterate_var_leaf(uint64_t *inleaves, uint64_t *outleaf) {\n\n',
        ' apg::hrot_kernel(inleaves, outleaf, %d, ruledata);\n' % rulerange,
        ' return false;\n',
        ' }\n',
        '}\n',
    ])

    with open('iterators_%s.h' % rulestring, 'w') as f:
        f.writelines(pieces)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment