Commit 32e4151c authored by Adam P. Goucher

Merge branch 'genera' into 'master'

Genera

See merge request !2
parents 17551f59 72dceea8
**/*.pyc
**/*.pyo
from ._version import __version__
#ifndef LIFELIB_VERSION /*
# Polyglot version stamp: this text is written to be read both by Python and
# by the C preprocessor. Python sees the preprocessor lines as comments and
# picks, from the whitespace-split triple-quoted string below, the one token
# containing 'll', with its double quotes removed. The C preprocessor sees
# the Python code as comment text and is left with the LIFELIB_VERSION define.
__version__=[x.replace('"', '') for x in '''
*/
#define LIFELIB_VERSION "ll1.4"
// '''.split() if ('ll' in x)][0]
#endif
......@@ -33,10 +33,8 @@
if (bis >= 9) {
apg::z64_to_r32_centre_avx(inleaf, d);
// displaycentre(d);
iterate_avx_16_12(d, e, 0, 0, 0, false);
iterate_avx_12_8(d+2, e+2, 0, 0, 0, false);
// displaycentre(d);
} else {
apg::z64_to_r32_centre_ssse3(inleaf, d);
iterate_sse2_16_12(d, e, 0, 0, 0, false);
......@@ -44,8 +42,42 @@
}
centres[0] = r32_centre_to_u64(d, 0, 0);
// std::cerr << inleaf[0] << " " << inleaf[1] << " " << inleaf[2] << " " << inleaf[3] << ": " << centres[0] << std::endl;
}
// Runs the 32_28 kernel on `d`, choosing the sse2 / avx / avx2 variant from
// the value of apg::best_instruction_set(). The two optional pointer slots
// of the kernel are passed as 0; the kernel's return value is forwarded.
int iterate_var_32_28(uint32_t* d, uint32_t* diffs) {
    uint32_t e[32];  // scratch buffer handed to the kernel as its second argument
    const int level = apg::best_instruction_set();
    if (level < 9) {
        return iterate_sse2_32_28(d, e, 0, 0, diffs, false);
    }
    if (level < 10) {
        return iterate_avx_32_28(d, e, 0, 0, diffs, false);
    }
    return iterate_avx2_32_28(d, e, 0, 0, diffs, false);
}
// Same dispatch as the two-pointer overload, but additionally forwards `h`
// as the kernel's third argument; the fourth slot is passed as 0.
int iterate_var_32_28(uint32_t* d, uint32_t* h, uint32_t* diffs) {
    uint32_t e[32];  // scratch buffer handed to the kernel as its second argument
    const int level = apg::best_instruction_set();
    if (level < 9) {
        return iterate_sse2_32_28(d, e, h, 0, diffs, false);
    }
    if (level < 10) {
        return iterate_avx_32_28(d, e, h, 0, diffs, false);
    }
    return iterate_avx2_32_28(d, e, h, 0, diffs, false);
}
// Dispatches to the sse2 / avx / avx2 32_28 kernel according to
// apg::best_instruction_set(), forwarding both `h` and `j` to the kernel.
int iterate_var_32_28(uint32_t* d, uint32_t* h, uint32_t* j, uint32_t* diffs) {
    uint32_t e[32];  // scratch buffer handed to the kernel as its second argument
    const int level = apg::best_instruction_set();
    if (level < 9) {
        return iterate_sse2_32_28(d, e, h, j, diffs, false);
    }
    if (level < 10) {
        return iterate_avx_32_28(d, e, h, j, diffs, false);
    }
    return iterate_avx2_32_28(d, e, h, j, diffs, false);
}
bool iterate_var_leaf(int n, uint64_t * inleaves, uint64_t * outleaf) {
......
......@@ -8,6 +8,20 @@ namespace apg {
return -1;
}
// Returns the family number for a rule index. Rule 0 is the only index
// listed explicitly; it and every other index yield 0.
int uli_get_family(int rule) {
    if (rule == 0) {
        return 0;
    }
    return 0;
}
// Returns the mantissa value for a rule index: 511 for rule 0 and 3 for
// any other index.
// NOTE(review): presumably a bitmask of permitted mantissae (511 = bits 0..8,
// 3 = bits 0..1) — confirm against the callers.
uint64_t uli_valid_mantissa(int rule) {
    if (rule == 0) {
        return 511;
    }
    return 3;
}
int iterate_var_leaf(int rule, int n, uint64_t * inleaves, uint64_t * outleaf) {
switch(rule) {
case 0 :
......@@ -17,17 +31,9 @@ namespace apg {
}
// Rule-indexed front end for the (d, diffs) form of iterate_var_32_28.
// Rule 0 is served by the b3s23 kernels; any other rule index returns -1.
// NOTE(review): this span looks like removed and added diff lines shown
// together. As written, every branch of the if/else chain returns, so the
// trailing call to b3s23::iterate_var_32_28 in case 0 is unreachable —
// confirm which of the two forms is the intended one.
int iterate_var_32_28(int rule, uint32_t* d, uint32_t * diffs) {
    uint32_t e[32];
    int bis = apg::best_instruction_set();
    switch(rule) {
        case 0 :
            if (bis >= 10) {
                return b3s23::iterate_avx2_32_28(d, e, 0, 0, diffs, false);
            } else if (bis >= 9) {
                return b3s23::iterate_avx_32_28(d, e, 0, 0, diffs, false);
            } else {
                return b3s23::iterate_sse2_32_28(d, e, 0, 0, diffs, false);
            }
            return b3s23::iterate_var_32_28(d, diffs);
    }
    return -1;
}
......@@ -41,17 +47,9 @@ namespace apg {
}
// Rule-indexed front end for the (d, h, diffs) form of iterate_var_32_28.
// Rule 0 is served by the b3s23 kernels; any other rule index returns -1.
// NOTE(review): this span looks like removed and added diff lines shown
// together. As written, every branch of the if/else chain returns, so the
// trailing call to b3s23::iterate_var_32_28 in case 0 is unreachable —
// confirm which of the two forms is the intended one.
int iterate_var_32_28(int rule, uint32_t* d, uint32_t* h, uint32_t * diffs) {
    uint32_t e[32];
    int bis = apg::best_instruction_set();
    switch(rule) {
        case 0 :
            if (bis >= 10) {
                return b3s23::iterate_avx2_32_28(d, e, h, 0, diffs, false);
            } else if (bis >= 9) {
                return b3s23::iterate_avx_32_28(d, e, h, 0, diffs, false);
            } else {
                return b3s23::iterate_sse2_32_28(d, e, h, 0, diffs, false);
            }
            return b3s23::iterate_var_32_28(d, h, diffs);
    }
    return -1;
}
......@@ -65,35 +63,11 @@ namespace apg {
}
// Rule-indexed front end for the (d, h, j, diffs) form of iterate_var_32_28.
// Rule 0 is served by the b3s23 kernels; any other rule index returns -1.
// NOTE(review): this span looks like removed and added diff lines shown
// together. As written, every branch of the if/else chain returns, so the
// trailing call to b3s23::iterate_var_32_28 in case 0 is unreachable —
// confirm which of the two forms is the intended one.
int iterate_var_32_28(int rule, uint32_t* d, uint32_t* h, uint32_t* j, uint32_t * diffs) {
    uint32_t e[32];
    int bis = apg::best_instruction_set();
    switch(rule) {
        case 0 :
            if (bis >= 10) {
                return b3s23::iterate_avx2_32_28(d, e, h, j, diffs, false);
            } else if (bis >= 9) {
                return b3s23::iterate_avx_32_28(d, e, h, j, diffs, false);
            } else {
                return b3s23::iterate_sse2_32_28(d, e, h, j, diffs, false);
            }
            return b3s23::iterate_var_32_28(d, h, j, diffs);
    }
    return -1;
}
// Family lookup by rule index: the single listed rule (0) and the fallback
// both produce 0.
int uli_get_family(int rule) {
    const bool listed = (rule == 0);
    if (listed) {
        return 0;
    }
    return 0;
}
// Mantissa lookup by rule index: 511 for rule 0, otherwise 3.
uint64_t uli_valid_mantissa(int rule) {
    const bool listed = (rule == 0);
    if (listed) {
        return 511;
    }
    return 3;
}
}
#!/usr/bin/python
# Command-line generator: takes an isotropic rulestring as the first
# argument, checks that it is written in canonical form, and writes the
# lookup tables for that rule to lifelogic/ma_<rulestring>.h.
#
# Uses range() rather than the Python-2-only xrange() so the script runs
# under both Python 2 and Python 3; every loop here is at most 512 long.

from sys import argv

rulestring = argv[1]

# Parse the rulestring into {(centre, neighbour count): '+' + modifiers}.
# 'b' / 's' select centre 0 / 1, '/' toggles it, a digit opens a new entry
# and any following letters (or '-') are appended to that entry.
isotrans = {}
centre = 0
for c in rulestring.lower().replace('v', 'r'):
    if c in '012345678':
        lastloc = int(c)
        isotrans[(centre, lastloc)] = "+"
    elif (c == 'b'):
        centre = 0
    elif (c == 's'):
        centre = 1
    elif (c == '/'):
        centre = 1 - centre
    elif c in 'ceaiknjqrytwz-':
        isotrans[(centre, lastloc)] += c

# lord[i] is the letter attached to the 9-bit neighbourhood i (bit 4 is the
# centre cell, see the index arithmetic below).
lord = ""
lord += "_ceaccaieaeaknja_ceaccaieaeaknjaekejanaairerririekejanaairerriri"
lord += "ccknncqnaijaqnwaccknncqnaijaqnwakykkqyqjrtjnzrqakykkqyqjrtjnzrqa"
lord += "ekirkyrtejerkkjnekirkyrtejerkkjnekejjkrnejecjyccekejjkrnejecjycc"
lord += "anriqyzraariqjqaanriqyzraariqjqajkjywkqkrnccqkncjkjywkqkrnccqknc"
lord += "cnkqccnnkqkqyykjcnkqccnnkqkqyykjaqjwinaarzjqtrnaaqjwinaarzjqtrna"
lord += "ccyyccyennkjyekeccyyccyennkjyekenykknejeirykrikenykknejeirykrike"
lord += "aqrznyirjwjqkkykaqrznyirjwjqkkykaqrqajiarqcnnkccaqrqajiarqcnnkcc"
lord += "intrneriaanajekeintrneriaanajekeajnkaeaeiaccaec_ajnkaeaeiaccaec_"

# Number of set bits in each 4-bit value.
popcounts = [0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4]

# lord2[i] is the output bit of the rule for neighbourhood i.
lord2 = []
for i in range(512):
    centre = (i >> 4) & 1
    ncount = popcounts[i & 15] + popcounts[i >> 5]
    if (centre, ncount) in isotrans:
        fragment = isotrans[(centre, ncount)]
        if (len(fragment) == 1):
            # Bare digit: every arrangement with this count is accepted.
            lord2.append(1)
        else:
            # A leading '-' means the listed letters are exclusions, so
            # start from 1 and flip on a match; otherwise start from 0.
            l = 1 if (fragment[1] == '-') else 0
            for c in fragment:
                if (c == lord[i]):
                    l = 1 - l
            lord2.append(l)
    else:
        lord2.append(0)

# Letters that may follow each neighbour count.
rule_letters = {}
rule_letters[1] = "ce"
rule_letters[2] = "ceaikn"
rule_letters[3] = "ceaiknjqry"
rule_letters[4] = "ceaiknjqrytwz"
rule_letters[5] = "ceaiknjqry"
rule_letters[6] = "ceaikn"
rule_letters[7] = "ce"

# Rebuild the rulestring from the table to obtain its canonical spelling.
canonicals = [['' for i in range(9)] for j in range(2)]
canstring = ''

for i in range(512):
    centre = (i >> 4) & 1
    ncount = popcounts[i & 15] + popcounts[i >> 5]
    if (lord2[i] == 1):
        canonicals[centre][ncount] += lord[i]

for (centre, z) in enumerate('bs'):
    canstring += z
    for ncount in range(9):
        goodies = set(canonicals[centre][ncount])
        if (len(goodies) == 0):
            continue
        canstring += str(ncount)
        if ncount not in rule_letters:
            continue
        a = ''.join(sorted([c for c in rule_letters[ncount] if c in goodies]))
        b = '-' + (''.join(sorted([c for c in rule_letters[ncount] if c not in goodies])))
        if (len(b) == 1):
            # Nothing excluded: the bare digit already says it all.
            continue
        elif (len(b) < len(a)):
            canstring += b
        else:
            canstring += a

if (rulestring != canstring):
    print('\033[31;1mError:\033[0m %s is a non-canonical version of %s' % (rulestring, canstring))
    print('Please re-run the code with the canonical rulestring:')
    print('./recompile.sh --rule \033[36;1m%s\033[0m' % canstring)
    exit(1)

print('Compressing 512-bit lookup table for rule %s...' % rulestring)

# Pack the 512 output bits into 64 bytes, least significant bit first.
lut9 = [sum([(lord2[8*i+j] << j) for j in range(8)]) for i in range(64)]

print('Creating magic sauce for rule %s...' % rulestring)


def oi(i0, i1, i2, i4, i5, i6):
    # One byte holding the inverted output bit for each of the eight
    # combinations of (i3, i7, i8), with the other six inputs fixed.
    thisbyte = 0
    for i3 in range(2):
        for i7 in range(2):
            for i8 in range(2):
                thisindex = sum([(j << i) for (i, j) in enumerate([i0, i1, i2, i3, i4, i5, i6, i7, i8])])
                thisbit = 1 - lord2[thisindex]
                thisbyte |= (thisbit << (4 * i3 + 2 * i7 + i8))
    return thisbyte


def ora(i4, i5):
    # Sixteen oi() bytes for fixed i4 and i5; i0 varies fastest.
    return [oi(i0, i1, i2, i4, i5, i6) for i6 in range(2) for i1 in range(2) for i2 in range(2) for i0 in range(2)]


origarrays = [ora(i4, i5) for i5 in range(2) for i4 in range(2)]
maskarrays = [[0 for i in range(16)] for j in range(8)]

maskarrays[0] = [a ^ b for (a, b) in zip(origarrays[0], origarrays[2])]
maskarrays[1] = [a ^ b for (a, b) in zip(origarrays[1], origarrays[3])]
maskarrays[2] = origarrays[2]
maskarrays[3] = origarrays[3]
maskarrays[4] = [j for i in range(8) for j in [2 ** i, 0]]
maskarrays[5] = [j for i in range(8) for j in [0, 2 ** i]]
maskarrays[6] = [127] * 16
maskarrays[7] = [112] * 16

# Three fixed 16-byte rows appended after the 64-byte table (112 bytes total).
lut9 += [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]
lut9 += [255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0]
lut9 += [255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0]

with open('lifelogic/ma_%s.h' % rulestring, 'w') as f:

    # Double every mask array for avx2 compatibility:
    marray = [x for m in maskarrays for x in (m + m)]

    f.write('// Two hundred and fifty-six seemingly arbitrary bytes\n')
    f.write('const static uint8_t __magicsauce[] __attribute__((aligned(64))) = {%d,\n' % marray[0])
    # Entry 0 is on the opening line; the other 255 follow as 15 rows of 17.
    for i in range(15):
        currstring = ' '
        for j in range(i*17+1, i*17+18):
            currstring += ((' %3d};' if (j == 255) else ' %3d,') % marray[j])
        f.write(currstring + '\n')

    f.write('\n// 512-bit lookup table with extra things appended\n')
    f.write('const static uint8_t lut9[] __attribute__((aligned(64))) = {%d, %d, %d, %d,\n' % tuple(lut9[:4]))
    # Entries 0..3 are on the opening line; the other 108 follow as 6 rows of 18.
    for i in range(6):
        currstring = ' '
        for j in range(i*18+4, i*18+22):
            currstring += ((' %3d};' if (j == 111) else ' %3d,') % lut9[j])
        f.write(currstring + '\n')

print('...completed.')
#pragma once
#define LIFELIB_VERSION "ll1.31"
#include "_version.py"
#include "bitbounds.h"
#include <stdint.h>
......
import re
from importlib import import_module
from genuslist import genus_list
def obtain_genus(rulestring):
    """Return the name of the first genus whose regex matches *rulestring*.

    Each entry of ``genus_list`` is tried in order; its pattern is anchored
    at the end with ``$`` and applied with ``re.match``.

    Raises:
        ValueError: if no entry matches.
    """
    for genus in genus_list:
        anchored = genus['regex'] + '$'
        if re.match(anchored, rulestring) is None:
            continue
        return genus['name']
    raise ValueError('Rule "%s" does not belong to any genus' % rulestring)
def genus_to_module(genus):
    """Import and return the sibling module named *genus*.

    The import is relative (leading dot) and resolved against ``__name__``.
    """
    return import_module('.' + genus, __name__)
def rule_property(rulestring, attribute):
    """Look up *attribute* on the genus module that handles *rulestring*.

    A callable attribute is invoked with the rulestring and its result is
    returned; any other attribute is returned unchanged.
    """
    module = genus_to_module(obtain_genus(rulestring))
    value = getattr(module, attribute)
    return value(rulestring) if callable(value) else value
def create_rule(rulestring):
    """Run the ``create_rule`` hook of the genus module for *rulestring*.

    The hook's result is discarded; this function returns ``None``.
    """
    hook_name = 'create_rule'
    rule_property(rulestring, hook_name)
This diff is collapsed.
from _iwriter import iwriter
# Module-level attributes of this genus.
# NOTE(review): presumably read by name (getattr) from the genus dispatcher;
# the meaning of the numeric family code is not visible here — confirm.
family = 2
mantissa = {0, 1}
def bitplanes(rulestring):
    """Return the bit-plane count for a rulestring of the form ``g<N>b...``.

    ``N`` is the integer between the first character and the first ``'b'``.
    For ``N == 3`` the result is 2; otherwise it is the length of
    ``bin(N - 3)``, which includes the ``'0b'`` prefix.
    """
    states = int(rulestring[1:rulestring.index('b')])
    if states == 3:
        return 2
    return len(bin(states - 3))
def create_rule(rulestring):
    """Write the generated sources for one rule of this genus.

    Produces, in the current directory, one ``ll_<iset>_<logstring>.asm``
    file for each of sse2, avx and avx2, followed by
    ``iterators_<rulestring>.h``. ``logstring`` is the rulestring from its
    first ``'b'`` onwards.
    """
    logstring = rulestring[rulestring.index('b'):]
    instruction_sets = [['sse2'], ['sse2', 'avx'], ['sse2', 'avx', 'avx2']]

    for iset in instruction_sets:
        asm_name = 'll_%s_%s.asm' % (iset[-1], logstring)
        with open(asm_name, 'w') as f:
            genewriter(f, iset).genlogic(logstring)

    preamble = (
        '#pragma once\n',
        '#include <stdint.h>\n',
        '#include "../lifeconsts.h"\n',
        '#include "../lifeperm.h"\n',
        '#include "../eors.h"\n',
        'namespace %s {\n\n' % rulestring.replace('-', '_'),
    )

    with open('iterators_%s.h' % rulestring, 'w') as f:
        for line in preamble:
            f.write(line)

        for iset in instruction_sets:
            writer = genewriter(f, iset)
            writer.write_function(rulestring, 20, 16)
            writer.write_iterator()

        # The number of states is the integer between 'g' and 'b'.
        nstates = int(rulestring[1:rulestring.index('b')])
        gwrite_leaf_iterator(f, nstates)
        f.write('}\n')
class genewriter(iwriter):
    """Writer for this genus; emits C++ text to ``self.f``.

    Relies on ``self.f``, ``self.besti`` and ``self.assemble`` provided by
    the ``iwriter`` base class.
    """

    def write_function(self, rulestring, rowcount, dwidth):
        """Emit ``iterate_<besti>_<rowcount>_<dwidth>`` for *rulestring*.

        The function body is whatever ``self.assemble`` emits, followed by a
        fixed loop over rows 1 .. rowcount-2 that updates e, j and d.
        """
        out = self.f
        func_name = 'iterate_%s_%d_%d' % (self.besti, rowcount, dwidth)
        arg_list = 'uint32_t * __restrict__ d, uint32_t * __restrict__ e, uint32_t * __restrict__ h, uint32_t * __restrict__ j'
        out.write(' void %s(%s) {\n' % (func_name, arg_list))

        # Only the part of the rulestring from 'b' onwards is assembled.
        logstring = rulestring[rulestring.index('b'):]
        self.assemble(logstring, 0, rowcount, dwidth)

        epilogue = [
            ' for (int i = 1; i < %d; i++) {\n' % (rowcount - 1),
            ' e[i-1] &= (~h[i]);\n',
            ' j[i] = d[i] & (~e[i-1]);\n',
            ' d[i] = e[i-1];\n',
            ' }\n',
            ' return;\n',
            ' }\n\n',
        ]
        for line in epilogue:
            out.write(line)

    def write_iterator(self):
        """Emit ``iterate_var_<besti>``, a wrapper around the 20_16 kernel."""
        out = self.f
        func_name = 'iterate_var_%s' % self.besti
        body = [
            ' void %s(uint32_t * __restrict__ d, uint32_t * __restrict__ h) {\n' % func_name,
            ' uint32_t e[32];\n',
            ' uint32_t j[32];\n',
            ' iterate_%s_20_16(d+6, e+6, h+6, j+6);\n' % self.besti,
            ' for (int i = 8; i < 24; i++) { h[i] = j[i]; }\n',
            ' return;\n',
            ' }\n\n',
        ]
        for line in body:
            out.write(line)
def gwli_bsi(f, bsi, msi):
    """Write the five statements of one instruction-set branch to *f*.

    *bsi* names the variant used for the two ``z64_to_r32`` conversions and
    the ``iterate_var`` call; *msi* names the variant used for the two
    ``r32_centre_to_z64`` conversions.
    """
    statements = (
        ' apg::z64_to_r32_%s(inleaves, d);\n' % bsi,
        ' apg::z64_to_r32_%s(hleaves, h);\n' % bsi,
        ' iterate_var_%s(d, h);\n' % bsi,
        ' apg::r32_centre_to_z64_%s(d, outleaf);\n' % msi,
        ' apg::r32_centre_to_z64_%s(h, outleaf2);\n' % msi,
    )
    for statement in statements:
        f.write(statement)
def gwrite_leaf_iterator(f, nstates):
    """Write the C++ ``iterate_var_leaf`` function for this genus to *f*.

    The emitted function picks an instruction-set branch from
    ``apg::best_instruction_set()`` (one ``gwli_bsi`` body per branch) and
    then updates the counter planes held in ``outleaf``.

    *nstates* controls how many counter planes are emitted: one per binary
    digit of ``nstates - 3``, or none when ``nstates == 3``.

    The ``outleaf2`` declaration now ends with a newline; previously it ran
    straight into the following declaration on the same generated line.
    """
    name = 'iterate_var_leaf'
    params = 'uint64_t * inleaves, uint64_t * hleaves, uint64_t * outleaf'

    f.write(' bool %s(%s) {\n' % (name, params))
    f.write(' uint64_t outleaf2[4];\n')
    f.write(' int bis = apg::best_instruction_set();\n')
    f.write(' uint32_t d[32];\n')
    f.write(' uint32_t h[32];\n')

    # One branch per supported instruction-set level, widest first.
    f.write(' if (bis >= 10) {\n')
    gwli_bsi(f, 'avx2', 'avx2')
    f.write(' } else if (bis >= 9) {\n')
    gwli_bsi(f, 'avx', 'avx')
    f.write(' } else if (bis >= 7) {\n')
    gwli_bsi(f, 'sse2', 'sse4')
    f.write(' } else {\n')
    gwli_bsi(f, 'sse2', 'ssse3')
    f.write(' }\n')

    # Binary digits of (nstates - 3), least significant first.
    br = '' if (nstates == 3) else (bin(nstates - 3)[2:])[::-1]

    # We run 256 parallel binary counters in outleaf:
    f.write(' for (int i = 0; i < 4; i++) {\n')
    f.write(' uint64_t carry = outleaf[4+i];\n')
    for i, c in enumerate(br):
        plane = 4 * i + 8
        f.write(' outleaf[%d+i] ^= carry;\n' % plane)
        f.write(' carry &= outleaf[%d+i];\n' % plane)
        if (c == '1'):
            f.write(' outleaf[%d+i] |= outleaf2[i];\n' % plane)
    f.write(' outleaf[4+i] ^= carry;\n')
    for i, c in enumerate(br):
        f.write(' outleaf[%d+i] ^= carry;\n' % (4 * i + 8))
    f.write(' outleaf[4+i] |= outleaf2[i];\n')
    f.write(' }\n')
    f.write(' return false;\n')
    f.write(' }\n\n')
# The genera accepted by lifelib, in order of precedence: an earlier genus
# wins over a later one, so (for example) the rule b3s23 belongs to the
# genus 'lifelike' rather than 'isotropic'.

genus_list = [
    {'name': 'lifelike',
     'regex': 'b0?1?2?3?4?5?6?7?8?s0?1?2?3?4?5?6?7?8?'},
    {'name': 'generations',
     'regex': 'g[1-9][0-9]*b1?2?3?4?5?6?7?8?s0?1?2?3?4?5?6?7?8?'},
    {'name': 'isotropic',
     'regex': 'b[0-9ceaiknjqrytwz-]*s[0-9ceaiknjqrytwz-]*'},
    {'name': 'ltl',
     'regex': 'r[234567]b[1-9][0-9]*t[1-9][0-9]*s[1-9][0-9]*t[1-9][0-9]*'},
]
# Module-level attributes of this genus.
# NOTE(review): presumably read by name (getattr) from the genus dispatcher;
# the meaning of the numeric family code is not visible here — confirm.
family = 4
bitplanes = 1
mantissa = {0, 1}
def create_rule(rulestring):
    """Generate the header files for one isotropic rule.

    First runs ``isotrope(rulestring)``, then writes
    ``iterators_<rulestring>.h`` in the current directory. That header wraps
    the ``ma_<rulestring>.h`` and ``isoluts.h`` includes in a namespace
    named after the rulestring with ``'-'`` replaced by ``'_'``.

    The previous unused ``logstring`` local has been dropped; it was
    computed but never read.
    """
    isotrope(rulestring)

    with open('iterators_%s.h' % rulestring, 'w') as f:
        f.write('#pragma once\n')
        f.write('#include <stdint.h>\n')
        f.write('#include <cstdlib>\n')
        f.write('#include <iostream>\n')
        f.write('#include "../eors.h"\n')
        f.write('#include "../lifeperm.h"\n')
        f.write('namespace %s {\n\n' % rulestring.replace('-', '_'))
        f.write('#include "ma_%s.h"\n\n' % rulestring)
        f.write('#include "../isoluts.h"\n')
        f.write('}\n')
def isotrope(rulestring):
    """Validate an isotropic rulestring and write its lookup-table header.

    The rulestring is expanded into a 512-entry output table, which is then
    used to rebuild the canonical spelling of the rule. If the given
    spelling is canonical, ``ma_<rulestring>.h`` is written to the current
    directory with two byte arrays, ``__magicsauce`` (256 bytes) and
    ``lut9`` (112 bytes).

    Uses ``range`` rather than the Python-2-only ``xrange`` so the function
    runs under both Python 2 and Python 3; every loop is at most 512 long.

    Raises:
        ValueError: if a modifier letter appears where no neighbour count
            has been given for the current b/s section, or if *rulestring*
            is not the canonical spelling of the rule it denotes.
    """
    # Parse into {(centre, neighbour count): '+' + modifier characters}.
    # 'b' / 's' select centre 0 / 1, '/' toggles it, a digit opens an entry
    # and any following letters (or '-') are appended to that entry.
    isotrans = {}
    centre = 0
    lastloc = None
    for c in rulestring.lower().replace('v', 'r'):
        if c in '012345678':
            lastloc = int(c)
            isotrans[(centre, lastloc)] = "+"
        elif c == 'b':
            centre = 0
        elif c == 's':
            centre = 1
        elif c == '/':
            centre = 1 - centre
        elif c in 'ceaiknjqrytwz-':
            if (centre, lastloc) not in isotrans:
                # Previously surfaced as an unbound-local or KeyError.
                raise ValueError('Error: modifier "%s" in %s does not follow a neighbour count' % (c, rulestring))
            isotrans[(centre, lastloc)] += c

    # lord[i] is the letter attached to the 9-bit neighbourhood i (bit 4 is
    # the centre cell, see the index arithmetic below).
    lord = ""
    lord += "_ceaccaieaeaknja_ceaccaieaeaknjaekejanaairerririekejanaairerriri"
    lord += "ccknncqnaijaqnwaccknncqnaijaqnwakykkqyqjrtjnzrqakykkqyqjrtjnzrqa"
    lord += "ekirkyrtejerkkjnekirkyrtejerkkjnekejjkrnejecjyccekejjkrnejecjycc"
    lord += "anriqyzraariqjqaanriqyzraariqjqajkjywkqkrnccqkncjkjywkqkrnccqknc"
    lord += "cnkqccnnkqkqyykjcnkqccnnkqkqyykjaqjwinaarzjqtrnaaqjwinaarzjqtrna"
    lord += "ccyyccyennkjyekeccyyccyennkjyekenykknejeirykrikenykknejeirykrike"
    lord += "aqrznyirjwjqkkykaqrznyirjwjqkkykaqrqajiarqcnnkccaqrqajiarqcnnkcc"
    lord += "intrneriaanajekeintrneriaanajekeajnkaeaeiaccaec_ajnkaeaeiaccaec_"

    # Number of set bits in each 4-bit value.
    popcounts = [0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4]

    # lord2[i] is the output bit of the rule for neighbourhood i.
    lord2 = []
    for i in range(512):
        centre = (i >> 4) & 1
        ncount = popcounts[i & 15] + popcounts[i >> 5]
        if (centre, ncount) in isotrans:
            fragment = isotrans[(centre, ncount)]
            if len(fragment) == 1:
                # Bare digit: every arrangement with this count is accepted.
                lord2.append(1)
            else:
                # A leading '-' means the listed letters are exclusions, so
                # start from 1 and flip on a match; otherwise start from 0.
                bit = 1 if (fragment[1] == '-') else 0
                for c in fragment:
                    if c == lord[i]:
                        bit = 1 - bit
                lord2.append(bit)
        else:
            lord2.append(0)

    # Letters that may follow each neighbour count.
    rule_letters = {}
    rule_letters[1] = "ce"
    rule_letters[2] = "ceaikn"
    rule_letters[3] = "ceaiknjqry"
    rule_letters[4] = "ceaiknjqrytwz"
    rule_letters[5] = "ceaiknjqry"
    rule_letters[6] = "ceaikn"
    rule_letters[7] = "ce"

    # Rebuild the rulestring from the table to obtain its canonical spelling.
    canonicals = [['' for i in range(9)] for j in range(2)]
    canstring = ''

    for i in range(512):
        centre = (i >> 4) & 1
        ncount = popcounts[i & 15] + popcounts[i >> 5]
        if lord2[i] == 1:
            canonicals[centre][ncount] += lord[i]

    for (centre, z) in enumerate('bs'):
        canstring += z
        for ncount in range(9):
            goodies = set(canonicals[centre][ncount])
            if len(goodies) == 0:
                continue
            canstring += str(ncount)
            if ncount not in rule_letters:
                continue
            a = ''.join(sorted([c for c in rule_letters[ncount] if c in goodies]))
            b = '-' + (''.join(sorted([c for c in rule_letters[ncount] if c not in goodies])))
            if len(b) == 1:
                # Nothing excluded: the bare digit already says it all.
                continue
            elif len(b) < len(a):
                canstring += b
            else:
                canstring += a

    if rulestring != canstring:
        print('Please re-run the code with the canonical rulestring:')
        print('./recompile.sh --rule \033[36;1m%s\033[0m' % canstring)
        raise ValueError('Error: %s is a non-canonical version of %s' % (rulestring, canstring))

    print('Compressing 512-bit lookup table for rule %s...' % rulestring)

    # Pack the 512 output bits into 64 bytes, least significant bit first.
    lut9 = [sum([(lord2[8*i+j] << j) for j in range(8)]) for i in range(64)]

    print('Creating magic sauce for rule %s...' % rulestring)

    def oi(i0, i1, i2, i4, i5, i6):
        # One byte holding the inverted output bit for each of the eight
        # combinations of (i3, i7, i8), with the other six inputs fixed.
        thisbyte = 0
        for i3 in range(2):
            for i7 in range(2):
                for i8 in range(2):
                    thisindex = sum([(j << i) for (i, j) in enumerate([i0, i1, i2, i3, i4, i5, i6, i7, i8])])
                    thisbit = 1 - lord2[thisindex]
                    thisbyte |= (thisbit << (4 * i3 + 2 * i7 + i8))
        return thisbyte

    def ora(i4, i5):
        # Sixteen oi() bytes for fixed i4 and i5; i0 varies fastest.
        return [oi(i0, i1, i2, i4, i5, i6) for i6 in range(2) for i1 in range(2) for i2 in range(2) for i0 in range(2)]

    origarrays = [ora(i4, i5) for i5 in range(2) for i4 in range(2)]
    maskarrays = [[0 for i in range(16)] for j in range(8)]

    maskarrays[0] = [a ^ b for (a, b) in zip(origarrays[0], origarrays[2])]
    maskarrays[1] = [a ^ b for (a, b) in zip(origarrays[1], origarrays[3])]
    maskarrays[2] = origarrays[2]
    maskarrays[3] = origarrays[3]
    maskarrays[4] = [j for i in range(8) for j in [2 ** i, 0]]
    maskarrays[5] = [j for i in range(8) for j in [0, 2 ** i]]
    maskarrays[6] = [127] * 16
    maskarrays[7] = [112] * 16

    # Three fixed 16-byte rows appended after the 64-byte table (112 total).
    lut9 += [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]
    lut9 += [255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0]
    lut9 += [255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0]

    with open('ma_%s.h' % rulestring, 'w') as f:

        # Double every mask array for avx2 compatibility:
        marray = [x for m in maskarrays for x in (m + m)]

        f.write('// Two hundred and fifty-six seemingly arbitrary bytes\n')
        f.write('const static uint8_t __magicsauce[] __attribute__((aligned(64))) = {%d,\n' % marray[0])
        # Entry 0 is on the opening line; the other 255 follow as 15 rows of 17.
        for i in range(15):
            currstring = ' '
            for j in range(i*17+1, i*17+18):
                currstring += ((' %3d};' if (j == 255) else ' %3d,') % marray[j])
            f.write(currstring + '\n')

        f.write('\n// 512-bit lookup table with extra things appended\n')
        f.write('const static uint8_t lut9[] __attribute__((aligned(64))) = {%d, %d, %d, %d,\n' % tuple(lut9[:4]))
        # Entries 0..3 are on the opening line; the other 108 follow as 6 rows of 18.
        for i in range(6):
            currstring = ' '
            for j in range(i*18+4, i*18+22):
                currstring += ((' %3d};' if (j == 111) else ' %3d,') % lut9[j])
            f.write(currstring + '\n')

    print('...completed.')
from _iwriter import iwriter
# Module-level attributes of this genus.
# NOTE(review): presumably read by name (getattr) from the genus dispatcher;
# the meaning of the numeric family code is not visible here — confirm.
family = 0
bitplanes = 1
def mantissa(rulestring):
    """Return the set of mantissa values for *rulestring*.

    Rulestrings containing ``'b0'`` get the even values 0..8; all others
    get every integer from 0 to 8.
    """
    step = 2 if 'b0' in rulestring else 1
    return set(range(0, 9, step))
def create_rule(rulestring):
    """Write the generated sources for one rule of this genus.

    Produces, in the current directory, one ``ll_<iset>_<logstring>.asm``
    file for each of sse2, avx and avx2, followed by
    ``iterators_<rulestring>.h``, which holds six iterate functions plus an
    iterator per instruction set and ends by including
    ``../leaf_iterators.h``.
    """
    logstring = rulestring[rulestring.index('b'):]
    instruction_sets = [['sse2'], ['sse2', 'avx'], ['sse2', 'avx', 'avx2']]

    for iset in instruction_sets:
        asm_name = 'll_%s_%s.asm' % (iset[-1], logstring)
        with open(asm_name, 'w') as f:
            lifewriter(f, iset).genlogic(logstring)

    preamble = (
        '#pragma once\n',
        '#include <stdint.h>\n',
        '#include "../lifeconsts.h"\n',
        '#include "../lifeperm.h"\n',
        '#include "../eors.h"\n',
        'namespace %s {\n\n' % rulestring.replace('-', '_'),
    )

    with open('iterators_%s.h' % rulestring, 'w') as f:
        for line in preamble:
            f.write(line)

        for iset in instruction_sets:
            writer = lifewriter(f, iset)
            # Row counts 32, 28, ..., 12, each paired with a width 4 smaller.
            for rowcount in range(32, 8, -4):
                writer.write_function(rulestring, rowcount, rowcount - 4)
            writer.write_iterator()

        f.write('\n#include "../leaf_iterators.h"\n')
        f.write('}\n')
class lifewriter(iwriter):