Commit f424ae81 authored by Adam P. Goucher

Disentangle instructions

parent 8e362a45
Pipeline #47880609 passed with stages
in 7 minutes and 39 seconds
......@@ -402,13 +402,13 @@ def write_all_iterators(f, hist, rules, families):
f.write('\n\n#endif\n\n')
def reset_tree(rule='b3s23'):
def reset_tree(rule='b3s23', throw_error=False):
try:
cwd = os.path.abspath(os.getcwd())
generate_code([rule], clean_before=True)
except:
if (rule != "b3s23"):
if throw_error:
raise
else:
import warnings
......
This diff is collapsed.
......@@ -44,7 +44,7 @@ def create_rule(rulestring):
class b3s23writer(iwriter_base):
def load_and_hshift(self, i, oddgen, terminal, from_reg=False):
def load_and_hshift(self, i, oddgen, terminal, from_reg=False, prepare=False):
d = '(%1)' if (oddgen) else '(%0)'
......@@ -67,6 +67,9 @@ class b3s23writer(iwriter_base):
self.printinstr('%s %s, %s' % (accessor, d, inreg))
if prepare:
self.preparethings(prepare)
if ('avx' in self.iset):
self.printinstr('vpsrld $1, %s, %s6' % (inreg, regname))
self.printinstr('vpslld $1, %s, %s1' % (inreg, regname))
......@@ -79,10 +82,8 @@ class b3s23writer(iwriter_base):
def horizontal_adders(self, i):
if ('avx512' in self.iset):
# self.trogicgate(0b01100110, 6, 1, 9 - 7 * (i % 2)) # XOR2
self.logicgate('pxord', 6, 1, 9 - 7 * (i % 2))
self.trogicgate(0b11101000, 6, 1, 7 - 7 * (i % 2), 12 - 7 * (i % 2)) # MAJ
# self.trogicgate(0b01100110, 7 - 7 * (i % 2), 9 - 7 * (i % 2), 11 - 7 * (i % 2)) # XOR3
self.logicgate('pxord', 6, 1, 9 - 7 * (i % 2))
self.logicgate('pxord', 7 - 7 * (i % 2), 9 - 7 * (i % 2), 11 - 7 * (i % 2))
else:
self.logicgate('pxor', 6, 1, 9 - 7 * (i % 2))
......@@ -178,14 +179,15 @@ class b3s23writer(iwriter_base):
self.write16n(64, 15, 0, '(%1)')
elif (diff == 'final'):
self.logicgate('pxord', 6, 8, 8)
self.printinstr('vshufi32x4 $78, %s, %s, %s' % ('%%zmm15', '%%zmm15', '%%zmm13'))
self.logicgate('pord', 13, 15, 15)
if (n == 48):
self.printinstr('vshufi32x4 $78, %s, %s, %s' % ('%%zmm15', '%%zmm15', '%%zmm14'))
self.printinstr('vshufi32x4 $78, %s, %s, %s' % ('%%zmm8', '%%zmm8', '%%zmm13'))
self.printinstr("vpord %%ymm8, %%ymm15, %%ymm15")
self.write16n(32, 15, 64, '(%1)')
self.printinstr('vpternlogd $254, %%zmm8, %%zmm14, %%zmm15')
self.write16n(16, 13, 96, '(%1)')
self.write16n(32, 15, 64, '(%1)')
else:
self.printinstr('vshufi32x4 $78, %s, %s, %s' % ('%%zmm15', '%%zmm15', '%%zmm13'))
self.logicgate('pord', 13, 15, 15)
self.write16n( n, 8, 64, '(%1)')
self.write16n(32, 15, 32, '(%1)')
else:
......@@ -247,9 +249,10 @@ class b3s23writer(iwriter_base):
def assemble(self, rulestring, oddgen, rowcount, dwidth, regtemp=False):
prepare = False
if (not regtemp) or (oddgen == 0):
self.prologue()
self.preparethings(dwidth)
prepare = dwidth
rpr = 16 if ('avx512' in self.iset) else (8 if ('avx2' in self.iset) else 4)
......@@ -260,7 +263,8 @@ class b3s23writer(iwriter_base):
if (i < riters):
terminal = max(0, ((i + 1) * rpr - rowcount) // 4)
self.load_and_hshift(i, oddgen, terminal, from_reg=(regtemp and oddgen))
self.load_and_hshift(i, oddgen, terminal, from_reg=(regtemp and oddgen), prepare=prepare)
prepare = False
self.horizontal_adders(i)
if (i > 0):
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment