Commit c0d09db6 authored by Adam P. Goucher

Potential for GPU searching of other lifelike rules

parent b8993ec3
......@@ -22,6 +22,52 @@ def create_rule(rulestring):
ix = iwriter(f, iset)
ix.genlogic(logstring)
with open('iterators_gpu.h', 'w') as f:
f.write('''#define ADVANCE_TILE_64(a, b, tmp, dst) { \\
uint64_cu al = ROTL64(a, 1); \\
uint64_cu ar = ROTR64(a, 1); \\
uint64_cu xor2 = al ^ ar; \\
(dst)[threadIdx.x] = xor2 ^ a; \\
__syncthreads(); \\
uint64_cu uda = (dst)[u] & (dst)[d]; \\
uint64_cu udx = (dst)[u] ^ (dst)[d]; \\
uint64_cu xmm8 = ((dst)[threadIdx.x] & udx) | uda; \\
uint64_cu xmm10 = (dst)[threadIdx.x] ^ udx; \\
(tmp)[threadIdx.x] = (al & ar) | (a & xor2); \\
__syncthreads(); \\
udx = (tmp)[u] ^ (tmp)[d]; \\
uda = (tmp)[u] & (tmp)[d]; \\
uint64_cu xmm9 = ((tmp)[threadIdx.x] & udx) | uda; \\
uint64_cu xmm11 = (tmp)[threadIdx.x] ^ udx; \\
xor2 = xmm8 & xmm11; \\
xmm8 ^= xmm11; \\\n''')
rchars, negate, beexor, essxor = rule2gates(rulestring)
usetopbit = (essxor or beexor)
regnames = ["xmm10", "xmm8", "xmm9", "a", "xor2", "udx"]
opnames = [" & ", " | ", " & ~", "nonsense", " ^ "]
if (usetopbit):
f.write(''' xmm11 = xor2 & xmm9; \\\n''')
if (essxor and not beexor):
f.write(''' xmm11 &= a; \\\n''')
if (beexor and not essxor):
f.write(''' xmm11 &= (~a); \\\n''')
f.write(''' xmm9 ^= xor2; \\\n''')
for i in range(0, len(rchars), 4):
f.write(' %s = %s %s %s; \\\n' % (regnames[rchars[i]], regnames[rchars[i+1]], opnames[rchars[i+3]], regnames[rchars[i+2]]))
if usetopbit:
f.write(''' xmm10 ^= xmm11; \\\n''')
if negate:
f.write(''' b = ~xmm10; \\\n}\n\n''')
else:
f.write(''' b = xmm10; \\\n}\n\n''')
with open('iterators_%s.h' % rulestring, 'w') as f:
f.write('#pragma once\n')
f.write('#include <stdint.h>\n')
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment