Commit 07455b05 authored by Adam P. Goucher

Common subexpression elimination

parent c2ed21df
......@@ -9,28 +9,7 @@
// Kernel copyhashes_C1, shown as a diff excerpt: the hunk-marker line in the middle stands for
// lines that are not visible here.
// Per block: univec[blockIdx.x] selects the universe to initialise, and blockIdx.x + offset is the
// hash number folded into its header word. Universes are addressed at a stride of 1 << 13
// uint64_cu words inside multiverse. When `initial` is true, usize is forced to 3, so none of the
// early returns in the zeroing section is taken.
// NOTE(review): the expanded statements and the #include lines appear side by side in this view;
// presumably the commit replaces the former with the latter -- confirm against the repository.
__global__ void copyhashes_C1(uint64_cu *multiverse, uint32_cu *univec, uint64_cu *hashes, uint32_cu offset, bool initial) {
// Initialise memory:
uint32_cu hashnum = blockIdx.x + offset;
uint32_cu uniidx = univec[blockIdx.x];
// Header value: a fixed constant plus the hash number. Only thread 0 ends up storing it (see below).
uint64_cu b = 0x4000600000ull + hashnum;
/*
Memory map:
0x0000 -- 0x0003: header (lowest 19 bits = index; middle 2 bits = usize; upper 11 bits = gencount / 6)
0x0004 -- 0x01ff: flags for each of 127 tiles
0x0200 -- 0xffff: 127 tiles
*/
// NOTE(review): the offsets above look like byte offsets (0x0200 bytes = 64 uint64_cu words,
// which matches the `threadnum + 64` store further down) -- confirm.
// Read the 2-bit usize field (bits 19-20) from the universe's existing header word.
int usize = (multiverse[uniidx << 13] >> 19) & 3;
if (initial) { usize = 3; }
// Barrier between the header read above and the header overwrite below; presumably this
// ensures every thread has read the old usize before thread 0 replaces the header word.
__syncthreads();
uint32_cu threadnum = (uniidx << 13) + threadIdx.x;
// Only thread 0 keeps the header value; every other thread stores zero.
if (threadIdx.x) { b = 0; }
// One word per thread at the start of the universe, then b is cleared for reuse.
multiverse[threadnum] = b; b = 0;
#include "cphash_header.h"
// Copy SHA-256 hash:
// Threads 24..39 take part in the hash copy; the body of this branch is elided by the diff.
if ((threadIdx.x >= 24) && (threadIdx.x < 40)) {
......@@ -68,47 +47,13 @@ __global__ void copyhashes_C1(uint64_cu *multiverse, uint32_cu *univec, uint64_c
ADVANCE_TILE_64(b, b, tmp1, tmp2)
#endif
// Save into multiverse:
// Each thread stores its word 64 words past its header-area slot.
multiverse[threadnum + 64] = b;
// Zero remaining array:
// ZERO_SIX_TILES is a macro defined outside this excerpt; going by the tile counts in the
// comments below, each use presumably clears six further tiles (1 + 3 * 6 = 19) -- confirm.
ZERO_SIX_TILES; ZERO_SIX_TILES; ZERO_SIX_TILES;
if (usize == 0) { return; } // 19 tiles
ZERO_SIX_TILES; ZERO_SIX_TILES; ZERO_SIX_TILES;
if (usize == 1) { return; } // 37 tiles
ZERO_SIX_TILES; ZERO_SIX_TILES; ZERO_SIX_TILES;
ZERO_SIX_TILES; ZERO_SIX_TILES; ZERO_SIX_TILES;
ZERO_SIX_TILES; ZERO_SIX_TILES; ZERO_SIX_TILES;
if (usize == 2) { return; } // 91 tiles
// usize == 3 falls through to here; by the same count this reaches 91 + 36 = 127 tiles,
// the total given in the memory map.
ZERO_SIX_TILES; ZERO_SIX_TILES; ZERO_SIX_TILES;
ZERO_SIX_TILES; ZERO_SIX_TILES; ZERO_SIX_TILES;
#include "cphash_footer.h"
}
// Kernel copyhashes_D2_p1, shown as a diff excerpt: the hunk-marker line in the middle stands for
// lines that are not visible here.
// Per block: univec[blockIdx.x] selects the universe to initialise, and blockIdx.x + offset is the
// hash number folded into its header word and used to index `hashes`. Universes are addressed at
// a stride of 1 << 13 uint64_cu words inside multiverse. When `initial` is true, usize is forced
// to 3, so none of the early returns in the zeroing section is taken.
// NOTE(review): the expanded statements and the #include lines appear side by side in this view;
// presumably the commit replaces the former with the latter -- confirm against the repository.
__global__ void copyhashes_D2_p1(uint64_cu *multiverse, uint32_cu *univec, uint64_cu *hashes, uint32_cu offset, bool initial) {
// Initialise memory:
uint32_cu hashnum = blockIdx.x + offset;
uint32_cu uniidx = univec[blockIdx.x];
// Header value: a fixed constant plus the hash number. Only thread 0 ends up storing it (see below).
uint64_cu b = 0x4000600000ull + hashnum;
// Read a 2-bit field (bits 19-20) from the first word of the selected universe.
int usize = (multiverse[uniidx << 13] >> 19) & 3;
if (initial) { usize = 3; }
// Barrier between the header read above and the header overwrite below; presumably this
// ensures every thread has read the old usize before thread 0 replaces the header word.
__syncthreads();
#include "cphash_header.h"
uint32_cu threadnum = (uniidx << 13) + threadIdx.x;
// Only thread 0 keeps the header value; every other thread stores zero.
if (threadIdx.x) { b = 0; }
// One word per thread at the start of the universe, then b is cleared for reuse.
multiverse[threadnum] = b; b = 0;
// Copy SHA-256 hash:
// Threads 32..47 only. (threadIdx.x >> 2) - 8 is 0..3 and picks one of the four 64-bit words
// stored for this hash at hashes[hashnum * 4 + ...]; (threadIdx.x & 3) picks which 16-bit
// quarter of that word is shifted down to the low bits.
if ((threadIdx.x >= 32) && (threadIdx.x < 48)) {
b = hashes[(hashnum << 2) + (threadIdx.x >> 2) - 8];
b = (b >> (16 * (threadIdx.x & 3)));
......@@ -120,45 +65,12 @@ __global__ void copyhashes_D2_p1(uint64_cu *multiverse, uint32_cu *univec, uint6
// Keep only the low 16 bits: bits 0-7 move to bits 32-39 and bits 8-15 move to bits 24-31.
b = ((b & 0x00ff) << 32) | ((b & 0xff00) << 16);
}
// Save into multiverse:
// Each thread stores its word 64 words past its header-area slot.
multiverse[threadnum + 64] = b;
// Zero remaining array:
// ZERO_SIX_TILES is a macro defined outside this excerpt; going by the tile counts in the
// comments below, each use presumably clears six further tiles (1 + 3 * 6 = 19) -- confirm.
ZERO_SIX_TILES; ZERO_SIX_TILES; ZERO_SIX_TILES;
if (usize == 0) { return; } // 19 tiles
ZERO_SIX_TILES; ZERO_SIX_TILES; ZERO_SIX_TILES;
if (usize == 1) { return; } // 37 tiles
ZERO_SIX_TILES; ZERO_SIX_TILES; ZERO_SIX_TILES;
ZERO_SIX_TILES; ZERO_SIX_TILES; ZERO_SIX_TILES;
ZERO_SIX_TILES; ZERO_SIX_TILES; ZERO_SIX_TILES;
if (usize == 2) { return; } // 91 tiles
// usize == 3 falls through to here; by the same count this reaches 91 + 36 = 127 tiles.
ZERO_SIX_TILES; ZERO_SIX_TILES; ZERO_SIX_TILES;
ZERO_SIX_TILES; ZERO_SIX_TILES; ZERO_SIX_TILES;
#include "cphash_footer.h"
}
// Kernel copyhashes_D2_p2, shown as a diff excerpt: the hunk-marker line in the middle stands for
// lines that are not visible here.
// Per block: univec[blockIdx.x] selects the universe to initialise, and blockIdx.x + offset is the
// hash number folded into its header word. Universes are addressed at a stride of 1 << 13
// uint64_cu words inside multiverse. When `initial` is true, usize is forced to 3, so none of the
// early returns in the zeroing section is taken.
// NOTE(review): the expanded statements and the #include lines appear side by side in this view;
// presumably the commit replaces the former with the latter -- confirm against the repository.
__global__ void copyhashes_D2_p2(uint64_cu *multiverse, uint32_cu *univec, uint64_cu *hashes, uint32_cu offset, bool initial) {
// Initialise memory:
uint32_cu hashnum = blockIdx.x + offset;
uint32_cu uniidx = univec[blockIdx.x];
// Header value: a fixed constant plus the hash number. Only thread 0 ends up storing it (see below).
uint64_cu b = 0x4000600000ull + hashnum;
// Read a 2-bit field (bits 19-20) from the first word of the selected universe.
int usize = (multiverse[uniidx << 13] >> 19) & 3;
if (initial) { usize = 3; }
// Barrier between the header read above and the header overwrite below; presumably this
// ensures every thread has read the old usize before thread 0 replaces the header word.
__syncthreads();
uint32_cu threadnum = (uniidx << 13) + threadIdx.x;
// Only thread 0 keeps the header value; every other thread stores zero.
if (threadIdx.x) { b = 0; }
// One word per thread at the start of the universe, then b is cleared for reuse.
multiverse[threadnum] = b; b = 0;
#include "cphash_header.h"
// Copy SHA-256 hash:
// Threads 32..47 take part in the hash copy; most of this branch is elided by the diff.
if ((threadIdx.x >= 32) && (threadIdx.x < 48)) {
......@@ -172,26 +84,7 @@ __global__ void copyhashes_D2_p2(uint64_cu *multiverse, uint32_cu *univec, uint6
// Keep only the low 16 bits: bits 0-7 move to bits 32-39 and bits 8-15 move to bits 24-31.
b = ((b & 0x00ff) << 32) | ((b & 0xff00) << 16);
}
// Save into multiverse:
// Each thread stores its word 64 words past its header-area slot.
multiverse[threadnum + 64] = b;
// Zero remaining array:
// ZERO_SIX_TILES is a macro defined outside this excerpt; going by the tile counts in the
// comments below, each use presumably clears six further tiles (1 + 3 * 6 = 19) -- confirm.
ZERO_SIX_TILES; ZERO_SIX_TILES; ZERO_SIX_TILES;
if (usize == 0) { return; } // 19 tiles
ZERO_SIX_TILES; ZERO_SIX_TILES; ZERO_SIX_TILES;
if (usize == 1) { return; } // 37 tiles
ZERO_SIX_TILES; ZERO_SIX_TILES; ZERO_SIX_TILES;
ZERO_SIX_TILES; ZERO_SIX_TILES; ZERO_SIX_TILES;
ZERO_SIX_TILES; ZERO_SIX_TILES; ZERO_SIX_TILES;
if (usize == 2) { return; } // 91 tiles
// usize == 3 falls through to here; by the same count this reaches 91 + 36 = 127 tiles.
ZERO_SIX_TILES; ZERO_SIX_TILES; ZERO_SIX_TILES;
ZERO_SIX_TILES; ZERO_SIX_TILES; ZERO_SIX_TILES;
#include "cphash_footer.h"
}
void copyhashes(std::string full_symmetry, int universes_left, uint64_cu *multiverse,
......
// NOTE(review): this fragment is shown without a file name. It matches the statements that sit
// next to `#include "cphash_footer.h"` in the kernels, so it is presumably that header's
// content -- confirm. It is not self-contained: threadnum, b, usize and multiverse must already
// be defined at the point of inclusion, and the `return` statements exit the including function.
// Save into multiverse:
// Each thread stores its word 64 words past its header-area slot.
multiverse[threadnum + 64] = b;
// Zero remaining array:
// ZERO_SIX_TILES is a macro defined outside this excerpt; going by the tile counts in the
// comments below, each use presumably clears six further tiles (1 + 3 * 6 = 19) -- confirm.
ZERO_SIX_TILES; ZERO_SIX_TILES; ZERO_SIX_TILES;
if (usize == 0) { return; } // 19 tiles
ZERO_SIX_TILES; ZERO_SIX_TILES; ZERO_SIX_TILES;
if (usize == 1) { return; } // 37 tiles
ZERO_SIX_TILES; ZERO_SIX_TILES; ZERO_SIX_TILES;
ZERO_SIX_TILES; ZERO_SIX_TILES; ZERO_SIX_TILES;
ZERO_SIX_TILES; ZERO_SIX_TILES; ZERO_SIX_TILES;
if (usize == 2) { return; } // 91 tiles
// Any other usize value falls through to here; by the same count this reaches 91 + 36 = 127 tiles.
ZERO_SIX_TILES; ZERO_SIX_TILES; ZERO_SIX_TILES;
ZERO_SIX_TILES; ZERO_SIX_TILES; ZERO_SIX_TILES;
// NOTE(review): this fragment is shown without a file name. It matches the statements that sit
// next to `#include "cphash_header.h"` in the kernels, so it is presumably that header's
// content -- confirm. It is not self-contained: offset, univec, multiverse and initial must be
// in scope at the point of inclusion. It defines hashnum, uniidx, b, usize and threadnum.
// Initialise memory:
uint32_cu hashnum = blockIdx.x + offset;
uint32_cu uniidx = univec[blockIdx.x];
// Header value: a fixed constant plus the hash number. Only thread 0 ends up storing it (see below).
uint64_cu b = 0x4000600000ull + hashnum;
/*
Memory map:
0x0000 -- 0x0003: header (lowest 19 bits = index; middle 2 bits = usize; upper 11 bits = gencount / 6)
0x0004 -- 0x01ff: flags for each of 127 tiles
0x0200 -- 0xffff: 127 tiles
*/
// Read the 2-bit usize field (bits 19-20) from the universe's existing header word.
// Universes are addressed at a stride of 1 << 13 uint64_cu words.
int usize = (multiverse[uniidx << 13] >> 19) & 3;
// `initial` overrides the stored value with 3.
if (initial) { usize = 3; }
// Barrier between the header read above and the header overwrite below; presumably this
// ensures every thread has read the old usize before thread 0 replaces the header word.
__syncthreads();
uint32_cu threadnum = (uniidx << 13) + threadIdx.x;
// Only thread 0 keeps the header value; every other thread stores zero.
if (threadIdx.x) { b = 0; }
// One word per thread at the start of the universe, then b is cleared for reuse.
multiverse[threadnum] = b; b = 0;
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment