Commit 6a228772 authored by Ondrej Mosnáček

Optionally precompute Argon2i ref block positions

parent 336b0b79
......@@ -19,10 +19,14 @@ private:
std::uint32_t passes, lanes, segmentBlocks;
std::uint32_t batchSize;
bool bySegment;
bool precompute;
cudaEvent_t start, end;
cudaStream_t stream;
void *memory;
void *refs;
void precomputeRefs();
void runKernelSegment(std::uint32_t blockSize,
std::uint32_t pass, std::uint32_t slice);
......@@ -45,7 +49,7 @@ public:
Argon2KernelRunner(std::uint32_t type, std::uint32_t version,
std::uint32_t passes, std::uint32_t lanes,
std::uint32_t segmentBlocks, std::uint32_t batchSize,
bool bySegment);
bool bySegment, bool precompute);
~Argon2KernelRunner();
void run(std::uint32_t blockSize);
......
......@@ -59,7 +59,7 @@ public:
ProcessingUnit(
const ProgramContext *programContext, const Argon2Params *params,
const Device *device, std::size_t batchSize,
bool bySegment = true);
bool bySegment = true, bool precomputeRefs = false);
void beginProcessing();
void endProcessing();
......
......@@ -69,7 +69,7 @@ public:
ProcessingUnit(
const ProgramContext *programContext, const Argon2Params *params,
const Device *device, std::size_t batchSize,
bool bySegment = true);
bool bySegment = true, bool precomputeRefs = false);
void beginProcessing();
void endProcessing();
......
This diff is collapsed.
......@@ -13,12 +13,13 @@ namespace cuda {
ProcessingUnit::ProcessingUnit(
const ProgramContext *programContext, const Argon2Params *params,
const Device *device, std::size_t batchSize, bool bySegment)
const Device *device, std::size_t batchSize, bool bySegment,
bool precomputeRefs)
: programContext(programContext), params(params), device(device),
runner(programContext->getArgon2Type(),
programContext->getArgon2Version(), params->getTimeCost(),
params->getLanes(), params->getSegmentBlocks(), batchSize,
bySegment),
bySegment, precomputeRefs),
bestBlockSize(1)
{
CudaException::check(cudaSetDevice(device->getDeviceIndex()));
......
......@@ -9,10 +9,11 @@ namespace opencl {
ProcessingUnit::ProcessingUnit(
const ProgramContext *programContext, const Argon2Params *params,
const Device *device, std::size_t batchSize,
bool bySegment)
bool bySegment, bool precomputeRefs)
: programContext(programContext), params(params),
device(device), batchSize(batchSize), bySegment(bySegment)
{
// TODO: implement precomputeRefs
// FIXME: check memSize out of bounds
auto &clContext = programContext->getContext();
auto lanes = params->getLanes();
......
......@@ -23,7 +23,7 @@ private:
argon2::Version version;
std::size_t t_cost, m_cost, lanes;
std::size_t batchSize, samples;
bool bySegment;
bool bySegment, precomputeRefs;
std::string outputMode, outputType;
bool beVerbose;
......@@ -36,18 +36,19 @@ public:
std::size_t getLanes() const { return lanes; }
std::size_t getBatchSize() const { return batchSize; }
bool isBySegment() const { return bySegment; }
bool isPrecomputeRefs() const { return precomputeRefs; }
bool isVerbose() const { return beVerbose; }
BenchmarkDirector(const std::string &progname,
argon2::Type type, argon2::Version version,
std::size_t t_cost, std::size_t m_cost, std::size_t lanes,
std::size_t batchSize, bool bySegment,
std::size_t samples,
bool precomputeRefs, std::size_t samples,
const std::string &outputMode,
const std::string &outputType)
: progname(progname), type(type), version(version),
t_cost(t_cost), m_cost(m_cost), lanes(lanes),
batchSize(batchSize), samples(samples), bySegment(bySegment),
t_cost(t_cost), m_cost(m_cost), lanes(lanes), batchSize(batchSize),
samples(samples), bySegment(bySegment), precomputeRefs(precomputeRefs),
outputMode(outputMode), outputType(outputType),
beVerbose(outputMode == "verbose")
{
......
......@@ -20,7 +20,7 @@ public:
director.getTimeCost(), director.getMemoryCost(),
director.getLanes()),
unit(&pc, &params, &device, director.getBatchSize(),
director.isBySegment())
director.isBySegment(), director.isPrecomputeRefs())
{
}
......
......@@ -28,6 +28,7 @@ struct Arguments
std::size_t lanes = 1;
std::size_t batchSize = 16;
std::string kernelType = "by-segment";
bool precomputeRefs = false;
std::size_t sampleCount = 10;
};
......@@ -86,6 +87,9 @@ static CommandLineParser<Arguments> buildCmdLineParser()
new ArgumentOption<Arguments>(
[] (Arguments &state, const std::string &type) { state.kernelType = type; },
"kernel-type", 'k', "Kernel type (by-segment|oneshot)", "by-segment", "TYPE"),
new FlagOption<Arguments>(
[] (Arguments &state) { state.precomputeRefs = true; },
"precompute-refs", 'p', "precompute reference indices with Argon2i"),
new FlagOption<Arguments>(
[] (Arguments &state) { state.showHelp = true; },
......@@ -142,8 +146,8 @@ int main(int, const char * const *argv)
}
BenchmarkDirector director(argv[0], type, version,
args.t_cost, args.m_cost, args.lanes,
args.batchSize, bySegment, args.sampleCount,
args.t_cost, args.m_cost, args.lanes, args.batchSize,
bySegment, args.precomputeRefs, args.sampleCount,
args.outputMode, args.outputType);
if (args.mode == "opencl") {
OpenCLExecutive exec(args.deviceIndex, args.listDevices);
......
......@@ -20,7 +20,7 @@ public:
director.getTimeCost(), director.getMemoryCost(),
director.getLanes()),
unit(&pc, &params, &device, director.getBatchSize(),
director.isBySegment())
director.isBySegment(), director.isPrecomputeRefs())
{
}
......
#include "argon2-gpu-common/argon2params.h"
#include <iostream>
#include <array>
#include <cstdint>
#include <cstring>
......@@ -79,29 +80,40 @@ std::size_t runTests(const GlobalContext &global, const Device &device,
std::size_t failures = 0;
ProgramContext progCtx(&global, { device }, type, version);
for (auto bySegment : {true, false}) {
for (auto tc = casesFrom; tc < casesTo; ++tc) {
std::cout << " " << (bySegment ? "[by-segment] " : "[oneshot] ");
tc->dump(std::cout);
std::cout << "... ";
auto &params = tc->getParams();
ProcessingUnit pu(&progCtx, &params, &device, 1, bySegment);
const std::array<bool, 2> precomputeOpts = { false, true };
auto precBegin = precomputeOpts.begin();
auto precEnd = precomputeOpts.end();
if (type != ARGON2_I) {
precEnd--;
}
for (auto precIt = precBegin; precIt != precEnd; precIt++) {
for (auto tc = casesFrom; tc < casesTo; ++tc) {
bool precompute = *precIt;
std::cout << " "
<< (bySegment ? "[by-segment] " : "[oneshot] ")
<< (precompute ? "[precompute] " : "[in-place] ");
tc->dump(std::cout);
std::cout << "... ";
{
typename ProcessingUnit::PasswordWriter writer(pu);
writer.setPassword(tc->getInput(), tc->getInputLength());
}
pu.beginProcessing();
pu.endProcessing();
auto &params = tc->getParams();
ProcessingUnit pu(&progCtx, &params, &device, 1, bySegment,
precompute);
{
typename ProcessingUnit::PasswordWriter writer(pu);
writer.setPassword(tc->getInput(), tc->getInputLength());
}
pu.beginProcessing();
pu.endProcessing();
typename ProcessingUnit::HashReader hash(pu);
bool res = std::memcmp(tc->getOutput(), hash.getHash(),
params.getOutputLength()) == 0;
if (!res) {
++failures;
std::cout << "FAIL" << std::endl;
} else {
std::cout << "PASS" << std::endl;
typename ProcessingUnit::HashReader hash(pu);
bool res = std::memcmp(tc->getOutput(), hash.getHash(),
params.getOutputLength()) == 0;
if (!res) {
++failures;
std::cout << "FAIL" << std::endl;
} else {
std::cout << "PASS" << std::endl;
}
}
}
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment