Commit 4e831fc0 authored by hjhornbeck's avatar hjhornbeck

- Added the Boost variant of TOMS748, which passes the quality checks.

Surprisingly, it now runs slightly slower than my code, even for -v 2.
- Since the changes might affect LinearLUTsampler, I verified that it too
is working A-OK.
parent a1be839e
......@@ -519,8 +519,22 @@ class MyBisectionSampler : public GPUsampler {
fp* sample( FineTiming* ft = nullptr ) const;
};
// TOMS748. Usually quicker than bisection. Adapted from Boost's implementation
class TOMS748sampler : public GPUsampler {
// TOMS748. Usually quicker than bisection.
class TOMS748sampler : public Sampler {

    private:
        // flag consulted by setup(); it must be set before the sampler is used
        static bool registered;

    public:
        // numeric identifier of this sampler
        uint id() const;

        // human-readable label of this sampler
        string name() const;

        // store the run parameters; reports false when the sampler is not registered
        bool setup( const Parameters& params, FineTiming* ft = nullptr );

        // produce the samples; when a FineTiming is supplied, elapsed time is added to it
        fp* sample( FineTiming* ft = nullptr ) const;
};
// now the one adapted from Boost's implementation
class MyTOMS748sampler : public GPUsampler {
// the registration trigger
static bool registered;
......
......@@ -112,7 +112,7 @@ bool LinearLUTsamplerV2::setup( const Parameters& p, FineTiming* ft ) {
fp* output;
// hijack another sampler to generate the table
TOMS748sampler generator;
MyTOMS748sampler generator;
Parameters genP = params;
genP.samples = params.binSize;
......
......@@ -23,6 +23,7 @@ REGISTER(BisectionSampler)
REGISTER(MyBisectionSampler)
REGISTER(TOMS748sampler)
REGISTER(MyTOMS748sampler)
/**************************************************************************
......@@ -46,7 +47,7 @@ struct bisection_boost : public cpu_functor {
const fp epsilon;
bisection_boost(const uint a, fp* b, const uint c, fp* d, fp e) :
bisection_boost(const uint a, fp* b, const uint c, fp* d, const fp e) :
cpu_functor(a, b, c, d), epsilon{e} {}
void operator()(const uint offset, const uint stride) const;
......@@ -93,6 +94,19 @@ struct toms748 : public unified_functor {
HEMI_DEV_CALLABLE fp sample( const fp target, const fp CDFmax, const fp scalar ) const;
};
struct toms748_boost : public cpu_functor {

    // requested precision; operator() converts it into a bit count for the solver's tolerance
    const fp epsilon;

    // iteration budget handed to the root finder for every sample
    const uint max_iter;

    // The first four arguments are forwarded untouched to cpu_functor; the
    //  last two are kept locally.
    // NOTE(review): the parameter names follow how TOMS748sampler::sample()
    //  fills them in - confirm against cpu_functor's own constructor.
    toms748_boost( const uint lhgCount, fp* lhgData, const uint sampleCount, fp* results,
                   const fp eps, const uint iterations )
        : cpu_functor( lhgCount, lhgData, sampleCount, results ),
          epsilon{ eps },
          max_iter{ iterations } {}

    // process the samples at offset, offset + stride, offset + 2*stride, ...
    void operator()( const uint offset, const uint stride ) const;
};
// now fill out those functors
HEMI_DEV_CALLABLE void bisection_uni::operator()(const uint o, const uint s) const {
......@@ -147,13 +161,13 @@ HEMI_DEV_CALLABLE void bisection_uni::operator()(const uint o, const uint s) con
void bisection_boost::operator()(const uint o, const uint stride) const {
// make this easier to access
LinearHG stored = { lhg_count, lhg_data };
const LinearHG stored = { lhg_count, lhg_data };
// set up the scalar
fp CDFmax = 0;
fp CDFmin = 0;
hengreen1Idata( stored, CDFmax, CDFmin );
fp scalar = CDFmax - CDFmin;
const fp scalar = CDFmax - CDFmin;
// set up the required precision
int digits = -log(epsilon)/log(2.);
......@@ -169,7 +183,7 @@ void bisection_boost::operator()(const uint o, const uint stride) const {
return (CDFmax - hengreen1I( cos_x, stored )) - target;
},
-1., 1., tol );
(fp)-1., (fp)1., tol );
output[offset] = 0.5*(bracket.first + bracket.second);
offset += stride;
......@@ -445,6 +459,41 @@ HEMI_DEV_CALLABLE void toms748::operator()(const uint o, const uint s) const {
} // toms748
void toms748_boost::operator()(const uint o, const uint stride) const {

    // bundle the table so the helpers can take it as a single argument
    const LinearHG table = { lhg_count, lhg_data };

    // fetch the CDF's extremes; their difference rescales each incoming value
    fp CDFmax = 0;
    fp CDFmin = 0;
    hengreen1Idata( table, CDFmax, CDFmin );
    const fp range = CDFmax - CDFmin;

    // the tolerance object is built from a bit count, so convert epsilon via log2
    int bits = -log(epsilon)/log(2.);
    eps_tolerance<fp> tolerance(bits);

    // visit every stride-th sample, starting at o, while we're within bounds
    for( uint idx = o; idx < samples; idx += stride ) {

        // the buffer's current value, stretched to the proper range, is the target
        const fp target = output[idx] * range;

        // the function whose root (in cos_x) we are after
        const auto residual = [=](const fp cos_x) {
            return (CDFmax - hengreen1I( cos_x, table )) - target;
            };

        // passed by reference: the solver overwrites it with the count it used
        uintmax_t iterations = max_iter;
        const std::pair<fp,fp> bounds = toms748_solve( residual,
                static_cast<fp>(-1.), static_cast<fp>(1.), tolerance, iterations );

        // sanity check
        // NOTE(review): Boost appears to report zero iterations when an endpoint
        //  is already a root - confirm that case cannot trip this assert.
        assert( iterations > 0 );

        // overwrite the input with the midpoint of the final bracket
        output[idx] = 0.5*(bounds.first + bounds.second);

        }

    } // toms748_boost
/**************************************************************************
* SIMPLE FUNCTIONS
......@@ -453,10 +502,12 @@ HEMI_DEV_CALLABLE void toms748::operator()(const uint o, const uint s) const {
// Sampler identifiers.
// NOTE(review): the shared `3 << 24` prefix presumably tags the root-finding
//  family, with the low bits picking the variant - confirm against the registry.
uint BisectionSampler::id() const {
    return (3 << 24) | 1;
}

uint MyBisectionSampler::id() const {
    return (3 << 24) | 2;
}

uint TOMS748sampler::id() const {
    return (3 << 24) | 16;
}

uint MyTOMS748sampler::id() const {
    return (3 << 24) | 17;
}
// Human-readable labels for the two bisection samplers.
string BisectionSampler::name() const {
    return "root-finding via bisection, Boost";
}

string MyBisectionSampler::name() const {
    return "root-finding via bisection";
}
string TOMS748sampler::name() const { return "root-finding via TOMS748"; }
// Human-readable labels for the two TOMS748 samplers.
string TOMS748sampler::name() const {
    return "root-finding via TOMS748, Boost";
}

string MyTOMS748sampler::name() const {
    return "root-finding via TOMS748";
}
......@@ -550,14 +601,14 @@ fp* MyBisectionSampler::sample( FineTiming* t ) const {
/**************************************************************************
* TOMS748sampler
* MyTOMS748sampler
*/
bool TOMS748sampler::setup( const Parameters& p, FineTiming* ft ) {
bool MyTOMS748sampler::setup( const Parameters& p, FineTiming* ft ) {
if( !registered ) {
cout << "ERROR: Why isn't TOMS748sampler registered?!" << endl;
cout << "ERROR: Why isn't MyTOMS748sampler registered?!" << endl;
return false;
}
......@@ -585,9 +636,9 @@ bool TOMS748sampler::setup( const Parameters& p, FineTiming* ft ) {
return true;
} // TOMS748sampler::setup
} // MyTOMS748sampler::setup
fp* TOMS748sampler::sample( FineTiming* t ) const {
fp* MyTOMS748sampler::sample( FineTiming* t ) const {
// some preliminary timing setup
ull startTime;
......@@ -635,10 +686,10 @@ fp* TOMS748sampler::sample( FineTiming* t ) const {
return output;
} // TOMS748sampler::sample
} // MyTOMS748sampler::sample
/**************************************************************************
* MyBisectionSampler
* BisectionSampler
*/
bool BisectionSampler::setup( const Parameters& p, FineTiming* ft ) {
......@@ -682,3 +733,47 @@ fp* BisectionSampler::sample( FineTiming* t ) const {
} // BisectionSampler::sample
/**************************************************************************
* TOMS748sampler
*/
bool TOMS748sampler::setup( const Parameters& p, FineTiming* ft ) {
if( !registered ) {
cout << "ERROR: Why isn't BisectionSampler registered?!" << endl;
return false;
}
// store some key variables
params = p;
return true;
} // TOMS748sampler::setup
// Run the Boost-backed TOMS748 functor over a freshly allocated result buffer.
//  t: optional timing record; when present, elapsed time is added to its
//     transferToDev and computation fields
// Returns the result buffer, or nullptr if the allocation failed.
fp* TOMS748sampler::sample( FineTiming* t ) const {

    // timing is optional: only touch the clock when a record was supplied
    const bool timed = ( t != nullptr );
    ull startTime;
    if( timed )
        startTime = setTimer();

    // grab the buffer the functor will work on
    fp* output = nullptr;
    const bool allocated = allocResults( &output, nullptr );
    if( !allocated )
        return nullptr;

    // everything up to this point is billed to transferToDev
    if( timed )
        incTimer( startTime, t->transferToDev );

    // hand the stored parameters to the functor and run it
    toms748_boost calc( params.lhg.count, params.lhg.data,
                        params.samples, output,
                        params.epsilon, params.maxIt );
    launch( calc );

    // the functor's run is billed to computation
    if( timed )
        incTimer( startTime, t->computation );

    return output;

    } // TOMS748sampler::sample
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment