Commit 6ea73704 authored by Jan Oliver Oelerich's avatar Jan Oliver Oelerich

Significantly improved performance by switching on choice of efficient FFT sizes. Stolen from GPAW.

parent 0d950403
......@@ -29,6 +29,10 @@ using namespace stemsalabim;
void GridManager::generateGrids() {
Params &p = Params::getInstance();
double density = Params::getInstance().samplingDensity();
_sampling_x = (unsigned int)algorithms::getFFTEfficientSize((int)ceil(density * _crystal->sizeX()));
_sampling_y = (unsigned int)algorithms::getFFTEfficientSize((int)ceil(density * _crystal->sizeY()));
generateKSpace();
double dz = p.sliceThickness();
......
......@@ -415,7 +415,8 @@ namespace stemsalabim {
* @return x grid sampling
*/
unsigned int samplingX() const {
return (unsigned int) algorithms::round_even(Params::getInstance().samplingDensity() * _crystal->sizeX());
return _sampling_x;
//return (unsigned int) algorithms::round_even(Params::getInstance().samplingDensity() * _crystal->sizeX());
}
/*!
......@@ -424,9 +425,19 @@ namespace stemsalabim {
* @return y grid sampling
*/
unsigned int samplingY() const {
return (unsigned int) algorithms::round_even(Params::getInstance().samplingDensity() * _crystal->sizeY());
return _sampling_y;
//return (unsigned int) algorithms::round_even(Params::getInstance().samplingDensity() * _crystal->sizeY());
}
float densityX() const {
return _sampling_x / (float)_crystal->sizeX();
}
float densityY() const {
return _sampling_y / (float)_crystal->sizeY();
}
/*!
* Return a wave object to multiply other waves with, that
* will bandwidth limit the wave if requested.
......@@ -640,6 +651,9 @@ namespace stemsalabim {
std::vector<double> _cbed_x_grid;
std::vector<double> _cbed_y_grid;
unsigned int _sampling_x;
unsigned int _sampling_y;
Wave _bandwidth_limit_mask;
};
......
......@@ -715,13 +715,14 @@ void Simulation::generatePropagator() {
unsigned int lx = _gridman->samplingX();
unsigned int ly = _gridman->samplingY();
double sdense2 = pow(1. / 3. * p.samplingDensity(), 2);
double sdense2_x = pow(1. / 3. * _gridman->densityX(), 2);
double sdense2_y = pow(1. / 3. * _gridman->densityY(), 2);
_propagator.init(lx, ly);
for(unsigned int ix = 0; ix < lx; ix++) {
for(unsigned int iy = 0; iy < ly; iy++) {
if(!p.bandwidthLimiting() || pow(_gridman->kx(ix), 2) + pow(_gridman->ky(iy), 2) < sdense2) {
if(!p.bandwidthLimiting() || pow(_gridman->kx(ix), 2) / sdense2_x + pow(_gridman->ky(iy), 2) / sdense2_y < 1) {
_propagator(ix, iy) = polar(1.0, scale * (float) (pow(_gridman->kx(ix), 2) + pow(_gridman->ky(iy), 2)));
} else {
_propagator(ix, iy) = 0.0;
......
......@@ -85,7 +85,7 @@ Slice::calculatePotential(const shared_ptr<FPConfManager> &fpman, const shared_p
// define a maximal radius above which an atomic potential is zero
// it is ok to have the same number for x and y, presumably, as the error
// is very small when the densities are not equal.
int pix_max = (int) ceil(p.maxPotentialRadius() * p.samplingDensity());
int pix_max = (int) ceil(p.maxPotentialRadius() * max(gridman->samplingX(), gridman->samplingY()));
// Scattering calculations
vector<vector<float>> vz(samples_x, vector<float>(samples_y, 0));
......
......@@ -94,14 +94,10 @@ namespace stemsalabim { namespace atomic {
}
}
private:
explicit Scattering(double cache_grating = 72, double cutoff = 3.0)
: _cutoff(cutoff)
, _grating(cache_grating) {
}
static double feKirklandReal(double r, const std::shared_ptr<Element> &element) {
if(r < 0.0018)
return feKirklandReal(0.0018, element);
std::vector<double> sp = element->scatteringParamsDT();
double pi2 = pi * pi;
double r2 = r * r;
......@@ -116,6 +112,13 @@ namespace stemsalabim { namespace atomic {
return sum * PREFACTOR_ATOMPOT;
}
private:
explicit Scattering(double cache_grating = 72, double cutoff = 3.0)
: _cutoff(cutoff)
, _grating(cache_grating) {
}
void populateCache(const std::shared_ptr<Element> &element) {
auto cache_len = (unsigned long)ceil(_grating * _cutoff);
//auto v = std::vector<double>(cache_len, 0);
......@@ -126,8 +129,7 @@ namespace stemsalabim { namespace atomic {
double factor = pi / (2 * _cutoff * 0.15);
double tapering_radius = 0.85 * _cutoff;
v[0] = feKirklandReal(0.0018, element);
for(unsigned int i = 1; i < cache_len; ++i) {
for(unsigned int i = 0; i < cache_len; ++i) {
double distance = i / _grating;
v[i] = feKirklandReal(distance, element);
......
......@@ -427,29 +427,31 @@ namespace stemsalabim {
return absf;
}
}
inline bool checkFFTEfficient(int n) {
if(n==1)
return true;
else {
if(n % 2 == 0)
return checkFFTEfficient(n / 2);
if(n % 3 == 0)
return checkFFTEfficient(n / 3);
if(n % 5 == 0)
return checkFFTEfficient(n / 5);
if(n % 7 == 0)
return checkFFTEfficient(n / 7);
inline bool checkFFTEfficient(int n) {
if(n==1)
return true;
else {
if(n % 2 == 0)
return checkFFTEfficient(n / 2);
if(n % 3 == 0)
return checkFFTEfficient(n / 3);
if(n % 5 == 0)
return checkFFTEfficient(n / 5);
if(n % 7 == 0)
return checkFFTEfficient(n / 7);
}
return false;
}
return false;
}
inline int getFFTEfficientSize(int N) {
while(!checkFFTEfficient(N)) {
N++;
inline int getFFTEfficientSize(int N) {
while(!checkFFTEfficient(N)) {
N++;
}
return N;
}
return N;
}
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment