Commit 36a5678d authored by Jan Oliver Oelerich's avatar Jan Oliver Oelerich

Made a bunch of improvements to make the calculatePixel function quicker.

parent 3d91e97a
......@@ -110,21 +110,45 @@ void GridManager::generateGrids() {
unsigned int every_nslices_cbed = p.cbedSaveEveryNSlices();
// determine which z coordinates (thicknesses) of slices are stored in the output file
if(every_nslices_adf == 0) {
_adf_slice_coords.push_back(_crystal->slices().back()->z());
} else {
for(unsigned long slice_counter = 1; slice_counter <= nslices; slice_counter++)
if(slice_counter % every_nslices_adf == 0 || slice_counter == nslices)
_adf_slice_coords.push_back(_crystal->slices()[slice_counter - 1]->z());
_adf_store_slice.resize(nslices);
_cbed_store_slice.resize(nslices);
for(const shared_ptr<Slice> &slice: _crystal->slices()) {
unsigned int snum = slice->id() + 1;
_adf_store_slice[slice->id()] = false;
_cbed_store_slice[slice->id()] = false;
if(snum == nslices || (p.adfSaveEveryNSlices() > 0 && snum % every_nslices_adf == 0)) {
_adf_slice_coords.push_back(slice->z());
_adf_store_slice[slice->id()] = true;
}
if(snum == nslices || (p.cbedSaveEveryNSlices() > 0 && snum % every_nslices_cbed == 0)) {
_cbed_slice_coords.push_back(slice->z());
_cbed_store_slice[slice->id()] = true;
}
}
// determine which z coordinates (thicknesses) of slices are stored in the output file
if(every_nslices_cbed == 0) {
_cbed_slice_coords.push_back(_crystal->slices().back()->z());
} else {
for(unsigned long slice_counter = 1; slice_counter <= nslices; slice_counter++)
if(slice_counter % every_nslices_cbed == 0 || slice_counter == nslices)
_cbed_slice_coords.push_back(_crystal->slices()[slice_counter - 1]->z());
// generate the indices for the ADF detector array bins.
// here, we iterate the k space and calculate the wave angles.
// Then, we determine the corresponding detector angle and sum the intensity up.
_adf_bin_index.resize(samplingX());
for(unsigned int ix = 0; ix < samplingX(); ix++) {
_adf_bin_index[ix].resize(samplingY());
for(unsigned int iy = 0; iy < samplingY(); iy++) {
double k2 = sqrt(pow(kx(ix), 2) + pow(ky(iy), 2)) * p.wavelength();
unsigned int index = algorithms::getIndexOfAdaptiveSpace(k2 * 1e3,
get<0>(p.adfDetectorAngles()),
get<1>(p.adfDetectorAngles()),
get<2>(p.adfDetectorAngles()),
p.adfDetectorIntervalExponent(),
true);
if(index >= 0 && index < (long) _detector_grid.size())
_adf_bin_index[ix][iy] = index;
else
_adf_bin_index[ix][iy] = -1;
}
}
// defocus grid
......
......@@ -447,6 +447,18 @@ namespace stemsalabim {
return _cbed_y_grid;
}
bool adfStoreSlice(unsigned int id) const {
return _adf_store_slice[id];
}
bool cbedStoreSlice(unsigned int id) const {
return _cbed_store_slice[id];
}
int adfBinIndex(unsigned int ix, unsigned int iy) const {
return _adf_bin_index[ix][iy];
}
/*!
* Get the defocus values that are simulated.
* @return vector of defoci (in Angstrom)
......@@ -692,6 +704,11 @@ namespace stemsalabim {
std::vector<double> _x_space;
std::vector<double> _y_space;
std::vector<bool> _adf_store_slice;
std::vector<bool> _cbed_store_slice;
std::vector<std::vector<int>> _adf_bin_index;
std::vector<double> _adf_x_grid;
std::vector<double> _adf_y_grid;
std::vector<double> _cbed_x_grid;
......
......@@ -308,9 +308,12 @@ void Simulation::calculatePixel(ScanPoint &point, const double defocus) {
vector<float> adf_intensities;
vector<float> cbed_intensities;
vector<float> detector_intensities;
if(point.adf)
if(point.adf) {
adf_intensities.reserve(_gridman->adfSliceCoords().size() * _gridman->adfDetectorGrid().size());
detector_intensities.resize(_gridman->adfDetectorGrid().size());
}
if(point.cbed)
cbed_intensities.reserve(_gridman->storedCbedSizeX() *
......@@ -324,9 +327,7 @@ void Simulation::calculatePixel(ScanPoint &point, const double defocus) {
makeProbe(wave, defocus, point.x, point.y);
// propagate through each slice and the following vacuum
unsigned int slice_counter = 0;
for(const shared_ptr<Slice> &slice: _crystal->slices()) {
slice_counter++;
slice->transmit(wave);
......@@ -342,34 +343,20 @@ void Simulation::calculatePixel(ScanPoint &point, const double defocus) {
if(p.normalizeAlways())
wave.normalize();
if(point.adf &&
(slice_counter == _crystal->numberOfSlices() ||
(p.adfSaveEveryNSlices() > 0 && slice_counter % p.adfSaveEveryNSlices() == 0))) {
if(point.adf && _gridman->adfStoreSlice(slice->id())) {
// calculate intensities for the various angles in a smart way
vector<float> detector_intensities(_gridman->adfDetectorGrid().size(), 0);
double k2;
double intens;
long ind;
std::fill(detector_intensities.begin(), detector_intensities.end(), 0);
// here, we iterate the k space and calculate the wave angles.
// Then, we determine the corresponding detector angle and sum the intensity up.
for(unsigned int ix = 0; ix < _gridman->samplingX(); ix++) {
for(unsigned int iy = 0; iy < _gridman->samplingY(); iy++) {
k2 = sqrt(pow(_gridman->kx(ix), 2) + pow(_gridman->ky(iy), 2)) * p.wavelength();
ind = algorithms::getIndexOfAdaptiveSpace(k2 * 1e3,
get<0>(p.adfDetectorAngles()),
get<1>(p.adfDetectorAngles()),
get<2>(p.adfDetectorAngles()),
p.adfDetectorIntervalExponent(),
true);
if(ind >= 0 && ind < (long) detector_intensities.size()) {
int ind = _gridman->adfBinIndex(ix, iy);
if(ind >= 0) {
// check for not a number
intens = (float) pow(abs(wave(ix, iy)), 2);
float intens = (float) pow(abs(wave(ix, iy)), 2);
if(!::isnan(intens))
detector_intensities[ind] += intens;
}
......@@ -381,9 +368,7 @@ void Simulation::calculatePixel(ScanPoint &point, const double defocus) {
}
}
if(point.cbed &&
(slice_counter == _crystal->numberOfSlices() ||
(p.cbedSaveEveryNSlices() > 0 && slice_counter % p.cbedSaveEveryNSlices() == 0))) {
if(point.cbed && _gridman->cbedStoreSlice(slice->id())) {
// first, we cut off high frequencies from the wave if necessary. Note, that we follow
// the FFTW storage scheme for frequencies, hence, half of the values are in the lower
......
......@@ -73,6 +73,7 @@ namespace stemsalabim { namespace {
double inv_scale = grating_atompot * grating_atompot / (length_atompot * length_atompot);
potential_cache.init((unsigned int) grating_atompot, (unsigned int) grating_atompot, std::complex<float>(1.0, 0.0));
potential_cache.setIsKSpace(true);
potential_cache.fftwQuickPlanning(true);
double kx, ky, k;
for(int ix = 0; ix < grating_atompot; ix++) {
......
......@@ -240,7 +240,7 @@ namespace stemsalabim { namespace {
init(lx, ly, std::complex<float>(0, 0));
}
fftwf_plan &getForwardPlan(unsigned int lx, unsigned int ly) {
const fftwf_plan &getForwardPlan(unsigned int lx, unsigned int ly, bool quick_planning) {
static std::map<std::tuple<unsigned int, unsigned int>, fftwf_plan> cache;
std::unique_lock<std::mutex> qlck(_s_init_mutex);
......@@ -258,12 +258,12 @@ namespace stemsalabim { namespace {
reinterpret_cast<fftwf_complex *>(&data[0]),
reinterpret_cast<fftwf_complex *>(&data[0]),
FFTW_BACKWARD,
FFTW_PATIENT);
quick_planning ? FFTW_ESTIMATE : FFTW_PATIENT);
return cache[key];
}
fftwf_plan &getBackwardPlan(unsigned int lx, unsigned int ly) {
const fftwf_plan &getBackwardPlan(unsigned int lx, unsigned int ly, bool quick_planning) {
static std::map<std::tuple<unsigned int, unsigned int>, fftwf_plan> cache;
......@@ -282,7 +282,7 @@ namespace stemsalabim { namespace {
reinterpret_cast<fftwf_complex *>(&data[0]),
reinterpret_cast<fftwf_complex *>(&data[0]),
FFTW_FORWARD,
FFTW_PATIENT);
quick_planning ? FFTW_ESTIMATE : FFTW_PATIENT);
return cache[key];
}
......@@ -313,7 +313,7 @@ namespace stemsalabim { namespace {
void forwardFFT() {
_is_kspace = true;
auto start = std::chrono::high_resolution_clock::now();
fftwf_execute_dft(getForwardPlan(_lx, _ly),
fftwf_execute_dft(getForwardPlan(_lx, _ly, _fftw_quick_planning),
reinterpret_cast<fftwf_complex *>(&_data[0]),
reinterpret_cast<fftwf_complex *>(&_data[0]));
auto elapsed = std::chrono::high_resolution_clock::now() - start;
......@@ -327,6 +327,9 @@ namespace stemsalabim { namespace {
_data.clear();
}
void fftwQuickPlanning(bool quick) {
_fftw_quick_planning = quick;
}
/*!
* Returns if the WF is initialized.
* @return true if initialized
......@@ -342,7 +345,7 @@ namespace stemsalabim { namespace {
_is_kspace = false;
auto start = std::chrono::high_resolution_clock::now();
fftwf_execute_dft(getBackwardPlan(_lx, _ly),
fftwf_execute_dft(getBackwardPlan(_lx, _ly, _fftw_quick_planning),
reinterpret_cast<fftwf_complex *>(&_data[0]),
reinterpret_cast<fftwf_complex *>(&_data[0]));
auto elapsed = std::chrono::high_resolution_clock::now() - start;
......@@ -404,6 +407,8 @@ namespace stemsalabim { namespace {
bool _is_kspace{false};
bool _fftw_quick_planning{false};
bool _initialized{false};
......
......@@ -215,7 +215,7 @@ namespace stemsalabim {
return adaptiveSpace(min, max, num_steps, 1.0, include_max);
}
inline long
inline int
getIndexOfAdaptiveSpace(double val, double min, double max, unsigned int num_steps, float exponent,
bool include_max) {
......@@ -229,9 +229,9 @@ namespace stemsalabim {
double last_value = (double) pow((num_steps - 1) * (max - min) / (num_steps - offset), exponent);
return (long) floor(pow((val - min) / (max - min) * last_value, 1. / exponent) /
(max - min) *
(num_steps - offset));
return (int) floor(pow((val - min) / (max - min) * last_value, 1. / exponent) /
(max - min) *
(num_steps - offset));
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment