Commit eb0b93ed authored by Jan Oliver Oelerich

Added alternative parallelization scheme and improved status output.

parent 73d2a561
......@@ -48,13 +48,15 @@ void Simulation::init() {
_io = std::make_shared<IO>();
if(mpi_env.isMpi())
output::print("[%s, %d] Hello from %s, MPI rank %d/%d, about to spawn %d threads.\n",
output::print("[%s, %d] Hello from %s (rank %d/%d), spawning %d threads.\n",
mpi_env.name(),
mpi_env.rank(),
mpi_env.name(),
mpi_env.rank(),
mpi_env.size(),
p.numberOfThreads());
else
output::print("This is a single-node calculation using %d threads.\n", p.numberOfThreads());
// Read and communicate the mean square displacements of the frozen phonon
// approximation.
......@@ -127,7 +129,30 @@ void Simulation::run() {
high_resolution_clock::time_point start_sim, t1;
start_sim = high_resolution_clock::now();
SimulationStateManager state_manager(_gridman);
// decide on parallelization scheme
// When the number of MPI processors is much smaller than the number of configurations,
// parallelize on configuration level.
int number_configurations_total = prms.numberOfConfigurations() * prms.numberOfDefoci();
bool parallelize_configurations = false;
if(mpi_env.isMpi() && 3 * mpi_env.size() < number_configurations_total) {
parallelize_configurations = true;
} else if(mpi_env.isMpi() &&
mpi_env.size() <= number_configurations_total &&
number_configurations_total % mpi_env.size() == 0) {
parallelize_configurations = true;
}
if(mpi_env.isMpi() && parallelize_configurations) {
printMaster(output::fmt("%d configurations are calculated in parallel with %d threads each.\n",
mpi_env.size(),
prms.numberOfThreads()));
} else {
printMaster(output::fmt("Pixels are distributed among %d procs with %d threads each.\n",
mpi_env.size(),
prms.numberOfThreads()));
}
SimulationStateManager state_manager(_gridman, parallelize_configurations);
// loop over simulations
for(SimulationState &st: state_manager) {
......@@ -153,7 +178,7 @@ void Simulation::run() {
for(shared_ptr<Slice> &slice: _gridman->slices())
printMaster(st,
output::fmt("Slice %d [%.3fnm - %.3fnm] contains %d atoms.\n",
output::fmt("Slice %d [%.3fnm - %.3fnm] %d atoms.\n",
slice->id(),
slice->z(),
(slice->z() + slice->thickness()),
......@@ -194,13 +219,13 @@ void Simulation::run() {
}
printLine(st,
output::fmt("Finished calculation of transmission functions in %s.\n",
output::fmt("Finished transmission functions in %s.\n",
output::humantime(algorithms::getTimeSince(t1))));
_io->generateTemporaryResultFile();
if(mpi_env.isMaster()) {
multisliceMaster(st);
if(mpi_env.isMaster() || parallelize_configurations) {
multisliceMaster(st, !parallelize_configurations);
} else {
multisliceWorker(st);
}
......@@ -211,6 +236,8 @@ void Simulation::run() {
auto start_io = high_resolution_clock::now();
// note that when configurations are run in parallel, it is crucial that rank 0 writes to the output
// NC file first, because otherwise the averaging of results will fail.
for(int rank = 0; rank < mpi_env.size(); rank++) {
if(rank == mpi_env.rank()) {
auto start = high_resolution_clock::now();
......@@ -391,13 +418,12 @@ void Simulation::printLine(const SimulationState &st, const string &line) {
auto &mpi_env = mpi::Environment::getInstance();
if(mpi_env.isSlave())
output::print("[%d/%d, %d/%d] [%s, %d] %s",
if(mpi_env.isMpi())
output::print("[%d/%d, %d/%d] [rank %d] %s",
st.idefocus() + 1,
p.numberOfDefoci(),
st.iconf() + 1,
p.numberOfConfigurations(),
mpi_env.name(),
mpi_env.rank(),
line);
else
......@@ -437,7 +463,7 @@ void Simulation::storeResultData(unsigned int idefocus, unsigned int iconf, cons
}
}
void Simulation::multisliceMaster(const SimulationState &st) {
void Simulation::multisliceMaster(const SimulationState &st, bool do_mpi_queue) {
Params &prms = Params::getInstance();
......@@ -446,9 +472,6 @@ void Simulation::multisliceMaster(const SimulationState &st) {
// initialize an MPI environment
auto &mpi_env = mpi::Environment::getInstance();
if(!mpi_env.isMaster())
output::error("This function cannot be called from an MPI slave!");
// This vector<ScanPoint> holds all pixels/scan points of the STEM image to be calculated.
vector<ScanPoint> work_packages = _gridman->scanPoints();
......@@ -473,7 +496,7 @@ void Simulation::multisliceMaster(const SimulationState &st) {
// now, wait for incoming results and, upon receiving one, send a new work package out if
// there are any left
if(mpi_env.isMpi()) {
if(mpi_env.isMpi() && mpi_env.isMaster() && do_mpi_queue) {
mpi::Status s;
......@@ -499,15 +522,11 @@ void Simulation::multisliceMaster(const SimulationState &st) {
// progress output
float p = scan_work.progress();
if(p * 100 > last_prcnt) {
if(p * 100 > last_prcnt + 10) {
last_prcnt = (unsigned int) ceil(p * 100);
printLine(st,
output::fmt("progr: %d%%, eta/conf: %s, eta/defocus: %s, eta/sim: %s\n",
ceil(p * 100),
output::humantime(st.etaConf(p)),
output::humantime(st.etaDefocus(p)),
output::humantime(st.etaSimulation(p))));
output::fmt("progr: %d%%, eta/conf: %s\n", ceil(p * 100), output::humantime(st.etaConf(p))));
}
} while(!scan_work.finished());
......@@ -537,15 +556,11 @@ void Simulation::multisliceMaster(const SimulationState &st) {
float p = scan_work.progress();
if(p * 100 > last_prcnt) {
if(p * 100 > last_prcnt + 10) {
last_prcnt = (unsigned int) ceil(p * 100);
printLine(st,
output::fmt("progr: %d%%, eta/conf: %s, eta/defocus: %s, eta/sim: %s\n",
ceil(p * 100),
output::humantime(st.etaConf(p)),
output::humantime(st.etaDefocus(p)),
output::humantime(st.etaSimulation(p))));
output::fmt("progr: %d%%, eta/conf: %s\n", ceil(p * 100), output::humantime(st.etaConf(p))));
}
}
......
......@@ -112,7 +112,7 @@ namespace stemsalabim {
* result structures. However, additionally the master also carries out some multi-slice simulations
* when more than one thread is available.
*/
void multisliceMaster(const SimulationState &st);
void multisliceMaster(const SimulationState &st, bool do_mpi_queue);
/*!
* This is the part of the multi-slice simulations that is carried out by the MPI slaves,
......
#include <utility>
/*
* STEMsalabim: Magical STEM image simulations
*
......@@ -131,19 +133,33 @@ namespace stemsalabim {
/*!
* Constructor. Prepares a vector of all the SimulationState objects that the simulation will go through.
*/
explicit SimulationStateManager(const std::shared_ptr<GridManager> &gridman)
: _gridman(gridman) {
explicit SimulationStateManager(std::shared_ptr<GridManager> gridman, bool parallel)
: _gridman(std::move(gridman))
, _parallel(parallel) {
Params &prms = Params::getInstance();
if(_parallel) {
auto &mpi_env = mpi::Environment::getInstance();
_rank = mpi_env.rank();
_states.resize((unsigned long long)mpi_env.size());
} else {
_rank = 0;
_states.resize(1);
}
unsigned int rank_index = 0;
for(unsigned int idefocus = 0; idefocus < prms.numberOfDefoci(); idefocus++) {
for(unsigned int iconf = 0; iconf < prms.numberOfConfigurations(); iconf++) {
unsigned int iteration = idefocus * prms.numberOfConfigurations() + iconf;
_states.push_back(SimulationState(idefocus,
iconf,
prms.numberOfDefoci(),
prms.numberOfConfigurations(),
gridman->defoci()[idefocus],
iteration));
if(rank_index >= _states.size())
rank_index = 0;
_states[rank_index].push_back(SimulationState(idefocus,
iconf,
prms.numberOfDefoci(),
prms.numberOfConfigurations(),
_gridman->defoci()[idefocus],
iteration));
rank_index++;
}
}
}
......@@ -151,13 +167,13 @@ namespace stemsalabim {
typedef typename std::vector<SimulationState>::iterator iterator;
typedef typename std::vector<SimulationState>::const_iterator const_iterator;
iterator begin() { return _states.begin(); }
iterator begin() { return _states[_rank].begin(); }
const_iterator begin() const { return _states.begin(); }
const_iterator begin() const { return _states[_rank].begin(); }
iterator end() { return _states.end(); }
iterator end() { return _states[_rank].end(); }
const_iterator end() const { return _states.end(); }
const_iterator end() const { return _states[_rank].end(); }
private:
......@@ -165,7 +181,13 @@ namespace stemsalabim {
std::shared_ptr<GridManager> _gridman;
/// container of the SimulationState s
std::vector<SimulationState> _states;
std::vector<std::vector<SimulationState>> _states;
/// whether or not the configurations are calculated in parallel
bool _parallel{false};
/// The index (rank if parallel) in _states that is calculated on this proc.
int _rank;
};
}
......
......@@ -57,7 +57,7 @@ namespace stemsalabim {
p.cell()->initScattering();
SimulationStateManager state_manager(gridman);
SimulationStateManager state_manager(gridman, false);
// loop over simulations
for(SimulationState &st: state_manager) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment