Commit 9aa96c2c authored by Jack Poulson

Refactoring/extending implementation of inclusive/exclusive factorization/DPP timings.

parent 71dd5e44
......@@ -40,7 +40,30 @@ struct SupernodalDPPControl {
// The number of columns to group into a single task when multithreading
// the scalar structure formation.
Int sort_grain_size = 200;
// The minimum ratio of the amount of work in a subtree relative to the
// nominal amount of flops assigned to each thread (total_work / max_threads)
// before OpenMP subtasks are launched in the subtree.
double parallel_ratio_threshold = 0.02;
// The minimum number of flops in a subtree before OpenMP subtasks are
// generated.
double min_parallel_threshold = 1e5;
#endif // ifdef CATAMARI_OPENMP
#ifdef CATAMARI_ENABLE_TIMERS
// The max number of levels of the supernodal tree to visualize timings of.
Int max_timing_levels = 4;
// Whether isolated diagonal entries should be excluded from the timing
// visualizations.
bool avoid_timing_isolated_roots = true;
// The name of the Graphviz file for the inclusive timing annotations.
std::string inclusive_timings_filename = "inclusive.gv";
// The name of the Graphviz file for the exclusive timing annotations.
std::string exclusive_timings_filename = "exclusive.gv";
#endif // ifdef CATAMARI_ENABLE_TIMERS
};
// The user-facing data structure for storing a supernodal LDL'-based DPP
......
......@@ -39,15 +39,10 @@ SupernodalDPP<Field>::SupernodalDPP(const CoordinateMatrix<Field>& matrix,
}
total_work_ =
std::accumulate(work_estimates_.begin(), work_estimates_.end(), 0.);
// TODO(Jack Poulson): Make these two parameters configurable.
const double kParallelRatioThreshold = 0.02;
const double kMinParallelThreshold = 1.e5;
const double min_parallel_ratio_work =
(total_work_ * kParallelRatioThreshold) / max_threads;
(total_work_ * control_.parallel_ratio_threshold) / max_threads;
min_parallel_work_ =
std::max(kMinParallelThreshold, min_parallel_ratio_work);
std::max(control_.min_parallel_threshold, min_parallel_ratio_work);
return;
}
......
......@@ -10,6 +10,8 @@
#include <algorithm>
#include "catamari/io_utils.hpp"
#include "catamari/dpp/supernodal_dpp.hpp"
namespace catamari {
......@@ -176,6 +178,9 @@ std::vector<Int> SupernodalDPP<Field>::LeftLookingSample(
supernodal_ldl::LeftLookingSharedState shared_state;
shared_state.rel_rows.Resize(num_supernodes);
shared_state.intersect_ptrs.Resize(num_supernodes);
#ifdef CATAMARI_ENABLE_TIMERS
shared_state.exclusive_timers.Resize(num_supernodes);
#endif // ifdef CATAMARI_ENABLE_TIMERS
std::random_device random_device;
PrivateState private_state;
......@@ -189,13 +194,22 @@ std::vector<Int> SupernodalDPP<Field>::LeftLookingSample(
// Note that any postordering of the supernodal elimination forest suffices.
for (Int supernode = 0; supernode < num_supernodes; ++supernode) {
CATAMARI_START_TIMER(shared_state.exclusive_timers[supernode]);
LeftLookingSupernodeUpdate(supernode, &shared_state, &private_state);
LeftLookingSupernodeSample(supernode, maximum_likelihood, &private_state,
&sample);
CATAMARI_STOP_TIMER(shared_state.exclusive_timers[supernode]);
}
std::sort(sample.begin(), sample.end());
#ifdef CATAMARI_ENABLE_TIMERS
TruncatedForestTimersToDot(
control_.exclusive_timings_filename, shared_state.exclusive_timers,
ordering_.assembly_forest, control_.max_timing_levels,
control_.avoid_timing_isolated_roots);
#endif // ifdef CATAMARI_ENABLE_TIMERS
return sample;
}
......
......@@ -11,6 +11,8 @@
#include <algorithm>
#include "catamari/io_utils.hpp"
#include "catamari/dpp/supernodal_dpp.hpp"
namespace catamari {
......@@ -237,6 +239,8 @@ void SupernodalDPP<Field>::LeftLookingSubtree(
const Int child_end = ordering_.assembly_forest.child_offsets[supernode + 1];
const Int num_children = child_end - child_beg;
CATAMARI_START_TIMER(shared_state->inclusive_timers[supernode]);
for (Int child_index = 0; child_index < num_children; ++child_index) {
const Int child =
ordering_.assembly_forest.children[child_beg + child_index];
......@@ -246,12 +250,17 @@ void SupernodalDPP<Field>::LeftLookingSubtree(
sample);
}
CATAMARI_START_TIMER(shared_state->exclusive_timers[supernode]);
LeftLookingSupernodeUpdate(supernode, shared_state, private_state);
std::vector<Int> subsample;
LeftLookingSupernodeSample(supernode, maximum_likelihood, private_state,
&subsample);
sample->insert(sample->end(), subsample.begin(), subsample.end());
CATAMARI_STOP_TIMER(shared_state->inclusive_timers[supernode]);
CATAMARI_STOP_TIMER(shared_state->exclusive_timers[supernode]);
}
template <class Field>
......@@ -271,6 +280,8 @@ void SupernodalDPP<Field>::OpenMPLeftLookingSubtree(
const Int child_end = ordering_.assembly_forest.child_offsets[supernode + 1];
const Int num_children = child_end - child_beg;
CATAMARI_START_TIMER(shared_state->inclusive_timers[supernode]);
// NOTE: We could alternatively switch to maintaining a single, shared
// boolean list of length 'num_rows' which flags each entry as 'in' or
// 'out' of the sample.
......@@ -288,6 +299,8 @@ void SupernodalDPP<Field>::OpenMPLeftLookingSubtree(
private_states, subsample);
}
CATAMARI_START_TIMER(shared_state->exclusive_timers[supernode]);
// Merge the subsamples into the current sample.
for (const std::vector<Int>& subsample : subsamples) {
sample->insert(sample->end(), subsample.begin(), subsample.end());
......@@ -302,6 +315,9 @@ void SupernodalDPP<Field>::OpenMPLeftLookingSubtree(
private_states, &subsample);
sample->insert(sample->end(), subsample.begin(), subsample.end());
CATAMARI_STOP_TIMER(shared_state->inclusive_timers[supernode]);
CATAMARI_STOP_TIMER(shared_state->exclusive_timers[supernode]);
}
template <class Field>
......@@ -326,6 +342,10 @@ std::vector<Int> SupernodalDPP<Field>::OpenMPLeftLookingSample(
supernodal_ldl::LeftLookingSharedState shared_state;
shared_state.rel_rows.Resize(num_supernodes);
shared_state.intersect_ptrs.Resize(num_supernodes);
#ifdef CATAMARI_ENABLE_TIMERS
shared_state.inclusive_timers.Resize(num_supernodes);
shared_state.exclusive_timers.Resize(num_supernodes);
#endif // ifdef CATAMARI_ENABLE_TIMERS
std::random_device random_device;
Buffer<PrivateState> private_states(max_threads);
......@@ -377,6 +397,17 @@ std::vector<Int> SupernodalDPP<Field>::OpenMPLeftLookingSample(
std::sort(sample.begin(), sample.end());
#ifdef CATAMARI_ENABLE_TIMERS
TruncatedForestTimersToDot(
control_.inclusive_timings_filename, shared_state.inclusive_timers,
ordering_.assembly_forest, control_.max_timing_levels,
control_.avoid_timing_isolated_roots);
TruncatedForestTimersToDot(
control_.exclusive_timings_filename, shared_state.exclusive_timers,
ordering_.assembly_forest, control_.max_timing_levels,
control_.avoid_timing_isolated_roots);
#endif // ifdef CATAMARI_ENABLE_TIMERS
return sample;
}
......
......@@ -10,6 +10,8 @@
#include <algorithm>
#include "catamari/io_utils.hpp"
#include "catamari/dpp/supernodal_dpp.hpp"
namespace catamari {
......@@ -80,6 +82,8 @@ void SupernodalDPP<Field>::RightLookingSubtree(
const Int child_end = ordering_.assembly_forest.child_offsets[supernode + 1];
const Int num_children = child_end - child_beg;
CATAMARI_START_TIMER(shared_state->inclusive_timers[supernode]);
for (Int child_index = 0; child_index < num_children; ++child_index) {
const Int child =
ordering_.assembly_forest.children[child_beg + child_index];
......@@ -89,10 +93,15 @@ void SupernodalDPP<Field>::RightLookingSubtree(
sample);
}
CATAMARI_START_TIMER(shared_state->exclusive_timers[supernode]);
std::vector<Int> subsample;
RightLookingSupernodeSample(supernode, maximum_likelihood, shared_state,
private_state, &subsample);
sample->insert(sample->end(), subsample.begin(), subsample.end());
CATAMARI_STOP_TIMER(shared_state->inclusive_timers[supernode]);
CATAMARI_STOP_TIMER(shared_state->exclusive_timers[supernode]);
}
template <class Field>
......@@ -115,6 +124,10 @@ std::vector<Int> SupernodalDPP<Field>::RightLookingSample(
supernodal_ldl::RightLookingSharedState<Field> shared_state;
shared_state.schur_complement_buffers.Resize(num_supernodes);
shared_state.schur_complements.Resize(num_supernodes);
#ifdef CATAMARI_ENABLE_TIMERS
shared_state.inclusive_timers.Resize(num_supernodes);
shared_state.exclusive_timers.Resize(num_supernodes);
#endif // ifdef CATAMARI_ENABLE_TIMERS
// We only need the random number generator.
std::random_device random_device;
......@@ -129,6 +142,17 @@ std::vector<Int> SupernodalDPP<Field>::RightLookingSample(
std::sort(sample.begin(), sample.end());
#ifdef CATAMARI_ENABLE_TIMERS
TruncatedForestTimersToDot(
control_.inclusive_timings_filename, shared_state.inclusive_timers,
ordering_.assembly_forest, control_.max_timing_levels,
control_.avoid_timing_isolated_roots);
TruncatedForestTimersToDot(
control_.exclusive_timings_filename, shared_state.exclusive_timers,
ordering_.assembly_forest, control_.max_timing_levels,
control_.avoid_timing_isolated_roots);
#endif // ifdef CATAMARI_ENABLE_TIMERS
return sample;
}
......
......@@ -11,6 +11,8 @@
#include <algorithm>
#include "catamari/io_utils.hpp"
#include "catamari/dpp/supernodal_dpp.hpp"
namespace catamari {
......@@ -103,6 +105,8 @@ void SupernodalDPP<Field>::OpenMPRightLookingSubtree(
const Int child_end = ordering_.assembly_forest.child_offsets[supernode + 1];
const Int num_children = child_end - child_beg;
CATAMARI_START_TIMER(shared_state->inclusive_timers[supernode]);
// NOTE: We could alternatively switch to maintaining a single, shared
// boolean list of length 'num_rows' which flags each entry as 'in' or
// 'out' of the sample.
......@@ -123,6 +127,8 @@ void SupernodalDPP<Field>::OpenMPRightLookingSubtree(
private_states, subsample);
}
CATAMARI_START_TIMER(shared_state->exclusive_timers[supernode]);
// Merge the subsamples into the current sample.
for (const std::vector<Int>& subsample : subsamples) {
sample->insert(sample->end(), subsample.begin(), subsample.end());
......@@ -133,6 +139,9 @@ void SupernodalDPP<Field>::OpenMPRightLookingSubtree(
OpenMPRightLookingSupernodeSample(supernode, maximum_likelihood, shared_state,
private_states, &subsample);
sample->insert(sample->end(), subsample.begin(), subsample.end());
CATAMARI_STOP_TIMER(shared_state->inclusive_timers[supernode]);
CATAMARI_STOP_TIMER(shared_state->exclusive_timers[supernode]);
}
template <class Field>
......@@ -156,6 +165,10 @@ std::vector<Int> SupernodalDPP<Field>::OpenMPRightLookingSample(
supernodal_ldl::RightLookingSharedState<Field> shared_state;
shared_state.schur_complement_buffers.Resize(num_supernodes);
shared_state.schur_complements.Resize(num_supernodes);
#ifdef CATAMARI_ENABLE_TIMERS
shared_state.inclusive_timers.Resize(num_supernodes);
shared_state.exclusive_timers.Resize(num_supernodes);
#endif // ifdef CATAMARI_ENABLE_TIMERS
const int max_threads = omp_get_max_threads();
......@@ -213,6 +226,17 @@ std::vector<Int> SupernodalDPP<Field>::OpenMPRightLookingSample(
std::sort(sample.begin(), sample.end());
#ifdef CATAMARI_ENABLE_TIMERS
TruncatedForestTimersToDot(
control_.inclusive_timings_filename, shared_state.inclusive_timers,
ordering_.assembly_forest, control_.max_timing_levels,
control_.avoid_timing_isolated_roots);
TruncatedForestTimersToDot(
control_.exclusive_timings_filename, shared_state.exclusive_timers,
ordering_.assembly_forest, control_.max_timing_levels,
control_.avoid_timing_isolated_roots);
#endif // ifdef CATAMARI_ENABLE_TIMERS
return sample;
}
......
......@@ -53,7 +53,30 @@ struct Control {
// The number of columns to group into a single task when multithreading
// the scalar structure formation.
Int sort_grain_size = 200;
// The minimum ratio of the amount of work in a subtree relative to the
// nominal amount of flops assigned to each thread (total_work / max_threads)
// before OpenMP subtasks are launched in the subtree.
double parallel_ratio_threshold = 0.02;
// The minimum number of flops in a subtree before OpenMP subtasks are
// generated.
double min_parallel_threshold = 1e5;
#endif // ifdef CATAMARI_OPENMP
#ifdef CATAMARI_ENABLE_TIMERS
// The max number of levels of the supernodal tree to visualize timings of.
Int max_timing_levels = 4;
// Whether isolated diagonal entries should be excluded from the timing
// visualizations.
bool avoid_timing_isolated_roots = true;
// The name of the Graphviz file for the inclusive timing annotations.
std::string inclusive_timings_filename = "inclusive.gv";
// The name of the Graphviz file for the exclusive timing annotations.
std::string exclusive_timings_filename = "exclusive.gv";
#endif // ifdef CATAMARI_ENABLE_TIMERS
};
// The user-facing data structure for storing a supernodal LDL' factorization.
......@@ -239,7 +262,6 @@ class Factorization {
void LowerTriangularSolveRecursion(Int supernode,
BlasMatrixView<Field>* right_hand_sides,
Buffer<Field>* workspace) const;
#ifdef CATAMARI_OPENMP
void OpenMPLowerTriangularSolveRecursion(
Int supernode, BlasMatrixView<Field>* right_hand_sides,
......@@ -250,7 +272,6 @@ class Factorization {
void LowerSupernodalTrapezoidalSolve(Int supernode,
BlasMatrixView<Field>* right_hand_sides,
Buffer<Field>* workspace) const;
#ifdef CATAMARI_OPENMP
void OpenMPLowerSupernodalTrapezoidalSolve(
Int supernode, BlasMatrixView<Field>* right_hand_sides,
......@@ -262,7 +283,6 @@ class Factorization {
void LowerTransposeTriangularSolveRecursion(
Int supernode, BlasMatrixView<Field>* right_hand_sides,
Buffer<Field>* packed_input_buf) const;
#ifdef CATAMARI_OPENMP
void OpenMPLowerTransposeTriangularSolveRecursion(
Int supernode, BlasMatrixView<Field>* right_hand_sides,
......
......@@ -12,6 +12,7 @@
#include "catamari/dense_basic_linear_algebra.hpp"
#include "catamari/dense_factorizations.hpp"
#include "catamari/io_utils.hpp"
#include "catamari/ldl/supernodal_ldl/factorization.hpp"
......@@ -163,43 +164,6 @@ bool Factorization<Field>::LeftLookingSupernodeFinalize(Int main_supernode,
return true;
}
// Sequential left-looking supernodal factorization of 'matrix'.
//
// When built with CATAMARI_OPENMP and more than one OpenMP thread is
// available, the work is delegated to OpenMPLeftLooking instead. Otherwise the
// supernodes are processed in increasing index order, and the result
// accumulated so far is returned as soon as a supernode fails to finalize.
template <class Field>
LDLResult Factorization<Field>::LeftLooking(
    const CoordinateMatrix<Field>& matrix) {
#ifdef CATAMARI_OPENMP
  if (omp_get_max_threads() > 1) {
    return OpenMPLeftLooking(matrix);
  }
#endif
  const Int supernode_count = ordering_.supernode_sizes.Size();

  // The shared state holds one slot per supernode.
  LeftLookingSharedState shared;
  shared.rel_rows.Resize(supernode_count);
  shared.intersect_ptrs.Resize(supernode_count);

  // The scratch buffers are sized from the largest supernode and zero-filled.
  PrivateState<Field> scratch;
  scratch.row_structure.Resize(supernode_count);
  scratch.pattern_flags.Resize(supernode_count);
  scratch.scaled_transpose_buffer.Resize(
      max_supernode_size_ * max_supernode_size_, Field{0});
  scratch.workspace_buffer.Resize(
      max_supernode_size_ * (max_supernode_size_ - 1), Field{0});

  LDLResult result;

  // Note that any postordering of the supernodal elimination forest suffices.
  Int supernode = 0;
  while (supernode < supernode_count) {
    LeftLookingSupernodeUpdate(supernode, matrix, &shared, &scratch);
    if (!LeftLookingSupernodeFinalize(supernode, &result)) {
      // Stop at the first supernode that could not be finalized.
      break;
    }
    ++supernode;
  }

  return result;
}
template <class Field>
bool Factorization<Field>::LeftLookingSubtree(
Int supernode, const CoordinateMatrix<Field>& matrix,
......@@ -209,6 +173,8 @@ bool Factorization<Field>::LeftLookingSubtree(
const Int child_end = ordering_.assembly_forest.child_offsets[supernode + 1];
const Int num_children = child_end - child_beg;
CATAMARI_START_TIMER(shared_state->inclusive_timers[supernode]);
Buffer<int> successes(num_children);
Buffer<LDLResult> result_contributions(num_children);
......@@ -219,11 +185,13 @@ bool Factorization<Field>::LeftLookingSubtree(
CATAMARI_ASSERT(ordering_.assembly_forest.parents[child] == supernode,
"Incorrect child index");
LDLResult& result_contribution = result_contributions[child_index];
successes[child_index] = LeftLookingSubtree(
child, matrix, shared_state, private_state, &result_contribution);
successes[child_index] =
LeftLookingSubtree(child, matrix, shared_state, private_state,
&result_contributions[child_index]);
}
CATAMARI_START_TIMER(shared_state->exclusive_timers[supernode]);
// Merge the child results (stopping if a failure is detected).
bool succeeded = true;
for (Int child_index = 0; child_index < num_children; ++child_index) {
......@@ -239,9 +207,64 @@ bool Factorization<Field>::LeftLookingSubtree(
succeeded = LeftLookingSupernodeFinalize(supernode, result);
}
CATAMARI_STOP_TIMER(shared_state->inclusive_timers[supernode]);
CATAMARI_STOP_TIMER(shared_state->exclusive_timers[supernode]);
return succeeded;
}
// Sequential left-looking supernodal factorization of 'matrix'.
//
// When built with CATAMARI_OPENMP and more than one OpenMP thread is
// available, the work is delegated to OpenMPLeftLooking instead. Otherwise the
// supernodes are processed in increasing index order, and the result
// accumulated so far is returned as soon as a supernode fails to finalize
// (in which case no timing files are written).
//
// When CATAMARI_ENABLE_TIMERS is defined, the time spent updating and
// finalizing each supernode is recorded in its exclusive timer, and the timers
// are written to the Graphviz files named in 'control_' after a successful
// factorization.
template <class Field>
LDLResult Factorization<Field>::LeftLooking(
    const CoordinateMatrix<Field>& matrix) {
#ifdef CATAMARI_OPENMP
  if (omp_get_max_threads() > 1) {
    return OpenMPLeftLooking(matrix);
  }
#endif  // ifdef CATAMARI_OPENMP
  const Int num_supernodes = ordering_.supernode_sizes.Size();

  LeftLookingSharedState shared_state;
  shared_state.rel_rows.Resize(num_supernodes);
  shared_state.intersect_ptrs.Resize(num_supernodes);
#ifdef CATAMARI_ENABLE_TIMERS
  shared_state.inclusive_timers.Resize(num_supernodes);
  shared_state.exclusive_timers.Resize(num_supernodes);
#endif  // ifdef CATAMARI_ENABLE_TIMERS

  PrivateState<Field> private_state;
  private_state.row_structure.Resize(num_supernodes);
  private_state.pattern_flags.Resize(num_supernodes);
  private_state.scaled_transpose_buffer.Resize(
      max_supernode_size_ * max_supernode_size_, Field{0});
  private_state.workspace_buffer.Resize(
      max_supernode_size_ * (max_supernode_size_ - 1), Field{0});

  LDLResult result;

  // Note that any postordering of the supernodal elimination forest suffices.
  for (Int supernode = 0; supernode < num_supernodes; ++supernode) {
    // Time the per-supernode work so that the exclusive timing output below
    // actually contains measurements. The timer is stopped before the failure
    // check so that it is never left running on the early-return path.
    CATAMARI_START_TIMER(shared_state.exclusive_timers[supernode]);
    LeftLookingSupernodeUpdate(supernode, matrix, &shared_state,
                               &private_state);
    const bool succeeded = LeftLookingSupernodeFinalize(supernode, &result);
    CATAMARI_STOP_TIMER(shared_state.exclusive_timers[supernode]);
    if (!succeeded) {
      return result;
    }
  }

#ifdef CATAMARI_ENABLE_TIMERS
  // NOTE(review): nothing in this function starts the inclusive timers, so
  // unless a callee does, the inclusive file carries no measurements on this
  // sequential path -- confirm whether it should still be written.
  TruncatedForestTimersToDot(
      control_.inclusive_timings_filename, shared_state.inclusive_timers,
      ordering_.assembly_forest, control_.max_timing_levels,
      control_.avoid_timing_isolated_roots);
  TruncatedForestTimersToDot(
      control_.exclusive_timings_filename, shared_state.exclusive_timers,
      ordering_.assembly_forest, control_.max_timing_levels,
      control_.avoid_timing_isolated_roots);
#endif  // ifdef CATAMARI_ENABLE_TIMERS

  return result;
}
} // namespace supernodal_ldl
} // namespace catamari
......
......@@ -13,6 +13,7 @@
#include "catamari/dense_basic_linear_algebra.hpp"
#include "catamari/dense_factorizations.hpp"
#include "catamari/io_utils.hpp"
#include "catamari/ldl/supernodal_ldl/factorization.hpp"
......@@ -245,6 +246,8 @@ bool Factorization<Field>::OpenMPLeftLookingSubtree(
const Int child_end = ordering_.assembly_forest.child_offsets[supernode + 1];
const Int num_children = child_end - child_beg;
CATAMARI_START_TIMER(shared_state->inclusive_timers[supernode]);
Buffer<int> successes(num_children);
Buffer<LDLResult> result_contributions(num_children);
......@@ -261,6 +264,8 @@ bool Factorization<Field>::OpenMPLeftLookingSubtree(
private_states, &result_contributions[child_index]);
}
CATAMARI_START_TIMER(shared_state->exclusive_timers[supernode]);
bool succeeded = true;
for (Int child_index = 0; child_index < num_children; ++child_index) {
if (!successes[child_index]) {
......@@ -281,6 +286,9 @@ bool Factorization<Field>::OpenMPLeftLookingSubtree(
OpenMPLeftLookingSupernodeFinalize(supernode, private_states, result);
}
CATAMARI_STOP_TIMER(shared_state->inclusive_timers[supernode]);
CATAMARI_STOP_TIMER(shared_state->exclusive_timers[supernode]);
return succeeded;
}
......@@ -294,6 +302,10 @@ LDLResult Factorization<Field>::OpenMPLeftLooking(
LeftLookingSharedState shared_state;
shared_state.rel_rows.Resize(num_supernodes);
shared_state.intersect_ptrs.Resize(num_supernodes);
#ifdef CATAMARI_ENABLE_TIMERS
shared_state.inclusive_timers.Resize(num_supernodes);
shared_state.exclusive_timers.Resize(num_supernodes);
#endif // ifdef CATAMARI_ENABLE_TIMERS
Buffer<PrivateState<Field>> private_states(max_threads);
......@@ -329,15 +341,10 @@ LDLResult Factorization<Field>::OpenMPLeftLooking(
}
const double total_work =
std::accumulate(work_estimates.begin(), work_estimates.end(), 0.);
// TODO(Jack Poulson): Make these two parameters configurable.
const double kParallelRatioThreshold = 0.02;
const double kMinParallelThreshold = 1.e5;
const double min_parallel_ratio_work =
(total_work * kParallelRatioThreshold) / max_threads;
(total_work * control_.parallel_ratio_threshold) / max_threads;
const double min_parallel_work =
std::max(kMinParallelThreshold, min_parallel_ratio_work);
std::max(control_.min_parallel_threshold, min_parallel_ratio_work);
LDLResult result;
Buffer<int> successes(num_roots);
......@@ -346,10 +353,9 @@ LDLResult Factorization<Field>::OpenMPLeftLooking(
if (total_work < min_parallel_work) {
for (Int root_index = 0; root_index < num_roots; ++root_index) {
const Int root = ordering_.assembly_forest.roots[root_index];
LDLResult& result_contribution = result_contributions[root_index];
successes[root_index] =
LeftLookingSubtree(root, matrix, &shared_state, &private_states[0],
&result_contribution);
&result_contributions[root_index]);
}
} else {
const int old_max_threads = GetMaxBlasThreads();
......@@ -378,6 +384,17 @@ LDLResult Factorization<Field>::OpenMPLeftLooking(
MergeContribution(result_contributions[index], &result);
}
#ifdef CATAMARI_ENABLE_TIMERS
TruncatedForestTimersToDot(
control_.inclusive_timings_filename, shared_state.inclusive_timers,
ordering_.assembly_forest, control_.max_timing_levels,
control_.avoid_timing_isolated_roots);
TruncatedForestTimersToDot(
control_.exclusive_timings_filename, shared_state.exclusive_timers,
ordering_.assembly_forest, control_.max_timing_levels,
control_.avoid_timing_isolated_roots);
#endif // ifdef CATAMARI_ENABLE_TIMERS
return result;
}
......
......@@ -88,9 +88,7 @@ bool Factorization<Field>::RightLookingSubtree(
const Int child_end = ordering_.assembly_forest.child_offsets[supernode + 1];
const Int num_children = child_end - child_beg;
#ifdef CATAMARI_ENABLE_TIMERS
shared_state->inclusive_timers[supernode].Start();
#endif // ifdef CATAMARI_ENABLE_TIMERS
CATAMARI_START_TIMER(shared_state->inclusive_timers[supernode]);
Buffer<int> successes(num_children);
Buffer<LDLResult> result_contributions(num_children);
......@@ -106,9 +104,7 @@ bool Factorization<Field>::RightLookingSubtree(
child, matrix, shared_state, private_state, &result_contribution);
}
#ifdef CATAMARI_ENABLE_TIMERS
shared_state->exclusive_timers[supernode].Start();
#endif // ifdef CATAMARI_ENABLE_TIMERS
CATAMARI_START_TIMER(shared_state->exclusive_timers[supernode]);
// Merge the children's results (stopping if a failure is detected).
bool succeeded = true;
......@@ -139,10 +135,8 @@ bool Factorization<Field>::RightLookingSubtree(
}
}
#ifdef CATAMARI_ENABLE_TIMERS
shared_state->inclusive_timers[supernode].Stop();
shared_state->exclusive_timers[supernode].Stop();
#endif // ifdef CATAMARI_ENABLE_TIMERS
CATAMARI_STOP_TIMER(shared_state->inclusive_timers[supernode]);
CATAMARI_STOP_TIMER(shared_state->exclusive_timers[supernode]);
return succeeded;
}
......@@ -154,7 +148,7 @@ LDLResult Factorization<Field>::RightLooking(
if (omp_get_max_threads() > 1) {
return OpenMPRightLooking(matrix);
}
#endif
#endif // ifdef CATAMARI_OPENMP
const Int num_supernodes = ordering_.supernode_sizes.Size();
const Int num_roots = ordering_.assembly_forest.roots.Size();
......@@ -193,17 +187,14 @@ LDLResult Factorization<Field>::RightLooking(
}
#ifdef CATAMARI_ENABLE_TIMERS
// TODO(Jack Poulson): Make these parameters configurable.
const Int max_levels = 4;
const bool avoid_isolated_roots = true;
const std::string inclusive_filename = "inclusive.gv";
const std::string exclusive_filename = "exclusive.gv";
TruncatedForestTimersToDot(inclusive_filename, shared_state.inclusive_timers,
ordering_.assembly_forest, max_levels,
avoid_isolated_roots);
TruncatedForestTimersToDot(exclusive_filename, shared_state.exclusive_timers,
ordering_.assembly_forest, max_levels,
avoid_isolated_roots);
TruncatedForestTimersToDot(
control_.inclusive_timings_filename, shared_state.inclusive_timers,
ordering_.assembly_forest, control_.max_timing_levels,
control_.avoid_timing_isolated_roots);
TruncatedForestTimersToDot(
control_.exclusive_timings_filename, shared_state.exclusive_timers,
ordering_.assembly_forest, control_.max_timing_levels,
control_.avoid_timing_isolated_roots);
#endif // ifdef CATAMARI_ENABLE_TIMERS
return result;
......
......@@ -124,9 +124,7 @@ bool Factorization<Field>::OpenMPRightLookingSubtree(
const Int child_end = ordering_.assembly_forest.child_offsets[supernode + 1];
const Int num_children = child_end - child_beg;
#ifdef CATAMARI_ENABLE_TIMERS
shared_state->inclusive_timers[supernode].Start();
#endif // ifdef CATAMARI_ENABLE_TIMERS
CATAMARI_START_TIMER(shared_state->inclusive_timers[supernode]);
Buffer<int> successes(num_children);
Buffer<LDLResult> result_contributions(num_children);
......@@ -146,9 +144,7 @@ bool Factorization<Field>::OpenMPRightLookingSubtree(
private_states, &result_contributions[child_index]);
}
#ifdef CATAMARI_ENABLE_TIMERS
shared_state->exclusive_timers[supernode].Start();
#endif // ifdef CATAMARI_ENABLE_TIMERS
CATAMARI_START_TIMER(shared_state->exclusive_timers[supernode]);
// Merge the child results (stopping if a failure is detected).
bool succeeded = true;
......@@ -180,10 +176,8 @@ bool Factorization<Field>::OpenMPRightLookingSubtree(
}
}
#ifdef CATAMARI_ENABLE_TIMERS
shared_state->inclusive_timers[supernode].Stop();
shared_state->exclusive_timers[supernode].Stop();
#endif // ifdef CATAMARI_ENABLE_TIMERS
CATAMARI_STOP_TIMER(shared_state->inclusive_timers[supernode]);
CATAMARI_STOP_TIMER(shared_state->exclusive_timers[supernode]);