Optimize maxCoeff and friends
Reference issue
What does this implement/fix?
Provides an evaluator to optimize maxCoeff and friends. This vectorized approach delays determining the index of the extreme value (or some other criteria provided by a custom functor) until the very end, which is the most costly portion of the evaluation. This also provides the (albeit minor) benefit of using the linear access evaluator for a 2d matrix where possible.
Additional information
Ubuntu clang version 18.1.3 -DNDEBUG -O3 -mavx2
1d: vector expression with linear access
2d: matrix expression with linear access
2d_force: matrix expression with outer-inner access
| test | before | after | change |
|---|---|---|---|
| find_coeff_1d/1 | 1.12 | 0.891 | -20.45% |
| find_coeff_1d/8 | 3.65 | 2.53 | -30.68% |
| find_coeff_1d/64 | 15.8 | 4.72 | -70.13% |
| find_coeff_1d/512 | 98.6 | 22.1 | -77.59% |
| find_coeff_1d/1024 | 190 | 40 | -78.95% |
| find_coeff_2d/1 | 1.43 | 1.11 | -22.38% |
| find_coeff_2d/8 | 14.9 | 5 | -66.44% |
| find_coeff_2d/64 | 774 | 166 | -78.55% |
| find_coeff_2d/512 | 48549 | 9624 | -80.18% |
| find_coeff_2d/1024 | 191214 | 48234 | -74.77% |
| find_coeff_2d_force/1 | 1.48 | 2.23 | 50.68% |
| find_coeff_2d_force/8 | 14.6 | 9.25 | -36.64% |
| find_coeff_2d_force/64 | 776 | 225 | -71.01% |
| find_coeff_2d_force/512 | 48523 | 14195 | -70.75% |
| find_coeff_2d_force/1024 | 191161 | 54681 | -71.40% |
#include <benchmark/benchmark.h>
#include <Eigen/Core>
using namespace Eigen;
using T = float;
using Mat = MatrixX<T>;
using Vec = VectorX<T>;
static void find_coeff_1d(benchmark::State& state) {
Index n = state.range(0);
Vec a(n);
a.setRandom();
for (auto s : state) {
Index idx;
T res = a.maxCoeff(&idx);
benchmark::DoNotOptimize(idx);
benchmark::DoNotOptimize(res);
}
}
static void find_coeff_2d(benchmark::State& state) {
Index n = state.range(0);
Mat a(n, n);
a.setRandom();
for (auto s : state) {
Index row, col;
T res = a.maxCoeff(&row, &col);
benchmark::DoNotOptimize(row);
benchmark::DoNotOptimize(col);
benchmark::DoNotOptimize(res);
}
}
static void find_coeff_2d_force(benchmark::State& state) {
Index n = state.range(0);
Mat a(n, n);
auto a_block = a.topLeftCorner(n,n);
a.setRandom();
for (auto s : state) {
Index row, col;
T res = a_block.maxCoeff(&row, &col);
benchmark::DoNotOptimize(row);
benchmark::DoNotOptimize(col);
benchmark::DoNotOptimize(res);
}
}
BENCHMARK(find_coeff_1d)->Range(1<<0, 1<<10);
BENCHMARK(find_coeff_2d)->Range(1<<0, 1<<10);
BENCHMARK(find_coeff_2d_force)->Range(1<<0, 1<<10);
BENCHMARK_MAIN();
Edited by Charles Schlosser