Optimize maxCoeff and friends

Reference issue

What does this implement/fix?

Provides an evaluator to optimize maxCoeff and friends. This vectorized approach delays determining the index of the extreme value (or some other criteria provided by a custom functor) until the very end, which is the most costly portion of the evaluation. This also provides the (albeit minor) benefit of using the linear access evaluator for a 2d matrix where possible.

Additional information

Ubuntu clang version 18.1.3 -DNDEBUG -O3 -mavx2

1d: vector expression with linear access

2d: matrix expression with linear access

2d_force: matrix expression with outer-inner access

test before after change
find_coeff_1d/1 1.12 0.891 -20.45%
find_coeff_1d/8 3.65 2.53 -30.68%
find_coeff_1d/64 15.8 4.72 -70.13%
find_coeff_1d/512 98.6 22.1 -77.59%
find_coeff_1d/1024 190 40 -78.95%
find_coeff_2d/1 1.43 1.11 -22.38%
find_coeff_2d/8 14.9 5 -66.44%
find_coeff_2d/64 774 166 -78.55%
find_coeff_2d/512 48549 9624 -80.18%
find_coeff_2d/1024 191214 48234 -74.77%
find_coeff_2d_force/1 1.48 2.23 50.68%
find_coeff_2d_force/8 14.6 9.25 -36.64%
find_coeff_2d_force/64 776 225 -71.01%
find_coeff_2d_force/512 48523 14195 -70.75%
find_coeff_2d_force/1024 191161 54681 -71.40%
#include <benchmark/benchmark.h>
#include <Eigen/Core>
using namespace Eigen;

using T = float;
using Mat = MatrixX<T>;
using Vec = VectorX<T>;

static void find_coeff_1d(benchmark::State& state) {
  Index n = state.range(0);
  Vec a(n);
  a.setRandom();
  for (auto s : state) {
    Index idx;
    T res = a.maxCoeff(&idx);
    benchmark::DoNotOptimize(idx);
    benchmark::DoNotOptimize(res);
  }
}

static void find_coeff_2d(benchmark::State& state) {
  Index n = state.range(0);
  Mat a(n, n);
  a.setRandom();
  for (auto s : state) {
    Index row, col;
    T res = a.maxCoeff(&row, &col);
    benchmark::DoNotOptimize(row);
    benchmark::DoNotOptimize(col);
    benchmark::DoNotOptimize(res);
  }
}

static void find_coeff_2d_force(benchmark::State& state) {
  Index n = state.range(0);
  Mat a(n, n);
  auto a_block = a.topLeftCorner(n,n);
  a.setRandom();
  for (auto s : state) {
    Index row, col;
    T res = a_block.maxCoeff(&row, &col);
    benchmark::DoNotOptimize(row);
    benchmark::DoNotOptimize(col);
    benchmark::DoNotOptimize(res);
  }
}

BENCHMARK(find_coeff_1d)->Range(1<<0, 1<<10);
BENCHMARK(find_coeff_2d)->Range(1<<0, 1<<10);
BENCHMARK(find_coeff_2d_force)->Range(1<<0, 1<<10);
BENCHMARK_MAIN();
Edited by Charles Schlosser

Merge request reports

Loading