Enable packet segment in partial redux

Reference issue

What does this implement/fix?

Additional information

Build configuration: Ubuntu clang version 18.1.3, compiler flags -DNDEBUG -O3 -mavx2

Benchmark Time (change %) CPU (change %) Iterations (change %)
test_segment_rowwise_control/1 1.12 -8.93% 1.12 -8.93% 623303252 8.73%
test_segment_rowwise_control/8 3.16 -12.03% 3.16 -12.03% 220053279 14.35%
test_segment_rowwise_control/64 72.3 -1.24% 72.2 -1.11% 9705963 1.11%
test_segment_rowwise_control/512 14123 -0.57% 14121 -0.59% 49681 0.36%
test_segment_rowwise_control/1024 245572 -0.83% 245503 -0.82% 2796 1.25%
test_segment_rowwise_1/1 1.13 -9.73% 1.13 -9.73% 624569605 9.44%
test_segment_rowwise_1/8 3.16 -11.71% 3.16 -11.71% 221901482 13.36%
test_segment_rowwise_1/64 72.3 -1.38% 72.3 -1.38% 9686812 0.65%
test_segment_rowwise_1/512 14104 -0.50% 14101 -0.48% 49521 0.64%
test_segment_rowwise_1/1024 245312 -0.70% 245256 -0.68% 2834 -0.88%
test_segment_rowwise_half/1 4.5 -42.00% 4.5 -42.00% 156829481 70.60%
test_segment_rowwise_half/8 11.8 -47.80% 11.8 -47.80% 58620389 94.46%
test_segment_rowwise_half/64 120 -22.08% 120 -22.08% 5777833 29.63%
test_segment_rowwise_half/512 9379 -4.62% 9376 -4.61% 74755 4.94%
test_segment_rowwise_half/1024 64958 -3.75% 64956 -3.77% 10817 2.12%
test_segment_rowwise_m1/1 9.64 -65.25% 9.63 -65.21% 72592615 188.59%
test_segment_rowwise_m1/8 20 -64.90% 20 -64.90% 35250256 183.58%
test_segment_rowwise_m1/64 174 -35.63% 174 -35.63% 3999856 55.68%
test_segment_rowwise_m1/512 13056 -6.36% 13055 -6.37% 53592 7.75%
test_segment_rowwise_m1/1024 76313 -2.59% 76298 -2.61% 9102 1.90%
#include <benchmark/benchmark.h>
#include <Eigen/Core>
using namespace Eigen;

using T = float;
using Mat = MatrixX<T>;
using Vec = VectorX<T>;

// Baseline benchmark: row-wise sum of a square matrix whose dimension is the
// requested size rounded down to a multiple of the packet size for T, i.e. a
// dimension with no leftover elements past the last full packet.
// Note: when the requested size is smaller than the packet size, the rounded
// dimension is 0 and the matrix is empty.
static void test_segment_rowwise_control(benchmark::State& state) {
  constexpr Index kPacketSize = internal::packet_traits<T>::size;
  const Index dim = numext::round_down(state.range(0), kPacketSize);

  Mat mat = Mat::Random(dim, dim);
  Vec rowSums(dim);

  for (auto _ : state) {
    rowSums = mat.rowwise().sum();
    // Keep the compiler from discarding the input or the result.
    benchmark::DoNotOptimize(mat);
    benchmark::DoNotOptimize(rowSums);
  }
}

// Benchmark: row-wise sum of a square matrix whose dimension is one element
// past a multiple of the packet size for T, so exactly one element is left
// over after the last full packet.
//
// The dimension previously omitted the "+ 1" and was therefore identical to
// test_segment_rowwise_control; the offset is added here so this case measures
// the single-element remainder, in line with the "_half" (+ PacketSize / 2)
// and "_m1" (+ PacketSize - 1) variants.
static void test_segment_rowwise_1(benchmark::State& state) {
  constexpr Index PacketSize = internal::packet_traits<T>::size;
  const Index n = numext::round_down(state.range(0), PacketSize) + 1;
  Mat A(n, n);
  Vec b(n);
  A.setRandom();
  for (auto _ : state) {
    b = A.rowwise().sum();
    // Keep the compiler from discarding the input or the result.
    benchmark::DoNotOptimize(A);
    benchmark::DoNotOptimize(b);
  }
}

// Benchmark: row-wise sum of a square matrix whose dimension is a multiple of
// the packet size for T plus half a packet, so the leftover past the last full
// packet is PacketSize / 2 elements.
static void test_segment_rowwise_half(benchmark::State& state) {
  constexpr Index kPacketSize = internal::packet_traits<T>::size;
  const Index fullPackets = numext::round_down(state.range(0), kPacketSize);
  const Index dim = fullPackets + kPacketSize / 2;

  Mat mat = Mat::Random(dim, dim);
  Vec rowSums(dim);

  for (auto _ : state) {
    rowSums = mat.rowwise().sum();
    // Keep the compiler from discarding the input or the result.
    benchmark::DoNotOptimize(mat);
    benchmark::DoNotOptimize(rowSums);
  }
}

// Benchmark: row-wise sum of a square matrix whose dimension is a multiple of
// the packet size for T plus (PacketSize - 1), i.e. one element short of
// another full packet.
static void test_segment_rowwise_m1(benchmark::State& state) {
  constexpr Index kPacketSize = internal::packet_traits<T>::size;
  const Index fullPackets = numext::round_down(state.range(0), kPacketSize);
  const Index dim = fullPackets + kPacketSize - 1;

  Mat mat = Mat::Random(dim, dim);
  Vec rowSums(dim);

  for (auto _ : state) {
    rowSums = mat.rowwise().sum();
    // Keep the compiler from discarding the input or the result.
    benchmark::DoNotOptimize(mat);
    benchmark::DoNotOptimize(rowSums);
  }
}

// Register every variant over requested sizes from 1 to 1024; each benchmark
// derives its actual matrix dimension from the requested size.
BENCHMARK(test_segment_rowwise_control)->Range(1, 1024);
BENCHMARK(test_segment_rowwise_1)->Range(1, 1024);
BENCHMARK(test_segment_rowwise_half)->Range(1, 1024);
BENCHMARK(test_segment_rowwise_m1)->Range(1, 1024);

// Expands to the program entry point that runs the registered benchmarks.
BENCHMARK_MAIN();
Edited by Charles Schlosser

Merge request reports

Loading