Bug with `Eigen::Matrix2f::inverse()` method with CUDA

Summary

Calling Eigen::Matrix2f::inverse() inside a CUDA kernel produces an "unspecified launch failure" at run time. The same problem is also encountered with Eigen::Matrix3f.

Environment

  • Operating System : Windows and Ubuntu (via WSL)
  • Architecture : 64-bit operating system, x64-based processor
  • Eigen Version : latest from master

Minimal Example

Please find a minimal runnable example here: https://github.com/Ahdhn/eigen_inverse_cuda_bug/tree/master

#include <assert.h>
#include <cuda_runtime.h>
#include <stdio.h>
#include <Eigen/Dense>

//********************** CUDA HandleError
#ifndef _CUDA_ERROR_
#define _CUDA_ERROR_
// Reports a failed CUDA runtime call and terminates the process.
//   err  - status code returned by the CUDA call being checked
//   file - source file of the call site (supplied by CUDA_ERROR)
//   line - source line of the call site (supplied by CUDA_ERROR)
// Returns normally only when err == cudaSuccess; otherwise prints the error
// description with its location and exits with EXIT_FAILURE.
inline void HandleError(cudaError_t err, const char* file, int line)
{
    if (err == cudaSuccess) {
        return;
    }

    const char* description = cudaGetErrorString(err);
    printf("\n%s in %s at line %d\n", description, file, line);
    exit(EXIT_FAILURE);
}
// Error-handling macro: wrap it around CUDA runtime calls whenever possible.
#define CUDA_ERROR(err) (HandleError(err, __FILE__, __LINE__))
#endif
//******************************************************************************

// Minimal reproducer kernel: builds a fixed 2x2 float matrix, prints it,
// requests its inverse through Eigen, and prints the inverse's coefficients.
// main launches it with a single block containing a single thread.
__global__ void foo()
{
    Eigen::Matrix2f M;
    // Comma-initializer: first row {10, 2}, second row {4, 10}.
    M << 10, 2,  //
        4, 10;

    printf("\n M = \n {%f, %f \n %f, %f}", M(0, 0), M(0, 1), M(1, 0), M(1, 1));

    // NOTE(review): `auto` deduces whatever type `inverse()` returns, which is
    // presumably an Eigen expression object rather than a concrete
    // Eigen::Matrix2f; Eigen's documentation advises against `auto` for
    // expression results. Declaring `Eigen::Matrix2f M_inv` would force the
    // evaluation at this line. TODO confirm whether that avoids the failure.
    auto M_inv = M.inverse();

    // In the reported run this output never appears; the launch failure is
    // reported instead.
    printf("\n M_inv = \n {%f, %f \n %f, %f}\n",
           M_inv(0, 0),
           M_inv(0, 1),
           M_inv(1, 0),
           M_inv(1, 1));
}
// Host entry point: launches the reproducer kernel with one block of one
// thread and reports any CUDA error through CUDA_ERROR.
// Returns 0 on success; CUDA_ERROR terminates the process on any failure.
int main(int argc, char** argv)
{
    foo<<<1, 1>>>();
    // A kernel launch returns no status itself; launch-time errors (e.g. an
    // invalid configuration) are only visible through cudaGetLastError().
    CUDA_ERROR(cudaGetLastError());
    // Faults raised while the kernel executes surface at the next
    // synchronizing call, so check the synchronization result as well.
    CUDA_ERROR(cudaDeviceSynchronize());
    return 0;
}

Steps to reproduce

mkdir build
cd build 
cmake ..
make -j 4 
./bin/eigen_inverse

What is the current bug behavior?

The code compiles fine, but when it is executed, this is the output:

./bin/eigen_inverse 

 M = 
 {10.000000, 2.000000 
 4.000000, 10.000000}
unspecified launch failure in /home/ahmed/eigen_inverse_cuda_bug/eigen_inverse/include/eigen_inverse.cu at line 30

Anything else that might help

  • Have a plan to fix this issue.
Edited by Antonio Sánchez