Commit 1c096ead authored by Jehferson Mello's avatar Jehferson Mello
Browse files

Revamped camera for almost all projects

- Camera can now be freely positioned and target arbitrary spots
- New Camera is now a templated class in common/datatypes.hxx
- Vector source file has been moved to common/Vec.hxx
- The HIP and CUDA workarounds in Vector have been merged in the new
class
- SYCL has not been converted yet and is disabled for now because it is broken on my
machine, as is ROCm's OpenCL, which is quite annoying
parent e9facb95
......@@ -25,7 +25,7 @@ if(TB_ISPC_FOUND)
endif()
if(TB_SYCL_FOUND)
add_subdirectory(SYCL)
#add_subdirectory(SYCL)
endif()
if(TB_OMP_FOUND)
......
......@@ -3,7 +3,10 @@ project(rmCUDA-nvcc LANGUAGES CXX CUDA)
set(NVCC_CXX_SRCS "main.cpp"
"FracGen.hpp"
"Vec.cuh" )
"${TB_COMMON_SRC_DIR}/Vec.hxx"
"${TB_COMMON_SRC_DIR}/dataTypes.hxx"
"${TB_COMMON_SRC_DIR}/defines.hxx"
)
set(NVCC_CUDA_SRCS "FracGen.cu" )
......@@ -47,7 +50,10 @@ if(BUILD_CUDA_CLANG)
set(CLANG_CXX_SRCS "main.cpp"
"FracGen.hpp"
"Vec.cuh" )
"${TB_COMMON_SRC_DIR}/Vec.hxx"
"${TB_COMMON_SRC_DIR}/dataTypes.hxx"
"${TB_COMMON_SRC_DIR}/defines.hxx"
)
set(CLANG_CUDA_SRCS "${CMAKE_CURRENT_BINARY_DIR}/FracGenClang.cu" )
......
......@@ -44,7 +44,12 @@ __host__ __device__ float Vec3<float>::mod()
__device__ static const size_t maxRaySteps = 7500;
__device__ static const float collisionMinDist = 0.00055f;
__device__ static const float cameraX = 0.0f;
__device__ static const float cameraY = 0.0f;
__device__ static const float cameraZ = -3.8f;
__device__ static const float targetX = 0.0f;
__device__ static const float targetY = 0.0f;
__device__ static const float targetZ = 0.0f;
// colouring parameters
......@@ -309,7 +314,7 @@ __device__ RGBA getColour(const Vec4f& steps)
*
******************************************************************************/
__device__ Vec4f trace( const Camera& cam, const Screen& s, int x, int y, const estimatorFunction& f)
__device__ Vec4f trace( const Camera<float>& cam, int x, int y, const estimatorFunction& f)
{
/**
* This function taken from
......@@ -319,15 +324,15 @@ __device__ Vec4f trace( const Camera& cam, const Screen& s, int x, int y, const
float totalDistance = 0.0f;
unsigned int steps;
Vec3f pixelPosition = s.topLeft + Vec3f{s.pixelWidth*x, s.pixelHeight * y, 0.f};
Vec3f pixelPosition = cam.ScreenTopLeft() + (cam.ScreenRight() * static_cast<float>(x)) + (cam.ScreenUp() * static_cast<float>(y));
Vec3f rayDir = pixelPosition - static_cast<Vec3f>(cam.pos);
Vec3f rayDir = pixelPosition - cam.Pos();
rayDir.normalise();
Vec3f p;
for (steps=0; steps < maxRaySteps; steps++)
{
p = cam.pos + (rayDir * totalDistance);
p = cam.Pos() + (rayDir * totalDistance);
/**
* You can enable this and use nvstd::function instead of hardcoding the estimator
* it works, which is a plus for CUDA but it's quite slower so I thought it fairer to
......@@ -343,8 +348,7 @@ __device__ Vec4f trace( const Camera& cam, const Screen& s, int x, int y, const
return Vec4f{p,static_cast<float>(steps)};
}
__global__ void traceRegion(RGBA* data,
Camera cam, Screen scr)
__global__ void traceRegion(RGBA* data, Camera<float> cam)
{
/**
......@@ -361,13 +365,13 @@ __global__ void traceRegion(RGBA* data,
int col = blockIdx.x * blockDim.x + threadIdx.x;
int row = blockIdx.y * blockDim.y + threadIdx.y;
int index = ((row*scr.width)+col);
if (col >= scr.width || row >= scr.height)
int index = ((row*cam.ScreenWidth())+col);
if (col >= cam.ScreenWidth() || row >= cam.ScreenHeight())
{
return;
}
reinterpret_cast<RGBA*>(data)[index] = getColour( trace(cam, scr, col, row, box) );
data[index] = getColour( trace(cam, col, row, box) );
}
/******************************************************************************
......@@ -378,34 +382,12 @@ __global__ void traceRegion(RGBA* data,
void FracGen::Generate(int width, int height)
{
/*
* calculate the rectangle which represents the screen (camera z near) in object space
* No need to have an actual general camera so I'm just assuming the camera
* always sits on the Z axis and always has (0,1,0) as it's up vector
* This allows me to cheat a lot and not have to actually go into the
* linear algebra side and write something like gluUnproject
*/
Screen s;
Vec3f screenPlaneOrigin{cam->pos.X(),cam->pos.Y(),cam->pos.Z() + cam->near};
float screenPlaneHeight = 2*(cam->near*sin(cam->fovY/2));
screenPlaneHeight = screenPlaneHeight < 0 ? -screenPlaneHeight : screenPlaneHeight;
float screenPlaneWidth = screenPlaneHeight * cam->AR;
// if 0,0 is top left, pixel height needs to be a negative
s.width = width;
s.height = height;
s.pixelHeight = (-1.f) * screenPlaneHeight / s.height;
s.pixelWidth = screenPlaneWidth / s.width;
s.topLeft = Vec3f {screenPlaneOrigin.X() - (screenPlaneWidth/2),
screenPlaneOrigin.Y() + (screenPlaneHeight/2),
screenPlaneOrigin.Z() };
dim3 threadsPerBlock(16,16);
dim3 numBlocks(std::ceil(static_cast<float>(width)/threadsPerBlock.x), std::ceil(static_cast<float>(height)/threadsPerBlock.y));
RGBA* devVect;
cudaMalloc(&devVect, outSize());
traceRegion<<<numBlocks,threadsPerBlock>>>(devVect, *cam, s);
traceRegion<<<numBlocks,threadsPerBlock>>>(devVect, *cam);
cudaCheck(__LINE__);
cudaDeviceSynchronize();
cudaCheck(__LINE__);
......@@ -416,18 +398,9 @@ void FracGen::Generate(int width, int height)
FracGen::FracGen(bool benching, size_t width, size_t height)
: bench{benching}
, cam{new Camera}
{
outBuffer = std::make_shared< colourVec >(width*height);
cam->AR = static_cast<double>(width)/static_cast<double>(height);
// Position here is more or less ignored. It's used for initial screen calculation but the
// rays are launched from a different distance, specified in the .cl file
cam->pos = Vec3f{0, 0, cameraZ};
cam->target = Vec3f{0,0,0};
cam->up = Vec3f{0,1,0};
cam->near = 0.1f;
cam->fovY = 45;
cam = std::make_shared<Camera<float> >(Vec3f{cameraX, cameraY, cameraZ},Vec3f{targetX,targetY,targetZ}, width, height, 0.1f, 45);
static bool once = false;
......
......@@ -5,10 +5,10 @@
#include <cstdint>
#include <memory>
#include "Vec.cuh"
#include <cuda_runtime.h>
#include "Vec.hxx"
#include "dataTypes.hxx"
struct Camera{ Vec3f pos, up, target; float AR, near, fovY;};
struct Screen{ Vec3f topLeft; int width, height; float pixelWidth, pixelHeight;};
using colourType = float;
using RGBA = Vec4<colourType>;
using colourVec = std::vector<RGBA>;
......@@ -30,7 +30,7 @@ public:
private:
bool bench;
std::shared_ptr<Camera> cam;
std::shared_ptr<Camera<float> > cam;
FracPtr outBuffer;
};
......
......@@ -6,8 +6,10 @@ set(tbHIP_CXX_SRCS ${tbHIP_CXX_SRCS}
)
set(tbHIP_HIP_HDRS ${tbHIP_HIP_HDRS}
"Vec.hxx"
)
"${TB_COMMON_SRC_DIR}/Vec.hxx"
"${TB_COMMON_SRC_DIR}/dataTypes.hxx"
"${TB_COMMON_SRC_DIR}/defines.hxx"
)
set(tbHIP_HIP_SRCS ${tbHIP_HIP_SRCS}
"FracGen.cxx"
......
......@@ -58,7 +58,12 @@ __host__ __device__ float Vec3<float>::mod()
__device__ static const size_t maxRaySteps = 7500;
__device__ static const float collisionMinDist = 0.00055f;
__device__ static const float cameraX = 0.0f;
__device__ static const float cameraY = 0.0f;
__device__ static const float cameraZ = -3.8f;
__device__ static const float targetX = 0.0f;
__device__ static const float targetY = 0.0f;
__device__ static const float targetZ = 0.0f;
// colouring parameters
......@@ -319,7 +324,7 @@ __device__ RGBA getColour(const Vec4f& steps)
*
******************************************************************************/
__device__ Vec4f trace( const Camera& cam, const Screen& s, int x, int y/*, const estimatorFunction& f*/)
__device__ Vec4f trace( const Camera<float>& cam, int x, int y/*, const estimatorFunction& f*/)
{
/**
* This function taken from
......@@ -329,15 +334,15 @@ __device__ Vec4f trace( const Camera& cam, const Screen& s, int x, int y/*, con
float totalDistance = 0.0f;
unsigned int steps;
Vec3f pixelPosition = s.topLeft + Vec3f{s.pixelWidth*x, s.pixelHeight * y, 0.f};
Vec3f pixelPosition = cam.ScreenTopLeft() + (cam.ScreenRight() * static_cast<float>(x)) + (cam.ScreenUp() * static_cast<float>(y));
Vec3f rayDir = pixelPosition - static_cast<Vec3f>(cam.pos);
Vec3f rayDir = pixelPosition - cam.Pos();
rayDir.normalise();
Vec3f p;
for (steps=0; steps < maxRaySteps; steps++)
{
p = cam.pos + (rayDir * totalDistance);
p = cam.Pos() + (rayDir * totalDistance);
//float distance = f(p);
float distance = boxDist(p);
totalDistance += distance;
......@@ -348,20 +353,19 @@ __device__ Vec4f trace( const Camera& cam, const Screen& s, int x, int y/*, con
return Vec4f{p,static_cast<float>(steps)};
}
__global__ void traceRegion(RGBA* data,
Camera cam, Screen scr)
__global__ void traceRegion(RGBA* data, Camera<float> cam)
{
int col = blockIdx.x * blockDim.x + threadIdx.x;
int row = blockIdx.y * blockDim.y + threadIdx.y;
int index = ((row*scr.width)+col);
if (col >= scr.width || row >= scr.height)
int index = ((row*cam.ScreenWidth())+col);
if (col >= cam.ScreenWidth() || row >= cam.ScreenHeight())
{
return;
}
data[index] = getColour( trace(cam, scr, col, row) );
data[index] = getColour( trace(cam, col, row) );
}
/******************************************************************************
......@@ -372,34 +376,12 @@ __global__ void traceRegion(RGBA* data,
void FracGen::Generate(int width, int height)
{
/*
* calculate the rectangle which represents the screen (camera z near) in object space
* No need to have an actual general camera so I'm just assuming the camera
* always sits on the Z axis and always has (0,1,0) as it's up vector
* This allows me to cheat a lot and not have to actually go into the
* linear algebra side and write something like gluUnproject
*/
Screen s;
Vec3f screenPlaneOrigin{cam->pos.X(),cam->pos.Y(),cam->pos.Z() + cam->near};
float screenPlaneHeight = 2*(cam->near*sin(cam->fovY/2));
screenPlaneHeight = screenPlaneHeight < 0 ? -screenPlaneHeight : screenPlaneHeight;
float screenPlaneWidth = screenPlaneHeight * cam->AR;
// if 0,0 is top left, pixel height needs to be a negative
s.width = width;
s.height = height;
s.pixelHeight = (-1.f) * screenPlaneHeight / s.height;
s.pixelWidth = screenPlaneWidth / s.width;
s.topLeft = Vec3f {screenPlaneOrigin.X() - (screenPlaneWidth/2),
screenPlaneOrigin.Y() + (screenPlaneHeight/2),
screenPlaneOrigin.Z() };
dim3 threadsPerBlock(16,16);
dim3 numBlocks(std::ceil(static_cast<float>(width)/threadsPerBlock.x), std::ceil(static_cast<float>(height)/threadsPerBlock.y));
RGBA* devVect;
hipMalloc(&devVect, outSize());
hipLaunchKernelGGL(traceRegion, numBlocks,threadsPerBlock, 0, 0,devVect, *cam, s);
hipLaunchKernelGGL(traceRegion, numBlocks,threadsPerBlock, 0, 0,devVect, *cam);
hipCheck(__LINE__);
hipDeviceSynchronize();
hipCheck(__LINE__);
......@@ -410,18 +392,9 @@ void FracGen::Generate(int width, int height)
FracGen::FracGen(bool benching, size_t width, size_t height)
: bench{benching}
, cam{new Camera}
{
outBuffer = std::make_shared< colourVec >(width*height);
cam->AR = static_cast<double>(width)/static_cast<double>(height);
// Position here is more or less ignored. It's used for initial screen calculation but the
// rays are launched from a different distance, specified in the .cl file
cam->pos = Vec3f{0, 0, cameraZ};
cam->target = Vec3f{0,0,0};
cam->up = Vec3f{0,1,0};
cam->near = 0.1f;
cam->fovY = 45;
cam = std::make_shared<Camera<float> >(Vec3f{cameraX, cameraY, cameraZ},Vec3f{targetX,targetY,targetZ}, width, height, 0.1f, 45);
static bool once = false;
hipDeviceProp_t prop;
......
......@@ -5,10 +5,9 @@
#include <cstdint>
#include <memory>
#include "Vec.hxx"
#include <hip/hip_runtime.h>
#include "dataTypes.hxx"
struct Camera{ Vec3f pos, up, target; float AR, near, fovY;};
struct Screen{ Vec3f topLeft; int width, height; float pixelWidth, pixelHeight;};
using colourType = float;
using RGBA = Vec4<colourType>;
using colourVec = std::vector<RGBA>;
......@@ -30,7 +29,7 @@ public:
private:
bool bench;
std::shared_ptr<Camera> cam;
std::shared_ptr<Camera<float>> cam;
FracPtr outBuffer;
};
......
#ifndef WD_VEC3_HPP_DEFINED
#define WD_VEC3_HPP_DEFINED
#include <cmath>
#include <hip/hip_runtime.h>
/**
 * Three-component vector that can be used from both host and device code.
 *
 * The same three members are exposed through two sets of accessors:
 * X/Y/Z for geometric use and R/G/B for colour use.
 */
template < typename T >
class Vec3
{
public:
    /** Builds the zero vector. */
    __device__ __host__ explicit Vec3() : x(0), y(0), z(0) {}
    /** Builds a vector from its three components. */
    __device__ __host__ explicit Vec3(T xVal, T yVal, T zVal): x{xVal}, y{yVal}, z{zVal} {}
    /** Copies the three components of ref. */
    __device__ __host__ Vec3 (const Vec3<T>& ref): x{ref.X()}, y{ref.Y()}, z{ref.Z()} {}
    /** Assigns the three components of ref to this vector. */
    __device__ __host__ Vec3& operator= (const Vec3<T>& ref) {this->x = ref.X(); this->y = ref.Y(); this->z = ref.Z(); return *this;}

    /** Converts to a vector of another component type using static_cast per component. */
    template < typename U >
    __device__ __host__ operator Vec3<U>() const noexcept
    {
        return Vec3<U>{static_cast<U>(x),static_cast<U>(y),static_cast<U>(z)};
    }

    // Geometric accessors.
    __device__ __host__ T X() const { return x; }
    __device__ __host__ T Y() const { return y; }
    __device__ __host__ T Z() const { return z; }
    __device__ __host__ void setX(T newX) { x = newX; }
    __device__ __host__ void setY(T newY) { y = newY; }
    __device__ __host__ void setZ(T newZ) { z = newZ; }

    // Colour accessors: R, G and B alias x, y and z respectively.
    __device__ __host__ T R() const { return x; }
    __device__ __host__ T G() const { return y; }
    __device__ __host__ T B() const { return z; }
    __device__ __host__ void setR(T r) { x = r; }
    __device__ __host__ void setG(T g) { y = g; }
    __device__ __host__ void setB(T b) { z = b; }

    /** Component-wise addition in place. */
    __device__ __host__ Vec3& operator+=(const Vec3& rhs)
    {
        this->x += rhs.x;
        this->y += rhs.y;
        this->z += rhs.z;
        return *this;
    }

    /** Multiplies every component by the scalar f. */
    __device__ __host__ Vec3& operator*=(const T& f)
    {
        x *= f;
        y *= f;
        z *= f;
        return *this;
    }

    /** Divides every component by the scalar f (no check for f == 0). */
    __device__ __host__ Vec3& operator/=(const T& f)
    {
        x /= f;
        y /= f;
        z /= f;
        return *this;
    }

    /** Component-wise (Hadamard) multiplication in place. */
    __device__ __host__ Vec3& operator*=(const Vec3<T>& rhs)
    {
        x *= rhs.X();
        y *= rhs.Y();
        z *= rhs.Z();
        return *this;
    }

    /** Squared length: x*x + y*y + z*z. */
    __device__ __host__ T sqMod() const
    {
        return x*x+y*y+z*z;
    }

    /**
     * Length of the vector, i.e. the square root of sqMod().
     *
     * Deliberately left non-const: an out-of-class definition such as
     * Vec3<float>::mod() has to match this exact signature.
     */
    __device__ __host__ T mod()
    {
        // sqrt may return a wider type than T; the narrowing is intended here.
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wfloat-conversion"
#pragma clang diagnostic ignored "-Wconversion"
        return sqrt(sqMod());
#pragma clang diagnostic pop
    }

    /**
     * Returns a copy with every component limited to the range [min, max].
     * This vector itself is not modified.
     */
    __device__ __host__ Vec3<T> clamp(T min, T max)
    {
        const T cx = x < min ? min : (x > max ? max : x);
        const T cy = y < min ? min : (y > max ? max : y);
        const T cz = z < min ? min : (z > max ? max : z);
        return Vec3<T>{cx, cy, cz};
    }

    /**
     * Scales this vector to unit length in place and returns *this.
     *
     * A vector whose length evaluates to zero is left unchanged: dividing by
     * it would be undefined behaviour for integral T and would fill the
     * components with NaN for floating-point T.
     */
    __device__ __host__ Vec3<T>& normalise()
    {
        T m = this->mod();
        if (m == static_cast<T>(0))
        {
            return *this;
        }
        x /= m;
        y /= m;
        z /= m;
        return *this;
    }

    /** Tolerance used by the approximate comparison helpers below. */
    static constexpr const double flEpsilon = 0.00001;

    // Approximate comparison helpers; defined out of class.
    __device__ __host__ static bool flEquals(float f1, float f2);
    __device__ __host__ static bool dbEquals(double f1, double f2);
    __device__ __host__ static bool flZero(float f);
    __device__ __host__ static bool dbZero(double f);

protected:
    T x;
    T y;
    T z;
};
/**
 * Approximate equality for two floats: true when the absolute value of their
 * difference (taken in float, then widened to double) does not exceed flEpsilon.
 */
template <typename T>
__device__ __host__ bool Vec3<T>::flEquals(float f1, float f2)
{
    const double delta = static_cast<double>(f1 - f2);
    return fabs(delta) <= Vec3<T>::flEpsilon;
}
/**
 * Approximate equality for two doubles: true when the absolute value of their
 * difference does not exceed flEpsilon.
 */
template <typename T>
__device__ __host__ bool Vec3<T>::dbEquals(double f1, double f2)
{
    const double delta = f1 - f2;
    return fabs(delta) <= Vec3<T>::flEpsilon;
}
/**
 * Approximate zero test for a float: true when its absolute value, widened to
 * double, does not exceed flEpsilon.
 */
template <typename T>
__device__ __host__ bool Vec3<T>::flZero(float f)
{
    const double magnitude = fabs(static_cast<double>(f));
    return magnitude <= Vec3<T>::flEpsilon;
}
/**
 * Approximate zero test for a double: true when its absolute value does not
 * exceed flEpsilon.
 */
template <typename T>
__device__ __host__ bool Vec3<T>::dbZero(double f)
{
    const double magnitude = fabs(f);
    return magnitude <= Vec3<T>::flEpsilon;
}
/** Component-wise sum of two vectors; neither operand is modified. */
template <typename T>
__device__ __host__ Vec3<T> operator+(const Vec3<T>& a, const Vec3<T>& b)
{
    Vec3<T> sum(a);
    sum += b;
    return sum;
}
/** Returns a copy of a with every component multiplied by the scalar f. */
template <typename T>
__device__ __host__ Vec3<T> operator*(const Vec3<T>&a, T f)
{
    Vec3<T> scaled(a);
    scaled *= f;
    return scaled;
}
/** Returns a copy of a with every component divided by the scalar f. */
template <typename T>
__device__ __host__ Vec3<T> operator/(const Vec3<T>&a, T f)
{
    Vec3<T> quotient(a);
    quotient /= f;
    return quotient;
}
/**
 * Approximate equality: true when every pair of components differs by no more
 * than Vec3<T>::flEpsilon.
 *
 * The comparison is carried out in double. Routing the components through the
 * float helper would silently narrow Vec3<double> and Vec3<int> values before
 * comparing them, making distinct large values compare equal.
 */
template <typename T>
__device__ __host__ bool operator == (const Vec3<T>&a, const Vec3<T>& b)
{
    return ( Vec3<T>::dbEquals(static_cast<double>(a.X()), static_cast<double>(b.X())) &&
             Vec3<T>::dbEquals(static_cast<double>(a.Y()), static_cast<double>(b.Y())) &&
             Vec3<T>::dbEquals(static_cast<double>(a.Z()), static_cast<double>(b.Z())) );
}
/** Negation of operator== for the same pair of vectors. */
template <typename T>
__device__ __host__ bool operator != (const Vec3<T>&a, const Vec3<T>& b)
{
    const bool same = (a == b);
    return !same;
}
/** Unary minus: returns a vector with every component negated. */
template <typename T>
__device__ __host__ Vec3<T> operator- (const Vec3<T>& v)
{
    return Vec3<T>( -(v.X()), -(v.Y()), -(v.Z()) );
}
/** Component-wise difference a - b; neither operand is modified. */
template <typename T>
__device__ __host__ Vec3<T> operator- (const Vec3<T>& a, const Vec3<T>& b)
{
    const T dx = a.X() - b.X();
    const T dy = a.Y() - b.Y();
    const T dz = a.Z() - b.Z();
    return Vec3<T>(dx, dy, dz);
}
/** Cross product a x b. */
template <typename T>
__device__ __host__ static Vec3<T> crossProd (const Vec3<T>& a, const Vec3<T>& b)
{
    // Each component is kept as one expression so evaluation matches the
    // single-statement form exactly.
    const T cx = a.Y()*b.Z() - a.Z()*b.Y();
    const T cy = a.Z()*b.X() - a.X()*b.Z();
    const T cz = a.X()*b.Y() - a.Y()*b.X();
    return Vec3<T>(cx, cy, cz);
}
/**
 * Dot product of a and b: the sum of the products of matching components.
 */
template <typename T>
__device__ __host__ static T dotProd (const Vec3<T>& a, const Vec3<T>& b)
{
return a.X()*b.X() + a.Y()*b.Y() + a.Z()*b.Z();
}
/**
 * Normal of the triangle (a, b, c): the normalised cross product of the
 * edges a->b and a->c.
 *
 * TODO: winding checks and tuning.
 * NOTE: this helper lives here only temporarily; it probably belongs elsewhere.
 */
template <typename T>
__device__ __host__ static Vec3<T> triNormal(const Vec3<T>& a, const Vec3<T>& b, const Vec3<T>& c)
{
    const Vec3<T> edgeAB = b - a;
    const Vec3<T> edgeAC = c - a;
    Vec3<T> n = crossProd(edgeAB, edgeAC);
    n.normalise();
    return n;
}
/**
 * Convenience overload: computes the triangle normal from the first three
 * vertices stored at v. The pointer is dereferenced without any check.
 */
template <typename T>
__device__ __host__ static Vec3<T> triNormal(const Vec3<T>* v)
{
    const Vec3<T>& first  = v[0];
    const Vec3<T>& second = v[1];
    const Vec3<T>& third  = v[2];
    return triNormal(first, second, third);
}
// Shorthand aliases for the float, int and double instantiations of Vec3.
using Vec3f = Vec3<float>;
using Vec3i = Vec3<int>;
using Vec3d = Vec3<double>;