Commit 6f4efe98 authored by Jehferson Mello's avatar Jehferson Mello
Browse files

Added TBB

parent f1494488
......@@ -9,6 +9,12 @@ set(CMAKE_VERBOSE_MAKEFILE ON)
set(CMAKE_INSTALL_PREFIX ${PROJECT_SOURCE_DIR}/redist)
###############################################################################
#
# This messy bit is a bunch of find_package calls and the workarounds they need
#
###############################################################################
find_package(SDL2)
......@@ -59,17 +65,25 @@ if(CMAKE_CUDA_COMPILER)
set(CUDA_CLANG_DIR "${CUDA_PATH}" CACHE PATH "Path to your CUDA toolkit for Clang/hipSYCL")
endif()
find_package(TBB)
###############################################################################
#
# Actual management of the found stuff begins here
#
###############################################################################
set(TB_CUDA_FOUND FALSE)
set(TB_MPI_FOUND FALSE)
set(TB_SDL_FOUND FALSE)
set(TB_OMP_FOUND FALSE)
set(TB_OCL_FOUND FALSE)
set(TB_VK_FOUND FALSE)
set(TB_SYCL_FOUND FALSE)
set(TB_HIP_FOUND FALSE)
set(TB_MPI_FOUND FALSE)
set(TB_OCL_FOUND FALSE)
set(TB_OMP_FOUND FALSE)
set(TB_PNG_FOUND FALSE)
set(TB_SDL_FOUND FALSE)
set(TB_SYCL_FOUND FALSE)
set(TB_TBB_FOUND FALSE)
set(TB_VK_FOUND FALSE)
message(" ")
......@@ -77,14 +91,6 @@ message("*********************************")
message("Toybrot dependency probe")
message(" ")
if(TARGET PNG::PNG)
set(TB_PNG_FOUND TRUE)
message("PNG: OK")
else()
message("PNG: NOT FOUND")
endif()
if(CMAKE_CUDA_COMPILER)
message("CUDA: OK")
set(TB_CUDA_FOUND TRUE)
......@@ -92,13 +98,6 @@ else()
message("CUDA: NOT FOUND")
endif()
if(TARGET SDL2::SDL2)
set(TB_SDL_FOUND TRUE)
message("SDL2: OK")
else()
message("SDL2: NOT FOUND")
endif()
if(HIP_FOUND)
set(TB_HIP_FOUND TRUE)
message("HIP: OK (${HIP_VERSION})")
......@@ -106,11 +105,18 @@ else()
message("HIP: NOT FOUND")
endif()
if(HIPSYCL_SYCLCC)
set(TB_SYCL_FOUND TRUE)
message("SYCL: OK (hipSYCL)")
if(TARGET MPI::MPI_CXX)
set(TB_MPI_FOUND TRUE)
message("MPI: OK")
else()
message("SYCL: NOT FOUND")
message("MPI: NOT FOUND")
endif()
if(TARGET OpenCL::OpenCL)
set(TB_OCL_FOUND TRUE)
message("OpenCL: OK")
else()
message("OpenCL: NOT FOUND")
endif()
if(TARGET OpenMP::OpenMP_CXX OR CMAKE_CXX_COMPILER MATCHES "aomp")
......@@ -120,13 +126,33 @@ else()
message("OpenMP: NOT FOUND")
endif()
if(TARGET OpenCL::OpenCL)
set(TB_OCL_FOUND TRUE)
message("OpenCL: OK")
if(TARGET PNG::PNG)
set(TB_PNG_FOUND TRUE)
message("PNG: OK")
else()
message("OpenCL: NOT FOUND")
message("PNG: NOT FOUND")
endif()
if(TARGET SDL2::SDL2)
set(TB_SDL_FOUND TRUE)
message("SDL2: OK")
else()
message("SDL2: NOT FOUND")
endif()
if(HIPSYCL_SYCLCC)
set(TB_SYCL_FOUND TRUE)
message("SYCL: OK (hipSYCL)")
else()
message("SYCL: NOT FOUND")
endif()
if(TARGET TBB::tbb)
set(TB_TBB_FOUND TRUE)
message("TBB: OK")
else()
message("TBB: NOT FOUND")
endif()
if(TARGET Vulkan::Vulkan)
find_program(SLANGCOMPILER "glslangValidator" DOC "spir-v shader compiler")
......@@ -142,13 +168,6 @@ else()
message("Vulkan: NOT FOUND")
endif()
if(TARGET MPI::MPI_CXX)
set(TB_MPI_FOUND TRUE)
message("MPI: OK")
else()
message("MPI: NOT FOUND")
endif()
message(" ")
message("*********************************")
message(" ")
......
......@@ -29,4 +29,8 @@ if(TB_VK_FOUND)
add_subdirectory(Vulkan)
endif()
if(TB_TBB_FOUND)
add_subdirectory(TBB)
endif()
#add_subdirectory(MPICLI)
# Builds the TBB flavour of the fractal generator as the executable "rmTBB".
# The parent directory only adds this subdirectory when TB_TBB_FOUND is set,
# so TBB::tbb is expected to exist by the time this file is processed.
project(rmTBB LANGUAGES CXX)

# Sources shared with the other plain C++ implementations
set(TBB_CXX_SRCS "main.cpp"
                 "FracGen.hpp"
                 "Vec.hpp" )

# Sources specific to the TBB implementation
set(TBB_TBB_SRCS "FracGen.cpp" )

add_executable(${PROJECT_NAME} ${TBB_CXX_SRCS} ${TBB_TBB_SRCS} )

# Link the platform threading library through CMake's imported target
# instead of hard-coding the library name "pthread".
if(NOT WIN32)
    find_package(Threads REQUIRED)
    target_link_libraries(${PROJECT_NAME} PRIVATE Threads::Threads)
endif()

# Optional SDL2 preview window
if(TB_SDL_FOUND)
    target_link_libraries(${PROJECT_NAME} PRIVATE SDL2::SDL2)
    target_compile_definitions(${PROJECT_NAME} PRIVATE "TOYBROT_ENABLE_GUI")
    target_sources(${PROJECT_NAME} PRIVATE "FracGenWindow.cpp" "FracGenWindow.hpp")
endif()

# Optional PNG output
if(TB_PNG_FOUND)
    target_link_libraries(${PROJECT_NAME} PRIVATE PNG::PNG)
    target_compile_definitions(${PROJECT_NAME} PRIVATE "TOYBROT_ENABLE_PNG")
    target_sources(${PROJECT_NAME} PRIVATE "pngWriter.cpp" "pngWriter.hpp" )
endif()

target_link_libraries(${PROJECT_NAME} PRIVATE TBB::tbb)

install(TARGETS ${PROJECT_NAME} RUNTIME DESTINATION bin)
#include "FracGen.hpp"
#include <iostream>
#include <cfloat>
#include <functional>
#include <atomic>
#include <vector>
#include <tbb/parallel_for.h>
#include <tbb/blocked_range2d.h>
//static Vec3f boxMaxes;
//static Vec3f boxMins;
// Type-erased distance estimator: takes a point and returns a float distance.
// Generate() wraps boxDist, bulbDist and sphereDist in this type.
using estimatorFunction = std::function<float(Vec3f)>;
/******************************************************************************
*
* Tweakable parameters
*
******************************************************************************/
// Ray marching limits and camera placement
static constexpr size_t maxRaySteps      = 7500;     // step budget per ray in trace()
static constexpr float  collisionMinDist = 0.00055f; // estimates below this end the march
static constexpr float  cameraZ          = -3.8f;    // Z coordinate given to the camera
// Colouring parameters (see getColour)
static constexpr float  hueFactor        = -60.0f;
static constexpr int    hueOffset        = 365;
static constexpr float  valueFactor      = 32;
static constexpr float  valueRange       = 1.0f;
static constexpr float  valueClamp       = 0.9f;
static constexpr float  satValue         = 0.5f;
static constexpr float  bgValue          = 0.05f;    // R, G and B of the background
static constexpr float  bgAlpha          = 1.0f;     // alpha of the background
// Mandelbox constants (see sphereFold, boxFold and boxDist)
static constexpr float  fixedRadiusSq    = 2.2f;
static constexpr float  minRadiusSq      = 0.8f;
static constexpr float  foldingLimit     = 1.45;
static constexpr float  boxScale         = -3.5f;
static constexpr size_t boxIterations    = 30;
/******************************************************************************
*
* Distance estimator functions and helpers
*
******************************************************************************/
/**
 * Sphere fold step of the Mandelbox iteration.
 *
 * Scales both the point z and the running derivative dz by the same factor,
 * chosen from the squared modulus of z:
 *  - below minRadiusSq:   constant factor fixedRadiusSq/minRadiusSq
 *  - below fixedRadiusSq: fixedRadiusSq divided by the squared modulus
 *  - otherwise:           z and dz are left untouched
 */
void sphereFold(Vec3f& z, float& dz)
{
    const float sqRadius = z.sqMod();
    float scale;

    if(sqRadius < minRadiusSq)
    {
        // linear inner scaling
        scale = fixedRadiusSq / minRadiusSq;
    }
    else if(sqRadius < fixedRadiusSq)
    {
        // the actual sphere inversion
        scale = fixedRadiusSq / sqRadius;
    }
    else
    {
        // outside the fixed radius: nothing to fold
        return;
    }

    z  *= scale;
    dz *= scale;
}
// Box fold step of the Mandelbox iteration. Replaces z, in place, with
// clamp(z, -foldingLimit, foldingLimit) * 2 - z.
// NOTE(review): presumably Vec3f::clamp works per component and returns a new
// vector -- confirm against Vec.hpp.
void boxFold(Vec3f& z)
{
z = z.clamp(-foldingLimit, foldingLimit)* 2.0f - z;
}
float boxDist(const Vec3f& p)
{
/**
* Distance estimator for a mandelbox
*
* Distance estimator adapted from
* https://http://blog.hvidtfeldts.net/index.php/2011/11/distance-estimated-3d-fractals-vi-the-mandelbox/
*/
const Vec3f& offset = p;
float dr = boxScale;
Vec3f z{p};
for (size_t n = 0; n < boxIterations; n++)
{
boxFold(z); // Reflect
sphereFold(z,dr); // Sphere Inversion
z = z * boxScale + offset; // Scale & Translate
dr = dr * std::abs(boxScale) + 1.0f;
}
float r = z.mod();
return r/std::abs(dr);
}
// Estimates the distance from point p to a mandelbulb.
// Iterates w (starting at p) at most 4 times, bailing out as soon as the
// squared modulus m of w exceeds 256, and returns 0.25*log(m)*sqrt(m)/dz.
float bulbDist(const Vec3f& p)
{
/**
* Distance estimator for a mandelbulb
*
* Distance estimator adapted from
* https://www.iquilezles.org/www/articles/mandelbulb/mandelbulb.htm
* https://www.shadertoy.com/view/ltfSWn
*/
Vec3f w = p;
float m = w.sqMod();
//vec4 trap = vec4(abs(w),m);
// dz is updated alongside w on every iteration and divides the final result;
// presumably it tracks the running derivative -- confirm against the references above
float dz = 3.0f;
for( int i=0; i<4; i++ )
{
// Only the first branch is compiled. It updates w using products and sqrt only;
// the #else branch is a pow/acos/atan2 formulation that is compiled out
// (presumably an equivalent kept for reference -- confirm).
#if 1
float m2 = m*m;
float m4 = m2*m2;
dz = 8.0f*sqrt(m4*m2*m)*dz + 1.0f;
float x = w.X(); float x2 = x*x; float x4 = x2*x2;
float y = w.Y(); float y2 = y*y; float y4 = y2*y2;
float z = w.Z(); float z2 = z*z; float z4 = z2*z2;
float k3 = x2 + z2;
float k2 = 1/sqrt( k3*k3*k3*k3*k3*k3*k3 );
float k1 = x4 + y4 + z4 - 6.0f*y2*z2 - 6.0f*x2*y2 + 2.0f*z2*x2;
float k4 = x2 - y2 + z2;
// each component of the new w is the matching component of p plus a polynomial term
w.setX(p.X() + 64.0f*x*y*z*(x2-z2)*k4*(x4-6.0f*x2*z2+z4)*k1*k2);
w.setY(p.Y() + -16.0f*y2*k3*k4*k4 + k1*k1);
w.setZ(p.Z() + -8.0f*y*k4*(x4*x4 - 28.0f*x4*x2*z2 + 70.0f*x4*z4 - 28.0f*x2*z2*z4 + z4*z4)*k1*k2);
#else
dz = 8.0*pow(sqrt(m),7.0)*dz + 1.0;
//dz = 8.0*pow(m,3.5)*dz + 1.0;
float r = w.mod();
float b = 8.0*acos( w.Y()/r);
float a = 8.0*atan2( w.X(), w.Z() );
w = p + Vec3f( sin(b)*sin(a), cos(b), sin(b)*cos(a) ) * pow(r,8.0);
#endif
// trap = min( trap, vec4(abs(w),m) );
m = w.sqMod();
// bail out once the squared modulus exceeds 256
if( m > 256.0f )
break;
}
return 0.25f*log(m)*sqrt(m)/dz;
}
/**
 * Distance estimator for a sphere of radius 2:
 * the modulus of p minus the radius.
 */
float sphereDist(Vec3f p)
{
    constexpr float sphereRadius = 2.f;
    return p.mod() - sphereRadius;
}
/******************************************************************************
*
* Colouring functions and helpers
*
******************************************************************************/
RGBA HSVtoRGB(int H, float S, float V)
{
    /**
     * Converts a hue / saturation / value triplet to RGBA.
     * The alpha channel of the result is always 1.
     *
     * adapted from
     * https://gist.github.com/kuathadianto/200148f53616cbd226d993b400214a7f
     */
    const float C = S * V;
    const float X = C * (1 - std::abs(std::fmod(H / 60.0, 2) - 1));
    const float m = V - C;

    float Rs = 0;
    float Gs = 0;
    float Bs = 0;

    // One sector per 60 degrees of hue. Any hue outside [0, 300),
    // negative values included, is handled by the last sector.
    const int sector = (H >= 0 && H < 300) ? (H / 60) : 5;
    switch(sector)
    {
        case 0:
            Rs = C; Gs = X; Bs = 0;
            break;
        case 1:
            Rs = X; Gs = C; Bs = 0;
            break;
        case 2:
            Rs = 0; Gs = C; Bs = X;
            break;
        case 3:
            Rs = 0; Gs = X; Bs = C;
            break;
        case 4:
            Rs = X; Gs = 0; Bs = C;
            break;
        default:
            Rs = C; Gs = 0; Bs = X;
            break;
    }

    RGBA output;
    output.setR(Rs + m);
    output.setG(Gs + m);
    output.setB(Bs + m);
    output.setA( 1.0f );
    return output;
}
/**
 * Turns the result of trace() into a pixel colour.
 *
 * steps carries the final ray position in X/Y/Z and the number of steps
 * taken in W. A ray that used up all maxRaySteps gets the flat background;
 * otherwise the hue comes from the Z coordinate and the value from the
 * step count.
 */
RGBA getColour(const Vec4f& steps)
{
    if(steps.W() == maxRaySteps)
    {
        RGBA background;
        background.setR(bgValue);
        background.setG(bgValue);
        background.setB(bgValue);
        background.setA(bgAlpha);
        return background;
    }

    // This is a good place to track the min/max of the hit position per axis
    // if you need to double check the bounds of the rendered object
    const Vec3f position(steps.X(),steps.Y(),steps.Z());

    const int hue = std::abs((static_cast<int>(position.Z() * hueFactor) + hueOffset) % 360);

    // More steps means a darker pixel, limited by valueClamp
    const float stepShade = std::min(steps.W()*valueFactor/maxRaySteps, valueClamp);
    const float value = valueRange*(1.0f - stepShade);

    // A simpler colouring would use only `value` for all three channels
    // (roughly distance from camera)
    return HSVtoRGB(hue, satValue, value);
}
/******************************************************************************
*
* Ray marching functions and helpers
*
******************************************************************************/
Vec4f trace(const Camera& cam, const Screen& s, int x, int y, const estimatorFunction& f)
{
    /**
     * Marches one ray from the camera through screen pixel (x, y).
     *
     * Each step advances the ray by the distance the estimator f reports,
     * until that distance drops below collisionMinDist or maxRaySteps steps
     * have been taken.
     *
     * This function taken from
     * http://blog.hvidtfeldts.net/index.php/2011/06/distance-estimated-3d-fractals-part-i/
     */
    Vec3f rayDir = (s.topLeft + Vec3f{s.pixelWidth*x, s.pixelHeight * y, 0.f})
                   - static_cast<Vec3f>(cam.pos);
    rayDir.normalise();

    Vec3f p;
    float travelled = 0.0f;
    unsigned int steps = 0;

    while(steps < maxRaySteps)
    {
        p = cam.pos + (rayDir * travelled);
        const float estimate = f(p);
        travelled += estimate;
        if(estimate < collisionMinDist)
        {
            break;
        }
        ++steps;
    }

    // return both the steps and the actual position in space for colouring purposes
    return Vec4f{p,static_cast<float>(steps)};
}
/**
 * Renders every pixel inside a 2D block of the image.
 *
 * For each (row, column) in range, traces a ray with estimator f and writes
 * the resulting colour to data at index row * scr.width + column.
 */
void traceRegion(colourVec& data,
                 const Camera& cam, const Screen& scr,
                 const estimatorFunction& f,
                 const tbb::blocked_range2d<size_t,size_t>& range)
{
    const size_t rowEnd   = range.rows().end();
    const size_t colBegin = range.cols().begin();
    const size_t colEnd   = range.cols().end();

    for(size_t row = range.rows().begin(); row < rowEnd; ++row)
    {
        const auto rowOffset = row*scr.width;
        for(size_t col = colBegin; col < colEnd; ++col)
        {
            data[rowOffset+col] = getColour(trace(cam, scr, col, row, f));
        }
    }
}
/******************************************************************************
*
* Thread spawning section
*
******************************************************************************/
// Renders the next horizontal slice of the image into outBuffer using
// tbb::parallel_for over a 2D range (rows of the slice x all columns).
// Each call handles heightStep rows: the whole image when bench is set,
// 10 rows otherwise. Returns true once the last slice has been rendered
// (the row counter is then reset to 0), false while rows remain.
bool FracGen::Generate(int width, int height)
{
// First row of the next slice. Being a function-local static, it persists
// between calls and is shared by every FracGen instance.
static std::atomic<int> h {0};
bool finishedGeneration = false;
int heightStep = bench ? height : 10;
// Only `box` is captured by the lambda below; bulb and sphere are
// constructed here but not used in this function.
estimatorFunction bulb(bulbDist);
estimatorFunction box(boxDist);
estimatorFunction sphere(sphereDist);
/*
* calculate the rectangle which represents the screen (camera z near) in object space
* No need to have an actual general camera so I'm just assuming the camera
* always sits on the Z axis and always has (0,1,0) as its up vector
* This allows me to cheat a lot and not have to actually go into the
* linear algebra side and write something like gluUnproject
*/
Screen s;
Vec3f screenPlaneOrigin{cam->pos.X(),cam->pos.Y(),cam->pos.Z() + cam->near};
// NOTE(review): the constructor sets fovY to 45 and it reaches sin() unconverted;
// sin() works in radians -- confirm whether a degree value is intended here.
float screenPlaneHeight = 2*(cam->near*sin(cam->fovY/2));
screenPlaneHeight = screenPlaneHeight < 0 ? -screenPlaneHeight : screenPlaneHeight;
float screenPlaneWidth = screenPlaneHeight * cam->AR;
// if 0,0 is top left, pixel height needs to be a negative
s.width = width;
s.height = height;
s.pixelHeight = (-1.f) * screenPlaneHeight / s.height;
s.pixelWidth = screenPlaneWidth / s.width;
s.topLeft = Vec3f {screenPlaneOrigin.X() - (screenPlaneWidth/2),
screenPlaneOrigin.Y() + (screenPlaneHeight/2),
screenPlaneOrigin.Z() };
// The screen description and the estimator are captured by value
auto tbbTrace = [this, s, f = box](const tbb::blocked_range2d<size_t,size_t>& range)
{traceRegion(*(this->outBuffer), *(this->cam), s, f, range); };
// Minor optimization in TBB: Since we can define the range here, we don't need to check bounds in
// traceRegion. This is very similar to how SYCL and OpenCL do things
// Rows [h, min(h + heightStep, height)) and columns [0, width)
tbb::blocked_range2d<size_t,size_t> range(h.load(), std::min(h.load()+heightStep, height), 0, width);
tbb::parallel_for(range, tbbTrace);
h+= heightStep;
/*
* TBB doesn't seem to like the way of doing things where I know how
* many tasks there are and am collecting the return from them.
* At least not in the initial tutorials and beginner documentation
*/
// Past the last row: rewind the counter so the next call starts a new image
if (h >=height)
{
h.store(0);
finishedGeneration = true;
}
return finishedGeneration;
}
// Allocates an output buffer of width*height colours, sets up the camera
// and prints the TBB version banner.
//   benching: stored in `bench`; also controls how often the banner is printed
//   width, height: image size in pixels, also used for the camera aspect ratio
FracGen::FracGen(bool benching, size_t width, size_t height)
: bench{benching}
, cam{new Camera}
{
outBuffer = std::make_shared< colourVec >(width*height);
cam->AR = static_cast<double>(width)/static_cast<double>(height);
// In this TBB implementation the position is used both for the screen plane
// computation in Generate() and as the origin of every ray in trace()
cam->pos = Vec3d{0, 0, cameraZ};
cam->target = Vec3d{0,0,0};
cam->up = Vec3d{0,1,0};
cam->near = 0.1;
cam->fovY = 45;
// Banner is printed on every construction when not benching,
// and only on the first construction when benching
static bool once = false;
if(!bench || !once)
{
std::cout << "Running on TBB "<< TBB_VERSION_MAJOR <<"."<< TBB_VERSION_MINOR << std::endl;
once = true;
}
}
// Nothing to release by hand: the body is empty, members clean up after themselves
FracGen::~FracGen() = default;
#ifndef FRACGEN_HPP_DEFINED
#define FRACGEN_HPP_DEFINED
<