Commit bac6cdfd authored by Mohammad Umair

Revert changes that are not required

Signed-off-by: Umair <[email protected]>
parent 62267e85
......@@ -23,9 +23,9 @@ ENDIF()
INCLUDE(CheckCXXCompilerFlag)
SET(CUDA_SEPARABLE_COMPILATION ON)
FIND_PACKAGE(CUDA 10.0 REQUIRED)
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} --default-stream per-thread" )
INCLUDE_DIRECTORIES(include/ .)
......
......@@ -14,5 +14,5 @@ cd build/
# Change NVCC flags as needed
cmake ../ -DCMAKE_INSTALL_PREFIX=$DIR -DCUDA_NVCC_FLAGS="-arch=sm_61"
make -j8 install VERBOSE=1
# make -j8 install
#make -j8 install VERBOSE=1
make -j8 install
......@@ -31,7 +31,7 @@ static const int MAX_COUNTS_PER_QUERY = 1024;
static const int MAX_VARS_FIRST_STAGE = 5;
static const int MAX_COUNTS_FIRST_STAGE = 1 << 5; //considering all variables have arity of 2
#define STREAM_COUNT 2
#define STREAM_COUNT 1
static std::atomic_flag isStreamFree[STREAM_COUNT] = {ATOMIC_FLAG_INIT};
template <int N> class GPUCounter {
......@@ -72,7 +72,7 @@ public:
// printf("sending to CPU\n");
return this->radCounter->apply(xa_vect, pa_vect, F);
}
printf("sending to GPU %d\n", streamId);
// printf("sending to GPU %d\n", streamId);
int paSize = pa_vect.size();
std::vector<int> xi(1 + paSize);
......@@ -113,7 +113,6 @@ public:
resultList_, // results array for Nijk
resultListPa_, // results array for Nij
intermediaResult_, // memory for intermediate results
streams[streamId],
streamId);
//TODO: fix this condition
......@@ -259,18 +258,18 @@ template <int N, typename Iter> GPUCounter<N> create_GPUCounter(int n, int m, It
delete[] tempBvPtr;
// expected size = (number of configurations in the query) * sizeof(uint64_t)
cucheck_dev(cudaMallocManaged(&p.resultList_, sizeof(uint64_t) * MAX_COUNTS_PER_QUERY * STREAM_COUNT));
cucheck_dev(cudaMallocManaged(&p.resultListPa_, sizeof(uint64_t) * MAX_COUNTS_PER_QUERY * STREAM_COUNT));
cudaMallocManaged(&p.resultList_, sizeof(uint64_t) * MAX_COUNTS_PER_QUERY * STREAM_COUNT);
cudaMallocManaged(&p.resultListPa_, sizeof(uint64_t) * MAX_COUNTS_PER_QUERY * STREAM_COUNT);
cucheck_dev(cudaMalloc(&p.intermediaResult_, sizeof(uint64_t) * bitvectorSize * 32 * STREAM_COUNT));
cudaMalloc(&p.intermediaResult_, sizeof(uint64_t) * bitvectorSize * 32 * STREAM_COUNT);
p.streams.resize(STREAM_COUNT);
for (int i = 0; i < STREAM_COUNT; ++i) {
cucheck_dev(cudaStreamCreate(&p.streams[i]));
cudaStreamCreate(&p.streams[i]);
}
cucheck_dev(cudaDeviceSynchronize());
cudaDeviceSynchronize();
return p;
} // create_GPUCounter
......
......@@ -5,21 +5,14 @@
#include <cinttypes>
#include <vector>
#include <assert.h>
#include <cuda_runtime.h>
#ifdef __CUDACC__
#define CUDA_CALLABLE __host__ __device__
// cucheck_dev(call): checks the status returned by a CUDA runtime call.
//
// Evaluates `call` exactly once and stores the result in a cudaError_t. If the
// result is not cudaSuccess, it prints "<file> (<line>): <error string>" with
// printf (file/line are those of the macro's use site, the string comes from
// cudaGetErrorString) and then executes assert(0).
//
// The expansion is a brace-enclosed block, so it is meant to be used as a
// statement, e.g. `cucheck_dev(cudaDeviceSynchronize());`.
//
// NOTE(review): assert(0) is compiled out when NDEBUG is defined; in such
// builds a failure is only printed and execution continues - confirm this is
// the intended behaviour.
// NOTE(review): this definition sits inside the `#ifdef __CUDACC__` branch, so
// it appears to be unavailable to translation units that are not compiled by
// nvcc - verify against the callers.
#define cucheck_dev(call) \
{ \
cudaError_t cucheck_err = (call); \
if(cucheck_err != cudaSuccess) { \
const char *err_str = cudaGetErrorString(cucheck_err); \
printf("%s (%d): %s\n", __FILE__, __LINE__, err_str); \
assert(0); \
} \
}
#else
#define CUDA_CALLABLE
#endif
void copyAritiesToDevice(
int streamId,
......@@ -36,7 +29,6 @@ void cudaCallBlockCount(const uint block_count,
uint64_t* results,
uint64_t *resultsPa,
uint64_t* intermediateData,
cudaStream_t stream,
int streamId);
#endif // GPU_UTIL
CUDA_ADD_LIBRARY(gpu_util gpu_util.cu)
\ No newline at end of file
CUDA_ADD_LIBRARY(gpu_util STATIC gpu_util.cu)
This diff is collapsed.
......@@ -3,19 +3,14 @@
rm gpu_util.o
rm ../examples/query
rm query.o
rm gpu_util_link.o
#compile cuda code
nvcc -x cu -g -G -arch=sm_61 -c gpu_util.cu -o gpu_util.o -I../include
#link cuda code
# nvcc -arch=sm_61 -dlink -o gpu_util_link.o gpu_util.o -lcudadevrt -lcudart
nvcc -arch=sm_61 -rdc=true -c gpu_util.cu -lcudadevrt -o gpu_util.o -I../include
#create static library from object code
ar rcs gpu_util.a gpu_util.o
#compile C++ code
g++ -fopenmp -std=c++14 -c ../examples/query.cpp -o query.o -I/usr/local/cuda-10.0/include -I../include
#link using nvcc
# nvcc --default-stream per-thread -arch=sm_61 query.o gpu_util.o -Xcompiler -fopenmp -std=c++14 -O3 -o ../examples/query \
# -I/usr/local/cuda-10.0/include -I../include /usr/local/cuda-10.0/lib64/libcudart_static.a \
# -ldl -lrt /usr/local/cuda-10.0/lib64/libcudart_static.a
#link using g++
g++ query.o gpu_util.o -fopenmp -std=c++14 -o ../examples/query \
-I/usr/local/cuda-10.0/include -I../include -L/usr/local/cuda-10.0/lib64 -lcudart -lcudadevrt
\ No newline at end of file
g++ -fopenmp -std=c++14 -O3 -c ../examples/query.cpp -o query.o -I/util/common/cuda/cuda-10.0/include -I../include
#link
nvcc -arch=sm_61 query.o gpu_util.a -Xcompiler -fopenmp -std=c++14 -O3 -o ../examples/query \
-I/util/common/cuda/cuda-10.0/include -I../include /util/common/cuda/cuda-10.0/lib64/libcudart_static.a \
-ldl -lrt /util/common/cuda/cuda-10.0/lib64/libcudart_static.a
\ No newline at end of file
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment