/// @file ComputePrimitives.h
/// @brief A collection of parallel compute primitives that dispatch to CUDA,
///        TBB, or a serial loop at runtime.

#pragma once

#if defined(NANOVDB_USE_CUDA)
#include <cuda_runtime_api.h>
#endif

#if defined(NANOVDB_USE_TBB)
#include <tbb/parallel_for.h>
#include <tbb/blocked_range.h>
#endif

#include <cstdint> // uint8_t
#include <cstring> // std::memset, std::memcpy
#include <cstdlib> // exit
#include <iostream> // std::cerr
#include <tuple>
#include <utility>

// Forward compatibility with the C++14 standard library: minimal
// index_sequence / make_index_sequence stand-ins for C++11 builds.
namespace cxx14 {
template<std::size_t...>
struct index_sequence {};

template<std::size_t N, std::size_t... Is>
struct make_index_sequence : make_index_sequence<N - 1, N - 1, Is...> {};

template<std::size_t... Is>
struct make_index_sequence<0u, Is...> : index_sequence<Is...> {};
} // namespace cxx14
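// Example (illustrative): these helpers materialize a parameter pack of tuple
// indices at compile time so std::get can unpack a tuple into a call. Here
// print() stands in for any variadic function (hypothetical name):
// @code
//   template<typename... Ts, std::size_t... Is>
//   void applyPrint(const std::tuple<Ts...>& t, cxx14::index_sequence<Is...>)
//   {
//       print(std::get<Is>(t)...); // print(std::get<0>(t), std::get<1>(t), ...)
//   }
//   // invoked as: applyPrint(t, cxx14::make_index_sequence<sizeof...(Ts)>());
// @endcode
// ApplyFunc::call below uses this same pattern to forward stored arguments.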
#if defined(__CUDACC__)

// Reports a CUDA runtime error (with the file and line of the failing call)
// and returns false; returns true on success.
static inline bool checkCUDA(cudaError_t result, const char* file, const int line)
{
    if (result != cudaSuccess) {
        std::cerr << "CUDA Runtime API error " << result << " in file " << file
                  << ", line " << line << " : " << cudaGetErrorString(result) << ".\n";
        return false;
    }
    return true;
}

#define NANOVDB_CUDA_SAFE_CALL(x) checkCUDA(x, __FILE__, __LINE__)

// Like checkCUDA, but treats the error as fatal and terminates the process.
static inline void checkErrorCUDA(cudaError_t result, const char* file, const int line)
{
    if (result != cudaSuccess) {
        std::cerr << "CUDA Runtime API error " << result << " in file " << file
                  << ", line " << line << " : " << cudaGetErrorString(result) << ".\n";
        exit(1);
    }
}

#define NANOVDB_CUDA_CHECK_ERROR(result, file, line) checkErrorCUDA(result, file, line)

#endif // __CUDACC__

// Adapter that lets a range functor fn(start, end, args...) be driven either
// per block index (serial fallback) or per tbb::blocked_range (TBB path),
// with the trailing arguments captured in a tuple.
template<typename Fn, typename... Args>
class ApplyFunc
{
public:
    ApplyFunc(int count, int blockSize, const Fn& fn, Args... args)
        : mCount(count)
        , mBlockSize(blockSize)
        , mFunc(fn)
        , mArgs(args...)
    {
    }

    // Unpacks the stored argument tuple and forwards it to the functor.
    template<std::size_t... Is>
    void call(int start, int end, cxx14::index_sequence<Is...>) const
    {
        mFunc(start, end, std::get<Is>(mArgs)...);
    }

    // Processes the i-th block of mBlockSize items, clamped to mCount.
    void operator()(int i) const
    {
        int start = i * mBlockSize;
        int end = i * mBlockSize + mBlockSize;
        if (end > mCount)
            end = mCount;
        call(start, end, cxx14::make_index_sequence<sizeof...(Args)>());
    }

#if defined(NANOVDB_USE_TBB)
    // Processes the item range handed out by tbb::parallel_for.
    void operator()(const tbb::blocked_range<int>& r) const
    {
        int start = r.begin();
        int end = r.end();
        call(start, end, cxx14::make_index_sequence<sizeof...(Args)>());
    }
#endif

private:
    int                 mCount;
    int                 mBlockSize;
    Fn                  mFunc;
    std::tuple<Args...> mArgs;
};
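// Example (illustrative): wrapping a range functor so a block index maps onto
// an item range. `dataPtr` is a hypothetical int* with at least 100 elements:
// @code
//   auto op = [](int start, int end, int* data) {
//       for (int i = start; i < end; ++i) data[i] += 1;
//   };
//   ApplyFunc<decltype(op), int*> apply(/*count=*/100, /*blockSize=*/32, op, dataPtr);
//   apply(0); // processes items [0, 32); apply(3) would clamp to [96, 100)
// @endcode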
#if defined(__CUDACC__)

// Grid-stride CUDA kernel: each thread handles WorkPerThread items, advancing
// by blockDim.x * gridDim.x per iteration of j, and invokes the functor on the
// single-element range [i, i+1). E.g. with blockDim.x == 256, gridDim.x == 4
// and WorkPerThread == 2, thread 3 of block 1 visits i = 259 and i = 1283.
template<int WorkPerThread, typename FnT, typename... Args>
__global__ void parallelForKernel(int numItems, FnT f, Args... args)
{
    for (int j = 0; j < WorkPerThread; ++j) {
        int i = threadIdx.x + blockIdx.x * blockDim.x + j * blockDim.x * gridDim.x;
        if (i < numItems)
            f(i, i + 1, args...);
    }
}

#endif // __CUDACC__

// Blocks until outstanding device work completes; a no-op on the host path.
inline void computeSync(bool useCuda, const char* file, int line)
{
#if defined(__CUDACC__)
    if (useCuda) {
        NANOVDB_CUDA_CHECK_ERROR(cudaDeviceSynchronize(), file, line);
    }
#endif
}
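// Example (illustrative): kernel launches are asynchronous on the device path,
// so callers synchronize before reading results on the host:
// @code
//   computeSync(useCuda, __FILE__, __LINE__); // wait for the device to finish
// @endcode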
// Sets each of the size bytes at data to value (cudaMemset on the device
// path, std::memset on the host path).
inline void computeFill(bool useCuda, void* data, uint8_t value, size_t size)
{
    if (useCuda) {
#if defined(__CUDACC__)
        cudaMemset(data, value, size);
#endif
    } else {
        std::memset(data, value, size);
    }
}
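// Example (illustrative): zero a hypothetical buffer of n floats. Note that
// value is replicated per byte (memset semantics), so only byte patterns such
// as 0 or 0xFF are meaningful for non-byte element types:
// @code
//   computeFill(useCuda, devBuffer, 0, n * sizeof(float));
// @endcode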
// Invokes op(start, end, args...) over the index range [0, numItems): as a
// CUDA kernel when useCuda is true, via tbb::parallel_for when TBB is
// available, and as a serial loop otherwise.
template<typename FunctorT, typename... Args>
inline void computeForEach(bool useCuda, int numItems, int blockSize, const char* file, int line, const FunctorT& op, Args... args)
{
    if (numItems == 0)
        return;

    if (useCuda) {
#if defined(__CUDACC__)
        static const int WorkPerThread = 1;
        int blockCount = ((numItems / WorkPerThread) + (blockSize - 1)) / blockSize;
        parallelForKernel<WorkPerThread, FunctorT, Args...><<<blockCount, blockSize, 0, 0>>>(numItems, op, args...);
        NANOVDB_CUDA_CHECK_ERROR(cudaGetLastError(), file, line);
#endif
    } else {
#if defined(NANOVDB_USE_TBB)
        tbb::blocked_range<int> range(0, numItems, blockSize);
        tbb::parallel_for(range, ApplyFunc<FunctorT, Args...>(numItems, blockSize, op, args...));
#else
        for (int i = 0; i < numItems; ++i)
            op(i, i + 1, args...);
#endif
    }
}
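// Example (illustrative sketch): scale a hypothetical float buffer in place.
// The functor always receives a half-open index range [start, end) plus the
// trailing arguments. Under nvcc this relies on extended lambdas
// (--extended-lambda); __hostdev__ is NanoVDB's __host__ __device__ macro:
// @code
//   computeForEach(useCuda, numItems, 128, __FILE__, __LINE__,
//       [] __hostdev__ (int start, int end, float* out) {
//           for (int i = start; i < end; ++i)
//               out[i] *= 2.0f;
//       },
//       buffer); // `buffer` must be valid in the active memory space
// @endcode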
// Copies size bytes from device memory to host memory (plain memcpy when not
// running with CUDA).
inline void computeDownload(bool useCuda, void* dst, const void* src, size_t size)
{
    if (useCuda) {
#if defined(__CUDACC__)
        cudaMemcpy(dst, src, size, cudaMemcpyDeviceToHost);
#endif
    } else {
        std::memcpy(dst, src, size);
    }
}

// Copies size bytes between two buffers in the same memory space
// (device-to-device or host-to-host).
inline void computeCopy(bool useCuda, void* dst, const void* src, size_t size)
{
    if (useCuda) {
#if defined(__CUDACC__)
        cudaMemcpy(dst, src, size, cudaMemcpyDeviceToDevice);
#endif
    } else {
        std::memcpy(dst, src, size);
    }
}
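// Example (illustrative end-to-end sketch): allocation is outside the scope of
// this header, so `devBuffer` and `hostBuffer` are assumed to be valid in the
// device and host memory spaces respectively, and `op` is a range functor as
// described above:
// @code
//   computeFill(useCuda, devBuffer, 0, numItems * sizeof(float));      // clear
//   computeForEach(useCuda, numItems, 256, __FILE__, __LINE__, op,
//                  (float*)devBuffer);                                 // run
//   computeSync(useCuda, __FILE__, __LINE__);                          // wait
//   computeDownload(useCuda, hostBuffer, devBuffer, numItems * sizeof(float));
// @endcode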