OpenVDB  12.0.0
ComputePrimitives.h
Go to the documentation of this file.
1 // Copyright Contributors to the OpenVDB Project
2 // SPDX-License-Identifier: Apache-2.0
3 
4 /// @file ComputePrimitives.h
5 /// @brief A collection of parallel compute primitives
6 
7 #pragma once
8 
#if defined(NANOVDB_USE_CUDA)
#include <cuda_runtime_api.h>
#endif

#if defined(NANOVDB_USE_TBB)
#include <tbb/parallel_for.h>
#include <tbb/blocked_range.h>
#endif

#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <tuple>
#include <utility>
20 
21 
22 // forward compatibility for C++14 Standard Library
namespace cxx14 {
/// @brief Empty tag type carrying a compile-time pack of indices
///        (minimal stand-in for std::index_sequence).
template<std::size_t...>
struct index_sequence
{
};

/// @brief Builds the index pack recursively: each step prepends N-1 to the
///        accumulated pack @c Is and recurses with N-1.
template<std::size_t N, std::size_t... Is>
struct make_index_sequence : make_index_sequence<N - 1, N - 1, Is...>
{
};

/// @brief Recursion terminator: once N reaches 0 the accumulated pack is
///        exposed through the index_sequence base class.
template<std::size_t... Is>
struct make_index_sequence<0, Is...> : index_sequence<Is...>
{
};
} // namespace cxx14
39 
40 #if defined(__CUDACC__)
41 
42 static inline bool checkCUDA(cudaError_t result, const char* file, const int line)
43 {
44  if (result != cudaSuccess) {
45  std::cerr << "CUDA Runtime API error " << result << " in file " << file << ", line " << line << " : " << cudaGetErrorString(result) << ".\n";
46  return false;
47  }
48  return true;
49 }
50 
51 #define NANOVDB_CUDA_SAFE_CALL(x) checkCUDA(x, __FILE__, __LINE__)
52 
53 static inline void checkErrorCUDA(cudaError_t result, const char* file, const int line)
54 {
55  if (result != cudaSuccess) {
56  std::cerr << "CUDA Runtime API error " << result << " in file " << file << ", line " << line << " : " << cudaGetErrorString(result) << ".\n";
57  exit(1);
58  }
59 }
60 
61 #define NANOVDB_CUDA_CHECK_ERROR(result, file, line) checkErrorCUDA(result, file, line)
62 
63 #endif
64 
65 template<typename Fn, typename... Args>
66 class ApplyFunc
67 {
68 public:
69  ApplyFunc(int count, int blockSize, const Fn& fn, Args... args)
70  : mCount(count)
71  , mBlockSize(blockSize)
72  , mArgs(args...)
73  , mFunc(fn)
74  {
75  }
76 
77  template<std::size_t... Is>
78  void call(int start, int end, cxx14::index_sequence<Is...>) const
79  {
80  mFunc(start, end, std::get<Is>(mArgs)...);
81  }
82 
83  void operator()(int i) const
84  {
85  int start = i * mBlockSize;
86  int end = i * mBlockSize + mBlockSize;
87  if (end > mCount)
88  end = mCount;
89  call(start, end, cxx14::make_index_sequence<sizeof...(Args)>());
90  }
91 
92 #if defined(NANOVDB_USE_TBB)
93  void operator()(const tbb::blocked_range<int>& r) const
94  {
95  int start = r.begin();
96  int end = r.end();
97  if (end > mCount)
98  end = mCount;
99  call(start, end, cxx14::make_index_sequence<sizeof...(Args)>());
100  }
101 #endif
102 
103 private:
104  int mCount;
105  int mBlockSize;
106  Fn mFunc;
107  std::tuple<Args...> mArgs;
108 };
109 
110 #if defined(__CUDACC__)
111 
112 template<int WorkPerThread, typename FnT, typename... Args>
113 __global__ void parallelForKernel(int numItems, FnT f, Args... args)
114 {
115  for (int j=0;j<WorkPerThread;++j)
116  {
117  int i = threadIdx.x + blockIdx.x * blockDim.x + j * blockDim.x * gridDim.x;
118  if (i < numItems)
119  f(i, i + 1, args...);
120  }
121 }
122 
123 #endif
124 
/// @brief Waits for the device via cudaDeviceSynchronize() when @a useCuda is
///        set and the file is compiled as CUDA; otherwise does nothing.
/// @param useCuda Selects the CUDA path
/// @param file    Source file name passed on to the error check
/// @param line    Source line number passed on to the error check
/// @note A failing synchronization is handled by NANOVDB_CUDA_CHECK_ERROR.
inline void computeSync(bool useCuda, const char* file, int line)
{
#if defined(__CUDACC__)
    if (!useCuda)
        return;
    NANOVDB_CUDA_CHECK_ERROR(cudaDeviceSynchronize(), file, line);
#endif
}
133 
/// @brief Sets @a size bytes starting at @a data to the byte @a value.
/// @param useCuda If true the fill is issued with cudaMemset (only when compiled
///                as CUDA; otherwise the call is a no-op), else std::memset is used
/// @param data    Destination buffer
/// @param value   Byte value written to every byte of the buffer
/// @param size    Number of bytes to set
/// @note A failing cudaMemset is reported on std::cerr through
///       NANOVDB_CUDA_SAFE_CALL instead of being silently ignored; execution
///       continues afterwards.
inline void computeFill(bool useCuda, void* data, uint8_t value, size_t size)
{
    if (useCuda) {
#if defined(__CUDACC__)
        (void)NANOVDB_CUDA_SAFE_CALL(cudaMemset(data, value, size));
#endif
    } else {
        std::memset(data, value, size);
    }
}
144 
/// @brief Applies @a op to every index in [0, numItems), either through a CUDA
///        kernel launch, through tbb::parallel_for, or through a serial loop.
/// @param useCuda   If true launch parallelForKernel (only when compiled as
///                  CUDA; otherwise nothing is executed)
/// @param numItems  Number of items; values <= 0 result in no work
/// @param blockSize Threads per block on the CUDA path and grain size on the
///                  TBB path; must be positive
/// @param file      Source file name reported if the kernel launch fails
/// @param line      Source line number reported if the kernel launch fails
/// @param op        Functor invoked as op(begin, end, args...)
/// @param args      Extra arguments forwarded to @a op
/// @note The kernel is launched on stream 0 and only the launch itself is
///       checked (cudaGetLastError); no synchronization is performed here.
template<typename FunctorT, typename... Args>
inline void computeForEach(bool useCuda, int numItems, int blockSize, const char* file, int line, const FunctorT& op, Args... args)
{
    // Nothing to do for an empty range; a negative count is treated the same
    // way rather than producing an invalid launch configuration or TBB range.
    if (numItems <= 0)
        return;

    if (useCuda) {
#if defined(__CUDACC__)
        static const int WorkPerThread = 1;
        // Ceiling divisions written as (n - 1) / d + 1 (valid because n >= 1):
        // they cannot overflow and keep every item covered even when
        // WorkPerThread does not divide numItems.
        const int threadCount = (numItems - 1) / WorkPerThread + 1;
        const int blockCount = (threadCount - 1) / blockSize + 1;
        parallelForKernel<WorkPerThread, FunctorT, Args...><<<blockCount, blockSize, 0, 0>>>(numItems, op, args...);
        NANOVDB_CUDA_CHECK_ERROR(cudaGetLastError(), file, line);
#endif
    } else {
#if defined(NANOVDB_USE_TBB)
        tbb::blocked_range<int> range(0, numItems, blockSize);
        tbb::parallel_for(range, ApplyFunc<FunctorT, Args...>(numItems, blockSize, op, args...));
#else
        // Serial fallback: one call per item with a unit-length range.
        for (int i = 0; i < numItems; ++i)
            op(i, i + 1, args...);
#endif
    }
}
168 
/// @brief Copies @a size bytes from @a src to @a dst.
/// @param useCuda If true the copy is a device-to-host cudaMemcpy (only when
///                compiled as CUDA; otherwise the call is a no-op), else
///                std::memcpy is used
/// @param dst     Destination buffer (host memory on the CUDA path)
/// @param src     Source buffer (device memory on the CUDA path)
/// @param size    Number of bytes to copy
/// @note A failing cudaMemcpy is reported on std::cerr through
///       NANOVDB_CUDA_SAFE_CALL instead of being silently ignored; execution
///       continues afterwards.
inline void computeDownload(bool useCuda, void* dst, const void* src, size_t size)
{
    if (useCuda) {
#if defined(__CUDACC__)
        (void)NANOVDB_CUDA_SAFE_CALL(cudaMemcpy(dst, src, size, cudaMemcpyDeviceToHost));
#endif
    } else {
        std::memcpy(dst, src, size);
    }
}
179 
/// @brief Copies @a size bytes from @a src to @a dst within one memory space.
/// @param useCuda If true the copy is a device-to-device cudaMemcpy (only when
///                compiled as CUDA; otherwise the call is a no-op), else
///                std::memcpy is used
/// @param dst     Destination buffer (device memory on the CUDA path)
/// @param src     Source buffer (device memory on the CUDA path)
/// @param size    Number of bytes to copy
/// @note A failing cudaMemcpy is reported on std::cerr through
///       NANOVDB_CUDA_SAFE_CALL instead of being silently ignored; execution
///       continues afterwards.
inline void computeCopy(bool useCuda, void* dst, const void* src, size_t size)
{
    if (useCuda) {
#if defined(__CUDACC__)
        (void)NANOVDB_CUDA_SAFE_CALL(cudaMemcpy(dst, src, size, cudaMemcpyDeviceToDevice));
#endif
    } else {
        std::memcpy(dst, src, size);
    }
}
Definition: ComputePrimitives.h:23
void computeDownload(bool useCuda, void *dst, const void *src, size_t size)
Definition: ComputePrimitives.h:169
OutGridT XformOp & op
Definition: ValueTransformer.h:139
Definition: ComputePrimitives.h:30
void computeForEach(bool useCuda, int numItems, int blockSize, const char *file, int line, const FunctorT &op, Args...args)
Definition: ComputePrimitives.h:146
void computeFill(bool useCuda, void *data, uint8_t value, size_t size)
Definition: ComputePrimitives.h:134
void computeSync(bool useCuda, const char *file, int line)
Definition: ComputePrimitives.h:125
Definition: ComputePrimitives.h:25
void call(int start, int end, cxx14::index_sequence< Is... >) const
Definition: ComputePrimitives.h:78
void operator()(int i) const
Definition: ComputePrimitives.h:83
ApplyFunc(int count, int blockSize, const Fn &fn, Args...args)
Definition: ComputePrimitives.h:69
Definition: ComputePrimitives.h:66
#define __global__
Definition: Util.h:76
void computeCopy(bool useCuda, void *dst, const void *src, size_t size)
Definition: ComputePrimitives.h:180