
Commit e9c07a4

making helpers private; inlining/cleanup

1 parent 6b1dd40 commit e9c07a4
File tree

5 files changed: +22 -38 lines changed

nanovdb/nanovdb/tools/cuda/CoarsenGrid.cuh
nanovdb/nanovdb/tools/cuda/DilateGrid.cuh
nanovdb/nanovdb/tools/cuda/MergeGrids.cuh
nanovdb/nanovdb/tools/cuda/PruneGrid.cuh
nanovdb/nanovdb/tools/cuda/RefineGrid.cuh
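
All five headers receive the same treatment: the per-stage helper methods (coarsenRoot(), dilateRoot(), mergeRoot(), and so on) move behind a private: access specifier, and the public deviceSrcGrid()/deviceSrcGrid1()/deviceSrcGrid2() accessors are removed now that the implementations read the mDeviceSrcGrid members directly. A minimal sketch of the before/after class layout, using CoarsenGrid as the example; GridT, BufferT, and GridHandle below are stand-in types for illustration only, not the real NanoVDB declarations:

    // Stand-in types so the sketch is self-contained; the real header uses the
    // NanoVDB GridT/BufferT aliases and nanovdb::GridHandle.
    struct GridT {};
    struct BufferT {};
    template <typename T> struct GridHandle {};

    // Before: the per-stage helpers and the deviceSrcGrid() accessor were public.
    class CoarsenGridBefore
    {
    public:
        GridHandle<BufferT> getHandle(const BufferT& = BufferT()) { return {}; }
        void coarsenRoot() {}
        void coarsenInternalNodes() {}
        void coarsenLeafNodes() {}
    private:
        const GridT* mDeviceSrcGrid{nullptr};
    public:
        const GridT* deviceSrcGrid() const { return mDeviceSrcGrid; }
    };

    // After: only getHandle() stays public; the stages are private and use
    // mDeviceSrcGrid directly, so the accessor is gone.
    class CoarsenGridAfter
    {
    public:
        GridHandle<BufferT> getHandle(const BufferT& = BufferT()) { return {}; }
    private:
        void coarsenRoot() {}
        void coarsenInternalNodes() {}
        void coarsenLeafNodes() {}
        const GridT* mDeviceSrcGrid{nullptr};
    };

The same pattern applies in DilateGrid, MergeGrids, PruneGrid, and RefineGrid: getHandle() remains the only public entry point, and the intermediate stages become internal details.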

nanovdb/nanovdb/tools/cuda/CoarsenGrid.cuh

Lines changed: 3 additions & 6 deletions
@@ -6,7 +6,7 @@

     \authors Efty Sifakis

-    \brief 2x Coarsening/Downsampling of NanoVDB indexGrids on the device
+    \brief 2x Topological coarsening of NanoVDB indexGrids on the device

     \warning The header file contains cuda device code so be sure
              to only include it in .cu files (or other .cuh files)
@@ -62,6 +62,7 @@ public:
    GridHandle<BufferT>
    getHandle(const BufferT &buffer = BufferT());

+private:
    void coarsenRoot();

    void coarsenInternalNodes();
@@ -70,7 +71,6 @@ public:

    void coarsenLeafNodes();

-private:
    static constexpr unsigned int mNumThreads = 128;// for kernels spawned via lambdaKernel (others may specialize)
    static unsigned int numBlocks(unsigned int n) {return (n + mNumThreads - 1) / mNumThreads;}

@@ -80,9 +80,6 @@ private:
    int mVerbose{0};
    const GridT *mDeviceSrcGrid;
    TreeData mSrcTreeData;
-
-public:
-    const GridT* deviceSrcGrid() const { return mDeviceSrcGrid; }
};// tools::cuda::CoarsenGrid<BuildT>

//-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
@@ -224,7 +221,7 @@ void CoarsenGrid<BuildT>::processGridTreeRoot()
{
    // Copy GridData from source grid
    // By convention: this will duplicate grid name and map. Others will be reset later
-    cudaCheck(cudaMemcpyAsync(&mBuilder.data()->getGrid(), deviceSrcGrid()->data(), GridT::memUsage(), cudaMemcpyDeviceToDevice, mStream));
+    cudaCheck(cudaMemcpyAsync(&mBuilder.data()->getGrid(), mDeviceSrcGrid->data(), GridT::memUsage(), cudaMemcpyDeviceToDevice, mStream));
    util::cuda::lambdaKernel<<<1, 1, 0, mStream>>>(1, topology::detail::BuildGridTreeRootFunctor<BuildT>(), mBuilder.deviceData());
    cudaCheckError();
}// CoarsenGrid<BuildT>::processGridTreeRoot

nanovdb/nanovdb/tools/cuda/DilateGrid.cuh

Lines changed: 7 additions & 10 deletions
@@ -66,6 +66,7 @@ public:
    GridHandle<BufferT>
    getHandle(const BufferT &buffer = BufferT());

+private:
    void dilateRoot();

    void dilateInternalNodes();
@@ -74,7 +75,6 @@ public:

    void dilateLeafNodes();

-private:
    static constexpr unsigned int mNumThreads = 128;// for kernels spawned via lambdaKernel (others may specialize)
    static unsigned int numBlocks(unsigned int n) {return (n + mNumThreads - 1) / mNumThreads;}

@@ -85,9 +85,6 @@ private:
    const GridT *mDeviceSrcGrid;
    morphology::NearestNeighbors mOp{morphology::NN_FACE_EDGE_VERTEX};
    TreeData mSrcTreeData;
-
-public:
-    const GridT* deviceSrcGrid() const { return mDeviceSrcGrid; }
};// tools::cuda::DilateGrid<BuildT>

//-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
@@ -238,17 +235,17 @@ void DilateGrid<BuildT>::dilateInternalNodes()
        using Op = util::morphology::cuda::DilateInternalNodesFunctor<BuildT, morphology::NN_FACE>;
        util::cuda::operatorKernel<Op>
            <<<dim3(mSrcTreeData.mNodeCount[1],Op::SlicesPerLowerNode,1), Op::MaxThreadsPerBlock, 0, mStream>>>
-            (deviceSrcGrid(), mBuilder.deviceProcessedRoot(), mBuilder.deviceUpperMasks(), mBuilder.deviceLowerMasks()); }
+            (mDeviceSrcGrid, mBuilder.deviceProcessedRoot(), mBuilder.deviceUpperMasks(), mBuilder.deviceLowerMasks()); }
    else if (mOp == morphology::NN_FACE_EDGE) {
        using Op = util::morphology::cuda::DilateInternalNodesFunctor<BuildT, morphology::NN_FACE_EDGE>;
        util::cuda::operatorKernel<Op>
            <<<dim3(mSrcTreeData.mNodeCount[1],Op::SlicesPerLowerNode,1), Op::MaxThreadsPerBlock, 0, mStream>>>
-            (deviceSrcGrid(), mBuilder.deviceProcessedRoot(), mBuilder.deviceUpperMasks(), mBuilder.deviceLowerMasks()); }
+            (mDeviceSrcGrid, mBuilder.deviceProcessedRoot(), mBuilder.deviceUpperMasks(), mBuilder.deviceLowerMasks()); }
    else if (mOp == morphology::NN_FACE_EDGE_VERTEX) {
        using Op = util::morphology::cuda::DilateInternalNodesFunctor<BuildT, morphology::NN_FACE_EDGE_VERTEX>;
        util::cuda::operatorKernel<Op>
            <<<dim3(mSrcTreeData.mNodeCount[1],Op::SlicesPerLowerNode,1), Op::MaxThreadsPerBlock, 0, mStream>>>
-            (deviceSrcGrid(), mBuilder.deviceProcessedRoot(), mBuilder.deviceUpperMasks(), mBuilder.deviceLowerMasks()); }
+            (mDeviceSrcGrid, mBuilder.deviceProcessedRoot(), mBuilder.deviceUpperMasks(), mBuilder.deviceLowerMasks()); }
    }
}// DilateGrid<BuildT>::dilateInternalNodes

@@ -259,7 +256,7 @@ void DilateGrid<BuildT>::processGridTreeRoot()
{
    // Copy GridData from source grid
    // By convention: this will duplicate grid name and map. Others will be reset later
-    cudaCheck(cudaMemcpyAsync(&mBuilder.data()->getGrid(), deviceSrcGrid()->data(), GridT::memUsage(), cudaMemcpyDeviceToDevice, mStream));
+    cudaCheck(cudaMemcpyAsync(&mBuilder.data()->getGrid(), mDeviceSrcGrid->data(), GridT::memUsage(), cudaMemcpyDeviceToDevice, mStream));
    util::cuda::lambdaKernel<<<1, 1, 0, mStream>>>(1, topology::detail::BuildGridTreeRootFunctor<BuildT>(), mBuilder.deviceData());
    cudaCheckError();
}// DilateGrid<BuildT>::processGridTreeRoot
@@ -276,14 +273,14 @@ void DilateGrid<BuildT>::dilateLeafNodes()
        using Op = util::morphology::cuda::DilateLeafNodesFunctor<BuildT, morphology::NN_FACE>;
        util::cuda::operatorKernel<Op>
            <<<dim3(mBuilder.data()->nodeCount[1],Op::SlicesPerLowerNode,1), Op::MaxThreadsPerBlock, 0, mStream>>>
-            (deviceSrcGrid(), static_cast<GridT*>(mBuilder.data()->d_bufferPtr)); }
+            (mDeviceSrcGrid, static_cast<GridT*>(mBuilder.data()->d_bufferPtr)); }
    else if (mOp == morphology::NN_FACE_EDGE)
        throw std::runtime_error("dilateLeafNodes() not implemented for NN_FACE_EDGE stencil");
    else if (mOp == morphology::NN_FACE_EDGE_VERTEX) {
        using Op = util::morphology::cuda::DilateLeafNodesFunctor<BuildT, morphology::NN_FACE_EDGE_VERTEX>;
        util::cuda::operatorKernel<Op>
            <<<dim3(mBuilder.data()->nodeCount[1],Op::SlicesPerLowerNode,1), Op::MaxThreadsPerBlock>>>
-            (deviceSrcGrid(), static_cast<GridT*>(mBuilder.data()->d_bufferPtr)); }
+            (mDeviceSrcGrid, static_cast<GridT*>(mBuilder.data()->d_bufferPtr)); }
    }

    // Update leaf offsets and prefix sums

nanovdb/nanovdb/tools/cuda/MergeGrids.cuh

Lines changed: 7 additions & 11 deletions
@@ -63,6 +63,7 @@ public:
    GridHandle<BufferT>
    getHandle(const BufferT &buffer = BufferT());

+private:
    void mergeRoot();

    void mergeInternalNodes();
@@ -71,7 +72,6 @@ public:

    void mergeLeafNodes();

-private:
    static constexpr unsigned int mNumThreads = 128;// for kernels spawned via lambdaKernel (others may specialize)
    static unsigned int numBlocks(unsigned int n) {return (n + mNumThreads - 1) / mNumThreads;}

@@ -83,10 +83,6 @@ private:
    const GridT *mDeviceSrcGrid2;
    TreeData mSrcTreeData1;
    TreeData mSrcTreeData2;
-
-public:
-    const GridT* deviceSrcGrid1() const { return mDeviceSrcGrid1; }
-    const GridT* deviceSrcGrid2() const { return mDeviceSrcGrid2; }
};// tools::cuda::MergeGrids<BuildT>

//-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
@@ -236,12 +232,12 @@ void MergeGrids<BuildT>::mergeInternalNodes()
    if (mSrcTreeData1.mNodeCount[1]) { // Unless the first grid to merge is empty
        util::cuda::operatorKernel<Op>
            <<<mSrcTreeData1.mNodeCount[1], Op::MaxThreadsPerBlock, 0, mStream>>>
-            (deviceSrcGrid1(), mBuilder.deviceProcessedRoot(), mBuilder.deviceUpperMasks(), mBuilder.deviceLowerMasks());
+            (mDeviceSrcGrid1, mBuilder.deviceProcessedRoot(), mBuilder.deviceUpperMasks(), mBuilder.deviceLowerMasks());
    }
    if (mSrcTreeData2.mNodeCount[1]) { // Unless the second grid to merge is empty
        util::cuda::operatorKernel<Op>
            <<<mSrcTreeData2.mNodeCount[1], Op::MaxThreadsPerBlock, 0, mStream>>>
-            (deviceSrcGrid2(), mBuilder.deviceProcessedRoot(), mBuilder.deviceUpperMasks(), mBuilder.deviceLowerMasks());
+            (mDeviceSrcGrid2, mBuilder.deviceProcessedRoot(), mBuilder.deviceUpperMasks(), mBuilder.deviceLowerMasks());
    }
}// MergeGrids<BuildT>::mergeInternalNodes

@@ -250,10 +246,10 @@ void MergeGrids<BuildT>::mergeInternalNodes()
template <typename BuildT>
void MergeGrids<BuildT>::processGridTreeRoot()
{
-    // Copy GridData from source grid
+    // Copy GridData from first source grid
    // TODO: Check for instances where extra processing is needed
    // TODO: check that the second grid input has consistent GridData, too
-    cudaCheck(cudaMemcpyAsync(&mBuilder.data()->getGrid(), deviceSrcGrid1()->data(), GridT::memUsage(), cudaMemcpyDeviceToDevice, mStream));
+    cudaCheck(cudaMemcpyAsync(&mBuilder.data()->getGrid(), mDeviceSrcGrid1->data(), GridT::memUsage(), cudaMemcpyDeviceToDevice, mStream));
    util::cuda::lambdaKernel<<<1, 1, 0, mStream>>>(1, topology::detail::BuildGridTreeRootFunctor<BuildT>(), mBuilder.deviceData());
    cudaCheckError();
}// MergeGrids<BuildT>::processGridTreeRoot
@@ -267,12 +263,12 @@ void MergeGrids<BuildT>::mergeLeafNodes()
    if (mSrcTreeData1.mNodeCount[1]) { // Unless first input grid is empty
        util::cuda::operatorKernel<Op>
            <<<dim3(mSrcTreeData1.mNodeCount[1],Op::SlicesPerLowerNode,1), Op::MaxThreadsPerBlock, 0, mStream>>>
-            (deviceSrcGrid1(), static_cast<GridT*>(mBuilder.data()->d_bufferPtr));
+            (mDeviceSrcGrid1, static_cast<GridT*>(mBuilder.data()->d_bufferPtr));
    }
    if (mSrcTreeData2.mNodeCount[1]) { // Unless second input grid is empty
        util::cuda::operatorKernel<Op>
            <<<dim3(mSrcTreeData2.mNodeCount[1],Op::SlicesPerLowerNode,1), Op::MaxThreadsPerBlock, 0, mStream>>>
-            (deviceSrcGrid2(), static_cast<GridT*>(mBuilder.data()->d_bufferPtr));
+            (mDeviceSrcGrid2, static_cast<GridT*>(mBuilder.data()->d_bufferPtr));
    }

    // Update leaf offsets and prefix sums

nanovdb/nanovdb/tools/cuda/PruneGrid.cuh

Lines changed: 2 additions & 5 deletions
@@ -63,6 +63,7 @@ public:
    GridHandle<BufferT>
    getHandle(const BufferT &buffer = BufferT());

+private:
    void pruneRoot();

    void pruneInternalNodes();
@@ -71,7 +72,6 @@ public:

    void pruneLeafNodes();

-private:
    static constexpr unsigned int mNumThreads = 128;// for kernels spawned via lambdaKernel (others may specialize)
    static unsigned int numBlocks(unsigned int n) {return (n + mNumThreads - 1) / mNumThreads;}

@@ -82,9 +82,6 @@ private:
    const GridT *mDeviceSrcGrid;
    const Mask<3> *mDeviceSrcLeafMask;
    TreeData mSrcTreeData;
-
-public:
-    const GridT* deviceSrcGrid() const { return mDeviceSrcGrid; }
};// tools::cuda::PruneGrid<BuildT>

//-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
@@ -227,7 +224,7 @@ void PruneGrid<BuildT>::processGridTreeRoot()
{
    // Copy GridData from source grid
    // By convention: this will duplicate grid name and map. Others will be reset later
-    cudaCheck(cudaMemcpyAsync(&mBuilder.data()->getGrid(), deviceSrcGrid()->data(), GridT::memUsage(), cudaMemcpyDeviceToDevice, mStream));
+    cudaCheck(cudaMemcpyAsync(&mBuilder.data()->getGrid(), mDeviceSrcGrid->data(), GridT::memUsage(), cudaMemcpyDeviceToDevice, mStream));
    util::cuda::lambdaKernel<<<1, 1, 0, mStream>>>(1, topology::detail::BuildGridTreeRootFunctor<BuildT>(), mBuilder.deviceData());
    cudaCheckError();
}// PruneGrid<BuildT>::processGridTreeRoot

nanovdb/nanovdb/tools/cuda/RefineGrid.cuh

Lines changed: 3 additions & 6 deletions
@@ -6,7 +6,7 @@

     \authors Efty Sifakis

-    \brief 2x Subdivision/Upsampling of NanoVDB indexGrids on the device
+    \brief 2x Topological refinement of NanoVDB indexGrids on the device

     \warning The header file contains cuda device code so be sure
              to only include it in .cu files (or other .cuh files)
@@ -62,6 +62,7 @@ public:
    GridHandle<BufferT>
    getHandle(const BufferT &buffer = BufferT());

+private:
    void refineRoot();

    void refineInternalNodes();
@@ -70,7 +71,6 @@ public:

    void refineLeafNodes();

-private:
    static constexpr unsigned int mNumThreads = 128;// for kernels spawned via lambdaKernel (others may specialize)
    static unsigned int numBlocks(unsigned int n) {return (n + mNumThreads - 1) / mNumThreads;}

@@ -80,9 +80,6 @@ private:
    int mVerbose{0};
    const GridT *mDeviceSrcGrid;
    TreeData mSrcTreeData;
-
-public:
-    const GridT* deviceSrcGrid() const { return mDeviceSrcGrid; }
};// tools::cuda::RefineGrid<BuildT>

//-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
@@ -239,7 +236,7 @@ void RefineGrid<BuildT>::processGridTreeRoot()
{
    // Copy GridData from source grid
    // By convention: this will duplicate grid name and map. Others will be reset later
-    cudaCheck(cudaMemcpyAsync(&mBuilder.data()->getGrid(), deviceSrcGrid()->data(), GridT::memUsage(), cudaMemcpyDeviceToDevice, mStream));
+    cudaCheck(cudaMemcpyAsync(&mBuilder.data()->getGrid(), mDeviceSrcGrid->data(), GridT::memUsage(), cudaMemcpyDeviceToDevice, mStream));
    util::cuda::lambdaKernel<<<1, 1, 0, mStream>>>(1, topology::detail::BuildGridTreeRootFunctor<BuildT>(), mBuilder.deviceData());
    cudaCheckError();
}// RefineGrid<BuildT>::processGridTreeRoot
