Skip to content

Commit 64b8084

Browse files
Merge pull request #773 from Devsh-Graphics-Programming/bindless_blit
Typo Fixes for Acceleration Structures
2 parents 32f20c7 + e0ed1ae commit 64b8084

14 files changed

+69
-1179
lines changed

examples_tests

include/nbl/video/IGPUAccelerationStructure.h

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -81,13 +81,13 @@ class IGPUAccelerationStructure : public asset::IAccelerationStructure, public I
8181
// https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-vkCmdBuildAccelerationStructuresIndirectKHR-geometry-03673
8282
static inline bool invalidInputBuffer(const asset::SBufferBinding<const BufferType>& binding, const size_t byteOffset, const size_t count, const size_t elementSize, const size_t alignment)
8383
{
84-
if (!binding.buffer || binding.offset+byteOffset+count*elementSize<binding.buffer->getSize())
84+
if (!binding.buffer || binding.offset+byteOffset+count*elementSize>binding.buffer->getSize())
8585
return true;
8686

8787
if constexpr (std::is_same_v<BufferType,IGPUBuffer>)
8888
{
8989
const auto deviceAddr = binding.buffer->getDeviceAddress();
90-
if (!deviceAddr==0ull || !core::is_aligned_to(deviceAddr,alignment))
90+
if (deviceAddr==0ull || !core::is_aligned_to(deviceAddr,alignment))
9191
return true;
9292

9393
if (!binding.buffer->getCreationParams().usage.hasFlags(IGPUBuffer::E_USAGE_FLAGS::EUF_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT))
@@ -228,7 +228,7 @@ class IGPUBottomLevelAccelerationStructure : public asset::IBottomLevelAccelerat
228228

229229
const size_t vertexSize = asset::getTexelOrBlockBytesize(geometry.vertexFormat);
230230
// TODO: improve in line with the spec https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-vkCmdBuildAccelerationStructuresKHR-pInfos-03711
231-
const size_t vertexAlignment = core::max(vertexSize/4u,1ull);
231+
const size_t vertexAlignment = core::max(core::roundDownToPoT(vertexSize/asset::getFormatChannelCount(geometry.vertexFormat)),1ull);
232232
// https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-VkAccelerationStructureGeometryTrianglesDataKHR-vertexStride-03735
233233
if (!core::is_aligned_to(geometry.vertexStride,vertexAlignment))
234234
return false;

include/nbl/video/ILogicalDevice.h

Lines changed: 8 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -406,7 +406,7 @@ class NBL_API2 ILogicalDevice : public core::IReferenceCounted, public IDeviceMe
406406
const uint32_t* const pMaxPrimitiveCounts
407407
) const
408408
{
409-
if (invalidFeaturesForASBuild<Geometry::buffer_t>(motionBlur))
409+
if (invalidFeaturesForASBuild<typename Geometry::buffer_t>(motionBlur))
410410
{
411411
NBL_LOG_ERROR("Required features are not enabled");
412412
return {};
@@ -567,17 +567,19 @@ class NBL_API2 ILogicalDevice : public core::IReferenceCounted, public IDeviceMe
567567
return false;
568568
break;
569569
}
570+
// https://vulkan.lunarg.com/doc/view/1.3.290.0/windows/1.3-extensions/vkspec.html#VUID-vkWriteAccelerationStructuresPropertiesKHR-accelerationStructureHostCommands-03585
570571
if (!getEnabledFeatures().accelerationStructureHostCommands)
571572
{
572573
NBL_LOG_ERROR("Feature `acceleration structure` host commands is not enabled");
573574
return false;
574575
}
576+
// https://vulkan.lunarg.com/doc/view/1.3.290.0/windows/1.3-extensions/vkspec.html#VUID-vkWriteAccelerationStructuresPropertiesKHR-buffer-03733
575577
for (const auto& as : accelerationStructures)
576-
if (invalidAccelerationStructureForHostOperations(as))
577-
{
578-
NBL_LOG_ERROR("Invalid acceleration structure for host operations");
579-
return false;
580-
}
578+
if (invalidAccelerationStructureForHostOperations(as))
579+
{
580+
NBL_LOG_ERROR("Invalid acceleration structure for host operations");
581+
return false;
582+
}
581583
// unfortunately cannot validate if they're built and if they're built with the right flags
582584
return writeAccelerationStructuresProperties_impl(accelerationStructures,type,data,stride);
583585
}

include/nbl/video/utilities/CComputeBlit.h

Lines changed: 9 additions & 42 deletions
Original file line number | Diff line number | Diff line change
@@ -134,10 +134,15 @@ class CComputeBlit : public core::IReferenceCounted
134134
// the absolute minimum needed to store a single pixel of a worst case format (precise, all 4 channels)
135135
constexpr auto singlePixelStorage = 4*sizeof(hlsl::float32_t);
136136
constexpr auto ratio = singlePixelStorage/sizeof(uint16_t);
137-
const auto paddedAlphaBinCount = core::min(core::roundUp(baseBucketCount,workgroupSize),workgroupSize*ratio);
137+
// atomicAdd gets performed on MSB or LSB of a single DWORD
138+
const auto paddedAlphaBinCount = core::min(core::roundUp<uint16_t>(baseBucketCount,workgroupSize*2),workgroupSize*ratio);
138139
return paddedAlphaBinCount*layersToBlit;
139140
}
140-
141+
142+
static inline uint32_t getNormalizationByteSize(const uint16_t workgroupSize, const asset::E_FORMAT intermediateAlpha, const uint32_t layersToBlit)
143+
{
144+
return getAlphaBinCount(workgroupSize,intermediateAlpha,layersToBlit)*sizeof(uint16_t)+sizeof(uint32_t)+sizeof(uint32_t);
145+
}
141146
#if 0
142147

143148
//! Returns the number of output texels produced by one workgroup, deciding factor is `m_availableSharedMemory`.
@@ -337,19 +342,14 @@ class CComputeBlit : public core::IReferenceCounted
337342
{
338343
dispatch_info_t dispatchInfo;
339344
buildAlphaTestDispatchInfo(dispatchInfo, inImageExtent, inImageType, layersToBlit);
340-
341-
cmdbuf->bindDescriptorSets(asset::EPBP_COMPUTE, alphaTestPipeline->getLayout(), 0u, 1u, &alphaTestDS);
342-
cmdbuf->bindComputePipeline(alphaTestPipeline);
345+
// bind omitted
343346
dispatchHelper(cmdbuf, alphaTestPipeline->getLayout(), pushConstants, dispatchInfo);
344347
}
345348

346349
{
347350
dispatch_info_t dispatchInfo;
348351
buildBlitDispatchInfo<BlitUtilities>(dispatchInfo, inImageExtent, outImageExtent, inImageFormat, inImageType, kernels, workgroupSize, layersToBlit);
349-
350-
video::IGPUDescriptorSet* ds_raw[] = { blitDS, blitWeightsDS };
351-
cmdbuf->bindDescriptorSets(asset::EPBP_COMPUTE, blitPipeline->getLayout(), 0, 2, ds_raw);
352-
cmdbuf->bindComputePipeline(blitPipeline);
352+
// bind omitted
353353
dispatchHelper(cmdbuf, blitPipeline->getLayout(), pushConstants, dispatchInfo);
354354
}
355355

@@ -359,39 +359,6 @@ class CComputeBlit : public core::IReferenceCounted
359359
dispatch_info_t dispatchInfo;
360360
buildNormalizationDispatchInfo(dispatchInfo, outImageExtent, inImageType, layersToBlit);
361361

362-
assert(coverageAdjustmentScratchBuffer);
363-
IGPUCommandBuffer::SPipelineBarrierDependencyInfo depInfo;
364-
// Memory dependency to ensure the alpha test pass has finished writing to alphaTestCounterBuffer
365-
video::IGPUCommandBuffer::SPipelineBarrierDependencyInfo::buffer_barrier_t alphaTestBarrier = {};
366-
alphaTestBarrier.barrier.dep.srcStageMask = asset::PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT;
367-
alphaTestBarrier.barrier.dep.srcAccessMask = asset::ACCESS_FLAGS::SHADER_WRITE_BITS;
368-
alphaTestBarrier.barrier.dep.dstStageMask = asset::PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT;
369-
alphaTestBarrier.barrier.dep.dstAccessMask = asset::ACCESS_FLAGS::SHADER_READ_BITS;
370-
alphaTestBarrier.range.buffer = coverageAdjustmentScratchBuffer;
371-
alphaTestBarrier.range.size = coverageAdjustmentScratchBuffer->getSize();
372-
alphaTestBarrier.range.offset = 0;
373-
374-
// Memory dependency to ensure that the previous compute pass has finished writing to the output image,
375-
// also transitions the layout of said image: GENERAL -> SHADER_READ_ONLY_OPTIMAL
376-
video::IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t readyForNorm = {};
377-
readyForNorm.barrier.dep.srcStageMask = asset::PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT;
378-
readyForNorm.barrier.dep.srcAccessMask = asset::ACCESS_FLAGS::SHADER_WRITE_BITS;
379-
readyForNorm.barrier.dep.dstStageMask = asset::PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT;
380-
readyForNorm.barrier.dep.dstAccessMask = asset::ACCESS_FLAGS::SHADER_READ_BITS;
381-
readyForNorm.oldLayout = video::IGPUImage::LAYOUT::GENERAL;
382-
readyForNorm.newLayout = video::IGPUImage::LAYOUT::READ_ONLY_OPTIMAL;
383-
readyForNorm.image = normalizationInImage.get();
384-
readyForNorm.subresourceRange.aspectMask = asset::IImage::EAF_COLOR_BIT;
385-
readyForNorm.subresourceRange.levelCount = 1u;
386-
readyForNorm.subresourceRange.layerCount = normalizationInImage->getCreationParameters().arrayLayers;
387-
388-
depInfo.bufBarriers = { &alphaTestBarrier, &alphaTestBarrier + 1 };
389-
depInfo.imgBarriers = { &readyForNorm, &readyForNorm + 1 };
390-
391-
cmdbuf->pipelineBarrier(asset::E_DEPENDENCY_FLAGS::EDF_NONE, depInfo);
392-
393-
cmdbuf->bindDescriptorSets(asset::EPBP_COMPUTE, normalizationPipeline->getLayout(), 0u, 1u, &normalizationDS);
394-
cmdbuf->bindComputePipeline(normalizationPipeline);
395362
dispatchHelper(cmdbuf, normalizationPipeline->getLayout(), pushConstants, dispatchInfo);
396363
}
397364
}

0 commit comments

Comments
 (0)