@@ -134,10 +134,15 @@ class CComputeBlit : public core::IReferenceCounted
134
134
// the absolute minimum needed to store a single pixel of a worst case format (precise, all 4 channels)
135
135
constexpr auto singlePixelStorage = 4 *sizeof (hlsl::float32_t );
136
136
constexpr auto ratio = singlePixelStorage/sizeof (uint16_t );
137
- const auto paddedAlphaBinCount = core::min (core::roundUp (baseBucketCount,workgroupSize),workgroupSize*ratio);
137
+ // atomicAdd gets performed on MSB or LSB of a single DWORD
138
+ const auto paddedAlphaBinCount = core::min (core::roundUp<uint16_t >(baseBucketCount,workgroupSize*2 ),workgroupSize*ratio);
138
139
return paddedAlphaBinCount*layersToBlit;
139
140
}
140
-
141
+
142
// Computes a byte count as: getAlphaBinCount(workgroupSize, intermediateAlpha, layersToBlit)
// entries of sizeof(uint16_t) each, plus two additional sizeof(uint32_t) values.
// All three parameters are forwarded unchanged to getAlphaBinCount.
// NOTE(review): what the two extra uint32_t values hold is not visible in this hunk — confirm against the shader-side layout.
// NOTE(review): the sizeof terms make the arithmetic size_t; the result is converted to uint32_t on return.
+ static inline uint32_t getNormalizationByteSize (const uint16_t workgroupSize, const asset::E_FORMAT intermediateAlpha, const uint32_t layersToBlit)
143
+ {
144
+ return getAlphaBinCount (workgroupSize,intermediateAlpha,layersToBlit)*sizeof (uint16_t )+sizeof (uint32_t )+sizeof (uint32_t );
145
+ }
141
146
#if 0
142
147
143
148
//! Returns the number of output texels produced by one workgroup, deciding factor is `m_availableSharedMemory`.
@@ -337,19 +342,14 @@ class CComputeBlit : public core::IReferenceCounted
337
342
{
338
343
dispatch_info_t dispatchInfo;
339
344
buildAlphaTestDispatchInfo(dispatchInfo, inImageExtent, inImageType, layersToBlit);
340
-
341
- cmdbuf->bindDescriptorSets(asset::EPBP_COMPUTE, alphaTestPipeline->getLayout(), 0u, 1u, &alphaTestDS);
342
- cmdbuf->bindComputePipeline(alphaTestPipeline);
345
+ // bind omitted
343
346
dispatchHelper(cmdbuf, alphaTestPipeline->getLayout(), pushConstants, dispatchInfo);
344
347
}
345
348
346
349
{
347
350
dispatch_info_t dispatchInfo;
348
351
buildBlitDispatchInfo<BlitUtilities>(dispatchInfo, inImageExtent, outImageExtent, inImageFormat, inImageType, kernels, workgroupSize, layersToBlit);
349
-
350
- video::IGPUDescriptorSet* ds_raw[] = { blitDS, blitWeightsDS };
351
- cmdbuf->bindDescriptorSets(asset::EPBP_COMPUTE, blitPipeline->getLayout(), 0, 2, ds_raw);
352
- cmdbuf->bindComputePipeline(blitPipeline);
352
+ // bind omitted
353
353
dispatchHelper(cmdbuf, blitPipeline->getLayout(), pushConstants, dispatchInfo);
354
354
}
355
355
@@ -359,39 +359,6 @@ class CComputeBlit : public core::IReferenceCounted
359
359
dispatch_info_t dispatchInfo;
360
360
buildNormalizationDispatchInfo(dispatchInfo, outImageExtent, inImageType, layersToBlit);
361
361
362
- assert(coverageAdjustmentScratchBuffer);
363
- IGPUCommandBuffer::SPipelineBarrierDependencyInfo depInfo;
364
- // Memory dependency to ensure the alpha test pass has finished writing to alphaTestCounterBuffer
365
- video::IGPUCommandBuffer::SPipelineBarrierDependencyInfo::buffer_barrier_t alphaTestBarrier = {};
366
- alphaTestBarrier.barrier.dep.srcStageMask = asset::PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT;
367
- alphaTestBarrier.barrier.dep.srcAccessMask = asset::ACCESS_FLAGS::SHADER_WRITE_BITS;
368
- alphaTestBarrier.barrier.dep.dstStageMask = asset::PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT;
369
- alphaTestBarrier.barrier.dep.dstAccessMask = asset::ACCESS_FLAGS::SHADER_READ_BITS;
370
- alphaTestBarrier.range.buffer = coverageAdjustmentScratchBuffer;
371
- alphaTestBarrier.range.size = coverageAdjustmentScratchBuffer->getSize();
372
- alphaTestBarrier.range.offset = 0;
373
-
374
- // Memory dependency to ensure that the previous compute pass has finished writing to the output image,
375
- // also transitions the layout of said image: GENERAL -> SHADER_READ_ONLY_OPTIMAL
376
- video::IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t readyForNorm = {};
377
- readyForNorm.barrier.dep.srcStageMask = asset::PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT;
378
- readyForNorm.barrier.dep.srcAccessMask = asset::ACCESS_FLAGS::SHADER_WRITE_BITS;
379
- readyForNorm.barrier.dep.dstStageMask = asset::PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT;
380
- readyForNorm.barrier.dep.dstAccessMask = asset::ACCESS_FLAGS::SHADER_READ_BITS;
381
- readyForNorm.oldLayout = video::IGPUImage::LAYOUT::GENERAL;
382
- readyForNorm.newLayout = video::IGPUImage::LAYOUT::READ_ONLY_OPTIMAL;
383
- readyForNorm.image = normalizationInImage.get();
384
- readyForNorm.subresourceRange.aspectMask = asset::IImage::EAF_COLOR_BIT;
385
- readyForNorm.subresourceRange.levelCount = 1u;
386
- readyForNorm.subresourceRange.layerCount = normalizationInImage->getCreationParameters().arrayLayers;
387
-
388
- depInfo.bufBarriers = { &alphaTestBarrier, &alphaTestBarrier + 1 };
389
- depInfo.imgBarriers = { &readyForNorm, &readyForNorm + 1 };
390
-
391
- cmdbuf->pipelineBarrier(asset::E_DEPENDENCY_FLAGS::EDF_NONE, depInfo);
392
-
393
- cmdbuf->bindDescriptorSets(asset::EPBP_COMPUTE, normalizationPipeline->getLayout(), 0u, 1u, &normalizationDS);
394
- cmdbuf->bindComputePipeline(normalizationPipeline);
395
362
dispatchHelper(cmdbuf, normalizationPipeline->getLayout(), pushConstants, dispatchInfo);
396
363
}
397
364
}
0 commit comments