diff --git a/26_CentralLimitBoxBlur/CMakeLists.txt b/26_CentralLimitBoxBlur/CMakeLists.txt
new file mode 100644
index 000000000..bd3146859
--- /dev/null
+++ b/26_CentralLimitBoxBlur/CMakeLists.txt
@@ -0,0 +1,19 @@
+nbl_create_executable_project("" "" "" "" "${NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET}")
+
+if(NBL_EMBED_BUILTIN_RESOURCES)
+	set(_BR_TARGET_ ${EXECUTABLE_NAME}_builtinResourceData)
+	set(RESOURCE_DIR "app_resources")
+
+	get_filename_component(_SEARCH_DIRECTORIES_ "${CMAKE_CURRENT_SOURCE_DIR}" ABSOLUTE)
+	get_filename_component(_OUTPUT_DIRECTORY_SOURCE_ "${CMAKE_CURRENT_BINARY_DIR}/src" ABSOLUTE)
+	get_filename_component(_OUTPUT_DIRECTORY_HEADER_ "${CMAKE_CURRENT_BINARY_DIR}/include" ABSOLUTE)
+
+	file(GLOB_RECURSE BUILTIN_RESOURCE_FILES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}" "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}/*")
+	foreach(RES_FILE ${BUILTIN_RESOURCE_FILES})
+		LIST_BUILTIN_RESOURCE(RESOURCES_TO_EMBED "${RES_FILE}")
+	endforeach()
+
+	ADD_CUSTOM_BUILTIN_RESOURCES(${_BR_TARGET_} RESOURCES_TO_EMBED "${_SEARCH_DIRECTORIES_}" "${RESOURCE_DIR}" "nbl::this_example::builtin" "${_OUTPUT_DIRECTORY_HEADER_}" "${_OUTPUT_DIRECTORY_SOURCE_}")
+
+	LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} ${_BR_TARGET_})
+endif()
\ No newline at end of file
diff --git a/26_CentralLimitBoxBlur/app_resources/descriptors.hlsl b/26_CentralLimitBoxBlur/app_resources/descriptors.hlsl
new file mode 100644
index 000000000..a2226fa45
--- /dev/null
+++ b/26_CentralLimitBoxBlur/app_resources/descriptors.hlsl
@@ -0,0 +1,50 @@
+#include "nbl/builtin/hlsl/blur/common.hlsl"
+
+[[vk::binding( 0, 0 )]] Texture2D<float32_t4> input;
+[[vk::binding( 1, 0 )]] RWTexture2D<float32_t4> output;
+
+
+// TODO: figure out the proper way to do templated BufferAccessors
+struct BufferAccessor
+{
+	uint32_t2 chosenAxis;
+
+	nbl::hlsl::float32_t get( const uint32_t linearIndex, const uint32_t channel )
+	{
+		uint32_t3 texSize;
+		input.GetDimensions( 0, texSize.x, texSize.y, texSize.z );
+
+		uint32_t axisSize = dot( texSize.xy, chosenAxis );
+
+		uint32_t2 coordinate = { linearIndex % axisSize, linearIndex / axisSize };
+		float32_t data = 0.f;
+		if( all( coordinate < texSize.xy ) )
+		{
+			float32_t4 pixel = input[ coordinate.xy ];
+			data = pixel[ channel ];
+		}
+
+		return data;
+	}
+
+	void set( const uint32_t linearIndex, const uint32_t channel, NBL_CONST_REF_ARG( float32_t ) val )
+	{
+		uint32_t2 texSize;
+		output.GetDimensions( texSize.x, texSize.y );
+
+		uint32_t axisSize = dot( texSize, chosenAxis );
+
+		uint32_t2 coordinate = { linearIndex % axisSize, linearIndex / axisSize };
+		if( all( coordinate < texSize ) )
+		{
+			output[ coordinate.xy ][ channel ] = val;
+		}
+	}
+};
+
+BufferAccessor BufferAccessorCtor( uint32_t2 chosenAxis )
+{
+	BufferAccessor ba;
+	ba.chosenAxis = chosenAxis;
+	return ba;
+}
diff --git a/26_CentralLimitBoxBlur/app_resources/main.comp.hlsl b/26_CentralLimitBoxBlur/app_resources/main.comp.hlsl
new file mode 100644
index 000000000..dbcef350e
--- /dev/null
+++ b/26_CentralLimitBoxBlur/app_resources/main.comp.hlsl
@@ -0,0 +1,32 @@
+// Copyright (C) 2018-2023 - DevSH Graphics Programming Sp. z O.O.
+// This file is part of the "Nabla Engine".
+// For conditions of distribution and use, see copyright notice in nabla.h
+
+#pragma shader_stage(compute)
+
+#include "nbl/builtin/hlsl/blur/common.hlsl"
+#include "descriptors.hlsl"
+
+#include "nbl/builtin/hlsl/blur/box_blur.hlsl"
+
+[[vk::push_constant]]
+BoxBlurParams boxBlurParams;
+
+[numthreads( WORKGROUP_SIZE, 1, 1 )]
+void main( uint3 invocationID : SV_DispatchThreadID )
+{
+	uint32_t direction = boxBlurParams.getDirection();
+	uint32_t wrapMode = boxBlurParams.getWrapMode();
+	nbl::hlsl::float32_t4 borderColor = float32_t4( 1.f, 0.f, 1.f, 1.f );
+	if( boxBlurParams.getWrapMode() == WRAP_MODE_CLAMP_TO_BORDER )
+	{
+		borderColor = boxBlurParams.getBorderColor();
+	}
+
+	BufferAccessor textureAccessor = BufferAccessorCtor( boxBlurParams.chosenAxis );
+
+	for( uint32_t ch = 0; ch < boxBlurParams.getChannelCount(); ++ch )
+	{
+		BoxBlur( ch, boxBlurParams.radius, wrapMode, borderColor, textureAccessor );
+	}
+}
diff --git a/26_CentralLimitBoxBlur/main.cpp b/26_CentralLimitBoxBlur/main.cpp
new file mode 100644
index 000000000..f89fd09b8
--- /dev/null
+++ b/26_CentralLimitBoxBlur/main.cpp
@@ -0,0 +1,339 @@
+// Copyright (C) 2018-2023 - DevSH Graphics Programming Sp. z O.O.
+// This file is part of the "Nabla Engine".
+// For conditions of distribution and use, see copyright notice in nabla.h
+
+
+// I've moved out a tiny part of this example into a shared header for reuse, please open and read it.
+#include "../common/MonoDeviceApplication.hpp"
+#include "../common/MonoAssetManagerAndBuiltinResourceApplication.hpp"
+
+#include <nabla.h>
+
+#include "CArchive.h"
+
+using namespace nbl;
+using namespace core;
+using namespace system;
+using namespace asset;
+using namespace video;
+
+#define _NBL_PLATFORM_WINDOWS_
+
+class BoxBlurDemo final : public examples::MonoDeviceApplication, public examples::MonoAssetManagerAndBuiltinResourceApplication
+{
+	using device_base_t = examples::MonoDeviceApplication;
+	using asset_base_t = examples::MonoAssetManagerAndBuiltinResourceApplication;
+
+public:
+	BoxBlurDemo(
+		const path& _localInputCWD,
+		const path& _localOutputCWD,
+		const path& _sharedInputCWD,
+		const path& _sharedOutputCWD
+	) : system::IApplicationFramework( _localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD )
+	{}
+
+	bool onAppInitialized( smart_refctd_ptr<ISystem>&& system ) override
+	{
+		// Remember to call the base class initialization!
+		if( !device_base_t::onAppInitialized( std::move( system ) ) )
+		{
+			return false;
+		}
+		if( !asset_base_t::onAppInitialized( std::move( system ) ) )
+		{
+			return false;
+		}
+
+		constexpr uint32_t WorkgroupSize = 256;
+		constexpr uint32_t AxisDimension = 3;
+		constexpr uint32_t PassesPerAxis = 4;
+
+		constexpr uint32_t WorkgroupCount = 2048;
+
+		IAssetLoader::SAssetLoadParams lparams = {};
+		lparams.logger = m_logger.get();
+		lparams.workingDirectory = "";
+		auto checkedLoad = [ & ]<typename T>( const char* filePath ) -> smart_refctd_ptr<T>
+		{
+			// The `IAssetManager::getAsset` function is very complex, in essence it:
+			// 1. takes a cache key or an IFile, if you gave it an `IFile` skip to step 3
+			// 2. it consults the loader override about how to get an `IFile` from your cache key
+			// 3. handles any failure in opening an `IFile` (which is why it takes a supposed filename), it allows the override to give a different file
+			// 4. tries to derive a working directory if you haven't provided one
+			// 5. looks for the assets in the cache if you haven't disabled that in the loader parameters
+			// 5a. lets the override choose relevant assets from the ones found under the cache key
+			// 5b. if nothing was found it lets the override intervene one last time
+			// 6. if there's no file to load from, return no assets
+			// 7. try all loaders associated with a file extension
+			// 8. then try all loaders by opening the file and checking if it will load
+			// 9. insert loaded assets into cache if required
+			// 10. restore assets from dummy state if needed (more on that in other examples)
+			// Take the docs with a grain of salt, the `getAsset` will be rewritten to deal with restores better in the near future.
+			nbl::asset::SAssetBundle bundle = m_assetMgr->getAsset( filePath, lparams );
+			if( bundle.getContents().empty() )
+			{
+				m_logger->log( "Asset %s failed to load! Are you sure it exists?", ILogger::ELL_ERROR, filePath );
+				return nullptr;
+			}
+			// All assets derive from `nbl::asset::IAsset`, and can be casted down if the type matches
+			static_assert( std::is_base_of_v<nbl::asset::IAsset, T> );
+			// The type of the root assets in the bundle is not known until runtime, so this is kinda like a `dynamic_cast` which will return nullptr on type mismatch
+			auto typedAsset = IAsset::castDown<T>( bundle.getContents()[ 0 ] ); // just grab the first asset in the bundle
+			if( !typedAsset )
+			{
+				m_logger->log( "Asset type mismatch, want %d got %d!", ILogger::ELL_ERROR, T::AssetType, bundle.getAssetType() );
+			}
+			return typedAsset;
+		};
+
+		auto textureToBlur = checkedLoad.operator()< nbl::asset::ICPUImage >( "app_resources/tex.jpg" );
+		const auto& inCpuTexInfo = textureToBlur->getCreationParameters();
+
+		auto createGPUImages = [ & ](
+			core::bitflag<IGPUImage::E_USAGE_FLAGS> usageFlags,
+			std::string_view name,
+			smart_refctd_ptr<IGPUImage>&& imgOut,
+			smart_refctd_ptr<IGPUImageView>&& imgViewOut
+		) {
+			video::IGPUImage::SCreationParams gpuImageCreateInfo;
+			gpuImageCreateInfo.flags = inCpuTexInfo.flags;
+			gpuImageCreateInfo.type = inCpuTexInfo.type;
+			gpuImageCreateInfo.extent = inCpuTexInfo.extent;
+			gpuImageCreateInfo.mipLevels = inCpuTexInfo.mipLevels;
+			gpuImageCreateInfo.arrayLayers = inCpuTexInfo.arrayLayers;
+			gpuImageCreateInfo.samples = inCpuTexInfo.samples;
+			gpuImageCreateInfo.tiling = video::IGPUImage::TILING::OPTIMAL;
+			gpuImageCreateInfo.usage = usageFlags | asset::IImage::EUF_TRANSFER_DST_BIT;
+			gpuImageCreateInfo.queueFamilyIndexCount = 0u;
+			gpuImageCreateInfo.queueFamilyIndices = nullptr;
+
+			gpuImageCreateInfo.format = m_physicalDevice->promoteImageFormat(
+				{ inCpuTexInfo.format, gpuImageCreateInfo.usage }, gpuImageCreateInfo.tiling
+			);
+			auto gpuImage = m_device->createImage( std::move( gpuImageCreateInfo ) );
+
+			auto gpuImageMemReqs = gpuImage->getMemoryReqs();
+			gpuImageMemReqs.memoryTypeBits &= m_physicalDevice->getDeviceLocalMemoryTypeBits();
+			m_device->allocate( gpuImageMemReqs, gpuImage.get(), video::IDeviceMemoryAllocation::EMAF_NONE );
+
+			auto imgView = m_device->createImageView( {
+				.flags = IGPUImageView::ECF_NONE,
+				.subUsages = usageFlags,
+				.image = gpuImage,
+				.viewType = IGPUImageView::ET_2D,
+				.format = gpuImageCreateInfo.format
+			} );
+			gpuImage->setObjectDebugName( name.data() );
+			imgView->setObjectDebugName( ( std::string{ name } + "view" ).c_str() );
+			imgOut = gpuImage;
+			imgViewOut = imgView;
+		};
+
+
+		smart_refctd_ptr<IGPUImage> inputGpuImg;
+		smart_refctd_ptr<IGPUImage> outputGpuImg;
+		smart_refctd_ptr<IGPUImageView> inputGpuImgView;
+		smart_refctd_ptr<IGPUImageView> outputGpuImgView;
+		createGPUImages( IGPUImage::EUF_SAMPLED_BIT, "InputImg", std::move( inputGpuImg ), std::move( inputGpuImgView ) );
+		createGPUImages( IGPUImage::EUF_STORAGE_BIT, "OutputImg",
+			std::move( outputGpuImg ), std::move( outputGpuImgView ) );
+
+
+		auto computeMain = checkedLoad.operator()< nbl::asset::ICPUShader >( "app_resources/main.comp.hlsl" );
+		smart_refctd_ptr<ICPUShader> overridenUnspecialized = CHLSLCompiler::createOverridenCopy(
+			computeMain.get(),
+			"#define WORKGROUP_SIZE %s\n#define AXIS_DIM %d\n#define PASSES_PER_AXIS %d\n",
+			std::to_string( WorkgroupSize ).c_str(), AxisDimension, PassesPerAxis
+		);
+		smart_refctd_ptr<IGPUShader> shader = m_device->createShader( overridenUnspecialized.get() );
+		if( !shader )
+		{
+			return logFail( "Creation of a GPU Shader from CPU Shader source failed!" );
+		}
+
+
+		// TODO: move to shared cpp/hlsl descriptors file
+		NBL_CONSTEXPR_STATIC nbl::video::IGPUDescriptorSetLayout::SBinding bindings[] = {
+			{
+				.binding = 0,
+				.type = nbl::asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER,
+				.createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE,
+				.stageFlags = IShader::ESS_COMPUTE,
+				.count = 1,
+				.samplers = nullptr
+			},
+			{
+				.binding = 1,
+				.type = nbl::asset::IDescriptor::E_TYPE::ET_STORAGE_IMAGE,
+				.createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE,
+				.stageFlags = IShader::ESS_COMPUTE,
+				.count = 1,
+				.samplers = nullptr
+			}
+		};
+		smart_refctd_ptr<IGPUDescriptorSetLayout> dsLayout = m_device->createDescriptorSetLayout( bindings );
+		if( !dsLayout )
+		{
+			return logFail( "Failed to create a Descriptor Layout!\n" );
+		}
+		const asset::SPushConstantRange pushConst[] = { { .stageFlags = IShader::ESS_COMPUTE, .offset = 0, .size = sizeof( BoxBlurParams ) } };
+		smart_refctd_ptr<IGPUPipelineLayout> pplnLayout = m_device->createPipelineLayout( pushConst, smart_refctd_ptr( dsLayout ) );
+		if( !pplnLayout )
+		{
+			return logFail( "Failed to create a Pipeline Layout!\n" );
+		}
+
+		smart_refctd_ptr<IGPUComputePipeline> pipeline;
+		{
+			IGPUComputePipeline::SCreationParams params = {};
+			params.layout = pplnLayout.get();
+			params.shader.entryPoint = "main";
+			params.shader.shader = shader.get();
+			// we'll cover the specialization constant API in another example
+			if( !m_device->createComputePipelines( nullptr, { &params, 1 }, &pipeline ) )
+			{
+				return logFail( "Failed to create pipelines (compile & link shaders)!\n" );
+			}
+		}
+		smart_refctd_ptr<IGPUSampler> sampler = m_device->createSampler( { .TextureWrapU = ISampler::ETC_CLAMP_TO_EDGE } );
+		smart_refctd_ptr<IGPUDescriptorSet> ds;
+		smart_refctd_ptr<IDescriptorPool> pool = m_device->createDescriptorPoolForDSLayouts(
+			IDescriptorPool::ECF_NONE, { &dsLayout.get(), 1 } );
+		ds = pool->createDescriptorSet( std::move( dsLayout ) );
+		{
+			IGPUDescriptorSet::SDescriptorInfo info[ 2 ];
+			info[ 0 ].desc = inputGpuImgView;
+			info[ 0 ].info.image = { .sampler = sampler, .imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL };
+			info[ 1 ].desc = outputGpuImgView;
+			info[ 1 ].info.image = { .sampler = nullptr, .imageLayout = IImage::LAYOUT::GENERAL };
+
+			IGPUDescriptorSet::SWriteDescriptorSet writes[] = {
+				{ .dstSet = ds.get(), .binding = 0, .arrayElement = 0, .count = 1, .info = &info[ 0 ] },
+				{ .dstSet = ds.get(), .binding = 1, .arrayElement = 0, .count = 1, .info = &info[ 1 ] },
+			};
+			m_device->updateDescriptorSets( writes, {} );
+		}
+
+		uint32_t computeQueueIndex = getComputeQueue()->getFamilyIndex();
+		IQueue* queue = m_device->getQueue( computeQueueIndex, 0 );
+
+		smart_refctd_ptr<IGPUCommandBuffer> cmdbuf;
+		smart_refctd_ptr<IGPUCommandPool> cmdpool = m_device->createCommandPool(
+			computeQueueIndex, IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT );
+		if( !cmdpool->createCommandBuffers( IGPUCommandPool::BUFFER_LEVEL::PRIMARY, 1u, &cmdbuf ) )
+		{
+			return logFail( "Failed to create Command Buffers!\n" );
+		}
+
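+		// A minimal sketch of the timeline semaphore pattern used below; the (0,45) value pair is arbitrary,
+		// any strictly increasing pair works:
+		//   auto sema = m_device->createSemaphore( 0 );
+		//   /* submit with .signalSemaphores = { {.semaphore = sema.get(), .value = 45, .stageMask = ...} } */
+		//   m_device->blockForSemaphores( { {.semaphore = sema.get(), .value = 45} } );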
+		constexpr size_t StartedValue = 0;
+		constexpr size_t FinishedValue = 45;
+		static_assert( FinishedValue > StartedValue );
+		smart_refctd_ptr<ISemaphore> progress = m_device->createSemaphore( StartedValue );
+
+		IQueue::SSubmitInfo::SCommandBufferInfo cmdbufs[] = { { .cmdbuf = cmdbuf.get() } };
+
+		nbl::video::SIntendedSubmitInfo::SFrontHalf frontHalf = { .queue = queue, .commandBuffers = cmdbufs };
+		smart_refctd_ptr<nbl::video::IUtilities> assetStagingMngr =
+			make_smart_refctd_ptr<nbl::video::IUtilities>( smart_refctd_ptr( m_device ), smart_refctd_ptr( m_logger ) );
+
+		cmdbuf->begin( IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT );
+
+		queue->startCapture();
+		bool uploaded = assetStagingMngr->updateImageViaStagingBufferAutoSubmit(
+			frontHalf, textureToBlur->getBuffer(), inCpuTexInfo.format,
+			inputGpuImg.get(), IImage::LAYOUT::UNDEFINED, textureToBlur->getRegions()
+		);
+		queue->endCapture();
+		if( !uploaded )
+		{
+			return logFail( "Failed to upload cpu tex!\n" );
+		}
+
+		cmdbuf->reset( IGPUCommandBuffer::RESET_FLAGS::NONE );
+
+		BoxBlurParams pushConstData = {};
+
+
+		cmdbuf->begin( IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT );
+		cmdbuf->beginDebugMarker( "My Compute Dispatch", core::vectorSIMDf( 0, 1, 0, 1 ) );
+		nbl::video::IGPUCommandBuffer::SImageResolve regions[] = {
+			{
+				.srcSubresource = { .layerCount = 1 },
+				.srcOffset = {},
+				.dstSubresource = { .layerCount = 1 },
+				.dstOffset = {},
+				.extent = inputGpuImg->getCreationParameters().extent
+			}
+		};
+		cmdbuf->resolveImage(
+			inputGpuImg.get(), IImage::LAYOUT::UNDEFINED,
+			inputGpuImg.get(), IImage::LAYOUT::GENERAL,
+			std::size( regions ), regions );
+		nbl::video::IGPUCommandBuffer::SImageResolve regionsOut[] = {
+			{
+				.srcSubresource = { .layerCount = 1 },
+				.srcOffset = {},
+				.dstSubresource = { .layerCount = 1 },
+				.dstOffset = {},
+				.extent = outputGpuImg->getCreationParameters().extent
+			}
+		};
+		cmdbuf->resolveImage(
+			outputGpuImg.get(), IImage::LAYOUT::UNDEFINED,
+			outputGpuImg.get(), IImage::LAYOUT::GENERAL,
+			std::size( regionsOut ), regionsOut );
+		cmdbuf->bindComputePipeline( pipeline.get() );
+		cmdbuf->bindDescriptorSets( nbl::asset::EPBP_COMPUTE, pplnLayout.get(), 0, 1, &ds.get() );
+		cmdbuf->pushConstants( pplnLayout.get(), IShader::ESS_COMPUTE, 0, sizeof( BoxBlurParams ), &pushConstData );
+		cmdbuf->dispatch( WorkgroupCount, 1, 1 );
+
+		const nbl::asset::SMemoryBarrier barriers[] = {
+			{
+				.srcStageMask = nbl::asset::PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT,
+				.srcAccessMask = nbl::asset::ACCESS_FLAGS::SHADER_WRITE_BITS,
+				.dstStageMask = nbl::asset::PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT,
+				.dstAccessMask = nbl::asset::ACCESS_FLAGS::SHADER_READ_BITS,
+			}
+		};
+		cmdbuf->pipelineBarrier( nbl::asset::EDF_NONE, { .memBarriers = barriers } );
+
+		cmdbuf->dispatch( WorkgroupCount, 1, 1 );
+		cmdbuf->endDebugMarker();
+		// Normally you'd want to perform a memory barrier when using the output of a compute shader or renderpass,
+		// however waiting on a timeline semaphore (or fence) on the Host makes all Device writes visible.
+		cmdbuf->end();
+
+		{
+			// The IGPUCommandBuffer is the only object whose usage does not get automagically tracked internally, you're responsible for holding onto it as long as the GPU needs it.
+			// So this is why our commandbuffer, even though it's transient, lives in a scope at or above the place where we wait for the submission to be signalled as complete.
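+			// Sidenote on the .stageMask of the signal info below: with synchronization2 semantics the semaphore
+			// may signal as soon as the compute stage finishes, it does not have to wait for the whole pipeline to drain.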
+			const IQueue::SSubmitInfo::SCommandBufferInfo cmdbufs[] = { { .cmdbuf = cmdbuf.get() } };
+			// But we do need to signal completion by incrementing the Timeline Semaphore counter as soon as the compute shader is done
+			const IQueue::SSubmitInfo::SSemaphoreInfo signals[] = { { .semaphore = progress.get(), .value = FinishedValue, .stageMask = asset::PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT } };
+			// Default, we have no semaphores to wait on before we can start our workload
+			IQueue::SSubmitInfo submitInfos[] = { { .commandBuffers = cmdbufs, .signalSemaphores = signals } };
+
+			// We have a cool integration with RenderDoc that allows you to start and end captures programmatically.
+			// This is super useful for debugging multi-queue workloads and by default RenderDoc delimits captures only by Swapchain presents.
+			queue->startCapture();
+			queue->submit( submitInfos );
+			queue->endCapture();
+		}
+		// As the name implies this function will not progress until the fence signals or repeated waiting returns an error.
+		const ISemaphore::SWaitInfo waitInfos[] = { { .semaphore = progress.get(), .value = FinishedValue } };
+		m_device->blockForSemaphores( waitInfos );
+
+
+		return true;
+	}
+
+	// Platforms like WASM expect the main entry point to periodically return control, hence if you want a cross-platform app, you have to let the framework deal with your "game loop"
+	void workLoopBody() override {}
+
+	// Whether to keep invoking the above. In this example, because it's headless GPU compute, we do all the work in the app initialization.
+	bool keepRunning() override { return false; }
+
+};
+
+
+NBL_MAIN_FUNC( BoxBlurDemo )
\ No newline at end of file
diff --git a/66_PropertyPools/CMakeLists.txt b/66_PropertyPools/CMakeLists.txt
new file mode 100644
index 000000000..bc1624875
--- /dev/null
+++ b/66_PropertyPools/CMakeLists.txt
@@ -0,0 +1,24 @@
+include(common RESULT_VARIABLE RES)
+if(NOT RES)
+	message(FATAL_ERROR "common.cmake not found. Should be in {repo_root}/cmake directory")
Should be in {repo_root}/cmake directory") +endif() + +nbl_create_executable_project("" "" "" "" "${NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET}") + +if(NBL_EMBED_BUILTIN_RESOURCES) + set(_BR_TARGET_ ${EXECUTABLE_NAME}_builtinResourceData) + set(RESOURCE_DIR "app_resources") + + get_filename_component(_SEARCH_DIRECTORIES_ "${CMAKE_CURRENT_SOURCE_DIR}" ABSOLUTE) + get_filename_component(_OUTPUT_DIRECTORY_SOURCE_ "${CMAKE_CURRENT_BINARY_DIR}/src" ABSOLUTE) + get_filename_component(_OUTPUT_DIRECTORY_HEADER_ "${CMAKE_CURRENT_BINARY_DIR}/include" ABSOLUTE) + + file(GLOB_RECURSE BUILTIN_RESOURCE_FILES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}" "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}/*") + foreach(RES_FILE ${BUILTIN_RESOURCE_FILES}) + LIST_BUILTIN_RESOURCE(RESOURCES_TO_EMBED "${RES_FILE}") + endforeach() + + ADD_CUSTOM_BUILTIN_RESOURCES(${_BR_TARGET_} RESOURCES_TO_EMBED "${_SEARCH_DIRECTORIES_}" "${RESOURCE_DIR}" "nbl::this_example::builtin" "${_OUTPUT_DIRECTORY_HEADER_}" "${_OUTPUT_DIRECTORY_SOURCE_}") + + LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} ${_BR_TARGET_}) +endif() \ No newline at end of file diff --git a/66_PropertyPools/app_resources/common.hlsl b/66_PropertyPools/app_resources/common.hlsl new file mode 100644 index 000000000..6f339aa13 --- /dev/null +++ b/66_PropertyPools/app_resources/common.hlsl @@ -0,0 +1,22 @@ +#include "nbl/builtin/hlsl/cpp_compat.hlsl" + +// Unfortunately not every piece of C++14 metaprogramming syntax is available in HLSL 202x +// https://github.com/microsoft/DirectXShaderCompiler/issues/5751#issuecomment-1800847954 +typedef nbl::hlsl::float32_t3 input_t; +typedef nbl::hlsl::float32_t output_t; + +NBL_CONSTEXPR_STATIC_INLINE uint32_t MaxPossibleElementCount = 1 << 20; + +struct PushConstantData +{ + uint64_t inputAddress; + uint64_t outputAddress; + uint32_t dataElementCount; +}; + +NBL_CONSTEXPR uint32_t WorkgroupSize = 256; + +#include "nbl/builtin/hlsl/random/xoroshiro.hlsl" + +// Yes we do have our own re-creation of C++'s STL in HLSL2021 ! 
+#include "nbl/builtin/hlsl/limits.hlsl" \ No newline at end of file diff --git a/66_PropertyPools/app_resources/shader.comp.hlsl b/66_PropertyPools/app_resources/shader.comp.hlsl new file mode 100644 index 000000000..4aeef0e0f --- /dev/null +++ b/66_PropertyPools/app_resources/shader.comp.hlsl @@ -0,0 +1,33 @@ +#include "common.hlsl" + +// just a small test +#include "nbl/builtin/hlsl/jit/device_capabilities.hlsl" + +[[vk::push_constant]] PushConstantData pushConstants; + +// does absolutely nothing, a later example will show how it gets used +template +void dummyTraitTest() {} + +[numthreads(WorkgroupSize,1,1)] +void main(uint32_t3 ID : SV_DispatchThreadID) +{ + dummyTraitTest(); + if (ID.x>=pushConstants.dataElementCount) + return; + + const input_t self = vk::RawBufferLoad(pushConstants.inputAddress+sizeof(input_t)*ID.x); + + nbl::hlsl::Xoroshiro64StarStar rng = nbl::hlsl::Xoroshiro64StarStar::construct(uint32_t2(pushConstants.dataElementCount,ID.x)^0xdeadbeefu); + + float32_t acc = nbl::hlsl::numeric_limits::max; + const static uint32_t OthersToTest = 15; + [[unroll(OthersToTest)]] + for (uint32_t i=0; i(pushConstants.inputAddress+sizeof(input_t)*offset); + acc = min(length(other-self),acc); + } + vk::RawBufferStore(pushConstants.outputAddress+sizeof(float32_t)*ID.x,acc); +} \ No newline at end of file diff --git a/66_PropertyPools/config.json.template b/66_PropertyPools/config.json.template new file mode 100644 index 000000000..717d05d53 --- /dev/null +++ b/66_PropertyPools/config.json.template @@ -0,0 +1,28 @@ +{ + "enableParallelBuild": true, + "threadsPerBuildProcess" : 2, + "isExecuted": false, + "scriptPath": "", + "cmake": { + "configurations": [ "Release", "Debug", "RelWithDebInfo" ], + "buildModes": [], + "requiredOptions": [] + }, + "profiles": [ + { + "backend": "vulkan", // should be none + "platform": "windows", + "buildModes": [], + "runConfiguration": "Release", // we also need to run in Debug nad RWDI because foundational example + "gpuArchitectures": [] + } + ], + "dependencies": [], + "data": [ + { + "dependencies": [], + "command": [""], + "outputs": [] + } + ] +} \ No newline at end of file diff --git a/66_PropertyPools/main.cpp b/66_PropertyPools/main.cpp new file mode 100644 index 000000000..e1ab9d7b3 --- /dev/null +++ b/66_PropertyPools/main.cpp @@ -0,0 +1,419 @@ +// Copyright (C) 2018-2023 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + + +#include "nbl/video/surface/CSurfaceVulkan.h" + +#include "../common/BasicMultiQueueApplication.hpp" +#include "../common/MonoAssetManagerAndBuiltinResourceApplication.hpp" + +namespace nbl::examples +{ + +using namespace nbl; +using namespace core; +using namespace system; +using namespace ui; +using namespace asset; +using namespace video; + +// Virtual Inheritance because apps might end up doing diamond inheritance +class WindowedApplication : public virtual BasicMultiQueueApplication +{ + using base_t = BasicMultiQueueApplication; + + public: + using base_t::base_t; + + virtual video::IAPIConnection::SFeatures getAPIFeaturesToEnable() override + { + auto retval = base_t::getAPIFeaturesToEnable(); + // We only support one swapchain mode, surface, the other one is Display which we have not implemented yet. 
+			retval.swapchainMode = video::E_SWAPCHAIN_MODE::ESM_SURFACE;
+			return retval;
+		}
+
+		// New function, we need to know about surfaces to create ahead of time
+		virtual core::vector<video::SPhysicalDeviceFilter::SurfaceCompatibility> getSurfaces() const = 0;
+
+		virtual core::set<video::IPhysicalDevice*> filterDevices(const core::SRange<video::IPhysicalDevice* const>& physicalDevices) const
+		{
+			const auto firstFilter = base_t::filterDevices(physicalDevices);
+
+			video::SPhysicalDeviceFilter deviceFilter = {};
+
+			const auto surfaces = getSurfaces();
+			deviceFilter.requiredSurfaceCompatibilities = surfaces.data();
+			deviceFilter.requiredSurfaceCompatibilitiesCount = surfaces.size();
+
+			return deviceFilter(physicalDevices);
+		}
+
+		virtual bool onAppInitialized(smart_refctd_ptr<ISystem>&& system)
+		{
+			// Remember to call the base class initialization!
+			if (!base_t::onAppInitialized(std::move(system)))
+				return false;
+
+			#ifdef _NBL_PLATFORM_WINDOWS_
+			m_winMgr = nbl::ui::IWindowManagerWin32::create();
+			#else
+			#error "Unimplemented!"
+			#endif
+			return true;
+		}
+
+		core::smart_refctd_ptr<ui::IWindowManager> m_winMgr;
+};
+
+
+// Before we get onto creating a window, we need to discuss how Nabla handles input, clipboards and cursor control
+class IWindowClosedCallback : public virtual nbl::ui::IWindow::IEventCallback
+{
+	public:
+		IWindowClosedCallback() : m_gotWindowClosedMsg(false) {}
+
+		// unless you create a separate callback per window, both will "trip" this condition
+		bool windowGotClosed() const {return m_gotWindowClosedMsg;}
+
+	private:
+		bool onWindowClosed_impl() override
+		{
+			m_gotWindowClosedMsg = true;
+			return true;
+		}
+
+		bool m_gotWindowClosedMsg;
+};
+
+// We inherit from an application that tries to find Graphics and Compute queues
+// because applications with presentable images often want to perform Graphics family operations
+// Virtual Inheritance because apps might end up doing diamond inheritance
+class SingleNonResizableWindowApplication : public virtual WindowedApplication
+{
+		using base_t = WindowedApplication;
+
+	protected:
+		virtual IWindow::SCreationParams getWindowCreationParams() const
+		{
+			IWindow::SCreationParams params = {};
+			params.callback = make_smart_refctd_ptr<IWindowClosedCallback>();
+			params.width = 640;
+			params.height = 480;
+			params.x = 32;
+			params.y = 32;
+			params.flags = IWindow::ECF_NONE;
+			params.windowCaption = "SingleNonResizableWindowApplication";
+			return params;
+		}
+
+		core::smart_refctd_ptr<ui::IWindow> m_window;
+		core::smart_refctd_ptr<video::ISurface> m_surface;
+
+	public:
+		using base_t::base_t;
+
+		virtual bool onAppInitialized(smart_refctd_ptr<ISystem>&& system) override
+		{
+			// Remember to call the base class initialization!
+			if (!base_t::onAppInitialized(std::move(system)))
+				return false;
+
+			m_window = m_winMgr->createWindow(getWindowCreationParams());
+			m_surface = video::CSurfaceVulkanWin32::create(core::smart_refctd_ptr(m_api),core::smart_refctd_ptr_static_cast<ui::IWindowWin32>(m_window));
+			return true;
+		}
+
+		virtual core::vector<video::SPhysicalDeviceFilter::SurfaceCompatibility> getSurfaces() const
+		{
+			return {{m_surface.get()/*,EQF_NONE*/}};
+		}
+
+		virtual bool keepRunning() override
+		{
+			if (!m_window || reinterpret_cast<const IWindowClosedCallback*>(m_window->getEventCallback())->windowGotClosed())
+				return false;
+
+			return true;
+		}
+};
+}
+
+
+using namespace nbl;
+using namespace core;
+using namespace system;
+using namespace ui;
+using namespace asset;
+using namespace video;
+
+
+#include "app_resources/common.hlsl"
+#include "nbl/builtin/hlsl/bit.hlsl"
+
+
+// In this application we'll cover buffer streaming, Buffer Device Address (BDA) and push constants
+class PropertyPoolsApp final : public examples::SingleNonResizableWindowApplication, public examples::MonoAssetManagerAndBuiltinResourceApplication
+{
+		using device_base_t = examples::MonoDeviceApplication;
+		using asset_base_t = examples::MonoAssetManagerAndBuiltinResourceApplication;
+
+		// This is the first example that submits multiple workloads in-flight.
+		// What the shader does is it computes the minimum distance of each point against K other random input points.
+		// Having the GPU randomly access parts of the buffer requires it to be DEVICE_LOCAL for performance.
+		// Then the CPU downloads the results and finds the median minimum distance via quick-select.
+		// This bizarre synthetic workload was specifically chosen for its unfriendliness towards simple buffer usage.
+		// The fact we have variable sized workloads and run them in a loop means we either have to dynamically
+		// suballocate from a single buffer or have K worst-case sized buffers we round robin for K-workloads in flight.
+		// Creating and destroying buffers at runtime is not an option as those are very expensive operations.
+		// Also since the CPU needs to heapify the outputs, we need to have the GPU write them into RAM not VRAM.
+		smart_refctd_ptr<IGPUComputePipeline> m_pipeline;
+
+		// The Utility class has lots of methods to handle staging without relying on ReBAR or EXT_host_image_copy, as well as more complex methods we'll cover later.
+		// Until EXT_host_image_copy becomes ubiquitous across all Nabla Core Profile devices, you need to stage image copies from an IGPUBuffer to an IGPUImage.
+		// Why use Staging for buffers in the age of ReBAR? While GPU workloads overlap the CPU, individual GPU workloads' execution might not overlap each other
+		// but their data might. In this case you want to "precisely" time the data update on the GPU timeline between the end and start of a workload.
+		// For very small updates you could use the commandbuffer updateBuffer method, but it has a size limit and the data enqueued takes up space in the commandpool.
+		// Sometimes it might be unfeasible to either have multiple copies or update references to those copies without a cascade update.
+		// One example is the transformation graph of nodes in a scene, where a copy-on-write of a node would require updating the offset/pointer held by
+		// any other node that refers to it. This quickly turns into a cascade that would force you to basically create a full copy of the entire data structure
+		// after most updates. Whereas with staging you'd "queue up" the much smaller set of updates to apply between each computation step which uses the graph.
+		// Another example are UBO and SSBO bindings, where once you run out of dynamic bindings, you can no longer easily change offsets without introducing extra indirection in shaders.
+		// Actually staging can help you re-use a commandbuffer because you don't need to re-record it if you don't need to change the offsets at which you bind!
+		// Finally ReBAR is a precious resource, my 8GB RTX 3070 only reports a 214MB Heap backing HOST_VISIBLE and DEVICE_LOCAL device local memory type.
+		smart_refctd_ptr<video::IUtilities> m_utils;
+
+		// We call them downstreaming and upstreaming, simply by how we used them so far.
+		// Meaning that upstreaming is uncached and usually ReBAR (DEVICE_LOCAL), for simple memcpy like sequential writes.
+		// While the downstreaming is CACHED and not DEVICE_LOCAL for fast random access by the CPU.
+		// However there are cases when you'd want to use a buffer with flags identical to the default downstreaming buffer for uploads,
+		// such a case is when a CPU needs to build a data-structure in-place (due to memory constraints) before the GPU accesses it,
+		// one example being Host Acceleration Structure builds (BVH building requires lots of repeated memory accesses).
+		// When choosing the memory properties of a mapped buffer consider which processor (CPU or GPU) needs faster access in event of a cache-miss.
+		nbl::video::StreamingTransientDataBufferMT<>* m_upStreamingBuffer;
+		StreamingTransientDataBufferMT<>* m_downStreamingBuffer;
+		// These are Buffer Device Addresses
+		uint64_t m_upStreamingBufferAddress;
+		uint64_t m_downStreamingBufferAddress;
+
+		smart_refctd_ptr<video::CPropertyPoolHandler> m_propertyPoolHandler;
+		smart_refctd_ptr<IGPUBuffer> m_scratchBuffer;
+		smart_refctd_ptr<IGPUBuffer> m_addressBuffer;
+		smart_refctd_ptr<IGPUBuffer> m_transferSrcBuffer;
+		smart_refctd_ptr<IGPUBuffer> m_transferDstBuffer;
+		std::vector<uint16_t> m_data;
+
+		// You can ask the `nbl::core::GeneralpurposeAddressAllocator` used internally by the Streaming Buffers to give out offsets aligned to a certain multiple (not only a Power of Two!)
+		uint32_t m_alignment;
+
+		// The pool cache is just a formalized way of round-robining command pools and resetting + reusing them after their most recent submit signals finished.
+		// It's a little more ergonomic to use if you don't have a 1:1 mapping between frames and pools.
+		smart_refctd_ptr<ICommandPoolCache> m_poolCache;
+
+		// We'll run the iterations in reverse, easier to write "keep running"
+		uint32_t m_iteration = 200;
+
+		static constexpr uint64_t TransfersAmount = 1024;
+		static constexpr uint64_t MaxValuesPerTransfer = 512;
+
+	public:
+		// Yay thanks to multiple inheritance we cannot forward ctors anymore
+		PropertyPoolsApp(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) :
+			system::IApplicationFramework(_localInputCWD,_localOutputCWD,_sharedInputCWD,_sharedOutputCWD) {}
+
+		// we stuff all our work here because it's a "single shot" app
+		bool onAppInitialized(smart_refctd_ptr<ISystem>&& system) override
+		{
+			// Remember to call the base class initialization!
+			if (!device_base_t::onAppInitialized(std::move(system)))
+				return false;
+			if (!asset_base_t::onAppInitialized(std::move(system)))
+				return false;
+
+			m_propertyPoolHandler = core::make_smart_refctd_ptr<video::CPropertyPoolHandler>(core::smart_refctd_ptr(m_device));
+
+			auto createBuffer = [&](uint64_t size, core::bitflag<asset::IBuffer::E_USAGE_FLAGS> flags, const char* name, bool hostVisible)
+			{
+				video::IGPUBuffer::SCreationParams creationParams;
+				creationParams.size = ((size + 3) / 4) * 4; // Align to 4 bytes
+				creationParams.usage = flags
+					| asset::IBuffer::EUF_STORAGE_BUFFER_BIT
+					| asset::IBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT
+					| asset::IBuffer::EUF_INLINE_UPDATE_VIA_CMDBUF;
+
+				auto buffer = m_device->createBuffer(std::move(creationParams));
+				nbl::video::IDeviceMemoryBacked::SDeviceMemoryRequirements reqs = buffer->getMemoryReqs();
+				if (hostVisible)
+					reqs.memoryTypeBits &= m_device->getPhysicalDevice()->getDownStreamingMemoryTypeBits();
+				m_device->allocate(reqs, buffer.get(), nbl::video::IDeviceMemoryAllocation::E_MEMORY_ALLOCATE_FLAGS::EMAF_DEVICE_ADDRESS_BIT);
+				buffer->setObjectDebugName(name);
+
+				return buffer;
+			};
+
+			m_scratchBuffer = createBuffer(sizeof(nbl::hlsl::property_pools::TransferRequest) * TransfersAmount, core::bitflag(asset::IBuffer::EUF_TRANSFER_DST_BIT), "m_scratchBuffer", false);
+			m_addressBuffer = createBuffer(sizeof(uint32_t) * TransfersAmount * MaxValuesPerTransfer, core::bitflag(asset::IBuffer::EUF_NONE), "m_addressBuffer", false);
+			m_transferSrcBuffer = createBuffer(sizeof(uint16_t) * TransfersAmount * MaxValuesPerTransfer, core::bitflag(asset::IBuffer::EUF_TRANSFER_DST_BIT), "m_transferSrcBuffer", false);
+			m_transferDstBuffer = createBuffer(sizeof(uint16_t) * TransfersAmount * MaxValuesPerTransfer, core::bitflag(asset::IBuffer::EUF_NONE), "m_transferDstBuffer", true);
+
+			for (uint16_t i = 0; i < uint16_t((uint32_t(1) << 16) - 1); i++)
+				m_data.push_back(i);
+
+			// this time we load a shader directly from a file
+			smart_refctd_ptr<IGPUShader> shader;
+			{
+				IAssetLoader::SAssetLoadParams lp = {};
+				lp.logger = m_logger.get();
+				lp.workingDirectory = ""; // virtual root
+
+				auto assetBundle = m_assetMgr->getAsset("app_resources/shader.comp.hlsl",lp);
+				const auto assets = assetBundle.getContents();
+				if (assets.empty())
+					return logFail("Could not load shader!");
+
+				// let's go straight from ICPUSpecializedShader to IGPUSpecializedShader
+				auto source = IAsset::castDown<ICPUShader>(assets[0]);
+				// The down-cast should not fail!
+				assert(source);
+
+				IGPUObjectFromAssetConverter::SParams conversionParams = {};
+				conversionParams.device = m_device.get();
+				conversionParams.assetManager = m_assetMgr.get();
+				created_gpu_object_array<ICPUShader> convertedGPUObjects = std::make_unique<IGPUObjectFromAssetConverter>()->getGPUObjectsFromAssets(&source,&source+1,conversionParams);
+				if (convertedGPUObjects->empty() || !convertedGPUObjects->front())
+					return logFail("Conversion of a CPU Specialized Shader to GPU failed!");
+
+				shader = convertedGPUObjects->front();
+			}
+
+			// The StreamingTransientDataBuffers are actually composed on top of another useful utility called `CAsyncSingleBufferSubAllocator`
+			// The difference is that the streaming ones are made on top of ranges of `IGPUBuffer`s backed by mappable memory, whereas the
+			// `CAsyncSingleBufferSubAllocator` just allows you to suballocate subranges of any `IGPUBuffer` range with deferred/latched frees.
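+			// For reference on the shifts below: output_t is a float32_t (4 bytes) so 4<<24 gives a 64 MiB
+			// downstream buffer, while input_t is a float32_t3 (12 bytes) so 12<<24 gives a 192 MiB upstream one.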
+			constexpr uint32_t DownstreamBufferSize = sizeof(output_t)<<24;
+			constexpr uint32_t UpstreamBufferSize = sizeof(input_t)<<24;
+			m_utils = make_smart_refctd_ptr<video::IUtilities>(smart_refctd_ptr(m_device),smart_refctd_ptr(m_logger),DownstreamBufferSize,UpstreamBufferSize);
+			if (!m_utils)
+				return logFail("Failed to create Utilities!");
+			m_upStreamingBuffer = m_utils->getDefaultUpStreamingBuffer();
+			m_downStreamingBuffer = m_utils->getDefaultDownStreamingBuffer();
+			m_upStreamingBufferAddress = m_device->getBufferDeviceAddress(m_upStreamingBuffer->getBuffer());
+			m_downStreamingBufferAddress = m_device->getBufferDeviceAddress(m_downStreamingBuffer->getBuffer());
+
+			// People love Reflection but I prefer Shader Sources instead!
+			const nbl::asset::SPushConstantRange pcRange = {.stageFlags=IShader::ESS_COMPUTE,.offset=0,.size=sizeof(PushConstantData)};
+
+			// This time we'll have no Descriptor Sets or Layouts because our workload has a widely varying size
+			// and using traditional SSBO bindings would force us to update the Descriptor Set every frame.
+			// I even started writing this sample with the use of Dynamic SSBOs, however the length of the buffer range is not dynamic,
+			// only the offset. This means that we'd have to write the "worst case" length into the descriptor set binding.
+			// Then this has a knock-on effect that we couldn't allocate closer to the end of the streaming buffer than the "worst case" size.
+			m_pipeline = m_device->createComputePipeline(nullptr,m_device->createPipelineLayout(&pcRange,&pcRange+1),std::move(shader));
+
+			const auto& deviceLimits = m_device->getPhysicalDevice()->getLimits();
+			// The ranges of non-coherent mapped memory you flush or invalidate need to be aligned. You'll often see a value of 64 reported by devices
+			// which just happens to coincide with a CPU cache line size. So we ask our streaming buffers during allocation to give us properly aligned offsets.
+			// Sidenote: For SSBOs, UBOs, BufferViews, Vertex Buffer Bindings, Acceleration Structure BDAs, Shader Binding Tables, Descriptor Buffers, etc.
+			// there is also a requirement to bind buffers at offsets which have a certain alignment. Memory binding to Buffers and Images also has those.
+			// We'll align to the max of the coherent atom size even if the memory is coherent,
+			// and we also need to take into account that BDA shader loads need to be aligned to the type being loaded.
+			m_alignment = core::max(deviceLimits.nonCoherentAtomSize,alignof(float));
+
+			// We'll allow subsequent iterations to overlap each other on the GPU, the only limiting factors are
+			// the amount of memory in the streaming buffers and the number of commandpools we can use simultaneously.
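+			// A rough sketch of the round-robin usage this enables (workLoopBody() below does the real thing):
+			//   uint32_t ix;
+			//   do { ix = m_poolCache->acquirePool(); } while (ix==ICommandPoolCache::invalid_index);
+			//   /* record & submit a commandbuffer from pool `ix` */
+			//   m_poolCache->releaseSet(m_device.get(),std::move(fence),ix);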
+			constexpr auto MaxConcurrency = 64;
+			// Since this time we don't throw the Command Pools away and we'll reset them instead, we don't create the pools with the transient flag
+			m_poolCache = make_smart_refctd_ptr<ICommandPoolCache>(m_device.get(),getComputeQueue()->getFamilyIndex(),IGPUCommandPool::ECF_NONE,MaxConcurrency);
+
+			return true;
+		}
+
+		// Ok this time we'll actually have a work loop (maybe just for the sake of future WASM so we don't timeout a Browser Tab with an unresponsive script)
+		bool keepRunning() override { return m_iteration; }
+
+		// Finally the first actual work-loop
+		void workLoopBody() override
+		{
+			m_iteration--;
+			IGPUQueue* const queue = getComputeQueue();
+
+			// Obtain our command pool once one gets recycled
+			uint32_t poolIx;
+			do
+			{
+				poolIx = m_poolCache->acquirePool();
+			} while (poolIx==ICommandPoolCache::invalid_index);
+
+			smart_refctd_ptr<IGPUCommandBuffer> cmdbuf;
+			{
+				m_device->createCommandBuffers(m_poolCache->getPool(poolIx),IGPUCommandBuffer::EL_PRIMARY,1,&cmdbuf);
+				// let's record; it's still a one-time submit because we have to re-record with different push constants each time
+				cmdbuf->begin(IGPUCommandBuffer::EU_ONE_TIME_SUBMIT_BIT);
+				cmdbuf->bindComputePipeline(m_pipeline.get());
+
+				// COMMAND RECORDING
+				uint32_t dataSize = (((sizeof(uint16_t) * m_data.size()) + 3) / 4) * 4;
+				uint32_t maxUpload = 65536;
+				for (uint32_t offset = 0; offset < dataSize; offset += maxUpload)
+				{
+					cmdbuf->updateBuffer(m_transferSrcBuffer.get(), offset, maxUpload, &m_data[offset / sizeof(uint16_t)]);
+				}
+				CPropertyPoolHandler::TransferRequest transferRequest;
+				transferRequest.memblock = asset::SBufferRange<video::IGPUBuffer> { 0, sizeof(uint16_t) * m_data.size(), core::smart_refctd_ptr(m_transferSrcBuffer) };
+				transferRequest.elementSize = m_data.size();
+				transferRequest.elementCount = 1;
+				transferRequest.buffer = asset::SBufferBinding<video::IGPUBuffer> { 0, core::smart_refctd_ptr(m_transferDstBuffer) };
+
+				m_propertyPoolHandler->transferProperties(cmdbuf.get(), nullptr,
+					asset::SBufferBinding<video::IGPUBuffer>{0, core::smart_refctd_ptr(m_scratchBuffer)},
+					asset::SBufferBinding<video::IGPUBuffer>{0, core::smart_refctd_ptr(m_addressBuffer)},
+					&transferRequest, &transferRequest + 1,
+					m_logger.get(), 0, MaxValuesPerTransfer
+				);
+
+				cmdbuf->end();
+			}
+
+			// TODO: redo with a single timeline semaphore
+			auto fence = m_device->createFence(IGPUFence::ECF_UNSIGNALED);
+			{
+				IGPUQueue::SSubmitInfo submitInfo = {};
+				submitInfo.commandBufferCount = 1;
+				submitInfo.commandBuffers = &cmdbuf.get();
+
+				queue->startCapture();
+				queue->submit(1u,&submitInfo,fence.get());
+				queue->endCapture();
+			}
+
+			{
+				// Readback the transfer destination buffer and print the first few values
+				auto mem = m_transferDstBuffer->getBoundMemory();
+				assert(mem->isMappable());
+				auto ptr = m_device->mapMemory(nbl::video::IDeviceMemoryAllocation::MappedMemoryRange(mem, 0, mem->getAllocationSize()), video::IDeviceMemoryAllocation::EMCAF_READ);
+				auto uint16_t_ptr = static_cast<uint16_t*>(ptr);
+
+				for (uint32_t i = 0; i < 128; i++)
+				{
+					uint16_t value = uint16_t_ptr[i];
+					std::printf("%i, ", value);
+				}
+				std::printf("\n");
+				m_device->unmapMemory(mem);
+			}
+
+			// We can also actually latch our Command Pool reset and its return to the pool of free pools!
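+			// (the release below is latched on the fence, the pool only gets reset and returned once the
+			// GPU signals it, so we never reset a commandbuffer the GPU might still be reading)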
+			m_poolCache->releaseSet(m_device.get(),smart_refctd_ptr(fence),poolIx);
+		}
+
+		bool onAppTerminated() override
+		{
+			// Need to make sure that there are no events outstanding if we want all lambdas to eventually execute before `onAppTerminated`
+			// (the destructors of the Command Pool Cache and Streaming buffers will still wait for all lambda events to drain)
+			while (m_downStreamingBuffer->cull_frees()) {}
+
+			return device_base_t::onAppTerminated();
+		}
+};
+
+
+NBL_MAIN_FUNC(PropertyPoolsApp)
\ No newline at end of file
diff --git a/66_PropertyPools/pipeline.groovy b/66_PropertyPools/pipeline.groovy
new file mode 100644
index 000000000..1a7b043a4
--- /dev/null
+++ b/66_PropertyPools/pipeline.groovy
@@ -0,0 +1,50 @@
+import org.DevshGraphicsProgramming.Agent
+import org.DevshGraphicsProgramming.BuilderInfo
+import org.DevshGraphicsProgramming.IBuilder
+
+class CStreamingAndBufferDeviceAddressBuilder extends IBuilder
+{
+	public CStreamingAndBufferDeviceAddressBuilder(Agent _agent, _info)
+	{
+		super(_agent, _info)
+	}
+
+	@Override
+	public boolean prepare(Map axisMapping)
+	{
+		return true
+	}
+
+	@Override
+	public boolean build(Map axisMapping)
+	{
+		IBuilder.CONFIGURATION config = axisMapping.get("CONFIGURATION")
+		IBuilder.BUILD_TYPE buildType = axisMapping.get("BUILD_TYPE")
+
+		def nameOfBuildDirectory = getNameOfBuildDirectory(buildType)
+		def nameOfConfig = getNameOfConfig(config)
+
+		agent.execute("cmake --build ${info.rootProjectPath}/${nameOfBuildDirectory}/${info.targetProjectPathRelativeToRoot} --target ${info.targetBaseName} --config ${nameOfConfig} -j12 -v")
+
+		return true
+	}
+
+	@Override
+	public boolean test(Map axisMapping)
+	{
+		return true
+	}
+
+	@Override
+	public boolean install(Map axisMapping)
+	{
+		return true
+	}
+}
+
+def create(Agent _agent, _info)
+{
+	return new CStreamingAndBufferDeviceAddressBuilder(_agent, _info)
+}
+
+return this
\ No newline at end of file
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 6a20a33a9..5b104b06a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -43,7 +43,7 @@ if(NBL_BUILD_EXAMPLES)
 	endif()
 	add_subdirectory(23_ArithmeticUnitTest EXCLUDE_FROM_ALL)
 	# add_subdirectory(23_Autoexposure EXCLUDE_FROM_ALL)
-	# add_subdirectory(25_Blur EXCLUDE_FROM_ALL)
+	add_subdirectory(26_CentralLimitBoxBlur EXCLUDE_FROM_ALL)
 	add_subdirectory(25_FilterTest EXCLUDE_FROM_ALL)
 	# add_subdirectory(36_CUDAInterop EXCLUDE_FROM_ALL)
@@ -65,5 +65,6 @@ if(NBL_BUILD_EXAMPLES)
 	#add_subdirectory(61_UI EXCLUDE_FROM_ALL)
 	add_subdirectory(62_CAD EXCLUDE_FROM_ALL)
 	add_subdirectory(62_SchusslerTest EXCLUDE_FROM_ALL)
+	add_subdirectory(66_PropertyPools EXCLUDE_FROM_ALL)
 	add_subdirectory(0_ImportanceSamplingEnvMaps EXCLUDE_FROM_ALL) #TODO: integrate back into 42
 endif()
\ No newline at end of file
diff --git a/common/MonoDeviceApplication.hpp b/common/MonoDeviceApplication.hpp
index b77e3442c..6a4911da7 100644
--- a/common/MonoDeviceApplication.hpp
+++ b/common/MonoDeviceApplication.hpp
@@ -18,7 +18,7 @@ class MonoDeviceApplication : public virtual MonoSystemMonoLoggerApplication
 	public:
 		using base_t::base_t;
 
-	protected:
+	public:
 		// need this one for skipping passing all args into ApplicationFramework
 		MonoDeviceApplication() = default;