diff --git a/22.RaytracedAO/README.md b/22.RaytracedAO/README.md index 374c16e2e..03bec2bc1 100644 --- a/22.RaytracedAO/README.md +++ b/22.RaytracedAO/README.md @@ -52,13 +52,25 @@ Multiple Sensor tags in mitsuba XML's is now supported. This feature helps you h You can switch between those sensors using `PAGE UP/DOWN` Keys defined in more detail below. +### Properties added to \: + +| Property Name | Description | Type | Default Value | +|-----------------|-------------------------------------------|---------|----------------| +| hideEnvironment | Replace bakcground with Transparent Alpha | boolean | false | + +Note that we don't support Mitsuba's `hideEmitters` + ### Properties added to \: -| Property Name | Description | Type | Default Value | -|---------------|-----------------------|-------|------------------------------------------| -| moveSpeed | Camera Movement Speed | float | NaN -> Will be deduced from scene bounds | -| zoomSpeed | Camera Zoom Speed | float | NaN -> Will be deduced from scene bounds | -| rotateSpeed | Camera Rotation Speed | float | 300.0 | +| Property Name | Description | Type | Default Value | +|---------------|-------------------------------------------------------------------------------------|---------|------------------------------------------| +| up | Up Vector to determine roll around view axis and the north pole to rotate around | vector | 0.0, 1.0, 0.0 | +| moveSpeed | Camera Movement Speed | float | NaN -> Will be deduced from scene bounds | +| zoomSpeed | Camera Zoom Speed | float | NaN -> Will be deduced from scene bounds | +| rotateSpeed | Camera Rotation Speed | float | 300.0 | +| clipPlaneN\* | Worldspace coefficients for a plane equation of the form `a*x + b*y + c*z + w >= 0` | vector | 0.0, 0.0, 0.0, 0.0 (disabled) | + +\* N ranges from 0 to 5 #### Properties added to \: @@ -93,6 +105,7 @@ You can switch between those sensors using `PAGE UP/DOWN` Keys defined in more d ```xml + @@ -165,13 +178,14 @@ So the full width, height are 1152x1152 (64+1024+64=1152) | Property Name | Description | Type | Default Value | |---------------|------------------------------------------------------------------------------------------------------------------|-------|---------------| -| normalizeEnergy | Parameter to normalize the intensity of emission profile.
1) If `normalizeEnergy` is zero, it will not perform any normalization. (no normalization)
2) If `normalizeEnergy` is negative, it will normalize the intensity by dividing out the maximum intensity. (normalization by max)
3) If `normalizeEnergy` is positive, it will first normalize the intensity by average energy and multiply `normalizeEnergy` to intensity. (normalization by energy) | float | 0.0 (no normalization) | -| filename | The filename of the IES profile. | string | "" | +| normalization | Parameter to normalize the intensity of emission profile.
1) If `normalization` is `NONE`, invalid, or not one of the values below, it will not perform any normalization.
2) If `normalization` is `UNIT_MAX`, it will normalize the intensity by dividing out the maximum intensity. (normalization by max)
3) If `normalization` is `UNIT_AVERAGE_OVER_IMPLIED_DOMAIN`, it will integrate the profile over the hemisphere as well as the solid angles where the profile has emission above 0. This has an advantage over a plain average as you don't need to care whether the light is a sphere, hemisphere, or a spotlight of a given aperture. (normalization by energy)
4) If `normalization` is `UNIT_AVERAGE_OVER_FULL_DOMAIN`, we behave like `UNIT_AVERAGE` but presume the solid angle of the domain is `(CIESProfile::vAngles.front()-CIESProfile::vAngles.back())*4.f` | string | ""
(no normalization) | +| flatten | Optional "blend" of the original profile value with the average value: if negative, we use the average as if for `UNIT_AVERAGE_OVER_FULL_DOMAIN`; if positive, we use the average as if for `UNIT_AVERAGE_OVER_IMPLIED_DOMAIN`.
This is useful when the emitter appears "not bright enough" when observed from directions outside the main power lobes.
The valid range is 0.0 to 1.0; the value gets treated as `min(abs(flatten),1.f)` to make it conform.
A value equal to 1.0 or -1.0 will render your IES profile uniform, so its not something you should use and a warning will be emitted. | float | 0.0 | +| filename | The filename of the IES profile. | string | "" | NOTE: **\** tag of emitter node can be used to orient the emission direction of IES light. -#### Example of Area Light with IES Profile +#### Example of Area Light with IES Profile which flattens its profile against a full Sphere or Hemisphere average ```xml @@ -180,7 +194,8 @@ NOTE: **\** tag of emitter node can be used to orient the emission d - + + diff --git a/22.RaytracedAO/Renderer.cpp b/22.RaytracedAO/Renderer.cpp index 1e4b15c8b..265f4986d 100644 --- a/22.RaytracedAO/Renderer.cpp +++ b/22.RaytracedAO/Renderer.cpp @@ -51,7 +51,7 @@ Renderer::Renderer(IVideoDriver* _driver, IAssetManager* _assetManager, scene::I m_rrManager(ext::RadeonRays::Manager::create(m_driver)), m_prevView(), m_prevCamTform(), m_sceneBound(FLT_MAX,FLT_MAX,FLT_MAX,-FLT_MAX,-FLT_MAX,-FLT_MAX), m_maxAreaLightLuma(0.f), m_framesDispatched(0u), m_rcpPixelSize{0.f,0.f}, - m_staticViewData{ {0u,0u},0u,0u,0u,0u,core::infinity(),{}}, m_raytraceCommonData{0.f,0u,0u,0u,core::matrix3x4SIMD()}, + m_staticViewData{ {0u,0u},0u,0u,0u,0u,false,core::infinity(),{}}, m_raytraceCommonData{0.f,0u,0u,0u,core::matrix3x4SIMD()}, m_indirectDrawBuffers{nullptr},m_cullPushConstants{core::matrix4SIMD(),1.f,0u,0u,0u},m_cullWorkGroups(0u), m_raygenWorkGroups{0u,0u},m_visibilityBuffer(nullptr),m_colorBuffer(nullptr), m_envMapImportanceSampling(_driver) @@ -75,6 +75,9 @@ Renderer::Renderer(IVideoDriver* _driver, IAssetManager* _assetManager, scene::I m_littleDownloadBuffer->getBoundMemory()->mapMemoryRange(IDriverMemoryAllocation::EMCAF_READ,{0,sizeof(uint32_t)}); } + // no deferral for now + m_fragGPUShader = gpuSpecializedShaderFromFile(m_assetManager,m_driver,"../fillVisBuffer.frag"); + // set up Visibility Buffer pipeline { IGPUDescriptorSetLayout::SBinding binding; @@ -98,26 +101,9 @@ Renderer::Renderer(IVideoDriver* _driver, IAssetManager* _assetManager, scene::I m_cullDSLayout = m_driver->createGPUDescriptorSetLayout(bindings,bindings+cullingDescriptorCount); } m_perCameraRasterDSLayout = core::smart_refctd_ptr(m_cullDSLayout); - { - core::smart_refctd_ptr shaders[] = {gpuSpecializedShaderFromFile(m_assetManager,m_driver,"../fillVisBuffer.vert"),gpuSpecializedShaderFromFile(m_assetManager,m_driver,"../fillVisBuffer.frag")}; - SPrimitiveAssemblyParams primitiveAssembly; - primitiveAssembly.primitiveType = EPT_TRIANGLE_LIST; - SRasterizationParams raster; - raster.faceCullingMode = EFCM_NONE; - auto _visibilityBufferFillPipelineLayout = m_driver->createGPUPipelineLayout( - nullptr,nullptr, - core::smart_refctd_ptr(m_rasterInstanceDataDSLayout), - core::smart_refctd_ptr(m_additionalGlobalDSLayout), - core::smart_refctd_ptr(m_cullDSLayout) - ); - m_visibilityBufferFillPipeline = m_driver->createGPURenderpassIndependentPipeline( - nullptr,std::move(_visibilityBufferFillPipelineLayout),&shaders->get(),&shaders->get()+2u, - SVertexInputParams{},SBlendParams{},primitiveAssembly,raster - ); - } { - constexpr auto raytracingCommonDescriptorCount = 10u; + constexpr auto raytracingCommonDescriptorCount = 11u; IGPUDescriptorSetLayout::SBinding bindings[raytracingCommonDescriptorCount]; fillIotaDescriptorBindingDeclarations(bindings,ISpecializedShader::ESS_COMPUTE,raytracingCommonDescriptorCount); bindings[0].type = asset::EDT_UNIFORM_BUFFER; @@ -127,9 +113,10 @@ Renderer::Renderer(IVideoDriver* _driver, IAssetManager* _assetManager, 
scene::I bindings[4].type = asset::EDT_STORAGE_BUFFER; bindings[5].type = asset::EDT_STORAGE_IMAGE; bindings[6].type = asset::EDT_STORAGE_IMAGE; - bindings[7].type = asset::EDT_COMBINED_IMAGE_SAMPLER; + bindings[7].type = asset::EDT_STORAGE_IMAGE; bindings[8].type = asset::EDT_COMBINED_IMAGE_SAMPLER; bindings[9].type = asset::EDT_COMBINED_IMAGE_SAMPLER; + bindings[10].type = asset::EDT_COMBINED_IMAGE_SAMPLER; m_commonRaytracingDSLayout = m_driver->createGPUDescriptorSetLayout(bindings,bindings+raytracingCommonDescriptorCount); } @@ -158,7 +145,7 @@ Renderer::Renderer(IVideoDriver* _driver, IAssetManager* _assetManager, scene::I m_closestHitDSLayout = m_driver->createGPUDescriptorSetLayout(bindings,bindings+closestHitDescriptorCount); } { - constexpr auto resolveDescriptorCount = 7u; + constexpr auto resolveDescriptorCount = 8u; IGPUDescriptorSetLayout::SBinding bindings[resolveDescriptorCount]; fillIotaDescriptorBindingDeclarations(bindings,ISpecializedShader::ESS_COMPUTE,resolveDescriptorCount); bindings[0].type = asset::EDT_UNIFORM_BUFFER; @@ -168,9 +155,11 @@ Renderer::Renderer(IVideoDriver* _driver, IAssetManager* _assetManager, scene::I bindings[2].samplers = &sampler; bindings[3].type = asset::EDT_COMBINED_IMAGE_SAMPLER; bindings[3].samplers = &sampler; - bindings[4].type = asset::EDT_STORAGE_IMAGE; + bindings[4].type = asset::EDT_COMBINED_IMAGE_SAMPLER; + bindings[4].samplers = &sampler; bindings[5].type = asset::EDT_STORAGE_IMAGE; bindings[6].type = asset::EDT_STORAGE_IMAGE; + bindings[7].type = asset::EDT_STORAGE_IMAGE; m_resolveDSLayout = m_driver->createGPUDescriptorSetLayout(bindings,bindings+resolveDescriptorCount); } @@ -212,8 +201,11 @@ Renderer::InitializationData Renderer::initSceneObjects(const SAssetBundle& mesh { case Enum::DIRECT: maxPathDepth = 2u; + hideEnvironment = integrator->direct.hideEnvironment; break; case Enum::PATH: + hideEnvironment = integrator->path.hideEnvironment; + [[fallthrough]]; case Enum::VOL_PATH_SIMPLE: case Enum::VOL_PATH: case Enum::BDPT: @@ -293,7 +285,7 @@ Renderer::InitializationData Renderer::initSceneObjects(const SAssetBundle& mesh // one instance data per instance of a batch core::smart_refctd_ptr newInstanceDataBuffer; - constexpr uint16_t minTrisBatch = MAX_TRIANGLES_IN_BATCH>>1u; + constexpr uint16_t minTrisBatch = MAX_TRIANGLES_IN_BATCH>>3u; // allow small allocations to fight fragmentation constexpr uint16_t maxTrisBatch = MAX_TRIANGLES_IN_BATCH; constexpr uint8_t minVertexSize = asset::getTexelOrBlockBytesize()+ @@ -304,8 +296,8 @@ Renderer::InitializationData Renderer::initSceneObjects(const SAssetBundle& mesh constexpr uint16_t minIndicesBatch = minTrisBatch*kIndicesPerTriangle; CPUMeshPacker::AllocationParams allocParams; - allocParams.vertexBuffSupportedByteSize = 1u<<31u; - allocParams.vertexBufferMinAllocByteSize = minTrisBatch*minVertexSize; + allocParams.vertexBuffSupportedByteSize = (1u<<31u)-1; // RTX cards + allocParams.vertexBufferMinAllocByteSize = minTrisBatch*minVertexSize; // under max vertex reuse allocParams.indexBuffSupportedCnt = (allocParams.vertexBuffSupportedByteSize/allocParams.vertexBufferMinAllocByteSize)*minIndicesBatch; allocParams.indexBufferMinAllocCnt = minIndicesBatch; allocParams.MDIDataBuffSupportedCnt = allocParams.indexBuffSupportedCnt/minIndicesBatch; @@ -318,7 +310,7 @@ Renderer::InitializationData Renderer::initSceneObjects(const SAssetBundle& mesh IMeshPackerV2Base::SupportedFormatsContainer formats; formats.insert(EF_R32G32B32_SFLOAT); - formats.insert(EF_R32G32_UINT); + 
formats.insert(EF_R32G32B32_UINT); auto cpump = core::make_smart_refctd_ptr>(allocParams,formats,minTrisBatch,maxTrisBatch); uint32_t mdiBoundMax=0u,batchInstanceBoundTotal=0u; core::vector allocData; @@ -347,20 +339,23 @@ Renderer::InitializationData Renderer::initSceneObjects(const SAssetBundle& mesh assert(meshBuffer->getInstanceCount()==instanceCount); // We'll disable certain attributes to ensure we only copy position, normal and uv attribute SVertexInputParams& vertexInput = meshBuffer->getPipeline()->getVertexInputParams(); - // but we'll pack normals and UVs together to save one SSBO binding (and quantize UVs to half floats) + // but we'll pack normals and UVs together to save one SSBO binding, but no quantization of UVs to keep accurate floating point precision for baricentrics constexpr auto freeBinding = 15u; vertexInput.attributes[combinedNormalUVAttributeIx].binding = freeBinding; - vertexInput.attributes[combinedNormalUVAttributeIx].format = EF_R32G32_UINT; + vertexInput.attributes[combinedNormalUVAttributeIx].format = EF_R32G32B32_UINT; vertexInput.attributes[combinedNormalUVAttributeIx].relativeOffset = 0u; vertexInput.enabledBindingFlags |= 0x1u<getBaseVertex(); + struct CombinedNormalUV { - uint32_t nml; - uint16_t u,v; + uint32_t normal; + float u, v; }; + static_assert(sizeof(CombinedNormalUV) == sizeof(float) * 3u); + auto newBuff = core::make_smart_refctd_ptr(sizeof(CombinedNormalUV)*approxVxCount); auto* dst = reinterpret_cast(newBuff->getPointer())+meshBuffer->getBaseVertex(); meshBuffer->setVertexBufferBinding({0u,newBuff},freeBinding); @@ -369,11 +364,11 @@ Renderer::InitializationData Renderer::initSceneObjects(const SAssetBundle& mesh vertexInput.attributes[normalAttr].format = EF_R32_UINT; for (auto i=0u; igetAttribute(&dst[i].nml,normalAttr,i); + meshBuffer->getAttribute(&dst[i].normal,normalAttr,i); core::vectorSIMDf uv; meshBuffer->getAttribute(uv,2u,i); - dst[i].u = core::Float16Compressor::compress(uv.x); - dst[i].v = core::Float16Compressor::compress(uv.y); + dst[i].u = uv.x; + dst[i].v = uv.y; } } @@ -401,7 +396,11 @@ Renderer::InitializationData Renderer::initSceneObjects(const SAssetBundle& mesh allocData.resize(meshBuffersToProcess.size()); - cpump->alloc(allocData.data(),meshBuffersToProcess.begin(),meshBuffersToProcess.end()); + if (!cpump->alloc(allocData.data(),meshBuffersToProcess.begin(),meshBuffersToProcess.end())) + { + printf("[ERROR] Failed to Allocate Mesh data in SSBOs, quitting!\n"); + exit(-42); + } cpump->shrinkOutputBuffersSize(); cpump->instantiateDataStorage(); @@ -1139,6 +1138,7 @@ void Renderer::initSceneResources(SAssetBundle& meshes, nbl::io::path&& _sampleS } std::cout << "\tmaxPathDepth = " << maxPathDepth << std::endl; std::cout << "\tnoRussianRouletteDepth = " << noRussianRouletteDepth << std::endl; + std::cout << "\thideEnvironment = " << hideEnvironment << std::endl; std::cout << "\tmaxSamples = " << maxSensorSamples << std::endl; } } @@ -1178,7 +1178,7 @@ void Renderer::deinitSceneResources() m_finalEnvmap = nullptr; m_envMapImportanceSampling.deinitResources(); - m_staticViewData = {{0u,0u},0u,0u,0u,0u,core::infinity(),{}}; + m_staticViewData = {{0u,0u},0u,0u,0u,0u,false,core::infinity(),{}}; auto rr = m_rrManager->getRadeonRaysAPI(); rr->DetachAll(); @@ -1194,6 +1194,7 @@ void Renderer::deinitSceneResources() maxPathDepth = DefaultPathDepth; noRussianRouletteDepth = 5u; + hideEnvironment = false; maxSensorSamples = MaxFreeviewSamples; } @@ -1223,7 +1224,8 @@ void Renderer::initScreenSizedResources( int32_t cascadeCount, 
float cascadeLuminanceBase, float cascadeLuminanceStart, - const float Emin + const float Emin, + const nbl::core::vector& clipPlanes ) { float maxEmitterRadianceLuma; @@ -1238,7 +1240,8 @@ void Renderer::initScreenSizedResources( const float RGB19E7_MaxLuma = std::exp2(63.f); if (cascadeCountstd::numeric_limits::min(); if (core::isnan(cascadeLuminanceStart)) cascadeLuminanceStart = baseIsKnown ? (maxEmitterRadianceLuma*std::pow(cascadeLuminanceBase,-cascadeSegmentCount)):Emin; + // rationale, we don't have NEE and BRDF importance sampling samples with throughput <= 1.0 + // However we have RIS, and that can complicate this assumption a bit if (!baseIsKnown) cascadeLuminanceBase = core::max(std::pow(maxEmitterRadianceLuma/cascadeLuminanceStart,1.f/cascadeSegmentCount),1.0625f); std::cout << "Re-Weighting Monte Carlo = ENABLED [cascadeCount: "< void @@ -1299,6 +1305,17 @@ void Renderer::initScreenSizedResources( m_staticViewData.sampleSequenceStride = SampleSequence::computeQuantizedDimensions(maxPathDepth); auto stream = std::ofstream("runtime_defines.glsl"); + for (auto i=0; im_global.getVTStorageViewCount() << "\n" << m_globalMeta->m_global.m_materialCompilerGLSL_declarations << "#ifndef MAX_RAYS_GENERATED\n" @@ -1315,6 +1332,9 @@ void Renderer::initScreenSizedResources( // cull m_cullGPUShader = gpuSpecializedShaderFromFile(m_assetManager,m_driver,"../cull.comp"); + // visbuffer + m_vertGPUShader = gpuSpecializedShaderFromFile(m_assetManager, m_driver, "../fillVisBuffer.vert"); + // raygen m_raygenGPUShader = gpuSpecializedShaderFromFile(m_assetManager,m_driver,"../raygen.comp"); @@ -1360,7 +1380,7 @@ void Renderer::initScreenSizedResources( if (static_cast(m_driver)->runningInRenderdoc()) // makes Renderdoc capture the modifications done by OpenCL { interopBuffer.buffer = m_driver->createUpStreamingGPUBufferOnDedMem(size); - //interopBuffer.buffer->getBoundMemory()->mapMemoryRange(IDriverMemoryAllocation::EMCAF_WRITE,{0u,size}) +// interopBuffer.buffer->getBoundMemory()->mapMemoryRange(IDriverMemoryAllocation::EMCAF_READ_AND_WRITE,{0u,size}); } else interopBuffer.buffer = m_driver->createDeviceLocalGPUBufferOnDedMem(size); @@ -1388,11 +1408,12 @@ void Renderer::initScreenSizedResources( m_accumulation = createScreenSizedTexture(EF_R32G32_UINT,(cascadeCount+1u)*m_staticViewData.samplesPerPixelPerDispatch); // one more (first) layer because of accumulation metadata for a path m_albedoAcc = createScreenSizedTexture(EF_R32_UINT,m_staticViewData.samplesPerPixelPerDispatch); m_normalAcc = createScreenSizedTexture(EF_R32_UINT,m_staticViewData.samplesPerPixelPerDispatch); + m_maskAcc = createScreenSizedTexture(EF_R16_UNORM,m_staticViewData.samplesPerPixelPerDispatch); m_tonemapOutput = createScreenSizedTexture(EF_R16G16B16A16_SFLOAT); m_albedoRslv = createScreenSizedTexture(EF_A2B10G10R10_UNORM_PACK32); m_normalRslv = createScreenSizedTexture(EF_R16G16B16A16_SFLOAT); - constexpr uint32_t MaxDescritorUpdates = 10u; + constexpr uint32_t MaxDescritorUpdates = 11u; IGPUDescriptorSet::SDescriptorInfo infos[MaxDescritorUpdates]; IGPUDescriptorSet::SWriteDescriptorSet writes[MaxDescritorUpdates]; @@ -1409,20 +1430,21 @@ void Renderer::initScreenSizedResources( setImageInfo(infos+2,asset::EIL_GENERAL,core::smart_refctd_ptr(m_accumulation)); setImageInfo(infos+5,asset::EIL_GENERAL,core::smart_refctd_ptr(m_albedoAcc)); setImageInfo(infos+6,asset::EIL_GENERAL,core::smart_refctd_ptr(m_normalAcc)); + setImageInfo(infos+7,asset::EIL_GENERAL,core::smart_refctd_ptr(m_maskAcc)); // envmap { - 
setImageInfo(infos+7,asset::EIL_GENERAL,core::smart_refctd_ptr(m_finalEnvmap)); + setImageInfo(infos+8,asset::EIL_GENERAL,core::smart_refctd_ptr(m_finalEnvmap)); ISampler::SParams samplerParams = { ISampler::ETC_REPEAT, ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETBC_FLOAT_OPAQUE_BLACK, ISampler::ETF_LINEAR, ISampler::ETF_LINEAR, ISampler::ESMM_LINEAR, 0u, false, ECO_ALWAYS }; - infos[7].image.sampler = m_driver->createGPUSampler(samplerParams); - infos[7].image.imageLayout = EIL_SHADER_READ_ONLY_OPTIMAL; + infos[8].image.sampler = m_driver->createGPUSampler(samplerParams); + infos[8].image.imageLayout = EIL_SHADER_READ_ONLY_OPTIMAL; } // warpmap { - setImageInfo(infos+8,asset::EIL_GENERAL,core::smart_refctd_ptr(warpMap)); + setImageInfo(infos+9,asset::EIL_GENERAL,core::smart_refctd_ptr(warpMap)); ISampler::SParams samplerParams = { ISampler::ETC_REPEAT, ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETBC_FLOAT_OPAQUE_BLACK, ISampler::ETF_LINEAR, ISampler::ETF_LINEAR, ISampler::ESMM_LINEAR, 0u, false, ECO_ALWAYS }; - infos[8].image.sampler = m_driver->createGPUSampler(samplerParams); - infos[8].image.imageLayout = EIL_SHADER_READ_ONLY_OPTIMAL; + infos[9].image.sampler = m_driver->createGPUSampler(samplerParams); + infos[9].image.imageLayout = EIL_SHADER_READ_ONLY_OPTIMAL; } IGPUDescriptorSet::SDescriptorInfo luminanceDescriptorInfo = {}; @@ -1442,7 +1464,7 @@ void Renderer::initScreenSizedResources( for (auto i=0u; i<2u; i++) m_commonRaytracingDS[i] = m_driver->createGPUDescriptorSet(core::smart_refctd_ptr(m_commonRaytracingDSLayout)); - constexpr auto descriptorUpdateCount = 10u; + constexpr auto descriptorUpdateCount = 11u; setDstSetAndDescTypesOnWrites(m_commonRaytracingDS[0].get(),writes,infos,{ EDT_UNIFORM_BUFFER, EDT_UNIFORM_TEXEL_BUFFER, @@ -1451,17 +1473,18 @@ void Renderer::initScreenSizedResources( EDT_STORAGE_BUFFER, EDT_STORAGE_IMAGE, EDT_STORAGE_IMAGE, + EDT_STORAGE_IMAGE, EDT_COMBINED_IMAGE_SAMPLER, EDT_COMBINED_IMAGE_SAMPLER, }); // Set last write - writes[9].binding = 9u; - writes[9].arrayElement = 0u; - writes[9].count = 1u; - writes[9].descriptorType = EDT_COMBINED_IMAGE_SAMPLER; - writes[9].dstSet = m_commonRaytracingDS[0].get(); - writes[9].info = &luminanceDescriptorInfo; + writes[10].binding = 10u; + writes[10].arrayElement = 0u; + writes[10].count = 1u; + writes[10].descriptorType = EDT_COMBINED_IMAGE_SAMPLER; + writes[10].dstSet = m_commonRaytracingDS[0].get(); + writes[10].info = &luminanceDescriptorInfo; m_driver->updateDescriptorSets(descriptorUpdateCount,writes,0u,nullptr); // set up second DS @@ -1538,23 +1561,24 @@ void Renderer::initScreenSizedResources( } setImageInfo(infos+2,asset::EIL_GENERAL,std::move(albedoSamplerView)); setImageInfo(infos+3,asset::EIL_GENERAL,core::smart_refctd_ptr(m_normalAcc)); - setImageInfo(infos+4,asset::EIL_GENERAL,core::smart_refctd_ptr(m_tonemapOutput)); + setImageInfo(infos+4,asset::EIL_GENERAL,core::smart_refctd_ptr(m_maskAcc)); + setImageInfo(infos+5,asset::EIL_GENERAL,core::smart_refctd_ptr(m_tonemapOutput)); core::smart_refctd_ptr albedoStorageView; { IGPUImageView::SCreationParams viewparams = m_albedoRslv->getCreationParameters(); viewparams.format = EF_R32_UINT; albedoStorageView = m_driver->createGPUImageView(std::move(viewparams)); } - setImageInfo(infos+5,asset::EIL_GENERAL,std::move(albedoStorageView)); - setImageInfo(infos+6,asset::EIL_GENERAL,core::smart_refctd_ptr(m_normalRslv)); + setImageInfo(infos+6,asset::EIL_GENERAL,std::move(albedoStorageView)); + 
setImageInfo(infos+7,asset::EIL_GENERAL,core::smart_refctd_ptr(m_normalRslv)); setDstSetAndDescTypesOnWrites(m_resolveDS.get(),writes,infos,{ EDT_UNIFORM_BUFFER, - EDT_COMBINED_IMAGE_SAMPLER,EDT_COMBINED_IMAGE_SAMPLER,EDT_COMBINED_IMAGE_SAMPLER, + EDT_COMBINED_IMAGE_SAMPLER,EDT_COMBINED_IMAGE_SAMPLER,EDT_COMBINED_IMAGE_SAMPLER,EDT_COMBINED_IMAGE_SAMPLER, EDT_STORAGE_IMAGE,EDT_STORAGE_IMAGE,EDT_STORAGE_IMAGE }); } - m_driver->updateDescriptorSets(7u,writes,0u,nullptr); + m_driver->updateDescriptorSets(8u,writes,0u,nullptr); m_visibilityBuffer = m_driver->addFrameBuffer(); m_visibilityBuffer->attach(EFAP_DEPTH_ATTACHMENT,createScreenSizedTexture(EF_D32_SFLOAT)); @@ -1602,6 +1626,7 @@ void Renderer::deinitScreenSizedResources() m_accumulation = m_tonemapOutput = nullptr; m_albedoAcc = m_albedoRslv = nullptr; m_normalAcc = m_normalRslv = nullptr; + m_maskAcc = nullptr; glFinish(); @@ -1631,6 +1656,7 @@ void Renderer::deinitScreenSizedResources() m_staticViewData.maxPathDepth = DefaultPathDepth; m_staticViewData.noRussianRouletteDepth = 5u; m_staticViewData.samplesPerPixelPerDispatch = 1u; + m_staticViewData.hideEnvmap = false; m_staticViewData.envMapPDFNormalizationFactor = core::infinity(); m_staticViewData.cascadeParams = {}; m_totalRaysCast = 0ull; @@ -1851,7 +1877,24 @@ bool Renderer::render(nbl::ITimer* timer, const float kappa, const float Emin, c bool compiledShaders = compileShadersFuture.get(); if(compiledShaders) { - m_cullPipeline = m_driver->createGPUComputePipeline(nullptr,core::smart_refctd_ptr(m_cullPipelineLayout), core::smart_refctd_ptr(m_cullGPUShader)); + m_cullPipeline = m_driver->createGPUComputePipeline(nullptr,core::smart_refctd_ptr(m_cullPipelineLayout), core::smart_refctd_ptr(m_cullGPUShader)); + { + IGPUSpecializedShader* shaders[] = {m_vertGPUShader.get(),m_fragGPUShader.get()}; + SPrimitiveAssemblyParams primitiveAssembly; + primitiveAssembly.primitiveType = EPT_TRIANGLE_LIST; + SRasterizationParams raster; + raster.faceCullingMode = EFCM_NONE; + auto _visibilityBufferFillPipelineLayout = m_driver->createGPUPipelineLayout( + nullptr,nullptr, + core::smart_refctd_ptr(m_rasterInstanceDataDSLayout), + core::smart_refctd_ptr(m_additionalGlobalDSLayout), + core::smart_refctd_ptr(m_cullDSLayout) + ); + m_visibilityBufferFillPipeline = m_driver->createGPURenderpassIndependentPipeline( + nullptr,std::move(_visibilityBufferFillPipelineLayout),shaders,shaders+2u, + SVertexInputParams{},SBlendParams{},primitiveAssembly,raster + ); + } m_raygenPipeline = m_driver->createGPUComputePipeline(nullptr,core::smart_refctd_ptr(m_raygenPipelineLayout), core::smart_refctd_ptr(m_raygenGPUShader)); m_closestHitPipeline = m_driver->createGPUComputePipeline(nullptr,core::smart_refctd_ptr(m_closestHitPipelineLayout), core::smart_refctd_ptr(m_closestHitGPUShader)); m_resolvePipeline = m_driver->createGPUComputePipeline(nullptr,core::smart_refctd_ptr(m_resolvePipelineLayout), core::smart_refctd_ptr(m_resolveGPUShader)); @@ -2095,7 +2138,17 @@ bool Renderer::traceBounce(uint32_t& raycount) std::cout << "[ERROR] RadeonRays Timed Out" << std::endl; return false; } + + if (static_cast(m_driver)->runningInRenderdoc()) + { + auto touchAllBytes = [](IGPUBuffer* buf)->void + { + auto ptr = reinterpret_cast(buf->getBoundMemory()->getMappedPointer()); + }; + touchAllBytes(m_intersectionBuffer[descSetIx].buffer.get()); + } } + // compute bounce (accumulate contributions and optionally generate rays) { diff --git a/22.RaytracedAO/Renderer.h b/22.RaytracedAO/Renderer.h index 912eadd7b..5c8e45738 100644 --- 
a/22.RaytracedAO/Renderer.h +++ b/22.RaytracedAO/Renderer.h @@ -55,7 +55,8 @@ class Renderer : public nbl::core::IReferenceCounted, public nbl::core::Interfac int32_t cascadeCount, float cascadeLuminanceBase, float cascadeLuminanceStart, - const float Emin + const float Emin, + const nbl::core::vector& clipPlanes={} ); void deinitScreenSizedResources(); @@ -180,6 +181,7 @@ class Renderer : public nbl::core::IReferenceCounted, public nbl::core::Interfac nbl::core::smart_refctd_ptr m_resolvePipelineLayout; nbl::core::smart_refctd_ptr m_cullGPUShader; + nbl::core::smart_refctd_ptr m_vertGPUShader,m_fragGPUShader; nbl::core::smart_refctd_ptr m_raygenGPUShader; nbl::core::smart_refctd_ptr m_closestHitGPUShader; nbl::core::smart_refctd_ptr m_resolveGPUShader; @@ -207,7 +209,8 @@ class Renderer : public nbl::core::IReferenceCounted, public nbl::core::Interfac nbl::core::smart_refctd_ptr bufferView; } sampleSequence; uint16_t maxPathDepth; - uint16_t noRussianRouletteDepth; + uint16_t noRussianRouletteDepth : 15; + uint16_t hideEnvironment : 1; uint32_t maxSensorSamples; // scene specific data @@ -254,6 +257,7 @@ class Renderer : public nbl::core::IReferenceCounted, public nbl::core::Interfac nbl::core::smart_refctd_ptr m_accumulation,m_tonemapOutput; nbl::core::smart_refctd_ptr m_albedoAcc,m_albedoRslv; nbl::core::smart_refctd_ptr m_normalAcc,m_normalRslv; + nbl::core::smart_refctd_ptr m_maskAcc; nbl::video::IFrameBuffer* m_visibilityBuffer,* m_colorBuffer; // Resources used for envmap sampling diff --git a/22.RaytracedAO/closestHit.comp b/22.RaytracedAO/closestHit.comp index a801b48d3..85d746b54 100644 --- a/22.RaytracedAO/closestHit.comp +++ b/22.RaytracedAO/closestHit.comp @@ -71,9 +71,10 @@ void main() // positions const vec3 lastVxPos = load_positions(batchInstanceData,indices); + if (!bool(batchInstanceData.determinantSignBit&0x80000000u)) + normalizedG = -normalizedG; + const bool frontfacing = dot(normalizedV,normalizedG)>=0.f; - const bool frontfacing = bool((batchInstanceData.determinantSignBit^floatBitsToUint(dot(normalizedV,normalizedG)))&0x80000000u); - // get material const nbl_glsl_MC_oriented_material_t material = nbl_glsl_MC_material_data_t_getOriented(batchInstanceData.material,frontfacing); contrib.color = contrib.albedo = nbl_glsl_MC_oriented_material_t_getEmissive(material, normalizedV); @@ -97,7 +98,7 @@ void main() ); const vec3 origin = dPdBary*compactBary+lastVxPos; - generate_next_rays( + rayMask = generate_next_rays( MAX_RAYS_GENERATED,material,frontfacing,vertex_depth, scramble_start_state,sampleID,outPixelLocation,origin, throughput,aovThroughputScale,contrib.albedo,contrib.worldspaceNormal @@ -117,22 +118,42 @@ void main() contrib.color *= throughput; const vec3 aovThroughput = throughput*aovThroughputScale; // - const bool pathToBeContinued = bool(rayMask); - if (pathToBeContinued) - addAccumulation(contrib.color,accumulationLocation); - else + if (isRWMCEnabled()) { - // need whole path throughput when splatting - contrib.color += fetchAccumulation(accumulationLocation); - const nbl_glsl_RWMC_SplattingParameters splat = nbl_glsl_RWMC_getCascade(staticViewData.cascadeParams,nbl_glsl_MC_colorToScalar(contrib.color)/pc.cummon.rcpFramesDispatched); - for (uint j=0u; j<2u; j++) - addAccumulationCascade( - contrib.color*splat.cascadeWeights[j],accumulationLocation, - samplesPerPixelPerDispatch,splat.lowerCascade+j - ); + const bool pathToBeContinued = bool(rayMask); + if (pathToBeContinued) + addAccumulation(contrib.color,accumulationLocation); + else + { + // need whole 
path throughput when splatting + contrib.color += fetchAccumulation(accumulationLocation); + const nbl_glsl_RWMC_SplattingParameters splat = nbl_glsl_RWMC_getCascade(staticViewData.cascadeParams,nbl_glsl_MC_colorToScalar(contrib.color)/pc.cummon.rcpFramesDispatched); + for (uint j=0u; j<2u; j++) + addAccumulationCascade( + contrib.color*splat.cascadeWeights[j],accumulationLocation, + samplesPerPixelPerDispatch,splat.lowerCascade+j + ); + } } + else + addAccumulation(contrib.color,accumulationLocation); // addAlbedo(contrib.albedo*aovThroughput,accumulationLocation); addWorldspaceNormal(contrib.worldspaceNormal*nbl_glsl_MC_colorToScalar(aovThroughput),accumulationLocation); + // only misses contribute to transparency + if (bool(staticViewData.sampleSequenceStride_hideEnvmap>>31)) + { + float mask = 0.f; + if (!hit) + { + // make the luma of throughput dictate transparency + mask = dot(aovThroughput,transpose(nbl_glsl_sRGBtoXYZ)[1]); + // only count transmissions + const vec2 texCoordUV = (vec2(accumulationLocation.xy)+vec2(0.5))/vec2(getImageDimensions(staticViewData)); + const vec3 seeThroughDir = normalize(mat3(pc.cummon.viewDirReconFactors)*vec3(texCoordUV,1.f)); + mask *= pow(max(dot(normalizedV,seeThroughDir),0.f),1024.f); + } + addMask(mask,accumulationLocation); + } } } \ No newline at end of file diff --git a/22.RaytracedAO/fillVisBuffer.frag b/22.RaytracedAO/fillVisBuffer.frag index 9bce3dc26..88a18455a 100644 --- a/22.RaytracedAO/fillVisBuffer.frag +++ b/22.RaytracedAO/fillVisBuffer.frag @@ -29,7 +29,7 @@ void main() vec2 bary = nbl_glsl_barycentric_frag_get(); const int triangleIDBitcount = findMSB(MAX_TRIANGLES_IN_BATCH-1)+1; - frontFacingTriangleIDDrawID_unorm16Bary_dBarydScreenHalf2x2[0] = bitfieldInsert(BackfacingBit_BatchInstanceGUID,gl_PrimitiveID,31-triangleIDBitcount,triangleIDBitcount)^(gl_FrontFacing ? 
0x0u:0x80000000u); + frontFacingTriangleIDDrawID_unorm16Bary_dBarydScreenHalf2x2[0] = bitfieldInsert(BackfacingBit_BatchInstanceGUID,gl_PrimitiveID,31-triangleIDBitcount,triangleIDBitcount); frontFacingTriangleIDDrawID_unorm16Bary_dBarydScreenHalf2x2[1] = packUnorm2x16(bary); frontFacingTriangleIDDrawID_unorm16Bary_dBarydScreenHalf2x2[2] = packHalf2x16(dFdx(bary)); frontFacingTriangleIDDrawID_unorm16Bary_dBarydScreenHalf2x2[3] = packHalf2x16(dFdy(bary)); diff --git a/22.RaytracedAO/fillVisBuffer.vert b/22.RaytracedAO/fillVisBuffer.vert index 6c9279e02..eca6aa925 100644 --- a/22.RaytracedAO/fillVisBuffer.vert +++ b/22.RaytracedAO/fillVisBuffer.vert @@ -10,6 +10,8 @@ #define _NBL_GLSL_EXT_MITSUBA_LOADER_INSTANCE_DATA_BINDING_ 0 #include "virtualGeometry.glsl" +#include "runtime_defines.glsl" + layout(set=2, binding=0, row_major) readonly restrict buffer PerInstancePerCamera { DrawData_t data[]; @@ -31,4 +33,25 @@ void main() const vec3 modelPos = nbl_glsl_fetchVtxPos(gl_VertexIndex,InstData.data[batchInstanceGUID]); nbl_glsl_barycentric_vert_set(modelPos); gl_Position = nbl_glsl_pseudoMul4x4with3x1(self.MVP,modelPos); + + // clipping +#ifdef CLIP_PLANE_0 + const vec4 worldPos = vec4(nbl_glsl_pseudoMul3x4with3x1(InstData.data[batchInstanceGUID].tform,modelPos),1.0); + gl_ClipDistance[0] = dot(CLIP_PLANE_0,worldPos); +#ifdef CLIP_PLANE_1 + gl_ClipDistance[1] = dot(CLIP_PLANE_1,worldPos); +#ifdef CLIP_PLANE_2 + gl_ClipDistance[2] = dot(CLIP_PLANE_2,worldPos); +#ifdef CLIP_PLANE_3 + gl_ClipDistance[3] = dot(CLIP_PLANE_3,worldPos); +#ifdef CLIP_PLANE_4 + gl_ClipDistance[4] = dot(CLIP_PLANE_4,worldPos); +#ifdef CLIP_PLANE_5 + gl_ClipDistance[5] = dot(CLIP_PLANE_5,worldPos); +#endif +#endif +#endif +#endif +#endif +#endif } diff --git a/22.RaytracedAO/main.cpp b/22.RaytracedAO/main.cpp index 5c44b2396..855a6ac63 100644 --- a/22.RaytracedAO/main.cpp +++ b/22.RaytracedAO/main.cpp @@ -60,6 +60,9 @@ class RaytracerExampleEventReceiver : public nbl::IEventReceiver case ReloadKey: reloadKeyPressed = true; break; + case OverloadCameraKey: + overloadCameraKeyPressed = true; + break; case QuitKey: running = false; return true; @@ -89,6 +92,8 @@ class RaytracerExampleEventReceiver : public nbl::IEventReceiver inline bool isReloadKeyPressed() const { return reloadKeyPressed; } + inline bool isOverloadCameraKeyPressed() const { return overloadCameraKeyPressed; } + inline void resetKeys() { skipKeyPressed = false; @@ -98,6 +103,7 @@ class RaytracerExampleEventReceiver : public nbl::IEventReceiver screenshotKeyPressed = false; logProgressKeyPressed = false; reloadKeyPressed = false; + overloadCameraKeyPressed = false; } private: @@ -110,6 +116,7 @@ class RaytracerExampleEventReceiver : public nbl::IEventReceiver static constexpr nbl::EKEY_CODE LogProgressKey = nbl::KEY_KEY_L; static constexpr nbl::EKEY_CODE BeautyKey = nbl::KEY_KEY_B; static constexpr nbl::EKEY_CODE ReloadKey = nbl::KEY_F5; + static constexpr nbl::EKEY_CODE OverloadCameraKey = nbl::KEY_KEY_C; bool running; bool renderingBeauty; @@ -121,6 +128,7 @@ class RaytracerExampleEventReceiver : public nbl::IEventReceiver bool screenshotKeyPressed; bool logProgressKeyPressed; bool reloadKeyPressed; + bool overloadCameraKeyPressed; }; struct PersistentState @@ -264,7 +272,14 @@ int main(int argc, char** argv) for (auto i = 1ul; i < argc; ++i) arguments.emplace_back(argv[i]); } - + std::cout << std::endl; + std::cout << "-- Build URL:" << std::endl; + std::cout << NBL_BUILD_URL << std::endl; + std::cout << std::endl; + std::cout << "-- Build log:" << 
std::endl; + std::cout << NBL_GIT_LOG << std::endl; + std::cout << std::endl; + bool applicationIsReloaded = false; PersistentState applicationState; { @@ -536,6 +551,7 @@ int main(int argc, char** argv) float Emin = 0.05f; bool envmap = false; float envmapRegFactor = 0.0f; + core::vector clipPlanes; scene::CSceneNodeAnimatorCameraModifiedMaya* getInteractiveCameraAnimator() { @@ -614,8 +630,8 @@ int main(int argc, char** argv) }; const bool shouldHaveSensorIdxInFileName = globalMeta->m_global.m_sensors.size() > 1; - std::vector sensors = std::vector(); - std::vector cubemapRenders = std::vector(); + std::vector sensors; + std::vector cubemapRenders; auto extractAndAddToSensorData = [&](const ext::MitsubaLoader::CElementSensor& sensor, uint32_t idx) -> bool { @@ -683,6 +699,17 @@ int main(int argc, char** argv) return false; } mainSensorData.type = sensor.type; + + for (auto i=0; iclipPlanes[i]; + if ((plane!=core::vectorSIMDf()).any()) + { + mainSensorData.clipPlanes.push_back(plane); + printf("Found Clip Plane %f,%f,%f,%f\n",plane[0],plane[1],plane[2],plane[3]); + } + } + mainSensorData.rotateSpeed = cameraBase->rotateSpeed; mainSensorData.stepZoomSpeed = cameraBase->zoomSpeed; mainSensorData.moveSpeed = cameraBase->moveSpeed; @@ -753,6 +780,9 @@ int main(int argc, char** argv) auto tpose = core::transpose(core::matrix4SIMD(relativeTransform)); mainCamUp = tpose.rows[1]; mainCamView = tpose.rows[2]; + + std::cout << "\t Camera Reconstructed UpVector = <" << mainCamUp.x << "," << mainCamUp.y << "," << mainCamUp.z << ">" << std::endl; + std::cout << "\t Camera Reconstructed Forward = <" << mainCamView.x << "," << mainCamView.y << "," << mainCamView.z << ">" << std::endl; } float realFoVDegrees; @@ -874,8 +904,19 @@ int main(int argc, char** argv) staticCamera->setTarget(target.getAsVector3df()); } - if (core::dot(core::normalize(core::cross(staticCamera->getUpVector(),mainCamView)),core::cross(mainCamUp,mainCamView)).x<0.99f) - staticCamera->setUpVector(mainCamUp); + { + auto declaredUp = cameraBase->up; + auto reconstructedRight = core::cross(declaredUp,mainCamView); + auto actualRight = core::cross(mainCamUp,mainCamView); + // special formulation avoiding multiple sqrt and inversesqrt to preserve precision + const float dp = core::dot(reconstructedRight,actualRight).x/core::sqrt((core::dot(reconstructedRight,reconstructedRight)*core::dot(actualRight,actualRight)).x); + const float pb = core::dot(declaredUp,mainCamView).x/core::sqrt((core::dot(declaredUp,declaredUp)*core::dot(mainCamView,mainCamView)).x); + std::cout << "\t Camera Reconstructed UpVector match score = "<< dp << std::endl; + if (dp>0.97f && dp<1.03f && abs(pb)<0.9996f) + staticCamera->setUpVector(declaredUp); + else + staticCamera->setUpVector(mainCamUp); + } // if (ortho) @@ -1059,7 +1100,7 @@ int main(int argc, char** argv) if(needsReinit) { renderer->deinitScreenSizedResources(); - renderer->initScreenSizedResources(sensor.width,sensor.height,sensor.envmapRegFactor,sensor.cascadeCount,sensor.cascadeLuminanceBase,sensor.cascadeLuminanceStart,sensor.Emin); + renderer->initScreenSizedResources(sensor.width,sensor.height,sensor.envmapRegFactor,sensor.cascadeCount,sensor.cascadeLuminanceBase,sensor.cascadeLuminanceStart,sensor.Emin,sensor.clipPlanes); } smgr->setActiveCamera(sensor.staticCamera); @@ -1194,7 +1235,7 @@ int main(int argc, char** argv) { renderer->deinitScreenSizedResources(); const auto& sensorData = sensors[activeSensor]; - 
renderer->initScreenSizedResources(sensorData.width,sensorData.height,sensorData.envmapRegFactor,sensorData.cascadeCount,sensorData.cascadeLuminanceBase,sensorData.cascadeLuminanceStart,sensorData.Emin); + renderer->initScreenSizedResources(sensorData.width,sensorData.height,sensorData.envmapRegFactor,sensorData.cascadeCount,sensorData.cascadeLuminanceBase,sensorData.cascadeLuminanceStart,sensorData.Emin,sensorData.clipPlanes); } smgr->setActiveCamera(sensors[activeSensor].interactiveCamera); @@ -1217,12 +1258,39 @@ int main(int argc, char** argv) sensors[activeSensor].resetInteractiveCamera(); std::cout << "Interactive Camera Position and Target has been Reset." << std::endl; } - if(receiver.isNextPressed()) + else if(receiver.isOverloadCameraKeyPressed()) + { + pfd::open_file file("Choose XML file to overload camera with (only first sensor overrides)", "../../media/mitsuba", { "XML files (.xml)", "*.xml" }); + if (!file.result().empty()) + { + const auto filePath = file.result()[0]; + using namespace nbl::asset; + smart_refctd_ptr mitsubaMetadata; + { + static const IAssetLoader::SAssetLoadParams mitsubaLoaderParams = { 0, nullptr, IAssetLoader::ECF_DONT_CACHE_REFERENCES, nullptr, IAssetLoader::ELPF_LOAD_METADATA_ONLY }; + auto meshes_bundle = device->getAssetManager()->getAsset(filePath.data(),mitsubaLoaderParams); + if (!meshes_bundle.getContents().empty()) + mitsubaMetadata = smart_refctd_ptr(static_cast(meshes_bundle.getMetadata())); + } + if (!mitsubaMetadata || mitsubaMetadata->m_global.m_sensors.empty()) + os::Printer::log("ERROR (" + std::to_string(__LINE__) + " line): The xml file is invalid/cannot be loaded! File path: " + filePath, ELL_ERROR); + else + { + const uint32_t originalSensorCount = sensors.size(); + uint32_t idx = originalSensorCount; + for (const auto& sensor : mitsubaMetadata->m_global.m_sensors) + extractAndAddToSensorData(sensor,idx++); + setActiveSensor(originalSensorCount); + } + writeLastRunState = true; + } + } + else if(receiver.isNextPressed()) { setActiveSensor(activeSensor + 1); writeLastRunState = true; } - if(receiver.isPreviousPressed()) + else if(receiver.isPreviousPressed()) { setActiveSensor(activeSensor - 1); writeLastRunState = true; @@ -1322,7 +1390,8 @@ int main(int argc, char** argv) auto samples = renderer->getTotalSamplesComputed(); auto rays = renderer->getTotalRaysCast(); const double microsecondsElapsed = std::chrono::duration_cast(std::chrono::steady_clock::now()-start).count(); - str << L"Raytraced Shadows Demo - Nabla Engine MegaSamples: " << samples/1000000ull + str << L"Nabla Path Tracer: " << applicationState.zipPath.c_str() << "\\" << applicationState.xmlPath.c_str() + << " MegaSamples: " << samples/1000000ull << " MSample/s: " << double(samples)/microsecondsElapsed << " MRay/s: " << double(rays)/microsecondsElapsed; diff --git a/22.RaytracedAO/raygen.comp b/22.RaytracedAO/raygen.comp index 71e6ff7f2..2b45dec4d 100644 --- a/22.RaytracedAO/raygen.comp +++ b/22.RaytracedAO/raygen.comp @@ -41,11 +41,10 @@ void main() if (hit) { // vis buffer decode - const bool frontfacing = !bool(visBuffer[0]&0x80000000u); const int triangleIDBitcount = findMSB(MAX_TRIANGLES_IN_BATCH-1)+1; const uint triangleID = bitfieldExtract(visBuffer[0],31-triangleIDBitcount,triangleIDBitcount); const uint batchInstanceGUID = bitfieldExtract(visBuffer[0],0,31-triangleIDBitcount); - const vec2 compactBary = unpackUnorm2x16(visBuffer[1]); +//const vec2 compactBary = unpackUnorm2x16(visBuffer[1]); #ifdef TEX_PREFETCH_STREAM // TODO: separate pipeline and separate 
out the barycentric derivative FBO attachment, only write if need to, only fetch if `needs_texture_prefetch` const mat2 dBarydScreen = mat2(unpackHalf2x16(visBuffer[2]),unpackHalf2x16(visBuffer[3])); @@ -57,17 +56,47 @@ void main() // load vertex data const vec3 lastVxPos = load_positions(batchInstanceData,indices); + if (!bool(batchInstanceData.determinantSignBit&0x80000000u)) + normalizedG = -normalizedG; + const float VdotG = dot(normalizedV,normalizedG); + const bool frontfacing = VdotG>=0.f; - // get material while waiting for indices + // get material const nbl_glsl_MC_oriented_material_t material = nbl_glsl_MC_material_data_t_getOriented(batchInstanceData.material,frontfacing); contrib.color = contrib.albedo = nbl_glsl_MC_oriented_material_t_getEmissive(material,normalizedV); // little optimization for non-twosided materials if (material.genchoice_count!=0u) { - // get initial scramble key while waiting for vertex positions + // get initial scramble key const nbl_glsl_xoroshiro64star_state_t scramble_start_state = texelFetch(scramblebuf,ivec2(outPixelLocation),0).rg; + vec3 origin; + #if RECOMPUTE_BARY + // we know the ray will intersect the triangle + vec2 compactBary; + { + // reversed order of arguments for each cross cause V is negative + const vec3 ray_cross_e2 = cross(dPdBary[1],normalizedV); + const float detRcp = 1.f/dot(dPdBary[0],ray_cross_e2); + // assert(!isinf(detRcp)); + const vec3 s = (pc.cummon.viewDirReconFactors[3]-lastVxPos)*detRcp; + const float u = dot(s,ray_cross_e2); + // assert(0.f<=u && u<=1.f) + const vec3 s_cross_e1 = cross(s,dPdBary[0]); + const float v = -dot(normalizedV,s_cross_e1); + // assert(0.f<=v && v<=1.f) + compactBary = vec2(u,v); + // + const float t = dot(dPdBary[1],s_cross_e1); + //assert(t>0.f); + origin = pc.cummon.viewDirReconFactors[3]-normalizedV*t; + } + #else + const vec2 compactBary = unpackUnorm2x16(visBuffer[1]); + #endif + origin = dPdBary*compactBary+lastVxPos; + // normalizedN = load_normal_and_prefetch_textures( batchInstanceData,indices,compactBary,material @@ -75,10 +104,6 @@ void main() ,dBarydScreen #endif ); - - const vec3 origin = dPdBary*compactBary+lastVxPos; - // does this buy us any precision? (answer run CI!) - //normalizedV = normalize(pc.cummon.viewDirReconFactors[3]-origin); // generate rays const uint vertex_depth = 1u; @@ -105,38 +130,57 @@ void main() { const uvec3 coord = uvec3(outPixelLocation,i); - nbl_glsl_RWMC_SplattingParameters splat = nbl_glsl_RWMC_getCascade(staticViewData.cascadeParams,luma); - const bool pathToBeContinued = bool((rayMask>>i)&0x1u); - if (pathToBeContinued) - { - storeAccumulation(contrib.color*pc.cummon.rcpFramesDispatched,coord); - splat.cascadeWeights = vec2(0.f,0.f); - } - const uint higherCascade = splat.lowerCascade+1u; - const uint cascadeCount = staticViewData.cascadeParams.penultimateCascadeIx+2u; - for (uint cascadeIx=0u; cascadeIx>i)&0x1u); + if (pathToBeContinued) + { + storeAccumulation(contrib.color*pc.cummon.rcpFramesDispatched,coord); + splat.cascadeWeights = vec2(0.f,0.f); + } + + const uint higherCascade = splat.lowerCascade+1u; + const uint cascadeCount = staticViewData.cascadeParams.penultimateCascadeIx+2u; + for (uint cascadeIx=0u; cascadeIx>31); // clear accumulations totally if beginning a new frame if (firstFrame) { + if (!isRWMCEnabled()) + storeAccumulation(contrib.color,coord); storeAlbedo(contrib.albedo,coord); storeWorldspaceNormal(contrib.worldspaceNormal,coord); + storeMask(hideEnvmap&&(!hit) ? 
1.f:0.f,coord); } else { + if (!isRWMCEnabled()) + { + const vec3 prev = fetchAccumulation(coord); + const vec3 delta = (contrib.color-prev)*pc.cummon.rcpFramesDispatched; + if (any(greaterThan(abs(delta),vec3(exp2(-19.f))))) + storeAccumulation(prev+delta,coord); + } addAlbedo(contrib.albedo,coord,pc.cummon.rcpFramesDispatched); addWorldspaceNormal(contrib.worldspaceNormal,coord,pc.cummon.rcpFramesDispatched); + if (hideEnvmap) + addMask(hit ? 0.f:1.f,coord,pc.cummon.rcpFramesDispatched); } } } diff --git a/22.RaytracedAO/raytraceCommon.glsl b/22.RaytracedAO/raytraceCommon.glsl index fe7fc667c..10f49273f 100644 --- a/22.RaytracedAO/raytraceCommon.glsl +++ b/22.RaytracedAO/raytraceCommon.glsl @@ -44,10 +44,11 @@ layout(set = 2, binding = 4) restrict coherent buffer RayCount // maybe remove c // aovs layout(set = 2, binding = 5, r32ui) restrict uniform uimage2DArray albedoAOV; layout(set = 2, binding = 6, r32ui) restrict uniform uimage2DArray normalAOV; +layout(set = 2, binding = 7, r16) restrict uniform image2DArray maskAOV; // environment emitter -layout(set = 2, binding = 7) uniform sampler2D envMap; -layout(set = 2, binding = 8) uniform sampler2D warpMap; -layout(set = 2, binding = 9) uniform sampler2D luminance; +layout(set = 2, binding = 8) uniform sampler2D envMap; +layout(set = 2, binding = 9) uniform sampler2D warpMap; +layout(set = 2, binding = 10) uniform sampler2D luminance; void clear_raycount() { @@ -73,6 +74,11 @@ uvec3 get_triangle_indices(in nbl_glsl_ext_Mitsuba_Loader_instance_data_t batchI #include #include +bool isRWMCEnabled() +{ + return staticViewData.cascadeParams.penultimateCascadeIx!=uint(-2); +} + vec3 fetchAccumulation(in uvec3 coord) { const uvec2 data = imageLoad(accumulation,ivec3(coord)).rg; @@ -85,7 +91,7 @@ void storeAccumulation(in vec3 color, in uvec3 coord) } void addAccumulation(in vec3 delta, in uvec3 coord) { - if (any(greaterThan(delta,vec3(exp2(-19.f))))) + if (any(greaterThan(abs(delta),vec3(exp2(-19.f))))) { const vec3 prev = fetchAccumulation(coord); const vec3 newVal = prev+delta; @@ -98,20 +104,18 @@ void addAccumulation(in vec3 delta, in uvec3 coord) // TODO: use a R17G17B17_UNORM format matched to cascade range, then use 13 bits to store last spp count (max 8k spp renders) // This way we can avoid writing every cascade every path storage -void nextSampleAccumulationCascade(in vec3 weightedDelta, uvec3 coord, in uint samplesPerPixelPerDispatch, in uint cascadeIndex, in float rcpN) +void nextSampleAccumulationCascade(in bool firstFrame, in vec3 weightedDelta, uvec3 coord, in uint samplesPerPixelPerDispatch, in uint cascadeIndex, in float rcpN) { // but leave first index in the array for the ray accumulation metadata, hence the +1 coord.z += (cascadeIndex+1u)*samplesPerPixelPerDispatch; - const vec3 prev = fetchAccumulation(coord); + const vec3 prev = firstFrame ? 
vec3(0.0):fetchAccumulation(coord); const vec3 newVal = prev+(weightedDelta-prev)*rcpN; - // TODO: do a better check, compare actually encoded values for difference - const uvec3 diff = floatBitsToUint(newVal)^floatBitsToUint(prev); - if (bool((diff.x|diff.y|diff.z)&0x7ffffff0u)) - storeAccumulation(newVal,coord); + // always store, cause we need to reset the value + storeAccumulation(newVal,coord); } void addAccumulationCascade(in vec3 weightedDelta, uvec3 coord, in uint samplesPerPixelPerDispatch, in uint cascadeIndex) { - if (any(greaterThan(weightedDelta,vec3(exp2(-19.f))))) + if (any(greaterThan(abs(weightedDelta),vec3(exp2(-19.f))))) { // but leave first index in the array for the ray accumulation metadata, hence the +1 coord.z += (cascadeIndex+1u)*samplesPerPixelPerDispatch; @@ -174,6 +178,29 @@ void addWorldspaceNormal(vec3 delta, in uvec3 coord) impl_addWorldspaceNormal(delta,coord,0.f,false); } +void storeMask(in float mask, in uvec3 coord) +{ + imageStore(maskAOV,ivec3(coord),vec4(mask,0.f,0.f,0.f)); +} +void impl_addMask(float delta, in uvec3 coord, in float rcpN, in bool newSample) +{ + const float prev = imageLoad(maskAOV,ivec3(coord)).r; + if (newSample) + delta = (delta-prev)*rcpN; + if (abs(delta)>1.f/65536.f) + storeMask(prev+delta,coord); +} +// for starting a new sample +void addMask(float delta, in uvec3 coord, in float rcpN) +{ + impl_addMask(delta,coord,rcpN,true); +} +// for adding to the last sample +void addMask(float delta, in uvec3 coord) +{ + impl_addMask(delta,coord,0.f,false); +} + // due to memory limitations we can only do 6k renders // so that's 13 bits for width, 12 bits for height, which leaves us with 7 bits for throughput void packOutPixelLocationAndAoVThroughputFactor(out float val, in uvec2 outPixelLocation, in float aovThroughputFactor) @@ -279,6 +306,10 @@ vec3 load_normal_and_prefetch_textures( dUVdBary = mat2(uvs[0]-uvs[2],uvs[1]-uvs[2]); const vec2 UV = dUVdBary*compactBary+uvs[2]; + // flip the tangent frame if mesh got flipped to undo Left Handed tangent frame + if (!bool(batchInstanceData.determinantSignBit&0x80000000u)) + dUVdBary = -dUVdBary; + // the direction/winding of the UV-space parallelogram doesn't matter for texture filtering const mat2 dUVdScreen = nbl_glsl_applyChainRule2D(dUVdBary,dBarydScreen); nbl_glsl_MC_runTexPrefetchStream(tps,UV,dUVdScreen*pc.cummon.textureFootprintFactor); } @@ -321,7 +352,7 @@ mat2x3 rand6d(in uvec3 scramble_keys[2], in int _sample, int depth) // decrement depth because first vertex is rasterized and picked with a different sample sequence --depth; // - const int offset = int(_sample*staticViewData.sampleSequenceStride)+depth*SAMPLING_STRATEGY_COUNT; + const int offset = _sample*int(staticViewData.sampleSequenceStride_hideEnvmap&0x7fFFffFFu)+depth*SAMPLING_STRATEGY_COUNT; const nbl_glsl_sampling_quantized3D quant1 = texelFetch(quantizedSampleSequence, offset).xy; const nbl_glsl_sampling_quantized3D quant2 = texelFetch(quantizedSampleSequence, offset+1).xy; @@ -508,8 +539,7 @@ uint generate_next_rays( // the 1.03125f adjusts for the fact that the normal might be too short (inversesqrt precision) const float inversesqrt_precision = 1.03125f; - // TODO: investigate why we can't use `normalizedN` here - const vec3 ray_offset_vector = normalize(cross(dPdBary[0],dPdBary[1]))*inversesqrt_precision; + const vec3 ray_offset_vector = normalizedG*inversesqrt_precision; float origin_offset = nbl_glsl_numeric_limits_float_epsilon(120u); // I pulled the constants out of my @$$ origin_offset += 
dot(abs(ray_offset_vector),abs(origin))*nbl_glsl_numeric_limits_float_epsilon(128u); @@ -523,6 +553,7 @@ uint generate_next_rays( //const vec3 geomNormal = cross(dPdBary[0],dPdBary[1]); //float ray_offset = ?; //ray_offset = nbl_glsl_ieee754_next_ulp_away_from_zero(ray_offset); + const vec3 ray_offset = ray_offset_vector*origin_offset; const vec3 ray_origin[2] = {origin+ray_offset,origin-ray_offset}; uint offset = 0u; diff --git a/22.RaytracedAO/raytraceCommon.h b/22.RaytracedAO/raytraceCommon.h index a070b2a94..595fc7198 100644 --- a/22.RaytracedAO/raytraceCommon.h +++ b/22.RaytracedAO/raytraceCommon.h @@ -98,11 +98,13 @@ struct StaticViewData_t uint8_t maxPathDepth; uint8_t noRussianRouletteDepth; uint16_t samplesPerPixelPerDispatch; + uint32_t sampleSequenceStride : 31; + uint32_t hideEnvmap : 1; #else uint imageDimensions; uint maxPathDepth_noRussianRouletteDepth_samplesPerPixelPerDispatch; + uint sampleSequenceStride_hideEnvmap; #endif - uint sampleSequenceStride; // this is a very small number actually, probably 20 bits left to play with float envMapPDFNormalizationFactor; nbl_glsl_RWMC_CascadeParameters cascadeParams; }; diff --git a/22.RaytracedAO/resolve.comp b/22.RaytracedAO/resolve.comp index b46b0f725..33541d08a 100644 --- a/22.RaytracedAO/resolve.comp +++ b/22.RaytracedAO/resolve.comp @@ -13,9 +13,10 @@ layout(set = 0, binding = 0, row_major) uniform StaticViewData layout(set = 0, binding = 1) uniform usampler2DArray colorSamples; layout(set = 0, binding = 2) uniform sampler2DArray albedoSamples; layout(set = 0, binding = 3) uniform usampler2DArray normalSamples; -layout(set = 0, binding = 4, rgba16f) restrict uniform image2D framebuffer; -layout(set = 0, binding = 5, r32ui) restrict uniform uimage2D albedo; -layout(set = 0, binding = 6, rgba16f) restrict uniform image2D normals; +layout(set = 0, binding = 4) uniform sampler2DArray maskSamples; +layout(set = 0, binding = 5, rgba16f) restrict uniform image2D framebuffer; +layout(set = 0, binding = 6, r32ui) restrict uniform uimage2D albedo; +layout(set = 0, binding = 7, rgba16f) restrict uniform image2D normals; layout(push_constant, row_major) uniform PushConstants { @@ -60,6 +61,10 @@ vec3 nbl_glsl_RWMC_sampleCascadeTexel(ivec2 coord, in ivec2 offset, in uint casc return value/float(samplesPerPixelPerDispatch); } +bool isRWMCEnabled() +{ + return staticViewData.cascadeParams.penultimateCascadeIx!=uint(-2); +} void main() { @@ -68,22 +73,30 @@ void main() { samplesPerPixelPerDispatch = bitfieldExtract(staticViewData.maxPathDepth_noRussianRouletteDepth_samplesPerPixelPerDispatch,16,16); - vec3 acc = nbl_glsl_RWMC_reweight(pc.rwmcReweightingParams,pixelCoord); + vec3 acc; + if (isRWMCEnabled()) + acc = nbl_glsl_RWMC_reweight(pc.rwmcReweightingParams,pixelCoord); + else // its a pretty ok function, reusing it + acc = nbl_glsl_RWMC_sampleCascadeTexel(pixelCoord,ivec2(0,0),-1); + vec3 alb = texelFetch(albedoSamples,ivec3(pixelCoord,0),0).rgb; vec3 nml = nbl_glsl_decodeRGB10A2_SNORM(texelFetch(normalSamples,ivec3(pixelCoord,0),0).r).xyz; + float msk = texelFetch(maskSamples,ivec3(pixelCoord,0),0).r; for (uint i=1u; icreateGPUShader(core::make_smart_refctd_ptr(R"===( @@ -428,12 +409,12 @@ void main() #include "../ShaderCommon.glsl" layout(binding = 0, std430) restrict readonly buffer DenoisedImageInputBuffer { - f16vec3_packed inDenoisedBuffer[]; + uvec2 inDenoisedBuffer[]; }; #define _NBL_GLSL_EXT_FFT_INPUT_DESCRIPTOR_DEFINED_ layout(binding = 1, std430) restrict buffer NoisyImageInputBufferAndSpectrumOutputBuffer { - uint16_t 
data[]; + uvec2 data[]; } aliasedBuffer[2]; #define _NBL_GLSL_EXT_FFT_OUTPUT_DESCRIPTOR_DEFINED_ @@ -466,12 +447,7 @@ uint nbl_glsl_ext_FFT_Parameters_t_getDirection() void nbl_glsl_ext_FFT_setData(in uvec3 coordinate, in uint channel, in nbl_glsl_complex complex_value) { const uint index = ((channel<>16); - aliasedBuffer[1].data[index*4+2] = uint16_t(asUint.y&0xffffu); - aliasedBuffer[1].data[index*4+3] = uint16_t(asUint.y>>16); + aliasedBuffer[1].data[index] = floatBitsToUint(complex_value); } #define _NBL_GLSL_EXT_FFT_SET_DATA_DEFINED_ @@ -480,7 +456,7 @@ void nbl_glsl_ext_FFT_setData(in uvec3 coordinate, in uint channel, in nbl_glsl_ #include "nbl/builtin/glsl/ext/FFT/default_compute_fft.comp" -vec3 preloadedPixels[(_NBL_GLSL_EXT_FFT_MAX_DIM_SIZE_-1u)/_NBL_GLSL_WORKGROUP_SIZE_+1u]; +vec4 preloadedPixels[(_NBL_GLSL_EXT_FFT_MAX_DIM_SIZE_-1u)/_NBL_GLSL_WORKGROUP_SIZE_+1u]; void main() { @@ -502,21 +478,25 @@ void main() ivec3 coordinate = oldCoord; nbl_glsl_ext_FFT_wrap_coord(coordinate); // const uint index = coordinate.y*pc.data.imageWidth+coordinate.x; - const vec3 denoised = vec3(inDenoisedBuffer[index].x,inDenoisedBuffer[index].y,inDenoisedBuffer[index].z); - vec3 noisy; - for (uint c=0; c<3; c++) - noisy[c] = unpackHalf2x16(uint(aliasedBuffer[0].data[index*3+c]))[0]; + const uvec2 denoisedData = inDenoisedBuffer[index]; + const vec4 denoised = vec4(unpackHalf2x16(denoisedData[0]),unpackHalf2x16(denoisedData[1])); + vec4 noisy; + { + uvec2 noisyData = aliasedBuffer[0].data[index]; + noisy.rg = unpackHalf2x16(noisyData[0]); + noisy.ba = unpackHalf2x16(noisyData[1]); // error "warning C7050: "noisy.zw" might be used before being initialized" is wrong + } preloadedPixels[t] = mix(denoised,noisy,pc.data.denoiseBlendFactor); // const bool contributesToLuma = all(equal(coordinate,oldCoord)); - scaledLogLuma += nbl_glsl_ext_LumaMeter_local_process(contributesToLuma,preloadedPixels[t]); + scaledLogLuma += nbl_glsl_ext_LumaMeter_local_process(contributesToLuma,preloadedPixels[t].rgb); } nbl_glsl_ext_LumaMeter_setFirstPassOutput(nbl_glsl_ext_LumaMeter_workgroup_process(scaledLogLuma)); // prevent overlap between different usages of shared memory barrier(); // Virtual Threads Calculation - for(uint channel=0u; channel<3u; channel++) + for(uint channel=0u; channel<4u; channel++) { for (uint t=0u; tgetAsset("../../media/kernels/physical_flare_512.exr",lp); // TODO: make it a builtins? - for (size_t i=0; i < inputFilesAmount; i++) + for (size_t i=0; igetRegions(); + // no mip chain, etc. 
 			assert(regions.begin()+1u==regions.end());
 			const auto& region = regions.begin()[0];
+			// there is an explicit buffer row length
 			assert(region.bufferRowLength);
 			outParam.colorTexelSize = asset::getTexelOrBlockBytesize(colorCreationParams.format);
 		}
@@ -1028,6 +1020,8 @@ nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(ivec3 coordinate, in uint channe
 		{
 			auto kerDim = outParam.kernel->getCreationParameters().extent;
 			float kernelScale,minKernelScale;
+			// portrait vs landscape, get smallest dimension
+			// the kernelScale makes sure that resampled kernel resolution will match the image to be blurred scaled by `bloomRelativeScale`
 			if (extent.width1.f)
 				os::Printer::log(imageIDString + "Bloom Kernel loose sharpness, increase resolution of bloom kernel or reduce its relative scale!", ELL_WARNING);
+			// kernel cannot be smaller than 2x2
 			else if (kernelScale
auto
 		{
 			auto tmp = extent;
@@ -1058,14 +1054,16 @@ nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(ivec3 coordinate, in uint channe
 			}
 			return tmp;
 		}();
+		// we abuse the same buffer as temporary storage for the Kernel FFT (two spans needed)
 		fftScratchSize = core::max(FFTClass::getOutputBufferSize(usingHalfFloatFFTStorage,outParam.scaledKernelExtent,colorChannelsFFT)*2u,fftScratchSize);
-		fftScratchSize = core::max(FFTClass::getOutputBufferSize(usingHalfFloatFFTStorage,marginSrcDim,colorChannelsFFT),fftScratchSize);
+		// and for the main image FFT (alpha included)
+		fftScratchSize = core::max(FFTClass::getOutputBufferSize(usingHalfFloatFFTStorage,marginSrcDim,allChannelsFFT),fftScratchSize);
 
 		// TODO: maybe move them to nested loop and compute JIT
 		{
 			auto* fftPushConstants = outParam.fftPushConstants;
 			auto* fftDispatchInfo = outParam.fftDispatchInfo;
 			const ISampler::E_TEXTURE_CLAMP fftPadding[2] = {ISampler::ETC_MIRROR,ISampler::ETC_MIRROR};
-			const auto passes = FFTClass::buildParameters(false,colorChannelsFFT,extent,fftPushConstants,fftDispatchInfo,fftPadding,marginSrcDim);
+			const auto passes = FFTClass::buildParameters(false,allChannelsFFT,extent,fftPushConstants,fftDispatchInfo,fftPadding,marginSrcDim);
 			{
 				// override for less work and storage (dont need to store the extra padding of the last axis after iFFT)
 				fftPushConstants[1].output_strides.x = fftPushConstants[0].input_strides.x;
@@ -1081,6 +1079,7 @@ nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(ivec3 coordinate, in uint channe
 				}
 				fftDispatchInfo[2] = fftDispatchInfo[0];
 			}
+			// only a 2D FFT
 			assert(passes==2);
 		}
 
@@ -1103,6 +1102,7 @@ nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(ivec3 coordinate, in uint channe
 				{
 					os::Printer::log(imageIDString + "Image extent of the Albedo Channel does not match the Color Channel, Albedo Channel will not be used!", ELL_ERROR);
 					albedoImage = nullptr;
+					continue;
 				}
 				else
 					outParam.denoiserType = EII_ALBEDO;
@@ -1144,7 +1144,7 @@ nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(ivec3 coordinate, in uint channe
 		size_t denoiserStateBufferSize = 0ull;
 		{
 			size_t scratchBufferSize = fftScratchSize;
-			size_t tempBufferSize = fftScratchSize;
+			size_t tempBufferSize = forcedOptiXFormatPixelCumExclSizes[EII_COUNT]*maxResolution[0]*maxResolution[1];
 			for (uint32_t i=0u; i
getCreationParameters();
-				assert(asset::getTexelOrBlockBytesize(creationParameters.format)==param.colorTexelSize);
 				// set up some image pitch and offset info
 				shaderConstants.inImageTexelPitch[j] = image->getRegions().begin()[0].bufferRowLength;
 				inImageByteOffset[j] = offsetPair->getOffset();
@@ -1460,8 +1459,8 @@ nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(ivec3 coordinate, in uint channe
 			// always need at least two input noisy buffers due to having to keep noisy colour around
 			for (uint32_t j=0u; j
tileAndInvoke(
@@ -1618,6 +1617,7 @@ nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(ivec3 coordinate, in uint channe
 			// image view
 			core::smart_refctd_ptr imageView;
+			// size needed to download denoised, bloomed and tonemapped image
 			const uint32_t colorBufferBytesize = param.height*param.width*param.colorTexelSize;
 			{
 				// create image
@@ -1783,7 +1783,7 @@ nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(ivec3 coordinate, in uint channe
 
 			// convert to EF_R8G8B8_SRGB and save it as .png and .jpg
 			{
-				auto newImageView = getConvertedImageView(imageView->getCreationParameters().image, EF_R8G8B8_SRGB);
+				auto newImageView = getConvertedImageView(imageView->getCreationParameters().image, EF_R8G8B8A8_SRGB);
 				IAssetWriter::SAssetWriteParams wp(newImageView.get());
 				std::string fileName = outputFileBundle[i].value().c_str();
diff --git a/50.IESProfileTest/CMakeLists.txt b/50.IESProfileTest/CMakeLists.txt
index 71c002ad5..52e8e83f2 100644
--- a/50.IESProfileTest/CMakeLists.txt
+++ b/50.IESProfileTest/CMakeLists.txt
@@ -4,4 +4,7 @@ if(NOT RES)
 	message(FATAL_ERROR "common.cmake not found. Should be in {repo_root}/cmake directory")
 endif()
 
-nbl_create_executable_project("" "" "" "")
\ No newline at end of file
+nbl_create_executable_project("" "" "" nlohmann_json::nlohmann_json)
+
+add_dependencies(${EXECUTABLE_NAME} nlohmann_json::nlohmann_json)
+target_include_directories(${EXECUTABLE_NAME} PUBLIC $)
\ No newline at end of file
diff --git a/50.IESProfileTest/compute/cdc.comp b/50.IESProfileTest/compute/cdc.comp
index 037d01a36..390d63acb 100644
--- a/50.IESProfileTest/compute/cdc.comp
+++ b/50.IESProfileTest/compute/cdc.comp
@@ -224,11 +224,11 @@ bool isWithinSCDomain(vec2 point)
 
 void main()
 {
-	const float VERTICAL_INVERSE = 1.0f / TEXTURE_SIZE;
-	const float HORIZONTAL_INVERSE = 1.0f / TEXTURE_SIZE;
-
-	const ivec2 pixelCoordinates = ivec2(gl_GlobalInvocationID.xy);
 	const ivec2 destinationSize = imageSize(outIESCandelaImage);
+	const ivec2 pixelCoordinates = ivec2(gl_GlobalInvocationID.xy);
+
+	const float VERTICAL_INVERSE = 1.0f / float(destinationSize.x);
+	const float HORIZONTAL_INVERSE = 1.0f / float(destinationSize.y);
 
 	if (all(lessThan(pixelCoordinates, destinationSize)))
 	{
diff --git a/50.IESProfileTest/compute/common.h b/50.IESProfileTest/compute/common.h
index 378625732..edbc94104 100644
--- a/50.IESProfileTest/compute/common.h
+++ b/50.IESProfileTest/compute/common.h
@@ -8,7 +8,6 @@
 #define M_HALF_PI M_PI/2.0f // would be cool if we have this define somewhere or GLSL do
 
 #define QUANT_ERROR_ADMISSIBLE 1/1024
-#define TEXTURE_SIZE 1024u
 #define WORKGROUP_SIZE 256u
 #define WORKGROUP_DIMENSION 16u
 
diff --git a/50.IESProfileTest/inputs.json b/50.IESProfileTest/inputs.json
new file mode 100644
index 000000000..d6b4ce528
--- /dev/null
+++ b/50.IESProfileTest/inputs.json
@@ -0,0 +1,14 @@
+{
+    "directories": [
+        "../media/mitsuba/ies/packages/leomoon-dot-com_ies-lights-pack/ies-lights-pack"
+    ],
+    "files": [
+        "../media/mitsuba/ies/ISOTROPIC/007cfb11e343e2f42e3b476be4ab684e.ies",
+        "../media/mitsuba/ies/ANIISOTROPIC/QUAD_SYMMETRY/0275171fb664c1b3f024d1e442a68d22.ies",
+        "../media/mitsuba/ies/ANIISOTROPIC/HALF_SYMMETRY/1392a1ba55b67d3e0ae7fd63527f3e78.ies",
+        "../media/mitsuba/ies/ANIISOTROPIC/OTHER_HALF_SYMMETRY/028e97564391140b1476695ae7a46fa4.ies",
+        "../media/mitsuba/ies/NO_LATERAL_SYMMET/4b88bf886b39cfa63094e70e1afa680e.ies"
+    ],
+    "gui": true,
+    "writeAssets": false
+}
\ No newline at end of file
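The `inputs.json` added above drives the reworked `50.IESProfileTest/main.cpp` that follows. As a rough, hedged illustration of consuming that schema with nlohmann_json (the file name and plain console output here are assumptions; the real example additionally resolves paths relative to the json, scans directories and validates `.ies` extensions):

```cpp
// Minimal sketch of reading the inputs.json schema above with nlohmann::json.
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
#include "nlohmann/json.hpp"

int main()
{
    std::ifstream file("inputs.json"); // assumed working-directory-relative path
    if (!file.is_open())
        return 1; // the example itself printf()s an error and exit()s instead

    const nlohmann::json jsonMap = nlohmann::json::parse(file);

    // "directories" are scanned for *.ies files, "files" are taken as given
    const auto directories = jsonMap.value("directories", std::vector<std::string>{});
    const auto files = jsonMap.value("files", std::vector<std::string>{});
    const bool gui = jsonMap.value("gui", true);
    const bool writeAssets = jsonMap.value("writeAssets", false);

    std::cout << directories.size() << " directories, " << files.size() << " files, gui=" << gui
              << ", writeAssets=" << writeAssets << "\n";
    return 0;
}
```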
diff --git a/50.IESProfileTest/main.cpp b/50.IESProfileTest/main.cpp
index 79106697b..7aa640f67 100644
--- a/50.IESProfileTest/main.cpp
+++ b/50.IESProfileTest/main.cpp
@@ -2,14 +2,37 @@
 // This file is part of the "Nabla Engine".
 // For conditions of distribution and use, see copyright notice in nabla.h
 
+#define BENCHMARK_TILL_FIRST_FRAME
+
 #include
 #include
 #include
 #include "nbl/ext/ScreenShot/ScreenShot.h"
 #include "compute/common.h"
+#include
+
+// small hack to compile with the json library
+namespace std
+{
+	int sprintf_s(char* buffer, size_t size, const char* format, ...) {
+		va_list args;
+		va_start(args, format);
+		int result = ::vsprintf_s(buffer, size, format, args);
+		va_end(args);
+		return result;
+	}
+}
+
+#include "nlohmann/json.hpp"
 
 using namespace nbl;
 using namespace core;
+using json = nlohmann::json;
+
+#ifdef BENCHMARK_TILL_FIRST_FRAME
+const std::chrono::steady_clock::time_point startBenchmark = std::chrono::high_resolution_clock::now();
+bool stopBenchmarkFlag = false;
+#endif
 
 class IESCompute
 {
@@ -71,7 +94,7 @@ class IESCompute
 		driver->bindDescriptorSets(EPBP_COMPUTE, gpue.cPipeline->getLayout(), 0u, 1u, &gpue.cDescriptorSet.get(), nullptr);
 		driver->pushConstants(gpue.cPipeline->getLayout(), asset::ISpecializedShader::ESS_COMPUTE, 0u, sizeof(PushConstant), &pushConstant);
 
-		_NBL_STATIC_INLINE_CONSTEXPR auto xGroups = (TEXTURE_SIZE - 1u) / WORKGROUP_DIMENSION + 1u;
+		const auto xGroups = (getActiveProfile().getOptimalIESResolution().x - 1u) / WORKGROUP_DIMENSION + 1u;
 		driver->dispatch(xGroups, xGroups, 1u);
 		COpenGLExtensionHandler::extGlMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT | GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
 
@@ -91,6 +114,16 @@ class IESCompute
 		{
 			driver->blitRenderTargets(fbo, nullptr, false, false);
 			driver->endScene();
+
+			#ifdef BENCHMARK_TILL_FIRST_FRAME
+			if (!stopBenchmarkFlag)
+			{
+				const std::chrono::steady_clock::time_point stopBenchmark = std::chrono::high_resolution_clock::now();
+				auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(stopBenchmark - startBenchmark);
+				std::cout << "Time taken till first render pass: " << duration.count() << " milliseconds" << std::endl;
+				stopBenchmarkFlag = true;
+			}
+			#endif
 		}
 
 		void updateZDegree(const asset::CIESProfile::IES_STORAGE_FORMAT& degreeOffset)
@@ -109,8 +142,11 @@ class IESCompute
 			generalPurposeOffset = newOffset;
 
 			// not elegant way to do it here but lets leave it as it is
-			updateCDescriptorSets();
-			pushConstant.maxIValueReciprocal = (float)getActiveProfile().getMaxCandelaValue();
+			updateCDescriptorSets(); // flush descriptor set
+			updateGDescriptorSets(); // flush descriptor set
+
+			const auto& profile = getActiveProfile();
+			pushConstant.maxIValue = (float)profile.getMaxCandelaValue();
 		}
 	}
@@ -143,18 +179,12 @@ class IESCompute
 		};
 
 		auto& gpue = m_gpue;
-
-		gpue.dImageIESC = std::move(createGPUImageView(TEXTURE_SIZE, TEXTURE_SIZE));
-		gpue.dImageS = std::move(createGPUImageView(TEXTURE_SIZE, TEXTURE_SIZE));
-		gpue.dImageD = std::move(createGPUImageView(TEXTURE_SIZE, TEXTURE_SIZE));
-		gpue.dImageTMask = std::move(createGPUImageView(TEXTURE_SIZE, TEXTURE_SIZE));
-
-		createSSBOBuffers();
+		createGPUDescriptors();
+		const auto initIdx = generalPurposeOffset;
 
 		// Compute
 		{
 			const std::vector bindings = getCBindings();
-
 			{
 				auto descriptorSetLayout = driver->createGPUDescriptorSetLayout(bindings.data(), bindings.data() + bindings.size());
 				asset::SPushConstantRange range = { asset::ISpecializedShader::ESS_COMPUTE, 0u, sizeof(PushConstant) };
@@ -164,22 +194,6 @@ class IESCompute
 			}
 
 			{
-				{
-					{
-						gpue.cinfos[EB_IMAGE_IES_C].desc = core::smart_refctd_ptr(gpue.dImageIESC);
-						gpue.cinfos[EB_IMAGE_IES_C].image = { nullptr, asset::EIL_GENERAL };
-
-						gpue.cinfos[EB_IMAGE_S].desc = core::smart_refctd_ptr(gpue.dImageS);
-						gpue.cinfos[EB_IMAGE_S].image = { nullptr, asset::EIL_GENERAL };
-
-						gpue.cinfos[EB_IMAGE_D].desc = core::smart_refctd_ptr(gpue.dImageD);
-						gpue.cinfos[EB_IMAGE_D].image = { nullptr, asset::EIL_GENERAL };
-
-						gpue.cinfos[EB_IMAGE_T_MASK].desc = core::smart_refctd_ptr(gpue.dImageTMask);
-						gpue.cinfos[EB_IMAGE_T_MASK].image = { nullptr, asset::EIL_GENERAL };
-					}
-				}
-
 				for (auto i = 0; i < EB_SIZE; i++)
 				{
 					gpue.cwrites[i].dstSet = gpue.cDescriptorSet.get();
@@ -203,14 +217,7 @@ class IESCompute
 
 		// Graphics
 		{
-			const std::vector bindings =
-			{
-				{ EB_IMAGE_IES_C, asset::EDT_COMBINED_IMAGE_SAMPLER, 1, asset::ISpecializedShader::ESS_FRAGMENT, nullptr },
-				{ EB_IMAGE_S, asset::EDT_COMBINED_IMAGE_SAMPLER, 1, asset::ISpecializedShader::ESS_FRAGMENT, nullptr },
-				{ EB_IMAGE_D, asset::EDT_COMBINED_IMAGE_SAMPLER, 1, asset::ISpecializedShader::ESS_FRAGMENT, nullptr },
-				{ EB_IMAGE_T_MASK, asset::EDT_COMBINED_IMAGE_SAMPLER, 1, asset::ISpecializedShader::ESS_FRAGMENT, nullptr }
-			};
-
+			const std::vector bindings = getGBindings();
 			{
 				auto descriptorSetLayout = driver->createGPUDescriptorSetLayout(bindings.data(), bindings.data() + bindings.size());
@@ -238,39 +245,23 @@ class IESCompute
 			return driver->createGPUSampler({ asset::ISampler::ETC_CLAMP_TO_EDGE,asset::ISampler::ETC_CLAMP_TO_EDGE,asset::ISampler::ETC_CLAMP_TO_EDGE,asset::ISampler::ETBC_FLOAT_OPAQUE_BLACK,asset::ISampler::ETF_LINEAR,asset::ISampler::ETF_LINEAR,asset::ISampler::ESMM_LINEAR,0u,false,asset::ECO_ALWAYS });
 		};
 
-		_NBL_STATIC_INLINE_CONSTEXPR uint8_t NBL_D_IMAGES_AMOUNT = 4u;
+		gpue.sampler = createSampler();
 
-		IGPUDescriptorSet::SDescriptorInfo infos[NBL_D_IMAGES_AMOUNT];
+		for (auto i = 0; i < gpue.NBL_D_IMAGES_AMOUNT; i++)
 		{
-			infos[EB_IMAGE_IES_C].desc = core::smart_refctd_ptr(gpue.dImageIESC);
-			infos[EB_IMAGE_IES_C].image = { createSampler(),asset::EIL_SHADER_READ_ONLY_OPTIMAL};
-
-			infos[EB_IMAGE_S].desc = core::smart_refctd_ptr(gpue.dImageS);
-			infos[EB_IMAGE_S].image = { createSampler(),asset::EIL_SHADER_READ_ONLY_OPTIMAL };
-
-			infos[EB_IMAGE_D].desc = core::smart_refctd_ptr(gpue.dImageD);
-			infos[EB_IMAGE_D].image = { createSampler(),asset::EIL_SHADER_READ_ONLY_OPTIMAL };
-
-			infos[EB_IMAGE_T_MASK].desc = core::smart_refctd_ptr(gpue.dImageTMask);
-			infos[EB_IMAGE_T_MASK].image = { createSampler(),asset::EIL_SHADER_READ_ONLY_OPTIMAL };
+			gpue.gwrites[i].dstSet = gpue.gDescriptorSet.get();
+			gpue.gwrites[i].binding = i;
+			gpue.gwrites[i].count = 1u;
+			gpue.gwrites[i].arrayElement = 0u;
+			gpue.gwrites[i].descriptorType = asset::EDT_COMBINED_IMAGE_SAMPLER;
+			gpue.gwrites[i].info = gpue.ginfos + i;
 		}
 
-		video::IGPUDescriptorSet::SWriteDescriptorSet writes[NBL_D_IMAGES_AMOUNT];
-		for (auto i = 0; i < NBL_D_IMAGES_AMOUNT; i++)
-		{
-			writes[i].dstSet = gpue.gDescriptorSet.get();
-			writes[i].binding = i;
-			writes[i].count = 1u;
-			writes[i].arrayElement = 0u;
-			writes[i].descriptorType = asset::EDT_COMBINED_IMAGE_SAMPLER;
-			writes[i].info = &infos[i];
-		}
-
-		driver->updateDescriptorSets(NBL_D_IMAGES_AMOUNT, writes, 0u, nullptr);
+		updateGDescriptorSets();
 		}
 	}
 
-	void createSSBOBuffers()
+	void createGPUDescriptors()
 	{
 		auto createCPUBuffer = [&](const auto& pInput)
 		{
@@ -293,22 +284,27 @@ class IESCompute
 		cssbod.hAngles = createGPUBuffer(createCPUBuffer(profile.getHoriAngles()));
 		cssbod.vAngles = createGPUBuffer(createCPUBuffer(profile.getVertAngles()));
 		cssbod.data = createGPUBuffer(createCPUBuffer(profile.getData()));
+
+		const auto optimalResolution = profile.getOptimalIESResolution();
+
+		cssbod.dImageIESC = std::move(createGPUImageView(optimalResolution.x, optimalResolution.y));
+		cssbod.dImageS = std::move(createGPUImageView(optimalResolution.x, optimalResolution.y));
+		cssbod.dImageD = std::move(createGPUImageView(optimalResolution.x, optimalResolution.y));
+		cssbod.dImageTMask = std::move(createGPUImageView(optimalResolution.x, optimalResolution.y));
 		}
 	}
 
 	void updateCDescriptorSets()
 	{
+		fillImageDescriptorInfo(generalPurposeOffset, m_gpue.cinfos[EB_IMAGE_IES_C]);
+		fillImageDescriptorInfo(generalPurposeOffset, m_gpue.cinfos[EB_IMAGE_S]);
+		fillImageDescriptorInfo(generalPurposeOffset, m_gpue.cinfos[EB_IMAGE_D]);
+		fillImageDescriptorInfo(generalPurposeOffset, m_gpue.cinfos[EB_IMAGE_T_MASK]);
+
 		fillSSBODescriptorInfo(generalPurposeOffset, m_gpue.cinfos[EB_SSBO_HA]);
 		fillSSBODescriptorInfo(generalPurposeOffset, m_gpue.cinfos[EB_SSBO_VA]);
 		fillSSBODescriptorInfo(generalPurposeOffset, m_gpue.cinfos[EB_SSBO_D]);
 
-		const std::vector bindings = getCBindings();
-		{
-			auto descriptorSetLayout = driver->createGPUDescriptorSetLayout(bindings.data(), bindings.data() + bindings.size());
-			asset::SPushConstantRange range = { asset::ISpecializedShader::ESS_COMPUTE, 0u, sizeof(PushConstant) };
-			m_gpue.cDescriptorSet = driver->createGPUDescriptorSet(std::move(descriptorSetLayout)); // I guess it can be done better
-		}
-
 		const core::smart_refctd_ptr proxy(m_gpue.cPipeline->getLayout()->getDescriptorSetLayout(0));
 		m_gpue.cDescriptorSet = core::smart_refctd_ptr(driver->createGPUDescriptorSet(core::smart_refctd_ptr(proxy)));
@@ -318,6 +314,22 @@ class IESCompute
 		driver->updateDescriptorSets(EB_SIZE, m_gpue.cwrites, 0u, nullptr);
 	}
 
+	void updateGDescriptorSets()
+	{
+		fillImageDescriptorInfo(generalPurposeOffset, m_gpue.ginfos[EB_IMAGE_IES_C]);
+		fillImageDescriptorInfo(generalPurposeOffset, m_gpue.ginfos[EB_IMAGE_S]);
+		fillImageDescriptorInfo(generalPurposeOffset, m_gpue.ginfos[EB_IMAGE_D]);
+		fillImageDescriptorInfo(generalPurposeOffset, m_gpue.ginfos[EB_IMAGE_T_MASK]);
+
+		const core::smart_refctd_ptr proxy(m_gpue.gPipeline->getLayout()->getDescriptorSetLayout(3));
+		m_gpue.gDescriptorSet = core::smart_refctd_ptr(driver->createGPUDescriptorSet(core::smart_refctd_ptr(proxy)));
+
+		for (auto i = 0; i < m_gpue.NBL_D_IMAGES_AMOUNT; i++)
+			m_gpue.gwrites[i].dstSet = m_gpue.gDescriptorSet.get();
+
+		driver->updateDescriptorSets(m_gpue.NBL_D_IMAGES_AMOUNT, m_gpue.gwrites, 0u, nullptr);
+	}
+
 	template
 	void fillSSBODescriptorInfo(const size_t assetIndex, IGPUDescriptorSet::SDescriptorInfo& info)
 	{
@@ -339,6 +351,29 @@ class IESCompute
 		info.buffer = { 0, proxy->getSize() };
 	}
 
+	template
+	void fillImageDescriptorInfo(const size_t assetIndex, IGPUDescriptorSet::SDescriptorInfo& info)
+	{
+		static_assert(binding == EB_IMAGE_IES_C || binding == EB_IMAGE_S || binding == EB_IMAGE_D || binding == EB_IMAGE_T_MASK);
+
+		const auto& profile = getProfile(assetIndex);
+		auto& cssbod = m_gpue.CSSBOD[assetIndex];
+
+		core::smart_refctd_ptr proxy;
+
+		if constexpr (binding == EB_IMAGE_IES_C)
+			proxy = core::smart_refctd_ptr(cssbod.dImageIESC);
+		else if (binding == EB_IMAGE_S)
+			proxy = core::smart_refctd_ptr(cssbod.dImageS);
+		else if (binding == EB_IMAGE_D)
+			proxy = core::smart_refctd_ptr(cssbod.dImageD);
+		else
+			proxy = core::smart_refctd_ptr(cssbod.dImageTMask);
+
+		info.desc = core::smart_refctd_ptr(proxy);
+		info.image = { core::smart_refctd_ptr(m_gpue.sampler), asset::EIL_SHADER_READ_ONLY_OPTIMAL };
+	}
+
 	template
 	auto createGPUImageView(const size_t& width, const size_t& height)
 	{
@@ -385,6 +420,19 @@ class IESCompute
 		return bindings;
 	}
 
+	std::vector getGBindings()
+	{
+		const std::vector bindings =
+		{
+			{ EB_IMAGE_IES_C, asset::EDT_COMBINED_IMAGE_SAMPLER, 1, asset::ISpecializedShader::ESS_FRAGMENT, nullptr },
+			{ EB_IMAGE_S, asset::EDT_COMBINED_IMAGE_SAMPLER, 1, asset::ISpecializedShader::ESS_FRAGMENT, nullptr },
+			{ EB_IMAGE_D, asset::EDT_COMBINED_IMAGE_SAMPLER, 1, asset::ISpecializedShader::ESS_FRAGMENT, nullptr },
+			{ EB_IMAGE_T_MASK, asset::EDT_COMBINED_IMAGE_SAMPLER, 1, asset::ISpecializedShader::ESS_FRAGMENT, nullptr }
+		};
+
+		return bindings;
+	}
+
 	template
 	video::IFrameBuffer* createFBO(const size_t& width, const size_t& height)
 	{
@@ -401,6 +449,8 @@ class IESCompute
 
 	struct GPUE
 	{
+		_NBL_STATIC_INLINE_CONSTEXPR uint8_t NBL_D_IMAGES_AMOUNT = 4u;
+
 		// Compute
 		core::smart_refctd_ptr cPipeline;
 		core::smart_refctd_ptr cDescriptorSet;
@@ -411,6 +461,7 @@ class IESCompute
 		struct CSSBODescriptor
 		{
 			core::smart_refctd_ptr vAngles, hAngles, data;
+			core::smart_refctd_ptr dImageIESC, dImageS, dImageD, dImageTMask;
 		};
 
 		std::vector CSSBOD;
@@ -420,20 +471,19 @@ class IESCompute
 		core::smart_refctd_ptr gDescriptorSet;
 		core::smart_refctd_ptr mBuffer;
 
+		IGPUDescriptorSet::SDescriptorInfo ginfos[NBL_D_IMAGES_AMOUNT];
+		IGPUDescriptorSet::SWriteDescriptorSet gwrites[NBL_D_IMAGES_AMOUNT];
+
 		// Shared data
-		core::smart_refctd_ptr dImageIESC;
-		core::smart_refctd_ptr dImageS;
-		core::smart_refctd_ptr dImageD;
-		core::smart_refctd_ptr dImageTMask;
+		core::smart_refctd_ptr sampler;
 	} m_gpue;
 
#include "nbl/nblpack.h"
 	struct PushConstant
 	{
-		float maxIValueReciprocal;
+		float maxIValue;
 		float zAngleDegreeRotation;
 		IESCompute::E_MODE mode = IESCompute::EM_CDC;
-		uint32_t dummy;
 	} PACK_STRUCT;
#include "nbl/nblunpack.h"
@@ -544,38 +594,149 @@ int main()
 	asset::IAssetLoader::SAssetLoadParams lparams;
 	lparams.loaderFlags;
-
-	constexpr auto IES_INPUTS = std::array
-	{
-		std::string_view("../../media/mitsuba/ies/ISOTROPIC/007cfb11e343e2f42e3b476be4ab684e.ies"),
-		std::string_view("../../media/mitsuba/ies/ANIISOTROPIC/QUAD_SYMMETRY/0275171fb664c1b3f024d1e442a68d22.ies"),
-		std::string_view("../../media/mitsuba/ies/ANIISOTROPIC/HALF_SYMMETRY/1392a1ba55b67d3e0ae7fd63527f3e78.ies"),
-		std::string_view("../../media/mitsuba/ies/ANIISOTROPIC/OTHER_HALF_SYMMETRY/028e97564391140b1476695ae7a46fa4.ies"),
-		std::string_view("../../media/mitsuba/ies/NO_LATERAL_SYMMET/4b88bf886b39cfa63094e70e1afa680e.ies"),
+
+	auto readJSON = [](const std::string& filePath)
+	{
+		std::ifstream file(filePath.data());
+		if (!file.is_open()) {
+			printf("Invalid input json \"%s\" file! Aborting..", filePath.data());
+			exit(0x45);
+		}
+
+		std::stringstream buffer;
+		buffer << file.rdbuf();
+
+		return buffer.str();
 	};
 
+	const auto INPUT_JSON_FILE_PATH_FS = std::filesystem::absolute("../inputs.json");
+	const auto INPUT_JSON_FILE_PATH = INPUT_JSON_FILE_PATH_FS.string();
+	const auto jsonBuffer = readJSON(INPUT_JSON_FILE_PATH);
+	if (jsonBuffer.empty()) {
+		printf("Read input json \"%s\" file is empty! Aborting..\n", INPUT_JSON_FILE_PATH.c_str());
+		exit(0x45);
+	}
+
+	const auto jsonMap = json::parse(jsonBuffer.c_str());
+
+	if (!jsonMap["directories"].is_array())
+	{
+		printf("Input json \"%s\" file's field \"directories\" is not an array! Aborting..\n", INPUT_JSON_FILE_PATH.c_str());
+		exit(0x45);
+	}
+
+	if (!jsonMap["files"].is_array())
+	{
+		printf("Input json \"%s\" file's field \"files\" is not an array! Aborting..\n", INPUT_JSON_FILE_PATH.c_str());
+		exit(0x45);
+	}
+
+	if (!jsonMap["writeAssets"].is_boolean())
+	{
+		printf("Input json \"%s\" file's field \"writeAssets\" is not a boolean! Aborting..\n", INPUT_JSON_FILE_PATH.c_str());
+		exit(0x45);
+	}
+
+	const auto&& IES_INPUTS = [&]()
+	{
+		std::vector inputFilePaths;
+
+		auto addFile = [&inputFilePaths, &INPUT_JSON_FILE_PATH_FS](const std::string_view filePath) -> void
+		{
+			auto path = std::filesystem::path(filePath);
+
+			if (!path.is_absolute())
+				path = std::filesystem::absolute(INPUT_JSON_FILE_PATH_FS.parent_path() / path);
+
+			if (std::filesystem::exists(path) && std::filesystem::is_regular_file(path) && path.extension() == ".ies")
+				inputFilePaths.push_back(path.string());
+			else
+			{
+				printf("Invalid input path \"%s\"! Aborting..\n", path.string().c_str());
+				exit(0x45);
+			}
+		};
+
+		auto addFiles = [&inputFilePaths, &INPUT_JSON_FILE_PATH_FS, &addFile](const std::string_view directoryPath) -> void
+		{
+			auto directory(std::filesystem::absolute(INPUT_JSON_FILE_PATH_FS.parent_path() / directoryPath));
+			if (!std::filesystem::exists(directory) || !std::filesystem::is_directory(directory)) {
+				printf("Invalid input directory \"%s\"! Aborting..\n", directoryPath.data());
+				exit(0x45);
+			}
+
+			for (const auto& entry : std::filesystem::directory_iterator(directory))
+				addFile(entry.path().string().c_str());
+		};
+
+		// parse json
+		{
+			std::vector jDirectories;
+			jsonMap["directories"].get_to(jDirectories);
+
+			for (const auto& it : jDirectories)
+				addFiles(it);
+
+			std::vector jFiles;
+			jsonMap["files"].get_to(jFiles);
+
+			for (const auto& it : jFiles)
+				addFile(it);
+		}
+
+		return std::move(inputFilePaths);
+	}();
+
+	const bool GUI = [&]()
+	{
+		bool b = false;
+		jsonMap["gui"].get_to(b);
+
+		return b;
+	}();
+
+	const bool WRITE_ASSETS = [&]()
+	{
+		bool b = false;
+		jsonMap["writeAssets"].get_to(b);
+
+		return b;
+	}();
+
 	const auto ASSETS = [&]()
 	{
+		size_t loaded = {}, total = IES_INPUTS.size();
 		std::vector assets;
 		std::vector outStems;
 
-		for (size_t i = 0; i < IES_INPUTS.size(); ++i)
+		for (size_t i = 0; i < total; ++i)
 		{
-			auto asset = device->getAssetManager()->getAsset(IES_INPUTS[i].data(), lparams);
-			const auto stem = std::filesystem::path(IES_INPUTS[i].data()).stem().string();
+			auto asset = device->getAssetManager()->getAsset(IES_INPUTS[i].c_str(), lparams);
+			const auto* path = IES_INPUTS[i].c_str();
+			const auto stem = std::filesystem::path(IES_INPUTS[i].c_str()).stem().string();
 
 			if (asset.getMetadata())
 			{
 				assets.emplace_back(std::move(asset));
 				outStems.push_back(stem);
+				++loaded;
 			}
 			else
-				printf("Could not load metadata from \"%s\" asset! Skipping..", stem.c_str());
+				printf("Could not load metadata from \"%s\" asset! Skipping..\n", path);
 		}
 
+		printf("Loaded [%s/%s] assets! Status: %s\n", std::to_string(loaded).c_str(), std::to_string(total).c_str(), loaded == total ? "PASSING" : "FAILING");
 		return std::make_pair(assets, outStems);
 	}();
 
+	if (GUI)
+		printf("GUI Mode: ON\n");
+	else
+	{
+		printf("GUI Mode: OFF\nExiting...");
+		exit(0);
+	}
+
 	IESCompute iesComputeEnvironment(driver, am, ASSETS.first);
 	IESExampleEventReceiver receiver;
 	device->setEventReceiver(&receiver);
@@ -640,22 +801,23 @@ int main()
 		receiver.reset();
 	}
 
-	for (size_t i = 0; i < ASSETS.first.size(); ++i)
-	{
-		const auto& bundle = ASSETS.first[i];
-		const auto& stem = ASSETS.second[i];
+	if(WRITE_ASSETS)
+		for (size_t i = 0; i < ASSETS.first.size(); ++i)
+		{
+			const auto& bundle = ASSETS.first[i];
+			const auto& stem = ASSETS.second[i];
 
-		const auto& profile = bundle.getMetadata()->selfCast()->profile;
-		// const std::string out = std::filesystem::absolute("out/cpu/" + std::string(getProfileRS(profile)) + "/" + stem + ".png").string(); TODO (?): why its not working?
-		const std::string out = std::filesystem::absolute(std::string(getProfileRS(profile)) + "_" + stem + ".png").string();
+			const auto& profile = bundle.getMetadata()->selfCast()->profile;
+			// const std::string out = std::filesystem::absolute("out/cpu/" + std::string(getProfileRS(profile)) + "/" + stem + ".png").string(); TODO (?): why its not working? ah touch required probably first
+			const std::string out = std::filesystem::absolute(std::string(getProfileRS(profile)) + "_" + stem + ".png").string();
 
-		asset::IAssetWriter::SAssetWriteParams wparams(bundle.getContents().begin()->get());
+			asset::IAssetWriter::SAssetWriteParams wparams(bundle.getContents().begin()->get());
 
-		if (am->writeAsset(out.c_str(), wparams))
-			printf("Saved \"%s\"\n", out.c_str());
-		else
-			printf("Could not write \"%s\"\n", out.c_str());
-	}
+			if (am->writeAsset(out.c_str(), wparams))
+				printf("Saved \"%s\"\n", out.c_str());
+			else
+				printf("Could not write \"%s\"\n", out.c_str());
+		}
 
 	return 0;
 }
\ No newline at end of file
diff --git a/50.IESProfileTest/test.ies b/50.IESProfileTest/test.ies
deleted file mode 100644
index 8e00804c3..000000000
--- a/50.IESProfileTest/test.ies
+++ /dev/null
@@ -1,30 +0,0 @@
-IESNA:LM-63-1995
-[TEST]
-[TESTLAB] BEGA
-[MANUFAC] BEGA
-[MORE] Copyright LUMCat V
-[LUMCAT]
-[LUMINAIRE] 84483K3 (Preliminary)
-[ISSUEDATE] 2020-07-22
-[LAMPCAT] LED 24W
-[LAMP] 2500 lm,27 W
-TILT=NONE
-1 -1 1.0 73 1 1 2 -0.485 0.000 0.130
-1.0 1.0 27
- 0.0 2.5 5.0 7.5 10.0 12.5 15.0 17.5 20.0 22.5 25.0 27.5 30.0
- 32.5 35.0 37.5 40.0 42.5 45.0 47.5 50.0 52.5 55.0 57.5 60.0 62.5
- 65.0 67.5 70.0 72.5 75.0 77.5 80.0 82.5 85.0 87.5 90.0 92.5 95.0
- 97.5 100.0 102.5 105.0 107.5 110.0 112.5 115.0 117.5 120.0 122.5 125.0 127.5
- 130.0 132.5 135.0 137.5 140.0 142.5 145.0 147.5 150.0 152.5 155.0 157.5 160.0
- 162.5 165.0 167.5 170.0 172.5 175.0 177.5 180.0
- 0.0
- 688.3 686.8 684.0 680.3 675.3 668.8 660.9 650.7
- 638.6 624.9 609.6 593.0 575.2 556.3 536.5 516.3
- 495.7 475.4 455.7 436.5 417.0 397.4 378.0 359.1
- 340.8 322.9 305.3 287.9 270.9 253.9 237.1 220.5
- 204.1 187.8 171.8 156.0 140.5 125.4 110.7 96.4
- 82.6 69.3 56.5 44.6 33.6 23.7 15.3 8.7
- 4.1 1.4 0.3 0.1 0.1 0.0 0.0 0.0
- 0.0 0.0 0.0 0.0 0.1 0.2 0.4 0.6
- 1.0 1.3 1.4 1.4 1.4 1.3 0.8 0.5
- 0.4
diff --git a/media b/media
index 6f5346ff8..ad2cb3a9a 160000
--- a/media
+++ b/media
@@ -1 +1 @@
-Subproject commit 6f5346ff8f20f0bedeaa9c58a715ab4d6fce661c
+Subproject commit ad2cb3a9a1655c5c4d0ffa1c515f710568f0487d