diff --git a/include/nbl/asset/ECommonEnums.h b/include/nbl/asset/ECommonEnums.h index c07a0ced6a..f830b270d3 100644 --- a/include/nbl/asset/ECommonEnums.h +++ b/include/nbl/asset/ECommonEnums.h @@ -185,292 +185,13 @@ struct SMemoryBarrier } }; -inline core::bitflag allPreviousStages(core::bitflag stages) -{ - struct PerStagePreviousStages - { - public: - constexpr PerStagePreviousStages() - { - // set all stage to have itself as their previous stages - for (auto i = 0; i < std::numeric_limits::digits; i++) - data[i] = static_cast(i); - - add(PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, PIPELINE_STAGE_FLAGS::DISPATCH_INDIRECT_COMMAND_BIT); - - add(PIPELINE_STAGE_FLAGS::RAY_TRACING_SHADER_BIT, PIPELINE_STAGE_FLAGS::DISPATCH_INDIRECT_COMMAND_BIT); - - // graphics primitive pipeline - PIPELINE_STAGE_FLAGS primitivePrevStage = PIPELINE_STAGE_FLAGS::DISPATCH_INDIRECT_COMMAND_BIT; - for (auto pipelineStage : {PIPELINE_STAGE_FLAGS::INDEX_INPUT_BIT, PIPELINE_STAGE_FLAGS::VERTEX_ATTRIBUTE_INPUT_BIT, PIPELINE_STAGE_FLAGS::VERTEX_SHADER_BIT, PIPELINE_STAGE_FLAGS::TESSELLATION_CONTROL_SHADER_BIT, PIPELINE_STAGE_FLAGS::TESSELLATION_EVALUATION_SHADER_BIT, PIPELINE_STAGE_FLAGS::GEOMETRY_SHADER_BIT, PIPELINE_STAGE_FLAGS::SHADING_RATE_ATTACHMENT_BIT, PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT, PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT, PIPELINE_STAGE_FLAGS::LATE_FRAGMENT_TESTS_BIT, PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT}) - { - if (pipelineStage == PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT) - primitivePrevStage |= PIPELINE_STAGE_FLAGS::FRAGMENT_DENSITY_PROCESS_BIT; - add(pipelineStage, primitivePrevStage); - primitivePrevStage |= pipelineStage; - } - - - } - constexpr const auto& operator[](const size_t ix) const {return data[ix];} - - private: - constexpr static uint8_t findLSB(size_t val) - { - for (size_t ix=0ull; ix(stageFlag)); - data[bitIx] |= previousStageFlags; - } - - PIPELINE_STAGE_FLAGS data[std::numeric_limits>::digits] = {}; - }; - - constexpr 
PerStagePreviousStages bitToAccess = {}; - - core::bitflag retval = PIPELINE_STAGE_FLAGS::NONE; - while (bool(stages.value)) - { - const auto bitIx = hlsl::findLSB(stages); - retval |= bitToAccess[bitIx]; - stages ^= static_cast(0x1u< allLaterStages(core::bitflag stages) -{ - struct PerStageLaterStages - { - public: - constexpr PerStageLaterStages() - { - // set all stage to have itself as their next stages - for (auto i = 0; i < std::numeric_limits::digits; i++) - data[i] = static_cast(i); - - add(PIPELINE_STAGE_FLAGS::DISPATCH_INDIRECT_COMMAND_BIT, PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT); - add(PIPELINE_STAGE_FLAGS::DISPATCH_INDIRECT_COMMAND_BIT, PIPELINE_STAGE_FLAGS::RAY_TRACING_SHADER_BIT); - - // graphics primitive pipeline - PIPELINE_STAGE_FLAGS laterStage = PIPELINE_STAGE_FLAGS::NONE; - const auto graphicsPrimitivePipelineOrders = std::array{ PIPELINE_STAGE_FLAGS::DISPATCH_INDIRECT_COMMAND_BIT, PIPELINE_STAGE_FLAGS::INDEX_INPUT_BIT, PIPELINE_STAGE_FLAGS::VERTEX_ATTRIBUTE_INPUT_BIT, PIPELINE_STAGE_FLAGS::VERTEX_SHADER_BIT, PIPELINE_STAGE_FLAGS::TESSELLATION_CONTROL_SHADER_BIT, PIPELINE_STAGE_FLAGS::TESSELLATION_EVALUATION_SHADER_BIT, PIPELINE_STAGE_FLAGS::GEOMETRY_SHADER_BIT, PIPELINE_STAGE_FLAGS::SHADING_RATE_ATTACHMENT_BIT, PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT, PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT, PIPELINE_STAGE_FLAGS::LATE_FRAGMENT_TESTS_BIT, PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT }; - for (auto iter = graphicsPrimitivePipelineOrders.rbegin(); iter < graphicsPrimitivePipelineOrders.rend(); iter++) - { - const auto pipelineStage = *iter; - add(pipelineStage, laterStage); - laterStage |= pipelineStage; - } - - add(PIPELINE_STAGE_FLAGS::FRAGMENT_DENSITY_PROCESS_BIT, PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT); - } - constexpr const auto& operator[](const size_t ix) const {return data[ix];} - - private: - constexpr static uint8_t findLSB(size_t val) - { - for (size_t ix=0ull; ix(stageFlag)); - data[bitIx] |= laterStageFlags; - 
} - - PIPELINE_STAGE_FLAGS data[std::numeric_limits>::digits] = {}; - }; - - constexpr PerStageLaterStages bitToAccess = {}; - - core::bitflag retval = PIPELINE_STAGE_FLAGS::NONE; - while (bool(stages.value)) - { - const auto bitIx = hlsl::findLSB(stages); - retval |= bitToAccess[bitIx]; - stages ^= static_cast(0x1u< allAccessesFromStages(core::bitflag stages) -{ - struct PerStageAccesses - { - public: - constexpr PerStageAccesses() - { - init(PIPELINE_STAGE_FLAGS::HOST_BIT,ACCESS_FLAGS::HOST_READ_BIT|ACCESS_FLAGS::HOST_WRITE_BIT); - - constexpr auto TransferRW = ACCESS_FLAGS::TRANSFER_READ_BIT|ACCESS_FLAGS::TRANSFER_WRITE_BIT; - init(PIPELINE_STAGE_FLAGS::COPY_BIT,TransferRW); - init(PIPELINE_STAGE_FLAGS::CLEAR_BIT,ACCESS_FLAGS::TRANSFER_WRITE_BIT); - - constexpr auto MicromapRead = ACCESS_FLAGS::SHADER_READ_BITS;//|ACCESS_FLAGS::MICROMAP_READ_BIT; -// init(PIPELINE_STAGE_FLAGS::MICROMAP_BUILD_BIT,MicromapRead|ACCESS_FLAGS::MICROMAP_WRITE_BIT); // can micromaps be built indirectly? - - constexpr auto AccelerationStructureRW = ACCESS_FLAGS::ACCELERATION_STRUCTURE_READ_BIT|ACCESS_FLAGS::ACCELERATION_STRUCTURE_WRITE_BIT; - init(PIPELINE_STAGE_FLAGS::ACCELERATION_STRUCTURE_COPY_BIT,TransferRW|AccelerationStructureRW); - init(PIPELINE_STAGE_FLAGS::ACCELERATION_STRUCTURE_BUILD_BIT,ACCESS_FLAGS::INDIRECT_COMMAND_READ_BIT|MicromapRead|AccelerationStructureRW); - - init(PIPELINE_STAGE_FLAGS::COMMAND_PREPROCESS_BIT,ACCESS_FLAGS::COMMAND_PREPROCESS_READ_BIT|ACCESS_FLAGS::COMMAND_PREPROCESS_WRITE_BIT); - init(PIPELINE_STAGE_FLAGS::CONDITIONAL_RENDERING_BIT,ACCESS_FLAGS::CONDITIONAL_RENDERING_READ_BIT); - init(PIPELINE_STAGE_FLAGS::DISPATCH_INDIRECT_COMMAND_BIT,ACCESS_FLAGS::INDIRECT_COMMAND_READ_BIT); - - constexpr auto ShaderRW = ACCESS_FLAGS::SHADER_READ_BITS|ACCESS_FLAGS::SHADER_WRITE_BITS; - constexpr auto AllShaderStagesRW = ShaderRW^(ACCESS_FLAGS::INPUT_ATTACHMENT_READ_BIT|ACCESS_FLAGS::SHADER_BINDING_TABLE_READ_BIT); - 
init(PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT,AllShaderStagesRW); - init(PIPELINE_STAGE_FLAGS::INDEX_INPUT_BIT,ACCESS_FLAGS::INDEX_READ_BIT); - init(PIPELINE_STAGE_FLAGS::VERTEX_ATTRIBUTE_INPUT_BIT,ACCESS_FLAGS::VERTEX_ATTRIBUTE_READ_BIT); - init(PIPELINE_STAGE_FLAGS::VERTEX_SHADER_BIT,AllShaderStagesRW); - init(PIPELINE_STAGE_FLAGS::TESSELLATION_CONTROL_SHADER_BIT,AllShaderStagesRW); - init(PIPELINE_STAGE_FLAGS::TESSELLATION_EVALUATION_SHADER_BIT,AllShaderStagesRW); - init(PIPELINE_STAGE_FLAGS::GEOMETRY_SHADER_BIT,AllShaderStagesRW); -// init(PIPELINE_STAGE_FLAGS::TASK_SHADER_BIT,AllShaderStagesRW); -// init(PIPELINE_STAGE_FLAGS::MESH_SHADER_BIT,AllShaderStagesRW); - init(PIPELINE_STAGE_FLAGS::FRAGMENT_DENSITY_PROCESS_BIT,ACCESS_FLAGS::FRAGMENT_DENSITY_MAP_READ_BIT); - init(PIPELINE_STAGE_FLAGS::SHADING_RATE_ATTACHMENT_BIT,ACCESS_FLAGS::SHADING_RATE_ATTACHMENT_READ_BIT); - constexpr auto DepthStencilRW = ACCESS_FLAGS::DEPTH_STENCIL_ATTACHMENT_READ_BIT|ACCESS_FLAGS::DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; - init(PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT,DepthStencilRW); - init(PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT,AllShaderStagesRW|ACCESS_FLAGS::INPUT_ATTACHMENT_READ_BIT); - init(PIPELINE_STAGE_FLAGS::LATE_FRAGMENT_TESTS_BIT,DepthStencilRW); - init(PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT,ACCESS_FLAGS::COLOR_ATTACHMENT_READ_BIT|ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT); - - init(PIPELINE_STAGE_FLAGS::RAY_TRACING_SHADER_BIT,AllShaderStagesRW|ACCESS_FLAGS::SHADER_BINDING_TABLE_READ_BIT); - - init(PIPELINE_STAGE_FLAGS::RESOLVE_BIT,TransferRW); - init(PIPELINE_STAGE_FLAGS::BLIT_BIT,TransferRW); - -// init(PIPELINE_STAGE_FLAGS::VIDEO_DECODE_BIT,ACCESS_FLAGS::VIDEO_DECODE_READ_BIT|ACCESS_FLAGS::VIDEO_DECODE_WRITE_BIT); -// init(PIPELINE_STAGE_FLAGS::VIDEO_ENCODE_BIT,ACCESS_FLAGS::VIDEO_ENCODE_READ_BIT|ACCESS_FLAGS::VIDEO_ENCODE_WRITE_BIT); -// 
init(PIPELINE_STAGE_FLAGS::OPTICAL_FLOW_BIT,ACCESS_FLAGS::OPTICAL_FLOW_READ_BIT|ACCESS_FLAGS::OPTICAL_FLOW_WRITE_BIT); - } - constexpr const auto& operator[](const size_t ix) const {return data[ix];} +core::bitflag allPreviousStages(core::bitflag stages); - private: - constexpr static uint8_t findLSB(size_t val) - { - for (size_t ix=0ull; ix(stageFlag)); - data[bitIx] = accessFlags; - } +core::bitflag allLaterStages(core::bitflag stages); - ACCESS_FLAGS data[32] = {}; - }; - constexpr PerStageAccesses bitToAccess = {}; +core::bitflag allAccessesFromStages(core::bitflag stages); - // TODO: add logically later or previous stages to make sure all other accesses remain valid - // or ideally expand the stages before calling `allAccessesFromStages` (TODO: add a `allLaterStages` and `allPreviouStages` basically) - - core::bitflag retval = ACCESS_FLAGS::NONE; - while (bool(stages.value)) - { - const auto bitIx = hlsl::findLSB(stages); - retval |= bitToAccess[bitIx]; - stages ^= static_cast(0x1u< allStagesFromAccesses(core::bitflag accesses) -{ - struct PerAccessStages - { - public: - constexpr PerAccessStages() - { - init(ACCESS_FLAGS::HOST_READ_BIT,PIPELINE_STAGE_FLAGS::HOST_BIT); - init(ACCESS_FLAGS::HOST_WRITE_BIT,PIPELINE_STAGE_FLAGS::HOST_BIT); - - init(ACCESS_FLAGS::TRANSFER_READ_BIT,PIPELINE_STAGE_FLAGS::ALL_TRANSFER_BITS^PIPELINE_STAGE_FLAGS::CLEAR_BIT); - init(ACCESS_FLAGS::TRANSFER_WRITE_BIT,PIPELINE_STAGE_FLAGS::ALL_TRANSFER_BITS); - - constexpr auto MicromapAccelerationStructureBuilds = PIPELINE_STAGE_FLAGS::ACCELERATION_STRUCTURE_BUILD_BIT;//|PIPELINE_STAGE_FLAGS::MICROMAP_BUILD_BIT; -// init(ACCESS_FLAGS::MICROMAP_READ_BIT,MicromapAccelerationStructureBuilds); -// init(ACCESS_FLAGS::MICROMAP_WRITE_BIT,PIPELINE_STAGE_FLAGS::MICROMAP_BUILD_BIT); - - constexpr auto AllShaders = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT|PIPELINE_STAGE_FLAGS::PRE_RASTERIZATION_SHADERS_BITS|PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT|PIPELINE_STAGE_FLAGS::RAY_TRACING_SHADER_BIT; - 
constexpr auto AccelerationStructureOperations = PIPELINE_STAGE_FLAGS::ACCELERATION_STRUCTURE_COPY_BIT|PIPELINE_STAGE_FLAGS::ACCELERATION_STRUCTURE_BUILD_BIT; - init(ACCESS_FLAGS::ACCELERATION_STRUCTURE_READ_BIT,AccelerationStructureOperations|AllShaders); - init(ACCESS_FLAGS::ACCELERATION_STRUCTURE_WRITE_BIT,AccelerationStructureOperations); - - init(ACCESS_FLAGS::COMMAND_PREPROCESS_READ_BIT,PIPELINE_STAGE_FLAGS::COMMAND_PREPROCESS_BIT); - init(ACCESS_FLAGS::COMMAND_PREPROCESS_WRITE_BIT,PIPELINE_STAGE_FLAGS::COMMAND_PREPROCESS_BIT); - init(ACCESS_FLAGS::CONDITIONAL_RENDERING_READ_BIT,PIPELINE_STAGE_FLAGS::CONDITIONAL_RENDERING_BIT); - init(ACCESS_FLAGS::INDIRECT_COMMAND_READ_BIT,PIPELINE_STAGE_FLAGS::ACCELERATION_STRUCTURE_BUILD_BIT|PIPELINE_STAGE_FLAGS::DISPATCH_INDIRECT_COMMAND_BIT); - - init(ACCESS_FLAGS::UNIFORM_READ_BIT,AllShaders); - init(ACCESS_FLAGS::SAMPLED_READ_BIT,AllShaders);//|PIPELINE_STAGE_FLAGS::MICROMAP_BUILD_BIT); - init(ACCESS_FLAGS::STORAGE_READ_BIT,AllShaders|MicromapAccelerationStructureBuilds); - init(ACCESS_FLAGS::STORAGE_WRITE_BIT,AllShaders); - - init(ACCESS_FLAGS::INDEX_READ_BIT,PIPELINE_STAGE_FLAGS::INDEX_INPUT_BIT); - init(ACCESS_FLAGS::VERTEX_ATTRIBUTE_READ_BIT,PIPELINE_STAGE_FLAGS::VERTEX_ATTRIBUTE_INPUT_BIT); - - init(ACCESS_FLAGS::FRAGMENT_DENSITY_MAP_READ_BIT,PIPELINE_STAGE_FLAGS::FRAGMENT_DENSITY_PROCESS_BIT); - init(ACCESS_FLAGS::SHADING_RATE_ATTACHMENT_READ_BIT,PIPELINE_STAGE_FLAGS::SHADING_RATE_ATTACHMENT_BIT); - constexpr auto FragmentTests = PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT|PIPELINE_STAGE_FLAGS::LATE_FRAGMENT_TESTS_BIT; - init(ACCESS_FLAGS::DEPTH_STENCIL_ATTACHMENT_READ_BIT,FragmentTests); - init(ACCESS_FLAGS::DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,FragmentTests); - init(ACCESS_FLAGS::INPUT_ATTACHMENT_READ_BIT,PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT); - init(ACCESS_FLAGS::COLOR_ATTACHMENT_READ_BIT,PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT); - 
init(ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT,PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT); - - init(ACCESS_FLAGS::SHADER_BINDING_TABLE_READ_BIT,PIPELINE_STAGE_FLAGS::RAY_TRACING_SHADER_BIT); - -// init(ACCESS_FLAGS::VIDEO_DECODE_READ_BIT,PIPELINE_STAGE_FLAGS::VIDEO_DECODE_BIT); -// init(ACCESS_FLAGS::VIDEO_DECODE_WRITE_BIT,PIPELINE_STAGE_FLAGS::VIDEO_DECODE_BIT); -// init(ACCESS_FLAGS::VIDEO_ENCODE_READ_BIT,PIPELINE_STAGE_FLAGS::VIDEO_ENCODE_BIT); -// init(ACCESS_FLAGS::VIDEO_ENCODE_WRITE_BIT,PIPELINE_STAGE_FLAGS::VIDEO_ENCODE_BIT); -// init(ACCESS_FLAGS::OPTICAL_FLOW_READ_BIT,PIPELINE_STAGE_FLAGS::OPTICAL_FLOW_BIT); -// init(ACCESS_FLAGS::OPTICAL_FLOW_WRITE_BIT,PIPELINE_STAGE_FLAGS::OPTICAL_FLOW_BIT); - } - constexpr const auto& operator[](const size_t ix) const {return data[ix];} - - private: - constexpr static uint8_t findLSB(size_t val) - { - for (size_t ix=0ull; ix(accessFlags)); - data[bitIx] = stageFlags; - } - - PIPELINE_STAGE_FLAGS data[32] = {}; - }; - constexpr PerAccessStages bitToStage = {}; - - core::bitflag retval = PIPELINE_STAGE_FLAGS::NONE; - while (bool(accesses.value)) - { - const auto bitIx = hlsl::findLSB(accesses); - retval |= bitToStage[bitIx]; - accesses ^= static_cast(0x1u< allStagesFromAccesses(core::bitflag accesses); } diff --git a/include/nbl/asset/IBuffer.h b/include/nbl/asset/IBuffer.h index 8c3b8f95ef..6f8c1bb35b 100644 --- a/include/nbl/asset/IBuffer.h +++ b/include/nbl/asset/IBuffer.h @@ -100,7 +100,7 @@ struct SBufferRange inline operator SBufferRange&() {return *reinterpret_cast*>(this);} inline operator const SBufferRange&() const {return *reinterpret_cast*>(this);} - template requires std::is_same_v,BufferType> + template requires (std::is_const_v && std::is_base_of_v>) inline operator SBufferBinding() const { return {.offset=offset,.buffer=buffer}; } explicit inline operator bool() const {return isValid();} diff --git a/include/nbl/asset/IPolygonGeometry.h b/include/nbl/asset/IPolygonGeometry.h index 
4d021c178c..97a6cda7d0 100644 --- a/include/nbl/asset/IPolygonGeometry.h +++ b/include/nbl/asset/IPolygonGeometry.h @@ -203,6 +203,26 @@ class IPolygonGeometry : public IIndexableGeometry, public IPolygonG // For User defined semantics inline const core::vector& getAuxAttributeViews() const {return m_auxAttributeViews;} + inline E_INDEX_TYPE getIndexType() const + { + auto indexType = EIT_UNKNOWN; + // stays EIT_UNKNOWN when there is no index view or its format is neither EF_R16_UINT nor EF_R32_UINT + if (base_t::m_indexView) + { + switch (base_t::m_indexView.composed.format) + { + case EF_R16_UINT: + indexType = EIT_16BIT; + break; + case EF_R32_UINT: + indexType = EIT_32BIT; + break; + default: + break; + } + } + return indexType; + } // Does not set the `transform` or `geometryFlags` fields, because it doesn't care about it. // Also won't set second set of vertex data, opacity mipmaps, etc. @@ -212,30 +232,12 @@ class IPolygonGeometry : public IIndexableGeometry, public IPolygonG // must be a triangle list, but don't want to compare pointers if (m_indexing && m_indexing->knownTopology()==EPT_TRIANGLE_LIST)// && m_indexing->degree() == TriangleList()->degree() && m_indexing->rate() == TriangleList->rate()) { - auto indexType = EIT_UNKNOWN; - // disallowed index format - if (base_t::m_indexView) - { - switch (base_t::m_indexView.composed.format) - { - case EF_R16_UINT: - indexType = EIT_16BIT; - break; - case EF_R32_UINT: [[fallthrough]]; - indexType = EIT_32BIT; - break; - default: - break; - } - if (indexType==EIT_UNKNOWN) - return retval; - } retval.vertexData[0] = base_t::m_positionView.src; retval.indexData = base_t::m_indexView.src; retval.maxVertex = base_t::m_positionView.getElementCount() - 1; retval.vertexStride = base_t::m_positionView.composed.getStride(); retval.vertexFormat = base_t::m_positionView.composed.format; - retval.indexType = indexType; + retval.indexType = getIndexType(); } return retval; } diff --git a/include/nbl/asset/utils/CDirQuantCacheBase.h b/include/nbl/asset/utils/CDirQuantCacheBase.h 
index 462d414a73..a0e656d50e 100644 --- a/include/nbl/asset/utils/CDirQuantCacheBase.h +++ b/include/nbl/asset/utils/CDirQuantCacheBase.h @@ -43,13 +43,13 @@ class CDirQuantCacheBase Vector8u3() : x(0u),y(0u),z(0u) {} Vector8u3(const Vector8u3&) = default; - explicit Vector8u3(const core::vectorSIMDu32& val) + explicit Vector8u3(const hlsl::float32_t3& val) { operator=(val); } Vector8u3& operator=(const Vector8u3&) = default; - Vector8u3& operator=(const core::vectorSIMDu32& val) + Vector8u3& operator=(const hlsl::float32_t3& val) { x = val.x; y = val.y; @@ -57,11 +57,12 @@ class CDirQuantCacheBase return *this; } - inline core::vectorSIMDu32 getValue() const + hlsl::float32_t3 getValue() const { - return core::vectorSIMDu32(x,y,z); + return { x, y, z }; } + private: uint8_t x; uint8_t y; @@ -74,24 +75,24 @@ class CDirQuantCacheBase Vector8u4() : x(0u),y(0u),z(0u),w(0u) {} Vector8u4(const Vector8u4&) = default; - explicit Vector8u4(const core::vectorSIMDu32& val) + explicit Vector8u4(const hlsl::float32_t3& val) { operator=(val); } Vector8u4& operator=(const Vector8u4&) = default; - Vector8u4& operator=(const core::vectorSIMDu32& val) + Vector8u4& operator=(const hlsl::float32_t3& val) { x = val.x; y = val.y; z = val.z; - w = val.w; + w = 0; return *this; } - inline core::vectorSIMDu32 getValue() const + hlsl::float32_t3 getValue() const { - return core::vectorSIMDu32(x,y,z,w); + return { x, y, z }; } private: @@ -108,16 +109,17 @@ class CDirQuantCacheBase Vector1010102() : storage(0u) {} Vector1010102(const Vector1010102&) = default; - explicit Vector1010102(const core::vectorSIMDu32& val) + explicit Vector1010102(const hlsl::float32_t3& val) { operator=(val); } Vector1010102& operator=(const Vector1010102&) = default; - Vector1010102& operator=(const core::vectorSIMDu32& val) + Vector1010102& operator=(const hlsl::float32_t3& val) { - constexpr auto storageBits = quantizationBits+1u; - storage = val.x|(val.y<>storageBits,storage>>(storageBits*2u))&mask; + 
constexpr auto storageBits = quantizationBits + 1u; + const auto mask = (0x1u << storageBits) - 1u; + return { storage & mask, (storage >> storageBits) & mask, (storage >> (storageBits * 2)) & mask}; } - + private: uint32_t storage; }; @@ -149,13 +151,13 @@ class CDirQuantCacheBase Vector16u3() : x(0u),y(0u),z(0u) {} Vector16u3(const Vector16u3&) = default; - explicit Vector16u3(const core::vectorSIMDu32& val) + explicit Vector16u3(const hlsl::float32_t3& val) { operator=(val); } Vector16u3& operator=(const Vector16u3&) = default; - Vector16u3& operator=(const core::vectorSIMDu32& val) + Vector16u3& operator=(const hlsl::float32_t3& val) { x = val.x; y = val.y; @@ -163,11 +165,11 @@ class CDirQuantCacheBase return *this; } - inline core::vectorSIMDu32 getValue() const + hlsl::float32_t3 getValue() const { - return core::vectorSIMDu32(x,y,z); + return { x, y, z }; } - + private: uint16_t x; uint16_t y; @@ -180,26 +182,26 @@ class CDirQuantCacheBase Vector16u4() : x(0u),y(0u),z(0u),w(0u) {} Vector16u4(const Vector16u4&) = default; - explicit Vector16u4(const core::vectorSIMDu32& val) + explicit Vector16u4(const hlsl::float32_t3& val) { operator=(val); } Vector16u4& operator=(const Vector16u4&) = default; - Vector16u4& operator=(const core::vectorSIMDu32& val) + Vector16u4& operator=(const hlsl::float32_t3& val) { x = val.x; y = val.y; z = val.z; - w = val.w; + w = 0; return *this; } - inline core::vectorSIMDu32 getValue() const + hlsl::float32_t3 getValue() const { - return core::vectorSIMDu32(x,y,z,w); + return { x, y, z }; } - + private: uint16_t x; uint16_t y; @@ -377,11 +379,11 @@ class CDirQuantCacheBase : public virtual core::IReferenceCounted, public impl:: std::tuple...> cache; template - value_type_t quantize(const core::vectorSIMDf& value) + value_type_t quantize(const hlsl::float32_t3& value) { - const auto negativeMask = value < core::vectorSIMDf(0.0f); + const auto negativeMask = lessThan(value, hlsl::float32_t3(0.0f)); - const core::vectorSIMDf absValue 
= abs(value); + const hlsl::float32_t3 absValue = abs(value); const auto key = Key(absValue); constexpr auto quantizationBits = quantization_bits_v; @@ -393,32 +395,31 @@ class CDirQuantCacheBase : public virtual core::IReferenceCounted, public impl:: quantized = found->second; else { - const core::vectorSIMDf fit = findBestFit(absValue); + const auto fit = findBestFit(absValue); - quantized = core::vectorSIMDu32(core::abs(fit)); + quantized = abs(fit); insertIntoCache(key,quantized); } } - const core::vectorSIMDu32 xorflag((0x1u<<(quantizationBits+1u))-1u); - auto restoredAsVec = quantized.getValue()^core::mix(core::vectorSIMDu32(0u),xorflag,negativeMask); - restoredAsVec += core::mix(core::vectorSIMDu32(0u),core::vectorSIMDu32(1u),negativeMask); - return value_type_t(restoredAsVec&xorflag); + //return quantized. + const auto negativeMulVec = hlsl::float32_t3(negativeMask.x ? -1 : 1, negativeMask.y ? -1 : 1, negativeMask.z ? -1 : 1); + return value_type_t(negativeMulVec * quantized.getValue()); } template - static inline core::vectorSIMDf findBestFit(const core::vectorSIMDf& value) + static inline hlsl::float32_t3 findBestFit(const hlsl::float32_t3& value) { static_assert(dimensions>1u,"No point"); static_assert(dimensions<=4u,"High Dimensions are Hard!"); - // precise normalize - const auto vectorForDots = value.preciseDivision(length(value)); + + const auto vectorForDots = hlsl::normalize(value); // - core::vectorSIMDf fittingVector; - core::vectorSIMDf floorOffset; + hlsl::float32_t3 fittingVector; + hlsl::float32_t3 floorOffset; constexpr uint32_t cornerCount = (0x1u<<(dimensions-1u))-1u; - core::vectorSIMDf corners[cornerCount] = {}; + hlsl::float32_t3 corners[cornerCount] = {}; { uint32_t maxDirCompIndex = 0u; for (auto i=1u; i void + auto evaluateFit = [&](const hlsl::float32_t3& newFit) -> void { - auto newFitLen = core::length(newFit); - const float dp = core::dot(newFit,vectorForDots).preciseDivision(newFitLen)[0]; + auto newFitLen = length(newFit); + 
const float dp = hlsl::dot(newFit,vectorForDots) / (newFitLen); if (dp > closestTo1) { closestTo1 = dp; @@ -466,18 +467,18 @@ class CDirQuantCacheBase : public virtual core::IReferenceCounted, public impl:: }; constexpr uint32_t cubeHalfSize = (0x1u << quantizationBits) - 1u; - const core::vectorSIMDf cubeHalfSizeND = core::vectorSIMDf(cubeHalfSize); + const hlsl::float32_t3 cubeHalfSizeND = hlsl::float32_t3(cubeHalfSize); for (uint32_t n=cubeHalfSize; n>0u; n--) { //we'd use float addition in the interest of speed, to increment the loop //but adding a small number to a large one loses precision, so multiplication preferrable - core::vectorSIMDf bottomFit = core::floor(fittingVector*float(n)+floorOffset); - if ((bottomFit<=cubeHalfSizeND).all()) + const auto bottomFit = floor(fittingVector * float(n) + floorOffset); + if (hlsl::all(glm::lessThanEqual(bottomFit, cubeHalfSizeND))) evaluateFit(bottomFit); - for (auto i=0u; i createArrow(const uint32_t tesselationCylinder = 4, + core::vector> createArrow(const uint32_t tesselationCylinder = 4, const uint32_t tesselationCone = 8, const float height = 1.f, const float cylinderHeight = 0.6f, const float widthCylinder = 0.05f, - const float widthCone = 0.3f, const video::SColor colorCylinder = 0xFFFFFFFF, - const video::SColor colorCone = 0xFFFFFFFF) const; + const float widthCone = 0.3f) const; //! Create a sphere mesh. @@ -87,7 +86,7 @@ class NBL_API2 CGeometryCreator final : public core::IReferenceCounted */ core::smart_refctd_ptr createCylinder(float radius, float length, uint32_t tesselation, - const video::SColor& color=video::SColor(0xffffffff), CQuantNormalCache* const quantNormalCacheOverride=nullptr) const; + CQuantNormalCache* const quantNormalCacheOverride=nullptr) const; //! Create a cone mesh. /** @@ -100,8 +99,6 @@ class NBL_API2 CGeometryCreator final : public core::IReferenceCounted \return Generated mesh. 
*/ core::smart_refctd_ptr createCone(float radius, float length, uint32_t tesselation, - const video::SColor& colorTop=video::SColor(0xffffffff), - const video::SColor& colorBottom=video::SColor(0xffffffff), float oblique=0.f, CQuantNormalCache* const quantNormalCacheOverride=nullptr) const; core::smart_refctd_ptr createRectangle(const hlsl::float32_t2 size={0.5f,0.5f}) const; diff --git a/include/nbl/asset/utils/CQuantNormalCache.h b/include/nbl/asset/utils/CQuantNormalCache.h index 92703d9d37..31b7d403d2 100644 --- a/include/nbl/asset/utils/CQuantNormalCache.h +++ b/include/nbl/asset/utils/CQuantNormalCache.h @@ -19,7 +19,7 @@ namespace impl struct VectorUV { - inline VectorUV(const core::vectorSIMDf& absNormal) + inline VectorUV(const hlsl::float32_t3& absNormal) { const float rcpManhattanNorm = 1.f / (absNormal.x + absNormal.y + absNormal.z); u = absNormal.x * rcpManhattanNorm; @@ -56,9 +56,8 @@ class CQuantNormalCache : public CDirQuantCacheBase - value_type_t quantize(core::vectorSIMDf normal) + value_type_t quantize(hlsl::float32_t3 normal) { - normal.makeSafe3D(); return Base::quantize<3u,CacheFormat>(normal); } }; diff --git a/src/nbl/asset/ECommonEnums.cpp b/src/nbl/asset/ECommonEnums.cpp new file mode 100644 index 0000000000..0f23b9b3fc --- /dev/null +++ b/src/nbl/asset/ECommonEnums.cpp @@ -0,0 +1,281 @@ +#include "nbl/asset/ECommonEnums.h" + +namespace nbl::asset +{ + +constexpr static int32_t findLSB(size_t val) +{ + if (std::is_constant_evaluated()) // must be a plain `if`: under `if constexpr` this call always yields true + { + for (size_t ix=0ull; ix allPreviousStages(core::bitflag stages) +{ + struct PerStagePreviousStages + { + public: + constexpr PerStagePreviousStages() + { + // seed every stage with its own bit, so each stage counts among its own previous stages + for (auto i = 0; i < std::numeric_limits::digits; i++) + data[i] = static_cast(1ull<<i); + + add(PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, PIPELINE_STAGE_FLAGS::DISPATCH_INDIRECT_COMMAND_BIT); + + add(PIPELINE_STAGE_FLAGS::RAY_TRACING_SHADER_BIT, 
PIPELINE_STAGE_FLAGS::DISPATCH_INDIRECT_COMMAND_BIT); + + // graphics primitive pipeline + PIPELINE_STAGE_FLAGS primitivePrevStage = PIPELINE_STAGE_FLAGS::DISPATCH_INDIRECT_COMMAND_BIT; + for (auto pipelineStage : {PIPELINE_STAGE_FLAGS::INDEX_INPUT_BIT, PIPELINE_STAGE_FLAGS::VERTEX_ATTRIBUTE_INPUT_BIT, PIPELINE_STAGE_FLAGS::VERTEX_SHADER_BIT, PIPELINE_STAGE_FLAGS::TESSELLATION_CONTROL_SHADER_BIT, PIPELINE_STAGE_FLAGS::TESSELLATION_EVALUATION_SHADER_BIT, PIPELINE_STAGE_FLAGS::GEOMETRY_SHADER_BIT, PIPELINE_STAGE_FLAGS::SHADING_RATE_ATTACHMENT_BIT, PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT, PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT, PIPELINE_STAGE_FLAGS::LATE_FRAGMENT_TESTS_BIT, PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT}) + { + if (pipelineStage == PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT) + primitivePrevStage |= PIPELINE_STAGE_FLAGS::FRAGMENT_DENSITY_PROCESS_BIT; + add(pipelineStage, primitivePrevStage); + primitivePrevStage |= pipelineStage; + } + + + } + constexpr const auto& operator[](const size_t ix) const {return data[ix];} + + private: + + constexpr void add(PIPELINE_STAGE_FLAGS stageFlag, PIPELINE_STAGE_FLAGS previousStageFlags) + { + const auto bitIx = findLSB(static_cast(stageFlag)); + data[bitIx] |= previousStageFlags; + } + + PIPELINE_STAGE_FLAGS data[std::numeric_limits>::digits] = {}; + }; + + constexpr PerStagePreviousStages bitToAccess = {}; + + core::bitflag retval = PIPELINE_STAGE_FLAGS::NONE; + while (bool(stages.value)) + { + const auto bitIx = findLSB(static_cast(stages.value)); + retval |= bitToAccess[bitIx]; + stages ^= static_cast(0x1u< allLaterStages(core::bitflag stages) +{ + struct PerStageLaterStages + { + public: + constexpr PerStageLaterStages() + { + // seed every stage with its own bit, so each stage counts among its own later stages + for (auto i = 0; i < std::numeric_limits::digits; i++) + data[i] = static_cast(1ull<<i); + + add(PIPELINE_STAGE_FLAGS::DISPATCH_INDIRECT_COMMAND_BIT, PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT); + 
add(PIPELINE_STAGE_FLAGS::DISPATCH_INDIRECT_COMMAND_BIT, PIPELINE_STAGE_FLAGS::RAY_TRACING_SHADER_BIT); + + // graphics primitive pipeline + PIPELINE_STAGE_FLAGS laterStage = PIPELINE_STAGE_FLAGS::NONE; + const auto graphicsPrimitivePipelineOrders = std::array{ PIPELINE_STAGE_FLAGS::DISPATCH_INDIRECT_COMMAND_BIT, PIPELINE_STAGE_FLAGS::INDEX_INPUT_BIT, PIPELINE_STAGE_FLAGS::VERTEX_ATTRIBUTE_INPUT_BIT, PIPELINE_STAGE_FLAGS::VERTEX_SHADER_BIT, PIPELINE_STAGE_FLAGS::TESSELLATION_CONTROL_SHADER_BIT, PIPELINE_STAGE_FLAGS::TESSELLATION_EVALUATION_SHADER_BIT, PIPELINE_STAGE_FLAGS::GEOMETRY_SHADER_BIT, PIPELINE_STAGE_FLAGS::SHADING_RATE_ATTACHMENT_BIT, PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT, PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT, PIPELINE_STAGE_FLAGS::LATE_FRAGMENT_TESTS_BIT, PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT }; + for (auto iter = graphicsPrimitivePipelineOrders.rbegin(); iter < graphicsPrimitivePipelineOrders.rend(); iter++) + { + const auto pipelineStage = *iter; + add(pipelineStage, laterStage); + laterStage |= pipelineStage; + } + + add(PIPELINE_STAGE_FLAGS::FRAGMENT_DENSITY_PROCESS_BIT, PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT); + } + constexpr const auto& operator[](const size_t ix) const {return data[ix];} + + private: + + constexpr void add(PIPELINE_STAGE_FLAGS stageFlag, PIPELINE_STAGE_FLAGS laterStageFlags) + { + const auto bitIx = findLSB(static_cast(stageFlag)); + data[bitIx] |= laterStageFlags; + } + + PIPELINE_STAGE_FLAGS data[std::numeric_limits>::digits] = {}; + }; + + constexpr PerStageLaterStages bitToAccess = {}; + + core::bitflag retval = PIPELINE_STAGE_FLAGS::NONE; + while (bool(stages.value)) + { + const auto bitIx = findLSB(static_cast(stages.value)); + retval |= bitToAccess[bitIx]; + stages ^= static_cast(0x1u< allAccessesFromStages(core::bitflag stages) +{ + struct PerStageAccesses + { + public: + constexpr PerStageAccesses() + { + 
init(PIPELINE_STAGE_FLAGS::HOST_BIT,ACCESS_FLAGS::HOST_READ_BIT|ACCESS_FLAGS::HOST_WRITE_BIT); + + constexpr auto TransferRW = ACCESS_FLAGS::TRANSFER_READ_BIT|ACCESS_FLAGS::TRANSFER_WRITE_BIT; + init(PIPELINE_STAGE_FLAGS::COPY_BIT,TransferRW); + init(PIPELINE_STAGE_FLAGS::CLEAR_BIT,ACCESS_FLAGS::TRANSFER_WRITE_BIT); + + constexpr auto MicromapRead = ACCESS_FLAGS::SHADER_READ_BITS;//|ACCESS_FLAGS::MICROMAP_READ_BIT; +// init(PIPELINE_STAGE_FLAGS::MICROMAP_BUILD_BIT,MicromapRead|ACCESS_FLAGS::MICROMAP_WRITE_BIT); // can micromaps be built indirectly? + + constexpr auto AccelerationStructureRW = ACCESS_FLAGS::ACCELERATION_STRUCTURE_READ_BIT|ACCESS_FLAGS::ACCELERATION_STRUCTURE_WRITE_BIT; + init(PIPELINE_STAGE_FLAGS::ACCELERATION_STRUCTURE_COPY_BIT,TransferRW|AccelerationStructureRW); + init(PIPELINE_STAGE_FLAGS::ACCELERATION_STRUCTURE_BUILD_BIT,ACCESS_FLAGS::INDIRECT_COMMAND_READ_BIT|MicromapRead|AccelerationStructureRW); + + init(PIPELINE_STAGE_FLAGS::COMMAND_PREPROCESS_BIT,ACCESS_FLAGS::COMMAND_PREPROCESS_READ_BIT|ACCESS_FLAGS::COMMAND_PREPROCESS_WRITE_BIT); + init(PIPELINE_STAGE_FLAGS::CONDITIONAL_RENDERING_BIT,ACCESS_FLAGS::CONDITIONAL_RENDERING_READ_BIT); + init(PIPELINE_STAGE_FLAGS::DISPATCH_INDIRECT_COMMAND_BIT,ACCESS_FLAGS::INDIRECT_COMMAND_READ_BIT); + + constexpr auto ShaderRW = ACCESS_FLAGS::SHADER_READ_BITS|ACCESS_FLAGS::SHADER_WRITE_BITS; + constexpr auto AllShaderStagesRW = ShaderRW^(ACCESS_FLAGS::INPUT_ATTACHMENT_READ_BIT|ACCESS_FLAGS::SHADER_BINDING_TABLE_READ_BIT); + init(PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT,AllShaderStagesRW); + init(PIPELINE_STAGE_FLAGS::INDEX_INPUT_BIT,ACCESS_FLAGS::INDEX_READ_BIT); + init(PIPELINE_STAGE_FLAGS::VERTEX_ATTRIBUTE_INPUT_BIT,ACCESS_FLAGS::VERTEX_ATTRIBUTE_READ_BIT); + init(PIPELINE_STAGE_FLAGS::VERTEX_SHADER_BIT,AllShaderStagesRW); + init(PIPELINE_STAGE_FLAGS::TESSELLATION_CONTROL_SHADER_BIT,AllShaderStagesRW); + init(PIPELINE_STAGE_FLAGS::TESSELLATION_EVALUATION_SHADER_BIT,AllShaderStagesRW); + 
init(PIPELINE_STAGE_FLAGS::GEOMETRY_SHADER_BIT,AllShaderStagesRW); +// init(PIPELINE_STAGE_FLAGS::TASK_SHADER_BIT,AllShaderStagesRW); +// init(PIPELINE_STAGE_FLAGS::MESH_SHADER_BIT,AllShaderStagesRW); + init(PIPELINE_STAGE_FLAGS::FRAGMENT_DENSITY_PROCESS_BIT,ACCESS_FLAGS::FRAGMENT_DENSITY_MAP_READ_BIT); + init(PIPELINE_STAGE_FLAGS::SHADING_RATE_ATTACHMENT_BIT,ACCESS_FLAGS::SHADING_RATE_ATTACHMENT_READ_BIT); + constexpr auto DepthStencilRW = ACCESS_FLAGS::DEPTH_STENCIL_ATTACHMENT_READ_BIT|ACCESS_FLAGS::DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + init(PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT,DepthStencilRW); + init(PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT,AllShaderStagesRW|ACCESS_FLAGS::INPUT_ATTACHMENT_READ_BIT); + init(PIPELINE_STAGE_FLAGS::LATE_FRAGMENT_TESTS_BIT,DepthStencilRW); + init(PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT,ACCESS_FLAGS::COLOR_ATTACHMENT_READ_BIT|ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT); + + init(PIPELINE_STAGE_FLAGS::RAY_TRACING_SHADER_BIT,AllShaderStagesRW|ACCESS_FLAGS::SHADER_BINDING_TABLE_READ_BIT); + + init(PIPELINE_STAGE_FLAGS::RESOLVE_BIT,TransferRW); + init(PIPELINE_STAGE_FLAGS::BLIT_BIT,TransferRW); + +// init(PIPELINE_STAGE_FLAGS::VIDEO_DECODE_BIT,ACCESS_FLAGS::VIDEO_DECODE_READ_BIT|ACCESS_FLAGS::VIDEO_DECODE_WRITE_BIT); +// init(PIPELINE_STAGE_FLAGS::VIDEO_ENCODE_BIT,ACCESS_FLAGS::VIDEO_ENCODE_READ_BIT|ACCESS_FLAGS::VIDEO_ENCODE_WRITE_BIT); +// init(PIPELINE_STAGE_FLAGS::OPTICAL_FLOW_BIT,ACCESS_FLAGS::OPTICAL_FLOW_READ_BIT|ACCESS_FLAGS::OPTICAL_FLOW_WRITE_BIT); + } + constexpr const auto& operator[](const size_t ix) const {return data[ix];} + + private: + + constexpr void init(PIPELINE_STAGE_FLAGS stageFlag, ACCESS_FLAGS accessFlags) + { + const auto bitIx = findLSB(static_cast(stageFlag)); + data[bitIx] = accessFlags; + } + + ACCESS_FLAGS data[32] = {}; + }; + constexpr PerStageAccesses bitToAccess = {}; + + // TODO: add logically later or previous stages to make sure all other accesses remain valid + // or ideally expand 
the stages before calling `allAccessesFromStages` (TODO: add a `allLaterStages` and `allPreviouStages` basically) + + core::bitflag retval = ACCESS_FLAGS::NONE; + while (bool(stages.value)) + { + const auto bitIx = findLSB(static_cast(stages.value)); + retval |= bitToAccess[bitIx]; + stages ^= static_cast(0x1u< allStagesFromAccesses(core::bitflag accesses) +{ + struct PerAccessStages + { + public: + constexpr PerAccessStages() + { + init(ACCESS_FLAGS::HOST_READ_BIT,PIPELINE_STAGE_FLAGS::HOST_BIT); + init(ACCESS_FLAGS::HOST_WRITE_BIT,PIPELINE_STAGE_FLAGS::HOST_BIT); + + init(ACCESS_FLAGS::TRANSFER_READ_BIT,PIPELINE_STAGE_FLAGS::ALL_TRANSFER_BITS^PIPELINE_STAGE_FLAGS::CLEAR_BIT); + init(ACCESS_FLAGS::TRANSFER_WRITE_BIT,PIPELINE_STAGE_FLAGS::ALL_TRANSFER_BITS); + + constexpr auto MicromapAccelerationStructureBuilds = PIPELINE_STAGE_FLAGS::ACCELERATION_STRUCTURE_BUILD_BIT;//|PIPELINE_STAGE_FLAGS::MICROMAP_BUILD_BIT; +// init(ACCESS_FLAGS::MICROMAP_READ_BIT,MicromapAccelerationStructureBuilds); +// init(ACCESS_FLAGS::MICROMAP_WRITE_BIT,PIPELINE_STAGE_FLAGS::MICROMAP_BUILD_BIT); + + constexpr auto AllShaders = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT|PIPELINE_STAGE_FLAGS::PRE_RASTERIZATION_SHADERS_BITS|PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT|PIPELINE_STAGE_FLAGS::RAY_TRACING_SHADER_BIT; + constexpr auto AccelerationStructureOperations = PIPELINE_STAGE_FLAGS::ACCELERATION_STRUCTURE_COPY_BIT|PIPELINE_STAGE_FLAGS::ACCELERATION_STRUCTURE_BUILD_BIT; + init(ACCESS_FLAGS::ACCELERATION_STRUCTURE_READ_BIT,AccelerationStructureOperations|AllShaders); + init(ACCESS_FLAGS::ACCELERATION_STRUCTURE_WRITE_BIT,AccelerationStructureOperations); + + init(ACCESS_FLAGS::COMMAND_PREPROCESS_READ_BIT,PIPELINE_STAGE_FLAGS::COMMAND_PREPROCESS_BIT); + init(ACCESS_FLAGS::COMMAND_PREPROCESS_WRITE_BIT,PIPELINE_STAGE_FLAGS::COMMAND_PREPROCESS_BIT); + init(ACCESS_FLAGS::CONDITIONAL_RENDERING_READ_BIT,PIPELINE_STAGE_FLAGS::CONDITIONAL_RENDERING_BIT); + 
init(ACCESS_FLAGS::INDIRECT_COMMAND_READ_BIT,PIPELINE_STAGE_FLAGS::ACCELERATION_STRUCTURE_BUILD_BIT|PIPELINE_STAGE_FLAGS::DISPATCH_INDIRECT_COMMAND_BIT); + + init(ACCESS_FLAGS::UNIFORM_READ_BIT,AllShaders); + init(ACCESS_FLAGS::SAMPLED_READ_BIT,AllShaders);//|PIPELINE_STAGE_FLAGS::MICROMAP_BUILD_BIT); + init(ACCESS_FLAGS::STORAGE_READ_BIT,AllShaders|MicromapAccelerationStructureBuilds); + init(ACCESS_FLAGS::STORAGE_WRITE_BIT,AllShaders); + + init(ACCESS_FLAGS::INDEX_READ_BIT,PIPELINE_STAGE_FLAGS::INDEX_INPUT_BIT); + init(ACCESS_FLAGS::VERTEX_ATTRIBUTE_READ_BIT,PIPELINE_STAGE_FLAGS::VERTEX_ATTRIBUTE_INPUT_BIT); + + init(ACCESS_FLAGS::FRAGMENT_DENSITY_MAP_READ_BIT,PIPELINE_STAGE_FLAGS::FRAGMENT_DENSITY_PROCESS_BIT); + init(ACCESS_FLAGS::SHADING_RATE_ATTACHMENT_READ_BIT,PIPELINE_STAGE_FLAGS::SHADING_RATE_ATTACHMENT_BIT); + constexpr auto FragmentTests = PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT|PIPELINE_STAGE_FLAGS::LATE_FRAGMENT_TESTS_BIT; + init(ACCESS_FLAGS::DEPTH_STENCIL_ATTACHMENT_READ_BIT,FragmentTests); + init(ACCESS_FLAGS::DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,FragmentTests); + init(ACCESS_FLAGS::INPUT_ATTACHMENT_READ_BIT,PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT); + init(ACCESS_FLAGS::COLOR_ATTACHMENT_READ_BIT,PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT); + init(ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT,PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT); + + init(ACCESS_FLAGS::SHADER_BINDING_TABLE_READ_BIT,PIPELINE_STAGE_FLAGS::RAY_TRACING_SHADER_BIT); + +// init(ACCESS_FLAGS::VIDEO_DECODE_READ_BIT,PIPELINE_STAGE_FLAGS::VIDEO_DECODE_BIT); +// init(ACCESS_FLAGS::VIDEO_DECODE_WRITE_BIT,PIPELINE_STAGE_FLAGS::VIDEO_DECODE_BIT); +// init(ACCESS_FLAGS::VIDEO_ENCODE_READ_BIT,PIPELINE_STAGE_FLAGS::VIDEO_ENCODE_BIT); +// init(ACCESS_FLAGS::VIDEO_ENCODE_WRITE_BIT,PIPELINE_STAGE_FLAGS::VIDEO_ENCODE_BIT); +// init(ACCESS_FLAGS::OPTICAL_FLOW_READ_BIT,PIPELINE_STAGE_FLAGS::OPTICAL_FLOW_BIT); +// 
init(ACCESS_FLAGS::OPTICAL_FLOW_WRITE_BIT,PIPELINE_STAGE_FLAGS::OPTICAL_FLOW_BIT); + } + constexpr const auto& operator[](const size_t ix) const {return data[ix];} + + private: + constexpr void init(ACCESS_FLAGS accessFlags, PIPELINE_STAGE_FLAGS stageFlags) + { + const auto bitIx = findLSB(static_cast(accessFlags)); + data[bitIx] = stageFlags; + } + + PIPELINE_STAGE_FLAGS data[32] = {}; + }; + constexpr PerAccessStages bitToStage = {}; + + core::bitflag retval = PIPELINE_STAGE_FLAGS::NONE; + while (bool(accesses.value)) + { + const auto bitIx = findLSB(static_cast(accesses.value)); + retval |= bitToStage[bitIx]; + accesses ^= static_cast(0x1u< CGeometryCreator::createCube(const hlsl::float32_t3 size) const { using namespace hlsl; @@ -156,7 +161,7 @@ core::smart_refctd_ptr CGeometryCreator::createCube(const h { const hlsl::vector norm[6] = { - hlsl::vector(0, 0, 127), + hlsl::vector(0, 0, 1), hlsl::vector(127, 0, 0), hlsl::vector(0, 0,-127), hlsl::vector(-127, 0, 0), @@ -186,121 +191,12 @@ core::smart_refctd_ptr CGeometryCreator::createCube(const h return retval; } -#if 0 - -/* - a cylinder, a cone and a cross - point up on (0,1.f, 0.f ) -*/ -core::smart_refctd_ptr CGeometryCreator::createArrow( - const uint32_t tesselationCylinder, - const uint32_t tesselationCone, - const float height, - const float cylinderHeight, - const float width0, - const float width1, - const video::SColor vtxColor0, - const video::SColor vtxColor1 -) const +core::smart_refctd_ptr CGeometryCreator::createSphere(float radius, + uint32_t polyCountX, uint32_t polyCountY, CQuantNormalCache* const quantNormalCacheOverride) const { - assert(height > cylinderHeight); - - auto cylinder = createCylinderMesh(width0, cylinderHeight, tesselationCylinder, vtxColor0); - auto cone = createConeMesh(width1, height-cylinderHeight, tesselationCone, vtxColor1, vtxColor1); - - auto cylinderVertices = reinterpret_cast(cylinder.bindings[0].buffer->getPointer()); - auto coneVertices = 
reinterpret_cast(cone.bindings[0].buffer->getPointer()); - - auto cylinderIndecies = reinterpret_cast(cylinder.indexBuffer.buffer->getPointer()); - auto coneIndecies = reinterpret_cast(cone.indexBuffer.buffer->getPointer()); - - const auto cylinderVertexCount = cylinder.bindings[0].buffer->getSize() / sizeof(CylinderVertex); - const auto coneVertexCount = cone.bindings[0].buffer->getSize() / sizeof(ConeVertex); - const auto newArrowVertexCount = cylinderVertexCount + coneVertexCount; - - const auto cylinderIndexCount = cylinder.indexBuffer.buffer->getSize() / sizeof(uint16_t); - const auto coneIndexCount = cone.indexBuffer.buffer->getSize() / sizeof(uint16_t); - const auto newArrowIndexCount = cylinderIndexCount + coneIndexCount; - - for (auto i = 0ull; i < coneVertexCount; ++i) - { - core::vector3df_SIMD newPos = coneVertices[i].pos; - newPos.rotateYZByRAD(-1.5707963268); - - for (auto c = 0; c < 3; ++c) - coneVertices[i].pos[c] = newPos[c]; - } - - auto newArrowVertexBuffer = asset::ICPUBuffer::create({ newArrowVertexCount * sizeof(ArrowVertex) }); - newArrowVertexBuffer->setUsageFlags(newArrowVertexBuffer->getUsageFlags() | asset::IBuffer::EUF_VERTEX_BUFFER_BIT); - auto newArrowIndexBuffer = asset::ICPUBuffer::create({ newArrowIndexCount * sizeof(uint16_t) }); - newArrowIndexBuffer->setUsageFlags(newArrowIndexBuffer->getUsageFlags() | asset::IBuffer::EUF_INDEX_BUFFER_BIT); - - for (auto z = 0ull; z < newArrowVertexCount; ++z) - { - auto arrowVertex = reinterpret_cast(newArrowVertexBuffer->getPointer()) + z; - - if (z < cylinderVertexCount) - { - auto cylinderVertex = (cylinderVertices + z); - memcpy(arrowVertex, cylinderVertex, sizeof(ArrowVertex)); - } - else - { - auto coneVertex = (coneVertices + z - cylinderVertexCount); - memcpy(arrowVertex, coneVertex, offsetof(ConeVertex, normal)); // copy position and color - arrowVertex->uv[0] = 0; - arrowVertex->uv[1] = 0; - arrowVertex->normal = coneVertex->normal; - } - } - - { - auto ArrowIndices = 
reinterpret_cast(newArrowIndexBuffer->getPointer()); - auto newConeIndices = (ArrowIndices + cylinderIndexCount); - - memcpy(ArrowIndices, cylinderIndecies, sizeof(uint16_t) * cylinderIndexCount); - memcpy(newConeIndices, coneIndecies, sizeof(uint16_t) * coneIndexCount); - - for (auto i = 0ull; i < coneIndexCount; ++i) - *(newConeIndices + i) += cylinderVertexCount; - } - - return_type arrow; - - constexpr size_t vertexSize = sizeof(ArrowVertex); - arrow.inputParams = - { 0b1111u,0b1u, - { - {0u,EF_R32G32B32_SFLOAT,offsetof(ArrowVertex,pos)}, - {0u,EF_R8G8B8A8_UNORM,offsetof(ArrowVertex,color)}, - {0u,EF_R32G32_SFLOAT,offsetof(ArrowVertex,uv)}, - {0u,EF_A2B10G10R10_SNORM_PACK32,offsetof(ArrowVertex,normal)} - }, - {vertexSize,SVertexInputBindingParams::EVIR_PER_VERTEX} - }; - - arrow.bindings[0] = { 0, std::move(newArrowVertexBuffer) }; - arrow.indexBuffer = { 0, std::move(newArrowIndexBuffer) }; - arrow.indexCount = newArrowIndexCount; - arrow.indexType = EIT_16BIT; - - return arrow; -} + using namespace hlsl; -/* A sphere with proper normals and texture coords */ -core::smart_refctd_ptr CGeometryCreator::createSphere(float radius, uint32_t polyCountX, uint32_t polyCountY, IMeshManipulator* const meshManipulatorOverride) const -{ - // we are creating the sphere mesh here. - return_type retval; - constexpr size_t vertexSize = sizeof(CGeometryCreator::SphereVertex); - CQuantNormalCache* const quantNormalCache = (meshManipulatorOverride == nullptr) ? defaultMeshManipulator->getQuantNormalCache() : meshManipulatorOverride->getQuantNormalCache(); - retval.inputParams = { 0b1111u,0b1u,{ - {0u,EF_R32G32B32_SFLOAT,offsetof(SphereVertex,pos)}, - {0u,EF_R8G8B8A8_UNORM,offsetof(SphereVertex,color)}, - {0u,EF_R32G32_SFLOAT,offsetof(SphereVertex,uv)}, - {0u,EF_A2B10G10R10_SNORM_PACK32,offsetof(SphereVertex,normal)} - },{vertexSize,SVertexInputBindingParams::EVIR_PER_VERTEX} }; + CQuantNormalCache* const quantNormalCache = quantNormalCacheOverride == nullptr ? 
m_params.normalCache.get() : quantNormalCacheOverride; if (polyCountX < 2) polyCountX = 2; @@ -308,15 +204,20 @@ core::smart_refctd_ptr CGeometryCreator::createSphere(float polyCountY = 2; const uint32_t polyCountXPitch = polyCountX + 1; // get to same vertex on next level + const size_t vertexCount = (polyCountXPitch * polyCountY) + 2; - retval.indexCount = (polyCountX * polyCountY) * 6; - auto indices = asset::ICPUBuffer::create({ sizeof(uint32_t) * retval.indexCount }); + auto retval = core::make_smart_refctd_ptr(); + retval->setIndexing(IPolygonGeometryBase::TriangleList()); // Create indices { + using index_t = uint32_t; + const auto indexCount = (polyCountX * polyCountY) * 6; + const auto bytesize = sizeof(index_t) * indexCount; + auto indices = ICPUBuffer::create({bytesize,IBuffer::EUF_INDEX_BUFFER_BIT}); + auto indexPtr = reinterpret_cast(indices->getPointer()); uint32_t level = 0; size_t indexAddIx = 0; - uint32_t* indexPtr = (uint32_t*)indices->getPointer(); for (uint32_t p1 = 0; p1 < polyCountY - 1; ++p1) { //main quads, top to bottom @@ -372,23 +273,96 @@ core::smart_refctd_ptr CGeometryCreator::createSphere(float indexPtr[indexAddIx++] = polyCountSqM1 + polyCountX - 1; indexPtr[indexAddIx++] = polyCountSqM1; indexPtr[indexAddIx++] = polyCountSq1; + + shapes::AABB<4,index_t> aabb; + aabb.minVx[0] = 0; + aabb.maxVx[0] = vertexCount - 1; + retval->setIndexView({ + .composed = { + .encodedDataRange = {.u32=aabb}, + .stride = sizeof(index_t), + .format = EF_R16_UINT, + .rangeFormat = IGeometryBase::EAABBFormat::U16 + }, + .src = {.offset=0,.size=bytesize,.buffer=std::move(indices)} + }); } - indices->setUsageFlags(indices->getUsageFlags() | asset::IBuffer::EUF_INDEX_BUFFER_BIT); - retval.indexBuffer = {0ull, std::move(indices)}; - // handle vertices + constexpr auto NormalCacheFormat = EF_R8G8B8_SNORM; + constexpr auto NormalFormat = EF_R8G8B8A8_SNORM; + + // Create vertex attributes with NONE usage because we have no clue how they'll be used + 
hlsl::float32_t3* positions; + hlsl::vector* normals; + hlsl::vector* uvs; { - size_t vertexSize = 3 * 4 + 4 + 2 * 4 + 4; - size_t vertexCount = (polyCountXPitch * polyCountY) + 2; - auto vtxBuf = asset::ICPUBuffer::create({ vertexCount * vertexSize }); - auto* tmpMem = reinterpret_cast(vtxBuf->getPointer()); - for (size_t i = 0; i < vertexCount; i++) { - tmpMem[i * vertexSize + 3 * 4 + 0] = 255; - tmpMem[i * vertexSize + 3 * 4 + 1] = 255; - tmpMem[i * vertexSize + 3 * 4 + 2] = 255; - tmpMem[i * vertexSize + 3 * 4 + 3] = 255; + constexpr auto AttrSize = sizeof(decltype(*positions)); + auto buff = ICPUBuffer::create({{AttrSize * vertexCount,IBuffer::EUF_NONE}}); + positions = reinterpret_cast(buff->getPointer()); + shapes::AABB<4, float32_t> aabb; + aabb.maxVx = float32_t4(radius, radius, radius, 0.0f); + aabb.minVx = float32_t4(-radius, -radius, -radius, 0.0f); + retval->setPositionView({ + .composed = { + .encodedDataRange = {.f32 = aabb}, + .stride = AttrSize, + .format = EF_R32G32B32_SFLOAT, + .rangeFormat = IGeometryBase::EAABBFormat::F32 + }, + .src = { + .offset=0, + .size = buff->getSize(), + .buffer = std::move(buff), + } + }); + } + { + constexpr auto AttrSize = sizeof(decltype(*normals)); + auto buff = ICPUBuffer::create({{AttrSize * vertexCount,IBuffer::EUF_NONE}}); + normals = reinterpret_cast(buff->getPointer()); + shapes::AABB<4, int8_t> aabb; + aabb.maxVx = hlsl::vector(127,127,127,0); + aabb.minVx = -aabb.maxVx; + retval->setNormalView({ + .composed = { + .encodedDataRange = {.s8=aabb}, + .stride = AttrSize, + .format = NormalFormat, + .rangeFormat = IGeometryBase::EAABBFormat::S8_NORM + }, + .src = { + .offset = 0, + .size = buff->getSize(), + .buffer = std::move(buff) + } + }); + } + { + constexpr auto AttrSize = sizeof(decltype(*uvs)); + auto buff = ICPUBuffer::create({{AttrSize * vertexCount,IBuffer::EUF_NONE}}); + uvs = reinterpret_cast(buff->getPointer()); + shapes::AABB<4, uint16_t> aabb; + aabb.minVx = hlsl::vector(0,0,0,0); + aabb.maxVx = 
hlsl::vector(255,255,0,0); + retval->getAuxAttributeViews()->push_back({ + .composed = { + .encodedDataRange = {.u16=aabb}, + .stride = AttrSize, + .format = EF_R8G8_UNORM, + .rangeFormat = IGeometryBase::EAABBFormat::U8_NORM + }, + .src = { + .offset = 0, + .size = buff->getSize(), + .buffer = std::move(buff), + } + }); } + } + + // fill vertices + { // calculate the angle which separates all points in a circle const float AngleX = 2 * core::PI() / polyCountX; const float AngleY = core::PI() / polyCountY; @@ -398,9 +372,7 @@ core::smart_refctd_ptr CGeometryCreator::createSphere(float // we don't start at 0. double ay = 0;//AngleY / 2; - - using quant_normal_t = CQuantNormalCache::value_type_t; - uint8_t* tmpMemPtr = tmpMem; + auto vertex_i = 0; for (uint32_t y = 0; y < polyCountY; ++y) { ay += AngleY; @@ -408,7 +380,7 @@ core::smart_refctd_ptr CGeometryCreator::createSphere(float axz = 0; // calculate the necessary vertices without the doubled one - uint8_t* oldTmpMemPtr = tmpMemPtr; + const auto old_vertex_i = vertex_i; for (uint32_t xz = 0; xz < polyCountX; ++xz) { // calculate points position @@ -417,9 +389,8 @@ core::smart_refctd_ptr CGeometryCreator::createSphere(float static_cast(cos(ay)), static_cast(sin(axz) * sinay)); // for spheres the normal is the position - core::vectorSIMDf normal(&pos.X); - normal.makeSafe3D(); - quant_normal_t quantizedNormal = quantNormalCache->quantize(normal); + const auto normal = pos; + const auto quantizedNormal = quantNormalCache->quantize(normal); pos *= radius; // calculate texture coordinates via sphere mapping @@ -427,229 +398,364 @@ core::smart_refctd_ptr CGeometryCreator::createSphere(float float tu = 0.5f; //if (y==0) //{ - if (normal.Y != -1.0f && normal.Y != 1.0f) - tu = static_cast(acos(core::clamp(normal.X / sinay, -1.0, 1.0)) * 0.5 * core::RECIPROCAL_PI()); - if (normal.Z < 0.0f) + if (normal.y != -1.0f && normal.y != 1.0f) + tu = static_cast(acos(core::clamp(normal.x / sinay, -1.0, 1.0)) * 0.5 * 
core::RECIPROCAL_PI()); + if (normal.z < 0.0f) tu = 1 - tu; //} //else //tu = ((float*)(tmpMem+(i-polyCountXPitch)*vertexSize))[4]; - ((float*)tmpMemPtr)[0] = pos.X; - ((float*)tmpMemPtr)[1] = pos.Y; - ((float*)tmpMemPtr)[2] = pos.Z; - ((float*)tmpMemPtr)[4] = tu; - ((float*)tmpMemPtr)[5] = static_cast(ay * core::RECIPROCAL_PI()); - ((quant_normal_t*)tmpMemPtr)[6] = quantizedNormal; - static_assert(sizeof(quant_normal_t)==4u); + positions[vertex_i] = pos; + uvs[vertex_i] = { packSnorm(tu), packSnorm(static_cast(ay * core::RECIPROCAL_PI())) }; + memcpy(normals + vertex_i, &quantizedNormal, sizeof(quantizedNormal)); - tmpMemPtr += vertexSize; + vertex_i++; axz += AngleX; } // This is the doubled vertex on the initial position - ((float*)tmpMemPtr)[0] = ((float*)oldTmpMemPtr)[0]; - ((float*)tmpMemPtr)[1] = ((float*)oldTmpMemPtr)[1]; - ((float*)tmpMemPtr)[2] = ((float*)oldTmpMemPtr)[2]; - ((float*)tmpMemPtr)[4] = 1.f; - ((float*)tmpMemPtr)[5] = ((float*)oldTmpMemPtr)[5]; - ((uint32_t*)tmpMemPtr)[6] = ((uint32_t*)oldTmpMemPtr)[6]; - tmpMemPtr += vertexSize; + positions[vertex_i] = positions[old_vertex_i]; + uvs[vertex_i] = { 127, uvs[old_vertex_i].y }; + normals[vertex_i] = normals[old_vertex_i]; + + vertex_i++; } // the vertex at the top of the sphere - ((float*)tmpMemPtr)[0] = 0.f; - ((float*)tmpMemPtr)[1] = radius; - ((float*)tmpMemPtr)[2] = 0.f; - ((float*)tmpMemPtr)[4] = 0.5f; - ((float*)tmpMemPtr)[5] = 0.f; - ((quant_normal_t*)tmpMemPtr)[6] = quantNormalCache->quantize(core::vectorSIMDf(0.f, 1.f, 0.f)); + positions[vertex_i] = { 0.f, radius, 0.f }; + uvs[vertex_i] = { 0, 63}; + const auto quantizedTopNormal = quantNormalCache->quantize(hlsl::float32_t3(0.f, 1.f, 0.f)); + memcpy(normals + vertex_i, &quantizedTopNormal, sizeof(quantizedTopNormal)); // the vertex at the bottom of the sphere - tmpMemPtr += vertexSize; - ((float*)tmpMemPtr)[0] = 0.f; - ((float*)tmpMemPtr)[1] = -radius; - ((float*)tmpMemPtr)[2] = 0.f; - ((float*)tmpMemPtr)[4] = 0.5f; - 
((float*)tmpMemPtr)[5] = 1.f; - ((quant_normal_t*)tmpMemPtr)[6] = quantNormalCache->quantize(core::vectorSIMDf(0.f, -1.f, 0.f)); - - // recalculate bounding box - core::aabbox3df BoundingBox; - BoundingBox.reset(float32_t3(radius)); - BoundingBox.addInternalPoint(-radius, -radius, -radius); - - // set vertex buffer - vtxBuf->setUsageFlags(vtxBuf->getUsageFlags() | asset::IBuffer::EUF_VERTEX_BUFFER_BIT); - retval.bindings[0] = { 0ull,std::move(vtxBuf) }; - retval.indexType = asset::EIT_32BIT; - retval.bbox = BoundingBox; + vertex_i++; + positions[vertex_i] = { 0.f, -radius, 0.f }; + uvs[vertex_i] = { 63, 127}; + const auto quantizedBottomNormal = quantNormalCache->quantize(hlsl::float32_t3(0.f, -1.f, 0.f)); + memcpy(normals + vertex_i, &quantizedBottomNormal, sizeof(quantizedBottomNormal)); } + CPolygonGeometryManipulator::recomputeContentHashes(retval.get()); return retval; } -/* A cylinder with proper normals and texture coords */ core::smart_refctd_ptr CGeometryCreator::createCylinder( float radius, float length, - uint32_t tesselation, const video::SColor& color, IMeshManipulator* const meshManipulatorOverride -) const + uint32_t tesselation, CQuantNormalCache* const quantNormalCacheOverride) const { - return_type retval; - constexpr size_t vertexSize = sizeof(CGeometryCreator::CylinderVertex); - CQuantNormalCache* const quantNormalCache = (meshManipulatorOverride == nullptr) ? 
defaultMeshManipulator->getQuantNormalCache() : meshManipulatorOverride->getQuantNormalCache(); - retval.inputParams = { 0b1111u,0b1u,{ - {0u,EF_R32G32B32_SFLOAT,offsetof(CylinderVertex,pos)}, - {0u,EF_R8G8B8A8_UNORM,offsetof(CylinderVertex,color)}, - {0u,EF_R32G32_SFLOAT,offsetof(CylinderVertex,uv)}, - {0u,EF_A2B10G10R10_SNORM_PACK32,offsetof(CylinderVertex,normal)} - },{vertexSize,SVertexInputBindingParams::EVIR_PER_VERTEX} }; - - const size_t vtxCnt = 2u*tesselation; - auto vtxBuf = asset::ICPUBuffer::create({ vtxCnt*sizeof(CylinderVertex) }); - - CylinderVertex* vertices = reinterpret_cast(vtxBuf->getPointer()); - for (auto i=0ull; i(tesselation); - const float step = 2.f*core::PI()*tesselationRec; - for (uint32_t i = 0u; iquantize(core::normalize(p)); - - memcpy(vertices[i].pos, p.pointer, 12u); - vertices[i].normal = n; - memcpy(vertices[i].color, glcolor, 4u); - vertices[i].uv[0] = float(i) * tesselationRec; - - vertices[i+halfIx] = vertices[i]; - vertices[i+halfIx].pos[2] = length; - vertices[i+halfIx].uv[1] = 1.f; - } - - constexpr uint32_t rows = 2u; - retval.indexCount = rows * 3u * tesselation; - auto idxBuf = asset::ICPUBuffer::create({ retval.indexCount *sizeof(uint16_t) }); - uint16_t* indices = (uint16_t*)idxBuf->getPointer(); - - for (uint32_t i = 0u, j = 0u; i < halfIx; ++i) - { - indices[j++] = i; - indices[j++] = (i+1u)!=halfIx ? (i+1u):0u; - indices[j++] = i+halfIx; - indices[j++] = i+halfIx; - indices[j++] = (i+1u)!=halfIx ? (i+1u):0u; - indices[j++] = (i+1u)!=halfIx ? 
(i+1u+halfIx):halfIx; - } - - // set vertex buffer - idxBuf->setUsageFlags(idxBuf->getUsageFlags() | asset::IBuffer::EUF_INDEX_BUFFER_BIT); - retval.indexBuffer = { 0ull, std::move(idxBuf) }; - vtxBuf->setUsageFlags(vtxBuf->getUsageFlags() | asset::IBuffer::EUF_VERTEX_BUFFER_BIT); - retval.bindings[0] = { 0ull, std::move(vtxBuf) }; - retval.indexType = asset::EIT_16BIT; - //retval.bbox = ?; + using namespace hlsl; + + CQuantNormalCache* const quantNormalCache = quantNormalCacheOverride == nullptr ? m_params.normalCache.get() : quantNormalCacheOverride; + + const auto halfIx = static_cast(tesselation); + const uint32_t u32_vertexCount = 2 * tesselation; + if (u32_vertexCount > std::numeric_limits::max()) + return nullptr; + const auto vertexCount = static_cast(u32_vertexCount); + + auto retval = core::make_smart_refctd_ptr(); + retval->setIndexing(IPolygonGeometryBase::TriangleList()); + + // Create indices + using index_t = uint16_t; + { + constexpr uint32_t RowCount = 2u; + const auto IndexCount = RowCount * 3 * tesselation; + const auto bytesize = sizeof(index_t) * IndexCount; + auto indices = ICPUBuffer::create({bytesize,IBuffer::EUF_INDEX_BUFFER_BIT}); + auto u = reinterpret_cast(indices->getPointer()); + for (uint16_t i = 0u, j = 0u; i < halfIx; ++i) + { + u[j++] = i; + u[j++] = (i + 1u) != halfIx ? (i + 1u):0u; + u[j++] = i + halfIx; + u[j++] = i + halfIx; + u[j++] = (i + 1u)!= halfIx ? (i + 1u):0u; + u[j++] = (i + 1u)!= halfIx ? 
(i + 1u + halfIx) : halfIx; + } + + shapes::AABB<4,index_t> aabb; + aabb.minVx[0] = 0; + aabb.maxVx[0] = vertexCount - 1; + retval->setIndexView({ + .composed = { + .encodedDataRange = {.u16=aabb}, + .stride = sizeof(index_t), + .format = EF_R16_UINT, + .rangeFormat = IGeometryBase::EAABBFormat::U16 + }, + .src = {.offset=0,.size=bytesize,.buffer=std::move(indices)} + }); + } + + constexpr auto NormalCacheFormat = EF_R8G8B8_SNORM; + constexpr auto NormalFormat = EF_R8G8B8A8_SNORM; + + // Create vertex attributes with NONE usage because we have no clue how they'll be used + hlsl::float32_t3* positions; + hlsl::vector* normals; + hlsl::vector* uvs; + { + { + constexpr auto AttrSize = sizeof(decltype(*positions)); + auto buff = ICPUBuffer::create({{AttrSize * vertexCount,IBuffer::EUF_NONE}}); + positions = reinterpret_cast(buff->getPointer()); + shapes::AABB<4, float32_t> aabb; + aabb.maxVx = float32_t4(radius, radius, length, 0.0f); + aabb.minVx = float32_t4(-radius, -radius, 0.0f, 0.0f); + retval->setPositionView({ + .composed = { + .encodedDataRange = {.f32 = aabb}, + .stride = AttrSize, + .format = EF_R32G32B32_SFLOAT, + .rangeFormat = IGeometryBase::EAABBFormat::F32 + }, + .src = { + .offset=0, + .size = buff->getSize(), + .buffer = std::move(buff), + } + }); + } + { + constexpr auto AttrSize = sizeof(decltype(*normals)); + auto buff = ICPUBuffer::create({{AttrSize * vertexCount,IBuffer::EUF_NONE}}); + normals = reinterpret_cast(buff->getPointer()); + shapes::AABB<4, int8_t> aabb; + aabb.maxVx = hlsl::vector(127,127,127,0); + aabb.minVx = -aabb.maxVx; + retval->setNormalView({ + .composed = { + .encodedDataRange = {.s8=aabb}, + .stride = AttrSize, + .format = NormalFormat, + .rangeFormat = IGeometryBase::EAABBFormat::S8_NORM + }, + .src = { + .offset = 0, + .size = buff->getSize(), + .buffer = std::move(buff) + } + }); + } + { + constexpr auto AttrSize = sizeof(decltype(*uvs)); + auto buff = ICPUBuffer::create({{AttrSize * vertexCount,IBuffer::EUF_NONE}}); + uvs 
= reinterpret_cast(buff->getPointer()); + shapes::AABB<4, uint8_t> aabb; + aabb.minVx = hlsl::vector(0,0,0,0); + aabb.maxVx = hlsl::vector(255,255,0,0); + retval->getAuxAttributeViews()->push_back({ + .composed = { + .encodedDataRange = {.u8=aabb}, + .stride = AttrSize, + .format = EF_R8G8_UNORM, + .rangeFormat = IGeometryBase::EAABBFormat::U8_NORM + }, + .src = { + .offset = 0, + .size = buff->getSize(), + .buffer = std::move(buff), + } + }); + } + } + + const float tesselationRec = core::reciprocal_approxim(static_cast(tesselation)); + const float step = 2.f * core::PI() * tesselationRec; + for (uint32_t i = 0u; i < tesselation; ++i) + { + const auto f_i = static_cast(i); + hlsl::float32_t3 p(std::cos(f_i * step), std::sin(f_i * step), 0.f); + p *= radius; + const auto n = quantNormalCache->quantize(hlsl::normalize(p)); + + positions[i] = { p.x, p.y, p.z }; + memcpy(normals + i, &n, sizeof(n)); + uvs[i] = { packSnorm(f_i * tesselationRec), packSnorm(0.0) }; + + positions[i + halfIx] = { p.x, p.y, length }; + normals[i + halfIx] = normals[i]; + uvs[i + halfIx] = { packSnorm(1.0f), packSnorm(0.0f) }; + } + CPolygonGeometryManipulator::recomputeContentHashes(retval.get()); return retval; } -/* A cone with proper normals and texture coords */ core::smart_refctd_ptr CGeometryCreator::createCone( float radius, float length, uint32_t tesselation, - const video::SColor& colorTop, - const video::SColor& colorBottom, - float oblique, - IMeshManipulator* const meshManipulatorOverride -) const + float oblique, CQuantNormalCache* const quantNormalCacheOverride) const { - const size_t vtxCnt = tesselation * 2; - auto vtxBuf = asset::ICPUBuffer::create({ vtxCnt * sizeof(ConeVertex) }); - ConeVertex* vertices = reinterpret_cast(vtxBuf->getPointer()); - ConeVertex* baseVertices = vertices; - ConeVertex* apexVertices = vertices + tesselation; + using namespace hlsl; - std::fill(vertices,vertices+vtxCnt, ConeVertex(core::vectorSIMDf(0.f),{},colorBottom)); - CQuantNormalCache* const 
quantNormalCache = (meshManipulatorOverride == nullptr) ? defaultMeshManipulator->getQuantNormalCache() : meshManipulatorOverride->getQuantNormalCache(); + CQuantNormalCache* const quantNormalCache = quantNormalCacheOverride == nullptr ? m_params.normalCache.get() : quantNormalCacheOverride; - const float step = (2.f*core::PI()) / tesselation; + const uint32_t u32_vertexCount = 2 * tesselation; + if (u32_vertexCount > std::numeric_limits::max()) + return nullptr; + const auto vertexCount = static_cast(u32_vertexCount); - const core::vectorSIMDf apexVertexCoords(oblique, length, 0.0f); + auto retval = core::make_smart_refctd_ptr(); + retval->setIndexing(IPolygonGeometryBase::TriangleList()); - //vertex positions - for (uint32_t i = 0u; i < tesselation; i++) + // Create indices + using index_t = uint16_t; { - core::vectorSIMDf v(std::cos(i * step), 0.0f, std::sin(i * step), 0.0f); - v *= radius; + constexpr uint32_t RowCount = 2u; + const auto IndexCount = 3 * tesselation; + const auto bytesize = sizeof(index_t) * IndexCount; + auto indices = ICPUBuffer::create({bytesize,IBuffer::EUF_INDEX_BUFFER_BIT}); + auto u = reinterpret_cast(indices->getPointer()); + const uint32_t firstIndexOfBaseVertices = 0; + const uint32_t firstIndexOfApexVertices = tesselation; + for (uint32_t i = 0; i < tesselation; i++) + { + u[i * 3] = firstIndexOfApexVertices + i; + u[(i * 3) + 1] = firstIndexOfBaseVertices + i; + u[(i * 3) + 2] = i == (tesselation - 1) ? 
firstIndexOfBaseVertices : firstIndexOfBaseVertices + i + 1; + } - memcpy(baseVertices[i].pos, v.pointer, sizeof(float) * 3); - memcpy(apexVertices[i].pos, apexVertexCoords.pointer, sizeof(float) * 3); + shapes::AABB<4,index_t> aabb; + aabb.minVx[0] = 0; + aabb.maxVx[0] = vertexCount - 1; + retval->setIndexView({ + .composed = { + .encodedDataRange = {.u16=aabb}, + .stride = sizeof(index_t), + .format = EF_R16_UINT, + .rangeFormat = IGeometryBase::EAABBFormat::U16 + }, + .src = {.offset=0,.size=bytesize,.buffer=std::move(indices)} + }); } - //vertex normals - for (uint32_t i = 0; i < tesselation; i++) + constexpr auto NormalCacheFormat = EF_R8G8B8_SNORM; + constexpr auto NormalFormat = EF_R8G8B8A8_SNORM; + + // Create vertex attributes with NONE usage because we have no clue how they'll be used + hlsl::float32_t3* positions; + hlsl::vector* normals; { - const core::vectorSIMDf v0ToApex = apexVertexCoords - core::vectorSIMDf(vertices[i].pos[0], vertices[i].pos[1], vertices[i].pos[2]); + { + constexpr auto AttrSize = sizeof(decltype(*positions)); + auto buff = ICPUBuffer::create({{AttrSize * vertexCount,IBuffer::EUF_NONE}}); + positions = reinterpret_cast(buff->getPointer()); + shapes::AABB<4, float32_t> aabb; + aabb.maxVx = float32_t4(radius, radius, length, 0.0f); + aabb.minVx = float32_t4(-radius, -radius, 0.0f, 0.0f); + retval->setPositionView({ + .composed = { + .encodedDataRange = {.f32 = aabb}, + .stride = AttrSize, + .format = EF_R32G32B32_SFLOAT, + .rangeFormat = IGeometryBase::EAABBFormat::F32 + }, + .src = { + .offset=0, + .size = buff->getSize(), + .buffer = std::move(buff), + } + }); + } + { + constexpr auto AttrSize = sizeof(decltype(*normals)); + auto buff = ICPUBuffer::create({{AttrSize * vertexCount,IBuffer::EUF_NONE}}); + normals = reinterpret_cast(buff->getPointer()); + shapes::AABB<4, int8_t> aabb; + aabb.maxVx = hlsl::vector(127,127,127,0); + aabb.minVx = -aabb.maxVx; + retval->setNormalView({ + .composed = { + .encodedDataRange = {.s8=aabb}, + 
.stride = AttrSize, + .format = NormalFormat, + .rangeFormat = IGeometryBase::EAABBFormat::S8_NORM + }, + .src = { + .offset = 0, + .size = buff->getSize(), + .buffer = std::move(buff) + } + }); + } + } + + const float step = (2.f*core::PI()) / tesselation; + + const hlsl::float32_t3 apexVertexCoords(oblique, length, 0.0f); + + const auto apexVertexBase_i = tesselation; + + for (uint32_t i = 0u; i < tesselation; i++) + { + hlsl::float32_t3 v(std::cos(i * step), 0.0f, std::sin(i * step)); + v *= radius; + + positions[i] = { v.x, v.y, v.z }; + positions[apexVertexBase_i + i] = { apexVertexCoords.x, apexVertexCoords.y, apexVertexCoords.z }; + + const auto simdPosition = hlsl::float32_t3(positions[i].x, positions[i].y, positions[i].z); + const hlsl::float32_t3 v0ToApex = apexVertexCoords - simdPosition; uint32_t nextVertexIndex = i == (tesselation - 1) ? 0 : i + 1; - core::vectorSIMDf u1 = core::vectorSIMDf(baseVertices[nextVertexIndex].pos[0], baseVertices[nextVertexIndex].pos[1], baseVertices[nextVertexIndex].pos[2]); - u1 -= core::vectorSIMDf(baseVertices[i].pos[0], baseVertices[i].pos[1], baseVertices[i].pos[2]); - float angleWeight = std::acos(core::dot(core::normalize(apexVertexCoords), core::normalize(u1)).x); - u1 = core::normalize(core::cross(v0ToApex, u1)) * angleWeight; + hlsl::float32_t3 u1 = hlsl::float32_t3(positions[nextVertexIndex].x, positions[nextVertexIndex].y, positions[nextVertexIndex].z); + u1 -= simdPosition; + float angleWeight = std::acos(hlsl::dot(hlsl::normalize(apexVertexCoords), hlsl::normalize(u1))); + u1 = hlsl::normalize(hlsl::cross(v0ToApex, u1)) * angleWeight; uint32_t prevVertexIndex = i == 0 ? 
(tesselation - 1) : i - 1; - core::vectorSIMDf u2 = core::vectorSIMDf(baseVertices[prevVertexIndex].pos[0], baseVertices[prevVertexIndex].pos[1], baseVertices[prevVertexIndex].pos[2]); - u2 -= core::vectorSIMDf(baseVertices[i].pos[0], baseVertices[i].pos[1], baseVertices[i].pos[2]); - angleWeight = std::acos(core::dot(core::normalize(apexVertexCoords), core::normalize(u2)).x); - u2 = core::normalize(core::cross(u2, v0ToApex)) * angleWeight; + hlsl::float32_t3 u2 = hlsl::float32_t3(positions[prevVertexIndex].x, positions[prevVertexIndex].y, positions[prevVertexIndex].z); + u2 -= simdPosition; + angleWeight = std::acos(hlsl::dot(hlsl::normalize(apexVertexCoords), hlsl::normalize(u2))); + u2 = hlsl::normalize(hlsl::cross(u2, v0ToApex)) * angleWeight; - baseVertices[i].normal = quantNormalCache->quantize(core::normalize(u1 + u2)); - apexVertices[i].normal = quantNormalCache->quantize(core::normalize(u1)); + + const auto baseNormal = quantNormalCache->quantize(hlsl::normalize(u1 + u2)); + memcpy(normals + i, &baseNormal, sizeof(baseNormal)); + + const auto apexNormal = quantNormalCache->quantize(hlsl::normalize(u1)); + memcpy(normals + apexVertexBase_i + i, &apexNormal, sizeof(apexNormal)); } - auto idxBuf = asset::ICPUBuffer::create({ 3u * tesselation * sizeof(uint16_t) }); - uint16_t* indices = (uint16_t*)idxBuf->getPointer(); + CPolygonGeometryManipulator::recomputeContentHashes(retval.get()); + return retval; +} + +core::vector> CGeometryCreator::createArrow( + const uint32_t tesselationCylinder, + const uint32_t tesselationCone, + const float height, + const float cylinderHeight, + const float width0, + const float width1 +) const +{ + assert(height > cylinderHeight); + + using position_t = hlsl::float32_t3; + + auto cylinder = createCylinder(width0, cylinderHeight, tesselationCylinder); + auto cone = createCone(width1, height-cylinderHeight, tesselationCone); - const uint32_t firstIndexOfBaseVertices = 0; - const uint32_t firstIndexOfApexVertices = tesselation; - 
for (uint32_t i = 0; i < tesselation; i++) + auto conePositions = reinterpret_cast(cone->getPositionView().src.buffer->getPointer()); + + const auto coneVertexCount = cone->getPositionView().getElementCount(); + + for (auto i = 0ull; i < coneVertexCount; ++i) { - indices[i * 3] = firstIndexOfApexVertices + i; - indices[(i * 3) + 1] = firstIndexOfBaseVertices + i; - indices[(i * 3) + 2] = i == (tesselation - 1) ? firstIndexOfBaseVertices : firstIndexOfBaseVertices + i + 1; + auto& conePosition = conePositions[i]; + core::vector3df_SIMD newPos(conePosition.x, conePosition.y, conePosition.z); + newPos.rotateYZByRAD(-1.5707963268); + + conePosition = {newPos.x, newPos.y, newPos.z}; } - return_type cone; + return {cylinder, cone}; - constexpr size_t vertexSize = sizeof(ConeVertex); - cone.inputParams = - { 0b111u,0b1u, - { - {0u,EF_R32G32B32_SFLOAT,offsetof(ConeVertex,pos)}, - {0u,EF_R8G8B8A8_UNORM,offsetof(ConeVertex,color)}, - {0u,EF_A2B10G10R10_SNORM_PACK32,offsetof(ConeVertex,normal)} - }, - {vertexSize,SVertexInputBindingParams::EVIR_PER_VERTEX} - }; - - vtxBuf->addUsageFlags(asset::IBuffer::EUF_VERTEX_BUFFER_BIT); - cone.bindings[0] = { 0, std::move(vtxBuf) }; - idxBuf->addUsageFlags(asset::IBuffer::EUF_INDEX_BUFFER_BIT); - cone.indexBuffer = { 0, std::move(idxBuf) }; - cone.indexCount = cone.indexBuffer.buffer->getSize() / sizeof(uint16_t); - cone.indexType = EIT_16BIT; - - return cone; } -#endif core::smart_refctd_ptr CGeometryCreator::createRectangle(const hlsl::float32_t2 size) const { @@ -853,19 +959,20 @@ core::smart_refctd_ptr CGeometryCreator::createDisk(const f return retval; } -#if 0 /* Helpful Icosphere class implementation used to compute and create icopshere's vertices and indecies. Polyhedron subdividing icosahedron (20 tris) by N-times iteration - The icosphere with N=1 (default) has 80 triangles by subdividing a triangle - of icosahedron into 4 triangles. If N=0, it is identical to icosahedron. 
+ The icosphere with N=1 (default) has 80 triangles by subdividing a triangle + of icosahedron into 4 triangles. If N=0, it is identical to icosahedron. */ class Icosphere { public: + using index_t = unsigned int; + Icosphere(float radius = 1.0f, int subdivision = 1, bool smooth = false) : radius(radius), subdivision(subdivision), smooth(smooth), interleavedStride(32) { if (smooth) @@ -876,27 +983,27 @@ class Icosphere ~Icosphere() {} - unsigned int getVertexCount() const { return (unsigned int)vertices.size() / 3; } + unsigned int getPositionCount() const { return (unsigned int)vertices.size() / 3; } unsigned int getNormalCount() const { return (unsigned int)normals.size() / 3; } unsigned int getTexCoordCount() const { return (unsigned int)texCoords.size() / 2; } unsigned int getIndexCount() const { return (unsigned int)indices.size(); } unsigned int getLineIndexCount() const { return (unsigned int)lineIndices.size(); } unsigned int getTriangleCount() const { return getIndexCount() / 3; } - unsigned int getVertexSize() const { return (unsigned int)vertices.size() * sizeof(float); } // # of bytes + unsigned int getPositionSize() const { return (unsigned int)vertices.size() * sizeof(float); } // # of bytes unsigned int getNormalSize() const { return (unsigned int)normals.size() * sizeof(float); } unsigned int getTexCoordSize() const { return (unsigned int)texCoords.size() * sizeof(float); } - unsigned int getIndexSize() const { return (unsigned int)indices.size() * sizeof(unsigned int); } + unsigned int getIndexSize() const { return (unsigned int)indices.size() * sizeof(index_t); } unsigned int getLineIndexSize() const { return (unsigned int)lineIndices.size() * sizeof(unsigned int); } - const float* getVertices() const { return vertices.data(); } + const float* getPositions() const { return vertices.data(); } const float* getNormals() const { return normals.data(); } const float* getTexCoords() const { return texCoords.data(); } const unsigned int* getIndices() 
const { return indices.data(); } const unsigned int* getLineIndices() const { return lineIndices.data(); } // for interleaved vertices: V/N/T - unsigned int getInterleavedVertexCount() const { return getVertexCount(); } // # of vertices + unsigned int getInterleavedVertexCount() const { return getPositionCount(); } // # of vertices unsigned int getInterleavedVertexSize() const { return (unsigned int)interleavedVertices.size() * sizeof(float); } // # of bytes int getInterleavedStride() const { return interleavedStride; } // should be 32 bytes const float* getInterleavedVertices() const { return interleavedVertices.data(); } @@ -997,14 +1104,14 @@ class Icosphere texture coordinate is shared or no. If it is on the line segments, it is also non-shared point - 00 01 02 03 04 - /\ /\ /\ /\ /\ - / \/ \/ \/ \/ \ + 00 01 02 03 04 + /\ /\ /\ /\ /\ + / \/ \/ \/ \/ \ 05 06 07 08 09 \ - \ 10 11 12 13 14 + \ 10 11 12 13 14 \ /\ /\ /\ /\ / \/ \/ \/ \/ \/ - 15 16 17 18 19 + 15 16 17 18 19 */ static inline bool isSharedTexCoord(const float t[2]) @@ -1570,7 +1677,7 @@ class Icosphere v1 / \ newV1 *---* newV3 - / \ / \ + / \ / \ v2---*---v3 newV2 */ @@ -1736,8 +1843,8 @@ class Icosphere add 7 sub edge lines per triangle to array using 6 indices (CCW) i1 / : (i1, i2) - i2---i6 : (i2, i6) - / \ / : (i2, i3), (i2, i4), (i6, i4) + i2---i6 : (i2, i6) + / \ / : (i2, i3), (i2, i4), (i6, i4) i3---i4---i5 : (i3, i4), (i4, i5) */ @@ -1820,38 +1927,109 @@ class Icosphere core::smart_refctd_ptr CGeometryCreator::createIcoSphere(float radius, uint32_t subdivision, bool smooth) const { - Icosphere IcosphereData(radius, subdivision, smooth); - - return_type icosphereGeometry; - constexpr size_t vertexSize = sizeof(IcosphereVertex); + Icosphere icosphere(radius, subdivision, smooth); + + auto retval = core::make_smart_refctd_ptr(); + retval->setIndexing(IPolygonGeometryBase::TriangleList()); + + using namespace hlsl; + + // Create indices + { + auto indexBuffer = asset::ICPUBuffer::create({ 
icosphere.getIndexSize() }); + memcpy(indexBuffer->getPointer(), icosphere.getIndices(), indexBuffer->getSize()); + + shapes::AABB<4,Icosphere::index_t> aabb; + aabb.minVx[0] = 0; + aabb.maxVx[0] = icosphere.getPositionCount() - 1; + + static_assert(sizeof(Icosphere::index_t) == 2 || sizeof(Icosphere::index_t) == 4); + const auto isIndex16Bit = sizeof(Icosphere::index_t) == 2; + + retval->setIndexView({ + .composed = { + .encodedDataRange = {.u32=aabb}, + .stride = sizeof(Icosphere::index_t), + .format = isIndex16Bit ? EF_R16_UINT : EF_R32_UINT, + .rangeFormat = isIndex16Bit? IGeometryBase::EAABBFormat::U16 : IGeometryBase::EAABBFormat::U32 + }, + .src = {.offset=0,.size=icosphere.getIndexSize(),.buffer = std::move(indexBuffer)} + }); + } - icosphereGeometry.inputParams = - { 0b111u,0b1u, + { + { + using position_t = float32_t3; + constexpr auto AttrSize = sizeof(position_t); + auto buff = ICPUBuffer::create({ icosphere.getPositionCount() * AttrSize, IBuffer::EUF_NONE }); + const auto positions = reinterpret_cast(buff->getPointer()); + memcpy(positions, icosphere.getPositions(), icosphere.getPositionSize()); + shapes::AABB<4, float32_t> aabb; + aabb.maxVx = float32_t4(radius, radius, radius, 0.f); + aabb.minVx = -aabb.maxVx; + retval->setPositionView({ + .composed = { + .encodedDataRange = {.f32 = aabb}, + .stride = AttrSize, + .format = EF_R32G32B32_SFLOAT, + .rangeFormat = IGeometryBase::EAABBFormat::F32 + }, + .src = { + .offset = 0, + .size = buff->getSize(), + .buffer = std::move(buff), + } + }); + } { - {0u, EF_R32G32B32_SFLOAT, offsetof(IcosphereVertex,pos)}, - {0u, EF_R32G32B32_SFLOAT, offsetof(IcosphereVertex,normals)}, - {0u, EF_R32G32_SFLOAT, offsetof(IcosphereVertex,uv)} - }, - {vertexSize,SVertexInputBindingParams::EVIR_PER_VERTEX} - }; - - auto vertexBuffer = asset::ICPUBuffer::create({ IcosphereData.getInterleavedVertexSize() }); - auto indexBuffer = asset::ICPUBuffer::create({ IcosphereData.getIndexSize() }); - - memcpy(vertexBuffer->getPointer(), 
IcosphereData.getInterleavedVertices(), vertexBuffer->getSize()); - memcpy(indexBuffer->getPointer(), IcosphereData.getIndices(), indexBuffer->getSize()); - - vertexBuffer->addUsageFlags(asset::IBuffer::EUF_VERTEX_BUFFER_BIT); - icosphereGeometry.bindings[0] = { 0, std::move(vertexBuffer) }; - indexBuffer->addUsageFlags(asset::IBuffer::EUF_INDEX_BUFFER_BIT); - icosphereGeometry.indexBuffer = { 0, std::move(indexBuffer) }; - icosphereGeometry.indexCount = IcosphereData.getIndexCount(); - icosphereGeometry.indexType = EIT_32BIT; - - return icosphereGeometry; + using normal_t = float32_t3; + constexpr auto AttrSize = sizeof(normal_t); + auto buff = ICPUBuffer::create({icosphere.getNormalSize(), IBuffer::EUF_NONE}); + const auto normals = reinterpret_cast(buff->getPointer()); + memcpy(normals, icosphere.getNormals(), icosphere.getNormalSize()); + shapes::AABB<4,float32_t> aabb; + aabb.maxVx = float32_t4(1, 1, 1, 0.f); + aabb.minVx = -aabb.maxVx; + retval->setNormalView({ + .composed = { + .encodedDataRange = {.f32 = aabb}, + .stride = AttrSize, + .format = EF_R32G32B32_SFLOAT, + .rangeFormat = IGeometryBase::EAABBFormat::F32 + }, + .src = {.offset = 0,.size = buff->getSize(),.buffer = std::move(buff)}, + }); + } + { + using uv_t = uint32_t; + constexpr auto AttrSize = sizeof(uv_t); + auto buff = ICPUBuffer::create({AttrSize * icosphere.getTexCoordCount(), IBuffer::EUF_NONE}); + const auto uvs = reinterpret_cast(buff->getPointer()); + shapes::AABB<4, uint16_t> aabb; + aabb.minVx = uint16_t4(0,0,0,0); + aabb.maxVx = uint16_t4(0xFFFF,0xFFFF,0,0); + retval->getAuxAttributeViews()->push_back({ + .composed = { + .encodedDataRange = {.u16=aabb}, + .stride = AttrSize, + .format = EF_R16G16_UNORM, + .rangeFormat = IGeometryBase::EAABBFormat::U16_NORM + }, + .src = {.offset=0,.size=buff->getSize(),.buffer=std::move(buff)} + }); + for (auto uv_i = 0u; uv_i < icosphere.getTexCoordCount(); uv_i++) + { + const auto texCoords = icosphere.getTexCoords(); + const auto f32_uv = 
float32_t2{ texCoords[2 * uv_i], texCoords[(2 * uv_i) + 1] }; + uvs[uv_i] = packUnorm2x16(f32_uv); + } + } + } + + CPolygonGeometryManipulator::recomputeContentHashes(retval.get()); + return retval; } -#endif } // end namespace nbl::asset