diff --git a/integration/omm-sdk-nvrhi/omm-sdk-nvrhi.cpp b/integration/omm-sdk-nvrhi/omm-sdk-nvrhi.cpp index 9c345ac..fc12c30 100644 --- a/integration/omm-sdk-nvrhi/omm-sdk-nvrhi.cpp +++ b/integration/omm-sdk-nvrhi/omm-sdk-nvrhi.cpp @@ -649,8 +649,8 @@ omm::Gpu::DispatchConfigDesc GpuBakeNvrhiImpl::GetConfig(const GpuBakeNvrhi::Inp config.indexCount = (uint32_t)params.numIndices; config.globalFormat = params.format == nvrhi::rt::OpacityMicromapFormat::OC1_2_State ? Format::OC1_2_State : Format::OC1_4_State; config.maxScratchMemorySize = params.minimalMemoryMode ? Gpu::ScratchMemoryBudget::MB_4 : Gpu::ScratchMemoryBudget::MB_256; + config.maxOutOmmArraySize = params.maxOutOmmArraySize; config.maxSubdivisionLevel = params.maxSubdivisionLevel; - config.globalSubdivisionLevel = params.maxSubdivisionLevel; config.dynamicSubdivisionScale = params.dynamicSubdivisionScale; return config; } diff --git a/integration/omm-sdk-nvrhi/omm-sdk-nvrhi.h b/integration/omm-sdk-nvrhi/omm-sdk-nvrhi.h index 094faf1..912e822 100644 --- a/integration/omm-sdk-nvrhi/omm-sdk-nvrhi.h +++ b/integration/omm-sdk-nvrhi/omm-sdk-nvrhi.h @@ -56,6 +56,7 @@ namespace omm uint32_t numIndices = 0; uint32_t maxSubdivisionLevel = 0; + uint32_t maxOutOmmArraySize = 0xFFFFFFFF; nvrhi::rt::OpacityMicromapFormat format = nvrhi::rt::OpacityMicromapFormat::OC1_4_State; float dynamicSubdivisionScale = 0.5f; bool minimalMemoryMode = false; diff --git a/omm-sdk/include/omm.h b/omm-sdk/include/omm.h index 2093710..db0eb5e 100644 --- a/omm-sdk/include/omm.h +++ b/omm-sdk/include/omm.h @@ -16,7 +16,7 @@ license agreement from NVIDIA CORPORATION is strictly prohibited. #define OMM_VERSION_MAJOR 0 #define OMM_VERSION_MINOR 9 -#define OMM_VERSION_BUILD 1 +#define OMM_VERSION_BUILD 2 #if defined(_MSC_VER) #define OMM_CALL __fastcall @@ -788,7 +788,7 @@ typedef struct ommGpuPreDispatchInfo uint32_t outOmmIndexCount; // Min required size of OUT_OMM_ARRAY_DATA. 
GetBakeInfo returns most conservative estimation while less conservative number // can be obtained via BakePrepass - size_t outOmmArraySizeInBytes; + uint32_t outOmmArraySizeInBytes; // Min required size of OUT_OMM_DESC_ARRAY. GetBakeInfo returns most conservative estimation while less conservative number // can be obtained via BakePrepass uint32_t outOmmDescSizeInBytes; @@ -848,12 +848,15 @@ typedef struct ommGpuDispatchConfigDesc float dynamicSubdivisionScale; // The global Format. May be overriden by the per-triangle config. ommFormat globalFormat; - // Micro triangle count is 4^N, where N is the subdivision level. Subdivision level must be in range [0, - // MaxSubdivisionLevel]. The global subdivisionLevel. May be overriden by the per-triangle subdivision level setting. The - // subdivision level to allow in dynamic mode and value is used to allocate appropriate scratch memory. - uint8_t globalSubdivisionLevel; uint8_t maxSubdivisionLevel; - uint8_t enableSubdivisionLevelBuffer; + ommBool enableSubdivisionLevelBuffer; + // The SDK will try to limit the omm array size of PreDispatchInfo::outOmmArraySizeInBytes and + // PostBakeInfo::outOmmArraySizeInBytes. + // Currently a greedy algorithm is implemented with a first-come, first-served order. + // The SDK may (or may not) apply more sophisticated heuristics in the future. + // If no memory is available to allocate an OMM Array Block the state will default to Unknown Opaque (ignoring any bake + // flags to disable special indices). + uint32_t maxOutOmmArraySize; // Target scratch memory budget, The SDK will try adjust the sum of the transient pool buffers to match this value. Higher // budget more efficiently executes the baking operation. May return INSUFFICIENT_SCRATCH_MEMORY if set too low.
ommGpuScratchMemoryBudget maxScratchMemorySize; @@ -877,9 +880,9 @@ inline ommGpuDispatchConfigDesc ommGpuDispatchConfigDescDefault() v.alphaCutoff = 0.5f; v.dynamicSubdivisionScale = 2; v.globalFormat = ommFormat_OC1_4_State; - v.globalSubdivisionLevel = 4; v.maxSubdivisionLevel = 8; v.enableSubdivisionLevelBuffer = 0; + v.maxOutOmmArraySize = 0xFFFFFFFF; v.maxScratchMemorySize = ommGpuScratchMemoryBudget_Default; return v; } diff --git a/omm-sdk/include/omm.hpp b/omm-sdk/include/omm.hpp index 4b544a6..14429e9 100644 --- a/omm-sdk/include/omm.hpp +++ b/omm-sdk/include/omm.hpp @@ -683,7 +683,7 @@ namespace omm uint32_t outOmmIndexCount = 0xFFFFFFFF; // Min required size of OUT_OMM_ARRAY_DATA. GetBakeInfo returns most conservative estimation while less conservative number // can be obtained via BakePrepass - size_t outOmmArraySizeInBytes = 0xFFFFFFFF; + uint32_t outOmmArraySizeInBytes = 0xFFFFFFFF; // Min required size of OUT_OMM_DESC_ARRAY. GetBakeInfo returns most conservative estimation while less conservative number // can be obtained via BakePrepass uint32_t outOmmDescSizeInBytes = 0xFFFFFFFF; @@ -728,12 +728,15 @@ namespace omm float dynamicSubdivisionScale = 2; // The global Format. May be overriden by the per-triangle config. Format globalFormat = Format::OC1_4_State; - // Micro triangle count is 4^N, where N is the subdivision level. Subdivision level must be in range [0, - // MaxSubdivisionLevel]. The global subdivisionLevel. May be overriden by the per-triangle subdivision level setting. The - // subdivision level to allow in dynamic mode and value is used to allocate appropriate scratch memory. - uint8_t globalSubdivisionLevel = 4; uint8_t maxSubdivisionLevel = 8; - uint8_t enableSubdivisionLevelBuffer = 0; + bool enableSubdivisionLevelBuffer = false; + // The SDK will try to limit the omm array size of PreDispatchInfo::outOmmArraySizeInBytes and + // PostBakeInfo::outOmmArraySizeInBytes.
+ // Currently a greedy algorithm is implemented with a first-come, first-served order. + // The SDK may (or may not) apply more sophisticated heuristics in the future. + // If no memory is available to allocate an OMM Array Block the state will default to Unknown Opaque (ignoring any bake + // flags to disable special indices). + uint32_t maxOutOmmArraySize = 0xFFFFFFFF; // Target scratch memory budget, The SDK will try adjust the sum of the transient pool buffers to match this value. Higher // budget more efficiently executes the baking operation. May return INSUFFICIENT_SCRATCH_MEMORY if set too low. ScratchMemoryBudget maxScratchMemorySize = ScratchMemoryBudget::Default; diff --git a/omm-sdk/scripts/omm.json b/omm-sdk/scripts/omm.json index a97cb36..1b16b53 100644 --- a/omm-sdk/scripts/omm.json +++ b/omm-sdk/scripts/omm.json @@ -1824,21 +1824,21 @@ }, "comment": "The global Format. May be overriden by the per-triangle config." }, - { - "type": "uint8_t", - "name": "globalSubdivisionLevel", - "value": "4", - "comment": "Micro triangle count is 4^N, where N is the subdivision level. Subdivision level must be in range [0, MaxSubdivisionLevel]. The global subdivisionLevel. May be overriden by the per-triangle subdivision level setting. The subdivision level to allow in dynamic mode and value is used to allocate appropriate scratch memory."
- }, { "type": "uint8_t", "name": "maxSubdivisionLevel", "value": "8" }, { - "type": "uint8_t", + "type": "bool", "name": "enableSubdivisionLevelBuffer", - "value": "0" + "value": "false" + }, + { + "type": "uint32_t", + "name": "maxOutOmmArraySize", + "value": "0xFFFFFFFF", + "comment": "The SDK will try to limit the omm array size of PreDispatchInfo::outOmmArraySizeInBytes and PostBakeInfo::outOmmArraySizeInBytes.\nCurrently a greedy algorithm is implemented with a first-come, first-served order.\nThe SDK may (or may not) apply more sophisticated heuristics in the future.\nIf no memory is available to allocate an OMM Array Block the state will default to Unknown Opaque (ignoring any bake flags to disable special indices)." }, { "type": "ScratchMemoryBudget", @@ -1915,7 +1915,7 @@ "value": "0xFFFFFFFF" }, { - "type": "size_t", + "type": "uint32_t", "name": "outOmmArraySizeInBytes", "value": "0xFFFFFFFF", "comment": "Min required size of OUT_OMM_ARRAY_DATA. GetBakeInfo returns most conservative estimation while less conservative number can be obtained via BakePrepass" diff --git a/omm-sdk/scripts/omm_header_c.txt b/omm-sdk/scripts/omm_header_c.txt index 7f67aef..34d4845 100644 --- a/omm-sdk/scripts/omm_header_c.txt +++ b/omm-sdk/scripts/omm_header_c.txt @@ -16,7 +16,7 @@ license agreement from NVIDIA CORPORATION is strictly prohibited.
#define OMM_VERSION_MAJOR 0 #define OMM_VERSION_MINOR 9 -#define OMM_VERSION_BUILD 1 +#define OMM_VERSION_BUILD 2 #if defined(_MSC_VER) #define OMM_CALL __fastcall diff --git a/omm-sdk/shaders/omm_desc_patch.cs.hlsl b/omm-sdk/shaders/omm_desc_patch.cs.hlsl index d19add2..f3da7b1 100644 --- a/omm-sdk/shaders/omm_desc_patch.cs.hlsl +++ b/omm-sdk/shaders/omm_desc_patch.cs.hlsl @@ -51,7 +51,7 @@ uint GetSourcePrimitiveIndex(uint primitiveIndex) if (primitiveIndexOrHashTableEntryIndex < -4) { - const uint hashTableEntryIndex = -(primitiveIndexOrHashTableEntryIndex + 4); + const uint hashTableEntryIndex = -(primitiveIndexOrHashTableEntryIndex + 5); const uint primitiveIndexRef = OMM_SUBRESOURCE_LOAD(HashTableBuffer, 8 * hashTableEntryIndex + 4); // [hash|primitiveIndex] return primitiveIndexRef; } diff --git a/omm-sdk/shaders/omm_global_cb.hlsli b/omm-sdk/shaders/omm_global_cb.hlsli index 8f110f4..726bd0b 100644 --- a/omm-sdk/shaders/omm_global_cb.hlsli +++ b/omm-sdk/shaders/omm_global_cb.hlsli @@ -7,14 +7,12 @@ and any modifications thereto. Any use, reproduction, disclosure or distribution of this software and related documentation without an express license agreement from NVIDIA CORPORATION is strictly prohibited. 
*/ - - #define OMM_DECLARE_GLOBAL_CONSTANT_BUFFER \ OMM_CONSTANTS_START(GlobalConstants) \ OMM_CONSTANT(uint, IndexCount) \ OMM_CONSTANT(uint, PrimitiveCount) \ OMM_CONSTANT(uint, MaxBatchCount) \ - OMM_CONSTANT(uint, GlobalSubdivisionLevel) \ + OMM_CONSTANT(uint, MaxOutOmmArraySize) \ \ OMM_CONSTANT(uint, IsOmmIndexFormat16bit) \ OMM_CONSTANT(uint, EnableSpecialIndices) \ @@ -45,7 +43,6 @@ OMM_CONSTANTS_START(GlobalConstants) \ \ OMM_CONSTANT(float2, TexSize) \ OMM_CONSTANT(float2, InvTexSize) \ - \ /* ---- Buffer offsets go here */\ \ OMM_CONSTANT(uint, IEBakeBufferOffset) \ diff --git a/omm-sdk/shaders/omm_post_build_info.cs.hlsl b/omm-sdk/shaders/omm_post_build_info.cs.hlsl index 7011ca2..b63290c 100644 --- a/omm-sdk/shaders/omm_post_build_info.cs.hlsl +++ b/omm-sdk/shaders/omm_post_build_info.cs.hlsl @@ -31,6 +31,6 @@ void main(uint3 tid : SV_DispatchThreadID) const uint ommDescCount = OMM_SUBRESOURCE_LOAD(OmmDescAllocatorCounterBuffer, 0); const uint ommDescByteSize = ommDescCount * 8; - u_postBuildInfo.Store(0, ommArrayByteSize); + u_postBuildInfo.Store(0, min(ommArrayByteSize, g_GlobalConstants.MaxOutOmmArraySize)); u_postBuildInfo.Store(4, ommDescByteSize); } \ No newline at end of file diff --git a/omm-sdk/shaders/omm_work_setup_common.hlsli b/omm-sdk/shaders/omm_work_setup_common.hlsli index 4526d07..ebe3c3b 100644 --- a/omm-sdk/shaders/omm_work_setup_common.hlsli +++ b/omm-sdk/shaders/omm_work_setup_common.hlsli @@ -203,19 +203,25 @@ uint GetSubdivisionLevel(TexCoords texCoords) } else { - return g_GlobalConstants.GlobalSubdivisionLevel; + return g_GlobalConstants.MaxSubdivisionLevel; } } int GetOmmDescOffset(ByteAddressBuffer ommIndexBuffer, uint primitiveIndex) { - // TODO: support 16-bit indices. 
if (g_GlobalConstants.IsOmmIndexFormat16bit) { const uint dwOffset = primitiveIndex.x >> 1u; - const uint shift = (primitiveIndex.x & 1u) << 4u; // 0 or 16 - const uint val = ommIndexBuffer.Load(4 * dwOffset); - return (val >> shift) & 0xFFFF; + const uint shift = (primitiveIndex.x & 1u) << 4u; // 0 or 16 + const uint raw = ommIndexBuffer.Load(4 * dwOffset); + const uint raw16 = (raw >> shift) & 0xFFFFu; + + if (raw16 > 0xFFFB) // e.g special index + { + return (raw16 - 0xFFFF) - 1; // -1, -2, -3 or -4 + } + + return raw16; } else { diff --git a/omm-sdk/shaders/omm_work_setup_cs.cs.hlsl b/omm-sdk/shaders/omm_work_setup_cs.cs.hlsl index be73358..537910b 100644 --- a/omm-sdk/shaders/omm_work_setup_cs.cs.hlsl +++ b/omm-sdk/shaders/omm_work_setup_cs.cs.hlsl @@ -39,7 +39,8 @@ void main(uint3 tid : SV_DispatchThreadID) uint hashTableEntryIndex; hashTable::Result result = FindOrInsertOMMEntry(texCoords, subdivisionLevel, hashTableEntryIndex); - uint vmDescOffset = 0; + int vmDescOffset = (int)SpecialIndex::FullyUnknownOpaque; + if (result == hashTable::Result::Null || result == hashTable::Result::Inserted || result == hashTable::Result::ReachedMaxAttemptCount) @@ -66,9 +67,7 @@ void main(uint3 tid : SV_DispatchThreadID) OMM_SUBRESOURCE_INTERLOCKEDADD(OmmArrayAllocatorCounterBuffer, 0, vmDataByteSize, vmArrayOffset); } - const uint kMaxVmArrayBudget = 0xFFFFFFFF; - - if ((vmArrayOffset + vmDataByteSize) < kMaxVmArrayBudget) + if ((vmArrayOffset + vmDataByteSize) <= g_GlobalConstants.MaxOutOmmArraySize) { // Allocate new VM-desc for the vmArrayOffset { @@ -147,7 +146,7 @@ void main(uint3 tid : SV_DispatchThreadID) else // if (status == hashTable::Result::Found { // Store the hash-table offset and patch up the pointers later. 
- vmDescOffset = (uint)(-hashTableEntryIndex - 4); + vmDescOffset = (uint)(-hashTableEntryIndex - 5); } OMM_SUBRESOURCE_STORE(TempOmmIndexBuffer, 4 * primitiveIndex, vmDescOffset); diff --git a/omm-sdk/shaders/omm_work_setup_gfx.cs.hlsl b/omm-sdk/shaders/omm_work_setup_gfx.cs.hlsl index ee95799..bb77c41 100644 --- a/omm-sdk/shaders/omm_work_setup_gfx.cs.hlsl +++ b/omm-sdk/shaders/omm_work_setup_gfx.cs.hlsl @@ -40,7 +40,8 @@ void main(uint3 tid : SV_DispatchThreadID) uint hashTableEntryIndex; hashTable::Result result = FindOrInsertOMMEntry(texCoords, subdivisionLevel, hashTableEntryIndex); - uint vmDescOffset = 0; + int vmDescOffset = (int)SpecialIndex::FullyUnknownOpaque; + if (result == hashTable::Result::Null || result == hashTable::Result::Inserted || result == hashTable::Result::ReachedMaxAttemptCount) @@ -56,17 +57,19 @@ void main(uint3 tid : SV_DispatchThreadID) // Allocate new VM-array offset & vm-index uint vmArrayOffset = 0; + uint vmDataByteSize = 0; { const uint vmDataBitSize = GetOMMFormatBitCount(ommFormat) * numMicroTriangles; // spec allows 1 byte alignment but we require 4 byte to make sure UAV writes // are DW aligned. - const uint vmDataByteSize = max(vmDataBitSize >> 3u, 4u); + vmDataByteSize = max(vmDataBitSize >> 3u, 4u); OMM_SUBRESOURCE_INTERLOCKEDADD(OmmArrayAllocatorCounterBuffer, 0, vmDataByteSize, vmArrayOffset); } // Allocate new VM-desc for the vmArrayOffset + if ((vmArrayOffset + vmDataByteSize) <= g_GlobalConstants.MaxOutOmmArraySize) { // The rasterItemOffset is the same things as the vmDescOffset, OMM_SUBRESOURCE_INTERLOCKEDADD(OmmDescAllocatorCounterBuffer, 0, 1, vmDescOffset); @@ -79,84 +82,84 @@ void main(uint3 tid : SV_DispatchThreadID) u_ommDescArrayBuffer.Store(vmDescOffset * 8, vmArrayOffset); u_ommDescArrayBuffer.Store(vmDescOffset * 8 + 4, vmDescData); } - } - // Increase UsageDesc info struct, - // resolve uniform vm's later by usage subtraction. 
- { - const uint strideInBytes = 8; // sizeof(VisibilityMapUsageDesc), [count32, format16, level16] - const uint index = (kOMMFormatNum * subdivisionLevel + ((uint)ommFormat - 1)); - const uint offset = strideInBytes * index; + // Increase UsageDesc info struct, + // resolve uniform vm's later by usage subtraction. + { + const uint strideInBytes = 8; // sizeof(VisibilityMapUsageDesc), [count32, format16, level16] + const uint index = (kOMMFormatNum * subdivisionLevel + ((uint)ommFormat - 1)); + const uint offset = strideInBytes * index; - InterlockedAdd(u_ommDescArrayHistogramBuffer, offset, 1); - } + InterlockedAdd(u_ommDescArrayHistogramBuffer, offset, 1); + } - /// ---- Setup baking parameters ----- + /// ---- Setup baking parameters ----- - // Allocate a slot in the raster items array. - uint bakeResultGlobalOffset = 0; - uint bakeResultBatchIndex = 0; - { - const uint offset = 4 * subdivisionLevel; + // Allocate a slot in the raster items array. + uint bakeResultGlobalOffset = 0; + uint bakeResultBatchIndex = 0; + { + const uint offset = 4 * subdivisionLevel; - OMM_SUBRESOURCE_INTERLOCKEDADD(BakeResultBufferCounterBuffer, offset, 1, bakeResultGlobalOffset); + OMM_SUBRESOURCE_INTERLOCKEDADD(BakeResultBufferCounterBuffer, offset, 1, bakeResultGlobalOffset); - const uint maxItemsPerBatch = GetMaxItemsPerBatch(subdivisionLevel); - bakeResultBatchIndex = bakeResultGlobalOffset / maxItemsPerBatch; - } + const uint maxItemsPerBatch = GetMaxItemsPerBatch(subdivisionLevel); + bakeResultBatchIndex = bakeResultGlobalOffset / maxItemsPerBatch; + } - // Store the VM-that will be procesed by the rasterizer. - { - const uint ommFormatAndPrimitiveIndex = (primitiveIndex) | ((uint)ommFormat << 30); + // Store the VM-that will be procesed by the rasterizer. 
+ { + const uint ommFormatAndPrimitiveIndex = (primitiveIndex) | ((uint)ommFormat << 30); - const uint offset = 8 * (bakeResultGlobalOffset + subdivisionLevel * g_GlobalConstants.PrimitiveCount); + const uint offset = 8 * (bakeResultGlobalOffset + subdivisionLevel * g_GlobalConstants.PrimitiveCount); - OMM_SUBRESOURCE_STORE(RasterItemsBuffer, offset, vmArrayOffset); - OMM_SUBRESOURCE_STORE(RasterItemsBuffer, offset + 4, ommFormatAndPrimitiveIndex); - } + OMM_SUBRESOURCE_STORE(RasterItemsBuffer, offset, vmArrayOffset); + OMM_SUBRESOURCE_STORE(RasterItemsBuffer, offset + 4, ommFormatAndPrimitiveIndex); + } - // Increment the drawcall count for the current batch & subdivisiolevel. - { - const uint strideInBytes = 20; // arg count of DrawIndexedInstanced - const uint InstanceCountOffsetInBytes = 4; // offset of InstanceCount in DrawIndexedInstanced - const uint offset = InstanceCountOffsetInBytes + strideInBytes * (subdivisionLevel * g_GlobalConstants.MaxBatchCount + bakeResultBatchIndex); + // Increment the drawcall count for the current batch & subdivisiolevel. + { + const uint strideInBytes = 20; // arg count of DrawIndexedInstanced + const uint InstanceCountOffsetInBytes = 4; // offset of InstanceCount in DrawIndexedInstanced + const uint offset = InstanceCountOffsetInBytes + strideInBytes * (subdivisionLevel * g_GlobalConstants.MaxBatchCount + bakeResultBatchIndex); - OMM_SUBRESOURCE_INTERLOCKEDADD(IEBakeBuffer, offset, 1, bakeResultGlobalOffset); - } + OMM_SUBRESOURCE_INTERLOCKEDADD(IEBakeBuffer, offset, 1, bakeResultGlobalOffset); + } - // Increment the thread count for the current batch & subdivisiolevel. - uint threadGroupCountX = 0; - { - // This is the most number of micro-triangles that will be processed per thread - // 32 allows non-atomic writes to the vmArrayBuffer for 2 and 4-state vm formats. 
- const uint kMaxNumMicroTrianglePerThread = 32; - const uint numMicroTrianglePerThread = min(kMaxNumMicroTrianglePerThread, numMicroTriangles); - const uint numThreadsNeeded = max(numMicroTriangles / numMicroTrianglePerThread, 1u); + // Increment the thread count for the current batch & subdivisiolevel. + uint threadGroupCountX = 0; + { + // This is the most number of micro-triangles that will be processed per thread + // 32 allows non-atomic writes to the vmArrayBuffer for 2 and 4-state vm formats. + const uint kMaxNumMicroTrianglePerThread = 32; + const uint numMicroTrianglePerThread = min(kMaxNumMicroTrianglePerThread, numMicroTriangles); + const uint numThreadsNeeded = max(numMicroTriangles / numMicroTrianglePerThread, 1u); - const uint strideInBytes = 4; // sizeof(uint32_t) - const uint offset = strideInBytes * (subdivisionLevel * g_GlobalConstants.MaxBatchCount + bakeResultBatchIndex); + const uint strideInBytes = 4; // sizeof(uint32_t) + const uint offset = strideInBytes * (subdivisionLevel * g_GlobalConstants.MaxBatchCount + bakeResultBatchIndex); - uint oldGlobalThreadCountX; - OMM_SUBRESOURCE_INTERLOCKEDADD(DispatchIndirectThreadCountBuffer, offset, numThreadsNeeded, oldGlobalThreadCountX); - uint newGlobalThreadCountX = numThreadsNeeded + oldGlobalThreadCountX; + uint oldGlobalThreadCountX; + OMM_SUBRESOURCE_INTERLOCKEDADD(DispatchIndirectThreadCountBuffer, offset, numThreadsNeeded, oldGlobalThreadCountX); + uint newGlobalThreadCountX = numThreadsNeeded + oldGlobalThreadCountX; - threadGroupCountX = (newGlobalThreadCountX + 127) / 128; - } + threadGroupCountX = (newGlobalThreadCountX + 127) / 128; + } - // Increment the thread GROUP count for the current batch & subdivisiolevel. 
- { - const uint strideInBytes = 12; // arg count of Dispatch - const uint ThreadCountXOffsetInBytes = 0; // offset of ThreadCountX in Dispatch - const uint offset = ThreadCountXOffsetInBytes + strideInBytes * (subdivisionLevel * g_GlobalConstants.MaxBatchCount + bakeResultBatchIndex); + // Increment the thread GROUP count for the current batch & subdivisiolevel. + { + const uint strideInBytes = 12; // arg count of Dispatch + const uint ThreadCountXOffsetInBytes = 0; // offset of ThreadCountX in Dispatch + const uint offset = ThreadCountXOffsetInBytes + strideInBytes * (subdivisionLevel * g_GlobalConstants.MaxBatchCount + bakeResultBatchIndex); - uint _dummy; - OMM_SUBRESOURCE_INTERLOCKEDMAX(IECompressCsBuffer, offset, threadGroupCountX, _dummy); + uint _dummy; + OMM_SUBRESOURCE_INTERLOCKEDMAX(IECompressCsBuffer, offset, threadGroupCountX, _dummy); + } } } else // if (status == hashTable::Result::Found { // Store the hash-table offset and patch up the pointers later. - vmDescOffset = (uint)(-hashTableEntryIndex - 4); + vmDescOffset = (uint)(-hashTableEntryIndex - 5); } OMM_SUBRESOURCE_STORE(TempOmmIndexBuffer, 4 * primitiveIndex, vmDescOffset); diff --git a/omm-sdk/src/bake_gpu_impl.cpp b/omm-sdk/src/bake_gpu_impl.cpp index ab26b48..6bd5997 100644 --- a/omm-sdk/src/bake_gpu_impl.cpp +++ b/omm-sdk/src/bake_gpu_impl.cpp @@ -250,7 +250,8 @@ ommResult PipelineImpl::Validate(const ommGpuPipelineConfigDesc& config) ommResult PipelineImpl::Validate(const ommGpuDispatchConfigDesc& config) const { const uint32_t MaxSubdivLevelAPI = kMaxSubdivLevel; - const uint32_t MaxSubdivLevel = std::min(MaxSubdivLevelAPI, OmmStaticBuffersImpl::kMaxSubdivisionLevelNum); + const uint32_t MaxSubdivLevelGfx = std::min(MaxSubdivLevelAPI, OmmStaticBuffersImpl::kMaxSubdivisionLevelNum); + const uint32_t MaxSubdivLevelCS = 12; const bool computeOnly = (((uint32_t)config.bakeFlags & (uint32_t)ommGpuBakeFlags_ComputeOnly) == (uint32_t)ommGpuBakeFlags_ComputeOnly); const bool doSetup = 
(((uint32_t)config.bakeFlags & (uint32_t)ommGpuBakeFlags_PerformSetup) == (uint32_t)ommGpuBakeFlags_PerformSetup); const bool doBake = (((uint32_t)config.bakeFlags & (uint32_t)ommGpuBakeFlags_PerformBake) == (uint32_t)ommGpuBakeFlags_PerformBake); @@ -259,9 +260,9 @@ ommResult PipelineImpl::Validate(const ommGpuDispatchConfigDesc& config) const return ommResult_INVALID_ARGUMENT; if (config.indexCount % 3 != 0) return ommResult_INVALID_ARGUMENT; - if (!computeOnly && config.maxSubdivisionLevel > MaxSubdivLevel) + if (!computeOnly && config.maxSubdivisionLevel > MaxSubdivLevelGfx) return ommResult_INVALID_ARGUMENT; - if (config.maxSubdivisionLevel < config.globalSubdivisionLevel) + if (computeOnly && config.maxSubdivisionLevel > MaxSubdivLevelCS) return ommResult_INVALID_ARGUMENT; if (config.enableSubdivisionLevelBuffer) return ommResult_NOT_IMPLEMENTED; @@ -605,7 +606,7 @@ ommResult PipelineImpl::GetPreDispatchInfo(const ommGpuDispatchConfigDesc& confi const size_t maxNumMicroTris = bird::GetNumMicroTriangles(config.maxSubdivisionLevel); const size_t bitsPerState = size_t(config.globalFormat); const size_t vmArraySizeInBits = size_t(primitiveCount) * std::max(maxNumMicroTris * bitsPerState, 32u); - ommIndexFormat outOmmIndexBufferFormat = primitiveCount < std::numeric_limits::max() - kNumSpecialIndices ? ommIndexFormat_I16_UINT : ommIndexFormat_I32_UINT; + ommIndexFormat outOmmIndexBufferFormat = primitiveCount < std::numeric_limits::max() - kNumSpecialIndices ? 
ommIndexFormat_I16_UINT : ommIndexFormat_I32_UINT; if (force32BitIndices) outOmmIndexBufferFormat = ommIndexFormat_I32_UINT; @@ -615,7 +616,7 @@ ommResult PipelineImpl::GetPreDispatchInfo(const ommGpuDispatchConfigDesc& confi const size_t outMaxTheoreticalOmmArraySizeInBytes = math::Align(math::DivUp(vmArraySizeInBits, 8u), 4u); - const size_t outOmmArraySizeInBytes = outMaxTheoreticalOmmArraySizeInBytes; + const size_t outOmmArraySizeInBytes = std::min(outMaxTheoreticalOmmArraySizeInBytes, config.maxOutOmmArraySize); const size_t outOmmDescSizeInBytes = primitiveCount * sizeof(uint64_t); const size_t outOmmIndexBufferSizeInBytes = math::Align(primitiveCount * indexBufferFormatSize, 4u); const size_t outOmmHistogramSizeInBytes = (size_t(config.maxSubdivisionLevel) + 1) * 2 * sizeof(uint64_t); @@ -653,7 +654,7 @@ ommResult PipelineImpl::GetPreDispatchInfo(const ommGpuDispatchConfigDesc& confi outPreBuildInfo->outOmmIndexCount = primitiveCount; RETURN_STATUS_IF_FAILED(SafeCast(outPreBuildInfo->outOmmArrayHistogramSizeInBytes, outOmmHistogramSizeInBytes)); - outPreBuildInfo->outOmmArraySizeInBytes = outOmmArraySizeInBytes; + RETURN_STATUS_IF_FAILED(SafeCast(outPreBuildInfo->outOmmArraySizeInBytes, outOmmArraySizeInBytes)); RETURN_STATUS_IF_FAILED(SafeCast(outPreBuildInfo->outOmmDescSizeInBytes, outOmmDescSizeInBytes)); outPreBuildInfo->outOmmIndexBufferFormat = outOmmIndexBufferFormat; RETURN_STATUS_IF_FAILED(SafeCast(outPreBuildInfo->outOmmIndexBufferSizeInBytes, outOmmIndexBufferSizeInBytes)); @@ -710,7 +711,7 @@ ommResult PipelineImpl::InitGlobalConstants(const ommGpuDispatchConfigDesc& conf cbuffer.IndexCount = config.indexCount; cbuffer.PrimitiveCount = primitiveCount; cbuffer.MaxBatchCount = info.MaxBatchCount; - cbuffer.GlobalSubdivisionLevel = config.globalSubdivisionLevel; + cbuffer.MaxOutOmmArraySize = preBuildInfo.outOmmArraySizeInBytes; cbuffer.IsOmmIndexFormat16bit = IsOmmIndexFormat16bit; cbuffer.DoSetup = doSetup; cbuffer.SamplerIndex = 
m_pipelineBuilder.GetStaticSamplerIndex(config.runtimeSamplerDesc); diff --git a/omm-sdk/src/version.h b/omm-sdk/src/version.h index 0741cf2..fbf9816 100644 --- a/omm-sdk/src/version.h +++ b/omm-sdk/src/version.h @@ -13,7 +13,7 @@ license agreement from NVIDIA CORPORATION is strictly prohibited. #define VERSION_MAJOR 0 #define VERSION_MINOR 9 -#define VERSION_BUILD 1 +#define VERSION_BUILD 2 #define VERSION_REVISION 0 #define VERSION_STRING STR(VERSION_MAJOR.VERSION_MINOR.VERSION_BUILD.VERSION_REVISION) diff --git a/tests/test_omm_bake_gpu.cpp b/tests/test_omm_bake_gpu.cpp index c830f57..083efc2 100644 --- a/tests/test_omm_bake_gpu.cpp +++ b/tests/test_omm_bake_gpu.cpp @@ -70,22 +70,39 @@ namespace { return 3; } - omm::Debug::Stats RunVmBake( - float alphaCutoff, - uint32_t subdivisionLevel, - int2 texSize, - uint32_t indexBufferSize, - uint32_t* triangleIndices, - float* texCoords, - uint32_t texCoordBufferSize, - std::function texCb, - omm::Format format = omm::Format::OC1_4_State) { + struct OmmBakeParams + { + float alphaCutoff = 0.5f; + uint32_t subdivisionLevel = 5; + int2 texSize = { 1024, 1024 }; + uint32_t indexBufferSize = 0; + uint32_t* triangleIndices = nullptr; + float* texCoords = nullptr; + uint32_t texCoordBufferSize = 0; + uint32_t maxOutOmmArraySize = 0xFFFFFFFF; + std::function texCb; + omm::Format format = omm::Format::OC1_4_State; + + static OmmBakeParams InitQuad() + { + OmmBakeParams p; + static uint32_t s_triangleIndices[] = { 0, 1, 2, 3, 1, 2 }; + static float s_texCoords[] = { 0.f, 0.f, 0.f, 1.f, 1.f, 0.f, 1.f, 1.f }; + p.triangleIndices = s_triangleIndices; + p.indexBufferSize = sizeof(s_triangleIndices); + p.texCoords = s_texCoords; + p.texCoordBufferSize = sizeof(s_texCoords); + return p; + } + }; + omm::Debug::Stats RunOmmBake(const OmmBakeParams& p) + { const uint32_t alphaTextureChannel = GetAlphaChannelIndex(); nvrhi::TextureDesc desc; - desc.width = texSize.x; - desc.height = texSize.y; + desc.width = p.texSize.x; + desc.height = 
p.texSize.y; desc.format = nvrhi::Format::RGBA32_FLOAT; nvrhi::StagingTextureHandle staging = m_device->createStagingTexture(desc, nvrhi::CpuAccessMode::Write); @@ -100,7 +117,7 @@ namespace { for (uint32_t i = 0; i < desc.width; ++i) { float* rgba = (float*)((uint8_t*)data + j * rowPitch + (4 * i) * sizeof(float)); - float val = texCb(i, j); + float val = p.texCb(i, j); rgba[0] = alphaTextureChannel == 0 ? val : 0.f; rgba[1] = alphaTextureChannel == 1 ? val : 0.f; rgba[2] = alphaTextureChannel == 2 ? val : 0.f; @@ -123,18 +140,18 @@ namespace { // Upload index buffer nvrhi::BufferHandle ib; { - ib = m_device->createBuffer({ .byteSize = indexBufferSize, .debugName = "ib", .format = nvrhi::Format::R32_UINT, .canHaveUAVs = true, .canHaveTypedViews = true, .canHaveRawViews = true }); + ib = m_device->createBuffer({ .byteSize = p.indexBufferSize, .debugName = "ib", .format = nvrhi::Format::R32_UINT, .canHaveUAVs = true, .canHaveTypedViews = true, .canHaveRawViews = true }); m_commandList->beginTrackingBufferState(ib, nvrhi::ResourceStates::Common); - m_commandList->writeBuffer(ib, triangleIndices, indexBufferSize); + m_commandList->writeBuffer(ib, p.triangleIndices, p.indexBufferSize); } // Upload texcoords nvrhi::BufferHandle vb; { - vb = m_device->createBuffer({ .byteSize = texCoordBufferSize, .debugName = "vb", .canHaveUAVs = true, .canHaveRawViews = true }); + vb = m_device->createBuffer({ .byteSize = p.texCoordBufferSize, .debugName = "vb", .canHaveUAVs = true, .canHaveRawViews = true }); m_commandList->beginTrackingBufferState(vb, nvrhi::ResourceStates::Common); - m_commandList->writeBuffer(vb, texCoords, texCoordBufferSize); + m_commandList->writeBuffer(vb, p.texCoords, p.texCoordBufferSize); } // Upload index buffer @@ -147,22 +164,25 @@ namespace { input.texCoordBuffer = vb; input.texCoordStrideInBytes = sizeof(float2); input.indexBuffer = ib; - input.numIndices = indexBufferSize / sizeof(uint32_t); - input.maxSubdivisionLevel = subdivisionLevel; - 
input.format = format == omm::Format::OC1_2_State ? nvrhi::rt::OpacityMicromapFormat::OC1_2_State : nvrhi::rt::OpacityMicromapFormat::OC1_4_State; + input.numIndices = p.indexBufferSize / sizeof(uint32_t); + input.maxSubdivisionLevel = p.subdivisionLevel; + input.format = p.format == omm::Format::OC1_2_State ? nvrhi::rt::OpacityMicromapFormat::OC1_2_State : nvrhi::rt::OpacityMicromapFormat::OC1_4_State; input.dynamicSubdivisionScale = 0.f; input.enableSpecialIndices = EnableSpecialIndices(); input.force32BitIndices = Force32BitIndices(); input.enableTexCoordDeduplication = EnableTexCoordDeduplication(); input.computeOnly = ComputeOnly(); + input.maxOutOmmArraySize = p.maxOutOmmArraySize; // Readback. - auto ReadBuffer = [this](nvrhi::BufferHandle buffer, size_t size = 0)->std::vector + auto ReadBuffer = [this](nvrhi::BufferHandle buffer, size_t size = 0xFFFFFFFF)->std::vector { + if (size == 0) + return {}; std::vector data; void* pData = m_device->mapBuffer(buffer, nvrhi::CpuAccessMode::Read); assert(pData); - size_t byteSize = size == 0 ? buffer->getDesc().byteSize : size; + size_t byteSize = size == 0xFFFFFFFF ? 
buffer->getDesc().byteSize : size; - assert(size <= buffer->getDesc().byteSize); + assert(byteSize <= buffer->getDesc().byteSize); /* check the resolved size: 'size' may still be the 0xFFFFFFFF whole-buffer sentinel */ data.resize(byteSize); memcpy(data.data(), pData, byteSize); @@ -224,7 +244,7 @@ namespace { EXPECT_LE(postBuildInfo.ommArrayBufferSize, info.ommArrayBufferSize); EXPECT_LE(postBuildInfo.ommDescBufferSize, info.ommDescBufferSize); - res.ommArrayBuffer = m_device->createBuffer({ .byteSize = postBuildInfo.ommArrayBufferSize, .debugName = "omArrayBuffer", .canHaveUAVs = true, .canHaveRawViews = true }); + res.ommArrayBuffer = m_device->createBuffer({ .byteSize = std::max(postBuildInfo.ommArrayBufferSize, 4u), .debugName = "omArrayBuffer", .canHaveUAVs = true, .canHaveRawViews = true}); m_commandList->open(); @@ -256,10 +276,10 @@ namespace { ommIndexFormat = info.ommIndexFormat; ommIndexCount = info.ommIndexCount; - res.ommArrayBuffer = m_device->createBuffer({ .byteSize = info.ommArrayBufferSize, .debugName = "omArrayBuffer", .canHaveUAVs = true, .canHaveRawViews = true }); - res.ommDescBuffer = m_device->createBuffer({ .byteSize = info.ommDescBufferSize, .debugName = "omDescBuffer", .canHaveUAVs = true, .canHaveRawViews = true }); - res.ommIndexBuffer = m_device->createBuffer({ .byteSize = info.ommIndexBufferSize, .debugName = "omIndexBuffer", .canHaveUAVs = true, .canHaveRawViews = true }); - res.ommDescArrayHistogramBuffer = m_device->createBuffer({ .byteSize = info.ommDescArrayHistogramSize , .debugName = "omUsageDescBuffer" , .canHaveUAVs = true, .canHaveRawViews = true }); + res.ommArrayBuffer = m_device->createBuffer({ .byteSize = std::max(info.ommArrayBufferSize, 4u), .debugName = "ommArrayBuffer", .canHaveUAVs = true, .canHaveRawViews = true }); + res.ommDescBuffer = m_device->createBuffer({ .byteSize = info.ommDescBufferSize, .debugName = "ommDescBuffer", .canHaveUAVs = true, .canHaveRawViews = true }); + res.ommIndexBuffer = m_device->createBuffer({ .byteSize = info.ommIndexBufferSize, .debugName = "ommIndexBuffer", .canHaveUAVs = true, .canHaveRawViews = true }); +
res.ommDescArrayHistogramBuffer = m_device->createBuffer({ .byteSize = info.ommDescArrayHistogramSize , .debugName = "ommUsageDescBuffer" , .canHaveUAVs = true, .canHaveRawViews = true }); res.ommIndexHistogramBuffer = m_device->createBuffer({ .byteSize = info.ommIndexHistogramSize , .debugName = "ommIndexHistogramBuffer" , .canHaveUAVs = true, .canHaveRawViews = true }); res.ommPostBuildInfoBuffer = m_device->createBuffer({ .byteSize = info.ommPostBuildInfoBufferSize , .debugName = "ommPostBuildInfoBuffer" , .canHaveUAVs = true, .canHaveRawViews = true }); @@ -375,7 +395,31 @@ namespace { }; } - omm::Debug::Stats RunVmBake( + omm::Debug::Stats RunOmmBake( + float alphaCutoff, + uint32_t subdivisionLevel, + int2 texSize, + uint32_t indexBufferSize, + uint32_t* triangleIndices, + float* texCoords, + uint32_t texCoordBufferSize, + std::function texCb, + omm::Format format = omm::Format::OC1_4_State) + { + OmmBakeParams p; + p.alphaCutoff = alphaCutoff; + p.subdivisionLevel = subdivisionLevel; + p.texSize = texSize; + p.texCb = texCb; + p.format = format; + p.triangleIndices = triangleIndices; + p.indexBufferSize = indexBufferSize; + p.texCoords = texCoords; + p.texCoordBufferSize = texCoordBufferSize; + return RunOmmBake(p); + } + + omm::Debug::Stats RunOmmBake( float alphaCutoff, uint32_t subdivisionLevel, int2 texSize, @@ -383,7 +427,18 @@ namespace { omm::Format format = omm::Format::OC1_4_State) { uint32_t triangleIndices[] = { 0, 1, 2, 3, 1, 2 }; float texCoords[] = { 0.f, 0.f, 0.f, 1.f, 1.f, 0.f, 1.f, 1.f }; - return RunVmBake(alphaCutoff, subdivisionLevel, texSize, sizeof(triangleIndices), triangleIndices, texCoords, sizeof(texCoords), tex, format); + + OmmBakeParams p; + p.alphaCutoff = alphaCutoff; + p.subdivisionLevel = subdivisionLevel; + p.texSize = texSize; + p.texCb = tex; + p.format = format; + p.triangleIndices = triangleIndices; + p.indexBufferSize = sizeof(triangleIndices); + p.texCoords = texCoords; + p.texCoordBufferSize = sizeof(texCoords); + 
return RunOmmBake(p); } void ExpectEqual(const omm::Debug::Stats& stats, const omm::Debug::Stats& expectedStats) { @@ -408,7 +463,7 @@ namespace { uint32_t subdivisionLevel = 4; uint32_t numMicroTris = omm::bird::GetNumMicroTriangles(subdivisionLevel); - omm::Debug::Stats stats = RunVmBake(0.5f, subdivisionLevel, { 1024, 1024 }, [](int i, int j)->float { + omm::Debug::Stats stats = RunOmmBake(0.5f, subdivisionLevel, { 1024, 1024 }, [](int i, int j)->float { return 0.6f; }); @@ -427,7 +482,7 @@ namespace { uint32_t subdivisionLevel = 3; uint32_t numMicroTris = omm::bird::GetNumMicroTriangles(subdivisionLevel); - omm::Debug::Stats stats = RunVmBake(0.5f, subdivisionLevel, { 1024, 1024 }, [](int i, int j)->float { + omm::Debug::Stats stats = RunOmmBake(0.5f, subdivisionLevel, { 1024, 1024 }, [](int i, int j)->float { return 0.6f; }); @@ -446,7 +501,7 @@ namespace { uint32_t subdivisionLevel = 2; uint32_t numMicroTris = omm::bird::GetNumMicroTriangles(subdivisionLevel); - omm::Debug::Stats stats = RunVmBake(0.5f, subdivisionLevel, { 1024, 1024 }, [](int i, int j)->float { + omm::Debug::Stats stats = RunOmmBake(0.5f, subdivisionLevel, { 1024, 1024 }, [](int i, int j)->float { return 0.6f; }); @@ -465,7 +520,7 @@ namespace { uint32_t subdivisionLevel = 1; uint32_t numMicroTris = omm::bird::GetNumMicroTriangles(subdivisionLevel); - omm::Debug::Stats stats = RunVmBake(0.5f, subdivisionLevel, { 1024, 1024 }, [](int i, int j)->float { + omm::Debug::Stats stats = RunOmmBake(0.5f, subdivisionLevel, { 1024, 1024 }, [](int i, int j)->float { return 0.6f; }); @@ -484,7 +539,7 @@ namespace { uint32_t subdivisionLevel = 0; uint32_t numMicroTris = omm::bird::GetNumMicroTriangles(subdivisionLevel); - omm::Debug::Stats stats = RunVmBake(0.5f, subdivisionLevel, { 1024, 1024 }, [](int i, int j)->float { + omm::Debug::Stats stats = RunOmmBake(0.5f, subdivisionLevel, { 1024, 1024 }, [](int i, int j)->float { return 0.6f; }); @@ -503,7 +558,7 @@ namespace { uint32_t subdivisionLevel = 4; 
uint32_t numMicroTris = omm::bird::GetNumMicroTriangles(subdivisionLevel); - omm::Debug::Stats stats = RunVmBake(0.5f, subdivisionLevel, { 1024, 1024 }, [](int i, int j)->float { + omm::Debug::Stats stats = RunOmmBake(0.5f, subdivisionLevel, { 1024, 1024 }, [](int i, int j)->float { return 0.4f; }); @@ -522,7 +577,7 @@ namespace { uint32_t subdivisionLevel = 3; uint32_t numMicroTris = omm::bird::GetNumMicroTriangles(subdivisionLevel); - omm::Debug::Stats stats = RunVmBake(0.5f, subdivisionLevel, { 1024, 1024 }, [](int i, int j)->float { + omm::Debug::Stats stats = RunOmmBake(0.5f, subdivisionLevel, { 1024, 1024 }, [](int i, int j)->float { return 0.4f; }); @@ -541,7 +596,7 @@ namespace { uint32_t subdivisionLevel = 2; uint32_t numMicroTris = omm::bird::GetNumMicroTriangles(subdivisionLevel); - omm::Debug::Stats stats = RunVmBake(0.5f, subdivisionLevel, { 1024, 1024 }, [](int i, int j)->float { + omm::Debug::Stats stats = RunOmmBake(0.5f, subdivisionLevel, { 1024, 1024 }, [](int i, int j)->float { return 0.4f; }); @@ -560,7 +615,7 @@ namespace { uint32_t subdivisionLevel = 1; uint32_t numMicroTris = omm::bird::GetNumMicroTriangles(subdivisionLevel); - omm::Debug::Stats stats = RunVmBake(0.5f, subdivisionLevel, { 1024, 1024 }, [](int i, int j)->float { + omm::Debug::Stats stats = RunOmmBake(0.5f, subdivisionLevel, { 1024, 1024 }, [](int i, int j)->float { return 0.4f; }); @@ -579,7 +634,7 @@ namespace { uint32_t subdivisionLevel = 0; uint32_t numMicroTris = omm::bird::GetNumMicroTriangles(subdivisionLevel); - omm::Debug::Stats stats = RunVmBake(0.5f, subdivisionLevel, { 1024, 1024 }, [](int i, int j)->float { + omm::Debug::Stats stats = RunOmmBake(0.5f, subdivisionLevel, { 1024, 1024 }, [](int i, int j)->float { return 0.4f; }); @@ -598,7 +653,7 @@ namespace { uint32_t subdivisionLevel = 1; - omm::Debug::Stats stats = RunVmBake(0.5f, subdivisionLevel, { 1024, 1024 }, [](int i, int j)->float { + omm::Debug::Stats stats = RunOmmBake(0.5f, subdivisionLevel, { 1024, 1024 
}, [](int i, int j)->float { if ((i) % 8 != (j) % 8) return 0.f; else @@ -619,7 +674,7 @@ namespace { uint32_t subdivisionLevel = 1; - omm::Debug::Stats stats = RunVmBake(0.5f, subdivisionLevel, { 1024, 1024 }, [](int i, int j)->float { + omm::Debug::Stats stats = RunOmmBake(0.5f, subdivisionLevel, { 1024, 1024 }, [](int i, int j)->float { if ((i) % 8 != (j) % 8) return 1.f; else @@ -641,7 +696,7 @@ namespace { uint32_t subdivisionLevel = 4; uint32_t numMicroTris = omm::bird::GetNumMicroTriangles(subdivisionLevel); - omm::Debug::Stats stats = RunVmBake(0.5f, subdivisionLevel, { 1024, 1024 }, [](int i, int j)->float { + omm::Debug::Stats stats = RunOmmBake(0.5f, subdivisionLevel, { 1024, 1024 }, [](int i, int j)->float { if (i == 0 && j == 0) return 0.6f; return 0.4f; @@ -664,12 +719,63 @@ namespace { } } + TEST_P(OMMBakeTestGPU, ZeroOmmArraySizeBudget) { + + uint32_t subdivisionLevel = 4; + uint32_t numMicroTris = omm::bird::GetNumMicroTriangles(subdivisionLevel); + + OmmBakeParams p = OmmBakeParams::InitQuad(); + p.subdivisionLevel = 4; + p.maxOutOmmArraySize = 0; + p.texCb = [](int i, int j)->float { + if (i == 0 && j == 0) + return 0.6f; + return 0.4f; + }; + + omm::Debug::Stats stats = RunOmmBake(p); + + ExpectEqual(stats, { + .totalFullyUnknownOpaque = 2, + }); + } + + TEST_P(OMMBakeTestGPU, HalfOmmArraySizeBudget) { + + uint32_t subdivisionLevel = 4; + uint32_t numMicroTris = omm::bird::GetNumMicroTriangles(subdivisionLevel); + + OmmBakeParams p = OmmBakeParams::InitQuad(); + p.subdivisionLevel = 4; + p.maxOutOmmArraySize = 64u; // 64 bytes covers a single subdivlvl 4 prim + p.texCb = [](int i, int j)->float { + return 0.4f; + }; + + omm::Debug::Stats stats = RunOmmBake(p); + + if (EnableSpecialIndices()) + { + ExpectEqual(stats, { + .totalFullyTransparent = 1, + .totalFullyUnknownOpaque = 1, // one triangle is "out of memory" + }); + } + else + { + ExpectEqual(stats, { + .totalTransparent = 256, + .totalFullyUnknownOpaque = 1 // one triangle is "out of 
memory" + }); + } + } + TEST_P(OMMBakeTestGPU, Circle) { uint32_t subdivisionLevel = 4; uint32_t numMicroTris = omm::bird::GetNumMicroTriangles(subdivisionLevel); - omm::Debug::Stats stats = RunVmBake(0.5f, subdivisionLevel, { 1024, 1024 }, [](int i, int j)->float { + omm::Debug::Stats stats = RunOmmBake(0.5f, subdivisionLevel, { 1024, 1024 }, [](int i, int j)->float { if (i == 0 && j == 0) return 0.6f; @@ -695,7 +801,7 @@ namespace { uint32_t subdivisionLevel = 4; uint32_t numMicroTris = omm::bird::GetNumMicroTriangles(subdivisionLevel); - omm::Debug::Stats stats = RunVmBake(0.5f, subdivisionLevel, { 1024, 1024 }, [](int i, int j)->float { + omm::Debug::Stats stats = RunOmmBake(0.5f, subdivisionLevel, { 1024, 1024 }, [](int i, int j)->float { if (i == 0 && j == 0) return 0.6f; @@ -719,7 +825,7 @@ namespace { uint32_t subdivisionLevel = 4; uint32_t numMicroTris = omm::bird::GetNumMicroTriangles(subdivisionLevel); - omm::Debug::Stats stats = RunVmBake(0.5f, subdivisionLevel, { 1024, 1024 }, [](int i, int j)->float { + omm::Debug::Stats stats = RunOmmBake(0.5f, subdivisionLevel, { 1024, 1024 }, [](int i, int j)->float { if (i == 0 && j == 0) return 0.6f; @@ -741,7 +847,7 @@ namespace { uint32_t subdivisionLevel = 4; uint32_t numMicroTris = omm::bird::GetNumMicroTriangles(subdivisionLevel); - omm::Debug::Stats stats = RunVmBake(0.5f, subdivisionLevel, { 1024, 1024 }, [](int i, int j)->float { + omm::Debug::Stats stats = RunOmmBake(0.5f, subdivisionLevel, { 1024, 1024 }, [](int i, int j)->float { if (i == 0 && j == 0) return 0.6f; @@ -762,7 +868,7 @@ namespace { uint32_t subdivisionLevel = 4; uint32_t numMicroTris = omm::bird::GetNumMicroTriangles(subdivisionLevel); - omm::Debug::Stats stats = RunVmBake(0.5f, subdivisionLevel, { 1024, 1024 }, [](int i, int j)->float { + omm::Debug::Stats stats = RunOmmBake(0.5f, subdivisionLevel, { 1024, 1024 }, [](int i, int j)->float { if (i == 0 && j == 0) return 0.6f; @@ -782,7 +888,7 @@ namespace { uint32_t subdivisionLevel = 5; 
uint32_t numMicroTris = omm::bird::GetNumMicroTriangles(subdivisionLevel); - omm::Debug::Stats stats = RunVmBake(0.5f, subdivisionLevel, { 1024, 1024 }, [](int i, int j)->float { + omm::Debug::Stats stats = RunOmmBake(0.5f, subdivisionLevel, { 1024, 1024 }, [](int i, int j)->float { auto complexMultiply = [](float2 a, float2 b)->float2 { return float2(a.x * b.x - a.y * b.y, a.x * b.y + a.y * b.x); @@ -826,7 +932,7 @@ namespace { uint32_t triangleIndices[] = { 0, 1, 2, }; float texCoords[] = { 0.2f, 0.f, 0.1f, 0.8f, 0.9f, 0.1f }; - omm::Debug::Stats stats = RunVmBake(0.5f, subdivisionLevel, { 1024, 1024 }, sizeof(triangleIndices), triangleIndices, texCoords, sizeof(texCoords), [](int i, int j)->float { + omm::Debug::Stats stats = RunOmmBake(0.5f, subdivisionLevel, { 1024, 1024 }, sizeof(triangleIndices), triangleIndices, texCoords, sizeof(texCoords), [](int i, int j)->float { auto complexMultiply = [](float2 a, float2 b)->float2 { return float2(a.x * b.x - a.y * b.y, a.x * b.y + a.y * b.x); @@ -882,7 +988,7 @@ namespace { uint32_t triangleIndices[] = { 0, 1, 2, }; float texCoords[] = { 0.2f, 0.f, 0.1f, 0.8f, 0.9f, 0.1f }; - omm::Debug::Stats stats = RunVmBake(0.5f, subdivisionLevel, { 1024, 1024 }, sizeof(triangleIndices), triangleIndices, texCoords, sizeof(texCoords), [](int i, int j)->float { + omm::Debug::Stats stats = RunOmmBake(0.5f, subdivisionLevel, { 1024, 1024 }, sizeof(triangleIndices), triangleIndices, texCoords, sizeof(texCoords), [](int i, int j)->float { auto complexMultiply = [](float2 a, float2 b)->float2 { return float2(a.x * b.x - a.y * b.y, a.x * b.y + a.y * b.x); @@ -938,7 +1044,7 @@ namespace { uint32_t triangleIndices[] = { 0, 1, 2, }; float texCoords[] = { 0.2f, 0.f, 0.1f, 0.8f, 0.9f, 0.1f }; - omm::Debug::Stats stats = RunVmBake(0.5f, subdivisionLevel, { 1024, 1024 }, sizeof(triangleIndices), triangleIndices, texCoords, sizeof(texCoords), [](int i, int j)->float { + omm::Debug::Stats stats = RunOmmBake(0.5f, subdivisionLevel, { 1024, 1024 }, 
sizeof(triangleIndices), triangleIndices, texCoords, sizeof(texCoords), [](int i, int j)->float { auto multiply = [](float2 x, float2 y)->float2 { return float2(x.x * y.x - x.y * y.y, x.x * y.y + x.y * y.x); @@ -996,7 +1102,7 @@ namespace { uint32_t triangleIndices[] = { 0, 1, 2, 3, 4, 5, }; float texCoords[] = { 0.2f, 0.f, 0.1f, 0.8f, 0.9f, 0.1f, 0.2f, 0.f, 0.1f, 0.8f, 0.9f, 0.1f }; - omm::Debug::Stats stats = RunVmBake(0.5f, subdivisionLevel, { 1024, 1024 }, sizeof(triangleIndices), triangleIndices, texCoords, sizeof(texCoords), [](int i, int j)->float { + omm::Debug::Stats stats = RunOmmBake(0.5f, subdivisionLevel, { 1024, 1024 }, sizeof(triangleIndices), triangleIndices, texCoords, sizeof(texCoords), [](int i, int j)->float { auto multiply = [](float2 x, float2 y)->float2 { return float2(x.x * y.x - x.y * y.y, x.x * y.y + x.y * y.x); @@ -1055,7 +1161,7 @@ namespace { //float texCoords[8] = { 0.25f, 0.25f, 0.25f, 0.75f, 0.75f, 0.25f }; float texCoords[] = { 0.f, 0.f, 0.f, 1.0f, 1.f, 1.f, 1.f, 0.f }; - omm::Debug::Stats stats = RunVmBake(0.5f, subdivisionLevel, { 4, 4 }, sizeof(triangleIndices), triangleIndices, texCoords, sizeof(texCoords), [](int i, int j)->float { + omm::Debug::Stats stats = RunOmmBake(0.5f, subdivisionLevel, { 4, 4 }, sizeof(triangleIndices), triangleIndices, texCoords, sizeof(texCoords), [](int i, int j)->float { uint32_t x = (i) % 2; uint32_t y = (j) % 2;