diff --git a/integration/omm-sdk-nvrhi/omm-sdk-nvrhi.cpp b/integration/omm-sdk-nvrhi/omm-sdk-nvrhi.cpp
index 9c345ac..fc12c30 100644
--- a/integration/omm-sdk-nvrhi/omm-sdk-nvrhi.cpp
+++ b/integration/omm-sdk-nvrhi/omm-sdk-nvrhi.cpp
@@ -649,8 +649,8 @@ omm::Gpu::DispatchConfigDesc GpuBakeNvrhiImpl::GetConfig(const GpuBakeNvrhi::Inp
 	config.indexCount							= (uint32_t)params.numIndices;
 	config.globalFormat							= params.format == nvrhi::rt::OpacityMicromapFormat::OC1_2_State ? Format::OC1_2_State : Format::OC1_4_State;
 	config.maxScratchMemorySize					= params.minimalMemoryMode ? Gpu::ScratchMemoryBudget::MB_4 : Gpu::ScratchMemoryBudget::MB_256;
+	config.maxOutOmmArraySize				    = params.maxOutOmmArraySize;
 	config.maxSubdivisionLevel					= params.maxSubdivisionLevel;
-	config.globalSubdivisionLevel				= params.maxSubdivisionLevel;
 	config.dynamicSubdivisionScale				= params.dynamicSubdivisionScale;
 	return config;
 }
diff --git a/integration/omm-sdk-nvrhi/omm-sdk-nvrhi.h b/integration/omm-sdk-nvrhi/omm-sdk-nvrhi.h
index 094faf1..912e822 100644
--- a/integration/omm-sdk-nvrhi/omm-sdk-nvrhi.h
+++ b/integration/omm-sdk-nvrhi/omm-sdk-nvrhi.h
@@ -56,6 +56,7 @@ namespace omm
 			uint32_t							numIndices = 0;
 
 			uint32_t							maxSubdivisionLevel = 0;
+			uint32_t							maxOutOmmArraySize = 0xFFFFFFFF;
 			nvrhi::rt::OpacityMicromapFormat	format = nvrhi::rt::OpacityMicromapFormat::OC1_4_State;
 			float								dynamicSubdivisionScale = 0.5f;
 			bool								minimalMemoryMode = false;
diff --git a/omm-sdk/include/omm.h b/omm-sdk/include/omm.h
index 2093710..db0eb5e 100644
--- a/omm-sdk/include/omm.h
+++ b/omm-sdk/include/omm.h
@@ -16,7 +16,7 @@ license agreement from NVIDIA CORPORATION is strictly prohibited.
 
 #define OMM_VERSION_MAJOR 0
 #define OMM_VERSION_MINOR 9
-#define OMM_VERSION_BUILD 1
+#define OMM_VERSION_BUILD 2
 
 #if defined(_MSC_VER)
     #define OMM_CALL __fastcall
@@ -788,7 +788,7 @@ typedef struct ommGpuPreDispatchInfo
    uint32_t       outOmmIndexCount;
    // Min required size of OUT_OMM_ARRAY_DATA. GetBakeInfo returns most conservative estimation while less conservative number
    // can be obtained via BakePrepass
-   size_t         outOmmArraySizeInBytes;
+   uint32_t       outOmmArraySizeInBytes;
    // Min required size of OUT_OMM_DESC_ARRAY. GetBakeInfo returns most conservative estimation while less conservative number
    // can be obtained via BakePrepass
    uint32_t       outOmmDescSizeInBytes;
@@ -848,12 +848,15 @@ typedef struct ommGpuDispatchConfigDesc
    float                     dynamicSubdivisionScale;
    // The global Format. May be overriden by the per-triangle config.
    ommFormat                 globalFormat;
-   // Micro triangle count is 4^N, where N is the subdivision level. Subdivision level must be in range [0,
-   // MaxSubdivisionLevel]. The global subdivisionLevel. May be overriden by the per-triangle subdivision level setting. The
-   // subdivision level to allow in dynamic mode and value is used to allocate appropriate scratch memory.
-   uint8_t                   globalSubdivisionLevel;
    uint8_t                   maxSubdivisionLevel;
-   uint8_t                   enableSubdivisionLevelBuffer;
+   ommBool                   enableSubdivisionLevelBuffer;
+   // The SDK will try to limit the omm array size of PreDispatchInfo::outOmmArraySizeInBytes and
+   // PostBakeInfo::outOmmArraySizeInBytes.
+   // Currently a greedy algorithm is implemented with a first-come, first-served order.
+   // The SDK may (or may not) apply more sophisticated heuristics in the future.
+   // If no memory is available to allocate an OMM Array Block the state will default to Unknown Opaque (ignoring any bake
+   // flags to disable special indices).
+   uint32_t                  maxOutOmmArraySize;
    // Target scratch memory budget, The SDK will try adjust the sum of the transient pool buffers to match this value. Higher
    // budget more efficiently executes the baking operation. May return INSUFFICIENT_SCRATCH_MEMORY if set too low.
    ommGpuScratchMemoryBudget maxScratchMemorySize;
@@ -877,9 +880,9 @@ inline ommGpuDispatchConfigDesc ommGpuDispatchConfigDescDefault()
    v.alphaCutoff                   = 0.5f;
    v.dynamicSubdivisionScale       = 2;
    v.globalFormat                  = ommFormat_OC1_4_State;
-   v.globalSubdivisionLevel        = 4;
    v.maxSubdivisionLevel           = 8;
    v.enableSubdivisionLevelBuffer  = 0;
+   v.maxOutOmmArraySize            = 0xFFFFFFFF;
    v.maxScratchMemorySize          = ommGpuScratchMemoryBudget_Default;
    return v;
 }
diff --git a/omm-sdk/include/omm.hpp b/omm-sdk/include/omm.hpp
index 4b544a6..14429e9 100644
--- a/omm-sdk/include/omm.hpp
+++ b/omm-sdk/include/omm.hpp
@@ -683,7 +683,7 @@ namespace omm
          uint32_t    outOmmIndexCount                   = 0xFFFFFFFF;
          // Min required size of OUT_OMM_ARRAY_DATA. GetBakeInfo returns most conservative estimation while less conservative number
          // can be obtained via BakePrepass
-         size_t      outOmmArraySizeInBytes             = 0xFFFFFFFF;
+         uint32_t    outOmmArraySizeInBytes             = 0xFFFFFFFF;
          // Min required size of OUT_OMM_DESC_ARRAY. GetBakeInfo returns most conservative estimation while less conservative number
          // can be obtained via BakePrepass
          uint32_t    outOmmDescSizeInBytes              = 0xFFFFFFFF;
@@ -728,12 +728,15 @@ namespace omm
          float               dynamicSubdivisionScale       = 2;
          // The global Format. May be overriden by the per-triangle config.
          Format              globalFormat                  = Format::OC1_4_State;
-         // Micro triangle count is 4^N, where N is the subdivision level. Subdivision level must be in range [0,
-         // MaxSubdivisionLevel]. The global subdivisionLevel. May be overriden by the per-triangle subdivision level setting. The
-         // subdivision level to allow in dynamic mode and value is used to allocate appropriate scratch memory.
-         uint8_t             globalSubdivisionLevel        = 4;
          uint8_t             maxSubdivisionLevel           = 8;
-         uint8_t             enableSubdivisionLevelBuffer  = 0;
+         bool                enableSubdivisionLevelBuffer  = false;
+         // The SDK will try to limit the omm array size of PreDispatchInfo::outOmmArraySizeInBytes and
+         // PostBakeInfo::outOmmArraySizeInBytes.
+         // Currently a greedy algorithm is implemented with a first-come, first-served order.
+         // The SDK may (or may not) apply more sophisticated heuristics in the future.
+         // If no memory is available to allocate an OMM Array Block the state will default to Unknown Opaque (ignoring any bake
+         // flags to disable special indices).
+         uint32_t            maxOutOmmArraySize            = 0xFFFFFFFF;
          // Target scratch memory budget, The SDK will try adjust the sum of the transient pool buffers to match this value. Higher
          // budget more efficiently executes the baking operation. May return INSUFFICIENT_SCRATCH_MEMORY if set too low.
          ScratchMemoryBudget maxScratchMemorySize          = ScratchMemoryBudget::Default;
diff --git a/omm-sdk/scripts/omm.json b/omm-sdk/scripts/omm.json
index a97cb36..1b16b53 100644
--- a/omm-sdk/scripts/omm.json
+++ b/omm-sdk/scripts/omm.json
@@ -1824,21 +1824,21 @@
                 },
                 "comment": "The global Format. May be overriden by the per-triangle config."
             },
-            {
-                "type": "uint8_t",
-                "name": "globalSubdivisionLevel",
-                "value": "4",
-                "comment": "Micro triangle count is 4^N, where N is the subdivision level. Subdivision level must be in range [0, MaxSubdivisionLevel]. The global subdivisionLevel. May be overriden by the per-triangle subdivision level setting. The subdivision level to allow in dynamic mode and value is used to allocate appropriate scratch memory."
-            },
             {
                 "type": "uint8_t",
                 "name": "maxSubdivisionLevel",
                 "value": "8"
             },
             {
-                "type": "uint8_t",
+                "type": "bool",
                 "name": "enableSubdivisionLevelBuffer",
-                "value": "0"
+                "value": "false"
+            },
+            {
+                "type": "uint32_t",
+                "name": "maxOutOmmArraySize",
+                "value": "0xFFFFFFFF",
+                "comment": "The SDK will try to limit the omm array size of PreDispatchInfo::outOmmArraySizeInBytes and PostBakeInfo::outOmmArraySizeInBytes.\nCurrently a greedy algorithm is implemented with a first-come, first-served order.\nThe SDK may (or may not) apply more sophisticated heuristics in the future.\nIf no memory is available to allocate an OMM Array Block the state will default to Unknown Opaque (ignoring any bake flags to disable special indices)."
             },
             {
                 "type": "ScratchMemoryBudget",
@@ -1915,7 +1915,7 @@
                 "value": "0xFFFFFFFF"
             },
             {
-                "type": "size_t",
+                "type": "uint32_t",
                 "name": "outOmmArraySizeInBytes",
                 "value": "0xFFFFFFFF",
                 "comment": "Min required size of OUT_OMM_ARRAY_DATA. GetBakeInfo returns most conservative estimation while less conservative number can be obtained via BakePrepass"
diff --git a/omm-sdk/scripts/omm_header_c.txt b/omm-sdk/scripts/omm_header_c.txt
index 7f67aef..34d4845 100644
--- a/omm-sdk/scripts/omm_header_c.txt
+++ b/omm-sdk/scripts/omm_header_c.txt
@@ -16,7 +16,7 @@ license agreement from NVIDIA CORPORATION is strictly prohibited.
 
 #define OMM_VERSION_MAJOR 0
 #define OMM_VERSION_MINOR 9
-#define OMM_VERSION_BUILD 1
+#define OMM_VERSION_BUILD 2
 
 #if defined(_MSC_VER)
     #define OMM_CALL __fastcall
diff --git a/omm-sdk/shaders/omm_desc_patch.cs.hlsl b/omm-sdk/shaders/omm_desc_patch.cs.hlsl
index d19add2..f3da7b1 100644
--- a/omm-sdk/shaders/omm_desc_patch.cs.hlsl
+++ b/omm-sdk/shaders/omm_desc_patch.cs.hlsl
@@ -51,7 +51,7 @@ uint GetSourcePrimitiveIndex(uint primitiveIndex)
 
 	if (primitiveIndexOrHashTableEntryIndex < -4)
 	{
-		const uint hashTableEntryIndex = -(primitiveIndexOrHashTableEntryIndex + 4);
+		const uint hashTableEntryIndex = -(primitiveIndexOrHashTableEntryIndex + 5);
 		const uint primitiveIndexRef =  OMM_SUBRESOURCE_LOAD(HashTableBuffer, 8 * hashTableEntryIndex + 4); // [hash|primitiveIndex]
 		return primitiveIndexRef;
 	}
diff --git a/omm-sdk/shaders/omm_global_cb.hlsli b/omm-sdk/shaders/omm_global_cb.hlsli
index 8f110f4..726bd0b 100644
--- a/omm-sdk/shaders/omm_global_cb.hlsli
+++ b/omm-sdk/shaders/omm_global_cb.hlsli
@@ -7,14 +7,12 @@ and any modifications thereto. Any use, reproduction, disclosure or
 distribution of this software and related documentation without an express
 license agreement from NVIDIA CORPORATION is strictly prohibited.
 */
-
-
 #define OMM_DECLARE_GLOBAL_CONSTANT_BUFFER						\
 OMM_CONSTANTS_START(GlobalConstants)							\
 	OMM_CONSTANT(uint, IndexCount)								\
 	OMM_CONSTANT(uint, PrimitiveCount)							\
 	OMM_CONSTANT(uint, MaxBatchCount)							\
-	OMM_CONSTANT(uint, GlobalSubdivisionLevel)					\
+	OMM_CONSTANT(uint, MaxOutOmmArraySize)						\
 																\
 	OMM_CONSTANT(uint, IsOmmIndexFormat16bit)					\
 	OMM_CONSTANT(uint, EnableSpecialIndices)					\
@@ -45,7 +43,6 @@ OMM_CONSTANTS_START(GlobalConstants)							\
 																\
 	OMM_CONSTANT(float2, TexSize)								\
 	OMM_CONSTANT(float2, InvTexSize)							\
-																\
 /*	---- Buffer offsets go here								  */\
 																\
 	OMM_CONSTANT(uint, IEBakeBufferOffset)						\
diff --git a/omm-sdk/shaders/omm_post_build_info.cs.hlsl b/omm-sdk/shaders/omm_post_build_info.cs.hlsl
index 7011ca2..b63290c 100644
--- a/omm-sdk/shaders/omm_post_build_info.cs.hlsl
+++ b/omm-sdk/shaders/omm_post_build_info.cs.hlsl
@@ -31,6 +31,6 @@ void main(uint3 tid : SV_DispatchThreadID)
 	const uint ommDescCount			= OMM_SUBRESOURCE_LOAD(OmmDescAllocatorCounterBuffer, 0);
 	const uint ommDescByteSize		= ommDescCount * 8;
 
-	u_postBuildInfo.Store(0, ommArrayByteSize);
+	u_postBuildInfo.Store(0, min(ommArrayByteSize, g_GlobalConstants.MaxOutOmmArraySize));
 	u_postBuildInfo.Store(4, ommDescByteSize);
 }
\ No newline at end of file
diff --git a/omm-sdk/shaders/omm_work_setup_common.hlsli b/omm-sdk/shaders/omm_work_setup_common.hlsli
index 4526d07..ebe3c3b 100644
--- a/omm-sdk/shaders/omm_work_setup_common.hlsli
+++ b/omm-sdk/shaders/omm_work_setup_common.hlsli
@@ -203,19 +203,25 @@ uint GetSubdivisionLevel(TexCoords texCoords)
 	}
 	else
 	{
-		return g_GlobalConstants.GlobalSubdivisionLevel;
+		return g_GlobalConstants.MaxSubdivisionLevel;
 	}
 }
 
 int GetOmmDescOffset(ByteAddressBuffer ommIndexBuffer, uint primitiveIndex)
 {
-	// TODO: support 16-bit indices.
 	if (g_GlobalConstants.IsOmmIndexFormat16bit)
 	{
 		const uint dwOffset = primitiveIndex.x >> 1u;
-		const uint shift = (primitiveIndex.x & 1u) << 4u; // 0 or 16
-		const uint val = ommIndexBuffer.Load(4 * dwOffset);
-		return (val >> shift) & 0xFFFF;
+		const uint shift	= (primitiveIndex.x & 1u) << 4u; // 0 or 16
+		const uint raw		= ommIndexBuffer.Load(4 * dwOffset);
+		const uint raw16	= (raw >> shift) & 0xFFFFu;
+
+		if (raw16 > 0xFFFB) // e.g special index
+		{
+			return (raw16 - 0xFFFF) - 1; // -1, -2, -3 or -4
+		}
+
+		return raw16;
 	}
 	else
 	{
diff --git a/omm-sdk/shaders/omm_work_setup_cs.cs.hlsl b/omm-sdk/shaders/omm_work_setup_cs.cs.hlsl
index be73358..537910b 100644
--- a/omm-sdk/shaders/omm_work_setup_cs.cs.hlsl
+++ b/omm-sdk/shaders/omm_work_setup_cs.cs.hlsl
@@ -39,7 +39,8 @@ void main(uint3 tid : SV_DispatchThreadID)
 	uint hashTableEntryIndex;
 	hashTable::Result result		= FindOrInsertOMMEntry(texCoords, subdivisionLevel, hashTableEntryIndex);
 
-	uint vmDescOffset = 0;
+	int vmDescOffset = (int)SpecialIndex::FullyUnknownOpaque;
+
 	if (result == hashTable::Result::Null ||
 		result == hashTable::Result::Inserted || 
 		result == hashTable::Result::ReachedMaxAttemptCount)
@@ -66,9 +67,7 @@ void main(uint3 tid : SV_DispatchThreadID)
 			OMM_SUBRESOURCE_INTERLOCKEDADD(OmmArrayAllocatorCounterBuffer, 0, vmDataByteSize, vmArrayOffset);
 		}
 
-		const uint kMaxVmArrayBudget = 0xFFFFFFFF;
-
-		if ((vmArrayOffset + vmDataByteSize) < kMaxVmArrayBudget)
+		if ((vmArrayOffset + vmDataByteSize) <= g_GlobalConstants.MaxOutOmmArraySize)
 		{
 			// Allocate new VM-desc for the vmArrayOffset
 			{
@@ -147,7 +146,7 @@ void main(uint3 tid : SV_DispatchThreadID)
 	else // if (status == hashTable::Result::Found
 	{
 		// Store the hash-table offset and patch up the pointers later.
-		vmDescOffset = (uint)(-hashTableEntryIndex - 4);
+		vmDescOffset = (uint)(-hashTableEntryIndex - 5);
 	}
 
 	OMM_SUBRESOURCE_STORE(TempOmmIndexBuffer, 4 * primitiveIndex, vmDescOffset);
diff --git a/omm-sdk/shaders/omm_work_setup_gfx.cs.hlsl b/omm-sdk/shaders/omm_work_setup_gfx.cs.hlsl
index ee95799..bb77c41 100644
--- a/omm-sdk/shaders/omm_work_setup_gfx.cs.hlsl
+++ b/omm-sdk/shaders/omm_work_setup_gfx.cs.hlsl
@@ -40,7 +40,8 @@ void main(uint3 tid : SV_DispatchThreadID)
 	uint hashTableEntryIndex;
 	hashTable::Result result		= FindOrInsertOMMEntry(texCoords, subdivisionLevel, hashTableEntryIndex);
 
-	uint vmDescOffset = 0;
+	int vmDescOffset = (int)SpecialIndex::FullyUnknownOpaque;
+
 	if (result == hashTable::Result::Null ||
 		result == hashTable::Result::Inserted || 
 		result == hashTable::Result::ReachedMaxAttemptCount)
@@ -56,17 +57,19 @@ void main(uint3 tid : SV_DispatchThreadID)
 
 		// Allocate new VM-array offset & vm-index
 		uint vmArrayOffset = 0;
+		uint vmDataByteSize = 0;
 		{
 			const uint vmDataBitSize			= GetOMMFormatBitCount(ommFormat) * numMicroTriangles;
 
 			// spec allows 1 byte alignment but we require 4 byte to make sure UAV writes
 			// are DW aligned.
-			const uint vmDataByteSize			= max(vmDataBitSize >> 3u, 4u);
+			vmDataByteSize						= max(vmDataBitSize >> 3u, 4u);
 
 			OMM_SUBRESOURCE_INTERLOCKEDADD(OmmArrayAllocatorCounterBuffer, 0, vmDataByteSize, vmArrayOffset);
 		}
 
 		// Allocate new VM-desc for the vmArrayOffset
+		if ((vmArrayOffset + vmDataByteSize) <= g_GlobalConstants.MaxOutOmmArraySize)
 		{
 			// The rasterItemOffset is the same things as the vmDescOffset,
 			OMM_SUBRESOURCE_INTERLOCKEDADD(OmmDescAllocatorCounterBuffer, 0, 1, vmDescOffset);
@@ -79,84 +82,84 @@ void main(uint3 tid : SV_DispatchThreadID)
 				u_ommDescArrayBuffer.Store(vmDescOffset * 8, vmArrayOffset);
 				u_ommDescArrayBuffer.Store(vmDescOffset * 8 + 4, vmDescData);
 			}
-		}
 
-		// Increase UsageDesc info struct,
-		// resolve uniform vm's later by usage subtraction.
-		{
-			const uint strideInBytes		= 8;	// sizeof(VisibilityMapUsageDesc), [count32, format16, level16]
-			const uint index				= (kOMMFormatNum * subdivisionLevel + ((uint)ommFormat - 1));
-			const uint offset				= strideInBytes * index;
+			// Increase UsageDesc info struct,
+			// resolve uniform vm's later by usage subtraction.
+			{
+				const uint strideInBytes		= 8;	// sizeof(VisibilityMapUsageDesc), [count32, format16, level16]
+				const uint index				= (kOMMFormatNum * subdivisionLevel + ((uint)ommFormat - 1));
+				const uint offset				= strideInBytes * index;
 
-			InterlockedAdd(u_ommDescArrayHistogramBuffer, offset, 1);
-		}
+				InterlockedAdd(u_ommDescArrayHistogramBuffer, offset, 1);
+			}
 
-		/// ---- Setup baking parameters ----- 
+			/// ---- Setup baking parameters ----- 
 
-		// Allocate a slot in the raster items array.
-		uint bakeResultGlobalOffset	= 0;
-		uint bakeResultBatchIndex	= 0;
-		{
-			const uint offset						= 4 * subdivisionLevel;
+			// Allocate a slot in the raster items array.
+			uint bakeResultGlobalOffset	= 0;
+			uint bakeResultBatchIndex	= 0;
+			{
+				const uint offset						= 4 * subdivisionLevel;
 
-			OMM_SUBRESOURCE_INTERLOCKEDADD(BakeResultBufferCounterBuffer, offset, 1, bakeResultGlobalOffset);
+				OMM_SUBRESOURCE_INTERLOCKEDADD(BakeResultBufferCounterBuffer, offset, 1, bakeResultGlobalOffset);
 
-			const uint maxItemsPerBatch				= GetMaxItemsPerBatch(subdivisionLevel);
-			bakeResultBatchIndex					= bakeResultGlobalOffset / maxItemsPerBatch;
-		}
+				const uint maxItemsPerBatch				= GetMaxItemsPerBatch(subdivisionLevel);
+				bakeResultBatchIndex					= bakeResultGlobalOffset / maxItemsPerBatch;
+			}
 
-		// Store the VM-that will be procesed by the rasterizer.
-		{
-			const uint ommFormatAndPrimitiveIndex = (primitiveIndex) | ((uint)ommFormat << 30);
+			// Store the VM that will be processed by the rasterizer.
+			{
+				const uint ommFormatAndPrimitiveIndex = (primitiveIndex) | ((uint)ommFormat << 30);
 
-			const uint offset					 = 8 * (bakeResultGlobalOffset + subdivisionLevel * g_GlobalConstants.PrimitiveCount);
+				const uint offset					 = 8 * (bakeResultGlobalOffset + subdivisionLevel * g_GlobalConstants.PrimitiveCount);
 
-			OMM_SUBRESOURCE_STORE(RasterItemsBuffer, offset, vmArrayOffset);
-			OMM_SUBRESOURCE_STORE(RasterItemsBuffer, offset + 4, ommFormatAndPrimitiveIndex);
-		}
+				OMM_SUBRESOURCE_STORE(RasterItemsBuffer, offset, vmArrayOffset);
+				OMM_SUBRESOURCE_STORE(RasterItemsBuffer, offset + 4, ommFormatAndPrimitiveIndex);
+			}
 
-		// Increment the drawcall count for the current batch & subdivisiolevel.
-		{
-			const uint strideInBytes					= 20;	// arg count of DrawIndexedInstanced 
-			const uint InstanceCountOffsetInBytes		= 4;	// offset of InstanceCount in DrawIndexedInstanced
-			const uint offset							= InstanceCountOffsetInBytes + strideInBytes * (subdivisionLevel * g_GlobalConstants.MaxBatchCount + bakeResultBatchIndex);
+			// Increment the drawcall count for the current batch & subdivision level.
+			{
+				const uint strideInBytes					= 20;	// arg count of DrawIndexedInstanced 
+				const uint InstanceCountOffsetInBytes		= 4;	// offset of InstanceCount in DrawIndexedInstanced
+				const uint offset							= InstanceCountOffsetInBytes + strideInBytes * (subdivisionLevel * g_GlobalConstants.MaxBatchCount + bakeResultBatchIndex);
 
-			OMM_SUBRESOURCE_INTERLOCKEDADD(IEBakeBuffer, offset, 1, bakeResultGlobalOffset);
-		}
+				OMM_SUBRESOURCE_INTERLOCKEDADD(IEBakeBuffer, offset, 1, bakeResultGlobalOffset);
+			}
 
-		// Increment the thread count for the current batch & subdivisiolevel.
-		uint threadGroupCountX = 0;
-		{
-			// This is the most number of micro-triangles that will be processed per thread
-			// 32 allows non-atomic writes to the vmArrayBuffer for 2 and 4-state vm formats.
-			const uint kMaxNumMicroTrianglePerThread	= 32;
-			const uint numMicroTrianglePerThread		= min(kMaxNumMicroTrianglePerThread, numMicroTriangles);
-			const uint numThreadsNeeded					= max(numMicroTriangles / numMicroTrianglePerThread, 1u);
+			// Increment the thread count for the current batch & subdivision level.
+			uint threadGroupCountX = 0;
+			{
+				// This is the most number of micro-triangles that will be processed per thread
+				// 32 allows non-atomic writes to the vmArrayBuffer for 2 and 4-state vm formats.
+				const uint kMaxNumMicroTrianglePerThread	= 32;
+				const uint numMicroTrianglePerThread		= min(kMaxNumMicroTrianglePerThread, numMicroTriangles);
+				const uint numThreadsNeeded					= max(numMicroTriangles / numMicroTrianglePerThread, 1u);
 
-			const uint strideInBytes					= 4; // sizeof(uint32_t)
-			const uint offset							= strideInBytes * (subdivisionLevel * g_GlobalConstants.MaxBatchCount + bakeResultBatchIndex);
+				const uint strideInBytes					= 4; // sizeof(uint32_t)
+				const uint offset							= strideInBytes * (subdivisionLevel * g_GlobalConstants.MaxBatchCount + bakeResultBatchIndex);
 
-			uint oldGlobalThreadCountX;
-			OMM_SUBRESOURCE_INTERLOCKEDADD(DispatchIndirectThreadCountBuffer, offset, numThreadsNeeded, oldGlobalThreadCountX);
-			uint newGlobalThreadCountX = numThreadsNeeded + oldGlobalThreadCountX;
+				uint oldGlobalThreadCountX;
+				OMM_SUBRESOURCE_INTERLOCKEDADD(DispatchIndirectThreadCountBuffer, offset, numThreadsNeeded, oldGlobalThreadCountX);
+				uint newGlobalThreadCountX = numThreadsNeeded + oldGlobalThreadCountX;
 
-			threadGroupCountX = (newGlobalThreadCountX + 127) / 128;
-		}
+				threadGroupCountX = (newGlobalThreadCountX + 127) / 128;
+			}
 
-		// Increment the thread GROUP count for the current batch & subdivisiolevel.
-		{
-			const uint strideInBytes			 = 12; // arg count of Dispatch
-			const uint ThreadCountXOffsetInBytes = 0;	 // offset of ThreadCountX in Dispatch
-			const uint offset					 = ThreadCountXOffsetInBytes + strideInBytes * (subdivisionLevel * g_GlobalConstants.MaxBatchCount + bakeResultBatchIndex);
+			// Increment the thread GROUP count for the current batch & subdivision level.
+			{
+				const uint strideInBytes			 = 12; // arg count of Dispatch
+				const uint ThreadCountXOffsetInBytes = 0;	 // offset of ThreadCountX in Dispatch
+				const uint offset					 = ThreadCountXOffsetInBytes + strideInBytes * (subdivisionLevel * g_GlobalConstants.MaxBatchCount + bakeResultBatchIndex);
 
-			uint _dummy;
-			OMM_SUBRESOURCE_INTERLOCKEDMAX(IECompressCsBuffer, offset, threadGroupCountX, _dummy);
+				uint _dummy;
+				OMM_SUBRESOURCE_INTERLOCKEDMAX(IECompressCsBuffer, offset, threadGroupCountX, _dummy);
+			}
 		}
 	}
 	else // if (status == hashTable::Result::Found
 	{
 		// Store the hash-table offset and patch up the pointers later.
-		vmDescOffset = (uint)(-hashTableEntryIndex - 4);
+		vmDescOffset = (uint)(-hashTableEntryIndex - 5);
 	}
 
 	OMM_SUBRESOURCE_STORE(TempOmmIndexBuffer, 4 * primitiveIndex, vmDescOffset);
diff --git a/omm-sdk/src/bake_gpu_impl.cpp b/omm-sdk/src/bake_gpu_impl.cpp
index ab26b48..6bd5997 100644
--- a/omm-sdk/src/bake_gpu_impl.cpp
+++ b/omm-sdk/src/bake_gpu_impl.cpp
@@ -250,7 +250,8 @@ ommResult  PipelineImpl::Validate(const ommGpuPipelineConfigDesc& config)
 ommResult  PipelineImpl::Validate(const ommGpuDispatchConfigDesc& config) const
 {
     const uint32_t MaxSubdivLevelAPI    = kMaxSubdivLevel;
-    const uint32_t MaxSubdivLevel       = std::min<uint32_t>(MaxSubdivLevelAPI, OmmStaticBuffersImpl::kMaxSubdivisionLevelNum);
+    const uint32_t MaxSubdivLevelGfx    = std::min<uint32_t>(MaxSubdivLevelAPI, OmmStaticBuffersImpl::kMaxSubdivisionLevelNum);
+    const uint32_t MaxSubdivLevelCS     = 12;
     const bool computeOnly              = (((uint32_t)config.bakeFlags & (uint32_t)ommGpuBakeFlags_ComputeOnly) == (uint32_t)ommGpuBakeFlags_ComputeOnly);
     const bool doSetup                  = (((uint32_t)config.bakeFlags & (uint32_t)ommGpuBakeFlags_PerformSetup) == (uint32_t)ommGpuBakeFlags_PerformSetup);
     const bool doBake                   = (((uint32_t)config.bakeFlags & (uint32_t)ommGpuBakeFlags_PerformBake) == (uint32_t)ommGpuBakeFlags_PerformBake);
@@ -259,9 +260,9 @@ ommResult  PipelineImpl::Validate(const ommGpuDispatchConfigDesc& config) const
         return ommResult_INVALID_ARGUMENT;
     if (config.indexCount % 3 != 0)
         return ommResult_INVALID_ARGUMENT;
-    if (!computeOnly && config.maxSubdivisionLevel > MaxSubdivLevel)
+    if (!computeOnly && config.maxSubdivisionLevel > MaxSubdivLevelGfx)
         return ommResult_INVALID_ARGUMENT;
-    if (config.maxSubdivisionLevel < config.globalSubdivisionLevel)
+    if (computeOnly && config.maxSubdivisionLevel > MaxSubdivLevelCS)
         return ommResult_INVALID_ARGUMENT;
     if (config.enableSubdivisionLevelBuffer)
         return ommResult_NOT_IMPLEMENTED;
@@ -605,7 +606,7 @@ ommResult PipelineImpl::GetPreDispatchInfo(const ommGpuDispatchConfigDesc& confi
     const size_t maxNumMicroTris        = bird::GetNumMicroTriangles(config.maxSubdivisionLevel);
     const size_t bitsPerState           = size_t(config.globalFormat);
     const size_t vmArraySizeInBits      = size_t(primitiveCount) * std::max<size_t>(maxNumMicroTris * bitsPerState, 32u);
-    ommIndexFormat outOmmIndexBufferFormat = primitiveCount < std::numeric_limits<uint16_t>::max() - kNumSpecialIndices ? ommIndexFormat_I16_UINT : ommIndexFormat_I32_UINT;
+    ommIndexFormat outOmmIndexBufferFormat = primitiveCount < std::numeric_limits<int16_t>::max() - kNumSpecialIndices ? ommIndexFormat_I16_UINT : ommIndexFormat_I32_UINT;
     if (force32BitIndices)
         outOmmIndexBufferFormat = ommIndexFormat_I32_UINT;
 
@@ -615,7 +616,7 @@ ommResult PipelineImpl::GetPreDispatchInfo(const ommGpuDispatchConfigDesc& confi
 
     const size_t outMaxTheoreticalOmmArraySizeInBytes   = math::Align<size_t>(math::DivUp<size_t>(vmArraySizeInBits, 8u), 4u);
 
-    const size_t outOmmArraySizeInBytes                 = outMaxTheoreticalOmmArraySizeInBytes;
+    const size_t outOmmArraySizeInBytes                 = std::min<size_t>(outMaxTheoreticalOmmArraySizeInBytes, config.maxOutOmmArraySize);
     const size_t outOmmDescSizeInBytes                  = primitiveCount * sizeof(uint64_t);
     const size_t outOmmIndexBufferSizeInBytes           = math::Align<size_t>(primitiveCount * indexBufferFormatSize, 4u);
     const size_t outOmmHistogramSizeInBytes             = (size_t(config.maxSubdivisionLevel) + 1) * 2 * sizeof(uint64_t);
@@ -653,7 +654,7 @@ ommResult PipelineImpl::GetPreDispatchInfo(const ommGpuDispatchConfigDesc& confi
 
     outPreBuildInfo->outOmmIndexCount = primitiveCount;
     RETURN_STATUS_IF_FAILED(SafeCast(outPreBuildInfo->outOmmArrayHistogramSizeInBytes, outOmmHistogramSizeInBytes));
-    outPreBuildInfo->outOmmArraySizeInBytes = outOmmArraySizeInBytes;
+    RETURN_STATUS_IF_FAILED(SafeCast(outPreBuildInfo->outOmmArraySizeInBytes, outOmmArraySizeInBytes));
     RETURN_STATUS_IF_FAILED(SafeCast(outPreBuildInfo->outOmmDescSizeInBytes, outOmmDescSizeInBytes));
     outPreBuildInfo->outOmmIndexBufferFormat = outOmmIndexBufferFormat;
     RETURN_STATUS_IF_FAILED(SafeCast(outPreBuildInfo->outOmmIndexBufferSizeInBytes, outOmmIndexBufferSizeInBytes));
@@ -710,7 +711,7 @@ ommResult PipelineImpl::InitGlobalConstants(const ommGpuDispatchConfigDesc& conf
     cbuffer.IndexCount                                 = config.indexCount;
     cbuffer.PrimitiveCount                             = primitiveCount;
     cbuffer.MaxBatchCount                              = info.MaxBatchCount;
-    cbuffer.GlobalSubdivisionLevel                     = config.globalSubdivisionLevel;
+    cbuffer.MaxOutOmmArraySize                         = preBuildInfo.outOmmArraySizeInBytes;
     cbuffer.IsOmmIndexFormat16bit                      = IsOmmIndexFormat16bit;
     cbuffer.DoSetup                                    = doSetup;
     cbuffer.SamplerIndex                               = m_pipelineBuilder.GetStaticSamplerIndex(config.runtimeSamplerDesc);
diff --git a/omm-sdk/src/version.h b/omm-sdk/src/version.h
index 0741cf2..fbf9816 100644
--- a/omm-sdk/src/version.h
+++ b/omm-sdk/src/version.h
@@ -13,7 +13,7 @@ license agreement from NVIDIA CORPORATION is strictly prohibited.
 
 #define VERSION_MAJOR 0
 #define VERSION_MINOR 9
-#define VERSION_BUILD 1
+#define VERSION_BUILD 2
 #define VERSION_REVISION 0
 
 #define VERSION_STRING STR(VERSION_MAJOR.VERSION_MINOR.VERSION_BUILD.VERSION_REVISION)
diff --git a/tests/test_omm_bake_gpu.cpp b/tests/test_omm_bake_gpu.cpp
index c830f57..083efc2 100644
--- a/tests/test_omm_bake_gpu.cpp
+++ b/tests/test_omm_bake_gpu.cpp
@@ -70,22 +70,39 @@ namespace {
 			return 3;
 		}
 
-		omm::Debug::Stats RunVmBake(
-			float alphaCutoff,
-			uint32_t subdivisionLevel,
-			int2 texSize,
-			uint32_t indexBufferSize,
-			uint32_t* triangleIndices,
-			float* texCoords,
-			uint32_t texCoordBufferSize,
-			std::function<float(int i, int j)> texCb,
-			omm::Format format = omm::Format::OC1_4_State) {
+		struct OmmBakeParams // Bundles the inputs that RunOmmBake reads through its `p` argument.
+		{
+			float alphaCutoff = 0.5f; // NOTE(review): presumably the alpha cutoff handed to the bake; its use is outside this view - confirm.
+			uint32_t subdivisionLevel = 5; // Forwarded by RunOmmBake as the bake input's maxSubdivisionLevel.
+			int2 texSize = { 1024, 1024 }; // Width (x) and height (y) of the alpha texture RunOmmBake creates.
+			uint32_t indexBufferSize = 0; // Size of triangleIndices in BYTES; RunOmmBake divides by sizeof(uint32_t) to get the index count.
+			uint32_t* triangleIndices = nullptr; // 32-bit indices uploaded to the "ib" buffer; not owned by this struct.
+			float* texCoords = nullptr; // Texture coordinates uploaded to the "vb" buffer (read with a sizeof(float2) stride); not owned.
+			uint32_t texCoordBufferSize = 0; // Size of texCoords in BYTES.
+			uint32_t maxOutOmmArraySize = 0xFFFFFFFF; // NOTE(review): presumably forwarded as the bake's OMM array size limit; use is outside this view - confirm.
+			std::function<float(int i, int j)> texCb; // Called as texCb(i, j) per texel (i along width, j along rows); result is written to the alpha channel.
+			omm::Format format = omm::Format::OC1_4_State; // OC1_2_State selects the 2-state nvrhi format; anything else maps to 4-state.
+
+			static OmmBakeParams InitQuad() // Returns default params pre-filled with a two-triangle, four-vertex quad.
+			{
+				OmmBakeParams p;
+				static uint32_t s_triangleIndices[] = { 0, 1, 2, 3, 1, 2 }; // Function-local statics: the pointers stored in p stay valid after return.
+				static float s_texCoords[] = { 0.f, 0.f,	0.f, 1.f,	1.f, 0.f,	 1.f, 1.f };
+				p.triangleIndices = s_triangleIndices;
+				p.indexBufferSize = sizeof(s_triangleIndices); // sizeof of the array, i.e. a byte size, not an element count.
+				p.texCoords = s_texCoords;
+				p.texCoordBufferSize = sizeof(s_texCoords); // Byte size as well.
+				return p; // texCb is left empty; the caller must assign it before RunOmmBake invokes it.
+			}
+		};
 
+		omm::Debug::Stats RunOmmBake(const OmmBakeParams& p) 
+		{
 			const uint32_t alphaTextureChannel = GetAlphaChannelIndex();
 
 			nvrhi::TextureDesc desc;
-			desc.width = texSize.x;
-			desc.height = texSize.y;
+			desc.width = p.texSize.x;
+			desc.height = p.texSize.y;
 			desc.format = nvrhi::Format::RGBA32_FLOAT;
 
 			nvrhi::StagingTextureHandle staging = m_device->createStagingTexture(desc, nvrhi::CpuAccessMode::Write);
@@ -100,7 +117,7 @@ namespace {
 				for (uint32_t i = 0; i < desc.width; ++i)
 				{
 					float* rgba = (float*)((uint8_t*)data + j * rowPitch + (4 * i) * sizeof(float));
-					float val = texCb(i, j);
+					float val = p.texCb(i, j);
 					rgba[0] = alphaTextureChannel == 0 ? val : 0.f;
 					rgba[1] = alphaTextureChannel == 1 ? val : 0.f;
 					rgba[2] = alphaTextureChannel == 2 ? val : 0.f;
@@ -123,18 +140,18 @@ namespace {
 			// Upload index buffer
 			nvrhi::BufferHandle ib;
 			{
-				ib = m_device->createBuffer({ .byteSize = indexBufferSize, .debugName = "ib", .format = nvrhi::Format::R32_UINT, .canHaveUAVs = true, .canHaveTypedViews = true, .canHaveRawViews = true });
+				ib = m_device->createBuffer({ .byteSize = p.indexBufferSize, .debugName = "ib", .format = nvrhi::Format::R32_UINT, .canHaveUAVs = true, .canHaveTypedViews = true, .canHaveRawViews = true });
 				m_commandList->beginTrackingBufferState(ib, nvrhi::ResourceStates::Common);
-				m_commandList->writeBuffer(ib, triangleIndices, indexBufferSize);
+				m_commandList->writeBuffer(ib, p.triangleIndices, p.indexBufferSize);
 			}
 
 			// Upload texcoords
 			nvrhi::BufferHandle vb;
 			{
-				vb = m_device->createBuffer({ .byteSize = texCoordBufferSize, .debugName = "vb", .canHaveUAVs = true, .canHaveRawViews = true });
+				vb = m_device->createBuffer({ .byteSize = p.texCoordBufferSize, .debugName = "vb", .canHaveUAVs = true, .canHaveRawViews = true });
 				m_commandList->beginTrackingBufferState(vb, nvrhi::ResourceStates::Common);
 
-				m_commandList->writeBuffer(vb, texCoords, texCoordBufferSize);
+				m_commandList->writeBuffer(vb, p.texCoords, p.texCoordBufferSize);
 			}
 
 			// Upload index buffer
@@ -147,22 +164,25 @@ namespace {
 			input.texCoordBuffer = vb;
 			input.texCoordStrideInBytes = sizeof(float2);
 			input.indexBuffer = ib;
-			input.numIndices = indexBufferSize / sizeof(uint32_t);
-			input.maxSubdivisionLevel = subdivisionLevel;
-			input.format = format == omm::Format::OC1_2_State ? nvrhi::rt::OpacityMicromapFormat::OC1_2_State : nvrhi::rt::OpacityMicromapFormat::OC1_4_State;
+			input.numIndices = p.indexBufferSize / sizeof(uint32_t);
+			input.maxSubdivisionLevel = p.subdivisionLevel;
+			input.format = p.format == omm::Format::OC1_2_State ? nvrhi::rt::OpacityMicromapFormat::OC1_2_State : nvrhi::rt::OpacityMicromapFormat::OC1_4_State;
 			input.dynamicSubdivisionScale = 0.f;
 			input.enableSpecialIndices = EnableSpecialIndices();
 			input.force32BitIndices = Force32BitIndices();
 			input.enableTexCoordDeduplication = EnableTexCoordDeduplication();
 			input.computeOnly = ComputeOnly();
+			input.maxOutOmmArraySize = p.maxOutOmmArraySize;
 
 			// Readback.
-			auto ReadBuffer = [this](nvrhi::BufferHandle buffer, size_t size = 0)->std::vector<uint8_t>
+			auto ReadBuffer = [this](nvrhi::BufferHandle buffer, size_t size = 0xFFFFFFFF)->std::vector<uint8_t> // size == 0xFFFFFFFF selects the buffer's full byteSize
 			{
+				if (size == 0) // nothing to read back: return an empty vector without mapping the buffer
+					return {};
 				std::vector<uint8_t> data;
 				void* pData = m_device->mapBuffer(buffer, nvrhi::CpuAccessMode::Read);
 				assert(pData);
-				size_t byteSize = size == 0 ? buffer->getDesc().byteSize : size;
+				size_t byteSize = size == 0xFFFFFFFF ? buffer->getDesc().byteSize : size;
-				assert(size <= buffer->getDesc().byteSize);
+				assert(byteSize <= buffer->getDesc().byteSize); // validate the resolved size; the raw 0xFFFFFFFF sentinel would fail this check for any smaller buffer
 				data.resize(byteSize);
 				memcpy(data.data(), pData, byteSize);
@@ -224,7 +244,7 @@ namespace {
 				EXPECT_LE(postBuildInfo.ommArrayBufferSize, info.ommArrayBufferSize);
 				EXPECT_LE(postBuildInfo.ommDescBufferSize, info.ommDescBufferSize);
 
-				res.ommArrayBuffer = m_device->createBuffer({ .byteSize = postBuildInfo.ommArrayBufferSize, .debugName = "omArrayBuffer", .canHaveUAVs = true, .canHaveRawViews = true });
+				res.ommArrayBuffer = m_device->createBuffer({ .byteSize = std::max(postBuildInfo.ommArrayBufferSize, 4u), .debugName = "ommArrayBuffer", .canHaveUAVs = true, .canHaveRawViews = true }); // byteSize is clamped to at least 4 so the request is never zero-sized
 
 				m_commandList->open();
 
@@ -256,10 +276,10 @@ namespace {
 				ommIndexFormat = info.ommIndexFormat;
 				ommIndexCount = info.ommIndexCount;
 
-				res.ommArrayBuffer = m_device->createBuffer({ .byteSize = info.ommArrayBufferSize, .debugName = "omArrayBuffer", .canHaveUAVs = true, .canHaveRawViews = true });
-				res.ommDescBuffer = m_device->createBuffer({ .byteSize = info.ommDescBufferSize, .debugName = "omDescBuffer", .canHaveUAVs = true, .canHaveRawViews = true });
-				res.ommIndexBuffer = m_device->createBuffer({ .byteSize = info.ommIndexBufferSize, .debugName = "omIndexBuffer", .canHaveUAVs = true, .canHaveRawViews = true });
-				res.ommDescArrayHistogramBuffer = m_device->createBuffer({ .byteSize = info.ommDescArrayHistogramSize , .debugName = "omUsageDescBuffer" , .canHaveUAVs = true, .canHaveRawViews = true });
+				res.ommArrayBuffer = m_device->createBuffer({ .byteSize = std::max<size_t>(info.ommArrayBufferSize, 4u), .debugName = "ommArrayBuffer", .canHaveUAVs = true, .canHaveRawViews = true });
+				res.ommDescBuffer = m_device->createBuffer({ .byteSize = info.ommDescBufferSize, .debugName = "ommDescBuffer", .canHaveUAVs = true, .canHaveRawViews = true });
+				res.ommIndexBuffer = m_device->createBuffer({ .byteSize = info.ommIndexBufferSize, .debugName = "ommIndexBuffer", .canHaveUAVs = true, .canHaveRawViews = true });
+				res.ommDescArrayHistogramBuffer = m_device->createBuffer({ .byteSize = info.ommDescArrayHistogramSize , .debugName = "ommUsageDescBuffer" , .canHaveUAVs = true, .canHaveRawViews = true });
 				res.ommIndexHistogramBuffer = m_device->createBuffer({ .byteSize = info.ommIndexHistogramSize , .debugName = "ommIndexHistogramBuffer" , .canHaveUAVs = true, .canHaveRawViews = true });
 				res.ommPostBuildInfoBuffer = m_device->createBuffer({ .byteSize = info.ommPostBuildInfoBufferSize , .debugName = "ommPostBuildInfoBuffer" , .canHaveUAVs = true, .canHaveRawViews = true });
 
@@ -375,7 +395,31 @@ namespace {
 			};
 		}
 
-		omm::Debug::Stats RunVmBake(
+		omm::Debug::Stats RunOmmBake( // Positional-argument overload with explicit geometry: packs everything into OmmBakeParams and forwards.
+			float alphaCutoff,
+			uint32_t subdivisionLevel,
+			int2 texSize,
+			uint32_t indexBufferSize,
+			uint32_t* triangleIndices,
+			float* texCoords,
+			uint32_t texCoordBufferSize,
+			std::function<float(int i, int j)> texCb,
+			omm::Format format = omm::Format::OC1_4_State)
+		{
+			OmmBakeParams params; // fields not assigned below keep their struct defaults
+			params.alphaCutoff = alphaCutoff;
+			params.subdivisionLevel = subdivisionLevel;
+			params.texSize = texSize;
+			params.indexBufferSize = indexBufferSize;
+			params.triangleIndices = triangleIndices;
+			params.texCoords = texCoords;
+			params.texCoordBufferSize = texCoordBufferSize;
+			params.texCb = texCb;
+			params.format = format;
+			return RunOmmBake(params);
+		}
+
+		omm::Debug::Stats RunOmmBake(
 			float alphaCutoff,
 			uint32_t subdivisionLevel,
 			int2 texSize,
@@ -383,7 +427,18 @@ namespace {
 			omm::Format format = omm::Format::OC1_4_State) {
 			uint32_t triangleIndices[] = { 0, 1, 2, 3, 1, 2 };
 			float texCoords[] = { 0.f, 0.f,	0.f, 1.f,	1.f, 0.f,	 1.f, 1.f };
-			return RunVmBake(alphaCutoff, subdivisionLevel, texSize, sizeof(triangleIndices), triangleIndices, texCoords, sizeof(texCoords), tex, format);
+
+			OmmBakeParams p;
+			p.alphaCutoff = alphaCutoff;
+			p.subdivisionLevel = subdivisionLevel;
+			p.texSize = texSize;
+			p.texCb = tex;
+			p.format = format;
+			p.triangleIndices = triangleIndices;
+			p.indexBufferSize = sizeof(triangleIndices);
+			p.texCoords = texCoords;
+			p.texCoordBufferSize = sizeof(texCoords);
+			return RunOmmBake(p);
 		}
 
 		void ExpectEqual(const omm::Debug::Stats& stats, const omm::Debug::Stats& expectedStats) {
@@ -408,7 +463,7 @@ namespace {
 		uint32_t subdivisionLevel = 4;
 		uint32_t numMicroTris = omm::bird::GetNumMicroTriangles(subdivisionLevel);
 
-		omm::Debug::Stats stats = RunVmBake(0.5f, subdivisionLevel, { 1024, 1024 }, [](int i, int j)->float {
+		omm::Debug::Stats stats = RunOmmBake(0.5f, subdivisionLevel, { 1024, 1024 }, [](int i, int j)->float {
 			return 0.6f;
 			});
 
@@ -427,7 +482,7 @@ namespace {
 		uint32_t subdivisionLevel = 3;
 		uint32_t numMicroTris = omm::bird::GetNumMicroTriangles(subdivisionLevel);
 
-		omm::Debug::Stats stats = RunVmBake(0.5f, subdivisionLevel, { 1024, 1024 }, [](int i, int j)->float {
+		omm::Debug::Stats stats = RunOmmBake(0.5f, subdivisionLevel, { 1024, 1024 }, [](int i, int j)->float {
 			return 0.6f;
 			});
 
@@ -446,7 +501,7 @@ namespace {
 		uint32_t subdivisionLevel = 2;
 		uint32_t numMicroTris = omm::bird::GetNumMicroTriangles(subdivisionLevel);
 
-		omm::Debug::Stats stats = RunVmBake(0.5f, subdivisionLevel, { 1024, 1024 }, [](int i, int j)->float {
+		omm::Debug::Stats stats = RunOmmBake(0.5f, subdivisionLevel, { 1024, 1024 }, [](int i, int j)->float {
 			return 0.6f;
 			});
 
@@ -465,7 +520,7 @@ namespace {
 		uint32_t subdivisionLevel = 1;
 		uint32_t numMicroTris = omm::bird::GetNumMicroTriangles(subdivisionLevel);
 
-		omm::Debug::Stats stats = RunVmBake(0.5f, subdivisionLevel, { 1024, 1024 }, [](int i, int j)->float {
+		omm::Debug::Stats stats = RunOmmBake(0.5f, subdivisionLevel, { 1024, 1024 }, [](int i, int j)->float {
 			return 0.6f;
 			});
 
@@ -484,7 +539,7 @@ namespace {
 		uint32_t subdivisionLevel = 0;
 		uint32_t numMicroTris = omm::bird::GetNumMicroTriangles(subdivisionLevel);
 
-		omm::Debug::Stats stats = RunVmBake(0.5f, subdivisionLevel, { 1024, 1024 }, [](int i, int j)->float {
+		omm::Debug::Stats stats = RunOmmBake(0.5f, subdivisionLevel, { 1024, 1024 }, [](int i, int j)->float {
 			return 0.6f;
 			});
 
@@ -503,7 +558,7 @@ namespace {
 		uint32_t subdivisionLevel = 4;
 		uint32_t numMicroTris = omm::bird::GetNumMicroTriangles(subdivisionLevel);
 
-		omm::Debug::Stats stats = RunVmBake(0.5f, subdivisionLevel, { 1024, 1024 }, [](int i, int j)->float {
+		omm::Debug::Stats stats = RunOmmBake(0.5f, subdivisionLevel, { 1024, 1024 }, [](int i, int j)->float {
 			return 0.4f;
 			});
 
@@ -522,7 +577,7 @@ namespace {
 		uint32_t subdivisionLevel = 3;
 		uint32_t numMicroTris = omm::bird::GetNumMicroTriangles(subdivisionLevel);
 
-		omm::Debug::Stats stats = RunVmBake(0.5f, subdivisionLevel, { 1024, 1024 }, [](int i, int j)->float {
+		omm::Debug::Stats stats = RunOmmBake(0.5f, subdivisionLevel, { 1024, 1024 }, [](int i, int j)->float {
 			return 0.4f;
 			});
 
@@ -541,7 +596,7 @@ namespace {
 		uint32_t subdivisionLevel = 2;
 		uint32_t numMicroTris = omm::bird::GetNumMicroTriangles(subdivisionLevel);
 
-		omm::Debug::Stats stats = RunVmBake(0.5f, subdivisionLevel, { 1024, 1024 }, [](int i, int j)->float {
+		omm::Debug::Stats stats = RunOmmBake(0.5f, subdivisionLevel, { 1024, 1024 }, [](int i, int j)->float {
 			return 0.4f;
 			});
 
@@ -560,7 +615,7 @@ namespace {
 		uint32_t subdivisionLevel = 1;
 		uint32_t numMicroTris = omm::bird::GetNumMicroTriangles(subdivisionLevel);
 
-		omm::Debug::Stats stats = RunVmBake(0.5f, subdivisionLevel, { 1024, 1024 }, [](int i, int j)->float {
+		omm::Debug::Stats stats = RunOmmBake(0.5f, subdivisionLevel, { 1024, 1024 }, [](int i, int j)->float {
 			return 0.4f;
 			});
 
@@ -579,7 +634,7 @@ namespace {
 		uint32_t subdivisionLevel = 0;
 		uint32_t numMicroTris = omm::bird::GetNumMicroTriangles(subdivisionLevel);
 
-		omm::Debug::Stats stats = RunVmBake(0.5f, subdivisionLevel, { 1024, 1024 }, [](int i, int j)->float {
+		omm::Debug::Stats stats = RunOmmBake(0.5f, subdivisionLevel, { 1024, 1024 }, [](int i, int j)->float {
 			return 0.4f;
 			});
 
@@ -598,7 +653,7 @@ namespace {
 
 		uint32_t subdivisionLevel = 1;
 
-		omm::Debug::Stats stats = RunVmBake(0.5f, subdivisionLevel, { 1024, 1024 }, [](int i, int j)->float {
+		omm::Debug::Stats stats = RunOmmBake(0.5f, subdivisionLevel, { 1024, 1024 }, [](int i, int j)->float {
 			if ((i) % 8 != (j) % 8)
 				return 0.f;
 			else
@@ -619,7 +674,7 @@ namespace {
 
 		uint32_t subdivisionLevel = 1;
 
-		omm::Debug::Stats stats = RunVmBake(0.5f, subdivisionLevel, { 1024, 1024 }, [](int i, int j)->float {
+		omm::Debug::Stats stats = RunOmmBake(0.5f, subdivisionLevel, { 1024, 1024 }, [](int i, int j)->float {
 			if ((i) % 8 != (j) % 8)
 				return 1.f;
 			else
@@ -641,7 +696,7 @@ namespace {
 		uint32_t subdivisionLevel = 4;
 		uint32_t numMicroTris = omm::bird::GetNumMicroTriangles(subdivisionLevel);
 
-		omm::Debug::Stats stats = RunVmBake(0.5f, subdivisionLevel, { 1024, 1024 }, [](int i, int j)->float {
+		omm::Debug::Stats stats = RunOmmBake(0.5f, subdivisionLevel, { 1024, 1024 }, [](int i, int j)->float {
 			if (i == 0 && j == 0)
 				return 0.6f;
 			return 0.4f;
@@ -664,12 +719,63 @@ namespace {
 		}
 	}
 
+	TEST_P(OMMBakeTestGPU, ZeroOmmArraySizeBudget) {
+		// With a zero-byte budget for the output OMM array, both triangles of the quad
+		// are expected to be reported as fully-unknown-opaque.
+		const uint32_t subdivisionLevel = 4;
+
+		OmmBakeParams p = OmmBakeParams::InitQuad();
+		p.subdivisionLevel = subdivisionLevel;
+		p.maxOutOmmArraySize = 0;
+		p.texCb = [](int i, int j)->float {
+			if (i == 0 && j == 0)
+				return 0.6f;
+			return 0.4f;
+		};
+
+		omm::Debug::Stats stats = RunOmmBake(p);
+
+		ExpectEqual(stats, {
+			.totalFullyUnknownOpaque = 2,
+			});
+	}
+
+	TEST_P(OMMBakeTestGPU, HalfOmmArraySizeBudget) {
+		// The budget fits the OMM data of only one of the two quad triangles; the other
+		// one is expected to come back as fully-unknown-opaque.
+		const uint32_t subdivisionLevel = 4;
+
+		OmmBakeParams p = OmmBakeParams::InitQuad();
+		p.subdivisionLevel = subdivisionLevel;
+		p.maxOutOmmArraySize = 64u; // 64 bytes covers a single subdivision level 4 primitive
+		p.texCb = [](int i, int j)->float {
+			return 0.4f;
+		};
+
+		omm::Debug::Stats stats = RunOmmBake(p);
+
+		if (EnableSpecialIndices())
+		{
+			ExpectEqual(stats, {
+				.totalFullyTransparent = 1,
+				.totalFullyUnknownOpaque = 1,  // one triangle is "out of memory"
+				});
+		}
+		else
+		{
+			ExpectEqual(stats, {
+				.totalTransparent = 256,
+				.totalFullyUnknownOpaque = 1 // one triangle is "out of memory"
+				});
+		}
+	}
+
 	TEST_P(OMMBakeTestGPU, Circle) {
 
 		uint32_t subdivisionLevel = 4;
 		uint32_t numMicroTris = omm::bird::GetNumMicroTriangles(subdivisionLevel);
 
-		omm::Debug::Stats stats = RunVmBake(0.5f, subdivisionLevel, { 1024, 1024 }, [](int i, int j)->float {
+		omm::Debug::Stats stats = RunOmmBake(0.5f, subdivisionLevel, { 1024, 1024 }, [](int i, int j)->float {
 			if (i == 0 && j == 0)
 				return 0.6f;
 
@@ -695,7 +801,7 @@ namespace {
 		uint32_t subdivisionLevel = 4;
 		uint32_t numMicroTris = omm::bird::GetNumMicroTriangles(subdivisionLevel);
 
-		omm::Debug::Stats stats = RunVmBake(0.5f, subdivisionLevel, { 1024, 1024 }, [](int i, int j)->float {
+		omm::Debug::Stats stats = RunOmmBake(0.5f, subdivisionLevel, { 1024, 1024 }, [](int i, int j)->float {
 			if (i == 0 && j == 0)
 				return 0.6f;
 
@@ -719,7 +825,7 @@ namespace {
 		uint32_t subdivisionLevel = 4;
 		uint32_t numMicroTris = omm::bird::GetNumMicroTriangles(subdivisionLevel);
 
-		omm::Debug::Stats stats = RunVmBake(0.5f, subdivisionLevel, { 1024, 1024 }, [](int i, int j)->float {
+		omm::Debug::Stats stats = RunOmmBake(0.5f, subdivisionLevel, { 1024, 1024 }, [](int i, int j)->float {
 			if (i == 0 && j == 0)
 				return 0.6f;
 
@@ -741,7 +847,7 @@ namespace {
 		uint32_t subdivisionLevel = 4;
 		uint32_t numMicroTris = omm::bird::GetNumMicroTriangles(subdivisionLevel);
 
-		omm::Debug::Stats stats = RunVmBake(0.5f, subdivisionLevel, { 1024, 1024 }, [](int i, int j)->float {
+		omm::Debug::Stats stats = RunOmmBake(0.5f, subdivisionLevel, { 1024, 1024 }, [](int i, int j)->float {
 			if (i == 0 && j == 0)
 				return 0.6f;
 
@@ -762,7 +868,7 @@ namespace {
 		uint32_t subdivisionLevel = 4;
 		uint32_t numMicroTris = omm::bird::GetNumMicroTriangles(subdivisionLevel);
 
-		omm::Debug::Stats stats = RunVmBake(0.5f, subdivisionLevel, { 1024, 1024 }, [](int i, int j)->float {
+		omm::Debug::Stats stats = RunOmmBake(0.5f, subdivisionLevel, { 1024, 1024 }, [](int i, int j)->float {
 			if (i == 0 && j == 0)
 				return 0.6f;
 
@@ -782,7 +888,7 @@ namespace {
 		uint32_t subdivisionLevel = 5;
 		uint32_t numMicroTris = omm::bird::GetNumMicroTriangles(subdivisionLevel);
 
-		omm::Debug::Stats stats = RunVmBake(0.5f, subdivisionLevel, { 1024, 1024 }, [](int i, int j)->float {
+		omm::Debug::Stats stats = RunOmmBake(0.5f, subdivisionLevel, { 1024, 1024 }, [](int i, int j)->float {
 
 			auto complexMultiply = [](float2 a, float2 b)->float2 {
 				return float2(a.x * b.x - a.y * b.y, a.x * b.y + a.y * b.x);
@@ -826,7 +932,7 @@ namespace {
 		uint32_t triangleIndices[] = { 0, 1, 2, };
 		float texCoords[] = { 0.2f, 0.f,  0.1f, 0.8f,  0.9f, 0.1f };
 
-		omm::Debug::Stats stats = RunVmBake(0.5f, subdivisionLevel, { 1024, 1024 }, sizeof(triangleIndices), triangleIndices, texCoords, sizeof(texCoords), [](int i, int j)->float {
+		omm::Debug::Stats stats = RunOmmBake(0.5f, subdivisionLevel, { 1024, 1024 }, sizeof(triangleIndices), triangleIndices, texCoords, sizeof(texCoords), [](int i, int j)->float {
 
 			auto complexMultiply = [](float2 a, float2 b)->float2 {
 				return float2(a.x * b.x - a.y * b.y, a.x * b.y + a.y * b.x);
@@ -882,7 +988,7 @@ namespace {
 		uint32_t triangleIndices[] = { 0, 1, 2, };
 		float texCoords[] = { 0.2f, 0.f,  0.1f, 0.8f,  0.9f, 0.1f };
 
-		omm::Debug::Stats stats = RunVmBake(0.5f, subdivisionLevel, { 1024, 1024 }, sizeof(triangleIndices), triangleIndices, texCoords, sizeof(texCoords), [](int i, int j)->float {
+		omm::Debug::Stats stats = RunOmmBake(0.5f, subdivisionLevel, { 1024, 1024 }, sizeof(triangleIndices), triangleIndices, texCoords, sizeof(texCoords), [](int i, int j)->float {
 
 			auto complexMultiply = [](float2 a, float2 b)->float2 {
 				return float2(a.x * b.x - a.y * b.y, a.x * b.y + a.y * b.x);
@@ -938,7 +1044,7 @@ namespace {
 		uint32_t triangleIndices[] = { 0, 1, 2, };
 		float texCoords[] = { 0.2f, 0.f,  0.1f, 0.8f,  0.9f, 0.1f };
 
-		omm::Debug::Stats stats = RunVmBake(0.5f, subdivisionLevel, { 1024, 1024 }, sizeof(triangleIndices), triangleIndices, texCoords, sizeof(texCoords), [](int i, int j)->float {
+		omm::Debug::Stats stats = RunOmmBake(0.5f, subdivisionLevel, { 1024, 1024 }, sizeof(triangleIndices), triangleIndices, texCoords, sizeof(texCoords), [](int i, int j)->float {
 
 			auto multiply = [](float2 x, float2 y)->float2 {
 				return float2(x.x * y.x - x.y * y.y, x.x * y.y + x.y * y.x);
@@ -996,7 +1102,7 @@ namespace {
 		uint32_t triangleIndices[] = { 0, 1, 2, 3, 4, 5, };
 		float texCoords[] = { 0.2f, 0.f,  0.1f, 0.8f,  0.9f, 0.1f, 0.2f, 0.f,  0.1f, 0.8f,  0.9f, 0.1f };
 
-		omm::Debug::Stats stats = RunVmBake(0.5f, subdivisionLevel, { 1024, 1024 }, sizeof(triangleIndices), triangleIndices, texCoords, sizeof(texCoords), [](int i, int j)->float {
+		omm::Debug::Stats stats = RunOmmBake(0.5f, subdivisionLevel, { 1024, 1024 }, sizeof(triangleIndices), triangleIndices, texCoords, sizeof(texCoords), [](int i, int j)->float {
 
 			auto multiply = [](float2 x, float2 y)->float2 {
 				return float2(x.x * y.x - x.y * y.y, x.x * y.y + x.y * y.x);
@@ -1055,7 +1161,7 @@ namespace {
 		//float texCoords[8] = { 0.25f, 0.25f,  0.25f, 0.75f,  0.75f, 0.25f };
 		float texCoords[] = { 0.f, 0.f,  0.f, 1.0f,  1.f, 1.f, 1.f, 0.f };
 
-		omm::Debug::Stats stats = RunVmBake(0.5f, subdivisionLevel, { 4, 4 }, sizeof(triangleIndices), triangleIndices, texCoords, sizeof(texCoords), [](int i, int j)->float {
+		omm::Debug::Stats stats = RunOmmBake(0.5f, subdivisionLevel, { 4, 4 }, sizeof(triangleIndices), triangleIndices, texCoords, sizeof(texCoords), [](int i, int j)->float {
 
 			uint32_t x = (i) % 2;
 			uint32_t y = (j) % 2;