Skip to content


Browse files Browse the repository at this point in the history
- Added F2 to toggle GUI
- Added toggle to hackily blend normals (to mitigate shadow issue)
- Removed now-useless "render to screen" button

- Added smooth geometric normal to g-buffer
- Quite possibly fixed shadow bias for the last time
- Fixed UB modulo with negative operand in VsmShadow.frag

- Moved some math utilities to separate header and namespace
  • Loading branch information
JuanDiegoMontoya committed Dec 8, 2023
1 parent 5ab5e09 commit 25f21f5
Show file tree
Hide file tree
Showing 14 changed files with 195 additions and 238 deletions.
159 changes: 32 additions & 127 deletions data/shaders/ShadeDeferredPbr.frag.glsl
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,9 @@
#include "Utility.h.glsl"

layout(binding = 0) uniform sampler2D s_gAlbedo;
layout(binding = 1) uniform sampler2D s_gNormal;
layout(binding = 1) uniform sampler2D s_gNormalAndFaceNormal;
layout(binding = 2) uniform sampler2D s_gDepth;
layout(binding = 3) uniform sampler2D s_rsmIndirect;
layout(binding = 4) uniform sampler2D s_rsmDepth;
layout(binding = 5) uniform sampler2DShadow s_rsmDepthShadow;
layout(binding = 3) uniform sampler2D s_gSmoothVertexNormal;
layout(binding = 6) uniform sampler2D s_emission;
layout(binding = 7) uniform sampler2D s_metallicRoughnessAo;

Expand All @@ -27,6 +25,7 @@ layout(location = 0) out vec3 o_color;
#define VSM_SHOW_PAGE_OUTLINES (1 << 2)
#define VSM_SHOW_SHADOW_DEPTH (1 << 3)
#define VSM_SHOW_DIRTY_PAGES (1 << 4)
#define BLEND_NORMALS (1 << 5)

layout(binding = 1, std140) uniform ShadingUniforms
Expand Down Expand Up @@ -61,25 +60,21 @@ layout(binding = 6, std430) readonly buffer LightBuffer
GpuLight lights[];

// Returns a shadow bias in the space of whatever texelWidth is in.
// For example, if texelWidth is 0.125 units in world space, the bias will be in world space too.
// Returns an exact shadow bias in the space of whatever texelWidth is in.
// N and L are expected to be normalized
float GetShadowBias(vec3 N, vec3 L, float texelWidth)
const float sqrt2 = 1.41421356;
const float quantize = 2.0 / (1 << 23);
const float sqrt2 = 1.41421356; // Mul by sqrt2 to get diagonal length
const float quantize = 2.0 / (1 << 23); // Arbitrary constant that should help prevent most numerical issues
const float b = sqrt2 * texelWidth / 2.0;
const float NoL = clamp(dot(N, L), 0.0, 1.0);
const float NoL = clamp(abs(dot(N, L)), 0.0001, 1.0);
return quantize + b * length(cross(N, L)) / NoL;

float CalcVsmShadowBias(uint clipmapLevel, vec3 faceNormal)
const float magicClipmapLevelBias = 0.1;
const float magicConstantBias = 2.0 / (1 << 24);
const float halfOrthoFrustumLength = clipmapUniforms.projectionZLength / 2;
const float shadowTexelSize = exp2(clipmapLevel + (clipmapLevel * magicClipmapLevelBias)) * clipmapUniforms.firstClipmapTexelLength;
const float bias = magicConstantBias + GetShadowBias(faceNormal,, shadowTexelSize) / halfOrthoFrustumLength;

const float shadowTexelSize = exp2(clipmapLevel) * clipmapUniforms.firstClipmapTexelLength;
const float bias = GetShadowBias(faceNormal,, shadowTexelSize);
return bias;

Expand Down Expand Up @@ -120,7 +115,8 @@ ShadowVsmOut ShadowVsm(vec3 fragWorldPos, vec3 normal)
const uint physicalAddress = GetPagePhysicalAddress(ret.pageData);
ret.shadowDepth = LoadPageTexel(pageTexel, physicalAddress);

const float bias = min(0.03, CalcVsmShadowBias(addr.clipmapLevel, normal));
const float maxBias = exp2(ret.clipmapLevel) * 0.1;
const float bias = min(maxBias, 0.01 + CalcVsmShadowBias(addr.clipmapLevel, normal)) / clipmapUniforms.projectionZLength;

if (ret.shadowDepth + bias < ret.projectedDepth)
Expand Down Expand Up @@ -166,7 +162,9 @@ float ShadowVsmPcss(vec3 fragWorldPos, vec3 flatNormal)
const float depthSample = texelFetch(s_gDepth, gid, 0).x;
const PageAddressInfo addr = GetClipmapPageFromDepth(depthSample, gid, textureSize(s_gDepth, 0));

const float baseBias = min(0.03, CalcVsmShadowBias(addr.clipmapLevel, flatNormal));
const float maxBias = exp2(addr.clipmapLevel) * 0.02;
const float baseBias = min(maxBias, CalcVsmShadowBias(addr.clipmapLevel, flatNormal));
const float invProjZLength = 1.0 / clipmapUniforms.projectionZLength;

// Blocker search
float accumDepth = 0;
Expand All @@ -178,8 +176,8 @@ float ShadowVsmPcss(vec3 fragWorldPos, vec3 flatNormal)
const float theta = xi.y * 2.0 * 3.14159;
const vec2 offset = r * vec2(cos(theta), sin(theta));
// PCF puts some samples under the surface when L is not parallel to N
const float pcfBias = 2.0 * r / clipmapUniforms.projectionZLength;
const float realBias = baseBias + mix(pcfBias, 0.0, max(0.0, dot(flatNormal,;
const float pcfBias = 2.0 * r;
const float realBias = invProjZLength * (baseBias + mix(pcfBias, 0.0, max(0.0, dot(flatNormal,;

float depth;
if (TrySampleVsmClipmap(int(addr.clipmapLevel), addr.posLightNdc.xy * 0.5 + 0.5, offset, depth))
Expand Down Expand Up @@ -286,105 +284,6 @@ float ShadowVsmPcss(vec3 fragWorldPos, vec3 flatNormal)
return lightVisibility / shadowUniforms.pcfSamples;

float ShadowPCF(vec2 uv, float viewDepth, float bias)
float lightOcclusion = 0.0;

for (uint i = 0; i < shadowUniforms.pcfSamples; i++)
vec2 xi = fract(Hammersley(i, shadowUniforms.pcfSamples) + hash(gl_FragCoord.xy) + shadingUniforms.random);
float r = sqrt(xi.x);
float theta = xi.y * 2.0 * 3.14159;
vec2 offset = shadowUniforms.maxPcfRadius * vec2(r * cos(theta), r * sin(theta));
// float lightDepth = textureLod(s_rsmDepth, uv + offset, 0).x;
// lightDepth += bias;
// if (lightDepth >= viewDepth)
// {
// lightOcclusion += 1.0;
// }
lightOcclusion += textureLod(s_rsmDepthShadow, vec3(uv + offset, viewDepth - bias), 0);

return lightOcclusion / shadowUniforms.pcfSamples;

// Marches a ray in view space until it collides with the height field defined by the shadow map.
// We assume the height field has a certain thickness so rays can pass behind it
float MarchShadowRay(vec3 rayLightViewPos, vec3 rayLightViewDir, float bias, mat4 lightProj, mat4 lightInvProj)
for (int stepIdx = 0; stepIdx < shadowUniforms.stepsPerRay; stepIdx++)
rayLightViewPos += rayLightViewDir * shadowUniforms.rayStepSize;

vec4 rayLightClipPos = lightProj * vec4(rayLightViewPos, 1.0);
rayLightClipPos.xy /= rayLightClipPos.w; // to NDC
rayLightClipPos.xy = rayLightClipPos.xy * 0.5 + 0.5; // to UV
float shadowMapWindowZ = /*bias*/ + textureLod(s_rsmDepth, rayLightClipPos.xy, 0.0).x;
// Note: view Z gets *smaller* as we go deeper into the frustum (farther from the camera)
float shadowMapViewZ = UnprojectUV_ZO(shadowMapWindowZ, rayLightClipPos.xy, lightInvProj).z;

// Positive dDepth: tested position is below the shadow map
// Negative dDepth: tested position is above
float dDepth = shadowMapViewZ - rayLightViewPos.z;

// Ray is under the shadow map height field
if (dDepth > 0)
// Ray intersected some geometry
// OR
// The ray hasn't collided with anything on the last step (we're already under the height field, assume infinite thickness so there is at least some shadow)
if (dDepth < shadowUniforms.heightmapThickness || stepIdx == shadowUniforms.stepsPerRay - 1)
return 0.0;

return 1.0;

float ShadowRayTraced(vec3 fragWorldPos, vec3 lightDir, float bias, mat4 lightView, mat4 lightProj, mat4 lightInvProj)
float lightOcclusion = 0.0;

for (int rayIdx = 0; rayIdx < shadowUniforms.shadowRays; rayIdx++)
vec2 xi = Hammersley(rayIdx, shadowUniforms.shadowRays);
xi = fract(xi + hash(gl_FragCoord.xy) + shadingUniforms.random);
vec3 newLightDir = RandVecInCone(xi, lightDir, shadowUniforms.sourceAngleRad);

vec3 rayLightViewDir = (lightView * vec4(newLightDir, 0.0)).xyz;
vec3 rayLightViewPos = (lightView * vec4(fragWorldPos, 1.0)).xyz;

lightOcclusion += MarchShadowRay(rayLightViewPos, rayLightViewDir, bias, lightProj, lightInvProj);

return lightOcclusion / shadowUniforms.shadowRays;

float Shadow(vec3 fragWorldPos, vec3 normal, vec3 lightDir, mat4 lightViewProj)
vec4 clip = lightViewProj * vec4(fragWorldPos, 1.0);
vec2 uv = clip.xy * .5 + .5;
if (uv.x < 0 || uv.x > 1 || uv.y < 0 || uv.y > 1)
return 0;

// Analytically compute slope-scaled bias
const float maxBias = 0.0008;
float bias = maxBias;
//float bias = GetShadowBias(normal,, textureSize(s_rsmDepthShadow, 0));
//bias = min(bias, maxBias);

switch (shadowUniforms.shadowMode)
//case 0: return ShadowPCF(uv, clip.z * .5 + .5, bias);
//case 1: return ShadowRayTraced(fragWorldPos, lightDir, bias);
default: return 1.0;

vec3 LocalLightIntensity(vec3 viewDir, Surface surface)
vec3 color = { 0, 0, 0 };
Expand All @@ -402,9 +301,10 @@ vec3 LocalLightIntensity(vec3 viewDir, Surface surface)
void main()
const vec3 albedo = textureLod(s_gAlbedo, v_uv, 0.0).rgb;
const vec4 normalOctAndFlatNormalOct = textureLod(s_gNormal, v_uv, 0.0).xyzw;
const vec3 normal = OctToVec3(normalOctAndFlatNormalOct.xy);
const vec4 normalOctAndFlatNormalOct = textureLod(s_gNormalAndFaceNormal, v_uv, 0.0).xyzw;
const vec3 mappedNormal = OctToVec3(normalOctAndFlatNormalOct.xy);
const vec3 flatNormal = OctToVec3(;
const vec3 smoothNormal = OctToVec3(textureLod(s_gSmoothVertexNormal, v_uv, 0.0).xy);
const float depth = textureLod(s_gDepth, v_uv, 0.0).x;
const vec3 emission = textureLod(s_emission, v_uv, 0.0).rgb;
const vec3 metallicRoughnessAo = textureLod(s_metallicRoughnessAo, v_uv, 0.0).rgb;
Expand All @@ -416,16 +316,21 @@ void main()

const vec3 fragWorldPos = UnprojectUV_ZO(depth, v_uv, perFrameUniforms.invViewProj);

const vec3 incidentDir =;
const float cosTheta = max(0.0, dot(incidentDir, normal));
const vec3 diffuse = albedo * cosTheta * shadingUniforms.sunStrength.rgb;

//float shadow = Shadow(fragWorldPos, normal,;
ShadowVsmOut shadowVsm = ShadowVsm(fragWorldPos, flatNormal);
float shadowSun = shadowVsm.shadow;
//shadowSun = 0;
shadowSun = ShadowVsmPcss(fragWorldPos, flatNormal);

vec3 normal = mappedNormal;

if ((shadingUniforms.debugFlags & BLEND_NORMALS) != 0)
const float NoL_sun_flat = dot(smoothNormal,;
float horiz = 1.0 - NoL_sun_flat;
horiz *= horiz;
normal = normalize(mix(mappedNormal, smoothNormal, clamp(horiz, 0.0, 1.0)));

vec3 viewDir = normalize( - fragWorldPos);

Surface surface;
Expand Down
2 changes: 1 addition & 1 deletion data/shaders/shadows/ShadowMain.vert.glsl
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ void main()
const uint index = indices[indexOffset + primitive];
const Vertex vertex = vertices[vertexOffset + index];
const vec3 position = PackedToVec3(vertex.position);
const mat4 transform = transforms[instanceId];
const mat4 transform = transforms[instanceId].modelCurrent;

v_meshletId = meshletId;
v_uv = PackedToVec2(vertex.uv);
Expand Down
15 changes: 6 additions & 9 deletions data/shaders/shadows/vsm/VsmShadow.frag.glsl
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,6 @@ layout(binding = 1, std140) uniform VsmShadowUniforms
layout(location = 0) in vec2 v_uv;
layout(location = 1) in flat uint v_meshletId;

uint AtomicMinPageTexel(ivec2 texel, uint page, float value)
const int atlasWidth = imageSize(i_physicalPagesUint).x / PAGE_SIZE;
const ivec2 pageCorner = PAGE_SIZE * ivec2(page / atlasWidth, page % atlasWidth);
return imageAtomicMin(i_physicalPagesUint, pageCorner + texel, floatBitsToUint(value));

void main()
const Meshlet meshlet = meshlets[v_meshletId];
Expand Down Expand Up @@ -52,11 +45,15 @@ void main()

const uint clipmapIndex = clipmapUniforms.clipmapTableIndices[clipmapLod];
const ivec2 pageOffset = clipmapUniforms.clipmapPageOffsets[clipmapLod];
const ivec2 pageAddressXy = (ivec2(gl_FragCoord.xy) / PAGE_SIZE + pageOffset) % imageSize(i_pageTables).xy;
const ivec2 pageAddressXy = ivec2(mod(vec2(ivec2(gl_FragCoord.xy) / PAGE_SIZE + pageOffset), vec2(imageSize(i_pageTables).xy)));
const uint pageData = imageLoad(i_pageTables, ivec3(pageAddressXy, clipmapIndex)).x;
if (GetIsPageBacked(pageData) && GetIsPageDirty(pageData))
const ivec2 pageTexel = ivec2(gl_FragCoord.xy) % PAGE_SIZE;
AtomicMinPageTexel(pageTexel, GetPagePhysicalAddress(pageData), gl_FragCoord.z);
const uint page = GetPagePhysicalAddress(pageData);
const int atlasWidth = imageSize(i_physicalPagesUint).x / PAGE_SIZE;
const ivec2 pageCorner = PAGE_SIZE * ivec2(page / atlasWidth, page % atlasWidth);
const uint depthUint = floatBitsToUint(gl_FragCoord.z);
imageAtomicMin(i_physicalPagesUint, pageCorner + pageTexel, depthUint);
6 changes: 3 additions & 3 deletions data/shaders/visbuffer/CullMeshlets.comp.glsl
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
void DebugDrawMeshletAabb(in uint meshletId)
const uint instanceId = meshlets[meshletId].instanceId;
const mat4 transform = transforms[instanceId];
const mat4 transform = transforms[instanceId].modelCurrent;
const vec3 aabbMin = PackedToVec3(meshlets[meshletId].aabbMin);
const vec3 aabbMax = PackedToVec3(meshlets[meshletId].aabbMax);
const vec3 aabbSize = aabbMax - aabbMin;
Expand Down Expand Up @@ -83,7 +83,7 @@ struct GetMeshletUvBoundsParams
void GetMeshletUvBounds(GetMeshletUvBoundsParams params, out vec2 minXY, out vec2 maxXY, out float nearestZ, out bool intersectsNearPlane)
const uint instanceId = meshlets[params.meshletId].instanceId;
const mat4 transform = transforms[instanceId];
const mat4 transform = transforms[instanceId].modelCurrent;
const vec3 aabbMin = PackedToVec3(meshlets[params.meshletId].aabbMin);
const vec3 aabbMax = PackedToVec3(meshlets[params.meshletId].aabbMax);
const vec3 aabbSize = aabbMax - aabbMin;
Expand Down Expand Up @@ -174,7 +174,7 @@ bool CullQuadHiz(vec2 minXY, vec2 maxXY, float nearestZ)
bool CullMeshletFrustum(in uint meshletId, View view)
const uint instanceId = meshlets[meshletId].instanceId;
const mat4 transform = transforms[instanceId];
const mat4 transform = transforms[instanceId].modelCurrent;
const vec3 aabbMin = PackedToVec3(meshlets[meshletId].aabbMin);
const vec3 aabbMax = PackedToVec3(meshlets[meshletId].aabbMax);
const vec3 aabbCenter = (aabbMin + aabbMax) / 2.0;
Expand Down
4 changes: 2 additions & 2 deletions data/shaders/visbuffer/CullTriangles.comp.glsl
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ bool CullTriangle(Meshlet meshlet, uint localId)
return false;

if ((perFrameUniforms.flags & CULL_PRIMITIVE_VSM) != 0)
if (currentView.type == VIEW_TYPE_VIRTUAL)
Expand All @@ -200,7 +200,7 @@ void main()
if (localId == 0)
sh_primitivesPassed = 0;
sh_mvp = currentView.viewProj * transforms[meshlet.instanceId];
sh_mvp = currentView.viewProj * transforms[meshlet.instanceId].modelCurrent;

Expand Down
4 changes: 2 additions & 2 deletions data/shaders/visbuffer/Visbuffer.vert.glsl
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,13 @@ void main()
const uint indexOffset = meshlets[meshletId].indexOffset;
const uint primitiveOffset = meshlets[meshletId].primitiveOffset;
const uint instanceId = meshlets[meshletId].instanceId;

const uint primitive = uint(primitives[primitiveOffset + primitiveId]);
const uint index = indices[indexOffset + primitive];
const Vertex vertex = vertices[vertexOffset + index];
const vec3 position = PackedToVec3(vertex.position);
const vec2 uv = PackedToVec2(vertex.uv);
const mat4 transform = transforms[instanceId];
const mat4 transform = transforms[instanceId].modelCurrent;

o_meshletId = meshletId;
o_primitiveId = primitiveId / 3;
Expand Down
8 changes: 7 additions & 1 deletion data/shaders/visbuffer/VisbufferCommon.h.glsl
Original file line number Diff line number Diff line change
Expand Up @@ -107,9 +107,15 @@ layout (std430, binding = 3) restrict readonly buffer MeshletIndexBuffer
uint indices[];

struct ObjectUniforms
mat4 modelPrevious;
mat4 modelCurrent;

layout (std430, binding = 4) restrict readonly buffer TransformBuffer
mat4 transforms[];
ObjectUniforms transforms[];

layout (std430, binding = 6) restrict buffer IndirectDrawCommand
Expand Down

0 comments on commit 25f21f5

Please sign in to comment.