Merge pull request #5577 from vastcharade/horizAmbOcclPerf

Horiz amb occl performance updates
2025-04-09 21:32:12 +02:00 · 2015-08-22 12:39:51 -07:00 · 2015-08-22 12:39:51 -07:00 · fe44442ffe
commit fe44442ffe
parent 2bfa121925 21dc58eb85
3 changed files with 211 additions and 134 deletions
--- a/libraries/render-utils/src/AmbientOcclusionEffect.cpp
+++ b/libraries/render-utils/src/AmbientOcclusionEffect.cpp
@ -51,8 +51,13 @@ const gpu::PipelinePointer& AmbientOcclusion::getOcclusionPipeline() {
        _gBiasLoc = program->getUniforms().findLocation("g_bias");
        _gSampleRadiusLoc = program->getUniforms().findLocation("g_sample_rad");
        _gIntensityLoc = program->getUniforms().findLocation("g_intensity");
-        _bufferWidthLoc = program->getUniforms().findLocation("bufferWidth");
-        _bufferHeightLoc = program->getUniforms().findLocation("bufferHeight");
+
+        _nearLoc = program->getUniforms().findLocation("near");
+        _depthScaleLoc = program->getUniforms().findLocation("depthScale");
+        _depthTexCoordOffsetLoc = program->getUniforms().findLocation("depthTexCoordOffset");
+        _depthTexCoordScaleLoc = program->getUniforms().findLocation("depthTexCoordScale");
+        _renderTargetResLoc = program->getUniforms().findLocation("renderTargetRes");
+        _renderTargetResInvLoc = program->getUniforms().findLocation("renderTargetResInv");

        gpu::StatePointer state = gpu::StatePointer(new gpu::State());

@ -172,9 +177,19 @@ const gpu::PipelinePointer& AmbientOcclusion::getBlendPipeline() {
 void AmbientOcclusion::run(const render::SceneContextPointer& sceneContext, const render::RenderContextPointer& renderContext) {
    assert(renderContext->args);
    assert(renderContext->args->_viewFrustum);
-    RenderArgs* args = renderContext->args;

    gpu::Batch batch;
+    RenderArgs* args = renderContext->args;
+
+    auto framebufferCache = DependencyManager::get<FramebufferCache>();
+    QSize framebufferSize = framebufferCache->getFrameBufferSize();
+    float fbWidth = framebufferSize.width();
+    float fbHeight = framebufferSize.height();
+    float sMin = args->_viewport.x / fbWidth;
+    float sWidth = args->_viewport.z / fbWidth;
+    float tMin = args->_viewport.y / fbHeight;
+    float tHeight = args->_viewport.w / fbHeight;
+

    glm::mat4 projMat;
    Transform viewMat;
@ -186,8 +201,8 @@ void AmbientOcclusion::run(const render::SceneContextPointer& sceneContext, cons

    // Occlusion step
    getOcclusionPipeline();
-    batch.setResourceTexture(0, DependencyManager::get<FramebufferCache>()->getPrimaryDepthTexture());
-    batch.setResourceTexture(1, DependencyManager::get<FramebufferCache>()->getPrimaryNormalTexture());
+    batch.setResourceTexture(0, framebufferCache->getPrimaryDepthTexture());
+    batch.setResourceTexture(1, framebufferCache->getPrimaryNormalTexture());
    _occlusionBuffer->setRenderBuffer(0, _occlusionTexture);
    batch.setFramebuffer(_occlusionBuffer);

@ -203,8 +218,32 @@ void AmbientOcclusion::run(const render::SceneContextPointer& sceneContext, cons
    batch._glUniform1f(_gBiasLoc, g_bias);
    batch._glUniform1f(_gSampleRadiusLoc, g_sample_rad);
    batch._glUniform1f(_gIntensityLoc, g_intensity);
-    batch._glUniform1f(_bufferWidthLoc, DependencyManager::get<FramebufferCache>()->getFrameBufferSize().width());
-    batch._glUniform1f(_bufferHeightLoc, DependencyManager::get<FramebufferCache>()->getFrameBufferSize().height());
+
+    // setup uniforms for unpacking a view-space position from the depth buffer
+    // This code is taken from the DeferredLightingEffect::render() method in DeferredLightingEffect.cpp.
+    // DeferredBuffer.slh shows how the unpacking is done and what variables are needed.
+
+    // initialize the view-space unpacking uniforms using frustum data
+    float left, right, bottom, top, nearVal, farVal;
+    glm::vec4 nearClipPlane, farClipPlane;
+
+    args->_viewFrustum->computeOffAxisFrustum(left, right, bottom, top, nearVal, farVal, nearClipPlane, farClipPlane);
+
+    float depthScale = (farVal - nearVal) / farVal;
+    float nearScale = -1.0f / nearVal;
+    float depthTexCoordScaleS = (right - left) * nearScale / sWidth;
+    float depthTexCoordScaleT = (top - bottom) * nearScale / tHeight;
+    float depthTexCoordOffsetS = left * nearScale - sMin * depthTexCoordScaleS;
+    float depthTexCoordOffsetT = bottom * nearScale - tMin * depthTexCoordScaleT;
+
+    // now set the position-unpacking uniforms
+    batch._glUniform1f(_nearLoc, nearVal);
+    batch._glUniform1f(_depthScaleLoc, depthScale);
+    batch._glUniform2f(_depthTexCoordOffsetLoc, depthTexCoordOffsetS, depthTexCoordOffsetT);
+    batch._glUniform2f(_depthTexCoordScaleLoc, depthTexCoordScaleS, depthTexCoordScaleT);
+
+    batch._glUniform2f(_renderTargetResLoc, fbWidth, fbHeight);
+    batch._glUniform2f(_renderTargetResInvLoc, 1.0/fbWidth, 1.0/fbHeight);

    glm::vec4 color(0.0f, 0.0f, 0.0f, 1.0f);
    glm::vec2 bottomLeft(-1.0f, -1.0f);
@ -238,13 +277,13 @@ void AmbientOcclusion::run(const render::SceneContextPointer& sceneContext, cons
    // Blend step
    getBlendPipeline();
    batch.setResourceTexture(0, _hBlurTexture);
-    batch.setFramebuffer(DependencyManager::get<FramebufferCache>()->getPrimaryFramebuffer());
+    batch.setFramebuffer(framebufferCache->getPrimaryFramebuffer());

    // Bind the fourth gpu::Pipeline we need - for blending the primary color buffer with blurred occlusion texture
    batch.setPipeline(getBlendPipeline());

    DependencyManager::get<GeometryCache>()->renderQuad(batch, bottomLeft, topRight, texCoordTopLeft, texCoordBottomRight, color);
-    
+
    // Ready to render
    args->_context->render((batch));
 }
--- a/libraries/render-utils/src/AmbientOcclusionEffect.h
+++ b/libraries/render-utils/src/AmbientOcclusionEffect.h
@ -36,8 +36,15 @@ private:
    gpu::int32 _gBiasLoc;
    gpu::int32 _gSampleRadiusLoc;
    gpu::int32 _gIntensityLoc;
-    gpu::int32 _bufferWidthLoc;
-    gpu::int32 _bufferHeightLoc;
+
+    gpu::int32 _nearLoc;
+    gpu::int32 _depthScaleLoc;
+    gpu::int32 _depthTexCoordOffsetLoc;
+    gpu::int32 _depthTexCoordScaleLoc;
+    gpu::int32 _renderTargetResLoc;
+    gpu::int32 _renderTargetResInvLoc;
+
+
    float g_scale;
    float g_bias;
    float g_sample_rad;
--- a/libraries/render-utils/src/ambient_occlusion.slf
+++ b/libraries/render-utils/src/ambient_occlusion.slf
@ -30,25 +30,40 @@ uniform float g_scale;
 uniform float g_bias;
 uniform float g_sample_rad;
 uniform float g_intensity;
-uniform float bufferWidth;
-uniform float bufferHeight;
+
+// the distance to the near clip plane
+uniform float near;
+
+// scale factor for depth: (far - near) / far
+uniform float depthScale;
+
+// offset for depth texture coordinates
+uniform vec2 depthTexCoordOffset;
+
+// scale for depth texture coordinates
+uniform vec2 depthTexCoordScale;
+
+// the resolution of the occlusion buffer
+// and its inverse
+uniform vec2 renderTargetRes;
+uniform vec2 renderTargetResInv;
+
+

 const float PI = 3.14159265;

-const vec2 FocalLen = vec2(1.0, 1.0);
-
-const vec2 LinMAD = vec2(0.1-10.0, 0.1+10.0) / (2.0*0.1*10.0);
-
-const vec2 AORes = vec2(1024.0, 768.0);
-const vec2 InvAORes = vec2(1.0/1024.0, 1.0/768.0);
-const vec2 NoiseScale = vec2(1024.0, 768.0) / 4.0;
-
 const float AOStrength = 1.9;
-const float R = 0.3;
-const float R2 = 0.3*0.3;
-const float NegInvR2 = - 1.0 / (0.3*0.3);
+
+
+// TODO: R (radius) should be exposed as a uniform parameter
+const float R = 0.01;
+const float R2 = 0.01*0.01;
+const float NegInvR2 = - 1.0 / (0.01*0.01);
+
+
+
 // can't use tan to initialize a const value
-const float TanBias = 0.57735027; // tan(30.0 * PI / 180.0); 
+const float TanBias = 0.57735027; // tan(30.0 * PI / 180.0);
 const float MaxRadiusPixels = 50.0;

 const int NumDirections = 6;
@ -56,113 +71,126 @@ const int NumSamples = 4;

 out vec4 outFragColor;

+/**
+ * Reads the normal texture at the given coordinates and returns the
+ * stored normal (view space) as a unit-length vector.
+ * uv: the texture coordinates of the sample to read.
+ */
+vec3 GetViewNormalFromTexture(vec2 uv) {
+    // The stored value is in [0,1]. Re-centring it around zero is enough here:
+    // normalize() removes the overall scale, so the usual v*2 - 1.0 is not needed.
+    vec3 centredNormal = texture(normalTexture, uv).xyz - 0.5;
+    return normalize(centredNormal);
+}
+
+/**
+ * Converts a depth value into a linearized view-space z.
+ * d: the depth value to convert, in [0-1], usually read from a depth texture.
+ * Relies on the `near` and `depthScale` uniforms.
+ */
 float ViewSpaceZFromDepth(float d){
-	// [0,1] -> [-1,1] clip space
-	d = d * 2.0 - 1.0;
-
-	// Get view space Z
-	return -1.0 / (LinMAD.x * d + LinMAD.y);
+    float scaledDepth = d * depthScale - 1.0;
+    return near / scaledDepth;
 }

+/**
+ * Converts a uv coordinate and depth value into a 3D view space coordinate.
+ * The uv is mapped through the depthTexCoordOffset / depthTexCoordScale
+ * uniforms and then scaled by z to give the x and y components.
+ * uv: the uv coordinates to convert
+ * z: the view space depth of the uv coordinate.
+ */
 vec3 UVToViewSpace(vec2 uv, float z){
-	//uv = UVToViewA * uv + UVToViewB;
-	return vec3(uv * z, z);
+    // Use the uv that was passed in rather than varTexcoord, so that neighbouring
+    // and stepped samples are unprojected at their own position and not along
+    // the current fragment's view ray.
+    return vec3((depthTexCoordOffset + uv * depthTexCoordScale) * z, z);
 }

-vec3 GetViewPos(vec2 uv){
-	float z = ViewSpaceZFromDepth(texture(depthTexture, uv).r);
-	return UVToViewSpace(uv, z);
+/**
+ * Converts a uv coordinate into a 3D view space coordinate.
+ * The depth of the uv coord is determined from the depth texture.
+ * uv: the uv coordinates to convert
+ */
+vec3 GetViewPos(vec2 uv) {
+    float z = ViewSpaceZFromDepth(texture(depthTexture, uv).r);
+    return UVToViewSpace(uv, z);
 }

-vec3 GetViewPosPoint(ivec2 uv){
-	vec2 coord = vec2(gl_FragCoord.xy) + uv;
-	//float z = texelFetch(texture0, coord, 0).r;
-    float z = texture(depthTexture, uv).r;
-	return UVToViewSpace(uv, z);
+
+float TanToSin(float x) {
+    return x * inversesqrt(x*x + 1.0);
 }

-float TanToSin(float x){
-	return x * inversesqrt(x*x + 1.0);
+float InvLength(vec2 V) {
+    return inversesqrt(dot(V, V));
 }

-float InvLength(vec2 V){
-	return inversesqrt(dot(V,V));
+float Tangent(vec3 V) {
+    return V.z * InvLength(V.xy);
 }

-float Tangent(vec3 V){
-	return V.z * InvLength(V.xy);
+float BiasedTangent(vec3 V) {
+    return V.z * InvLength(V.xy) + TanBias;
 }

-float BiasedTangent(vec3 V){
-	return V.z * InvLength(V.xy) + TanBias;
-}
-
-float Tangent(vec3 P, vec3 S){
+float Tangent(vec3 P, vec3 S) {
    return -(P.z - S.z) * InvLength(S.xy - P.xy);
 }

-float Length2(vec3 V){
-	return dot(V,V);
+float Length2(vec3 V) {
+    return dot(V, V);
 }

-vec3 MinDiff(vec3 P, vec3 Pr, vec3 Pl){
+vec3 MinDiff(vec3 P, vec3 Pr, vec3 Pl) {
    vec3 V1 = Pr - P;
    vec3 V2 = P - Pl;
    return (Length2(V1) < Length2(V2)) ? V1 : V2;
 }

-vec2 SnapUVOffset(vec2 uv){
-    return round(uv * AORes) * InvAORes;
+vec2 SnapUVOffset(vec2 uv) {
+    return round(uv * renderTargetRes) * renderTargetResInv;
 }

-float Falloff(float d2){
-	return d2 * NegInvR2 + 1.0f;
+float Falloff(float d2) {
+    return d2 * NegInvR2 + 1.0f;
 }

-float HorizonOcclusion(	vec2 deltaUV, vec3 P, vec3 dPdu, vec3 dPdv, float randstep, float numSamples){
-	float ao = 0;
+float HorizonOcclusion(vec2 deltaUV, vec3 P, vec3 dPdu, vec3 dPdv, float randstep, float numSamples) {
+    float ao = 0;

-	// Offset the first coord with some noise
-	vec2 uv = varTexcoord + SnapUVOffset(randstep*deltaUV);
-	deltaUV = SnapUVOffset( deltaUV );
+    // Offset the first coord with some noise
+    vec2 uv = varTexcoord + SnapUVOffset(randstep*deltaUV);
+    deltaUV = SnapUVOffset(deltaUV);

-	// Calculate the tangent vector
-	vec3 T = deltaUV.x * dPdu + deltaUV.y * dPdv;
+    // Calculate the tangent vector
+    vec3 T = deltaUV.x * dPdu + deltaUV.y * dPdv;

-	// Get the angle of the tangent vector from the viewspace axis
-	float tanH = BiasedTangent(T);
-	float sinH = TanToSin(tanH);
+    // Get the angle of the tangent vector from the viewspace axis
+    float tanH = BiasedTangent(T);
+    float sinH = TanToSin(tanH);

-	float tanS;
-	float d2;
-	vec3 S;
+    float tanS;
+    float d2;
+    vec3 S;

-	// Sample to find the maximum angle
-	for(float s = 1; s <= numSamples; ++s){
-		uv += deltaUV;
-		S = GetViewPos(uv);
-		tanS = Tangent(P, S);
-		d2 = Length2(S - P);
+    // Sample to find the maximum angle
+    for (float s = 1; s <= numSamples; ++s) {
+        uv += deltaUV;
+        S = GetViewPos(uv);
+        tanS = Tangent(P, S);
+        d2 = Length2(S - P);

-		// Is the sample within the radius and the angle greater?
-		if(d2 < R2 && tanS > tanH)
-		{
-			float sinS = TanToSin(tanS);
-			// Apply falloff based on the distance
-			ao += Falloff(d2) * (sinS - sinH);
+        // Is the sample within the radius and the angle greater?
+        if (d2 < R2 && tanS > tanH) {
+            float sinS = TanToSin(tanS);
+            // Apply falloff based on the distance
+            ao += Falloff(d2) * (sinS - sinH);

-			tanH = tanS;
-			sinH = sinS;
-		}
-	}
-	return ao;
+            tanH = tanS;
+            sinH = sinS;
+        }
+    }
+    return ao;
 }

-vec2 RotateDirections(vec2 Dir, vec2 CosSin){
-    return vec2(Dir.x*CosSin.x - Dir.y*CosSin.y, Dir.x*CosSin.y + Dir.y*CosSin.x);
+vec2 RotateDirections(vec2 Dir, vec2 CosSin) {
+    return vec2(Dir.x*CosSin.x - Dir.y*CosSin.y,
+                Dir.x*CosSin.y + Dir.y*CosSin.x);
 }

-void ComputeSteps(inout vec2 stepSizeUv, inout float numSteps, float rayRadiusPix, float rand){
+void ComputeSteps(inout vec2 stepSizeUv, inout float numSteps, float rayRadiusPix, float rand) {
    // Avoid oversampling if numSteps is greater than the kernel radius in pixels
    numSteps = min(NumSamples, rayRadiusPix);

@ -171,8 +199,7 @@ void ComputeSteps(inout vec2 stepSizeUv, inout float numSteps, float rayRadiusPi

    // Clamp numSteps if it is greater than the max kernel footprint
    float maxNumSteps = MaxRadiusPixels / stepSizePix;
-    if (maxNumSteps < numSteps)
-    {
+    if (maxNumSteps < numSteps) {
        // Use dithering to avoid AO discontinuities
        numSteps = floor(maxNumSteps + rand);
        numSteps = max(numSteps, 1);
@ -180,69 +207,73 @@ void ComputeSteps(inout vec2 stepSizeUv, inout float numSteps, float rayRadiusPi
    }

    // Step size in uv space
-    stepSizeUv = stepSizePix * InvAORes;
+    stepSizeUv = stepSizePix * renderTargetResInv;
 }

-float getRandom(vec2 uv){
+float getRandom(vec2 uv) {
    return fract(sin(dot(uv.xy ,vec2(12.9898,78.233))) * 43758.5453);
 }

-void main(void){
-	float numDirections = NumDirections;
+void main(void) {
+    mat4 projMatrix = getTransformCamera()._projection;

-	vec3 P, Pr, Pl, Pt, Pb;
-	P = GetViewPos(varTexcoord);
+    float numDirections = NumDirections;

-	// Sample neighboring pixels
-    Pr = GetViewPos(varTexcoord + vec2( InvAORes.x, 0));
-    Pl = GetViewPos(varTexcoord + vec2(-InvAORes.x, 0));
-    Pt = GetViewPos(varTexcoord + vec2( 0, InvAORes.y));
-    Pb = GetViewPos(varTexcoord + vec2( 0,-InvAORes.y));
+    vec3 P, Pr, Pl, Pt, Pb;
+    P = GetViewPos(varTexcoord);
+
+    // Sample neighboring pixels
+    Pr = GetViewPos(varTexcoord + vec2( renderTargetResInv.x, 0));
+    Pl = GetViewPos(varTexcoord + vec2(-renderTargetResInv.x, 0));
+    Pt = GetViewPos(varTexcoord + vec2( 0, renderTargetResInv.y));
+    Pb = GetViewPos(varTexcoord + vec2( 0,-renderTargetResInv.y));

    // Calculate tangent basis vectors using the minimum difference
    vec3 dPdu = MinDiff(P, Pr, Pl);
-    vec3 dPdv = MinDiff(P, Pt, Pb) * (AORes.y * InvAORes.x);
+    vec3 dPdv = MinDiff(P, Pt, Pb) * (renderTargetRes.y * renderTargetResInv.x);

    // Get the random samples from the noise function
-	vec3 random = vec3(getRandom(varTexcoord.xy), getRandom(varTexcoord.yx), getRandom(varTexcoord.xx));
+    vec3 random = vec3(getRandom(varTexcoord.xy), getRandom(varTexcoord.yx), getRandom(varTexcoord.xx));

-	// Calculate the projected size of the hemisphere
-    vec2 rayRadiusUV = 0.5 * R * FocalLen / -P.z;
-    float rayRadiusPix = rayRadiusUV.x * AORes.x;
+    // Calculate the projected size of the hemisphere
+    float w = P.z * projMatrix[2][3] + projMatrix[3][3];
+    vec2 rayRadiusUV = (0.5 * R * vec2(projMatrix[0][0], projMatrix[1][1]) / w);  // [-1,1] -> [0,1] uv
+    float rayRadiusPix = rayRadiusUV.x * renderTargetRes.x;

    float ao = 1.0;

    // Make sure the radius of the evaluated hemisphere is more than a pixel
-    if(rayRadiusPix > 1.0){
-    	ao = 0.0;
-    	float numSteps;
-    	vec2 stepSizeUV;
+    if(rayRadiusPix > 1.0) {
+        ao = 0.0;
+        float numSteps;
+        vec2 stepSizeUV;

-    	// Compute the number of steps
-    	ComputeSteps(stepSizeUV, numSteps, rayRadiusPix, random.z);
+        // Compute the number of steps
+        ComputeSteps(stepSizeUV, numSteps, rayRadiusPix, random.z);

-		float alpha = 2.0 * PI / numDirections;
+        float alpha = 2.0 * PI / numDirections;

-		// Calculate the horizon occlusion of each direction
-		for(float d = 0; d < numDirections; ++d){
-			float theta = alpha * d;
+        // Calculate the horizon occlusion of each direction
+        for(float d = 0; d < numDirections; ++d) {
+            float theta = alpha * d;

-			// Apply noise to the direction
-			vec2 dir = RotateDirections(vec2(cos(theta), sin(theta)), random.xy);
-			vec2 deltaUV = dir * stepSizeUV;
+            // Apply noise to the direction
+            vec2 dir = RotateDirections(vec2(cos(theta), sin(theta)), random.xy);
+            vec2 deltaUV = dir * stepSizeUV;

-			// Sample the pixels along the direction
-			ao += HorizonOcclusion(	deltaUV,
-									P,
-									dPdu,
-									dPdv,
-									random.z,
-									numSteps);
-		}
+            // Sample the pixels along the direction
+            ao += HorizonOcclusion(	deltaUV,
+                                    P,
+                                    dPdu,
+                                    dPdv,
+                                    random.z,
+                                    numSteps);
+        }
+
+        // Average the results and produce the final AO
+        ao = 1.0 - ao / numDirections * AOStrength;
+    }

-		// Average the results and produce the final AO
-		ao = 1.0 - ao / numDirections * AOStrength;
-	}

    outFragColor = vec4(vec3(ao), 1.0);
-}
+}