From 708190bfd6cefc0f746a3746c703fe756fc675ea Mon Sep 17 00:00:00 2001 From: Christopher Root Date: Sat, 15 Aug 2015 14:10:25 -0700 Subject: [PATCH] horiz amb occl performance upgrades --- .../src/AmbientOcclusionEffect.cpp | 53 ++++++-- .../render-utils/src/AmbientOcclusionEffect.h | 11 +- .../render-utils/src/ambient_occlusion.slf | 126 +++++++++++++----- 3 files changed, 142 insertions(+), 48 deletions(-) diff --git a/libraries/render-utils/src/AmbientOcclusionEffect.cpp b/libraries/render-utils/src/AmbientOcclusionEffect.cpp index 5f0afd37d1..b94bd09538 100644 --- a/libraries/render-utils/src/AmbientOcclusionEffect.cpp +++ b/libraries/render-utils/src/AmbientOcclusionEffect.cpp @@ -51,8 +51,13 @@ const gpu::PipelinePointer& AmbientOcclusion::getOcclusionPipeline() { _gBiasLoc = program->getUniforms().findLocation("g_bias"); _gSampleRadiusLoc = program->getUniforms().findLocation("g_sample_rad"); _gIntensityLoc = program->getUniforms().findLocation("g_intensity"); - _bufferWidthLoc = program->getUniforms().findLocation("bufferWidth"); - _bufferHeightLoc = program->getUniforms().findLocation("bufferHeight"); + + _nearLoc = program->getUniforms().findLocation("near"); + _depthScaleLoc = program->getUniforms().findLocation("depthScale"); + _depthTexCoordOffsetLoc = program->getUniforms().findLocation("depthTexCoordOffset"); + _depthTexCoordScaleLoc = program->getUniforms().findLocation("depthTexCoordScale"); + _renderTargetResLoc = program->getUniforms().findLocation("renderTargetRes"); + _renderTargetResInvLoc = program->getUniforms().findLocation("renderTargetResInv"); gpu::StatePointer state = gpu::StatePointer(new gpu::State()); @@ -172,9 +177,19 @@ const gpu::PipelinePointer& AmbientOcclusion::getBlendPipeline() { void AmbientOcclusion::run(const render::SceneContextPointer& sceneContext, const render::RenderContextPointer& renderContext) { assert(renderContext->args); assert(renderContext->args->_viewFrustum); - RenderArgs* args = renderContext->args; gpu::Batch batch; + RenderArgs* args = renderContext->args; + + auto framebufferCache = DependencyManager::get(); + QSize framebufferSize = framebufferCache->getFrameBufferSize(); + float fbWidth = framebufferSize.width(); + float fbHeight = framebufferSize.height(); + float sMin = args->_viewport.x / fbWidth; + float sWidth = args->_viewport.z / fbWidth; + float tMin = args->_viewport.y / fbHeight; + float tHeight = args->_viewport.w / fbHeight; + glm::mat4 projMat; Transform viewMat; @@ -186,8 +201,8 @@ void AmbientOcclusion::run(const render::SceneContextPointer& sceneContext, cons // Occlusion step getOcclusionPipeline(); - batch.setResourceTexture(0, DependencyManager::get()->getPrimaryDepthTexture()); - batch.setResourceTexture(1, DependencyManager::get()->getPrimaryNormalTexture()); + batch.setResourceTexture(0, framebufferCache->getPrimaryDepthTexture()); + batch.setResourceTexture(1, framebufferCache->getPrimaryNormalTexture()); _occlusionBuffer->setRenderBuffer(0, _occlusionTexture); batch.setFramebuffer(_occlusionBuffer); @@ -203,8 +218,28 @@ void AmbientOcclusion::run(const render::SceneContextPointer& sceneContext, cons batch._glUniform1f(_gBiasLoc, g_bias); batch._glUniform1f(_gSampleRadiusLoc, g_sample_rad); batch._glUniform1f(_gIntensityLoc, g_intensity); - batch._glUniform1f(_bufferWidthLoc, DependencyManager::get()->getFrameBufferSize().width()); - batch._glUniform1f(_bufferHeightLoc, DependencyManager::get()->getFrameBufferSize().height()); + + // setup uniforms for extracting depth from the depth buffer and + // converting that depth to a camera-space position, same as DeferredLightingEffect.cpp + float left, right, bottom, top, nearVal, farVal; + glm::vec4 nearClipPlane, farClipPlane; + args->_viewFrustum->computeOffAxisFrustum(left, right, bottom, top, nearVal, farVal, nearClipPlane, farClipPlane); + + batch._glUniform1f(_nearLoc, nearVal); + + float depthScale = (farVal - nearVal) / farVal; + batch._glUniform1f(_depthScaleLoc, depthScale); + + float nearScale = -1.0f / nearVal; + float depthTexCoordScaleS = (right - left) * nearScale / sWidth; + float depthTexCoordScaleT = (top - bottom) * nearScale / tHeight; + float depthTexCoordOffsetS = left * nearScale - sMin * depthTexCoordScaleS; + float depthTexCoordOffsetT = bottom * nearScale - tMin * depthTexCoordScaleT; + batch._glUniform2f(_depthTexCoordOffsetLoc, depthTexCoordOffsetS, depthTexCoordOffsetT); + batch._glUniform2f(_depthTexCoordScaleLoc, depthTexCoordScaleS, depthTexCoordScaleT); + + batch._glUniform2f(_renderTargetResLoc, fbWidth, fbHeight); + batch._glUniform2f(_renderTargetResInvLoc, 1.0/fbWidth, 1.0/fbHeight); glm::vec4 color(0.0f, 0.0f, 0.0f, 1.0f); glm::vec2 bottomLeft(-1.0f, -1.0f); @@ -238,13 +273,13 @@ void AmbientOcclusion::run(const render::SceneContextPointer& sceneContext, cons // Blend step getBlendPipeline(); batch.setResourceTexture(0, _hBlurTexture); - batch.setFramebuffer(DependencyManager::get()->getPrimaryFramebuffer()); + batch.setFramebuffer(framebufferCache->getPrimaryFramebuffer()); // Bind the fourth gpu::Pipeline we need - for blending the primary color buffer with blurred occlusion texture batch.setPipeline(getBlendPipeline()); DependencyManager::get()->renderQuad(batch, bottomLeft, topRight, texCoordTopLeft, texCoordBottomRight, color); - + // Ready to render args->_context->syncCache(); args->_context->render((batch)); diff --git a/libraries/render-utils/src/AmbientOcclusionEffect.h b/libraries/render-utils/src/AmbientOcclusionEffect.h index 0b695dd2ad..6153795ea6 100644 --- a/libraries/render-utils/src/AmbientOcclusionEffect.h +++ b/libraries/render-utils/src/AmbientOcclusionEffect.h @@ -36,8 +36,15 @@ private: gpu::int32 _gBiasLoc; gpu::int32 _gSampleRadiusLoc; gpu::int32 _gIntensityLoc; - gpu::int32 _bufferWidthLoc; - gpu::int32 _bufferHeightLoc; + + gpu::int32 _nearLoc; + gpu::int32 _depthScaleLoc; + gpu::int32 _depthTexCoordOffsetLoc; + gpu::int32 _depthTexCoordScaleLoc; + gpu::int32 _renderTargetResLoc; + gpu::int32 _renderTargetResInvLoc; + + float g_scale; float g_bias; float g_sample_rad; diff --git a/libraries/render-utils/src/ambient_occlusion.slf b/libraries/render-utils/src/ambient_occlusion.slf index 649fb16c56..73bed26a9c 100644 --- a/libraries/render-utils/src/ambient_occlusion.slf +++ b/libraries/render-utils/src/ambient_occlusion.slf @@ -30,25 +30,49 @@ uniform float g_scale; uniform float g_bias; uniform float g_sample_rad; uniform float g_intensity; -uniform float bufferWidth; -uniform float bufferHeight; + +// the distance to the near clip plane +uniform float near; + +// scale factor for depth: (far - near) / far +uniform float depthScale; + +// offset for depth texture coordinates +uniform vec2 depthTexCoordOffset; + +// scale for depth texture coordinates +uniform vec2 depthTexCoordScale; + +// the resolution of the occlusion buffer +// and its inverse +uniform vec2 renderTargetRes; +uniform vec2 renderTargetResInv; + + const float PI = 3.14159265; -const vec2 FocalLen = vec2(1.0, 1.0); +// const vec2 FocalLen = vec2(1.0, 1.0); +// const vec2 LinMAD = vec2(0.1-10.0, 0.1+10.0) / (2.0*0.1*10.0); -const vec2 LinMAD = vec2(0.1-10.0, 0.1+10.0) / (2.0*0.1*10.0); - -const vec2 AORes = vec2(1024.0, 768.0); -const vec2 InvAORes = vec2(1.0/1024.0, 1.0/768.0); -const vec2 NoiseScale = vec2(1024.0, 768.0) / 4.0; +// const vec2 AORes = vec2(1024.0, 768.0); +// const vec2 InvAORes = vec2(1.0/1024.0, 1.0/768.0); +// const vec2 NoiseScale = vec2(1024.0, 768.0) / 4.0; const float AOStrength = 1.9; -const float R = 0.3; -const float R2 = 0.3*0.3; -const float NegInvR2 = - 1.0 / (0.3*0.3); + +// const float R = 0.3; +// const float R2 = 0.3*0.3; +// const float NegInvR2 = - 1.0 / (0.3*0.3); + +const float R = 0.01; +const float R2 = 0.01*0.01; +const float NegInvR2 = - 1.0 / (0.01*0.01); + + + // can't use tan to initialize a const value -const float TanBias = 0.57735027; // tan(30.0 * PI / 180.0); +const float TanBias = 0.57735027; // tan(30.0 * PI / 180.0); const float MaxRadiusPixels = 50.0; const int NumDirections = 6; @@ -56,30 +80,43 @@ const int NumSamples = 4; out vec4 outFragColor; -float ViewSpaceZFromDepth(float d){ - // [0,1] -> [-1,1] clip space - d = d * 2.0 - 1.0; +// float ViewSpaceZFromDepth(float d){ +// // [0,1] -> [-1,1] clip space +// d = d * 2.0 - 1.0; - // Get view space Z - return -1.0 / (LinMAD.x * d + LinMAD.y); +// // Get view space Z +// return -1.0 / (LinMAD.x * d + LinMAD.y); +// } + +// vec3 UVToViewSpace(vec2 uv, float z){ +// //uv = UVToViewA * uv + UVToViewB; +// return vec3(uv * z, z); +// } + +// vec3 GetViewPos(vec2 uv){ +// float z = ViewSpaceZFromDepth(texture(depthTexture, uv).r); +// return UVToViewSpace(uv, z); +// } + +vec3 GetViewNormalFromTexture(vec2 uv) { + // convert [0,1] -> [-1,1], note: since we're normalizing + // we don't need to do v*2 - 1.0, we can just do a v-0.5 + return normalize(texture(normalTexture, uv).xyz - 0.5); +} + +float ViewSpaceZFromDepth(float d){ + return near / (d * depthScale - 1.0); } vec3 UVToViewSpace(vec2 uv, float z){ - //uv = UVToViewA * uv + UVToViewB; - return vec3(uv * z, z); + return vec3((depthTexCoordOffset + varTexcoord * depthTexCoordScale) * z, z); } vec3 GetViewPos(vec2 uv){ - float z = ViewSpaceZFromDepth(texture(depthTexture, uv).r); - return UVToViewSpace(uv, z); + float z = ViewSpaceZFromDepth(texture(depthTexture, uv).r); + return UVToViewSpace(uv, z); } -vec3 GetViewPosPoint(ivec2 uv){ - vec2 coord = vec2(gl_FragCoord.xy) + uv; - //float z = texelFetch(texture0, coord, 0).r; - float z = texture(depthTexture, uv).r; - return UVToViewSpace(uv, z); -} float TanToSin(float x){ return x * inversesqrt(x*x + 1.0); @@ -112,7 +149,8 @@ vec3 MinDiff(vec3 P, vec3 Pr, vec3 Pl){ } vec2 SnapUVOffset(vec2 uv){ - return round(uv * AORes) * InvAORes; + // return round(uv * AORes) * InvAORes; + return round(uv * renderTargetRes) * renderTargetResInv; } float Falloff(float d2){ @@ -180,7 +218,8 @@ void ComputeSteps(inout vec2 stepSizeUv, inout float numSteps, float rayRadiusPi } // Step size in uv space - stepSizeUv = stepSizePix * InvAORes; + // stepSizeUv = stepSizePix * InvAORes; + stepSizeUv = stepSizePix * renderTargetResInv; } float getRandom(vec2 uv){ @@ -188,27 +227,39 @@ float getRandom(vec2 uv){ } void main(void){ + mat4 projMatrix = getTransformCamera()._projection; + float numDirections = NumDirections; vec3 P, Pr, Pl, Pt, Pb; P = GetViewPos(varTexcoord); // Sample neighboring pixels - Pr = GetViewPos(varTexcoord + vec2( InvAORes.x, 0)); - Pl = GetViewPos(varTexcoord + vec2(-InvAORes.x, 0)); - Pt = GetViewPos(varTexcoord + vec2( 0, InvAORes.y)); - Pb = GetViewPos(varTexcoord + vec2( 0,-InvAORes.y)); + // Pr = GetViewPos(varTexcoord + vec2( InvAORes.x, 0)); + // Pl = GetViewPos(varTexcoord + vec2(-InvAORes.x, 0)); + // Pt = GetViewPos(varTexcoord + vec2( 0, InvAORes.y)); + // Pb = GetViewPos(varTexcoord + vec2( 0,-InvAORes.y)); + Pr = GetViewPos(varTexcoord + vec2( renderTargetResInv.x, 0)); + Pl = GetViewPos(varTexcoord + vec2(-renderTargetResInv.x, 0)); + Pt = GetViewPos(varTexcoord + vec2( 0, renderTargetResInv.y)); + Pb = GetViewPos(varTexcoord + vec2( 0,-renderTargetResInv.y)); // Calculate tangent basis vectors using the minimum difference vec3 dPdu = MinDiff(P, Pr, Pl); - vec3 dPdv = MinDiff(P, Pt, Pb) * (AORes.y * InvAORes.x); + // vec3 dPdv = MinDiff(P, Pt, Pb) * (AORes.y * InvAORes.x); + vec3 dPdv = MinDiff(P, Pt, Pb) * (renderTargetRes.y * renderTargetResInv.x); // Get the random samples from the noise function vec3 random = vec3(getRandom(varTexcoord.xy), getRandom(varTexcoord.yx), getRandom(varTexcoord.xx)); // Calculate the projected size of the hemisphere - vec2 rayRadiusUV = 0.5 * R * FocalLen / -P.z; - float rayRadiusPix = rayRadiusUV.x * AORes.x; + // vec2 rayRadiusUV = 0.5 * R * FocalLen / -P.z; + // float rayRadiusPix = rayRadiusUV.x * AORes.x; + + // project the radius of the hemisphere into screen space + float w = P.z * projMatrix[2][3] + projMatrix[3][3]; + vec2 rayRadiusUV = (0.5 * R * vec2(projMatrix[0][0], projMatrix[1][1]) / w); // [-1,1] -> [0,1] uv + float rayRadiusPix = rayRadiusUV.x * renderTargetRes.x; float ao = 1.0; @@ -244,5 +295,6 @@ void main(void){ ao = 1.0 - ao / numDirections * AOStrength; } + outFragColor = vec4(vec3(ao), 1.0); -} \ No newline at end of file +}