Merge pull request #5577 from vastcharade/horizAmbOcclPerf

Horiz amb occl performance updates
This commit is contained in:
Brad Hefta-Gaub 2015-08-22 12:39:51 -07:00
commit fe44442ffe
3 changed files with 211 additions and 134 deletions

View file

@ -51,8 +51,13 @@ const gpu::PipelinePointer& AmbientOcclusion::getOcclusionPipeline() {
_gBiasLoc = program->getUniforms().findLocation("g_bias"); _gBiasLoc = program->getUniforms().findLocation("g_bias");
_gSampleRadiusLoc = program->getUniforms().findLocation("g_sample_rad"); _gSampleRadiusLoc = program->getUniforms().findLocation("g_sample_rad");
_gIntensityLoc = program->getUniforms().findLocation("g_intensity"); _gIntensityLoc = program->getUniforms().findLocation("g_intensity");
_bufferWidthLoc = program->getUniforms().findLocation("bufferWidth");
_bufferHeightLoc = program->getUniforms().findLocation("bufferHeight"); _nearLoc = program->getUniforms().findLocation("near");
_depthScaleLoc = program->getUniforms().findLocation("depthScale");
_depthTexCoordOffsetLoc = program->getUniforms().findLocation("depthTexCoordOffset");
_depthTexCoordScaleLoc = program->getUniforms().findLocation("depthTexCoordScale");
_renderTargetResLoc = program->getUniforms().findLocation("renderTargetRes");
_renderTargetResInvLoc = program->getUniforms().findLocation("renderTargetResInv");
gpu::StatePointer state = gpu::StatePointer(new gpu::State()); gpu::StatePointer state = gpu::StatePointer(new gpu::State());
@ -172,9 +177,19 @@ const gpu::PipelinePointer& AmbientOcclusion::getBlendPipeline() {
void AmbientOcclusion::run(const render::SceneContextPointer& sceneContext, const render::RenderContextPointer& renderContext) { void AmbientOcclusion::run(const render::SceneContextPointer& sceneContext, const render::RenderContextPointer& renderContext) {
assert(renderContext->args); assert(renderContext->args);
assert(renderContext->args->_viewFrustum); assert(renderContext->args->_viewFrustum);
RenderArgs* args = renderContext->args;
gpu::Batch batch; gpu::Batch batch;
RenderArgs* args = renderContext->args;
auto framebufferCache = DependencyManager::get<FramebufferCache>();
QSize framebufferSize = framebufferCache->getFrameBufferSize();
float fbWidth = framebufferSize.width();
float fbHeight = framebufferSize.height();
float sMin = args->_viewport.x / fbWidth;
float sWidth = args->_viewport.z / fbWidth;
float tMin = args->_viewport.y / fbHeight;
float tHeight = args->_viewport.w / fbHeight;
glm::mat4 projMat; glm::mat4 projMat;
Transform viewMat; Transform viewMat;
@ -186,8 +201,8 @@ void AmbientOcclusion::run(const render::SceneContextPointer& sceneContext, cons
// Occlusion step // Occlusion step
getOcclusionPipeline(); getOcclusionPipeline();
batch.setResourceTexture(0, DependencyManager::get<FramebufferCache>()->getPrimaryDepthTexture()); batch.setResourceTexture(0, framebufferCache->getPrimaryDepthTexture());
batch.setResourceTexture(1, DependencyManager::get<FramebufferCache>()->getPrimaryNormalTexture()); batch.setResourceTexture(1, framebufferCache->getPrimaryNormalTexture());
_occlusionBuffer->setRenderBuffer(0, _occlusionTexture); _occlusionBuffer->setRenderBuffer(0, _occlusionTexture);
batch.setFramebuffer(_occlusionBuffer); batch.setFramebuffer(_occlusionBuffer);
@ -203,8 +218,32 @@ void AmbientOcclusion::run(const render::SceneContextPointer& sceneContext, cons
batch._glUniform1f(_gBiasLoc, g_bias); batch._glUniform1f(_gBiasLoc, g_bias);
batch._glUniform1f(_gSampleRadiusLoc, g_sample_rad); batch._glUniform1f(_gSampleRadiusLoc, g_sample_rad);
batch._glUniform1f(_gIntensityLoc, g_intensity); batch._glUniform1f(_gIntensityLoc, g_intensity);
batch._glUniform1f(_bufferWidthLoc, DependencyManager::get<FramebufferCache>()->getFrameBufferSize().width());
batch._glUniform1f(_bufferHeightLoc, DependencyManager::get<FramebufferCache>()->getFrameBufferSize().height()); // setup uniforms for unpacking a view-space position from the depth buffer
// This code is taken from the DeferredLightingEffect::render() method in DeferredLightingEffect.cpp.
// DeferredBuffer.slh shows how the unpacking is done and what variables are needed.
// initialize the view-space unpacking uniforms using frustum data
float left, right, bottom, top, nearVal, farVal;
glm::vec4 nearClipPlane, farClipPlane;
args->_viewFrustum->computeOffAxisFrustum(left, right, bottom, top, nearVal, farVal, nearClipPlane, farClipPlane);
float depthScale = (farVal - nearVal) / farVal;
float nearScale = -1.0f / nearVal;
float depthTexCoordScaleS = (right - left) * nearScale / sWidth;
float depthTexCoordScaleT = (top - bottom) * nearScale / tHeight;
float depthTexCoordOffsetS = left * nearScale - sMin * depthTexCoordScaleS;
float depthTexCoordOffsetT = bottom * nearScale - tMin * depthTexCoordScaleT;
// now set the position-unpacking uniforms
batch._glUniform1f(_nearLoc, nearVal);
batch._glUniform1f(_depthScaleLoc, depthScale);
batch._glUniform2f(_depthTexCoordOffsetLoc, depthTexCoordOffsetS, depthTexCoordOffsetT);
batch._glUniform2f(_depthTexCoordScaleLoc, depthTexCoordScaleS, depthTexCoordScaleT);
batch._glUniform2f(_renderTargetResLoc, fbWidth, fbHeight);
batch._glUniform2f(_renderTargetResInvLoc, 1.0/fbWidth, 1.0/fbHeight);
glm::vec4 color(0.0f, 0.0f, 0.0f, 1.0f); glm::vec4 color(0.0f, 0.0f, 0.0f, 1.0f);
glm::vec2 bottomLeft(-1.0f, -1.0f); glm::vec2 bottomLeft(-1.0f, -1.0f);
@ -238,13 +277,13 @@ void AmbientOcclusion::run(const render::SceneContextPointer& sceneContext, cons
// Blend step // Blend step
getBlendPipeline(); getBlendPipeline();
batch.setResourceTexture(0, _hBlurTexture); batch.setResourceTexture(0, _hBlurTexture);
batch.setFramebuffer(DependencyManager::get<FramebufferCache>()->getPrimaryFramebuffer()); batch.setFramebuffer(framebufferCache->getPrimaryFramebuffer());
// Bind the fourth gpu::Pipeline we need - for blending the primary color buffer with blurred occlusion texture // Bind the fourth gpu::Pipeline we need - for blending the primary color buffer with blurred occlusion texture
batch.setPipeline(getBlendPipeline()); batch.setPipeline(getBlendPipeline());
DependencyManager::get<GeometryCache>()->renderQuad(batch, bottomLeft, topRight, texCoordTopLeft, texCoordBottomRight, color); DependencyManager::get<GeometryCache>()->renderQuad(batch, bottomLeft, topRight, texCoordTopLeft, texCoordBottomRight, color);
// Ready to render // Ready to render
args->_context->render((batch)); args->_context->render((batch));
} }

View file

@ -36,8 +36,15 @@ private:
gpu::int32 _gBiasLoc; gpu::int32 _gBiasLoc;
gpu::int32 _gSampleRadiusLoc; gpu::int32 _gSampleRadiusLoc;
gpu::int32 _gIntensityLoc; gpu::int32 _gIntensityLoc;
gpu::int32 _bufferWidthLoc;
gpu::int32 _bufferHeightLoc; gpu::int32 _nearLoc;
gpu::int32 _depthScaleLoc;
gpu::int32 _depthTexCoordOffsetLoc;
gpu::int32 _depthTexCoordScaleLoc;
gpu::int32 _renderTargetResLoc;
gpu::int32 _renderTargetResInvLoc;
float g_scale; float g_scale;
float g_bias; float g_bias;
float g_sample_rad; float g_sample_rad;

View file

@ -30,25 +30,40 @@ uniform float g_scale;
uniform float g_bias; uniform float g_bias;
uniform float g_sample_rad; uniform float g_sample_rad;
uniform float g_intensity; uniform float g_intensity;
uniform float bufferWidth;
uniform float bufferHeight; // the distance to the near clip plane
uniform float near;
// scale factor for depth: (far - near) / far
uniform float depthScale;
// offset for depth texture coordinates
uniform vec2 depthTexCoordOffset;
// scale for depth texture coordinates
uniform vec2 depthTexCoordScale;
// the resolution of the occlusion buffer
// and its inverse
uniform vec2 renderTargetRes;
uniform vec2 renderTargetResInv;
const float PI = 3.14159265; const float PI = 3.14159265;
const vec2 FocalLen = vec2(1.0, 1.0);
const vec2 LinMAD = vec2(0.1-10.0, 0.1+10.0) / (2.0*0.1*10.0);
const vec2 AORes = vec2(1024.0, 768.0);
const vec2 InvAORes = vec2(1.0/1024.0, 1.0/768.0);
const vec2 NoiseScale = vec2(1024.0, 768.0) / 4.0;
const float AOStrength = 1.9; const float AOStrength = 1.9;
const float R = 0.3;
const float R2 = 0.3*0.3;
const float NegInvR2 = - 1.0 / (0.3*0.3); // TODO: R (radius) should be exposed as a uniform parameter
const float R = 0.01;
const float R2 = 0.01*0.01;
const float NegInvR2 = - 1.0 / (0.01*0.01);
// can't use tan to initialize a const value // can't use tan to initialize a const value
const float TanBias = 0.57735027; // tan(30.0 * PI / 180.0); const float TanBias = 0.57735027; // tan(30.0 * PI / 180.0);
const float MaxRadiusPixels = 50.0; const float MaxRadiusPixels = 50.0;
const int NumDirections = 6; const int NumDirections = 6;
@ -56,113 +71,126 @@ const int NumSamples = 4;
out vec4 outFragColor; out vec4 outFragColor;
/**
 * Fetches the view-space normal stored in the normal texture.
 * uv: the texture coordinates at which the normal texture is sampled.
 */
vec3 GetViewNormalFromTexture(vec2 uv) {
    // The sampled components are treated as [0,1] values. The full remap to
    // [-1,1] would be v*2 - 1 = 2*(v - 0.5); normalize() cancels the factor
    // of 2, so subtracting 0.5 is sufficient.
    vec3 encodedNormal = texture(normalTexture, uv).xyz;
    return normalize(encodedNormal - 0.5);
}
/**
* Gets the linearized depth in view space.
* d: the depth value [0-1], usually from a depth texture to convert.
*/
float ViewSpaceZFromDepth(float d){ float ViewSpaceZFromDepth(float d){
// [0,1] -> [-1,1] clip space return near / (d * depthScale - 1.0);
d = d * 2.0 - 1.0;
// Get view space Z
return -1.0 / (LinMAD.x * d + LinMAD.y);
} }
/**
* Converts a uv coordinate and depth value into a 3D view space coordinate.
* uv: the uv coordinates to convert
* z: the view space depth of the uv coordinate.
*/
vec3 UVToViewSpace(vec2 uv, float z){ vec3 UVToViewSpace(vec2 uv, float z){
//uv = UVToViewA * uv + UVToViewB; return vec3((depthTexCoordOffset + varTexcoord * depthTexCoordScale) * z, z);
return vec3(uv * z, z);
} }
vec3 GetViewPos(vec2 uv){ /**
float z = ViewSpaceZFromDepth(texture(depthTexture, uv).r); * Converts a uv coordinate into a 3D view space coordinate.
return UVToViewSpace(uv, z); * The depth of the uv coord is determined from the depth texture.
* uv: the uv coordinates to convert
*/
vec3 GetViewPos(vec2 uv) {
float z = ViewSpaceZFromDepth(texture(depthTexture, uv).r);
return UVToViewSpace(uv, z);
} }
vec3 GetViewPosPoint(ivec2 uv){
vec2 coord = vec2(gl_FragCoord.xy) + uv; float TanToSin(float x) {
//float z = texelFetch(texture0, coord, 0).r; return x * inversesqrt(x*x + 1.0);
float z = texture(depthTexture, uv).r;
return UVToViewSpace(uv, z);
} }
float TanToSin(float x){ float InvLength(vec2 V) {
return x * inversesqrt(x*x + 1.0); return inversesqrt(dot(V, V));
} }
float InvLength(vec2 V){ float Tangent(vec3 V) {
return inversesqrt(dot(V,V)); return V.z * InvLength(V.xy);
} }
float Tangent(vec3 V){ float BiasedTangent(vec3 V) {
return V.z * InvLength(V.xy); return V.z * InvLength(V.xy) + TanBias;
} }
float BiasedTangent(vec3 V){ float Tangent(vec3 P, vec3 S) {
return V.z * InvLength(V.xy) + TanBias;
}
float Tangent(vec3 P, vec3 S){
return -(P.z - S.z) * InvLength(S.xy - P.xy); return -(P.z - S.z) * InvLength(S.xy - P.xy);
} }
float Length2(vec3 V){ float Length2(vec3 V) {
return dot(V,V); return dot(V, V);
} }
vec3 MinDiff(vec3 P, vec3 Pr, vec3 Pl){ vec3 MinDiff(vec3 P, vec3 Pr, vec3 Pl) {
vec3 V1 = Pr - P; vec3 V1 = Pr - P;
vec3 V2 = P - Pl; vec3 V2 = P - Pl;
return (Length2(V1) < Length2(V2)) ? V1 : V2; return (Length2(V1) < Length2(V2)) ? V1 : V2;
} }
vec2 SnapUVOffset(vec2 uv){ vec2 SnapUVOffset(vec2 uv) {
return round(uv * AORes) * InvAORes; return round(uv * renderTargetRes) * renderTargetResInv;
} }
float Falloff(float d2){ float Falloff(float d2) {
return d2 * NegInvR2 + 1.0f; return d2 * NegInvR2 + 1.0f;
} }
float HorizonOcclusion( vec2 deltaUV, vec3 P, vec3 dPdu, vec3 dPdv, float randstep, float numSamples){ float HorizonOcclusion(vec2 deltaUV, vec3 P, vec3 dPdu, vec3 dPdv, float randstep, float numSamples) {
float ao = 0; float ao = 0;
// Offset the first coord with some noise // Offset the first coord with some noise
vec2 uv = varTexcoord + SnapUVOffset(randstep*deltaUV); vec2 uv = varTexcoord + SnapUVOffset(randstep*deltaUV);
deltaUV = SnapUVOffset( deltaUV ); deltaUV = SnapUVOffset(deltaUV);
// Calculate the tangent vector // Calculate the tangent vector
vec3 T = deltaUV.x * dPdu + deltaUV.y * dPdv; vec3 T = deltaUV.x * dPdu + deltaUV.y * dPdv;
// Get the angle of the tangent vector from the viewspace axis // Get the angle of the tangent vector from the viewspace axis
float tanH = BiasedTangent(T); float tanH = BiasedTangent(T);
float sinH = TanToSin(tanH); float sinH = TanToSin(tanH);
float tanS; float tanS;
float d2; float d2;
vec3 S; vec3 S;
// Sample to find the maximum angle // Sample to find the maximum angle
for(float s = 1; s <= numSamples; ++s){ for (float s = 1; s <= numSamples; ++s) {
uv += deltaUV; uv += deltaUV;
S = GetViewPos(uv); S = GetViewPos(uv);
tanS = Tangent(P, S); tanS = Tangent(P, S);
d2 = Length2(S - P); d2 = Length2(S - P);
// Is the sample within the radius and the angle greater? // Is the sample within the radius and the angle greater?
if(d2 < R2 && tanS > tanH) if (d2 < R2 && tanS > tanH) {
{ float sinS = TanToSin(tanS);
float sinS = TanToSin(tanS); // Apply falloff based on the distance
// Apply falloff based on the distance ao += Falloff(d2) * (sinS - sinH);
ao += Falloff(d2) * (sinS - sinH);
tanH = tanS; tanH = tanS;
sinH = sinS; sinH = sinS;
} }
} }
return ao; return ao;
} }
vec2 RotateDirections(vec2 Dir, vec2 CosSin){ vec2 RotateDirections(vec2 Dir, vec2 CosSin) {
return vec2(Dir.x*CosSin.x - Dir.y*CosSin.y, Dir.x*CosSin.y + Dir.y*CosSin.x); return vec2(Dir.x*CosSin.x - Dir.y*CosSin.y,
Dir.x*CosSin.y + Dir.y*CosSin.x);
} }
void ComputeSteps(inout vec2 stepSizeUv, inout float numSteps, float rayRadiusPix, float rand){ void ComputeSteps(inout vec2 stepSizeUv, inout float numSteps, float rayRadiusPix, float rand) {
// Avoid oversampling if numSteps is greater than the kernel radius in pixels // Avoid oversampling if numSteps is greater than the kernel radius in pixels
numSteps = min(NumSamples, rayRadiusPix); numSteps = min(NumSamples, rayRadiusPix);
@ -171,8 +199,7 @@ void ComputeSteps(inout vec2 stepSizeUv, inout float numSteps, float rayRadiusPi
// Clamp numSteps if it is greater than the max kernel footprint // Clamp numSteps if it is greater than the max kernel footprint
float maxNumSteps = MaxRadiusPixels / stepSizePix; float maxNumSteps = MaxRadiusPixels / stepSizePix;
if (maxNumSteps < numSteps) if (maxNumSteps < numSteps) {
{
// Use dithering to avoid AO discontinuities // Use dithering to avoid AO discontinuities
numSteps = floor(maxNumSteps + rand); numSteps = floor(maxNumSteps + rand);
numSteps = max(numSteps, 1); numSteps = max(numSteps, 1);
@ -180,69 +207,73 @@ void ComputeSteps(inout vec2 stepSizeUv, inout float numSteps, float rayRadiusPi
} }
// Step size in uv space // Step size in uv space
stepSizeUv = stepSizePix * InvAORes; stepSizeUv = stepSizePix * renderTargetResInv;
} }
float getRandom(vec2 uv){ float getRandom(vec2 uv) {
return fract(sin(dot(uv.xy ,vec2(12.9898,78.233))) * 43758.5453); return fract(sin(dot(uv.xy ,vec2(12.9898,78.233))) * 43758.5453);
} }
void main(void){ void main(void) {
float numDirections = NumDirections; mat4 projMatrix = getTransformCamera()._projection;
vec3 P, Pr, Pl, Pt, Pb; float numDirections = NumDirections;
P = GetViewPos(varTexcoord);
// Sample neighboring pixels vec3 P, Pr, Pl, Pt, Pb;
Pr = GetViewPos(varTexcoord + vec2( InvAORes.x, 0)); P = GetViewPos(varTexcoord);
Pl = GetViewPos(varTexcoord + vec2(-InvAORes.x, 0));
Pt = GetViewPos(varTexcoord + vec2( 0, InvAORes.y)); // Sample neighboring pixels
Pb = GetViewPos(varTexcoord + vec2( 0,-InvAORes.y)); Pr = GetViewPos(varTexcoord + vec2( renderTargetResInv.x, 0));
Pl = GetViewPos(varTexcoord + vec2(-renderTargetResInv.x, 0));
Pt = GetViewPos(varTexcoord + vec2( 0, renderTargetResInv.y));
Pb = GetViewPos(varTexcoord + vec2( 0,-renderTargetResInv.y));
// Calculate tangent basis vectors using the minimum difference // Calculate tangent basis vectors using the minimum difference
vec3 dPdu = MinDiff(P, Pr, Pl); vec3 dPdu = MinDiff(P, Pr, Pl);
vec3 dPdv = MinDiff(P, Pt, Pb) * (AORes.y * InvAORes.x); vec3 dPdv = MinDiff(P, Pt, Pb) * (renderTargetRes.y * renderTargetResInv.x);
// Get the random samples from the noise function // Get the random samples from the noise function
vec3 random = vec3(getRandom(varTexcoord.xy), getRandom(varTexcoord.yx), getRandom(varTexcoord.xx)); vec3 random = vec3(getRandom(varTexcoord.xy), getRandom(varTexcoord.yx), getRandom(varTexcoord.xx));
// Calculate the projected size of the hemisphere // Calculate the projected size of the hemisphere
vec2 rayRadiusUV = 0.5 * R * FocalLen / -P.z; float w = P.z * projMatrix[2][3] + projMatrix[3][3];
float rayRadiusPix = rayRadiusUV.x * AORes.x; vec2 rayRadiusUV = (0.5 * R * vec2(projMatrix[0][0], projMatrix[1][1]) / w); // [-1,1] -> [0,1] uv
float rayRadiusPix = rayRadiusUV.x * renderTargetRes.x;
float ao = 1.0; float ao = 1.0;
// Make sure the radius of the evaluated hemisphere is more than a pixel // Make sure the radius of the evaluated hemisphere is more than a pixel
if(rayRadiusPix > 1.0){ if(rayRadiusPix > 1.0) {
ao = 0.0; ao = 0.0;
float numSteps; float numSteps;
vec2 stepSizeUV; vec2 stepSizeUV;
// Compute the number of steps // Compute the number of steps
ComputeSteps(stepSizeUV, numSteps, rayRadiusPix, random.z); ComputeSteps(stepSizeUV, numSteps, rayRadiusPix, random.z);
float alpha = 2.0 * PI / numDirections; float alpha = 2.0 * PI / numDirections;
// Calculate the horizon occlusion of each direction // Calculate the horizon occlusion of each direction
for(float d = 0; d < numDirections; ++d){ for(float d = 0; d < numDirections; ++d) {
float theta = alpha * d; float theta = alpha * d;
// Apply noise to the direction // Apply noise to the direction
vec2 dir = RotateDirections(vec2(cos(theta), sin(theta)), random.xy); vec2 dir = RotateDirections(vec2(cos(theta), sin(theta)), random.xy);
vec2 deltaUV = dir * stepSizeUV; vec2 deltaUV = dir * stepSizeUV;
// Sample the pixels along the direction // Sample the pixels along the direction
ao += HorizonOcclusion( deltaUV, ao += HorizonOcclusion( deltaUV,
P, P,
dPdu, dPdu,
dPdv, dPdv,
random.z, random.z,
numSteps); numSteps);
} }
// Average the results and produce the final AO
ao = 1.0 - ao / numDirections * AOStrength;
}
// Average the results and produce the final AO
ao = 1.0 - ao / numDirections * AOStrength;
}
outFragColor = vec4(vec3(ao), 1.0); outFragColor = vec4(vec3(ao), 1.0);
} }