Switched to split sum model for ambient (as Unreal)

This commit is contained in:
Olivier Prat 2019-04-04 16:10:33 +02:00
parent 57de55c5ce
commit b71a8f7902
13 changed files with 207 additions and 47 deletions

View file

@ -15,6 +15,7 @@
#include <tbb/blocked_range2d.h>
#include "RandomAndNoise.h"
#include "BRDF.h"
#include "ImageLogging.h"
#ifndef M_PI
@ -501,34 +502,15 @@ glm::vec4 CubeMap::fetchLod(const glm::vec3& dir, float lod) const {
return loColor + (hiColor - loColor) * lodFrac;
}
// Maps a 2D point Xi to a half vector distributed according to the GGX lobe of
// the given roughness. The result is expressed in tangent space, where the
// normal is +Z (H.z is the cosine of the angle to the normal).
//   Xi        : 2D sample; Xi.x drives the azimuth and Xi.y the polar angle
//               (both are expected to lie in [0,1])
//   roughness : perceptual roughness, squared internally before use
static glm::vec3 sampleGGX(const glm::vec2& Xi, const float roughness) {
    const float a = roughness * roughness;

    float phi = (float)(2.0 * M_PI * Xi.x);

    // With roughness == 0 and Xi.y == 1 both the numerator and the denominator
    // vanish and the original expression evaluated to 0/0 = NaN. A zero roughness
    // lobe sends every other sample straight along the normal (cosTheta == 1), so
    // the degenerate case is resolved the same way.
    const double denominator = 1.0 + (a*a - 1.0) * Xi.y;
    float cosTheta = (denominator > 0.0) ? (float)(std::sqrt((1.0 - Xi.y) / denominator)) : 1.0f;
    float sinTheta = (float)(std::sqrt(1.0 - cosTheta * cosTheta));

    // from spherical coordinates to cartesian coordinates
    glm::vec3 H;
    H.x = std::cos(phi) * sinTheta;
    H.y = std::sin(phi) * sinTheta;
    H.z = cosTheta;
    return H;
}
// Evaluates the GGX distribution term for a half vector whose cosine with the
// normal is NdotH. The roughness is squared twice (roughness^4) before use.
// No 1/pi factor is applied by this function.
static float evaluateGGX(float NdotH, float roughness) {
    const float a = roughness * roughness;
    const float a2 = a * a;
    const float cosSquared = NdotH * NdotH;

    // The intermediate sum is carried out in double precision, then narrowed
    const double base = cosSquared * (a2 - 1.0) + 1.0;
    const float narrowed = (float)base;

    return a2 / (narrowed * narrowed);
}
// Set of samples used by the GGX convolution: it is filled through the
// non-const reference taken by generateGGXSamples() and read through the
// const reference taken by convolveMipFaceForGGX().
struct CubeMap::GGXSamples {
// NOTE(review): presumably the reciprocal of the summed sample weights, used
// to normalize the convolution result -- confirm against generateGGXSamples()
float invTotalWeight;
// One vec4 per sample.
// NOTE(review): the meaning of the four components is not visible from here
// (likely a tangent space direction plus a mip level) -- confirm
std::vector<glm::vec4> points;
};
// All the GGX convolution code is inspired from:
// https://placeholderart.wordpress.com/2015/07/28/implementation-notes-runtime-environment-map-filtering-for-image-based-lighting/
// Computation is done in tangent space so normal is always (0,0,1) which simplifies a lot of things
void CubeMap::generateGGXSamples(GGXSamples& data, float roughness, const int resolution) {
glm::vec2 xi;
glm::vec3 L;
@ -546,8 +528,8 @@ void CubeMap::generateGGXSamples(GGXSamples& data, float roughness, const int re
// Do some computation in tangent space
while (sampleIndex < sampleCount) {
if (hammersleySampleIndex < hammersleySequenceLength) {
xi = evaluateHammersley((int)hammersleySampleIndex, (int)hammersleySequenceLength);
H = sampleGGX(xi, roughness);
xi = hammersley::evaluate((int)hammersleySampleIndex, (int)hammersleySequenceLength);
H = ggx::sample(xi, roughness);
L = H * (2.0f * H.z) - glm::vec3(0.0f, 0.0f, 1.0f);
NdotL = L.z;
hammersleySampleIndex++;
@ -559,14 +541,14 @@ void CubeMap::generateGGXSamples(GGXSamples& data, float roughness, const int re
// Create a purely random sample
xi.x = rand() / float(RAND_MAX);
xi.y = rand() / float(RAND_MAX);
H = sampleGGX(xi, roughness);
H = ggx::sample(xi, roughness);
L = H * (2.0f * H.z) - glm::vec3(0.0f, 0.0f, 1.0f);
NdotL = L.z;
}
float NdotH = std::max(0.0f, H.z);
float HdotV = NdotH;
float D = evaluateGGX(NdotH, roughness);
float D = ggx::evaluate(NdotH, roughness);
float pdf = (D * NdotH / (4.0f * HdotV)) + 0.0001f;
float saSample = 1.0f / (float(sampleCount) * pdf + 0.0001f);
float mipLevel = std::max(0.5f * log2(saSample / saTexel) + mipBias, 0.0f);
@ -628,7 +610,7 @@ void CubeMap::convolveMipFaceForGGX(const GGXSamples& samples, CubeMap& output,
const auto outputLineStride = output.getMipLineStride(mipLevel);
auto outputFacePixels = output.editFace(mipLevel, face);
tbb::parallel_for(tbb::blocked_range2d<int, int>(0, mipDimensions.x, 32, 0, mipDimensions.y, 32), [&](const tbb::blocked_range2d<int, int>& range) {
tbb::parallel_for(tbb::blocked_range2d<int, int>(0, mipDimensions.y, 32, 0, mipDimensions.x, 32), [&](const tbb::blocked_range2d<int, int>& range) {
auto rowRange = range.rows();
auto colRange = range.cols();

View file

@ -363,7 +363,7 @@ JitterSample::SampleSequence::SampleSequence(){
// Halton sequence (2,3)
for (int i = 0; i < SEQUENCE_LENGTH; i++) {
offsets[i] = glm::vec2(evaluateHalton<2>(i), evaluateHalton<3>(i));
offsets[i] = glm::vec2(halton::evaluate<2>(i), halton::evaluate<3>(i));
offsets[i] -= vec2(0.5f);
}
offsets[SEQUENCE_LENGTH] = glm::vec2(0.0f);

View file

@ -365,6 +365,7 @@ void PrepareDeferred::run(const RenderContextPointer& renderContext, const Input
// For the rest of the rendering, bind the lighting model
batch.setUniformBuffer(ru::Buffer::LightModel, lightingModel->getParametersBuffer());
batch.setResourceTexture(ru::Texture::AmbientFresnel, lightingModel->getAmbientFresnelLUT());
});
}
@ -416,6 +417,7 @@ void RenderDeferredSetup::run(const render::RenderContextPointer& renderContext,
// The lighting model
batch.setUniformBuffer(ru::Buffer::LightModel, lightingModel->getParametersBuffer());
batch.setResourceTexture(ru::Texture::AmbientFresnel, lightingModel->getAmbientFresnelLUT());
// Subsurface scattering specific
if (surfaceGeometryFramebuffer) {

View file

@ -27,10 +27,17 @@ vec4 evalSkyboxLight(vec3 direction, float lod) {
<@endfunc@>
<@func declareEvalAmbientSpecularIrradiance(supportAmbientSphere, supportAmbientMap, supportIfAmbientMapElseAmbientSphere)@>
LAYOUT(binding=RENDER_UTILS_TEXTURE_AMBIENT_FRESNEL) uniform sampler2D ambientFresnelLUT;
// Ambient specular Fresnel term.
//   fresnelColor : specular color at normal incidence (evalLightingAmbient passes fresnelF0)
//   ndotd        : cosine used for the lookup (evalLightingAmbient passes surface.ndotv)
//   roughness    : surface roughness (evalLightingAmbient passes surface.roughness)
// The active branch reads a scale (x) and a bias (y) from ambientFresnelLUT at
// (roughness, ndotd) and returns fresnelColor * scale + bias.
vec3 fresnelSchlickAmbient(vec3 fresnelColor, float ndotd, float gloss) {
vec3 fresnelSchlickAmbient(vec3 fresnelColor, float ndotd, float roughness) {
#if 0
// Disabled: analytic Schlick style approximation driven by gloss = 1 - roughness
float gloss = 1.0-roughness;
float f = pow(1.0 - ndotd, 5.0);
return fresnelColor + (max(vec3(gloss), fresnelColor) - fresnelColor) * f;
#else
// Split sum model: the scale and bias come from the precomputed lookup texture
vec2 ambientFresnel = texture(ambientFresnelLUT, vec2(roughness, ndotd)).xy;
return fresnelColor * ambientFresnel.x + vec3(ambientFresnel.y);
#endif
}
<@if supportAmbientMap@>
@ -95,7 +102,7 @@ void evalLightingAmbient(out vec3 diffuse, out vec3 specular, LightAmbient ambie
vec3 ambientSpaceLowNormal = (ambient.transform * vec4(lowNormalCurvature.xyz, 0.0)).xyz;
<@endif@>
vec3 ambientFresnel = fresnelSchlickAmbient(fresnelF0, surface.ndotv, 1.0-surface.roughness);
vec3 ambientFresnel = fresnelSchlickAmbient(fresnelF0, surface.ndotv, surface.roughness);
diffuse = (1.0 - metallic) * (vec3(1.0) - ambientFresnel) *
sphericalHarmonics_evalSphericalLight(getLightAmbientSphere(ambient), ambientSpaceSurfaceNormal).xyz;

View file

@ -9,10 +9,85 @@
// See the accompanying file LICENSE or http://www.apache.org/licenses/LICENSE-2.0.html
//
#include "LightingModel.h"
#include "RandomAndNoise.h"
#include "BRDF.h"
#include <tbb/parallel_for.h>
#include <tbb/blocked_range2d.h>
gpu::TexturePointer LightingModel::_ambientFresnelLUT;
LightingModel::LightingModel() {
Parameters parameters;
_parametersBuffer = gpu::BufferView(std::make_shared<gpu::Buffer>(sizeof(Parameters), (const gpu::Byte*) &parameters, sizeof(Parameters)));
if (!_ambientFresnelLUT) {
// Code taken from the IntegrateBRDF method as described in this talk :
// https://cdn2.unrealengine.com/Resources/files/2013SiggraphPresentationsNotes-26915738.pdf
const auto N_roughness = 32;
const auto N_NdotV = 256;
using LUTVector = std::vector<glm::u16vec2>;
using LUTValueType = LUTVector::value_type::value_type;
LUTVector lut(N_roughness * N_NdotV);
_ambientFresnelLUT = gpu::Texture::create2D(gpu::Element{ gpu::VEC2, gpu::NUINT16, gpu::XY }, N_roughness, N_NdotV, 1U,
gpu::Sampler(gpu::Sampler::FILTER_MIN_POINT_MAG_LINEAR, gpu::Sampler::WRAP_CLAMP));
tbb::parallel_for(tbb::blocked_range2d<int, int>(0, N_NdotV, 8, 0, N_roughness, 8), [&](const tbb::blocked_range2d<int, int>& range) {
auto roughnessRange = range.cols();
auto ndotvRange = range.rows();
for (auto j = ndotvRange.begin(); j < ndotvRange.end(); j++) {
const float NdotV = j / float(N_NdotV - 1);
glm::vec3 V;
V.x = std::sqrt(1.0f - NdotV * NdotV); // sin
V.y = 0;
V.z = NdotV; // cos
for (auto k = roughnessRange.begin(); k < roughnessRange.end(); k++) {
const float roughness = k / float(N_roughness - 1);
const float alpha = roughness * roughness;
const float alphaSquared = alpha * alpha;
float A = 0.0f;
float B = 0.0f;
const uint NumSamples = 1024;
for (uint i = 0; i < NumSamples; i++) {
glm::vec2 Xi = hammersley::evaluate(i, NumSamples);
glm::vec3 H = ggx::sample(Xi, roughness);
float VdotH = glm::dot(V, H);
glm::vec3 L = 2.0f * VdotH * H - V;
float NdotL = L.z;
if (NdotL > 0.0f) {
VdotH = glm::clamp(VdotH, 0.0f, 1.0f);
float NdotH = glm::clamp(H.z, 0.0f, 1.0f);
float G = smith::evaluateFastWithoutNdotV(alphaSquared, NdotV, NdotL);
float G_Vis = (G * VdotH) / NdotH;
float Fc = std::pow(1.0f - VdotH, 5.0f);
A += (1.0f - Fc) * G_Vis;
B += Fc * G_Vis;
}
}
A /= NumSamples;
B /= NumSamples;
auto& lutValue = lut[k + j * N_roughness];
lutValue.x = (LUTValueType)(glm::min(1.0f, A) * std::numeric_limits<LUTValueType>::max());
lutValue.y = (LUTValueType)(glm::min(1.0f, B) * std::numeric_limits<LUTValueType>::max());
}
}
});
_ambientFresnelLUT->assignStoredMip(0, N_roughness * N_NdotV * sizeof(LUTVector::value_type), (const gpu::Byte*)lut.data());
}
}
void LightingModel::setUnlit(bool enable) {

View file

@ -83,6 +83,7 @@ public:
bool isShadowEnabled() const;
UniformBufferView getParametersBuffer() const { return _parametersBuffer; }
gpu::TexturePointer getAmbientFresnelLUT() const { return _ambientFresnelLUT; }
protected:
@ -126,6 +127,7 @@ protected:
Parameters() {}
};
UniformBufferView _parametersBuffer;
static gpu::TexturePointer _ambientFresnelLUT;
};
using LightingModelPointer = std::shared_ptr<LightingModel>;

View file

@ -94,6 +94,7 @@ void DrawLayered3D::run(const RenderContextPointer& renderContext, const Inputs&
// Setup lighting model for all items;
batch.setUniformBuffer(ru::Buffer::LightModel, lightingModel->getParametersBuffer());
batch.setResourceTexture(ru::Texture::AmbientFresnel, lightingModel->getAmbientFresnelLUT());
if (_opaquePass) {
renderStateSortShapes(renderContext, _shapePlumber, inItems, _maxDrawn);

View file

@ -471,6 +471,7 @@ void RenderTransparentDeferred::run(const RenderContextPointer& renderContext, c
// Setup lighting model for all items;
batch.setUniformBuffer(ru::Buffer::LightModel, lightingModel->getParametersBuffer());
batch.setResourceTexture(ru::Texture::AmbientFresnel, lightingModel->getAmbientFresnelLUT());
// Set the light
deferredLightingEffect->setupKeyLightBatch(args, batch, *lightFrame);
@ -536,6 +537,7 @@ void DrawStateSortDeferred::run(const RenderContextPointer& renderContext, const
// Setup lighting model for all items;
batch.setUniformBuffer(ru::Buffer::LightModel, lightingModel->getParametersBuffer());
batch.setResourceTexture(ru::Texture::AmbientFresnel, lightingModel->getAmbientFresnelLUT());
// From the lighting model, define a global shapeKey ORed with individual keys
ShapeKey::Builder keyBuilder;

View file

@ -251,6 +251,7 @@ void DrawForward::run(const RenderContextPointer& renderContext, const Inputs& i
// Setup lighting model for all items;
batch.setUniformBuffer(ru::Buffer::LightModel, lightingModel->getParametersBuffer());
batch.setResourceTexture(ru::Texture::AmbientFresnel, lightingModel->getAmbientFresnelLUT());
// From the lighting model, define a global shapeKey ORed with individual keys
ShapeKey::Builder keyBuilder;

View file

@ -54,6 +54,7 @@
#define RENDER_UTILS_TEXTURE_DEFERRED_DIFFUSED_CURVATURE 7
#define RENDER_UTILS_TEXTURE_DEFERRED_LIGHTING 10
#define RENDER_UTILS_TEXTURE_SKYBOX 11
#define RENDER_UTILS_TEXTURE_AMBIENT_FRESNEL 14
#define RENDER_UTILS_BUFFER_SHADOW_PARAMS 2
#define RENDER_UTILS_TEXTURE_SHADOW 12
@ -198,6 +199,7 @@ enum Texture {
BloomColor = RENDER_UTILS_TEXTURE_BLOOM_COLOR,
ToneMappingColor = RENDER_UTILS_TEXTURE_TM_COLOR,
TextFont = RENDER_UTILS_TEXTURE_TEXT_FONT,
AmbientFresnel = RENDER_UTILS_TEXTURE_AMBIENT_FRESNEL,
DebugTexture0 = RENDER_UTILS_DEBUG_TEXTURE0,
};
} // namespace texture

View file

@ -0,0 +1,45 @@
#include "BRDF.h"
#include <cmath>
#ifndef M_PI
#define M_PI 3.14159265359
#endif
namespace ggx {

    // Evaluates the GGX distribution term for a half vector whose cosine with the
    // normal is NdotH. The roughness is squared twice (roughness^4) before use.
    // NOTE(review): no 1/pi factor is applied here -- confirm that callers expect
    // the unnormalized value.
    // The result is NaN for roughness == 0 with NdotH == 1 (0/0): the lobe is then
    // infinitely narrow and has no finite value in that direction.
    float evaluate(float NdotH, float roughness) {
        float alpha = roughness * roughness;
        float alphaSquared = alpha * alpha;
        float denom = (float)(NdotH * NdotH * (alphaSquared - 1.0) + 1.0);
        return alphaSquared / (denom * denom);
    }

    // Maps a 2D point Xi to a half vector distributed according to the GGX lobe of
    // the given roughness. The result is expressed in tangent space, where the
    // normal is +Z (H.z is the cosine of the angle to the normal).
    //   Xi        : 2D sample; Xi.x drives the azimuth and Xi.y the polar angle
    //               (both are expected to lie in [0,1])
    //   roughness : perceptual roughness, squared internally before use
    glm::vec3 sample(const glm::vec2& Xi, const float roughness) {
        const float a = roughness * roughness;

        float phi = (float)(2.0 * M_PI * Xi.x);

        // With roughness == 0 and Xi.y == 1 both the numerator and the denominator
        // vanish and the original expression evaluated to 0/0 = NaN. A zero
        // roughness lobe sends every other sample straight along the normal
        // (cosTheta == 1), so the degenerate case is resolved the same way.
        const double denominator = 1.0 + (a*a - 1.0) * Xi.y;
        float cosTheta = (denominator > 0.0) ? (float)(std::sqrt((1.0 - Xi.y) / denominator)) : 1.0f;
        float sinTheta = (float)(std::sqrt(1.0 - cosTheta * cosTheta));

        // from spherical coordinates to cartesian coordinates
        glm::vec3 H;
        H.x = std::cos(phi) * sinTheta;
        H.y = std::sin(phi) * sinTheta;
        H.z = cosTheta;
        return H;
    }

}
namespace smith {

    // Smith visibility term with the NdotV factor of its numerator left out:
    // returns 2 * NdotL / (NdotL * sqrt(a2 + NdotV^2 * (1 - a2)) + NdotV * sqrt(a2 + NdotL^2 * (1 - a2)))
    // where a2 is alphaSquared. The denominator is not checked against zero.
    float evaluateFastWithoutNdotV(float alphaSquared, float NdotV, float NdotL) {
        const float complement = 1.0f - alphaSquared;

        float denominator = NdotL * std::sqrt(alphaSquared + NdotV * NdotV * complement);
        denominator += NdotV * std::sqrt(alphaSquared + NdotL * NdotL * complement);

        return 2.0f * NdotL / denominator;
    }

}

View file

@ -0,0 +1,36 @@
#pragma once
//
// BRDF.h
//
// Created by Olivier Prat on 04/04/19.
// Copyright 2019 High Fidelity, Inc.
//
// Distributed under the Apache License, Version 2.0.
// See the accompanying file LICENSE or http://www.apache.org/licenses/LICENSE-2.0.html
//
#ifndef SHARED_BRDF_H
#define SHARED_BRDF_H
#include <glm/vec2.hpp>
#include <glm/vec3.hpp>
// GGX micro-facet model
namespace ggx {
// Evaluates the GGX distribution term for a half vector whose cosine with the
// normal is NdotH. The implementation raises roughness to the fourth power and
// does not apply a 1/pi factor.
float evaluate(float NdotH, float roughness);
// Maps the 2D point Xi to a tangent space half vector (normal along +Z) for the
// given roughness: Xi.x selects the azimuth and Xi.y the polar angle.
// NOTE(review): Xi is presumably expected in [0,1]^2 -- confirm with callers
glm::vec3 sample(const glm::vec2& Xi, const float roughness);
}
// Smith visibility function
namespace smith {

    // Visibility term divided by NdotV; takes the squared alpha (roughness^4)
    float evaluateFastWithoutNdotV(float alphaSquared, float NdotV, float NdotL);

    // Full visibility term computed from the squared alpha (roughness^4)
    inline float evaluateFast(float alphaSquared, float NdotV, float NdotL) {
        const float visibilityOverNdotV = evaluateFastWithoutNdotV(alphaSquared, NdotV, NdotL);
        return visibilityOverNdotV * NdotV;
    }

    // Full visibility term computed from the perceptual roughness
    inline float evaluate(float roughness, float NdotV, float NdotL) {
        // Multiplied left to right, exactly as written, so rounding stays the same
        const float roughnessPow4 = roughness * roughness * roughness * roughness;
        return evaluateFast(roughnessPow4, NdotV, NdotL);
    }

}
#endif // SHARED_BRDF_H

View file

@ -12,22 +12,24 @@
#include <glm/vec2.hpp>
// Low discrepancy Halton sequence generator
template <int B>
float evaluateHalton(int index) {
float f = 1.0f;
float r = 0.0f;
float invB = 1.0f / (float)B;
index++; // Indices start at 1, not 0
namespace halton {
// Low discrepancy Halton sequence generator
template <int B>
float evaluate(int index) {
float f = 1.0f;
float r = 0.0f;
float invB = 1.0f / (float)B;
index++; // Indices start at 1, not 0
while (index > 0) {
f = f * invB;
r = r + f * (float)(index % B);
index = index / B;
while (index > 0) {
f = f * invB;
r = r + f * (float)(index % B);
index = index / B;
}
return r;
}
return r;
}
inline float getRadicalInverseVdC(uint32_t bits) {
@ -39,9 +41,12 @@ inline float getRadicalInverseVdC(uint32_t bits) {
return float(bits) * 2.3283064365386963e-10f; // / 0x100000000\n"
}
// Low discrepancy Hammersley 2D sequence generator
inline glm::vec2 evaluateHammersley(int k, const int sequenceLength) {
return glm::vec2(float(k) / float(sequenceLength), getRadicalInverseVdC(k));
namespace hammersley {

    // Low discrepancy Hammersley 2D sequence generator.
    // Returns point number k of a sequence of sequenceLength points: the first
    // coordinate is k / sequenceLength and the second one is the Van der Corput
    // radical inverse of k.
    inline glm::vec2 evaluate(int k, const int sequenceLength) {
        const float regularCoordinate = float(k) / float(sequenceLength);
        const float radicalInverse = getRadicalInverseVdC(k);
        return glm::vec2(regularCoordinate, radicalInverse);
    }

}
#endif