Switched to split sum model for ambient (as Unreal)

2025-08-04 14:06:22 +02:00 · 2019-04-04 16:10:33 +02:00 · 2019-04-04 16:10:33 +02:00 · b71a8f7902
commit b71a8f7902
parent 57de55c5ce
13 changed files with 207 additions and 47 deletions
--- a/libraries/image/src/image/CubeMap.cpp
+++ b/libraries/image/src/image/CubeMap.cpp
@ -15,6 +15,7 @@
 #include <tbb/blocked_range2d.h>

 #include "RandomAndNoise.h"
+#include "BRDF.h"
 #include "ImageLogging.h"

 #ifndef M_PI
@ -501,34 +502,15 @@ glm::vec4 CubeMap::fetchLod(const glm::vec3& dir, float lod) const {
    return loColor + (hiColor - loColor) * lodFrac;
 }

-static glm::vec3 sampleGGX(const glm::vec2& Xi, const float roughness) {
-    const float a = roughness * roughness;
-
-    float phi = (float)(2.0 * M_PI * Xi.x);
-    float cosTheta = (float)(std::sqrt((1.0 - Xi.y) / (1.0 + (a*a - 1.0) * Xi.y)));
-    float sinTheta = (float)(std::sqrt(1.0 - cosTheta * cosTheta));
-
-    // from spherical coordinates to cartesian coordinates
-    glm::vec3 H;
-    H.x = std::cos(phi) * sinTheta;
-    H.y = std::sin(phi) * sinTheta;
-    H.z = cosTheta;
-
-    return H;
-}
-
-static float evaluateGGX(float NdotH, float roughness) {
-    float alpha = roughness * roughness;
-    float alphaSquared = alpha * alpha;
-    float denom = (float)(NdotH * NdotH * (alphaSquared - 1.0) + 1.0);
-    return alphaSquared / (denom * denom);
-}
-
 struct CubeMap::GGXSamples {
    float invTotalWeight;
    std::vector<glm::vec4> points;
 };

+// All the GGX convolution code is inspired by:
+// https://placeholderart.wordpress.com/2015/07/28/implementation-notes-runtime-environment-map-filtering-for-image-based-lighting/
+// Computation is done in tangent space, so the normal is always (0,0,1), which simplifies a lot of things
+
 void CubeMap::generateGGXSamples(GGXSamples& data, float roughness, const int resolution) {
    glm::vec2 xi;
    glm::vec3 L;
@ -546,8 +528,8 @@ void CubeMap::generateGGXSamples(GGXSamples& data, float roughness, const int re
    // Do some computation in tangent space
    while (sampleIndex < sampleCount) {
        if (hammersleySampleIndex < hammersleySequenceLength) {
-            xi = evaluateHammersley((int)hammersleySampleIndex, (int)hammersleySequenceLength);
-            H = sampleGGX(xi, roughness);
+            xi = hammersley::evaluate((int)hammersleySampleIndex, (int)hammersleySequenceLength);
+            H = ggx::sample(xi, roughness);
            L = H * (2.0f * H.z) - glm::vec3(0.0f, 0.0f, 1.0f);
            NdotL = L.z;
            hammersleySampleIndex++;
@ -559,14 +541,14 @@ void CubeMap::generateGGXSamples(GGXSamples& data, float roughness, const int re
            // Create a purely random sample
            xi.x = rand() / float(RAND_MAX);
            xi.y = rand() / float(RAND_MAX);
-            H = sampleGGX(xi, roughness);
+            H = ggx::sample(xi, roughness);
            L = H * (2.0f * H.z) - glm::vec3(0.0f, 0.0f, 1.0f);
            NdotL = L.z;
        }

        float NdotH = std::max(0.0f, H.z);
        float HdotV = NdotH;
-        float D = evaluateGGX(NdotH, roughness);
+        float D = ggx::evaluate(NdotH, roughness);
        float pdf = (D * NdotH / (4.0f * HdotV)) + 0.0001f;
        float saSample = 1.0f / (float(sampleCount) * pdf + 0.0001f);
        float mipLevel = std::max(0.5f * log2(saSample / saTexel) + mipBias, 0.0f);
@ -628,7 +610,7 @@ void CubeMap::convolveMipFaceForGGX(const GGXSamples& samples, CubeMap& output,
    const auto outputLineStride = output.getMipLineStride(mipLevel);
    auto outputFacePixels = output.editFace(mipLevel, face);

-    tbb::parallel_for(tbb::blocked_range2d<int, int>(0, mipDimensions.x, 32, 0, mipDimensions.y, 32), [&](const tbb::blocked_range2d<int, int>& range) {
+    tbb::parallel_for(tbb::blocked_range2d<int, int>(0, mipDimensions.y, 32, 0, mipDimensions.x, 32), [&](const tbb::blocked_range2d<int, int>& range) {
        auto rowRange = range.rows();
        auto colRange = range.cols();

--- a/libraries/render-utils/src/AntialiasingEffect.cpp
+++ b/libraries/render-utils/src/AntialiasingEffect.cpp
@ -363,7 +363,7 @@ JitterSample::SampleSequence::SampleSequence(){
    // Halton sequence (2,3)

    for (int i = 0; i < SEQUENCE_LENGTH; i++) {
-        offsets[i] = glm::vec2(evaluateHalton<2>(i), evaluateHalton<3>(i));
+        offsets[i] = glm::vec2(halton::evaluate<2>(i), halton::evaluate<3>(i));
        offsets[i] -= vec2(0.5f);
    }
    offsets[SEQUENCE_LENGTH] = glm::vec2(0.0f);
--- a/libraries/render-utils/src/DeferredLightingEffect.cpp
+++ b/libraries/render-utils/src/DeferredLightingEffect.cpp
@ -365,6 +365,7 @@ void PrepareDeferred::run(const RenderContextPointer& renderContext, const Input

        // For the rest of the rendering, bind the lighting model
        batch.setUniformBuffer(ru::Buffer::LightModel, lightingModel->getParametersBuffer());
+        batch.setResourceTexture(ru::Texture::AmbientFresnel, lightingModel->getAmbientFresnelLUT());
    });
 }

@ -416,6 +417,7 @@ void RenderDeferredSetup::run(const render::RenderContextPointer& renderContext,

        // THe lighting model
        batch.setUniformBuffer(ru::Buffer::LightModel, lightingModel->getParametersBuffer());
+        batch.setResourceTexture(ru::Texture::AmbientFresnel, lightingModel->getAmbientFresnelLUT());

        // Subsurface scattering specific
        if (surfaceGeometryFramebuffer) {
--- a/libraries/render-utils/src/LightAmbient.slh
+++ b/libraries/render-utils/src/LightAmbient.slh
@ -27,10 +27,17 @@ vec4 evalSkyboxLight(vec3 direction, float lod) {
 <@endfunc@>

 <@func declareEvalAmbientSpecularIrradiance(supportAmbientSphere, supportAmbientMap, supportIfAmbientMapElseAmbientSphere)@>
+LAYOUT(binding=RENDER_UTILS_TEXTURE_AMBIENT_FRESNEL) uniform sampler2D ambientFresnelLUT;

-vec3 fresnelSchlickAmbient(vec3 fresnelColor, float ndotd, float gloss) {
+vec3 fresnelSchlickAmbient(vec3 fresnelColor, float ndotd, float roughness) { // Ambient Fresnel term computed from fresnelColor, ndotd and roughness
+#if 0
+    float gloss = 1.0-roughness; // Disabled branch: analytic Schlick-style approximation driven by gloss = 1 - roughness
    float f = pow(1.0 - ndotd, 5.0);
    return fresnelColor + (max(vec3(gloss), fresnelColor) - fresnelColor) * f;
+#else
+    vec2 ambientFresnel = texture(ambientFresnelLUT, vec2(roughness, ndotd)).xy; // Active branch: LUT lookup, roughness on the first coordinate and ndotd on the second
+    return fresnelColor * ambientFresnel.x + vec3(ambientFresnel.y); // LUT x scales the Fresnel color, LUT y is added to every channel as a bias
+#endif
 }

 <@if supportAmbientMap@>
@ -95,7 +102,7 @@ void evalLightingAmbient(out vec3 diffuse, out vec3 specular, LightAmbient ambie
    vec3 ambientSpaceLowNormal = (ambient.transform * vec4(lowNormalCurvature.xyz, 0.0)).xyz;
 <@endif@>

-    vec3 ambientFresnel = fresnelSchlickAmbient(fresnelF0, surface.ndotv, 1.0-surface.roughness);
+    vec3 ambientFresnel = fresnelSchlickAmbient(fresnelF0, surface.ndotv, surface.roughness);

    diffuse = (1.0 - metallic) * (vec3(1.0) - ambientFresnel) * 
              sphericalHarmonics_evalSphericalLight(getLightAmbientSphere(ambient), ambientSpaceSurfaceNormal).xyz;
--- a/libraries/render-utils/src/LightingModel.cpp
+++ b/libraries/render-utils/src/LightingModel.cpp
@ -9,10 +9,85 @@
 //  See the accompanying file LICENSE or http://www.apache.org/licenses/LICENSE-2.0.html
 //
 #include "LightingModel.h"
+#include "RandomAndNoise.h"
+#include "BRDF.h"
+
+#include <tbb/parallel_for.h>
+#include <tbb/blocked_range2d.h>
+
+gpu::TexturePointer LightingModel::_ambientFresnelLUT;

 LightingModel::LightingModel() {
    Parameters parameters;
    _parametersBuffer = gpu::BufferView(std::make_shared<gpu::Buffer>(sizeof(Parameters), (const gpu::Byte*) &parameters, sizeof(Parameters)));
+    // The ambient Fresnel LUT is a static member: it is only built when it is still null.
+    if (!_ambientFresnelLUT) { // NOTE(review): checked and assigned without synchronization - confirm construction is single-threaded
+        // Code taken from the IntegrateBRDF method as described in this talk:
+        // https://cdn2.unrealengine.com/Resources/files/2013SiggraphPresentationsNotes-26915738.pdf
+        const auto N_roughness = 32;   // number of roughness entries (fastest-varying LUT index, see lut[k + j * N_roughness])
+        const auto N_NdotV = 256;      // number of NdotV entries
+        // Each LUT entry stores the pair (A, B) as two unsigned 16-bit values.
+        using LUTVector = std::vector<glm::u16vec2>;
+        using LUTValueType = LUTVector::value_type::value_type;
+
+        LUTVector lut(N_roughness * N_NdotV);
+
+        _ambientFresnelLUT = gpu::Texture::create2D(gpu::Element{ gpu::VEC2, gpu::NUINT16, gpu::XY }, N_roughness, N_NdotV, 1U,
+                                                    gpu::Sampler(gpu::Sampler::FILTER_MIN_POINT_MAG_LINEAR, gpu::Sampler::WRAP_CLAMP));
+        // Fill the table in parallel: rows of the 2D range iterate NdotV, columns iterate roughness.
+        tbb::parallel_for(tbb::blocked_range2d<int, int>(0, N_NdotV, 8, 0, N_roughness, 8), [&](const tbb::blocked_range2d<int, int>& range) {
+            auto roughnessRange = range.cols();
+            auto ndotvRange = range.rows();
+
+            for (auto j = ndotvRange.begin(); j < ndotvRange.end(); j++) {
+                const float NdotV = j / float(N_NdotV - 1); // maps j in [0, N_NdotV - 1] to [0, 1]
+                // View vector V in tangent space: the normal is the +Z axis, so V.z is NdotV.
+                glm::vec3 V;
+                V.x = std::sqrt(1.0f - NdotV * NdotV);  // sin
+                V.y = 0;
+                V.z = NdotV;                            // cos
+
+                for (auto k = roughnessRange.begin(); k < roughnessRange.end(); k++) {
+                    const float roughness = k / float(N_roughness - 1); // maps k in [0, N_roughness - 1] to [0, 1]
+                    const float alpha = roughness * roughness;
+                    const float alphaSquared = alpha * alpha;
+                    // Accumulators for the two integrated terms: A ends up in lutValue.x and B in lutValue.y.
+                    float A = 0.0f;
+                    float B = 0.0f;
+
+                    const uint NumSamples = 1024; // NOTE(review): uint is not a standard C++ type - presumably a project or glm typedef, confirm
+                    for (uint i = 0; i < NumSamples; i++) {
+                        glm::vec2 Xi = hammersley::evaluate(i, NumSamples);
+                        glm::vec3 H = ggx::sample(Xi, roughness);
+                        float VdotH = glm::dot(V, H);
+                        glm::vec3 L = 2.0f * VdotH * H - V; // L is V reflected about the sampled half vector H
+                        float NdotL = L.z;
+                        // Only samples whose reflected direction has a positive z component contribute.
+                        if (NdotL > 0.0f) {
+                            VdotH = glm::clamp(VdotH, 0.0f, 1.0f);
+
+                            float NdotH = glm::clamp(H.z, 0.0f, 1.0f);
+                            float G = smith::evaluateFastWithoutNdotV(alphaSquared, NdotV, NdotL);
+                            float G_Vis = (G * VdotH) / NdotH; // NOTE(review): no guard for NdotH == 0 - confirm ggx::sample never yields H.z == 0 for these inputs
+                            float Fc = std::pow(1.0f - VdotH, 5.0f);
+
+                            A += (1.0f - Fc) * G_Vis;
+                            B += Fc * G_Vis;
+                        }
+                    }
+                    // Average over all samples; rejected samples contribute zero.
+                    A /= NumSamples;
+                    B /= NumSamples;
+                    // Clamp each term to 1 and scale it to the full range of the 16-bit storage type (the cast truncates).
+                    auto& lutValue = lut[k + j * N_roughness];
+                    lutValue.x = (LUTValueType)(glm::min(1.0f, A) * std::numeric_limits<LUTValueType>::max()); // NOTE(review): relies on <limits>, presumably included transitively - confirm
+                    lutValue.y = (LUTValueType)(glm::min(1.0f, B) * std::numeric_limits<LUTValueType>::max());
+                }
+            }
+        });
+
+        _ambientFresnelLUT->assignStoredMip(0, N_roughness * N_NdotV * sizeof(LUTVector::value_type), (const gpu::Byte*)lut.data());
+    }
 }

 void LightingModel::setUnlit(bool enable) {
--- a/libraries/render-utils/src/LightingModel.h
+++ b/libraries/render-utils/src/LightingModel.h
@ -83,6 +83,7 @@ public:
    bool isShadowEnabled() const;

    UniformBufferView getParametersBuffer() const { return _parametersBuffer; }
+    gpu::TexturePointer getAmbientFresnelLUT() const { return _ambientFresnelLUT; }

 protected:

@ -126,6 +127,7 @@ protected:
        Parameters() {}
    };
    UniformBufferView _parametersBuffer;
+    static gpu::TexturePointer _ambientFresnelLUT;
 };

 using LightingModelPointer = std::shared_ptr<LightingModel>;
--- a/libraries/render-utils/src/RenderCommonTask.cpp
+++ b/libraries/render-utils/src/RenderCommonTask.cpp
@ -94,6 +94,7 @@ void DrawLayered3D::run(const RenderContextPointer& renderContext, const Inputs&

            // Setup lighting model for all items;
            batch.setUniformBuffer(ru::Buffer::LightModel, lightingModel->getParametersBuffer());
+            batch.setResourceTexture(ru::Texture::AmbientFresnel, lightingModel->getAmbientFresnelLUT());

            if (_opaquePass) {
                renderStateSortShapes(renderContext, _shapePlumber, inItems, _maxDrawn);
--- a/libraries/render-utils/src/RenderDeferredTask.cpp
+++ b/libraries/render-utils/src/RenderDeferredTask.cpp
@ -471,6 +471,7 @@ void RenderTransparentDeferred::run(const RenderContextPointer& renderContext, c

        // Setup lighting model for all items;
        batch.setUniformBuffer(ru::Buffer::LightModel, lightingModel->getParametersBuffer());
+        batch.setResourceTexture(ru::Texture::AmbientFresnel, lightingModel->getAmbientFresnelLUT());

        // Set the light
        deferredLightingEffect->setupKeyLightBatch(args, batch, *lightFrame);
@ -536,6 +537,7 @@ void DrawStateSortDeferred::run(const RenderContextPointer& renderContext, const

        // Setup lighting model for all items;
        batch.setUniformBuffer(ru::Buffer::LightModel, lightingModel->getParametersBuffer());
+        batch.setResourceTexture(ru::Texture::AmbientFresnel, lightingModel->getAmbientFresnelLUT());

        // From the lighting model define a global shapeKey ORED with individiual keys
        ShapeKey::Builder keyBuilder;
--- a/libraries/render-utils/src/RenderForwardTask.cpp
+++ b/libraries/render-utils/src/RenderForwardTask.cpp
@ -251,6 +251,7 @@ void DrawForward::run(const RenderContextPointer& renderContext, const Inputs& i

        // Setup lighting model for all items;
        batch.setUniformBuffer(ru::Buffer::LightModel, lightingModel->getParametersBuffer());
+        batch.setResourceTexture(ru::Texture::AmbientFresnel, lightingModel->getAmbientFresnelLUT());

        // From the lighting model define a global shapeKey ORED with individiual keys
        ShapeKey::Builder keyBuilder;
--- a/libraries/render-utils/src/render-utils/ShaderConstants.h
+++ b/libraries/render-utils/src/render-utils/ShaderConstants.h
@ -54,6 +54,7 @@
 #define RENDER_UTILS_TEXTURE_DEFERRED_DIFFUSED_CURVATURE 7
 #define RENDER_UTILS_TEXTURE_DEFERRED_LIGHTING 10
 #define RENDER_UTILS_TEXTURE_SKYBOX 11
+#define RENDER_UTILS_TEXTURE_AMBIENT_FRESNEL 14

 #define RENDER_UTILS_BUFFER_SHADOW_PARAMS 2
 #define RENDER_UTILS_TEXTURE_SHADOW 12
@ -198,6 +199,7 @@ enum Texture {
    BloomColor = RENDER_UTILS_TEXTURE_BLOOM_COLOR,
    ToneMappingColor = RENDER_UTILS_TEXTURE_TM_COLOR,
    TextFont = RENDER_UTILS_TEXTURE_TEXT_FONT,
+    AmbientFresnel = RENDER_UTILS_TEXTURE_AMBIENT_FRESNEL,
    DebugTexture0 = RENDER_UTILS_DEBUG_TEXTURE0,
 };
 } // namespace texture
--- a/libraries/shared/src/BRDF.cpp
+++ b/libraries/shared/src/BRDF.cpp
@ -0,0 +1,45 @@
+#include "BRDF.h"
+
+#include <cmath>
+#ifndef M_PI
+#define M_PI    3.14159265359
+#endif
+
+namespace ggx {
+// GGX distribution term for the cosine NdotH, with alpha = roughness^2. No 1/pi factor is applied here.
+float evaluate(float NdotH, float roughness) {
+    float alpha = roughness * roughness;
+    float alphaSquared = alpha * alpha;
+    float denom = (float)(NdotH * NdotH * (alphaSquared - 1.0) + 1.0); // the 1.0 literals promote the expression to double before narrowing
+    return alphaSquared / (denom * denom);
+}
+// Maps a 2D point Xi to a unit half vector H around the +Z axis (H.z = cosTheta). Xi is presumably in [0,1)^2 - confirm with callers.
+glm::vec3 sample(const glm::vec2& Xi, const float roughness) {
+    const float a = roughness * roughness;
+
+    float phi = (float)(2.0 * M_PI * Xi.x); // Xi.x selects the azimuth
+    float cosTheta = (float)(std::sqrt((1.0 - Xi.y) / (1.0 + (a*a - 1.0) * Xi.y))); // Xi.y selects the polar angle; NOTE(review): Xi.y == 1 with roughness == 0 evaluates 0/0
+    float sinTheta = (float)(std::sqrt(1.0 - cosTheta * cosTheta));
+
+    // from spherical coordinates to cartesian coordinates
+    glm::vec3 H;
+    H.x = std::cos(phi) * sinTheta;
+    H.y = std::sin(phi) * sinTheta;
+    H.z = cosTheta;
+
+    return H;
+}
+
+}
+
+
+namespace smith {
+    // Returns 2 * NdotL / G for the G computed below; smith::evaluateFast() in BRDF.h multiplies this result by NdotV.
+    float evaluateFastWithoutNdotV(float alphaSquared, float NdotV, float NdotL) {
+        float oneMinusAlphaSquared = 1.0f - alphaSquared;
+        float G = NdotL * std::sqrt(alphaSquared + NdotV * NdotV * oneMinusAlphaSquared);
+        G = G + NdotV * std::sqrt(alphaSquared + NdotL * NdotL * oneMinusAlphaSquared);
+        return 2.0f * NdotL / G; // NOTE(review): no guard for G == 0 (e.g. alphaSquared == 0 together with NdotV == 0)
+    }
+
+}
--- a/libraries/shared/src/BRDF.h
+++ b/libraries/shared/src/BRDF.h
@ -0,0 +1,36 @@
+#pragma once
+//
+//  BRDF.h
+//
+//  Created by Olivier Prat on 04/04/19.
+//  Copyright 2019 High Fidelity, Inc.
+//
+//  Distributed under the Apache License, Version 2.0.
+//  See the accompanying file LICENSE or http://www.apache.org/licenses/LICENSE-2.0.html
+//
+#ifndef SHARED_BRDF_H
+#define SHARED_BRDF_H
+
+#include <glm/vec2.hpp>
+#include <glm/vec3.hpp>
+
+// GGX micro-facet model (definitions live in BRDF.cpp, where alpha is roughness squared)
+namespace ggx {
+    float evaluate(float NdotH, float roughness); // distribution term: alpha^2 / (NdotH^2 * (alpha^2 - 1) + 1)^2, without a 1/pi factor
+    glm::vec3 sample(const glm::vec2& Xi, const float roughness); // half vector around +Z built from the 2D point Xi
+}
+
+// Smith visibility function; the three variants differ only in how the roughness term and the NdotV factor are supplied
+namespace smith {
+    float evaluateFastWithoutNdotV(float alphaSquared, float NdotV, float NdotL); // defined in BRDF.cpp; result leaves out the NdotV factor
+
+    inline float evaluateFast(float alphaSquared, float NdotV, float NdotL) { // takes alpha^2 directly
+        return evaluateFastWithoutNdotV(alphaSquared, NdotV, NdotL) * NdotV;
+    }
+
+    inline float evaluate(float roughness, float NdotV, float NdotL) { // takes roughness and converts it to alpha^2
+        return evaluateFast(roughness*roughness*roughness*roughness, NdotV, NdotL); // alpha = roughness^2, so alpha^2 = roughness^4
+    }
+}
+
+#endif // SHARED_BRDF_H
--- a/libraries/shared/src/RandomAndNoise.h
+++ b/libraries/shared/src/RandomAndNoise.h
@ -12,22 +12,24 @@

 #include <glm/vec2.hpp>

-// Low discrepancy Halton sequence generator
-template <int B>
-float evaluateHalton(int index) {
-    float f = 1.0f;
-    float r = 0.0f;
-    float invB = 1.0f / (float)B;
-    index++; // Indices start at 1, not 0
+namespace halton {
+    // Low discrepancy Halton sequence generator: returns the base-B radical inverse of (index + 1)
+    template <int B>
+    float evaluate(int index) {
+        float f = 1.0f; // weight of the current digit: B^-1, B^-2, ...
+        float r = 0.0f; // accumulated result
+        float invB = 1.0f / (float)B;
+        index++; // Indices start at 1, not 0

-    while (index > 0) {
-        f = f * invB;
-        r = r + f * (float)(index % B);
-        index = index / B;
+        while (index > 0) { // consume the base-B digits of index, least significant first
+            f = f * invB;
+            r = r + f * (float)(index % B);
+            index = index / B;

+        }
+
+        return r;
    }
-
-    return r;
 }

 inline float getRadicalInverseVdC(uint32_t bits) {
@ -39,9 +41,12 @@ inline float getRadicalInverseVdC(uint32_t bits) {
    return float(bits) * 2.3283064365386963e-10f; // / 0x100000000\n"
 }

-// Low discrepancy Hammersley 2D sequence generator
-inline glm::vec2 evaluateHammersley(int k, const int sequenceLength) {
-    return glm::vec2(float(k) / float(sequenceLength), getRadicalInverseVdC(k));
+namespace hammersley {
+    // Low discrepancy Hammersley 2D sequence generator: point k of a sequence of sequenceLength points
+    inline glm::vec2 evaluate(int k, const int sequenceLength) {
+        return glm::vec2(float(k) / float(sequenceLength), getRadicalInverseVdC(k)); // x = k / sequenceLength, y = getRadicalInverseVdC(k)
+    }
 }

+
 #endif