From 4a2323f3c2323f700be53c8f5ee12b1f99b4f4b0 Mon Sep 17 00:00:00 2001
From: Olivier Prat <olivier@zvork.fr>
Date: Wed, 27 Mar 2019 17:43:26 +0100
Subject: [PATCH] Just need to write correct textureLod equivalent on CPU cube
 map

---
 libraries/image/src/image/CubeMap.cpp         | 214 ++++++++++++++++++
 libraries/image/src/image/CubeMap.h           |  10 +
 libraries/image/src/image/Image.cpp           |  19 +-
 .../render-utils/src/AntialiasingEffect.cpp   |  29 +--
 libraries/shared/src/RandomAndNoise.h         |  47 ++++
 5 files changed, 280 insertions(+), 39 deletions(-)
 create mode 100644 libraries/shared/src/RandomAndNoise.h

diff --git a/libraries/image/src/image/CubeMap.cpp b/libraries/image/src/image/CubeMap.cpp
index 303cb98fe7..acd8d6fb85 100644
--- a/libraries/image/src/image/CubeMap.cpp
+++ b/libraries/image/src/image/CubeMap.cpp
@@ -10,6 +10,16 @@
 //
 #include "CubeMap.h"
 
+#include <cmath>
+#include <tbb/parallel_for.h>
+#include <tbb/blocked_range2d.h>
+
+#include "RandomAndNoise.h"
+
+#ifndef M_PI
+#define M_PI    3.14159265359
+#endif
+
 using namespace image;
 
 CubeMap::CubeMap(int width, int height, int mipCount) :
@@ -26,3 +36,207 @@ CubeMap::CubeMap(int width, int height, int mipCount) :
         }
     }
 }
+
+glm::vec4 CubeMap::fetchLod(const glm::vec3& dir, float lod) const {
+    // TODO
+    return glm::vec4(0.0f);
+}
+
+static glm::vec3 sampleGGX(const glm::vec2& Xi, const float roughness) {
+    const float a = roughness * roughness;
+
+    float phi = (float)(2.0 * M_PI * Xi.x);
+    float cosTheta = (float)(std::sqrt((1.0 - Xi.y) / (1.0 + (a*a - 1.0) * Xi.y)));
+    float sinTheta = (float)(std::sqrt(1.0 - cosTheta * cosTheta));
+
+    // from spherical coordinates to cartesian coordinates
+    glm::vec3 H;
+    H.x = std::cos(phi) * sinTheta;
+    H.y = std::sin(phi) * sinTheta;
+    H.z = cosTheta;
+
+    return H;
+}
+
+static float evaluateGGX(float NdotH, float roughness) {
+    float alpha = roughness * roughness;
+    float alphaSquared = alpha * alpha;
+    float denom = (float)(NdotH * NdotH * (alphaSquared - 1.0) + 1.0);
+    return alphaSquared / (denom * denom);
+}
+
+struct CubeMap::GGXSamples {
+    float invTotalWeight;
+    std::vector<glm::vec4> points;
+};
+
+void CubeMap::generateGGXSamples(GGXSamples& data, float roughness, const int resolution) {
+    glm::vec2 xi;
+    glm::vec3 L;
+    glm::vec3 H;
+    const float saTexel = (float)(4.0 * M_PI / (6.0 * resolution * resolution));
+    const float mipBias = 3.0f;
+    const auto sampleCount = data.points.size();
+    const auto hammersleySequenceLength = data.points.size();
+    int sampleIndex = 0;
+    int hammersleySampleIndex = 0;
+    float NdotL;
+
+    data.invTotalWeight = 0.0f;
+
+    // Do some computation in tangent space
+    while (sampleIndex < sampleCount) {
+        if (hammersleySampleIndex < hammersleySequenceLength) {
+            xi = evaluateHammersley((int)hammersleySampleIndex, (int)hammersleySequenceLength);
+            H = sampleGGX(xi, roughness);
+            L = H * (2.0f * H.z) - glm::vec3(0.0f, 0.0f, 1.0f);
+            NdotL = L.z;
+            hammersleySampleIndex++;
+        } else {
+            NdotL = -1.0f;
+        }
+
+        while (NdotL <= 0.0f) {
+            // Create a purely random sample
+            xi.x = rand() / float(RAND_MAX);
+            xi.y = rand() / float(RAND_MAX);
+            H = sampleGGX(xi, roughness);
+            L = H * (2.0f * H.z) - glm::vec3(0.0f, 0.0f, 1.0f);
+            NdotL = L.z;
+        }
+
+        float NdotH = std::max(0.0f, H.z);
+        float HdotV = NdotH;
+        float D = evaluateGGX(NdotH, roughness);
+        float pdf = (D * NdotH / (4.0f * HdotV)) + 0.0001f;
+        float saSample = 1.0f / (float(sampleCount) * pdf + 0.0001f);
+        float mipLevel = std::max(0.5f * log2(saSample / saTexel) + mipBias, 0.0f);
+
+        auto& sample = data.points[sampleIndex];
+        sample.x = L.x;
+        sample.y = L.y;
+        sample.z = L.z;
+        sample.w = mipLevel;
+
+        data.invTotalWeight += NdotL;
+
+        sampleIndex++;
+    }
+    data.invTotalWeight = 1.0f / data.invTotalWeight;
+}
+
+void CubeMap::convolveForGGX(CubeMap& output, const std::atomic<bool>& abortProcessing) const {
+    // This should match fragment.glsl values, too
+    static const float ROUGHNESS_1_MIP_RESOLUTION = 1.5f;
+    static const gpu::uint16 MAX_SAMPLE_COUNT = 4000;
+
+    const auto mipCount = getMipCount();
+    GGXSamples params;
+
+    params.points.reserve(MAX_SAMPLE_COUNT);
+
+    for (gpu::uint16 mipLevel = 0; mipLevel < mipCount; ++mipLevel) {
+        // This is the inverse code found in fragment.glsl in evaluateAmbientLighting
+        float levelAlpha = float(mipLevel) / (mipCount - ROUGHNESS_1_MIP_RESOLUTION);
+        float mipRoughness = levelAlpha * (1.0f + 2.0f * levelAlpha) / 3.0f;
+        mipRoughness = std::max(1e-3f, mipRoughness);
+        mipRoughness = std::min(1.0f, mipRoughness);
+
+        params.points.resize(std::min<size_t>(MAX_SAMPLE_COUNT, 1U + size_t(4000 * mipRoughness * mipRoughness)));
+        generateGGXSamples(params, mipRoughness, _width);
+
+        for (int face = 0; face < 6; face++) {
+            convolveMipFaceForGGX(params, output, mipLevel, face, abortProcessing);
+            if (abortProcessing.load()) {
+                return;
+            }
+        }
+    }
+}
+
+void CubeMap::convolveMipFaceForGGX(const GGXSamples& samples, CubeMap& output, gpu::uint16 mipLevel, int face, const std::atomic<bool>& abortProcessing) const {
+    static const glm::vec3 NORMALS[24] = {
+        // POSITIVE X
+        glm::vec3(1.0f, 1.0f, 1.0f),
+        glm::vec3(1.0f, 1.0f, -1.0f),
+        glm::vec3(1.0f, -1.0f, 1.0f),
+        glm::vec3(1.0f, -1.0f, -1.0f),
+        // NEGATIVE X
+        glm::vec3(-1.0f, 1.0f, -1.0f),
+        glm::vec3(-1.0f, 1.0f, 1.0f),
+        glm::vec3(-1.0f, -1.0f, -1.0f),
+        glm::vec3(-1.0f, -1.0f, 1.0f),
+        // POSITIVE Y
+        glm::vec3(-1.0f, 1.0f, -1.0f),
+        glm::vec3(1.0f, 1.0f, -1.0f),
+        glm::vec3(-1.0f, 1.0f, 1.0f),
+        glm::vec3(1.0f, 1.0f, 1.0f),
+        // NEGATIVE Y
+        glm::vec3(-1.0f, -1.0f, 1.0f),
+        glm::vec3(1.0f, -1.0f, 1.0f),
+        glm::vec3(-1.0f, -1.0f, -1.0f),
+        glm::vec3(1.0f, -1.0f, -1.0f),
+        // POSITIVE Z
+        glm::vec3(-1.0f, 1.0f, 1.0f),
+        glm::vec3(1.0f, 1.0f, 1.0f),
+        glm::vec3(-1.0f, -1.0f, 1.0f),
+        glm::vec3(1.0f, -1.0f, 1.0f),
+        // NEGATIVE Z
+        glm::vec3(1.0f, 1.0f, -1.0f),
+        glm::vec3(-1.0f, 1.0f, -1.0f),
+        glm::vec3(1.0f, -1.0f, -1.0f),
+        glm::vec3(-1.0f, -1.0f, -1.0f)
+    };
+
+    const glm::vec3* faceNormals = NORMALS + face * 4;
+    const glm::vec3 deltaXNormalLo = faceNormals[1] - faceNormals[0];
+    const glm::vec3 deltaXNormalHi = faceNormals[3] - faceNormals[2];
+    auto& outputFace = output._mips[mipLevel][face];
+
+    tbb::parallel_for(tbb::blocked_range2d<int, int>(0, _width, 16, 0, _height, 16), [&](const tbb::blocked_range2d<int, int>& range) {
+        auto rowRange = range.rows();
+        auto colRange = range.cols();
+
+        for (auto x = rowRange.begin(); x < rowRange.end(); x++) {
+            const float xAlpha = (x + 0.5f) / _width;
+            const glm::vec3 normalYLo = faceNormals[0] + deltaXNormalLo * xAlpha;
+            const glm::vec3 normalYHi = faceNormals[2] + deltaXNormalHi * xAlpha;
+            const glm::vec3 deltaYNormal = normalYHi - normalYLo;
+
+            for (auto y = colRange.begin(); y < colRange.end(); y++) {
+                const float yAlpha = (y + 0.5f) / _width;
+                // Interpolate normal for this pixel
+                const glm::vec3 normal = glm::normalize(normalYLo + deltaYNormal * yAlpha);
+
+                outputFace[x + y * _width] = computeConvolution(normal, samples);
+            }
+
+            if (abortProcessing.load()) {
+                break;
+            }
+        }
+    });
+}
+
+glm::vec4 CubeMap::computeConvolution(const glm::vec3& N, const GGXSamples& samples) const {
+    // from tangent-space vector to world-space
+    glm::vec3 bitangent = abs(N.z) < 0.999 ? glm::vec3(0.0, 0.0, 1.0) : glm::vec3(1.0, 0.0, 0.0);
+    glm::vec3 tangent = glm::normalize(glm::cross(bitangent, N));
+    bitangent = glm::cross(N, tangent);
+
+    const size_t sampleCount = samples.points.size();
+    glm::vec4 prefilteredColor = glm::vec4(0.0f);
+
+    for (int i = 0; i < sampleCount; ++i) {
+        const auto& sample = samples.points[i];
+        glm::vec3 L(sample.x, sample.y, sample.z);
+        float NdotL = L.z;
+        float mipLevel = sample.w;
+        // Now back to world space
+        L = tangent * L.x + bitangent * L.y + N * L.z;
+        prefilteredColor += fetchLod(L, mipLevel) * NdotL;
+    }
+    prefilteredColor = prefilteredColor * samples.invTotalWeight;
+    prefilteredColor.a = 1.0f;
+    return prefilteredColor;
+}
\ No newline at end of file
diff --git a/libraries/image/src/image/CubeMap.h b/libraries/image/src/image/CubeMap.h
index 05a571cafc..231db7d76f 100644
--- a/libraries/image/src/image/CubeMap.h
+++ b/libraries/image/src/image/CubeMap.h
@@ -16,6 +16,7 @@
 #include <glm/vec4.hpp>
 #include <vector>
 #include <array>
+#include <atomic>
 
 namespace image {
 
@@ -31,11 +32,20 @@ namespace image {
         Faces& editMip(gpu::uint16 mipLevel) { return _mips[mipLevel]; }
         const Faces& getMip(gpu::uint16 mipLevel) const { return _mips[mipLevel]; }
 
+        void convolveForGGX(CubeMap& output, const std::atomic<bool>& abortProcessing) const;
+        glm::vec4 fetchLod(const glm::vec3& dir, float lod) const;
+
     private:
 
+        struct GGXSamples;
+
         int _width;
         int _height;
         std::vector<Faces> _mips;
+
+        static void generateGGXSamples(GGXSamples& data, float roughness, const int resolution);
+        void convolveMipFaceForGGX(const GGXSamples& samples, CubeMap& output, gpu::uint16 mipLevel, int face, const std::atomic<bool>& abortProcessing) const;
+        glm::vec4 computeConvolution(const glm::vec3& normal, const GGXSamples& samples) const;
     };
 
 }
diff --git a/libraries/image/src/image/Image.cpp b/libraries/image/src/image/Image.cpp
index 6e7e08ea89..7131871937 100644
--- a/libraries/image/src/image/Image.cpp
+++ b/libraries/image/src/image/Image.cpp
@@ -850,13 +850,10 @@ void generateMips(gpu::Texture* texture, QImage&& image, BackendTarget target, c
     }
 }
 
-void convolveFaceWithGGX(const CubeMap& source, int face, const std::atomic<bool>& abortProcessing) {
-
-}
-
-void convolveWithGGX(gpu::Texture* texture, BackendTarget target, const std::atomic<bool>& abortProcessing = false) {
-    PROFILE_RANGE(resource_parse, "convolveWithGGX");
+void convolveForGGX(gpu::Texture* texture, BackendTarget target, const std::atomic<bool>& abortProcessing = false) {
+    PROFILE_RANGE(resource_parse, "convolveForGGX");
     CubeMap source(texture->getWidth(), texture->getHeight(), texture->getNumMips());
+    CubeMap output(texture->getWidth(), texture->getHeight(), texture->getNumMips());
     gpu::uint16 mipLevel;
     int face;
     const auto textureFormat = texture->getTexelFormat();
@@ -875,18 +872,16 @@ void convolveWithGGX(gpu::Texture* texture, BackendTarget target, const std::ato
         }
     }
 
-    for (face = 0; face < 6; face++) {
-        convolveFaceWithGGX(source, face, abortProcessing);
-    }
+    source.convolveForGGX(output, abortProcessing);
 
     if (!abortProcessing) {
         // Convert all mip data back from float
         unsigned char* convertedPixels = new unsigned char[texture->getWidth() * texture->getHeight() * sizeof(uint32)];
 
-        for (mipLevel = 0; mipLevel < source.getMipCount(); ++mipLevel) {
+        for (mipLevel = 0; mipLevel < output.getMipCount(); ++mipLevel) {
             auto mipDims = texture->evalMipDimensions(mipLevel);
             auto mipSize = texture->evalMipFaceSize(mipLevel);
-            auto& mip = source.getMip(mipLevel);
+            auto& mip = output.getMip(mipLevel);
 
             for (face = 0; face < 6; face++) {
                 convertFromFloat(convertedPixels, mipDims.x, mipDims.y, sizeof(uint32)*mipDims.x, textureFormat, mip[face]);
@@ -1620,7 +1615,7 @@ gpu::TexturePointer TextureUsage::processCubeTextureColorFromImage(QImage&& srcI
         }
 
         if (options & CUBE_GGX_CONVOLVE) {
-            convolveWithGGX(theTexture.get(), target, abortProcessing);
+            convolveForGGX(theTexture.get(), target, abortProcessing);
         }
     }
 
diff --git a/libraries/render-utils/src/AntialiasingEffect.cpp b/libraries/render-utils/src/AntialiasingEffect.cpp
index 17c13df19a..f30e67a979 100644
--- a/libraries/render-utils/src/AntialiasingEffect.cpp
+++ b/libraries/render-utils/src/AntialiasingEffect.cpp
@@ -26,7 +26,7 @@
 #include "ViewFrustum.h"
 #include "GeometryCache.h"
 #include "FramebufferCache.h"
-
+#include "RandomAndNoise.h"
 
 namespace ru {
     using render_utils::slot::texture::Texture;
@@ -359,36 +359,11 @@ int JitterSampleConfig::play() {
     return _state;
 }
 
-template <int B> 
-class Halton {
-public:
-
-    float eval(int index) const {
-        float f = 1.0f;
-        float r = 0.0f;
-        float invB = 1.0f / (float)B;
-        index++; // Indices start at 1, not 0
-
-        while (index > 0) {
-            f = f * invB;
-            r = r + f * (float)(index % B);
-            index = index / B;
-
-        }
-
-        return r;
-    }
-
-};
-
-
 JitterSample::SampleSequence::SampleSequence(){
     // Halton sequence (2,3)
-    Halton<2> genX;
-    Halton<3> genY;
 
     for (int i = 0; i < SEQUENCE_LENGTH; i++) {
-        offsets[i] = glm::vec2(genX.eval(i), genY.eval(i));
+        offsets[i] = glm::vec2(evaluateHalton<2>(i), evaluateHalton<3>(i));
         offsets[i] -= vec2(0.5f);
     }
     offsets[SEQUENCE_LENGTH] = glm::vec2(0.0f);
diff --git a/libraries/shared/src/RandomAndNoise.h b/libraries/shared/src/RandomAndNoise.h
new file mode 100644
index 0000000000..c69c186159
--- /dev/null
+++ b/libraries/shared/src/RandomAndNoise.h
@@ -0,0 +1,47 @@
+//
+//  RandomAndNoise.h
+//
+//  Created by Olivier Prat on 05/16/18.
+//  Copyright 2018 High Fidelity, Inc.
+//
+//  Distributed under the Apache License, Version 2.0.
+//  See the accompanying file LICENSE or http://www.apache.org/licenses/LICENSE-2.0.html
+//
+#ifndef RANDOM_AND_NOISE_H
+#define RANDOM_AND_NOISE_H
+
+#include <glm/vec2.hpp>
+
+// Low discrepancy Halton sequence generator
+template <int B>
+float evaluateHalton(int index) {
+    float f = 1.0f;
+    float r = 0.0f;
+    float invB = 1.0f / (float)B;
+    index++; // Indices start at 1, not 0
+
+    while (index > 0) {
+        f = f * invB;
+        r = r + f * (float)(index % B);
+        index = index / B;
+
+    }
+
+    return r;
+}
+
+inline float getRadicalInverseVdC(uint32_t bits) {
+    bits = (bits << 16u) | (bits >> 16u);
+    bits = ((bits & 0x55555555u) << 1u) | ((bits & 0xAAAAAAAAu) >> 1u);
+    bits = ((bits & 0x33333333u) << 2u) | ((bits & 0xCCCCCCCCu) >> 2u);
+    bits = ((bits & 0x0F0F0F0Fu) << 4u) | ((bits & 0xF0F0F0F0u) >> 4u);
+    bits = ((bits & 0x00FF00FFu) << 8u) | ((bits & 0xFF00FF00u) >> 8u);
+    return float(bits) * 2.3283064365386963e-10f; // / 0x100000000\n"
+}
+
+// Low discrepancy Hammersley 2D sequence generator
+inline glm::vec2 evaluateHammersley(int k, const int sequenceLength) {
+    return glm::vec2(float(k) / float(sequenceLength), getRadicalInverseVdC(k));
+}
+
+#endif
\ No newline at end of file