From 1aedfff6f7642bbcd94447daa8172d98616f302b Mon Sep 17 00:00:00 2001
From: Olivier Prat <olivier@zvork.fr>
Date: Tue, 2 Apr 2019 15:40:42 +0200
Subject: [PATCH] Working convolution

---
 libraries/image/src/image/CubeMap.cpp         | 128 +++++++++++-------
 libraries/image/src/image/CubeMap.h           |  20 ++-
 .../image/src/image/TextureProcessing.cpp     |   3 +-
 libraries/image/src/image/TextureProcessing.h |   2 +-
 libraries/render-utils/src/LightAmbient.slh   |   5 +-
 5 files changed, 97 insertions(+), 61 deletions(-)

diff --git a/libraries/image/src/image/CubeMap.cpp b/libraries/image/src/image/CubeMap.cpp
index e746aa25fe..c8fd9cee80 100644
--- a/libraries/image/src/image/CubeMap.cpp
+++ b/libraries/image/src/image/CubeMap.cpp
@@ -63,7 +63,7 @@ struct CubeFaceMip {
 
     CubeFaceMip(gpu::uint16 level, const CubeMap* cubemap) {
         _dims = cubemap->getMipDimensions(level);
-        _lineStride = _dims.x + 2;
+        _lineStride = cubemap->getMipLineStride(level);
     }
 
     CubeFaceMip(const CubeFaceMip& other) : _dims(other._dims), _lineStride(other._lineStride) {
@@ -71,7 +71,7 @@ struct CubeFaceMip {
     }
 
     gpu::Vec2i _dims;
-    int _lineStride;
+    size_t _lineStride;
 };
 
 class CubeMap::ConstMip : public CubeFaceMip {
@@ -87,21 +87,23 @@ public:
 
         coordFrac -= coords;
 
-        coords += 1.0f;
+        coords += (float)EDGE_WIDTH;
 
         const auto& pixels = _faces[face];
         gpu::Vec2i loCoords(coords);
+        gpu::Vec2i hiCoords;
 
-        loCoords = glm::clamp(loCoords, gpu::Vec2i(0, 0), _dims);
+        hiCoords = glm::clamp(loCoords + 1, gpu::Vec2i(0, 0), _dims - 1 + (int)EDGE_WIDTH);
+        loCoords = glm::clamp(loCoords, gpu::Vec2i(0, 0), _dims - 1 + (int)EDGE_WIDTH);
 
         const size_t offsetLL = loCoords.x + loCoords.y * _lineStride;
-        const size_t offsetHL = offsetLL + 1;
-        const size_t offsetLH = offsetLL + _lineStride;
-        const size_t offsetHH = offsetLH + 1;
-        assert(offsetLL >= 0 && offsetLL < (_dims.x + 2)*(_dims.y + 2));
-        assert(offsetHL >= 0 && offsetHL < (_dims.x + 2)*(_dims.y + 2));
-        assert(offsetLH >= 0 && offsetLH < (_dims.x + 2)*(_dims.y + 2));
-        assert(offsetHH >= 0 && offsetHH < (_dims.x + 2)*(_dims.y + 2));
+        const size_t offsetHL = hiCoords.x + loCoords.y * _lineStride;
+        const size_t offsetLH = loCoords.x + hiCoords.y * _lineStride;
+        const size_t offsetHH = hiCoords.x + hiCoords.y * _lineStride;
+        assert(offsetLL >= 0 && offsetLL < _lineStride*(_dims.y + 2 * EDGE_WIDTH));
+        assert(offsetHL >= 0 && offsetHL < _lineStride*(_dims.y + 2 * EDGE_WIDTH));
+        assert(offsetLH >= 0 && offsetLH < _lineStride*(_dims.y + 2 * EDGE_WIDTH));
+        assert(offsetHH >= 0 && offsetHH < _lineStride*(_dims.y + 2 * EDGE_WIDTH));
         glm::vec4 colorLL = pixels[offsetLL];
         glm::vec4 colorHL = pixels[offsetHL];
         glm::vec4 colorLH = pixels[offsetLH];
@@ -129,6 +131,10 @@ public:
     }
 
     void applySeams() {
+        if (EDGE_WIDTH == 0) {
+            return;
+        }
+
         // Copy edge rows and columns from neighbouring faces to fix seam filtering issues
         seamColumnAndRow(gpu::Texture::CUBE_FACE_TOP_POS_Y, _dims.x, gpu::Texture::CUBE_FACE_RIGHT_POS_X, -1, -1);
         seamColumnAndRow(gpu::Texture::CUBE_FACE_BOTTOM_NEG_Y, _dims.x, gpu::Texture::CUBE_FACE_RIGHT_POS_X, _dims.y, 1);
@@ -162,7 +168,7 @@ private:
 
     Faces& _faces;
 
-    inline static void copy(CubeMap::Face::const_iterator srcFirst, CubeMap::Face::const_iterator srcLast, int srcStride, CubeMap::Face::iterator dstBegin, int dstStride) {
+    inline static void copy(CubeMap::Face::const_iterator srcFirst, CubeMap::Face::const_iterator srcLast, size_t srcStride, CubeMap::Face::iterator dstBegin, size_t dstStride) {
         while (srcFirst <= srcLast) {
             *dstBegin = *srcFirst;
             srcFirst += srcStride;
@@ -293,6 +299,26 @@ private:
     }
 };
 
+static void copySurface(const nvtt::Surface& source, glm::vec4* dest, size_t dstLineStride) {  // Copies source's pixels into dest as RGBA vec4s
+    const float* srcRedIt = source.channel(0);  // one float pointer per channel (0..3), advanced in lockstep below
+    const float* srcGreenIt = source.channel(1);
+    const float* srcBlueIt = source.channel(2);
+    const float* srcAlphaIt = source.channel(3);
+
+    for (int y = 0; y < source.height(); y++) {
+        glm::vec4* dstColIt = dest;  // restart at the first column of the current destination row
+        for (int x = 0; x < source.width(); x++) {
+            *dstColIt = glm::vec4(*srcRedIt, *srcGreenIt, *srcBlueIt, *srcAlphaIt);  // gather the four channel values into one pixel
+            dstColIt++;
+            srcRedIt++;  // source pointers are never reset per row: each channel is read as consecutive floats
+            srcGreenIt++;
+            srcBlueIt++;
+            srcAlphaIt++;
+        }
+        dest += dstLineStride;  // step to the next destination row; stride is counted in vec4 elements
+    }
+}
+
 CubeMap::CubeMap(int width, int height, int mipCount) {
     reset(width, height, mipCount);
 }
@@ -327,32 +353,26 @@ CubeMap::CubeMap(const std::vector<Image>& faces, gpu::Element srcTextureFormat,
 
     int face;
 
-    struct MipMapErrorHandler : public nvtt::ErrorHandler {
-        virtual void error(nvtt::Error e) override {
-            qCWarning(imagelogging) << "Texture mip map creation error:" << nvtt::errorString(e);
-        }
-    };
+    nvtt::Surface surface;
+    surface.setAlphaMode(nvtt::AlphaMode_None);
+    surface.setWrapMode(nvtt::WrapMode_Mirror);
+
+    std::vector<glm::vec4> floatPixels;
+    floatPixels.resize(_width * _height);
 
     // Compute mips
     for (face = 0; face < 6; face++) {
-        auto sourcePixels = faces[face].getBits();
-        auto floatPixels = editFace(0, face);
-
-        convertToFloat(sourcePixels, _width, _height, faces[face].getBytesPerLineCount(), srcTextureFormat, floatPixels, _width);
-
-        nvtt::Surface surface;
-        surface.setImage(nvtt::InputFormat_RGBA_32F, _width, _height, 1, floatPixels);
-        surface.setAlphaMode(nvtt::AlphaMode_None);
-        surface.setWrapMode(nvtt::WrapMode_Clamp);
+        convertToFloat(faces[face].getBits(), _width, _height, faces[face].getBytesPerLineCount(), srcTextureFormat, floatPixels.data(), _width);
+        surface.setImage(nvtt::InputFormat_RGBA_32F, _width, _height, 1, &floatPixels.front().x);
 
         auto mipLevel = 0;
-        copyFace(_width, _height, reinterpret_cast<const glm::vec4*>(surface.data()), surface.width(), editFace(0, face), getFaceLineStride(0));
+        copySurface(surface, editFace(0, face), getMipLineStride(0));
 
         while (surface.canMakeNextMipmap() && !abortProcessing.load()) {
             surface.buildNextMipmap(nvtt::MipmapFilter_Box);
             mipLevel++;
 
-            copyFace(surface.width(), surface.height(), reinterpret_cast<const glm::vec4*>(surface.data()), surface.width(), editFace(mipLevel, face), getFaceLineStride(mipLevel));
+            copySurface(surface, editFace(mipLevel, face), getMipLineStride(mipLevel));
         }
     }
 
@@ -366,7 +386,7 @@ CubeMap::CubeMap(const std::vector<Image>& faces, gpu::Element srcTextureFormat,
     }
 }
 
-void CubeMap::copyFace(int width, int height, const glm::vec4* source, int srcLineStride, glm::vec4* dest, int dstLineStride) {
+void CubeMap::copyFace(int width, int height, const glm::vec4* source, size_t srcLineStride, glm::vec4* dest, size_t dstLineStride) {
     for (int y = 0; y < height; y++) {
         std::copy(source, source + width, dest);
         source += srcLineStride;
@@ -383,7 +403,7 @@ void CubeMap::reset(int width, int height, int mipCount) {
         auto mipDimensions = getMipDimensions(mipLevel);
         // Add extra pixels on edges to perform edge seam fixup (we will duplicate pixels from
         // neighbouring faces)
-        auto mipPixelCount = (mipDimensions.x+2) * (mipDimensions.y+2);
+        auto mipPixelCount = (mipDimensions.x + 2 * EDGE_WIDTH) * (mipDimensions.y + 2 * EDGE_WIDTH);
 
         for (auto& face : _mips[mipLevel]) {
             face.resize(mipPixelCount);
@@ -391,6 +411,12 @@ void CubeMap::reset(int width, int height, int mipCount) {
     }
 }
 
+void CubeMap::copyTo(CubeMap& other) const {  // Overwrites other's dimensions and mip storage with this cube map's
+    other._width = _width;
+    other._height = _height;
+    other._mips = _mips;  // copy-assigns the whole _mips member
+}
+
 void CubeMap::copyTo(gpu::Texture* texture, const std::atomic<bool>& abortProcessing) const {
     assert(_width == texture->getWidth() && _height == texture->getHeight() && texture->getNumMips() == _mips.size());
 
@@ -407,24 +433,27 @@ void CubeMap::copyTo(gpu::Texture* texture, const std::atomic<bool>& abortProces
 
     nvtt::Surface surface;
     surface.setAlphaMode(nvtt::AlphaMode_None);
-    surface.setWrapMode(nvtt::WrapMode_Clamp);
+    surface.setWrapMode(nvtt::WrapMode_Mirror);
+
+    std::vector<glm::vec4> floatPixels;
+    floatPixels.resize(_width * _height);
+
+    nvtt::CompressionOptions compressionOptions;
+
+    SequentialTaskDispatcher dispatcher(abortProcessing);
+    nvtt::Context context;
+    context.setTaskDispatcher(&dispatcher);
 
-    glm::vec4* packedPixels = new glm::vec4[_width * _height];
     for (int face = 0; face < 6; face++) {
-        nvtt::CompressionOptions compressionOptions;
-        std::unique_ptr<nvtt::OutputHandler> outputHandler{ getNVTTCompressionOutputHandler(texture, face, compressionOptions) };
-
-        outputOptions.setOutputHandler(outputHandler.get());
-
-        SequentialTaskDispatcher dispatcher(abortProcessing);
-        nvtt::Context context;
-        context.setTaskDispatcher(&dispatcher);
-
         for (gpu::uint16 mipLevel = 0; mipLevel < _mips.size() && !abortProcessing.load(); mipLevel++) {
             auto mipDims = getMipDimensions(mipLevel);
 
-            copyFace(mipDims.x, mipDims.y, getFace(mipLevel, face), getFaceLineStride(mipLevel), packedPixels, mipDims.x);
-            surface.setImage(nvtt::InputFormat_RGBA_32F, mipDims.x, mipDims.y, 1, packedPixels);
+            std::unique_ptr<nvtt::OutputHandler> outputHandler{ getNVTTCompressionOutputHandler(texture, face, compressionOptions) };
+
+            outputOptions.setOutputHandler(outputHandler.get());
+
+            copyFace(mipDims.x, mipDims.y, getFace(mipLevel, face), getMipLineStride(mipLevel), &floatPixels.front(), mipDims.x);
+            surface.setImage(nvtt::InputFormat_RGBA_32F, mipDims.x, mipDims.y, 1, &floatPixels.front().x);
             context.compress(surface, face, mipLevel, compressionOptions, outputOptions);
         }
 
@@ -432,7 +461,6 @@ void CubeMap::copyTo(gpu::Texture* texture, const std::atomic<bool>& abortProces
             break;
         }
     }
-    delete[] packedPixels;
 }
 
 void CubeMap::getFaceUV(const glm::vec3& dir, int* index, glm::vec2* uv) {
@@ -651,11 +679,11 @@ void CubeMap::convolveMipFaceForGGX(const GGXSamples& samples, CubeMap& output,
     const glm::vec3* faceNormals = FACE_NORMALS + face * 4;
     const glm::vec3 deltaYNormalLo = faceNormals[2] - faceNormals[0];
     const glm::vec3 deltaYNormalHi = faceNormals[3] - faceNormals[1];
-    auto mipDimensions = output.getMipDimensions(mipLevel);
+    const auto mipDimensions = output.getMipDimensions(mipLevel);
+    const auto outputLineStride = output.getMipLineStride(mipLevel);
     auto outputFacePixels = output.editFace(mipLevel, face);
-    auto outputLineStride = output.getFaceLineStride(mipLevel);
 
-    tbb::parallel_for(tbb::blocked_range2d<int, int>(0, mipDimensions.x, 16, 0, mipDimensions.y, 16), [&](const tbb::blocked_range2d<int, int>& range) {
+    tbb::parallel_for(tbb::blocked_range2d<int, int>(0, mipDimensions.x, 32, 0, mipDimensions.y, 32), [&](const tbb::blocked_range2d<int, int>& range) {
         auto rowRange = range.rows();
         auto colRange = range.cols();
 
@@ -664,15 +692,15 @@ void CubeMap::convolveMipFaceForGGX(const GGXSamples& samples, CubeMap& output,
                 break;
             }
 
-            const float yAlpha = (y + 0.5f) / _height;
+            const float yAlpha = (y + 0.5f) / mipDimensions.y;
             const glm::vec3 normalXLo = faceNormals[0] + deltaYNormalLo * yAlpha;
             const glm::vec3 normalXHi = faceNormals[1] + deltaYNormalHi * yAlpha;
             const glm::vec3 deltaXNormal = normalXHi - normalXLo;
 
             for (auto x = colRange.begin(); x < colRange.end(); x++) {
-                const float xAlpha = (x + 0.5f) / _width;
+                const float xAlpha = (x + 0.5f) / mipDimensions.x;
                 // Interpolate normal for this pixel
-                const glm::vec3 normal = glm::normalize(normalXLo + deltaXNormal * yAlpha);
+                const glm::vec3 normal = glm::normalize(normalXLo + deltaXNormal * xAlpha);
 
                 outputFacePixels[x + y * outputLineStride] = computeConvolution(normal, samples);
             }
diff --git a/libraries/image/src/image/CubeMap.h b/libraries/image/src/image/CubeMap.h
index 808f6eea42..6f867ce57a 100644
--- a/libraries/image/src/image/CubeMap.h
+++ b/libraries/image/src/image/CubeMap.h
@@ -23,6 +23,11 @@
 namespace image {
 
     class CubeMap {
+
+        enum {
+            EDGE_WIDTH = 1
+        };
+
     public:
  
         CubeMap(int width, int height, int mipCount);
@@ -30,6 +35,7 @@ namespace image {
 
         void reset(int width, int height, int mipCount);
         void copyTo(gpu::Texture* texture, const std::atomic<bool>& abortProcessing = false) const;
+        void copyTo(CubeMap& other) const;
 
         gpu::uint16 getMipCount() const { return (gpu::uint16)_mips.size(); }
         int getMipWidth(gpu::uint16 mipLevel) const {
@@ -42,16 +48,16 @@ namespace image {
             return gpu::Vec2i(getMipWidth(mipLevel), getMipHeight(mipLevel));
         }
 
+        size_t getMipLineStride(gpu::uint16 mipLevel) const {
+            return getMipWidth(mipLevel) + 2 * EDGE_WIDTH;
+        }
+
         glm::vec4* editFace(gpu::uint16 mipLevel, int face) {
-            return _mips[mipLevel][face].data() + getFaceLineStride(mipLevel) + 1;
+            return _mips[mipLevel][face].data() + (getMipLineStride(mipLevel) + 1)*EDGE_WIDTH;
         }
 
         const glm::vec4* getFace(gpu::uint16 mipLevel, int face) const {
-            return _mips[mipLevel][face].data() + getFaceLineStride(mipLevel) + 1;
-        }
-
-        size_t getFaceLineStride(gpu::uint16 mipLevel) const {
-            return getMipWidth(mipLevel)+2;
+            return _mips[mipLevel][face].data() + (getMipLineStride(mipLevel) + 1)*EDGE_WIDTH;
         }
 
         void convolveForGGX(CubeMap& output, const std::atomic<bool>& abortProcessing) const;
@@ -73,7 +79,7 @@ namespace image {
 
         static void getFaceUV(const glm::vec3& dir, int* index, glm::vec2* uv);
         static void generateGGXSamples(GGXSamples& data, float roughness, const int resolution);
-        static void copyFace(int width, int height, const glm::vec4* source, int srcLineStride, glm::vec4* dest, int dstLineStride);
+        static void copyFace(int width, int height, const glm::vec4* source, size_t srcLineStride, glm::vec4* dest, size_t dstLineStride);
         void convolveMipFaceForGGX(const GGXSamples& samples, CubeMap& output, gpu::uint16 mipLevel, int face, const std::atomic<bool>& abortProcessing) const;
         glm::vec4 computeConvolution(const glm::vec3& normal, const GGXSamples& samples) const;
 
diff --git a/libraries/image/src/image/TextureProcessing.cpp b/libraries/image/src/image/TextureProcessing.cpp
index 00e6fd806d..ac0c17d115 100644
--- a/libraries/image/src/image/TextureProcessing.cpp
+++ b/libraries/image/src/image/TextureProcessing.cpp
@@ -1581,8 +1581,9 @@ gpu::TexturePointer TextureUsage::processCubeTextureColorFromImage(Image&& srcIm
             auto irradiance = irradianceTexture->getIrradiance();
             theTexture->overrideIrradiance(irradiance);
         }
-
+        
         if (options & CUBE_GGX_CONVOLVE) {
+            // Performs both convolution AND mip map generation
             convolveForGGX(faces, GPU_CUBEMAP_HDR_FORMAT, theTexture.get(), abortProcessing);
         } else {
             // Create mip maps and compress to final format in one go
diff --git a/libraries/image/src/image/TextureProcessing.h b/libraries/image/src/image/TextureProcessing.h
index 7d1c483155..378e68228a 100644
--- a/libraries/image/src/image/TextureProcessing.h
+++ b/libraries/image/src/image/TextureProcessing.h
@@ -105,7 +105,7 @@ gpu::TexturePointer processImage(std::shared_ptr<QIODevice> content, const std::
 #if defined(NVTT_API)
 class SequentialTaskDispatcher : public nvtt::TaskDispatcher {
 public:
-    SequentialTaskDispatcher(const std::atomic<bool>& abortProcessing);
+    SequentialTaskDispatcher(const std::atomic<bool>& abortProcessing = false);
 
     const std::atomic<bool>& _abortProcessing;
 
diff --git a/libraries/render-utils/src/LightAmbient.slh b/libraries/render-utils/src/LightAmbient.slh
index 0c7130b110..8afcb6ccd3 100644
--- a/libraries/render-utils/src/LightAmbient.slh
+++ b/libraries/render-utils/src/LightAmbient.slh
@@ -17,8 +17,9 @@ vec4 evalSkyboxLight(vec3 direction, float lod) {
 
 #if !defined(GL_ES)
     float filterLod = textureQueryLod(skyboxMap, direction).x;
-    // Keep texture filtering LOD as limit to prevent aliasing on specular reflection
-    lod = max(lod, filterLod);
+    // Keep texture filtering LOD as limit to prevent aliasing on specular reflection, but add
+    // a bias to limit overblurring with convolved maps
+    lod = max(lod, filterLod-2);
 #endif
 
     return textureLod(skyboxMap, direction, lod);