speed up SH calculation with minimal error

2025-04-25 13:12:52 +02:00 · 2016-08-22 10:21:22 -07:00 · 2016-08-22 10:21:22 -07:00 · d2d905fc86
commit d2d905fc86
parent 857e97e979
1 changed files with 37 additions and 25 deletions
--- a/libraries/gpu/src/gpu/Texture.cpp
+++ b/libraries/gpu/src/gpu/Texture.cpp
@ -676,6 +676,10 @@ void sphericalHarmonicsEvaluateDirection(float * result, int order,  const glm::
 }

 bool sphericalHarmonicsFromTexture(const gpu::Texture& cubeTexture, std::vector<glm::vec3> & output, const uint order) {
+    int width = cubeTexture.getWidth();
+    if(width != cubeTexture.getHeight()) {
+        return false;
+    }
    const uint sqOrder = order*order;

    // allocate memory for calculations
@ -684,8 +688,6 @@ bool sphericalHarmonicsFromTexture(const gpu::Texture& cubeTexture, std::vector<
    std::vector<float> resultG(sqOrder);
    std::vector<float> resultB(sqOrder);

-    int width, height;
-
    // initialize values
    float fWt = 0.0f;
    for(uint i=0; i < sqOrder; i++) {
@ -696,11 +698,16 @@ bool sphericalHarmonicsFromTexture(const gpu::Texture& cubeTexture, std::vector<
    }
    std::vector<float> shBuff(sqOrder);
    std::vector<float> shBuffB(sqOrder);
-    // get width and height
-    width = height = cubeTexture.getWidth();
-    if(width != height) {
-        return false;
+
+    // We trade accuracy for speed by breaking the image into 32x32 parts
+    // and approximating the distance for all the pixels in each part to be
+    // the distance to the part's center.
+    int numDivisionsPerSide = 32;
+    if (width < numDivisionsPerSide) {
+        numDivisionsPerSide = width;
    }
+    int stride = width / numDivisionsPerSide;
+    int halfStride = stride / 2;

    // for each face of cube texture
    for(int face=0; face < gpu::Texture::NUM_CUBE_FACES; face++) {
@ -718,11 +725,11 @@ bool sphericalHarmonicsFromTexture(const gpu::Texture& cubeTexture, std::vector<
        // step between two texels for range [-1, 1]
        float invWidthBy2 = 2.0f / float(width);

-        for(int y=0; y < width; y++) {
+        for(int y=halfStride; y < width-halfStride; y += stride) {
            // texture coordinate V in range [-1 to 1]
            const float fV = negativeBound + float(y) * invWidthBy2;

-            for(int x=0; x < width; x++) {
+            for(int x=halfStride; x < width - halfStride; x += stride) {
                // texture coordinate U in range [-1 to 1]
                const float fU = negativeBound + float(x) * invWidthBy2;

@ -785,32 +792,37 @@ bool sphericalHarmonicsFromTexture(const gpu::Texture& cubeTexture, std::vector<
                sphericalHarmonicsEvaluateDirection(shBuff.data(), order, dir);

                // index of texel in texture
-                uint pixOffsetIndex = (x + y * width) * numComponents;

                // get color from texture and map to range [0, 1]
-                glm::vec3 clr(ColorUtils::sRGB8ToLinearFloat(data[pixOffsetIndex]),
-                              ColorUtils::sRGB8ToLinearFloat(data[pixOffsetIndex + 1]),
-                              ColorUtils::sRGB8ToLinearFloat(data[pixOffsetIndex + 2]));
+                float red { 0.0f };
+                float green { 0.0f };
+                float blue { 0.0f };
+                for (int i = 0; i < stride; ++i) {
+                    for (int j = 0; j < stride; ++j) {
+                        int k = (int)(x + i - halfStride + (y + j - halfStride) * width) * numComponents;
+                        red += ColorUtils::sRGB8ToLinearFloat(data[k]);
+                        green += ColorUtils::sRGB8ToLinearFloat(data[k + 1]);
+                        blue += ColorUtils::sRGB8ToLinearFloat(data[k + 2]);
+                    }
+                }
+                glm::vec3 clr(red, green, blue);

                // scale color and add to previously accumulated coefficients
-                sphericalHarmonicsScale(shBuffB.data(), order,
-                        shBuff.data(), clr.r * fDiffSolid);
-                sphericalHarmonicsAdd(resultR.data(), order,
-                        resultR.data(), shBuffB.data());
-                sphericalHarmonicsScale(shBuffB.data(), order,
-                        shBuff.data(), clr.g * fDiffSolid);
-                sphericalHarmonicsAdd(resultG.data(), order,
-                        resultG.data(), shBuffB.data());
-                sphericalHarmonicsScale(shBuffB.data(), order,
-                        shBuff.data(), clr.b * fDiffSolid);
-                sphericalHarmonicsAdd(resultB.data(), order,
-                        resultB.data(), shBuffB.data());
+                // red
+                sphericalHarmonicsScale(shBuffB.data(), order, shBuff.data(), clr.r * fDiffSolid);
+                sphericalHarmonicsAdd(resultR.data(), order, resultR.data(), shBuffB.data());
+                // green
+                sphericalHarmonicsScale(shBuffB.data(), order, shBuff.data(), clr.g * fDiffSolid);
+                sphericalHarmonicsAdd(resultG.data(), order, resultG.data(), shBuffB.data());
+                // blue
+                sphericalHarmonicsScale(shBuffB.data(), order, shBuff.data(), clr.b * fDiffSolid);
+                sphericalHarmonicsAdd(resultB.data(), order, resultB.data(), shBuffB.data());
            }
        }
    }

    // final scale for coefficients
-    const float fNormProj = (4.0f * glm::pi<float>()) / fWt;
+    const float fNormProj = (4.0f * glm::pi<float>()) / (fWt * (float)(stride * stride));
    sphericalHarmonicsScale(resultR.data(), order, resultR.data(), fNormProj);
    sphericalHarmonicsScale(resultG.data(), order, resultG.data(), fNormProj);
    sphericalHarmonicsScale(resultB.data(), order, resultB.data(), fNormProj);