From 901c020aaeaef8b471c5149b4da28398561639e5 Mon Sep 17 00:00:00 2001 From: Brad Davis Date: Mon, 26 Dec 2016 13:57:51 -0800 Subject: [PATCH] Optimizations, SIMD and const correctness --- libraries/render-utils/src/Model.cpp | 11 ++++++-- libraries/render/src/render/Scene.cpp | 4 +-- libraries/render/src/render/Scene.h | 2 +- libraries/shared/src/AABox.cpp | 10 +++---- libraries/shared/src/GLMHelpers.cpp | 5 +++- tests/shared/src/GLMHelpersTests.cpp | 38 +++++++++++++++++++++++++++ tests/shared/src/GLMHelpersTests.h | 1 + 7 files changed, 60 insertions(+), 11 deletions(-) diff --git a/libraries/render-utils/src/Model.cpp b/libraries/render-utils/src/Model.cpp index 436574a1ff..ffbe1fb34c 100644 --- a/libraries/render-utils/src/Model.cpp +++ b/libraries/render-utils/src/Model.cpp @@ -1160,7 +1160,8 @@ void Model::updateClusterMatrices(glm::vec3 modelPosition, glm::quat modelOrient } _needsUpdateClusterMatrices = false; const FBXGeometry& geometry = getFBXGeometry(); - glm::mat4 zeroScale(glm::vec4(0.0f, 0.0f, 0.0f, 0.0f), + static const glm::mat4 zeroScale( + glm::vec4(0.0f, 0.0f, 0.0f, 0.0f), glm::vec4(0.0f, 0.0f, 0.0f, 0.0f), glm::vec4(0.0f, 0.0f, 0.0f, 0.0f), glm::vec4(0.0f, 0.0f, 0.0f, 1.0f)); @@ -1170,11 +1171,17 @@ void Model::updateClusterMatrices(glm::vec3 modelPosition, glm::quat modelOrient for (int i = 0; i < _meshStates.size(); i++) { MeshState& state = _meshStates[i]; const FBXMesh& mesh = geometry.meshes.at(i); - for (int j = 0; j < mesh.clusters.size(); j++) { const FBXCluster& cluster = mesh.clusters.at(j); auto jointMatrix = _rig->getJointTransform(cluster.jointIndex); +#if GLM_ARCH & GLM_ARCH_SSE2 + glm::mat4 temp, out, inverseBindMatrix = cluster.inverseBindMatrix; + glm_mat4_mul((glm_vec4*)&modelToWorld, (glm_vec4*)&jointMatrix, (glm_vec4*)&temp); + glm_mat4_mul((glm_vec4*)&temp, (glm_vec4*)&inverseBindMatrix, (glm_vec4*)&out); + state.clusterMatrices[j] = out; +#else state.clusterMatrices[j] = modelToWorld * jointMatrix * cluster.inverseBindMatrix; +#endif // as an optimization, don't build cautrizedClusterMatrices if the boneSet is empty. if (!_cauterizeBoneSet.empty()) { diff --git a/libraries/render/src/render/Scene.cpp b/libraries/render/src/render/Scene.cpp index f918fc0bf6..95fef3e9f0 100644 --- a/libraries/render/src/render/Scene.cpp +++ b/libraries/render/src/render/Scene.cpp @@ -35,7 +35,7 @@ void PendingChanges::updateItem(ItemID id, const UpdateFunctorPointer& functor) _updateFunctors.push_back(functor); } -void PendingChanges::merge(PendingChanges& changes) { +void PendingChanges::merge(const PendingChanges& changes) { _resetItems.insert(_resetItems.end(), changes._resetItems.begin(), changes._resetItems.end()); _resetPayloads.insert(_resetPayloads.end(), changes._resetPayloads.begin(), changes._resetPayloads.end()); _removedItems.insert(_removedItems.end(), changes._removedItems.begin(), changes._removedItems.end()); @@ -71,7 +71,7 @@ void Scene::enqueuePendingChanges(const PendingChanges& pendingChanges) { void consolidateChangeQueue(PendingChangesQueue& queue, PendingChanges& singleBatch) { while (!queue.empty()) { - auto pendingChanges = queue.front(); + const auto& pendingChanges = queue.front(); singleBatch.merge(pendingChanges); queue.pop(); }; diff --git a/libraries/render/src/render/Scene.h b/libraries/render/src/render/Scene.h index 6b57a22a36..13475d0556 100644 --- a/libraries/render/src/render/Scene.h +++ b/libraries/render/src/render/Scene.h @@ -34,7 +34,7 @@ public: void updateItem(ItemID id, const UpdateFunctorPointer& functor); void updateItem(ItemID id) { updateItem(id, nullptr); } - void merge(PendingChanges& changes); + void merge(const PendingChanges& changes); ItemIDs _resetItems; Payloads _resetPayloads; diff --git a/libraries/shared/src/AABox.cpp b/libraries/shared/src/AABox.cpp index 5e9c031355..4a74fb4033 100644 --- a/libraries/shared/src/AABox.cpp +++ b/libraries/shared/src/AABox.cpp @@ -575,18 +575,18 @@ void AABox::transform(const Transform& transform) { // Logic based on http://clb.demon.fi/MathGeoLib/nightly/docs/AABB.cpp_code.html#471 void AABox::transform(const glm::mat4& matrix) { + // FIXME use simd operations auto halfSize = _scale * 0.5f; auto center = _corner + halfSize; halfSize = abs(halfSize); - auto newCenter = transformPoint(matrix, center); - auto mm = glm::transpose(glm::mat3(matrix)); vec3 newDir = vec3( - glm::dot(glm::abs(vec3(mm[0])), halfSize), - glm::dot(glm::abs(vec3(mm[1])), halfSize), - glm::dot(glm::abs(vec3(mm[2])), halfSize) + glm::dot(glm::abs(mm[0]), halfSize), + glm::dot(glm::abs(mm[1]), halfSize), + glm::dot(glm::abs(mm[2]), halfSize) ); + auto newCenter = transformPoint(matrix, center); _corner = newCenter - newDir; _scale = newDir * 2.0f; } diff --git a/libraries/shared/src/GLMHelpers.cpp b/libraries/shared/src/GLMHelpers.cpp index 3520a4dc44..6aa4f68f22 100644 --- a/libraries/shared/src/GLMHelpers.cpp +++ b/libraries/shared/src/GLMHelpers.cpp @@ -502,7 +502,10 @@ glm::mat4 cancelOutRollAndPitch(const glm::mat4& m) { glm::vec3 transformPoint(const glm::mat4& m, const glm::vec3& p) { glm::vec4 temp = m * glm::vec4(p, 1.0f); - return glm::vec3(temp.x / temp.w, temp.y / temp.w, temp.z / temp.w); + if (temp.w != 1.0f) { + temp *= (1.0f / temp.w); + } + return glm::vec3(temp); } // does not handle non-uniform scale correctly, but it's faster then transformVectorFull diff --git a/tests/shared/src/GLMHelpersTests.cpp b/tests/shared/src/GLMHelpersTests.cpp index a796d62ba5..8d26d35c69 100644 --- a/tests/shared/src/GLMHelpersTests.cpp +++ b/tests/shared/src/GLMHelpersTests.cpp @@ -15,6 +15,8 @@ #include #include <../QTestExtensions.h> +#include +#include QTEST_MAIN(GLMHelpersTests) @@ -102,3 +104,39 @@ void GLMHelpersTests::testSixByteOrientationCompression() { testQuatCompression(-(ROT_Y_180 * ROT_Z_30 * ROT_X_90)); testQuatCompression(-(ROT_Z_30 * ROT_X_90 * ROT_Y_180)); } + +#define LOOPS 500000 + +void GLMHelpersTests::testSimd() { + glm::mat4 a = glm::translate(glm::mat4(), vec3(1, 4, 9)); + glm::mat4 b = glm::rotate(glm::mat4(), PI / 3, vec3(0, 1, 0)); + glm::mat4 a1, b1; + glm::mat4 a2, b2; + + a1 = a * b; + b1 = b * a; + glm_mat4_mul((glm_vec4*)&a, (glm_vec4*)&b, (glm_vec4*)&a2); + glm_mat4_mul((glm_vec4*)&b, (glm_vec4*)&a, (glm_vec4*)&b2); + + + { + QElapsedTimer timer; + timer.start(); + for (size_t i = 0; i < LOOPS; ++i) { + a1 = a * b; + b1 = b * a; + } + qDebug() << "Native " << timer.elapsed(); + } + + { + QElapsedTimer timer; + timer.start(); + for (size_t i = 0; i < LOOPS; ++i) { + glm_mat4_mul((glm_vec4*)&a, (glm_vec4*)&b, (glm_vec4*)&a2); + glm_mat4_mul((glm_vec4*)&b, (glm_vec4*)&a, (glm_vec4*)&b2); + } + qDebug() << "SIMD " << timer.elapsed(); + } + qDebug() << "Done "; +} diff --git a/tests/shared/src/GLMHelpersTests.h b/tests/shared/src/GLMHelpersTests.h index 40d552a07b..acc7b533f5 100644 --- a/tests/shared/src/GLMHelpersTests.h +++ b/tests/shared/src/GLMHelpersTests.h @@ -20,6 +20,7 @@ class GLMHelpersTests : public QObject { private slots: void testEulerDecomposition(); void testSixByteOrientationCompression(); + void testSimd(); }; float getErrorDifference(const float& a, const float& b);