Optimizations, SIMD and const correctness

This commit is contained in:
Brad Davis 2016-12-26 13:57:51 -08:00
parent 53b64b9877
commit 901c020aae
7 changed files with 60 additions and 11 deletions

View file

@ -1160,7 +1160,8 @@ void Model::updateClusterMatrices(glm::vec3 modelPosition, glm::quat modelOrient
} }
_needsUpdateClusterMatrices = false; _needsUpdateClusterMatrices = false;
const FBXGeometry& geometry = getFBXGeometry(); const FBXGeometry& geometry = getFBXGeometry();
glm::mat4 zeroScale(glm::vec4(0.0f, 0.0f, 0.0f, 0.0f), static const glm::mat4 zeroScale(
glm::vec4(0.0f, 0.0f, 0.0f, 0.0f),
glm::vec4(0.0f, 0.0f, 0.0f, 0.0f), glm::vec4(0.0f, 0.0f, 0.0f, 0.0f),
glm::vec4(0.0f, 0.0f, 0.0f, 0.0f), glm::vec4(0.0f, 0.0f, 0.0f, 0.0f),
glm::vec4(0.0f, 0.0f, 0.0f, 1.0f)); glm::vec4(0.0f, 0.0f, 0.0f, 1.0f));
@ -1170,11 +1171,17 @@ void Model::updateClusterMatrices(glm::vec3 modelPosition, glm::quat modelOrient
for (int i = 0; i < _meshStates.size(); i++) { for (int i = 0; i < _meshStates.size(); i++) {
MeshState& state = _meshStates[i]; MeshState& state = _meshStates[i];
const FBXMesh& mesh = geometry.meshes.at(i); const FBXMesh& mesh = geometry.meshes.at(i);
for (int j = 0; j < mesh.clusters.size(); j++) { for (int j = 0; j < mesh.clusters.size(); j++) {
const FBXCluster& cluster = mesh.clusters.at(j); const FBXCluster& cluster = mesh.clusters.at(j);
auto jointMatrix = _rig->getJointTransform(cluster.jointIndex); auto jointMatrix = _rig->getJointTransform(cluster.jointIndex);
#if GLM_ARCH & GLM_ARCH_SSE2
glm::mat4 temp, out, inverseBindMatrix = cluster.inverseBindMatrix;
glm_mat4_mul((glm_vec4*)&modelToWorld, (glm_vec4*)&jointMatrix, (glm_vec4*)&temp);
glm_mat4_mul((glm_vec4*)&temp, (glm_vec4*)&inverseBindMatrix, (glm_vec4*)&out);
state.clusterMatrices[j] = out;
#else
state.clusterMatrices[j] = modelToWorld * jointMatrix * cluster.inverseBindMatrix; state.clusterMatrices[j] = modelToWorld * jointMatrix * cluster.inverseBindMatrix;
#endif
// as an optimization, don't build cauterizedClusterMatrices if the boneSet is empty. // as an optimization, don't build cauterizedClusterMatrices if the boneSet is empty.
if (!_cauterizeBoneSet.empty()) { if (!_cauterizeBoneSet.empty()) {

View file

@ -35,7 +35,7 @@ void PendingChanges::updateItem(ItemID id, const UpdateFunctorPointer& functor)
_updateFunctors.push_back(functor); _updateFunctors.push_back(functor);
} }
void PendingChanges::merge(PendingChanges& changes) { void PendingChanges::merge(const PendingChanges& changes) {
_resetItems.insert(_resetItems.end(), changes._resetItems.begin(), changes._resetItems.end()); _resetItems.insert(_resetItems.end(), changes._resetItems.begin(), changes._resetItems.end());
_resetPayloads.insert(_resetPayloads.end(), changes._resetPayloads.begin(), changes._resetPayloads.end()); _resetPayloads.insert(_resetPayloads.end(), changes._resetPayloads.begin(), changes._resetPayloads.end());
_removedItems.insert(_removedItems.end(), changes._removedItems.begin(), changes._removedItems.end()); _removedItems.insert(_removedItems.end(), changes._removedItems.begin(), changes._removedItems.end());
@ -71,7 +71,7 @@ void Scene::enqueuePendingChanges(const PendingChanges& pendingChanges) {
void consolidateChangeQueue(PendingChangesQueue& queue, PendingChanges& singleBatch) { void consolidateChangeQueue(PendingChangesQueue& queue, PendingChanges& singleBatch) {
while (!queue.empty()) { while (!queue.empty()) {
auto pendingChanges = queue.front(); const auto& pendingChanges = queue.front();
singleBatch.merge(pendingChanges); singleBatch.merge(pendingChanges);
queue.pop(); queue.pop();
}; };

View file

@ -34,7 +34,7 @@ public:
void updateItem(ItemID id, const UpdateFunctorPointer& functor); void updateItem(ItemID id, const UpdateFunctorPointer& functor);
void updateItem(ItemID id) { updateItem(id, nullptr); } void updateItem(ItemID id) { updateItem(id, nullptr); }
void merge(PendingChanges& changes); void merge(const PendingChanges& changes);
ItemIDs _resetItems; ItemIDs _resetItems;
Payloads _resetPayloads; Payloads _resetPayloads;

View file

@ -575,18 +575,18 @@ void AABox::transform(const Transform& transform) {
// Logic based on http://clb.demon.fi/MathGeoLib/nightly/docs/AABB.cpp_code.html#471 // Logic based on http://clb.demon.fi/MathGeoLib/nightly/docs/AABB.cpp_code.html#471
// Recomputes _corner and _scale from the given matrix: the box center is moved with
// transformPoint(), and the new half-extent on each axis is the dot product of the
// absolute values of one row of the matrix's upper-left 3x3 with the absolute half-size.
void AABox::transform(const glm::mat4& matrix) {
    // FIXME use simd operations
    auto extent = _scale * 0.5f;
    const auto movedCenter = transformPoint(matrix, _corner + extent);
    extent = abs(extent);

    // After the transpose, rows[i] holds row i of the matrix's upper-left 3x3.
    const auto rows = glm::transpose(glm::mat3(matrix));
    const vec3 movedExtent(
        glm::dot(glm::abs(rows[0]), extent),
        glm::dot(glm::abs(rows[1]), extent),
        glm::dot(glm::abs(rows[2]), extent));

    _corner = movedCenter - movedExtent;
    _scale = movedExtent * 2.0f;
}

View file

@ -502,7 +502,10 @@ glm::mat4 cancelOutRollAndPitch(const glm::mat4& m) {
// Transforms the point p by m, treating p as the homogeneous vector (p, 1), and returns
// the xyz of the result. The result is divided through by w unless w is exactly 1, in
// which case the divide is skipped.
glm::vec3 transformPoint(const glm::mat4& m, const glm::vec3& p) {
    glm::vec4 projected = m * glm::vec4(p, 1.0f);
    const float w = projected.w;
    if (w != 1.0f) {
        projected *= (1.0f / w);
    }
    return glm::vec3(projected);
}
// does not handle non-uniform scale correctly, but it's faster than transformVectorFull

View file

@ -15,6 +15,8 @@
#include <StreamUtils.h> #include <StreamUtils.h>
#include <../QTestExtensions.h> #include <../QTestExtensions.h>
#include <glm/gtc/matrix_transform.hpp>
#include <glm/simd/matrix.h>
QTEST_MAIN(GLMHelpersTests) QTEST_MAIN(GLMHelpersTests)
@ -102,3 +104,39 @@ void GLMHelpersTests::testSixByteOrientationCompression() {
testQuatCompression(-(ROT_Y_180 * ROT_Z_30 * ROT_X_90)); testQuatCompression(-(ROT_Y_180 * ROT_Z_30 * ROT_X_90));
testQuatCompression(-(ROT_Z_30 * ROT_X_90 * ROT_Y_180)); testQuatCompression(-(ROT_Z_30 * ROT_X_90 * ROT_Y_180));
} }
#define LOOPS 500000
void GLMHelpersTests::testSimd() {
glm::mat4 a = glm::translate(glm::mat4(), vec3(1, 4, 9));
glm::mat4 b = glm::rotate(glm::mat4(), PI / 3, vec3(0, 1, 0));
glm::mat4 a1, b1;
glm::mat4 a2, b2;
a1 = a * b;
b1 = b * a;
glm_mat4_mul((glm_vec4*)&a, (glm_vec4*)&b, (glm_vec4*)&a2);
glm_mat4_mul((glm_vec4*)&b, (glm_vec4*)&a, (glm_vec4*)&b2);
{
QElapsedTimer timer;
timer.start();
for (size_t i = 0; i < LOOPS; ++i) {
a1 = a * b;
b1 = b * a;
}
qDebug() << "Native " << timer.elapsed();
}
{
QElapsedTimer timer;
timer.start();
for (size_t i = 0; i < LOOPS; ++i) {
glm_mat4_mul((glm_vec4*)&a, (glm_vec4*)&b, (glm_vec4*)&a2);
glm_mat4_mul((glm_vec4*)&b, (glm_vec4*)&a, (glm_vec4*)&b2);
}
qDebug() << "SIMD " << timer.elapsed();
}
qDebug() << "Done ";
}

View file

@ -20,6 +20,7 @@ class GLMHelpersTests : public QObject {
private slots: private slots:
void testEulerDecomposition(); void testEulerDecomposition();
void testSixByteOrientationCompression(); void testSixByteOrientationCompression();
void testSimd();
}; };
float getErrorDifference(const float& a, const float& b); float getErrorDifference(const float& a, const float& b);