Optimizations, SIMD and const correctness

This commit is contained in:
Brad Davis 2016-12-26 13:57:51 -08:00
parent 53b64b9877
commit 901c020aae
7 changed files with 60 additions and 11 deletions

View file

@ -1160,7 +1160,8 @@ void Model::updateClusterMatrices(glm::vec3 modelPosition, glm::quat modelOrient
}
_needsUpdateClusterMatrices = false;
const FBXGeometry& geometry = getFBXGeometry();
glm::mat4 zeroScale(glm::vec4(0.0f, 0.0f, 0.0f, 0.0f),
static const glm::mat4 zeroScale(
glm::vec4(0.0f, 0.0f, 0.0f, 0.0f),
glm::vec4(0.0f, 0.0f, 0.0f, 0.0f),
glm::vec4(0.0f, 0.0f, 0.0f, 0.0f),
glm::vec4(0.0f, 0.0f, 0.0f, 1.0f));
@ -1170,11 +1171,17 @@ void Model::updateClusterMatrices(glm::vec3 modelPosition, glm::quat modelOrient
for (int i = 0; i < _meshStates.size(); i++) {
MeshState& state = _meshStates[i];
const FBXMesh& mesh = geometry.meshes.at(i);
for (int j = 0; j < mesh.clusters.size(); j++) {
const FBXCluster& cluster = mesh.clusters.at(j);
auto jointMatrix = _rig->getJointTransform(cluster.jointIndex);
#if GLM_ARCH & GLM_ARCH_SSE2
glm::mat4 temp, out, inverseBindMatrix = cluster.inverseBindMatrix;
glm_mat4_mul((glm_vec4*)&modelToWorld, (glm_vec4*)&jointMatrix, (glm_vec4*)&temp);
glm_mat4_mul((glm_vec4*)&temp, (glm_vec4*)&inverseBindMatrix, (glm_vec4*)&out);
state.clusterMatrices[j] = out;
#else
state.clusterMatrices[j] = modelToWorld * jointMatrix * cluster.inverseBindMatrix;
#endif
// as an optimization, don't build cauterizedClusterMatrices if the boneSet is empty.
if (!_cauterizeBoneSet.empty()) {

View file

@ -35,7 +35,7 @@ void PendingChanges::updateItem(ItemID id, const UpdateFunctorPointer& functor)
_updateFunctors.push_back(functor);
}
void PendingChanges::merge(PendingChanges& changes) {
void PendingChanges::merge(const PendingChanges& changes) {
_resetItems.insert(_resetItems.end(), changes._resetItems.begin(), changes._resetItems.end());
_resetPayloads.insert(_resetPayloads.end(), changes._resetPayloads.begin(), changes._resetPayloads.end());
_removedItems.insert(_removedItems.end(), changes._removedItems.begin(), changes._removedItems.end());
@ -71,7 +71,7 @@ void Scene::enqueuePendingChanges(const PendingChanges& pendingChanges) {
void consolidateChangeQueue(PendingChangesQueue& queue, PendingChanges& singleBatch) {
while (!queue.empty()) {
auto pendingChanges = queue.front();
const auto& pendingChanges = queue.front();
singleBatch.merge(pendingChanges);
queue.pop();
};

View file

@ -34,7 +34,7 @@ public:
void updateItem(ItemID id, const UpdateFunctorPointer& functor);
void updateItem(ItemID id) { updateItem(id, nullptr); }
void merge(PendingChanges& changes);
void merge(const PendingChanges& changes);
ItemIDs _resetItems;
Payloads _resetPayloads;

View file

@ -575,18 +575,18 @@ void AABox::transform(const Transform& transform) {
// Logic based on http://clb.demon.fi/MathGeoLib/nightly/docs/AABB.cpp_code.html#471
// Transforms this box by `matrix` in place: the box centre is mapped through
// transformPoint(), a new half-extent vector (newDir) is built from the absolute
// values of the matrix's 3x3 part, and _corner / _scale are rewritten from the two.
// NOTE(review): as rendered, this span carries two `auto newCenter` declarations and
// two triples of glm::dot arguments with no comma between them, so it cannot compile
// in this form. It looks like the before and after lines of a diff hunk shown
// together; confirm which variant is the intended one.
void AABox::transform(const glm::mat4& matrix) {
// FIXME use simd operations
auto halfSize = _scale * 0.5f;
auto center = _corner + halfSize;
// Component-wise absolute value, so the extents used below are non-negative.
halfSize = abs(halfSize);
auto newCenter = transformPoint(matrix, center);
// After the transpose, mm[i] holds the i-th row of the 3x3 part of `matrix`.
auto mm = glm::transpose(glm::mat3(matrix));
// Each component of newDir is the dot product of one absolute-valued row with halfSize.
vec3 newDir = vec3(
glm::dot(glm::abs(vec3(mm[0])), halfSize),
glm::dot(glm::abs(vec3(mm[1])), halfSize),
glm::dot(glm::abs(vec3(mm[2])), halfSize)
glm::dot(glm::abs(mm[0]), halfSize),
glm::dot(glm::abs(mm[1]), halfSize),
glm::dot(glm::abs(mm[2]), halfSize)
);
auto newCenter = transformPoint(matrix, center);
// Rebuild the box around the transformed centre: corner = centre - half extent,
// full size = twice the half extent.
_corner = newCenter - newDir;
_scale = newDir * 2.0f;
}

View file

@ -502,7 +502,10 @@ glm::mat4 cancelOutRollAndPitch(const glm::mat4& m) {
// Applies the 4x4 matrix `m` to the point `p` (extended to a vec4 with w = 1) and
// returns the xyz part of the result divided through by the resulting w.
// NOTE(review): as rendered, the first `return` makes every statement after it
// unreachable. This looks like the before and after lines of a diff hunk shown
// together; confirm which variant is the intended one.
glm::vec3 transformPoint(const glm::mat4& m, const glm::vec3& p) {
glm::vec4 temp = m * glm::vec4(p, 1.0f);
// Variant 1: three separate divisions by w.
return glm::vec3(temp.x / temp.w, temp.y / temp.w, temp.z / temp.w);
// Variant 2: skip the work when w is exactly 1, otherwise scale the whole vector
// once by the reciprocal of w.
if (temp.w != 1.0f) {
temp *= (1.0f / temp.w);
}
return glm::vec3(temp);
}
// does not handle non-uniform scale correctly, but it's faster than transformVectorFull

View file

@ -15,6 +15,8 @@
#include <StreamUtils.h>
#include <../QTestExtensions.h>
#include <glm/gtc/matrix_transform.hpp>
#include <glm/simd/matrix.h>
QTEST_MAIN(GLMHelpersTests)
@ -102,3 +104,39 @@ void GLMHelpersTests::testSixByteOrientationCompression() {
testQuatCompression(-(ROT_Y_180 * ROT_Z_30 * ROT_X_90));
testQuatCompression(-(ROT_Z_30 * ROT_X_90 * ROT_Y_180));
}
#define LOOPS 500000
// Compares GLM's SIMD 4x4 multiply (glm_mat4_mul) against the scalar operator* on a
// translation/rotation pair: first for agreement of the results, then for the time
// taken by LOOPS repetitions of each, which is written to the debug log.
void GLMHelpersTests::testSimd() {
    glm::mat4 a = glm::translate(glm::mat4(), vec3(1, 4, 9));
    glm::mat4 b = glm::rotate(glm::mat4(), PI / 3, vec3(0, 1, 0));

    // Scalar reference products, in both multiplication orders.
    glm::mat4 a1, b1;
    a1 = a * b;
    b1 = b * a;

    // glm_mat4_mul is only used under this guard, mirroring the guard around the
    // glm_mat4_mul calls in Model::updateClusterMatrices.
#if GLM_ARCH & GLM_ARCH_SSE2
    // NOTE(review): the casts treat a glm::mat4 as four glm_vec4 columns and assume
    // its storage is suitably aligned for glm_vec4 -- confirm for this GLM configuration.
    glm::mat4 a2, b2;
    glm_mat4_mul(reinterpret_cast<glm_vec4*>(&a), reinterpret_cast<glm_vec4*>(&b), reinterpret_cast<glm_vec4*>(&a2));
    glm_mat4_mul(reinterpret_cast<glm_vec4*>(&b), reinterpret_cast<glm_vec4*>(&a), reinterpret_cast<glm_vec4*>(&b2));

    // The two code paths may round differently, so compare element-wise with a small
    // tolerance rather than demanding bit-identical output.
    const float SIMD_TOLERANCE = 0.0001f;
    auto nearlyEqual = [SIMD_TOLERANCE](const glm::mat4& lhs, const glm::mat4& rhs) -> bool {
        for (int col = 0; col < 4; ++col) {
            for (int row = 0; row < 4; ++row) {
                if (glm::abs(lhs[col][row] - rhs[col][row]) > SIMD_TOLERANCE) {
                    return false;
                }
            }
        }
        return true;
    };
    QVERIFY(nearlyEqual(a1, a2));
    QVERIFY(nearlyEqual(b1, b2));
#endif

    // Timing of the scalar path.
    {
        QElapsedTimer timer;
        timer.start();
        for (size_t i = 0; i < LOOPS; ++i) {
            a1 = a * b;
            b1 = b * a;
        }
        qDebug() << "Native " << timer.elapsed();
    }

#if GLM_ARCH & GLM_ARCH_SSE2
    // Timing of the SIMD path over the same operands.
    {
        QElapsedTimer timer;
        timer.start();
        for (size_t i = 0; i < LOOPS; ++i) {
            glm_mat4_mul(reinterpret_cast<glm_vec4*>(&a), reinterpret_cast<glm_vec4*>(&b), reinterpret_cast<glm_vec4*>(&a2));
            glm_mat4_mul(reinterpret_cast<glm_vec4*>(&b), reinterpret_cast<glm_vec4*>(&a), reinterpret_cast<glm_vec4*>(&b2));
        }
        qDebug() << "SIMD " << timer.elapsed();
    }
#else
    qDebug() << "SIMD path skipped: GLM was built without SSE2 support";
#endif

    qDebug() << "Done ";
}

View file

@ -20,6 +20,7 @@ class GLMHelpersTests : public QObject {
private slots:
void testEulerDecomposition();
void testSixByteOrientationCompression();
void testSimd();
};
float getErrorDifference(const float& a, const float& b);