mirror of
https://github.com/HifiExperiments/overte.git
synced 2025-08-08 17:56:47 +02:00
Optimizations, SIMD and const correctness
This commit is contained in:
parent
53b64b9877
commit
901c020aae
7 changed files with 60 additions and 11 deletions
|
@ -1160,7 +1160,8 @@ void Model::updateClusterMatrices(glm::vec3 modelPosition, glm::quat modelOrient
|
||||||
}
|
}
|
||||||
_needsUpdateClusterMatrices = false;
|
_needsUpdateClusterMatrices = false;
|
||||||
const FBXGeometry& geometry = getFBXGeometry();
|
const FBXGeometry& geometry = getFBXGeometry();
|
||||||
glm::mat4 zeroScale(glm::vec4(0.0f, 0.0f, 0.0f, 0.0f),
|
static const glm::mat4 zeroScale(
|
||||||
|
glm::vec4(0.0f, 0.0f, 0.0f, 0.0f),
|
||||||
glm::vec4(0.0f, 0.0f, 0.0f, 0.0f),
|
glm::vec4(0.0f, 0.0f, 0.0f, 0.0f),
|
||||||
glm::vec4(0.0f, 0.0f, 0.0f, 0.0f),
|
glm::vec4(0.0f, 0.0f, 0.0f, 0.0f),
|
||||||
glm::vec4(0.0f, 0.0f, 0.0f, 1.0f));
|
glm::vec4(0.0f, 0.0f, 0.0f, 1.0f));
|
||||||
|
@ -1170,11 +1171,17 @@ void Model::updateClusterMatrices(glm::vec3 modelPosition, glm::quat modelOrient
|
||||||
for (int i = 0; i < _meshStates.size(); i++) {
|
for (int i = 0; i < _meshStates.size(); i++) {
|
||||||
MeshState& state = _meshStates[i];
|
MeshState& state = _meshStates[i];
|
||||||
const FBXMesh& mesh = geometry.meshes.at(i);
|
const FBXMesh& mesh = geometry.meshes.at(i);
|
||||||
|
|
||||||
for (int j = 0; j < mesh.clusters.size(); j++) {
|
for (int j = 0; j < mesh.clusters.size(); j++) {
|
||||||
const FBXCluster& cluster = mesh.clusters.at(j);
|
const FBXCluster& cluster = mesh.clusters.at(j);
|
||||||
auto jointMatrix = _rig->getJointTransform(cluster.jointIndex);
|
auto jointMatrix = _rig->getJointTransform(cluster.jointIndex);
|
||||||
|
#if GLM_ARCH & GLM_ARCH_SSE2
|
||||||
|
glm::mat4 temp, out, inverseBindMatrix = cluster.inverseBindMatrix;
|
||||||
|
glm_mat4_mul((glm_vec4*)&modelToWorld, (glm_vec4*)&jointMatrix, (glm_vec4*)&temp);
|
||||||
|
glm_mat4_mul((glm_vec4*)&temp, (glm_vec4*)&inverseBindMatrix, (glm_vec4*)&out);
|
||||||
|
state.clusterMatrices[j] = out;
|
||||||
|
#else
|
||||||
state.clusterMatrices[j] = modelToWorld * jointMatrix * cluster.inverseBindMatrix;
|
state.clusterMatrices[j] = modelToWorld * jointMatrix * cluster.inverseBindMatrix;
|
||||||
|
#endif
|
||||||
|
|
||||||
// as an optimization, don't build cauterizedClusterMatrices if the boneSet is empty.
|
// as an optimization, don't build cauterizedClusterMatrices if the boneSet is empty.
|
||||||
if (!_cauterizeBoneSet.empty()) {
|
if (!_cauterizeBoneSet.empty()) {
|
||||||
|
|
|
@ -35,7 +35,7 @@ void PendingChanges::updateItem(ItemID id, const UpdateFunctorPointer& functor)
|
||||||
_updateFunctors.push_back(functor);
|
_updateFunctors.push_back(functor);
|
||||||
}
|
}
|
||||||
|
|
||||||
void PendingChanges::merge(PendingChanges& changes) {
|
void PendingChanges::merge(const PendingChanges& changes) {
|
||||||
_resetItems.insert(_resetItems.end(), changes._resetItems.begin(), changes._resetItems.end());
|
_resetItems.insert(_resetItems.end(), changes._resetItems.begin(), changes._resetItems.end());
|
||||||
_resetPayloads.insert(_resetPayloads.end(), changes._resetPayloads.begin(), changes._resetPayloads.end());
|
_resetPayloads.insert(_resetPayloads.end(), changes._resetPayloads.begin(), changes._resetPayloads.end());
|
||||||
_removedItems.insert(_removedItems.end(), changes._removedItems.begin(), changes._removedItems.end());
|
_removedItems.insert(_removedItems.end(), changes._removedItems.begin(), changes._removedItems.end());
|
||||||
|
@ -71,7 +71,7 @@ void Scene::enqueuePendingChanges(const PendingChanges& pendingChanges) {
|
||||||
|
|
||||||
void consolidateChangeQueue(PendingChangesQueue& queue, PendingChanges& singleBatch) {
|
void consolidateChangeQueue(PendingChangesQueue& queue, PendingChanges& singleBatch) {
|
||||||
while (!queue.empty()) {
|
while (!queue.empty()) {
|
||||||
auto pendingChanges = queue.front();
|
const auto& pendingChanges = queue.front();
|
||||||
singleBatch.merge(pendingChanges);
|
singleBatch.merge(pendingChanges);
|
||||||
queue.pop();
|
queue.pop();
|
||||||
};
|
};
|
||||||
|
|
|
@ -34,7 +34,7 @@ public:
|
||||||
void updateItem(ItemID id, const UpdateFunctorPointer& functor);
|
void updateItem(ItemID id, const UpdateFunctorPointer& functor);
|
||||||
void updateItem(ItemID id) { updateItem(id, nullptr); }
|
void updateItem(ItemID id) { updateItem(id, nullptr); }
|
||||||
|
|
||||||
void merge(PendingChanges& changes);
|
void merge(const PendingChanges& changes);
|
||||||
|
|
||||||
ItemIDs _resetItems;
|
ItemIDs _resetItems;
|
||||||
Payloads _resetPayloads;
|
Payloads _resetPayloads;
|
||||||
|
|
|
@ -575,18 +575,18 @@ void AABox::transform(const Transform& transform) {
|
||||||
|
|
||||||
// Logic based on http://clb.demon.fi/MathGeoLib/nightly/docs/AABB.cpp_code.html#471
|
// Logic based on http://clb.demon.fi/MathGeoLib/nightly/docs/AABB.cpp_code.html#471
|
||||||
void AABox::transform(const glm::mat4& matrix) {
|
void AABox::transform(const glm::mat4& matrix) {
|
||||||
|
// FIXME use simd operations
|
||||||
auto halfSize = _scale * 0.5f;
|
auto halfSize = _scale * 0.5f;
|
||||||
auto center = _corner + halfSize;
|
auto center = _corner + halfSize;
|
||||||
halfSize = abs(halfSize);
|
halfSize = abs(halfSize);
|
||||||
auto newCenter = transformPoint(matrix, center);
|
|
||||||
|
|
||||||
auto mm = glm::transpose(glm::mat3(matrix));
|
auto mm = glm::transpose(glm::mat3(matrix));
|
||||||
vec3 newDir = vec3(
|
vec3 newDir = vec3(
|
||||||
glm::dot(glm::abs(vec3(mm[0])), halfSize),
|
glm::dot(glm::abs(mm[0]), halfSize),
|
||||||
glm::dot(glm::abs(vec3(mm[1])), halfSize),
|
glm::dot(glm::abs(mm[1]), halfSize),
|
||||||
glm::dot(glm::abs(vec3(mm[2])), halfSize)
|
glm::dot(glm::abs(mm[2]), halfSize)
|
||||||
);
|
);
|
||||||
|
|
||||||
|
auto newCenter = transformPoint(matrix, center);
|
||||||
_corner = newCenter - newDir;
|
_corner = newCenter - newDir;
|
||||||
_scale = newDir * 2.0f;
|
_scale = newDir * 2.0f;
|
||||||
}
|
}
|
||||||
|
|
|
@ -502,7 +502,10 @@ glm::mat4 cancelOutRollAndPitch(const glm::mat4& m) {
|
||||||
|
|
||||||
glm::vec3 transformPoint(const glm::mat4& m, const glm::vec3& p) {
|
glm::vec3 transformPoint(const glm::mat4& m, const glm::vec3& p) {
|
||||||
glm::vec4 temp = m * glm::vec4(p, 1.0f);
|
glm::vec4 temp = m * glm::vec4(p, 1.0f);
|
||||||
return glm::vec3(temp.x / temp.w, temp.y / temp.w, temp.z / temp.w);
|
if (temp.w != 1.0f) {
|
||||||
|
temp *= (1.0f / temp.w);
|
||||||
|
}
|
||||||
|
return glm::vec3(temp);
|
||||||
}
|
}
|
||||||
|
|
||||||
// does not handle non-uniform scale correctly, but it's faster than transformVectorFull
|
// does not handle non-uniform scale correctly, but it's faster than transformVectorFull
|
||||||
|
|
|
@ -15,6 +15,8 @@
|
||||||
#include <StreamUtils.h>
|
#include <StreamUtils.h>
|
||||||
|
|
||||||
#include <../QTestExtensions.h>
|
#include <../QTestExtensions.h>
|
||||||
|
#include <glm/gtc/matrix_transform.hpp>
|
||||||
|
#include <glm/simd/matrix.h>
|
||||||
|
|
||||||
|
|
||||||
QTEST_MAIN(GLMHelpersTests)
|
QTEST_MAIN(GLMHelpersTests)
|
||||||
|
@ -102,3 +104,39 @@ void GLMHelpersTests::testSixByteOrientationCompression() {
|
||||||
testQuatCompression(-(ROT_Y_180 * ROT_Z_30 * ROT_X_90));
|
testQuatCompression(-(ROT_Y_180 * ROT_Z_30 * ROT_X_90));
|
||||||
testQuatCompression(-(ROT_Z_30 * ROT_X_90 * ROT_Y_180));
|
testQuatCompression(-(ROT_Z_30 * ROT_X_90 * ROT_Y_180));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#define LOOPS 500000
|
||||||
|
|
||||||
|
void GLMHelpersTests::testSimd() {
|
||||||
|
glm::mat4 a = glm::translate(glm::mat4(), vec3(1, 4, 9));
|
||||||
|
glm::mat4 b = glm::rotate(glm::mat4(), PI / 3, vec3(0, 1, 0));
|
||||||
|
glm::mat4 a1, b1;
|
||||||
|
glm::mat4 a2, b2;
|
||||||
|
|
||||||
|
a1 = a * b;
|
||||||
|
b1 = b * a;
|
||||||
|
glm_mat4_mul((glm_vec4*)&a, (glm_vec4*)&b, (glm_vec4*)&a2);
|
||||||
|
glm_mat4_mul((glm_vec4*)&b, (glm_vec4*)&a, (glm_vec4*)&b2);
|
||||||
|
|
||||||
|
|
||||||
|
{
|
||||||
|
QElapsedTimer timer;
|
||||||
|
timer.start();
|
||||||
|
for (size_t i = 0; i < LOOPS; ++i) {
|
||||||
|
a1 = a * b;
|
||||||
|
b1 = b * a;
|
||||||
|
}
|
||||||
|
qDebug() << "Native " << timer.elapsed();
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
QElapsedTimer timer;
|
||||||
|
timer.start();
|
||||||
|
for (size_t i = 0; i < LOOPS; ++i) {
|
||||||
|
glm_mat4_mul((glm_vec4*)&a, (glm_vec4*)&b, (glm_vec4*)&a2);
|
||||||
|
glm_mat4_mul((glm_vec4*)&b, (glm_vec4*)&a, (glm_vec4*)&b2);
|
||||||
|
}
|
||||||
|
qDebug() << "SIMD " << timer.elapsed();
|
||||||
|
}
|
||||||
|
qDebug() << "Done ";
|
||||||
|
}
|
||||||
|
|
|
@ -20,6 +20,7 @@ class GLMHelpersTests : public QObject {
|
||||||
private slots:
|
private slots:
|
||||||
void testEulerDecomposition();
|
void testEulerDecomposition();
|
||||||
void testSixByteOrientationCompression();
|
void testSixByteOrientationCompression();
|
||||||
|
void testSimd();
|
||||||
};
|
};
|
||||||
|
|
||||||
float getErrorDifference(const float& a, const float& b);
|
float getErrorDifference(const float& a, const float& b);
|
||||||
|
|
Loading…
Reference in a new issue