mirror of
https://github.com/AleziaKurdis/overte.git
synced 2025-04-07 12:12:39 +02:00
Optimizations, SIMD and const correctness
This commit is contained in:
parent
53b64b9877
commit
901c020aae
7 changed files with 60 additions and 11 deletions
|
@ -1160,7 +1160,8 @@ void Model::updateClusterMatrices(glm::vec3 modelPosition, glm::quat modelOrient
|
|||
}
|
||||
_needsUpdateClusterMatrices = false;
|
||||
const FBXGeometry& geometry = getFBXGeometry();
|
||||
glm::mat4 zeroScale(glm::vec4(0.0f, 0.0f, 0.0f, 0.0f),
|
||||
static const glm::mat4 zeroScale(
|
||||
glm::vec4(0.0f, 0.0f, 0.0f, 0.0f),
|
||||
glm::vec4(0.0f, 0.0f, 0.0f, 0.0f),
|
||||
glm::vec4(0.0f, 0.0f, 0.0f, 0.0f),
|
||||
glm::vec4(0.0f, 0.0f, 0.0f, 1.0f));
|
||||
|
@ -1170,11 +1171,17 @@ void Model::updateClusterMatrices(glm::vec3 modelPosition, glm::quat modelOrient
|
|||
for (int i = 0; i < _meshStates.size(); i++) {
|
||||
MeshState& state = _meshStates[i];
|
||||
const FBXMesh& mesh = geometry.meshes.at(i);
|
||||
|
||||
for (int j = 0; j < mesh.clusters.size(); j++) {
|
||||
const FBXCluster& cluster = mesh.clusters.at(j);
|
||||
auto jointMatrix = _rig->getJointTransform(cluster.jointIndex);
|
||||
#if GLM_ARCH & GLM_ARCH_SSE2
|
||||
glm::mat4 temp, out, inverseBindMatrix = cluster.inverseBindMatrix;
|
||||
glm_mat4_mul((glm_vec4*)&modelToWorld, (glm_vec4*)&jointMatrix, (glm_vec4*)&temp);
|
||||
glm_mat4_mul((glm_vec4*)&temp, (glm_vec4*)&inverseBindMatrix, (glm_vec4*)&out);
|
||||
state.clusterMatrices[j] = out;
|
||||
#else
|
||||
state.clusterMatrices[j] = modelToWorld * jointMatrix * cluster.inverseBindMatrix;
|
||||
#endif
|
||||
|
||||
// as an optimization, don't build cauterizedClusterMatrices if the boneSet is empty.
|
||||
if (!_cauterizeBoneSet.empty()) {
|
||||
|
|
|
@ -35,7 +35,7 @@ void PendingChanges::updateItem(ItemID id, const UpdateFunctorPointer& functor)
|
|||
_updateFunctors.push_back(functor);
|
||||
}
|
||||
|
||||
void PendingChanges::merge(PendingChanges& changes) {
|
||||
void PendingChanges::merge(const PendingChanges& changes) {
|
||||
_resetItems.insert(_resetItems.end(), changes._resetItems.begin(), changes._resetItems.end());
|
||||
_resetPayloads.insert(_resetPayloads.end(), changes._resetPayloads.begin(), changes._resetPayloads.end());
|
||||
_removedItems.insert(_removedItems.end(), changes._removedItems.begin(), changes._removedItems.end());
|
||||
|
@ -71,7 +71,7 @@ void Scene::enqueuePendingChanges(const PendingChanges& pendingChanges) {
|
|||
|
||||
void consolidateChangeQueue(PendingChangesQueue& queue, PendingChanges& singleBatch) {
|
||||
while (!queue.empty()) {
|
||||
auto pendingChanges = queue.front();
|
||||
const auto& pendingChanges = queue.front();
|
||||
singleBatch.merge(pendingChanges);
|
||||
queue.pop();
|
||||
};
|
||||
|
|
|
@ -34,7 +34,7 @@ public:
|
|||
void updateItem(ItemID id, const UpdateFunctorPointer& functor);
|
||||
void updateItem(ItemID id) { updateItem(id, nullptr); }
|
||||
|
||||
void merge(PendingChanges& changes);
|
||||
void merge(const PendingChanges& changes);
|
||||
|
||||
ItemIDs _resetItems;
|
||||
Payloads _resetPayloads;
|
||||
|
|
|
@ -575,18 +575,18 @@ void AABox::transform(const Transform& transform) {
|
|||
|
||||
// Logic based on http://clb.demon.fi/MathGeoLib/nightly/docs/AABB.cpp_code.html#471
|
||||
void AABox::transform(const glm::mat4& matrix) {
|
||||
// FIXME use simd operations
|
||||
auto halfSize = _scale * 0.5f;
|
||||
auto center = _corner + halfSize;
|
||||
halfSize = abs(halfSize);
|
||||
auto newCenter = transformPoint(matrix, center);
|
||||
|
||||
auto mm = glm::transpose(glm::mat3(matrix));
|
||||
vec3 newDir = vec3(
|
||||
glm::dot(glm::abs(vec3(mm[0])), halfSize),
|
||||
glm::dot(glm::abs(vec3(mm[1])), halfSize),
|
||||
glm::dot(glm::abs(vec3(mm[2])), halfSize)
|
||||
glm::dot(glm::abs(mm[0]), halfSize),
|
||||
glm::dot(glm::abs(mm[1]), halfSize),
|
||||
glm::dot(glm::abs(mm[2]), halfSize)
|
||||
);
|
||||
|
||||
auto newCenter = transformPoint(matrix, center);
|
||||
_corner = newCenter - newDir;
|
||||
_scale = newDir * 2.0f;
|
||||
}
|
||||
|
|
|
@ -502,7 +502,10 @@ glm::mat4 cancelOutRollAndPitch(const glm::mat4& m) {
|
|||
|
||||
glm::vec3 transformPoint(const glm::mat4& m, const glm::vec3& p) {
|
||||
glm::vec4 temp = m * glm::vec4(p, 1.0f);
|
||||
return glm::vec3(temp.x / temp.w, temp.y / temp.w, temp.z / temp.w);
|
||||
if (temp.w != 1.0f) {
|
||||
temp *= (1.0f / temp.w);
|
||||
}
|
||||
return glm::vec3(temp);
|
||||
}
|
||||
|
||||
// does not handle non-uniform scale correctly, but it's faster than transformVectorFull
|
||||
|
|
|
@ -15,6 +15,8 @@
|
|||
#include <StreamUtils.h>
|
||||
|
||||
#include <../QTestExtensions.h>
|
||||
#include <glm/gtc/matrix_transform.hpp>
|
||||
#include <glm/simd/matrix.h>
|
||||
|
||||
|
||||
QTEST_MAIN(GLMHelpersTests)
|
||||
|
@ -102,3 +104,39 @@ void GLMHelpersTests::testSixByteOrientationCompression() {
|
|||
testQuatCompression(-(ROT_Y_180 * ROT_Z_30 * ROT_X_90));
|
||||
testQuatCompression(-(ROT_Z_30 * ROT_X_90 * ROT_Y_180));
|
||||
}
|
||||
|
||||
#define LOOPS 500000
|
||||
|
||||
void GLMHelpersTests::testSimd() {
|
||||
glm::mat4 a = glm::translate(glm::mat4(), vec3(1, 4, 9));
|
||||
glm::mat4 b = glm::rotate(glm::mat4(), PI / 3, vec3(0, 1, 0));
|
||||
glm::mat4 a1, b1;
|
||||
glm::mat4 a2, b2;
|
||||
|
||||
a1 = a * b;
|
||||
b1 = b * a;
|
||||
glm_mat4_mul((glm_vec4*)&a, (glm_vec4*)&b, (glm_vec4*)&a2);
|
||||
glm_mat4_mul((glm_vec4*)&b, (glm_vec4*)&a, (glm_vec4*)&b2);
|
||||
|
||||
|
||||
{
|
||||
QElapsedTimer timer;
|
||||
timer.start();
|
||||
for (size_t i = 0; i < LOOPS; ++i) {
|
||||
a1 = a * b;
|
||||
b1 = b * a;
|
||||
}
|
||||
qDebug() << "Native " << timer.elapsed();
|
||||
}
|
||||
|
||||
{
|
||||
QElapsedTimer timer;
|
||||
timer.start();
|
||||
for (size_t i = 0; i < LOOPS; ++i) {
|
||||
glm_mat4_mul((glm_vec4*)&a, (glm_vec4*)&b, (glm_vec4*)&a2);
|
||||
glm_mat4_mul((glm_vec4*)&b, (glm_vec4*)&a, (glm_vec4*)&b2);
|
||||
}
|
||||
qDebug() << "SIMD " << timer.elapsed();
|
||||
}
|
||||
qDebug() << "Done ";
|
||||
}
|
||||
|
|
|
@ -20,6 +20,7 @@ class GLMHelpersTests : public QObject {
|
|||
private slots:
|
||||
void testEulerDecomposition();
|
||||
void testSixByteOrientationCompression();
|
||||
void testSimd();
|
||||
};
|
||||
|
||||
float getErrorDifference(const float& a, const float& b);
|
||||
|
|
Loading…
Reference in a new issue