From 5f08ed5027a8904242a47b17e994ab4f309e3930 Mon Sep 17 00:00:00 2001 From: SamGondelman Date: Thu, 30 Aug 2018 15:13:52 -0700 Subject: [PATCH] misc perf improvements --- .../src/avatars/AvatarMixerSlave.cpp | 6 +- interface/src/avatar/AvatarManager.cpp | 23 +++----- interface/src/avatar/OtherAvatar.cpp | 9 ++- .../src/controllers/UserInputMapper.cpp | 6 +- .../src/EntityTreeRenderer.cpp | 9 ++- .../graphics/src/graphics/BufferViewHelpers.h | 5 +- libraries/shared/src/AABox.cpp | 56 ++++--------------- libraries/shared/src/AABox.h | 17 +++++- libraries/shared/src/GLMHelpers.h | 8 +++ libraries/shared/src/PrioritySortUtil.h | 15 +++-- tests/shared/src/AACubeTests.cpp | 2 +- tests/shared/src/GLMHelpersTests.cpp | 36 ++++++++++++ tests/shared/src/GLMHelpersTests.h | 1 + 13 files changed, 108 insertions(+), 85 deletions(-) diff --git a/assignment-client/src/avatars/AvatarMixerSlave.cpp b/assignment-client/src/avatars/AvatarMixerSlave.cpp index f347ff1f10..59c6db5dc4 100644 --- a/assignment-client/src/avatars/AvatarMixerSlave.cpp +++ b/assignment-client/src/avatars/AvatarMixerSlave.cpp @@ -429,9 +429,9 @@ void AvatarMixerSlave::broadcastAvatarDataToAgent(const SharedNodePointer& node) int remainingAvatars = (int)sortedAvatars.size(); auto traitsPacketList = NLPacketList::create(PacketType::BulkAvatarTraits, QByteArray(), true, true); - while (!sortedAvatars.empty()) { - const auto avatarData = sortedAvatars.top().getAvatar(); - sortedAvatars.pop(); + const auto& sortedAvatarVector = sortedAvatars.getSortedVector(); + for (const auto& sortedAvatar : sortedAvatarVector) { + const auto& avatarData = sortedAvatar.getAvatar(); remainingAvatars--; auto otherNode = avatarDataToNodes[avatarData]; diff --git a/interface/src/avatar/AvatarManager.cpp b/interface/src/avatar/AvatarManager.cpp index 9a7d8ef0c8..af9d9ad6b1 100644 --- a/interface/src/avatar/AvatarManager.cpp +++ b/interface/src/avatar/AvatarManager.cpp @@ -206,6 +206,7 @@ void AvatarManager::updateOtherAvatars(float deltaTime) { } ++itr; } + const auto& sortedAvatarVector = sortedAvatars.getSortedVector(); // process in sorted order uint64_t startTime = usecTimestampNow(); @@ -216,8 +217,8 @@ void AvatarManager::updateOtherAvatars(float deltaTime) { render::Transaction renderTransaction; workload::Transaction workloadTransaction; - while (!sortedAvatars.empty()) { - const SortableAvatar& sortData = sortedAvatars.top(); + for (auto it = sortedAvatarVector.begin(); it != sortedAvatarVector.end(); ++it) { + const SortableAvatar& sortData = *it; const auto avatar = std::static_pointer_cast(sortData.getAvatar()); // TODO: to help us scale to more avatars it would be nice to not have to poll orb state here @@ -231,7 +232,6 @@ void AvatarManager::updateOtherAvatars(float deltaTime) { bool ignoring = DependencyManager::get()->isPersonalMutingNode(avatar->getID()); if (ignoring) { - sortedAvatars.pop(); continue; } @@ -260,26 +260,17 @@ void AvatarManager::updateOtherAvatars(float deltaTime) { // --> some avatar velocity measurements may be a little off // no time to simulate, but we take the time to count how many were tragically missed - bool inView = sortData.getPriority() > OUT_OF_VIEW_THRESHOLD; - if (!inView) { - break; - } - if (inView && avatar->hasNewJointData()) { - numAVatarsNotUpdated++; - } - sortedAvatars.pop(); - while (inView && !sortedAvatars.empty()) { - const SortableAvatar& newSortData = sortedAvatars.top(); + while (it != sortedAvatarVector.end()) { + const SortableAvatar& newSortData = *it; const auto newAvatar = std::static_pointer_cast(newSortData.getAvatar()); - inView = newSortData.getPriority() > OUT_OF_VIEW_THRESHOLD; + bool inView = newSortData.getPriority() > OUT_OF_VIEW_THRESHOLD; if (inView && newAvatar->hasNewJointData()) { numAVatarsNotUpdated++; } - sortedAvatars.pop(); + ++it; } break; } - sortedAvatars.pop(); } if (_shouldRender) { diff --git a/interface/src/avatar/OtherAvatar.cpp b/interface/src/avatar/OtherAvatar.cpp index 29ad5aed91..a0fa496c4c 100644 --- a/interface/src/avatar/OtherAvatar.cpp +++ b/interface/src/avatar/OtherAvatar.cpp @@ -29,20 +29,23 @@ OtherAvatar::~OtherAvatar() { } void OtherAvatar::removeOrb() { - if (qApp->getOverlays().isAddedOverlay(_otherAvatarOrbMeshPlaceholderID)) { + if (!_otherAvatarOrbMeshPlaceholderID.isNull()) { qApp->getOverlays().deleteOverlay(_otherAvatarOrbMeshPlaceholderID); + _otherAvatarOrbMeshPlaceholderID = UNKNOWN_OVERLAY_ID; } } void OtherAvatar::updateOrbPosition() { if (_otherAvatarOrbMeshPlaceholder != nullptr) { _otherAvatarOrbMeshPlaceholder->setWorldPosition(getHead()->getPosition()); + if (_otherAvatarOrbMeshPlaceholderID.isNull()) { + _otherAvatarOrbMeshPlaceholderID = qApp->getOverlays().addOverlay(_otherAvatarOrbMeshPlaceholder); + } } } void OtherAvatar::createOrb() { - if (_otherAvatarOrbMeshPlaceholderID == UNKNOWN_OVERLAY_ID || - !qApp->getOverlays().isAddedOverlay(_otherAvatarOrbMeshPlaceholderID)) { + if (_otherAvatarOrbMeshPlaceholderID.isNull()) { _otherAvatarOrbMeshPlaceholder = std::make_shared(); _otherAvatarOrbMeshPlaceholder->setAlpha(1.0f); _otherAvatarOrbMeshPlaceholder->setColor({ 0xFF, 0x00, 0xFF }); diff --git a/libraries/controllers/src/controllers/UserInputMapper.cpp b/libraries/controllers/src/controllers/UserInputMapper.cpp index 371deec7d5..307064c073 100755 --- a/libraries/controllers/src/controllers/UserInputMapper.cpp +++ b/libraries/controllers/src/controllers/UserInputMapper.cpp @@ -527,8 +527,8 @@ bool UserInputMapper::applyRoute(const Route::Pointer& route, bool force) { } // If the source hasn't been written yet, defer processing of this route - auto source = route->source; - auto sourceInput = source->getInput(); + auto& source = route->source; + auto& sourceInput = source->getInput(); if (sourceInput.device == STANDARD_DEVICE && !force && source->writeable()) { if (debugRoutes && route->debug) { qCDebug(controllers) << "Source not yet written, deferring"; @@ -559,7 +559,7 @@ bool UserInputMapper::applyRoute(const Route::Pointer& route, bool force) { return true; } - auto destination = route->destination; + auto& destination = route->destination; // THis could happen if the route destination failed to create // FIXME: Maybe do not create the route if the destination failed and avoid this case ? if (!destination) { diff --git a/libraries/entities-renderer/src/EntityTreeRenderer.cpp b/libraries/entities-renderer/src/EntityTreeRenderer.cpp index c3c4095251..a363093083 100644 --- a/libraries/entities-renderer/src/EntityTreeRenderer.cpp +++ b/libraries/entities-renderer/src/EntityTreeRenderer.cpp @@ -405,11 +405,14 @@ void EntityTreeRenderer::updateChangedEntities(const render::ScenePointer& scene // process the sorted renderables size_t numSorted = sortedRenderables.size(); - while (!sortedRenderables.empty() && usecTimestampNow() < expiry) { - const auto renderable = sortedRenderables.top().getRenderer(); + const auto& sortedRenderablesVector = sortedRenderables.getSortedVector(); + for (const auto& sortedRenderable : sortedRenderablesVector) { + if (usecTimestampNow() > expiry) { + break; + } + const auto& renderable = sortedRenderable.getRenderer(); renderable->updateInScene(scene, transaction); _renderablesToUpdate.erase(renderable->getEntity()->getID()); - sortedRenderables.pop(); } // compute average per-renderable update cost diff --git a/libraries/graphics/src/graphics/BufferViewHelpers.h b/libraries/graphics/src/graphics/BufferViewHelpers.h index a9707c3128..7c37c75163 100644 --- a/libraries/graphics/src/graphics/BufferViewHelpers.h +++ b/libraries/graphics/src/graphics/BufferViewHelpers.h @@ -13,6 +13,7 @@ #include #include "GpuHelpers.h" +#include "GLMHelpers.h" namespace graphics { class Mesh; @@ -55,8 +56,8 @@ namespace buffer_helpers { tangent = glm::clamp(tangent, -1.0f, 1.0f); normal *= 511.0f; tangent *= 511.0f; - normal = glm::round(normal); - tangent = glm::round(tangent); + normal = fastRoundf(normal); + tangent = fastRoundf(tangent); glm::detail::i10i10i10i2 normalStruct; glm::detail::i10i10i10i2 tangentStruct; diff --git a/libraries/shared/src/AABox.cpp b/libraries/shared/src/AABox.cpp index b4384c494f..e537c3e56a 100644 --- a/libraries/shared/src/AABox.cpp +++ b/libraries/shared/src/AABox.cpp @@ -79,33 +79,23 @@ void AABox::setBox(const glm::vec3& corner, const glm::vec3& scale) { glm::vec3 AABox::getFarthestVertex(const glm::vec3& normal) const { glm::vec3 result = _corner; - if (normal.x > 0.0f) { - result.x += _scale.x; - } - if (normal.y > 0.0f) { - result.y += _scale.y; - } - if (normal.z > 0.0f) { - result.z += _scale.z; - } + float blend = (float)(normal.x > 0.0f); + result.x += blend * _scale.x + (1.0f - blend) * 0.0f; + blend = (float)(normal.y > 0.0f); + result.y += blend * _scale.y + (1.0f - blend) * 0.0f; + blend = (float)(normal.z > 0.0f); + result.z += blend * _scale.z + (1.0f - blend) * 0.0f; return result; } glm::vec3 AABox::getNearestVertex(const glm::vec3& normal) const { glm::vec3 result = _corner; - - if (normal.x < 0.0f) { - result.x += _scale.x; - } - - if (normal.y < 0.0f) { - result.y += _scale.y; - } - - if (normal.z < 0.0f) { - result.z += _scale.z; - } - + float blend = (float)(normal.x < 0.0f); + result.x += blend * _scale.x + (1.0f - blend) * 0.0f; + blend = (float)(normal.y < 0.0f); + result.y += blend * _scale.y + (1.0f - blend) * 0.0f; + blend = (float)(normal.z < 0.0f); + result.z += blend * _scale.z + (1.0f - blend) * 0.0f; return result; } @@ -459,28 +449,6 @@ AABox AABox::clamp(float min, float max) const { return AABox(clampedCorner, clampedScale); } -AABox& AABox::operator += (const glm::vec3& point) { - - if (isInvalid()) { - _corner = glm::min(_corner, point); - } else { - glm::vec3 maximum(_corner + _scale); - _corner = glm::min(_corner, point); - maximum = glm::max(maximum, point); - _scale = maximum - _corner; - } - - return (*this); -} - -AABox& AABox::operator += (const AABox& box) { - if (!box.isInvalid()) { - (*this) += box._corner; - (*this) += box.calcTopFarLeft(); - } - return (*this); -} - void AABox::embiggen(float scale) { _corner += scale * (-0.5f * _scale); _scale *= scale; diff --git a/libraries/shared/src/AABox.h b/libraries/shared/src/AABox.h index daad01d7c7..a56615c40e 100644 --- a/libraries/shared/src/AABox.h +++ b/libraries/shared/src/AABox.h @@ -85,8 +85,21 @@ public: AABox clamp(const glm::vec3& min, const glm::vec3& max) const; AABox clamp(float min, float max) const; - AABox& operator += (const glm::vec3& point); - AABox& operator += (const AABox& box); + inline AABox& operator+=(const glm::vec3& point) { + float blend = (float)isInvalid(); + glm::vec3 maximumScale(glm::max(_scale, point - _corner)); + _corner = glm::min(_corner, point); + _scale = blend * _scale + (1.0f - blend) * maximumScale; + return (*this); + } + + inline AABox& operator+=(const AABox& box) { + if (!box.isInvalid()) { + (*this) += box._corner; + (*this) += box.calcTopFarLeft(); + } + return (*this); + } // Translate the AABox just moving the corner void translate(const glm::vec3& translation) { _corner += translation; } diff --git a/libraries/shared/src/GLMHelpers.h b/libraries/shared/src/GLMHelpers.h index 7e6ef4cb28..619f8172d5 100644 --- a/libraries/shared/src/GLMHelpers.h +++ b/libraries/shared/src/GLMHelpers.h @@ -316,4 +316,12 @@ inline void glm_mat4u_mul(const glm::mat4& m1, const glm::mat4& m2, glm::mat4& r #endif } +inline glm::vec3 fastRoundf(const glm::vec3& vec) { +#if GLM_ARCH & GLM_ARCH_SSE2_BIT + return glm::vec3(_mm_cvt_ss2si(_mm_set_ss(vec.x)), _mm_cvt_ss2si(_mm_set_ss(vec.y)), _mm_cvt_ss2si(_mm_set_ss(vec.z))); +#else + return glm::round(vec); +#endif +} + #endif // hifi_GLMHelpers_h diff --git a/libraries/shared/src/PrioritySortUtil.h b/libraries/shared/src/PrioritySortUtil.h index 34ec074d45..e0137b3d8c 100644 --- a/libraries/shared/src/PrioritySortUtil.h +++ b/libraries/shared/src/PrioritySortUtil.h @@ -12,7 +12,6 @@ #define hifi_PrioritySortUtil_h #include -#include #include "NumericalConstants.h" #include "shared/ConicalViewFrustum.h" @@ -75,7 +74,6 @@ namespace PrioritySortUtil { void setPriority(float priority) { _priority = priority; } float getPriority() const { return _priority; } - bool operator<(const Sortable& other) const { return _priority < other._priority; } private: float _priority { 0.0f }; }; @@ -97,14 +95,15 @@ namespace PrioritySortUtil { _ageWeight = ageWeight; } - size_t size() const { return _queue.size(); } + size_t size() const { return _vector.size(); } void push(T thing) { thing.setPriority(computePriority(thing)); - _queue.push(thing); + _vector.push_back(thing); + } + const std::vector& getSortedVector() { + std::sort(_vector.begin(), _vector.end(), [](const T& left, const T& right) { return left.getPriority() > right.getPriority(); }); + return _vector; } - const T& top() const { return _queue.top(); } - void pop() { return _queue.pop(); } - bool empty() const { return _queue.empty(); } private: @@ -153,7 +152,7 @@ namespace PrioritySortUtil { } ConicalViewFrustums _views; - std::priority_queue _queue; + std::vector _vector; float _angularWeight { DEFAULT_ANGULAR_COEF }; float _centerWeight { DEFAULT_CENTER_COEF }; float _ageWeight { DEFAULT_AGE_COEF }; diff --git a/tests/shared/src/AACubeTests.cpp b/tests/shared/src/AACubeTests.cpp index 95a4d7f9f0..c3c8e3e6f7 100644 --- a/tests/shared/src/AACubeTests.cpp +++ b/tests/shared/src/AACubeTests.cpp @@ -173,7 +173,7 @@ void AACubeTests::rayVsParabolaPerformance() { glm::vec3 normal; auto start = std::chrono::high_resolution_clock::now(); for (auto& cube : cubes) { - if (cube.findRayIntersection(origin, direction, distance, face, normal)) { + if (cube.findRayIntersection(origin, direction, 1.0f / direction, distance, face, normal)) { numRayHits++; } } diff --git a/tests/shared/src/GLMHelpersTests.cpp b/tests/shared/src/GLMHelpersTests.cpp index 93c4735a6d..669bbb8e43 100644 --- a/tests/shared/src/GLMHelpersTests.cpp +++ b/tests/shared/src/GLMHelpersTests.cpp @@ -214,3 +214,39 @@ void GLMHelpersTests::testGenerateBasisVectors() { QCOMPARE_WITH_ABS_ERROR(w, z, EPSILON); } } + +void GLMHelpersTests::roundPerf() { + const int NUM_VECS = 1000000; + const float MAX_VEC = 500.0f; + std::vector vecs; + vecs.reserve(NUM_VECS); + for (int i = 0; i < NUM_VECS; i++) { + vecs.emplace_back(randFloatInRange(-MAX_VEC, MAX_VEC), randFloatInRange(-MAX_VEC, MAX_VEC), randFloatInRange(-MAX_VEC, MAX_VEC)); + } + std::vector vecs2 = vecs; + std::vector originalVecs = vecs; + + auto start = std::chrono::high_resolution_clock::now(); + for (auto& vec : vecs) { + vec = glm::round(vec); + } + + auto glmTime = std::chrono::high_resolution_clock::now() - start; + start = std::chrono::high_resolution_clock::now(); + for (auto& vec : vecs2) { + vec = fastRoundf(vec); + } + auto manualTime = std::chrono::high_resolution_clock::now() - start; + + bool identical = true; + for (int i = 0; i < vecs.size(); i++) { + identical &= vecs[i] == vecs2[i]; + if (vecs[i] != vecs2[i]) { + qDebug() << "glm: " << vecs[i].x << vecs[i].y << vecs[i].z << ", manual: " << vecs2[i].x << vecs2[i].y << vecs2[i].z; + qDebug() << "original: " << originalVecs[i].x << originalVecs[i].y << originalVecs[i].z; + break; + } + } + + qDebug() << "ratio: " << (float)glmTime.count() / (float)manualTime.count() << ", identical: " << identical; +} \ No newline at end of file diff --git a/tests/shared/src/GLMHelpersTests.h b/tests/shared/src/GLMHelpersTests.h index 030f2d477f..4d9bd0bb60 100644 --- a/tests/shared/src/GLMHelpersTests.h +++ b/tests/shared/src/GLMHelpersTests.h @@ -22,6 +22,7 @@ private slots: void testSixByteOrientationCompression(); void testSimd(); void testGenerateBasisVectors(); + void roundPerf(); }; float getErrorDifference(const float& a, const float& b);