From 5f08ed5027a8904242a47b17e994ab4f309e3930 Mon Sep 17 00:00:00 2001 From: SamGondelman Date: Thu, 30 Aug 2018 15:13:52 -0700 Subject: [PATCH 1/4] misc perf improvements --- .../src/avatars/AvatarMixerSlave.cpp | 6 +- interface/src/avatar/AvatarManager.cpp | 23 +++----- interface/src/avatar/OtherAvatar.cpp | 9 ++- .../src/controllers/UserInputMapper.cpp | 6 +- .../src/EntityTreeRenderer.cpp | 9 ++- .../graphics/src/graphics/BufferViewHelpers.h | 5 +- libraries/shared/src/AABox.cpp | 56 ++++--------------- libraries/shared/src/AABox.h | 17 +++++- libraries/shared/src/GLMHelpers.h | 8 +++ libraries/shared/src/PrioritySortUtil.h | 15 +++-- tests/shared/src/AACubeTests.cpp | 2 +- tests/shared/src/GLMHelpersTests.cpp | 36 ++++++++++++ tests/shared/src/GLMHelpersTests.h | 1 + 13 files changed, 108 insertions(+), 85 deletions(-) diff --git a/assignment-client/src/avatars/AvatarMixerSlave.cpp b/assignment-client/src/avatars/AvatarMixerSlave.cpp index f347ff1f10..59c6db5dc4 100644 --- a/assignment-client/src/avatars/AvatarMixerSlave.cpp +++ b/assignment-client/src/avatars/AvatarMixerSlave.cpp @@ -429,9 +429,9 @@ void AvatarMixerSlave::broadcastAvatarDataToAgent(const SharedNodePointer& node) int remainingAvatars = (int)sortedAvatars.size(); auto traitsPacketList = NLPacketList::create(PacketType::BulkAvatarTraits, QByteArray(), true, true); - while (!sortedAvatars.empty()) { - const auto avatarData = sortedAvatars.top().getAvatar(); - sortedAvatars.pop(); + const auto& sortedAvatarVector = sortedAvatars.getSortedVector(); + for (const auto& sortedAvatar : sortedAvatarVector) { + const auto& avatarData = sortedAvatar.getAvatar(); remainingAvatars--; auto otherNode = avatarDataToNodes[avatarData]; diff --git a/interface/src/avatar/AvatarManager.cpp b/interface/src/avatar/AvatarManager.cpp index 9a7d8ef0c8..af9d9ad6b1 100644 --- a/interface/src/avatar/AvatarManager.cpp +++ b/interface/src/avatar/AvatarManager.cpp @@ -206,6 +206,7 @@ void AvatarManager::updateOtherAvatars(float deltaTime) { } ++itr; } + const auto& sortedAvatarVector = sortedAvatars.getSortedVector(); // process in sorted order uint64_t startTime = usecTimestampNow(); @@ -216,8 +217,8 @@ void AvatarManager::updateOtherAvatars(float deltaTime) { render::Transaction renderTransaction; workload::Transaction workloadTransaction; - while (!sortedAvatars.empty()) { - const SortableAvatar& sortData = sortedAvatars.top(); + for (auto it = sortedAvatarVector.begin(); it != sortedAvatarVector.end(); ++it) { + const SortableAvatar& sortData = *it; const auto avatar = std::static_pointer_cast(sortData.getAvatar()); // TODO: to help us scale to more avatars it would be nice to not have to poll orb state here @@ -231,7 +232,6 @@ void AvatarManager::updateOtherAvatars(float deltaTime) { bool ignoring = DependencyManager::get()->isPersonalMutingNode(avatar->getID()); if (ignoring) { - sortedAvatars.pop(); continue; } @@ -260,26 +260,17 @@ void AvatarManager::updateOtherAvatars(float deltaTime) { // --> some avatar velocity measurements may be a little off // no time to simulate, but we take the time to count how many were tragically missed - bool inView = sortData.getPriority() > OUT_OF_VIEW_THRESHOLD; - if (!inView) { - break; - } - if (inView && avatar->hasNewJointData()) { - numAVatarsNotUpdated++; - } - sortedAvatars.pop(); - while (inView && !sortedAvatars.empty()) { - const SortableAvatar& newSortData = sortedAvatars.top(); + while (it != sortedAvatarVector.end()) { + const SortableAvatar& newSortData = *it; const auto newAvatar = std::static_pointer_cast(newSortData.getAvatar()); - inView = newSortData.getPriority() > OUT_OF_VIEW_THRESHOLD; + bool inView = newSortData.getPriority() > OUT_OF_VIEW_THRESHOLD; if (inView && newAvatar->hasNewJointData()) { numAVatarsNotUpdated++; } - sortedAvatars.pop(); + ++it; } break; } - sortedAvatars.pop(); } if (_shouldRender) { diff --git a/interface/src/avatar/OtherAvatar.cpp b/interface/src/avatar/OtherAvatar.cpp index 29ad5aed91..a0fa496c4c 100644 --- a/interface/src/avatar/OtherAvatar.cpp +++ b/interface/src/avatar/OtherAvatar.cpp @@ -29,20 +29,23 @@ OtherAvatar::~OtherAvatar() { } void OtherAvatar::removeOrb() { - if (qApp->getOverlays().isAddedOverlay(_otherAvatarOrbMeshPlaceholderID)) { + if (!_otherAvatarOrbMeshPlaceholderID.isNull()) { qApp->getOverlays().deleteOverlay(_otherAvatarOrbMeshPlaceholderID); + _otherAvatarOrbMeshPlaceholderID = UNKNOWN_OVERLAY_ID; } } void OtherAvatar::updateOrbPosition() { if (_otherAvatarOrbMeshPlaceholder != nullptr) { _otherAvatarOrbMeshPlaceholder->setWorldPosition(getHead()->getPosition()); + if (_otherAvatarOrbMeshPlaceholderID.isNull()) { + _otherAvatarOrbMeshPlaceholderID = qApp->getOverlays().addOverlay(_otherAvatarOrbMeshPlaceholder); + } } } void OtherAvatar::createOrb() { - if (_otherAvatarOrbMeshPlaceholderID == UNKNOWN_OVERLAY_ID || - !qApp->getOverlays().isAddedOverlay(_otherAvatarOrbMeshPlaceholderID)) { + if (_otherAvatarOrbMeshPlaceholderID.isNull()) { _otherAvatarOrbMeshPlaceholder = std::make_shared(); _otherAvatarOrbMeshPlaceholder->setAlpha(1.0f); _otherAvatarOrbMeshPlaceholder->setColor({ 0xFF, 0x00, 0xFF }); diff --git a/libraries/controllers/src/controllers/UserInputMapper.cpp b/libraries/controllers/src/controllers/UserInputMapper.cpp index 371deec7d5..307064c073 100755 --- a/libraries/controllers/src/controllers/UserInputMapper.cpp +++ b/libraries/controllers/src/controllers/UserInputMapper.cpp @@ -527,8 +527,8 @@ bool UserInputMapper::applyRoute(const Route::Pointer& route, bool force) { } // If the source hasn't been written yet, defer processing of this route - auto source = route->source; - auto sourceInput = source->getInput(); + auto& source = route->source; + auto& sourceInput = source->getInput(); if (sourceInput.device == STANDARD_DEVICE && !force && source->writeable()) { if (debugRoutes && route->debug) { qCDebug(controllers) << "Source not yet written, deferring"; @@ -559,7 +559,7 @@ bool UserInputMapper::applyRoute(const Route::Pointer& route, bool force) { return true; } - auto destination = route->destination; + auto& destination = route->destination; // THis could happen if the route destination failed to create // FIXME: Maybe do not create the route if the destination failed and avoid this case ? if (!destination) { diff --git a/libraries/entities-renderer/src/EntityTreeRenderer.cpp b/libraries/entities-renderer/src/EntityTreeRenderer.cpp index c3c4095251..a363093083 100644 --- a/libraries/entities-renderer/src/EntityTreeRenderer.cpp +++ b/libraries/entities-renderer/src/EntityTreeRenderer.cpp @@ -405,11 +405,14 @@ void EntityTreeRenderer::updateChangedEntities(const render::ScenePointer& scene // process the sorted renderables size_t numSorted = sortedRenderables.size(); - while (!sortedRenderables.empty() && usecTimestampNow() < expiry) { - const auto renderable = sortedRenderables.top().getRenderer(); + const auto& sortedRenderablesVector = sortedRenderables.getSortedVector(); + for (const auto& sortedRenderable : sortedRenderablesVector) { + if (usecTimestampNow() > expiry) { + break; + } + const auto& renderable = sortedRenderable.getRenderer(); renderable->updateInScene(scene, transaction); _renderablesToUpdate.erase(renderable->getEntity()->getID()); - sortedRenderables.pop(); } // compute average per-renderable update cost diff --git a/libraries/graphics/src/graphics/BufferViewHelpers.h b/libraries/graphics/src/graphics/BufferViewHelpers.h index a9707c3128..7c37c75163 100644 --- a/libraries/graphics/src/graphics/BufferViewHelpers.h +++ b/libraries/graphics/src/graphics/BufferViewHelpers.h @@ -13,6 +13,7 @@ #include #include "GpuHelpers.h" +#include "GLMHelpers.h" namespace graphics { class Mesh; @@ -55,8 +56,8 @@ namespace buffer_helpers { tangent = glm::clamp(tangent, -1.0f, 1.0f); normal *= 511.0f; tangent *= 511.0f; - normal = glm::round(normal); - tangent = glm::round(tangent); + normal = fastRoundf(normal); + tangent = fastRoundf(tangent); glm::detail::i10i10i10i2 normalStruct; glm::detail::i10i10i10i2 tangentStruct; diff --git a/libraries/shared/src/AABox.cpp b/libraries/shared/src/AABox.cpp index b4384c494f..e537c3e56a 100644 --- a/libraries/shared/src/AABox.cpp +++ b/libraries/shared/src/AABox.cpp @@ -79,33 +79,23 @@ void AABox::setBox(const glm::vec3& corner, const glm::vec3& scale) { glm::vec3 AABox::getFarthestVertex(const glm::vec3& normal) const { glm::vec3 result = _corner; - if (normal.x > 0.0f) { - result.x += _scale.x; - } - if (normal.y > 0.0f) { - result.y += _scale.y; - } - if (normal.z > 0.0f) { - result.z += _scale.z; - } + float blend = (float)(normal.x > 0.0f); + result.x += blend * _scale.x + (1.0f - blend) * 0.0f; + blend = (float)(normal.y > 0.0f); + result.y += blend * _scale.y + (1.0f - blend) * 0.0f; + blend = (float)(normal.z > 0.0f); + result.z += blend * _scale.z + (1.0f - blend) * 0.0f; return result; } glm::vec3 AABox::getNearestVertex(const glm::vec3& normal) const { glm::vec3 result = _corner; - - if (normal.x < 0.0f) { - result.x += _scale.x; - } - - if (normal.y < 0.0f) { - result.y += _scale.y; - } - - if (normal.z < 0.0f) { - result.z += _scale.z; - } - + float blend = (float)(normal.x < 0.0f); + result.x += blend * _scale.x + (1.0f - blend) * 0.0f; + blend = (float)(normal.y < 0.0f); + result.y += blend * _scale.y + (1.0f - blend) * 0.0f; + blend = (float)(normal.z < 0.0f); + result.z += blend * _scale.z + (1.0f - blend) * 0.0f; return result; } @@ -459,28 +449,6 @@ AABox AABox::clamp(float min, float max) const { return AABox(clampedCorner, clampedScale); } -AABox& AABox::operator += (const glm::vec3& point) { - - if (isInvalid()) { - _corner = glm::min(_corner, point); - } else { - glm::vec3 maximum(_corner + _scale); - _corner = glm::min(_corner, point); - maximum = glm::max(maximum, point); - _scale = maximum - _corner; - } - - return (*this); -} - -AABox& AABox::operator += (const AABox& box) { - if (!box.isInvalid()) { - (*this) += box._corner; - (*this) += box.calcTopFarLeft(); - } - return (*this); -} - void AABox::embiggen(float scale) { _corner += scale * (-0.5f * _scale); _scale *= scale; diff --git a/libraries/shared/src/AABox.h b/libraries/shared/src/AABox.h index daad01d7c7..a56615c40e 100644 --- a/libraries/shared/src/AABox.h +++ b/libraries/shared/src/AABox.h @@ -85,8 +85,21 @@ public: AABox clamp(const glm::vec3& min, const glm::vec3& max) const; AABox clamp(float min, float max) const; - AABox& operator += (const glm::vec3& point); - AABox& operator += (const AABox& box); + inline AABox& operator+=(const glm::vec3& point) { + float blend = (float)isInvalid(); + glm::vec3 maximumScale(glm::max(_scale, point - _corner)); + _corner = glm::min(_corner, point); + _scale = blend * _scale + (1.0f - blend) * maximumScale; + return (*this); + } + + inline AABox& operator+=(const AABox& box) { + if (!box.isInvalid()) { + (*this) += box._corner; + (*this) += box.calcTopFarLeft(); + } + return (*this); + } // Translate the AABox just moving the corner void translate(const glm::vec3& translation) { _corner += translation; } diff --git a/libraries/shared/src/GLMHelpers.h b/libraries/shared/src/GLMHelpers.h index 7e6ef4cb28..619f8172d5 100644 --- a/libraries/shared/src/GLMHelpers.h +++ b/libraries/shared/src/GLMHelpers.h @@ -316,4 +316,12 @@ inline void glm_mat4u_mul(const glm::mat4& m1, const glm::mat4& m2, glm::mat4& r #endif } +inline glm::vec3 fastRoundf(const glm::vec3& vec) { +#if GLM_ARCH & GLM_ARCH_SSE2_BIT + return glm::vec3(_mm_cvt_ss2si(_mm_set_ss(vec.x)), _mm_cvt_ss2si(_mm_set_ss(vec.y)), _mm_cvt_ss2si(_mm_set_ss(vec.z))); +#else + return glm::round(vec); +#endif +} + #endif // hifi_GLMHelpers_h diff --git a/libraries/shared/src/PrioritySortUtil.h b/libraries/shared/src/PrioritySortUtil.h index 34ec074d45..e0137b3d8c 100644 --- a/libraries/shared/src/PrioritySortUtil.h +++ b/libraries/shared/src/PrioritySortUtil.h @@ -12,7 +12,6 @@ #define hifi_PrioritySortUtil_h #include -#include #include "NumericalConstants.h" #include "shared/ConicalViewFrustum.h" @@ -75,7 +74,6 @@ namespace PrioritySortUtil { void setPriority(float priority) { _priority = priority; } float getPriority() const { return _priority; } - bool operator<(const Sortable& other) const { return _priority < other._priority; } private: float _priority { 0.0f }; }; @@ -97,14 +95,15 @@ namespace PrioritySortUtil { _ageWeight = ageWeight; } - size_t size() const { return _queue.size(); } + size_t size() const { return _vector.size(); } void push(T thing) { thing.setPriority(computePriority(thing)); - _queue.push(thing); + _vector.push_back(thing); + } + const std::vector& getSortedVector() { + std::sort(_vector.begin(), _vector.end(), [](const T& left, const T& right) { return left.getPriority() > right.getPriority(); }); + return _vector; } - const T& top() const { return _queue.top(); } - void pop() { return _queue.pop(); } - bool empty() const { return _queue.empty(); } private: @@ -153,7 +152,7 @@ namespace PrioritySortUtil { } ConicalViewFrustums _views; - std::priority_queue _queue; + std::vector _vector; float _angularWeight { DEFAULT_ANGULAR_COEF }; float _centerWeight { DEFAULT_CENTER_COEF }; float _ageWeight { DEFAULT_AGE_COEF }; diff --git a/tests/shared/src/AACubeTests.cpp b/tests/shared/src/AACubeTests.cpp index 95a4d7f9f0..c3c8e3e6f7 100644 --- a/tests/shared/src/AACubeTests.cpp +++ b/tests/shared/src/AACubeTests.cpp @@ -173,7 +173,7 @@ void AACubeTests::rayVsParabolaPerformance() { glm::vec3 normal; auto start = std::chrono::high_resolution_clock::now(); for (auto& cube : cubes) { - if (cube.findRayIntersection(origin, direction, distance, face, normal)) { + if (cube.findRayIntersection(origin, direction, 1.0f / direction, distance, face, normal)) { numRayHits++; } } diff --git a/tests/shared/src/GLMHelpersTests.cpp b/tests/shared/src/GLMHelpersTests.cpp index 93c4735a6d..669bbb8e43 100644 --- a/tests/shared/src/GLMHelpersTests.cpp +++ b/tests/shared/src/GLMHelpersTests.cpp @@ -214,3 +214,39 @@ void GLMHelpersTests::testGenerateBasisVectors() { QCOMPARE_WITH_ABS_ERROR(w, z, EPSILON); } } + +void GLMHelpersTests::roundPerf() { + const int NUM_VECS = 1000000; + const float MAX_VEC = 500.0f; + std::vector vecs; + vecs.reserve(NUM_VECS); + for (int i = 0; i < NUM_VECS; i++) { + vecs.emplace_back(randFloatInRange(-MAX_VEC, MAX_VEC), randFloatInRange(-MAX_VEC, MAX_VEC), randFloatInRange(-MAX_VEC, MAX_VEC)); + } + std::vector vecs2 = vecs; + std::vector originalVecs = vecs; + + auto start = std::chrono::high_resolution_clock::now(); + for (auto& vec : vecs) { + vec = glm::round(vec); + } + + auto glmTime = std::chrono::high_resolution_clock::now() - start; + start = std::chrono::high_resolution_clock::now(); + for (auto& vec : vecs2) { + vec = fastRoundf(vec); + } + auto manualTime = std::chrono::high_resolution_clock::now() - start; + + bool identical = true; + for (int i = 0; i < vecs.size(); i++) { + identical &= vecs[i] == vecs2[i]; + if (vecs[i] != vecs2[i]) { + qDebug() << "glm: " << vecs[i].x << vecs[i].y << vecs[i].z << ", manual: " << vecs2[i].x << vecs2[i].y << vecs2[i].z; + qDebug() << "original: " << originalVecs[i].x << originalVecs[i].y << originalVecs[i].z; + break; + } + } + + qDebug() << "ratio: " << (float)glmTime.count() / (float)manualTime.count() << ", identical: " << identical; +} \ No newline at end of file diff --git a/tests/shared/src/GLMHelpersTests.h b/tests/shared/src/GLMHelpersTests.h index 030f2d477f..4d9bd0bb60 100644 --- a/tests/shared/src/GLMHelpersTests.h +++ b/tests/shared/src/GLMHelpersTests.h @@ -22,6 +22,7 @@ private slots: void testSixByteOrientationCompression(); void testSimd(); void testGenerateBasisVectors(); + void roundPerf(); }; float getErrorDifference(const float& a, const float& b); From d01b438cb320a7ccc0aa8a0085e86940bb73c09c Mon Sep 17 00:00:00 2001 From: SamGondelman Date: Fri, 31 Aug 2018 11:25:17 -0700 Subject: [PATCH 2/4] CR --- interface/src/avatar/AvatarManager.cpp | 6 ++++-- .../graphics/src/graphics/BufferViewHelpers.h | 14 ++++++------- libraries/shared/src/AABox.cpp | 20 +++++++++++++++++++ libraries/shared/src/AABox.h | 9 +++++++++ libraries/shared/src/GLMHelpers.h | 11 +++++++--- tests/shared/src/AACubeTests.cpp | 3 ++- tests/shared/src/GLMHelpersTests.cpp | 2 +- 7 files changed, 50 insertions(+), 15 deletions(-) diff --git a/interface/src/avatar/AvatarManager.cpp b/interface/src/avatar/AvatarManager.cpp index af9d9ad6b1..bd98549510 100644 --- a/interface/src/avatar/AvatarManager.cpp +++ b/interface/src/avatar/AvatarManager.cpp @@ -264,9 +264,11 @@ void AvatarManager::updateOtherAvatars(float deltaTime) { const SortableAvatar& newSortData = *it; const auto newAvatar = std::static_pointer_cast(newSortData.getAvatar()); bool inView = newSortData.getPriority() > OUT_OF_VIEW_THRESHOLD; - if (inView && newAvatar->hasNewJointData()) { - numAVatarsNotUpdated++; + // Once we reach an avatar that's not in view, all avatars after it will also be out of view + if (!inView) { + break; } + numAVatarsNotUpdated += (int)(newAvatar->hasNewJointData()); ++it; } break; diff --git a/libraries/graphics/src/graphics/BufferViewHelpers.h b/libraries/graphics/src/graphics/BufferViewHelpers.h index 7c37c75163..026e7b53a3 100644 --- a/libraries/graphics/src/graphics/BufferViewHelpers.h +++ b/libraries/graphics/src/graphics/BufferViewHelpers.h @@ -56,18 +56,16 @@ namespace buffer_helpers { tangent = glm::clamp(tangent, -1.0f, 1.0f); normal *= 511.0f; tangent *= 511.0f; - normal = fastRoundf(normal); - tangent = fastRoundf(tangent); glm::detail::i10i10i10i2 normalStruct; glm::detail::i10i10i10i2 tangentStruct; - normalStruct.data.x = int(normal.x); - normalStruct.data.y = int(normal.y); - normalStruct.data.z = int(normal.z); + normalStruct.data.x = fastLrintf(normal.x); + normalStruct.data.y = fastLrintf(normal.y); + normalStruct.data.z = fastLrintf(normal.z); normalStruct.data.w = 0; - tangentStruct.data.x = int(tangent.x); - tangentStruct.data.y = int(tangent.y); - tangentStruct.data.z = int(tangent.z); + tangentStruct.data.x = fastLrintf(tangent.x); + tangentStruct.data.y = fastLrintf(tangent.y); + tangentStruct.data.z = fastLrintf(tangent.z); tangentStruct.data.w = 0; packedNormal = normalStruct.pack; packedTangent = tangentStruct.pack; diff --git a/libraries/shared/src/AABox.cpp b/libraries/shared/src/AABox.cpp index e537c3e56a..ff6c2a4e6e 100644 --- a/libraries/shared/src/AABox.cpp +++ b/libraries/shared/src/AABox.cpp @@ -79,6 +79,16 @@ void AABox::setBox(const glm::vec3& corner, const glm::vec3& scale) { glm::vec3 AABox::getFarthestVertex(const glm::vec3& normal) const { glm::vec3 result = _corner; + // This is a branchless version of: + //if (normal.x > 0.0f) { + // result.x += _scale.x; + //} + //if (normal.y > 0.0f) { + // result.y += _scale.y; + //} + //if (normal.z > 0.0f) { + // result.z += _scale.z; + //} float blend = (float)(normal.x > 0.0f); result.x += blend * _scale.x + (1.0f - blend) * 0.0f; blend = (float)(normal.y > 0.0f); @@ -90,6 +100,16 @@ glm::vec3 AABox::getFarthestVertex(const glm::vec3& normal) const { glm::vec3 AABox::getNearestVertex(const glm::vec3& normal) const { glm::vec3 result = _corner; + // This is a branchless version of: + //if (normal.x < 0.0f) { + // result.x += _scale.x; + //} + //if (normal.y < 0.0f) { + // result.y += _scale.y; + //} + //if (normal.z < 0.0f) { + // result.z += _scale.z; + //} float blend = (float)(normal.x < 0.0f); result.x += blend * _scale.x + (1.0f - blend) * 0.0f; blend = (float)(normal.y < 0.0f); diff --git a/libraries/shared/src/AABox.h b/libraries/shared/src/AABox.h index a56615c40e..e0bb1343f8 100644 --- a/libraries/shared/src/AABox.h +++ b/libraries/shared/src/AABox.h @@ -86,6 +86,15 @@ public: AABox clamp(float min, float max) const; inline AABox& operator+=(const glm::vec3& point) { + // Branchless version of: + //if (isInvalid()) { + // _corner = glm::min(_corner, point); + //} else { + // glm::vec3 maximum(_corner + _scale); + // _corner = glm::min(_corner, point); + // maximum = glm::max(maximum, point); + // _scale = maximum - _corner; + //} float blend = (float)isInvalid(); glm::vec3 maximumScale(glm::max(_scale, point - _corner)); _corner = glm::min(_corner, point); diff --git a/libraries/shared/src/GLMHelpers.h b/libraries/shared/src/GLMHelpers.h index 619f8172d5..96219ea48c 100644 --- a/libraries/shared/src/GLMHelpers.h +++ b/libraries/shared/src/GLMHelpers.h @@ -316,11 +316,16 @@ inline void glm_mat4u_mul(const glm::mat4& m1, const glm::mat4& m2, glm::mat4& r #endif } -inline glm::vec3 fastRoundf(const glm::vec3& vec) { +// convert float to int, using round-to-nearest-even (undefined on overflow) +inline int fastLrintf(float x) { #if GLM_ARCH & GLM_ARCH_SSE2_BIT - return glm::vec3(_mm_cvt_ss2si(_mm_set_ss(vec.x)), _mm_cvt_ss2si(_mm_set_ss(vec.y)), _mm_cvt_ss2si(_mm_set_ss(vec.z))); + return _mm_cvt_ss2si(_mm_set_ss(x)); #else - return glm::round(vec); + // return lrintf(x); + static_assert(std::numeric_limits::is_iec559, "Requires IEEE-754 double precision format"); + union { double d; int64_t i; } bits = { (double)x }; + bits.d += (3ULL << 51); + return (int)bits.i; #endif } diff --git a/tests/shared/src/AACubeTests.cpp b/tests/shared/src/AACubeTests.cpp index c3c8e3e6f7..4ed3ee2813 100644 --- a/tests/shared/src/AACubeTests.cpp +++ b/tests/shared/src/AACubeTests.cpp @@ -168,12 +168,13 @@ void AACubeTests::rayVsParabolaPerformance() { glm::vec3 origin(0.0f); glm::vec3 direction = glm::normalize(glm::vec3(1.0f)); + glm::vec3 invDirection = 1.0f / direction; float distance; BoxFace face; glm::vec3 normal; auto start = std::chrono::high_resolution_clock::now(); for (auto& cube : cubes) { - if (cube.findRayIntersection(origin, direction, 1.0f / direction, distance, face, normal)) { + if (cube.findRayIntersection(origin, direction, invDirection, distance, face, normal)) { numRayHits++; } } diff --git a/tests/shared/src/GLMHelpersTests.cpp b/tests/shared/src/GLMHelpersTests.cpp index 669bbb8e43..71877e89f6 100644 --- a/tests/shared/src/GLMHelpersTests.cpp +++ b/tests/shared/src/GLMHelpersTests.cpp @@ -234,7 +234,7 @@ void GLMHelpersTests::roundPerf() { auto glmTime = std::chrono::high_resolution_clock::now() - start; start = std::chrono::high_resolution_clock::now(); for (auto& vec : vecs2) { - vec = fastRoundf(vec); + vec = glm::vec3(fastLrintf(vec.x), fastLrintf(vec.y), fastLrintf(vec.z)); } auto manualTime = std::chrono::high_resolution_clock::now() - start; From ade58a52cc97fe06c2f4bb88c4b9d097636fd4b9 Mon Sep 17 00:00:00 2001 From: SamGondelman Date: Tue, 4 Sep 2018 10:33:01 -0700 Subject: [PATCH 3/4] reserve priority sort util vectors --- assignment-client/src/avatars/AvatarMixerSlave.cpp | 1 + interface/src/avatar/AvatarManager.cpp | 5 +++-- libraries/entities-renderer/src/EntityTreeRenderer.cpp | 1 + libraries/shared/src/PrioritySortUtil.h | 3 +++ 4 files changed, 8 insertions(+), 2 deletions(-) diff --git a/assignment-client/src/avatars/AvatarMixerSlave.cpp b/assignment-client/src/avatars/AvatarMixerSlave.cpp index 59c6db5dc4..c434d82116 100644 --- a/assignment-client/src/avatars/AvatarMixerSlave.cpp +++ b/assignment-client/src/avatars/AvatarMixerSlave.cpp @@ -329,6 +329,7 @@ void AvatarMixerSlave::broadcastAvatarDataToAgent(const SharedNodePointer& node) AvatarData::_avatarSortCoefficientSize, AvatarData::_avatarSortCoefficientCenter, AvatarData::_avatarSortCoefficientAge); + sortedAvatars.reserve(avatarsToSort.size()); // ignore or sort const AvatarSharedPointer& thisAvatar = nodeData->getAvatarSharedPointer(); diff --git a/interface/src/avatar/AvatarManager.cpp b/interface/src/avatar/AvatarManager.cpp index bd98549510..e9486b9def 100644 --- a/interface/src/avatar/AvatarManager.cpp +++ b/interface/src/avatar/AvatarManager.cpp @@ -187,16 +187,17 @@ void AvatarManager::updateOtherAvatars(float deltaTime) { AvatarSharedPointer _avatar; }; + auto avatarMap = getHashCopy(); + AvatarHash::iterator itr = avatarMap.begin(); const auto& views = qApp->getConicalViews(); PrioritySortUtil::PriorityQueue sortedAvatars(views, AvatarData::_avatarSortCoefficientSize, AvatarData::_avatarSortCoefficientCenter, AvatarData::_avatarSortCoefficientAge); + sortedAvatars.reserve(avatarMap.size() - 1); // don't include MyAvatar // sort - auto avatarMap = getHashCopy(); - AvatarHash::iterator itr = avatarMap.begin(); while (itr != avatarMap.end()) { const auto& avatar = std::static_pointer_cast(*itr); // DO NOT update _myAvatar! Its update has already been done earlier in the main loop. diff --git a/libraries/entities-renderer/src/EntityTreeRenderer.cpp b/libraries/entities-renderer/src/EntityTreeRenderer.cpp index a363093083..3d782f69a7 100644 --- a/libraries/entities-renderer/src/EntityTreeRenderer.cpp +++ b/libraries/entities-renderer/src/EntityTreeRenderer.cpp @@ -382,6 +382,7 @@ void EntityTreeRenderer::updateChangedEntities(const render::ScenePointer& scene const auto& views = _viewState->getConicalViews(); PrioritySortUtil::PriorityQueue sortedRenderables(views); + sortedRenderables.reserve(_renderablesToUpdate.size()); { PROFILE_RANGE_EX(simulation_physics, "SortRenderables", 0xffff00ff, (uint64_t)_renderablesToUpdate.size()); std::unordered_map::iterator itr = _renderablesToUpdate.begin(); diff --git a/libraries/shared/src/PrioritySortUtil.h b/libraries/shared/src/PrioritySortUtil.h index e0137b3d8c..8ded047212 100644 --- a/libraries/shared/src/PrioritySortUtil.h +++ b/libraries/shared/src/PrioritySortUtil.h @@ -100,6 +100,9 @@ namespace PrioritySortUtil { thing.setPriority(computePriority(thing)); _vector.push_back(thing); } + void reserve(size_t num) { + _vector.reserve(num); + } const std::vector& getSortedVector() { std::sort(_vector.begin(), _vector.end(), [](const T& left, const T& right) { return left.getPriority() > right.getPriority(); }); return _vector; From 9e76c164ac76fcc6dfadb05b79da96a862188d97 Mon Sep 17 00:00:00 2001 From: SamGondelman Date: Tue, 4 Sep 2018 16:25:47 -0700 Subject: [PATCH 4/4] fix aabox operator+= --- libraries/shared/src/AABox.h | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/libraries/shared/src/AABox.h b/libraries/shared/src/AABox.h index e0bb1343f8..f41bb8a814 100644 --- a/libraries/shared/src/AABox.h +++ b/libraries/shared/src/AABox.h @@ -86,19 +86,12 @@ public: AABox clamp(float min, float max) const; inline AABox& operator+=(const glm::vec3& point) { - // Branchless version of: - //if (isInvalid()) { - // _corner = glm::min(_corner, point); - //} else { - // glm::vec3 maximum(_corner + _scale); - // _corner = glm::min(_corner, point); - // maximum = glm::max(maximum, point); - // _scale = maximum - _corner; - //} - float blend = (float)isInvalid(); - glm::vec3 maximumScale(glm::max(_scale, point - _corner)); + bool valid = !isInvalid(); + glm::vec3 maximum = glm::max(_corner + _scale, point); _corner = glm::min(_corner, point); - _scale = blend * _scale + (1.0f - blend) * maximumScale; + if (valid) { + _scale = maximum - _corner; + } return (*this); } @@ -136,7 +129,7 @@ public: static const glm::vec3 INFINITY_VECTOR; - bool isInvalid() const { return _corner == INFINITY_VECTOR; } + bool isInvalid() const { return _corner.x == std::numeric_limits::infinity(); } void clear() { _corner = INFINITY_VECTOR; _scale = glm::vec3(0.0f); }