diff --git a/assignment-client/src/avatars/AvatarMixerSlave.cpp b/assignment-client/src/avatars/AvatarMixerSlave.cpp index f347ff1f10..c434d82116 100644 --- a/assignment-client/src/avatars/AvatarMixerSlave.cpp +++ b/assignment-client/src/avatars/AvatarMixerSlave.cpp @@ -329,6 +329,7 @@ void AvatarMixerSlave::broadcastAvatarDataToAgent(const SharedNodePointer& node) AvatarData::_avatarSortCoefficientSize, AvatarData::_avatarSortCoefficientCenter, AvatarData::_avatarSortCoefficientAge); + sortedAvatars.reserve(avatarsToSort.size()); // ignore or sort const AvatarSharedPointer& thisAvatar = nodeData->getAvatarSharedPointer(); @@ -429,9 +430,9 @@ void AvatarMixerSlave::broadcastAvatarDataToAgent(const SharedNodePointer& node) int remainingAvatars = (int)sortedAvatars.size(); auto traitsPacketList = NLPacketList::create(PacketType::BulkAvatarTraits, QByteArray(), true, true); - while (!sortedAvatars.empty()) { - const auto avatarData = sortedAvatars.top().getAvatar(); - sortedAvatars.pop(); + const auto& sortedAvatarVector = sortedAvatars.getSortedVector(); + for (const auto& sortedAvatar : sortedAvatarVector) { + const auto& avatarData = sortedAvatar.getAvatar(); remainingAvatars--; auto otherNode = avatarDataToNodes[avatarData]; diff --git a/interface/src/avatar/AvatarManager.cpp b/interface/src/avatar/AvatarManager.cpp index 9a7d8ef0c8..e9486b9def 100644 --- a/interface/src/avatar/AvatarManager.cpp +++ b/interface/src/avatar/AvatarManager.cpp @@ -187,16 +187,17 @@ void AvatarManager::updateOtherAvatars(float deltaTime) { AvatarSharedPointer _avatar; }; + auto avatarMap = getHashCopy(); + AvatarHash::iterator itr = avatarMap.begin(); const auto& views = qApp->getConicalViews(); PrioritySortUtil::PriorityQueue sortedAvatars(views, AvatarData::_avatarSortCoefficientSize, AvatarData::_avatarSortCoefficientCenter, AvatarData::_avatarSortCoefficientAge); + sortedAvatars.reserve(avatarMap.size() - 1); // don't include MyAvatar // sort - auto avatarMap = getHashCopy(); - AvatarHash::iterator itr = avatarMap.begin(); while (itr != avatarMap.end()) { const auto& avatar = std::static_pointer_cast(*itr); // DO NOT update _myAvatar! Its update has already been done earlier in the main loop. @@ -206,6 +207,7 @@ void AvatarManager::updateOtherAvatars(float deltaTime) { } ++itr; } + const auto& sortedAvatarVector = sortedAvatars.getSortedVector(); // process in sorted order uint64_t startTime = usecTimestampNow(); @@ -216,8 +218,8 @@ void AvatarManager::updateOtherAvatars(float deltaTime) { render::Transaction renderTransaction; workload::Transaction workloadTransaction; - while (!sortedAvatars.empty()) { - const SortableAvatar& sortData = sortedAvatars.top(); + for (auto it = sortedAvatarVector.begin(); it != sortedAvatarVector.end(); ++it) { + const SortableAvatar& sortData = *it; const auto avatar = std::static_pointer_cast(sortData.getAvatar()); // TODO: to help us scale to more avatars it would be nice to not have to poll orb state here @@ -231,7 +233,6 @@ void AvatarManager::updateOtherAvatars(float deltaTime) { bool ignoring = DependencyManager::get()->isPersonalMutingNode(avatar->getID()); if (ignoring) { - sortedAvatars.pop(); continue; } @@ -260,26 +261,19 @@ void AvatarManager::updateOtherAvatars(float deltaTime) { // --> some avatar velocity measurements may be a little off // no time to simulate, but we take the time to count how many were tragically missed - bool inView = sortData.getPriority() > OUT_OF_VIEW_THRESHOLD; - if (!inView) { - break; - } - if (inView && avatar->hasNewJointData()) { - numAVatarsNotUpdated++; - } - sortedAvatars.pop(); - while (inView && !sortedAvatars.empty()) { - const SortableAvatar& newSortData = sortedAvatars.top(); + while (it != sortedAvatarVector.end()) { + const SortableAvatar& newSortData = *it; const auto newAvatar = std::static_pointer_cast(newSortData.getAvatar()); - inView = newSortData.getPriority() > OUT_OF_VIEW_THRESHOLD; - if (inView && newAvatar->hasNewJointData()) { - numAVatarsNotUpdated++; + bool inView = newSortData.getPriority() > OUT_OF_VIEW_THRESHOLD; + // Once we reach an avatar that's not in view, all avatars after it will also be out of view + if (!inView) { + break; } - sortedAvatars.pop(); + numAVatarsNotUpdated += (int)(newAvatar->hasNewJointData()); + ++it; } break; } - sortedAvatars.pop(); } if (_shouldRender) { diff --git a/interface/src/avatar/OtherAvatar.cpp b/interface/src/avatar/OtherAvatar.cpp index 29ad5aed91..a0fa496c4c 100644 --- a/interface/src/avatar/OtherAvatar.cpp +++ b/interface/src/avatar/OtherAvatar.cpp @@ -29,20 +29,23 @@ OtherAvatar::~OtherAvatar() { } void OtherAvatar::removeOrb() { - if (qApp->getOverlays().isAddedOverlay(_otherAvatarOrbMeshPlaceholderID)) { + if (!_otherAvatarOrbMeshPlaceholderID.isNull()) { qApp->getOverlays().deleteOverlay(_otherAvatarOrbMeshPlaceholderID); + _otherAvatarOrbMeshPlaceholderID = UNKNOWN_OVERLAY_ID; } } void OtherAvatar::updateOrbPosition() { if (_otherAvatarOrbMeshPlaceholder != nullptr) { _otherAvatarOrbMeshPlaceholder->setWorldPosition(getHead()->getPosition()); + if (_otherAvatarOrbMeshPlaceholderID.isNull()) { + _otherAvatarOrbMeshPlaceholderID = qApp->getOverlays().addOverlay(_otherAvatarOrbMeshPlaceholder); + } } } void OtherAvatar::createOrb() { - if (_otherAvatarOrbMeshPlaceholderID == UNKNOWN_OVERLAY_ID || - !qApp->getOverlays().isAddedOverlay(_otherAvatarOrbMeshPlaceholderID)) { + if (_otherAvatarOrbMeshPlaceholderID.isNull()) { _otherAvatarOrbMeshPlaceholder = std::make_shared(); _otherAvatarOrbMeshPlaceholder->setAlpha(1.0f); _otherAvatarOrbMeshPlaceholder->setColor({ 0xFF, 0x00, 0xFF }); diff --git a/libraries/controllers/src/controllers/UserInputMapper.cpp b/libraries/controllers/src/controllers/UserInputMapper.cpp index 371deec7d5..307064c073 100755 --- a/libraries/controllers/src/controllers/UserInputMapper.cpp +++ b/libraries/controllers/src/controllers/UserInputMapper.cpp @@ -527,8 +527,8 @@ bool UserInputMapper::applyRoute(const Route::Pointer& route, bool force) { } // If the source hasn't been written yet, defer processing of this route - auto source = route->source; - auto sourceInput = source->getInput(); + auto& source = route->source; + auto& sourceInput = source->getInput(); if (sourceInput.device == STANDARD_DEVICE && !force && source->writeable()) { if (debugRoutes && route->debug) { qCDebug(controllers) << "Source not yet written, deferring"; @@ -559,7 +559,7 @@ bool UserInputMapper::applyRoute(const Route::Pointer& route, bool force) { return true; } - auto destination = route->destination; + auto& destination = route->destination; // THis could happen if the route destination failed to create // FIXME: Maybe do not create the route if the destination failed and avoid this case ? if (!destination) { diff --git a/libraries/entities-renderer/src/EntityTreeRenderer.cpp b/libraries/entities-renderer/src/EntityTreeRenderer.cpp index c3c4095251..3d782f69a7 100644 --- a/libraries/entities-renderer/src/EntityTreeRenderer.cpp +++ b/libraries/entities-renderer/src/EntityTreeRenderer.cpp @@ -382,6 +382,7 @@ void EntityTreeRenderer::updateChangedEntities(const render::ScenePointer& scene const auto& views = _viewState->getConicalViews(); PrioritySortUtil::PriorityQueue sortedRenderables(views); + sortedRenderables.reserve(_renderablesToUpdate.size()); { PROFILE_RANGE_EX(simulation_physics, "SortRenderables", 0xffff00ff, (uint64_t)_renderablesToUpdate.size()); std::unordered_map::iterator itr = _renderablesToUpdate.begin(); @@ -405,11 +406,14 @@ void EntityTreeRenderer::updateChangedEntities(const render::ScenePointer& scene // process the sorted renderables size_t numSorted = sortedRenderables.size(); - while (!sortedRenderables.empty() && usecTimestampNow() < expiry) { - const auto renderable = sortedRenderables.top().getRenderer(); + const auto& sortedRenderablesVector = sortedRenderables.getSortedVector(); + for (const auto& sortedRenderable : sortedRenderablesVector) { + if (usecTimestampNow() > expiry) { + break; + } + const auto& renderable = sortedRenderable.getRenderer(); renderable->updateInScene(scene, transaction); _renderablesToUpdate.erase(renderable->getEntity()->getID()); - sortedRenderables.pop(); } // compute average per-renderable update cost diff --git a/libraries/graphics/src/graphics/BufferViewHelpers.h b/libraries/graphics/src/graphics/BufferViewHelpers.h index a9707c3128..026e7b53a3 100644 --- a/libraries/graphics/src/graphics/BufferViewHelpers.h +++ b/libraries/graphics/src/graphics/BufferViewHelpers.h @@ -13,6 +13,7 @@ #include #include "GpuHelpers.h" +#include "GLMHelpers.h" namespace graphics { class Mesh; @@ -55,18 +56,16 @@ namespace buffer_helpers { tangent = glm::clamp(tangent, -1.0f, 1.0f); normal *= 511.0f; tangent *= 511.0f; - normal = glm::round(normal); - tangent = glm::round(tangent); glm::detail::i10i10i10i2 normalStruct; glm::detail::i10i10i10i2 tangentStruct; - normalStruct.data.x = int(normal.x); - normalStruct.data.y = int(normal.y); - normalStruct.data.z = int(normal.z); + normalStruct.data.x = fastLrintf(normal.x); + normalStruct.data.y = fastLrintf(normal.y); + normalStruct.data.z = fastLrintf(normal.z); normalStruct.data.w = 0; - tangentStruct.data.x = int(tangent.x); - tangentStruct.data.y = int(tangent.y); - tangentStruct.data.z = int(tangent.z); + tangentStruct.data.x = fastLrintf(tangent.x); + tangentStruct.data.y = fastLrintf(tangent.y); + tangentStruct.data.z = fastLrintf(tangent.z); tangentStruct.data.w = 0; packedNormal = normalStruct.pack; packedTangent = tangentStruct.pack; diff --git a/libraries/shared/src/AABox.cpp b/libraries/shared/src/AABox.cpp index b4384c494f..ff6c2a4e6e 100644 --- a/libraries/shared/src/AABox.cpp +++ b/libraries/shared/src/AABox.cpp @@ -79,33 +79,43 @@ void AABox::setBox(const glm::vec3& corner, const glm::vec3& scale) { glm::vec3 AABox::getFarthestVertex(const glm::vec3& normal) const { glm::vec3 result = _corner; - if (normal.x > 0.0f) { - result.x += _scale.x; - } - if (normal.y > 0.0f) { - result.y += _scale.y; - } - if (normal.z > 0.0f) { - result.z += _scale.z; - } + // This is a branchless version of: + //if (normal.x > 0.0f) { + // result.x += _scale.x; + //} + //if (normal.y > 0.0f) { + // result.y += _scale.y; + //} + //if (normal.z > 0.0f) { + // result.z += _scale.z; + //} + float blend = (float)(normal.x > 0.0f); + result.x += blend * _scale.x + (1.0f - blend) * 0.0f; + blend = (float)(normal.y > 0.0f); + result.y += blend * _scale.y + (1.0f - blend) * 0.0f; + blend = (float)(normal.z > 0.0f); + result.z += blend * _scale.z + (1.0f - blend) * 0.0f; return result; } glm::vec3 AABox::getNearestVertex(const glm::vec3& normal) const { glm::vec3 result = _corner; - - if (normal.x < 0.0f) { - result.x += _scale.x; - } - - if (normal.y < 0.0f) { - result.y += _scale.y; - } - - if (normal.z < 0.0f) { - result.z += _scale.z; - } - + // This is a branchless version of: + //if (normal.x < 0.0f) { + // result.x += _scale.x; + //} + //if (normal.y < 0.0f) { + // result.y += _scale.y; + //} + //if (normal.z < 0.0f) { + // result.z += _scale.z; + //} + float blend = (float)(normal.x < 0.0f); + result.x += blend * _scale.x + (1.0f - blend) * 0.0f; + blend = (float)(normal.y < 0.0f); + result.y += blend * _scale.y + (1.0f - blend) * 0.0f; + blend = (float)(normal.z < 0.0f); + result.z += blend * _scale.z + (1.0f - blend) * 0.0f; return result; } @@ -459,28 +469,6 @@ AABox AABox::clamp(float min, float max) const { return AABox(clampedCorner, clampedScale); } -AABox& AABox::operator += (const glm::vec3& point) { - - if (isInvalid()) { - _corner = glm::min(_corner, point); - } else { - glm::vec3 maximum(_corner + _scale); - _corner = glm::min(_corner, point); - maximum = glm::max(maximum, point); - _scale = maximum - _corner; - } - - return (*this); -} - -AABox& AABox::operator += (const AABox& box) { - if (!box.isInvalid()) { - (*this) += box._corner; - (*this) += box.calcTopFarLeft(); - } - return (*this); -} - void AABox::embiggen(float scale) { _corner += scale * (-0.5f * _scale); _scale *= scale; diff --git a/libraries/shared/src/AABox.h b/libraries/shared/src/AABox.h index daad01d7c7..f41bb8a814 100644 --- a/libraries/shared/src/AABox.h +++ b/libraries/shared/src/AABox.h @@ -85,8 +85,23 @@ public: AABox clamp(const glm::vec3& min, const glm::vec3& max) const; AABox clamp(float min, float max) const; - AABox& operator += (const glm::vec3& point); - AABox& operator += (const AABox& box); + inline AABox& operator+=(const glm::vec3& point) { + bool valid = !isInvalid(); + glm::vec3 maximum = glm::max(_corner + _scale, point); + _corner = glm::min(_corner, point); + if (valid) { + _scale = maximum - _corner; + } + return (*this); + } + + inline AABox& operator+=(const AABox& box) { + if (!box.isInvalid()) { + (*this) += box._corner; + (*this) += box.calcTopFarLeft(); + } + return (*this); + } // Translate the AABox just moving the corner void translate(const glm::vec3& translation) { _corner += translation; } @@ -114,7 +129,7 @@ public: static const glm::vec3 INFINITY_VECTOR; - bool isInvalid() const { return _corner == INFINITY_VECTOR; } + bool isInvalid() const { return _corner.x == std::numeric_limits::infinity(); } void clear() { _corner = INFINITY_VECTOR; _scale = glm::vec3(0.0f); } diff --git a/libraries/shared/src/GLMHelpers.h b/libraries/shared/src/GLMHelpers.h index 7e6ef4cb28..96219ea48c 100644 --- a/libraries/shared/src/GLMHelpers.h +++ b/libraries/shared/src/GLMHelpers.h @@ -316,4 +316,17 @@ inline void glm_mat4u_mul(const glm::mat4& m1, const glm::mat4& m2, glm::mat4& r #endif } +// convert float to int, using round-to-nearest-even (undefined on overflow) +inline int fastLrintf(float x) { +#if GLM_ARCH & GLM_ARCH_SSE2_BIT + return _mm_cvt_ss2si(_mm_set_ss(x)); +#else + // return lrintf(x); + static_assert(std::numeric_limits::is_iec559, "Requires IEEE-754 double precision format"); + union { double d; int64_t i; } bits = { (double)x }; + bits.d += (3ULL << 51); + return (int)bits.i; +#endif +} + #endif // hifi_GLMHelpers_h diff --git a/libraries/shared/src/PrioritySortUtil.h b/libraries/shared/src/PrioritySortUtil.h index 34ec074d45..8ded047212 100644 --- a/libraries/shared/src/PrioritySortUtil.h +++ b/libraries/shared/src/PrioritySortUtil.h @@ -12,7 +12,6 @@ #define hifi_PrioritySortUtil_h #include -#include #include "NumericalConstants.h" #include "shared/ConicalViewFrustum.h" @@ -75,7 +74,6 @@ namespace PrioritySortUtil { void setPriority(float priority) { _priority = priority; } float getPriority() const { return _priority; } - bool operator<(const Sortable& other) const { return _priority < other._priority; } private: float _priority { 0.0f }; }; @@ -97,14 +95,18 @@ namespace PrioritySortUtil { _ageWeight = ageWeight; } - size_t size() const { return _queue.size(); } + size_t size() const { return _vector.size(); } void push(T thing) { thing.setPriority(computePriority(thing)); - _queue.push(thing); + _vector.push_back(thing); + } + void reserve(size_t num) { + _vector.reserve(num); + } + const std::vector& getSortedVector() { + std::sort(_vector.begin(), _vector.end(), [](const T& left, const T& right) { return left.getPriority() > right.getPriority(); }); + return _vector; } - const T& top() const { return _queue.top(); } - void pop() { return _queue.pop(); } - bool empty() const { return _queue.empty(); } private: @@ -153,7 +155,7 @@ namespace PrioritySortUtil { } ConicalViewFrustums _views; - std::priority_queue _queue; + std::vector _vector; float _angularWeight { DEFAULT_ANGULAR_COEF }; float _centerWeight { DEFAULT_CENTER_COEF }; float _ageWeight { DEFAULT_AGE_COEF }; diff --git a/tests/shared/src/AACubeTests.cpp b/tests/shared/src/AACubeTests.cpp index 95a4d7f9f0..4ed3ee2813 100644 --- a/tests/shared/src/AACubeTests.cpp +++ b/tests/shared/src/AACubeTests.cpp @@ -168,12 +168,13 @@ void AACubeTests::rayVsParabolaPerformance() { glm::vec3 origin(0.0f); glm::vec3 direction = glm::normalize(glm::vec3(1.0f)); + glm::vec3 invDirection = 1.0f / direction; float distance; BoxFace face; glm::vec3 normal; auto start = std::chrono::high_resolution_clock::now(); for (auto& cube : cubes) { - if (cube.findRayIntersection(origin, direction, distance, face, normal)) { + if (cube.findRayIntersection(origin, direction, invDirection, distance, face, normal)) { numRayHits++; } } diff --git a/tests/shared/src/GLMHelpersTests.cpp b/tests/shared/src/GLMHelpersTests.cpp index 93c4735a6d..71877e89f6 100644 --- a/tests/shared/src/GLMHelpersTests.cpp +++ b/tests/shared/src/GLMHelpersTests.cpp @@ -214,3 +214,39 @@ void GLMHelpersTests::testGenerateBasisVectors() { QCOMPARE_WITH_ABS_ERROR(w, z, EPSILON); } } + +void GLMHelpersTests::roundPerf() { + const int NUM_VECS = 1000000; + const float MAX_VEC = 500.0f; + std::vector vecs; + vecs.reserve(NUM_VECS); + for (int i = 0; i < NUM_VECS; i++) { + vecs.emplace_back(randFloatInRange(-MAX_VEC, MAX_VEC), randFloatInRange(-MAX_VEC, MAX_VEC), randFloatInRange(-MAX_VEC, MAX_VEC)); + } + std::vector vecs2 = vecs; + std::vector originalVecs = vecs; + + auto start = std::chrono::high_resolution_clock::now(); + for (auto& vec : vecs) { + vec = glm::round(vec); + } + + auto glmTime = std::chrono::high_resolution_clock::now() - start; + start = std::chrono::high_resolution_clock::now(); + for (auto& vec : vecs2) { + vec = glm::vec3(fastLrintf(vec.x), fastLrintf(vec.y), fastLrintf(vec.z)); + } + auto manualTime = std::chrono::high_resolution_clock::now() - start; + + bool identical = true; + for (int i = 0; i < vecs.size(); i++) { + identical &= vecs[i] == vecs2[i]; + if (vecs[i] != vecs2[i]) { + qDebug() << "glm: " << vecs[i].x << vecs[i].y << vecs[i].z << ", manual: " << vecs2[i].x << vecs2[i].y << vecs2[i].z; + qDebug() << "original: " << originalVecs[i].x << originalVecs[i].y << originalVecs[i].z; + break; + } + } + + qDebug() << "ratio: " << (float)glmTime.count() / (float)manualTime.count() << ", identical: " << identical; +} \ No newline at end of file diff --git a/tests/shared/src/GLMHelpersTests.h b/tests/shared/src/GLMHelpersTests.h index 030f2d477f..4d9bd0bb60 100644 --- a/tests/shared/src/GLMHelpersTests.h +++ b/tests/shared/src/GLMHelpersTests.h @@ -22,6 +22,7 @@ private slots: void testSixByteOrientationCompression(); void testSimd(); void testGenerateBasisVectors(); + void roundPerf(); }; float getErrorDifference(const float& a, const float& b);