misc perf improvements

This commit is contained in:
SamGondelman 2018-08-30 15:13:52 -07:00
parent 12ec56875e
commit 5f08ed5027
13 changed files with 108 additions and 85 deletions

View file

@ -429,9 +429,9 @@ void AvatarMixerSlave::broadcastAvatarDataToAgent(const SharedNodePointer& node)
int remainingAvatars = (int)sortedAvatars.size();
auto traitsPacketList = NLPacketList::create(PacketType::BulkAvatarTraits, QByteArray(), true, true);
while (!sortedAvatars.empty()) {
const auto avatarData = sortedAvatars.top().getAvatar();
sortedAvatars.pop();
const auto& sortedAvatarVector = sortedAvatars.getSortedVector();
for (const auto& sortedAvatar : sortedAvatarVector) {
const auto& avatarData = sortedAvatar.getAvatar();
remainingAvatars--;
auto otherNode = avatarDataToNodes[avatarData];

View file

@ -206,6 +206,7 @@ void AvatarManager::updateOtherAvatars(float deltaTime) {
}
++itr;
}
const auto& sortedAvatarVector = sortedAvatars.getSortedVector();
// process in sorted order
uint64_t startTime = usecTimestampNow();
@ -216,8 +217,8 @@ void AvatarManager::updateOtherAvatars(float deltaTime) {
render::Transaction renderTransaction;
workload::Transaction workloadTransaction;
while (!sortedAvatars.empty()) {
const SortableAvatar& sortData = sortedAvatars.top();
for (auto it = sortedAvatarVector.begin(); it != sortedAvatarVector.end(); ++it) {
const SortableAvatar& sortData = *it;
const auto avatar = std::static_pointer_cast<OtherAvatar>(sortData.getAvatar());
// TODO: to help us scale to more avatars it would be nice to not have to poll orb state here
@ -231,7 +232,6 @@ void AvatarManager::updateOtherAvatars(float deltaTime) {
bool ignoring = DependencyManager::get<NodeList>()->isPersonalMutingNode(avatar->getID());
if (ignoring) {
sortedAvatars.pop();
continue;
}
@ -260,26 +260,17 @@ void AvatarManager::updateOtherAvatars(float deltaTime) {
// --> some avatar velocity measurements may be a little off
// no time to simulate, but we take the time to count how many were tragically missed
bool inView = sortData.getPriority() > OUT_OF_VIEW_THRESHOLD;
if (!inView) {
break;
}
if (inView && avatar->hasNewJointData()) {
numAVatarsNotUpdated++;
}
sortedAvatars.pop();
while (inView && !sortedAvatars.empty()) {
const SortableAvatar& newSortData = sortedAvatars.top();
while (it != sortedAvatarVector.end()) {
const SortableAvatar& newSortData = *it;
const auto newAvatar = std::static_pointer_cast<Avatar>(newSortData.getAvatar());
inView = newSortData.getPriority() > OUT_OF_VIEW_THRESHOLD;
bool inView = newSortData.getPriority() > OUT_OF_VIEW_THRESHOLD;
if (inView && newAvatar->hasNewJointData()) {
numAVatarsNotUpdated++;
}
sortedAvatars.pop();
++it;
}
break;
}
sortedAvatars.pop();
}
if (_shouldRender) {

View file

@ -29,20 +29,23 @@ OtherAvatar::~OtherAvatar() {
}
void OtherAvatar::removeOrb() {
if (qApp->getOverlays().isAddedOverlay(_otherAvatarOrbMeshPlaceholderID)) {
if (!_otherAvatarOrbMeshPlaceholderID.isNull()) {
qApp->getOverlays().deleteOverlay(_otherAvatarOrbMeshPlaceholderID);
_otherAvatarOrbMeshPlaceholderID = UNKNOWN_OVERLAY_ID;
}
}
// Moves the placeholder orb to the avatar's head position and, if the orb has
// no overlay ID yet, adds it to the application's overlays and stores the ID.
// Does nothing when no placeholder orb has been created.
void OtherAvatar::updateOrbPosition() {
    if (_otherAvatarOrbMeshPlaceholder == nullptr) {
        return;
    }

    _otherAvatarOrbMeshPlaceholder->setWorldPosition(getHead()->getPosition());

    // Already registered with the overlay system: nothing more to do.
    if (!_otherAvatarOrbMeshPlaceholderID.isNull()) {
        return;
    }
    _otherAvatarOrbMeshPlaceholderID = qApp->getOverlays().addOverlay(_otherAvatarOrbMeshPlaceholder);
}
void OtherAvatar::createOrb() {
if (_otherAvatarOrbMeshPlaceholderID == UNKNOWN_OVERLAY_ID ||
!qApp->getOverlays().isAddedOverlay(_otherAvatarOrbMeshPlaceholderID)) {
if (_otherAvatarOrbMeshPlaceholderID.isNull()) {
_otherAvatarOrbMeshPlaceholder = std::make_shared<Sphere3DOverlay>();
_otherAvatarOrbMeshPlaceholder->setAlpha(1.0f);
_otherAvatarOrbMeshPlaceholder->setColor({ 0xFF, 0x00, 0xFF });

View file

@ -527,8 +527,8 @@ bool UserInputMapper::applyRoute(const Route::Pointer& route, bool force) {
}
// If the source hasn't been written yet, defer processing of this route
auto source = route->source;
auto sourceInput = source->getInput();
auto& source = route->source;
auto& sourceInput = source->getInput();
if (sourceInput.device == STANDARD_DEVICE && !force && source->writeable()) {
if (debugRoutes && route->debug) {
qCDebug(controllers) << "Source not yet written, deferring";
@ -559,7 +559,7 @@ bool UserInputMapper::applyRoute(const Route::Pointer& route, bool force) {
return true;
}
auto destination = route->destination;
auto& destination = route->destination;
// This could happen if the route destination failed to create
// FIXME: Maybe do not create the route if the destination failed and avoid this case ?
if (!destination) {

View file

@ -405,11 +405,14 @@ void EntityTreeRenderer::updateChangedEntities(const render::ScenePointer& scene
// process the sorted renderables
size_t numSorted = sortedRenderables.size();
while (!sortedRenderables.empty() && usecTimestampNow() < expiry) {
const auto renderable = sortedRenderables.top().getRenderer();
const auto& sortedRenderablesVector = sortedRenderables.getSortedVector();
for (const auto& sortedRenderable : sortedRenderablesVector) {
if (usecTimestampNow() > expiry) {
break;
}
const auto& renderable = sortedRenderable.getRenderer();
renderable->updateInScene(scene, transaction);
_renderablesToUpdate.erase(renderable->getEntity()->getID());
sortedRenderables.pop();
}
// compute average per-renderable update cost

View file

@ -13,6 +13,7 @@
#include <glm/detail/type_vec.hpp>
#include "GpuHelpers.h"
#include "GLMHelpers.h"
namespace graphics {
class Mesh;
@ -55,8 +56,8 @@ namespace buffer_helpers {
tangent = glm::clamp(tangent, -1.0f, 1.0f);
normal *= 511.0f;
tangent *= 511.0f;
normal = glm::round(normal);
tangent = glm::round(tangent);
normal = fastRoundf(normal);
tangent = fastRoundf(tangent);
glm::detail::i10i10i10i2 normalStruct;
glm::detail::i10i10i10i2 tangentStruct;

View file

@ -79,33 +79,23 @@ void AABox::setBox(const glm::vec3& corner, const glm::vec3& scale) {
// Returns the vertex of the box that lies farthest along `normal`.
// Each component starts at _corner and is offset by the matching component of
// _scale only on the axes where the normal component is strictly positive.
glm::vec3 AABox::getFarthestVertex(const glm::vec3& normal) const {
    // The offset is applied exactly once per axis. A select is used instead of
    // multiplying _scale by a 0/1 blend factor, because 0 * inf would yield
    // NaN for a box with a non-finite scale.
    return glm::vec3(
        _corner.x + ((normal.x > 0.0f) ? _scale.x : 0.0f),
        _corner.y + ((normal.y > 0.0f) ? _scale.y : 0.0f),
        _corner.z + ((normal.z > 0.0f) ? _scale.z : 0.0f));
}
// Returns the vertex of the box that lies nearest along `normal`.
// Each component starts at _corner and is offset by the matching component of
// _scale only on the axes where the normal component is strictly negative.
glm::vec3 AABox::getNearestVertex(const glm::vec3& normal) const {
    // The offset is applied exactly once per axis. A select is used instead of
    // multiplying _scale by a 0/1 blend factor, because 0 * inf would yield
    // NaN for a box with a non-finite scale.
    return glm::vec3(
        _corner.x + ((normal.x < 0.0f) ? _scale.x : 0.0f),
        _corner.y + ((normal.y < 0.0f) ? _scale.y : 0.0f),
        _corner.z + ((normal.z < 0.0f) ? _scale.z : 0.0f));
}
@ -459,28 +449,6 @@ AABox AABox::clamp(float min, float max) const {
return AABox(clampedCorner, clampedScale);
}
// Extends the box so that it contains `point`.
// When the box is invalid (per isInvalid()) only _corner is updated and
// _scale is left untouched.
AABox& AABox::operator += (const glm::vec3& point) {
    if (isInvalid()) {
        _corner = glm::min(_corner, point);
        return (*this);
    }

    // Capture the far corner before _corner moves, then rebuild _scale from
    // the new pair of extremes.
    const glm::vec3 farCorner = glm::max(_corner + _scale, point);
    _corner = glm::min(_corner, point);
    _scale = farCorner - _corner;
    return (*this);
}
// Extends the box so that it contains `box`, by merging in box._corner and
// box.calcTopFarLeft(). An invalid `box` leaves this box unchanged.
AABox& AABox::operator += (const AABox& box) {
    if (box.isInvalid()) {
        return (*this);
    }

    (*this) += box._corner;
    (*this) += box.calcTopFarLeft();
    return (*this);
}
void AABox::embiggen(float scale) {
_corner += scale * (-0.5f * _scale);
_scale *= scale;

View file

@ -85,8 +85,21 @@ public:
AABox clamp(const glm::vec3& min, const glm::vec3& max) const;
AABox clamp(float min, float max) const;
AABox& operator += (const glm::vec3& point);
AABox& operator += (const AABox& box);
// Extends the box so that it contains `point`.
// When the box is invalid (per isInvalid()) only _corner is updated and
// _scale is left untouched.
inline AABox& operator+=(const glm::vec3& point) {
    const bool wasInvalid = isInvalid();

    // The far corner must be captured BEFORE _corner moves: if `point` lies
    // below the current corner, the corner shifts down and the scale has to
    // grow by the same amount to keep the old maximum inside the box.
    const glm::vec3 maximum(glm::max(_corner + _scale, point));
    _corner = glm::min(_corner, point);
    if (!wasInvalid) {
        _scale = maximum - _corner;
    }
    return (*this);
}
// Extends the box so that it contains `box`, by merging in box._corner and
// box.calcTopFarLeft(). An invalid `box` leaves this box unchanged.
inline AABox& operator+=(const AABox& box) {
    if (box.isInvalid()) {
        return (*this);
    }

    (*this) += box._corner;
    (*this) += box.calcTopFarLeft();
    return (*this);
}
// Translate the AABox just moving the corner
void translate(const glm::vec3& translation) { _corner += translation; }

View file

@ -316,4 +316,12 @@ inline void glm_mat4u_mul(const glm::mat4& m1, const glm::mat4& m2, glm::mat4& r
#endif
}
// Rounds each component of `vec` to an integral value.
// On SSE2 builds every component is converted with _mm_cvt_ss2si; otherwise
// the call falls back to glm::round.
// NOTE(review): _mm_cvt_ss2si presumably rounds according to the current
// MXCSR rounding mode (round-half-to-even by default), so results may differ
// from glm::round on exact .5 inputs and for values outside the int range --
// confirm callers tolerate this.
inline glm::vec3 fastRoundf(const glm::vec3& vec) {
#if GLM_ARCH & GLM_ARCH_SSE2_BIT
    const auto roundedX = _mm_cvt_ss2si(_mm_set_ss(vec.x));
    const auto roundedY = _mm_cvt_ss2si(_mm_set_ss(vec.y));
    const auto roundedZ = _mm_cvt_ss2si(_mm_set_ss(vec.z));
    return glm::vec3(roundedX, roundedY, roundedZ);
#else
    return glm::round(vec);
#endif
}
#endif // hifi_GLMHelpers_h

View file

@ -12,7 +12,6 @@
#define hifi_PrioritySortUtil_h
#include <glm/glm.hpp>
#include <queue>
#include "NumericalConstants.h"
#include "shared/ConicalViewFrustum.h"
@ -75,7 +74,6 @@ namespace PrioritySortUtil {
void setPriority(float priority) { _priority = priority; }
float getPriority() const { return _priority; }
bool operator<(const Sortable& other) const { return _priority < other._priority; }
private:
float _priority { 0.0f };
};
@ -97,14 +95,15 @@ namespace PrioritySortUtil {
_ageWeight = ageWeight;
}
size_t size() const { return _queue.size(); }
size_t size() const { return _vector.size(); }
void push(T thing) {
thing.setPriority(computePriority(thing));
_queue.push(thing);
_vector.push_back(thing);
}
// Sorts the stored items in place by descending priority and returns a
// reference to the internal vector. The sort is performed on every call.
const std::vector<T>& getSortedVector() {
    const auto higherPriorityFirst = [](const T& left, const T& right) {
        return right.getPriority() < left.getPriority();
    };
    std::sort(_vector.begin(), _vector.end(), higherPriorityFirst);
    return _vector;
}
const T& top() const { return _queue.top(); }
void pop() { return _queue.pop(); }
bool empty() const { return _queue.empty(); }
private:
@ -153,7 +152,7 @@ namespace PrioritySortUtil {
}
ConicalViewFrustums _views;
std::priority_queue<T> _queue;
std::vector<T> _vector;
float _angularWeight { DEFAULT_ANGULAR_COEF };
float _centerWeight { DEFAULT_CENTER_COEF };
float _ageWeight { DEFAULT_AGE_COEF };

View file

@ -173,7 +173,7 @@ void AACubeTests::rayVsParabolaPerformance() {
glm::vec3 normal;
auto start = std::chrono::high_resolution_clock::now();
for (auto& cube : cubes) {
if (cube.findRayIntersection(origin, direction, distance, face, normal)) {
if (cube.findRayIntersection(origin, direction, 1.0f / direction, distance, face, normal)) {
numRayHits++;
}
}

View file

@ -214,3 +214,39 @@ void GLMHelpersTests::testGenerateBasisVectors() {
QCOMPARE_WITH_ABS_ERROR(w, z, EPSILON);
}
}
void GLMHelpersTests::roundPerf() {
const int NUM_VECS = 1000000;
const float MAX_VEC = 500.0f;
std::vector<glm::vec3> vecs;
vecs.reserve(NUM_VECS);
for (int i = 0; i < NUM_VECS; i++) {
vecs.emplace_back(randFloatInRange(-MAX_VEC, MAX_VEC), randFloatInRange(-MAX_VEC, MAX_VEC), randFloatInRange(-MAX_VEC, MAX_VEC));
}
std::vector<glm::vec3> vecs2 = vecs;
std::vector<glm::vec3> originalVecs = vecs;
auto start = std::chrono::high_resolution_clock::now();
for (auto& vec : vecs) {
vec = glm::round(vec);
}
auto glmTime = std::chrono::high_resolution_clock::now() - start;
start = std::chrono::high_resolution_clock::now();
for (auto& vec : vecs2) {
vec = fastRoundf(vec);
}
auto manualTime = std::chrono::high_resolution_clock::now() - start;
bool identical = true;
for (int i = 0; i < vecs.size(); i++) {
identical &= vecs[i] == vecs2[i];
if (vecs[i] != vecs2[i]) {
qDebug() << "glm: " << vecs[i].x << vecs[i].y << vecs[i].z << ", manual: " << vecs2[i].x << vecs2[i].y << vecs2[i].z;
qDebug() << "original: " << originalVecs[i].x << originalVecs[i].y << originalVecs[i].z;
break;
}
}
qDebug() << "ratio: " << (float)glmTime.count() / (float)manualTime.count() << ", identical: " << identical;
}

View file

@ -22,6 +22,7 @@ private slots:
void testSixByteOrientationCompression();
void testSimd();
void testGenerateBasisVectors();
void roundPerf();
};
float getErrorDifference(const float& a, const float& b);