From 0f6162d68389ec29d1c65c4f032938ffd7ecfdf8 Mon Sep 17 00:00:00 2001 From: Zach Pomerantz Date: Tue, 3 May 2016 15:32:08 -0700 Subject: [PATCH 1/4] Use appropriate units for timing tests --- interface/src/Util.cpp | 74 +++++++++++++---------- libraries/shared/src/NumericalConstants.h | 1 + 2 files changed, 43 insertions(+), 32 deletions(-) diff --git a/interface/src/Util.cpp b/interface/src/Util.cpp index 43c52916fe..073d0a18a8 100644 --- a/interface/src/Util.cpp +++ b/interface/src/Util.cpp @@ -136,66 +136,76 @@ void runTimingTests() { float fTest = 1.0; float* fResults = (float*)malloc(sizeof(float) * numTests); QElapsedTimer startTime; - startTime.start(); - float elapsedUsecs; + float elapsedNSecs; + float elapsedUSecs; - float NSEC_TO_USEC = 1.0f / 1000.0f; - elapsedUsecs = (float)startTime.nsecsElapsed() * NSEC_TO_USEC; - qCDebug(interfaceapp, "QElapsedTimer::nsecElapsed() usecs: %f", (double)elapsedUsecs); + startTime.start(); + elapsedNSecs = (float)startTime.nsecsElapsed(); + qCDebug(interfaceapp, "QElapsedTimer::nsecElapsed() ns: %f", (double)elapsedNSecs); // Test sleep functions for accuracy startTime.start(); QThread::msleep(1); - elapsedUsecs = (float)startTime.nsecsElapsed() * NSEC_TO_USEC; - qCDebug(interfaceapp, "QThread::msleep(1) ms: %f", (double)(elapsedUsecs / 1000.0f)); + elapsedNSecs = (float)startTime.nsecsElapsed(); + qCDebug(interfaceapp, "QThread::msleep(1) ms: %f", (double)(elapsedNSecs / NSECS_PER_MSEC)); startTime.start(); QThread::sleep(1); - elapsedUsecs = (float)startTime.nsecsElapsed() * NSEC_TO_USEC; - qCDebug(interfaceapp, "QThread::sleep(1) ms: %f", (double)(elapsedUsecs / 1000.0f)); + elapsedNSecs = (float)startTime.nsecsElapsed(); + qCDebug(interfaceapp, "QThread::sleep(1) s: %f", (double)(elapsedNSecs / NSECS_PER_MSEC / MSECS_PER_SECOND)); startTime.start(); usleep(1); - elapsedUsecs = (float)startTime.nsecsElapsed() * NSEC_TO_USEC; - qCDebug(interfaceapp, "usleep(1) ms: %f", (double)(elapsedUsecs / 1000.0f)); + elapsedNSecs = (float)startTime.nsecsElapsed(); + qCDebug(interfaceapp, "usleep(1) us: %f", (double)(elapsedNSecs / NSECS_PER_USEC)); startTime.start(); usleep(10); - elapsedUsecs = (float)startTime.nsecsElapsed() * NSEC_TO_USEC; - qCDebug(interfaceapp, "usleep(10) ms: %f", (double)(elapsedUsecs / 1000.0f)); + elapsedNSecs = (float)startTime.nsecsElapsed(); + qCDebug(interfaceapp, "usleep(10) us: %f", (double)(elapsedNSecs / NSECS_PER_USEC)); startTime.start(); usleep(100); - elapsedUsecs = (float)startTime.nsecsElapsed() * NSEC_TO_USEC; - qCDebug(interfaceapp, "usleep(100) ms: %f", (double)(elapsedUsecs / 1000.0f)); + elapsedNSecs = (float)startTime.nsecsElapsed(); + qCDebug(interfaceapp, "usleep(100) us: %f", (double)(elapsedNSecs / NSECS_PER_USEC)); startTime.start(); usleep(1000); - elapsedUsecs = (float)startTime.nsecsElapsed() * NSEC_TO_USEC; - qCDebug(interfaceapp, "usleep(1000) ms: %f", (double)(elapsedUsecs / 1000.0f)); + elapsedNSecs = (float)startTime.nsecsElapsed(); + qCDebug(interfaceapp, "usleep(1000) us: %f", (double)(elapsedNSecs / NSECS_PER_USEC)); + + startTime.start(); + usleep(1500); + elapsedNSecs = (float)startTime.nsecsElapsed(); + qCDebug(interfaceapp, "usleep(1500) us: %f", (double)(elapsedNSecs / NSECS_PER_USEC)); + + startTime.start(); + usleep(2500); + elapsedNSecs = (float)startTime.nsecsElapsed(); + qCDebug(interfaceapp, "usleep(2500) us: %f", (double)(elapsedNSecs / NSECS_PER_USEC)); startTime.start(); usleep(15000); - elapsedUsecs = (float)startTime.nsecsElapsed() * NSEC_TO_USEC; - qCDebug(interfaceapp, "usleep(15000) ms: %f", (double)(elapsedUsecs / 1000.0f)); + elapsedNSecs = (float)startTime.nsecsElapsed(); + qCDebug(interfaceapp, "usleep(15000) us: %f", (double)(elapsedNSecs / NSECS_PER_USEC)); // Random number generation startTime.start(); for (int i = 0; i < numTests; i++) { iResults[i] = rand(); } - elapsedUsecs = (float)startTime.nsecsElapsed() * NSEC_TO_USEC; + elapsedUSecs = (float)startTime.nsecsElapsed() / NSECS_PER_USEC; qCDebug(interfaceapp, "rand() stored in array usecs: %f, first result:%d", - (double)(elapsedUsecs / numTests), iResults[0]); + (double)(elapsedUSecs / numTests), iResults[0]); // Random number generation using randFloat() startTime.start(); for (int i = 0; i < numTests; i++) { fResults[i] = randFloat(); } - elapsedUsecs = (float)startTime.nsecsElapsed() * NSEC_TO_USEC; + elapsedUSecs = (float)startTime.nsecsElapsed() / NSECS_PER_USEC; qCDebug(interfaceapp, "randFloat() stored in array usecs: %f, first result: %f", - (double)(elapsedUsecs / numTests), (double)(fResults[0])); + (double)(elapsedUSecs / numTests), (double)(fResults[0])); free(iResults); free(fResults); @@ -206,8 +216,8 @@ void runTimingTests() { for (int i = 0; i < numTests; i++) { fTest = powf(fTest, 0.5f); } - elapsedUsecs = (float)startTime.nsecsElapsed() * NSEC_TO_USEC; - qCDebug(interfaceapp, "powf(f, 0.5) usecs: %f", (double)(elapsedUsecs / (float) numTests)); + elapsedUSecs = (float)startTime.nsecsElapsed() / NSECS_PER_USEC; + qCDebug(interfaceapp, "powf(f, 0.5) usecs: %f", (double)(elapsedUSecs / (float) numTests)); // Vector Math float distance; @@ -218,9 +228,9 @@ void runTimingTests() { //float distanceSquared = glm::dot(temp, temp); distance = glm::distance(pointA, pointB); } - elapsedUsecs = (float)startTime.nsecsElapsed() * NSEC_TO_USEC; + elapsedUSecs = (float)startTime.nsecsElapsed() / NSECS_PER_USEC; qCDebug(interfaceapp, "vector math usecs: %f [%f usecs total for %d tests], last result:%f", - (double)(elapsedUsecs / (float) numTests), (double)elapsedUsecs, numTests, (double)distance); + (double)(elapsedUSecs / (float) numTests), (double)elapsedUSecs, numTests, (double)distance); // Vec3 test glm::vec3 vecA(randVector()), vecB(randVector()); @@ -231,9 +241,9 @@ void runTimingTests() { glm::vec3 temp = vecA-vecB; result = glm::dot(temp,temp); } - elapsedUsecs = (float)startTime.nsecsElapsed() * NSEC_TO_USEC; + elapsedUSecs = (float)startTime.nsecsElapsed() / NSECS_PER_USEC; qCDebug(interfaceapp, "vec3 assign and dot() usecs: %f, last result:%f", - (double)(elapsedUsecs / numTests), (double)result); + (double)(elapsedUSecs / numTests), (double)result); quint64 BYTE_CODE_MAX_TEST_VALUE = 99999999; @@ -265,9 +275,9 @@ void runTimingTests() { } } - elapsedUsecs = (float)startTime.nsecsElapsed() * NSEC_TO_USEC; - qCDebug(interfaceapp) << "ByteCountCoded usecs: " << elapsedUsecs - << "per test:" << (double) (elapsedUsecs / tests) + elapsedUSecs = (float)startTime.nsecsElapsed() / NSECS_PER_USEC; + qCDebug(interfaceapp) << "ByteCountCoded usecs: " << elapsedUSecs + << "per test:" << (double) (elapsedUSecs / tests) << "tests:" << tests << "failed:" << failed; } diff --git a/libraries/shared/src/NumericalConstants.h b/libraries/shared/src/NumericalConstants.h index cf78f6dbf3..ca18d8ad5e 100644 --- a/libraries/shared/src/NumericalConstants.h +++ b/libraries/shared/src/NumericalConstants.h @@ -35,6 +35,7 @@ const float METERS_PER_CENTIMETER = 0.01f; const float METERS_PER_MILLIMETER = 0.001f; const float MILLIMETERS_PER_METER = 1000.0f; const quint64 NSECS_PER_USEC = 1000; +const quint64 NSECS_PER_MSEC = 1000000; const quint64 USECS_PER_MSEC = 1000; const quint64 MSECS_PER_SECOND = 1000; const quint64 USECS_PER_SECOND = USECS_PER_MSEC * MSECS_PER_SECOND; From 3619821b826115ae250d890bbc7b4da8ff28ed0e Mon Sep 17 00:00:00 2001 From: Zach Pomerantz Date: Wed, 4 May 2016 12:44:11 -0700 Subject: [PATCH 2/4] Improve timing tests with averages --- interface/src/Util.cpp | 71 +++++++++++++++++++++++++++--------------- 1 file changed, 46 insertions(+), 25 deletions(-) diff --git a/interface/src/Util.cpp b/interface/src/Util.cpp index 073d0a18a8..00715bcd69 100644 --- a/interface/src/Util.cpp +++ b/interface/src/Util.cpp @@ -131,63 +131,84 @@ const glm::vec3 randVector() { // Do some basic timing tests and report the results void runTimingTests() { // How long does it take to make a call to get the time? - const int numTests = 1000000; - int* iResults = (int*)malloc(sizeof(int) * numTests); - float fTest = 1.0; - float* fResults = (float*)malloc(sizeof(float) * numTests); + const int numTimingTests = 3; QElapsedTimer startTime; float elapsedNSecs; float elapsedUSecs; + qCDebug(interfaceapp, "numTimingTests: %d", numTimingTests); + startTime.start(); elapsedNSecs = (float)startTime.nsecsElapsed(); - qCDebug(interfaceapp, "QElapsedTimer::nsecElapsed() ns: %f", (double)elapsedNSecs); + qCDebug(interfaceapp, "QElapsedTimer::nsecElapsed() ns: %f", (double)elapsedNSecs / numTimingTests); // Test sleep functions for accuracy startTime.start(); - QThread::msleep(1); + for (int i = 0; i < numTimingTests; i++) { + QThread::msleep(1); + } elapsedNSecs = (float)startTime.nsecsElapsed(); - qCDebug(interfaceapp, "QThread::msleep(1) ms: %f", (double)(elapsedNSecs / NSECS_PER_MSEC)); + qCDebug(interfaceapp, "QThread::msleep(1) ms: %f", (double)(elapsedNSecs / NSECS_PER_MSEC / numTimingTests)); startTime.start(); - QThread::sleep(1); + for (int i = 0; i < numTimingTests; i++) { + QThread::sleep(1); + } elapsedNSecs = (float)startTime.nsecsElapsed(); - qCDebug(interfaceapp, "QThread::sleep(1) s: %f", (double)(elapsedNSecs / NSECS_PER_MSEC / MSECS_PER_SECOND)); + qCDebug(interfaceapp, "QThread::sleep(1) s: %f", (double)(elapsedNSecs / NSECS_PER_MSEC / MSECS_PER_SECOND / numTimingTests)); + + const int numUsecTests = 1000; + startTime.start(); + for (int i = 0; i < numUsecTests; i++) { + usleep(1); + } + elapsedNSecs = (float)startTime.nsecsElapsed(); + qCDebug(interfaceapp, "usleep(1) (1000x) us: %f", (double)(elapsedNSecs / NSECS_PER_USEC / numUsecTests)); startTime.start(); - usleep(1); + for (int i = 0; i < numUsecTests; i++) { + usleep(10); + } elapsedNSecs = (float)startTime.nsecsElapsed(); - qCDebug(interfaceapp, "usleep(1) us: %f", (double)(elapsedNSecs / NSECS_PER_USEC)); + qCDebug(interfaceapp, "usleep(10) (1000x) us: %f", (double)(elapsedNSecs / NSECS_PER_USEC / numUsecTests)); startTime.start(); - usleep(10); + for (int i = 0; i < numUsecTests; i++) { + usleep(100); + } elapsedNSecs = (float)startTime.nsecsElapsed(); - qCDebug(interfaceapp, "usleep(10) us: %f", (double)(elapsedNSecs / NSECS_PER_USEC)); + qCDebug(interfaceapp, "usleep(100) (1000x) us: %f", (double)(elapsedNSecs / NSECS_PER_USEC / numUsecTests)); startTime.start(); - usleep(100); + for (int i = 0; i < numTimingTests; i++) { + usleep(1000); + } elapsedNSecs = (float)startTime.nsecsElapsed(); - qCDebug(interfaceapp, "usleep(100) us: %f", (double)(elapsedNSecs / NSECS_PER_USEC)); + qCDebug(interfaceapp, "usleep(1000) us: %f", (double)(elapsedNSecs / NSECS_PER_USEC / numTimingTests)); startTime.start(); - usleep(1000); + for (int i = 0; i < numTimingTests; i++) { + usleep(1001); + } elapsedNSecs = (float)startTime.nsecsElapsed(); - qCDebug(interfaceapp, "usleep(1000) us: %f", (double)(elapsedNSecs / NSECS_PER_USEC)); + qCDebug(interfaceapp, "usleep(1001) us: %f", (double)(elapsedNSecs / NSECS_PER_USEC / numTimingTests)); startTime.start(); - usleep(1500); + for (int i = 0; i < numTimingTests; i++) { + usleep(1500); + } elapsedNSecs = (float)startTime.nsecsElapsed(); - qCDebug(interfaceapp, "usleep(1500) us: %f", (double)(elapsedNSecs / NSECS_PER_USEC)); - - startTime.start(); - usleep(2500); - elapsedNSecs = (float)startTime.nsecsElapsed(); - qCDebug(interfaceapp, "usleep(2500) us: %f", (double)(elapsedNSecs / NSECS_PER_USEC)); + qCDebug(interfaceapp, "usleep(1500) us: %f", (double)(elapsedNSecs / NSECS_PER_USEC / numTimingTests)); startTime.start(); usleep(15000); elapsedNSecs = (float)startTime.nsecsElapsed(); - qCDebug(interfaceapp, "usleep(15000) us: %f", (double)(elapsedNSecs / NSECS_PER_USEC)); + qCDebug(interfaceapp, "usleep(15000) (1x) us: %f", (double)(elapsedNSecs / NSECS_PER_USEC)); + + const int numTests = 1000000; + int* iResults = (int*)malloc(sizeof(int) * numTests); + float fTest = 1.0; + float* fResults = (float*)malloc(sizeof(float) * numTests); // Random number generation startTime.start(); From 526143b097039dad5bee78f86b3c6018cef899cd Mon Sep 17 00:00:00 2001 From: Zach Pomerantz Date: Tue, 3 May 2016 16:25:09 -0700 Subject: [PATCH 3/4] Improve perf of Win32 usleep --- libraries/shared/src/SharedUtil.cpp | 32 +++++++++++++++++++---------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/libraries/shared/src/SharedUtil.cpp b/libraries/shared/src/SharedUtil.cpp index 7925c8ad4e..615ba26812 100644 --- a/libraries/shared/src/SharedUtil.cpp +++ b/libraries/shared/src/SharedUtil.cpp @@ -455,19 +455,29 @@ void printVoxelCode(unsigned char* voxelCode) { } #ifdef _WIN32 - void usleep(int waitTime) { - const quint64 BUSY_LOOP_USECS = 2000; - quint64 compTime = waitTime + usecTimestampNow(); - quint64 compTimeSleep = compTime - BUSY_LOOP_USECS; - while (true) { - if (usecTimestampNow() < compTimeSleep) { - QThread::msleep(1); - } - if (usecTimestampNow() >= compTime) { - break; - } +void usleep(int waitTime) { + quint64 sleepUntil = waitTime + usecTimestampNow(); + + // Busy wait with sleep/yield where possible + while (true) { + quint64 now = usecTimestampNow(); + if (now >= sleepUntil) { + break; + } + + // Sleep if we have at least 1ms to spare + const int MIN_SLEEP_USECS = 1000; + // msleep is allowed to overshoot, so give it a 100us berth + const int MIN_SLEEP_USECS_BERTH = 100; + if (sleepUntil - now > (MIN_SLEEP_USECS + MIN_SLEEP_USECS_BERTH)) { + unsigned long sleepFor = (sleepUntil - now - MIN_SLEEP_USECS_BERTH) / USECS_PER_MSEC; + QThread::msleep(sleepFor); + // Yield otherwise + } else { + QThread::yieldCurrentThread(); } } +} #endif // Inserts the value and key into three arrays sorted by the key array, the first array is the value, From 1553f532ba11c34da25829166f38e23585c5ccbf Mon Sep 17 00:00:00 2001 From: Zach Pomerantz Date: Wed, 4 May 2016 15:40:53 -0700 Subject: [PATCH 4/4] Specialize usleep for Win32 --- libraries/shared/src/SharedUtil.cpp | 31 +++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/libraries/shared/src/SharedUtil.cpp b/libraries/shared/src/SharedUtil.cpp index 615ba26812..b80fac637c 100644 --- a/libraries/shared/src/SharedUtil.cpp +++ b/libraries/shared/src/SharedUtil.cpp @@ -456,24 +456,39 @@ void printVoxelCode(unsigned char* voxelCode) { #ifdef _WIN32 void usleep(int waitTime) { - quint64 sleepUntil = waitTime + usecTimestampNow(); + // Use QueryPerformanceCounter for least overhead + LARGE_INTEGER now; // ticks + QueryPerformanceCounter(&now); + + static int64_t ticksPerSec = 0; + if (ticksPerSec == 0) { + LARGE_INTEGER frequency; + QueryPerformanceFrequency(&frequency); + ticksPerSec = frequency.QuadPart; + } + + // order ops to avoid loss in precision + int64_t waitTicks = (ticksPerSec * waitTime) / USECS_PER_SECOND; + int64_t sleepTicks = now.QuadPart + waitTicks; // Busy wait with sleep/yield where possible while (true) { - quint64 now = usecTimestampNow(); - if (now >= sleepUntil) { + QueryPerformanceCounter(&now); + if (now.QuadPart >= sleepTicks) { break; } // Sleep if we have at least 1ms to spare - const int MIN_SLEEP_USECS = 1000; + const int64_t MIN_SLEEP_USECS = 1000; // msleep is allowed to overshoot, so give it a 100us berth - const int MIN_SLEEP_USECS_BERTH = 100; - if (sleepUntil - now > (MIN_SLEEP_USECS + MIN_SLEEP_USECS_BERTH)) { - unsigned long sleepFor = (sleepUntil - now - MIN_SLEEP_USECS_BERTH) / USECS_PER_MSEC; - QThread::msleep(sleepFor); + const int64_t MIN_SLEEP_USECS_BERTH = 100; + // order ops to avoid loss in precision + int64_t sleepFor = ((sleepTicks - now.QuadPart) * USECS_PER_SECOND) / ticksPerSec - MIN_SLEEP_USECS_BERTH; + if (sleepFor > MIN_SLEEP_USECS) { + Sleep((DWORD)(sleepFor / USECS_PER_MSEC)); // Yield otherwise } else { + // Use Qt to delegate, as SwitchToThread is only supported starting with XP QThread::yieldCurrentThread(); } }