mirror of
https://github.com/lubosz/overte.git
synced 2025-05-28 02:51:19 +02:00
Merge pull request #7810 from zzmp/perf/usleep
Improve CPU performance of Win32 usleep
This commit is contained in:
commit
16a5030159
3 changed files with 124 additions and 67 deletions
|
@ -131,71 +131,102 @@ const glm::vec3 randVector() {
|
||||||
// Do some basic timing tests and report the results
|
// Do some basic timing tests and report the results
|
||||||
void runTimingTests() {
|
void runTimingTests() {
|
||||||
// How long does it take to make a call to get the time?
|
// How long does it take to make a call to get the time?
|
||||||
|
const int numTimingTests = 3;
|
||||||
|
QElapsedTimer startTime;
|
||||||
|
float elapsedNSecs;
|
||||||
|
float elapsedUSecs;
|
||||||
|
|
||||||
|
qCDebug(interfaceapp, "numTimingTests: %d", numTimingTests);
|
||||||
|
|
||||||
|
startTime.start();
|
||||||
|
elapsedNSecs = (float)startTime.nsecsElapsed();
|
||||||
|
qCDebug(interfaceapp, "QElapsedTimer::nsecElapsed() ns: %f", (double)elapsedNSecs / numTimingTests);
|
||||||
|
|
||||||
|
// Test sleep functions for accuracy
|
||||||
|
startTime.start();
|
||||||
|
for (int i = 0; i < numTimingTests; i++) {
|
||||||
|
QThread::msleep(1);
|
||||||
|
}
|
||||||
|
elapsedNSecs = (float)startTime.nsecsElapsed();
|
||||||
|
qCDebug(interfaceapp, "QThread::msleep(1) ms: %f", (double)(elapsedNSecs / NSECS_PER_MSEC / numTimingTests));
|
||||||
|
|
||||||
|
startTime.start();
|
||||||
|
for (int i = 0; i < numTimingTests; i++) {
|
||||||
|
QThread::sleep(1);
|
||||||
|
}
|
||||||
|
elapsedNSecs = (float)startTime.nsecsElapsed();
|
||||||
|
qCDebug(interfaceapp, "QThread::sleep(1) s: %f", (double)(elapsedNSecs / NSECS_PER_MSEC / MSECS_PER_SECOND / numTimingTests));
|
||||||
|
|
||||||
|
const int numUsecTests = 1000;
|
||||||
|
startTime.start();
|
||||||
|
for (int i = 0; i < numUsecTests; i++) {
|
||||||
|
usleep(1);
|
||||||
|
}
|
||||||
|
elapsedNSecs = (float)startTime.nsecsElapsed();
|
||||||
|
qCDebug(interfaceapp, "usleep(1) (1000x) us: %f", (double)(elapsedNSecs / NSECS_PER_USEC / numUsecTests));
|
||||||
|
|
||||||
|
startTime.start();
|
||||||
|
for (int i = 0; i < numUsecTests; i++) {
|
||||||
|
usleep(10);
|
||||||
|
}
|
||||||
|
elapsedNSecs = (float)startTime.nsecsElapsed();
|
||||||
|
qCDebug(interfaceapp, "usleep(10) (1000x) us: %f", (double)(elapsedNSecs / NSECS_PER_USEC / numUsecTests));
|
||||||
|
|
||||||
|
startTime.start();
|
||||||
|
for (int i = 0; i < numUsecTests; i++) {
|
||||||
|
usleep(100);
|
||||||
|
}
|
||||||
|
elapsedNSecs = (float)startTime.nsecsElapsed();
|
||||||
|
qCDebug(interfaceapp, "usleep(100) (1000x) us: %f", (double)(elapsedNSecs / NSECS_PER_USEC / numUsecTests));
|
||||||
|
|
||||||
|
startTime.start();
|
||||||
|
for (int i = 0; i < numTimingTests; i++) {
|
||||||
|
usleep(1000);
|
||||||
|
}
|
||||||
|
elapsedNSecs = (float)startTime.nsecsElapsed();
|
||||||
|
qCDebug(interfaceapp, "usleep(1000) us: %f", (double)(elapsedNSecs / NSECS_PER_USEC / numTimingTests));
|
||||||
|
|
||||||
|
startTime.start();
|
||||||
|
for (int i = 0; i < numTimingTests; i++) {
|
||||||
|
usleep(1001);
|
||||||
|
}
|
||||||
|
elapsedNSecs = (float)startTime.nsecsElapsed();
|
||||||
|
qCDebug(interfaceapp, "usleep(1001) us: %f", (double)(elapsedNSecs / NSECS_PER_USEC / numTimingTests));
|
||||||
|
|
||||||
|
startTime.start();
|
||||||
|
for (int i = 0; i < numTimingTests; i++) {
|
||||||
|
usleep(1500);
|
||||||
|
}
|
||||||
|
elapsedNSecs = (float)startTime.nsecsElapsed();
|
||||||
|
qCDebug(interfaceapp, "usleep(1500) us: %f", (double)(elapsedNSecs / NSECS_PER_USEC / numTimingTests));
|
||||||
|
|
||||||
|
startTime.start();
|
||||||
|
usleep(15000);
|
||||||
|
elapsedNSecs = (float)startTime.nsecsElapsed();
|
||||||
|
qCDebug(interfaceapp, "usleep(15000) (1x) us: %f", (double)(elapsedNSecs / NSECS_PER_USEC));
|
||||||
|
|
||||||
const int numTests = 1000000;
|
const int numTests = 1000000;
|
||||||
int* iResults = (int*)malloc(sizeof(int) * numTests);
|
int* iResults = (int*)malloc(sizeof(int) * numTests);
|
||||||
float fTest = 1.0;
|
float fTest = 1.0;
|
||||||
float* fResults = (float*)malloc(sizeof(float) * numTests);
|
float* fResults = (float*)malloc(sizeof(float) * numTests);
|
||||||
QElapsedTimer startTime;
|
|
||||||
startTime.start();
|
|
||||||
float elapsedUsecs;
|
|
||||||
|
|
||||||
float NSEC_TO_USEC = 1.0f / 1000.0f;
|
|
||||||
elapsedUsecs = (float)startTime.nsecsElapsed() * NSEC_TO_USEC;
|
|
||||||
qCDebug(interfaceapp, "QElapsedTimer::nsecElapsed() usecs: %f", (double)elapsedUsecs);
|
|
||||||
|
|
||||||
// Test sleep functions for accuracy
|
|
||||||
startTime.start();
|
|
||||||
QThread::msleep(1);
|
|
||||||
elapsedUsecs = (float)startTime.nsecsElapsed() * NSEC_TO_USEC;
|
|
||||||
qCDebug(interfaceapp, "QThread::msleep(1) ms: %f", (double)(elapsedUsecs / 1000.0f));
|
|
||||||
|
|
||||||
startTime.start();
|
|
||||||
QThread::sleep(1);
|
|
||||||
elapsedUsecs = (float)startTime.nsecsElapsed() * NSEC_TO_USEC;
|
|
||||||
qCDebug(interfaceapp, "QThread::sleep(1) ms: %f", (double)(elapsedUsecs / 1000.0f));
|
|
||||||
|
|
||||||
startTime.start();
|
|
||||||
usleep(1);
|
|
||||||
elapsedUsecs = (float)startTime.nsecsElapsed() * NSEC_TO_USEC;
|
|
||||||
qCDebug(interfaceapp, "usleep(1) ms: %f", (double)(elapsedUsecs / 1000.0f));
|
|
||||||
|
|
||||||
startTime.start();
|
|
||||||
usleep(10);
|
|
||||||
elapsedUsecs = (float)startTime.nsecsElapsed() * NSEC_TO_USEC;
|
|
||||||
qCDebug(interfaceapp, "usleep(10) ms: %f", (double)(elapsedUsecs / 1000.0f));
|
|
||||||
|
|
||||||
startTime.start();
|
|
||||||
usleep(100);
|
|
||||||
elapsedUsecs = (float)startTime.nsecsElapsed() * NSEC_TO_USEC;
|
|
||||||
qCDebug(interfaceapp, "usleep(100) ms: %f", (double)(elapsedUsecs / 1000.0f));
|
|
||||||
|
|
||||||
startTime.start();
|
|
||||||
usleep(1000);
|
|
||||||
elapsedUsecs = (float)startTime.nsecsElapsed() * NSEC_TO_USEC;
|
|
||||||
qCDebug(interfaceapp, "usleep(1000) ms: %f", (double)(elapsedUsecs / 1000.0f));
|
|
||||||
|
|
||||||
startTime.start();
|
|
||||||
usleep(15000);
|
|
||||||
elapsedUsecs = (float)startTime.nsecsElapsed() * NSEC_TO_USEC;
|
|
||||||
qCDebug(interfaceapp, "usleep(15000) ms: %f", (double)(elapsedUsecs / 1000.0f));
|
|
||||||
|
|
||||||
// Random number generation
|
// Random number generation
|
||||||
startTime.start();
|
startTime.start();
|
||||||
for (int i = 0; i < numTests; i++) {
|
for (int i = 0; i < numTests; i++) {
|
||||||
iResults[i] = rand();
|
iResults[i] = rand();
|
||||||
}
|
}
|
||||||
elapsedUsecs = (float)startTime.nsecsElapsed() * NSEC_TO_USEC;
|
elapsedUSecs = (float)startTime.nsecsElapsed() / NSECS_PER_USEC;
|
||||||
qCDebug(interfaceapp, "rand() stored in array usecs: %f, first result:%d",
|
qCDebug(interfaceapp, "rand() stored in array usecs: %f, first result:%d",
|
||||||
(double)(elapsedUsecs / numTests), iResults[0]);
|
(double)(elapsedUSecs / numTests), iResults[0]);
|
||||||
|
|
||||||
// Random number generation using randFloat()
|
// Random number generation using randFloat()
|
||||||
startTime.start();
|
startTime.start();
|
||||||
for (int i = 0; i < numTests; i++) {
|
for (int i = 0; i < numTests; i++) {
|
||||||
fResults[i] = randFloat();
|
fResults[i] = randFloat();
|
||||||
}
|
}
|
||||||
elapsedUsecs = (float)startTime.nsecsElapsed() * NSEC_TO_USEC;
|
elapsedUSecs = (float)startTime.nsecsElapsed() / NSECS_PER_USEC;
|
||||||
qCDebug(interfaceapp, "randFloat() stored in array usecs: %f, first result: %f",
|
qCDebug(interfaceapp, "randFloat() stored in array usecs: %f, first result: %f",
|
||||||
(double)(elapsedUsecs / numTests), (double)(fResults[0]));
|
(double)(elapsedUSecs / numTests), (double)(fResults[0]));
|
||||||
|
|
||||||
free(iResults);
|
free(iResults);
|
||||||
free(fResults);
|
free(fResults);
|
||||||
|
@ -206,8 +237,8 @@ void runTimingTests() {
|
||||||
for (int i = 0; i < numTests; i++) {
|
for (int i = 0; i < numTests; i++) {
|
||||||
fTest = powf(fTest, 0.5f);
|
fTest = powf(fTest, 0.5f);
|
||||||
}
|
}
|
||||||
elapsedUsecs = (float)startTime.nsecsElapsed() * NSEC_TO_USEC;
|
elapsedUSecs = (float)startTime.nsecsElapsed() / NSECS_PER_USEC;
|
||||||
qCDebug(interfaceapp, "powf(f, 0.5) usecs: %f", (double)(elapsedUsecs / (float) numTests));
|
qCDebug(interfaceapp, "powf(f, 0.5) usecs: %f", (double)(elapsedUSecs / (float) numTests));
|
||||||
|
|
||||||
// Vector Math
|
// Vector Math
|
||||||
float distance;
|
float distance;
|
||||||
|
@ -218,9 +249,9 @@ void runTimingTests() {
|
||||||
//float distanceSquared = glm::dot(temp, temp);
|
//float distanceSquared = glm::dot(temp, temp);
|
||||||
distance = glm::distance(pointA, pointB);
|
distance = glm::distance(pointA, pointB);
|
||||||
}
|
}
|
||||||
elapsedUsecs = (float)startTime.nsecsElapsed() * NSEC_TO_USEC;
|
elapsedUSecs = (float)startTime.nsecsElapsed() / NSECS_PER_USEC;
|
||||||
qCDebug(interfaceapp, "vector math usecs: %f [%f usecs total for %d tests], last result:%f",
|
qCDebug(interfaceapp, "vector math usecs: %f [%f usecs total for %d tests], last result:%f",
|
||||||
(double)(elapsedUsecs / (float) numTests), (double)elapsedUsecs, numTests, (double)distance);
|
(double)(elapsedUSecs / (float) numTests), (double)elapsedUSecs, numTests, (double)distance);
|
||||||
|
|
||||||
// Vec3 test
|
// Vec3 test
|
||||||
glm::vec3 vecA(randVector()), vecB(randVector());
|
glm::vec3 vecA(randVector()), vecB(randVector());
|
||||||
|
@ -231,9 +262,9 @@ void runTimingTests() {
|
||||||
glm::vec3 temp = vecA-vecB;
|
glm::vec3 temp = vecA-vecB;
|
||||||
result = glm::dot(temp,temp);
|
result = glm::dot(temp,temp);
|
||||||
}
|
}
|
||||||
elapsedUsecs = (float)startTime.nsecsElapsed() * NSEC_TO_USEC;
|
elapsedUSecs = (float)startTime.nsecsElapsed() / NSECS_PER_USEC;
|
||||||
qCDebug(interfaceapp, "vec3 assign and dot() usecs: %f, last result:%f",
|
qCDebug(interfaceapp, "vec3 assign and dot() usecs: %f, last result:%f",
|
||||||
(double)(elapsedUsecs / numTests), (double)result);
|
(double)(elapsedUSecs / numTests), (double)result);
|
||||||
|
|
||||||
|
|
||||||
quint64 BYTE_CODE_MAX_TEST_VALUE = 99999999;
|
quint64 BYTE_CODE_MAX_TEST_VALUE = 99999999;
|
||||||
|
@ -265,9 +296,9 @@ void runTimingTests() {
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
elapsedUsecs = (float)startTime.nsecsElapsed() * NSEC_TO_USEC;
|
elapsedUSecs = (float)startTime.nsecsElapsed() / NSECS_PER_USEC;
|
||||||
qCDebug(interfaceapp) << "ByteCountCoded<quint64> usecs: " << elapsedUsecs
|
qCDebug(interfaceapp) << "ByteCountCoded<quint64> usecs: " << elapsedUSecs
|
||||||
<< "per test:" << (double) (elapsedUsecs / tests)
|
<< "per test:" << (double) (elapsedUSecs / tests)
|
||||||
<< "tests:" << tests
|
<< "tests:" << tests
|
||||||
<< "failed:" << failed;
|
<< "failed:" << failed;
|
||||||
}
|
}
|
||||||
|
|
|
@ -35,6 +35,7 @@ const float METERS_PER_CENTIMETER = 0.01f;
|
||||||
const float METERS_PER_MILLIMETER = 0.001f;
|
const float METERS_PER_MILLIMETER = 0.001f;
|
||||||
const float MILLIMETERS_PER_METER = 1000.0f;
|
const float MILLIMETERS_PER_METER = 1000.0f;
|
||||||
const quint64 NSECS_PER_USEC = 1000;
|
const quint64 NSECS_PER_USEC = 1000;
|
||||||
|
const quint64 NSECS_PER_MSEC = 1000000;
|
||||||
const quint64 USECS_PER_MSEC = 1000;
|
const quint64 USECS_PER_MSEC = 1000;
|
||||||
const quint64 MSECS_PER_SECOND = 1000;
|
const quint64 MSECS_PER_SECOND = 1000;
|
||||||
const quint64 USECS_PER_SECOND = USECS_PER_MSEC * MSECS_PER_SECOND;
|
const quint64 USECS_PER_SECOND = USECS_PER_MSEC * MSECS_PER_SECOND;
|
||||||
|
|
|
@ -455,19 +455,44 @@ void printVoxelCode(unsigned char* voxelCode) {
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
void usleep(int waitTime) {
|
void usleep(int waitTime) {
|
||||||
const quint64 BUSY_LOOP_USECS = 2000;
|
// Use QueryPerformanceCounter for least overhead
|
||||||
quint64 compTime = waitTime + usecTimestampNow();
|
LARGE_INTEGER now; // ticks
|
||||||
quint64 compTimeSleep = compTime - BUSY_LOOP_USECS;
|
QueryPerformanceCounter(&now);
|
||||||
while (true) {
|
|
||||||
if (usecTimestampNow() < compTimeSleep) {
|
static int64_t ticksPerSec = 0;
|
||||||
QThread::msleep(1);
|
if (ticksPerSec == 0) {
|
||||||
}
|
LARGE_INTEGER frequency;
|
||||||
if (usecTimestampNow() >= compTime) {
|
QueryPerformanceFrequency(&frequency);
|
||||||
break;
|
ticksPerSec = frequency.QuadPart;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// order ops to avoid loss in precision
|
||||||
|
int64_t waitTicks = (ticksPerSec * waitTime) / USECS_PER_SECOND;
|
||||||
|
int64_t sleepTicks = now.QuadPart + waitTicks;
|
||||||
|
|
||||||
|
// Busy wait with sleep/yield where possible
|
||||||
|
while (true) {
|
||||||
|
QueryPerformanceCounter(&now);
|
||||||
|
if (now.QuadPart >= sleepTicks) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Sleep if we have at least 1ms to spare
|
||||||
|
const int64_t MIN_SLEEP_USECS = 1000;
|
||||||
|
// msleep is allowed to overshoot, so give it a 100us berth
|
||||||
|
const int64_t MIN_SLEEP_USECS_BERTH = 100;
|
||||||
|
// order ops to avoid loss in precision
|
||||||
|
int64_t sleepFor = ((sleepTicks - now.QuadPart) * USECS_PER_SECOND) / ticksPerSec - MIN_SLEEP_USECS_BERTH;
|
||||||
|
if (sleepFor > MIN_SLEEP_USECS) {
|
||||||
|
Sleep((DWORD)(sleepFor / USECS_PER_MSEC));
|
||||||
|
// Yield otherwise
|
||||||
|
} else {
|
||||||
|
// Use Qt to delegate, as SwitchToThread is only supported starting with XP
|
||||||
|
QThread::yieldCurrentThread();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Inserts the value and key into three arrays sorted by the key array, the first array is the value,
|
// Inserts the value and key into three arrays sorted by the key array, the first array is the value,
|
||||||
|
|
Loading…
Reference in a new issue