mirror of
https://github.com/lubosz/overte.git
synced 2025-04-25 01:03:59 +02:00
Merge pull request #7810 from zzmp/perf/usleep
Improve CPU performance of Win32 usleep
This commit is contained in:
commit
16a5030159
3 changed files with 124 additions and 67 deletions
|
@ -131,71 +131,102 @@ const glm::vec3 randVector() {
|
|||
// Do some basic timing tests and report the results
|
||||
void runTimingTests() {
|
||||
// How long does it take to make a call to get the time?
|
||||
const int numTimingTests = 3;
|
||||
QElapsedTimer startTime;
|
||||
float elapsedNSecs;
|
||||
float elapsedUSecs;
|
||||
|
||||
qCDebug(interfaceapp, "numTimingTests: %d", numTimingTests);
|
||||
|
||||
startTime.start();
|
||||
elapsedNSecs = (float)startTime.nsecsElapsed();
|
||||
qCDebug(interfaceapp, "QElapsedTimer::nsecElapsed() ns: %f", (double)elapsedNSecs / numTimingTests);
|
||||
|
||||
// Test sleep functions for accuracy
|
||||
startTime.start();
|
||||
for (int i = 0; i < numTimingTests; i++) {
|
||||
QThread::msleep(1);
|
||||
}
|
||||
elapsedNSecs = (float)startTime.nsecsElapsed();
|
||||
qCDebug(interfaceapp, "QThread::msleep(1) ms: %f", (double)(elapsedNSecs / NSECS_PER_MSEC / numTimingTests));
|
||||
|
||||
startTime.start();
|
||||
for (int i = 0; i < numTimingTests; i++) {
|
||||
QThread::sleep(1);
|
||||
}
|
||||
elapsedNSecs = (float)startTime.nsecsElapsed();
|
||||
qCDebug(interfaceapp, "QThread::sleep(1) s: %f", (double)(elapsedNSecs / NSECS_PER_MSEC / MSECS_PER_SECOND / numTimingTests));
|
||||
|
||||
const int numUsecTests = 1000;
|
||||
startTime.start();
|
||||
for (int i = 0; i < numUsecTests; i++) {
|
||||
usleep(1);
|
||||
}
|
||||
elapsedNSecs = (float)startTime.nsecsElapsed();
|
||||
qCDebug(interfaceapp, "usleep(1) (1000x) us: %f", (double)(elapsedNSecs / NSECS_PER_USEC / numUsecTests));
|
||||
|
||||
startTime.start();
|
||||
for (int i = 0; i < numUsecTests; i++) {
|
||||
usleep(10);
|
||||
}
|
||||
elapsedNSecs = (float)startTime.nsecsElapsed();
|
||||
qCDebug(interfaceapp, "usleep(10) (1000x) us: %f", (double)(elapsedNSecs / NSECS_PER_USEC / numUsecTests));
|
||||
|
||||
startTime.start();
|
||||
for (int i = 0; i < numUsecTests; i++) {
|
||||
usleep(100);
|
||||
}
|
||||
elapsedNSecs = (float)startTime.nsecsElapsed();
|
||||
qCDebug(interfaceapp, "usleep(100) (1000x) us: %f", (double)(elapsedNSecs / NSECS_PER_USEC / numUsecTests));
|
||||
|
||||
startTime.start();
|
||||
for (int i = 0; i < numTimingTests; i++) {
|
||||
usleep(1000);
|
||||
}
|
||||
elapsedNSecs = (float)startTime.nsecsElapsed();
|
||||
qCDebug(interfaceapp, "usleep(1000) us: %f", (double)(elapsedNSecs / NSECS_PER_USEC / numTimingTests));
|
||||
|
||||
startTime.start();
|
||||
for (int i = 0; i < numTimingTests; i++) {
|
||||
usleep(1001);
|
||||
}
|
||||
elapsedNSecs = (float)startTime.nsecsElapsed();
|
||||
qCDebug(interfaceapp, "usleep(1001) us: %f", (double)(elapsedNSecs / NSECS_PER_USEC / numTimingTests));
|
||||
|
||||
startTime.start();
|
||||
for (int i = 0; i < numTimingTests; i++) {
|
||||
usleep(1500);
|
||||
}
|
||||
elapsedNSecs = (float)startTime.nsecsElapsed();
|
||||
qCDebug(interfaceapp, "usleep(1500) us: %f", (double)(elapsedNSecs / NSECS_PER_USEC / numTimingTests));
|
||||
|
||||
startTime.start();
|
||||
usleep(15000);
|
||||
elapsedNSecs = (float)startTime.nsecsElapsed();
|
||||
qCDebug(interfaceapp, "usleep(15000) (1x) us: %f", (double)(elapsedNSecs / NSECS_PER_USEC));
|
||||
|
||||
const int numTests = 1000000;
|
||||
int* iResults = (int*)malloc(sizeof(int) * numTests);
|
||||
float fTest = 1.0;
|
||||
float* fResults = (float*)malloc(sizeof(float) * numTests);
|
||||
QElapsedTimer startTime;
|
||||
startTime.start();
|
||||
float elapsedUsecs;
|
||||
|
||||
float NSEC_TO_USEC = 1.0f / 1000.0f;
|
||||
elapsedUsecs = (float)startTime.nsecsElapsed() * NSEC_TO_USEC;
|
||||
qCDebug(interfaceapp, "QElapsedTimer::nsecElapsed() usecs: %f", (double)elapsedUsecs);
|
||||
|
||||
// Test sleep functions for accuracy
|
||||
startTime.start();
|
||||
QThread::msleep(1);
|
||||
elapsedUsecs = (float)startTime.nsecsElapsed() * NSEC_TO_USEC;
|
||||
qCDebug(interfaceapp, "QThread::msleep(1) ms: %f", (double)(elapsedUsecs / 1000.0f));
|
||||
|
||||
startTime.start();
|
||||
QThread::sleep(1);
|
||||
elapsedUsecs = (float)startTime.nsecsElapsed() * NSEC_TO_USEC;
|
||||
qCDebug(interfaceapp, "QThread::sleep(1) ms: %f", (double)(elapsedUsecs / 1000.0f));
|
||||
|
||||
startTime.start();
|
||||
usleep(1);
|
||||
elapsedUsecs = (float)startTime.nsecsElapsed() * NSEC_TO_USEC;
|
||||
qCDebug(interfaceapp, "usleep(1) ms: %f", (double)(elapsedUsecs / 1000.0f));
|
||||
|
||||
startTime.start();
|
||||
usleep(10);
|
||||
elapsedUsecs = (float)startTime.nsecsElapsed() * NSEC_TO_USEC;
|
||||
qCDebug(interfaceapp, "usleep(10) ms: %f", (double)(elapsedUsecs / 1000.0f));
|
||||
|
||||
startTime.start();
|
||||
usleep(100);
|
||||
elapsedUsecs = (float)startTime.nsecsElapsed() * NSEC_TO_USEC;
|
||||
qCDebug(interfaceapp, "usleep(100) ms: %f", (double)(elapsedUsecs / 1000.0f));
|
||||
|
||||
startTime.start();
|
||||
usleep(1000);
|
||||
elapsedUsecs = (float)startTime.nsecsElapsed() * NSEC_TO_USEC;
|
||||
qCDebug(interfaceapp, "usleep(1000) ms: %f", (double)(elapsedUsecs / 1000.0f));
|
||||
|
||||
startTime.start();
|
||||
usleep(15000);
|
||||
elapsedUsecs = (float)startTime.nsecsElapsed() * NSEC_TO_USEC;
|
||||
qCDebug(interfaceapp, "usleep(15000) ms: %f", (double)(elapsedUsecs / 1000.0f));
|
||||
|
||||
// Random number generation
|
||||
startTime.start();
|
||||
for (int i = 0; i < numTests; i++) {
|
||||
iResults[i] = rand();
|
||||
}
|
||||
elapsedUsecs = (float)startTime.nsecsElapsed() * NSEC_TO_USEC;
|
||||
elapsedUSecs = (float)startTime.nsecsElapsed() / NSECS_PER_USEC;
|
||||
qCDebug(interfaceapp, "rand() stored in array usecs: %f, first result:%d",
|
||||
(double)(elapsedUsecs / numTests), iResults[0]);
|
||||
(double)(elapsedUSecs / numTests), iResults[0]);
|
||||
|
||||
// Random number generation using randFloat()
|
||||
startTime.start();
|
||||
for (int i = 0; i < numTests; i++) {
|
||||
fResults[i] = randFloat();
|
||||
}
|
||||
elapsedUsecs = (float)startTime.nsecsElapsed() * NSEC_TO_USEC;
|
||||
elapsedUSecs = (float)startTime.nsecsElapsed() / NSECS_PER_USEC;
|
||||
qCDebug(interfaceapp, "randFloat() stored in array usecs: %f, first result: %f",
|
||||
(double)(elapsedUsecs / numTests), (double)(fResults[0]));
|
||||
(double)(elapsedUSecs / numTests), (double)(fResults[0]));
|
||||
|
||||
free(iResults);
|
||||
free(fResults);
|
||||
|
@ -206,8 +237,8 @@ void runTimingTests() {
|
|||
for (int i = 0; i < numTests; i++) {
|
||||
fTest = powf(fTest, 0.5f);
|
||||
}
|
||||
elapsedUsecs = (float)startTime.nsecsElapsed() * NSEC_TO_USEC;
|
||||
qCDebug(interfaceapp, "powf(f, 0.5) usecs: %f", (double)(elapsedUsecs / (float) numTests));
|
||||
elapsedUSecs = (float)startTime.nsecsElapsed() / NSECS_PER_USEC;
|
||||
qCDebug(interfaceapp, "powf(f, 0.5) usecs: %f", (double)(elapsedUSecs / (float) numTests));
|
||||
|
||||
// Vector Math
|
||||
float distance;
|
||||
|
@ -218,9 +249,9 @@ void runTimingTests() {
|
|||
//float distanceSquared = glm::dot(temp, temp);
|
||||
distance = glm::distance(pointA, pointB);
|
||||
}
|
||||
elapsedUsecs = (float)startTime.nsecsElapsed() * NSEC_TO_USEC;
|
||||
elapsedUSecs = (float)startTime.nsecsElapsed() / NSECS_PER_USEC;
|
||||
qCDebug(interfaceapp, "vector math usecs: %f [%f usecs total for %d tests], last result:%f",
|
||||
(double)(elapsedUsecs / (float) numTests), (double)elapsedUsecs, numTests, (double)distance);
|
||||
(double)(elapsedUSecs / (float) numTests), (double)elapsedUSecs, numTests, (double)distance);
|
||||
|
||||
// Vec3 test
|
||||
glm::vec3 vecA(randVector()), vecB(randVector());
|
||||
|
@ -231,9 +262,9 @@ void runTimingTests() {
|
|||
glm::vec3 temp = vecA-vecB;
|
||||
result = glm::dot(temp,temp);
|
||||
}
|
||||
elapsedUsecs = (float)startTime.nsecsElapsed() * NSEC_TO_USEC;
|
||||
elapsedUSecs = (float)startTime.nsecsElapsed() / NSECS_PER_USEC;
|
||||
qCDebug(interfaceapp, "vec3 assign and dot() usecs: %f, last result:%f",
|
||||
(double)(elapsedUsecs / numTests), (double)result);
|
||||
(double)(elapsedUSecs / numTests), (double)result);
|
||||
|
||||
|
||||
quint64 BYTE_CODE_MAX_TEST_VALUE = 99999999;
|
||||
|
@ -265,9 +296,9 @@ void runTimingTests() {
|
|||
}
|
||||
|
||||
}
|
||||
elapsedUsecs = (float)startTime.nsecsElapsed() * NSEC_TO_USEC;
|
||||
qCDebug(interfaceapp) << "ByteCountCoded<quint64> usecs: " << elapsedUsecs
|
||||
<< "per test:" << (double) (elapsedUsecs / tests)
|
||||
elapsedUSecs = (float)startTime.nsecsElapsed() / NSECS_PER_USEC;
|
||||
qCDebug(interfaceapp) << "ByteCountCoded<quint64> usecs: " << elapsedUSecs
|
||||
<< "per test:" << (double) (elapsedUSecs / tests)
|
||||
<< "tests:" << tests
|
||||
<< "failed:" << failed;
|
||||
}
|
||||
|
|
|
@ -35,6 +35,7 @@ const float METERS_PER_CENTIMETER = 0.01f;
|
|||
const float METERS_PER_MILLIMETER = 0.001f;
|
||||
const float MILLIMETERS_PER_METER = 1000.0f;
|
||||
// Time-unit conversion factors. All are quint64 (Qt's unsigned 64-bit integer), so using
// them in an expression with signed operands converts the whole expression to unsigned.
const quint64 NSECS_PER_USEC = 1000; // nanoseconds in one microsecond
|
||||
const quint64 NSECS_PER_MSEC = 1000000; // nanoseconds in one millisecond
|
||||
const quint64 USECS_PER_MSEC = 1000; // microseconds in one millisecond
|
||||
const quint64 MSECS_PER_SECOND = 1000; // milliseconds in one second
|
||||
const quint64 USECS_PER_SECOND = USECS_PER_MSEC * MSECS_PER_SECOND; // microseconds in one second (1,000,000)
|
||||
|
|
|
@ -455,19 +455,44 @@ void printVoxelCode(unsigned char* voxelCode) {
|
|||
}
|
||||
|
||||
#ifdef _WIN32
|
||||
void usleep(int waitTime) {
|
||||
const quint64 BUSY_LOOP_USECS = 2000;
|
||||
quint64 compTime = waitTime + usecTimestampNow();
|
||||
quint64 compTimeSleep = compTime - BUSY_LOOP_USECS;
|
||||
while (true) {
|
||||
if (usecTimestampNow() < compTimeSleep) {
|
||||
QThread::msleep(1);
|
||||
}
|
||||
if (usecTimestampNow() >= compTime) {
|
||||
break;
|
||||
}
|
||||
void usleep(int waitTime) {
|
||||
// Use QueryPerformanceCounter for least overhead
|
||||
LARGE_INTEGER now; // ticks
|
||||
QueryPerformanceCounter(&now);
|
||||
|
||||
static int64_t ticksPerSec = 0;
|
||||
if (ticksPerSec == 0) {
|
||||
LARGE_INTEGER frequency;
|
||||
QueryPerformanceFrequency(&frequency);
|
||||
ticksPerSec = frequency.QuadPart;
|
||||
}
|
||||
|
||||
// order ops to avoid loss in precision
|
||||
int64_t waitTicks = (ticksPerSec * waitTime) / USECS_PER_SECOND;
|
||||
int64_t sleepTicks = now.QuadPart + waitTicks;
|
||||
|
||||
// Busy wait with sleep/yield where possible
|
||||
while (true) {
|
||||
QueryPerformanceCounter(&now);
|
||||
if (now.QuadPart >= sleepTicks) {
|
||||
break;
|
||||
}
|
||||
|
||||
// Sleep if we have at least 1ms to spare
|
||||
const int64_t MIN_SLEEP_USECS = 1000;
|
||||
// msleep is allowed to overshoot, so give it a 100us berth
|
||||
const int64_t MIN_SLEEP_USECS_BERTH = 100;
|
||||
// order ops to avoid loss in precision
|
||||
int64_t sleepFor = ((sleepTicks - now.QuadPart) * USECS_PER_SECOND) / ticksPerSec - MIN_SLEEP_USECS_BERTH;
|
||||
if (sleepFor > MIN_SLEEP_USECS) {
|
||||
Sleep((DWORD)(sleepFor / USECS_PER_MSEC));
|
||||
// Yield otherwise
|
||||
} else {
|
||||
// Use Qt to delegate, as SwitchToThread is only supported starting with XP
|
||||
QThread::yieldCurrentThread();
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// Inserts the value and key into three arrays sorted by the key array, the first array is the value,
|
||||
|
|
Loading…
Reference in a new issue