From bbfdb0f7eb0fd8c213e40f9d78ad367d5a5eead9 Mon Sep 17 00:00:00 2001
From: Ken Cooke
Date: Tue, 20 Jun 2017 16:06:05 -0700
Subject: [PATCH] Refactor CPU detection to use common subfunctions

---
 libraries/shared/src/CPUDetect.h | 160 ++++++++++---------------------
 1 file changed, 53 insertions(+), 107 deletions(-)

diff --git a/libraries/shared/src/CPUDetect.h b/libraries/shared/src/CPUDetect.h
index ea6d23d8d6..5f564c7a7f 100644
--- a/libraries/shared/src/CPUDetect.h
+++ b/libraries/shared/src/CPUDetect.h
@@ -2,8 +2,8 @@
 // CPUDetect.h
 // libraries/shared/src
 //
-// Created by Ken Cooke on 6/6/16.
-// Copyright 2016 High Fidelity, Inc.
+// Created by Ken Cooke on 6/16/17.
+// Copyright 2017 High Fidelity, Inc.
 //
 // Distributed under the Apache License, Version 2.0.
 // See the accompanying file LICENSE or http://www.apache.org/licenses/LICENSE-2.0.html
@@ -16,25 +16,62 @@
 // Lightweight functions to detect SSE/AVX/AVX2 support
 //
 
+#define MASK_SSE3 (1 << 0) // SSE3
+#define MASK_SSSE3 (1 << 9) // SSSE3
+#define MASK_SSE41 (1 << 19) // SSE4.1
+#define MASK_SSE42 ((1 << 20) | (1 << 23)) // SSE4.2 and POPCNT
+#define MASK_OSXSAVE (1 << 27) // OSXSAVE
+#define MASK_AVX ((1 << 27) | (1 << 28)) // OSXSAVE and AVX
+#define MASK_AVX2 (1 << 5) // AVX2
+
+#define MASK_XCR0_YMM ((1 << 1) | (1 << 2)) // XMM,YMM
+
 #if defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || defined(__x86_64__)
 #define ARCH_X86
 #endif
 
-#define MASK_SSE3 (1 << 0) // SSE3
-#define MASK_SSSE3 (1 << 9) // SSSE3
-#define MASK_SSE41 (1 << 19) // SSE4.1
-#define MASK_SSE42 ((1 << 20) | (1 << 23)) // SSE4.2 and POPCNT
-#define MASK_AVX ((1 << 27) | (1 << 28)) // OSXSAVE and AVX
-#define MASK_AVX2 (1 << 5) // AVX2
-
 #if defined(ARCH_X86) && defined(_MSC_VER)
 
 #include <intrin.h>
 
+// use MSVC intrinsics
+#define cpuidex(info, eax, ecx) __cpuidex(info, eax, ecx)
+#define xgetbv(ecx) _xgetbv(ecx)
+
+#elif defined(ARCH_X86) && defined(__GNUC__)
+
+#include <cpuid.h>
+
+// use GCC intrinsics/asm
+static inline void cpuidex(int info[4], int eax, int ecx) {
+    __cpuid_count(eax, ecx, info[0], info[1], info[2], info[3]);
+}
+
+static inline unsigned long long xgetbv(unsigned int ecx){
+    unsigned int eax, edx;
+    __asm__("xgetbv" : "=a"(eax), "=d"(edx) : "c"(ecx));
+    return ((unsigned long long)edx << 32) | eax;
+}
+
+#else
+
+static inline void cpuidex(int info[4], int eax, int ecx) {
+    info[0] = 0;
+    info[1] = 0;
+    info[2] = 0;
+    info[3] = 0;
+}
+
+static inline unsigned long long xgetbv(unsigned int ecx){
+    return 0ULL;
+}
+
+#endif
+
 static inline bool cpuSupportsSSE3() {
     int info[4];
 
-    __cpuidex(info, 0x1, 0);
+    cpuidex(info, 0x1, 0);
 
     return ((info[2] & MASK_SSE3) == MASK_SSE3);
 }
@@ -42,7 +79,7 @@ static inline bool cpuSupportsSSE3() {
 static inline bool cpuSupportsSSSE3() {
     int info[4];
 
-    __cpuidex(info, 0x1, 0);
+    cpuidex(info, 0x1, 0);
 
     return ((info[2] & MASK_SSSE3) == MASK_SSSE3);
 }
@@ -50,7 +87,7 @@ static inline bool cpuSupportsSSSE3() {
 static inline bool cpuSupportsSSE41() {
     int info[4];
 
-    __cpuidex(info, 0x1, 0);
+    cpuidex(info, 0x1, 0);
 
     return ((info[2] & MASK_SSE41) == MASK_SSE41);
 }
@@ -58,7 +95,7 @@ static inline bool cpuSupportsSSE41() {
 static inline bool cpuSupportsSSE42() {
     int info[4];
 
-    __cpuidex(info, 0x1, 0);
+    cpuidex(info, 0x1, 0);
 
     return ((info[2] & MASK_SSE42) == MASK_SSE42);
 }
@@ -66,13 +103,13 @@ static inline bool cpuSupportsSSE42() {
 static inline bool cpuSupportsAVX() {
     int info[4];
 
-    __cpuidex(info, 0x1, 0);
+    cpuidex(info, 0x1, 0);
 
     bool result = false;
     if ((info[2] & MASK_AVX) == MASK_AVX) {
 
         // verify OS support for YMM state
-        if ((_xgetbv(_XCR_XFEATURE_ENABLED_MASK) & 0x6) == 0x6) {
+        if ((xgetbv(0) & MASK_XCR0_YMM) == MASK_XCR0_YMM) {
             result = true;
         }
     }
@@ -85,7 +122,7 @@ static inline bool cpuSupportsAVX2() {
     bool result = false;
     if (cpuSupportsAVX()) {
 
-        __cpuidex(info, 0x7, 0);
+        cpuidex(info, 0x7, 0);
 
         if ((info[1] & MASK_AVX2) == MASK_AVX2) {
             result = true;
@@ -94,95 +131,4 @@ static inline bool cpuSupportsAVX2() {
     return result;
 }
 
-#elif defined(ARCH_X86) && defined(__GNUC__)
-
-#include <cpuid.h>
-
-static inline bool cpuSupportsSSE3() {
-    unsigned int eax, ebx, ecx, edx;
-
-    return __get_cpuid(0x1, &eax, &ebx, &ecx, &edx) && ((ecx & MASK_SSE3) == MASK_SSE3);
-}
-
-static inline bool cpuSupportsSSSE3() {
-    unsigned int eax, ebx, ecx, edx;
-
-    return __get_cpuid(0x1, &eax, &ebx, &ecx, &edx) && ((ecx & MASK_SSSE3) == MASK_SSSE3);
-}
-
-static inline bool cpuSupportsSSE41() {
-    unsigned int eax, ebx, ecx, edx;
-
-    return __get_cpuid(0x1, &eax, &ebx, &ecx, &edx) && ((ecx & MASK_SSE41) == MASK_SSE41);
-}
-
-static inline bool cpuSupportsSSE42() {
-    unsigned int eax, ebx, ecx, edx;
-
-    return __get_cpuid(0x1, &eax, &ebx, &ecx, &edx) && ((ecx & MASK_SSE42) == MASK_SSE42);
-}
-
-static inline bool cpuSupportsAVX() {
-    unsigned int eax, ebx, ecx, edx;
-
-    bool result = false;
-    if (__get_cpuid(0x1, &eax, &ebx, &ecx, &edx) && ((ecx & MASK_AVX) == MASK_AVX)) {
-
-        // verify OS support for YMM state
-        __asm__("xgetbv" : "=a"(eax), "=d"(edx) : "c"(0));
-        if ((eax & 0x6) == 0x6) {
-            result = true;
-        }
-    }
-    return result;
-}
-
-static inline bool cpuSupportsAVX2() {
-    unsigned int eax, ebx, ecx, edx;
-
-    bool result = false;
-    if (cpuSupportsAVX()) {
-
-        // Work around a bug where __get_cpuid(0x7) returns wrong values on older GCC
-        // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=77756
-        if (__get_cpuid(0x0, &eax, &ebx, &ecx, &edx) && (eax >= 0x7)) {
-
-            __cpuid_count(0x7, 0x0, eax, ebx, ecx, edx);
-
-            if ((ebx & MASK_AVX2) == MASK_AVX2) {
-                result = true;
-            }
-        }
-    }
-    return result;
-}
-
-#else
-
-static inline bool cpuSupportsSSE3() {
-    return false;
-}
-
-static inline bool cpuSupportsSSSE3() {
-    return false;
-}
-
-static inline bool cpuSupportsSSE41() {
-    return false;
-}
-
-static inline bool cpuSupportsSSE42() {
-    return false;
-}
-
-static inline bool cpuSupportsAVX() {
-    return false;
-}
-
-static inline bool cpuSupportsAVX2() {
-    return false;
-}
-
-#endif
-
 #endif // hifi_CPUDetect_h