From bbfdb0f7eb0fd8c213e40f9d78ad367d5a5eead9 Mon Sep 17 00:00:00 2001 From: Ken Cooke Date: Tue, 20 Jun 2017 16:06:05 -0700 Subject: [PATCH 1/9] Refactor CPU detection to use common subfunctions --- libraries/shared/src/CPUDetect.h | 160 ++++++++++--------------------- 1 file changed, 53 insertions(+), 107 deletions(-) diff --git a/libraries/shared/src/CPUDetect.h b/libraries/shared/src/CPUDetect.h index ea6d23d8d6..5f564c7a7f 100644 --- a/libraries/shared/src/CPUDetect.h +++ b/libraries/shared/src/CPUDetect.h @@ -2,8 +2,8 @@ // CPUDetect.h // libraries/shared/src // -// Created by Ken Cooke on 6/6/16. -// Copyright 2016 High Fidelity, Inc. +// Created by Ken Cooke on 6/16/17. +// Copyright 2017 High Fidelity, Inc. // // Distributed under the Apache License, Version 2.0. // See the accompanying file LICENSE or http://www.apache.org/licenses/LICENSE-2.0.html @@ -16,25 +16,62 @@ // Lightweight functions to detect SSE/AVX/AVX2 support // +#define MASK_SSE3 (1 << 0) // SSE3 +#define MASK_SSSE3 (1 << 9) // SSSE3 +#define MASK_SSE41 (1 << 19) // SSE4.1 +#define MASK_SSE42 ((1 << 20) | (1 << 23)) // SSE4.2 and POPCNT +#define MASK_OSXSAVE (1 << 27) // OSXSAVE +#define MASK_AVX ((1 << 27) | (1 << 28)) // OSXSAVE and AVX +#define MASK_AVX2 (1 << 5) // AVX2 + +#define MASK_XCR0_YMM ((1 << 1) | (1 << 2)) // XMM,YMM + #if defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || defined(__x86_64__) #define ARCH_X86 #endif -#define MASK_SSE3 (1 << 0) // SSE3 -#define MASK_SSSE3 (1 << 9) // SSSE3 -#define MASK_SSE41 (1 << 19) // SSE4.1 -#define MASK_SSE42 ((1 << 20) | (1 << 23)) // SSE4.2 and POPCNT -#define MASK_AVX ((1 << 27) | (1 << 28)) // OSXSAVE and AVX -#define MASK_AVX2 (1 << 5) // AVX2 - #if defined(ARCH_X86) && defined(_MSC_VER) #include +// use MSVC intrinsics +#define cpuidex(info, eax, ecx) __cpuidex(info, eax, ecx) +#define xgetbv(ecx) _xgetbv(ecx) + +#elif defined(ARCH_X86) && defined(__GNUC__) + +#include + +// use GCC intrinics/asm +static inline void cpuidex(int info[4], int eax, int ecx) { + __cpuid_count(eax, ecx, info[0], info[1], info[2], info[3]); +} + +static inline unsigned long long xgetbv(unsigned int ecx){ + unsigned int eax, edx; + __asm__("xgetbv" : "=a"(eax), "=d"(edx) : "c"(ecx)); + return ((unsigned long long)edx << 32) | eax; +} + +#else + +static inline void cpuidex(int info[4], int eax, int ecx) { + info[0] = 0; + info[1] = 0; + info[2] = 0; + info[3] = 0; +} + +static inline unsigned long long xgetbv(unsigned int ecx){ + return 0ULL; +} + +#endif + static inline bool cpuSupportsSSE3() { int info[4]; - __cpuidex(info, 0x1, 0); + cpuidex(info, 0x1, 0); return ((info[2] & MASK_SSE3) == MASK_SSE3); } @@ -42,7 +79,7 @@ static inline bool cpuSupportsSSE3() { static inline bool cpuSupportsSSSE3() { int info[4]; - __cpuidex(info, 0x1, 0); + cpuidex(info, 0x1, 0); return ((info[2] & MASK_SSSE3) == MASK_SSSE3); } @@ -50,7 +87,7 @@ static inline bool cpuSupportsSSSE3() { static inline bool cpuSupportsSSE41() { int info[4]; - __cpuidex(info, 0x1, 0); + cpuidex(info, 0x1, 0); return ((info[2] & MASK_SSE41) == MASK_SSE41); } @@ -58,7 +95,7 @@ static inline bool cpuSupportsSSE41() { static inline bool cpuSupportsSSE42() { int info[4]; - __cpuidex(info, 0x1, 0); + cpuidex(info, 0x1, 0); return ((info[2] & MASK_SSE42) == MASK_SSE42); } @@ -66,13 +103,13 @@ static inline bool cpuSupportsSSE42() { static inline bool cpuSupportsAVX() { int info[4]; - __cpuidex(info, 0x1, 0); + cpuidex(info, 0x1, 0); bool result = false; if ((info[2] & MASK_AVX) == MASK_AVX) { // verify OS support for YMM state - if ((_xgetbv(_XCR_XFEATURE_ENABLED_MASK) & 0x6) == 0x6) { + if ((xgetbv(0) & MASK_XCR0_YMM) == MASK_XCR0_YMM) { result = true; } } @@ -85,7 +122,7 @@ static inline bool cpuSupportsAVX2() { bool result = false; if (cpuSupportsAVX()) { - __cpuidex(info, 0x7, 0); + cpuidex(info, 0x7, 0); if ((info[1] & MASK_AVX2) == MASK_AVX2) { result = true; @@ -94,95 +131,4 @@ static inline bool cpuSupportsAVX2() { return result; } -#elif defined(ARCH_X86) && defined(__GNUC__) - -#include - -static inline bool cpuSupportsSSE3() { - unsigned int eax, ebx, ecx, edx; - - return __get_cpuid(0x1, &eax, &ebx, &ecx, &edx) && ((ecx & MASK_SSE3) == MASK_SSE3); -} - -static inline bool cpuSupportsSSSE3() { - unsigned int eax, ebx, ecx, edx; - - return __get_cpuid(0x1, &eax, &ebx, &ecx, &edx) && ((ecx & MASK_SSSE3) == MASK_SSSE3); -} - -static inline bool cpuSupportsSSE41() { - unsigned int eax, ebx, ecx, edx; - - return __get_cpuid(0x1, &eax, &ebx, &ecx, &edx) && ((ecx & MASK_SSE41) == MASK_SSE41); -} - -static inline bool cpuSupportsSSE42() { - unsigned int eax, ebx, ecx, edx; - - return __get_cpuid(0x1, &eax, &ebx, &ecx, &edx) && ((ecx & MASK_SSE42) == MASK_SSE42); -} - -static inline bool cpuSupportsAVX() { - unsigned int eax, ebx, ecx, edx; - - bool result = false; - if (__get_cpuid(0x1, &eax, &ebx, &ecx, &edx) && ((ecx & MASK_AVX) == MASK_AVX)) { - - // verify OS support for YMM state - __asm__("xgetbv" : "=a"(eax), "=d"(edx) : "c"(0)); - if ((eax & 0x6) == 0x6) { - result = true; - } - } - return result; -} - -static inline bool cpuSupportsAVX2() { - unsigned int eax, ebx, ecx, edx; - - bool result = false; - if (cpuSupportsAVX()) { - - // Work around a bug where __get_cpuid(0x7) returns wrong values on older GCC - // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=77756 - if (__get_cpuid(0x0, &eax, &ebx, &ecx, &edx) && (eax >= 0x7)) { - - __cpuid_count(0x7, 0x0, eax, ebx, ecx, edx); - - if ((ebx & MASK_AVX2) == MASK_AVX2) { - result = true; - } - } - } - return result; -} - -#else - -static inline bool cpuSupportsSSE3() { - return false; -} - -static inline bool cpuSupportsSSSE3() { - return false; -} - -static inline bool cpuSupportsSSE41() { - return false; -} - -static inline bool cpuSupportsSSE42() { - return false; -} - -static inline bool cpuSupportsAVX() { - return false; -} - -static inline bool cpuSupportsAVX2() { - return false; -} - -#endif - #endif // hifi_CPUDetect_h From b27b09ebc6bf326465eee932d6b04f9ba2be7a35 Mon Sep 17 00:00:00 2001 From: Ken Cooke Date: Tue, 20 Jun 2017 16:09:52 -0700 Subject: [PATCH 2/9] Detect support for AVX512 instructions --- libraries/shared/src/CPUDetect.h | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/libraries/shared/src/CPUDetect.h b/libraries/shared/src/CPUDetect.h index 5f564c7a7f..a2320dcdc1 100644 --- a/libraries/shared/src/CPUDetect.h +++ b/libraries/shared/src/CPUDetect.h @@ -13,7 +13,7 @@ #define hifi_CPUDetect_h // -// Lightweight functions to detect SSE/AVX/AVX2 support +// Lightweight functions to detect SSE/AVX/AVX2/AVX512 support // #define MASK_SSE3 (1 << 0) // SSE3 @@ -24,7 +24,10 @@ #define MASK_AVX ((1 << 27) | (1 << 28)) // OSXSAVE and AVX #define MASK_AVX2 (1 << 5) // AVX2 -#define MASK_XCR0_YMM ((1 << 1) | (1 << 2)) // XMM,YMM +#define MASK_AVX512 ((1 << 16) | (1 << 17) | (1 << 28) | (1 << 30) | (1 << 31)) // AVX512 F,DQ,CD,BW,VL (SKX) + +#define MASK_XCR0_YMM ((1 << 1) | (1 << 2)) // XMM,YMM +#define MASK_XCR0_ZMM ((1 << 1) | (1 << 2) | (7 << 5)) // XMM,YMM,ZMM #if defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || defined(__x86_64__) #define ARCH_X86 @@ -131,4 +134,25 @@ static inline bool cpuSupportsAVX2() { return result; } +static inline bool cpuSupportsAVX512() { + int info[4]; + + cpuidex(info, 0x1, 0); + + bool result = false; + if ((info[2] & MASK_OSXSAVE) == MASK_OSXSAVE) { + + // verify OS support for ZMM state + if ((xgetbv(0) & MASK_XCR0_ZMM) == MASK_XCR0_ZMM) { + + cpuidex(info, 0x7, 0); + + if ((info[1] & MASK_AVX512) == MASK_AVX512) { + result = true; + } + } + } + return result; +} + #endif // hifi_CPUDetect_h From 1688ad5f0aedb499e454b0292da38d4bc0be8f4b Mon Sep 17 00:00:00 2001 From: Ken Cooke Date: Tue, 20 Jun 2017 17:09:38 -0700 Subject: [PATCH 3/9] HRTF optimized for AVX512 --- .../audio/src/avx512/AudioHRTF_avx512.cpp | 94 +++++++++++++++++++ 1 file changed, 94 insertions(+) create mode 100644 libraries/audio/src/avx512/AudioHRTF_avx512.cpp diff --git a/libraries/audio/src/avx512/AudioHRTF_avx512.cpp b/libraries/audio/src/avx512/AudioHRTF_avx512.cpp new file mode 100644 index 0000000000..08a1389e57 --- /dev/null +++ b/libraries/audio/src/avx512/AudioHRTF_avx512.cpp @@ -0,0 +1,94 @@ +// +// AudioHRTF_avx512.cpp +// libraries/audio/src +// +// Created by Ken Cooke on 6/20/17. +// Copyright 2017 High Fidelity, Inc. +// +// Distributed under the Apache License, Version 2.0. +// See the accompanying file LICENSE or http://www.apache.org/licenses/LICENSE-2.0.html +// + +#if defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || defined(__x86_64__) + +#include +#include + +#include "../AudioHRTF.h" + +#ifndef __AVX512F__ +#error Must be compiled with /arch:AVX512 or -mavx512f. +#endif + +#if defined(__GNUC__) && !defined(__clang__) +// for some reason, GCC -O2 results in poorly optimized code +#pragma GCC optimize("Os") +#endif + +// 1 channel input, 4 channel output +void FIR_1x4_AVX512(float* src, float* dst0, float* dst1, float* dst2, float* dst3, float coef[4][HRTF_TAPS], int numFrames) { + + float* coef0 = coef[0] + HRTF_TAPS - 1; // process backwards + float* coef1 = coef[1] + HRTF_TAPS - 1; + float* coef2 = coef[2] + HRTF_TAPS - 1; + float* coef3 = coef[3] + HRTF_TAPS - 1; + + assert(numFrames % 16 == 0); + + for (int i = 0; i < numFrames; i += 16) { + + __m512 acc0 = _mm512_setzero_ps(); + __m512 acc1 = _mm512_setzero_ps(); + __m512 acc2 = _mm512_setzero_ps(); + __m512 acc3 = _mm512_setzero_ps(); + __m512 acc4 = _mm512_setzero_ps(); + __m512 acc5 = _mm512_setzero_ps(); + __m512 acc6 = _mm512_setzero_ps(); + __m512 acc7 = _mm512_setzero_ps(); + + float* ps = &src[i - HRTF_TAPS + 1]; // process forwards + + assert(HRTF_TAPS % 4 == 0); + + for (int k = 0; k < HRTF_TAPS; k += 4) { + + __m512 x0 = _mm512_loadu_ps(&ps[k+0]); + acc0 = _mm512_fmadd_ps(_mm512_set1_ps(coef0[-k-0]), x0, acc0); // vfmadd231ps acc0, x0, dword ptr [coef]{1to16} + acc1 = _mm512_fmadd_ps(_mm512_set1_ps(coef1[-k-0]), x0, acc1); + acc2 = _mm512_fmadd_ps(_mm512_set1_ps(coef2[-k-0]), x0, acc2); + acc3 = _mm512_fmadd_ps(_mm512_set1_ps(coef3[-k-0]), x0, acc3); + + __m512 x1 = _mm512_loadu_ps(&ps[k+1]); + acc4 = _mm512_fmadd_ps(_mm512_set1_ps(coef0[-k-1]), x1, acc4); + acc5 = _mm512_fmadd_ps(_mm512_set1_ps(coef1[-k-1]), x1, acc5); + acc6 = _mm512_fmadd_ps(_mm512_set1_ps(coef2[-k-1]), x1, acc6); + acc7 = _mm512_fmadd_ps(_mm512_set1_ps(coef3[-k-1]), x1, acc7); + + __m512 x2 = _mm512_loadu_ps(&ps[k+2]); + acc0 = _mm512_fmadd_ps(_mm512_set1_ps(coef0[-k-2]), x2, acc0); + acc1 = _mm512_fmadd_ps(_mm512_set1_ps(coef1[-k-2]), x2, acc1); + acc2 = _mm512_fmadd_ps(_mm512_set1_ps(coef2[-k-2]), x2, acc2); + acc3 = _mm512_fmadd_ps(_mm512_set1_ps(coef3[-k-2]), x2, acc3); + + __m512 x3 = _mm512_loadu_ps(&ps[k+3]); + acc4 = _mm512_fmadd_ps(_mm512_set1_ps(coef0[-k-3]), x3, acc4); + acc5 = _mm512_fmadd_ps(_mm512_set1_ps(coef1[-k-3]), x3, acc5); + acc6 = _mm512_fmadd_ps(_mm512_set1_ps(coef2[-k-3]), x3, acc6); + acc7 = _mm512_fmadd_ps(_mm512_set1_ps(coef3[-k-3]), x3, acc7); + } + + acc0 = _mm512_add_ps(acc0, acc4); + acc1 = _mm512_add_ps(acc1, acc5); + acc2 = _mm512_add_ps(acc2, acc6); + acc3 = _mm512_add_ps(acc3, acc7); + + _mm512_storeu_ps(&dst0[i], acc0); + _mm512_storeu_ps(&dst1[i], acc1); + _mm512_storeu_ps(&dst2[i], acc2); + _mm512_storeu_ps(&dst3[i], acc3); + } + + _mm256_zeroupper(); +} + +#endif From 00c904c9279aefe1bdb04c44aab76979e0e0cd22 Mon Sep 17 00:00:00 2001 From: Ken Cooke Date: Wed, 21 Jun 2017 09:34:02 -0700 Subject: [PATCH 4/9] Detect compiler support for SIMD intrinsics in conditional compilation --- libraries/audio/src/avx2/AudioFOA_avx2.cpp | 8 ++------ libraries/audio/src/avx2/AudioHRTF_avx2.cpp | 8 ++------ libraries/audio/src/avx2/AudioSRC_avx2.cpp | 6 +----- libraries/audio/src/avx512/AudioHRTF_avx512.cpp | 6 +----- 4 files changed, 6 insertions(+), 22 deletions(-) diff --git a/libraries/audio/src/avx2/AudioFOA_avx2.cpp b/libraries/audio/src/avx2/AudioFOA_avx2.cpp index de5dfcd0b5..880f40b185 100644 --- a/libraries/audio/src/avx2/AudioFOA_avx2.cpp +++ b/libraries/audio/src/avx2/AudioFOA_avx2.cpp @@ -9,15 +9,11 @@ // See the accompanying file LICENSE or http://www.apache.org/licenses/LICENSE-2.0.html // -#if defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || defined(__x86_64__) +#ifdef __AVX2__ #include #include -#include // AVX2 - -#ifndef __AVX2__ -#error Must be compiled with /arch:AVX2 or -mavx2 -mfma. -#endif +#include #define _mm256_permute4x64_ps(ymm, imm) _mm256_castpd_ps(_mm256_permute4x64_pd(_mm256_castps_pd(ymm), imm)); diff --git a/libraries/audio/src/avx2/AudioHRTF_avx2.cpp b/libraries/audio/src/avx2/AudioHRTF_avx2.cpp index 452ceb7f4c..e89128b173 100644 --- a/libraries/audio/src/avx2/AudioHRTF_avx2.cpp +++ b/libraries/audio/src/avx2/AudioHRTF_avx2.cpp @@ -9,17 +9,13 @@ // See the accompanying file LICENSE or http://www.apache.org/licenses/LICENSE-2.0.html // -#if defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || defined(__x86_64__) +#ifdef __AVX2__ #include -#include // AVX2 +#include #include "../AudioHRTF.h" -#ifndef __AVX2__ -#error Must be compiled with /arch:AVX2 or -mavx2 -mfma. -#endif - #if defined(__GNUC__) && !defined(__clang__) // for some reason, GCC -O2 results in poorly optimized code #pragma GCC optimize("Os") diff --git a/libraries/audio/src/avx2/AudioSRC_avx2.cpp b/libraries/audio/src/avx2/AudioSRC_avx2.cpp index 693bad7fc6..0e31a58ce7 100644 --- a/libraries/audio/src/avx2/AudioSRC_avx2.cpp +++ b/libraries/audio/src/avx2/AudioSRC_avx2.cpp @@ -9,17 +9,13 @@ // See the accompanying file LICENSE or http://www.apache.org/licenses/LICENSE-2.0.html // -#if defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || defined(__x86_64__) +#ifdef __AVX2__ #include #include #include "../AudioSRC.h" -#ifndef __AVX2__ -#error Must be compiled with /arch:AVX2 or -mavx2 -mfma. -#endif - // high/low part of int64_t #define LO32(a) ((uint32_t)(a)) #define HI32(a) ((int32_t)((a) >> 32)) diff --git a/libraries/audio/src/avx512/AudioHRTF_avx512.cpp b/libraries/audio/src/avx512/AudioHRTF_avx512.cpp index 08a1389e57..a8bb62be35 100644 --- a/libraries/audio/src/avx512/AudioHRTF_avx512.cpp +++ b/libraries/audio/src/avx512/AudioHRTF_avx512.cpp @@ -9,17 +9,13 @@ // See the accompanying file LICENSE or http://www.apache.org/licenses/LICENSE-2.0.html // -#if defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || defined(__x86_64__) +#ifdef __AVX512F__ #include #include #include "../AudioHRTF.h" -#ifndef __AVX512F__ -#error Must be compiled with /arch:AVX512 or -mavx512f. -#endif - #if defined(__GNUC__) && !defined(__clang__) // for some reason, GCC -O2 results in poorly optimized code #pragma GCC optimize("Os") From b3bb91af8c7d1a98cbf70918f1d9d50fac27cb49 Mon Sep 17 00:00:00 2001 From: Ken Cooke Date: Wed, 21 Jun 2017 10:25:34 -0700 Subject: [PATCH 5/9] CMAKE: detect compiler support, add flags for AVX512 source files --- cmake/macros/SetupHifiLibrary.cmake | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/cmake/macros/SetupHifiLibrary.cmake b/cmake/macros/SetupHifiLibrary.cmake index e4a286cf3f..d0fc58af0c 100644 --- a/cmake/macros/SetupHifiLibrary.cmake +++ b/cmake/macros/SetupHifiLibrary.cmake @@ -35,6 +35,23 @@ macro(SETUP_HIFI_LIBRARY) endif() endforeach() + # add compiler flags to AVX512 source files, if supported by compiler + include(CheckCXXCompilerFlag) + file(GLOB_RECURSE AVX512_SRCS "src/avx512/*.cpp" "src/avx512/*.c") + foreach(SRC ${AVX512_SRCS}) + if (WIN32) + check_cxx_compiler_flag("/arch:AVX512" COMPILER_SUPPORTS_AVX512) + if (COMPILER_SUPPORTS_AVX512) + set_source_files_properties(${SRC} PROPERTIES COMPILE_FLAGS /arch:AVX512) + endif() + elseif (APPLE OR UNIX) + check_cxx_compiler_flag("-mavx512f" COMPILER_SUPPORTS_AVX512) + if (COMPILER_SUPPORTS_AVX512) + set_source_files_properties(${SRC} PROPERTIES COMPILE_FLAGS -mavx512f) + endif() + endif() + endforeach() + setup_memory_debugger() # create a library and set the property so it can be referenced later From afb26df2de6d0a7beb40a9dd4deb1aa1bd4ae748 Mon Sep 17 00:00:00 2001 From: Ken Cooke Date: Wed, 21 Jun 2017 11:26:53 -0700 Subject: [PATCH 6/9] Runtime dispatch for AVX512 optimized HRTF --- libraries/audio/src/AudioHRTF.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/libraries/audio/src/AudioHRTF.cpp b/libraries/audio/src/AudioHRTF.cpp index 2a191b5821..1d5b074db7 100644 --- a/libraries/audio/src/AudioHRTF.cpp +++ b/libraries/audio/src/AudioHRTF.cpp @@ -239,10 +239,11 @@ static void FIR_1x4_SSE(float* src, float* dst0, float* dst1, float* dst2, float #include "CPUDetect.h" void FIR_1x4_AVX2(float* src, float* dst0, float* dst1, float* dst2, float* dst3, float coef[4][HRTF_TAPS], int numFrames); +void FIR_1x4_AVX512(float* src, float* dst0, float* dst1, float* dst2, float* dst3, float coef[4][HRTF_TAPS], int numFrames); static void FIR_1x4(float* src, float* dst0, float* dst1, float* dst2, float* dst3, float coef[4][HRTF_TAPS], int numFrames) { - static auto f = cpuSupportsAVX2() ? FIR_1x4_AVX2 : FIR_1x4_SSE; + static auto f = cpuSupportsAVX512() ? FIR_1x4_AVX512 : (cpuSupportsAVX2() ? FIR_1x4_AVX2 : FIR_1x4_SSE); (*f)(src, dst0, dst1, dst2, dst3, coef, numFrames); // dispatch } From 38c87b0f0800cbe2917e87b98d2ed25152e5130c Mon Sep 17 00:00:00 2001 From: Ken Cooke Date: Wed, 21 Jun 2017 13:19:46 -0700 Subject: [PATCH 7/9] Fallback code to allow compiling with VS2013 --- libraries/audio/src/avx512/AudioHRTF_avx512.cpp | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/libraries/audio/src/avx512/AudioHRTF_avx512.cpp b/libraries/audio/src/avx512/AudioHRTF_avx512.cpp index a8bb62be35..7b29d05e35 100644 --- a/libraries/audio/src/avx512/AudioHRTF_avx512.cpp +++ b/libraries/audio/src/avx512/AudioHRTF_avx512.cpp @@ -9,7 +9,7 @@ // See the accompanying file LICENSE or http://www.apache.org/licenses/LICENSE-2.0.html // -#ifdef __AVX512F__ +#if defined(zz__AVX512F__) #include #include @@ -87,4 +87,15 @@ void FIR_1x4_AVX512(float* src, float* dst0, float* dst1, float* dst2, float* ds _mm256_zeroupper(); } +// FIXME: this fallback can be removed, once we require VS2017 +#elif defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || defined(__x86_64__) + +#include "../AudioHRTF.h" + +void FIR_1x4_AVX2(float* src, float* dst0, float* dst1, float* dst2, float* dst3, float coef[4][HRTF_TAPS], int numFrames); + +void FIR_1x4_AVX512(float* src, float* dst0, float* dst1, float* dst2, float* dst3, float coef[4][HRTF_TAPS], int numFrames) { + FIR_1x4_AVX2(src, dst0, dst1, dst2, dst3, coef, numFrames); +} + #endif From 5d235f8c5252a52cb168843d25120011e8f0e7f4 Mon Sep 17 00:00:00 2001 From: Ken Cooke Date: Thu, 22 Jun 2017 06:32:32 -0700 Subject: [PATCH 8/9] Fix typo --- libraries/audio/src/avx512/AudioHRTF_avx512.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libraries/audio/src/avx512/AudioHRTF_avx512.cpp b/libraries/audio/src/avx512/AudioHRTF_avx512.cpp index 7b29d05e35..b1363aae45 100644 --- a/libraries/audio/src/avx512/AudioHRTF_avx512.cpp +++ b/libraries/audio/src/avx512/AudioHRTF_avx512.cpp @@ -9,7 +9,7 @@ // See the accompanying file LICENSE or http://www.apache.org/licenses/LICENSE-2.0.html // -#if defined(zz__AVX512F__) +#if defined(__AVX512F__) #include #include From af0f4606592b2d2a03ab388fd833d9fbef85ba61 Mon Sep 17 00:00:00 2001 From: Ken Cooke Date: Thu, 22 Jun 2017 06:38:13 -0700 Subject: [PATCH 9/9] Fix extra whitespace --- libraries/audio/src/avx512/AudioHRTF_avx512.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libraries/audio/src/avx512/AudioHRTF_avx512.cpp b/libraries/audio/src/avx512/AudioHRTF_avx512.cpp index b1363aae45..682f5f2f77 100644 --- a/libraries/audio/src/avx512/AudioHRTF_avx512.cpp +++ b/libraries/audio/src/avx512/AudioHRTF_avx512.cpp @@ -95,7 +95,7 @@ void FIR_1x4_AVX512(float* src, float* dst0, float* dst1, float* dst2, float* ds void FIR_1x4_AVX2(float* src, float* dst0, float* dst1, float* dst2, float* dst3, float coef[4][HRTF_TAPS], int numFrames); void FIR_1x4_AVX512(float* src, float* dst0, float* dst1, float* dst2, float* dst3, float coef[4][HRTF_TAPS], int numFrames) { - FIR_1x4_AVX2(src, dst0, dst1, dst2, dst3, coef, numFrames); + FIR_1x4_AVX2(src, dst0, dst1, dst2, dst3, coef, numFrames); } #endif