diff --git a/libraries/audio/src/AudioFOA.cpp b/libraries/audio/src/AudioFOA.cpp index dc4e74869f..718b29d1b2 100644 --- a/libraries/audio/src/AudioFOA.cpp +++ b/libraries/audio/src/AudioFOA.cpp @@ -694,17 +694,19 @@ static void ifft_radix8_first(complex_t* x, complex_t* y, int n, int p) { // n >= 4 static void rfft_post(complex_t* x, const complex_t* w, int n) { + size_t t = n/4; + assert(t >= 1); // NOTE: x[n/2].re is packed into x[0].im - float t = x[0].re; - x[0].re = t + x[0].im; - x[0].im = t - x[0].im; + float tr = x[0].re; + float ti = x[0].im; + x[0].re = tr + ti; + x[0].im = tr - ti; complex_t* xp0 = &x[1]; complex_t* xp1 = &x[n/2 - 1]; - assert(n/4 >= 1); - for (size_t i = 0; i < n/4; i++) { + for (size_t i = 0; i < t; i++) { float ar = xp0[i].re; float ai = xp0[i].im; @@ -740,16 +742,19 @@ static void rfft_post(complex_t* x, const complex_t* w, int n) { // n >= 4 static void rifft_pre(complex_t* x, const complex_t* w, int n) { + size_t t = n/4; + assert(t >= 1); + // NOTE: x[n/2].re is packed into x[0].im - float t = x[0].re; - x[0].re = 0.5f * (t + x[0].im); // halved for ifft - x[0].im = 0.5f * (t - x[0].im); // halved for ifft + float tr = x[0].re; + float ti = x[0].im; + x[0].re = 0.5f * (tr + ti); // halved for ifft + x[0].im = 0.5f * (tr - ti); // halved for ifft complex_t* xp0 = &x[1]; complex_t* xp1 = &x[n/2 - 1]; - assert(n/4 >= 1); - for (size_t i = 0; i < n/4; i++) { + for (size_t i = 0; i < t; i++) { float ar = xp0[i].re; float ai = xp0[i].im; diff --git a/libraries/audio/src/avx2/AudioFOA_avx2.cpp b/libraries/audio/src/avx2/AudioFOA_avx2.cpp index 48fff19588..de5dfcd0b5 100644 --- a/libraries/audio/src/avx2/AudioFOA_avx2.cpp +++ b/libraries/audio/src/avx2/AudioFOA_avx2.cpp @@ -977,16 +977,19 @@ FORCEINLINE static void ifft_radix8_first(complex_t* x, complex_t* y, int n, int // n >= 32 static void rfft_post(complex_t* x, const complex_t* w, int n) { + size_t t = n/4; + assert(n/4 >= 8); // SIMD8 + // NOTE: x[n/2].re is packed into x[0].im - float t = x[0].re; - x[0].re = t + x[0].im; - x[0].im = t - x[0].im; + float tr = x[0].re; + float ti = x[0].im; + x[0].re = tr + ti; + x[0].im = tr - ti; complex_t* xp0 = &x[1]; complex_t* xp1 = &x[n/2 - 8]; - assert(n/4 >= 8); // SIMD8 - for (size_t i = 0; i < n/4; i += 8) { + for (size_t i = 0; i < t; i += 8) { __m256 z0 = _mm256_loadu_ps(&xp0[i+0].re); __m256 z1 = _mm256_loadu_ps(&xp0[i+4].re); @@ -1034,16 +1037,19 @@ static void rfft_post(complex_t* x, const complex_t* w, int n) { // n >= 32 static void rifft_pre(complex_t* x, const complex_t* w, int n) { + size_t t = n/4; + assert(n/4 >= 8); // SIMD8 + // NOTE: x[n/2].re is packed into x[0].im - float t = x[0].re; - x[0].re = 0.5f * (t + x[0].im); // halved for ifft - x[0].im = 0.5f * (t - x[0].im); // halved for ifft + float tr = x[0].re; + float ti = x[0].im; + x[0].re = 0.5f * (tr + ti); // halved for ifft + x[0].im = 0.5f * (tr - ti); // halved for ifft complex_t* xp0 = &x[1]; complex_t* xp1 = &x[n/2 - 8]; - assert(n/4 >= 8); // SIMD8 - for (size_t i = 0; i < n/4; i += 8) { + for (size_t i = 0; i < t; i += 8) { __m256 z0 = _mm256_loadu_ps(&xp0[i+0].re); __m256 z1 = _mm256_loadu_ps(&xp0[i+4].re);