// // AudioFOA.cpp // libraries/audio/src // // Created by Ken Cooke on 10/28/16. // Copyright 2016 High Fidelity, Inc. // // Distributed under the Apache License, Version 2.0. // See the accompanying file LICENSE or http://www.apache.org/licenses/LICENSE-2.0.html // #include "AudioFOA.h" #include #include #include "AudioFOAData.h" #if defined(_MSC_VER) #define ALIGN32 __declspec(align(32)) #elif defined(__GNUC__) #define ALIGN32 __attribute__((aligned(32))) #else #define ALIGN32 #endif // // Vectorized FFT based on radix 2/4/8 Stockham auto-sort // scalar reference version // typedef struct { float re; float im; } complex_t; static const float SQRT1_2 = 0.707106781f; // 1/sqrt(2) static const complex_t w_fft_256_0[] = { { 1.000000000e+00f, 0.000000000e+00f }, { 9.238795042e-01f, 3.826834261e-01f }, { 7.071067691e-01f, 7.071067691e-01f }, { 3.826834261e-01f, 9.238795042e-01f }, { 0.000000000e+00f, 1.000000000e+00f }, {-3.826834261e-01f, 9.238795042e-01f }, {-7.071067691e-01f, 7.071067691e-01f }, {-9.238795042e-01f, 3.826834261e-01f }, }; static const complex_t w_fft_256_1[] = { { 1.000000000e+00f, 0.000000000e+00f }, { 1.000000000e+00f, 0.000000000e+00f }, { 1.000000000e+00f, 0.000000000e+00f }, { 9.951847196e-01f, 9.801714122e-02f }, { 9.807852507e-01f, 1.950903237e-01f }, { 9.569403529e-01f, 2.902846634e-01f }, { 9.807852507e-01f, 1.950903237e-01f }, { 9.238795042e-01f, 3.826834261e-01f }, { 8.314695954e-01f, 5.555702448e-01f }, { 9.569403529e-01f, 2.902846634e-01f }, { 8.314695954e-01f, 5.555702448e-01f }, { 6.343932748e-01f, 7.730104327e-01f }, { 9.238795042e-01f, 3.826834261e-01f }, { 7.071067691e-01f, 7.071067691e-01f }, { 3.826834261e-01f, 9.238795042e-01f }, { 8.819212914e-01f, 4.713967443e-01f }, { 5.555702448e-01f, 8.314695954e-01f }, { 9.801714122e-02f, 9.951847196e-01f }, { 8.314695954e-01f, 5.555702448e-01f }, { 3.826834261e-01f, 9.238795042e-01f }, {-1.950903237e-01f, 9.807852507e-01f }, { 7.730104327e-01f, 6.343932748e-01f }, { 1.950903237e-01f, 9.807852507e-01f }, {-4.713967443e-01f, 8.819212914e-01f }, { 7.071067691e-01f, 7.071067691e-01f }, { 0.000000000e+00f, 1.000000000e+00f }, {-7.071067691e-01f, 7.071067691e-01f }, { 6.343932748e-01f, 7.730104327e-01f }, {-1.950903237e-01f, 9.807852507e-01f }, {-8.819212914e-01f, 4.713967443e-01f }, { 5.555702448e-01f, 8.314695954e-01f }, {-3.826834261e-01f, 9.238795042e-01f }, {-9.807852507e-01f, 1.950903237e-01f }, { 4.713967443e-01f, 8.819212914e-01f }, {-5.555702448e-01f, 8.314695954e-01f }, {-9.951847196e-01f,-9.801714122e-02f }, { 3.826834261e-01f, 9.238795042e-01f }, {-7.071067691e-01f, 7.071067691e-01f }, {-9.238795042e-01f,-3.826834261e-01f }, { 2.902846634e-01f, 9.569403529e-01f }, {-8.314695954e-01f, 5.555702448e-01f }, {-7.730104327e-01f,-6.343932748e-01f }, { 1.950903237e-01f, 9.807852507e-01f }, {-9.238795042e-01f, 3.826834261e-01f }, {-5.555702448e-01f,-8.314695954e-01f }, { 9.801714122e-02f, 9.951847196e-01f }, {-9.807852507e-01f, 1.950903237e-01f }, {-2.902846634e-01f,-9.569403529e-01f }, }; static const complex_t w_fft_256_2[] = { { 1.000000000e+00f, 0.000000000e+00f }, { 1.000000000e+00f, 0.000000000e+00f }, { 1.000000000e+00f, 0.000000000e+00f }, { 9.996988177e-01f, 2.454122901e-02f }, { 9.987954497e-01f, 4.906767607e-02f }, { 9.972904325e-01f, 7.356456667e-02f }, { 9.987954497e-01f, 4.906767607e-02f }, { 9.951847196e-01f, 9.801714122e-02f }, { 9.891765118e-01f, 1.467304677e-01f }, { 9.972904325e-01f, 7.356456667e-02f }, { 9.891765118e-01f, 1.467304677e-01f }, { 9.757021070e-01f, 2.191012353e-01f }, { 9.951847196e-01f, 9.801714122e-02f }, { 9.807852507e-01f, 1.950903237e-01f }, { 9.569403529e-01f, 2.902846634e-01f }, { 9.924795628e-01f, 1.224106774e-01f }, { 9.700312614e-01f, 2.429801822e-01f }, { 9.329928160e-01f, 3.598950505e-01f }, { 9.891765118e-01f, 1.467304677e-01f }, { 9.569403529e-01f, 2.902846634e-01f }, { 9.039893150e-01f, 4.275550842e-01f }, { 9.852776527e-01f, 1.709618866e-01f }, { 9.415440559e-01f, 3.368898630e-01f }, { 8.700869679e-01f, 4.928981960e-01f }, { 9.807852507e-01f, 1.950903237e-01f }, { 9.238795042e-01f, 3.826834261e-01f }, { 8.314695954e-01f, 5.555702448e-01f }, { 9.757021070e-01f, 2.191012353e-01f }, { 9.039893150e-01f, 4.275550842e-01f }, { 7.883464098e-01f, 6.152315736e-01f }, { 9.700312614e-01f, 2.429801822e-01f }, { 8.819212914e-01f, 4.713967443e-01f }, { 7.409511209e-01f, 6.715589762e-01f }, { 9.637760520e-01f, 2.667127550e-01f }, { 8.577286005e-01f, 5.141027570e-01f }, { 6.895405650e-01f, 7.242470980e-01f }, { 9.569403529e-01f, 2.902846634e-01f }, { 8.314695954e-01f, 5.555702448e-01f }, { 6.343932748e-01f, 7.730104327e-01f }, { 9.495281577e-01f, 3.136817515e-01f }, { 8.032075167e-01f, 5.956993103e-01f }, { 5.758081675e-01f, 8.175848126e-01f }, { 9.415440559e-01f, 3.368898630e-01f }, { 7.730104327e-01f, 6.343932748e-01f }, { 5.141027570e-01f, 8.577286005e-01f }, { 9.329928160e-01f, 3.598950505e-01f }, { 7.409511209e-01f, 6.715589762e-01f }, { 4.496113360e-01f, 8.932242990e-01f }, { 9.238795042e-01f, 3.826834261e-01f }, { 7.071067691e-01f, 7.071067691e-01f }, { 3.826834261e-01f, 9.238795042e-01f }, { 9.142097831e-01f, 4.052413106e-01f }, { 6.715589762e-01f, 7.409511209e-01f }, { 3.136817515e-01f, 9.495281577e-01f }, { 9.039893150e-01f, 4.275550842e-01f }, { 6.343932748e-01f, 7.730104327e-01f }, { 2.429801822e-01f, 9.700312614e-01f }, { 8.932242990e-01f, 4.496113360e-01f }, { 5.956993103e-01f, 8.032075167e-01f }, { 1.709618866e-01f, 9.852776527e-01f }, { 8.819212914e-01f, 4.713967443e-01f }, { 5.555702448e-01f, 8.314695954e-01f }, { 9.801714122e-02f, 9.951847196e-01f }, { 8.700869679e-01f, 4.928981960e-01f }, { 5.141027570e-01f, 8.577286005e-01f }, { 2.454122901e-02f, 9.996988177e-01f }, { 8.577286005e-01f, 5.141027570e-01f }, { 4.713967443e-01f, 8.819212914e-01f }, {-4.906767607e-02f, 9.987954497e-01f }, { 8.448535800e-01f, 5.349976420e-01f }, { 4.275550842e-01f, 9.039893150e-01f }, {-1.224106774e-01f, 9.924795628e-01f }, { 8.314695954e-01f, 5.555702448e-01f }, { 3.826834261e-01f, 9.238795042e-01f }, {-1.950903237e-01f, 9.807852507e-01f }, { 8.175848126e-01f, 5.758081675e-01f }, { 3.368898630e-01f, 9.415440559e-01f }, {-2.667127550e-01f, 9.637760520e-01f }, { 8.032075167e-01f, 5.956993103e-01f }, { 2.902846634e-01f, 9.569403529e-01f }, {-3.368898630e-01f, 9.415440559e-01f }, { 7.883464098e-01f, 6.152315736e-01f }, { 2.429801822e-01f, 9.700312614e-01f }, {-4.052413106e-01f, 9.142097831e-01f }, { 7.730104327e-01f, 6.343932748e-01f }, { 1.950903237e-01f, 9.807852507e-01f }, {-4.713967443e-01f, 8.819212914e-01f }, { 7.572088242e-01f, 6.531728506e-01f }, { 1.467304677e-01f, 9.891765118e-01f }, {-5.349976420e-01f, 8.448535800e-01f }, { 7.409511209e-01f, 6.715589762e-01f }, { 9.801714122e-02f, 9.951847196e-01f }, {-5.956993103e-01f, 8.032075167e-01f }, { 7.242470980e-01f, 6.895405650e-01f }, { 4.906767607e-02f, 9.987954497e-01f }, {-6.531728506e-01f, 7.572088242e-01f }, { 7.071067691e-01f, 7.071067691e-01f }, { 0.000000000e+00f, 1.000000000e+00f }, {-7.071067691e-01f, 7.071067691e-01f }, { 6.895405650e-01f, 7.242470980e-01f }, {-4.906767607e-02f, 9.987954497e-01f }, {-7.572088242e-01f, 6.531728506e-01f }, { 6.715589762e-01f, 7.409511209e-01f }, {-9.801714122e-02f, 9.951847196e-01f }, {-8.032075167e-01f, 5.956993103e-01f }, { 6.531728506e-01f, 7.572088242e-01f }, {-1.467304677e-01f, 9.891765118e-01f }, {-8.448535800e-01f, 5.349976420e-01f }, { 6.343932748e-01f, 7.730104327e-01f }, {-1.950903237e-01f, 9.807852507e-01f }, {-8.819212914e-01f, 4.713967443e-01f }, { 6.152315736e-01f, 7.883464098e-01f }, {-2.429801822e-01f, 9.700312614e-01f }, {-9.142097831e-01f, 4.052413106e-01f }, { 5.956993103e-01f, 8.032075167e-01f }, {-2.902846634e-01f, 9.569403529e-01f }, {-9.415440559e-01f, 3.368898630e-01f }, { 5.758081675e-01f, 8.175848126e-01f }, {-3.368898630e-01f, 9.415440559e-01f }, {-9.637760520e-01f, 2.667127550e-01f }, { 5.555702448e-01f, 8.314695954e-01f }, {-3.826834261e-01f, 9.238795042e-01f }, {-9.807852507e-01f, 1.950903237e-01f }, { 5.349976420e-01f, 8.448535800e-01f }, {-4.275550842e-01f, 9.039893150e-01f }, {-9.924795628e-01f, 1.224106774e-01f }, { 5.141027570e-01f, 8.577286005e-01f }, {-4.713967443e-01f, 8.819212914e-01f }, {-9.987954497e-01f, 4.906767607e-02f }, { 4.928981960e-01f, 8.700869679e-01f }, {-5.141027570e-01f, 8.577286005e-01f }, {-9.996988177e-01f,-2.454122901e-02f }, { 4.713967443e-01f, 8.819212914e-01f }, {-5.555702448e-01f, 8.314695954e-01f }, {-9.951847196e-01f,-9.801714122e-02f }, { 4.496113360e-01f, 8.932242990e-01f }, {-5.956993103e-01f, 8.032075167e-01f }, {-9.852776527e-01f,-1.709618866e-01f }, { 4.275550842e-01f, 9.039893150e-01f }, {-6.343932748e-01f, 7.730104327e-01f }, {-9.700312614e-01f,-2.429801822e-01f }, { 4.052413106e-01f, 9.142097831e-01f }, {-6.715589762e-01f, 7.409511209e-01f }, {-9.495281577e-01f,-3.136817515e-01f }, { 3.826834261e-01f, 9.238795042e-01f }, {-7.071067691e-01f, 7.071067691e-01f }, {-9.238795042e-01f,-3.826834261e-01f }, { 3.598950505e-01f, 9.329928160e-01f }, {-7.409511209e-01f, 6.715589762e-01f }, {-8.932242990e-01f,-4.496113360e-01f }, { 3.368898630e-01f, 9.415440559e-01f }, {-7.730104327e-01f, 6.343932748e-01f }, {-8.577286005e-01f,-5.141027570e-01f }, { 3.136817515e-01f, 9.495281577e-01f }, {-8.032075167e-01f, 5.956993103e-01f }, {-8.175848126e-01f,-5.758081675e-01f }, { 2.902846634e-01f, 9.569403529e-01f }, {-8.314695954e-01f, 5.555702448e-01f }, {-7.730104327e-01f,-6.343932748e-01f }, { 2.667127550e-01f, 9.637760520e-01f }, {-8.577286005e-01f, 5.141027570e-01f }, {-7.242470980e-01f,-6.895405650e-01f }, { 2.429801822e-01f, 9.700312614e-01f }, {-8.819212914e-01f, 4.713967443e-01f }, {-6.715589762e-01f,-7.409511209e-01f }, { 2.191012353e-01f, 9.757021070e-01f }, {-9.039893150e-01f, 4.275550842e-01f }, {-6.152315736e-01f,-7.883464098e-01f }, { 1.950903237e-01f, 9.807852507e-01f }, {-9.238795042e-01f, 3.826834261e-01f }, {-5.555702448e-01f,-8.314695954e-01f }, { 1.709618866e-01f, 9.852776527e-01f }, {-9.415440559e-01f, 3.368898630e-01f }, {-4.928981960e-01f,-8.700869679e-01f }, { 1.467304677e-01f, 9.891765118e-01f }, {-9.569403529e-01f, 2.902846634e-01f }, {-4.275550842e-01f,-9.039893150e-01f }, { 1.224106774e-01f, 9.924795628e-01f }, {-9.700312614e-01f, 2.429801822e-01f }, {-3.598950505e-01f,-9.329928160e-01f }, { 9.801714122e-02f, 9.951847196e-01f }, {-9.807852507e-01f, 1.950903237e-01f }, {-2.902846634e-01f,-9.569403529e-01f }, { 7.356456667e-02f, 9.972904325e-01f }, {-9.891765118e-01f, 1.467304677e-01f }, {-2.191012353e-01f,-9.757021070e-01f }, { 4.906767607e-02f, 9.987954497e-01f }, {-9.951847196e-01f, 9.801714122e-02f }, {-1.467304677e-01f,-9.891765118e-01f }, { 2.454122901e-02f, 9.996988177e-01f }, {-9.987954497e-01f, 4.906767607e-02f }, {-7.356456667e-02f,-9.972904325e-01f }, }; static complex_t w_rfft_512[] = { {-4.999623597e-01f, 4.938642383e-01f }, {-4.998494089e-01f, 4.877293706e-01f }, {-4.996611774e-01f, 4.815963805e-01f }, {-4.993977249e-01f, 4.754661620e-01f }, {-4.990590513e-01f, 4.693396389e-01f }, {-4.986452162e-01f, 4.632177055e-01f }, {-4.981563091e-01f, 4.571013451e-01f }, {-4.975923598e-01f, 4.509914219e-01f }, {-4.969534874e-01f, 4.448888898e-01f }, {-4.962397814e-01f, 4.387946725e-01f }, {-4.954513311e-01f, 4.327096343e-01f }, {-4.945882559e-01f, 4.266347587e-01f }, {-4.936507046e-01f, 4.205709100e-01f }, {-4.926388264e-01f, 4.145190716e-01f }, {-4.915527403e-01f, 4.084800482e-01f }, {-4.903926253e-01f, 4.024548531e-01f }, {-4.891586900e-01f, 3.964443207e-01f }, {-4.878510535e-01f, 3.904493749e-01f }, {-4.864699841e-01f, 3.844709396e-01f }, {-4.850156307e-01f, 3.785099089e-01f }, {-4.834882319e-01f, 3.725671768e-01f }, {-4.818880260e-01f, 3.666436076e-01f }, {-4.802152514e-01f, 3.607401550e-01f }, {-4.784701765e-01f, 3.548576832e-01f }, {-4.766530097e-01f, 3.489970267e-01f }, {-4.747640789e-01f, 3.431591392e-01f }, {-4.728036523e-01f, 3.373448551e-01f }, {-4.707720280e-01f, 3.315550685e-01f }, {-4.686695039e-01f, 3.257906437e-01f }, {-4.664964080e-01f, 3.200524747e-01f }, {-4.642530382e-01f, 3.143413961e-01f }, {-4.619397521e-01f, 3.086583018e-01f }, {-4.595569372e-01f, 3.030039668e-01f }, {-4.571048915e-01f, 2.973793447e-01f }, {-4.545840025e-01f, 2.917852402e-01f }, {-4.519946575e-01f, 2.862224579e-01f }, {-4.493372440e-01f, 2.806918621e-01f }, {-4.466121495e-01f, 2.751943469e-01f }, {-4.438198209e-01f, 2.697306275e-01f }, {-4.409606457e-01f, 2.643016279e-01f }, {-4.380350411e-01f, 2.589080930e-01f }, {-4.350434840e-01f, 2.535508871e-01f }, {-4.319864213e-01f, 2.482308149e-01f }, {-4.288643003e-01f, 2.429486215e-01f }, {-4.256775975e-01f, 2.377051711e-01f }, {-4.224267900e-01f, 2.325011790e-01f }, {-4.191123545e-01f, 2.273375094e-01f }, {-4.157347977e-01f, 2.222148776e-01f }, {-4.122946560e-01f, 2.171340883e-01f }, {-4.087924063e-01f, 2.120959163e-01f }, {-4.052285850e-01f, 2.071010768e-01f }, {-4.016037583e-01f, 2.021503448e-01f }, {-3.979184628e-01f, 1.972444654e-01f }, {-3.941732049e-01f, 1.923842132e-01f }, {-3.903686106e-01f, 1.875702441e-01f }, {-3.865052164e-01f, 1.828033626e-01f }, {-3.825836182e-01f, 1.780842245e-01f }, {-3.786044121e-01f, 1.734135747e-01f }, {-3.745681942e-01f, 1.687920988e-01f }, {-3.704755604e-01f, 1.642205119e-01f }, {-3.663271368e-01f, 1.596994996e-01f }, {-3.621235490e-01f, 1.552297175e-01f }, {-3.578654230e-01f, 1.508118808e-01f }, {-3.535533845e-01f, 1.464466155e-01f }, {-3.491881192e-01f, 1.421345770e-01f }, {-3.447702825e-01f, 1.378764510e-01f }, {-3.403005004e-01f, 1.336728632e-01f }, {-3.357794881e-01f, 1.295244396e-01f }, {-3.312079012e-01f, 1.254318058e-01f }, {-3.265864253e-01f, 1.213955879e-01f }, {-3.219157755e-01f, 1.174163818e-01f }, {-3.171966374e-01f, 1.134947836e-01f }, {-3.124297559e-01f, 1.096313894e-01f }, {-3.076157868e-01f, 1.058267951e-01f }, {-3.027555346e-01f, 1.020815372e-01f }, {-2.978496552e-01f, 9.839624166e-02f }, {-2.928989232e-01f, 9.477141500e-02f }, {-2.879040837e-01f, 9.120759368e-02f }, {-2.828659117e-01f, 8.770534396e-02f }, {-2.777851224e-01f, 8.426520228e-02f }, {-2.726624906e-01f, 8.088764548e-02f }, {-2.674988210e-01f, 7.757321000e-02f }, {-2.622948289e-01f, 7.432240248e-02f }, {-2.570513785e-01f, 7.113569975e-02f }, {-2.517691851e-01f, 6.801357865e-02f }, {-2.464490980e-01f, 6.495651603e-02f }, {-2.410918921e-01f, 6.196495891e-02f }, {-2.356983721e-01f, 5.903935432e-02f }, {-2.302693576e-01f, 5.618017912e-02f }, {-2.248056680e-01f, 5.338785052e-02f }, {-2.193081230e-01f, 5.066275597e-02f }, {-2.137775421e-01f, 4.800534248e-02f }, {-2.082147747e-01f, 4.541599751e-02f }, {-2.026206553e-01f, 4.289510846e-02f }, {-1.969960183e-01f, 4.044306278e-02f }, {-1.913417131e-01f, 3.806024790e-02f }, {-1.856586039e-01f, 3.574696183e-02f }, {-1.799475253e-01f, 3.350359201e-02f }, {-1.742093414e-01f, 3.133049607e-02f }, {-1.684449315e-01f, 2.922797203e-02f }, {-1.626551449e-01f, 2.719634771e-02f }, {-1.568408757e-01f, 2.523592114e-02f }, {-1.510029733e-01f, 2.334699035e-02f }, {-1.451423317e-01f, 2.152982354e-02f }, {-1.392598450e-01f, 1.978474855e-02f }, {-1.333563775e-01f, 1.811197400e-02f }, {-1.274328232e-01f, 1.651176810e-02f }, {-1.214900911e-01f, 1.498436928e-02f }, {-1.155290529e-01f, 1.353001595e-02f }, {-1.095506176e-01f, 1.214894652e-02f }, {-1.035556868e-01f, 1.084131002e-02f }, {-9.754516184e-02f, 9.607374668e-03f }, {-9.151994437e-02f, 8.447259665e-03f }, {-8.548094332e-02f, 7.361173630e-03f }, {-7.942907512e-02f, 6.349295378e-03f }, {-7.336523384e-02f, 5.411744118e-03f }, {-6.729035079e-02f, 4.548668861e-03f }, {-6.120533869e-02f, 3.760218620e-03f }, {-5.511110276e-02f, 3.046512604e-03f }, {-4.900857061e-02f, 2.407640219e-03f }, {-4.289865494e-02f, 1.843690872e-03f }, {-3.678228334e-02f, 1.354783773e-03f }, {-3.066036850e-02f, 9.409487247e-04f }, {-2.453383803e-02f, 6.022751331e-04f }, {-1.840361208e-02f, 3.388226032e-04f }, {-1.227061450e-02f, 1.505911350e-04f }, {-6.135769188e-03f, 3.764033318e-05f }, {-0.000000000e+00f, 0.000000000e+00f }, }; // x[n] in interleaved order // y[n] in interleaved order // n >= 2 static void fft_radix2_pass(complex_t* x, complex_t* y, const complex_t* w, int n, int p) { size_t t = n/2; assert(n >= 1); assert(p >= 1); complex_t* xp0 = &x[0*t]; complex_t* xp1 = &x[1*t]; complex_t* yp0 = &y[0*p]; complex_t* yp1 = &y[1*p]; for (size_t i = 0; i < t; i++) { size_t k = i & (p-1); // twiddle index size_t j = 2*(i-k) + k; // output index float ar0 = xp0[i].re; float ai0 = xp0[i].im; float ar1 = xp1[i].re; float ai1 = xp1[i].im; float wr1 = w[k].re; float wi1 = w[k].im; float br1 = ar1 * wr1; float bi1 = ai1 * wr1; br1 += ai1 * wi1; bi1 -= ar1 * wi1; float cr0 = ar0 + br1; float ci0 = ai0 + bi1; float cr1 = ar0 - br1; float ci1 = ai0 - bi1; yp0[j].re = cr0; yp0[j].im = ci0; yp1[j].re = cr1; yp1[j].im = ci1; } } // x[n] in interleaved order // y[n] in interleaved order // n >= 4 static void fft_radix4_pass(complex_t* x, complex_t* y, const complex_t* w, int n, int p) { size_t t = n/4; assert(n >= 1); assert(p >= 1); complex_t* xp0 = &x[0*t]; complex_t* xp1 = &x[1*t]; complex_t* xp2 = &x[2*t]; complex_t* xp3 = &x[3*t]; complex_t* yp0 = &y[0*p]; complex_t* yp1 = &y[1*p]; complex_t* yp2 = &y[2*p]; complex_t* yp3 = &y[3*p]; for (size_t i = 0; i < t; i++) { size_t k = i & (p-1); // twiddle index size_t j = 4*(i-k) + k; // output index float ar1 = xp1[i].re; float ai1 = xp1[i].im; float ar2 = xp2[i].re; float ai2 = xp2[i].im; float ar3 = xp3[i].re; float ai3 = xp3[i].im; float wr1 = w[3*k+0].re; float wi1 = w[3*k+0].im; float wr2 = w[3*k+1].re; float wi2 = w[3*k+1].im; float wr3 = w[3*k+2].re; float wi3 = w[3*k+2].im; float br1 = ar1 * wr1; float bi1 = ai1 * wr1; float br2 = ar2 * wr2; float bi2 = ai2 * wr2; float br3 = ar3 * wr3; float bi3 = ai3 * wr3; br1 += ai1 * wi1; bi1 -= ar1 * wi1; br2 += ai2 * wi2; bi2 -= ar2 * wi2; br3 += ai3 * wi3; bi3 -= ar3 * wi3; float ar0 = xp0[i].re; float ai0 = xp0[i].im; float cr0 = ar0 + br2; float ci0 = ai0 + bi2; float cr2 = ar0 - br2; float ci2 = ai0 - bi2; float cr1 = br1 + br3; float ci1 = bi1 + bi3; float cr3 = br1 - br3; float ci3 = bi1 - bi3; float dr0 = cr0 + cr1; float di0 = ci0 + ci1; float dr1 = cr2 + ci3; float di1 = ci2 - cr3; float dr2 = cr0 - cr1; float di2 = ci0 - ci1; float dr3 = cr2 - ci3; float di3 = ci2 + cr3; yp0[j].re = dr0; yp0[j].im = di0; yp1[j].re = dr1; yp1[j].im = di1; yp2[j].re = dr2; yp2[j].im = di2; yp3[j].re = dr3; yp3[j].im = di3; } } static auto& fft_radix4_last = fft_radix4_pass; // x[n] in interleaved order // y[n] in interleaved order // n >= 4 static void ifft_radix4_last(complex_t* x, complex_t* y, const complex_t* w, int n, int p) { size_t t = n/4; assert(n >= 1); assert(p >= 1); complex_t* xp0 = &x[0*t]; complex_t* xp1 = &x[1*t]; complex_t* xp2 = &x[2*t]; complex_t* xp3 = &x[3*t]; complex_t* yp0 = &y[0*p]; complex_t* yp1 = &y[1*p]; complex_t* yp2 = &y[2*p]; complex_t* yp3 = &y[3*p]; for (size_t i = 0; i < t; i++) { size_t k = i & (p-1); // twiddle index size_t j = 4*(i-k) + k; // output index float ar1 = xp1[i].re; float ai1 = xp1[i].im; float ar2 = xp2[i].re; float ai2 = xp2[i].im; float ar3 = xp3[i].re; float ai3 = xp3[i].im; float wr1 = w[3*k+0].re; float wi1 = w[3*k+0].im; float wr2 = w[3*k+1].re; float wi2 = w[3*k+1].im; float wr3 = w[3*k+2].re; float wi3 = w[3*k+2].im; float br1 = ar1 * wr1; float bi1 = ai1 * wr1; float br2 = ar2 * wr2; float bi2 = ai2 * wr2; float br3 = ar3 * wr3; float bi3 = ai3 * wr3; br1 += ai1 * wi1; bi1 -= ar1 * wi1; br2 += ai2 * wi2; bi2 -= ar2 * wi2; br3 += ai3 * wi3; bi3 -= ar3 * wi3; float ar0 = xp0[i].re; float ai0 = xp0[i].im; float cr0 = ar0 + br2; float ci0 = ai0 + bi2; float cr2 = ar0 - br2; float ci2 = ai0 - bi2; float cr1 = br1 + br3; float ci1 = bi1 + bi3; float cr3 = br1 - br3; float ci3 = bi1 - bi3; float dr0 = cr0 + cr1; float di0 = ci0 + ci1; float dr1 = cr2 + ci3; float di1 = ci2 - cr3; float dr2 = cr0 - cr1; float di2 = ci0 - ci1; float dr3 = cr2 - ci3; float di3 = ci2 + cr3; float scale = 1.0f/n; // scaled by 1/n for ifft dr0 *= scale; di0 *= scale; dr1 *= scale; di1 *= scale; dr2 *= scale; di2 *= scale; dr3 *= scale; di3 *= scale; yp0[j].re = di0; // swapped re,im for ifft yp0[j].im = dr0; yp1[j].re = di1; yp1[j].im = dr1; yp2[j].re = di2; yp2[j].im = dr2; yp3[j].re = di3; yp3[j].im = dr3; } } // x[n] in interleaved order // y[n] in interleaved order // n >= 8 static void fft_radix8_first(complex_t* x, complex_t* y, int n, int p) { size_t t = n/8; assert(t >= 1); assert(p == 1); complex_t* xp0 = &x[0*t]; complex_t* xp1 = &x[1*t]; complex_t* xp2 = &x[2*t]; complex_t* xp3 = &x[3*t]; complex_t* xp4 = &x[4*t]; complex_t* xp5 = &x[5*t]; complex_t* xp6 = &x[6*t]; complex_t* xp7 = &x[7*t]; for (size_t i = 0; i < t; i++) { // // even // float ar0 = xp0[i].re; float ai0 = xp0[i].im; float ar2 = xp2[i].re; float ai2 = xp2[i].im; float ar4 = xp4[i].re; float ai4 = xp4[i].im; float ar6 = xp6[i].re; float ai6 = xp6[i].im; float cr0 = ar0 + ar4; float ci0 = ai0 + ai4; float cr2 = ar2 + ar6; float ci2 = ai2 + ai6; float cr4 = ar0 - ar4; float ci4 = ai0 - ai4; float cr6 = ar2 - ar6; float ci6 = ai2 - ai6; float dr0 = cr0 + cr2; float di0 = ci0 + ci2; float dr2 = cr0 - cr2; float di2 = ci0 - ci2; float dr4 = cr4 + ci6; float di4 = ci4 - cr6; float dr6 = cr4 - ci6; float di6 = ci4 + cr6; // // odd // float ar1 = xp1[i].re; float ai1 = xp1[i].im; float ar3 = xp3[i].re; float ai3 = xp3[i].im; float ar5 = xp5[i].re; float ai5 = xp5[i].im; float ar7 = xp7[i].re; float ai7 = xp7[i].im; float br1 = ar1 + ar5; float bi1 = ai1 + ai5; float br3 = ar3 + ar7; float bi3 = ai3 + ai7; float br5 = ar1 - ar5; float bi5 = ai1 - ai5; float br7 = ar3 - ar7; float bi7 = ai3 - ai7; float cr1 = br1; float ci1 = bi1; float cr3 = br3; float ci3 = bi3; float cr5 = bi5 + br5; float ci5 = bi5 - br5; float cr7 = bi7 - br7; float ci7 = bi7 + br7; cr5 *= SQRT1_2; ci5 *= SQRT1_2; cr7 *= SQRT1_2; ci7 *= SQRT1_2; float dr1 = cr1 + cr3; float di1 = ci1 + ci3; float dr3 = cr1 - cr3; float di3 = ci1 - ci3; float dr5 = cr5 + cr7; float di5 = ci5 - ci7; float dr7 = cr5 - cr7; float di7 = ci5 + ci7; // // merge // float er0 = dr0 + dr1; float ei0 = di0 + di1; float er1 = dr4 + dr5; float ei1 = di4 + di5; float er2 = dr2 + di3; float ei2 = di2 - dr3; float er3 = dr6 + di7; float ei3 = di6 - dr7; float er4 = dr0 - dr1; float ei4 = di0 - di1; float er5 = dr4 - dr5; float ei5 = di4 - di5; float er6 = dr2 - di3; float ei6 = di2 + dr3; float er7 = dr6 - di7; float ei7 = di6 + dr7; y[8*i+0].re = er0; y[8*i+0].im = ei0; y[8*i+1].re = er1; y[8*i+1].im = ei1; y[8*i+2].re = er2; y[8*i+2].im = ei2; y[8*i+3].re = er3; y[8*i+3].im = ei3; y[8*i+4].re = er4; y[8*i+4].im = ei4; y[8*i+5].re = er5; y[8*i+5].im = ei5; y[8*i+6].re = er6; y[8*i+6].im = ei6; y[8*i+7].re = er7; y[8*i+7].im = ei7; } } // x[n] in interleaved order // y[n] in interleaved order // n >= 8 static void ifft_radix8_first(complex_t* x, complex_t* y, int n, int p) { size_t t = n/8; assert(t >= 1); assert(p == 1); complex_t* xp0 = &x[0*t]; complex_t* xp1 = &x[1*t]; complex_t* xp2 = &x[2*t]; complex_t* xp3 = &x[3*t]; complex_t* xp4 = &x[4*t]; complex_t* xp5 = &x[5*t]; complex_t* xp6 = &x[6*t]; complex_t* xp7 = &x[7*t]; for (size_t i = 0; i < t; i++) { // // even // float ar0 = xp0[i].im; // swapped re,im for ifft float ai0 = xp0[i].re; float ar2 = xp2[i].im; float ai2 = xp2[i].re; float ar4 = xp4[i].im; float ai4 = xp4[i].re; float ar6 = xp6[i].im; float ai6 = xp6[i].re; float cr0 = ar0 + ar4; float ci0 = ai0 + ai4; float cr2 = ar2 + ar6; float ci2 = ai2 + ai6; float cr4 = ar0 - ar4; float ci4 = ai0 - ai4; float cr6 = ar2 - ar6; float ci6 = ai2 - ai6; float dr0 = cr0 + cr2; float di0 = ci0 + ci2; float dr2 = cr0 - cr2; float di2 = ci0 - ci2; float dr4 = cr4 + ci6; float di4 = ci4 - cr6; float dr6 = cr4 - ci6; float di6 = ci4 + cr6; // // odd // float ar1 = xp1[i].im; // swapped re,im for ifft float ai1 = xp1[i].re; float ar3 = xp3[i].im; float ai3 = xp3[i].re; float ar5 = xp5[i].im; float ai5 = xp5[i].re; float ar7 = xp7[i].im; float ai7 = xp7[i].re; float br1 = ar1 + ar5; float bi1 = ai1 + ai5; float br3 = ar3 + ar7; float bi3 = ai3 + ai7; float br5 = ar1 - ar5; float bi5 = ai1 - ai5; float br7 = ar3 - ar7; float bi7 = ai3 - ai7; float cr1 = br1; float ci1 = bi1; float cr3 = br3; float ci3 = bi3; float cr5 = bi5 + br5; float ci5 = bi5 - br5; float cr7 = bi7 - br7; float ci7 = bi7 + br7; cr5 *= SQRT1_2; ci5 *= SQRT1_2; cr7 *= SQRT1_2; ci7 *= SQRT1_2; float dr1 = cr1 + cr3; float di1 = ci1 + ci3; float dr3 = cr1 - cr3; float di3 = ci1 - ci3; float dr5 = cr5 + cr7; float di5 = ci5 - ci7; float dr7 = cr5 - cr7; float di7 = ci5 + ci7; // // merge // float er0 = dr0 + dr1; float ei0 = di0 + di1; float er1 = dr4 + dr5; float ei1 = di4 + di5; float er2 = dr2 + di3; float ei2 = di2 - dr3; float er3 = dr6 + di7; float ei3 = di6 - dr7; float er4 = dr0 - dr1; float ei4 = di0 - di1; float er5 = dr4 - dr5; float ei5 = di4 - di5; float er6 = dr2 - di3; float ei6 = di2 + dr3; float er7 = dr6 - di7; float ei7 = di6 + dr7; y[8*i+0].re = er0; y[8*i+0].im = ei0; y[8*i+1].re = er1; y[8*i+1].im = ei1; y[8*i+2].re = er2; y[8*i+2].im = ei2; y[8*i+3].re = er3; y[8*i+3].im = ei3; y[8*i+4].re = er4; y[8*i+4].im = ei4; y[8*i+5].re = er5; y[8*i+5].im = ei5; y[8*i+6].re = er6; y[8*i+6].im = ei6; y[8*i+7].re = er7; y[8*i+7].im = ei7; } } // x[n] in interleaved order // n >= 4 static void rfft_post(complex_t* x, const complex_t* w, int n) { size_t t = n/4; assert(t >= 1); // NOTE: x[n/2].re is packed into x[0].im float tr = x[0].re; float ti = x[0].im; x[0].re = tr + ti; x[0].im = tr - ti; complex_t* xp0 = &x[1]; complex_t* xp1 = &x[n/2 - 1]; for (size_t i = 0; i < t; i++) { float ar = xp0[i].re; float ai = xp0[i].im; float br = xp1[0-i].re; float bi = xp1[0-i].im; float wr = w[i].re; float wi = w[i].im; float cr = ar - br; float ci = ai + bi; float dr = cr * wr; float di = ci * wr; dr += ci * wi; di -= cr * wi; ar = ar + di; ai = dr - ai; br = br - di; bi = dr - bi; xp0[i].re = br; xp0[i].im = bi; xp1[0-i].re = ar; xp1[0-i].im = ai; } } // x[n] in interleaved order // n >= 4 static void rifft_pre(complex_t* x, const complex_t* w, int n) { size_t t = n/4; assert(t >= 1); // NOTE: x[n/2].re is packed into x[0].im float tr = x[0].re; float ti = x[0].im; x[0].re = 0.5f * (tr + ti); // halved for ifft x[0].im = 0.5f * (tr - ti); // halved for ifft complex_t* xp0 = &x[1]; complex_t* xp1 = &x[n/2 - 1]; for (size_t i = 0; i < t; i++) { float ar = xp0[i].re; float ai = xp0[i].im; float br = xp1[0-i].re; float bi = xp1[0-i].im; float wr = w[i].re; // negated for ifft float wi = w[i].im; float cr = br - ar; float ci = ai + bi; float dr = cr * wr; float di = cr * wi; dr += ci * wi; di -= ci * wr; ar = ar + di; ai = dr - ai; br = br - di; bi = dr - bi; xp0[i].re = br; xp0[i].im = bi; xp1[0-i].re = ar; xp1[0-i].im = ai; } } // in-place complex FFT // buf[n] in interleaved order static void fft256_ref(float buf[512]) { complex_t* x = (complex_t*)buf; complex_t y[256]; fft_radix8_first(x, y, 256, 1); fft_radix2_pass(y, x, w_fft_256_0, 256, 8); fft_radix4_pass(x, y, w_fft_256_1, 256, 16); fft_radix4_last(y, x, w_fft_256_2, 256, 64); } // in-place complex IFFT // buf[n] in interleaved order static void ifft256_ref(float buf[512]) { complex_t* x = (complex_t*)buf; complex_t y[256]; ifft_radix8_first(x, y, 256, 1); fft_radix2_pass(y, x, w_fft_256_0, 256, 8); fft_radix4_pass(x, y, w_fft_256_1, 256, 16); ifft_radix4_last(y, x, w_fft_256_2, 256, 64); } // in-place real FFT // buf[n] in interleaved order static void rfft512_ref(float buf[512]) { fft256_ref(buf); // half size complex rfft_post((complex_t*)buf, w_rfft_512, 512); } // in-place real IFFT // buf[n] in interleaved order static void rifft512_ref(float buf[512]) { rifft_pre((complex_t*)buf, w_rfft_512, 512); ifft256_ref(buf); // half size complex } // fft-domain complex multiply-add, for packed complex-conjugate symmetric // 1 channel input, 2 channel output static void rfft512_cmadd_1X2_ref(const float src[512], const float coef0[512], const float coef1[512], float dst0[512], float dst1[512]) { // NOTE: x[n/2].re is packed into x[0].im dst0[0] += src[0] * coef0[0]; // first bin is real dst0[1] += src[1] * coef0[1]; // last bin is real dst1[0] += src[0] * coef1[0]; // first bin is real dst1[1] += src[1] * coef1[1]; // last bin is real for (int i = 1; i < 512/2; i++) { dst0[2*i+0] += src[2*i+0] * coef0[2*i+0] - src[2*i+1] * coef0[2*i+1]; // re dst0[2*i+1] += src[2*i+0] * coef0[2*i+1] + src[2*i+1] * coef0[2*i+0]; // im dst1[2*i+0] += src[2*i+0] * coef1[2*i+0] - src[2*i+1] * coef1[2*i+1]; // re dst1[2*i+1] += src[2*i+0] * coef1[2*i+1] + src[2*i+1] * coef1[2*i+0]; // im } } #ifdef FOA_INPUT_FUMA // input is FuMa (B-format) channel order and normalization // convert to deinterleaved float (B-format) static void convertInput_ref(int16_t* src, float *dst[4], float gain, int numFrames) { const float scale = gain * (1/32768.0f); for (int i = 0; i < numFrames; i++) { dst[0][i] = (float)src[4*i+0] * scale; // W dst[1][i] = (float)src[4*i+1] * scale; // X dst[2][i] = (float)src[4*i+2] * scale; // Y dst[3][i] = (float)src[4*i+3] * scale; // Z } } #else // input is ambiX (ACN/SN3D) channel order and normalization // convert to deinterleaved float (B-format) static void convertInput_ref(int16_t* src, float *dst[4], float gain, int numFrames) { const float scaleW = gain * (1/32768.0f) * SQRT1_2; // -3dB const float scale = gain * (1/32768.0f); for (int i = 0; i < numFrames; i++) { dst[0][i] = (float)src[4*i+0] * scaleW; // W dst[2][i] = (float)src[4*i+1] * scale; // Y dst[3][i] = (float)src[4*i+2] * scale; // Z dst[1][i] = (float)src[4*i+3] * scale; // X } } #endif // in-place rotation of the soundfield // crossfade between old and new rotation, to prevent artifacts static void rotate_3x3_ref(float* buf[4], const float m0[3][3], const float m1[3][3], const float* win, int numFrames) { const float md[3][3] = { { m0[0][0] - m1[0][0], m0[0][1] - m1[0][1], m0[0][2] - m1[0][2] }, { m0[1][0] - m1[1][0], m0[1][1] - m1[1][1], m0[1][2] - m1[1][2] }, { m0[2][0] - m1[2][0], m0[2][1] - m1[2][1], m0[2][2] - m1[2][2] }, }; for (int i = 0; i < numFrames; i++) { float frac = win[i]; // interpolate the matrix float m00 = m1[0][0] + frac * md[0][0]; float m10 = m1[1][0] + frac * md[1][0]; float m20 = m1[2][0] + frac * md[2][0]; float m01 = m1[0][1] + frac * md[0][1]; float m11 = m1[1][1] + frac * md[1][1]; float m21 = m1[2][1] + frac * md[2][1]; float m02 = m1[0][2] + frac * md[0][2]; float m12 = m1[1][2] + frac * md[1][2]; float m22 = m1[2][2] + frac * md[2][2]; // matrix multiply float x = m00 * buf[1][i] + m01 * buf[2][i] + m02 * buf[3][i]; float y = m10 * buf[1][i] + m11 * buf[2][i] + m12 * buf[3][i]; float z = m20 * buf[1][i] + m21 * buf[2][i] + m22 * buf[3][i]; buf[1][i] = x; buf[2][i] = y; buf[3][i] = z; } } #if defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || defined(__x86_64__) // // Runtime CPU dispatch // #include "CPUDetect.h" void rfft512_AVX2(float buf[512]); void rifft512_AVX2(float buf[512]); void rfft512_cmadd_1X2_AVX2(const float src[512], const float coef0[512], const float coef1[512], float dst0[512], float dst1[512]); void convertInput_AVX2(int16_t* src, float *dst[4], float gain, int numFrames); void rotate_3x3_AVX2(float* buf[4], const float m0[3][3], const float m1[3][3], const float* win, int numFrames); static void rfft512(float buf[512]) { static auto f = cpuSupportsAVX2() ? rfft512_AVX2 : rfft512_ref; (*f)(buf); // dispatch } static void rifft512(float buf[512]) { static auto f = cpuSupportsAVX2() ? rifft512_AVX2 : rifft512_ref; (*f)(buf); // dispatch } static void rfft512_cmadd_1X2(const float src[512], const float coef0[512], const float coef1[512], float dst0[512], float dst1[512]) { static auto f = cpuSupportsAVX2() ? rfft512_cmadd_1X2_AVX2 : rfft512_cmadd_1X2_ref; (*f)(src, coef0, coef1, dst0, dst1); // dispatch } static void convertInput(int16_t* src, float *dst[4], float gain, int numFrames) { static auto f = cpuSupportsAVX2() ? convertInput_AVX2 : convertInput_ref; (*f)(src, dst, gain, numFrames); // dispatch } static void rotate_3x3(float* buf[4], const float m0[3][3], const float m1[3][3], const float* win, int numFrames) { static auto f = cpuSupportsAVX2() ? rotate_3x3_AVX2 : rotate_3x3_ref; (*f)(buf, m0, m1, win, numFrames); // dispatch } #else // portable reference code static auto& rfft512 = rfft512_ref; static auto& rifft512 = rifft512_ref; static auto& rfft512_cmadd_1X2 = rfft512_cmadd_1X2_ref; static auto& convertInput = convertInput_ref; static auto& rotate_3x3 = rotate_3x3_ref; #endif // // Equal-gain crossfade // // Cos(x)^2 window minimizes the modulation sidebands when a pure tone is panned. // ALIGN32 static const float crossfadeTable[FOA_BLOCK] = { 0.9999571638f, 0.9998286625f, 0.9996145181f, 0.9993147674f, 0.9989294616f, 0.9984586669f, 0.9979024638f, 0.9972609477f, 0.9965342285f, 0.9957224307f, 0.9948256934f, 0.9938441703f, 0.9927780295f, 0.9916274538f, 0.9903926402f, 0.9890738004f, 0.9876711603f, 0.9861849602f, 0.9846154549f, 0.9829629131f, 0.9812276182f, 0.9794098674f, 0.9775099722f, 0.9755282581f, 0.9734650647f, 0.9713207455f, 0.9690956680f, 0.9667902132f, 0.9644047764f, 0.9619397663f, 0.9593956051f, 0.9567727288f, 0.9540715869f, 0.9512926422f, 0.9484363708f, 0.9455032621f, 0.9424938187f, 0.9394085563f, 0.9362480035f, 0.9330127019f, 0.9297032058f, 0.9263200822f, 0.9228639109f, 0.9193352840f, 0.9157348062f, 0.9120630943f, 0.9083207776f, 0.9045084972f, 0.9006269063f, 0.8966766701f, 0.8926584654f, 0.8885729807f, 0.8844209160f, 0.8802029828f, 0.8759199037f, 0.8715724127f, 0.8671612547f, 0.8626871855f, 0.8581509717f, 0.8535533906f, 0.8488952299f, 0.8441772878f, 0.8394003728f, 0.8345653032f, 0.8296729076f, 0.8247240242f, 0.8197195010f, 0.8146601955f, 0.8095469747f, 0.8043807145f, 0.7991623003f, 0.7938926261f, 0.7885725950f, 0.7832031185f, 0.7777851165f, 0.7723195175f, 0.7668072580f, 0.7612492824f, 0.7556465430f, 0.7500000000f, 0.7443106207f, 0.7385793801f, 0.7328072602f, 0.7269952499f, 0.7211443451f, 0.7152555484f, 0.7093298688f, 0.7033683215f, 0.6973719282f, 0.6913417162f, 0.6852787188f, 0.6791839748f, 0.6730585285f, 0.6669034296f, 0.6607197327f, 0.6545084972f, 0.6482707875f, 0.6420076724f, 0.6357202249f, 0.6294095226f, 0.6230766465f, 0.6167226819f, 0.6103487175f, 0.6039558454f, 0.5975451610f, 0.5911177627f, 0.5846747519f, 0.5782172325f, 0.5717463110f, 0.5652630961f, 0.5587686987f, 0.5522642316f, 0.5457508093f, 0.5392295479f, 0.5327015646f, 0.5261679781f, 0.5196299079f, 0.5130884742f, 0.5065447978f, 0.5000000000f, 0.4934552022f, 0.4869115258f, 0.4803700921f, 0.4738320219f, 0.4672984354f, 0.4607704521f, 0.4542491907f, 0.4477357684f, 0.4412313013f, 0.4347369039f, 0.4282536890f, 0.4217827675f, 0.4153252481f, 0.4088822373f, 0.4024548390f, 0.3960441546f, 0.3896512825f, 0.3832773181f, 0.3769233535f, 0.3705904774f, 0.3642797751f, 0.3579923276f, 0.3517292125f, 0.3454915028f, 0.3392802673f, 0.3330965704f, 0.3269414715f, 0.3208160252f, 0.3147212812f, 0.3086582838f, 0.3026280718f, 0.2966316785f, 0.2906701312f, 0.2847444516f, 0.2788556549f, 0.2730047501f, 0.2671927398f, 0.2614206199f, 0.2556893793f, 0.2500000000f, 0.2443534570f, 0.2387507176f, 0.2331927420f, 0.2276804825f, 0.2222148835f, 0.2167968815f, 0.2114274050f, 0.2061073739f, 0.2008376997f, 0.1956192855f, 0.1904530253f, 0.1853398045f, 0.1802804990f, 0.1752759758f, 0.1703270924f, 0.1654346968f, 0.1605996272f, 0.1558227122f, 0.1511047701f, 0.1464466094f, 0.1418490283f, 0.1373128145f, 0.1328387453f, 0.1284275873f, 0.1240800963f, 0.1197970172f, 0.1155790840f, 0.1114270193f, 0.1073415346f, 0.1033233299f, 0.0993730937f, 0.0954915028f, 0.0916792224f, 0.0879369057f, 0.0842651938f, 0.0806647160f, 0.0771360891f, 0.0736799178f, 0.0702967942f, 0.0669872981f, 0.0637519965f, 0.0605914437f, 0.0575061813f, 0.0544967379f, 0.0515636292f, 0.0487073578f, 0.0459284131f, 0.0432272712f, 0.0406043949f, 0.0380602337f, 0.0355952236f, 0.0332097868f, 0.0309043320f, 0.0286792545f, 0.0265349353f, 0.0244717419f, 0.0224900278f, 0.0205901326f, 0.0187723818f, 0.0170370869f, 0.0153845451f, 0.0138150398f, 0.0123288397f, 0.0109261996f, 0.0096073598f, 0.0083725462f, 0.0072219705f, 0.0061558297f, 0.0051743066f, 0.0042775693f, 0.0034657715f, 0.0027390523f, 0.0020975362f, 0.0015413331f, 0.0010705384f, 0.0006852326f, 0.0003854819f, 0.0001713375f, 0.0000428362f, 0.0000000000f, }; // convert quaternion to a column-major 3x3 rotation matrix static void quatToMatrix_3x3(float w, float x, float y, float z, float m[3][3]) { float xx = x * (x + x); float xy = x * (y + y); float xz = x * (z + z); float yy = y * (y + y); float yz = y * (z + z); float zz = z * (z + z); float wx = w * (x + x); float wy = w * (y + y); float wz = w * (z + z); m[0][0] = 1.0f - (yy + zz); m[0][1] = xy - wz; m[0][2] = xz + wy; m[1][0] = xy + wz; m[1][1] = 1.0f - (xx + zz); m[1][2] = yz - wx; m[2][0] = xz - wy; m[2][1] = yz + wx; m[2][2] = 1.0f - (xx + yy); } // Ambisonic to binaural render void AudioFOA::render(int16_t* input, float* output, int index, float qw, float qx, float qy, float qz, float gain, int numFrames) { assert(index >= 0); assert(index < FOA_TABLES); assert(numFrames == FOA_BLOCK); ALIGN32 float fftBuffer[FOA_NFFT]; // in-place FFT buffer ALIGN32 float accBuffer[2][FOA_NFFT] = {}; // binaural accumulation buffers ALIGN32 float inBuffer[4][FOA_BLOCK]; // deinterleaved input buffers float* in[4] = { inBuffer[0], inBuffer[1], inBuffer[2], inBuffer[3] }; float rotation[3][3]; // convert input to deinterleaved float convertInput(input, in, FOA_GAIN * gain, FOA_BLOCK); // convert quaternion to 3x3 rotation quatToMatrix_3x3(qw, qx, qy, qz, rotation); // rotate the soundfield rotate_3x3(in, _rotationState, rotation, crossfadeTable, FOA_BLOCK); // rotation history update memcpy(_rotationState, rotation, sizeof(_rotationState)); // // Accumulate the contribution of each spherical harmonic into binaural output buffers. // Performed in the frequency domain, using overlap-save FFT. // for (int n = 0; n < 4; n++) { // fill with overlapped input memcpy(fftBuffer, _fftState[n], FOA_OVERLAP * sizeof(float)); // old input memcpy(&fftBuffer[FOA_OVERLAP], in[n], FOA_BLOCK * sizeof(float)); // new input // input history update memcpy(_fftState[n], &fftBuffer[FOA_BLOCK], FOA_OVERLAP * sizeof(float)); // forward transform rfft512(fftBuffer); // multiply-accumulate with filter kernels rfft512_cmadd_1X2(fftBuffer, foa_table_table[index][n][0], foa_table_table[index][n][1], accBuffer[0], accBuffer[1]); } // inverse transform rifft512(accBuffer[0]); rifft512(accBuffer[1]); // // Mix into the interleaved output buffer. // The first FOA_OVERLAP samples are discarded, due to overlap-save FFT. // for (int i = 0; i < FOA_BLOCK; i++) { output[2*i+0] += accBuffer[0][i + FOA_OVERLAP]; output[2*i+1] += accBuffer[1][i + FOA_OVERLAP]; } }