diff --git a/tests/shared/src/BlendshapePackingTests.cpp b/tests/shared/src/BlendshapePackingTests.cpp
new file mode 100644
index 0000000000..a751a5ca02
--- /dev/null
+++ b/tests/shared/src/BlendshapePackingTests.cpp
@@ -0,0 +1,164 @@
+//
+//  BlendshapePackingTests.cpp
+//  tests/shared/src
+//
+//  Created by Ken Cooke on 6/24/19.
+//  Copyright 2019 High Fidelity, Inc.
+//
+//  Distributed under the Apache License, Version 2.0.
+//  See the accompanying file LICENSE or http://www.apache.org/licenses/LICENSE-2.0.html
+//
+
+#include "BlendshapePackingTests.h"
+
+// NOTE(review): the <...> targets of the includes in this file were missing from the copy of
+// the patch this was restored from. The names used here are inferred from the symbols the
+// code references and must be confirmed against the repository.
+#include <vector>
+
+#include <test-utils/QTestExtensions.h>
+
+#include <GLMHelpers.h>
+#include <glm/gtc/random.hpp>
+
+// One blendshape delta before packing. Must stay exactly 9 floats: the x86 dispatch path
+// static_asserts that size before casting to float[9].
+struct BlendshapeOffsetUnpacked {
+    glm::vec3 positionOffset;
+    glm::vec3 normalOffset;
+    glm::vec3 tangentOffset;
+};
+
+// One blendshape delta after packing. Must stay exactly 4 uint32_t (see the static_assert in
+// the x86 dispatch path).
+struct BlendshapeOffsetPacked {
+    glm::uvec4 packedPosNorTan;
+};
+
+QTEST_MAIN(BlendshapePackingTests)
+
+// Reference packing of a single offset:
+//   packed[0] = bit pattern of the largest absolute position component (1.0f for a zero position)
+//   packed[1] = position divided by that value, packed with glm_packSnorm3x10_1x2
+//   packed[2] = normal offset, packed with glm_packSnorm3x10_1x2
+//   packed[3] = tangent offset, packed with glm_packSnorm3x10_1x2
+static void packBlendshapeOffsetTo_Pos_F32_3xSN10_Nor_3xSN10_Tan_3xSN10(glm::uvec4& packed, const BlendshapeOffsetUnpacked& unpacked) {
+    float len = glm::compMax(glm::abs(unpacked.positionOffset));
+    glm::vec3 normalizedPos(unpacked.positionOffset);
+    if (len > 0.0f) {
+        normalizedPos /= len;
+    } else {
+        len = 1.0f;  // all-zero position: skip the divide and store a scale of 1.0f
+    }
+
+    packed = glm::uvec4(
+        glm::floatBitsToUint(len),
+        glm_packSnorm3x10_1x2(glm::vec4(normalizedPos, 0.0f)),
+        glm_packSnorm3x10_1x2(glm::vec4(unpacked.normalOffset, 0.0f)),
+        glm_packSnorm3x10_1x2(glm::vec4(unpacked.tangentOffset, 0.0f))
+    );
+}
+
+// Scalar reference implementation: packs `size` consecutive offsets from `unpacked` into `packed`.
+static void packBlendshapeOffsets_ref(BlendshapeOffsetUnpacked* unpacked, BlendshapeOffsetPacked* packed, int size) {
+    for (int i = 0; i < size; ++i) {
+        packBlendshapeOffsetTo_Pos_F32_3xSN10_Nor_3xSN10_Tan_3xSN10(packed[i].packedPosNorTan, unpacked[i]);
+    }
+}
+
+#if defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || defined(__x86_64__)
+//
+// Runtime CPU dispatch
+//
+#include <CPUDetect.h>
+
+// AVX2 implementation under test; it is only declared here, the definition lives elsewhere.
+void packBlendshapeOffsets_AVX2(float (*unpacked)[9], uint32_t (*packed)[4], int size);
+
+// Packs with the AVX2 routine when cpuSupportsAVX2() reports support, otherwise with the
+// scalar reference.
+static void packBlendshapeOffsets(BlendshapeOffsetUnpacked* unpacked, BlendshapeOffsetPacked* packed, int size) {
+    static const bool _cpuSupportsAVX2 = cpuSupportsAVX2();
+    if (_cpuSupportsAVX2) {
+        // the casts below reinterpret the structs as raw arrays, so pin their sizes
+        static_assert(sizeof(BlendshapeOffsetUnpacked) == 9 * sizeof(float), "struct BlendshapeOffsetUnpacked size doesn't match.");
+        static_assert(sizeof(BlendshapeOffsetPacked) == 4 * sizeof(uint32_t), "struct BlendshapeOffsetPacked size doesn't match.");
+        packBlendshapeOffsets_AVX2(reinterpret_cast<float(*)[9]>(unpacked), reinterpret_cast<uint32_t(*)[4]>(packed), size);
+    } else {
+        packBlendshapeOffsets_ref(unpacked, packed, size);
+    }
+}
+
+#else // portable reference code
+static auto& packBlendshapeOffsets = packBlendshapeOffsets_ref;
+#endif
+
+// Returns the signed 10-bit field that starts at bit `shift` of `word`, sign-extended to int.
+static int unpackSigned10(uint32_t word, int shift) {
+    const int field = static_cast<int>((word >> shift) & 0x3FFu);
+    return (field & 0x200) ? field - 0x400 : field;
+}
+
+// Verifies that `tst` matches `ref`: word 0 (float bit pattern) and the x/y/z fields of words
+// 1..3 may each differ by at most 1. The 2-bit w field of each word is not compared.
+//
+// Fields are decoded with explicit shifts (x = bits 0-9, y = bits 10-19, z = bits 20-29)
+// instead of a bit-field union: bit-field allocation order is implementation-defined and
+// reading a union member other than the one last written is undefined behavior in C++.
+void comparePacked(BlendshapeOffsetPacked& ref, BlendshapeOffsetPacked& tst) {
+    // allow 1 ULP due to rounding differences
+    const uint32_t refScale = ref.packedPosNorTan[0];
+    const uint32_t tstScale = tst.packedPosNorTan[0];
+    QCOMPARE_WITH_ABS_ERROR(tstScale, refScale, 1);
+
+    for (int i = 1; i < 4; i++) {
+        const uint32_t refWord = ref.packedPosNorTan[i];
+        const uint32_t tstWord = tst.packedPosNorTan[i];
+        QCOMPARE_WITH_ABS_ERROR(unpackSigned10(tstWord, 0), unpackSigned10(refWord, 0), 1);
+        QCOMPARE_WITH_ABS_ERROR(unpackSigned10(tstWord, 10), unpackSigned10(refWord, 10), 1);
+        QCOMPARE_WITH_ABS_ERROR(unpackSigned10(tstWord, 20), unpackSigned10(refWord, 20), 1);
+    }
+}
+
+// For every array length in [0, 4096), packs the same input with the scalar reference and with
+// packBlendshapeOffsets(), then requires both outputs to agree within comparePacked()'s tolerance.
+void BlendshapePackingTests::testAVX2() {
+
+    for (int numBlendshapeOffsets = 0; numBlendshapeOffsets < 4096; ++numBlendshapeOffsets) {
+
+        std::vector<BlendshapeOffsetUnpacked> unpackedBlendshapeOffsets(numBlendshapeOffsets);
+        std::vector<BlendshapeOffsetPacked> packedBlendshapeOffsets1(numBlendshapeOffsets);
+        std::vector<BlendshapeOffsetPacked> packedBlendshapeOffsets2(numBlendshapeOffsets);
+
+        // init test data: element 0 is all zeros, the rest are random in [-2, 2] per component
+        if (numBlendshapeOffsets > 0) {
+            unpackedBlendshapeOffsets[0] = {
+                glm::vec3(0.0f, 0.0f, 0.0f), glm::vec3(0.0f, 0.0f, 0.0f), glm::vec3(0.0f, 0.0f, 0.0f),
+            };
+        }
+        for (int i = 1; i < numBlendshapeOffsets; ++i) {
+            unpackedBlendshapeOffsets[i] = {
+                glm::linearRand(glm::vec3(-2.0f, -2.0f, -2.0f), glm::vec3(2.0f, 2.0f, 2.0f)),
+                glm::linearRand(glm::vec3(-2.0f, -2.0f, -2.0f), glm::vec3(2.0f, 2.0f, 2.0f)),
+                glm::linearRand(glm::vec3(-2.0f, -2.0f, -2.0f), glm::vec3(2.0f, 2.0f, 2.0f)),
+            };
+        }
+
+        // ref version
+        packBlendshapeOffsets_ref(unpackedBlendshapeOffsets.data(), packedBlendshapeOffsets1.data(), numBlendshapeOffsets);
+
+        // AVX2 version, if supported by CPU
+        packBlendshapeOffsets(unpackedBlendshapeOffsets.data(), packedBlendshapeOffsets2.data(), numBlendshapeOffsets);
+
+        // verify
+        for (int i = 0; i < numBlendshapeOffsets; ++i) {
+            auto ref = packedBlendshapeOffsets1.at(i);
+            auto tst = packedBlendshapeOffsets2.at(i);
+            comparePacked(ref, tst);
+            // a failed compare only returns from comparePacked(); stop once a mismatch is recorded
+            if (QTest::currentTestFailed()) {
+                return;
+            }
+        }
+    }
+}
diff --git a/tests/shared/src/BlendshapePackingTests.h b/tests/shared/src/BlendshapePackingTests.h
new file mode 100644
index 0000000000..d6d0ef8b10
--- /dev/null
+++ b/tests/shared/src/BlendshapePackingTests.h
@@ -0,0 +1,26 @@
+//
+//  BlendshapePackingTests.h
+//  tests/shared/src
+//
+//  Created by Ken Cooke on 6/24/19.
+//  Copyright 2019 High Fidelity, Inc.
+//
+//  Distributed under the Apache License, Version 2.0.
+//  See the accompanying file LICENSE or http://www.apache.org/licenses/LICENSE-2.0.html
+//
+
+#ifndef hifi_BlendshapePackingTests_h
+#define hifi_BlendshapePackingTests_h
+
+// NOTE(review): include target was missing from the patch copy; inferred from the use of
+// QObject / Q_OBJECT here and QTEST_MAIN in the .cpp -- confirm.
+#include <QtTest/QtTest>
+
+// Qt Test fixture for the blendshape packing comparison implemented in BlendshapePackingTests.cpp.
+class BlendshapePackingTests : public QObject {
+    Q_OBJECT
+private slots:
+    void testAVX2();
+};
+
+#endif // hifi_BlendshapePackingTests_h