mirror of
https://github.com/JulianGro/overte.git
synced 2025-04-05 23:42:58 +02:00
Add unit-test to validate AVX2
This commit is contained in:
parent
541d45012a
commit
2322df5a59
2 changed files with 171 additions and 0 deletions
148
tests/shared/src/BlendshapePackingTests.cpp
Normal file
148
tests/shared/src/BlendshapePackingTests.cpp
Normal file
|
@ -0,0 +1,148 @@
|
|||
//
|
||||
// BlendshapePackingTests.cpp
|
||||
// tests/shared/src
|
||||
//
|
||||
// Created by Ken Cooke on 6/24/19.
|
||||
// Copyright 2019 High Fidelity, Inc.
|
||||
//
|
||||
// Distributed under the Apache License, Version 2.0.
|
||||
// See the accompanying file LICENSE or http://www.apache.org/licenses/LICENSE-2.0.html
|
||||
//
|
||||
|
||||
#include "BlendshapePackingTests.h"
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include <test-utils/QTestExtensions.h>
|
||||
|
||||
#include <GLMHelpers.h>
|
||||
#include <glm/gtc/random.hpp>
|
||||
|
||||
struct BlendshapeOffsetUnpacked {
|
||||
glm::vec3 positionOffset;
|
||||
glm::vec3 normalOffset;
|
||||
glm::vec3 tangentOffset;
|
||||
};
|
||||
|
||||
struct BlendshapeOffsetPacked {
|
||||
glm::uvec4 packedPosNorTan;
|
||||
};
|
||||
|
||||
// Qt Test entry point: expands to a main() that runs the test slots of BlendshapePackingTests.
QTEST_MAIN(BlendshapePackingTests)
|
||||
|
||||
// Packs one unpacked blendshape offset into a glm::uvec4:
//   [0] raw bits of the float scale: the largest absolute position component,
//       or 1.0f when that value is not greater than zero
//   [1] position offset divided by that scale, packed with glm_packSnorm3x10_1x2
//   [2] normal offset, packed with glm_packSnorm3x10_1x2 (not rescaled)
//   [3] tangent offset, packed with glm_packSnorm3x10_1x2 (not rescaled)
static void packBlendshapeOffsetTo_Pos_F32_3xSN10_Nor_3xSN10_Tan_3xSN10(glm::uvec4& packed, const BlendshapeOffsetUnpacked& unpacked) {
    const float maxAbsComponent = glm::compMax(glm::abs(unpacked.positionOffset));
    const bool canNormalize = maxAbsComponent > 0.0f;

    // When there is nothing to normalize by, leave the position untouched and
    // store a neutral scale of 1 so the division is skipped.
    const float scale = canNormalize ? maxAbsComponent : 1.0f;
    glm::vec3 scaledPosition = unpacked.positionOffset;
    if (canNormalize) {
        scaledPosition /= scale;
    }

    const auto scaleBits = glm::floatBitsToUint(scale);
    const auto positionBits = glm_packSnorm3x10_1x2(glm::vec4(scaledPosition, 0.0f));
    const auto normalBits = glm_packSnorm3x10_1x2(glm::vec4(unpacked.normalOffset, 0.0f));
    const auto tangentBits = glm_packSnorm3x10_1x2(glm::vec4(unpacked.tangentOffset, 0.0f));

    packed = glm::uvec4(scaleBits, positionBits, normalBits, tangentBits);
}
|
||||
|
||||
// Portable reference packer: converts `size` consecutive unpacked offsets into
// their packed form, one element at a time, using the scalar pack routine.
// `unpacked` and `packed` must each point to at least `size` elements.
static void packBlendshapeOffsets_ref(BlendshapeOffsetUnpacked* unpacked, BlendshapeOffsetPacked* packed, int size) {
    for (int index = 0; index < size; ++index) {
        packBlendshapeOffsetTo_Pos_F32_3xSN10_Nor_3xSN10_Tan_3xSN10(packed[index].packedPosNorTan, unpacked[index]);
    }
}
|
||||
|
||||
#if defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || defined(__x86_64__)
//
// Runtime CPU dispatch
//
#include <CPUDetect.h>

// AVX2 packer operating on raw float[9] / uint32_t[4] records.
// Only declared here; its definition is not part of this file.
void packBlendshapeOffsets_AVX2(float (*unpacked)[9], uint32_t (*packed)[4], int size);

// Packs `size` offsets, using the AVX2 routine when the CPU reports support
// for it and the scalar reference routine otherwise.
static void packBlendshapeOffsets(BlendshapeOffsetUnpacked* unpacked, BlendshapeOffsetPacked* packed, int size) {
    // The pointer reinterpretation below is only valid if the structs have
    // exactly the record sizes the AVX2 routine expects.
    static_assert(sizeof(BlendshapeOffsetUnpacked) == 9 * sizeof(float), "struct BlendshapeOffsetUnpacked size doesn't match.");
    static_assert(sizeof(BlendshapeOffsetPacked) == 4 * sizeof(uint32_t), "struct BlendshapeOffsetPacked size doesn't match.");

    // Query the CPU once; later calls reuse the cached answer.
    static const bool hasAVX2 = cpuSupportsAVX2();
    if (!hasAVX2) {
        packBlendshapeOffsets_ref(unpacked, packed, size);
        return;
    }

    packBlendshapeOffsets_AVX2(reinterpret_cast<float(*)[9]>(unpacked),
                               reinterpret_cast<uint32_t(*)[4]>(packed),
                               size);
}

#else // portable reference code

// No x86 SIMD available: the dispatcher is simply an alias of the reference packer.
static auto& packBlendshapeOffsets = packBlendshapeOffsets_ref;

#endif
|
||||
|
||||
// Returns the signed 10-bit field that starts at bit `shift` of `word`,
// sign-extended to int. Done with explicit shifts and masks so the result does
// not depend on union type punning or on how the compiler lays out bit-fields.
static int extractSigned10(uint32_t word, int shift) {
    const int raw = static_cast<int>((word >> shift) & 0x3FFu);
    // Two's-complement sign extension of a 10-bit value: 0x200 is the sign bit.
    return (raw ^ 0x200) - 0x200;
}

// Verifies that a packed offset produced by the implementation under test
// (`tst`) matches the reference result (`ref`).
//   - word 0 (float scale bits) may differ by at most 1
//   - each 10-bit x/y/z field of words 1..3 (position, normal, tangent) may
//     differ by at most 1
// The top 2 bits of words 1..3 are not compared.
// Mismatches are reported through QCOMPARE_WITH_ABS_ERROR.
void comparePacked(BlendshapeOffsetPacked& ref, BlendshapeOffsetPacked& tst) {
    // allow 1 ULP due to rounding differences
    const uint32_t tstScaleBits = tst.packedPosNorTan[0];
    const uint32_t refScaleBits = ref.packedPosNorTan[0];
    QCOMPARE_WITH_ABS_ERROR(tstScaleBits, refScaleBits, 1);

    // words 1..3 hold position, normal and tangent as x:10 | y:10 | z:10 | w:2,
    // with x in the least-significant bits
    for (int word = 1; word < 4; ++word) {
        const uint32_t tstWord = tst.packedPosNorTan[word];
        const uint32_t refWord = ref.packedPosNorTan[word];

        const int tstX = extractSigned10(tstWord, 0);
        const int refX = extractSigned10(refWord, 0);
        QCOMPARE_WITH_ABS_ERROR(tstX, refX, 1);

        const int tstY = extractSigned10(tstWord, 10);
        const int refY = extractSigned10(refWord, 10);
        QCOMPARE_WITH_ABS_ERROR(tstY, refY, 1);

        const int tstZ = extractSigned10(tstWord, 20);
        const int refZ = extractSigned10(refWord, 20);
        QCOMPARE_WITH_ABS_ERROR(tstZ, refZ, 1);
    }
}
|
||||
|
||||
void BlendshapePackingTests::testAVX2() {
|
||||
|
||||
for (int numBlendshapeOffsets = 0; numBlendshapeOffsets < 4096; ++numBlendshapeOffsets) {
|
||||
|
||||
std::vector<BlendshapeOffsetUnpacked> unpackedBlendshapeOffsets(numBlendshapeOffsets);
|
||||
std::vector<BlendshapeOffsetPacked> packedBlendshapeOffsets1(numBlendshapeOffsets);
|
||||
std::vector<BlendshapeOffsetPacked> packedBlendshapeOffsets2(numBlendshapeOffsets);
|
||||
|
||||
// init test data
|
||||
if (numBlendshapeOffsets > 0) {
|
||||
unpackedBlendshapeOffsets[0] = {
|
||||
glm::vec3(0.0f, 0.0f, 0.0f), glm::vec3(0.0f, 0.0f, 0.0f), glm::vec3(0.0f, 0.0f, 0.0f),
|
||||
};
|
||||
}
|
||||
for (int i = 1; i < numBlendshapeOffsets; ++i) {
|
||||
unpackedBlendshapeOffsets[i] = {
|
||||
glm::linearRand(glm::vec3(-2.0f, -2.0f, -2.0f), glm::vec3(2.0f, 2.0f, 2.0f)),
|
||||
glm::linearRand(glm::vec3(-2.0f, -2.0f, -2.0f), glm::vec3(2.0f, 2.0f, 2.0f)),
|
||||
glm::linearRand(glm::vec3(-2.0f, -2.0f, -2.0f), glm::vec3(2.0f, 2.0f, 2.0f)),
|
||||
};
|
||||
}
|
||||
|
||||
// ref version
|
||||
packBlendshapeOffsets_ref(unpackedBlendshapeOffsets.data(), packedBlendshapeOffsets1.data(), numBlendshapeOffsets);
|
||||
|
||||
// AVX2 version, if supported by CPU
|
||||
packBlendshapeOffsets(unpackedBlendshapeOffsets.data(), packedBlendshapeOffsets2.data(), numBlendshapeOffsets);
|
||||
|
||||
// verify
|
||||
for (int i = 0; i < numBlendshapeOffsets; ++i) {
|
||||
auto ref = packedBlendshapeOffsets1.at(i);
|
||||
auto tst = packedBlendshapeOffsets2.at(i);
|
||||
comparePacked(ref, tst);
|
||||
}
|
||||
}
|
||||
}
|
23
tests/shared/src/BlendshapePackingTests.h
Normal file
23
tests/shared/src/BlendshapePackingTests.h
Normal file
|
@ -0,0 +1,23 @@
|
|||
//
|
||||
// BlendshapePackingTests.h
|
||||
// tests/shared/src
|
||||
//
|
||||
// Created by Ken Cooke on 6/24/19.
|
||||
// Copyright 2019 High Fidelity, Inc.
|
||||
//
|
||||
// Distributed under the Apache License, Version 2.0.
|
||||
// See the accompanying file LICENSE or http://www.apache.org/licenses/LICENSE-2.0.html
|
||||
//
|
||||
|
||||
#ifndef hifi_BlendshapePackingTests_h
|
||||
#define hifi_BlendshapePackingTests_h
|
||||
|
||||
#include <QtTest/QtTest>
|
||||
|
||||
class BlendshapePackingTests : public QObject {
|
||||
Q_OBJECT
|
||||
private slots:
|
||||
void testAVX2();
|
||||
};
|
||||
|
||||
#endif // hifi_BlendshapePackingTests_h
|
Loading…
Reference in a new issue