From b079e3283ede4ce91ff1dda5aebdf1d5aadedf8c Mon Sep 17 00:00:00 2001 From: Ken Cooke Date: Tue, 25 Jun 2019 11:48:41 -0700 Subject: [PATCH 1/3] BUGFIX: use unaligned loads --- .../shared/src/avx2/BlendshapePacking_avx2.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/libraries/shared/src/avx2/BlendshapePacking_avx2.cpp b/libraries/shared/src/avx2/BlendshapePacking_avx2.cpp index 5524c355dc..1fea60a315 100644 --- a/libraries/shared/src/avx2/BlendshapePacking_avx2.cpp +++ b/libraries/shared/src/avx2/BlendshapePacking_avx2.cpp @@ -21,14 +21,14 @@ void packBlendshapeOffsets_AVX2(float (*unpacked)[9], uint32_t (*packed)[4], int // // deinterleave (8x9 to 9x8 matrix transpose) // - __m256 s0 = _mm256_insertf128_ps(_mm256_castps128_ps256(_mm_load_ps(&unpacked[i+0][0])), _mm_load_ps(&unpacked[i+4][0]), 1); - __m256 s1 = _mm256_insertf128_ps(_mm256_castps128_ps256(_mm_load_ps(&unpacked[i+1][0])), _mm_load_ps(&unpacked[i+5][0]), 1); - __m256 s2 = _mm256_insertf128_ps(_mm256_castps128_ps256(_mm_load_ps(&unpacked[i+2][0])), _mm_load_ps(&unpacked[i+6][0]), 1); - __m256 s3 = _mm256_insertf128_ps(_mm256_castps128_ps256(_mm_load_ps(&unpacked[i+3][0])), _mm_load_ps(&unpacked[i+7][0]), 1); - __m256 s4 = _mm256_insertf128_ps(_mm256_castps128_ps256(_mm_load_ps(&unpacked[i+0][4])), _mm_load_ps(&unpacked[i+4][4]), 1); - __m256 s5 = _mm256_insertf128_ps(_mm256_castps128_ps256(_mm_load_ps(&unpacked[i+1][4])), _mm_load_ps(&unpacked[i+5][4]), 1); - __m256 s6 = _mm256_insertf128_ps(_mm256_castps128_ps256(_mm_load_ps(&unpacked[i+2][4])), _mm_load_ps(&unpacked[i+6][4]), 1); - __m256 s7 = _mm256_insertf128_ps(_mm256_castps128_ps256(_mm_load_ps(&unpacked[i+3][4])), _mm_load_ps(&unpacked[i+7][4]), 1); + __m256 s0 = _mm256_insertf128_ps(_mm256_castps128_ps256(_mm_loadu_ps(&unpacked[i+0][0])), _mm_loadu_ps(&unpacked[i+4][0]), 1); + __m256 s1 = _mm256_insertf128_ps(_mm256_castps128_ps256(_mm_loadu_ps(&unpacked[i+1][0])), _mm_loadu_ps(&unpacked[i+5][0]), 1); + __m256 s2 = _mm256_insertf128_ps(_mm256_castps128_ps256(_mm_loadu_ps(&unpacked[i+2][0])), _mm_loadu_ps(&unpacked[i+6][0]), 1); + __m256 s3 = _mm256_insertf128_ps(_mm256_castps128_ps256(_mm_loadu_ps(&unpacked[i+3][0])), _mm_loadu_ps(&unpacked[i+7][0]), 1); + __m256 s4 = _mm256_insertf128_ps(_mm256_castps128_ps256(_mm_loadu_ps(&unpacked[i+0][4])), _mm_loadu_ps(&unpacked[i+4][4]), 1); + __m256 s5 = _mm256_insertf128_ps(_mm256_castps128_ps256(_mm_loadu_ps(&unpacked[i+1][4])), _mm_loadu_ps(&unpacked[i+5][4]), 1); + __m256 s6 = _mm256_insertf128_ps(_mm256_castps128_ps256(_mm_loadu_ps(&unpacked[i+2][4])), _mm_loadu_ps(&unpacked[i+6][4]), 1); + __m256 s7 = _mm256_insertf128_ps(_mm256_castps128_ps256(_mm_loadu_ps(&unpacked[i+3][4])), _mm_loadu_ps(&unpacked[i+7][4]), 1); __m256 t0 = _mm256_unpacklo_ps(s0, s1); __m256 t1 = _mm256_unpackhi_ps(s0, s1); From 0a34745ff9c002a3b28a723b1e3ab984565ea49b Mon Sep 17 00:00:00 2001 From: Ken Cooke Date: Tue, 25 Jun 2019 12:53:07 -0700 Subject: [PATCH 2/3] Use QVector resize() instead of reserve() to silence Debug builds about size not being set. Causes unneeded default initialization, but no obvious way to avoid. --- libraries/render-utils/src/Model.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/libraries/render-utils/src/Model.cpp b/libraries/render-utils/src/Model.cpp index 11c1e42fd4..ce3ff51502 100644 --- a/libraries/render-utils/src/Model.cpp +++ b/libraries/render-utils/src/Model.cpp @@ -1777,13 +1777,13 @@ void Blender::run() { // allocate the required sizes QVector blendedMeshSizes; - blendedMeshSizes.reserve(numMeshes); + blendedMeshSizes.resize(numMeshes); QVector packedBlendshapeOffsets; - packedBlendshapeOffsets.reserve(numBlendshapeOffsets); + packedBlendshapeOffsets.resize(numBlendshapeOffsets); QVector unpackedBlendshapeOffsets; - unpackedBlendshapeOffsets.reserve(maxBlendshapeOffsets); // reuse for all meshes + unpackedBlendshapeOffsets.resize(maxBlendshapeOffsets); // reuse for all meshes int offset = 0; for (auto meshIter = _hfmModel->meshes.cbegin(); meshIter != _hfmModel->meshes.cend(); ++meshIter) { From 4c5c61231d856f4f3dcf2f4102dc8cb1e050093a Mon Sep 17 00:00:00 2001 From: Ken Cooke Date: Tue, 25 Jun 2019 12:57:55 -0700 Subject: [PATCH 3/3] Only use QVector resize when actually needed --- libraries/render-utils/src/Model.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libraries/render-utils/src/Model.cpp b/libraries/render-utils/src/Model.cpp index ce3ff51502..74cf1ffa39 100644 --- a/libraries/render-utils/src/Model.cpp +++ b/libraries/render-utils/src/Model.cpp @@ -1777,7 +1777,7 @@ void Blender::run() { // allocate the required sizes QVector blendedMeshSizes; - blendedMeshSizes.resize(numMeshes); + blendedMeshSizes.reserve(numMeshes); QVector packedBlendshapeOffsets; packedBlendshapeOffsets.resize(numBlendshapeOffsets);