From 68480f67607728ae080557fc8ce7b75b378e1c39 Mon Sep 17 00:00:00 2001 From: Ken Cooke Date: Sat, 9 Feb 2019 10:49:18 -0800 Subject: [PATCH 1/3] Replace glm::packSnorm3x10_1x2() with fast SIMD implementation --- .../src/graphics/BufferViewHelpers.cpp | 4 +-- .../src/model-baker/BuildGraphicsMeshTask.cpp | 4 +-- libraries/render-utils/src/Model.cpp | 6 ++-- libraries/shared/src/GLMHelpers.h | 36 +++++++++++++++++++ 4 files changed, 43 insertions(+), 7 deletions(-) diff --git a/libraries/graphics/src/graphics/BufferViewHelpers.cpp b/libraries/graphics/src/graphics/BufferViewHelpers.cpp index 4c57abdfd4..301f5d8d73 100644 --- a/libraries/graphics/src/graphics/BufferViewHelpers.cpp +++ b/libraries/graphics/src/graphics/BufferViewHelpers.cpp @@ -257,7 +257,7 @@ template struct GpuVec3ToGlm : GpuToGlmAdapter { static T get(con case gpu::FLOAT: view.edit(index) = value; return true; case gpu::NUINT8: CHECK_SIZE(glm::uint32); view.edit(index) = glm::packUnorm4x8(glm::fvec4(value,0.0f)); return true; case gpu::UINT8: view.edit(index) = value; return true; - case gpu::NINT2_10_10_10: view.edit(index) = glm::packSnorm3x10_1x2(glm::fvec4(value,0.0f)); return true; + case gpu::NINT2_10_10_10: view.edit(index) = glm_packSnorm3x10_1x2(glm::fvec4(value,0.0f)); return true; default: break; } error("GpuVec3ToGlm::set", view, index, hint); return false; } @@ -295,7 +295,7 @@ template struct GpuVec4ToGlm : GpuToGlmAdapter { static T get(const case gpu::FLOAT: view.edit(index) = value; return true; case gpu::HALF: CHECK_SIZE(glm::uint64); view.edit(index) = glm::packHalf4x16(value); return true; case gpu::UINT8: view.edit(index) = value; return true; - case gpu::NINT2_10_10_10: view.edit(index) = glm::packSnorm3x10_1x2(value); return true; + case gpu::NINT2_10_10_10: view.edit(index) = glm_packSnorm3x10_1x2(value); return true; case gpu::NUINT16: CHECK_SIZE(glm::uint64); view.edit(index) = glm::packUnorm4x16(value); return true; case gpu::NUINT8: CHECK_SIZE(glm::uint32); view.edit(index) = glm::packUnorm4x8(value); return true; default: break; diff --git a/libraries/model-baker/src/model-baker/BuildGraphicsMeshTask.cpp b/libraries/model-baker/src/model-baker/BuildGraphicsMeshTask.cpp index 370add2c2e..c41431f940 100644 --- a/libraries/model-baker/src/model-baker/BuildGraphicsMeshTask.cpp +++ b/libraries/model-baker/src/model-baker/BuildGraphicsMeshTask.cpp @@ -125,8 +125,8 @@ void buildGraphicsMesh(const hfm::Mesh& hfmMesh, graphics::MeshPointer& graphics #if HFM_PACK_NORMALS const auto normal = normalizeDirForPacking(*normalIt); const auto tangent = normalizeDirForPacking(*tangentIt); - const auto packedNormal = glm::packSnorm3x10_1x2(glm::vec4(normal, 0.0f)); - const auto packedTangent = glm::packSnorm3x10_1x2(glm::vec4(tangent, 0.0f)); + const auto packedNormal = glm_packSnorm3x10_1x2(glm::vec4(normal, 0.0f)); + const auto packedTangent = glm_packSnorm3x10_1x2(glm::vec4(tangent, 0.0f)); #else const auto packedNormal = *normalIt; const auto packedTangent = *tangentIt; diff --git a/libraries/render-utils/src/Model.cpp b/libraries/render-utils/src/Model.cpp index da8dceb176..9489166f43 100644 --- a/libraries/render-utils/src/Model.cpp +++ b/libraries/render-utils/src/Model.cpp @@ -1594,9 +1594,9 @@ void packBlendshapeOffsetTo_Pos_F32_3xSN10_Nor_3xSN10_Tan_3xSN10(glm::uvec4& pac packed = glm::uvec4( glm::floatBitsToUint(len), - glm::packSnorm3x10_1x2(glm::vec4(normalizedPos, 0.0f)), - glm::packSnorm3x10_1x2(glm::vec4(unpacked.normalOffset, 0.0f)), - glm::packSnorm3x10_1x2(glm::vec4(unpacked.tangentOffset, 0.0f)) + glm_packSnorm3x10_1x2(glm::vec4(normalizedPos, 0.0f)), + glm_packSnorm3x10_1x2(glm::vec4(unpacked.normalOffset, 0.0f)), + glm_packSnorm3x10_1x2(glm::vec4(unpacked.tangentOffset, 0.0f)) ); } diff --git a/libraries/shared/src/GLMHelpers.h b/libraries/shared/src/GLMHelpers.h index e7aaace1ae..e50162d8a4 100644 --- a/libraries/shared/src/GLMHelpers.h +++ b/libraries/shared/src/GLMHelpers.h @@ -315,6 +315,42 @@ inline void glm_mat4u_mul(const glm::mat4& m1, const glm::mat4& m2, glm::mat4& r #endif } +// +// Fast replacement of glm::packSnorm3x10_1x2() +// The SSE2 version quantizes using round to nearest even. +// The glm version quantizes using round away from zero. +// +inline uint32_t glm_packSnorm3x10_1x2(vec4 const& v) { + + union i10i10i10i2 { + struct { + int x : 10; + int y : 10; + int z : 10; + int w : 2; + } data; + uint32_t pack; + } Result; + +#if GLM_ARCH & GLM_ARCH_SSE2_BIT + __m128 vclamp = _mm_min_ps(_mm_max_ps(_mm_loadu_ps((float*)&v[0]), _mm_set1_ps(-1.0f)), _mm_set1_ps(1.0f)); + __m128i vpack = _mm_cvtps_epi32(_mm_mul_ps(vclamp, _mm_setr_ps(511.f, 511.f, 511.f, 1.f))); + + Result.data.x = _mm_cvtsi128_si32(vpack); + Result.data.y = _mm_cvtsi128_si32(_mm_shuffle_epi32(vpack, _MM_SHUFFLE(1,1,1,1))); + Result.data.z = _mm_cvtsi128_si32(_mm_shuffle_epi32(vpack, _MM_SHUFFLE(2,2,2,2))); + Result.data.w = _mm_cvtsi128_si32(_mm_shuffle_epi32(vpack, _MM_SHUFFLE(3,3,3,3))); +#else + ivec4 const Pack(round(clamp(v, -1.0f, 1.0f) * vec4(511.f, 511.f, 511.f, 1.f))); + + Result.data.x = Pack.x; + Result.data.y = Pack.y; + Result.data.z = Pack.z; + Result.data.w = Pack.w; +#endif + return Result.pack; +} + // convert float to int, using round-to-nearest-even (undefined on overflow) inline int fastLrintf(float x) { #if GLM_ARCH & GLM_ARCH_SSE2_BIT From 6f5514b5e3cfe920c3b634c4370c52071eaf30ab Mon Sep 17 00:00:00 2001 From: Ken Cooke Date: Sat, 9 Feb 2019 10:57:06 -0800 Subject: [PATCH 2/3] Remove dead code --- .../graphics/src/graphics/BufferViewHelpers.h | 24 ------------------- 1 file changed, 24 deletions(-) diff --git a/libraries/graphics/src/graphics/BufferViewHelpers.h b/libraries/graphics/src/graphics/BufferViewHelpers.h index 8a48c17007..3635ef64e5 100644 --- a/libraries/graphics/src/graphics/BufferViewHelpers.h +++ b/libraries/graphics/src/graphics/BufferViewHelpers.h @@ -46,30 +46,6 @@ namespace buffer_helpers { gpu::BufferView clone(const gpu::BufferView& input); gpu::BufferView resized(const gpu::BufferView& input, glm::uint32 numElements); - inline void packNormalAndTangent(glm::vec3 normal, glm::vec3 tangent, glm::uint32& packedNormal, glm::uint32& packedTangent) { - auto absNormal = glm::abs(normal); - auto absTangent = glm::abs(tangent); - normal /= glm::max(1e-6f, glm::max(glm::max(absNormal.x, absNormal.y), absNormal.z)); - tangent /= glm::max(1e-6f, glm::max(glm::max(absTangent.x, absTangent.y), absTangent.z)); - normal = glm::clamp(normal, -1.0f, 1.0f); - tangent = glm::clamp(tangent, -1.0f, 1.0f); - normal *= 511.0f; - tangent *= 511.0f; - - glm::detail::i10i10i10i2 normalStruct; - glm::detail::i10i10i10i2 tangentStruct; - normalStruct.data.x = fastLrintf(normal.x); - normalStruct.data.y = fastLrintf(normal.y); - normalStruct.data.z = fastLrintf(normal.z); - normalStruct.data.w = 0; - tangentStruct.data.x = fastLrintf(tangent.x); - tangentStruct.data.y = fastLrintf(tangent.y); - tangentStruct.data.z = fastLrintf(tangent.z); - tangentStruct.data.w = 0; - packedNormal = normalStruct.pack; - packedTangent = tangentStruct.pack; - } - namespace mesh { glm::uint32 forEachVertex(const graphics::MeshPointer& mesh, std::function func); bool setVertexAttributes(const graphics::MeshPointer& mesh, glm::uint32 index, const QVariantMap& attributes); From 7236d63da0c544c4196095dcbeac151fcbb52de6 Mon Sep 17 00:00:00 2001 From: Ken Cooke Date: Sat, 9 Feb 2019 12:16:26 -0800 Subject: [PATCH 3/3] Fix tabs --- libraries/shared/src/GLMHelpers.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libraries/shared/src/GLMHelpers.h b/libraries/shared/src/GLMHelpers.h index e50162d8a4..6deae695cd 100644 --- a/libraries/shared/src/GLMHelpers.h +++ b/libraries/shared/src/GLMHelpers.h @@ -322,7 +322,7 @@ inline void glm_mat4u_mul(const glm::mat4& m1, const glm::mat4& m2, glm::mat4& r // inline uint32_t glm_packSnorm3x10_1x2(vec4 const& v) { - union i10i10i10i2 { + union i10i10i10i2 { struct { int x : 10; int y : 10;