mirror of
https://github.com/overte-org/overte.git
synced 2025-08-09 22:28:37 +02:00
Merge pull request #14891 from kencooke/glm-pack-normals-speedup
Case 21114: SIMD optimized blendshape packing
This commit is contained in:
commit
8be93fd16c
5 changed files with 43 additions and 31 deletions
|
@ -257,7 +257,7 @@ template <typename T> struct GpuVec3ToGlm : GpuToGlmAdapter { static T get(con
|
||||||
case gpu::FLOAT: view.edit<glm::fvec3>(index) = value; return true;
|
case gpu::FLOAT: view.edit<glm::fvec3>(index) = value; return true;
|
||||||
case gpu::NUINT8: CHECK_SIZE(glm::uint32); view.edit<glm::uint32>(index) = glm::packUnorm4x8(glm::fvec4(value,0.0f)); return true;
|
case gpu::NUINT8: CHECK_SIZE(glm::uint32); view.edit<glm::uint32>(index) = glm::packUnorm4x8(glm::fvec4(value,0.0f)); return true;
|
||||||
case gpu::UINT8: view.edit<glm::u8vec3>(index) = value; return true;
|
case gpu::UINT8: view.edit<glm::u8vec3>(index) = value; return true;
|
||||||
case gpu::NINT2_10_10_10: view.edit<glm::uint32>(index) = glm::packSnorm3x10_1x2(glm::fvec4(value,0.0f)); return true;
|
case gpu::NINT2_10_10_10: view.edit<glm::uint32>(index) = glm_packSnorm3x10_1x2(glm::fvec4(value,0.0f)); return true;
|
||||||
default: break;
|
default: break;
|
||||||
} error("GpuVec3ToGlm::set", view, index, hint); return false;
|
} error("GpuVec3ToGlm::set", view, index, hint); return false;
|
||||||
}
|
}
|
||||||
|
@ -295,7 +295,7 @@ template <typename T> struct GpuVec4ToGlm : GpuToGlmAdapter { static T get(const
|
||||||
case gpu::FLOAT: view.edit<glm::fvec4>(index) = value; return true;
|
case gpu::FLOAT: view.edit<glm::fvec4>(index) = value; return true;
|
||||||
case gpu::HALF: CHECK_SIZE(glm::uint64); view.edit<glm::uint64_t>(index) = glm::packHalf4x16(value); return true;
|
case gpu::HALF: CHECK_SIZE(glm::uint64); view.edit<glm::uint64_t>(index) = glm::packHalf4x16(value); return true;
|
||||||
case gpu::UINT8: view.edit<glm::u8vec4>(index) = value; return true;
|
case gpu::UINT8: view.edit<glm::u8vec4>(index) = value; return true;
|
||||||
case gpu::NINT2_10_10_10: view.edit<glm::uint32>(index) = glm::packSnorm3x10_1x2(value); return true;
|
case gpu::NINT2_10_10_10: view.edit<glm::uint32>(index) = glm_packSnorm3x10_1x2(value); return true;
|
||||||
case gpu::NUINT16: CHECK_SIZE(glm::uint64); view.edit<glm::uint64>(index) = glm::packUnorm4x16(value); return true;
|
case gpu::NUINT16: CHECK_SIZE(glm::uint64); view.edit<glm::uint64>(index) = glm::packUnorm4x16(value); return true;
|
||||||
case gpu::NUINT8: CHECK_SIZE(glm::uint32); view.edit<glm::uint32>(index) = glm::packUnorm4x8(value); return true;
|
case gpu::NUINT8: CHECK_SIZE(glm::uint32); view.edit<glm::uint32>(index) = glm::packUnorm4x8(value); return true;
|
||||||
default: break;
|
default: break;
|
||||||
|
|
|
@ -46,30 +46,6 @@ namespace buffer_helpers {
|
||||||
gpu::BufferView clone(const gpu::BufferView& input);
|
gpu::BufferView clone(const gpu::BufferView& input);
|
||||||
gpu::BufferView resized(const gpu::BufferView& input, glm::uint32 numElements);
|
gpu::BufferView resized(const gpu::BufferView& input, glm::uint32 numElements);
|
||||||
|
|
||||||
// Quantizes a normal and a tangent into two packed 10/10/10/2 signed-integer words.
// Each direction is divided by its largest absolute component (floored at 1e-6 so a
// zero vector does not divide by zero), clamped to [-1, 1], scaled by 511 and rounded
// with fastLrintf(). The 2-bit w field is always written as zero.
inline void packNormalAndTangent(glm::vec3 normal, glm::vec3 tangent, glm::uint32& packedNormal, glm::uint32& packedTangent) {
    // Both inputs go through exactly the same steps, so the work lives in one helper.
    const auto quantizeDirection = [](glm::vec3 dir) -> glm::uint32 {
        const auto magnitude = glm::abs(dir);
        const float largest = glm::max(glm::max(magnitude.x, magnitude.y), magnitude.z);

        dir /= glm::max(1e-6f, largest);
        dir = glm::clamp(dir, -1.0f, 1.0f);
        dir *= 511.0f;

        glm::detail::i10i10i10i2 fields;
        fields.data.x = fastLrintf(dir.x);
        fields.data.y = fastLrintf(dir.y);
        fields.data.z = fastLrintf(dir.z);
        fields.data.w = 0;
        return fields.pack;
    };

    packedNormal = quantizeDirection(normal);
    packedTangent = quantizeDirection(tangent);
}
|
|
||||||
|
|
||||||
namespace mesh {
|
namespace mesh {
|
||||||
glm::uint32 forEachVertex(const graphics::MeshPointer& mesh, std::function<bool(glm::uint32 index, const QVariantMap& attributes)> func);
|
glm::uint32 forEachVertex(const graphics::MeshPointer& mesh, std::function<bool(glm::uint32 index, const QVariantMap& attributes)> func);
|
||||||
bool setVertexAttributes(const graphics::MeshPointer& mesh, glm::uint32 index, const QVariantMap& attributes);
|
bool setVertexAttributes(const graphics::MeshPointer& mesh, glm::uint32 index, const QVariantMap& attributes);
|
||||||
|
|
|
@ -125,8 +125,8 @@ void buildGraphicsMesh(const hfm::Mesh& hfmMesh, graphics::MeshPointer& graphics
|
||||||
#if HFM_PACK_NORMALS
|
#if HFM_PACK_NORMALS
|
||||||
const auto normal = normalizeDirForPacking(*normalIt);
|
const auto normal = normalizeDirForPacking(*normalIt);
|
||||||
const auto tangent = normalizeDirForPacking(*tangentIt);
|
const auto tangent = normalizeDirForPacking(*tangentIt);
|
||||||
const auto packedNormal = glm::packSnorm3x10_1x2(glm::vec4(normal, 0.0f));
|
const auto packedNormal = glm_packSnorm3x10_1x2(glm::vec4(normal, 0.0f));
|
||||||
const auto packedTangent = glm::packSnorm3x10_1x2(glm::vec4(tangent, 0.0f));
|
const auto packedTangent = glm_packSnorm3x10_1x2(glm::vec4(tangent, 0.0f));
|
||||||
#else
|
#else
|
||||||
const auto packedNormal = *normalIt;
|
const auto packedNormal = *normalIt;
|
||||||
const auto packedTangent = *tangentIt;
|
const auto packedTangent = *tangentIt;
|
||||||
|
|
|
@ -1656,9 +1656,9 @@ void packBlendshapeOffsetTo_Pos_F32_3xSN10_Nor_3xSN10_Tan_3xSN10(glm::uvec4& pac
|
||||||
|
|
||||||
packed = glm::uvec4(
|
packed = glm::uvec4(
|
||||||
glm::floatBitsToUint(len),
|
glm::floatBitsToUint(len),
|
||||||
glm::packSnorm3x10_1x2(glm::vec4(normalizedPos, 0.0f)),
|
glm_packSnorm3x10_1x2(glm::vec4(normalizedPos, 0.0f)),
|
||||||
glm::packSnorm3x10_1x2(glm::vec4(unpacked.normalOffset, 0.0f)),
|
glm_packSnorm3x10_1x2(glm::vec4(unpacked.normalOffset, 0.0f)),
|
||||||
glm::packSnorm3x10_1x2(glm::vec4(unpacked.tangentOffset, 0.0f))
|
glm_packSnorm3x10_1x2(glm::vec4(unpacked.tangentOffset, 0.0f))
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -315,6 +315,42 @@ inline void glm_mat4u_mul(const glm::mat4& m1, const glm::mat4& m2, glm::mat4& r
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//
|
||||||
|
// Fast replacement of glm::packSnorm3x10_1x2()
|
||||||
|
// The SSE2 version quantizes using round to nearest even.
|
||||||
|
// The glm version quantizes using round away from zero.
|
||||||
|
//
|
||||||
|
inline uint32_t glm_packSnorm3x10_1x2(vec4 const& v) {
|
||||||
|
|
||||||
|
union i10i10i10i2 {
|
||||||
|
struct {
|
||||||
|
int x : 10;
|
||||||
|
int y : 10;
|
||||||
|
int z : 10;
|
||||||
|
int w : 2;
|
||||||
|
} data;
|
||||||
|
uint32_t pack;
|
||||||
|
} Result;
|
||||||
|
|
||||||
|
#if GLM_ARCH & GLM_ARCH_SSE2_BIT
|
||||||
|
__m128 vclamp = _mm_min_ps(_mm_max_ps(_mm_loadu_ps((float*)&v[0]), _mm_set1_ps(-1.0f)), _mm_set1_ps(1.0f));
|
||||||
|
__m128i vpack = _mm_cvtps_epi32(_mm_mul_ps(vclamp, _mm_setr_ps(511.f, 511.f, 511.f, 1.f)));
|
||||||
|
|
||||||
|
Result.data.x = _mm_cvtsi128_si32(vpack);
|
||||||
|
Result.data.y = _mm_cvtsi128_si32(_mm_shuffle_epi32(vpack, _MM_SHUFFLE(1,1,1,1)));
|
||||||
|
Result.data.z = _mm_cvtsi128_si32(_mm_shuffle_epi32(vpack, _MM_SHUFFLE(2,2,2,2)));
|
||||||
|
Result.data.w = _mm_cvtsi128_si32(_mm_shuffle_epi32(vpack, _MM_SHUFFLE(3,3,3,3)));
|
||||||
|
#else
|
||||||
|
ivec4 const Pack(round(clamp(v, -1.0f, 1.0f) * vec4(511.f, 511.f, 511.f, 1.f)));
|
||||||
|
|
||||||
|
Result.data.x = Pack.x;
|
||||||
|
Result.data.y = Pack.y;
|
||||||
|
Result.data.z = Pack.z;
|
||||||
|
Result.data.w = Pack.w;
|
||||||
|
#endif
|
||||||
|
return Result.pack;
|
||||||
|
}
|
||||||
|
|
||||||
// convert float to int, using round-to-nearest-even (undefined on overflow)
|
// convert float to int, using round-to-nearest-even (undefined on overflow)
|
||||||
inline int fastLrintf(float x) {
|
inline int fastLrintf(float x) {
|
||||||
#if GLM_ARCH & GLM_ARCH_SSE2_BIT
|
#if GLM_ARCH & GLM_ARCH_SSE2_BIT
|
||||||
|
|
Loading…
Reference in a new issue