diff --git a/libraries/render-utils/src/Model.cpp b/libraries/render-utils/src/Model.cpp index b24c1a01cc..eb2508d0e4 100644 --- a/libraries/render-utils/src/Model.cpp +++ b/libraries/render-utils/src/Model.cpp @@ -1741,15 +1741,23 @@ Blender::Blender(ModelPointer model, int blendNumber, const Geometry::WeakPointe _blendshapeCoefficients(blendshapeCoefficients) { } +#define BLENDER_USE_NONE 0 +#define BLENDER_USE_TBB 1 +#define BLENDER_TBB_CHUNK_SIZE 512 +#define BLENDER_USE_OPENMP 2 + + +#define BLENDER_PARALLELISM BLENDER_USE_NONE + void Blender::run() { QVector blendshapeOffsets; QVector blendedMeshSizes; if (_model && _model->isLoaded()) { DETAILED_PROFILE_RANGE_EX(simulation_animation, __FUNCTION__, 0xFFFF0000, 0, { { "url", _model->getURL().toString() } }); int offset = 0; - auto meshes = _model->getHFMModel().meshes; + const auto& meshes = _model->getHFMModel().meshes; int meshIndex = 0; - foreach(const HFMMesh& mesh, meshes) { + for(const HFMMesh& mesh : meshes) { auto modelMeshBlendshapeOffsets = _model->_blendshapeOffsets.find(meshIndex++); if (mesh.blendshapes.isEmpty() || modelMeshBlendshapeOffsets == _model->_blendshapeOffsets.end()) { // Not blendshaped or not initialized @@ -1780,9 +1788,18 @@ void Blender::run() { float normalCoefficient = vertexCoefficient * NORMAL_COEFFICIENT_SCALE; const HFMBlendshape& blendshape = mesh.blendshapes.at(i); - - tbb::parallel_for(tbb::blocked_range(0, blendshape.indices.size()), [&](const tbb::blocked_range& range) { +#if (BLENDER_PARALLELISM == BLENDER_USE_TBB) + tbb::parallel_for(tbb::blocked_range(0, blendshape.indices.size(), BLENDER_TBB_CHUNK_SIZE), [&](const tbb::blocked_range& range) { for (auto j = range.begin(); j < range.end(); j++) { +#elif (BLENDER_PARALLELISM == BLENDER_USE_OPENMP) + { + #pragma omp parallel for + for (int j = 0; j < (int)blendshape.indices.size(); ++j) { +#else + { + for (int j = 0; j < blendshape.indices.size(); ++j) { +#endif + int index = blendshape.indices.at(j); auto& currentBlendshapeOffset = unpackedBlendshapeOffsets[index]; @@ -1793,20 +1810,41 @@ void Blender::run() { currentBlendshapeOffset.tangentOffset += blendshape.tangents.at(j) * normalCoefficient; } } +#if (BLENDER_PARALLELISM == BLENDER_USE_TBB) }); +#else + } +#endif } // Blendshape offsets are generrated, now let's pack it on its way to gpu - tbb::parallel_for(tbb::blocked_range(0, (int) unpackedBlendshapeOffsets.size()), [&](const tbb::blocked_range& range) { +#if (BLENDER_PARALLELISM == BLENDER_USE_TBB) + tbb::parallel_for(tbb::blocked_range(0, (int) unpackedBlendshapeOffsets.size(), BLENDER_TBB_CHUNK_SIZE), [&](const tbb::blocked_range& range) { auto unpacked = unpackedBlendshapeOffsets.data() + range.begin(); auto packed = meshBlendshapeOffsets + range.begin(); for (auto j = range.begin(); j < range.end(); j++) { +#elif (BLENDER_PARALLELISM == BLENDER_USE_OPENMP) + { + auto unpacked = unpackedBlendshapeOffsets.data(); + auto packed = meshBlendshapeOffsets; + #pragma omp parallel for + for (int j = 0; j < (int)unpackedBlendshapeOffsets.size(); ++j) { +#else + { + auto unpacked = unpackedBlendshapeOffsets.data(); + auto packed = meshBlendshapeOffsets; + for (int j = 0; j < (int)unpackedBlendshapeOffsets.size(); ++j) { +#endif + //for (auto j = range.begin(); j < range.end(); j++) { packBlendshapeOffsetTo_Pos_F32_3xSN10_Nor_3xSN10_Tan_3xSN10((*packed).packedPosNorTan, (*unpacked)); - - unpacked++; - packed++; + ++unpacked; + ++packed; } +#if (BLENDER_PARALLELISM == BLENDER_USE_TBB) }); +#else + } +#endif } } // post the result to the ModelBlender, which will dispatch to the model if still alive