Merge pull request #13922 from samcake/black-bis

Optimize the vertex formats of meshes for less input buffer bindings
2025-04-08 06:32:35 +02:00 · 2018-09-10 16:58:40 -07:00 · 2018-09-10 16:58:40 -07:00 · 47cea49f78
commit 47cea49f78
parent ae547f8950 082d47d20f
17 changed files with 269 additions and 105 deletions
--- a/libraries/fbx/src/FBXReader_Mesh.cpp
+++ b/libraries/fbx/src/FBXReader_Mesh.cpp
@ -585,13 +585,8 @@ void FBXReader::buildModelMesh(FBXMesh& extractedMesh, const QString& url) {

    FBXMesh& fbxMesh = extractedMesh;
    graphics::MeshPointer mesh(new graphics::Mesh());
-
-    // Grab the vertices in a buffer
-    auto vb = std::make_shared<gpu::Buffer>();
-    vb->setData(extractedMesh.vertices.size() * sizeof(glm::vec3),
-                (const gpu::Byte*) extractedMesh.vertices.data());
-    gpu::BufferView vbv(vb, gpu::Element(gpu::VEC3, gpu::FLOAT, gpu::XYZ));
-    mesh->setVertexBuffer(vbv);
+    bool hasBlendShapes = !fbxMesh.blendshapes.empty();
+    int numVerts = extractedMesh.vertices.size();

    if (!fbxMesh.normals.empty() && fbxMesh.tangents.empty()) {
        // Fill with a dummy value to force tangents to be present if there are normals
@ -607,43 +602,61 @@ void FBXReader::buildModelMesh(FBXMesh& extractedMesh, const QString& url) {
        }
    }

-    // evaluate all attribute channels sizes
-    const int normalsSize = fbxMesh.normals.size() * sizeof(NormalType);
-    const int tangentsSize = fbxMesh.tangents.size() * sizeof(NormalType);
+    // evaluate all attribute elements and data sizes
+
+    // Position is a vec3
+    const auto positionElement = gpu::Element(gpu::VEC3, gpu::FLOAT, gpu::XYZ); 
+    const int positionsSize = numVerts * positionElement.getSize();
+
+    // Normal and tangent are always there together packed in normalized xyz32bits word (times 2)
+    const auto normalElement = FBX_NORMAL_ELEMENT;
+    const int normalsSize = fbxMesh.normals.size() * normalElement.getSize();
+    const int tangentsSize = fbxMesh.tangents.size() * normalElement.getSize();
    // If there are normals then there should be tangents
-    
    assert(normalsSize <= tangentsSize);
    if (tangentsSize > normalsSize) {
        qWarning() << "Unexpected tangents in " << url;
    }
    const auto normalsAndTangentsSize = normalsSize + tangentsSize;
-    const int normalsAndTangentsStride = 2 * sizeof(NormalType);
-    const int colorsSize = fbxMesh.colors.size() * sizeof(ColorType);
+    const int normalsAndTangentsStride = 2 * normalElement.getSize();
+
+    // Color attrib
+    const auto colorElement = FBX_COLOR_ELEMENT;
+    const int colorsSize = fbxMesh.colors.size() * colorElement.getSize();
+   
    // Texture coordinates are stored in 2 half floats
-    const int texCoordsSize = fbxMesh.texCoords.size() * sizeof(vec2h);
-    const int texCoords1Size = fbxMesh.texCoords1.size() * sizeof(vec2h);
+    const auto texCoordsElement = gpu::Element(gpu::VEC2, gpu::HALF, gpu::UV);
+    const int texCoordsSize = fbxMesh.texCoords.size() * texCoordsElement.getSize();
+    const int texCoords1Size = fbxMesh.texCoords1.size() * texCoordsElement.getSize();

-    int clusterIndicesSize = fbxMesh.clusterIndices.size() * sizeof(uint8_t);
-    if (fbxMesh.clusters.size() > UINT8_MAX) {
-        // we need 16 bits instead of just 8 for clusterIndices
-        clusterIndicesSize *= 2;
-    }
+    // Support for 4 skinning clusters:
+    // 4 indices are uint8 ideally, uint16 once the cluster count reaches UINT8_MAX (255).
+    const auto clusterIndiceElement = (fbxMesh.clusters.size() < UINT8_MAX ? gpu::Element(gpu::VEC4, gpu::UINT8, gpu::XYZW) : gpu::Element(gpu::VEC4, gpu::UINT16, gpu::XYZW));
+    // 4 Weights are normalized 16bits
+    const auto clusterWeightElement = gpu::Element(gpu::VEC4, gpu::NUINT16, gpu::XYZW);

-    const int clusterWeightsSize = fbxMesh.clusterWeights.size() * sizeof(uint16_t);
+    // Cluster indices and weights must be the same sizes
+    const int NUM_CLUSTERS_PER_VERT = 4;
+    const int numVertClusters = (fbxMesh.clusterIndices.size() == fbxMesh.clusterWeights.size() ? fbxMesh.clusterIndices.size() / NUM_CLUSTERS_PER_VERT : 0);
+    const int clusterIndicesSize = numVertClusters * clusterIndiceElement.getSize();
+    const int clusterWeightsSize = numVertClusters * clusterWeightElement.getSize();

-    // Normals and tangents are interleaved
-    const int normalsOffset = 0;
-    const int tangentsOffset = normalsOffset + sizeof(NormalType);
-    const int colorsOffset = normalsOffset + normalsSize + tangentsSize;
+    // Decide where each attribute goes, sequentially, in one big buffer:
+    const int positionsOffset = 0;
+    const int normalsAndTangentsOffset = positionsOffset + positionsSize;
+    const int colorsOffset = normalsAndTangentsOffset + normalsAndTangentsSize;
    const int texCoordsOffset = colorsOffset + colorsSize;
    const int texCoords1Offset = texCoordsOffset + texCoordsSize;
    const int clusterIndicesOffset = texCoords1Offset + texCoords1Size;
    const int clusterWeightsOffset = clusterIndicesOffset + clusterIndicesSize;
-    const int totalAttributeSize = clusterWeightsOffset + clusterWeightsSize;
+    const int totalVertsSize = clusterWeightsOffset + clusterWeightsSize;

-    // Copy all attribute data in a single attribute buffer
-    auto attribBuffer = std::make_shared<gpu::Buffer>();
-    attribBuffer->resize(totalAttributeSize);
+    // Copy all vertex data in a single buffer
+    auto vertBuffer = std::make_shared<gpu::Buffer>();
+    vertBuffer->resize(totalVertsSize);
+
+    // First positions
+    vertBuffer->setSubData(positionsOffset, positionsSize, (const gpu::Byte*) extractedMesh.vertices.data());

    // Interleave normals and tangents
    if (normalsSize > 0) {
@ -651,8 +664,8 @@ void FBXReader::buildModelMesh(FBXMesh& extractedMesh, const QString& url) {

        normalsAndTangents.reserve(fbxMesh.normals.size() + fbxMesh.tangents.size());
        for (auto normalIt = fbxMesh.normals.constBegin(), tangentIt = fbxMesh.tangents.constBegin();
-             normalIt != fbxMesh.normals.constEnd();
-             ++normalIt, ++tangentIt) {
+            normalIt != fbxMesh.normals.constEnd();
+            ++normalIt, ++tangentIt) {
 #if FBX_PACK_NORMALS
            const auto normal = normalizeDirForPacking(*normalIt);
            const auto tangent = normalizeDirForPacking(*tangentIt);
@ -665,9 +678,10 @@ void FBXReader::buildModelMesh(FBXMesh& extractedMesh, const QString& url) {
            normalsAndTangents.push_back(packedNormal);
            normalsAndTangents.push_back(packedTangent);
        }
-        attribBuffer->setSubData(normalsOffset, normalsAndTangentsSize, (const gpu::Byte*) normalsAndTangents.data());
+        vertBuffer->setSubData(normalsAndTangentsOffset, normalsAndTangentsSize, (const gpu::Byte*) normalsAndTangents.data());
    }

+    // Pack colors
    if (colorsSize > 0) {
 #if FBX_PACK_COLORS
        std::vector<ColorType> colors;
@ -676,12 +690,13 @@ void FBXReader::buildModelMesh(FBXMesh& extractedMesh, const QString& url) {
        for (const auto& color : fbxMesh.colors) {
            colors.push_back(glm::packUnorm4x8(glm::vec4(color, 1.0f)));
        }
-        attribBuffer->setSubData(colorsOffset, colorsSize, (const gpu::Byte*) colors.data());
+        vertBuffer->setSubData(colorsOffset, colorsSize, (const gpu::Byte*) colors.data());
 #else
-        attribBuffer->setSubData(colorsOffset, colorsSize, (const gpu::Byte*) fbxMesh.colors.constData());
+        vertBuffer->setSubData(colorsOffset, colorsSize, (const gpu::Byte*) fbxMesh.colors.constData());
 #endif
    }

+    // Pack texcoords 0 and 1 (if they exist)
    if (texCoordsSize > 0) {
        QVector<vec2h> texCoordData;
        texCoordData.reserve(fbxMesh.texCoords.size());
@ -692,9 +707,8 @@ void FBXReader::buildModelMesh(FBXMesh& extractedMesh, const QString& url) {
            texCoordVec2h.y = glm::detail::toFloat16(texCoordVec2f.y);
            texCoordData.push_back(texCoordVec2h);
        }
-        attribBuffer->setSubData(texCoordsOffset, texCoordsSize, (const gpu::Byte*) texCoordData.constData());
+        vertBuffer->setSubData(texCoordsOffset, texCoordsSize, (const gpu::Byte*) texCoordData.constData());
    }
-
    if (texCoords1Size > 0) {
        QVector<vec2h> texCoordData;
        texCoordData.reserve(fbxMesh.texCoords1.size());
@ -705,69 +719,170 @@ void FBXReader::buildModelMesh(FBXMesh& extractedMesh, const QString& url) {
            texCoordVec2h.y = glm::detail::toFloat16(texCoordVec2f.y);
            texCoordData.push_back(texCoordVec2h);
        }
-        attribBuffer->setSubData(texCoords1Offset, texCoords1Size, (const gpu::Byte*) texCoordData.constData());
+        vertBuffer->setSubData(texCoords1Offset, texCoords1Size, (const gpu::Byte*) texCoordData.constData());
    }

-    if (fbxMesh.clusters.size() < UINT8_MAX) {
-        // yay! we can fit the clusterIndices within 8-bits
-        int32_t numIndices = fbxMesh.clusterIndices.size();
-        QVector<uint8_t> clusterIndices;
-        clusterIndices.resize(numIndices);
-        for (int32_t i = 0; i < numIndices; ++i) {
-            assert(fbxMesh.clusterIndices[i] <= UINT8_MAX);
-            clusterIndices[i] = (uint8_t)(fbxMesh.clusterIndices[i]);
+    // Clusters data
+    if (clusterIndicesSize > 0) {
+        if (fbxMesh.clusters.size() < UINT8_MAX) {
+            // yay! we can fit the clusterIndices within 8-bits
+            int32_t numIndices = fbxMesh.clusterIndices.size();
+            QVector<uint8_t> clusterIndices;
+            clusterIndices.resize(numIndices);
+            for (int32_t i = 0; i < numIndices; ++i) {
+                assert(fbxMesh.clusterIndices[i] <= UINT8_MAX);
+                clusterIndices[i] = (uint8_t)(fbxMesh.clusterIndices[i]);
+            }
+            vertBuffer->setSubData(clusterIndicesOffset, clusterIndicesSize, (const gpu::Byte*) clusterIndices.constData());
+        } else {
+            vertBuffer->setSubData(clusterIndicesOffset, clusterIndicesSize, (const gpu::Byte*) fbxMesh.clusterIndices.constData());
        }
-        attribBuffer->setSubData(clusterIndicesOffset, clusterIndicesSize, (const gpu::Byte*) clusterIndices.constData());
-    } else {
-        attribBuffer->setSubData(clusterIndicesOffset, clusterIndicesSize, (const gpu::Byte*) fbxMesh.clusterIndices.constData());
    }
-    attribBuffer->setSubData(clusterWeightsOffset, clusterWeightsSize, (const gpu::Byte*) fbxMesh.clusterWeights.constData());
+    if (clusterWeightsSize > 0) {
+        vertBuffer->setSubData(clusterWeightsOffset, clusterWeightsSize, (const gpu::Byte*) fbxMesh.clusterWeights.constData());
+    }

-    if (normalsSize) {
-        mesh->addAttribute(gpu::Stream::NORMAL,
-                           graphics::BufferView(attribBuffer, normalsOffset, normalsAndTangentsSize,
-                           normalsAndTangentsStride, FBX_NORMAL_ELEMENT));
-        mesh->addAttribute(gpu::Stream::TANGENT,
-                           graphics::BufferView(attribBuffer, tangentsOffset, normalsAndTangentsSize,
-                           normalsAndTangentsStride, FBX_NORMAL_ELEMENT));
+
+    // Now we decide how to interleave the attributes and how to distribute the vertex data among buffers:
+    // Aka the Vertex format and the vertexBufferStream
+    auto vertexFormat = std::make_shared<gpu::Stream::Format>();
+    auto vertexBufferStream = std::make_shared<gpu::BufferStream>();
+
+    // Decision time:
+    // if blendshapes then keep position and normals/tangents as separated channel buffers from interleaved attributes
+    // else everything is interleaved in one buffer
+    
+    // Default case is no blend shapes
+    gpu::BufferPointer attribBuffer;
+    int totalAttribBufferSize = totalVertsSize;
+    gpu::uint8 posChannel = 0;
+    gpu::uint8 tangentChannel = posChannel;
+    gpu::uint8 attribChannel = posChannel;
+    bool interleavePositions = true;
+    bool interleaveNormalsTangents = true;
+
+    // TODO: We are using the same vertex format layout for all meshes because this is more efficient
+    //       This work is going into rc73 release which is meant to be used for the SPot500 event and we are picking the format
+    //       that works best for blendshaped and skinned  meshes aka the avatars.
+    //       We will improve this technique in a hot fix to 73.
+    hasBlendShapes = true;
+
+    // If has blend shapes allocate and assign buffers for pos and tangents now
+    if (hasBlendShapes) {
+        auto posBuffer = std::make_shared<gpu::Buffer>();
+        posBuffer->setData(positionsSize, (const gpu::Byte*) vertBuffer->getData() + positionsOffset);
+        vertexBufferStream->addBuffer(posBuffer, 0, positionElement.getSize());
+
+        auto normalsAndTangentsBuffer = std::make_shared<gpu::Buffer>();
+        normalsAndTangentsBuffer->setData(normalsAndTangentsSize, (const gpu::Byte*) vertBuffer->getData() + normalsAndTangentsOffset);
+        vertexBufferStream->addBuffer(normalsAndTangentsBuffer, 0, normalsAndTangentsStride);
+
+        // update channels and attribBuffer size accordingly
+        interleavePositions = false;
+        interleaveNormalsTangents = false;
+
+        tangentChannel = 1;
+        attribChannel = 2;
+
+        totalAttribBufferSize = totalVertsSize - positionsSize - normalsAndTangentsSize;
    }
+
+    // Define the vertex format, compute the offset for each attributes as we append them to the vertex format
+    gpu::Offset bufOffset = 0;
+    if (positionsSize) {
+        vertexFormat->setAttribute(gpu::Stream::POSITION, posChannel, positionElement, bufOffset);
+        bufOffset += positionElement.getSize();
+        if (!interleavePositions) {
+            bufOffset = 0;
+        }
+    }
+    if (normalsSize) {
+        vertexFormat->setAttribute(gpu::Stream::NORMAL, tangentChannel, normalElement, bufOffset);
+        bufOffset += normalElement.getSize();
+        vertexFormat->setAttribute(gpu::Stream::TANGENT, tangentChannel, normalElement, bufOffset);
+        bufOffset += normalElement.getSize();
+        if (!interleaveNormalsTangents) {
+            bufOffset = 0;
+        }
+    }
+
+    // Pack normal and tangent with the rest of the attributes if no blend shapes
    if (colorsSize) {
-        mesh->addAttribute(gpu::Stream::COLOR,
-                           graphics::BufferView(attribBuffer, colorsOffset, colorsSize, FBX_COLOR_ELEMENT));
+        vertexFormat->setAttribute(gpu::Stream::COLOR, attribChannel, colorElement, bufOffset);
+        bufOffset += colorElement.getSize();
    }
    if (texCoordsSize) {
-        mesh->addAttribute(gpu::Stream::TEXCOORD,
-                           graphics::BufferView( attribBuffer, texCoordsOffset, texCoordsSize,
-                           gpu::Element(gpu::VEC2, gpu::HALF, gpu::UV)));
+        vertexFormat->setAttribute(gpu::Stream::TEXCOORD, attribChannel, texCoordsElement, bufOffset);
+        bufOffset += texCoordsElement.getSize();
    }
    if (texCoords1Size) {
-        mesh->addAttribute( gpu::Stream::TEXCOORD1,
-                            graphics::BufferView(attribBuffer, texCoords1Offset, texCoords1Size,
-                            gpu::Element(gpu::VEC2, gpu::HALF, gpu::UV)));
+        vertexFormat->setAttribute(gpu::Stream::TEXCOORD1, attribChannel, texCoordsElement, bufOffset);
+        bufOffset += texCoordsElement.getSize();
    } else if (texCoordsSize) {
-        mesh->addAttribute(gpu::Stream::TEXCOORD1,
-                           graphics::BufferView(attribBuffer, texCoordsOffset, texCoordsSize,
-                           gpu::Element(gpu::VEC2, gpu::HALF, gpu::UV)));
+        vertexFormat->setAttribute(gpu::Stream::TEXCOORD1, attribChannel, texCoordsElement, bufOffset - texCoordsElement.getSize());
    }
-
    if (clusterIndicesSize) {
-        if (fbxMesh.clusters.size() < UINT8_MAX) {
-            mesh->addAttribute(gpu::Stream::SKIN_CLUSTER_INDEX,
-                               graphics::BufferView(attribBuffer, clusterIndicesOffset, clusterIndicesSize,
-                                                 gpu::Element(gpu::VEC4, gpu::UINT8, gpu::XYZW)));
-        } else {
-            mesh->addAttribute(gpu::Stream::SKIN_CLUSTER_INDEX,
-                               graphics::BufferView(attribBuffer, clusterIndicesOffset, clusterIndicesSize,
-                                                 gpu::Element(gpu::VEC4, gpu::UINT16, gpu::XYZW)));
-        }
+        vertexFormat->setAttribute(gpu::Stream::SKIN_CLUSTER_INDEX, attribChannel, clusterIndiceElement, bufOffset);
+        bufOffset += clusterIndiceElement.getSize();
    }
    if (clusterWeightsSize) {
-        mesh->addAttribute(gpu::Stream::SKIN_CLUSTER_WEIGHT,
-                          graphics::BufferView(attribBuffer, clusterWeightsOffset, clusterWeightsSize,
-                                            gpu::Element(gpu::VEC4, gpu::NUINT16, gpu::XYZW)));
+        vertexFormat->setAttribute(gpu::Stream::SKIN_CLUSTER_WEIGHT, attribChannel, clusterWeightElement, bufOffset);
+        bufOffset += clusterWeightElement.getSize();
    }

+    // Finally, allocate and fill the attribBuffer interleaving the attributes as needed:
+    {
+        auto vPositionOffset = 0;
+        auto vPositionSize = (interleavePositions ? positionsSize / numVerts : 0);

+        auto vNormalsAndTangentsOffset = vPositionOffset + vPositionSize;
+        auto vNormalsAndTangentsSize = (interleaveNormalsTangents ? normalsAndTangentsSize / numVerts : 0);
+
+        auto vColorOffset = vNormalsAndTangentsOffset + vNormalsAndTangentsSize;
+        auto vColorSize = colorsSize / numVerts;
+    
+        auto vTexcoord0Offset = vColorOffset + vColorSize;
+        auto vTexcoord0Size = texCoordsSize / numVerts;
+
+        auto vTexcoord1Offset = vTexcoord0Offset + vTexcoord0Size;
+        auto vTexcoord1Size = texCoords1Size / numVerts;
+
+        auto vClusterIndiceOffset = vTexcoord1Offset + vTexcoord1Size;
+        auto vClusterIndiceSize = clusterIndicesSize / numVerts;
+
+        auto vClusterWeightOffset = vClusterIndiceOffset + vClusterIndiceSize;
+        auto vClusterWeightSize = clusterWeightsSize / numVerts;
+
+        auto vStride = vClusterWeightOffset + vClusterWeightSize;
+
+        std::vector<gpu::Byte> dest;
+        dest.resize(totalAttribBufferSize);
+        auto vDest = dest.data();
+
+        auto source = vertBuffer->getData();
+
+        for (int i = 0; i < numVerts; i++) {
+            
+            if (vPositionSize) memcpy(vDest + vPositionOffset, source + positionsOffset + i * vPositionSize, vPositionSize);
+            if (vNormalsAndTangentsSize) memcpy(vDest + vNormalsAndTangentsOffset, source + normalsAndTangentsOffset + i * vNormalsAndTangentsSize, vNormalsAndTangentsSize);
+            if (vColorSize) memcpy(vDest + vColorOffset, source + colorsOffset + i * vColorSize, vColorSize);
+            if (vTexcoord0Size) memcpy(vDest + vTexcoord0Offset, source + texCoordsOffset + i * vTexcoord0Size, vTexcoord0Size);
+            if (vTexcoord1Size) memcpy(vDest + vTexcoord1Offset, source + texCoords1Offset + i * vTexcoord1Size, vTexcoord1Size);
+            if (vClusterIndiceSize) memcpy(vDest + vClusterIndiceOffset, source + clusterIndicesOffset + i * vClusterIndiceSize, vClusterIndiceSize);
+            if (vClusterWeightSize) memcpy(vDest + vClusterWeightOffset, source + clusterWeightsOffset + i * vClusterWeightSize, vClusterWeightSize);
+
+            vDest += vStride;
+        }
+
+        auto attribBuffer = std::make_shared<gpu::Buffer>();
+        attribBuffer->setData(totalAttribBufferSize, dest.data());
+        vertexBufferStream->addBuffer(attribBuffer, 0, vStride);
+    }
+
+    // Mesh vertex format and vertex stream is ready
+    mesh->setVertexFormatAndStream(vertexFormat, vertexBufferStream);
+
+    // Index and Part Buffers
    unsigned int totalIndices = 0;
    foreach(const FBXMeshPart& part, extractedMesh.parts) {
        totalIndices += (part.quadTrianglesIndices.size() + part.triangleIndices.size());
--- a/libraries/gpu-gl-common/src/gpu/gl/GLBackend.h
+++ b/libraries/gpu-gl-common/src/gpu/gl/GLBackend.h
@ -239,6 +239,7 @@ public:
    virtual GLuint getFramebufferID(const FramebufferPointer& framebuffer) = 0;
    virtual GLuint getTextureID(const TexturePointer& texture) final;
    virtual GLuint getBufferID(const Buffer& buffer) = 0;
+    virtual GLuint getBufferIDUnsynced(const Buffer& buffer) = 0;
    virtual GLuint getQueryID(const QueryPointer& query) = 0;

    virtual GLFramebuffer* syncGPUObject(const Framebuffer& framebuffer) = 0;
--- a/libraries/gpu-gl-common/src/gpu/gl/GLBackendInput.cpp
+++ b/libraries/gpu-gl-common/src/gpu/gl/GLBackendInput.cpp
@ -11,6 +11,7 @@
 #include "GLBackend.h"
 #include "GLShared.h"
 #include "GLInputFormat.h"
+#include "GLBuffer.h"

 using namespace gpu;
 using namespace gpu::gl;
@ -43,13 +44,7 @@ void GLBackend::do_setInputBuffer(const Batch& batch, size_t paramOffset) {
        bool isModified = false;
        if (_input._buffers[channel] != buffer) {
            _input._buffers[channel] = buffer;
-         
-            GLuint vbo = 0;
-            if (buffer) {
-                vbo = getBufferID((*buffer));
-            }
-            _input._bufferVBOs[channel] = vbo;
-
+            _input._bufferVBOs[channel] = getBufferIDUnsynced((*buffer));
            isModified = true;
        }

@ -128,7 +123,7 @@ void GLBackend::do_setIndexBuffer(const Batch& batch, size_t paramOffset) {
    if (indexBuffer != _input._indexBuffer) {
        _input._indexBuffer = indexBuffer;
        if (indexBuffer) {
-            glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, getBufferID(*indexBuffer));
+            glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, getBufferIDUnsynced(*indexBuffer));
        } else {
            // FIXME do we really need this?  Is there ever a draw call where we care that the element buffer is null?
            glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0);
@ -145,7 +140,7 @@ void GLBackend::do_setIndirectBuffer(const Batch& batch, size_t paramOffset) {
    if (buffer != _input._indirectBuffer) {
        _input._indirectBuffer = buffer;
        if (buffer) {
-            glBindBuffer(GL_DRAW_INDIRECT_BUFFER, getBufferID(*buffer));
+            glBindBuffer(GL_DRAW_INDIRECT_BUFFER, getBufferIDUnsynced(*buffer));
        } else {
            // FIXME do we really need this?  Is there ever a draw call where we care that the indirect buffer is null?
            glBindBuffer(GL_DRAW_INDIRECT_BUFFER, 0);
@ -261,9 +256,17 @@ void GLBackend::updateInput() {
        auto offset = _input._bufferOffsets.data();
        auto stride = _input._bufferStrides.data();

+        // Profile the count of buffers to update and use it to short cut the for loop
+        int numInvalids = (int) _input._invalidBuffers.count();
+        _stats._ISNumInputBufferChanges += numInvalids;
+
        for (GLuint buffer = 0; buffer < _input._buffers.size(); buffer++, vbo++, offset++, stride++) {
            if (_input._invalidBuffers.test(buffer)) {
                glBindVertexBuffer(buffer, (*vbo), (*offset), (GLsizei)(*stride));
+                numInvalids--;
+                if (numInvalids <= 0) {
+                    break;
+                }
            }
        }

--- a/libraries/gpu-gl-common/src/gpu/gl/GLBackendPipeline.cpp
+++ b/libraries/gpu-gl-common/src/gpu/gl/GLBackendPipeline.cpp
@ -85,6 +85,8 @@ void GLBackend::do_setPipeline(const Batch& batch, size_t paramOffset) {
            auto& cameraCorrectionBuffer = _transform._viewCorrectionEnabled ?
                _pipeline._cameraCorrectionBuffer._buffer : 
                _pipeline._cameraCorrectionBufferIdentity._buffer;
+            // Because we don't sync buffers in bindUniformBuffer, let's force this buffer to be synced here
+            getBufferID(*cameraCorrectionBuffer);
            bindUniformBuffer(gpu::slot::buffer::CameraCorrection, cameraCorrectionBuffer, 0, sizeof(CameraCorrection));
        }
        (void)CHECK_GL_ERROR();
@ -170,11 +172,10 @@ void GLBackend::bindUniformBuffer(uint32_t slot, const BufferPointer& buffer, GL
        return;
    }

-    // Sync BufferObject
-    auto* object = syncGPUObject(*bufferState.buffer);
-    if (object) {
-        glBindBufferRange(GL_UNIFORM_BUFFER, slot, object->_buffer, bufferState.offset, bufferState.size);
-
+    // Grab the true gl Buffer object
+    auto glBO = getBufferIDUnsynced(*buffer);
+    if (glBO) {
+        glBindBufferRange(GL_UNIFORM_BUFFER, slot, glBO, bufferState.offset, bufferState.size);
        _uniform._buffers[slot] = bufferState;
        (void)CHECK_GL_ERROR();
    } else {
--- a/libraries/gpu-gl-common/src/gpu/gl/GLBuffer.h
+++ b/libraries/gpu-gl-common/src/gpu/gl/GLBuffer.h
@ -49,6 +49,16 @@ public:
        }
    }

+    template <typename GLBufferType>
+    static GLuint getIdUnsynced(GLBackend& backend, const Buffer& buffer) {
+        GLBufferType* object = Backend::getGPUObject<GLBufferType>(buffer);
+        if (object) {
+            return object->_buffer;
+        } else {
+            return 0;
+        }
+    }
+
    const GLuint& _buffer { _id };
    const GLuint _size;
    const Stamp _stamp;
--- a/libraries/gpu-gl/src/gpu/gl41/GL41Backend.h
+++ b/libraries/gpu-gl/src/gpu/gl41/GL41Backend.h
@ -134,6 +134,7 @@ protected:
    GLFramebuffer* syncGPUObject(const Framebuffer& framebuffer) override;

    GLuint getBufferID(const Buffer& buffer) override;
+    GLuint getBufferIDUnsynced(const Buffer& buffer) override;
    GLuint getResourceBufferID(const Buffer& buffer);
    GLBuffer* syncGPUObject(const Buffer& buffer) override;

--- a/libraries/gpu-gl/src/gpu/gl41/GL41BackendBuffer.cpp
+++ b/libraries/gpu-gl/src/gpu/gl41/GL41BackendBuffer.cpp
@ -83,6 +83,10 @@ GLuint GL41Backend::getBufferID(const Buffer& buffer) {
    return GL41Buffer::getId<GL41Buffer>(*this, buffer);
 }

+GLuint GL41Backend::getBufferIDUnsynced(const Buffer& buffer) {
+    return GL41Buffer::getIdUnsynced<GL41Buffer>(*this, buffer);
+}
+
 GLuint GL41Backend::getResourceBufferID(const Buffer& buffer) {
    auto* object = GL41Buffer::sync<GL41Buffer>(*this, buffer);
    if (object) {
--- a/libraries/gpu-gl/src/gpu/gl41/GL41BackendInput.cpp
+++ b/libraries/gpu-gl/src/gpu/gl41/GL41BackendInput.cpp
@ -78,8 +78,9 @@ void GL41Backend::updateInput() {

            const Stream::Format::AttributeMap& attributes = _input._format->getAttributes();
            auto& inputChannels = _input._format->getChannels();
-            _stats._ISNumInputBufferChanges++;
-
+            int numInvalids = (int)_input._invalidBuffers.count();
+            _stats._ISNumInputBufferChanges += numInvalids;
+            
            GLuint boundVBO = 0;
            for (auto& channelIt : inputChannels) {
                const Stream::Format::ChannelMap::value_type::second_type& channel = (channelIt).second;
--- a/libraries/gpu-gl/src/gpu/gl45/GL45Backend.h
+++ b/libraries/gpu-gl/src/gpu/gl45/GL45Backend.h
@ -235,6 +235,7 @@ protected:
    GLFramebuffer* syncGPUObject(const Framebuffer& framebuffer) override;

    GLuint getBufferID(const Buffer& buffer) override;
+    GLuint getBufferIDUnsynced(const Buffer& buffer) override;
    GLBuffer* syncGPUObject(const Buffer& buffer) override;

    GLTexture* syncGPUObject(const TexturePointer& texture) override;
--- a/libraries/gpu-gl/src/gpu/gl45/GL45BackendBuffer.cpp
+++ b/libraries/gpu-gl/src/gpu/gl45/GL45BackendBuffer.cpp
@ -51,6 +51,10 @@ GLuint GL45Backend::getBufferID(const Buffer& buffer) {
    return GL45Buffer::getId<GL45Buffer>(*this, buffer);
 }

+GLuint GL45Backend::getBufferIDUnsynced(const Buffer& buffer) {
+    return GL45Buffer::getIdUnsynced<GL45Buffer>(*this, buffer);
+}
+
 GLBuffer* GL45Backend::syncGPUObject(const Buffer& buffer) {
    return GL45Buffer::sync<GL45Buffer>(*this, buffer);
 }
--- a/libraries/gpu-gl/src/gpu/gl45/GL45BackendInput.cpp
+++ b/libraries/gpu-gl/src/gpu/gl45/GL45BackendInput.cpp
@ -132,9 +132,18 @@ void GL45Backend::updateInput() {
        auto offset = _input._bufferOffsets.data();
        auto stride = _input._bufferStrides.data();

-        for (GLuint buffer = 0; buffer < _input._buffers.size(); buffer++, vbo++, offset++, stride++) {
+        // Profile the count of buffers to update and use it to short cut the for loop
+        int numInvalids = (int) _input._invalidBuffers.count();
+        _stats._ISNumInputBufferChanges += numInvalids;
+
+        auto numBuffers = _input._buffers.size();
+        for (GLuint buffer = 0; buffer < numBuffers; buffer++, vbo++, offset++, stride++) {
            if (_input._invalidBuffers.test(buffer)) {
                glBindVertexBuffer(buffer, (*vbo), (*offset), (GLsizei)(*stride));
+                numInvalids--;
+                if (numInvalids <= 0) {
+                    break;
+                }
            }
        }

--- a/libraries/gpu-gles/src/gpu/gles/GLESBackend.h
+++ b/libraries/gpu-gles/src/gpu/gles/GLESBackend.h
@ -130,6 +130,7 @@ protected:
    GLFramebuffer* syncGPUObject(const Framebuffer& framebuffer) override;

    GLuint getBufferID(const Buffer& buffer) override;
+    GLuint getBufferIDUnsynced(const Buffer& buffer) override;
    GLuint getResourceBufferID(const Buffer& buffer);
    GLBuffer* syncGPUObject(const Buffer& buffer) override;

--- a/libraries/gpu-gles/src/gpu/gles/GLESBackendBuffer.cpp
+++ b/libraries/gpu-gles/src/gpu/gles/GLESBackendBuffer.cpp
@ -64,6 +64,10 @@ GLuint GLESBackend::getBufferID(const Buffer& buffer) {
    return GLESBuffer::getId<GLESBuffer>(*this, buffer);
 }

+GLuint GLESBackend::getBufferIDUnsynced(const Buffer& buffer) {
+    return GLESBuffer::getIdUnsynced<GLESBuffer>(*this, buffer);
+}
+
 GLBuffer* GLESBackend::syncGPUObject(const Buffer& buffer) {
    return GLESBuffer::sync<GLESBuffer>(*this, buffer);
 }
--- a/libraries/gpu/src/gpu/Batch.h
+++ b/libraries/gpu/src/gpu/Batch.h
@ -417,10 +417,7 @@ public:
            }

            const Data& get(uint32 offset) const {
-                if (offset >= _items.size()) {
-                    static const Data EMPTY;
-                    return EMPTY;
-                }
+                assert((offset < _items.size()));
                return (_items.data() + offset)->_data;
            }

--- a/libraries/gpu/src/gpu/Stream.h
+++ b/libraries/gpu/src/gpu/Stream.h
@ -152,6 +152,8 @@ public:

    BufferStream makeRangedStream(uint32 offset, uint32 count = -1) const;

+    BufferStream& operator = (const BufferStream& src) = default;
+
 protected:
    Buffers _buffers;
    Offsets _offsets;
--- a/libraries/graphics/src/graphics/Geometry.cpp
+++ b/libraries/graphics/src/graphics/Geometry.cpp
@ -32,6 +32,15 @@ Mesh::Mesh(const Mesh& mesh) :
 Mesh::~Mesh() {
 }

+void Mesh::setVertexFormatAndStream(const gpu::Stream::FormatPointer& vf, const gpu::BufferStreamPointer& vbs) {
+    _vertexFormat = vf;
+    _vertexStream = (*vbs);
+
+    auto attrib = _vertexFormat->getAttribute(gpu::Stream::POSITION);
+    _vertexBuffer = BufferView(vbs->getBuffers()[attrib._channel], vbs->getOffsets()[attrib._channel], vbs->getBuffers()[attrib._channel]->getSize(),
+        (gpu::uint16) vbs->getStrides()[attrib._channel], attrib._element);
+}
+
 void Mesh::setVertexBuffer(const BufferView& buffer) {
    _vertexBuffer = buffer;
    evalVertexFormat();
@ -107,11 +116,10 @@ Box Mesh::evalPartBound(int partNum) const {
        index += part._startIndex;
        auto endIndex = index;
        endIndex += part._numIndices;
-        auto vertices = &_vertexBuffer.get<Vec3>(part._baseVertex);
        for (;index != endIndex; index++) {
            // skip primitive restart indices
            if ((*index) != PRIMITIVE_RESTART_INDEX) {
-                box += vertices[(*index)];
+                box += _vertexBuffer.get<Vec3>(part._baseVertex + (*index));
            }
        }
    }
@ -128,11 +136,10 @@ Box Mesh::evalPartsBound(int partStart, int partEnd) const {
        Box partBound;
        auto index = _indexBuffer.cbegin<uint>() + (*part)._startIndex;
        auto endIndex = index + (*part)._numIndices;
-        auto vertices = &_vertexBuffer.get<Vec3>((*part)._baseVertex);
        for (;index != endIndex; index++) {
            // skip primitive restart indices
            if ((*index) != (uint) PRIMITIVE_RESTART_INDEX) {
-                partBound += vertices[(*index)];
+                partBound += _vertexBuffer.get<Vec3>((*part)._baseVertex + (*index));
            }
        }

--- a/libraries/graphics/src/graphics/Geometry.h
+++ b/libraries/graphics/src/graphics/Geometry.h
@ -59,6 +59,9 @@ public:
    void removeAttribute(Slot slot);
    const BufferView getAttributeBuffer(int attrib) const;

+    // Force vertex stream and Vertex format
+    void setVertexFormatAndStream(const gpu::Stream::FormatPointer& vf, const gpu::BufferStreamPointer& vbs);
+
    // Stream format
    const gpu::Stream::FormatPointer getVertexFormat() const { return _vertexFormat; }