Merge pull request #13922 from samcake/black-bis

Optimize the vertex formats of meshes for less input buffer bindings
2025-03-11 16:13:16 +01:00 · 2018-09-10 16:58:40 -07:00 · 2018-09-10 16:58:40 -07:00 · 47cea49f78
commit 47cea49f78
parent ae547f8950 082d47d20f
17 changed files with 269 additions and 105 deletions
--- a/libraries/fbx/src/FBXReader_Mesh.cpp
+++ b/libraries/fbx/src/FBXReader_Mesh.cpp
@ -585,13 +585,8 @@ void FBXReader::buildModelMesh(FBXMesh& extractedMesh, const QString& url) {
    FBXMesh& fbxMesh = extractedMesh;
    graphics::MeshPointer mesh(new graphics::Mesh());
-
+    bool hasBlendShapes = !fbxMesh.blendshapes.empty();
-    // Grab the vertices in a buffer
+    int numVerts = extractedMesh.vertices.size();
    auto vb = std::make_shared<gpu::Buffer>();
    vb->setData(extractedMesh.vertices.size() * sizeof(glm::vec3),
                (const gpu::Byte*) extractedMesh.vertices.data());
    gpu::BufferView vbv(vb, gpu::Element(gpu::VEC3, gpu::FLOAT, gpu::XYZ));
    mesh->setVertexBuffer(vbv);
    if (!fbxMesh.normals.empty() && fbxMesh.tangents.empty()) {
        // Fill with a dummy value to force tangents to be present if there are normals
@ -607,43 +602,61 @@ void FBXReader::buildModelMesh(FBXMesh& extractedMesh, const QString& url) {
        }
    }
-    // evaluate all attribute channels sizes
+    // evaluate all attribute elements and data sizes
-    const int normalsSize = fbxMesh.normals.size() * sizeof(NormalType);
+
-    const int tangentsSize = fbxMesh.tangents.size() * sizeof(NormalType);
+    // Position is a vec3
    const auto positionElement = gpu::Element(gpu::VEC3, gpu::FLOAT, gpu::XYZ); 
    const int positionsSize = numVerts * positionElement.getSize();
    // Normal and tangent are always there together packed in normalized xyz32bits word (times 2)
    const auto normalElement = FBX_NORMAL_ELEMENT;
    const int normalsSize = fbxMesh.normals.size() * normalElement.getSize();
    const int tangentsSize = fbxMesh.tangents.size() * normalElement.getSize();
    // If there are normals then there should be tangents
    assert(normalsSize <= tangentsSize);
    if (tangentsSize > normalsSize) {
        qWarning() << "Unexpected tangents in " << url;
    }
    const auto normalsAndTangentsSize = normalsSize + tangentsSize;
-    const int normalsAndTangentsStride = 2 * sizeof(NormalType);
+    const int normalsAndTangentsStride = 2 * normalElement.getSize();
-    const int colorsSize = fbxMesh.colors.size() * sizeof(ColorType);
+
    // Color attrib
    const auto colorElement = FBX_COLOR_ELEMENT;
    const int colorsSize = fbxMesh.colors.size() * colorElement.getSize();
    // Texture coordinates are stored in 2 half floats
-    const int texCoordsSize = fbxMesh.texCoords.size() * sizeof(vec2h);
+    const auto texCoordsElement = gpu::Element(gpu::VEC2, gpu::HALF, gpu::UV);
-    const int texCoords1Size = fbxMesh.texCoords1.size() * sizeof(vec2h);
+    const int texCoordsSize = fbxMesh.texCoords.size() * texCoordsElement.getSize();
    const int texCoords1Size = fbxMesh.texCoords1.size() * texCoordsElement.getSize();
-    int clusterIndicesSize = fbxMesh.clusterIndices.size() * sizeof(uint8_t);
+    // Support for 4 skinning clusters:
-    if (fbxMesh.clusters.size() > UINT8_MAX) {
+    // 4 Indices are uint8 ideally, uint16 if more than 256.
-        // we need 16 bits instead of just 8 for clusterIndices
+    const auto clusterIndiceElement = (fbxMesh.clusters.size() < UINT8_MAX ? gpu::Element(gpu::VEC4, gpu::UINT8, gpu::XYZW) : gpu::Element(gpu::VEC4, gpu::UINT16, gpu::XYZW));
-        clusterIndicesSize *= 2;
+    // 4 Weights are normalized 16bits
-    }
+    const auto clusterWeightElement = gpu::Element(gpu::VEC4, gpu::NUINT16, gpu::XYZW);
-    const int clusterWeightsSize = fbxMesh.clusterWeights.size() * sizeof(uint16_t);
+    // Cluster indices and weights must be the same sizes
    const int NUM_CLUSTERS_PER_VERT = 4;
    const int numVertClusters = (fbxMesh.clusterIndices.size() == fbxMesh.clusterWeights.size() ? fbxMesh.clusterIndices.size() / NUM_CLUSTERS_PER_VERT : 0);
    const int clusterIndicesSize = numVertClusters * clusterIndiceElement.getSize();
    const int clusterWeightsSize = numVertClusters * clusterWeightElement.getSize();
-    // Normals and tangents are interleaved
+    // Decide on where to put what sequentially in a big buffer:
-    const int normalsOffset = 0;
+    const int positionsOffset = 0;
-    const int tangentsOffset = normalsOffset + sizeof(NormalType);
+    const int normalsAndTangentsOffset = positionsOffset + positionsSize;
-    const int colorsOffset = normalsOffset + normalsSize + tangentsSize;
+    const int colorsOffset = normalsAndTangentsOffset + normalsAndTangentsSize;
    const int texCoordsOffset = colorsOffset + colorsSize;
    const int texCoords1Offset = texCoordsOffset + texCoordsSize;
    const int clusterIndicesOffset = texCoords1Offset + texCoords1Size;
    const int clusterWeightsOffset = clusterIndicesOffset + clusterIndicesSize;
-    const int totalAttributeSize = clusterWeightsOffset + clusterWeightsSize;
+    const int totalVertsSize = clusterWeightsOffset + clusterWeightsSize;
-    // Copy all attribute data in a single attribute buffer
+    // Copy all vertex data in a single buffer
-    auto attribBuffer = std::make_shared<gpu::Buffer>();
+    auto vertBuffer = std::make_shared<gpu::Buffer>();
-    attribBuffer->resize(totalAttributeSize);
+    vertBuffer->resize(totalVertsSize);
    // First positions
    vertBuffer->setSubData(positionsOffset, positionsSize, (const gpu::Byte*) extractedMesh.vertices.data());
    // Interleave normals and tangents
    if (normalsSize > 0) {
@ -651,8 +664,8 @@ void FBXReader::buildModelMesh(FBXMesh& extractedMesh, const QString& url) {
        normalsAndTangents.reserve(fbxMesh.normals.size() + fbxMesh.tangents.size());
        for (auto normalIt = fbxMesh.normals.constBegin(), tangentIt = fbxMesh.tangents.constBegin();
-             normalIt != fbxMesh.normals.constEnd();
+            normalIt != fbxMesh.normals.constEnd();
-             ++normalIt, ++tangentIt) {
+            ++normalIt, ++tangentIt) {
 #if FBX_PACK_NORMALS
            const auto normal = normalizeDirForPacking(*normalIt);
            const auto tangent = normalizeDirForPacking(*tangentIt);
@ -665,9 +678,10 @@ void FBXReader::buildModelMesh(FBXMesh& extractedMesh, const QString& url) {
            normalsAndTangents.push_back(packedNormal);
            normalsAndTangents.push_back(packedTangent);
        }
-        attribBuffer->setSubData(normalsOffset, normalsAndTangentsSize, (const gpu::Byte*) normalsAndTangents.data());
+        vertBuffer->setSubData(normalsAndTangentsOffset, normalsAndTangentsSize, (const gpu::Byte*) normalsAndTangents.data());
    }
    // Pack colors
    if (colorsSize > 0) {
 #if FBX_PACK_COLORS
        std::vector<ColorType> colors;
@ -676,12 +690,13 @@ void FBXReader::buildModelMesh(FBXMesh& extractedMesh, const QString& url) {
        for (const auto& color : fbxMesh.colors) {
            colors.push_back(glm::packUnorm4x8(glm::vec4(color, 1.0f)));
        }
-        attribBuffer->setSubData(colorsOffset, colorsSize, (const gpu::Byte*) colors.data());
+        vertBuffer->setSubData(colorsOffset, colorsSize, (const gpu::Byte*) colors.data());
 #else
-        attribBuffer->setSubData(colorsOffset, colorsSize, (const gpu::Byte*) fbxMesh.colors.constData());
+        vertBuffer->setSubData(colorsOffset, colorsSize, (const gpu::Byte*) fbxMesh.colors.constData());
 #endif
    }
    // Pack Texcoords 0 and 1 (if exists)
    if (texCoordsSize > 0) {
        QVector<vec2h> texCoordData;
        texCoordData.reserve(fbxMesh.texCoords.size());
@ -692,9 +707,8 @@ void FBXReader::buildModelMesh(FBXMesh& extractedMesh, const QString& url) {
            texCoordVec2h.y = glm::detail::toFloat16(texCoordVec2f.y);
            texCoordData.push_back(texCoordVec2h);
        }
-        attribBuffer->setSubData(texCoordsOffset, texCoordsSize, (const gpu::Byte*) texCoordData.constData());
+        vertBuffer->setSubData(texCoordsOffset, texCoordsSize, (const gpu::Byte*) texCoordData.constData());
    }
    if (texCoords1Size > 0) {
        QVector<vec2h> texCoordData;
        texCoordData.reserve(fbxMesh.texCoords1.size());
@ -705,69 +719,170 @@ void FBXReader::buildModelMesh(FBXMesh& extractedMesh, const QString& url) {
            texCoordVec2h.y = glm::detail::toFloat16(texCoordVec2f.y);
            texCoordData.push_back(texCoordVec2h);
        }
-        attribBuffer->setSubData(texCoords1Offset, texCoords1Size, (const gpu::Byte*) texCoordData.constData());
+        vertBuffer->setSubData(texCoords1Offset, texCoords1Size, (const gpu::Byte*) texCoordData.constData());
    }
-    if (fbxMesh.clusters.size() < UINT8_MAX) {
+    // Clusters data
-        // yay! we can fit the clusterIndices within 8-bits
+    if (clusterIndicesSize > 0) {
-        int32_t numIndices = fbxMesh.clusterIndices.size();
+        if (fbxMesh.clusters.size() < UINT8_MAX) {
-        QVector<uint8_t> clusterIndices;
+            // yay! we can fit the clusterIndices within 8-bits
-        clusterIndices.resize(numIndices);
+            int32_t numIndices = fbxMesh.clusterIndices.size();
-        for (int32_t i = 0; i < numIndices; ++i) {
+            QVector<uint8_t> clusterIndices;
-            assert(fbxMesh.clusterIndices[i] <= UINT8_MAX);
+            clusterIndices.resize(numIndices);
-            clusterIndices[i] = (uint8_t)(fbxMesh.clusterIndices[i]);
+            for (int32_t i = 0; i < numIndices; ++i) {
                assert(fbxMesh.clusterIndices[i] <= UINT8_MAX);
                clusterIndices[i] = (uint8_t)(fbxMesh.clusterIndices[i]);
            }
            vertBuffer->setSubData(clusterIndicesOffset, clusterIndicesSize, (const gpu::Byte*) clusterIndices.constData());
        } else {
            vertBuffer->setSubData(clusterIndicesOffset, clusterIndicesSize, (const gpu::Byte*) fbxMesh.clusterIndices.constData());
        }
        attribBuffer->setSubData(clusterIndicesOffset, clusterIndicesSize, (const gpu::Byte*) clusterIndices.constData());
    } else {
        attribBuffer->setSubData(clusterIndicesOffset, clusterIndicesSize, (const gpu::Byte*) fbxMesh.clusterIndices.constData());
    }
-    attribBuffer->setSubData(clusterWeightsOffset, clusterWeightsSize, (const gpu::Byte*) fbxMesh.clusterWeights.constData());
+    if (clusterWeightsSize > 0) {
        vertBuffer->setSubData(clusterWeightsOffset, clusterWeightsSize, (const gpu::Byte*) fbxMesh.clusterWeights.constData());
    }
-    if (normalsSize) {
+
-        mesh->addAttribute(gpu::Stream::NORMAL,
+    // Now we decide on how to interleave the attributes and provide the vertices among buffers:
-                           graphics::BufferView(attribBuffer, normalsOffset, normalsAndTangentsSize,
+    // Aka the Vertex format and the vertexBufferStream
-                           normalsAndTangentsStride, FBX_NORMAL_ELEMENT));
+    auto vertexFormat = std::make_shared<gpu::Stream::Format>();
-        mesh->addAttribute(gpu::Stream::TANGENT,
+    auto vertexBufferStream = std::make_shared<gpu::BufferStream>();
-                           graphics::BufferView(attribBuffer, tangentsOffset, normalsAndTangentsSize,
+
-                           normalsAndTangentsStride, FBX_NORMAL_ELEMENT));
+    // Decision time:
    // if blendshapes then keep position and normals/tangents as separated channel buffers from interleaved attributes
    // else everything is interleaved in one buffer
    // Default case is no blend shapes
    gpu::BufferPointer attribBuffer;
    int totalAttribBufferSize = totalVertsSize;
    gpu::uint8 posChannel = 0;
    gpu::uint8 tangentChannel = posChannel;
    gpu::uint8 attribChannel = posChannel;
    bool interleavePositions = true;
    bool interleaveNormalsTangents = true;
    // TODO: We are using the same vertex format layout for all meshes because this is more efficient
    //       This work is going into rc73 release which is meant to be used for the SPot500 event and we are picking the format
    //       that works best for blendshaped and skinned  meshes aka the avatars.
    //       We will improve this technique in a hot fix to 73.
    hasBlendShapes = true;
    // If has blend shapes allocate and assign buffers for pos and tangents now
    if (hasBlendShapes) {
        auto posBuffer = std::make_shared<gpu::Buffer>();
        posBuffer->setData(positionsSize, (const gpu::Byte*) vertBuffer->getData() + positionsOffset);
        vertexBufferStream->addBuffer(posBuffer, 0, positionElement.getSize());
        auto normalsAndTangentsBuffer = std::make_shared<gpu::Buffer>();
        normalsAndTangentsBuffer->setData(normalsAndTangentsSize, (const gpu::Byte*) vertBuffer->getData() + normalsAndTangentsOffset);
        vertexBufferStream->addBuffer(normalsAndTangentsBuffer, 0, normalsAndTangentsStride);
        // update channels and attribBuffer size accordingly
        interleavePositions = false;
        interleaveNormalsTangents = false;
        tangentChannel = 1;
        attribChannel = 2;
        totalAttribBufferSize = totalVertsSize - positionsSize - normalsAndTangentsSize;
    }
    // Define the vertex format, compute the offset for each attributes as we append them to the vertex format
    gpu::Offset bufOffset = 0;
    if (positionsSize) {
        vertexFormat->setAttribute(gpu::Stream::POSITION, posChannel, positionElement, bufOffset);
        bufOffset += positionElement.getSize();
        if (!interleavePositions) {
            bufOffset = 0;
        }
    }
    if (normalsSize) {
        vertexFormat->setAttribute(gpu::Stream::NORMAL, tangentChannel, normalElement, bufOffset);
        bufOffset += normalElement.getSize();
        vertexFormat->setAttribute(gpu::Stream::TANGENT, tangentChannel, normalElement, bufOffset);
        bufOffset += normalElement.getSize();
        if (!interleaveNormalsTangents) {
            bufOffset = 0;
        }
    }
    // Pack normal and Tangent with the rest of attributes if no blend shapes
    if (colorsSize) {
-        mesh->addAttribute(gpu::Stream::COLOR,
+        vertexFormat->setAttribute(gpu::Stream::COLOR, attribChannel, colorElement, bufOffset);
-                           graphics::BufferView(attribBuffer, colorsOffset, colorsSize, FBX_COLOR_ELEMENT));
+        bufOffset += colorElement.getSize();
    }
    if (texCoordsSize) {
-        mesh->addAttribute(gpu::Stream::TEXCOORD,
+        vertexFormat->setAttribute(gpu::Stream::TEXCOORD, attribChannel, texCoordsElement, bufOffset);
-                           graphics::BufferView( attribBuffer, texCoordsOffset, texCoordsSize,
+        bufOffset += texCoordsElement.getSize();
                           gpu::Element(gpu::VEC2, gpu::HALF, gpu::UV)));
    }
    if (texCoords1Size) {
-        mesh->addAttribute( gpu::Stream::TEXCOORD1,
+        vertexFormat->setAttribute(gpu::Stream::TEXCOORD1, attribChannel, texCoordsElement, bufOffset);
-                            graphics::BufferView(attribBuffer, texCoords1Offset, texCoords1Size,
+        bufOffset += texCoordsElement.getSize();
                            gpu::Element(gpu::VEC2, gpu::HALF, gpu::UV)));
    } else if (texCoordsSize) {
-        mesh->addAttribute(gpu::Stream::TEXCOORD1,
+        vertexFormat->setAttribute(gpu::Stream::TEXCOORD1, attribChannel, texCoordsElement, bufOffset - texCoordsElement.getSize());
                           graphics::BufferView(attribBuffer, texCoordsOffset, texCoordsSize,
                           gpu::Element(gpu::VEC2, gpu::HALF, gpu::UV)));
    }
    if (clusterIndicesSize) {
-        if (fbxMesh.clusters.size() < UINT8_MAX) {
+        vertexFormat->setAttribute(gpu::Stream::SKIN_CLUSTER_INDEX, attribChannel, clusterIndiceElement, bufOffset);
-            mesh->addAttribute(gpu::Stream::SKIN_CLUSTER_INDEX,
+        bufOffset += clusterIndiceElement.getSize();
                               graphics::BufferView(attribBuffer, clusterIndicesOffset, clusterIndicesSize,
                                                 gpu::Element(gpu::VEC4, gpu::UINT8, gpu::XYZW)));
        } else {
            mesh->addAttribute(gpu::Stream::SKIN_CLUSTER_INDEX,
                               graphics::BufferView(attribBuffer, clusterIndicesOffset, clusterIndicesSize,
                                                 gpu::Element(gpu::VEC4, gpu::UINT16, gpu::XYZW)));
        }
    }
    if (clusterWeightsSize) {
-        mesh->addAttribute(gpu::Stream::SKIN_CLUSTER_WEIGHT,
+        vertexFormat->setAttribute(gpu::Stream::SKIN_CLUSTER_WEIGHT, attribChannel, clusterWeightElement, bufOffset);
-                          graphics::BufferView(attribBuffer, clusterWeightsOffset, clusterWeightsSize,
+        bufOffset += clusterWeightElement.getSize();
                                            gpu::Element(gpu::VEC4, gpu::NUINT16, gpu::XYZW)));
    }
    // Finally, allocate and fill the attribBuffer interleaving the attributes as needed:
    {
        auto vPositionOffset = 0;
        auto vPositionSize = (interleavePositions ? positionsSize / numVerts : 0);
        auto vNormalsAndTangentsOffset = vPositionOffset + vPositionSize;
        auto vNormalsAndTangentsSize = (interleaveNormalsTangents ? normalsAndTangentsSize / numVerts : 0);
        auto vColorOffset = vNormalsAndTangentsOffset + vNormalsAndTangentsSize;
        auto vColorSize = colorsSize / numVerts;
        auto vTexcoord0Offset = vColorOffset + vColorSize;
        auto vTexcoord0Size = texCoordsSize / numVerts;
        auto vTexcoord1Offset = vTexcoord0Offset + vTexcoord0Size;
        auto vTexcoord1Size = texCoords1Size / numVerts;
        auto vClusterIndiceOffset = vTexcoord1Offset + vTexcoord1Size;
        auto vClusterIndiceSize = clusterIndicesSize / numVerts;
        auto vClusterWeightOffset = vClusterIndiceOffset + vClusterIndiceSize;
        auto vClusterWeightSize = clusterWeightsSize / numVerts;
        auto vStride = vClusterWeightOffset + vClusterWeightSize;
        std::vector<gpu::Byte> dest;
        dest.resize(totalAttribBufferSize);
        auto vDest = dest.data();
        auto source = vertBuffer->getData();
        for (int i = 0; i < numVerts; i++) {
            if (vPositionSize) memcpy(vDest + vPositionOffset, source + positionsOffset + i * vPositionSize, vPositionSize);
            if (vNormalsAndTangentsSize) memcpy(vDest + vNormalsAndTangentsOffset, source + normalsAndTangentsOffset + i * vNormalsAndTangentsSize, vNormalsAndTangentsSize);
            if (vColorSize) memcpy(vDest + vColorOffset, source + colorsOffset + i * vColorSize, vColorSize);
            if (vTexcoord0Size) memcpy(vDest + vTexcoord0Offset, source + texCoordsOffset + i * vTexcoord0Size, vTexcoord0Size);
            if (vTexcoord1Size) memcpy(vDest + vTexcoord1Offset, source + texCoords1Offset + i * vTexcoord1Size, vTexcoord1Size);
            if (vClusterIndiceSize) memcpy(vDest + vClusterIndiceOffset, source + clusterIndicesOffset + i * vClusterIndiceSize, vClusterIndiceSize);
            if (vClusterWeightSize) memcpy(vDest + vClusterWeightOffset, source + clusterWeightsOffset + i * vClusterWeightSize, vClusterWeightSize);
            vDest += vStride;
        }
        auto attribBuffer = std::make_shared<gpu::Buffer>();
        attribBuffer->setData(totalAttribBufferSize, dest.data());
        vertexBufferStream->addBuffer(attribBuffer, 0, vStride);
    }
    // Mesh vertex format and vertex stream is ready
    mesh->setVertexFormatAndStream(vertexFormat, vertexBufferStream);
    // Index and Part Buffers
    unsigned int totalIndices = 0;
    foreach(const FBXMeshPart& part, extractedMesh.parts) {
        totalIndices += (part.quadTrianglesIndices.size() + part.triangleIndices.size());
--- a/libraries/gpu-gl-common/src/gpu/gl/GLBackend.h
+++ b/libraries/gpu-gl-common/src/gpu/gl/GLBackend.h
@ -239,6 +239,7 @@ public:
    virtual GLuint getFramebufferID(const FramebufferPointer& framebuffer) = 0;
    virtual GLuint getTextureID(const TexturePointer& texture) final;
    virtual GLuint getBufferID(const Buffer& buffer) = 0;
    virtual GLuint getBufferIDUnsynced(const Buffer& buffer) = 0;
    virtual GLuint getQueryID(const QueryPointer& query) = 0;
    virtual GLFramebuffer* syncGPUObject(const Framebuffer& framebuffer) = 0;
--- a/libraries/gpu-gl-common/src/gpu/gl/GLBackendInput.cpp
+++ b/libraries/gpu-gl-common/src/gpu/gl/GLBackendInput.cpp
@ -11,6 +11,7 @@
 #include "GLBackend.h"
 #include "GLShared.h"
 #include "GLInputFormat.h"
 #include "GLBuffer.h"
 using namespace gpu;
 using namespace gpu::gl;
@ -43,13 +44,7 @@ void GLBackend::do_setInputBuffer(const Batch& batch, size_t paramOffset) {
        bool isModified = false;
        if (_input._buffers[channel] != buffer) {
            _input._buffers[channel] = buffer;
-         
+            _input._bufferVBOs[channel] = getBufferIDUnsynced((*buffer));
            GLuint vbo = 0;
            if (buffer) {
                vbo = getBufferID((*buffer));
            }
            _input._bufferVBOs[channel] = vbo;
            isModified = true;
        }
@ -128,7 +123,7 @@ void GLBackend::do_setIndexBuffer(const Batch& batch, size_t paramOffset) {
    if (indexBuffer != _input._indexBuffer) {
        _input._indexBuffer = indexBuffer;
        if (indexBuffer) {
-            glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, getBufferID(*indexBuffer));
+            glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, getBufferIDUnsynced(*indexBuffer));
        } else {
            // FIXME do we really need this?  Is there ever a draw call where we care that the element buffer is null?
            glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0);
@ -145,7 +140,7 @@ void GLBackend::do_setIndirectBuffer(const Batch& batch, size_t paramOffset) {
    if (buffer != _input._indirectBuffer) {
        _input._indirectBuffer = buffer;
        if (buffer) {
-            glBindBuffer(GL_DRAW_INDIRECT_BUFFER, getBufferID(*buffer));
+            glBindBuffer(GL_DRAW_INDIRECT_BUFFER, getBufferIDUnsynced(*buffer));
        } else {
            // FIXME do we really need this?  Is there ever a draw call where we care that the element buffer is null?
            glBindBuffer(GL_DRAW_INDIRECT_BUFFER, 0);
@ -261,9 +256,17 @@ void GLBackend::updateInput() {
        auto offset = _input._bufferOffsets.data();
        auto stride = _input._bufferStrides.data();
        // Profile the count of buffers to update and use it to short cut the for loop
        int numInvalids = (int) _input._invalidBuffers.count();
        _stats._ISNumInputBufferChanges += numInvalids;
        for (GLuint buffer = 0; buffer < _input._buffers.size(); buffer++, vbo++, offset++, stride++) {
            if (_input._invalidBuffers.test(buffer)) {
                glBindVertexBuffer(buffer, (*vbo), (*offset), (GLsizei)(*stride));
                numInvalids--;
                if (numInvalids <= 0) {
                    break;
                }
            }
        }
--- a/libraries/gpu-gl-common/src/gpu/gl/GLBackendPipeline.cpp
+++ b/libraries/gpu-gl-common/src/gpu/gl/GLBackendPipeline.cpp
@ -85,6 +85,8 @@ void GLBackend::do_setPipeline(const Batch& batch, size_t paramOffset) {
            auto& cameraCorrectionBuffer = _transform._viewCorrectionEnabled ?
                _pipeline._cameraCorrectionBuffer._buffer : 
                _pipeline._cameraCorrectionBufferIdentity._buffer;
            // Because we don't sync Buffers in the bindUniformBuffer, let's force this buffer to be synced
            getBufferID(*cameraCorrectionBuffer);
            bindUniformBuffer(gpu::slot::buffer::CameraCorrection, cameraCorrectionBuffer, 0, sizeof(CameraCorrection));
        }
        (void)CHECK_GL_ERROR();
@ -170,11 +172,10 @@ void GLBackend::bindUniformBuffer(uint32_t slot, const BufferPointer& buffer, GL
        return;
    }
-    // Sync BufferObject
+    // Grab the true gl Buffer object
-    auto* object = syncGPUObject(*bufferState.buffer);
+    auto glBO = getBufferIDUnsynced(*buffer);
-    if (object) {
+    if (glBO) {
-        glBindBufferRange(GL_UNIFORM_BUFFER, slot, object->_buffer, bufferState.offset, bufferState.size);
+        glBindBufferRange(GL_UNIFORM_BUFFER, slot, glBO, bufferState.offset, bufferState.size);
        _uniform._buffers[slot] = bufferState;
        (void)CHECK_GL_ERROR();
    } else {
--- a/libraries/gpu-gl-common/src/gpu/gl/GLBuffer.h
+++ b/libraries/gpu-gl-common/src/gpu/gl/GLBuffer.h
@ -49,6 +49,16 @@ public:
        }
    }
    // Returns the GL buffer id stored on the backend object that
    // Backend::getGPUObject finds for `buffer`, or 0 when no such object exists.
    // The `backend` argument is not read by this function.
    template <typename GLBufferType>
    static GLuint getIdUnsynced(GLBackend& backend, const Buffer& buffer) {
        GLBufferType* glObject = Backend::getGPUObject<GLBufferType>(buffer);
        if (!glObject) {
            // No backend object is attached to this buffer.
            return 0;
        }
        return glObject->_buffer;
    }
    const GLuint& _buffer { _id };
    const GLuint _size;
    const Stamp _stamp;
--- a/libraries/gpu-gl/src/gpu/gl41/GL41Backend.h
+++ b/libraries/gpu-gl/src/gpu/gl41/GL41Backend.h
@ -134,6 +134,7 @@ protected:
    GLFramebuffer* syncGPUObject(const Framebuffer& framebuffer) override;
    GLuint getBufferID(const Buffer& buffer) override;
    GLuint getBufferIDUnsynced(const Buffer& buffer) override;
    GLuint getResourceBufferID(const Buffer& buffer);
    GLBuffer* syncGPUObject(const Buffer& buffer) override;
--- a/libraries/gpu-gl/src/gpu/gl41/GL41BackendBuffer.cpp
+++ b/libraries/gpu-gl/src/gpu/gl41/GL41BackendBuffer.cpp
@ -83,6 +83,10 @@ GLuint GL41Backend::getBufferID(const Buffer& buffer) {
    return GL41Buffer::getId<GL41Buffer>(*this, buffer);
 }
 // Forwards to GL41Buffer::getIdUnsynced for the GL 4.1 buffer type and
 // returns the id it reports.
 GLuint GL41Backend::getBufferIDUnsynced(const Buffer& buffer) {
    const GLuint bufferId = GL41Buffer::getIdUnsynced<GL41Buffer>(*this, buffer);
    return bufferId;
 }
 GLuint GL41Backend::getResourceBufferID(const Buffer& buffer) {
    auto* object = GL41Buffer::sync<GL41Buffer>(*this, buffer);
    if (object) {
--- a/libraries/gpu-gl/src/gpu/gl41/GL41BackendInput.cpp
+++ b/libraries/gpu-gl/src/gpu/gl41/GL41BackendInput.cpp
@ -78,8 +78,9 @@ void GL41Backend::updateInput() {
            const Stream::Format::AttributeMap& attributes = _input._format->getAttributes();
            auto& inputChannels = _input._format->getChannels();
-            _stats._ISNumInputBufferChanges++;
+            int numInvalids = (int)_input._invalidBuffers.count();
-
+            _stats._ISNumInputBufferChanges += numInvalids;
            GLuint boundVBO = 0;
            for (auto& channelIt : inputChannels) {
                const Stream::Format::ChannelMap::value_type::second_type& channel = (channelIt).second;
--- a/libraries/gpu-gl/src/gpu/gl45/GL45Backend.h
+++ b/libraries/gpu-gl/src/gpu/gl45/GL45Backend.h
@ -235,6 +235,7 @@ protected:
    GLFramebuffer* syncGPUObject(const Framebuffer& framebuffer) override;
    GLuint getBufferID(const Buffer& buffer) override;
    GLuint getBufferIDUnsynced(const Buffer& buffer) override;
    GLBuffer* syncGPUObject(const Buffer& buffer) override;
    GLTexture* syncGPUObject(const TexturePointer& texture) override;
--- a/libraries/gpu-gl/src/gpu/gl45/GL45BackendBuffer.cpp
+++ b/libraries/gpu-gl/src/gpu/gl45/GL45BackendBuffer.cpp
@ -51,6 +51,10 @@ GLuint GL45Backend::getBufferID(const Buffer& buffer) {
    return GL45Buffer::getId<GL45Buffer>(*this, buffer);
 }
 // Forwards to GL45Buffer::getIdUnsynced for the GL 4.5 buffer type and
 // returns the id it reports.
 GLuint GL45Backend::getBufferIDUnsynced(const Buffer& buffer) {
    const GLuint bufferId = GL45Buffer::getIdUnsynced<GL45Buffer>(*this, buffer);
    return bufferId;
 }
 // Forwards to GL45Buffer::sync for the GL 4.5 buffer type and returns the
 // resulting backend buffer object pointer.
 GLBuffer* GL45Backend::syncGPUObject(const Buffer& buffer) {
    GLBuffer* syncedObject = GL45Buffer::sync<GL45Buffer>(*this, buffer);
    return syncedObject;
 }
--- a/libraries/gpu-gl/src/gpu/gl45/GL45BackendInput.cpp
+++ b/libraries/gpu-gl/src/gpu/gl45/GL45BackendInput.cpp
@ -132,9 +132,18 @@ void GL45Backend::updateInput() {
        auto offset = _input._bufferOffsets.data();
        auto stride = _input._bufferStrides.data();
-        for (GLuint buffer = 0; buffer < _input._buffers.size(); buffer++, vbo++, offset++, stride++) {
+        // Profile the count of buffers to update and use it to short cut the for loop
        int numInvalids = (int) _input._invalidBuffers.count();
        _stats._ISNumInputBufferChanges += numInvalids;
        auto numBuffers = _input._buffers.size();
        for (GLuint buffer = 0; buffer < numBuffers; buffer++, vbo++, offset++, stride++) {
            if (_input._invalidBuffers.test(buffer)) {
                glBindVertexBuffer(buffer, (*vbo), (*offset), (GLsizei)(*stride));
                numInvalids--;
                if (numInvalids <= 0) {
                    break;
                }
            }
        }
--- a/libraries/gpu-gles/src/gpu/gles/GLESBackend.h
+++ b/libraries/gpu-gles/src/gpu/gles/GLESBackend.h
@ -130,6 +130,7 @@ protected:
    GLFramebuffer* syncGPUObject(const Framebuffer& framebuffer) override;
    GLuint getBufferID(const Buffer& buffer) override;
    GLuint getBufferIDUnsynced(const Buffer& buffer) override;
    GLuint getResourceBufferID(const Buffer& buffer);
    GLBuffer* syncGPUObject(const Buffer& buffer) override;
--- a/libraries/gpu-gles/src/gpu/gles/GLESBackendBuffer.cpp
+++ b/libraries/gpu-gles/src/gpu/gles/GLESBackendBuffer.cpp
@ -64,6 +64,10 @@ GLuint GLESBackend::getBufferID(const Buffer& buffer) {
    return GLESBuffer::getId<GLESBuffer>(*this, buffer);
 }
 // Forwards to GLESBuffer::getIdUnsynced for the GLES buffer type and
 // returns the id it reports.
 GLuint GLESBackend::getBufferIDUnsynced(const Buffer& buffer) {
    const GLuint bufferId = GLESBuffer::getIdUnsynced<GLESBuffer>(*this, buffer);
    return bufferId;
 }
 // Forwards to GLESBuffer::sync for the GLES buffer type and returns the
 // resulting backend buffer object pointer.
 GLBuffer* GLESBackend::syncGPUObject(const Buffer& buffer) {
    GLBuffer* syncedObject = GLESBuffer::sync<GLESBuffer>(*this, buffer);
    return syncedObject;
 }
--- a/libraries/gpu/src/gpu/Batch.h
+++ b/libraries/gpu/src/gpu/Batch.h
@ -417,10 +417,7 @@ public:
            }
            const Data& get(uint32 offset) const {
-                if (offset >= _items.size()) {
+                assert((offset < _items.size()));
                    static const Data EMPTY;
                    return EMPTY;
                }
                return (_items.data() + offset)->_data;
            }
--- a/libraries/gpu/src/gpu/Stream.h
+++ b/libraries/gpu/src/gpu/Stream.h
@ -152,6 +152,8 @@ public:
    BufferStream makeRangedStream(uint32 offset, uint32 count = -1) const;
    BufferStream& operator = (const BufferStream& src) = default;
 protected:
    Buffers _buffers;
    Offsets _offsets;
--- a/libraries/graphics/src/graphics/Geometry.cpp
+++ b/libraries/graphics/src/graphics/Geometry.cpp
@ -32,6 +32,15 @@ Mesh::Mesh(const Mesh& mesh) :
 // The destructor has no work of its own to do.
 Mesh::~Mesh() = default;
 // Installs the given vertex format and a copy of the given buffer stream on
 // this mesh, then rebuilds _vertexBuffer as a view over whichever stream
 // channel the format assigns to the POSITION attribute.
 // NOTE(review): neither `vf` nor `vbs` is checked for null, and the POSITION
 // channel is used to index the stream's arrays without a bounds check --
 // callers presumably guarantee both; confirm.
 void Mesh::setVertexFormatAndStream(const gpu::Stream::FormatPointer& vf, const gpu::BufferStreamPointer& vbs) {
    _vertexFormat = vf;
    _vertexStream = (*vbs);

    // Find which channel of the stream carries the positions.
    const auto positionAttrib = _vertexFormat->getAttribute(gpu::Stream::POSITION);
    const auto positionChannel = positionAttrib._channel;

    // The view spans the whole buffer bound to that channel, using the
    // stream's offset and stride for it and the attribute's element type.
    const auto positionBuffer = vbs->getBuffers()[positionChannel];
    const auto positionOffset = vbs->getOffsets()[positionChannel];
    const auto positionStride = (gpu::uint16) vbs->getStrides()[positionChannel];
    _vertexBuffer = BufferView(positionBuffer, positionOffset, positionBuffer->getSize(),
        positionStride, positionAttrib._element);
 }
 void Mesh::setVertexBuffer(const BufferView& buffer) {
    _vertexBuffer = buffer;
    evalVertexFormat();
@ -107,11 +116,10 @@ Box Mesh::evalPartBound(int partNum) const {
        index += part._startIndex;
        auto endIndex = index;
        endIndex += part._numIndices;
        auto vertices = &_vertexBuffer.get<Vec3>(part._baseVertex);
        for (;index != endIndex; index++) {
            // skip primitive restart indices
            if ((*index) != PRIMITIVE_RESTART_INDEX) {
-                box += vertices[(*index)];
+                box += _vertexBuffer.get<Vec3>(part._baseVertex + (*index));
            }
        }
    }
@ -128,11 +136,10 @@ Box Mesh::evalPartsBound(int partStart, int partEnd) const {
        Box partBound;
        auto index = _indexBuffer.cbegin<uint>() + (*part)._startIndex;
        auto endIndex = index + (*part)._numIndices;
        auto vertices = &_vertexBuffer.get<Vec3>((*part)._baseVertex);
        for (;index != endIndex; index++) {
            // skip primitive restart indices
            if ((*index) != (uint) PRIMITIVE_RESTART_INDEX) {
-                partBound += vertices[(*index)];
+                partBound += _vertexBuffer.get<Vec3>((*part)._baseVertex + (*index));
            }
        }
--- a/libraries/graphics/src/graphics/Geometry.h
+++ b/libraries/graphics/src/graphics/Geometry.h
@ -59,6 +59,9 @@ public:
    void removeAttribute(Slot slot);
    const BufferView getAttributeBuffer(int attrib) const;
    // Force vertex stream and Vertex format
    void setVertexFormatAndStream(const gpu::Stream::FormatPointer& vf, const gpu::BufferStreamPointer& vbs);
    // Stream format
    const gpu::Stream::FormatPointer getVertexFormat() const { return _vertexFormat; }