Merge pull request #9010 from samcake/orange

Improve Input Stage Format switching in GL45Backend
2025-08-12 14:34:43 +02:00 · 2016-11-16 14:00:17 -08:00 · 2016-11-16 14:00:17 -08:00 · 44b2b090df
commit 44b2b090df
parent 5ccfe3f6e8 b74a1af5ba
21 changed files with 491 additions and 259 deletions
--- a/libraries/gpu-gl/src/gpu/gl/GLBackend.h
+++ b/libraries/gpu-gl/src/gpu/gl/GLBackend.h
@ -83,9 +83,7 @@ public:
                                     const Vec4i& region, QImage& destImage) final override;


-    static const int MAX_NUM_ATTRIBUTES = Stream::NUM_INPUT_SLOTS;
-    static const int MAX_NUM_INPUT_BUFFERS = 16;
-
+    // this is the maximum number of available input buffers
    size_t getNumInputBuffers() const { return _input._invalidBuffers.size(); }

    // this is the maximum per shader stage on the low end apple
@ -147,6 +145,10 @@ public:
    virtual void do_startNamedCall(const Batch& batch, size_t paramOffset) final;
    virtual void do_stopNamedCall(const Batch& batch, size_t paramOffset) final;

+    static const int MAX_NUM_ATTRIBUTES = Stream::NUM_INPUT_SLOTS;
+    // The draw call info attribute channel is reserved and is the upper bound for the number of available input buffers
+    static const int MAX_NUM_INPUT_BUFFERS = Stream::DRAW_CALL_INFO;
+
    virtual void do_pushProfileRange(const Batch& batch, size_t paramOffset) final;
    virtual void do_popProfileRange(const Batch& batch, size_t paramOffset) final;

@ -235,18 +237,21 @@ protected:
    virtual void initInput() final;
    virtual void killInput() final;
    virtual void syncInputStateCache() final;
-    virtual void resetInputStage() final;
-    virtual void updateInput();
+    virtual void resetInputStage();
+    virtual void updateInput() = 0;

    struct InputStageState {
        bool _invalidFormat { true };
        Stream::FormatPointer _format;
+        std::string _formatKey;

        typedef std::bitset<MAX_NUM_ATTRIBUTES> ActivationCache;
        ActivationCache _attributeActivation { 0 };

        typedef std::bitset<MAX_NUM_INPUT_BUFFERS> BuffersState;
-        BuffersState _invalidBuffers { 0 };
+
+        BuffersState _invalidBuffers{ 0 };
+        BuffersState _attribBindingBuffers{ 0 };

        Buffers _buffers;
        Offsets _bufferOffsets;
@ -266,7 +271,11 @@ protected:
        GLuint _defaultVAO { 0 };

        InputStageState() :
-            _buffers(_invalidBuffers.size()),
+            _invalidFormat(true),
+            _format(0),
+            _formatKey(),
+            _attributeActivation(0),
+            _buffers(_invalidBuffers.size(), BufferPointer(0)),
            _bufferOffsets(_invalidBuffers.size(), 0),
            _bufferStrides(_invalidBuffers.size(), 0),
            _bufferVBOs(_invalidBuffers.size(), 0) {}
@ -276,8 +285,8 @@ protected:
    void killTransform();
    // Synchronize the state cache of this Backend with the actual real state of the GL Context
    void syncTransformStateCache();
-    void updateTransform(const Batch& batch);
-    void resetTransformStage();
+    virtual void updateTransform(const Batch& batch) = 0;
+    virtual void resetTransformStage();

    // Allows for correction of the camera pose to account for changes
    // between the time when a was recorded and the time(s) when it is 
@ -325,6 +334,8 @@ protected:
        bool _invalidProj { false };
        bool _invalidViewport { false };

+        bool _enabledDrawcallInfoBuffer{ false };
+
        using Pair = std::pair<size_t, size_t>;
        using List = std::list<Pair>;
        List _cameraOffsets;
@ -399,8 +410,8 @@ protected:

    void resetQueryStage();
    struct QueryStageState {
-        
-    };
+        uint32_t _rangeQueryDepth { 0 };
+    } _queryStage;

    void resetStages();

--- a/libraries/gpu-gl/src/gpu/gl/GLBackendInput.cpp
+++ b/libraries/gpu-gl/src/gpu/gl/GLBackendInput.cpp
@ -10,16 +10,26 @@
 //
 #include "GLBackend.h"
 #include "GLShared.h"
+#include "GLInputFormat.h"

 using namespace gpu;
 using namespace gpu::gl;

 void GLBackend::do_setInputFormat(const Batch& batch, size_t paramOffset) {
    Stream::FormatPointer format = batch._streamFormats.get(batch._params[paramOffset]._uint);
-
    if (format != _input._format) {
        _input._format = format;
-        _input._invalidFormat = true;
+        if (format) {
+            auto inputFormat = GLInputFormat::sync((*format));
+            assert(inputFormat);
+            if (_input._formatKey != inputFormat->key) {
+                _input._formatKey = inputFormat->key;
+                _input._invalidFormat = true;
+            }
+        } else {
+            _input._formatKey.clear();
+            _input._invalidFormat = true;
+        }
    }
 }

@ -93,16 +103,9 @@ void GLBackend::resetInputStage() {
    glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0);
    (void) CHECK_GL_ERROR();

-    glBindBuffer(GL_ARRAY_BUFFER, 0);
-
-
-    for (uint32_t i = 0; i < _input._attributeActivation.size(); i++) {
-        glDisableVertexAttribArray(i);
-        glVertexAttribPointer(i, 4, GL_FLOAT, GL_FALSE, 0, 0);
-    }
-
    // Reset vertex buffer and format
    _input._format.reset();
+    _input._formatKey.clear();
    _input._invalidFormat = false;
    _input._attributeActivation.reset();

@ -114,6 +117,7 @@ void GLBackend::resetInputStage() {
    }
    _input._invalidBuffers.reset();

+    // The vertex array binding MUST be reset in the specific Backend versions as they use different techniques
 }

 void GLBackend::do_setIndexBuffer(const Batch& batch, size_t paramOffset) {
@ -151,183 +155,3 @@ void GLBackend::do_setIndirectBuffer(const Batch& batch, size_t paramOffset) {
    (void)CHECK_GL_ERROR();
 }

-
-// Core 41 doesn't expose the features to really separate the vertex format from the vertex buffers binding
-// Core 43 does :)
-// FIXME crashing problem with glVertexBindingDivisor / glVertexAttribFormat
-// Once resolved, break this up into the GL 4.1 and 4.5 backends
-#if 1 || (GPU_INPUT_PROFILE == GPU_CORE_41)
-#define NO_SUPPORT_VERTEX_ATTRIB_FORMAT
-#else
-#define SUPPORT_VERTEX_ATTRIB_FORMAT
-#endif
-
-void GLBackend::updateInput() {
-#if defined(SUPPORT_VERTEX_ATTRIB_FORMAT)
-    if (_input._invalidFormat) {
-
-        InputStageState::ActivationCache newActivation;
-
-        // Assign the vertex format required
-        if (_input._format) {
-            for (auto& it : _input._format->getAttributes()) {
-                const Stream::Attribute& attrib = (it).second;
-
-                GLuint slot = attrib._slot;
-                GLuint count = attrib._element.getLocationScalarCount();
-                uint8_t locationCount = attrib._element.getLocationCount();
-                GLenum type = _elementTypeToGL41Type[attrib._element.getType()];
-                GLuint offset = attrib._offset;;
-                GLboolean isNormalized = attrib._element.isNormalized();
-
-                GLenum perLocationSize = attrib._element.getLocationSize();
-
-                for (size_t locNum = 0; locNum < locationCount; ++locNum) {
-                    newActivation.set(slot + locNum);
-                    glVertexAttribFormat(slot + locNum, count, type, isNormalized, offset + locNum * perLocationSize);
-                    glVertexAttribBinding(slot + locNum, attrib._channel);
-                }
-#ifdef GPU_STEREO_DRAWCALL_INSTANCED
-                glVertexBindingDivisor(attrib._channel, attrib._frequency * (isStereo() ? 2 : 1));
-#else
-                glVertexBindingDivisor(attrib._channel, attrib._frequency);
-#endif
-            }
-            (void)CHECK_GL_ERROR();
-        }
-
-        // Manage Activation what was and what is expected now
-        for (size_t i = 0; i < newActivation.size(); i++) {
-            bool newState = newActivation[i];
-            if (newState != _input._attributeActivation[i]) {
-                if (newState) {
-                    glEnableVertexAttribArray(i);
-                } else {
-                    glDisableVertexAttribArray(i);
-                }
-                _input._attributeActivation.flip(i);
-            }
-        }
-        (void)CHECK_GL_ERROR();
-
-        _input._invalidFormat = false;
-        _stats._ISNumFormatChanges++;
-    }
-
-    if (_input._invalidBuffers.any()) {
-        int numBuffers = _input._buffers.size();
-        auto buffer = _input._buffers.data();
-        auto vbo = _input._bufferVBOs.data();
-        auto offset = _input._bufferOffsets.data();
-        auto stride = _input._bufferStrides.data();
-
-        for (int bufferNum = 0; bufferNum < numBuffers; bufferNum++) {
-            if (_input._invalidBuffers.test(bufferNum)) {
-                glBindVertexBuffer(bufferNum, (*vbo), (*offset), (*stride));
-            }
-            buffer++;
-            vbo++;
-            offset++;
-            stride++;
-        }
-        _input._invalidBuffers.reset();
-        (void)CHECK_GL_ERROR();
-    }
-#else
-    if (_input._invalidFormat || _input._invalidBuffers.any()) {
-
-        if (_input._invalidFormat) {
-            InputStageState::ActivationCache newActivation;
-
-            _stats._ISNumFormatChanges++;
-
-            // Check expected activation
-            if (_input._format) {
-                for (auto& it : _input._format->getAttributes()) {
-                    const Stream::Attribute& attrib = (it).second;
-                    uint8_t locationCount = attrib._element.getLocationCount();
-                    for (int i = 0; i < locationCount; ++i) {
-                        newActivation.set(attrib._slot + i);
-                    }
-                }
-            }
-
-            // Manage Activation what was and what is expected now
-            for (unsigned int i = 0; i < newActivation.size(); i++) {
-                bool newState = newActivation[i];
-                if (newState != _input._attributeActivation[i]) {
-
-                    if (newState) {
-                        glEnableVertexAttribArray(i);
-                    } else {
-                        glDisableVertexAttribArray(i);
-                    }
-                    (void)CHECK_GL_ERROR();
-
-                    _input._attributeActivation.flip(i);
-                }
-            }
-        }
-
-        // now we need to bind the buffers and assign the attrib pointers
-        if (_input._format) {
-            const Buffers& buffers = _input._buffers;
-            const Offsets& offsets = _input._bufferOffsets;
-            const Offsets& strides = _input._bufferStrides;
-
-            const Stream::Format::AttributeMap& attributes = _input._format->getAttributes();
-            auto& inputChannels = _input._format->getChannels();
-            _stats._ISNumInputBufferChanges++;
-
-            GLuint boundVBO = 0;
-            for (auto& channelIt : inputChannels) {
-                const Stream::Format::ChannelMap::value_type::second_type& channel = (channelIt).second;
-                if ((channelIt).first < buffers.size()) {
-                    int bufferNum = (channelIt).first;
-
-                    if (_input._invalidBuffers.test(bufferNum) || _input._invalidFormat) {
-                        //  GLuint vbo = gpu::GL41Backend::getBufferID((*buffers[bufferNum]));
-                        GLuint vbo = _input._bufferVBOs[bufferNum];
-                        if (boundVBO != vbo) {
-                            glBindBuffer(GL_ARRAY_BUFFER, vbo);
-                            (void)CHECK_GL_ERROR();
-                            boundVBO = vbo;
-                        }
-                        _input._invalidBuffers[bufferNum] = false;
-
-                        for (unsigned int i = 0; i < channel._slots.size(); i++) {
-                            const Stream::Attribute& attrib = attributes.at(channel._slots[i]);
-                            GLuint slot = attrib._slot;
-                            GLuint count = attrib._element.getLocationScalarCount();
-                            uint8_t locationCount = attrib._element.getLocationCount();
-                            GLenum type = gl::ELEMENT_TYPE_TO_GL[attrib._element.getType()];
-                            // GLenum perLocationStride = strides[bufferNum];
-                            GLenum perLocationStride = attrib._element.getLocationSize();
-                            GLuint stride = (GLuint)strides[bufferNum];
-                            GLuint pointer = (GLuint)(attrib._offset + offsets[bufferNum]);
-                            GLboolean isNormalized = attrib._element.isNormalized();
-
-                            for (size_t locNum = 0; locNum < locationCount; ++locNum) {
-                                glVertexAttribPointer(slot + (GLuint)locNum, count, type, isNormalized, stride,
-                                    reinterpret_cast<GLvoid*>(pointer + perLocationStride * (GLuint)locNum));
-#ifdef GPU_STEREO_DRAWCALL_INSTANCED
-                                glVertexAttribDivisor(slot + (GLuint)locNum, attrib._frequency * (isStereo() ? 2 : 1));
-#else
-                                glVertexAttribDivisor(slot + (GLuint)locNum, attrib._frequency);
-#endif
-                            }
-
-                            // TODO: Support properly the IAttrib version
-
-                            (void)CHECK_GL_ERROR();
-                        }
-                    }
-                }
-            }
-        }
-        // everything format related should be in sync now
-        _input._invalidFormat = false;
-    }
-#endif
-}
-
--- a/libraries/gpu-gl/src/gpu/gl/GLBackendQuery.cpp
+++ b/libraries/gpu-gl/src/gpu/gl/GLBackendQuery.cpp
@ -16,8 +16,10 @@ using namespace gpu::gl;

 // Eventually, we want to test with TIME_ELAPSED instead of TIMESTAMP
 #ifdef Q_OS_MAC
+const uint32_t MAX_RANGE_QUERY_DEPTH = 1;
 static bool timeElapsed = true;
 #else
+const uint32_t MAX_RANGE_QUERY_DEPTH = 10000;
 static bool timeElapsed = false;
 #endif

@ -25,12 +27,16 @@ void GLBackend::do_beginQuery(const Batch& batch, size_t paramOffset) {
    auto query = batch._queries.get(batch._params[paramOffset]._uint);
    GLQuery* glquery = syncGPUObject(*query);
    if (glquery) {
+        ++_queryStage._rangeQueryDepth;
        glGetInteger64v(GL_TIMESTAMP, (GLint64*)&glquery->_batchElapsedTime);
        if (timeElapsed) {
-            glBeginQuery(GL_TIME_ELAPSED, glquery->_endqo);
+            if (_queryStage._rangeQueryDepth <= MAX_RANGE_QUERY_DEPTH) {
+                glBeginQuery(GL_TIME_ELAPSED, glquery->_endqo);
+            }
        } else {
            glQueryCounter(glquery->_beginqo, GL_TIMESTAMP);
        }
+        glquery->_rangeQueryDepth = _queryStage._rangeQueryDepth;
        (void)CHECK_GL_ERROR();
    }
 }
@ -40,10 +46,13 @@ void GLBackend::do_endQuery(const Batch& batch, size_t paramOffset) {
    GLQuery* glquery = syncGPUObject(*query);
    if (glquery) {
        if (timeElapsed) {
-            glEndQuery(GL_TIME_ELAPSED);
+            if (_queryStage._rangeQueryDepth <= MAX_RANGE_QUERY_DEPTH) {
+                glEndQuery(GL_TIME_ELAPSED);
+            }
        } else {
            glQueryCounter(glquery->_endqo, GL_TIMESTAMP);
        }
+        --_queryStage._rangeQueryDepth;
        GLint64 now;
        glGetInteger64v(GL_TIMESTAMP, &now);
        glquery->_batchElapsedTime = now - glquery->_batchElapsedTime;
@ -55,20 +64,24 @@ void GLBackend::do_endQuery(const Batch& batch, size_t paramOffset) {
 void GLBackend::do_getQuery(const Batch& batch, size_t paramOffset) {
    auto query = batch._queries.get(batch._params[paramOffset]._uint);
    GLQuery* glquery = syncGPUObject(*query);
-    if (glquery) { 
-        glGetQueryObjectui64v(glquery->_endqo, GL_QUERY_RESULT_AVAILABLE, &glquery->_result);
-        if (glquery->_result == GL_TRUE) {
-            if (timeElapsed) {
-                glGetQueryObjectui64v(glquery->_endqo, GL_QUERY_RESULT, &glquery->_result);
-            } else {
-                GLuint64 start, end;
-                glGetQueryObjectui64v(glquery->_beginqo, GL_QUERY_RESULT, &start);
-                glGetQueryObjectui64v(glquery->_endqo, GL_QUERY_RESULT, &end);
-                glquery->_result = end - start;
-            }
+    if (glquery) {
+        if (glquery->_rangeQueryDepth > MAX_RANGE_QUERY_DEPTH) {
            query->triggerReturnHandler(glquery->_result, glquery->_batchElapsedTime);
+        } else {
+            glGetQueryObjectui64v(glquery->_endqo, GL_QUERY_RESULT_AVAILABLE, &glquery->_result);
+            if (glquery->_result == GL_TRUE) {
+                if (timeElapsed) {
+                    glGetQueryObjectui64v(glquery->_endqo, GL_QUERY_RESULT, &glquery->_result);
+                } else {
+                    GLuint64 start, end;
+                    glGetQueryObjectui64v(glquery->_beginqo, GL_QUERY_RESULT, &start);
+                    glGetQueryObjectui64v(glquery->_endqo, GL_QUERY_RESULT, &end);
+                    glquery->_result = end - start;
+                }
+                query->triggerReturnHandler(glquery->_result, glquery->_batchElapsedTime);
+            }
+            (void)CHECK_GL_ERROR();
        }
-        (void)CHECK_GL_ERROR();
    }
 }

--- a/libraries/gpu-gl/src/gpu/gl/GLBackendTransform.cpp
+++ b/libraries/gpu-gl/src/gpu/gl/GLBackendTransform.cpp
@ -85,6 +85,9 @@ void GLBackend::syncTransformStateCache() {
    Mat4 modelView;
    auto modelViewInv = glm::inverse(modelView);
    _transform._view.evalFromRawMatrix(modelViewInv);
+
+    glDisableVertexAttribArray(gpu::Stream::DRAW_CALL_INFO);
+    _transform._enabledDrawcallInfoBuffer = false;
 }

 void GLBackend::TransformStageState::preUpdate(size_t commandIndex, const StereoState& stereo) {
@ -162,29 +165,7 @@ void GLBackend::TransformStageState::bindCurrentCamera(int eye) const {
    }
 }

-void GLBackend::updateTransform(const Batch& batch) {
-    _transform.update(_commandIndex, _stereo);
-
-    auto& drawCallInfoBuffer = batch.getDrawCallInfoBuffer();
-    if (batch._currentNamedCall.empty()) {
-        auto& drawCallInfo = drawCallInfoBuffer[_currentDraw];
-        glDisableVertexAttribArray(gpu::Stream::DRAW_CALL_INFO); // Make sure attrib array is disabled
-        glVertexAttribI2i(gpu::Stream::DRAW_CALL_INFO, drawCallInfo.index, drawCallInfo.unused);
-    } else {
-        glEnableVertexAttribArray(gpu::Stream::DRAW_CALL_INFO); // Make sure attrib array is enabled
-        glBindBuffer(GL_ARRAY_BUFFER, _transform._drawCallInfoBuffer);
-        glVertexAttribIPointer(gpu::Stream::DRAW_CALL_INFO, 2, GL_UNSIGNED_SHORT, 0,
-                               _transform._drawCallInfoOffsets[batch._currentNamedCall]);
-#ifdef GPU_STEREO_DRAWCALL_INSTANCED
-        glVertexAttribDivisor(gpu::Stream::DRAW_CALL_INFO, (isStereo() ? 2 : 1));
-#else
-        glVertexAttribDivisor(gpu::Stream::DRAW_CALL_INFO, 1);
-#endif
-    }
-    
-    (void)CHECK_GL_ERROR();
-}
-
 void GLBackend::resetTransformStage() {
-    
+    glDisableVertexAttribArray(gpu::Stream::DRAW_CALL_INFO);
+    _transform._enabledDrawcallInfoBuffer = false;
 }
--- a/libraries/gpu-gl/src/gpu/gl/GLInputFormat.cpp
+++ b/libraries/gpu-gl/src/gpu/gl/GLInputFormat.cpp
@ -0,0 +1,33 @@
+//
+//  Created by Sam Gateau on 2016/07/21
+//  Copyright 2013-2016 High Fidelity, Inc.
+//
+//  Distributed under the Apache License, Version 2.0.
+//  See the accompanying file LICENSE or http://www.apache.org/licenses/LICENSE-2.0.html
+//
+
+#include "GLInputFormat.h"
+#include "GLBackend.h"
+
+using namespace gpu;
+using namespace gpu::gl;
+
+
+GLInputFormat::GLInputFormat() {
+}
+
+GLInputFormat:: ~GLInputFormat() {
+
+}
+
+GLInputFormat* GLInputFormat::sync(const Stream::Format& inputFormat) {
+    GLInputFormat* object = Backend::getGPUObject<GLInputFormat>(inputFormat);
+
+    if (!object) {
+        object = new GLInputFormat();
+        object->key = inputFormat.getKey();
+        Backend::setGPUObject(inputFormat, object);
+    }
+
+    return object;
+}
--- a/libraries/gpu-gl/src/gpu/gl/GLInputFormat.h
+++ b/libraries/gpu-gl/src/gpu/gl/GLInputFormat.h
@ -0,0 +1,29 @@
+//
+//  Created by Sam Gateau on 2016/07/21
+//  Copyright 2013-2016 High Fidelity, Inc.
+//
+//  Distributed under the Apache License, Version 2.0.
+//  See the accompanying file LICENSE or http://www.apache.org/licenses/LICENSE-2.0.html
+//
+#ifndef hifi_gpu_gl_GLInputFormat_h
+#define hifi_gpu_gl_GLInputFormat_h
+
+#include "GLShared.h"
+
+namespace gpu {
+namespace gl {
+
+class GLInputFormat : public GPUObject {
+    public:
+        static GLInputFormat* sync(const Stream::Format& inputFormat);
+
+    GLInputFormat();
+    ~GLInputFormat();
+
+    std::string key;
+};
+
+}
+}
+
+#endif
--- a/libraries/gpu-gl/src/gpu/gl/GLQuery.h
+++ b/libraries/gpu-gl/src/gpu/gl/GLQuery.h
@ -49,6 +49,7 @@ public:
    const GLuint _beginqo = { 0 };
    GLuint64 _result { (GLuint64)-1 };
    GLuint64 _batchElapsedTime { (GLuint64) 0 };
+    uint32_t _rangeQueryDepth { 0 };

 protected:
    GLQuery(const std::weak_ptr<GLBackend>& backend, const Query& query, GLuint endId, GLuint beginId) : Parent(backend, query, endId), _beginqo(beginId) {}
--- a/libraries/gpu-gl/src/gpu/gl41/GL41Backend.h
+++ b/libraries/gpu-gl/src/gpu/gl41/GL41Backend.h
@ -77,13 +77,13 @@ protected:
    void do_multiDrawIndexedIndirect(const Batch& batch, size_t paramOffset) override;

    // Input Stage
+    void resetInputStage() override;
    void updateInput() override;

    // Synchronize the state cache of this Backend with the actual real state of the GL Context
    void transferTransformState(const Batch& batch) const override;
    void initTransform() override;
-    void updateTransform(const Batch& batch);
-    void resetTransformStage();
+    void updateTransform(const Batch& batch) override;

    // Output stage
    void do_blit(const Batch& batch, size_t paramOffset) override;
--- a/libraries/gpu-gl/src/gpu/gl41/GL41BackendInput.cpp
+++ b/libraries/gpu-gl/src/gpu/gl41/GL41BackendInput.cpp
@ -13,7 +13,111 @@
 using namespace gpu;
 using namespace gpu::gl41;

-void GL41Backend::updateInput() {
-    Parent::updateInput();
+
+void GL41Backend::resetInputStage() {
+    Parent::resetInputStage();
+
+    glBindBuffer(GL_ARRAY_BUFFER, 0);
+    for (uint32_t i = 0; i < _input._attributeActivation.size(); i++) {
+        glDisableVertexAttribArray(i);
+        glVertexAttribPointer(i, 4, GL_FLOAT, GL_FALSE, 0, 0);
+    }
+}
+
+void GL41Backend::updateInput() {
+    if (_input._invalidFormat || _input._invalidBuffers.any()) {
+
+        if (_input._invalidFormat) {
+            InputStageState::ActivationCache newActivation;
+
+            _stats._ISNumFormatChanges++;
+
+            // Check expected activation
+            if (_input._format) {
+                for (auto& it : _input._format->getAttributes()) {
+                    const Stream::Attribute& attrib = (it).second;
+                    uint8_t locationCount = attrib._element.getLocationCount();
+                    for (int i = 0; i < locationCount; ++i) {
+                        newActivation.set(attrib._slot + i);
+                    }
+                }
+            }
+
+            // Manage Activation what was and what is expected now
+            for (unsigned int i = 0; i < newActivation.size(); i++) {
+                bool newState = newActivation[i];
+                if (newState != _input._attributeActivation[i]) {
+
+                    if (newState) {
+                        glEnableVertexAttribArray(i);
+                    } else {
+                        glDisableVertexAttribArray(i);
+                    }
+                    (void)CHECK_GL_ERROR();
+
+                    _input._attributeActivation.flip(i);
+                }
+            }
+        }
+
+        // now we need to bind the buffers and assign the attrib pointers
+        if (_input._format) {
+            const Buffers& buffers = _input._buffers;
+            const Offsets& offsets = _input._bufferOffsets;
+            const Offsets& strides = _input._bufferStrides;
+
+            const Stream::Format::AttributeMap& attributes = _input._format->getAttributes();
+            auto& inputChannels = _input._format->getChannels();
+            _stats._ISNumInputBufferChanges++;
+
+            GLuint boundVBO = 0;
+            for (auto& channelIt : inputChannels) {
+                const Stream::Format::ChannelMap::value_type::second_type& channel = (channelIt).second;
+                if ((channelIt).first < buffers.size()) {
+                    int bufferNum = (channelIt).first;
+
+                    if (_input._invalidBuffers.test(bufferNum) || _input._invalidFormat) {
+                        //  GLuint vbo = gpu::GL41Backend::getBufferID((*buffers[bufferNum]));
+                        GLuint vbo = _input._bufferVBOs[bufferNum];
+                        if (boundVBO != vbo) {
+                            glBindBuffer(GL_ARRAY_BUFFER, vbo);
+                            (void)CHECK_GL_ERROR();
+                            boundVBO = vbo;
+                        }
+                        _input._invalidBuffers[bufferNum] = false;
+
+                        for (unsigned int i = 0; i < channel._slots.size(); i++) {
+                            const Stream::Attribute& attrib = attributes.at(channel._slots[i]);
+                            GLuint slot = attrib._slot;
+                            GLuint count = attrib._element.getLocationScalarCount();
+                            uint8_t locationCount = attrib._element.getLocationCount();
+                            GLenum type = gl::ELEMENT_TYPE_TO_GL[attrib._element.getType()];
+                            // GLenum perLocationStride = strides[bufferNum];
+                            GLenum perLocationStride = attrib._element.getLocationSize();
+                            GLuint stride = (GLuint)strides[bufferNum];
+                            GLuint pointer = (GLuint)(attrib._offset + offsets[bufferNum]);
+                            GLboolean isNormalized = attrib._element.isNormalized();
+
+                            for (size_t locNum = 0; locNum < locationCount; ++locNum) {
+                                glVertexAttribPointer(slot + (GLuint)locNum, count, type, isNormalized, stride,
+                                    reinterpret_cast<GLvoid*>(pointer + perLocationStride * (GLuint)locNum));
+#ifdef GPU_STEREO_DRAWCALL_INSTANCED
+                                glVertexAttribDivisor(slot + (GLuint)locNum, attrib._frequency * (isStereo() ? 2 : 1));
+#else
+                                glVertexAttribDivisor(slot + (GLuint)locNum, attrib._frequency);
+#endif
+                            }
+
+                            // TODO: Support properly the IAttrib version
+
+                            (void)CHECK_GL_ERROR();
+                        }
+                    }
+                }
+            }
+        }
+        // everything format related should be in sync now
+        _input._invalidFormat = false;
+    }
 }

--- a/libraries/gpu-gl/src/gpu/gl41/GL41BackendTransform.cpp
+++ b/libraries/gpu-gl/src/gpu/gl41/GL41BackendTransform.cpp
@ -79,3 +79,32 @@ void GL41Backend::transferTransformState(const Batch& batch) const {
    // Make sure the current Camera offset is unknown before render Draw
    _transform._currentCameraOffset = INVALID_OFFSET;
 }
+
+// Updates the transform stage, then feeds the DRAW_CALL_INFO attribute: a constant value for a regular draw, a buffer-backed array for a named call.
+void GL41Backend::updateTransform(const Batch& batch) {
+    _transform.update(_commandIndex, _stereo);
+
+    auto& drawCallInfoBuffer = batch.getDrawCallInfoBuffer();
+    if (batch._currentNamedCall.empty()) {
+        auto& drawCallInfo = drawCallInfoBuffer[_currentDraw];
+        if (_transform._enabledDrawcallInfoBuffer) {
+            glDisableVertexAttribArray(gpu::Stream::DRAW_CALL_INFO); // Make sure attrib array is disabled
+            _transform._enabledDrawcallInfoBuffer = false;
+        }
+        glVertexAttribI2i(gpu::Stream::DRAW_CALL_INFO, drawCallInfo.index, drawCallInfo.unused);
+    } else {
+        if (!_transform._enabledDrawcallInfoBuffer) {
+            glEnableVertexAttribArray(gpu::Stream::DRAW_CALL_INFO); // Make sure attrib array is enabled
+#ifdef GPU_STEREO_DRAWCALL_INSTANCED
+            glVertexAttribDivisor(gpu::Stream::DRAW_CALL_INFO, (isStereo() ? 2 : 1));
+#else
+            glVertexAttribDivisor(gpu::Stream::DRAW_CALL_INFO, 1);
+#endif
+            _transform._enabledDrawcallInfoBuffer = true;
+        }
+        glBindBuffer(GL_ARRAY_BUFFER, _transform._drawCallInfoBuffer); // Bind on every named call: the attrib pointer below captures the current GL_ARRAY_BUFFER, which updateInput() rebinds
+        glVertexAttribIPointer(gpu::Stream::DRAW_CALL_INFO, 2, GL_UNSIGNED_SHORT, 0, _transform._drawCallInfoOffsets[batch._currentNamedCall]);
+    }
+
+    (void)CHECK_GL_ERROR();
+}
--- a/libraries/gpu-gl/src/gpu/gl45/GL45Backend.h
+++ b/libraries/gpu-gl/src/gpu/gl45/GL45Backend.h
@ -130,13 +130,13 @@ protected:
    void do_multiDrawIndexedIndirect(const Batch& batch, size_t paramOffset) override;

    // Input Stage
+    void resetInputStage() override;
    void updateInput() override;

    // Synchronize the state cache of this Backend with the actual real state of the GL Context
    void transferTransformState(const Batch& batch) const override;
    void initTransform() override;
-    void updateTransform(const Batch& batch);
-    void resetTransformStage();
+    void updateTransform(const Batch& batch) override;

    // Output stage
    void do_blit(const Batch& batch, size_t paramOffset) override;
--- a/libraries/gpu-gl/src/gpu/gl45/GL45BackendInput.cpp
+++ b/libraries/gpu-gl/src/gpu/gl45/GL45BackendInput.cpp
@ -9,10 +9,112 @@
 //  See the accompanying file LICENSE or http://www.apache.org/licenses/LICENSE-2.0.html
 //
 #include "GL45Backend.h"
+#include "../gl/GLShared.h"

 using namespace gpu;
 using namespace gpu::gl45;

-void GL45Backend::updateInput() {
-    Parent::updateInput();
+void GL45Backend::resetInputStage() {
+    Parent::resetInputStage();
+
+    glBindBuffer(GL_ARRAY_BUFFER, 0);
+    for (uint32_t i = 0; i < _input._attributeActivation.size(); i++) {
+        glDisableVertexAttribArray(i);
+    }
+    for (uint32_t i = 0; i < _input._attribBindingBuffers.size(); i++) {
+        glBindVertexBuffer(i, 0, 0, 0);
+    }
+}
+
+void GL45Backend::updateInput() {
+    if (_input._invalidFormat) {
+        InputStageState::ActivationCache newActivation;
+
+        // Assign the vertex format required
+        if (_input._format) {
+            _input._attribBindingBuffers.reset();
+
+            const Stream::Format::AttributeMap& attributes = _input._format->getAttributes();
+            auto& inputChannels = _input._format->getChannels();
+            for (auto& channelIt : inputChannels) {
+                auto bufferChannelNum = (channelIt).first;
+                const Stream::Format::ChannelMap::value_type::second_type& channel = (channelIt).second;
+                _input._attribBindingBuffers.set(bufferChannelNum);
+
+                GLuint frequency = 0;
+                for (unsigned int i = 0; i < channel._slots.size(); i++) {
+                    const Stream::Attribute& attrib = attributes.at(channel._slots[i]);
+
+                    GLuint slot = attrib._slot;
+                    GLuint count = attrib._element.getLocationScalarCount();
+                    uint8_t locationCount = attrib._element.getLocationCount();
+                    GLenum type = gl::ELEMENT_TYPE_TO_GL[attrib._element.getType()];
+
+                    GLuint offset = (GLuint)attrib._offset;;
+                    GLboolean isNormalized = attrib._element.isNormalized();
+
+                    GLenum perLocationSize = attrib._element.getLocationSize();
+                    for (GLuint locNum = 0; locNum < locationCount; ++locNum) {
+                        GLuint attriNum = (GLuint)(slot + locNum);
+                        newActivation.set(attriNum);
+                        if (!_input._attributeActivation[attriNum]) {
+                            _input._attributeActivation.set(attriNum);
+                            glEnableVertexAttribArray(attriNum);
+                        }
+                        glVertexAttribFormat(attriNum, count, type, isNormalized, offset + locNum * perLocationSize);
+                        // TODO: Support properly the IAttrib version
+                        glVertexAttribBinding(attriNum, attrib._channel);
+                    }
+
+                    if (i == 0) {
+                        frequency = attrib._frequency;
+                    } else {
+                        assert(frequency == attrib._frequency);
+                    }
+
+
+                    (void)CHECK_GL_ERROR();
+                }
+#ifdef GPU_STEREO_DRAWCALL_INSTANCED
+                glVertexBindingDivisor(bufferChannelNum, frequency * (isStereo() ? 2 : 1));
+#else
+                glVertexBindingDivisor(bufferChannelNum, frequency);
+#endif
+            }
+
+
+            // Manage Activation what was and what is expected now
+            // This should only disable VertexAttribs since the ones in use have been enabled above
+            for (GLuint i = 0; i < (GLuint)newActivation.size(); i++) {
+                bool newState = newActivation[i];
+                if (newState != _input._attributeActivation[i]) {
+                    if (newState) {
+                        glEnableVertexAttribArray(i);
+                    } else {
+                        glDisableVertexAttribArray(i);
+                    }
+                    _input._attributeActivation.flip(i);
+                }
+            }
+            (void)CHECK_GL_ERROR();
+        }
+
+        _input._invalidFormat = false;
+        _stats._ISNumFormatChanges++;
+    }
+
+    if (_input._invalidBuffers.any()) {
+        auto vbo = _input._bufferVBOs.data();
+        auto offset = _input._bufferOffsets.data();
+        auto stride = _input._bufferStrides.data();
+
+        for (GLuint buffer = 0; buffer < _input._buffers.size(); buffer++, vbo++, offset++, stride++) {
+            if (_input._invalidBuffers.test(buffer)) {
+                glBindVertexBuffer(buffer, (*vbo), (*offset), (GLsizei)(*stride));
+            }
+        }
+
+        _input._invalidBuffers.reset();
+        (void)CHECK_GL_ERROR();
+    }
 }
--- a/libraries/gpu-gl/src/gpu/gl45/GL45BackendTransform.cpp
+++ b/libraries/gpu-gl/src/gpu/gl45/GL45BackendTransform.cpp
@ -66,3 +66,36 @@ void GL45Backend::transferTransformState(const Batch& batch) const {
    // Make sure the current Camera offset is unknown before render Draw
    _transform._currentCameraOffset = INVALID_OFFSET;
 }
+
+// Update the transform state, then feed the DRAW_CALL_INFO attribute: a constant value for a regular draw, a vertex buffer for a named call.
+void GL45Backend::updateTransform(const Batch& batch) {
+    _transform.update(_commandIndex, _stereo);
+
+    auto& drawCallInfoBuffer = batch.getDrawCallInfoBuffer();
+    if (batch._currentNamedCall.empty()) {
+        auto& drawCallInfo = drawCallInfoBuffer[_currentDraw];
+        if (_transform._enabledDrawcallInfoBuffer) {
+            glDisableVertexAttribArray(gpu::Stream::DRAW_CALL_INFO); // Array must be disabled for the constant attribute value set below to be used
+            _transform._enabledDrawcallInfoBuffer = false;
+        }
+        glVertexAttribI2i(gpu::Stream::DRAW_CALL_INFO, drawCallInfo.index, drawCallInfo.unused);
+    } else {
+        if (!_transform._enabledDrawcallInfoBuffer) {
+            glEnableVertexAttribArray(gpu::Stream::DRAW_CALL_INFO); // Named call: the attribute is read from the draw call info buffer bound below
+            glVertexAttribIFormat(gpu::Stream::DRAW_CALL_INFO, 2, GL_UNSIGNED_SHORT, 0); // 2 integer components, matching the 2 * sizeof(GLushort) stride used below
+            glVertexAttribBinding(gpu::Stream::DRAW_CALL_INFO, gpu::Stream::DRAW_CALL_INFO);
+#ifdef GPU_STEREO_DRAWCALL_INSTANCED
+            glVertexBindingDivisor(gpu::Stream::DRAW_CALL_INFO, (isStereo() ? 2 : 1)); // NOTE(review): presumably 2 because instanced stereo doubles the instance count - confirm
+#else
+            glVertexBindingDivisor(gpu::Stream::DRAW_CALL_INFO, 1);
+#endif
+            _transform._enabledDrawcallInfoBuffer = true;
+        }
+        // NOTE: A stride of zero in BindVertexBuffer signifies that all elements are sourced from the same location,
+        //       so we must provide an explicit stride here.
+        //       This is in contrast to VertexAttrib*Pointer, where a zero signifies tightly-packed elements.
+        glBindVertexBuffer(gpu::Stream::DRAW_CALL_INFO, _transform._drawCallInfoBuffer, (GLintptr)_transform._drawCallInfoOffsets[batch._currentNamedCall], 2 * sizeof(GLushort));
+    }
+
+    (void)CHECK_GL_ERROR();
+}
--- a/libraries/gpu/src/gpu/Context.cpp
+++ b/libraries/gpu/src/gpu/Context.cpp
@ -13,6 +13,23 @@
 #include "GPULogging.h"
 using namespace gpu;

+// Store in this object, counter by counter, the difference (end - begin) between two stats snapshots.
+void ContextStats::evalDelta(const ContextStats& begin, const ContextStats& end) {
+    _ISNumFormatChanges = end._ISNumFormatChanges - begin._ISNumFormatChanges;
+    _ISNumInputBufferChanges = end._ISNumInputBufferChanges - begin._ISNumInputBufferChanges;
+    _ISNumIndexBufferChanges = end._ISNumIndexBufferChanges - begin._ISNumIndexBufferChanges;
+
+    _RSNumTextureBounded = end._RSNumTextureBounded - begin._RSNumTextureBounded;
+    _RSAmountTextureMemoryBounded = end._RSAmountTextureMemoryBounded - begin._RSAmountTextureMemoryBounded;
+
+    _DSNumAPIDrawcalls = end._DSNumAPIDrawcalls - begin._DSNumAPIDrawcalls;
+    _DSNumDrawcalls = end._DSNumDrawcalls - begin._DSNumDrawcalls;
+    _DSNumTriangles= end._DSNumTriangles - begin._DSNumTriangles;
+
+    _PSNumSetPipelines = end._PSNumSetPipelines - begin._PSNumSetPipelines;
+}
+
+
 Context::CreateBackend Context::_createBackendCallback = nullptr;
 Context::MakeProgram Context::_makeProgramCallback = nullptr;
 std::once_flag Context::_initialized;
@ -73,6 +90,10 @@ void Context::consumeFrameUpdates(const FramePointer& frame) const {
 }

 void Context::executeFrame(const FramePointer& frame) const {
+    // Grab the stats at the beginning and at the end of the frame, and keep the delta, to have a consistent per-frame sampling
+    ContextStats beginStats;
+    getStats(beginStats);
+
    // FIXME? probably not necessary, but safe
    consumeFrameUpdates(frame);
    _backend->setStereoState(frame->stereoState);
@ -90,6 +111,10 @@ void Context::executeFrame(const FramePointer& frame) const {
        _frameRangeTimer->end(endBatch);
        _backend->render(endBatch);
    }
+
+    ContextStats endStats;
+    getStats(endStats);
+    _frameStats.evalDelta(beginStats, endStats);
 }

 bool Context::makeProgram(Shader& shader, const Shader::BindingSet& bindings) {
@ -135,10 +160,18 @@ void Context::downloadFramebuffer(const FramebufferPointer& srcFramebuffer, cons
    _backend->downloadFramebuffer(srcFramebuffer, region, destImage);
 }

+void Context::resetStats() const {
+    _backend->resetStats(); // Forwards to the backend, which owns the cumulative counters
+}
+
 void Context::getStats(ContextStats& stats) const {
    _backend->getStats(stats);
 }

+void Context::getFrameStats(ContextStats& stats) const {
+    stats = _frameStats; // Copy of the begin/end delta evaluated at the end of executeFrame()
+}
+
 double Context::getFrameTimerGPUAverage() const {
    if (_frameRangeTimer) {
        return _frameRangeTimer->getGPUAverage();
--- a/libraries/gpu/src/gpu/Context.h
+++ b/libraries/gpu/src/gpu/Context.h
@ -45,6 +45,8 @@ public:
 
    ContextStats() {}
    ContextStats(const ContextStats& stats) = default;
+
+    void evalDelta(const ContextStats& begin, const ContextStats& end); 
 };

 class Backend {
@ -83,6 +85,7 @@ public:
        return reinterpret_cast<T*>(object.gpuObject.getGPUObject());
    }

+    void resetStats() const { _stats = ContextStats(); } // Overwrites the counters with a default-constructed ContextStats; const-callable because _stats is mutable
    void getStats(ContextStats& stats) const { stats = _stats; }

    virtual bool isTextureManagementSparseEnabled() const = 0;
@ -123,7 +126,7 @@ protected:
    }

    friend class Context;
-    ContextStats _stats;
+    mutable ContextStats _stats;
    StereoState _stereo;

 };
@ -201,8 +204,11 @@ public:
    void downloadFramebuffer(const FramebufferPointer& srcFramebuffer, const Vec4i& region, QImage& destImage);

     // Repporting stats of the context
+    void resetStats() const;
    void getStats(ContextStats& stats) const;

+    // Same counters as getStats(), but holding the delta measured over the last executed frame
+    void getFrameStats(ContextStats& stats) const;

    double getFrameTimerGPUAverage() const;
    double getFrameTimerBatchAverage() const;
@ -229,8 +235,8 @@ protected:
    RangeTimerPointer _frameRangeTimer;
    StereoState  _stereo;

-    double getGPUAverage() const;
-    double getBatchAverage() const;
+    // Delta of all the stats counters over the last frame, evaluated at the end of executeFrame()
+    mutable ContextStats _frameStats;

    // This function can only be called by "static Shader::makeProgram()"
    // makeProgramShader(...) make a program shader ready to be used in a Batch.
--- a/libraries/gpu/src/gpu/Stream.cpp
+++ b/libraries/gpu/src/gpu/Stream.cpp
@ -12,6 +12,8 @@
 #include "Stream.h"

 #include <algorithm> //min max and more
+#include <sstream>
+#include <iomanip>

 using namespace gpu;

@ -39,9 +41,21 @@ const ElementArray& getDefaultElements() {
    return defaultElements;
 }

+std::string Stream::Attribute::getKey() const {
+    // Key layout: 8 zero-padded hex digits packing slot, channel and raw element, then ".offset.frequency;" in hex.
+    // The delimiters keep the variable-width offset and frequency from running into each other or into the next key.
+    std::stringstream skey;
+    skey << std::hex << std::setfill('0');
+    skey << std::setw(8) << (uint32)((((uint32)_slot) << 24) | (((uint32)_channel) << 16) | ((uint32)_element.getRaw()));
+    skey << '.' << _offset << '.' << _frequency << ';';
+    return skey.str();
+}
+
 void Stream::Format::evaluateCache() {
+    _key.clear();
    _channels.clear();
    _elementTotalSize = 0;
+
    for(AttributeMap::iterator it = _attributes.begin(); it != _attributes.end(); it++) {
        Attribute& attrib = (*it).second;
        ChannelInfo& channel = _channels[attrib._channel];
@ -49,6 +63,8 @@ void Stream::Format::evaluateCache() {
        channel._stride = std::max(channel._stride, attrib.getSize() + attrib._offset);
        channel._netSize += attrib.getSize();
        _elementTotalSize += attrib.getSize();
+
+        _key += attrib.getKey();
    }
 }

--- a/libraries/gpu/src/gpu/Stream.h
+++ b/libraries/gpu/src/gpu/Stream.h
@ -14,6 +14,7 @@
 #include <vector>
 #include <map>
 #include <array>
+#include <string>

 #include <assert.h>

@ -73,6 +74,9 @@ public:

        // Size of the 
        uint32 getSize() const { return _element.getSize(); }
+
+        // Generate a string key describing the attribute uniquely
+        std::string getKey() const;
    };

    // Stream Format is describing how to feed a list of attributes from a bunch of stream buffer channels
@ -106,10 +110,15 @@ public:

        bool hasAttribute(Slot slot) const { return (_attributes.find(slot) != _attributes.end()); }

+        const std::string& getKey() const { return _key; } // _key is rebuilt by evaluateCache() by appending each attribute's getKey()
+
+        const GPUObjectPointer gpuObject{};
+
    protected:
        AttributeMap _attributes;
        ChannelMap _channels;
        uint32 _elementTotalSize { 0 };
+        std::string _key;

        void evaluateCache();
    };
--- a/libraries/render/src/render/EngineStats.cpp
+++ b/libraries/render/src/render/EngineStats.cpp
@ -35,21 +35,21 @@ void EngineStats::run(const SceneContextPointer& sceneContext, const RenderConte
    config->textureGPUVirtualMemoryUsage = gpu::Texture::getTextureGPUVirtualMemoryUsage();
    config->textureGPUTransferCount = gpu::Texture::getTextureGPUTransferCount();

-    gpu::ContextStats gpuStats(_gpuStats);
-    renderContext->args->_context->getStats(_gpuStats);
+    renderContext->args->_context->getFrameStats(_gpuStats);

-    config->frameAPIDrawcallCount = _gpuStats._DSNumAPIDrawcalls - gpuStats._DSNumAPIDrawcalls;
-    config->frameDrawcallCount = _gpuStats._DSNumDrawcalls - gpuStats._DSNumDrawcalls;
+    config->frameAPIDrawcallCount = _gpuStats._DSNumAPIDrawcalls;
+    config->frameDrawcallCount = _gpuStats._DSNumDrawcalls;
    config->frameDrawcallRate = config->frameDrawcallCount * frequency;

-    config->frameTriangleCount = _gpuStats._DSNumTriangles - gpuStats._DSNumTriangles;
+    config->frameTriangleCount = _gpuStats._DSNumTriangles;
    config->frameTriangleRate = config->frameTriangleCount * frequency;

-    config->frameTextureCount = _gpuStats._RSNumTextureBounded - gpuStats._RSNumTextureBounded;
+    config->frameTextureCount = _gpuStats._RSNumTextureBounded;
    config->frameTextureRate = config->frameTextureCount * frequency;
-    config->frameTextureMemoryUsage = _gpuStats._RSAmountTextureMemoryBounded - gpuStats._RSAmountTextureMemoryBounded;
+    config->frameTextureMemoryUsage = _gpuStats._RSAmountTextureMemoryBounded;

-    config->frameSetPipelineCount = _gpuStats._PSNumSetPipelines - gpuStats._PSNumSetPipelines;
+    config->frameSetPipelineCount = _gpuStats._PSNumSetPipelines;
+    config->frameSetInputFormatCount = _gpuStats._ISNumFormatChanges;

    config->emitDirty();
 }
--- a/libraries/render/src/render/EngineStats.h
+++ b/libraries/render/src/render/EngineStats.h
@ -48,6 +48,7 @@ namespace render {
        Q_PROPERTY(quint32 frameTextureMemoryUsage MEMBER frameTextureMemoryUsage NOTIFY dirty)

        Q_PROPERTY(quint32 frameSetPipelineCount MEMBER frameSetPipelineCount NOTIFY dirty)
+        Q_PROPERTY(quint32 frameSetInputFormatCount MEMBER frameSetInputFormatCount NOTIFY dirty)


    public:
@ -78,6 +79,8 @@ namespace render {

        quint32 frameSetPipelineCount{ 0 };

+        quint32 frameSetInputFormatCount{ 0 };
+


        void emitDirty() { emit dirty(); }
--- a/libraries/shared/src/GPUIdent.cpp
+++ b/libraries/shared/src/GPUIdent.cpp
@ -197,7 +197,7 @@ GPUIdent* GPUIdent::ensureQuery(const QString& vendor, const QString& renderer)

    ULONG uNumOfInstances = 0;
    CComPtr<IWbemClassObject> spInstance = NULL;
-    hr = spEnumInst->Next(WBEM_INFINITE, 1, &spInstance, &uNumOfInstances);
+    hr = spEnumInst->Next(WBEM_INFINITE, 1, &spInstance.p, &uNumOfInstances);
    while (hr == S_OK && spInstance && uNumOfInstances) {
        // Get properties from the object
        CComVariant var;
--- a/scripts/developer/utilities/render/stats.qml
+++ b/scripts/developer/utilities/render/stats.qml
@ -173,6 +173,11 @@ Item {
                    prop: "frameSetPipelineCount",
                    label: "Pipelines",
                    color: "#E2334D"
+                },
+                {
+                    prop: "frameSetInputFormatCount",
+                    label: "Input Formats",
+                    color: "#1AC567"
                }
            ]
        }