Merging Austin's PR that adds multi-draw indirect, and also fixing the separate vertex format feature for Windows and Linux

2025-08-10 01:00:44 +02:00 · 2015-09-23 12:19:17 -07:00 · 2015-09-23 12:19:17 -07:00 · 8735aa8003
commit 8735aa8003
parent 7f8450f77b deaa4a747b
9 changed files with 351 additions and 95 deletions
--- a/libraries/gpu/src/gpu/Batch.cpp
+++ b/libraries/gpu/src/gpu/Batch.cpp
@ -102,6 +102,19 @@ void Batch::drawIndexedInstanced(uint32 nbInstances, Primitive primitiveType, ui
    _params.push_back(nbInstances);
 }
 // Records a non-indexed multi-draw-indirect command in the batch.
 // The parameters are appended to _params as (nbCommands, primitiveType); the GL backend's
 // do_multiDrawIndirect reads them back at paramOffset + 0 and paramOffset + 1 in that order,
 // so the push order here must not change.
 void Batch::multiDrawIndirect(uint32 nbCommands, Primitive primitiveType) {
    ADD_COMMAND(multiDrawIndirect);
    _params.push_back(nbCommands);
    _params.push_back(primitiveType);
 }
 // Records an indexed multi-draw-indirect command in the batch.
 // The parameters are appended to _params as (nbCommands, primitiveType); the GL backend's
 // do_multiDrawIndexedIndirect reads them back at paramOffset + 0 and paramOffset + 1,
 // so the push order here must not change.
 void Batch::multiDrawIndexedIndirect(uint32 nbCommands, Primitive primitiveType) {
    ADD_COMMAND(multiDrawIndexedIndirect);
    _params.push_back(nbCommands);
    _params.push_back(primitiveType);
 }
 void Batch::setInputFormat(const Stream::FormatPointer& format) {
    ADD_COMMAND(setInputFormat);
@ -144,6 +157,15 @@ void Batch::setIndexBuffer(const BufferView& buffer) {
    setIndexBuffer(buffer._element.getType(), buffer._buffer, buffer._offset);
 }
 // Records a command selecting the buffer that indirect draw commands are read from.
 // Three params are appended: the handle returned by _buffers.cache(buffer), then offset, then stride.
 // The GL backend's do_setIndirectBuffer reads them back at paramOffset + 0, + 1 and + 2 respectively.
 void Batch::setIndirectBuffer(const BufferPointer& buffer, Offset offset, Offset stride) {
    ADD_COMMAND(setIndirectBuffer);
    _params.push_back(_buffers.cache(buffer));
    _params.push_back(offset);
    _params.push_back(stride);
 }
 void Batch::setModelTransform(const Transform& model) {
    ADD_COMMAND(setModelTransform);
@ -288,6 +310,11 @@ void Batch::resetStages() {
    ADD_COMMAND(resetStages);
 }
 // Records a command that invokes an arbitrary callable when the batch is executed.
 // The callable is stored in the _lambdas cache and only the handle returned by cache() is
 // pushed as a param; the GL backend's do_runLambda looks it up again and calls it.
 void Batch::runLambda(std::function<void()> f) {
    ADD_COMMAND(runLambda);
    _params.push_back(_lambdas.cache(f));
 }
 // Stores the stereo flag on the batch itself (_enableStereo); no command is recorded.
 void Batch::enableStereo(bool enable) {
    _enableStereo = enable;
 }
--- a/libraries/gpu/src/gpu/Batch.h
+++ b/libraries/gpu/src/gpu/Batch.h
@ -96,11 +96,14 @@ public:
    void drawIndexed(Primitive primitiveType, uint32 nbIndices, uint32 startIndex = 0);
    void drawInstanced(uint32 nbInstances, Primitive primitiveType, uint32 nbVertices, uint32 startVertex = 0, uint32 startInstance = 0);
    void drawIndexedInstanced(uint32 nbInstances, Primitive primitiveType, uint32 nbIndices, uint32 startIndex = 0, uint32 startInstance = 0);
    void multiDrawIndirect(uint32 nbCommands, Primitive primitiveType);
    void multiDrawIndexedIndirect(uint32 nbCommands, Primitive primitiveType);
    void setupNamedCalls(const std::string& instanceName, size_t count, NamedBatchData::Function function);
    void setupNamedCalls(const std::string& instanceName, NamedBatchData::Function function);
    BufferPointer getNamedBuffer(const std::string& instanceName, uint8_t index = 0);
    void setNamedBuffer(const std::string& instanceName, BufferPointer& buffer, uint8_t index = 0);
@ -117,6 +120,8 @@ public:
    void setIndexBuffer(Type type, const BufferPointer& buffer, Offset offset);
    void setIndexBuffer(const BufferView& buffer); // not a command, just a shortcut from a BufferView
    void setIndirectBuffer(const BufferPointer& buffer, Offset offset = 0, Offset stride = 0);
    // Transform Stage
    // Vertex position is transformed by ModelTransform from object space to world space
    // Then by the inverse of the ViewTransform from world space to eye space
@ -169,6 +174,8 @@ public:
    // Reset the stage caches and states
    void resetStages();
    void runLambda(std::function<void()> f);
    // TODO: As long as we have gl calls explicitly issued from interface
    // code, we need to be able to record and batch these calls. The long
    // term strategy is to get rid of any GL calls in favor of the HIFI GPU API
@ -194,10 +201,13 @@ public:
        COMMAND_drawIndexed,
        COMMAND_drawInstanced,
        COMMAND_drawIndexedInstanced,
        COMMAND_multiDrawIndirect,
        COMMAND_multiDrawIndexedIndirect,
        COMMAND_setInputFormat,
        COMMAND_setInputBuffer,
        COMMAND_setIndexBuffer,
        COMMAND_setIndirectBuffer,
        COMMAND_setModelTransform,
        COMMAND_setViewTransform,
@ -221,6 +231,8 @@ public:
        COMMAND_resetStages,
        COMMAND_runLambda,
        // TODO: As long as we have gl calls explicitly issued from interface
        // code, we need to be able to record and batch these calls. The long
        // term strategy is to get rid of any GL calls in favor of the HIFI GPU API
@ -302,6 +314,7 @@ public:
    typedef Cache<PipelinePointer>::Vector PipelineCaches;
    typedef Cache<FramebufferPointer>::Vector FramebufferCaches;
    typedef Cache<QueryPointer>::Vector QueryCaches;
    typedef Cache<std::function<void()>>::Vector LambdaCache;
    // Cache Data in a byte array if too big to fit in Param
    // For example Mat4s are going there
@ -327,6 +340,7 @@ public:
    PipelineCaches _pipelines;
    FramebufferCaches _framebuffers;
    QueryCaches _queries;
    LambdaCache _lambdas;
    NamedBatchDataMap _namedData;
@ -336,6 +350,20 @@ public:
 protected:
 };
-};
+template <typename V>
 void popVectorParam(Batch::Params& params, uint32& paramOffset, V& v) {
    for (size_t i = 0; i < v.length(); ++i) {
        v[i] = params[paramOffset++]._float;
    }
 }
 // Appends every component of v (v.length() of them, in index order) to params.
 template <typename V>
 void pushVectorParam(Batch::Params& params, const V& v) {
    size_t component = 0;
    while (component < v.length()) {
        params.push_back(v[component]);
        ++component;
    }
 }
 }
 #endif
--- a/libraries/gpu/src/gpu/GLBackend.cpp
+++ b/libraries/gpu/src/gpu/GLBackend.cpp
@ -23,10 +23,13 @@ GLBackend::CommandCall GLBackend::_commandCalls[Batch::NUM_COMMANDS] =
    (&::gpu::GLBackend::do_drawIndexed),
    (&::gpu::GLBackend::do_drawInstanced),
    (&::gpu::GLBackend::do_drawIndexedInstanced),
    (&::gpu::GLBackend::do_multiDrawIndirect),
    (&::gpu::GLBackend::do_multiDrawIndexedIndirect),
    (&::gpu::GLBackend::do_setInputFormat),
    (&::gpu::GLBackend::do_setInputBuffer),
    (&::gpu::GLBackend::do_setIndexBuffer),
    (&::gpu::GLBackend::do_setIndirectBuffer),
    (&::gpu::GLBackend::do_setModelTransform),
    (&::gpu::GLBackend::do_setViewTransform),
@ -50,6 +53,8 @@ GLBackend::CommandCall GLBackend::_commandCalls[Batch::NUM_COMMANDS] =
    (&::gpu::GLBackend::do_resetStages),
    (&::gpu::GLBackend::do_runLambda),
    (&::gpu::GLBackend::do_glActiveBindTexture),
    (&::gpu::GLBackend::do_glUniform1i),
@ -332,19 +337,65 @@ void GLBackend::do_drawIndexedInstanced(Batch& batch, uint32 paramOffset) {
    GLenum mode = _primitiveToGLmode[(Primitive)batch._params[paramOffset + 3]._uint];
    uint32 numIndices = batch._params[paramOffset + 2]._uint;
    uint32 startIndex = batch._params[paramOffset + 1]._uint;
    // FIXME glDrawElementsInstancedBaseVertexBaseInstance is only available in GL 4.3 
    // and higher, so currently we ignore this field
    uint32 startInstance = batch._params[paramOffset + 0]._uint;
    GLenum glType = _elementTypeToGLType[_input._indexBufferType];
 #if (GPU_INPUT_PROFILE == GPU_CORE_43)
    glDrawElementsInstancedBaseVertexBaseInstance(mode, numIndices, glType, reinterpret_cast<GLvoid*>(startIndex + _input._indexBufferOffset), numInstances, 0, startInstance);
 #else
    glDrawElementsInstanced(mode, numIndices, glType, reinterpret_cast<GLvoid*>(startIndex + _input._indexBufferOffset), numInstances);
    Q_UNUSED(startInstance); 
 #endif
    (void)CHECK_GL_ERROR();
 }
 // Executes a recorded multiDrawIndirect command.
 // Params: paramOffset + 0 is the number of indirect commands, paramOffset + 1 is the Primitive.
 // The indirect commands are sourced using the offset and stride last stored by
 // do_setIndirectBuffer. Only implemented for the GPU_CORE_43 input profile; on other
 // profiles the command is currently a no-op apart from the GL error check.
 void GLBackend::do_multiDrawIndirect(Batch& batch, uint32 paramOffset) {
 #if (GPU_INPUT_PROFILE == GPU_CORE_43)
    // Flush any pending input / transform / pipeline state before issuing the draw.
    updateInput();
    updateTransform();
    updatePipeline();
    uint commandCount = batch._params[paramOffset + 0]._uint;
    GLenum mode = _primitiveToGLmode[(Primitive)batch._params[paramOffset + 1]._uint];
    glMultiDrawArraysIndirect(mode, reinterpret_cast<GLvoid*>(_input._indirectBufferOffset), commandCount, _input._indirectBufferStride);
 #else
    // FIXME implement the slow path
 #endif
    (void)CHECK_GL_ERROR();
 }
 // Executes a recorded multiDrawIndexedIndirect command.
 // Params: paramOffset + 0 is the number of indirect commands, paramOffset + 1 is the Primitive.
 // The index type comes from the currently set index buffer; the indirect commands are sourced
 // using the offset and stride last stored by do_setIndirectBuffer.
 // Only implemented for the GPU_CORE_43 input profile.
 void GLBackend::do_multiDrawIndexedIndirect(Batch& batch, uint32 paramOffset) {
 #if (GPU_INPUT_PROFILE == GPU_CORE_43)
    // Pending input / transform / pipeline state must be flushed before the draw is issued.
    updateInput();
    updateTransform();
    updatePipeline();

    const uint drawCount = batch._params[paramOffset + 0]._uint;
    const Primitive primitive = (Primitive)batch._params[paramOffset + 1]._uint;
    const GLenum glMode = _primitiveToGLmode[primitive];
    const GLenum glIndexType = _elementTypeToGLType[_input._indexBufferType];
    GLvoid* const firstCommand = reinterpret_cast<GLvoid*>(_input._indirectBufferOffset);

    glMultiDrawElementsIndirect(glMode, glIndexType, firstCommand, drawCount, _input._indirectBufferStride);
 #else
    // FIXME implement the slow path
 #endif
    (void)CHECK_GL_ERROR();
 }
 // Executes a recorded resetStages command by forwarding to resetStages().
 // Neither batch nor paramOffset is read: the command carries no params.
 void GLBackend::do_resetStages(Batch& batch, uint32 paramOffset) {
    resetStages();
 }
 // Executes a recorded runLambda command: the single param is the handle of the callable
 // in the batch's lambda cache; a copy of that callable is fetched and invoked.
 void GLBackend::do_runLambda(Batch& batch, uint32 paramOffset) {
    const auto lambdaHandle = batch._params[paramOffset]._uint;
    std::function<void()> callback = batch._lambdas.get(lambdaHandle);
    callback();
 }
 void GLBackend::resetStages() {
    resetInputStage();
    resetPipelineStage();
--- a/libraries/gpu/src/gpu/GLBackend.h
+++ b/libraries/gpu/src/gpu/GLBackend.h
@ -252,11 +252,14 @@ protected:
    void do_drawIndexed(Batch& batch, uint32 paramOffset);
    void do_drawInstanced(Batch& batch, uint32 paramOffset);
    void do_drawIndexedInstanced(Batch& batch, uint32 paramOffset);
    void do_multiDrawIndirect(Batch& batch, uint32 paramOffset);
    void do_multiDrawIndexedIndirect(Batch& batch, uint32 paramOffset);
    // Input Stage
    void do_setInputFormat(Batch& batch, uint32 paramOffset);
    void do_setInputBuffer(Batch& batch, uint32 paramOffset);
    void do_setIndexBuffer(Batch& batch, uint32 paramOffset);
    void do_setIndirectBuffer(Batch& batch, uint32 paramOffset);
    void initInput();
    void killInput();
@ -284,6 +287,10 @@ protected:
        Offset _indexBufferOffset;
        Type _indexBufferType;
        BufferPointer _indirectBuffer;
        Offset _indirectBufferOffset{ 0 };
        Offset _indirectBufferStride{ 0 };
        GLuint _defaultVAO;
        InputStageState() :
@ -448,6 +455,9 @@ protected:
    // Reset stages
    void do_resetStages(Batch& batch, uint32 paramOffset);
    void do_runLambda(Batch& batch, uint32 paramOffset);
    void resetStages();
    // TODO: As long as we have gl calls explicitly issued from interface
@ -471,7 +481,6 @@ protected:
    static CommandCall _commandCalls[Batch::NUM_COMMANDS];
 };
 };
 #endif
--- a/libraries/gpu/src/gpu/GLBackendInput.cpp
+++ b/libraries/gpu/src/gpu/GLBackendInput.cpp
@ -57,11 +57,7 @@ void GLBackend::do_setInputBuffer(Batch& batch, uint32 paramOffset) {
    }
 }
-#if (GPU_INPUT_PROFILE == GPU_CORE_41)
+
 #define NO_SUPPORT_VERTEX_ATTRIB_FORMAT
 #else
 #define SUPPORT_VERTEX_ATTRIB_FORMAT
 #endif
 void GLBackend::initInput() {
@ -90,6 +86,14 @@ void GLBackend::syncInputStateCache() {
    glBindVertexArray(_input._defaultVAO);
 }
 // Core 41 doesn't expose the features to really separate the vertex format from the vertex buffers binding
 // Core 43 does :)
 #if (GPU_INPUT_PROFILE == GPU_CORE_41)
 #define NO_SUPPORT_VERTEX_ATTRIB_FORMAT
 #else
 #define SUPPORT_VERTEX_ATTRIB_FORMAT
 #endif
 void GLBackend::updateInput() {
 #if defined(SUPPORT_VERTEX_ATTRIB_FORMAT)
    if (_input._invalidFormat) {
@ -100,13 +104,21 @@ void GLBackend::updateInput() {
        if (_input._format) {
            for (auto& it : _input._format->getAttributes()) {
                const Stream::Attribute& attrib = (it).second;
-                newActivation.set(attrib._slot);
+
-                glVertexAttribFormat(
+                GLuint slot = attrib._slot;
-                    attrib._slot,
+                GLuint count = attrib._element.getDimensionCount();
-                    attrib._element.getDimensionCount(),
+                uint8_t locationCount = attrib._element.getLocationCount();
-                    _elementTypeToGLType[attrib._element.getType()],
+                GLenum type = _elementTypeToGLType[attrib._element.getType()];
-                    attrib._element.isNormalized(),
+                GLuint offset = attrib._offset;;
-                    attrib._offset);
+                GLboolean isNormalized = attrib._element.isNormalized();
                for (int j = 0; j < locationCount; ++j) {
                    newActivation.set(slot + j);
                    glVertexAttribFormat(slot + j, count, type, isNormalized, offset);
                    glVertexAttribDivisor(slot + j, attrib._frequency);
                    glVertexAttribBinding(slot + j, attrib._channel);
                }
            }
            (void) CHECK_GL_ERROR();
        }
@ -273,21 +285,36 @@ void GLBackend::resetInputStage() {
 }
 // Executes a recorded setIndexBuffer command.
 // Params: paramOffset + 0 is the offset into the index buffer, paramOffset + 1 is the handle
 // of the buffer in the batch's buffer cache, paramOffset + 2 is the index element Type.
 // The GL element array binding is only touched when the buffer actually changes.
 void GLBackend::do_setIndexBuffer(Batch& batch, uint32 paramOffset) {
    _input._indexBufferType = (Type)batch._params[paramOffset + 2]._uint;
    _input._indexBufferOffset = batch._params[paramOffset + 0]._uint;

    // Look the buffer up exactly once; a second declaration of the same local does not compile.
    BufferPointer indexBuffer = batch._buffers.get(batch._params[paramOffset + 1]._uint);
    if (indexBuffer != _input._indexBuffer) {
        _input._indexBuffer = indexBuffer;
        if (indexBuffer) {
            glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, getBufferID(*indexBuffer));
        } else {
            // FIXME do we really need this?  Is there ever a draw call where we care that the element buffer is null?
            glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0);
        }
    }
    (void) CHECK_GL_ERROR();
 }
-template <typename V>
+void GLBackend::do_setIndirectBuffer(Batch& batch, uint32 paramOffset) {
-void popParam(Batch::Params& params, uint32& paramOffset, V& v) {
+    _input._indirectBufferOffset = batch._params[paramOffset + 1]._uint;
-    for (size_t i = 0; i < v.length(); ++i) {
+    _input._indirectBufferStride = batch._params[paramOffset + 2]._uint;
-        v[i] = params[paramOffset++]._float;
+
    BufferPointer buffer = batch._buffers.get(batch._params[paramOffset]._uint);
    if (buffer != _input._indirectBuffer) {
        _input._indirectBuffer = buffer;
        if (buffer) {
            glBindBuffer(GL_DRAW_INDIRECT_BUFFER, getBufferID(*buffer));
        } else {
            // FIXME do we really need this?  Is there ever a draw call where we care that the element buffer is null?
            glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0);
        }
    }
    (void)CHECK_GL_ERROR();
 }
--- a/libraries/gpu/src/gpu/GPUConfig.h
+++ b/libraries/gpu/src/gpu/GPUConfig.h
@ -17,6 +17,8 @@
 #define GPU_CORE 1
 #define GPU_LEGACY 0
 #define GPU_CORE_41 410
 #define GPU_CORE_43 430
 #if defined(__APPLE__)
@ -33,7 +35,7 @@
 #include <GL/wglew.h>
 #define GPU_FEATURE_PROFILE GPU_CORE
-#define GPU_INPUT_PROFILE GPU_CORE_41
+#define GPU_INPUT_PROFILE GPU_CORE_43
 #elif defined(ANDROID)
@ -42,7 +44,7 @@
 #include <GL/glew.h>
 #define GPU_FEATURE_PROFILE GPU_CORE
-#define GPU_INPUT_PROFILE GPU_CORE_41
+#define GPU_INPUT_PROFILE GPU_CORE_43
 #endif
--- a/libraries/render-utils/src/GeometryCache.cpp
+++ b/libraries/render-utils/src/GeometryCache.cpp
@ -88,37 +88,34 @@ void GeometryCache::ShapeData::setupIndices(gpu::BufferPointer& indexBuffer, con
 // Records the input bindings shared by every draw of this shape: the position and normal
 // views on their stream slots, and the index buffer as UINT16 with a zero offset.
 // Because the index buffer is bound at offset 0, callers select the shape's index range
 // through the start index of the draw call rather than through the binding.
 void GeometryCache::ShapeData::setupBatch(gpu::Batch& batch) const {
    batch.setInputBuffer(gpu::Stream::POSITION, _positionView);
    batch.setInputBuffer(gpu::Stream::NORMAL, _normalView);
    batch.setIndexBuffer(gpu::UINT16, _indices, 0);
 }
 // Draws the solid (triangle) version of the shape once.
 // Does nothing when the shape has no triangle indices. The shape's range inside the shared
 // index buffer is selected with _indexOffset as the draw's start index, since setupBatch
 // binds the index buffer at offset 0.
 void GeometryCache::ShapeData::draw(gpu::Batch& batch) const {
    if (_indexCount) {
        setupBatch(batch);
        // A single draw only: an extra drawIndexed without the start index would render a
        // second time from the beginning of the shared index buffer.
        batch.drawIndexed(gpu::TRIANGLES, _indexCount, _indexOffset);
    }
 }
 // Draws the wireframe (line) version of the shape once.
 // Does nothing when the shape has no wire indices. The wire range inside the shared index
 // buffer is selected with _wireIndexOffset as the draw's start index, since setupBatch
 // binds the index buffer at offset 0.
 void GeometryCache::ShapeData::drawWire(gpu::Batch& batch) const {
    if (_wireIndexCount) {
        setupBatch(batch);
        // A single draw only: an extra drawIndexed without the start index would render a
        // second time from the beginning of the shared index buffer.
        batch.drawIndexed(gpu::LINES, _wireIndexCount, _wireIndexOffset);
    }
 }
 // Draws `count` instances of the solid (triangle) version of the shape with one instanced draw.
 // Does nothing when the shape has no triangle indices. The shape's range inside the shared
 // index buffer is selected with _indexOffset as the draw's start index, since setupBatch
 // binds the index buffer at offset 0.
 void GeometryCache::ShapeData::drawInstances(gpu::Batch& batch, size_t count) const {
    if (_indexCount) {
        setupBatch(batch);
        // A single draw only: an extra instanced draw without the start index would render
        // every instance a second time from the beginning of the shared index buffer.
        batch.drawIndexedInstanced(count, gpu::TRIANGLES, _indexCount, _indexOffset);
    }
 }
 // Draws `count` instances of the wireframe (line) version of the shape with one instanced draw.
 // Does nothing when the shape has no wire indices. The wire range inside the shared index
 // buffer is selected with _wireIndexOffset as the draw's start index, since setupBatch
 // binds the index buffer at offset 0.
 void GeometryCache::ShapeData::drawWireInstances(gpu::Batch& batch, size_t count) const {
    if (_wireIndexCount) {
        setupBatch(batch);
        // A single draw only: an extra instanced draw without the start index would render
        // every instance a second time from the beginning of the shared index buffer.
        batch.drawIndexedInstanced(count, gpu::LINES, _wireIndexCount, _wireIndexOffset);
    }
 }
@ -323,7 +320,8 @@ void GeometryCache::buildShapes() {
            20, 21, 21, 22, 22, 23, 23, 20, // back
            0, 23, 1, 22, 2, 21, 3, 20 // sides
        };
-        for (unsigned int i = 0; i < wireIndices.size(); ++i) {
+
        for (size_t i = 0; i < wireIndices.size(); ++i) {
            indices[i] += startingIndex;
        }
@ -374,7 +372,7 @@ void GeometryCache::buildShapes() {
            0, 3, 1, 3, 2, 3,
        };
-        for (unsigned int i = 0; i < wireIndices.size(); ++i) {
+        for (size_t i = 0; i < wireIndices.size(); ++i) {
            wireIndices[i] += startingIndex;
        }
--- a/libraries/render-utils/src/GeometryCache.h
+++ b/libraries/render-utils/src/GeometryCache.h
@ -232,14 +232,6 @@ public:
    /// Set a batch to the simple pipeline, returning the previous pipeline
    void useSimpleDrawPipeline(gpu::Batch& batch, bool noBlend = false);
 private:
    GeometryCache();
    virtual ~GeometryCache();
    void buildShapes();
    typedef QPair<int, int> IntPair;
    typedef QPair<unsigned int, unsigned int> VerticesIndices;
    struct ShapeData {
        size_t _indexOffset{ 0 };
        size_t _indexCount{ 0 };
@ -263,7 +255,13 @@ private:
    VShape _shapes;
 private:
    GeometryCache();
    virtual ~GeometryCache();
    void buildShapes();
    typedef QPair<int, int> IntPair;
    typedef QPair<unsigned int, unsigned int> VerticesIndices;
    gpu::PipelinePointer _standardDrawPipeline;
    gpu::PipelinePointer _standardDrawPipelineNoBlend;
--- a/tests/gpu-test/src/main.cpp
+++ b/tests/gpu-test/src/main.cpp
@ -35,6 +35,7 @@
 // Must come after GL headers
 #include <QtGui/QOpenGLContext>
 #include <QtGui/QOpenGLDebugLogger>
 #include <GLMHelpers.h>
 #include <PathUtils.h>
@ -101,7 +102,24 @@ float getSeconds(quint64 start = 0) {
    return seconds;
 }
 // One record of the indirect buffer consumed by the indexed multi-draw-indirect path.
 // The test appends one of these per shape type and then issues multiDrawIndexedIndirect.
 // Field order and size matter because the struct is appended to the GPU buffer as-is;
 // presumably it mirrors GL's DrawElementsIndirectCommand layout — confirm against the GL reference.
 struct DrawElementsIndirectCommand {
    uint  _count = 0;          // number of indices to draw
    uint  _instanceCount = 0;  // number of instances to draw
    uint  _firstIndex = 0;     // first index, in index units
    uint  _baseVertex = 0;     // value added to each index
    uint  _baseInstance = 0;   // first instance for instanced attributes
 };
 // Number of distinct shape types rendered by the indirect-draw test; it is also the number
 // of indirect commands passed to multiDrawIndexedIndirect.
 static const size_t TYPE_COUNT = 4;
 // The shape drawn by each indirect command, indexed by command number.
 static GeometryCache::Shape SHAPE[TYPE_COUNT] = {
    GeometryCache::Icosahedron,
    GeometryCache::Cube,
    GeometryCache::Sphere,
    GeometryCache::Tetrahedron,
    //GeometryCache::Line,
 };
 gpu::Stream::FormatPointer& getInstancedSolidStreamFormat();
 // Creates an OpenGL window that renders a simple unlit scene using the gpu library and GeometryCache
 // Should eventually get refactored into something that supports multiple gpu backends.
@ -134,7 +152,7 @@ public:
        // Qt Quick may need a depth and stencil buffer. Always make sure these are available.
        format.setDepthBufferSize(16);
        format.setStencilBufferSize(8);
-        format.setVersion(4, 1);
+        format.setVersion(4, 3);
        format.setProfile(QSurfaceFormat::OpenGLContextProfile::CoreProfile);
        format.setOption(QSurfaceFormat::DebugContext);
        format.setSwapInterval(0);
@ -147,6 +165,13 @@ public:
        show();
        makeCurrent();
        QOpenGLDebugLogger *logger = new QOpenGLDebugLogger(this);
        logger->initialize(); // initializes in the current context, i.e. ctx
        connect(logger, &QOpenGLDebugLogger::messageLogged, [](const QOpenGLDebugMessage& message){
            qDebug() << message;
        });
        logger->startLogging(QOpenGLDebugLogger::SynchronousLogging);
        gpu::Context::init<gpu::GLBackend>();
        _context = std::make_shared<gpu::Context>();
@ -175,7 +200,11 @@ public:
    }
    void draw() {
-        if (!isVisible()) {
+        static auto startTime = usecTimestampNow();
        // Attempting to draw before we're visible and have a valid size will
        // produce GL errors.
        if (!isVisible() || _size.width() <= 0 || _size.height() <= 0) {
            return;
        }
        makeCurrent();
@ -190,7 +219,8 @@ public:
        glm::vec3 unitscale { 1.0f };
        glm::vec3 up { 0.0f, 1.0f, 0.0f };
-        glm::vec3 camera_position { 1.5f * sinf(t), 0.0f, 1.5f * cosf(t) };
+        float distance = 3.0f;
        glm::vec3 camera_position{ distance * sinf(t), 0.0f, distance * cos(t) };
        static const vec3 camera_focus(0);
        static const vec3 camera_up(0, 1, 0);
@ -203,11 +233,15 @@ public:
        // Render grid on xz plane (not the optimal way to do things, but w/e)
        // Note: GeometryCache::renderGrid will *not* work, as it is apparently unaffected by batch rotations and renders xy only
        {
            static const std::string GRID_INSTANCE = "Grid";
            static auto compactColor1 = toCompactColor(vec4{ 0.35f, 0.25f, 0.15f, 1.0f });
            static auto compactColor2 = toCompactColor(vec4{ 0.15f, 0.25f, 0.35f, 1.0f });
-        auto transformBuffer = batch.getNamedBuffer(GRID_INSTANCE, 0);
+            static gpu::BufferPointer transformBuffer; 
-        auto colorBuffer = batch.getNamedBuffer(GRID_INSTANCE, 1);
+            static gpu::BufferPointer colorBuffer;
            if (!transformBuffer) {
                transformBuffer = std::make_shared<gpu::Buffer>();
                colorBuffer = std::make_shared<gpu::Buffer>();
                for (int i = 0; i < 100; ++i) {
                    {
                        glm::mat4 transform = glm::translate(mat4(), vec3(0, -1, -50 + i));
@ -224,34 +258,115 @@ public:
                        colorBuffer->append(compactColor2);
                    }
                }
            }
            batch.setupNamedCalls(GRID_INSTANCE, 200, [=](gpu::Batch& batch, gpu::Batch::NamedBatchData& data) {
                batch.setViewTransform(camera);
                batch.setModelTransform(Transform());
                batch.setPipeline(_pipeline);
            auto& xfm = data._buffers[0];
            auto& color = data._buffers[1];
                batch._glUniform1i(_instanceLocation, 1);
-            geometryCache->renderWireShapeInstances(batch, GeometryCache::Line, data._count, xfm, color);
+                geometryCache->renderWireShapeInstances(batch, GeometryCache::Line, data._count, transformBuffer, colorBuffer);
                batch._glUniform1i(_instanceLocation, 0);
            });
        }
        {
            static const size_t ITEM_COUNT = 1000;
            static const float SHAPE_INTERVAL = (PI * 2.0f) / ITEM_COUNT;
            static const float ITEM_INTERVAL = SHAPE_INTERVAL / TYPE_COUNT;
            static const gpu::Element POSITION_ELEMENT{ gpu::VEC3, gpu::FLOAT, gpu::XYZ };
            static const gpu::Element NORMAL_ELEMENT{ gpu::VEC3, gpu::FLOAT, gpu::XYZ };
            static const gpu::Element COLOR_ELEMENT{ gpu::VEC4, gpu::NUINT8, gpu::RGBA };
            static const gpu::Element TRANSFORM_ELEMENT{ gpu::MAT4, gpu::FLOAT, gpu::XYZW };
            static std::vector<Transform> transforms;
            static std::vector<vec4> colors;
            static gpu::BufferPointer indirectBuffer;
            static gpu::BufferPointer transformBuffer;
            static gpu::BufferPointer colorBuffer;
            static gpu::BufferView colorView; 
            static gpu::BufferView instanceXfmView; 
            if (!transformBuffer) {
                transformBuffer = std::make_shared<gpu::Buffer>();
                colorBuffer = std::make_shared<gpu::Buffer>();
                indirectBuffer = std::make_shared<gpu::Buffer>();
                static const float ITEM_RADIUS = 20;
                static const vec3 ITEM_TRANSLATION{ 0, 0, -ITEM_RADIUS };
                for (size_t i = 0; i < TYPE_COUNT; ++i) {
                    GeometryCache::Shape shape = SHAPE[i];
                    GeometryCache::ShapeData shapeData = geometryCache->_shapes[shape];
                    {
                        DrawElementsIndirectCommand indirectCommand;
                        indirectCommand._count = shapeData._indexCount;
                        indirectCommand._instanceCount = ITEM_COUNT;
                        indirectCommand._baseInstance = i * ITEM_COUNT;
                        indirectCommand._firstIndex = shapeData._indexOffset / 2;
                        indirectCommand._baseVertex = 0;
                        indirectBuffer->append(indirectCommand);
                    }
                    //indirectCommand._count
                    float startingInterval = ITEM_INTERVAL * i;
                    for (size_t j = 0; j < ITEM_COUNT; ++j) {
                        float theta = j * SHAPE_INTERVAL + startingInterval;
                        auto transform = glm::rotate(mat4(), theta, Vectors::UP);
                        transform = glm::rotate(transform, (randFloat() - 0.5f) * PI / 4.0f, Vectors::UNIT_X);
                        transform = glm::translate(transform, ITEM_TRANSLATION);
                        transform = glm::scale(transform, vec3(randFloat() / 2.0f + 0.5f));
                        transformBuffer->append(transform);
                        transforms.push_back(transform);
                        auto color = vec4{ randomColorValue(64), randomColorValue(64), randomColorValue(64), 255 };
                        color /= 255.0f;
                        colors.push_back(color);
                        colorBuffer->append(toCompactColor(color));
                    }
                }
                colorView = gpu::BufferView(colorBuffer, COLOR_ELEMENT);
                instanceXfmView = gpu::BufferView(transformBuffer, TRANSFORM_ELEMENT);
            }
 #if 1
            GeometryCache::ShapeData shapeData = geometryCache->_shapes[GeometryCache::Icosahedron];
            {
                batch.setViewTransform(camera);
                batch.setModelTransform(Transform());
                batch.setPipeline(_pipeline);
                batch._glUniform1i(_instanceLocation, 1);
                batch.setInputFormat(getInstancedSolidStreamFormat());
                batch.setInputBuffer(gpu::Stream::COLOR, colorView);
                batch.setInputBuffer(gpu::Stream::INSTANCE_XFM, instanceXfmView);
                batch.setIndirectBuffer(indirectBuffer);
                shapeData.setupBatch(batch);
                batch.multiDrawIndexedIndirect(TYPE_COUNT, gpu::TRIANGLES);
                batch._glUniform1i(_instanceLocation, 0);
            }
 #else
            batch.setViewTransform(camera);
            batch.setPipeline(_pipeline);
            for (size_t i = 0; i < TYPE_COUNT; ++i) {
                GeometryCache::Shape shape = SHAPE[i];
                for (size_t j = 0; j < ITEM_COUNT; ++j) {
                    int index = i * ITEM_COUNT + j;
                    batch.setModelTransform(transforms[index]);
                    const vec4& color = colors[index];
                    batch._glColor4f(color.r, color.g, color.b, 1.0);
                    geometryCache->renderShape(batch, shape);
                }
            }
 #endif
        }
        // Render unlit cube + sphere
        static GeometryCache::Shape SHAPE[] = {
            GeometryCache::Cube,
            GeometryCache::Sphere,
            GeometryCache::Tetrahedron,
            GeometryCache::Icosahedron,
        };
        static auto startUsecs = usecTimestampNow(); 
        float seconds = getSeconds(startUsecs);
-        seconds /= 4.0f;
+
-        int shapeIndex = ((int)seconds) % 4;
+        seconds /= 4.0;
-        bool wire = seconds - (float)floor(seconds) > 0.5f;
+        int shapeIndex = ((int)seconds) % TYPE_COUNT;
        bool wire = seconds - floor(seconds) > 0.5f;
        batch.setModelTransform(Transform());
        batch._glColor4f(0.8f, 0.25f, 0.25f, 1.0f);
@ -261,7 +376,7 @@ public:
            geometryCache->renderShape(batch, SHAPE[shapeIndex]);
        }
-        batch.setModelTransform(Transform().setScale(1.05f));
+        batch.setModelTransform(Transform().setScale(2.05f));
        batch._glColor4f(1, 1, 1, 1);
        geometryCache->renderWireCube(batch);
@ -305,3 +420,4 @@ int main(int argc, char** argv) {
 }
 #include "main.moc"