Merge pull request #13182 from jherico/feature/batch_pooling

Use pooled gpu batches
2025-08-09 11:07:07 +02:00 · 2018-05-21 15:47:55 -07:00 · 2018-05-21 15:47:55 -07:00 · 87989268bf
commit 87989268bf
parent a1c948c0f0 9c26b2c1d3
9 changed files with 197 additions and 192 deletions
--- a/libraries/gpu/src/gpu/Batch.cpp
+++ b/libraries/gpu/src/gpu/Batch.cpp
@ -19,8 +19,7 @@
 #if defined(NSIGHT_FOUND)
 #include "nvToolsExt.h"
-
+ProfileRangeBatch::ProfileRangeBatch(gpu::Batch& batch, const char* name) : _batch(batch) {
 ProfileRangeBatch::ProfileRangeBatch(gpu::Batch& batch, const char *name) : _batch(batch) {
    _batch.pushProfileRange(name);
 }
@ -38,19 +37,15 @@ static const int MAX_NUM_UNIFORM_BUFFERS = 14;
 static const int MAX_NUM_RESOURCE_BUFFERS = 16;
 static const int MAX_NUM_RESOURCE_TEXTURES = 16;
-size_t Batch::_commandsMax { BATCH_PREALLOCATE_MIN };
+size_t Batch::_commandsMax{ BATCH_PREALLOCATE_MIN };
-size_t Batch::_commandOffsetsMax { BATCH_PREALLOCATE_MIN };
+size_t Batch::_commandOffsetsMax{ BATCH_PREALLOCATE_MIN };
-size_t Batch::_paramsMax { BATCH_PREALLOCATE_MIN };
+size_t Batch::_paramsMax{ BATCH_PREALLOCATE_MIN };
-size_t Batch::_dataMax { BATCH_PREALLOCATE_MIN };
+size_t Batch::_dataMax{ BATCH_PREALLOCATE_MIN };
-size_t Batch::_objectsMax { BATCH_PREALLOCATE_MIN };
+size_t Batch::_objectsMax{ BATCH_PREALLOCATE_MIN };
-size_t Batch::_drawCallInfosMax { BATCH_PREALLOCATE_MIN };
+size_t Batch::_drawCallInfosMax{ BATCH_PREALLOCATE_MIN };
 Batch::Batch(const char* name) {
-#ifdef DEBUG
+    _name = name;
    if (name) {
        _name = name;
    }
 #endif
    _commands.reserve(_commandsMax);
    _commandOffsets.reserve(_commandOffsetsMax);
    _params.reserve(_paramsMax);
@ -59,38 +54,6 @@ Batch::Batch(const char* name) {
    _drawCallInfos.reserve(_drawCallInfosMax);
 }
 // "Copy" constructor that actually transfers the recorded contents out of
 // `batch_`: constness is cast away and the command/param/data/cache containers
 // are swapped with this object's members, so the source is modified and is
 // presumably left holding this object's freshly constructed (empty) containers.
 // Scalar state (model transform, named call, stereo/skybox flags) is copied by
 // plain assignment.
 // NOTE(review): `_projectionJitter` is not carried over here — confirm whether
 // that omission is intentional.
 Batch::Batch(const Batch& batch_) {
    // Drop the const qualifier so the source's containers can be swapped out.
    Batch& batch = *const_cast<Batch*>(&batch_);
 #ifdef DEBUG
    // The name is only transferred in DEBUG builds.
    _name = batch_._name;
 #endif
    // Raw command stream and its parameter/data storage.
    _commands.swap(batch._commands);
    _commandOffsets.swap(batch._commandOffsets);
    _params.swap(batch._params);
    _data.swap(batch._data);
    // Current model transform state is copied, not swapped.
    _invalidModel = batch._invalidModel;
    _currentModel = batch._currentModel;
    _objects.swap(batch._objects);
    _currentNamedCall = batch._currentNamedCall;
    // Per-resource caches: only the underlying `_items` storage is exchanged.
    _buffers._items.swap(batch._buffers._items);
    _textures._items.swap(batch._textures._items);
    _textureTables._items.swap(batch._textureTables._items);
    _streamFormats._items.swap(batch._streamFormats._items);
    _transforms._items.swap(batch._transforms._items);
    _pipelines._items.swap(batch._pipelines._items);
    _framebuffers._items.swap(batch._framebuffers._items);
    _swapChains._items.swap(batch._swapChains._items);
    _drawCallInfos.swap(batch._drawCallInfos);
    _queries._items.swap(batch._queries._items);
    _lambdas._items.swap(batch._lambdas._items);
    _profileRanges._items.swap(batch._profileRanges._items);
    _names._items.swap(batch._names._items);
    _namedData.swap(batch._namedData);
    // Rendering flags are copied by value.
    _enableStereo = batch._enableStereo;
    _enableSkybox = batch._enableSkybox;
 }
 Batch::~Batch() {
    _commandsMax = std::max(_commands.size(), _commandsMax);
    _commandOffsetsMax = std::max(_commandOffsets.size(), _commandOffsetsMax);
@ -100,6 +63,10 @@ Batch::~Batch() {
    _drawCallInfosMax = std::max(_drawCallInfos.size(), _drawCallInfosMax);
 }
 // Sets the label associated with this batch.
 // NOTE(review): if `_name` is the raw `const char*` member introduced by the
 // accompanying header change, only the pointer is stored (no copy), so the
 // string would have to outlive the batch's use of it — confirm against callers.
 void Batch::setName(const char* name) {
    _name = name;
 }
 void Batch::clear() {
    _commandsMax = std::max(_commands.size(), _commandsMax);
    _commandOffsetsMax = std::max(_commandOffsets.size(), _commandOffsetsMax);
@ -110,18 +77,30 @@ void Batch::clear() {
    _commands.clear();
    _commandOffsets.clear();
    _params.clear();
    _data.clear();
    _drawCallInfos.clear();
    _buffers.clear();
    _framebuffers.clear();
    _lambdas.clear();
    _names.clear();
    _namedData.clear();
    _objects.clear();
    _params.clear();
    _pipelines.clear();
    _profileRanges.clear();
    _queries.clear();
    _swapChains.clear();
    _streamFormats.clear();
    _textures.clear();
    _textureTables.clear();
    _streamFormats.clear();
    _transforms.clear();
-    _pipelines.clear();
+
-    _framebuffers.clear();
+    _name = nullptr;
-    _swapChains.clear();
+    _invalidModel = true;
-    _objects.clear();
+    _currentModel = Transform();
-    _drawCallInfos.clear();
+    _projectionJitter = glm::vec2(0.0f);
    _enableStereo = true;
    _enableSkybox = false;
 }
 size_t Batch::cacheData(size_t size, const void* data) {
@ -177,7 +156,6 @@ void Batch::drawIndexedInstanced(uint32 numInstances, Primitive primitiveType, u
    captureDrawCallInfo();
 }
 void Batch::multiDrawIndirect(uint32 numCommands, Primitive primitiveType) {
    ADD_COMMAND(multiDrawIndirect);
    _params.emplace_back(numCommands);
@ -244,7 +222,6 @@ void Batch::setIndirectBuffer(const BufferPointer& buffer, Offset offset, Offset
    _params.emplace_back(stride);
 }
 void Batch::setModelTransform(const Transform& model) {
    ADD_COMMAND(setModelTransform);
@ -266,19 +243,19 @@ void Batch::setProjectionTransform(const Mat4& proj) {
 }
 void Batch::setProjectionJitter(float jx, float jy) {
-	_projectionJitter.x = jx;
+    _projectionJitter.x = jx;
-	_projectionJitter.y = jy;
+    _projectionJitter.y = jy;
-	pushProjectionJitter(jx, jy);
+    pushProjectionJitter(jx, jy);
 }
-void Batch::pushProjectionJitter(float jx, float jy) { 
+void Batch::pushProjectionJitter(float jx, float jy) {
-	ADD_COMMAND(setProjectionJitter);
+    ADD_COMMAND(setProjectionJitter);
-	_params.emplace_back(jx);
+    _params.emplace_back(jx);
-	_params.emplace_back(jy);
+    _params.emplace_back(jy);
 }
-void Batch::popProjectionJitter() { 
+void Batch::popProjectionJitter() {
-	pushProjectionJitter(_projectionJitter.x, _projectionJitter.y);
+    pushProjectionJitter(_projectionJitter.x, _projectionJitter.y);
 }
 void Batch::setViewportTransform(const Vec4i& viewport) {
@ -374,7 +351,6 @@ void Batch::setFramebuffer(const FramebufferPointer& framebuffer) {
    ADD_COMMAND(setFramebuffer);
    _params.emplace_back(_framebuffers.cache(framebuffer));
 }
 void Batch::setFramebufferSwapChain(const FramebufferSwapChainPointer& framebuffer, unsigned int swapChainIndex) {
@ -487,7 +463,7 @@ void Batch::runLambda(std::function<void()> f) {
 void Batch::startNamedCall(const std::string& name) {
    ADD_COMMAND(startNamedCall);
    _params.emplace_back(_names.cache(name));
-    
+
    _currentNamedCall = name;
 }
@ -556,7 +532,7 @@ void Batch::captureDrawCallInfoImpl() {
        TransformObject object;
        _currentModel.getMatrix(object._model);
-        // FIXME - we don't want to be using glm::inverse() here but it fixes the flickering issue we are 
+        // FIXME - we don't want to be using glm::inverse() here but it fixes the flickering issue we are
        // seeing with planky blocks in toybox. Our implementation of getInverseMatrix() is buggy in cases
        // of non-uniform scale. We need to fix that. In the mean time, glm::inverse() works.
        //_model.getInverseMatrix(_object._modelInverse);
@ -582,9 +558,9 @@ void Batch::captureDrawCallInfo() {
 }
 void Batch::captureNamedDrawCallInfo(std::string name) {
-    std::swap(_currentNamedCall, name); // Set and save _currentNamedCall
+    std::swap(_currentNamedCall, name);  // Set and save _currentNamedCall
    captureDrawCallInfoImpl();
-    std::swap(_currentNamedCall, name); // Restore _currentNamedCall
+    std::swap(_currentNamedCall, name);  // Restore _currentNamedCall
 }
 // Debugging
--- a/libraries/gpu/src/gpu/Batch.h
+++ b/libraries/gpu/src/gpu/Batch.h
@ -92,9 +92,12 @@ public:
    void captureNamedDrawCallInfo(std::string name);
    Batch(const char* name = nullptr);
-    Batch(const Batch& batch);
+    // Disallow copy construction and assignment of batches
    Batch(const Batch& batch) = delete;
    Batch& operator=(const Batch& batch) = delete;
    ~Batch();
    void setName(const char* name);
    void clear();
    // Batches may need to override the context level stereo settings
@ -506,10 +509,7 @@ public:
    bool _enableSkybox { false };
 protected:
-
+    const char* _name;
 #ifdef DEBUG
    std::string _name;
 #endif
    friend class Context;
    friend class Frame;
--- a/libraries/gpu/src/gpu/Context.cpp
+++ b/libraries/gpu/src/gpu/Context.cpp
@ -47,6 +47,10 @@ Context::Context(const Context& context) {
 }
 // Frees every Batch still parked in the pool and leaves the pool empty.
 // The pool mutex is not taken here.
 Context::~Context() {
    // Drain the pool front-to-back, deleting each pooled batch as it is removed.
    while (!_batchPool.empty()) {
        delete _batchPool.front();
        _batchPool.pop_front();
    }
 }
 const std::string& Context::getBackendVersion() const {
@ -65,7 +69,7 @@ void Context::beginFrame(const glm::mat4& renderView, const glm::mat4& renderPos
    }
 }
-void Context::appendFrameBatch(Batch& batch) {
+void Context::appendFrameBatch(const BatchPointer& batch) {
    if (!_frameActive) {
        qWarning() << "Batch executed outside of frame boundaries";
        return;
@ -115,7 +119,7 @@ void Context::executeFrame(const FramePointer& frame) const {
        // Execute the frame rendering commands
        for (auto& batch : frame->batches) {
-            _backend->render(batch);
+            _backend->render(*batch);
        }
        Batch endBatch("Context::executeFrame::end");
@ -323,6 +327,7 @@ Size Context::getTextureExternalGPUMemSize() {
 // Returns the current value of the Backend's `texturePendingGPUTransferCount` metric.
 uint32_t Context::getTexturePendingGPUTransferCount() {
    return Backend::texturePendingGPUTransferCount.getValue();
 }
 // Returns the current value of the Backend's `texturePendingGPUTransferMemSize` metric.
 Size Context::getTexturePendingGPUTransferMemSize() {
    return Backend::texturePendingGPUTransferMemSize.getValue();
 }
@ -334,3 +339,34 @@ Size Context::getTextureResourcePopulatedGPUMemSize() {
 // Returns the current value of the Backend's `textureResourceIdealGPUMemSize` metric.
 Size Context::getTextureResourceIdealGPUMemSize() {
    return Backend::textureResourceIdealGPUMemSize.getValue();
 }
 // Hands out a Batch for recording, reusing an instance from the pool when one
 // is available and allocating a new one otherwise. The batch is labelled with
 // `name` and wrapped in a BatchPointer whose deleter returns it to the pool
 // (via releaseBatch) instead of freeing it.
 //
 // NOTE: the deleter captures `this`, so every BatchPointer returned here must
 // be released before this Context is destroyed.
 BatchPointer Context::acquireBatch(const char* name) {
    Batch* recycled = nullptr;
    {
        // Hold the pool mutex only while the list itself is being touched.
        Lock guard(_batchPoolMutex);
        if (!_batchPool.empty()) {
            recycled = _batchPool.front();
            _batchPool.pop_front();
        }
    }

    // Nothing pooled: allocate a new batch, outside the lock.
    Batch* const handout = recycled ? recycled : new Batch();
    handout->setName(name);

    // Custom deleter: recycle the batch into the pool rather than deleting it.
    auto recycler = [this](Batch* batch) { releaseBatch(batch); };
    return BatchPointer(handout, recycler);
 }
 // Returns `batch` to the pool for later reuse by acquireBatch.
 // `batch` is dereferenced unconditionally, so it must not be null.
 void Context::releaseBatch(Batch* batch) {
    // Reset the batch before taking the lock, so the mutex only guards the
    // pool insertion itself.
    batch->clear();
    {
        Lock guard(_batchPoolMutex);
        _batchPool.push_back(batch);
    }
 }
 // Convenience wrapper: acquires a batch named `name` from `context`, lets `f`
 // record into it, then appends the batch to the context's current frame.
 void gpu::doInBatch(const char* name, const std::shared_ptr<gpu::Context>& context, const std::function<void(Batch& batch)>& f) {
    const auto pooledBatch = context->acquireBatch(name);
    // The callback records into the batch by reference; the shared pointer is
    // what is handed to the frame afterwards.
    Batch& recording = *pooledBatch;
    f(recording);
    context->appendFrameBatch(pooledBatch);
 }
--- a/libraries/gpu/src/gpu/Context.h
+++ b/libraries/gpu/src/gpu/Context.h
@ -43,17 +43,16 @@ public:
    int _DSNumTriangles = 0;
    int _PSNumSetPipelines = 0;
- 
+
    ContextStats() {}
    ContextStats(const ContextStats& stats) = default;
-    void evalDelta(const ContextStats& begin, const ContextStats& end); 
+    void evalDelta(const ContextStats& begin, const ContextStats& end);
 };
 class Backend {
 public:
-    virtual~ Backend() {};
+    virtual ~Backend(){};
    virtual const std::string& getVersion() const = 0;
@ -78,12 +77,11 @@ public:
        TransformCamera getEyeCamera(int eye, const StereoState& stereo, const Transform& xformView, Vec2 normalizedJitter) const;
    };
-
+    template <typename T, typename U>
    template<typename T, typename U>
    static void setGPUObject(const U& object, T* gpuObject) {
        object.gpuObject.setGPUObject(gpuObject);
    }
-    template<typename T, typename U>
+    template <typename T, typename U>
    static T* getGPUObject(const U& object) {
        return reinterpret_cast<T*>(object.gpuObject.getGPUObject());
    }
@ -95,26 +93,25 @@ public:
    // These should only be accessed by Backend implementation to report the buffer and texture allocations,
    // they are NOT public objects
-    static ContextMetricSize  freeGPUMemSize;
+    static ContextMetricSize freeGPUMemSize;
    static ContextMetricCount bufferCount;
-    static ContextMetricSize  bufferGPUMemSize;
+    static ContextMetricSize bufferGPUMemSize;
    static ContextMetricCount textureResidentCount;
    static ContextMetricCount textureFramebufferCount;
    static ContextMetricCount textureResourceCount;
    static ContextMetricCount textureExternalCount;
-    static ContextMetricSize  textureResidentGPUMemSize;
+    static ContextMetricSize textureResidentGPUMemSize;
-    static ContextMetricSize  textureFramebufferGPUMemSize;
+    static ContextMetricSize textureFramebufferGPUMemSize;
-    static ContextMetricSize  textureResourceGPUMemSize;
+    static ContextMetricSize textureResourceGPUMemSize;
-    static ContextMetricSize  textureExternalGPUMemSize;
+    static ContextMetricSize textureExternalGPUMemSize;
    static ContextMetricCount texturePendingGPUTransferCount;
-    static ContextMetricSize  texturePendingGPUTransferMemSize;
+    static ContextMetricSize texturePendingGPUTransferMemSize;
-    static ContextMetricSize  textureResourcePopulatedGPUMemSize;
+    static ContextMetricSize textureResourcePopulatedGPUMemSize;
-    static ContextMetricSize  textureResourceIdealGPUMemSize;
+    static ContextMetricSize textureResourceIdealGPUMemSize;
 protected:
    virtual bool isStereo() {
@ -144,7 +141,6 @@ public:
    typedef BackendPointer (*CreateBackend)();
    typedef bool (*MakeProgram)(Shader& shader, const Shader::BindingSet& bindings, const Shader::CompilationHandler& handler);
    // This one call must happen before any context is created or used (Shader::MakeProgram) in order to setup the Backend and any singleton data needed
    template <class T>
    static void init() {
@ -161,40 +157,43 @@ public:
    const std::string& getBackendVersion() const;
    void beginFrame(const glm::mat4& renderView = glm::mat4(), const glm::mat4& renderPose = glm::mat4());
-    void appendFrameBatch(Batch& batch);
+    void appendFrameBatch(const BatchPointer& batch);
    FramePointer endFrame();
    BatchPointer acquireBatch(const char* name = nullptr);
    void releaseBatch(Batch* batch);
    // MUST only be called on the rendering thread
-    // 
+    //
    // Handle any pending operations to clean up (recycle / deallocate) resources no longer in use
    void recycle() const;
    // MUST only be called on the rendering thread
-    // 
+    //
    // Execute a batch immediately, rather than as part of a frame
    void executeBatch(Batch& batch) const;
    // MUST only be called on the rendering thread
-    // 
+    //
    // Executes a frame, applying any updates contained in the frame batches to the rendering
    // thread shadow copies.  Either executeFrame or consumeFrameUpdates MUST be called on every frame
    // generated, IN THE ORDER they were generated.
    void executeFrame(const FramePointer& frame) const;
-    // MUST only be called on the rendering thread. 
+    // MUST only be called on the rendering thread.
    //
-    // Consuming a frame applies any updates queued from the recording thread and applies them to the 
+    // Consuming a frame applies any updates queued from the recording thread and applies them to the
-    // shadow copy used by the rendering thread.  
+    // shadow copy used by the rendering thread.
    //
    // EVERY frame generated MUST be consumed, regardless of whether the frame is actually executed,
    // or the buffer shadow copies can become unsynced from the recording thread copies.
-    // 
+    //
    // Consuming a frame is idempotent, as the frame encapsulates the updates and clears them out as
-    // it applies them, so calling it more than once on a given frame will have no effect after the 
+    // it applies them, so calling it more than once on a given frame will have no effect after the
    // first time
    //
    //
-    // This is automatically called by executeFrame, so you only need to call it if you 
+    // This is automatically called by executeFrame, so you only need to call it if you
    // have frames you aren't going to otherwise execute, for instance when a display plugin is
    // being disabled, or in the null display plugin where no rendering actually occurs
    void consumeFrameUpdates(const FramePointer& frame) const;
@ -212,7 +211,7 @@ public:
    // It's here for convenience to easily capture a snapshot
    void downloadFramebuffer(const FramebufferPointer& srcFramebuffer, const Vec4i& region, QImage& destImage);
-     // Repporting stats of the context
+    // Repporting stats of the context
    void resetStats() const;
    void getStats(ContextStats& stats) const;
@ -237,7 +236,7 @@ public:
    static Size getTextureGPUMemSize();
    static Size getTextureResidentGPUMemSize();
    static Size getTextureFramebufferGPUMemSize();
-    static Size getTextureResourceGPUMemSize(); 
+    static Size getTextureResourceGPUMemSize();
    static Size getTextureExternalGPUMemSize();
    static uint32_t getTexturePendingGPUTransferCount();
@ -250,10 +249,12 @@ protected:
    Context(const Context& context);
    std::shared_ptr<Backend> _backend;
-    bool _frameActive { false };
+    std::mutex _batchPoolMutex;
    std::list<Batch*> _batchPool;
    bool _frameActive{ false };
    FramePointer _currentFrame;
    RangeTimerPointer _frameRangeTimer;
-    StereoState  _stereo;
+    StereoState _stereo;
    // Sampled at the end of every frame, the stats of all the counters
    mutable ContextStats _frameStats;
@ -261,7 +262,7 @@ protected:
    // This function can only be called by "static Shader::makeProgram()"
    // makeProgramShader(...) make a program shader ready to be used in a Batch.
    // It compiles the sub shaders, link them and defines the Slots and their bindings.
-    // If the shader passed is not a program, nothing happens. 
+    // If the shader passed is not a program, nothing happens.
    static bool makeProgram(Shader& shader, const Shader::BindingSet& bindings, const Shader::CompilationHandler& handler);
    static CreateBackend _createBackendCallback;
@ -273,14 +274,8 @@ protected:
 };
 typedef std::shared_ptr<Context> ContextPointer;
-template<typename F>
+void doInBatch(const char* name, const std::shared_ptr<gpu::Context>& context, const std::function<void(Batch& batch)>& f);
 void doInBatch(const char* name, std::shared_ptr<gpu::Context> context, F f) {
    gpu::Batch batch(name);
    f(batch);
    context->appendFrameBatch(batch);
 }
 };
 };  // namespace gpu
 #endif
--- a/libraries/gpu/src/gpu/Forward.h
+++ b/libraries/gpu/src/gpu/Forward.h
@ -21,6 +21,7 @@ namespace gpu {
    using Lock = std::unique_lock<Mutex>;
    class Batch;
    using BatchPointer = std::shared_ptr<Batch>;
    class Backend;
    using BackendPointer = std::shared_ptr<Backend>;
    class Context;
--- a/libraries/gpu/src/gpu/Frame.cpp
+++ b/libraries/gpu/src/gpu/Frame.cpp
@ -28,8 +28,8 @@ Frame::~Frame() {
 }
 void Frame::finish() {
-    for (Batch& batch : batches) {
+    for (const auto& batch : batches) {
-        batch.finishFrame(bufferUpdates);
+        batch->finishFrame(bufferUpdates);
    }
 }
--- a/libraries/gpu/src/gpu/Frame.h
+++ b/libraries/gpu/src/gpu/Frame.h
@ -22,7 +22,7 @@ namespace gpu {
    public:
        Frame();
        virtual ~Frame();
-        using Batches = std::vector<Batch>;
+        using Batches = std::vector<BatchPointer>;
        using FramebufferRecycler = std::function<void(const FramebufferPointer&)>;
        using OverlayRecycler = std::function<void(const TexturePointer&)>;
--- a/libraries/render-utils/src/DeferredLightingEffect.cpp
+++ b/libraries/render-utils/src/DeferredLightingEffect.cpp
@ -738,21 +738,20 @@ void RenderDeferred::run(const RenderContextPointer& renderContext, const Inputs
    }
    auto previousBatch = args->_batch;
-    gpu::Batch batch;
+    gpu::doInBatch(nullptr, args->_context, [&](gpu::Batch& batch) {
-    args->_batch = &batch;
+        args->_batch = &batch;
-     _gpuTimer->begin(batch);
+         _gpuTimer->begin(batch);
-    setupJob.run(renderContext, deferredTransform, deferredFramebuffer, lightingModel, haze, surfaceGeometryFramebuffer, ssaoFramebuffer, subsurfaceScatteringResource);
+        setupJob.run(renderContext, deferredTransform, deferredFramebuffer, lightingModel, haze, surfaceGeometryFramebuffer, ssaoFramebuffer, subsurfaceScatteringResource);
-    lightsJob.run(renderContext, deferredTransform, deferredFramebuffer, lightingModel, surfaceGeometryFramebuffer, lightClusters);
+        lightsJob.run(renderContext, deferredTransform, deferredFramebuffer, lightingModel, surfaceGeometryFramebuffer, lightClusters);
-    cleanupJob.run(renderContext);
+        cleanupJob.run(renderContext);
-    _gpuTimer->end(batch);
+        _gpuTimer->end(batch);
-     args->_context->appendFrameBatch(batch);
+    });
     args->_batch = previousBatch;
    auto config = std::static_pointer_cast<Config>(renderContext->jobConfig);
    config->setGPUBatchRunTime(_gpuTimer->getGPUAverage(), _gpuTimer->getBatchAverage());
 }
--- a/libraries/render-utils/src/LightClusters.cpp
+++ b/libraries/render-utils/src/LightClusters.cpp
@ -707,85 +707,83 @@ void DebugLightClusters::run(const render::RenderContextPointer& renderContext,
    auto args = renderContext->args;
-    gpu::Batch batch;
+    
    gpu::doInBatch(nullptr, args->_context, [&](gpu::Batch& batch) { 
        batch.enableStereo(false);
-    batch.enableStereo(false);
+        // Assign the camera transform
        batch.setViewportTransform(args->_viewport);
        glm::mat4 projMat;
        Transform viewMat;
        args->getViewFrustum().evalProjectionMatrix(projMat);
        args->getViewFrustum().evalViewTransform(viewMat);
        batch.setProjectionTransform(projMat);
        batch.setViewTransform(viewMat, true);
-    // Assign the camera transform
+        // Then the actual ClusterGrid attributes
-    batch.setViewportTransform(args->_viewport);
+        batch.setModelTransform(Transform());
    glm::mat4 projMat;
    Transform viewMat;
    args->getViewFrustum().evalProjectionMatrix(projMat);
    args->getViewFrustum().evalViewTransform(viewMat);
    batch.setProjectionTransform(projMat);
    batch.setViewTransform(viewMat, true);
-
+        // Bind the Light CLuster data strucutre
-    // Then the actual ClusterGrid attributes
+        batch.setUniformBuffer(LIGHT_GPU_SLOT, lightClusters->_lightStage->getLightArrayBuffer());
-    batch.setModelTransform(Transform());
+        batch.setUniformBuffer(LIGHT_CLUSTER_GRID_FRUSTUM_GRID_SLOT, lightClusters->_frustumGridBuffer);
-
+        batch.setUniformBuffer(LIGHT_CLUSTER_GRID_CLUSTER_GRID_SLOT, lightClusters->_clusterGridBuffer);
-    // Bind the Light CLuster data strucutre
+        batch.setUniformBuffer(LIGHT_CLUSTER_GRID_CLUSTER_CONTENT_SLOT, lightClusters->_clusterContentBuffer);
    batch.setUniformBuffer(LIGHT_GPU_SLOT, lightClusters->_lightStage->getLightArrayBuffer());
    batch.setUniformBuffer(LIGHT_CLUSTER_GRID_FRUSTUM_GRID_SLOT, lightClusters->_frustumGridBuffer);
    batch.setUniformBuffer(LIGHT_CLUSTER_GRID_CLUSTER_GRID_SLOT, lightClusters->_clusterGridBuffer);
    batch.setUniformBuffer(LIGHT_CLUSTER_GRID_CLUSTER_CONTENT_SLOT, lightClusters->_clusterContentBuffer);
-    if (doDrawClusterFromDepth) {
+        if (doDrawClusterFromDepth) {
-        batch.setPipeline(getDrawClusterFromDepthPipeline());
+            batch.setPipeline(getDrawClusterFromDepthPipeline());
-        batch.setUniformBuffer(DEFERRED_FRAME_TRANSFORM_BUFFER_SLOT, deferredTransform->getFrameTransformBuffer());
+            batch.setUniformBuffer(DEFERRED_FRAME_TRANSFORM_BUFFER_SLOT, deferredTransform->getFrameTransformBuffer());
-        if (linearDepthTarget) {
+            if (linearDepthTarget) {
-            batch.setResourceTexture(DEFERRED_BUFFER_LINEAR_DEPTH_UNIT, linearDepthTarget->getLinearDepthTexture());
+                batch.setResourceTexture(DEFERRED_BUFFER_LINEAR_DEPTH_UNIT, linearDepthTarget->getLinearDepthTexture());
            }
            batch.draw(gpu::TRIANGLE_STRIP, 4, 0);
            batch.setResourceTexture(DEFERRED_BUFFER_LINEAR_DEPTH_UNIT, nullptr);
            batch.setUniformBuffer(DEFERRED_FRAME_TRANSFORM_BUFFER_SLOT, nullptr);
        }
-        batch.draw(gpu::TRIANGLE_STRIP, 4, 0);
+        if (doDrawContent) {
            // bind the one gpu::Pipeline we need
            batch.setPipeline(getDrawClusterContentPipeline());
            batch.setUniformBuffer(DEFERRED_FRAME_TRANSFORM_BUFFER_SLOT, deferredTransform->getFrameTransformBuffer());
            if (linearDepthTarget) {
                batch.setResourceTexture(DEFERRED_BUFFER_LINEAR_DEPTH_UNIT, linearDepthTarget->getLinearDepthTexture());
            }
            batch.draw(gpu::TRIANGLE_STRIP, 4, 0);
-        batch.setResourceTexture(DEFERRED_BUFFER_LINEAR_DEPTH_UNIT, nullptr);
+            batch.setResourceTexture(DEFERRED_BUFFER_LINEAR_DEPTH_UNIT, nullptr);
-        batch.setUniformBuffer(DEFERRED_FRAME_TRANSFORM_BUFFER_SLOT, nullptr);
+            batch.setUniformBuffer(DEFERRED_FRAME_TRANSFORM_BUFFER_SLOT, nullptr);
-    }
+        }
    });
    if (doDrawContent) {
        // bind the one gpu::Pipeline we need
        batch.setPipeline(getDrawClusterContentPipeline());
        batch.setUniformBuffer(DEFERRED_FRAME_TRANSFORM_BUFFER_SLOT, deferredTransform->getFrameTransformBuffer());
-        if (linearDepthTarget) {
+    gpu::doInBatch(nullptr, args->_context, [&](gpu::Batch& batch) { 
-            batch.setResourceTexture(DEFERRED_BUFFER_LINEAR_DEPTH_UNIT, linearDepthTarget->getLinearDepthTexture());
+        auto& drawGridAndCleanBatch = batch;
        if (doDrawGrid) {
            // bind the one gpu::Pipeline we need
            drawGridAndCleanBatch.setPipeline(getDrawClusterGridPipeline());
            auto dims = lightClusters->_frustumGridBuffer->dims;
            glm::ivec3 summedDims(dims.x * dims.y * dims.z, dims.x * dims.y, dims.x);
            drawGridAndCleanBatch.drawInstanced(summedDims.x, gpu::LINES, 24, 0);
        }
-        batch.draw(gpu::TRIANGLE_STRIP, 4, 0);
+        drawGridAndCleanBatch.setUniformBuffer(LIGHT_GPU_SLOT, nullptr);
-              
+        drawGridAndCleanBatch.setUniformBuffer(LIGHT_CLUSTER_GRID_FRUSTUM_GRID_SLOT, nullptr);
-        batch.setResourceTexture(DEFERRED_BUFFER_LINEAR_DEPTH_UNIT, nullptr);
+        drawGridAndCleanBatch.setUniformBuffer(LIGHT_CLUSTER_GRID_CLUSTER_GRID_SLOT, nullptr);
-        batch.setUniformBuffer(DEFERRED_FRAME_TRANSFORM_BUFFER_SLOT, nullptr);
+        drawGridAndCleanBatch.setUniformBuffer(LIGHT_CLUSTER_GRID_CLUSTER_CONTENT_SLOT, nullptr);
    }
-
+        drawGridAndCleanBatch.setResourceTexture(DEFERRED_BUFFER_COLOR_UNIT, nullptr);
-
+        drawGridAndCleanBatch.setResourceTexture(DEFERRED_BUFFER_NORMAL_UNIT, nullptr);
-    gpu::Batch drawGridAndCleanBatch;
+        drawGridAndCleanBatch.setResourceTexture(DEFERRED_BUFFER_EMISSIVE_UNIT, nullptr);
-
+        drawGridAndCleanBatch.setResourceTexture(DEFERRED_BUFFER_DEPTH_UNIT, nullptr);
-    if (doDrawGrid) {
+    });
        // bind the one gpu::Pipeline we need
        drawGridAndCleanBatch.setPipeline(getDrawClusterGridPipeline());
        auto dims = lightClusters->_frustumGridBuffer->dims;
        glm::ivec3 summedDims(dims.x*dims.y * dims.z, dims.x*dims.y, dims.x);
        drawGridAndCleanBatch.drawInstanced(summedDims.x, gpu::LINES, 24, 0);
    }
    drawGridAndCleanBatch.setUniformBuffer(LIGHT_GPU_SLOT, nullptr);
    drawGridAndCleanBatch.setUniformBuffer(LIGHT_CLUSTER_GRID_FRUSTUM_GRID_SLOT, nullptr);
    drawGridAndCleanBatch.setUniformBuffer(LIGHT_CLUSTER_GRID_CLUSTER_GRID_SLOT, nullptr);
    drawGridAndCleanBatch.setUniformBuffer(LIGHT_CLUSTER_GRID_CLUSTER_CONTENT_SLOT, nullptr);
    drawGridAndCleanBatch.setResourceTexture(DEFERRED_BUFFER_COLOR_UNIT, nullptr);
    drawGridAndCleanBatch.setResourceTexture(DEFERRED_BUFFER_NORMAL_UNIT, nullptr);
    drawGridAndCleanBatch.setResourceTexture(DEFERRED_BUFFER_EMISSIVE_UNIT, nullptr);
    drawGridAndCleanBatch.setResourceTexture(DEFERRED_BUFFER_DEPTH_UNIT, nullptr);
    args->_context->appendFrameBatch(batch);
    args->_context->appendFrameBatch(drawGridAndCleanBatch);
 }