Use pooled gpu batches

2025-08-07 13:30:33 +02:00 · 2018-05-16 13:47:15 -07:00 · 2018-05-16 13:47:15 -07:00 · 9c26b2c1d3
commit 9c26b2c1d3
parent efca568dbe
9 changed files with 197 additions and 192 deletions
--- a/libraries/gpu/src/gpu/Batch.cpp
+++ b/libraries/gpu/src/gpu/Batch.cpp
@ -19,7 +19,6 @@
 #if defined(NSIGHT_FOUND)
 #include "nvToolsExt.h"

-
 ProfileRangeBatch::ProfileRangeBatch(gpu::Batch& batch, const char* name) : _batch(batch) {
    _batch.pushProfileRange(name);
 }
@ -46,11 +45,7 @@ size_t Batch::_objectsMax { BATCH_PREALLOCATE_MIN };
 size_t Batch::_drawCallInfosMax{ BATCH_PREALLOCATE_MIN };

 Batch::Batch(const char* name) {
-#ifdef DEBUG
-    if (name) {
    _name = name;
-    }
-#endif
    _commands.reserve(_commandsMax);
    _commandOffsets.reserve(_commandOffsetsMax);
    _params.reserve(_paramsMax);
@ -59,38 +54,6 @@ Batch::Batch(const char* name) {
    _drawCallInfos.reserve(_drawCallInfosMax);
 }

-Batch::Batch(const Batch& batch_) {
-    Batch& batch = *const_cast<Batch*>(&batch_);
-#ifdef DEBUG
-    _name = batch_._name;
-#endif
-    _commands.swap(batch._commands);
-    _commandOffsets.swap(batch._commandOffsets);
-    _params.swap(batch._params);
-    _data.swap(batch._data);
-    _invalidModel = batch._invalidModel;
-    _currentModel = batch._currentModel;
-    _objects.swap(batch._objects);
-    _currentNamedCall = batch._currentNamedCall;
-
-    _buffers._items.swap(batch._buffers._items);
-    _textures._items.swap(batch._textures._items);
-    _textureTables._items.swap(batch._textureTables._items);
-    _streamFormats._items.swap(batch._streamFormats._items);
-    _transforms._items.swap(batch._transforms._items);
-    _pipelines._items.swap(batch._pipelines._items);
-    _framebuffers._items.swap(batch._framebuffers._items);
-    _swapChains._items.swap(batch._swapChains._items);
-    _drawCallInfos.swap(batch._drawCallInfos);
-    _queries._items.swap(batch._queries._items);
-    _lambdas._items.swap(batch._lambdas._items);
-    _profileRanges._items.swap(batch._profileRanges._items);
-    _names._items.swap(batch._names._items);
-    _namedData.swap(batch._namedData);
-    _enableStereo = batch._enableStereo;
-    _enableSkybox = batch._enableSkybox;
-}
-
 Batch::~Batch() {
    _commandsMax = std::max(_commands.size(), _commandsMax);
    _commandOffsetsMax = std::max(_commandOffsets.size(), _commandOffsetsMax);
@ -100,6 +63,10 @@ Batch::~Batch() {
    _drawCallInfosMax = std::max(_drawCallInfos.size(), _drawCallInfosMax);
 }

+void Batch::setName(const char* name) {  // Sets the batch's label; acquireBatch() may pass nullptr here
+    _name = name;  // only the pointer is stored, the characters are not copied
+}
+
 void Batch::clear() {
    _commandsMax = std::max(_commands.size(), _commandsMax);
    _commandOffsetsMax = std::max(_commandOffsets.size(), _commandOffsetsMax);
@ -110,18 +77,30 @@ void Batch::clear() {

    _commands.clear();
    _commandOffsets.clear();
-    _params.clear();
    _data.clear();
+    _drawCallInfos.clear();
    _buffers.clear();
+    _framebuffers.clear();
+    _lambdas.clear();
+    _names.clear();
+    _namedData.clear();
+    _objects.clear();
+    _params.clear();
+    _pipelines.clear();
+    _profileRanges.clear();
+    _queries.clear();
+    _swapChains.clear();
+    _streamFormats.clear();
    _textures.clear();
    _textureTables.clear();
-    _streamFormats.clear();
    _transforms.clear();
-    _pipelines.clear();
-    _framebuffers.clear();
-    _swapChains.clear();
-    _objects.clear();
-    _drawCallInfos.clear();
+
+    _name = nullptr;
+    _invalidModel = true;
+    _currentModel = Transform();
+    _projectionJitter = glm::vec2(0.0f);
+    _enableStereo = true;
+    _enableSkybox = false;
 }

 size_t Batch::cacheData(size_t size, const void* data) {
@ -177,7 +156,6 @@ void Batch::drawIndexedInstanced(uint32 numInstances, Primitive primitiveType, u
    captureDrawCallInfo();
 }

-
 void Batch::multiDrawIndirect(uint32 numCommands, Primitive primitiveType) {
    ADD_COMMAND(multiDrawIndirect);
    _params.emplace_back(numCommands);
@ -244,7 +222,6 @@ void Batch::setIndirectBuffer(const BufferPointer& buffer, Offset offset, Offset
    _params.emplace_back(stride);
 }

-
 void Batch::setModelTransform(const Transform& model) {
    ADD_COMMAND(setModelTransform);

@ -374,7 +351,6 @@ void Batch::setFramebuffer(const FramebufferPointer& framebuffer) {
    ADD_COMMAND(setFramebuffer);

    _params.emplace_back(_framebuffers.cache(framebuffer));
-
 }

 void Batch::setFramebufferSwapChain(const FramebufferSwapChainPointer& framebuffer, unsigned int swapChainIndex) {
--- a/libraries/gpu/src/gpu/Batch.h
+++ b/libraries/gpu/src/gpu/Batch.h
@ -92,9 +92,12 @@ public:
    void captureNamedDrawCallInfo(std::string name);

    Batch(const char* name = nullptr);
-    Batch(const Batch& batch);
+    // Disallow copy construction and assignment of batches
+    Batch(const Batch& batch) = delete;
+    Batch& operator=(const Batch& batch) = delete;
    ~Batch();

+    void setName(const char* name);
    void clear();

    // Batches may need to override the context level stereo settings
@ -506,10 +509,7 @@ public:
    bool _enableSkybox { false };

 protected:
-
-#ifdef DEBUG
-    std::string _name;
-#endif
+    const char* _name;

    friend class Context;
    friend class Frame;
--- a/libraries/gpu/src/gpu/Context.cpp
+++ b/libraries/gpu/src/gpu/Context.cpp
@ -47,6 +47,10 @@ Context::Context(const Context& context) {
 }

 Context::~Context() {
+    for (auto batch : _batchPool) {  // the pool stores raw Batch pointers that releaseBatch() pushed back
+        delete batch;  // matches the new in acquireBatch()
+    }
+    _batchPool.clear();  // NOTE(review): pool is touched here without _batchPoolMutex - confirm no BatchPointer can still be released at this point
 }

 const std::string& Context::getBackendVersion() const {
@ -65,7 +69,7 @@ void Context::beginFrame(const glm::mat4& renderView, const glm::mat4& renderPos
    }
 }

-void Context::appendFrameBatch(Batch& batch) {
+void Context::appendFrameBatch(const BatchPointer& batch) {
    if (!_frameActive) {
        qWarning() << "Batch executed outside of frame boundaries";
        return;
@ -115,7 +119,7 @@ void Context::executeFrame(const FramePointer& frame) const {

        // Execute the frame rendering commands
        for (auto& batch : frame->batches) {
-            _backend->render(batch);
+            _backend->render(*batch);
        }

        Batch endBatch("Context::executeFrame::end");
@ -323,6 +327,7 @@ Size Context::getTextureExternalGPUMemSize() {
 uint32_t Context::getTexturePendingGPUTransferCount() {
    return Backend::texturePendingGPUTransferCount.getValue();
 }
+
 Size Context::getTexturePendingGPUTransferMemSize() {
    return Backend::texturePendingGPUTransferMemSize.getValue();
 }
@ -334,3 +339,34 @@ Size Context::getTextureResourcePopulatedGPUMemSize() {
 Size Context::getTextureResourceIdealGPUMemSize() {
    return Backend::textureResourceIdealGPUMemSize.getValue();
 }
+
+
+BatchPointer Context::acquireBatch(const char* name) {  // Hands out a batch, reusing a pooled one when available
+    Batch* rawBatch = nullptr;
+    {
+        Lock lock(_batchPoolMutex);  // held only for this inner scope, while the pool is accessed
+        if (!_batchPool.empty()) {
+            rawBatch = _batchPool.front();  // taken from the front; releaseBatch() appends at the back
+            _batchPool.pop_front();
+        }
+    }
+    if (!rawBatch) {
+        rawBatch = new Batch();  // pool was empty: allocate a fresh batch
+    }
+    rawBatch->setName(name);
+    return BatchPointer(rawBatch, [this](Batch* batch) { releaseBatch(batch); });  // deleter recycles the batch instead of deleting it; NOTE(review): captures this, so the Context presumably must outlive every BatchPointer - confirm
+}
+
+void Context::releaseBatch(Batch* batch) {  // Returns a batch to the pool; used as the BatchPointer deleter in acquireBatch()
+    batch->clear();  // reset the batch before taking the lock, so the lock only covers the push
+    Lock lock(_batchPoolMutex);
+    _batchPool.push_back(batch);
+}
+
+void gpu::doInBatch(const char* name,  // Acquires a batch from the context, lets f record into it, then passes it to appendFrameBatch()
+                    const std::shared_ptr<gpu::Context>& context,
+                    const std::function<void(Batch& batch)>& f) {
+    auto batch = context->acquireBatch(name);
+    f(*batch);  // f is invoked synchronously, before the batch is appended
+    context->appendFrameBatch(batch);  // logs a warning and returns early when no frame is active
+}
--- a/libraries/gpu/src/gpu/Context.h
+++ b/libraries/gpu/src/gpu/Context.h
@ -54,7 +54,6 @@ class Backend {
 public:
    virtual ~Backend(){};

-
    virtual const std::string& getVersion() const = 0;

    void setStereoState(const StereoState& stereo) { _stereo = stereo; }
@ -78,7 +77,6 @@ public:
        TransformCamera getEyeCamera(int eye, const StereoState& stereo, const Transform& xformView, Vec2 normalizedJitter) const;
    };

-
    template <typename T, typename U>
    static void setGPUObject(const U& object, T* gpuObject) {
        object.gpuObject.setGPUObject(gpuObject);
@ -115,7 +113,6 @@ public:
    static ContextMetricSize textureResourcePopulatedGPUMemSize;
    static ContextMetricSize textureResourceIdealGPUMemSize;

-
 protected:
    virtual bool isStereo() {
        return _stereo.isStereo();
@ -144,7 +141,6 @@ public:
    typedef BackendPointer (*CreateBackend)();
    typedef bool (*MakeProgram)(Shader& shader, const Shader::BindingSet& bindings, const Shader::CompilationHandler& handler);

-
    // This one call must happen before any context is created or used (Shader::MakeProgram) in order to setup the Backend and any singleton data needed
    template <class T>
    static void init() {
@ -161,9 +157,12 @@ public:
    const std::string& getBackendVersion() const;

    void beginFrame(const glm::mat4& renderView = glm::mat4(), const glm::mat4& renderPose = glm::mat4());
-    void appendFrameBatch(Batch& batch);
+    void appendFrameBatch(const BatchPointer& batch);
    FramePointer endFrame();

+    BatchPointer acquireBatch(const char* name = nullptr);
+    void releaseBatch(Batch* batch);
+
    // MUST only be called on the rendering thread
    //
    // Handle any pending operations to clean up (recycle / deallocate) resources no longer in use
@ -250,6 +249,8 @@ protected:
    Context(const Context& context);

    std::shared_ptr<Backend> _backend;
+    std::mutex _batchPoolMutex;
+    std::list<Batch*> _batchPool;
    bool _frameActive{ false };
    FramePointer _currentFrame;
    RangeTimerPointer _frameRangeTimer;
@ -273,14 +274,8 @@ protected:
 };
 typedef std::shared_ptr<Context> ContextPointer;

-template<typename F>
-void doInBatch(const char* name, std::shared_ptr<gpu::Context> context, F f) {
-    gpu::Batch batch(name);
-    f(batch);
-    context->appendFrameBatch(batch);
-}
-
-};
+void doInBatch(const char* name, const std::shared_ptr<gpu::Context>& context, const std::function<void(Batch& batch)>& f);

+};  // namespace gpu

 #endif
--- a/libraries/gpu/src/gpu/Forward.h
+++ b/libraries/gpu/src/gpu/Forward.h
@ -21,6 +21,7 @@ namespace gpu {
    using Lock = std::unique_lock<Mutex>;

    class Batch;
+    using BatchPointer = std::shared_ptr<Batch>;
    class Backend;
    using BackendPointer = std::shared_ptr<Backend>;
    class Context;
--- a/libraries/gpu/src/gpu/Frame.cpp
+++ b/libraries/gpu/src/gpu/Frame.cpp
@ -28,8 +28,8 @@ Frame::~Frame() {
 }

 void Frame::finish() {
-    for (Batch& batch : batches) {
-        batch.finishFrame(bufferUpdates);
+    for (const auto& batch : batches) {
+        batch->finishFrame(bufferUpdates);
    }
 }

--- a/libraries/gpu/src/gpu/Frame.h
+++ b/libraries/gpu/src/gpu/Frame.h
@ -22,7 +22,7 @@ namespace gpu {
    public:
        Frame();
        virtual ~Frame();
-        using Batches = std::vector<Batch>;
+        using Batches = std::vector<BatchPointer>;
        using FramebufferRecycler = std::function<void(const FramebufferPointer&)>;
        using OverlayRecycler = std::function<void(const TexturePointer&)>;

--- a/libraries/render-utils/src/DeferredLightingEffect.cpp
+++ b/libraries/render-utils/src/DeferredLightingEffect.cpp
@ -738,7 +738,7 @@ void RenderDeferred::run(const RenderContextPointer& renderContext, const Inputs
    }

    auto previousBatch = args->_batch;
-    gpu::Batch batch;
+    gpu::doInBatch(nullptr, args->_context, [&](gpu::Batch& batch) {
        args->_batch = &batch;
         _gpuTimer->begin(batch);

@ -749,10 +749,9 @@ void RenderDeferred::run(const RenderContextPointer& renderContext, const Inputs
        cleanupJob.run(renderContext);

        _gpuTimer->end(batch);
-     args->_context->appendFrameBatch(batch);
+    });
     args->_batch = previousBatch;

-
    auto config = std::static_pointer_cast<Config>(renderContext->jobConfig);
    config->setGPUBatchRunTime(_gpuTimer->getGPUAverage(), _gpuTimer->getBatchAverage());
 }
--- a/libraries/render-utils/src/LightClusters.cpp
+++ b/libraries/render-utils/src/LightClusters.cpp
@ -707,11 +707,10 @@ void DebugLightClusters::run(const render::RenderContextPointer& renderContext,

    auto args = renderContext->args;

-    gpu::Batch batch;
    
+    gpu::doInBatch(nullptr, args->_context, [&](gpu::Batch& batch) { 
        batch.enableStereo(false);

-
        // Assign the camera transform
        batch.setViewportTransform(args->_viewport);
        glm::mat4 projMat;
@ -762,11 +761,12 @@ void DebugLightClusters::run(const render::RenderContextPointer& renderContext,
            batch.setResourceTexture(DEFERRED_BUFFER_LINEAR_DEPTH_UNIT, nullptr);
            batch.setUniformBuffer(DEFERRED_FRAME_TRANSFORM_BUFFER_SLOT, nullptr);
        }
+    });



-    gpu::Batch drawGridAndCleanBatch;
-
+    gpu::doInBatch(nullptr, args->_context, [&](gpu::Batch& batch) { 
+        auto& drawGridAndCleanBatch = batch;
        if (doDrawGrid) {
            // bind the one gpu::Pipeline we need
            drawGridAndCleanBatch.setPipeline(getDrawClusterGridPipeline());
@ -785,7 +785,5 @@ void DebugLightClusters::run(const render::RenderContextPointer& renderContext,
        drawGridAndCleanBatch.setResourceTexture(DEFERRED_BUFFER_NORMAL_UNIT, nullptr);
        drawGridAndCleanBatch.setResourceTexture(DEFERRED_BUFFER_EMISSIVE_UNIT, nullptr);
        drawGridAndCleanBatch.setResourceTexture(DEFERRED_BUFFER_DEPTH_UNIT, nullptr);
-
-    args->_context->appendFrameBatch(batch);
-    args->_context->appendFrameBatch(drawGridAndCleanBatch);
+    });
 }