Merge pull request #8322 from jherico/deferred_batches

Deferred batches
2025-04-19 07:37:54 +02:00 · 2016-07-26 21:43:53 -07:00 · 2016-07-26 21:43:53 -07:00 · 17a5fa040d
commit 17a5fa040d
parent 5ce681154d 5bbcbbb811
10 changed files with 202 additions and 107 deletions
--- a/interface/src/Application.cpp
+++ b/interface/src/Application.cpp
@ -1689,6 +1689,17 @@ void Application::paintGL() {
        renderArgs._context->syncCache();
    }

+    auto framebufferCache = DependencyManager::get<FramebufferCache>();
+    // Final framebuffer that will be handed to the display-plugin
+    auto finalFramebuffer = framebufferCache->getFramebuffer();
+
+    _gpuContext->beginFrame(finalFramebuffer, getHMDSensorPose());
+    // Reset the gpu::Context stages
+    // back to the default framebuffer.
+    gpu::doInBatch(_gpuContext, [&](gpu::Batch& batch) {
+        batch.resetStages();
+    });
+
    auto inputs = AvatarInputs::getInstance();
    if (inputs->mirrorVisible()) {
        PerformanceTimer perfTimer("Mirror");
@ -1711,10 +1722,6 @@ void Application::paintGL() {
        QSize size = getDeviceSize();
        renderArgs._viewport = glm::ivec4(0, 0, size.width(), size.height());
        _applicationOverlay.renderOverlay(&renderArgs);
-        auto overlayTexture = _applicationOverlay.acquireOverlay();
-        if (overlayTexture) {
-            displayPlugin->submitOverlayTexture(overlayTexture);
-        }
    }

    glm::vec3 boomOffset;
@ -1816,12 +1823,8 @@ void Application::paintGL() {
    getApplicationCompositor().setFrameInfo(_frameCount, _myCamera.getTransform());

    // Primary rendering pass
-    auto framebufferCache = DependencyManager::get<FramebufferCache>();
    const QSize size = framebufferCache->getFrameBufferSize();

-    // Final framebuffer that will be handled to the display-plugin
-    auto finalFramebuffer = framebufferCache->getFramebuffer();
-
    {
        PROFILE_RANGE(__FUNCTION__ "/mainRender");
        PerformanceTimer perfTimer("mainRender");
@ -1880,6 +1883,13 @@ void Application::paintGL() {
        renderArgs._context->enableStereo(false);
    }

+    _gpuContext->endFrame();
+
+    gpu::TexturePointer overlayTexture = _applicationOverlay.acquireOverlay();
+    if (overlayTexture) {
+        displayPlugin->submitOverlayTexture(overlayTexture);
+    }
+
    // deliver final composited scene to the display plugin
    {
        PROFILE_RANGE(__FUNCTION__ "/pluginOutput");
@ -1900,11 +1910,6 @@ void Application::paintGL() {

    {
        Stats::getInstance()->setRenderDetails(renderArgs._details);
-        // Reset the gpu::Context Stages
-        // Back to the default framebuffer;
-        gpu::doInBatch(renderArgs._context, [&](gpu::Batch& batch) {
-            batch.resetStages();
-        });
    }

    uint64_t lastPaintDuration = usecTimestampNow() - lastPaintBegin;
--- a/libraries/gpu/src/gpu/Batch.cpp
+++ b/libraries/gpu/src/gpu/Batch.cpp
@ -46,6 +46,33 @@ Batch::Batch() {
    _drawCallInfos.reserve(_drawCallInfosMax);
 }

+Batch::Batch(const Batch& batch_) {
+    Batch& batch = *const_cast<Batch*>(&batch_);
+    _commands.swap(batch._commands);
+    _commandOffsets.swap(batch._commandOffsets);
+    _params.swap(batch._params);
+    _data.swap(batch._data);
+    _invalidModel = batch._invalidModel;
+    _currentModel = batch._currentModel;
+    _objects.swap(batch._objects);
+    _currentNamedCall = batch._currentNamedCall;
+
+    _buffers._items.swap(batch._buffers._items);
+    _textures._items.swap(batch._textures._items);
+    _streamFormats._items.swap(batch._streamFormats._items);
+    _transforms._items.swap(batch._transforms._items);
+    _pipelines._items.swap(batch._pipelines._items);
+    _framebuffers._items.swap(batch._framebuffers._items);
+    _drawCallInfos.swap(batch._drawCallInfos);
+    _queries._items.swap(batch._queries._items);
+    _lambdas._items.swap(batch._lambdas._items);
+    _profileRanges._items.swap(batch._profileRanges._items);
+    _names._items.swap(batch._names._items);
+    _namedData.swap(batch._namedData);
+    _enableStereo = batch._enableStereo;
+    _enableSkybox = batch._enableSkybox;
+}
+
 Batch::~Batch() {
    _commandsMax = std::max(_commands.size(), _commandsMax);
    _commandOffsetsMax = std::max(_commandOffsets.size(), _commandOffsetsMax);
--- a/libraries/gpu/src/gpu/Context.cpp
+++ b/libraries/gpu/src/gpu/Context.cpp
@ -9,7 +9,7 @@
 //  See the accompanying file LICENSE or http://www.apache.org/licenses/LICENSE-2.0.html
 //
 #include "Context.h"
-
+#include "Frame.h"
 using namespace gpu;

 Context::CreateBackend Context::_createBackendCallback = nullptr;
@ -20,6 +20,13 @@ Context::Context() {
    if (_createBackendCallback) {
        _backend.reset(_createBackendCallback());
    }
+
+    _frameHandler = [this](Frame& frame){
+        for (size_t i = 0; i < frame.batches.size(); ++i) {
+            _backend->_stereo = frame.stereoStates[i];
+            _backend->render(frame.batches[i]);
+        }
+    };
 }

 Context::Context(const Context& context) {
@ -28,6 +35,43 @@ Context::Context(const Context& context) {
 Context::~Context() {
 }

+void Context::setFrameHandler(FrameHandler handler) {
+    _frameHandler = handler;
+}
+
+#define DEFERRED_RENDERING
+
+void Context::beginFrame(const FramebufferPointer& outputFramebuffer, const glm::mat4& renderPose) {
+    _currentFrame = Frame();
+    _currentFrame.framebuffer = outputFramebuffer;
+    _currentFrame.pose = renderPose;
+    _frameActive = true;
+}
+
+void Context::append(Batch& batch) {
+    if (!_frameActive) {
+        qWarning() << "Batch executed outside of frame boundaries";
+    }
+#ifdef DEFERRED_RENDERING
+    _currentFrame.batches.emplace_back(batch);
+    _currentFrame.stereoStates.emplace_back(_stereo);
+#else
+    _backend->_stereo = _stereo;
+    _backend->render(batch);
+#endif
+}
+
+void Context::endFrame() {
+#ifdef DEFERRED_RENDERING
+    if (_frameHandler) {
+        _frameHandler(_currentFrame);
+    }
+#endif
+    _currentFrame = Frame();
+    _frameActive = false;
+}
+
+
 bool Context::makeProgram(Shader& shader, const Shader::BindingSet& bindings) {
    if (shader.isProgram() && _makeProgramCallback) {
        return _makeProgramCallback(shader, bindings);
@ -35,36 +79,38 @@ bool Context::makeProgram(Shader& shader, const Shader::BindingSet& bindings) {
    return false;
 }

-void Context::render(Batch& batch) {
-    PROFILE_RANGE(__FUNCTION__);
-    _backend->render(batch);
-}
-
 void Context::enableStereo(bool enable) {
-    _backend->enableStereo(enable);
+    _stereo._enable = enable;
 }

 bool Context::isStereo() {
-    return _backend->isStereo();
+    return _stereo._enable;
 }

 void Context::setStereoProjections(const mat4 eyeProjections[2]) {
-    _backend->setStereoProjections(eyeProjections);
+    for (int i = 0; i < 2; ++i) {
+        _stereo._eyeProjections[i] = eyeProjections[i];
+    }
 }

-void Context::setStereoViews(const mat4 eyeViews[2]) {
-    _backend->setStereoViews(eyeViews);
+void Context::setStereoViews(const mat4 views[2]) {
+    for (int i = 0; i < 2; ++i) {
+        _stereo._eyeViews[i] = views[i];
+    }
 }

 void Context::getStereoProjections(mat4* eyeProjections) const {
-    _backend->getStereoProjections(eyeProjections);
+    for (int i = 0; i < 2; ++i) {
+        eyeProjections[i] = _stereo._eyeProjections[i];
+    }
 }

 void Context::getStereoViews(mat4* eyeViews) const {
-    _backend->getStereoViews(eyeViews);
+    for (int i = 0; i < 2; ++i) {
+        eyeViews[i] = _stereo._eyeViews[i];
+    }
 }

-
 void Context::syncCache() {
    PROFILE_RANGE(__FUNCTION__);
    _backend->syncCache();
@ -103,12 +149,12 @@ Backend::TransformCamera Backend::TransformCamera::getEyeCamera(int eye, const S
    if (!_stereo._skybox) {
        offsetTransform.postTranslate(-Vec3(_stereo._eyeViews[eye][3]));
    } else {
-       // FIXME: If "skybox" the ipd is set to 0 for now, let s try to propose a better solution for this in the future
+        // FIXME: If "skybox", the IPD is set to 0 for now; let's try to propose a better solution for this in the future
    }
    result._projection = _stereo._eyeProjections[eye];
    result.recomputeDerived(offsetTransform);

-    result._stereoInfo = Vec4(1.0f, (float) eye, 0.0f, 0.0f);
+    result._stereoInfo = Vec4(1.0f, (float)eye, 0.0f, 0.0f);

    return result;
 }
@ -125,7 +171,7 @@ std::atomic<uint32_t> Context::_textureGPUTransferCount{ 0 };
 void Context::incrementBufferGPUCount() {
    _bufferGPUCount++;
 }
-void Context::decrementBufferGPUCount() { 
+void Context::decrementBufferGPUCount() {
    _bufferGPUCount--;
 }
 void Context::updateBufferGPUMemoryUsage(Size prevObjectSize, Size newObjectSize) {
--- a/libraries/gpu/src/gpu/Context.h
+++ b/libraries/gpu/src/gpu/Context.h
@ -16,12 +16,13 @@

 #include <GLMHelpers.h>

+#include "Forward.h"
 #include "Batch.h"
-
 #include "Resource.h"
 #include "Texture.h"
 #include "Pipeline.h"
 #include "Framebuffer.h"
+#include "Frame.h"

 class QImage;

@ -46,51 +47,11 @@ public:
    ContextStats(const ContextStats& stats) = default;
 };

-struct StereoState {
-    bool _enable{ false };
-    bool _skybox{ false };
-    // 0 for left eye, 1 for right eye
-    uint8_t _pass{ 0 };
-    mat4 _eyeViews[2];
-    mat4 _eyeProjections[2];
-};
-
 class Backend {
 public:
    virtual~ Backend() {};

    virtual void render(Batch& batch) = 0;
-    virtual void enableStereo(bool enable) {
-        _stereo._enable = enable;
-    }
-
-    virtual bool isStereo() {
-        return _stereo._enable;
-    }
-
-    void setStereoProjections(const mat4 eyeProjections[2]) {
-        for (int i = 0; i < 2; ++i) {
-            _stereo._eyeProjections[i] = eyeProjections[i];
-        }
-    }
-
-    void setStereoViews(const mat4 views[2]) {
-        for (int i = 0; i < 2; ++i) {
-            _stereo._eyeViews[i] = views[i];
-        }
-    }
-
-    void getStereoProjections(mat4* eyeProjections) const {
-        for (int i = 0; i < 2; ++i) {
-            eyeProjections[i] = _stereo._eyeProjections[i];
-        }
-    }
-
-    void getStereoViews(mat4* eyeViews) const {
-        for (int i = 0; i < 2; ++i) {
-            eyeViews[i] = _stereo._eyeViews[i];
-        }
-    }

    virtual void syncCache() = 0;
    virtual void downloadFramebuffer(const FramebufferPointer& srcFramebuffer, const Vec4i& region, QImage& destImage) = 0;
@ -137,8 +98,25 @@ public:
    static void decrementTextureGPUTransferCount();

 protected:
-    StereoState  _stereo;
+    virtual bool isStereo() {
+        return _stereo._enable;
+    }
+
+    void getStereoProjections(mat4* eyeProjections) const {
+        for (int i = 0; i < 2; ++i) {
+            eyeProjections[i] = _stereo._eyeProjections[i];
+        }
+    }
+
+    void getStereoViews(mat4* eyeViews) const {
+        for (int i = 0; i < 2; ++i) {
+            eyeViews[i] = _stereo._eyeViews[i];
+        }
+    }
+
+    friend class Context;
    ContextStats _stats;
+    StereoState _stereo;
 };

 class Context {
@ -161,7 +139,10 @@ public:
    Context();
    ~Context();

-    void render(Batch& batch);
+    void setFrameHandler(FrameHandler handler);
+    void beginFrame(const FramebufferPointer& outputFramebuffer, const glm::mat4& renderPose = glm::mat4());
+    void append(Batch& batch);
+    void endFrame();

    void enableStereo(bool enable = true);
    bool isStereo();
@ -191,6 +172,10 @@ protected:
    Context(const Context& context);

    std::unique_ptr<Backend> _backend;
+    bool _frameActive { false };
+    Frame _currentFrame;
+    FrameHandler _frameHandler;
+    StereoState  _stereo;

    // This function can only be called by "static Shader::makeProgram()"
    // makeProgramShader(...) make a program shader ready to be used in a Batch.
@ -234,7 +219,7 @@ template<typename F>
 void doInBatch(std::shared_ptr<gpu::Context> context, F f) {
    gpu::Batch batch;
    f(batch);
-    context->render(batch);
+    context->append(batch);
 }

 };
--- a/libraries/gpu/src/gpu/Forward.h
+++ b/libraries/gpu/src/gpu/Forward.h
@ -12,6 +12,7 @@
 #include <stdint.h>
 #include <memory>
 #include <vector>
+#include <functional>

 #include <glm/glm.hpp>

@ -21,6 +22,9 @@ namespace gpu {
    class Context;
    using ContextPointer = std::shared_ptr<Context>;
    class GPUObject;
+    class Frame;
+    using FramePointer = std::shared_ptr<Frame>;
+    using FrameHandler = std::function<void(Frame& frame)>;

    using Stamp = int;
    using uint32 = uint32_t;
@ -82,6 +86,15 @@ namespace gpu {
    class TextureView;
    using TextureViews = std::vector<TextureView>;

+    struct StereoState {
+        bool _enable{ false };
+        bool _skybox{ false };
+        // 0 for left eye, 1 for right eye
+        uint8 _pass{ 0 };
+        Mat4 _eyeViews[2];
+        Mat4 _eyeProjections[2];
+    };
+
    namespace gl {
        class GLBuffer;
    }
--- a/libraries/gpu/src/gpu/Frame.h
+++ b/libraries/gpu/src/gpu/Frame.h
@ -0,0 +1,29 @@
+//
+//  Created by Bradley Austin Davis on 2016/07/26
+//  Copyright 2013-2016 High Fidelity, Inc.
+//
+//  Distributed under the Apache License, Version 2.0.
+//  See the accompanying file LICENSE or http://www.apache.org/licenses/LICENSE-2.0.html
+//
+#ifndef hifi_gpu_Frame_h
+#define hifi_gpu_Frame_h
+
+#include "Forward.h"
+
+namespace gpu {
+
+class Frame {
+public:
+    /// The sensor pose used for rendering the frame, only applicable for HMDs
+    glm::mat4 pose;
+    /// The collection of batches which make up the frame
+    std::vector<Batch> batches;
+    std::vector<StereoState> stereoStates;
+    /// The destination framebuffer in which the results will be placed
+    FramebufferPointer framebuffer;
+};
+
+};
+
+
+#endif
--- a/libraries/render-utils/src/FramebufferCache.cpp
+++ b/libraries/render-utils/src/FramebufferCache.cpp
@ -11,34 +11,25 @@

 #include "FramebufferCache.h"

-#include <mutex>
-
 #include <glm/glm.hpp>
+#include <gpu/Format.h>
+#include <gpu/Framebuffer.h>

-#include <QMap>
-#include <QQueue>
-#include <gpu/Batch.h>
 #include "RenderUtilsLogging.h"

-static QQueue<gpu::FramebufferPointer> _cachedFramebuffers;
-
-FramebufferCache::FramebufferCache() {
-}
-
-FramebufferCache::~FramebufferCache() {
-    _cachedFramebuffers.clear();
-}
-
 void FramebufferCache::setFrameBufferSize(QSize frameBufferSize) {
    //If the size changed, we need to delete our FBOs
    if (_frameBufferSize != frameBufferSize) {
        _frameBufferSize = frameBufferSize;
        _selfieFramebuffer.reset();
-        _cachedFramebuffers.clear();
        _occlusionFramebuffer.reset();
        _occlusionTexture.reset();
        _occlusionBlurredFramebuffer.reset();
        _occlusionBlurredTexture.reset();
+        {
+            std::unique_lock<std::mutex> lock(_mutex);
+            _cachedFramebuffers.clear();
+        }
    }
 }

@ -55,8 +46,6 @@ void FramebufferCache::createPrimaryFramebuffer() {

    auto smoothSampler = gpu::Sampler(gpu::Sampler::FILTER_MIN_MAG_MIP_LINEAR);

-
-
    resizeAmbientOcclusionBuffers();
 }

@ -87,7 +76,8 @@ void FramebufferCache::resizeAmbientOcclusionBuffers() {


 gpu::FramebufferPointer FramebufferCache::getFramebuffer() {
-    if (_cachedFramebuffers.isEmpty()) {
+    std::unique_lock<std::mutex> lock(_mutex);
+    if (_cachedFramebuffers.empty()) {
        _cachedFramebuffers.push_back(gpu::FramebufferPointer(gpu::Framebuffer::create(gpu::Element::COLOR_SRGBA_32, _frameBufferSize.width(), _frameBufferSize.height())));
    }
    gpu::FramebufferPointer result = _cachedFramebuffers.front();
@ -96,6 +86,7 @@ gpu::FramebufferPointer FramebufferCache::getFramebuffer() {
 }

 void FramebufferCache::releaseFramebuffer(const gpu::FramebufferPointer& framebuffer) {
+    std::unique_lock<std::mutex> lock(_mutex);
    if (QSize(framebuffer->getSize().x, framebuffer->getSize().y) == _frameBufferSize) {
        _cachedFramebuffers.push_back(framebuffer);
    }
--- a/libraries/render-utils/src/FramebufferCache.h
+++ b/libraries/render-utils/src/FramebufferCache.h
@ -11,13 +11,10 @@

 #include <QSize>

-#include <gpu/Framebuffer.h>
+#include <mutex>
+#include <gpu/Forward.h>
 #include <DependencyManager.h>

-namespace gpu {
-class Batch;
-}
-
 /// Stores cached textures, including render-to-texture targets.
 class FramebufferCache : public Dependency {
    SINGLETON_DEPENDENCY
@ -47,9 +44,6 @@ public:
    void releaseFramebuffer(const gpu::FramebufferPointer& framebuffer);

 private:
-    FramebufferCache();
-    virtual ~FramebufferCache();
-
    void createPrimaryFramebuffer();

    gpu::FramebufferPointer _shadowFramebuffer;
@ -65,6 +59,9 @@ private:
    QSize _frameBufferSize{ 100, 100 };
    int _AOResolutionLevel = 1; // AO perform at half res

+    std::mutex _mutex;
+    std::list<gpu::FramebufferPointer> _cachedFramebuffers;
+
    // Resize/reallocate the buffers used for AO
    // the size of the AO buffers is scaled by the AOResolutionScale;
    void resizeAmbientOcclusionBuffers();
--- a/libraries/render-utils/src/RenderDeferredTask.cpp
+++ b/libraries/render-utils/src/RenderDeferredTask.cpp
@ -347,10 +347,10 @@ void DrawOverlay3D::run(const SceneContextPointer& sceneContext, const RenderCon
        // Needs to be distinct from the other batch because using the clear call 
        // while stereo is enabled triggers a warning
        if (_opaquePass) {
-            gpu::Batch batch;
-            batch.enableStereo(false);
-            batch.clearFramebuffer(gpu::Framebuffer::BUFFER_DEPTH, glm::vec4(), 1.f, 0, true);
-            args->_context->render(batch);
+            gpu::doInBatch(args->_context, [&](gpu::Batch& batch){
+                batch.enableStereo(false);
+                batch.clearFramebuffer(gpu::Framebuffer::BUFFER_DEPTH, glm::vec4(), 1.f, 0, true);
+            });
        }

        // Render the items
--- a/tests/render-perf/src/main.cpp
+++ b/tests/render-perf/src/main.cpp
@ -403,7 +403,12 @@ private:
            renderArgs._blitFramebuffer = finalFramebuffer;
        }

+        _gpuContext->beginFrame(renderArgs._blitFramebuffer);
+        gpu::doInBatch(renderArgs._context, [&](gpu::Batch& batch) {
+            batch.resetStages();
+        });
        render(&renderArgs);
+        _gpuContext->endFrame();
        GLuint glTex;
        {
            auto gpuTex = renderArgs._blitFramebuffer->getRenderBuffer(0);
@ -428,9 +433,6 @@ private:
        _offscreenContext->makeCurrent();
        framebufferCache->releaseFramebuffer(renderArgs._blitFramebuffer);
        renderArgs._blitFramebuffer.reset();
-        gpu::doInBatch(renderArgs._context, [&](gpu::Batch& batch) {
-            batch.resetStages();
-        });
        _fpsCounter.increment();
        static size_t _frameCount { 0 };
        ++_frameCount;