From cfff28ad0af99250319c7471a75f2d85c79ebdd2 Mon Sep 17 00:00:00 2001
From: Brad Davis <bdavis@saintandreas.org>
Date: Thu, 6 Sep 2018 16:03:43 -0700
Subject: [PATCH] More shared pointer deduplication

---
 .../gpu-gl-common/src/gpu/gl/GLBackend.cpp    |   9 +-
 .../gpu-gl-common/src/gpu/gl/GLBackend.h      | 314 +++++++++++++-----
 .../src/gpu/gl/GLBackendInput.cpp             |  31 +-
 .../src/gpu/gl/GLBackendOutput.cpp            |  25 +-
 .../src/gpu/gl/GLBackendPipeline.cpp          |  54 +--
 .../gpu-gl-common/src/gpu/gl/GLPipeline.cpp   |   4 +-
 libraries/gpu-gl/src/gpu/gl41/GL41Backend.h   |   2 +-
 .../gpu-gl/src/gpu/gl41/GL41BackendBuffer.cpp |  13 +-
 .../gpu-gl/src/gpu/gl41/GL41BackendInput.cpp  |  13 +-
 libraries/gpu-gl/src/gpu/gl45/GL45Backend.h   |   2 +-
 .../gpu-gl/src/gpu/gl45/GL45BackendBuffer.cpp |   8 +-
 .../gpu-gl/src/gpu/gl45/GL45BackendInput.cpp  |   9 +-
 libraries/gpu-gles/src/gpu/gles/GLESBackend.h |   2 +-
 .../src/gpu/gles/GLESBackendBuffer.cpp        |   6 +-
 libraries/gpu/src/gpu/Batch.cpp               |   2 +-
 libraries/gpu/src/gpu/Batch.h                 |   2 +-
 libraries/gpu/src/gpu/Framebuffer.cpp         |  10 +-
 libraries/gpu/src/gpu/Framebuffer.h           |   6 +-
 18 files changed, 330 insertions(+), 182 deletions(-)

diff --git a/libraries/gpu-gl-common/src/gpu/gl/GLBackend.cpp b/libraries/gpu-gl-common/src/gpu/gl/GLBackend.cpp
index c1848d99b1..30a3cf0aaf 100644
--- a/libraries/gpu-gl-common/src/gpu/gl/GLBackend.cpp
+++ b/libraries/gpu-gl-common/src/gpu/gl/GLBackend.cpp
@@ -150,6 +150,7 @@ void GLBackend::init() {
     });
 }
 
+
 GLBackend::GLBackend(bool syncCache) {
     _pipeline._cameraCorrectionBuffer._buffer->flush();
     initShaderBinaryCache();
@@ -201,9 +202,10 @@ void GLBackend::renderPassTransfer(const Batch& batch) {
                 {
                     Vec2u outputSize{ 1,1 };
 
-                    if (_output._framebuffer) {
-                        outputSize.x = _output._framebuffer->getWidth();
-                        outputSize.y = _output._framebuffer->getHeight();
+                    auto framebuffer = acquire(_output._framebuffer);
+                    if (framebuffer) {
+                        outputSize.x = framebuffer->getWidth();
+                        outputSize.y = framebuffer->getHeight();
                     } else if (glm::dot(_transform._projectionJitter, _transform._projectionJitter)>0.0f) {
                         qCWarning(gpugllogging) << "Jittering needs to have a frame buffer to be set";
                     }
@@ -220,6 +222,7 @@ void GLBackend::renderPassTransfer(const Batch& batch) {
                     _stereo._contextDisable = false;
                     break;
 
+                case Batch::COMMAND_setFramebuffer:
                 case Batch::COMMAND_setViewportTransform:
                 case Batch::COMMAND_setViewTransform:
                 case Batch::COMMAND_setProjectionTransform:
diff --git a/libraries/gpu-gl-common/src/gpu/gl/GLBackend.h b/libraries/gpu-gl-common/src/gpu/gl/GLBackend.h
index 9b3a28e6fd..7927734256 100644
--- a/libraries/gpu-gl-common/src/gpu/gl/GLBackend.h
+++ b/libraries/gpu-gl-common/src/gpu/gl/GLBackend.h
@@ -30,7 +30,6 @@
 
 #include "GLShared.h"
 
-
 // Different versions for the stereo drawcall
 // Current preferred is  "instanced" which draw the shape twice but instanced and rely on clipping plane to draw left/right side only
 #if defined(USE_GLES)
@@ -40,7 +39,6 @@
 #define GPU_STEREO_TECHNIQUE_INSTANCED
 #endif
 
-
 // Let these be configured by the one define picked above
 #ifdef GPU_STEREO_TECHNIQUE_DOUBLED_SIMPLE
 #define GPU_STEREO_DRAWCALL_DOUBLED
@@ -56,8 +54,151 @@
 #define GPU_STEREO_CAMERA_BUFFER
 #endif
 
+//
+// GL Backend pointer storage mechanism
+// Exactly one of the following three defines must be uncommented.
+
+// Equivalent to current state of affairs in master, 
+// Works pretty well, but ends up creating a lot of needless smart pointer duplication
+// which means there's a high aggregate cost of manipulating std::shared_ptr counters
+//#define GPU_POINTER_STORAGE_SHARED
+
+// The platonic ideal: use references to smart pointers.
+// However, this produces artifacts because there are too many places in the code right now that
+// create temporary values (undesirable smart pointer duplications); those temporaries
+// get passed on, have their reference taken, and are then destroyed, leaving the reference invalid
+//#define GPU_POINTER_STORAGE_REF
+
+// Raw pointer manipulation.  Seems more dangerous than the reference wrappers,
+// but in practice it is the reference wrappers that cause issues, because they
+// end up grabbing a reference to a temporary variable
+#define GPU_POINTER_STORAGE_RAW
+
 namespace gpu { namespace gl {
 
+#if defined(GPU_POINTER_STORAGE_SHARED)
+// True when both shared pointers refer to the same object (or both are empty).
+template <typename T> static inline bool compare(const std::shared_ptr<T>& a, const std::shared_ptr<T>& b) {
+    return a.get() == b.get();
+}
+
+// Borrows the raw pointer held by the slot; the reference count is not touched.
+template <typename T> static inline T* acquire(const std::shared_ptr<T>& pointer) {
+    return pointer.get();
+}
+
+// Releases the slot's share of ownership and leaves the slot empty.
+template <typename T> static inline void reset(std::shared_ptr<T>& pointer) {
+    pointer.reset();
+}
+
+// True when the slot currently holds an object.
+template <typename T> static inline bool valid(const std::shared_ptr<T>& pointer) {
+    return static_cast<bool>(pointer);
+}
+
+// Stores a copy of `source` in the slot, so the slot shares ownership of the object.
+template <typename T> static inline void assign(std::shared_ptr<T>& pointer, const std::shared_ptr<T>& source) {
+    pointer = source;
+}
+
+using BufferReference = BufferPointer;
+using TextureReference = TexturePointer;
+using FramebufferReference = FramebufferPointer;
+using FormatReference = Stream::FormatPointer;
+using PipelineReference = PipelinePointer;
+
+#define GPU_REFERENCE_INIT_VALUE nullptr
+
+#elif defined(GPU_POINTER_STORAGE_REF)
+
+// Non-owning handle that refers to a std::shared_ptr<T> stored elsewhere; that pointer must outlive the handle.
+template <typename T>
+class PointerReferenceWrapper : public std::reference_wrapper<const std::shared_ptr<T>> {
+    using Parent = std::reference_wrapper<const std::shared_ptr<T>>;
+
+public:
+    using Pointer = std::shared_ptr<T>;
+    PointerReferenceWrapper() : Parent(EMPTY()) {}  // refers to the shared empty pointer
+    PointerReferenceWrapper(const Pointer& pointer) : Parent(pointer) {}
+    PointerReferenceWrapper(const Pointer&&) = delete;  // a temporary would leave the stored reference dangling
+    void clear() { *this = EMPTY(); }  // rebind to the shared empty pointer
+
+private:
+    // Function-local static so every cleared or default-constructed handle has a valid referent.
+    static const Pointer& EMPTY() { static const Pointer EMPTY_VALUE; return EMPTY_VALUE; }
+};
+
+// True when the referenced shared pointer and `pointer` hold the same object.
+template <typename T> static bool compare(const PointerReferenceWrapper<T>& reference, const std::shared_ptr<T>& pointer) {
+    return pointer == reference.get();
+}
+
+// Borrows the raw pointer held by the referenced shared pointer; no reference count is touched.
+template <typename T> static inline T* acquire(const PointerReferenceWrapper<T>& reference) {
+    return reference.get().get();
+}
+
+// Rebinds the handle to `pointer`; only a reference is stored, so `pointer` must stay alive while cached.
+template <typename T> static void assign(PointerReferenceWrapper<T>& reference, const std::shared_ptr<T>& pointer) {
+    reference = PointerReferenceWrapper<T>(pointer);
+}
+
+// True when the referenced shared pointer currently holds an object.
+template <typename T> static bool valid(const PointerReferenceWrapper<T>& reference) {
+    return static_cast<bool>(reference.get());
+}
+
+// Points the handle back at the shared empty pointer.
+template <typename T> static inline void reset(PointerReferenceWrapper<T>& reference) {
+    reference.clear();
+}
+
+using BufferReference = PointerReferenceWrapper<Buffer>;
+using TextureReference = PointerReferenceWrapper<Texture>;
+using FramebufferReference = PointerReferenceWrapper<Framebuffer>;
+using FormatReference = PointerReferenceWrapper<Stream::Format>;
+using PipelineReference = PointerReferenceWrapper<Pipeline>;
+
+#define GPU_REFERENCE_INIT_VALUE 
+
+#elif defined(GPU_POINTER_STORAGE_RAW)
+
+// True when the cached raw pointer is the object held by `pointer` (or both are null).
+template <typename T> static bool compare(const T* const& rawPointer, const std::shared_ptr<T>& pointer) {
+    return pointer.get() == rawPointer;
+}
+
+// Returns the cached raw pointer; taken by const reference so const slots and const callers can read it too.
+template <typename T> static inline T* acquire(T* const& rawPointer) {
+    return rawPointer;
+}
+
+// True when the cached raw pointer is non-null.
+template <typename T> static inline bool valid(const T* const& rawPointer) {
+    return rawPointer != nullptr;
+}
+
+// Clears the cached raw pointer; nothing is freed because the slot never owned the object.
+template <typename T> static inline void reset(T*& rawPointer) {
+    rawPointer = nullptr;
+}
+
+// Caches only the address held by `pointer`; no ownership is taken, so the object must stay alive elsewhere.
+template <typename T> static inline void assign(T*& rawPointer, const std::shared_ptr<T>& pointer) {
+    rawPointer = pointer.get();
+}
+
+using BufferReference = Buffer*;
+using TextureReference = Texture*;
+using FramebufferReference = Framebuffer*;
+using FormatReference = Stream::Format*;
+using PipelineReference = Pipeline*;
+
+#define GPU_REFERENCE_INIT_VALUE nullptr
+
+#endif
+
 class GLBackend : public Backend, public std::enable_shared_from_this<GLBackend> {
     // Context Backend static interface required
     friend class gpu::Context;
@@ -67,8 +208,9 @@ class GLBackend : public Backend, public std::enable_shared_from_this<GLBackend>
 protected:
     explicit GLBackend(bool syncCache);
     GLBackend();
-public:
 
+
+public:
 #if defined(USE_GLES)
     // https://www.khronos.org/registry/OpenGL-Refpages/es3/html/glGet.xhtml
     static const GLint MIN_REQUIRED_TEXTURE_IMAGE_UNITS = 16;
@@ -109,8 +251,8 @@ public:
     // This is the ugly "download the pixels to sysmem for taking a snapshot"
     // Just avoid using it, it's ugly and will break performances
     virtual void downloadFramebuffer(const FramebufferPointer& srcFramebuffer,
-                                     const Vec4i& region, QImage& destImage) final override;
-
+                                     const Vec4i& region,
+                                     QImage& destImage) final override;
 
     // this is the maximum numeber of available input buffers
     size_t getNumInputBuffers() const { return _input._invalidBuffers.size(); }
@@ -131,7 +273,6 @@ public:
     static const int MAX_NUM_RESOURCE_TABLE_TEXTURES = 2;
     size_t getMaxNumResourceTextureTables() const { return MAX_NUM_RESOURCE_TABLE_TEXTURES; }
 
-
     // Draw Stage
     virtual void do_draw(const Batch& batch, size_t paramOffset) = 0;
     virtual void do_drawIndexed(const Batch& batch, size_t paramOffset) = 0;
@@ -183,7 +324,6 @@ public:
     // Reset stages
     virtual void do_resetStages(const Batch& batch, size_t paramOffset) final;
 
-    
     virtual void do_disableContextViewCorrection(const Batch& batch, size_t paramOffset) final;
     virtual void do_restoreContextViewCorrection(const Batch& batch, size_t paramOffset) final;
 
@@ -203,7 +343,7 @@ public:
     virtual void do_popProfileRange(const Batch& batch, size_t paramOffset) final;
 
     // TODO: As long as we have gl calls explicitely issued from interface
-    // code, we need to be able to record and batch these calls. THe long 
+    // code, we need to be able to record and batch these calls. The long
     // term strategy is to get rid of any GL calls in favor of the HIFI GPU API
     virtual void do_glUniform1i(const Batch& batch, size_t paramOffset) final;
     virtual void do_glUniform1f(const Batch& batch, size_t paramOffset) final;
@@ -228,7 +368,9 @@ public:
     virtual void do_setStateAntialiasedLineEnable(bool enable) final;
     virtual void do_setStateDepthBias(Vec2 bias) final;
     virtual void do_setStateDepthTest(State::DepthTest test) final;
-    virtual void do_setStateStencil(State::StencilActivation activation, State::StencilTest frontTest, State::StencilTest backTest) final;
+    virtual void do_setStateStencil(State::StencilActivation activation,
+                                    State::StencilTest frontTest,
+                                    State::StencilTest backTest) final;
     virtual void do_setStateAlphaToCoverageEnable(bool enable) final;
     virtual void do_setStateSampleMask(uint32 mask) final;
     virtual void do_setStateBlend(State::BlendFunction blendFunction) final;
@@ -257,7 +399,9 @@ public:
     virtual void releaseQuery(GLuint id) const;
     virtual void queueLambda(const std::function<void()> lambda) const;
 
-    bool isTextureManagementSparseEnabled() const override { return (_textureManagement._sparseCapable && Texture::getEnableSparseTextures()); }
+    bool isTextureManagementSparseEnabled() const override {
+        return (_textureManagement._sparseCapable && Texture::getEnableSparseTextures());
+    }
 
 protected:
     virtual GLint getRealUniformLocation(GLint location) const;
@@ -266,11 +410,11 @@ protected:
 
     // FIXME instead of a single flag, create a features struct similar to
     // https://www.khronos.org/registry/vulkan/specs/1.0/man/html/VkPhysicalDeviceFeatures.html
-    virtual bool supportsBindless() const { return false;  }
+    virtual bool supportsBindless() const { return false; }
 
     static const size_t INVALID_OFFSET = (size_t)-1;
-    bool _inRenderTransferPass { false };
-    int _currentDraw { -1 };
+    bool _inRenderTransferPass{ false };
+    int _currentDraw{ -1 };
 
     std::list<std::string> profileRanges;
     mutable Mutex _trashMutex;
@@ -299,46 +443,42 @@ protected:
     virtual void updateInput() = 0;
 
     struct InputStageState {
-        bool _invalidFormat { true };
-        bool _lastUpdateStereoState{ false }; 
+        bool _invalidFormat{ true };
+        bool _lastUpdateStereoState{ false };
         bool _hadColorAttribute{ true };
-        Stream::FormatPointer _format;
+        FormatReference _format{ GPU_REFERENCE_INIT_VALUE };
         std::string _formatKey;
 
         typedef std::bitset<MAX_NUM_ATTRIBUTES> ActivationCache;
-        ActivationCache _attributeActivation { 0 };
+        ActivationCache _attributeActivation{ 0 };
 
         typedef std::bitset<MAX_NUM_INPUT_BUFFERS> BuffersState;
 
         BuffersState _invalidBuffers{ 0 };
         BuffersState _attribBindingBuffers{ 0 };
 
-        Buffers _buffers;
-        Offsets _bufferOffsets;
-        Offsets _bufferStrides;
-        std::vector<GLuint> _bufferVBOs;
+        std::vector<BufferReference> _buffers{ MAX_NUM_INPUT_BUFFERS, GPU_REFERENCE_INIT_VALUE };
+        Offsets _bufferOffsets; 
+        Offsets _bufferStrides; 
+        std::vector<GLuint> _bufferVBOs; 
 
         glm::vec4 _colorAttribute{ 0.0f };
 
-        BufferPointer _indexBuffer;
-        Offset _indexBufferOffset { 0 };
-        Type _indexBufferType { UINT32 };
-        
-        BufferPointer _indirectBuffer;
+        BufferReference _indexBuffer{ GPU_REFERENCE_INIT_VALUE };
+        Offset _indexBufferOffset{ 0 };
+        Type _indexBufferType{ UINT32 };
+
+        BufferReference _indirectBuffer{ GPU_REFERENCE_INIT_VALUE };
         Offset _indirectBufferOffset{ 0 };
         Offset _indirectBufferStride{ 0 };
 
-        GLuint _defaultVAO { 0 };
+        GLuint _defaultVAO{ 0 };
 
-        InputStageState() :
-            _invalidFormat(true),
-            _format(0),
-            _formatKey(),
-            _attributeActivation(0),
-            _buffers(_invalidBuffers.size(), BufferPointer(0)),
-            _bufferOffsets(_invalidBuffers.size(), 0),
-            _bufferStrides(_invalidBuffers.size(), 0),
-            _bufferVBOs(_invalidBuffers.size(), 0) {}
+        InputStageState() {
+            _bufferOffsets.resize(MAX_NUM_INPUT_BUFFERS, 0);
+            _bufferStrides.resize(MAX_NUM_INPUT_BUFFERS, 0);
+            _bufferVBOs.resize(MAX_NUM_INPUT_BUFFERS, 0);
+        }
     } _input;
 
     virtual void initTransform() = 0;
@@ -349,7 +489,7 @@ protected:
     virtual void resetTransformStage();
 
     // Allows for correction of the camera pose to account for changes
-    // between the time when a was recorded and the time(s) when it is 
+    // between the time when a was recorded and the time(s) when it is
     // executed
     // Prev is the previous correction used at previous frame
     struct CameraCorrection {
@@ -364,9 +504,12 @@ protected:
         struct Cameras {
             TransformCamera _cams[2];
 
-            Cameras() {};
+            Cameras(){};
             Cameras(const TransformCamera& cam) { memcpy(_cams, &cam, sizeof(TransformCamera)); };
-            Cameras(const TransformCamera& camL, const TransformCamera& camR) { memcpy(_cams, &camL, sizeof(TransformCamera)); memcpy(_cams + 1, &camR, sizeof(TransformCamera)); };
+            Cameras(const TransformCamera& camL, const TransformCamera& camR) {
+                memcpy(_cams, &camL, sizeof(TransformCamera));
+                memcpy(_cams + 1, &camR, sizeof(TransformCamera));
+            };
         };
 
         using CameraBufferElement = Cameras;
@@ -380,25 +523,24 @@ protected:
 
         mutable std::map<std::string, GLvoid*> _drawCallInfoOffsets;
 
-        GLuint _objectBuffer { 0 };
-        GLuint _cameraBuffer { 0 };
-        GLuint _drawCallInfoBuffer { 0 };
-        GLuint _objectBufferTexture { 0 };
-        size_t _cameraUboSize { 0 };
+        GLuint _objectBuffer{ 0 };
+        GLuint _cameraBuffer{ 0 };
+        GLuint _drawCallInfoBuffer{ 0 };
+        GLuint _objectBufferTexture{ 0 };
+        size_t _cameraUboSize{ 0 };
         bool _viewIsCamera{ false };
-        bool _skybox { false };
+        bool _skybox{ false };
         Transform _view;
         CameraCorrection _correction;
         bool _viewCorrectionEnabled{ true };
 
-
         Mat4 _projection;
-        Vec4i _viewport { 0, 0, 1, 1 };
-        Vec2 _depthRange { 0.0f, 1.0f };
+        Vec4i _viewport{ 0, 0, 1, 1 };
+        Vec2 _depthRange{ 0.0f, 1.0f };
         Vec2 _projectionJitter{ 0.0f, 0.0f };
-        bool _invalidView { false };
-        bool _invalidProj { false };
-        bool _invalidViewport { false };
+        bool _invalidView{ false };
+        bool _invalidProj{ false };
+        bool _invalidViewport{ false };
 
         bool _enabledDrawcallInfoBuffer{ false };
 
@@ -417,44 +559,47 @@ protected:
 
     struct UniformStageState {
         struct BufferState {
-            BufferPointer buffer;
+            BufferReference buffer{ GPU_REFERENCE_INIT_VALUE };
             GLintptr offset{ 0 };
             GLsizeiptr size{ 0 };
-            BufferState(const BufferPointer& buffer = nullptr, GLintptr offset = 0, GLsizeiptr size = 0);
-            bool operator ==(BufferState& other) const {
-                return offset == other.offset && size == other.size && buffer == other.buffer;
+            // No constructor: members rely on their default initializers. Copy-assignment is disabled below.
+
+            BufferState& operator=(const BufferState& other) = delete;
+            void reset() { gpu::gl::reset(buffer); offset = 0; size = 0; }
+            bool compare(const BufferPointer& buffer, GLintptr offset, GLsizeiptr size) const {
+                // The parameters shadow the members, so the cached values are reached through this->.
+                return (this->offset == offset && this->size == size && gpu::gl::compare(this->buffer, buffer));
             }
         };
 
         // MAX_NUM_UNIFORM_BUFFERS-1 is the max uniform index BATCHES are allowed to set, but
-        // MIN_REQUIRED_UNIFORM_BUFFER_BINDINGS is used here because the backend sets some 
-        // internal UBOs for things like camera correction 
+        // MIN_REQUIRED_UNIFORM_BUFFER_BINDINGS is used here because the backend sets some
+        // internal UBOs for things like camera correction
         std::array<BufferState, MIN_REQUIRED_UNIFORM_BUFFER_BINDINGS> _buffers;
     } _uniform;
 
-    // Helper function that provides common code 
+    // Helper function that provides common code
     void bindUniformBuffer(uint32_t slot, const BufferPointer& buffer, GLintptr offset = 0, GLsizeiptr size = 0);
     void releaseUniformBuffer(uint32_t slot);
     void resetUniformStage();
 
     // update resource cache and do the gl bind/unbind call with the current gpu::Buffer cached at slot s
     // This is using different gl object  depending on the gl version
-    virtual bool bindResourceBuffer(uint32_t slot, BufferPointer& buffer) = 0;
+    virtual bool bindResourceBuffer(uint32_t slot, const BufferPointer& buffer) = 0;
     virtual void releaseResourceBuffer(uint32_t slot) = 0;
 
-    // Helper function that provides common code used by do_setResourceTexture and 
+    // Helper function that provides common code used by do_setResourceTexture and
     // do_setResourceTextureTable (in non-bindless mode)
     void bindResourceTexture(uint32_t slot, const TexturePointer& texture);
 
-
     // update resource cache and do the gl unbind call with the current gpu::Texture cached at slot s
     void releaseResourceTexture(uint32_t slot);
 
     void resetResourceStage();
 
     struct ResourceStageState {
-        std::array<BufferPointer, MAX_NUM_RESOURCE_BUFFERS> _buffers;
-        std::array<TexturePointer, MAX_NUM_RESOURCE_TEXTURES> _textures;
+        std::vector<BufferReference> _buffers{ MAX_NUM_RESOURCE_BUFFERS, BufferReference() };
+        std::vector<TextureReference> _textures{ MAX_NUM_RESOURCE_TEXTURES, GPU_REFERENCE_INIT_VALUE };
         //Textures _textures { { MAX_NUM_RESOURCE_TEXTURES } };
         int findEmptyTextureSlot() const;
     } _resource;
@@ -470,21 +615,22 @@ protected:
     void resetPipelineStage();
 
     struct PipelineStageState {
-        PipelinePointer _pipeline;
+        PipelineReference _pipeline{ GPU_REFERENCE_INIT_VALUE };
 
-        GLuint _program { 0 };
-        bool _cameraCorrection { false };
-        GLShader* _programShader { nullptr };
-        bool _invalidProgram { false };
+        GLuint _program{ 0 };
+        bool _cameraCorrection{ false };
+        GLShader* _programShader{ nullptr };
+        bool _invalidProgram{ false };
 
-        BufferView _cameraCorrectionBuffer { gpu::BufferView(std::make_shared<gpu::Buffer>(sizeof(CameraCorrection), nullptr )) };
-        BufferView _cameraCorrectionBufferIdentity { gpu::BufferView(std::make_shared<gpu::Buffer>(sizeof(CameraCorrection), nullptr )) };
+        BufferView _cameraCorrectionBuffer{ gpu::BufferView(std::make_shared<gpu::Buffer>(sizeof(CameraCorrection), nullptr)) };
+        BufferView _cameraCorrectionBufferIdentity{ gpu::BufferView(
+            std::make_shared<gpu::Buffer>(sizeof(CameraCorrection), nullptr)) };
 
         State::Data _stateCache{ State::DEFAULT };
-        State::Signature _stateSignatureCache { 0 };
+        State::Signature _stateSignatureCache{ 0 };
 
-        GLState* _state { nullptr };
-        bool _invalidState { false };
+        GLState* _state{ nullptr };
+        bool _invalidState{ false };
 
         PipelineStageState() {
             _cameraCorrectionBuffer.edit<CameraCorrection>() = CameraCorrection();
@@ -498,9 +644,9 @@ protected:
     virtual GLShader* compileBackendProgram(const Shader& program, const Shader::CompilationHandler& handler);
     virtual GLShader* compileBackendShader(const Shader& shader, const Shader::CompilationHandler& handler);
     virtual std::string getBackendShaderHeader() const = 0;
-    // For a program, this will return a string containing all the source files (without any 
-    // backend headers or defines).  For a vertex, fragment or geometry shader, this will 
-    // return the fully customized shader with all the version and backend specific 
+    // For a program, this will return a string containing all the source files (without any
+    // backend headers or defines).  For a vertex, fragment or geometry shader, this will
+    // return the fully customized shader with all the version and backend specific
     // preprocessor directives
     // The program string returned can be used as a key for a cache of shader binaries
     // The shader strings can be reliably sent to the low level `compileShader` functions
@@ -516,22 +662,22 @@ protected:
     // Synchronize the state cache of this Backend with the actual real state of the GL Context
     void syncOutputStateCache();
     void resetOutputStage();
-    
+
     struct OutputStageState {
-        FramebufferPointer _framebuffer { nullptr };
-        GLuint _drawFBO { 0 };
+        FramebufferReference _framebuffer{ GPU_REFERENCE_INIT_VALUE };
+        GLuint _drawFBO{ 0 };
     } _output;
 
     void resetQueryStage();
     struct QueryStageState {
-        uint32_t _rangeQueryDepth { 0 };
+        uint32_t _rangeQueryDepth{ 0 };
     } _queryStage;
 
     void resetStages();
 
     // Stores cached binary versions of the shaders for quicker startup on subsequent runs
-    // Note that shaders in the cache can still fail to load due to hardware or driver 
-    // changes that invalidate the cached binary, in which case we fall back on compiling 
+    // Note that shaders in the cache can still fail to load due to hardware or driver
+    // changes that invalidate the cached binary, in which case we fall back on compiling
     // the source again
     struct ShaderBinaryCache {
         std::mutex _mutex;
@@ -543,7 +689,7 @@ protected:
     virtual void killShaderBinaryCache();
 
     struct TextureManagementStageState {
-        bool _sparseCapable { false };
+        bool _sparseCapable{ false };
         GLTextureTransferEnginePointer _transferEngine;
     } _textureManagement;
     virtual void initTextureManagementStage();
@@ -556,6 +702,6 @@ protected:
     friend class GLShader;
 };
 
-} }
+}}  // namespace gpu::gl
 
 #endif
diff --git a/libraries/gpu-gl-common/src/gpu/gl/GLBackendInput.cpp b/libraries/gpu-gl-common/src/gpu/gl/GLBackendInput.cpp
index ee094a2d2c..219efae866 100644
--- a/libraries/gpu-gl-common/src/gpu/gl/GLBackendInput.cpp
+++ b/libraries/gpu-gl-common/src/gpu/gl/GLBackendInput.cpp
@@ -18,8 +18,8 @@ using namespace gpu::gl;
 
 void GLBackend::do_setInputFormat(const Batch& batch, size_t paramOffset) {
     const auto& format = batch._streamFormats.get(batch._params[paramOffset]._uint);
-    if (format != _input._format) {
-        _input._format = format;
+    if (!compare(_input._format, format)) {
+        assign(_input._format, format);
         if (format) {
             auto inputFormat = GLInputFormat::sync((*format));
             assert(inputFormat);
@@ -42,8 +42,8 @@ void GLBackend::do_setInputBuffer(const Batch& batch, size_t paramOffset) {
 
     if (channel < getNumInputBuffers()) {
         bool isModified = false;
-        if (_input._buffers[channel] != buffer) {
-            _input._buffers[channel] = buffer;
+        if (!compare(_input._buffers[channel], buffer)) {
+            assign(_input._buffers[channel], buffer);
             _input._bufferVBOs[channel] = getBufferIDUnsynced((*buffer));
             isModified = true;
         }
@@ -94,18 +94,18 @@ void GLBackend::resetInputStage() {
     // Reset index buffer
     _input._indexBufferType = UINT32;
     _input._indexBufferOffset = 0;
-    _input._indexBuffer.reset();
+    reset(_input._indexBuffer);
     glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0);
     (void) CHECK_GL_ERROR();
 
     // Reset vertex buffer and format
-    _input._format.reset();
+    reset(_input._format);
     _input._formatKey.clear();
     _input._invalidFormat = false;
     _input._attributeActivation.reset();
 
     for (uint32_t i = 0; i < _input._buffers.size(); i++) {
-        _input._buffers[i].reset();
+        reset(_input._buffers[i]);
         _input._bufferOffsets[i] = 0;
         _input._bufferStrides[i] = 0;
         _input._bufferVBOs[i] = 0;
@@ -120,8 +120,8 @@ void GLBackend::do_setIndexBuffer(const Batch& batch, size_t paramOffset) {
     _input._indexBufferOffset = batch._params[paramOffset + 0]._uint;
 
     const auto& indexBuffer = batch._buffers.get(batch._params[paramOffset + 1]._uint);
-    if (indexBuffer != _input._indexBuffer) {
-        _input._indexBuffer = indexBuffer;
+    if (!compare(_input._indexBuffer, indexBuffer)) {
+        assign(_input._indexBuffer, indexBuffer);
         if (indexBuffer) {
             glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, getBufferIDUnsynced(*indexBuffer));
         } else {
@@ -137,8 +137,8 @@ void GLBackend::do_setIndirectBuffer(const Batch& batch, size_t paramOffset) {
     _input._indirectBufferStride = batch._params[paramOffset + 2]._uint;
 
     const auto& buffer = batch._buffers.get(batch._params[paramOffset]._uint);
-    if (buffer != _input._indirectBuffer) {
-        _input._indirectBuffer = buffer;
+    if (!compare(_input._indirectBuffer, buffer)) {
+        assign(_input._indirectBuffer, buffer);
         if (buffer) {
             glBindBuffer(GL_DRAW_INDIRECT_BUFFER, getBufferIDUnsynced(*buffer));
         } else {
@@ -152,7 +152,7 @@ void GLBackend::do_setIndirectBuffer(const Batch& batch, size_t paramOffset) {
 
 void GLBackend::updateInput() {
     bool isStereoNow = isStereo();
-    // track stereo state change potentially happening wihtout changing the input format
+    // track stereo state change potentially happening without changing the input format
     // this is a rare case requesting to invalid the format
 #ifdef GPU_STEREO_DRAWCALL_INSTANCED
     _input._invalidFormat |= (isStereoNow != _input._lastUpdateStereoState);
@@ -163,13 +163,14 @@ void GLBackend::updateInput() {
         InputStageState::ActivationCache newActivation;
 
         // Assign the vertex format required
-        if (_input._format) {
+        auto format = acquire(_input._format);
+        if (format) {
             bool hasColorAttribute{ false };
 
             _input._attribBindingBuffers.reset();
 
-            const Stream::Format::AttributeMap& attributes = _input._format->getAttributes();
-            auto& inputChannels = _input._format->getChannels();
+            const auto& attributes = format->getAttributes();
+            const auto& inputChannels = format->getChannels();
             for (auto& channelIt : inputChannels) {
                 auto bufferChannelNum = (channelIt).first;
                 const Stream::Format::ChannelMap::value_type::second_type& channel = (channelIt).second;
diff --git a/libraries/gpu-gl-common/src/gpu/gl/GLBackendOutput.cpp b/libraries/gpu-gl-common/src/gpu/gl/GLBackendOutput.cpp
index d1ab34da90..370e50592d 100644
--- a/libraries/gpu-gl-common/src/gpu/gl/GLBackendOutput.cpp
+++ b/libraries/gpu-gl-common/src/gpu/gl/GLBackendOutput.cpp
@@ -25,23 +25,19 @@ using namespace gpu::gl;
 void GLBackend::syncOutputStateCache() {
     GLint currentFBO;
     glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &currentFBO);
-
     _output._drawFBO = currentFBO;
-    _output._framebuffer.reset();
+    reset(_output._framebuffer);
 }
 
 void GLBackend::resetOutputStage() {
-    if (_output._framebuffer) {
-        _output._framebuffer.reset();
-        _output._drawFBO = 0;
-        glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0);
-    }
-
+    _output._drawFBO = 0;
+    glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0);
     glEnable(GL_FRAMEBUFFER_SRGB);
+    reset(_output._framebuffer);
 }
 
 void GLBackend::do_setFramebuffer(const Batch& batch, size_t paramOffset) {
-    auto framebuffer = batch._framebuffers.get(batch._params[paramOffset]._uint);
+    const auto& framebuffer = batch._framebuffers.get(batch._params[paramOffset]._uint);
     setFramebuffer(framebuffer);
 }
 
@@ -55,13 +51,13 @@ void GLBackend::do_setFramebufferSwapChain(const Batch& batch, size_t paramOffse
 }
 
 void GLBackend::setFramebuffer(const FramebufferPointer& framebuffer) {
-    if (_output._framebuffer != framebuffer) {
+    if (!compare(_output._framebuffer, framebuffer)) {
         auto newFBO = getFramebufferID(framebuffer);
         if (_output._drawFBO != newFBO) {
             _output._drawFBO = newFBO;
             glBindFramebuffer(GL_DRAW_FRAMEBUFFER, newFBO);
         }
-        _output._framebuffer = framebuffer;
+        assign(_output._framebuffer, framebuffer);
     }
 }
 
@@ -114,8 +110,9 @@ void GLBackend::do_clearFramebuffer(const Batch& batch, size_t paramOffset) {
     }
 
     std::vector<GLenum> drawBuffers;
+    auto framebuffer = acquire(_output._framebuffer);
     if (masks & Framebuffer::BUFFER_COLORS) {
-        if (_output._framebuffer) {
+        if (framebuffer) {
             for (unsigned int i = 0; i < Framebuffer::MAX_NUM_RENDER_BUFFERS; i++) {
                 if (masks & (1 << i)) {
                     drawBuffers.push_back(GL_COLOR_ATTACHMENT0 + i);
@@ -163,8 +160,8 @@ void GLBackend::do_clearFramebuffer(const Batch& batch, size_t paramOffset) {
     }
     
-    // Restore the color draw buffers only if a frmaebuffer is bound
+    // Restore the color draw buffers only if a framebuffer is bound
-    if (_output._framebuffer && !drawBuffers.empty()) {
-        auto glFramebuffer = syncGPUObject(*_output._framebuffer);
+    if (framebuffer && !drawBuffers.empty()) {
+        auto glFramebuffer = syncGPUObject(*framebuffer);
         if (glFramebuffer) {
             glDrawBuffers((GLsizei)glFramebuffer->_colorBuffers.size(), glFramebuffer->_colorBuffers.data());
         }
diff --git a/libraries/gpu-gl-common/src/gpu/gl/GLBackendPipeline.cpp b/libraries/gpu-gl-common/src/gpu/gl/GLBackendPipeline.cpp
index 7e54774df6..9b28fa8114 100644
--- a/libraries/gpu-gl-common/src/gpu/gl/GLBackendPipeline.cpp
+++ b/libraries/gpu-gl-common/src/gpu/gl/GLBackendPipeline.cpp
@@ -25,7 +25,7 @@ using namespace gpu::gl;
 void GLBackend::do_setPipeline(const Batch& batch, size_t paramOffset) {
     const auto& pipeline = batch._pipelines.get(batch._params[paramOffset + 0]._uint);
 
-    if (_pipeline._pipeline == pipeline) {
+    if (compare(_pipeline._pipeline, pipeline)) {
         return;
     }
 
@@ -34,7 +34,7 @@ void GLBackend::do_setPipeline(const Batch& batch, size_t paramOffset) {
 
     // null pipeline == reset
     if (!pipeline) {
-        _pipeline._pipeline.reset();
+        reset(_pipeline._pipeline);
 
         _pipeline._program = 0;
         _pipeline._cameraCorrection = false;
@@ -73,7 +73,7 @@ void GLBackend::do_setPipeline(const Batch& batch, size_t paramOffset) {
         }
 
         // Remember the new pipeline
-        _pipeline._pipeline = pipeline;
+        assign(_pipeline._pipeline, pipeline);
     }
 
-    // THis should be done on Pipeline::update...
+    // This should be done on Pipeline::update...
@@ -81,7 +81,7 @@ void GLBackend::do_setPipeline(const Batch& batch, size_t paramOffset) {
         glUseProgram(_pipeline._program);
         if (_pipeline._cameraCorrection) {
             // Invalidate uniform buffer cache slot
-            _uniform._buffers[gpu::slot::buffer::CameraCorrection] = {};
+            _uniform._buffers[gpu::slot::buffer::CameraCorrection].reset();
             auto& cameraCorrectionBuffer = _transform._viewCorrectionEnabled ?
                 _pipeline._cameraCorrectionBuffer._buffer : 
                 _pipeline._cameraCorrectionBufferIdentity._buffer;
@@ -112,7 +112,7 @@ void GLBackend::updatePipeline() {
             _pipeline._stateSignatureCache |= _pipeline._state->_signature;
 
             // And perform
-            for (auto command : _pipeline._state->_commands) {
+            for (const auto& command : _pipeline._state->_commands) {
                 command->run(this);
             }
         } else {
@@ -134,23 +134,21 @@ void GLBackend::resetPipelineStage() {
     _pipeline._invalidProgram = false;
     _pipeline._program = 0;
     _pipeline._programShader = nullptr;
-    _pipeline._pipeline.reset();
+    reset(_pipeline._pipeline);
     glUseProgram(0);
 }
 
-GLBackend::UniformStageState::BufferState::BufferState(const BufferPointer& buffer, GLintptr offset, GLsizeiptr size)
-  : buffer(buffer), offset(offset), size(size) {}
-
 void GLBackend::releaseUniformBuffer(uint32_t slot) {
-    auto& buf = _uniform._buffers[slot];
-    if (buf.buffer) {
-        auto* object = Backend::getGPUObject<GLBuffer>(*buf.buffer);
+    auto& bufferState = _uniform._buffers[slot];
+    auto buffer = acquire(bufferState.buffer);
+    if (buffer) {
+        auto* object = Backend::getGPUObject<GLBuffer>(*buffer);
         if (object) {
             glBindBufferBase(GL_UNIFORM_BUFFER, slot, 0);  // RELEASE
             (void)CHECK_GL_ERROR();
         }
-        buf = UniformStageState::BufferState();
     }
+    bufferState.reset();
 }
 
 void GLBackend::resetUniformStage() {
@@ -165,18 +163,20 @@ void GLBackend::bindUniformBuffer(uint32_t slot, const BufferPointer& buffer, GL
         return;
     }
 
-    UniformStageState::BufferState bufferState{ buffer, offset, size };
 
+    auto& currentBufferState = _uniform._buffers[slot];
     // check cache before thinking
-    if (_uniform._buffers[slot] == bufferState) {
+    if (currentBufferState.compare(buffer, offset, size)) {
         return;
     }
 
     // Grab the true gl Buffer object
     auto glBO = getBufferIDUnsynced(*buffer);
     if (glBO) {
-        glBindBufferRange(GL_UNIFORM_BUFFER, slot, glBO, bufferState.offset, bufferState.size);
-        _uniform._buffers[slot] = bufferState;
+        glBindBufferRange(GL_UNIFORM_BUFFER, slot, glBO, offset, size);
+        assign(currentBufferState.buffer, buffer);
+        currentBufferState.offset = offset;
+        currentBufferState.size = size;
         (void)CHECK_GL_ERROR();
     } else {
         releaseUniformBuffer(slot);
@@ -201,7 +201,7 @@ void GLBackend::do_setUniformBuffer(const Batch& batch, size_t paramOffset) {
 }
 
 void GLBackend::releaseResourceTexture(uint32_t slot) {
-    auto& tex = _resource._textures[slot];
+    auto tex = acquire(_resource._textures[slot]);
     if (tex) {
         auto* object = Backend::getGPUObject<GLTexture>(*tex);
         if (object) {
@@ -210,8 +210,8 @@ void GLBackend::releaseResourceTexture(uint32_t slot) {
             glBindTexture(target, 0);  // RELEASE
             (void)CHECK_GL_ERROR();
         }
-        tex.reset();
     }
+    reset(_resource._textures[slot]);
 }
 
 void GLBackend::resetResourceStage() {
@@ -232,14 +232,14 @@ void GLBackend::do_setResourceBuffer(const Batch& batch, size_t paramOffset) {
         return;
     }
 
-    auto resourceBuffer = batch._buffers.get(batch._params[paramOffset + 0]._uint);
+    const auto& resourceBuffer = batch._buffers.get(batch._params[paramOffset + 0]._uint);
 
     if (!resourceBuffer) {
         releaseResourceBuffer(slot);
         return;
     }
     // check cache before thinking
-    if (_resource._buffers[slot] == resourceBuffer) {
+    if (compare(_resource._buffers[slot], resourceBuffer)) {
         return;
     }
 
@@ -248,7 +248,7 @@ void GLBackend::do_setResourceBuffer(const Batch& batch, size_t paramOffset) {
 
     // If successful bind then cache it
     if (bindResourceBuffer(slot, resourceBuffer)) {
-        _resource._buffers[slot] = resourceBuffer;
+        assign(_resource._buffers[slot], resourceBuffer);
     } else {  // else clear slot and cache
         releaseResourceBuffer(slot);
         return;
@@ -293,14 +293,14 @@ void GLBackend::do_setResourceFramebufferSwapChainTexture(const Batch& batch, si
     }
     auto index = batch._params[paramOffset + 2]._uint;
     auto renderBufferSlot = batch._params[paramOffset + 3]._uint;
-    auto resourceFramebuffer = swapChain->get(index);
-    auto resourceTexture = resourceFramebuffer->getRenderBuffer(renderBufferSlot);
+    const auto& resourceFramebuffer = swapChain->get(index);
+    const auto& resourceTexture = resourceFramebuffer->getRenderBuffer(renderBufferSlot);
     setResourceTexture(slot, resourceTexture);
 }
 
 void GLBackend::setResourceTexture(unsigned int slot, const TexturePointer& resourceTexture) {
     // check cache before thinking
-    if (_resource._textures[slot] == resourceTexture) {
+    if (compare(_resource._textures[slot], resourceTexture)) {
         return;
     }
 
@@ -317,7 +317,7 @@ void GLBackend::setResourceTexture(unsigned int slot, const TexturePointer& reso
 
         (void)CHECK_GL_ERROR();
 
-        _resource._textures[slot] = resourceTexture;
+        assign(_resource._textures[slot], resourceTexture);
 
         _stats._RSAmountTextureMemoryBounded += (int)object->size();
 
@@ -343,7 +343,7 @@ void GLBackend::do_setResourceTextureTable(const Batch& batch, size_t paramOffse
 int GLBackend::ResourceStageState::findEmptyTextureSlot() const {
     // start from the end of the slots, try to find an empty one that can be used
     for (auto i = MAX_NUM_RESOURCE_TEXTURES - 1; i > 0; i--) {
-        if (!_textures[i]) {
+        if (!valid(_textures[i])) {
             return i;
         }
     }
diff --git a/libraries/gpu-gl-common/src/gpu/gl/GLPipeline.cpp b/libraries/gpu-gl-common/src/gpu/gl/GLPipeline.cpp
index 1b479dceb8..a099e6e66a 100644
--- a/libraries/gpu-gl-common/src/gpu/gl/GLPipeline.cpp
+++ b/libraries/gpu-gl-common/src/gpu/gl/GLPipeline.cpp
@@ -24,7 +24,7 @@ GLPipeline* GLPipeline::sync(GLBackend& backend, const Pipeline& pipeline) {
     }
 
     // No object allocated yet, let's see if it's worth it...
-    ShaderPointer shader = pipeline.getProgram();
+    const auto& shader = pipeline.getProgram();
 
     // If this pipeline's shader has already failed to compile, don't try again
     if (shader->compilationHasFailed()) {
@@ -37,7 +37,7 @@ GLPipeline* GLPipeline::sync(GLBackend& backend, const Pipeline& pipeline) {
         return nullptr;
     }
 
-    StatePointer state = pipeline.getState();
+    const auto& state = pipeline.getState();
     GLState* stateObject = GLState::sync(*state);
     if (stateObject == nullptr) {
         return nullptr;
diff --git a/libraries/gpu-gl/src/gpu/gl41/GL41Backend.h b/libraries/gpu-gl/src/gpu/gl41/GL41Backend.h
index f4078f5479..e5f7415107 100644
--- a/libraries/gpu-gl/src/gpu/gl41/GL41Backend.h
+++ b/libraries/gpu-gl/src/gpu/gl41/GL41Backend.h
@@ -161,7 +161,7 @@ protected:
     void updateTransform(const Batch& batch) override;
 
     // Resource Stage
-    bool bindResourceBuffer(uint32_t slot, BufferPointer& buffer) override;
+    bool bindResourceBuffer(uint32_t slot, const BufferPointer& buffer) override;
     void releaseResourceBuffer(uint32_t slot) override;
 
     // Output stage
diff --git a/libraries/gpu-gl/src/gpu/gl41/GL41BackendBuffer.cpp b/libraries/gpu-gl/src/gpu/gl41/GL41BackendBuffer.cpp
index ac5d5ee0c9..80fd214515 100644
--- a/libraries/gpu-gl/src/gpu/gl41/GL41BackendBuffer.cpp
+++ b/libraries/gpu-gl/src/gpu/gl41/GL41BackendBuffer.cpp
@@ -100,7 +100,7 @@ GLBuffer* GL41Backend::syncGPUObject(const Buffer& buffer) {
     return GL41Buffer::sync<GL41Buffer>(*this, buffer);
 }
 
-bool GL41Backend::bindResourceBuffer(uint32_t slot, BufferPointer& buffer) {
+bool GL41Backend::bindResourceBuffer(uint32_t slot, const BufferPointer& buffer) {
     GLuint texBuffer = GL41Backend::getResourceBufferID((*buffer));
     if (texBuffer) {
         glActiveTexture(GL_TEXTURE0 + GL41Backend::RESOURCE_BUFFER_SLOT0_TEX_UNIT + slot); 
@@ -108,7 +108,7 @@ bool GL41Backend::bindResourceBuffer(uint32_t slot, BufferPointer& buffer) {
 
         (void)CHECK_GL_ERROR();
 
-        _resource._buffers[slot] = buffer;
+        assign(_resource._buffers[slot], buffer);
 
         return true;
     }
@@ -117,10 +117,7 @@ bool GL41Backend::bindResourceBuffer(uint32_t slot, BufferPointer& buffer) {
 }
 
 void GL41Backend::releaseResourceBuffer(uint32_t slot) {
-    auto& buf = _resource._buffers[slot];
-    if (buf) {
-        glActiveTexture(GL_TEXTURE0 + GL41Backend::RESOURCE_BUFFER_SLOT0_TEX_UNIT + slot); 
-        glBindTexture(GL_TEXTURE_BUFFER, 0);
-        buf.reset();
-    }
+    reset(_resource._buffers[slot]);
+    glActiveTexture(GL_TEXTURE0 + GL41Backend::RESOURCE_BUFFER_SLOT0_TEX_UNIT + slot); 
+    glBindTexture(GL_TEXTURE_BUFFER, 0);
 }
diff --git a/libraries/gpu-gl/src/gpu/gl41/GL41BackendInput.cpp b/libraries/gpu-gl/src/gpu/gl41/GL41BackendInput.cpp
index c61ffb09e5..bd88be0f0d 100644
--- a/libraries/gpu-gl/src/gpu/gl41/GL41BackendInput.cpp
+++ b/libraries/gpu-gl/src/gpu/gl41/GL41BackendInput.cpp
@@ -35,14 +35,15 @@ void GL41Backend::updateInput() {
 
     if (_input._invalidFormat || _input._invalidBuffers.any()) {
 
+        auto format = acquire(_input._format);
         if (_input._invalidFormat) {
             InputStageState::ActivationCache newActivation;
 
             _stats._ISNumFormatChanges++;
 
             // Check expected activation
-            if (_input._format) {
-                for (auto& it : _input._format->getAttributes()) {
+            if (format) {
+                for (auto& it : format->getAttributes()) {
                     const Stream::Attribute& attrib = (it).second;
                     uint8_t locationCount = attrib._element.getLocationCount();
                     for (int i = 0; i < locationCount; ++i) {
@@ -69,15 +70,15 @@ void GL41Backend::updateInput() {
         }
 
         // now we need to bind the buffers and assign the attrib pointers
-        if (_input._format) {
+        if (format) {
             bool hasColorAttribute{ false };
 
-            const Buffers& buffers = _input._buffers;
+            const auto& buffers = _input._buffers;
             const Offsets& offsets = _input._bufferOffsets;
             const Offsets& strides = _input._bufferStrides;
 
-            const Stream::Format::AttributeMap& attributes = _input._format->getAttributes();
-            auto& inputChannels = _input._format->getChannels();
+            const auto& attributes = format->getAttributes();
+            const auto& inputChannels = format->getChannels();
             int numInvalids = (int)_input._invalidBuffers.count();
             _stats._ISNumInputBufferChanges += numInvalids;
             
diff --git a/libraries/gpu-gl/src/gpu/gl45/GL45Backend.h b/libraries/gpu-gl/src/gpu/gl45/GL45Backend.h
index a100faf432..30656b47c7 100644
--- a/libraries/gpu-gl/src/gpu/gl45/GL45Backend.h
+++ b/libraries/gpu-gl/src/gpu/gl45/GL45Backend.h
@@ -262,7 +262,7 @@ protected:
     void updateTransform(const Batch& batch) override;
 
     // Resource Stage
-    bool bindResourceBuffer(uint32_t slot, BufferPointer& buffer) override;
+    bool bindResourceBuffer(uint32_t slot, const BufferPointer& buffer) override;
     void releaseResourceBuffer(uint32_t slot) override;
 
     // Output stage
diff --git a/libraries/gpu-gl/src/gpu/gl45/GL45BackendBuffer.cpp b/libraries/gpu-gl/src/gpu/gl45/GL45BackendBuffer.cpp
index 6d17923ebd..da8f7059bf 100644
--- a/libraries/gpu-gl/src/gpu/gl45/GL45BackendBuffer.cpp
+++ b/libraries/gpu-gl/src/gpu/gl45/GL45BackendBuffer.cpp
@@ -60,14 +60,14 @@ GLBuffer* GL45Backend::syncGPUObject(const Buffer& buffer) {
 }
 
 
-bool GL45Backend::bindResourceBuffer(uint32_t slot, BufferPointer& buffer) {
+bool GL45Backend::bindResourceBuffer(uint32_t slot, const BufferPointer& buffer) {
     GLBuffer* object = syncGPUObject((*buffer));
     if (object) {
         glBindBufferBase(GL_SHADER_STORAGE_BUFFER, slot, object->_id);
 
         (void)CHECK_GL_ERROR();
 
-        _resource._buffers[slot] = buffer;
+        assign(_resource._buffers[slot], buffer);
 
         return true;
     }
@@ -76,10 +76,13 @@ bool GL45Backend::bindResourceBuffer(uint32_t slot, BufferPointer& buffer) {
 }
 
+// Unbinds the shader storage buffer at `slot` and clears its cached reference.
 void GL45Backend::releaseResourceBuffer(uint32_t slot) {
-    auto& buf = _resource._buffers[slot];
+    auto buf = acquire(_resource._buffers[slot]);
     if (buf) {
         glBindBufferBase(GL_SHADER_STORAGE_BUFFER, slot, 0);
-        buf.reset();
     }
+    // Clear the cached reference even when nothing could be acquired, the way
+    // GLBackend::releaseResourceTexture() does for texture slots.
+    reset(_resource._buffers[slot]);
 }
 }
 
diff --git a/libraries/gpu-gl/src/gpu/gl45/GL45BackendInput.cpp b/libraries/gpu-gl/src/gpu/gl45/GL45BackendInput.cpp
index 7cd8756ead..5285e62d3e 100644
--- a/libraries/gpu-gl/src/gpu/gl45/GL45BackendInput.cpp
+++ b/libraries/gpu-gl/src/gpu/gl45/GL45BackendInput.cpp
@@ -28,7 +28,7 @@ void GL45Backend::resetInputStage() {
 
 void GL45Backend::updateInput() {
     bool isStereoNow = isStereo();
-    // track stereo state change potentially happening wihtout changing the input format
+    // track stereo state change potentially happening without changing the input format
-    // this is a rare case requesting to invalid the format
+    // this is a rare case that requires invalidating the format
 #ifdef GPU_STEREO_DRAWCALL_INSTANCED
     _input._invalidFormat |= (isStereoNow != _input._lastUpdateStereoState);
@@ -39,13 +39,14 @@ void GL45Backend::updateInput() {
         InputStageState::ActivationCache newActivation;
 
         // Assign the vertex format required
-        if (_input._format) {
+        auto format = acquire(_input._format);
+        if (format) {
             bool hasColorAttribute{ false };
 
             _input._attribBindingBuffers.reset();
 
-            const Stream::Format::AttributeMap& attributes = _input._format->getAttributes();
-            auto& inputChannels = _input._format->getChannels();
+            const auto& attributes = format->getAttributes();
+            const auto& inputChannels = format->getChannels();
             for (auto& channelIt : inputChannels) {
                 auto bufferChannelNum = (channelIt).first;
                 const Stream::Format::ChannelMap::value_type::second_type& channel = (channelIt).second;
diff --git a/libraries/gpu-gles/src/gpu/gles/GLESBackend.h b/libraries/gpu-gles/src/gpu/gles/GLESBackend.h
index c757de0a72..56ae41da31 100644
--- a/libraries/gpu-gles/src/gpu/gles/GLESBackend.h
+++ b/libraries/gpu-gles/src/gpu/gles/GLESBackend.h
@@ -157,7 +157,7 @@ protected:
     void updateTransform(const Batch& batch) override;
 
     // Resource Stage
-    bool bindResourceBuffer(uint32_t slot, BufferPointer& buffer) override;
+    bool bindResourceBuffer(uint32_t slot, const BufferPointer& buffer) override;
     void releaseResourceBuffer(uint32_t slot) override;
 
     // Output stage
diff --git a/libraries/gpu-gles/src/gpu/gles/GLESBackendBuffer.cpp b/libraries/gpu-gles/src/gpu/gles/GLESBackendBuffer.cpp
index 7dd08df409..04f8628db3 100644
--- a/libraries/gpu-gles/src/gpu/gles/GLESBackendBuffer.cpp
+++ b/libraries/gpu-gles/src/gpu/gles/GLESBackendBuffer.cpp
@@ -72,14 +72,14 @@ GLBuffer* GLESBackend::syncGPUObject(const Buffer& buffer) {
     return GLESBuffer::sync<GLESBuffer>(*this, buffer);
 }
 
-bool GLESBackend::bindResourceBuffer(uint32_t slot, BufferPointer& buffer) {
+bool GLESBackend::bindResourceBuffer(uint32_t slot, const BufferPointer& buffer) {
     GLBuffer* object = syncGPUObject((*buffer));
     if (object) {
         glBindBufferBase(GL_SHADER_STORAGE_BUFFER, slot, object->_id);
 
         (void)CHECK_GL_ERROR();
 
-        _resource._buffers[slot] = buffer;
+        assign(_resource._buffers[slot], buffer);
 
         return true;
     }
@@ -91,7 +91,9 @@ void GLESBackend::releaseResourceBuffer(uint32_t slot) {
-    auto& buf = _resource._buffers[slot];
+    auto buf = acquire(_resource._buffers[slot]);
     if (buf) {
         glBindBufferBase(GL_SHADER_STORAGE_BUFFER, slot, 0);
-        buf.reset();
     }
+    // Clear the cached reference even when nothing could be acquired, the way
+    // GLBackend::releaseResourceTexture() does for texture slots.
+    reset(_resource._buffers[slot]);
 }
 
diff --git a/libraries/gpu/src/gpu/Batch.cpp b/libraries/gpu/src/gpu/Batch.cpp
index b6714e2f1a..745f1d1845 100644
--- a/libraries/gpu/src/gpu/Batch.cpp
+++ b/libraries/gpu/src/gpu/Batch.cpp
@@ -498,7 +498,7 @@ void Batch::setupNamedCalls(const std::string& instanceName, NamedBatchData::Fun
     captureNamedDrawCallInfo(instanceName);
 }
 
-BufferPointer Batch::getNamedBuffer(const std::string& instanceName, uint8_t index) {
+const BufferPointer& Batch::getNamedBuffer(const std::string& instanceName, uint8_t index) {
     NamedBatchData& instance = _namedData[instanceName];
     if (instance.buffers.size() <= index) {
         instance.buffers.resize(index + 1);
diff --git a/libraries/gpu/src/gpu/Batch.h b/libraries/gpu/src/gpu/Batch.h
index b49d14e5a1..8e607a189e 100644
--- a/libraries/gpu/src/gpu/Batch.h
+++ b/libraries/gpu/src/gpu/Batch.h
@@ -119,7 +119,7 @@ public:
     void multiDrawIndexedIndirect(uint32 numCommands, Primitive primitiveType);
 
     void setupNamedCalls(const std::string& instanceName, NamedBatchData::Function function);
-    BufferPointer getNamedBuffer(const std::string& instanceName, uint8_t index = 0);
+    const BufferPointer& getNamedBuffer(const std::string& instanceName, uint8_t index = 0);
 
     // Input Stage
     // InputFormat
diff --git a/libraries/gpu/src/gpu/Framebuffer.cpp b/libraries/gpu/src/gpu/Framebuffer.cpp
index 8bb9be4a76..e88d986da6 100755
--- a/libraries/gpu/src/gpu/Framebuffer.cpp
+++ b/libraries/gpu/src/gpu/Framebuffer.cpp
@@ -203,11 +203,12 @@ uint32 Framebuffer::getNumRenderBuffers() const {
     return nb;
 }
 
-TexturePointer Framebuffer::getRenderBuffer(uint32 slot) const {
+const TexturePointer& Framebuffer::getRenderBuffer(uint32 slot) const {
+    static const TexturePointer EMPTY;
     if (!isSwapchain() && (slot < getMaxNumRenderBuffers())) {
         return _renderBuffers[slot]._texture;
     } else {
-        return TexturePointer();
+        return EMPTY;
     }
 
 }
@@ -297,9 +298,10 @@ bool Framebuffer::setDepthStencilBuffer(const TexturePointer& texture, const For
     return false;
 }
 
-TexturePointer Framebuffer::getDepthStencilBuffer() const {
+const TexturePointer& Framebuffer::getDepthStencilBuffer() const {
+    static const TexturePointer EMPTY;
     if (isSwapchain()) {
-        return TexturePointer();
+        return EMPTY;
     } else {
         return _depthStencilBuffer._texture;
     }
diff --git a/libraries/gpu/src/gpu/Framebuffer.h b/libraries/gpu/src/gpu/Framebuffer.h
index fbbec50a28..44e945883f 100755
--- a/libraries/gpu/src/gpu/Framebuffer.h
+++ b/libraries/gpu/src/gpu/Framebuffer.h
@@ -95,7 +95,7 @@ public:
     static Framebuffer* createShadowmap(uint16 width);
 
     bool isSwapchain() const;
-    SwapchainPointer getSwapchain() const { return _swapchain; }
+    const SwapchainPointer& getSwapchain() const { return _swapchain; }
 
     uint32 getFrameCount() const;
 
@@ -105,13 +105,13 @@ public:
     const TextureViews& getRenderBuffers() const { return _renderBuffers; }
 
     int32 setRenderBuffer(uint32 slot, const TexturePointer& texture, uint32 subresource = 0);
-    TexturePointer getRenderBuffer(uint32 slot) const;
+    const TexturePointer& getRenderBuffer(uint32 slot) const;
     uint32 getRenderBufferSubresource(uint32 slot) const;
 
     bool setDepthBuffer(const TexturePointer& texture, const Format& format, uint32 subresource = 0);
     bool setStencilBuffer(const TexturePointer& texture, const Format& format, uint32 subresource = 0);
     bool setDepthStencilBuffer(const TexturePointer& texture, const Format& format, uint32 subresource = 0);
-    TexturePointer getDepthStencilBuffer() const;
+    const TexturePointer& getDepthStencilBuffer() const;
     uint32 getDepthStencilBufferSubresource() const;
     Format getDepthStencilBufferFormat() const;