Faster texture transfers

Brad Davis 2018-05-10 12:08:20 -07:00
parent 40ff3f966f
commit fb81cf927a
15 changed files with 866 additions and 816 deletions

View file

@ -44,9 +44,9 @@ GLBackend::CommandCall GLBackend::_commandCalls[Batch::NUM_COMMANDS] =
(&::gpu::gl::GLBackend::do_setModelTransform),
(&::gpu::gl::GLBackend::do_setViewTransform),
(&::gpu::gl::GLBackend::do_setProjectionTransform),
(&::gpu::gl::GLBackend::do_setProjectionJitter),
(&::gpu::gl::GLBackend::do_setViewportTransform),
(&::gpu::gl::GLBackend::do_setDepthRangeTransform),
(&::gpu::gl::GLBackend::do_setPipeline),
@ -118,12 +118,6 @@ void GLBackend::init() {
#if !defined(USE_GLES)
qCDebug(gpugllogging, "V-Sync is %s\n", (::gl::getSwapInterval() > 0 ? "ON" : "OFF"));
#endif
#if THREADED_TEXTURE_BUFFERING
// This has to happen on the main thread in order to give the thread
// pool a reasonable parent object
GLVariableAllocationSupport::TransferJob::startBufferingThread();
#endif
});
}
@ -136,6 +130,7 @@ GLBackend::GLBackend() {
GLBackend::~GLBackend() {
killInput();
killTransform();
killTextureManagementStage();
}
void GLBackend::renderPassTransfer(const Batch& batch) {
@ -167,18 +162,18 @@ void GLBackend::renderPassTransfer(const Batch& batch) {
case Batch::COMMAND_drawIndexedInstanced:
case Batch::COMMAND_multiDrawIndirect:
case Batch::COMMAND_multiDrawIndexedIndirect:
{
Vec2u outputSize{ 1,1 };
if (_output._framebuffer) {
outputSize.x = _output._framebuffer->getWidth();
outputSize.y = _output._framebuffer->getHeight();
} else if (glm::dot(_transform._projectionJitter, _transform._projectionJitter) > 0.0f) {
qCWarning(gpugllogging) << "Jittering requires a framebuffer to be set";
}
_transform.preUpdate(_commandIndex, _stereo, outputSize);
}
break;
case Batch::COMMAND_disableContextStereo:
@ -191,10 +186,10 @@ void GLBackend::renderPassTransfer(const Batch& batch) {
case Batch::COMMAND_setViewportTransform:
case Batch::COMMAND_setViewTransform:
case Batch::COMMAND_setProjectionTransform:
case Batch::COMMAND_setProjectionJitter:
{
CommandCall call = _commandCalls[(*command)];
(this->*(call))(batch, *offset);
break;
}
@ -268,8 +263,8 @@ void GLBackend::render(const Batch& batch) {
if (!batch.isStereoEnabled()) {
_stereo._enable = false;
}
// Reset jitter
_transform._projectionJitter = Vec2(0.0f, 0.0f);
{
PROFILE_RANGE(render_gpu_gl_detail, "Transfer");
@ -729,9 +724,8 @@ void GLBackend::recycle() const {
glDeleteQueries((GLsizei)ids.size(), ids.data());
}
}
GLVariableAllocationSupport::manageMemory();
GLVariableAllocationSupport::_frameTexturesCreated = 0;
_textureManagement._transferEngine->manageMemory();
Texture::KtxStorage::releaseOpenKtxFiles();
}
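Taken together, the GLBackend changes above replace the static GLVariableAllocationSupport entry points with a transfer-engine instance owned by the backend. A minimal sketch of the resulting lifecycle, stitched together from the hunks in this file (bodies abbreviated):

// Sketch only: the engine is created with the backend, driven once per frame, and torn down with it.
void GLBackend::initTextureManagementStage() {
    _textureManagement._transferEngine = std::make_shared<GLTextureTransferEngineDefault>();
}
void GLBackend::recycle() const {
    // ... release dead GL objects ...
    _textureManagement._transferEngine->manageMemory();  // replaces the old static manageMemory()
}
GLBackend::~GLBackend() {
    killTextureManagementStage();  // shutdown() the engine, then drop the shared pointer
}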

View file

@ -491,8 +491,10 @@ protected:
struct TextureManagementStageState {
bool _sparseCapable { false };
GLTextureTransferEnginePointer _transferEngine;
} _textureManagement;
virtual void initTextureManagementStage() {}
virtual void initTextureManagementStage();
virtual void killTextureManagementStage();
typedef void (GLBackend::*CommandCall)(const Batch&, size_t);
static CommandCall _commandCalls[Batch::NUM_COMMANDS];

View file

@ -137,6 +137,8 @@ class GLQuery;
class GLState;
class GLShader;
class GLTexture;
class GLTextureTransferEngine;
using GLTextureTransferEnginePointer = std::shared_ptr<GLTextureTransferEngine>;
struct ShaderObject;
} } // namespace gpu::gl

View file

@ -48,6 +48,14 @@ const GLFilterMode GLTexture::FILTER_MODES[Sampler::NUM_FILTERS] = {
{ GL_LINEAR_MIPMAP_LINEAR, GL_LINEAR } //FILTER_ANISOTROPIC,
};
static constexpr size_t MAX_PIXEL_BYTE_SIZE{ 4 };
static constexpr size_t MAX_TRANSFER_DIMENSION{ 1024 };
const uvec3 GLVariableAllocationSupport::MAX_TRANSFER_DIMENSIONS{ MAX_TRANSFER_DIMENSION, MAX_TRANSFER_DIMENSION, 1 };
const uvec3 GLVariableAllocationSupport::INITIAL_MIP_TRANSFER_DIMENSIONS{ 64, 64, 1 };
const size_t GLVariableAllocationSupport::MAX_TRANSFER_SIZE = MAX_TRANSFER_DIMENSION * MAX_TRANSFER_DIMENSION * MAX_PIXEL_BYTE_SIZE;
const size_t GLVariableAllocationSupport::MAX_BUFFER_SIZE = MAX_TRANSFER_SIZE;
GLenum GLTexture::getGLTextureType(const Texture& texture) {
switch (texture.getType()) {
case Texture::TEX_2D:
@ -131,7 +139,6 @@ Size GLTexture::copyMipFaceFromTexture(uint16_t sourceMip, uint16_t targetMip, u
return 0;
}
GLExternalTexture::GLExternalTexture(const std::weak_ptr<GLBackend>& backend, const Texture& texture, GLuint id)
: Parent(backend, texture, id) {
Backend::textureExternalCount.increment();
@ -151,65 +158,58 @@ GLExternalTexture::~GLExternalTexture() {
Backend::textureExternalCount.decrement();
}
// Variable sized textures
using MemoryPressureState = GLVariableAllocationSupport::MemoryPressureState;
using WorkQueue = GLVariableAllocationSupport::WorkQueue;
using TransferJobPointer = GLVariableAllocationSupport::TransferJobPointer;
std::list<TextureWeakPointer> GLVariableAllocationSupport::_memoryManagedTextures;
MemoryPressureState GLVariableAllocationSupport::_memoryPressureState { MemoryPressureState::Idle };
std::atomic<bool> GLVariableAllocationSupport::_memoryPressureStateStale { false };
const uvec3 GLVariableAllocationSupport::INITIAL_MIP_TRANSFER_DIMENSIONS { 64, 64, 1 };
WorkQueue GLVariableAllocationSupport::_transferQueue;
WorkQueue GLVariableAllocationSupport::_promoteQueue;
WorkQueue GLVariableAllocationSupport::_demoteQueue;
size_t GLVariableAllocationSupport::_frameTexturesCreated { 0 };
#define OVERSUBSCRIBED_PRESSURE_VALUE 0.95f
#define UNDERSUBSCRIBED_PRESSURE_VALUE 0.85f
#define DEFAULT_ALLOWED_TEXTURE_MEMORY_MB ((size_t)1024)
static const size_t DEFAULT_ALLOWED_TEXTURE_MEMORY = MB_TO_BYTES(DEFAULT_ALLOWED_TEXTURE_MEMORY_MB);
using TransferJob = GLVariableAllocationSupport::TransferJob;
const uvec3 GLVariableAllocationSupport::MAX_TRANSFER_DIMENSIONS { 1024, 1024, 1 };
const size_t GLVariableAllocationSupport::MAX_TRANSFER_SIZE = GLVariableAllocationSupport::MAX_TRANSFER_DIMENSIONS.x * GLVariableAllocationSupport::MAX_TRANSFER_DIMENSIONS.y * 4;
#if THREADED_TEXTURE_BUFFERING
TexturePointer GLVariableAllocationSupport::_currentTransferTexture;
TransferJobPointer GLVariableAllocationSupport::_currentTransferJob;
QThreadPool* TransferJob::_bufferThreadPool { nullptr };
void TransferJob::startBufferingThread() {
static std::once_flag once;
std::call_once(once, [&] {
_bufferThreadPool = new QThreadPool(qApp);
_bufferThreadPool->setMaxThreadCount(1);
});
}
#endif
GLVariableAllocationSupport::GLVariableAllocationSupport() {
}
GLVariableAllocationSupport::~GLVariableAllocationSupport() {
}
TransferJob::TransferJob(const GLTexture& parent, uint16_t sourceMip, uint16_t targetMip, uint8_t face, uint32_t lines, uint32_t lineOffset)
: _parent(parent) {
auto transferDimensions = _parent._gpuObject.evalMipDimensions(sourceMip);
void GLVariableAllocationSupport::incrementPopulatedSize(Size delta) const {
_populatedSize += delta;
// Keep the two code paths separate to make debugging easier
if (_size < _populatedSize) {
Backend::textureResourcePopulatedGPUMemSize.update(0, delta);
} else {
Backend::textureResourcePopulatedGPUMemSize.update(0, delta);
}
}
void GLVariableAllocationSupport::decrementPopulatedSize(Size delta) const {
_populatedSize -= delta;
// Keep the two code paths separate to make debugging easier
if (_size < _populatedSize) {
Backend::textureResourcePopulatedGPUMemSize.update(delta, 0);
} else {
Backend::textureResourcePopulatedGPUMemSize.update(delta, 0);
}
}
void GLVariableAllocationSupport::sanityCheck() const {
if (_populatedMip < _allocatedMip) {
qCWarning(gpugllogging) << "Invalid mip levels";
}
}
TransferJob::TransferJob(const Texture& texture,
uint16_t sourceMip,
uint16_t targetMip,
uint8_t face,
uint32_t lines,
uint32_t lineOffset) {
auto transferDimensions = texture.evalMipDimensions(sourceMip);
GLenum format;
GLenum internalFormat;
GLenum type;
GLTexelFormat texelFormat = GLTexelFormat::evalGLTexelFormat(_parent._gpuObject.getTexelFormat(), _parent._gpuObject.getStoredMipFormat());
GLTexelFormat texelFormat = GLTexelFormat::evalGLTexelFormat(texture.getTexelFormat(), texture.getStoredMipFormat());
format = texelFormat.format;
internalFormat = texelFormat.internalFormat;
type = texelFormat.type;
_transferSize = _parent._gpuObject.getStoredMipFaceSize(sourceMip, face);
_transferSize = texture.getStoredMipFaceSize(sourceMip, face);
// If we're copying a subsection of the mip, do additional calculations to find the size and offset of the segment
if (0 != lines) {
transferDimensions.y = lines;
auto dimensions = _parent._gpuObject.evalMipDimensions(sourceMip);
auto dimensions = texture.evalMipDimensions(sourceMip);
auto bytesPerLine = (uint32_t)_transferSize / dimensions.y;
_transferOffset = bytesPerLine * lineOffset;
_transferSize = bytesPerLine * lines;
@ -222,481 +222,34 @@ TransferJob::TransferJob(const GLTexture& parent, uint16_t sourceMip, uint16_t t
}
// Buffering can invoke disk IO, so it should be kept off the main and render threads
_bufferingLambda = [=] {
auto mipStorage = _parent._gpuObject.accessStoredMipFace(sourceMip, face);
_bufferingLambda = [=](const TexturePointer& texture) {
auto mipStorage = texture->accessStoredMipFace(sourceMip, face);
if (mipStorage) {
_mipData = mipStorage->createView(_transferSize, _transferOffset);
} else {
qCWarning(gpugllogging) << "Buffering failed because mip could not be retrieved from texture " << _parent._source.c_str() ;
qCWarning(gpugllogging) << "Buffering failed because mip could not be retrieved from texture "
<< texture->source().c_str();
}
};
_transferLambda = [=] {
_transferLambda = [=](const TexturePointer& texture) {
if (_mipData) {
_parent.copyMipFaceLinesFromTexture(targetMip, face, transferDimensions, lineOffset, internalFormat, format, type, _mipData->size(), _mipData->readData());
auto gltexture = Backend::getGPUObject<GLTexture>(*texture);
gltexture->copyMipFaceLinesFromTexture(targetMip, face, transferDimensions, lineOffset, internalFormat, format,
type, _mipData->size(), _mipData->readData());
_mipData.reset();
} else {
qCWarning(gpugllogging) << "Transfer failed because mip could not be retrieved from texture " << _parent._source.c_str();
qCWarning(gpugllogging) << "Transfer failed because mip could not be retrieved from texture "
<< texture->source().c_str();
}
};
}
TransferJob::TransferJob(const GLTexture& parent, std::function<void()> transferLambda)
: _parent(parent), _bufferingRequired(false), _transferLambda(transferLambda) {
}
TransferJob::TransferJob(const std::function<void()>& transferLambda) :
_bufferingRequired(false), _transferLambda([=](const TexturePointer&) { transferLambda(); }) {}
TransferJob::~TransferJob() {
Backend::texturePendingGPUTransferMemSize.update(_transferSize, 0);
}
bool TransferJob::tryTransfer() {
#if THREADED_TEXTURE_BUFFERING
// Are we ready to transfer
if (!bufferingCompleted()) {
startBuffering();
return false;
}
#else
if (_bufferingRequired) {
_bufferingLambda();
}
#endif
_transferLambda();
return true;
}
#if THREADED_TEXTURE_BUFFERING
bool TransferJob::bufferingRequired() const {
if (!_bufferingRequired) {
return false;
}
// The default state of a QFuture is with status Canceled | Started | Finished,
// so we have to check isCanceled before we check the actual state
if (_bufferingStatus.isCanceled()) {
return true;
}
return !_bufferingStatus.isStarted();
}
bool TransferJob::bufferingCompleted() const {
if (!_bufferingRequired) {
return true;
}
// The default state of a QFuture is with status Canceled | Started | Finished,
// so we have to check isCanceled before we check the actual state
if (_bufferingStatus.isCanceled()) {
return false;
}
return _bufferingStatus.isFinished();
}
void TransferJob::startBuffering() {
if (bufferingRequired()) {
assert(_bufferingStatus.isCanceled());
_bufferingStatus = QtConcurrent::run(_bufferThreadPool, [=] {
_bufferingLambda();
});
assert(!_bufferingStatus.isCanceled());
assert(_bufferingStatus.isStarted());
}
}
#endif
GLVariableAllocationSupport::GLVariableAllocationSupport() {
_memoryPressureStateStale = true;
}
GLVariableAllocationSupport::~GLVariableAllocationSupport() {
_memoryPressureStateStale = true;
}
void GLVariableAllocationSupport::addMemoryManagedTexture(const TexturePointer& texturePointer) {
_memoryManagedTextures.push_back(texturePointer);
if (MemoryPressureState::Idle != _memoryPressureState) {
addToWorkQueue(texturePointer);
}
}
void GLVariableAllocationSupport::addToWorkQueue(const TexturePointer& texturePointer) {
GLTexture* gltexture = Backend::getGPUObject<GLTexture>(*texturePointer);
GLVariableAllocationSupport* vargltexture = dynamic_cast<GLVariableAllocationSupport*>(gltexture);
switch (_memoryPressureState) {
case MemoryPressureState::Oversubscribed:
if (vargltexture->canDemote()) {
// Demote largest first
_demoteQueue.push({ texturePointer, (float)gltexture->size() });
}
break;
case MemoryPressureState::Undersubscribed:
if (vargltexture->canPromote()) {
// Promote smallest first
_promoteQueue.push({ texturePointer, 1.0f / (float)gltexture->size() });
}
break;
case MemoryPressureState::Transfer:
if (vargltexture->hasPendingTransfers()) {
// Transfer priority given to smaller mips first
_transferQueue.push({ texturePointer, 1.0f / (float)gltexture->_gpuObject.evalMipSize(vargltexture->_populatedMip) });
}
break;
case MemoryPressureState::Idle:
Q_UNREACHABLE();
break;
}
}
WorkQueue& GLVariableAllocationSupport::getActiveWorkQueue() {
static WorkQueue empty;
switch (_memoryPressureState) {
case MemoryPressureState::Oversubscribed:
return _demoteQueue;
case MemoryPressureState::Undersubscribed:
return _promoteQueue;
case MemoryPressureState::Transfer:
return _transferQueue;
case MemoryPressureState::Idle:
Q_UNREACHABLE();
break;
}
return empty;
}
// FIXME hack for stats display
QString getTextureMemoryPressureModeString() {
switch (GLVariableAllocationSupport::_memoryPressureState) {
case MemoryPressureState::Oversubscribed:
return "Oversubscribed";
case MemoryPressureState::Undersubscribed:
return "Undersubscribed";
case MemoryPressureState::Transfer:
return "Transfer";
case MemoryPressureState::Idle:
return "Idle";
}
Q_UNREACHABLE();
return "Unknown";
}
void GLVariableAllocationSupport::updateMemoryPressure() {
static size_t lastAllowedMemoryAllocation = gpu::Texture::getAllowedGPUMemoryUsage();
size_t allowedMemoryAllocation = gpu::Texture::getAllowedGPUMemoryUsage();
if (0 == allowedMemoryAllocation) {
allowedMemoryAllocation = DEFAULT_ALLOWED_TEXTURE_MEMORY;
}
// If the user explicitly changed the allowed memory usage, we need to mark ourselves stale
// so that we react
if (allowedMemoryAllocation != lastAllowedMemoryAllocation) {
_memoryPressureStateStale = true;
lastAllowedMemoryAllocation = allowedMemoryAllocation;
}
if (!_memoryPressureStateStale.exchange(false)) {
return;
}
PROFILE_RANGE(render_gpu_gl, __FUNCTION__);
// Clear any defunct textures (weak pointers that no longer have a valid texture)
_memoryManagedTextures.remove_if([&](const TextureWeakPointer& weakPointer) {
return weakPointer.expired();
});
// Convert weak pointers to strong. This new list may still contain nulls if a texture was
// deleted on another thread between the previous line and this one
std::vector<TexturePointer> strongTextures; {
strongTextures.reserve(_memoryManagedTextures.size());
std::transform(
_memoryManagedTextures.begin(), _memoryManagedTextures.end(),
std::back_inserter(strongTextures),
[](const TextureWeakPointer& p) { return p.lock(); });
}
size_t totalVariableMemoryAllocation = 0;
size_t idealMemoryAllocation = 0;
bool canDemote = false;
bool canPromote = false;
bool hasTransfers = false;
for (const auto& texture : strongTextures) {
// Race conditions can still leave nulls in the list, so we need to check
if (!texture) {
continue;
}
GLTexture* gltexture = Backend::getGPUObject<GLTexture>(*texture);
GLVariableAllocationSupport* vartexture = dynamic_cast<GLVariableAllocationSupport*>(gltexture);
// Track how much the texture thinks it should be using
idealMemoryAllocation += texture->evalTotalSize();
// Track how much we're actually using
totalVariableMemoryAllocation += gltexture->size();
canDemote |= vartexture->canDemote();
canPromote |= vartexture->canPromote();
hasTransfers |= vartexture->hasPendingTransfers();
}
size_t unallocated = idealMemoryAllocation - totalVariableMemoryAllocation;
float pressure = (float)totalVariableMemoryAllocation / (float)allowedMemoryAllocation;
auto newState = MemoryPressureState::Idle;
if (pressure < UNDERSUBSCRIBED_PRESSURE_VALUE && (unallocated != 0 && canPromote)) {
newState = MemoryPressureState::Undersubscribed;
} else if (pressure > OVERSUBSCRIBED_PRESSURE_VALUE && canDemote) {
newState = MemoryPressureState::Oversubscribed;
} else if (hasTransfers) {
newState = MemoryPressureState::Transfer;
}
if (newState != _memoryPressureState) {
_memoryPressureState = newState;
// Clear the existing queue
_transferQueue = WorkQueue();
_promoteQueue = WorkQueue();
_demoteQueue = WorkQueue();
// Populate the existing textures into the queue
if (_memoryPressureState != MemoryPressureState::Idle) {
for (const auto& texture : strongTextures) {
// Race conditions can still leave nulls in the list, so we need to check
if (!texture) {
continue;
}
addToWorkQueue(texture);
}
}
}
}
TexturePointer GLVariableAllocationSupport::getNextWorkQueueItem(WorkQueue& workQueue) {
while (!workQueue.empty()) {
auto workTarget = workQueue.top();
auto texture = workTarget.first.lock();
if (!texture) {
workQueue.pop();
continue;
}
// Check whether the resulting texture can actually have work performed
GLTexture* gltexture = Backend::getGPUObject<GLTexture>(*texture);
GLVariableAllocationSupport* vartexture = dynamic_cast<GLVariableAllocationSupport*>(gltexture);
switch (_memoryPressureState) {
case MemoryPressureState::Oversubscribed:
if (vartexture->canDemote()) {
return texture;
}
break;
case MemoryPressureState::Undersubscribed:
if (vartexture->canPromote()) {
return texture;
}
break;
case MemoryPressureState::Transfer:
if (vartexture->hasPendingTransfers()) {
return texture;
}
break;
case MemoryPressureState::Idle:
Q_UNREACHABLE();
break;
}
// If we got here, then the texture has no work to do in the current state,
// so pop it off the queue and continue
workQueue.pop();
}
return TexturePointer();
}
void GLVariableAllocationSupport::processWorkQueue(WorkQueue& workQueue) {
if (workQueue.empty()) {
return;
}
// Get the front of the work queue to perform work
auto texture = getNextWorkQueueItem(workQueue);
if (!texture) {
return;
}
// Grab the first item off the demote queue
PROFILE_RANGE(render_gpu_gl, __FUNCTION__);
GLTexture* gltexture = Backend::getGPUObject<GLTexture>(*texture);
GLVariableAllocationSupport* vartexture = dynamic_cast<GLVariableAllocationSupport*>(gltexture);
switch (_memoryPressureState) {
case MemoryPressureState::Oversubscribed:
vartexture->demote();
workQueue.pop();
addToWorkQueue(texture);
_memoryPressureStateStale = true;
break;
case MemoryPressureState::Undersubscribed:
vartexture->promote();
workQueue.pop();
addToWorkQueue(texture);
_memoryPressureStateStale = true;
break;
case MemoryPressureState::Transfer:
if (vartexture->executeNextTransfer(texture)) {
workQueue.pop();
addToWorkQueue(texture);
#if THREADED_TEXTURE_BUFFERING
// Eagerly start the next buffering job if possible
texture = getNextWorkQueueItem(workQueue);
if (texture) {
gltexture = Backend::getGPUObject<GLTexture>(*texture);
vartexture = dynamic_cast<GLVariableAllocationSupport*>(gltexture);
vartexture->executeNextBuffer(texture);
}
#endif
}
break;
case MemoryPressureState::Idle:
Q_UNREACHABLE();
break;
}
}
void GLVariableAllocationSupport::processWorkQueues() {
if (MemoryPressureState::Idle == _memoryPressureState) {
return;
}
auto& workQueue = getActiveWorkQueue();
// Do work on the front of the queue
processWorkQueue(workQueue);
if (workQueue.empty()) {
_memoryPressureState = MemoryPressureState::Idle;
_memoryPressureStateStale = true;
}
}
void GLVariableAllocationSupport::manageMemory() {
PROFILE_RANGE(render_gpu_gl, __FUNCTION__);
updateMemoryPressure();
processWorkQueues();
}
bool GLVariableAllocationSupport::executeNextTransfer(const TexturePointer& currentTexture) {
#if THREADED_TEXTURE_BUFFERING
// If a transfer job is active on the buffering thread, but has not completed its buffering lambda,
// then we need to exit early, since we don't want to have the transfer job leave scope while it's
// being used in another thread -- See https://highfidelity.fogbugz.com/f/cases/4626
if (_currentTransferJob && !_currentTransferJob->bufferingCompleted()) {
return false;
}
#endif
if (_populatedMip <= _allocatedMip) {
#if THREADED_TEXTURE_BUFFERING
_currentTransferJob.reset();
_currentTransferTexture.reset();
#endif
return true;
}
// If the transfer queue is empty, rebuild it
if (_pendingTransfers.empty()) {
populateTransferQueue();
}
bool result = false;
if (!_pendingTransfers.empty()) {
#if THREADED_TEXTURE_BUFFERING
// If there is a current transfer, but it's not the top of the pending transfer queue, then it's an orphan, so we want to abandon it.
if (_currentTransferJob && _currentTransferJob != _pendingTransfers.front()) {
_currentTransferJob.reset();
}
if (!_currentTransferJob) {
// Keeping hold of a strong pointer to the transfer job ensures that if the pending transfer queue is rebuilt, the transfer job
// doesn't leave scope, causing a crash in the buffering thread
_currentTransferJob = _pendingTransfers.front();
// Keeping hold of a strong pointer during the transfer ensures that the transfer thread cannot try to access a destroyed texture
_currentTransferTexture = currentTexture;
}
// Transfer jobs buffer the texture data asynchronously because doing so may involve disk IO, so we attempt the
// transfer here and bail out if buffering has not yet completed
if (_currentTransferJob->tryTransfer()) {
_pendingTransfers.pop();
// Once a given job is finished, release the shared pointers keeping them alive
_currentTransferTexture.reset();
_currentTransferJob.reset();
result = true;
}
#else
if (_pendingTransfers.front()->tryTransfer()) {
_pendingTransfers.pop();
result = true;
}
#endif
}
return result;
}
#if THREADED_TEXTURE_BUFFERING
void GLVariableAllocationSupport::executeNextBuffer(const TexturePointer& currentTexture) {
if (_currentTransferJob && !_currentTransferJob->bufferingCompleted()) {
return;
}
// If the transfer queue is empty, rebuild it
if (_pendingTransfers.empty()) {
populateTransferQueue();
}
if (!_pendingTransfers.empty()) {
if (!_currentTransferJob) {
_currentTransferJob = _pendingTransfers.front();
_currentTransferTexture = currentTexture;
}
_currentTransferJob->startBuffering();
}
}
#endif
void GLVariableAllocationSupport::incrementPopulatedSize(Size delta) const {
_populatedSize += delta;
// Keep the two code paths separate to make debugging easier
if (_size < _populatedSize) {
Backend::textureResourcePopulatedGPUMemSize.update(0, delta);
} else {
Backend::textureResourcePopulatedGPUMemSize.update(0, delta);
}
}
void GLVariableAllocationSupport::decrementPopulatedSize(Size delta) const {
_populatedSize -= delta;
// Keep the two code paths separate to make debugging easier
if (_size < _populatedSize) {
Backend::textureResourcePopulatedGPUMemSize.update(delta, 0);
} else {
Backend::textureResourcePopulatedGPUMemSize.update(delta, 0);
}
}

View file

@ -16,8 +16,6 @@
#include "GLTexelFormat.h"
#include <thread>
#define THREADED_TEXTURE_BUFFERING 1
namespace gpu { namespace gl {
struct GLFilterMode {
@ -25,107 +23,92 @@ struct GLFilterMode {
GLint magFilter;
};
class GLTextureTransferEngine {
public:
using Pointer = std::shared_ptr<GLTextureTransferEngine>;
/// Called once per frame to perform any required memory management or transfer work
virtual void manageMemory() = 0;
virtual void shutdown() = 0;
/// Called whenever a client wants to create a new texture. This allows the transfer engine to
/// potentially limit the number of GL textures created per frame
bool allowCreate() const { return _frameTexturesCreated < MAX_RESOURCE_TEXTURES_PER_FRAME; }
/// Called whenever a client creates a new resource texture that should use managed memory
/// and incremental transfer
void addMemoryManagedTexture(const TexturePointer& texturePointer);
protected:
// Fetch all the currently active textures as strong pointers, while clearing the
// empty weak pointers out of _registeredTextures
std::vector<TexturePointer> getAllTextures();
void resetFrameTextureCreated() { _frameTexturesCreated = 0; }
private:
static const size_t MAX_RESOURCE_TEXTURES_PER_FRAME{ 2 };
size_t _frameTexturesCreated{ 0 };
std::list<TextureWeakPointer> _registeredTextures;
};
/**
A transfer job encapsulates an individual piece of work required to upload texture data to the GPU.
The work can be broken down into two parts, expressed as lambdas. The buffering lambda is responsible
for putting the data to be uploaded into a CPU memory buffer. The transfer lambda is responsible for
uploading the data from the CPU memory buffer to the GPU using OpenGL calls. Ideally the buffering lambda
will be executed on a separate thread from the OpenGL work to ensure that disk IO operations do not block
OpenGL calls.
Additionally, a TransferJob can encapsulate some kind of post-upload work that changes the state of the
GLTexture derived object wrapping the actual texture ID, such as changing the _populatedMip value once
a given mip level has been completely uploaded.
*/
class TransferJob {
public:
using Pointer = std::shared_ptr<TransferJob>;
using Queue = std::queue<Pointer>;
using Lambda = std::function<void(const TexturePointer&)>;
private:
Texture::PixelsPointer _mipData;
size_t _transferOffset{ 0 };
size_t _transferSize{ 0 };
bool _bufferingRequired{ true };
Lambda _transferLambda{ [](const TexturePointer&) {} };
Lambda _bufferingLambda{ [](const TexturePointer&) {} };
public:
TransferJob(const TransferJob& other) = delete;
TransferJob(const std::function<void()>& transferLambda);
TransferJob(const Texture& texture, uint16_t sourceMip, uint16_t targetMip, uint8_t face, uint32_t lines = 0, uint32_t lineOffset = 0);
~TransferJob();
const size_t& size() const { return _transferSize; }
bool bufferingRequired() const { return _bufferingRequired; }
void buffer(const TexturePointer& texture) { _bufferingLambda(texture); }
void transfer(const TexturePointer& texture) { _transferLambda(texture); }
};
using TransferJobPointer = std::shared_ptr<TransferJob>;
using TransferQueue = std::queue<TransferJobPointer>;
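To make the two-lambda split concrete, here is a minimal sketch of how a single job is meant to be driven; the thread attribution is illustrative, and the real scheduling lives in the transfer engine:

// Illustrative sketch only: buffering off the GL thread, upload on it.
TransferJobPointer job = std::make_shared<TransferJob>(*texture, sourceMip, targetMip, face);
if (job->bufferingRequired()) {
    job->buffer(texture);   // buffering thread: may read the KTX backing store, no GL calls
}
job->transfer(texture);     // GL thread: uploads the buffered CPU data to the GPU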
class GLVariableAllocationSupport {
friend class GLBackend;
public:
GLVariableAllocationSupport();
virtual ~GLVariableAllocationSupport();
virtual void populateTransferQueue(TransferQueue& pendingTransfers) = 0;
enum class MemoryPressureState {
Idle,
Transfer,
Oversubscribed,
Undersubscribed,
};
using QueuePair = std::pair<TextureWeakPointer, float>;
struct QueuePairLess {
bool operator()(const QueuePair& a, const QueuePair& b) {
return a.second < b.second;
}
};
using WorkQueue = std::priority_queue<QueuePair, std::vector<QueuePair>, QueuePairLess>;
class TransferJob {
using VoidLambda = std::function<void()>;
using VoidLambdaQueue = std::queue<VoidLambda>;
const GLTexture& _parent;
Texture::PixelsPointer _mipData;
size_t _transferOffset { 0 };
size_t _transferSize { 0 };
bool _bufferingRequired { true };
VoidLambda _transferLambda;
VoidLambda _bufferingLambda;
#if THREADED_TEXTURE_BUFFERING
// Indicates if a transfer from backing storage to internal storage has started
QFuture<void> _bufferingStatus;
static QThreadPool* _bufferThreadPool;
#endif
public:
TransferJob(const TransferJob& other) = delete;
TransferJob(const GLTexture& parent, std::function<void()> transferLambda);
TransferJob(const GLTexture& parent, uint16_t sourceMip, uint16_t targetMip, uint8_t face, uint32_t lines = 0, uint32_t lineOffset = 0);
~TransferJob();
bool tryTransfer();
#if THREADED_TEXTURE_BUFFERING
void startBuffering();
bool bufferingRequired() const;
bool bufferingCompleted() const;
static void startBufferingThread();
#endif
private:
void transfer();
};
using TransferJobPointer = std::shared_ptr<TransferJob>;
using TransferQueue = std::queue<TransferJobPointer>;
static MemoryPressureState _memoryPressureState;
public:
static void addMemoryManagedTexture(const TexturePointer& texturePointer);
protected:
static size_t _frameTexturesCreated;
static std::atomic<bool> _memoryPressureStateStale;
static std::list<TextureWeakPointer> _memoryManagedTextures;
static WorkQueue _transferQueue;
static WorkQueue _promoteQueue;
static WorkQueue _demoteQueue;
#if THREADED_TEXTURE_BUFFERING
static TexturePointer _currentTransferTexture;
static TransferJobPointer _currentTransferJob;
#endif
static const uvec3 INITIAL_MIP_TRANSFER_DIMENSIONS;
static const uvec3 MAX_TRANSFER_DIMENSIONS;
static const size_t MAX_TRANSFER_SIZE;
static void updateMemoryPressure();
static void processWorkQueues();
static void processWorkQueue(WorkQueue& workQueue);
static TexturePointer getNextWorkQueueItem(WorkQueue& workQueue);
static void addToWorkQueue(const TexturePointer& texture);
static WorkQueue& getActiveWorkQueue();
static void manageMemory();
//bool canPromoteNoAllocate() const { return _allocatedMip < _populatedMip; }
void sanityCheck() const;
bool canPromote() const { return _allocatedMip > _minAllocatedMip; }
bool canDemote() const { return _allocatedMip < _maxAllocatedMip; }
bool hasPendingTransfers() const { return _populatedMip > _allocatedMip; }
#if THREADED_TEXTURE_BUFFERING
void executeNextBuffer(const TexturePointer& currentTexture);
#endif
bool executeNextTransfer(const TexturePointer& currentTexture);
virtual void populateTransferQueue() = 0;
virtual void promote() = 0;
virtual void demote() = 0;
virtual size_t promote() = 0;
virtual size_t demote() = 0;
static const uvec3 MAX_TRANSFER_DIMENSIONS;
static const uvec3 INITIAL_MIP_TRANSFER_DIMENSIONS;
static const size_t MAX_TRANSFER_SIZE;
static const size_t MAX_BUFFER_SIZE;
protected:
// The amount of memory currently allocated
Size _size { 0 };
@ -148,10 +131,6 @@ protected:
// The lowest (highest resolution) mip that we will support, relative to the number
// of mips in the gpu::Texture object
uint16 _minAllocatedMip { 0 };
// Contains a series of lambdas that when executed will transfer data to the GPU, modify
// the _populatedMip and update the sampler in order to fully populate the allocated texture
// until _populatedMip == _allocatedMip
TransferQueue _pendingTransfers;
};
class GLTexture : public GLObject<Texture> {
@ -172,6 +151,9 @@ public:
static const std::vector<GLenum>& getFaceTargets(GLenum textureType);
static uint8_t getFaceCount(GLenum textureType);
static GLenum getGLTextureType(const Texture& texture);
virtual Size size() const = 0;
virtual Size copyMipFaceLinesFromTexture(uint16_t mip, uint8_t face, const uvec3& size, uint32_t yOffset, GLenum internalFormat, GLenum format, GLenum type, Size sourceSize, const void* sourcePointer) const = 0;
virtual Size copyMipFaceFromTexture(uint16_t sourceMip, uint16_t targetMip, uint8_t face) const final;
static const uint8_t TEXTURE_2D_NUM_FACES = 1;
static const uint8_t TEXTURE_CUBE_NUM_FACES = 6;
@ -180,12 +162,9 @@ public:
static const GLenum WRAP_MODES[Sampler::NUM_WRAP_MODES];
protected:
virtual Size size() const = 0;
virtual void generateMips() const = 0;
virtual void syncSampler() const = 0;
virtual Size copyMipFaceLinesFromTexture(uint16_t mip, uint8_t face, const uvec3& size, uint32_t yOffset, GLenum internalFormat, GLenum format, GLenum type, Size sourceSize, const void* sourcePointer) const = 0;
virtual Size copyMipFaceFromTexture(uint16_t sourceMip, uint16_t targetMip, uint8_t face) const final;
virtual void copyTextureMipsInGPUMem(GLuint srcId, GLuint destId, uint16_t srcMipOffset, uint16_t destMipOffset, uint16_t populatedMips) {} // Only relevant for Variable Allocation textures
GLTexture(const std::weak_ptr<gl::GLBackend>& backend, const Texture& texture, GLuint id);
@ -205,7 +184,6 @@ protected:
Size size() const override { return 0; }
};
} }
#endif
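The signature changes above define the new contract for concrete variable-allocation textures: promote() and demote() report the byte delta they produced, so the engine can budget work per frame, and populateTransferQueue() fills a caller-owned queue instead of a member. A hypothetical subclass sketch (not part of the commit) illustrating that contract:

// Hypothetical subclass, for illustration only.
class MyVariableTexture : public GLTexture, public GLVariableAllocationSupport {
    size_t promote() override {
        auto oldSize = _size;
        // ... reallocate one mip level higher and copy resident mips across in GPU memory ...
        return _size - oldSize;   // bytes gained
    }
    size_t demote() override {
        auto oldSize = _size;
        // ... reallocate one mip level lower ...
        return oldSize - _size;   // bytes freed
    }
    void populateTransferQueue(TransferQueue& pendingTransfers) override {
        // emit one TransferJob per mip/face, splitting large mips into line ranges
    }
};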

View file

@ -0,0 +1,502 @@
//
// Created by Bradley Austin Davis on 2016/05/15
// Copyright 2013-2016 High Fidelity, Inc.
//
// Distributed under the Apache License, Version 2.0.
// See the accompanying file LICENSE or http://www.apache.org/licenses/LICENSE-2.0.html
//
#include "GLTexture.h"
#include <QtCore/QThread>
#include <NumericalConstants.h>
#include "GLBackend.h"
#define OVERSUBSCRIBED_PRESSURE_VALUE 0.95f
#define UNDERSUBSCRIBED_PRESSURE_VALUE 0.85f
#define DEFAULT_ALLOWED_TEXTURE_MEMORY_MB ((size_t)1024)
#define MAX_RESOURCE_TEXTURES_PER_FRAME 2
#define NO_BUFFER_WORK_SLEEP_TIME_MS 2
#define THREADED_TEXTURE_BUFFERING 1
static const size_t DEFAULT_ALLOWED_TEXTURE_MEMORY = MB_TO_BYTES(DEFAULT_ALLOWED_TEXTURE_MEMORY_MB);
namespace gpu { namespace gl {
enum class MemoryPressureState
{
Idle,
Transfer,
Undersubscribed,
};
static MemoryPressureState _memoryPressureState{ MemoryPressureState::Idle };
template <typename T>
struct LessPairSecond {
bool operator()(const T& a, const T& b) { return a.second < b.second; }
};
using QueuePair = std::pair<TextureWeakPointer, float>;
// Contains a priority sorted list of textures on which work is to be done over many frames
// Uses a weak pointer to the texture to avoid keeping it in scope if the client stops using it
using WorkQueue = std::priority_queue<QueuePair, std::vector<QueuePair>, LessPairSecond<QueuePair>>;
using ImmediateQueuePair = std::pair<TexturePointer, float>;
// Contains a priority sorted list of textures on which work is to be done in the current frame
using ImmediateWorkQueue = std::priority_queue<ImmediateQueuePair, std::vector<ImmediateQueuePair>, LessPairSecond<ImmediateQueuePair>>;
// A map of weak texture pointers to queues of work to be done to transfer their data from the backing store to the GPU
using TransferMap = std::map<TextureWeakPointer, TransferQueue, std::owner_less<TextureWeakPointer>>;
class GLTextureTransferEngineDefault : public GLTextureTransferEngine {
using Parent = GLTextureTransferEngine;
public:
// Called once per frame by the GLBackend to manage texture memory
// Will demote textures immediately if oversubscribed, promote if undersubscribed, and otherwise process pending transfers
void manageMemory() override;
void shutdown() override;
protected:
class TextureBufferThread : public QThread {
public:
TextureBufferThread(GLTextureTransferEngineDefault& parent) : _parent(parent) { start(); }
protected:
void run() override {
while (!_parent._shutdown) {
if (!_parent.processActiveBufferQueue()) {
QThread::msleep(NO_BUFFER_WORK_SLEEP_TIME_MS);
}
}
}
GLTextureTransferEngineDefault& _parent;
};
using ActiveTransferJob = std::pair<TexturePointer, TransferJobPointer>;
using ActiveTransferQueue = std::list<ActiveTransferJob>;
void populateActiveBufferQueue();
bool processActiveBufferQueue();
void processTransferQueues();
void populateTransferQueue(const TexturePointer& texturePointer);
//void addToWorkQueue(const TexturePointer& texturePointer);
void updateMemoryPressure();
void processDemotes(size_t relief, const std::vector<TexturePointer>& strongTextures);
void processPromotes();
private:
std::atomic<bool> _shutdown{ false };
// Contains a priority sorted list of weak texture pointers that have been determined to be eligible for additional allocation
// While the memory state is 'undersubscribed', items will be removed from this list and processed, allocating additional memory
// per frame
WorkQueue _promoteQueue;
// This queue contains jobs that will buffer data from the texture backing store (ideally a memory mapped KTX file)
// to a CPU memory buffer. This queue is populated on the main GPU thread, and drained on a dedicated thread.
// When an item on the _activeBufferQueue is completed it is put into the _activeTransferQueue
ActiveTransferQueue _activeBufferQueue;
// This queue contains jobs that will upload data from a CPU buffer to the GPU. This queue is populated on the background
// thread that processes the _activeBufferQueue and drained on the main GPU thread
ActiveTransferQueue _activeTransferQueue;
// Mutex protecting the _activeTransferQueue & _activeBufferQueue since they are each accessed both from the main GPU thread
// and the buffering thread
Mutex _bufferMutex;
// The buffering thread which drains the _activeBufferQueue and populates the _activeTransferQueue
TextureBufferThread* _transferThread{ nullptr };
// The amount of buffering work currently represented by the _activeBufferQueue
size_t _queuedBufferSize{ 0 };
// This contains a map of all textures to queues of pending transfer jobs. While in the transfer state, this map is used to
// populate the _activeBufferQueue up to the limit specified in GLVariableAllocationSupport::MAX_BUFFER_SIZE
TransferMap _pendingTransfersMap;
};
}} // namespace gpu::gl
using namespace gpu;
using namespace gpu::gl;
void GLBackend::initTextureManagementStage() {
_textureManagement._transferEngine = std::make_shared<GLTextureTransferEngineDefault>();
}
void GLBackend::killTextureManagementStage() {
_textureManagement._transferEngine->shutdown();
_textureManagement._transferEngine.reset();
}
std::vector<TexturePointer> GLTextureTransferEngine::getAllTextures() {
std::vector<TexturePointer> result;
result.reserve(_registeredTextures.size());
// Collect strong pointers to every live texture, and drop expired entries from the registry.
// (std::list::remove_if actually erases; a bare std::remove_if would discard its result here.)
_registeredTextures.remove_if([&](const std::weak_ptr<Texture>& weak) -> bool {
auto strong = weak.lock();
if (strong) {
result.push_back(strong);
return false;
}
return true;
});
return result;
}
void GLTextureTransferEngine::addMemoryManagedTexture(const TexturePointer& texturePointer) {
++_frameTexturesCreated;
_registeredTextures.push_back(texturePointer);
}
void GLTextureTransferEngineDefault::shutdown() {
_shutdown = true;
#if THREADED_TEXTURE_BUFFERING
if (_transferThread) {
_transferThread->wait();
delete _transferThread;
_transferThread = nullptr;
}
#endif
}
void GLTextureTransferEngineDefault::manageMemory() {
PROFILE_RANGE(render_gpu_gl, __FUNCTION__);
// reset the count used to limit the number of textures created per frame
resetFrameTextureCreated();
// Determine the current memory management state. It will be either idle (no work to do),
// undersubscribed (need to do more allocation) or transfer (need to upload content from the
// backing store to the GPU)
updateMemoryPressure();
if (MemoryPressureState::Undersubscribed == _memoryPressureState) {
// If we're undersubscribed, we need to process some of the textures that can have additional allocation
processPromotes();
} else if (MemoryPressureState::Transfer == _memoryPressureState) {
// If we're in transfer mode we need to manage the buffering and upload queues
processTransferQueues();
}
}
// Each frame we will check if our memory pressure state has changed.
void GLTextureTransferEngineDefault::updateMemoryPressure() {
PROFILE_RANGE(render_gpu_gl, __FUNCTION__);
size_t allowedMemoryAllocation = gpu::Texture::getAllowedGPUMemoryUsage();
if (0 == allowedMemoryAllocation) {
allowedMemoryAllocation = DEFAULT_ALLOWED_TEXTURE_MEMORY;
}
// Clear any defunct textures (weak pointers that no longer have a valid texture)
auto strongTextures = getAllTextures();
size_t totalVariableMemoryAllocation = 0;
size_t idealMemoryAllocation = 0;
bool canDemote = false;
bool canPromote = false;
bool hasTransfers = false;
for (const auto& texture : strongTextures) {
GLTexture* gltexture = Backend::getGPUObject<GLTexture>(*texture);
GLVariableAllocationSupport* vartexture = dynamic_cast<GLVariableAllocationSupport*>(gltexture);
vartexture->sanityCheck();
// Track how much the texture thinks it should be using
idealMemoryAllocation += texture->evalTotalSize();
// Track how much we're actually using
totalVariableMemoryAllocation += gltexture->size();
if (vartexture->canDemote()) {
canDemote |= true;
}
if (vartexture->canPromote()) {
canPromote |= true;
}
if (vartexture->hasPendingTransfers()) {
hasTransfers |= true;
}
}
size_t unallocated = idealMemoryAllocation - totalVariableMemoryAllocation;
float pressure = (float)totalVariableMemoryAllocation / (float)allowedMemoryAllocation;
// If we're oversubscribed we need to demote textures IMMEDIATELY
if (pressure > OVERSUBSCRIBED_PRESSURE_VALUE && canDemote) {
auto overPressure = pressure - OVERSUBSCRIBED_PRESSURE_VALUE;
size_t relief = (size_t)(overPressure * totalVariableMemoryAllocation);
processDemotes(relief, strongTextures);
return;
}
auto newState = MemoryPressureState::Idle;
if (pressure < UNDERSUBSCRIBED_PRESSURE_VALUE && (unallocated != 0 && canPromote)) {
newState = MemoryPressureState::Undersubscribed;
} else if (hasTransfers) {
newState = MemoryPressureState::Transfer;
} else {
Lock lock(_bufferMutex);
if (!_activeBufferQueue.empty() || !_activeTransferQueue.empty() || !_pendingTransfersMap.empty()) {
newState = MemoryPressureState::Transfer;
}
}
// If we've changed state then we have to populate the appropriate structure with the work to be done
if (newState != _memoryPressureState) {
_memoryPressureState = newState;
_promoteQueue = WorkQueue();
_pendingTransfersMap.clear();
if (MemoryPressureState::Idle == _memoryPressureState) {
return;
}
// For each texture, if it's eligible for work in the current state, put it into the appropriate structure
for (const auto& texture : strongTextures) {
GLTexture* gltexture = Backend::getGPUObject<GLTexture>(*texture);
GLVariableAllocationSupport* vargltexture = dynamic_cast<GLVariableAllocationSupport*>(gltexture);
if (MemoryPressureState::Undersubscribed == _memoryPressureState && vargltexture->canPromote()) {
// Promote smallest first
_promoteQueue.push({ texture, 1.0f / (float)gltexture->size() });
} else if (MemoryPressureState::Transfer == _memoryPressureState && vargltexture->hasPendingTransfers()) {
populateTransferQueue(texture);
}
}
}
}
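A worked example of the thresholds above, assuming the default 1024 MB budget: with 1024 MB of variable texture allocation, pressure = 1.0 > 0.95, so demotion happens immediately with a relief target of roughly (1.0 - 0.95) * 1024 MB, about 51 MB; with 512 MB allocated, pressure = 0.5 < 0.85, so the engine enters Undersubscribed and promotes textures (smallest first) as long as any texture still has unallocated mips.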
// Manage the _activeBufferQueue and _activeTransferQueue queues
void GLTextureTransferEngineDefault::processTransferQueues() {
#if THREADED_TEXTURE_BUFFERING
if (!_transferThread) {
_transferThread = new TextureBufferThread(*this);
}
#endif
// From the pendingTransferMap, queue jobs into the _activeBufferQueue
// Doing so will lock the weak texture pointer so that it can't be destroyed
// while the background thread is working.
//
// This will queue jobs until _queuedBufferSize can't be increased without exceeding
// GLVariableAllocationSupport::MAX_BUFFER_SIZE or there is no more work to be done
populateActiveBufferQueue();
#if !THREADED_TEXTURE_BUFFERING
processActiveBufferQueue();
#endif
// Take any tasks which have completed buffering and process them, uploading the buffered
// data to the GPU. Drains the _activeTransferQueue
{
ActiveTransferQueue activeTransferQueue;
{
Lock lock(_bufferMutex);
activeTransferQueue.swap(_activeTransferQueue);
}
while (!activeTransferQueue.empty()) {
const auto& activeTransferJob = activeTransferQueue.front();
const auto& texturePointer = activeTransferJob.first;
const auto& transferJob = activeTransferJob.second;
transferJob->transfer(texturePointer);
// The pop_front MUST be the last call, since all of these variables in scope are
// references that will be invalid after the pop
activeTransferQueue.pop_front();
}
}
// If we have no more work in any of the structures, reset the memory state to idle to
// force reconstruction of the _pendingTransfersMap if necessary
{
Lock lock(_bufferMutex);
if (_activeTransferQueue.empty() && _activeBufferQueue.empty() && _pendingTransfersMap.empty()) {
_memoryPressureState = MemoryPressureState::Idle;
}
}
}
void GLTextureTransferEngineDefault::populateActiveBufferQueue() {
size_t queuedBufferSize = _queuedBufferSize;
static const auto& MAX_BUFFER_SIZE = GLVariableAllocationSupport::MAX_BUFFER_SIZE;
Q_ASSERT(queuedBufferSize <= MAX_BUFFER_SIZE);
size_t availableBufferSize = MAX_BUFFER_SIZE - queuedBufferSize;
// Queue up buffering jobs
ActiveTransferQueue newBufferJobs;
ActiveTransferQueue newTransferJobs;
size_t newTransferSize{ 0 };
for (auto itr = _pendingTransfersMap.begin(); itr != _pendingTransfersMap.end(); ) {
const auto& weakTexture = itr->first;
const auto texture = weakTexture.lock();
// Texture no longer exists, remove from the transfer map and move on
if (!texture) {
itr = _pendingTransfersMap.erase(itr);
continue;
}
GLTexture* gltexture = Backend::getGPUObject<GLTexture>(*texture);
GLVariableAllocationSupport* vargltexture = dynamic_cast<GLVariableAllocationSupport*>(gltexture);
auto& textureTransferQueue = itr->second;
// Can't find any pending transfers, so move on
if (textureTransferQueue.empty()) {
if (vargltexture->hasPendingTransfers()) {
qWarning(gpugllogging) << "Texture has no transfer jobs, but has pending transfers";
}
itr = _pendingTransfersMap.erase(itr);
continue;
}
const auto& transferJob = textureTransferQueue.front();
if (!transferJob->bufferingRequired()) {
newTransferJobs.emplace_back(texture, transferJob);
} else {
const auto& transferSize = transferJob->size();
// If there's not enough space for the buffering, then break out of the loop
if (transferSize > availableBufferSize) {
break;
}
availableBufferSize -= transferSize;
Q_ASSERT(availableBufferSize <= MAX_BUFFER_SIZE);
Q_ASSERT(newTransferSize <= MAX_BUFFER_SIZE);
newTransferSize += transferSize;
Q_ASSERT(newTransferSize <= MAX_BUFFER_SIZE);
newBufferJobs.emplace_back(texture, transferJob);
}
textureTransferQueue.pop();
++itr;
}
{
Lock lock(_bufferMutex);
_activeBufferQueue.splice(_activeBufferQueue.end(), newBufferJobs);
Q_ASSERT(_queuedBufferSize <= MAX_BUFFER_SIZE);
_queuedBufferSize += newTransferSize;
Q_ASSERT(_queuedBufferSize <= MAX_BUFFER_SIZE);
_activeTransferQueue.splice(_activeTransferQueue.end(), newTransferJobs);
}
}
bool GLTextureTransferEngineDefault::processActiveBufferQueue() {
ActiveTransferQueue activeBufferQueue;
{
Lock lock(_bufferMutex);
_activeBufferQueue.swap(activeBufferQueue);
}
if (activeBufferQueue.empty()) {
return false;
}
for (const auto& activeJob : activeBufferQueue) {
const auto& texture = activeJob.first;
const auto& transferJob = activeJob.second;
const auto& transferSize = transferJob->size();
transferJob->buffer(texture);
Q_ASSERT(_queuedBufferSize >= transferSize);
_queuedBufferSize -= transferSize;
}
{
Lock lock(_bufferMutex);
_activeTransferQueue.splice(_activeTransferQueue.end(), activeBufferQueue);
}
return true;
}
void GLTextureTransferEngineDefault::populateTransferQueue(const TexturePointer& texturePointer) {
TextureWeakPointer weakTexture = texturePointer;
GLTexture* gltexture = Backend::getGPUObject<GLTexture>(*texturePointer);
GLVariableAllocationSupport* vargltexture = dynamic_cast<GLVariableAllocationSupport*>(gltexture);
TransferJob::Queue pendingTransfers;
PROFILE_RANGE(render_gpu_gl, __FUNCTION__);
vargltexture->populateTransferQueue(pendingTransfers);
if (!pendingTransfers.empty()) {
_pendingTransfersMap[weakTexture] = pendingTransfers;
}
}
// Promote textures from the queue of eligible textures, up to the per-frame budget
void GLTextureTransferEngineDefault::processPromotes() {
// FIXME use max allocated memory per frame instead of promotion count
static const size_t MAX_ALLOCATED_BYTES_PER_FRAME = GLVariableAllocationSupport::MAX_BUFFER_SIZE;
static const size_t MAX_ALLOCATIONS_PER_FRAME = 8;
size_t allocatedBytes{ 0 };
size_t allocations{ 0 };
while (!_promoteQueue.empty()) {
// Grab the first item off the promote queue
PROFILE_RANGE(render_gpu_gl, __FUNCTION__);
auto entry = _promoteQueue.top();
_promoteQueue.pop();
auto texture = entry.first.lock();
if (!texture) {
continue;
}
GLTexture* gltexture = Backend::getGPUObject<GLTexture>(*texture);
GLVariableAllocationSupport* vartexture = dynamic_cast<GLVariableAllocationSupport*>(gltexture);
auto originalSize = gltexture->size();
vartexture->promote();
auto allocationDelta = gltexture->size() - originalSize;
if (vartexture->canPromote()) {
// Promote smallest first
_promoteQueue.push({ texture, 1.0f / (float)gltexture->size() });
}
allocatedBytes += allocationDelta;
if (++allocations >= MAX_ALLOCATIONS_PER_FRAME) {
break;
}
if (allocatedBytes >= MAX_ALLOCATED_BYTES_PER_FRAME) {
break;
}
}
// If the queue is drained, reset to idle so the work structures get rebuilt
if (_promoteQueue.empty()) {
// Force rebuild of work queue
_memoryPressureState = MemoryPressureState::Idle;
}
}
void GLTextureTransferEngineDefault::processDemotes(size_t reliefRequired, const std::vector<TexturePointer>& strongTextures) {
// Demote largest first
ImmediateWorkQueue demoteQueue;
for (const auto& texture : strongTextures) {
GLTexture* gltexture = Backend::getGPUObject<GLTexture>(*texture);
GLVariableAllocationSupport* vargltexture = dynamic_cast<GLVariableAllocationSupport*>(gltexture);
if (vargltexture->canDemote()) {
demoteQueue.push({ texture, (float)gltexture->size() });
}
}
size_t relieved = 0;
while (!demoteQueue.empty() && relieved < reliefRequired) {
{
const auto& target = demoteQueue.top();
const auto& texture = target.first;
GLTexture* gltexture = Backend::getGPUObject<GLTexture>(*texture);
auto oldSize = gltexture->size();
GLVariableAllocationSupport* vargltexture = dynamic_cast<GLVariableAllocationSupport*>(gltexture);
vargltexture->demote();
auto newSize = gltexture->size();
relieved += (oldSize - newSize);
}
demoteQueue.pop();
}
}
// FIXME hack for stats display
QString getTextureMemoryPressureModeString() {
switch (_memoryPressureState) {
case MemoryPressureState::Undersubscribed:
return "Undersubscribed";
case MemoryPressureState::Transfer:
return "Transfer";
case MemoryPressureState::Idle:
return "Idle";
}
Q_UNREACHABLE();
return "Unknown";
}
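The concurrency pattern at the heart of this file is a pair of queues handed between the GL thread and the buffering thread: each side swaps a queue out under the lock, does its work with the lock released, and splices results back. A stripped-down sketch with hypothetical names (Job, toBuffer, toTransfer):

// Stripped-down sketch of the double-queue handoff; all names here are hypothetical.
#include <list>
#include <mutex>
struct Job { void buffer() {} void transfer() {} };
std::mutex mutex;
std::list<Job> toBuffer;    // filled on the GL thread, drained by the worker
std::list<Job> toTransfer;  // filled by the worker, drained on the GL thread

void workerLoop() {         // cf. TextureBufferThread::run / processActiveBufferQueue
    std::list<Job> local;
    { std::lock_guard<std::mutex> lock(mutex); local.swap(toBuffer); }
    for (auto& job : local) { job.buffer(); }       // disk/CPU work, lock not held
    std::lock_guard<std::mutex> lock(mutex);
    toTransfer.splice(toTransfer.end(), local);
}

void glThreadTick() {       // cf. processTransferQueues
    std::list<Job> local;
    { std::lock_guard<std::mutex> lock(mutex); local.swap(toTransfer); }
    for (auto& job : local) { job.transfer(); }     // GL upload, lock not held
}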

View file

@ -114,9 +114,9 @@ public:
void allocateStorage(uint16 allocatedMip);
void syncSampler() const override;
void promote() override;
void demote() override;
void populateTransferQueue() override;
size_t promote() override;
size_t demote() override;
void populateTransferQueue(TransferQueue& pendingTransfers) override;
Size copyMipFaceLinesFromTexture(uint16_t mip, uint8_t face, const uvec3& size, uint32_t yOffset, GLenum internalFormat, GLenum format, GLenum type, Size sourceSize, const void* sourcePointer) const override;
Size copyMipsFromTexture();

View file

@ -72,7 +72,7 @@ GLTexture* GL41Backend::syncGPUObject(const TexturePointer& texturePointer) {
case TextureUsageType::RESOURCE:
qCDebug(gpugllogging) << "variable / Strict texture " << texture.source().c_str();
object = new GL41ResourceTexture(shared_from_this(), texture);
GLVariableAllocationSupport::addMemoryManagedTexture(texturePointer);
_textureManagement._transferEngine->addMemoryManagedTexture(texturePointer);
break;
default:
@ -86,7 +86,6 @@ GLTexture* GL41Backend::syncGPUObject(const TexturePointer& texturePointer) {
auto minAvailableMip = texture.minAvailableMipLevel();
if (minAvailableMip < varTex->_minAllocatedMip) {
varTex->_minAllocatedMip = minAvailableMip;
GL41VariableAllocationTexture::_memoryPressureStateStale = true;
}
}
}
@ -299,9 +298,7 @@ GL41VariableAllocationTexture::GL41VariableAllocationTexture(const std::weak_ptr
uint16_t allocatedMip = std::max<uint16_t>(_minAllocatedMip, targetMip);
allocateStorage(allocatedMip);
_memoryPressureStateStale = true;
copyMipsFromTexture();
syncSampler();
}
@ -496,7 +493,7 @@ void GL41VariableAllocationTexture::copyTextureMipsInGPUMem(GLuint srcId, GLuint
});
}
void GL41VariableAllocationTexture::promote() {
size_t GL41VariableAllocationTexture::promote() {
PROFILE_RANGE(render_gpu_gl, __FUNCTION__);
Q_ASSERT(_allocatedMip > 0);
@ -524,12 +521,11 @@ void GL41VariableAllocationTexture::promote() {
// update the memory usage
Backend::textureResourceGPUMemSize.update(oldSize, 0);
return (_size - oldSize);
// no change to Backend::textureResourcePopulatedGPUMemSize
populateTransferQueue();
}
void GL41VariableAllocationTexture::demote() {
size_t GL41VariableAllocationTexture::demote() {
PROFILE_RANGE(render_gpu_gl, __FUNCTION__);
Q_ASSERT(_allocatedMip < _maxAllocatedMip);
auto oldId = _id;
@ -563,16 +559,16 @@ void GL41VariableAllocationTexture::demote() {
}
decrementPopulatedSize(amountUnpopulated);
}
populateTransferQueue();
return oldSize - _size;
}
void GL41VariableAllocationTexture::populateTransferQueue() {
void GL41VariableAllocationTexture::populateTransferQueue(TransferQueue& pendingTransfers) {
PROFILE_RANGE(render_gpu_gl, __FUNCTION__);
if (_populatedMip <= _allocatedMip) {
return;
}
_pendingTransfers = TransferQueue();
const uint8_t maxFace = GLTexture::getFaceCount(_target);
uint16_t sourceMip = _populatedMip;
@ -588,7 +584,7 @@ void GL41VariableAllocationTexture::populateTransferQueue() {
// If the mip is less than the max transfer size, then just do it in one transfer
if (glm::all(glm::lessThanEqual(mipDimensions, MAX_TRANSFER_DIMENSIONS))) {
// Can the mip be transferred in one go
_pendingTransfers.emplace(new TransferJob(*this, sourceMip, targetMip, face));
pendingTransfers.emplace(new TransferJob(_gpuObject, sourceMip, targetMip, face));
continue;
}
@ -605,13 +601,13 @@ void GL41VariableAllocationTexture::populateTransferQueue() {
uint32_t lineOffset = 0;
while (lineOffset < lines) {
uint32_t linesToCopy = std::min<uint32_t>(lines - lineOffset, linesPerTransfer);
_pendingTransfers.emplace(new TransferJob(*this, sourceMip, targetMip, face, linesToCopy, lineOffset));
pendingTransfers.emplace(new TransferJob(_gpuObject, sourceMip, targetMip, face, linesToCopy, lineOffset));
lineOffset += linesToCopy;
}
}
// queue up the sampler and populated mip change for after the transfer has completed
_pendingTransfers.emplace(new TransferJob(*this, [=] {
pendingTransfers.emplace(new TransferJob([=] {
_populatedMip = sourceMip;
syncSampler();
}));
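To make the chunking above concrete: with MAX_TRANSFER_SIZE at 4 MiB (1024 * 1024 * 4 bytes), and assuming linesPerTransfer is derived as MAX_TRANSFER_SIZE / bytesPerLine (as the elided lines suggest), a 2048x2048 RGBA8 mip has bytesPerLine = 2048 * 4 = 8 KiB, giving linesPerTransfer = 512; the mip is queued as 2048 / 512 = 4 TransferJobs of 512 lines each, followed by the closing job above that advances _populatedMip and re-syncs the sampler.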

View file

@ -187,9 +187,9 @@ public:
GL45ResourceTexture(const std::weak_ptr<GLBackend>& backend, const Texture& texture);
void syncSampler() const override;
void promote() override;
void demote() override;
void populateTransferQueue() override;
size_t promote() override;
size_t demote() override;
void populateTransferQueue(TransferQueue& pendingTransfers) override;
void allocateStorage(uint16 mip);

View file

@ -28,7 +28,6 @@ using namespace gpu;
using namespace gpu::gl;
using namespace gpu::gl45;
#define MAX_RESOURCE_TEXTURES_PER_FRAME 2
#define FORCE_STRICT_TEXTURE 0
#define ENABLE_SPARSE_TEXTURE 0
@ -82,7 +81,8 @@ GLTexture* GL45Backend::syncGPUObject(const TexturePointer& texturePointer) {
#if !FORCE_STRICT_TEXTURE
case TextureUsageType::RESOURCE: {
if (GL45VariableAllocationTexture::_frameTexturesCreated < MAX_RESOURCE_TEXTURES_PER_FRAME) {
auto& transferEngine = _textureManagement._transferEngine;
if (transferEngine->allowCreate()) {
#if ENABLE_SPARSE_TEXTURE
if (isTextureManagementSparseEnabled() && GL45Texture::isSparseEligible(texture)) {
object = new GL45SparseResourceTexture(shared_from_this(), texture);
@ -92,7 +92,7 @@ GLTexture* GL45Backend::syncGPUObject(const TexturePointer& texturePointer) {
#else
object = new GL45ResourceTexture(shared_from_this(), texture);
#endif
GLVariableAllocationSupport::addMemoryManagedTexture(texturePointer);
transferEngine->addMemoryManagedTexture(texturePointer);
} else {
auto fallback = texturePointer->getFallbackTexture();
if (fallback) {
@ -114,7 +114,6 @@ GLTexture* GL45Backend::syncGPUObject(const TexturePointer& texturePointer) {
auto minAvailableMip = texture.minAvailableMipLevel();
if (minAvailableMip < varTex->_minAllocatedMip) {
varTex->_minAllocatedMip = minAvailableMip;
GL45VariableAllocationTexture::_memoryPressureStateStale = true;
}
}
}
@ -124,6 +123,7 @@ GLTexture* GL45Backend::syncGPUObject(const TexturePointer& texturePointer) {
}
void GL45Backend::initTextureManagementStage() {
GLBackend::initTextureManagementStage();
// enable the Sparse Texture on gl45
_textureManagement._sparseCapable = true;

View file

@ -31,7 +31,6 @@ using GL45Texture = GL45Backend::GL45Texture;
using GL45VariableAllocationTexture = GL45Backend::GL45VariableAllocationTexture;
GL45VariableAllocationTexture::GL45VariableAllocationTexture(const std::weak_ptr<GLBackend>& backend, const Texture& texture) : GL45Texture(backend, texture) {
++_frameTexturesCreated;
Backend::textureResourceCount.increment();
}
@ -104,7 +103,6 @@ GL45ResourceTexture::GL45ResourceTexture(const std::weak_ptr<GLBackend>& backend
uint16_t allocatedMip = std::max<uint16_t>(_minAllocatedMip, targetMip);
allocateStorage(allocatedMip);
_memoryPressureStateStale = true;
copyMipsFromTexture();
syncSampler();
}
@ -148,7 +146,7 @@ void GL45ResourceTexture::syncSampler() const {
#endif
}
void GL45ResourceTexture::promote() {
size_t GL45ResourceTexture::promote() {
PROFILE_RANGE(render_gpu_gl, __FUNCTION__);
Q_ASSERT(_allocatedMip > 0);
@ -191,11 +189,10 @@ void GL45ResourceTexture::promote() {
// update the memory usage
Backend::textureResourceGPUMemSize.update(oldSize, 0);
// no change to Backend::textureResourcePopulatedGPUMemSize
populateTransferQueue();
return (_size - oldSize);
}
void GL45ResourceTexture::demote() {
size_t GL45ResourceTexture::demote() {
PROFILE_RANGE(render_gpu_gl, __FUNCTION__);
Q_ASSERT(_allocatedMip < _maxAllocatedMip);
auto oldId = _id;
@ -242,16 +239,16 @@ void GL45ResourceTexture::demote() {
}
decrementPopulatedSize(amountUnpopulated);
}
populateTransferQueue();
return (oldSize - _size);
}
void GL45ResourceTexture::populateTransferQueue() {
void GL45ResourceTexture::populateTransferQueue(TransferQueue& pendingTransfers) {
PROFILE_RANGE(render_gpu_gl, __FUNCTION__);
sanityCheck();
if (_populatedMip <= _allocatedMip) {
return;
}
_pendingTransfers = TransferQueue();
const uint8_t maxFace = GLTexture::getFaceCount(_target);
uint16_t sourceMip = _populatedMip;
@ -267,7 +264,7 @@ void GL45ResourceTexture::populateTransferQueue() {
// If the mip is less than the max transfer size, then just do it in one transfer
if (glm::all(glm::lessThanEqual(mipDimensions, MAX_TRANSFER_DIMENSIONS))) {
// Can the mip be transferred in one go?
_pendingTransfers.emplace(new TransferJob(*this, sourceMip, targetMip, face));
pendingTransfers.emplace(new TransferJob(_gpuObject, sourceMip, targetMip, face));
continue;
}
@ -284,14 +281,15 @@ void GL45ResourceTexture::populateTransferQueue() {
uint32_t lineOffset = 0;
while (lineOffset < lines) {
uint32_t linesToCopy = std::min<uint32_t>(lines - lineOffset, linesPerTransfer);
_pendingTransfers.emplace(new TransferJob(*this, sourceMip, targetMip, face, linesToCopy, lineOffset));
pendingTransfers.emplace(new TransferJob(_gpuObject, sourceMip, targetMip, face, linesToCopy, lineOffset));
lineOffset += linesToCopy;
}
}
// queue up the sampler and populated mip change for after the transfer has completed
_pendingTransfers.emplace(new TransferJob(*this, [=] {
pendingTransfers.emplace(new TransferJob([=] {
_populatedMip = sourceMip;
sanityCheck();
syncSampler();
}));
} while (sourceMip != _allocatedMip);
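
promote() and demote() no longer repopulate their own transfer queues; instead they return the byte delta they produced, leaving requeueing and memory accounting to the transfer engine. A hypothetical consumer sketch (reclaimTextureMemory and the canDemote() guard are illustrative names, not APIs confirmed by this commit):

// Sketch (not part of this commit): demote textures until enough bytes are freed.
#include <cstddef>
#include <vector>

size_t reclaimTextureMemory(std::vector<GLVariableAllocationSupport*>& textures, size_t bytesToFree) {
    size_t freed = 0;
    for (auto* texture : textures) {
        if (freed >= bytesToFree) {
            break;
        }
        if (texture->canDemote()) { // hypothetical guard; demote() asserts it can still drop a mip
            // demote() drops one mip level and reports how many bytes were released
            freed += texture->demote();
        }
    }
    return freed;
}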

View file

@ -105,9 +105,9 @@ public:
void allocateStorage(uint16 allocatedMip);
void syncSampler() const override;
void promote() override;
void demote() override;
void populateTransferQueue() override;
size_t promote() override;
size_t demote() override;
void populateTransferQueue(TransferJob::Queue& queue) override;
Size copyMipFaceLinesFromTexture(uint16_t mip, uint8_t face, const uvec3& size, uint32_t yOffset, GLenum internalFormat, GLenum format, GLenum type, Size sourceSize, const void* sourcePointer) const override;
Size copyMipsFromTexture();

View file

@ -90,7 +90,6 @@ GLTexture* GLESBackend::syncGPUObject(const TexturePointer& texturePointer) {
auto minAvailableMip = texture.minAvailableMipLevel();
if (minAvailableMip < varTex->_minAllocatedMip) {
varTex->_minAllocatedMip = minAvailableMip;
GLESVariableAllocationTexture::_memoryPressureStateStale = true;
}
}
}
@ -361,7 +360,6 @@ GLESVariableAllocationTexture::GLESVariableAllocationTexture(const std::weak_ptr
uint16_t allocatedMip = std::max<uint16_t>(_minAllocatedMip, targetMip);
allocateStorage(allocatedMip);
_memoryPressureStateStale = true;
copyMipsFromTexture();
syncSampler();
@ -559,7 +557,7 @@ void GLESVariableAllocationTexture::copyTextureMipsInGPUMem(GLuint srcId, GLuint
});
}
void GLESVariableAllocationTexture::promote() {
size_t GLESVariableAllocationTexture::promote() {
PROFILE_RANGE(render_gpu_gl, __FUNCTION__);
Q_ASSERT(_allocatedMip > 0);
@ -587,12 +585,11 @@ void GLESVariableAllocationTexture::promote() {
// update the memory usage
Backend::textureResourceGPUMemSize.update(oldSize, 0);
// no change to Backend::textureResourcePopulatedGPUMemSize
populateTransferQueue();
return _size - oldSize;
}
void GLESVariableAllocationTexture::demote() {
size_t GLESVariableAllocationTexture::demote() {
PROFILE_RANGE(render_gpu_gl, __FUNCTION__);
Q_ASSERT(_allocatedMip < _maxAllocatedMip);
auto oldId = _id;
@ -626,16 +623,16 @@ void GLESVariableAllocationTexture::demote() {
}
decrementPopulatedSize(amountUnpopulated);
}
populateTransferQueue();
return oldSize - _size;
}
void GLESVariableAllocationTexture::populateTransferQueue() {
void GLESVariableAllocationTexture::populateTransferQueue(TransferJob::Queue& queue) {
PROFILE_RANGE(render_gpu_gl, __FUNCTION__);
if (_populatedMip <= _allocatedMip) {
return;
}
_pendingTransfers = TransferQueue();
const uint8_t maxFace = GLTexture::getFaceCount(_target);
uint16_t sourceMip = _populatedMip;
@ -651,7 +648,7 @@ void GLESVariableAllocationTexture::populateTransferQueue() {
// If the mip is less than the max transfer size, then just do it in one transfer
if (glm::all(glm::lessThanEqual(mipDimensions, MAX_TRANSFER_DIMENSIONS))) {
// Can the mip be transferred in one go?
_pendingTransfers.emplace(new TransferJob(*this, sourceMip, targetMip, face));
queue.emplace(new TransferJob(_gpuObject, sourceMip, targetMip, face));
continue;
}
@ -668,13 +665,13 @@ void GLESVariableAllocationTexture::populateTransferQueue() {
uint32_t lineOffset = 0;
while (lineOffset < lines) {
uint32_t linesToCopy = std::min<uint32_t>(lines - lineOffset, linesPerTransfer);
_pendingTransfers.emplace(new TransferJob(*this, sourceMip, targetMip, face, linesToCopy, lineOffset));
queue.emplace(new TransferJob(_gpuObject, sourceMip, targetMip, face, linesToCopy, lineOffset));
lineOffset += linesToCopy;
}
}
// queue up the sampler and populated mip change for after the transfer has completed
_pendingTransfers.emplace(new TransferJob(*this, [=] {
queue.emplace(new TransferJob([=] {
_populatedMip = sourceMip;
syncSampler();
}));

View file

@ -25,13 +25,14 @@
QTEST_MAIN(TextureTest)
#define LOAD_TEXTURE_COUNT 40
#define LOAD_TEXTURE_COUNT 100
#define FAIL_AFTER_SECONDS 30
static const QString TEST_DATA("https://hifi-public.s3.amazonaws.com/austin/test_data/test_ktx.zip");
static const QString TEST_DIR_NAME("{630b8f02-52af-4cdf-a896-24e472b94b28}");
static const QString KTX_TEST_DIR_ENV("HIFI_KTX_TEST_DIR");
std::string vertexShaderSource = R"SHADER(
#line 14
layout(location = 0) out vec2 outTexCoord0;
const vec4 VERTICES[] = vec4[](
@ -50,8 +51,6 @@ void main() {
)SHADER";
std::string fragmentShaderSource = R"SHADER(
#line 28
uniform sampler2D tex;
layout(location = 0) in vec2 inTexCoord0;
@ -87,21 +86,29 @@ void TextureTest::initTestCase() {
gpu::Context::init<gpu::gl::GLBackend>();
_gpuContext = std::make_shared<gpu::Context>();
_resourcesPath = QStandardPaths::writableLocation(QStandardPaths::TempLocation) + "/" + TEST_DIR_NAME;
if (!QFileInfo(_resourcesPath).exists()) {
QDir(_resourcesPath).mkpath(".");
FileDownloader(TEST_DATA,
[&](const QByteArray& data) {
QTemporaryFile zipFile;
if (zipFile.open()) {
zipFile.write(data);
zipFile.close();
}
JlCompress::extractDir(zipFile.fileName(), _resourcesPath);
})
.waitForDownload();
if (QProcessEnvironment::systemEnvironment().contains(KTX_TEST_DIR_ENV)) {
// For local testing with larger data sets
_resourcesPath = QProcessEnvironment::systemEnvironment().value(KTX_TEST_DIR_ENV);
} else {
_resourcesPath = QStandardPaths::writableLocation(QStandardPaths::TempLocation) + "/" + TEST_DIR_NAME;
if (!QFileInfo(_resourcesPath).exists()) {
QDir(_resourcesPath).mkpath(".");
FileDownloader(TEST_DATA,
[&](const QByteArray& data) {
QTemporaryFile zipFile;
if (zipFile.open()) {
zipFile.write(data);
zipFile.close();
}
JlCompress::extractDir(zipFile.fileName(), _resourcesPath);
})
.waitForDownload();
}
}
QVERIFY(!_resourcesPath.isEmpty());
_canvas.makeCurrent();
{
auto VS = gpu::Shader::createVertex(vertexShaderSource);
@ -130,15 +137,7 @@ void TextureTest::initTestCase() {
}
}
// Load the test textures
{
size_t newTextureCount = std::min<size_t>(_textureFiles.size(), LOAD_TEXTURE_COUNT);
for (size_t i = 0; i < newTextureCount; ++i) {
const auto& textureFile = _textureFiles[i];
auto texture = gpu::Texture::unserialize(textureFile);
_textures.push_back(texture);
}
}
QVERIFY(!_textureFiles.empty());
}
void TextureTest::cleanupTestCase() {
@ -148,6 +147,18 @@ void TextureTest::cleanupTestCase() {
_gpuContext.reset();
}
std::vector<gpu::TexturePointer> TextureTest::loadTestTextures() const {
// Load the test textures
std::vector<gpu::TexturePointer> result;
size_t newTextureCount = std::min<size_t>(_textureFiles.size(), LOAD_TEXTURE_COUNT);
for (size_t i = 0; i < newTextureCount; ++i) {
const auto& textureFile = _textureFiles[i];
auto texture = gpu::Texture::unserialize(textureFile);
result.push_back(texture);
}
return result;
}
void TextureTest::beginFrame() {
_gpuContext->recycle();
_gpuContext->beginFrame();
@ -169,116 +180,130 @@ void TextureTest::endFrame() {
QThread::msleep(10);
}
void TextureTest::renderFrame(const std::function<void(gpu::Batch&)>& renderLambda) {
beginFrame();
gpu::doInBatch("Test::body", _gpuContext, renderLambda);
endFrame();
++_frameCount;
}
extern QString getTextureMemoryPressureModeString();
void TextureTest::testTextureLoading() {
QVERIFY(_textures.size() > 0);
auto renderTexturesLamdba = [this](gpu::Batch& batch) {
batch.setPipeline(_pipeline);
for (const auto& texture : _textures) {
batch.setResourceTexture(0, texture);
batch.draw(gpu::TRIANGLE_STRIP, 4, 0);
QBENCHMARK{
_frameCount = 0;
auto textures = loadTestTextures();
QVERIFY(textures.size() > 0);
auto renderTexturesLamdba = [&](gpu::Batch& batch) {
batch.setPipeline(_pipeline);
for (const auto& texture : textures) {
batch.setResourceTexture(0, texture);
batch.draw(gpu::TRIANGLE_STRIP, 4, 0);
}
};
size_t expectedAllocation = 0;
for (const auto& texture : textures) {
expectedAllocation += texture->evalTotalSize();
}
};
QVERIFY(textures.size() > 0);
size_t expectedAllocation = 0;
for (const auto& texture : _textures) {
expectedAllocation += texture->evalTotalSize();
auto reportLambda = [=] {
qDebug() << "Allowed " << gpu::Texture::getAllowedGPUMemoryUsage();
qDebug() << "Allocated " << gpu::Context::getTextureResourceGPUMemSize();
qDebug() << "Populated " << gpu::Context::getTextureResourcePopulatedGPUMemSize();
qDebug() << "Pending " << gpu::Context::getTexturePendingGPUTransferMemSize();
qDebug() << "State " << getTextureMemoryPressureModeString();
};
auto allocatedMemory = gpu::Context::getTextureResourceGPUMemSize();
auto populatedMemory = gpu::Context::getTextureResourcePopulatedGPUMemSize();
// Cycle frames until we're fully allocated
// We need to use the texture rendering lambda
auto lastReport = usecTimestampNow();
auto start = usecTimestampNow();
qDebug() << "Awaiting texture allocation";
while (expectedAllocation != allocatedMemory) {
doEvery(lastReport, 4, reportLambda);
failAfter(start, FAIL_AFTER_SECONDS, "Failed to allocate texture memory");
renderFrame(renderTexturesLamdba);
allocatedMemory = gpu::Context::getTextureResourceGPUMemSize();
populatedMemory = gpu::Context::getTextureResourcePopulatedGPUMemSize();
}
reportLambda();
QCOMPARE(allocatedMemory, expectedAllocation);
// Restart the timer
start = usecTimestampNow();
// Cycle frames until we're fully populated
qDebug() << "Awaiting texture population";
while (allocatedMemory != populatedMemory || 0 != gpu::Context::getTexturePendingGPUTransferMemSize()) {
doEvery(lastReport, 4, reportLambda);
failAfter(start, FAIL_AFTER_SECONDS, "Failed to populate texture memory");
renderFrame();
allocatedMemory = gpu::Context::getTextureResourceGPUMemSize();
populatedMemory = gpu::Context::getTextureResourcePopulatedGPUMemSize();
}
reportLambda();
QCOMPARE(populatedMemory, allocatedMemory);
// FIXME: work around a race condition between the reported populated size and the texture's actual _populatedMip value
for (size_t i = 0; i < textures.size(); ++i) {
renderFrame();
}
// Test on-demand deallocation of memory
auto maxMemory = allocatedMemory / 2;
gpu::Texture::setAllowedGPUMemoryUsage(maxMemory);
// Restart the timer
start = usecTimestampNow();
// Cycle frames until the allocated memory is below the max memory
qDebug() << "Awaiting texture deallocation";
while (allocatedMemory > maxMemory || allocatedMemory != populatedMemory) {
doEvery(lastReport, 4, reportLambda);
failAfter(start, FAIL_AFTER_SECONDS, "Failed to deallocate texture memory");
renderFrame(renderTexturesLamdba);
allocatedMemory = gpu::Context::getTextureResourceGPUMemSize();
populatedMemory = gpu::Context::getTextureResourcePopulatedGPUMemSize();
}
reportLambda();
// Verify that the allocation is now below the target
QVERIFY(allocatedMemory <= maxMemory);
// Verify that populated memory is the same as allocated memory
QCOMPARE(populatedMemory, allocatedMemory);
// Restart the timer
start = usecTimestampNow();
// Reset the max memory to automatic
gpu::Texture::setAllowedGPUMemoryUsage(0);
// Cycle frames until we're fully reallocated and repopulated
qDebug() << "Awaiting texture reallocation and repopulation";
while (allocatedMemory != expectedAllocation || allocatedMemory != populatedMemory) {
doEvery(lastReport, 4, reportLambda);
failAfter(start, FAIL_AFTER_SECONDS, "Failed to populate texture memory");
renderFrame();
allocatedMemory = gpu::Context::getTextureResourceGPUMemSize();
populatedMemory = gpu::Context::getTextureResourcePopulatedGPUMemSize();
}
reportLambda();
QCOMPARE(allocatedMemory, expectedAllocation);
QCOMPARE(populatedMemory, allocatedMemory);
textures.clear();
// Cycle frames until all texture memory has been released
qDebug() << "Awaiting texture deallocation";
while (allocatedMemory != 0) {
failAfter(start, FAIL_AFTER_SECONDS, "Failed to clear texture memory");
renderFrame();
allocatedMemory = gpu::Context::getTextureResourceGPUMemSize();
populatedMemory = gpu::Context::getTextureResourcePopulatedGPUMemSize();
}
reportLambda();
QCOMPARE(allocatedMemory, 0);
QCOMPARE(populatedMemory, 0);
qDebug() << "Test took " << _frameCount << "frame";
}
QVERIFY(_textures.size() > 0);
auto reportLambda = [=] {
qDebug() << "Allowed " << gpu::Texture::getAllowedGPUMemoryUsage();
qDebug() << "Allocated " << gpu::Context::getTextureResourceGPUMemSize();
qDebug() << "Populated " << gpu::Context::getTextureResourcePopulatedGPUMemSize();
qDebug() << "Pending " << gpu::Context::getTexturePendingGPUTransferMemSize();
};
auto allocatedMemory = gpu::Context::getTextureResourceGPUMemSize();
auto populatedMemory = gpu::Context::getTextureResourcePopulatedGPUMemSize();
// Cycle frames until we're fully allocated
// We need to use the texture rendering lambda
auto lastReport = usecTimestampNow();
auto start = usecTimestampNow();
while (expectedAllocation != allocatedMemory) {
doEvery(lastReport, 4, reportLambda);
failAfter(start, 10, "Failed to allocate texture memory after 10 seconds");
renderFrame(renderTexturesLamdba);
allocatedMemory = gpu::Context::getTextureResourceGPUMemSize();
populatedMemory = gpu::Context::getTextureResourcePopulatedGPUMemSize();
}
QCOMPARE(allocatedMemory, expectedAllocation);
// Restart the timer
start = usecTimestampNow();
// Cycle frames until we're fully populated
while (allocatedMemory != populatedMemory || 0 != gpu::Context::getTexturePendingGPUTransferMemSize()) {
doEvery(lastReport, 4, reportLambda);
failAfter(start, 10, "Failed to populate texture memory after 10 seconds");
renderFrame();
allocatedMemory = gpu::Context::getTextureResourceGPUMemSize();
populatedMemory = gpu::Context::getTextureResourcePopulatedGPUMemSize();
}
reportLambda();
QCOMPARE(populatedMemory, allocatedMemory);
// FIXME: work around a race condition between the reported populated size and the texture's actual _populatedMip value
for (size_t i = 0; i < _textures.size(); ++i) {
renderFrame();
}
// Test on-demand deallocation of memory
auto maxMemory = allocatedMemory / 2;
gpu::Texture::setAllowedGPUMemoryUsage(maxMemory);
// Restart the timer
start = usecTimestampNow();
// Cycle frames until the allocated memory is below the max memory
while (allocatedMemory > maxMemory || allocatedMemory != populatedMemory) {
doEvery(lastReport, 4, reportLambda);
failAfter(start, 10, "Failed to deallocate texture memory after 10 seconds");
renderFrame(renderTexturesLamdba);
allocatedMemory = gpu::Context::getTextureResourceGPUMemSize();
populatedMemory = gpu::Context::getTextureResourcePopulatedGPUMemSize();
}
reportLambda();
// Verify that the allocation is now below the target
QVERIFY(allocatedMemory <= maxMemory);
// Verify that populated memory is the same as allocated memory
QCOMPARE(populatedMemory, allocatedMemory);
// Restart the timer
start = usecTimestampNow();
// Reset the max memory to automatic
gpu::Texture::setAllowedGPUMemoryUsage(0);
// Cycle frames until we're fully reallocated and repopulated
while (allocatedMemory != expectedAllocation || allocatedMemory != populatedMemory) {
doEvery(lastReport, 4, reportLambda);
failAfter(start, 10, "Failed to populate texture memory after 10 seconds");
renderFrame();
allocatedMemory = gpu::Context::getTextureResourceGPUMemSize();
populatedMemory = gpu::Context::getTextureResourcePopulatedGPUMemSize();
}
reportLambda();
QCOMPARE(allocatedMemory, expectedAllocation);
QCOMPARE(populatedMemory, allocatedMemory);
_textures.clear();
// Cycle frames until all texture memory has been released
while (allocatedMemory != 0) {
failAfter(start, 10, "Failed to clear texture memory after 10 seconds");
renderFrame();
allocatedMemory = gpu::Context::getTextureResourceGPUMemSize();
populatedMemory = gpu::Context::getTextureResourcePopulatedGPUMemSize();
}
QCOMPARE(allocatedMemory, 0);
QCOMPARE(populatedMemory, 0);
qDebug() << "Done";
}
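
The wait loops in testTextureLoading lean on two helpers, doEvery and failAfter, that are not defined in this diff. Plausible shapes for them, assuming the engine's usecTimestampNow()/USECS_PER_SECOND utilities are in scope:

// Sketch (not part of this commit): plausible implementations of the test helpers.
#include <cstdint>
#include <functional>
#include <QtTest/QtTest>

void doEvery(uint64_t& lastTime, uint64_t intervalSecs, const std::function<void()>& f) {
    const uint64_t now = usecTimestampNow();
    if (now - lastTime >= intervalSecs * USECS_PER_SECOND) {
        lastTime = now;
        f(); // e.g. print the memory report every few seconds
    }
}

void failAfter(uint64_t startTime, uint64_t timeoutSecs, const char* message) {
    if (usecTimestampNow() - startTime >= timeoutSecs * USECS_PER_SECOND) {
        QFAIL(message); // abort the test if a wait loop has been spinning too long
    }
}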

View file

@ -21,12 +21,15 @@ private:
void beginFrame();
void endFrame();
void renderFrame(const std::function<void(gpu::Batch&)>& = [](gpu::Batch&) {});
std::vector<gpu::TexturePointer> loadTestTextures() const;
private slots:
void initTestCase();
void cleanupTestCase();
void testTextureLoading();
private:
QString _resourcesPath;
OffscreenGLCanvas _canvas;
@ -36,5 +39,5 @@ private:
gpu::TexturePointer _colorBuffer, _depthBuffer;
const glm::uvec2 _size{ 640, 480 };
std::vector<std::string> _textureFiles;
std::vector<gpu::TexturePointer> _textures;
size_t _frameCount { 0 };
};