First pass at new texture transfer logic

2025-04-20 03:44:02 +02:00 · 2017-02-14 17:58:41 -08:00 · 2017-02-14 17:58:41 -08:00 · 1f058f069e
commit 1f058f069e
parent 75c17e89a2
3 changed files with 192 additions and 43 deletions
--- a/libraries/gpu-gl/src/gpu/gl45/GL45Backend.h
+++ b/libraries/gpu-gl/src/gpu/gl45/GL45Backend.h
@ -14,6 +14,7 @@

 #include "../gl/GLBackend.h"
 #include "../gl/GLTexture.h"
+#include <thread>

 #define INCREMENTAL_TRANSFER 0

@ -39,7 +40,7 @@ public:
        GL45Texture(const std::weak_ptr<GLBackend>& backend, const Texture& texture);
        void generateMips() const override;
        void copyMipFaceFromTexture(uint16_t sourceMip, uint16_t targetMip, uint8_t face) const;
-        void copyMipFaceLinesFromTexture(uint16_t sourceMip, uint16_t targetMip, uint8_t face, uint32_t lineOffset, uint32_t lines, size_t dataOffset) const;
+        void copyMipFaceLinesFromTexture(uint16_t mip, uint8_t face, const uvec3& size, uint32_t yOffset, GLenum format, GLenum type, const void* sourcePointer) const;
        virtual void syncSampler() const;
    };

@ -95,14 +96,50 @@ public:
        };

        using QueuePair = std::pair<TextureWeakPointer, float>;
-        class QueuePairLess {
-        public:
+        struct QueuePairLess {
            bool operator()(const QueuePair& a, const QueuePair& b) {
                return a.second < b.second;
            }
        };
        using WorkQueue = std::priority_queue<QueuePair, std::vector<QueuePair>, QueuePairLess>;

+        // One unit of pending texture transfer work.
+        // A job either wraps a bare transfer lambda, or first copies (part of) a stored mip face
+        // into _buffer on a shared background thread and then hands that buffer to the GPU.
+        // NOTE(review): the lambdas built by the mip constructor capture `this`, so a job
+        // presumably must not be copied or moved once constructed - confirm against callers.
+        class TransferJob {
+            using VoidLambda = std::function<void()>;
+            using VoidLambdaQueue = std::queue<VoidLambda>;
+            using ThreadPointer = std::shared_ptr<std::thread>;
+            // Texture whose stored mip data is read and whose GPU storage is written
+            const GL45VariableAllocationTexture& _parent;
+            // Mip level read from the texture's stored data
+            const uint16_t _sourceMip;
+            // Mip level written on the GPU
+            const uint16_t _targetMip;
+            const uint8_t _face;
+            // Number of lines to transfer; 0 means the whole mip face
+            const uint32_t _lines;
+            // First line of the mip covered by this job
+            const uint32_t _lineOffset;
+            // Holds the contents to transfer to the GPU in CPU memory
+            std::vector<uint8_t> _buffer;
+            // Indicates if a transfer from backing storage to internal storage has started
+            bool _bufferingStarted { false };
+            // NOTE(review): not referenced by the implementation visible in this change - confirm it is needed
+            bool _transferOnly { false };
+            // Set by the buffering lambda once _buffer is filled; read by tryTransfer().
+            // NOTE(review): plain bool that appears to be written on the buffering thread and
+            // read elsewhere - confirm whether this needs to be atomic.
+            bool _bufferingCompleted { false };
+            // Performs the GPU upload (or arbitrary work for lambda-only jobs)
+            VoidLambda _transferLambda;
+            // Fills _buffer from the stored mip data; executed by bufferLoop()
+            VoidLambda _bufferingLambda;
+            // Background thread shared by all jobs, running bufferLoop()
+            static ThreadPointer _bufferThread;
+            // Guards _bufferLambdaQueue
+            static Mutex _mutex;
+            // Buffering lambdas waiting to be executed by the background thread
+            static VoidLambdaQueue _bufferLambdaQueue;
+            // Polled by bufferLoop(); set to true to make the loop exit
+            static std::atomic<bool> _shutdownBufferingThread;
+            // Body of the background thread: drains _bufferLambdaQueue until shutdown is requested
+            static void bufferLoop();
+
+        public:
+            // Job that only runs transferLambda; it is created with buffering already marked complete
+            TransferJob(const GL45VariableAllocationTexture& parent, std::function<void()> transferLambda);
+            // Job that buffers and uploads `lines` lines of a mip face starting at `lineOffset`
+            // (the whole face when lines == 0)
+            TransferJob(const GL45VariableAllocationTexture& parent, uint16_t sourceMip, uint16_t targetMip, uint8_t face, uint32_t lines = 0, uint32_t lineOffset = 0);
+            // Runs the transfer lambda and returns true if buffering has completed;
+            // otherwise queues the buffering work (once) and returns false
+            bool tryTransfer();
+            // Starts the shared background buffering thread if it is not already running
+            static void startTransferLoop();
+            // Requests shutdown of the background buffering thread and joins it
+            static void stopTransferLoop();
+
+        private:
+            // Queues _bufferingLambda for the background thread the first time it is called
+            void startBuffering();
+            void transfer();
+        };
+
+        using TransferQueue = std::queue<TransferJob>;
        static MemoryPressureState _memoryPressureState;
    protected:
        static std::atomic<bool> _memoryPressureStateStale;
@ -110,6 +147,7 @@ public:
        static WorkQueue _transferQueue;
        static WorkQueue _promoteQueue;
        static WorkQueue _demoteQueue;
+        static TexturePointer _currentTransferTexture;
        static const uvec3 INITIAL_MIP_TRANSFER_DIMENSIONS;


@ -128,7 +166,7 @@ public:
        bool canPromote() const { return _allocatedMip > 0; }
        bool canDemote() const { return _allocatedMip < _maxAllocatedMip; }
        bool hasPendingTransfers() const { return !_pendingTransfers.empty(); }
-        void executeNextTransfer();
+        void executeNextTransfer(const TexturePointer& currentTexture);
        uint32 size() const override { return _size; }
        virtual void populateTransferQueue() = 0;
        virtual void promote() = 0;
@ -148,7 +186,7 @@ public:
        // Contains a series of lambdas that when executed will transfer data to the GPU, modify 
        // the _populatedMip and update the sampler in order to fully populate the allocated texture 
        // until _populatedMip == _allocatedMip
-        std::queue<PromoteLambda> _pendingTransfers;
+        TransferQueue _pendingTransfers;
    };

    class GL45ResourceTexture : public GL45VariableAllocationTexture {
--- a/libraries/gpu-gl/src/gpu/gl45/GL45BackendTexture.cpp
+++ b/libraries/gpu-gl/src/gpu/gl45/GL45BackendTexture.cpp
@ -118,26 +118,17 @@ void GL45Texture::generateMips() const {
    (void)CHECK_GL_ERROR();
 }

-void GL45Texture::copyMipFaceLinesFromTexture(uint16_t sourceMip, uint16_t targetMip, uint8_t face, uint32_t lineOffset, uint32_t lines, size_t dataOffset) const {
-    const auto& texture = _gpuObject;
-    if (!texture.isStoredMipFaceAvailable(sourceMip)) {
-        return;
-    }
-    auto mipDimensions = texture.evalMipDimensions(sourceMip);
-    glm::uvec3 size = { mipDimensions.x, lines, mipDimensions.z };
-    auto mipData = texture.accessStoredMipFace(sourceMip, face);
-    auto sourcePointer = mipData->readData() + dataOffset;
-    GLTexelFormat texelFormat = GLTexelFormat::evalGLTexelFormat(texture.getTexelFormat(), mipData->getFormat());
+void GL45Texture::copyMipFaceLinesFromTexture(uint16_t mip, uint8_t face, const uvec3& size, uint32_t yOffset, GLenum format, GLenum type, const void* sourcePointer) const {
    if (GL_TEXTURE_2D == _target) {
-        glTextureSubImage2D(_id, targetMip, 0, lineOffset, size.x, size.y, texelFormat.format, texelFormat.type, sourcePointer);
+        glTextureSubImage2D(_id, mip, 0, yOffset, size.x, size.y, format, type, sourcePointer);
    } else if (GL_TEXTURE_CUBE_MAP == _target) {
        // DSA ARB does not work on AMD, so use EXT
        // unless EXT is not available on the driver
        if (glTextureSubImage2DEXT) {
            auto target = GLTexture::CUBE_FACE_LAYOUT[face];
-            glTextureSubImage2DEXT(_id, target, targetMip, 0, lineOffset, size.x, size.y, texelFormat.format, texelFormat.type, sourcePointer);
+            glTextureSubImage2DEXT(_id, target, mip, 0, yOffset, size.x, size.y, format, type, sourcePointer);
        } else {
-            glTextureSubImage3D(_id, targetMip, 0, lineOffset, face, size.x, size.y, 1, texelFormat.format, texelFormat.type, sourcePointer);
+            glTextureSubImage3D(_id, mip, 0, yOffset, face, size.x, size.y, 1, format, type, sourcePointer);
        }
    } else {
        Q_ASSERT(false);
@ -146,8 +137,13 @@ void GL45Texture::copyMipFaceLinesFromTexture(uint16_t sourceMip, uint16_t targe
 }

+// Uploads one whole stored mip face to the GPU texture in a single call.
+// sourceMip selects the stored mip to read, targetMip the GPU mip level to write.
+// Does nothing when the stored mip is not available.
 void GL45Texture::copyMipFaceFromTexture(uint16_t sourceMip, uint16_t targetMip, uint8_t face) const {
+    if (!_gpuObject.isStoredMipFaceAvailable(sourceMip)) {
+        return;
+    }
     auto size = _gpuObject.evalMipDimensions(sourceMip);
-    copyMipFaceLinesFromTexture(sourceMip, targetMip, face, 0, size.y, 0);
+    auto mipData = _gpuObject.accessStoredMipFace(sourceMip, face);
+    // The GL format/type pair is derived from the texture's texel format and the stored mip's format
+    GLTexelFormat texelFormat = GLTexelFormat::evalGLTexelFormat(_gpuObject.getTexelFormat(), mipData->getFormat());
+    // Full mip dimensions starting at line 0, reading directly from the stored mip data
+    copyMipFaceLinesFromTexture(targetMip, face, size, 0, texelFormat.format, texelFormat.type, mipData->readData());
 }

 void GL45Texture::syncSampler() const {
--- a/libraries/gpu-gl/src/gpu/gl45/GL45BackendVariableTexture.cpp
+++ b/libraries/gpu-gl/src/gpu/gl45/GL45BackendVariableTexture.cpp
@ -39,6 +39,7 @@ const uvec3 GL45VariableAllocationTexture::INITIAL_MIP_TRANSFER_DIMENSIONS { 64,
 WorkQueue GL45VariableAllocationTexture::_transferQueue;
 WorkQueue GL45VariableAllocationTexture::_promoteQueue;
 WorkQueue GL45VariableAllocationTexture::_demoteQueue;
+TexturePointer GL45VariableAllocationTexture::_currentTransferTexture;

 #define OVERSUBSCRIBED_PRESSURE_VALUE 0.95f
 #define UNDERSUBSCRIBED_PRESSURE_VALUE 0.85f
@ -46,6 +47,123 @@ WorkQueue GL45VariableAllocationTexture::_demoteQueue;

 static const size_t DEFAULT_ALLOWED_TEXTURE_MEMORY = MB_TO_BYTES(DEFAULT_ALLOWED_TEXTURE_MEMORY_MB);

+using TransferJob = GL45VariableAllocationTexture::TransferJob;
+
+static const uvec3 MAX_TRANSFER_DIMENSIONS { 1024, 1024, 1 };
+static const size_t MAX_TRANSFER_SIZE = MAX_TRANSFER_DIMENSIONS.x * MAX_TRANSFER_DIMENSIONS.y * 4;
+
+std::shared_ptr<std::thread> TransferJob::_bufferThread { nullptr };
+std::atomic<bool> TransferJob::_shutdownBufferingThread { false };
+Mutex TransferJob::_mutex;
+TransferJob::VoidLambdaQueue TransferJob::_bufferLambdaQueue;
+
+// Launches the shared buffering thread running bufferLoop().
+// Calling this while the thread already exists is a no-op.
+void TransferJob::startTransferLoop() {
+    if (!_bufferThread) {
+        // Clear the shutdown request before the loop starts polling it
+        _shutdownBufferingThread.store(false);
+        _bufferThread = std::make_shared<std::thread>(&TransferJob::bufferLoop);
+    }
+}
+
+// Asks the buffering thread to exit, waits for it to finish and releases it.
+// Calling this while no thread exists is a no-op.
+void TransferJob::stopTransferLoop() {
+    if (_bufferThread) {
+        _shutdownBufferingThread.store(true);
+        // bufferLoop() only checks the flag between batches, so this blocks until the current batch is done
+        _bufferThread->join();
+        _bufferThread.reset();
+        // Leave the flag cleared so a later start sees a consistent state
+        _shutdownBufferingThread.store(false);
+    }
+}
+
+// Builds a job that stages (part of) one stored mip face in CPU memory and then uploads it.
+//   parent      texture whose stored mip data is read and whose GPU storage is written
+//   sourceMip   stored mip level to read
+//   targetMip   GPU mip level to write
+//   face        face index to read and write
+//   lines       number of lines to transfer; 0 transfers the whole mip face
+//   lineOffset  first line of the mip covered by this job
+// The lambdas created here capture `this`; they refer to this exact object, so the job
+// must stay at the address it was constructed at for as long as the lambdas may run.
+// NOTE(review): _bufferingCompleted is a plain bool set from the buffering lambda, which is
+// executed by bufferLoop() - confirm the cross-thread visibility is acceptable.
+TransferJob::TransferJob(const GL45VariableAllocationTexture& parent, uint16_t sourceMip, uint16_t targetMip, uint8_t face, uint32_t lines, uint32_t lineOffset)
+    : _parent(parent), _sourceMip(sourceMip), _targetMip(targetMip), _face(face), _lines(lines), _lineOffset(lineOffset) {
+
+    if (0 == lines) {
+        // Whole-face job: stage the complete stored mip face
+        _bufferingLambda = [this] {
+            auto mipData = _parent._gpuObject.accessStoredMipFace(_sourceMip, _face);
+            auto size = mipData->getSize();
+            _buffer.resize(size);
+            memcpy(_buffer.data(), mipData->readData(), size);
+            _bufferingCompleted = true;
+        };
+
+    } else {
+        // Partial job: stage only the requested run of lines
+        _bufferingLambda = [this] {
+            auto mipData = _parent._gpuObject.accessStoredMipFace(_sourceMip, _face);
+            auto dimensions = _parent._gpuObject.evalMipDimensions(_sourceMip);
+            auto mipSize = mipData->getSize();
+            auto bytesPerLine = (uint32_t)mipSize / dimensions.y;
+            auto transferSize = bytesPerLine * _lines;
+            auto sourceOffset = bytesPerLine * _lineOffset;
+            _buffer.resize(transferSize);
+            memcpy(_buffer.data(), mipData->readData() + sourceOffset, transferSize);
+            _bufferingCompleted = true;
+        };
+    }
+
+    _transferLambda = [this] {
+        auto mipData = _parent._gpuObject.accessStoredMipFace(_sourceMip, _face);
+        auto dimensions = _parent._gpuObject.evalMipDimensions(_sourceMip);
+        // A partial job staged only _lines lines, so the upload height must match the buffer;
+        // using the full mip height would read past _buffer and exceed the texture bounds.
+        auto transferDimensions = dimensions;
+        if (0 != _lines) {
+            transferDimensions.y = _lines;
+        }
+        GLTexelFormat texelFormat = GLTexelFormat::evalGLTexelFormat(_parent._gpuObject.getTexelFormat(), mipData->getFormat());
+        _parent.copyMipFaceLinesFromTexture(_targetMip, _face, transferDimensions, _lineOffset, texelFormat.format, texelFormat.type, _buffer.data());
+        // Swap with an empty temporary to release the staging memory
+        // (the temporary must be the object swap() is called on; it cannot bind to swap's reference parameter)
+        std::vector<uint8_t>().swap(_buffer);
+    };
+}
+
+// Builds a job that only runs transferLambda: no mip data is staged, so buffering is
+// marked complete from the start. Also creates the shared buffering thread if none exists.
+TransferJob::TransferJob(const GL45VariableAllocationTexture& parent, std::function<void()> transferLambda)
+    : _parent(parent), _sourceMip(0), _targetMip(0), _face(0), _lines(0), _lineOffset(0), _bufferingCompleted(true), _transferLambda(transferLambda) {
+    if (_bufferThread) {
+        return;
+    }
+    _bufferThread = std::make_shared<std::thread>(&TransferJob::bufferLoop);
+}
+
+// Attempts to run this job's transfer.
+// Returns true when the transfer lambda was executed; returns false when the data is not
+// staged yet, in which case buffering is requested (only the first request has any effect).
+bool TransferJob::tryTransfer() {
+    if (!_bufferingCompleted) {
+        // Not ready: make sure the staging work is queued and let the caller retry later
+        startBuffering();
+        return false;
+    }
+
+    _transferLambda();
+    return true;
+}
+
+// Queues this job's buffering lambda for the background thread.
+// Only the first call queues anything; later calls return immediately.
+void TransferJob::startBuffering() {
+    if (!_bufferingStarted) {
+        _bufferingStarted = true;
+        // _bufferLambdaQueue is shared with bufferLoop(), so it is only touched under _mutex
+        Lock lock(_mutex);
+        _bufferLambdaQueue.push(_bufferingLambda);
+    }
+}
+
+// Body of the background buffering thread.
+// Repeatedly takes everything queued in _bufferLambdaQueue and runs it in order, sleeping
+// briefly when there is nothing to do, until _shutdownBufferingThread becomes true.
+void TransferJob::bufferLoop() {
+    while (!_shutdownBufferingThread) {
+        // Grab the whole pending batch under the lock, then run it without holding the lock
+        VoidLambdaQueue pending;
+        {
+            Lock lock(_mutex);
+            pending.swap(_bufferLambdaQueue);
+        }
+
+        if (pending.empty()) {
+            QThread::msleep(5);
+            continue;
+        }
+
+        for (; !pending.empty(); pending.pop()) {
+            pending.front()();
+        }
+    }
+}
+
+
 void GL45VariableAllocationTexture::addMemoryManagedTexture(const TexturePointer& texturePointer) {
    _memoryManagedTextures.push_back(texturePointer);
    addToWorkQueue(texturePointer);
@ -190,7 +308,14 @@ void GL45VariableAllocationTexture::updateMemoryPressure() {
    }

    if (newState != _memoryPressureState) {
+        if (MemoryPressureState::Transfer == _memoryPressureState) {
+            TransferJob::stopTransferLoop();
+        }
        _memoryPressureState = newState;
+        if (MemoryPressureState::Transfer == _memoryPressureState) {
+            TransferJob::startTransferLoop();
+        }
+
        // Clear the existing queue
        _transferQueue = WorkQueue();
        _promoteQueue = WorkQueue();
@ -223,20 +348,17 @@ void GL45VariableAllocationTexture::processWorkQueues() {
            if (!object->canDemote()) {
                continue;
            }
-            //qDebug() << "QQQ executing demote for " << texture->source().c_str();
            object->demote();
        } else if (MemoryPressureState::Undersubscribed == _memoryPressureState) {
            if (!object->canPromote()) {
                continue;
            }
-            //qDebug() << "QQQ executing promote for " << texture->source().c_str();
            object->promote();
        } else if (MemoryPressureState::Transfer == _memoryPressureState) {
            if (!object->hasPendingTransfers()) {
                continue;
            }
-            //qDebug() << "QQQ executing transfer for " << texture->source().c_str();
-            object->executeNextTransfer();
+            object->executeNextTransfer(texture);
        } else {
            Q_UNREACHABLE();
        }
@ -265,10 +387,14 @@ GL45VariableAllocationTexture::~GL45VariableAllocationTexture() {
    Backend::updateTextureGPUMemoryUsage(_size, 0);
 }

-void GL45VariableAllocationTexture::executeNextTransfer() {
+// Tries to run the job at the front of _pendingTransfers.
+// currentTexture is stored in _currentTransferTexture for the duration of the attempt.
+// The job is popped and the stored pointer released only when tryTransfer() reports that the
+// transfer ran; otherwise both are left in place so a later call can retry the same job.
+void GL45VariableAllocationTexture::executeNextTransfer(const TexturePointer& currentTexture) {
     if (!_pendingTransfers.empty()) {
-        _pendingTransfers.front()();
-        _pendingTransfers.pop();
+        // Keeping hold of a strong pointer during the transfer ensures that the transfer thread cannot try to access a destroyed texture
+        _currentTransferTexture = currentTexture;
+        if (_pendingTransfers.front().tryTransfer()) {
+            _pendingTransfers.pop();
+            _currentTransferTexture.reset();
+        }
     }
 }

@ -394,17 +520,15 @@ void GL45ResourceTexture::demote() {
    populateTransferQueue();
 }

+
 void GL45ResourceTexture::populateTransferQueue() {
    PROFILE_RANGE(render_gpu_gl, __FUNCTION__);
-    _pendingTransfers = std::queue<PromoteLambda>();
    if (_populatedMip <= _allocatedMip) {
        return;
    }
+    _pendingTransfers = TransferQueue();

-    static const uvec3 MAX_TRANSFER_DIMENSIONS { 1024, 1024, 1 };
-    static const size_t MAX_TRANSFER_SIZE = MAX_TRANSFER_DIMENSIONS.x * MAX_TRANSFER_DIMENSIONS.y * 4;
    const uint8_t maxFace = GLTexture::getFaceCount(_target);
-
    uint16_t sourceMip = _populatedMip;
    do {
        --sourceMip;
@ -418,11 +542,7 @@ void GL45ResourceTexture::populateTransferQueue() {
            // If the mip is less than the max transfer size, then just do it in one transfer
            if (glm::all(glm::lessThanEqual(mipDimensions, MAX_TRANSFER_DIMENSIONS))) {
                // Can the mip be transferred in one go
-                _pendingTransfers.push([=] {
-                    Q_ASSERT(sourceMip >= _allocatedMip);
-                    // FIXME modify the copy mechanism to be incremental
-                    copyMipFaceFromTexture(sourceMip, targetMip, face);
-                });
+                _pendingTransfers.emplace(*this, sourceMip, targetMip, face);
                continue;
            }

@ -433,24 +553,19 @@ void GL45ResourceTexture::populateTransferQueue() {
            auto bytesPerLine = (uint32_t)mipData->getSize() / lines;
            Q_ASSERT(0 == (mipData->getSize() % lines));
            uint32_t linesPerTransfer = (uint32_t)(MAX_TRANSFER_SIZE / bytesPerLine);
-            size_t offset = 0;
            uint32_t lineOffset = 0;
            while (lineOffset < lines) {
                uint32_t linesToCopy = std::min<uint32_t>(lines - lineOffset, linesPerTransfer);
-                uvec3 size { mipDimensions.x, linesToCopy, 1 };
-                _pendingTransfers.push([=] {
-                    copyMipFaceLinesFromTexture(sourceMip, targetMip, face, lineOffset, linesToCopy, offset);
-                });
+                _pendingTransfers.emplace(TransferJob(*this, sourceMip, targetMip, face, linesToCopy, lineOffset));
                lineOffset += linesToCopy;
-                offset += (linesToCopy * bytesPerLine);
            }
        }

        // queue up the sampler and populated mip change for after the transfer has completed
-        _pendingTransfers.push([=] {
+        _pendingTransfers.emplace(TransferJob(*this, [=] {
            _populatedMip = sourceMip;
            syncSampler();
-        });
+        }));
    } while (sourceMip != _allocatedMip);
 }