From 1f058f069e16976a35f39ed7d587fd0c92289e39 Mon Sep 17 00:00:00 2001 From: Brad Davis Date: Tue, 14 Feb 2017 17:58:41 -0800 Subject: [PATCH] First pass at new texture transfer logic --- libraries/gpu-gl/src/gpu/gl45/GL45Backend.h | 48 +++++- .../src/gpu/gl45/GL45BackendTexture.cpp | 24 ++- .../gpu/gl45/GL45BackendVariableTexture.cpp | 163 +++++++++++++++--- 3 files changed, 192 insertions(+), 43 deletions(-) diff --git a/libraries/gpu-gl/src/gpu/gl45/GL45Backend.h b/libraries/gpu-gl/src/gpu/gl45/GL45Backend.h index d2d17160ba..4f299d417f 100644 --- a/libraries/gpu-gl/src/gpu/gl45/GL45Backend.h +++ b/libraries/gpu-gl/src/gpu/gl45/GL45Backend.h @@ -14,6 +14,7 @@ #include "../gl/GLBackend.h" #include "../gl/GLTexture.h" +#include #define INCREMENTAL_TRANSFER 0 @@ -39,7 +40,7 @@ public: GL45Texture(const std::weak_ptr& backend, const Texture& texture); void generateMips() const override; void copyMipFaceFromTexture(uint16_t sourceMip, uint16_t targetMip, uint8_t face) const; - void copyMipFaceLinesFromTexture(uint16_t sourceMip, uint16_t targetMip, uint8_t face, uint32_t lineOffset, uint32_t lines, size_t dataOffset) const; + void copyMipFaceLinesFromTexture(uint16_t mip, uint8_t face, const uvec3& size, uint32_t yOffset, GLenum format, GLenum type, const void* sourcePointer) const; virtual void syncSampler() const; }; @@ -95,14 +96,50 @@ public: }; using QueuePair = std::pair; - class QueuePairLess { - public: + struct QueuePairLess { bool operator()(const QueuePair& a, const QueuePair& b) { return a.second < b.second; } }; using WorkQueue = std::priority_queue, QueuePairLess>; + class TransferJob { + using VoidLambda = std::function; + using VoidLambdaQueue = std::queue; + using ThreadPointer = std::shared_ptr; + const GL45VariableAllocationTexture& _parent; + const uint16_t _sourceMip; + const uint16_t _targetMip; + const uint8_t _face; + const uint32_t _lines; + const uint32_t _lineOffset; + // Holds the contents to transfer to the GPU in CPU memory + std::vector 
_buffer; + // Indicates if a transfer from backing storage to internal storage has started + bool _bufferingStarted { false }; + bool _transferOnly { false }; + bool _bufferingCompleted { false }; + VoidLambda _transferLambda; + VoidLambda _bufferingLambda; + static ThreadPointer _bufferThread; + static Mutex _mutex; + static VoidLambdaQueue _bufferLambdaQueue; + static std::atomic _shutdownBufferingThread; + static void bufferLoop(); + + public: + TransferJob(const GL45VariableAllocationTexture& parent, std::function transferLambda); + TransferJob(const GL45VariableAllocationTexture& parent, uint16_t sourceMip, uint16_t targetMip, uint8_t face, uint32_t lines = 0, uint32_t lineOffset = 0); + bool tryTransfer(); + static void startTransferLoop(); + static void stopTransferLoop(); + + private: + void startBuffering(); + void transfer(); + }; + + using TransferQueue = std::queue; static MemoryPressureState _memoryPressureState; protected: static std::atomic _memoryPressureStateStale; @@ -110,6 +147,7 @@ public: static WorkQueue _transferQueue; static WorkQueue _promoteQueue; static WorkQueue _demoteQueue; + static TexturePointer _currentTransferTexture; static const uvec3 INITIAL_MIP_TRANSFER_DIMENSIONS; @@ -128,7 +166,7 @@ public: bool canPromote() const { return _allocatedMip > 0; } bool canDemote() const { return _allocatedMip < _maxAllocatedMip; } bool hasPendingTransfers() const { return !_pendingTransfers.empty(); } - void executeNextTransfer(); + void executeNextTransfer(const TexturePointer& currentTexture); uint32 size() const override { return _size; } virtual void populateTransferQueue() = 0; virtual void promote() = 0; @@ -148,7 +186,7 @@ public: // Contains a series of lambdas that when executed will transfer data to the GPU, modify // the _populatedMip and update the sampler in order to fully populate the allocated texture // until _populatedMip == _allocatedMip - std::queue _pendingTransfers; + TransferQueue _pendingTransfers; }; class 
GL45ResourceTexture : public GL45VariableAllocationTexture { diff --git a/libraries/gpu-gl/src/gpu/gl45/GL45BackendTexture.cpp b/libraries/gpu-gl/src/gpu/gl45/GL45BackendTexture.cpp index c344b453a9..6dd1d6aea3 100644 --- a/libraries/gpu-gl/src/gpu/gl45/GL45BackendTexture.cpp +++ b/libraries/gpu-gl/src/gpu/gl45/GL45BackendTexture.cpp @@ -118,26 +118,17 @@ void GL45Texture::generateMips() const { (void)CHECK_GL_ERROR(); } -void GL45Texture::copyMipFaceLinesFromTexture(uint16_t sourceMip, uint16_t targetMip, uint8_t face, uint32_t lineOffset, uint32_t lines, size_t dataOffset) const { - const auto& texture = _gpuObject; - if (!texture.isStoredMipFaceAvailable(sourceMip)) { - return; - } - auto mipDimensions = texture.evalMipDimensions(sourceMip); - glm::uvec3 size = { mipDimensions.x, lines, mipDimensions.z }; - auto mipData = texture.accessStoredMipFace(sourceMip, face); - auto sourcePointer = mipData->readData() + dataOffset; - GLTexelFormat texelFormat = GLTexelFormat::evalGLTexelFormat(texture.getTexelFormat(), mipData->getFormat()); +void GL45Texture::copyMipFaceLinesFromTexture(uint16_t mip, uint8_t face, const uvec3& size, uint32_t yOffset, GLenum format, GLenum type, const void* sourcePointer) const { if (GL_TEXTURE_2D == _target) { - glTextureSubImage2D(_id, targetMip, 0, lineOffset, size.x, size.y, texelFormat.format, texelFormat.type, sourcePointer); + glTextureSubImage2D(_id, mip, 0, yOffset, size.x, size.y, format, type, sourcePointer); } else if (GL_TEXTURE_CUBE_MAP == _target) { // DSA ARB does not work on AMD, so use EXT // unless EXT is not available on the driver if (glTextureSubImage2DEXT) { auto target = GLTexture::CUBE_FACE_LAYOUT[face]; - glTextureSubImage2DEXT(_id, target, targetMip, 0, lineOffset, size.x, size.y, texelFormat.format, texelFormat.type, sourcePointer); + glTextureSubImage2DEXT(_id, target, mip, 0, yOffset, size.x, size.y, format, type, sourcePointer); } else { - glTextureSubImage3D(_id, targetMip, 0, lineOffset, face, size.x, 
size.y, 1, texelFormat.format, texelFormat.type, sourcePointer); + glTextureSubImage3D(_id, mip, 0, yOffset, face, size.x, size.y, 1, format, type, sourcePointer); } } else { Q_ASSERT(false); @@ -146,8 +137,13 @@ void GL45Texture::copyMipFaceLinesFromTexture(uint16_t sourceMip, uint16_t targe } void GL45Texture::copyMipFaceFromTexture(uint16_t sourceMip, uint16_t targetMip, uint8_t face) const { + if (!_gpuObject.isStoredMipFaceAvailable(sourceMip)) { + return; + } auto size = _gpuObject.evalMipDimensions(sourceMip); - copyMipFaceLinesFromTexture(sourceMip, targetMip, face, 0, size.y, 0); + auto mipData = _gpuObject.accessStoredMipFace(sourceMip, face); + GLTexelFormat texelFormat = GLTexelFormat::evalGLTexelFormat(_gpuObject.getTexelFormat(), mipData->getFormat()); + copyMipFaceLinesFromTexture(targetMip, face, size, 0, texelFormat.format, texelFormat.type, mipData->readData()); } void GL45Texture::syncSampler() const { diff --git a/libraries/gpu-gl/src/gpu/gl45/GL45BackendVariableTexture.cpp b/libraries/gpu-gl/src/gpu/gl45/GL45BackendVariableTexture.cpp index 597e35750a..e26a5c262f 100644 --- a/libraries/gpu-gl/src/gpu/gl45/GL45BackendVariableTexture.cpp +++ b/libraries/gpu-gl/src/gpu/gl45/GL45BackendVariableTexture.cpp @@ -39,6 +39,7 @@ const uvec3 GL45VariableAllocationTexture::INITIAL_MIP_TRANSFER_DIMENSIONS { 64, WorkQueue GL45VariableAllocationTexture::_transferQueue; WorkQueue GL45VariableAllocationTexture::_promoteQueue; WorkQueue GL45VariableAllocationTexture::_demoteQueue; +TexturePointer GL45VariableAllocationTexture::_currentTransferTexture; #define OVERSUBSCRIBED_PRESSURE_VALUE 0.95f #define UNDERSUBSCRIBED_PRESSURE_VALUE 0.85f @@ -46,6 +47,123 @@ WorkQueue GL45VariableAllocationTexture::_demoteQueue; static const size_t DEFAULT_ALLOWED_TEXTURE_MEMORY = MB_TO_BYTES(DEFAULT_ALLOWED_TEXTURE_MEMORY_MB); +using TransferJob = GL45VariableAllocationTexture::TransferJob; + +static const uvec3 MAX_TRANSFER_DIMENSIONS { 1024, 1024, 1 }; +static const size_t 
MAX_TRANSFER_SIZE = MAX_TRANSFER_DIMENSIONS.x * MAX_TRANSFER_DIMENSIONS.y * 4; + +std::shared_ptr TransferJob::_bufferThread { nullptr }; +std::atomic TransferJob::_shutdownBufferingThread { false }; +Mutex TransferJob::_mutex; +TransferJob::VoidLambdaQueue TransferJob::_bufferLambdaQueue; + +void TransferJob::startTransferLoop() { + if (_bufferThread) { + return; + } + _shutdownBufferingThread = false; + _bufferThread = std::make_shared([] { + TransferJob::bufferLoop(); + }); +} + +void TransferJob::stopTransferLoop() { + if (!_bufferThread) { + return; + } + _shutdownBufferingThread = true; + _bufferThread->join(); + _bufferThread.reset(); + _shutdownBufferingThread = false; +} + +TransferJob::TransferJob(const GL45VariableAllocationTexture& parent, uint16_t sourceMip, uint16_t targetMip, uint8_t face, uint32_t lines, uint32_t lineOffset) + : _parent(parent), _sourceMip(sourceMip), _targetMip(targetMip), _face(face), _lines(lines), _lineOffset(lineOffset) { + + if (0 == lines) { + _bufferingLambda = [this] { + auto mipData = _parent._gpuObject.accessStoredMipFace(_sourceMip, _face); + auto size = mipData->getSize(); + _buffer.resize(size); + memcpy(&_buffer[0], mipData->readData(), size); + _bufferingCompleted = true; + }; + + } else { + _bufferingLambda = [this] { + auto mipData = _parent._gpuObject.accessStoredMipFace(_sourceMip, _face); + auto dimensions = _parent._gpuObject.evalMipDimensions(_sourceMip); + auto mipSize = mipData->getSize(); + auto bytesPerLine = (uint32_t)mipSize / dimensions.y; + auto transferSize = bytesPerLine * _lines; + auto sourceOffset = bytesPerLine * _lineOffset; + _buffer.resize(transferSize); + memcpy(&_buffer[0], mipData->readData() + sourceOffset, transferSize); + _bufferingCompleted = true; + }; + } + + _transferLambda = [this] { + auto mipData = _parent._gpuObject.accessStoredMipFace(_sourceMip, _face); + auto dimensions = _parent._gpuObject.evalMipDimensions(_sourceMip); + GLTexelFormat texelFormat = 
GLTexelFormat::evalGLTexelFormat(_parent._gpuObject.getTexelFormat(), mipData->getFormat()); + _parent.copyMipFaceLinesFromTexture(_targetMip, _face, dimensions, _lineOffset, texelFormat.format, texelFormat.type, &_buffer[0]); + _buffer.swap(std::vector()); + }; +} + +TransferJob::TransferJob(const GL45VariableAllocationTexture& parent, std::function transferLambda) + : _parent(parent), _sourceMip(0), _targetMip(0), _face(0), _lines(0), _lineOffset(0), _bufferingCompleted(true), _transferLambda(transferLambda) { + if (!_bufferThread) { + _bufferThread = std::make_shared([] { + TransferJob::bufferLoop(); + }); + } +} + +bool TransferJob::tryTransfer() { + // Are we ready to transfer + if (_bufferingCompleted) { + _transferLambda(); + return true; + } + + startBuffering(); + return false; +} + +void TransferJob::startBuffering() { + if (_bufferingStarted) { + return; + } + _bufferingStarted = true; + { + Lock lock(_mutex); + _bufferLambdaQueue.push(_bufferingLambda); + } +} + +void TransferJob::bufferLoop() { + while (!_shutdownBufferingThread) { + VoidLambdaQueue workingQueue; + { + Lock lock(_mutex); + _bufferLambdaQueue.swap(workingQueue); + } + + if (workingQueue.empty()) { + QThread::msleep(5); + continue; + } + + while (!workingQueue.empty()) { + workingQueue.front()(); + workingQueue.pop(); + } + } +} + + void GL45VariableAllocationTexture::addMemoryManagedTexture(const TexturePointer& texturePointer) { _memoryManagedTextures.push_back(texturePointer); addToWorkQueue(texturePointer); @@ -190,7 +308,14 @@ void GL45VariableAllocationTexture::updateMemoryPressure() { } if (newState != _memoryPressureState) { + if (MemoryPressureState::Transfer == _memoryPressureState) { + TransferJob::stopTransferLoop(); + } _memoryPressureState = newState; + if (MemoryPressureState::Transfer == _memoryPressureState) { + TransferJob::startTransferLoop(); + } + // Clear the existing queue _transferQueue = WorkQueue(); _promoteQueue = WorkQueue(); @@ -223,20 +348,17 @@ void 
GL45VariableAllocationTexture::processWorkQueues() { if (!object->canDemote()) { continue; } - //qDebug() << "QQQ executing demote for " << texture->source().c_str(); object->demote(); } else if (MemoryPressureState::Undersubscribed == _memoryPressureState) { if (!object->canPromote()) { continue; } - //qDebug() << "QQQ executing promote for " << texture->source().c_str(); object->promote(); } else if (MemoryPressureState::Transfer == _memoryPressureState) { if (!object->hasPendingTransfers()) { continue; } - //qDebug() << "QQQ executing transfer for " << texture->source().c_str(); - object->executeNextTransfer(); + object->executeNextTransfer(texture); } else { Q_UNREACHABLE(); } @@ -265,10 +387,14 @@ GL45VariableAllocationTexture::~GL45VariableAllocationTexture() { Backend::updateTextureGPUMemoryUsage(_size, 0); } -void GL45VariableAllocationTexture::executeNextTransfer() { +void GL45VariableAllocationTexture::executeNextTransfer(const TexturePointer& currentTexture) { if (!_pendingTransfers.empty()) { - _pendingTransfers.front()(); - _pendingTransfers.pop(); + // Keeping hold of a strong pointer during the transfer ensures that the transfer thread cannot try to access a destroyed texture + _currentTransferTexture = currentTexture; + if (_pendingTransfers.front().tryTransfer()) { + _pendingTransfers.pop(); + _currentTransferTexture.reset(); + } } } @@ -394,17 +520,15 @@ void GL45ResourceTexture::demote() { populateTransferQueue(); } + void GL45ResourceTexture::populateTransferQueue() { PROFILE_RANGE(render_gpu_gl, __FUNCTION__); - _pendingTransfers = std::queue(); if (_populatedMip <= _allocatedMip) { return; } + _pendingTransfers = TransferQueue(); - static const uvec3 MAX_TRANSFER_DIMENSIONS { 1024, 1024, 1 }; - static const size_t MAX_TRANSFER_SIZE = MAX_TRANSFER_DIMENSIONS.x * MAX_TRANSFER_DIMENSIONS.y * 4; const uint8_t maxFace = GLTexture::getFaceCount(_target); - uint16_t sourceMip = _populatedMip; do { --sourceMip; @@ -418,11 +542,7 @@ void 
GL45ResourceTexture::populateTransferQueue() { // If the mip is less than the max transfer size, then just do it in one transfer if (glm::all(glm::lessThanEqual(mipDimensions, MAX_TRANSFER_DIMENSIONS))) { // Can the mip be transferred in one go - _pendingTransfers.push([=] { - Q_ASSERT(sourceMip >= _allocatedMip); - // FIXME modify the copy mechanism to be incremental - copyMipFaceFromTexture(sourceMip, targetMip, face); - }); + _pendingTransfers.emplace(*this, sourceMip, targetMip, face); continue; } @@ -433,24 +553,19 @@ void GL45ResourceTexture::populateTransferQueue() { auto bytesPerLine = (uint32_t)mipData->getSize() / lines; Q_ASSERT(0 == (mipData->getSize() % lines)); uint32_t linesPerTransfer = (uint32_t)(MAX_TRANSFER_SIZE / bytesPerLine); - size_t offset = 0; uint32_t lineOffset = 0; while (lineOffset < lines) { uint32_t linesToCopy = std::min(lines - lineOffset, linesPerTransfer); - uvec3 size { mipDimensions.x, linesToCopy, 1 }; - _pendingTransfers.push([=] { - copyMipFaceLinesFromTexture(sourceMip, targetMip, face, lineOffset, linesToCopy, offset); - }); + _pendingTransfers.emplace(TransferJob(*this, sourceMip, targetMip, face, linesToCopy, lineOffset)); lineOffset += linesToCopy; - offset += (linesToCopy * bytesPerLine); } } // queue up the sampler and populated mip change for after the transfer has completed - _pendingTransfers.push([=] { + _pendingTransfers.emplace(TransferJob(*this, [=] { _populatedMip = sourceMip; syncSampler(); - }); + })); } while (sourceMip != _allocatedMip); }