// // Created by Bradley Austin Davis on 2016/05/15 // Copyright 2013-2016 High Fidelity, Inc. // // Distributed under the Apache License, Version 2.0. // See the accompanying file LICENSE or http://www.apache.org/licenses/LICENSE-2.0.html // #include "GLTexture.h" #include #include #include "GLBackend.h" #define OVERSUBSCRIBED_PRESSURE_VALUE 0.95f #define UNDERSUBSCRIBED_PRESSURE_VALUE 0.85f #define DEFAULT_ALLOWED_TEXTURE_MEMORY_MB ((size_t)1024) #define MAX_RESOURCE_TEXTURES_PER_FRAME 2 #define NO_BUFFER_WORK_SLEEP_TIME_MS 2 #define THREADED_TEXTURE_BUFFERING 1 static const size_t DEFAULT_ALLOWED_TEXTURE_MEMORY = MB_TO_BYTES(DEFAULT_ALLOWED_TEXTURE_MEMORY_MB); namespace gpu { namespace gl { enum class MemoryPressureState { Idle, Transfer, Undersubscribed, }; static MemoryPressureState _memoryPressureState{ MemoryPressureState::Idle }; template struct LessPairSecond { bool operator()(const T& a, const T& b) { return a.second < b.second; } }; using QueuePair = std::pair; // Contains a priority sorted list of textures on which work is to be done over many frames // Uses a weak pointer to the texture to avoid keeping it in scope if the client stops using it using WorkQueue = std::priority_queue, LessPairSecond>; using ImmediateQueuePair = std::pair; // Contains a priority sorted list of textures on which work is to be done in the current frame using ImmediateWorkQueue = std::priority_queue, LessPairSecond>; // A map of weak texture pointers to queues of work to be done to transfer their data from the backing store to the GPU using TransferMap = std::map>; class GLTextureTransferEngineDefault : public GLTextureTransferEngine { using Parent = GLTextureTransferEngine; public: // Called once per frame by the GLBackend to manage texture memory // Will deallocate textures if oversubscribed, void manageMemory() override; void shutdown() override; protected: class TextureBufferThread : public QThread { public: TextureBufferThread(GLTextureTransferEngineDefault& parent) : _parent(parent) { start(); } protected: void run() override { while (!_parent._shutdown) { if (!_parent.processActiveBufferQueue()) { QThread::msleep(NO_BUFFER_WORK_SLEEP_TIME_MS); } } } GLTextureTransferEngineDefault& _parent; }; using ActiveTransferJob = std::pair; using ActiveTransferQueue = std::list; void populateActiveBufferQueue(); bool processActiveBufferQueue(); void processTransferQueues(); void populateTransferQueue(const TexturePointer& texturePointer); //void addToWorkQueue(const TexturePointer& texturePointer); void updateMemoryPressure(); void processDemotes(size_t relief, const std::vector& strongTextures); void processPromotes(); private: std::atomic _shutdown{ false }; // Contains a priority sorted list of weak texture pointers that have been determined to be eligible for additional allocation // While the memory state is 'undersubscribed', items will be removed from this list and processed, allocating additional memory // per frame WorkQueue _promoteQueue; // This queue contains jobs that will buffer data from the texture backing store (ideally a memory mapped KTX file) // to a CPU memory buffer. This queue is populated on the main GPU thread, and drained on a dedicated thread. // When an item on the _activeBufferQueue is completed it is put into the _activeTransferQueue ActiveTransferQueue _activeBufferQueue; // This queue contains jobs that will upload data from a CPU buffer into a GPU. This queue is populated on the background // thread that process the _activeBufferQueue and drained on the main GPU thread ActiveTransferQueue _activeTransferQueue; // Mutex protecting the _activeTransferQueue & _activeBufferQueue since they are each accessed both from the main GPU thread // and the buffering thread Mutex _bufferMutex; // The buffering thread which drains the _activeBufferQueue and populates the _activeTransferQueue TextureBufferThread* _transferThread{ nullptr }; // The amount of buffering work currently represented by the _activeBufferQueue std::atomic _queuedBufferSize{ 0 }; // This contains a map of all textures to queues of pending transfer jobs. While in the transfer state, this map is used to // populate the _activeBufferQueue up to the limit specified in GLVariableAllocationTexture::MAX_BUFFER_SIZE TransferMap _pendingTransfersMap; }; }} // namespace gpu::gl using namespace gpu; using namespace gpu::gl; void GLBackend::initTextureManagementStage() { _textureManagement._transferEngine = std::make_shared(); } void GLBackend::killTextureManagementStage() { _textureManagement._transferEngine->shutdown(); _textureManagement._transferEngine.reset(); } std::vector GLTextureTransferEngine::getAllTextures() { std::vector result; result.reserve(_registeredTextures.size()); std::remove_if(_registeredTextures.begin(), _registeredTextures.end(), [&](const std::weak_ptr& weak)->bool { auto strong = weak.lock(); bool strongResult = strong.operator bool(); if (strongResult) { result.push_back(strong); } return strongResult; }); return result; } void GLTextureTransferEngine::addMemoryManagedTexture(const TexturePointer& texturePointer) { ++_frameTexturesCreated; _registeredTextures.push_back(texturePointer); } void GLTextureTransferEngineDefault::shutdown() { _shutdown = true; #if THREADED_TEXTURE_BUFFERING if (_transferThread) { _transferThread->wait(); delete _transferThread; _transferThread = nullptr; } #endif } void GLTextureTransferEngineDefault::manageMemory() { PROFILE_RANGE(render_gpu_gl, __FUNCTION__); // reset the count used to limit the number of textures created per frame resetFrameTextureCreated(); // Determine the current memory management state. It will be either idle (no work to do), // undersubscribed (need to do more allocation) or transfer (need to upload content from the // backing store to the GPU updateMemoryPressure(); if (MemoryPressureState::Undersubscribed == _memoryPressureState) { // If we're undersubscribed, we need to process some of the textures that can have additional allocation processPromotes(); } else if (MemoryPressureState::Transfer == _memoryPressureState) { // If we're in transfer mode we need to manage the buffering and upload queues processTransferQueues(); } } // Each frame we will check if our memory pressure state has changed. void GLTextureTransferEngineDefault::updateMemoryPressure() { PROFILE_RANGE(render_gpu_gl, __FUNCTION__); size_t allowedMemoryAllocation = gpu::Texture::getAllowedGPUMemoryUsage(); if (0 == allowedMemoryAllocation) { allowedMemoryAllocation = DEFAULT_ALLOWED_TEXTURE_MEMORY; } // Clear any defunct textures (weak pointers that no longer have a valid texture) auto strongTextures = getAllTextures(); size_t totalVariableMemoryAllocation = 0; size_t idealMemoryAllocation = 0; bool canDemote = false; bool canPromote = false; bool hasTransfers = false; for (const auto& texture : strongTextures) { GLTexture* gltexture = Backend::getGPUObject(*texture); GLVariableAllocationSupport* vartexture = dynamic_cast(gltexture); vartexture->sanityCheck(); // Track how much the texture thinks it should be using idealMemoryAllocation += texture->evalTotalSize(); // Track how much we're actually using totalVariableMemoryAllocation += gltexture->size(); if (vartexture->canDemote()) { canDemote |= true; } if (vartexture->canPromote()) { canPromote |= true; } if (vartexture->hasPendingTransfers()) { hasTransfers |= true; } } Backend::textureResourceIdealGPUMemSize.set(idealMemoryAllocation); size_t unallocated = idealMemoryAllocation - totalVariableMemoryAllocation; float pressure = (float)totalVariableMemoryAllocation / (float)allowedMemoryAllocation; // If we're oversubscribed we need to demote textures IMMEDIATELY if (pressure > OVERSUBSCRIBED_PRESSURE_VALUE && canDemote) { auto overPressure = pressure - OVERSUBSCRIBED_PRESSURE_VALUE; size_t relief = (size_t)(overPressure * totalVariableMemoryAllocation); processDemotes(relief, strongTextures); return; } auto newState = MemoryPressureState::Idle; if (pressure < UNDERSUBSCRIBED_PRESSURE_VALUE && (unallocated != 0 && canPromote)) { newState = MemoryPressureState::Undersubscribed; } else if (hasTransfers) { newState = MemoryPressureState::Transfer; } else { Lock lock(_bufferMutex); if (!_activeBufferQueue.empty() || !_activeTransferQueue.empty() || !_pendingTransfersMap.empty()) { newState = MemoryPressureState::Transfer; } } // If we've changed state then we have to populate the appropriate structure with the work to be done if (newState != _memoryPressureState) { _memoryPressureState = newState; _promoteQueue = WorkQueue(); _pendingTransfersMap.clear(); if (MemoryPressureState::Idle == _memoryPressureState) { return; } // For each texture, if it's eligible for work in the current state, put it into the appropriate structure for (const auto& texture : strongTextures) { GLTexture* gltexture = Backend::getGPUObject(*texture); GLVariableAllocationSupport* vargltexture = dynamic_cast(gltexture); if (MemoryPressureState::Undersubscribed == _memoryPressureState && vargltexture->canPromote()) { // Promote smallest first _promoteQueue.push({ texture, 1.0f / (float)gltexture->size() }); } else if (MemoryPressureState::Transfer == _memoryPressureState && vargltexture->hasPendingTransfers()) { populateTransferQueue(texture); } } } } // Manage the _activeBufferQueue and _activeTransferQueue queues void GLTextureTransferEngineDefault::processTransferQueues() { #if THREADED_TEXTURE_BUFFERING if (!_transferThread) { _transferThread = new TextureBufferThread(*this); } #endif // From the pendingTransferMap, queue jobs into the _activeBufferQueue // Doing so will lock the weak texture pointer so that it can't be destroyed // while the background thread is working. // // This will queue jobs until _queuedBufferSize can't be increased without exceeding // GLVariableAllocationTexture::MAX_BUFFER_SIZE or there is no more work to be done populateActiveBufferQueue(); #if !THREADED_TEXTURE_BUFFERING processActiveBufferQueue(); #endif // Take any tasks which have completed buffering and process them, uploading the buffered // data to the GPU. Drains the _activeTransferQueue { ActiveTransferQueue activeTransferQueue; { Lock lock(_bufferMutex); activeTransferQueue.swap(_activeTransferQueue); } while (!activeTransferQueue.empty()) { const auto& activeTransferJob = activeTransferQueue.front(); const auto& texturePointer = activeTransferJob.first; const auto& tranferJob = activeTransferJob.second; tranferJob->transfer(texturePointer); // The pop_front MUST be the last call since all of these varaibles in scope are // references that will be invalid after the pop activeTransferQueue.pop_front(); } } // If we have no more work in any of the structures, reset the memory state to idle to // force reconstruction of the _pendingTransfersMap if necessary { Lock lock(_bufferMutex); if (_activeTransferQueue.empty() && _activeBufferQueue.empty() && _pendingTransfersMap.empty()) { _memoryPressureState = MemoryPressureState::Idle; } } } void GLTextureTransferEngineDefault::populateActiveBufferQueue() { size_t queuedBufferSize = _queuedBufferSize; static const auto& MAX_BUFFER_SIZE = GLVariableAllocationSupport::MAX_BUFFER_SIZE; Q_ASSERT(queuedBufferSize <= MAX_BUFFER_SIZE); size_t availableBufferSize = MAX_BUFFER_SIZE - queuedBufferSize; // Queue up buffering jobs ActiveTransferQueue newBufferJobs; size_t newTransferSize{ 0 }; for (auto itr = _pendingTransfersMap.begin(); itr != _pendingTransfersMap.end(); ) { const auto& weakTexture = itr->first; const auto texture = weakTexture.lock(); // Texture no longer exists, remove from the transfer map and move on if (!texture) { itr = _pendingTransfersMap.erase(itr); continue; } GLTexture* gltexture = Backend::getGPUObject(*texture); GLVariableAllocationSupport* vargltexture = dynamic_cast(gltexture); auto& textureTransferQueue = itr->second; // Can't find any pending transfers, so move on if (textureTransferQueue.empty()) { if (vargltexture->hasPendingTransfers()) { // qWarning(gpugllogging) << "Texture " << gltexture->_id << "(" << texture->source().c_str() << ") has no transfer jobs, but has pending transfers" ; } itr = _pendingTransfersMap.erase(itr); continue; } const auto& transferJob = textureTransferQueue.front(); const auto& transferSize = transferJob->size(); // If there's not enough space for the buffering, then break out of the loop if (transferSize > availableBufferSize) { break; } availableBufferSize -= transferSize; Q_ASSERT(availableBufferSize <= MAX_BUFFER_SIZE); Q_ASSERT(newTransferSize <= MAX_BUFFER_SIZE); newTransferSize += transferSize; Q_ASSERT(newTransferSize <= MAX_BUFFER_SIZE); newBufferJobs.emplace_back(texture, transferJob); textureTransferQueue.pop(); ++itr; } { Lock lock(_bufferMutex); _activeBufferQueue.splice(_activeBufferQueue.end(), newBufferJobs); Q_ASSERT(_queuedBufferSize <= MAX_BUFFER_SIZE); _queuedBufferSize += newTransferSize; Q_ASSERT(_queuedBufferSize <= MAX_BUFFER_SIZE); } } bool GLTextureTransferEngineDefault::processActiveBufferQueue() { ActiveTransferQueue activeBufferQueue; { Lock lock(_bufferMutex); _activeBufferQueue.swap(activeBufferQueue); } if (activeBufferQueue.empty()) { return false; } for (const auto& activeJob : activeBufferQueue) { const auto& texture = activeJob.first; const auto& transferJob = activeJob.second; // Some jobs don't require buffering, but they pass through this queue to ensure that we don't re-order // the buffering & transfer operations. All jobs in the queue must be processed in order. if (!transferJob->bufferingRequired()) { continue; } const auto& transferSize = transferJob->size(); transferJob->buffer(texture); Q_ASSERT(_queuedBufferSize >= transferSize); _queuedBufferSize -= transferSize; } { Lock lock(_bufferMutex); _activeTransferQueue.splice(_activeTransferQueue.end(), activeBufferQueue); } return true; } void GLTextureTransferEngineDefault::populateTransferQueue(const TexturePointer& texturePointer) { TextureWeakPointer weakTexture = texturePointer; GLTexture* gltexture = Backend::getGPUObject(*texturePointer); GLVariableAllocationSupport* vargltexture = dynamic_cast(gltexture); TransferJob::Queue pendingTransfers; PROFILE_RANGE(render_gpu_gl, __FUNCTION__); vargltexture->populateTransferQueue(pendingTransfers); if (!pendingTransfers.empty()) { _pendingTransfersMap[weakTexture] = pendingTransfers; } } // From the queue of textures to be promited void GLTextureTransferEngineDefault::processPromotes() { // FIXME use max allocated memory per frame instead of promotion count static const size_t MAX_ALLOCATED_BYTES_PER_FRAME = GLVariableAllocationSupport::MAX_BUFFER_SIZE; static const size_t MAX_ALLOCATIONS_PER_FRAME = 8; size_t allocatedBytes{ 0 }; size_t allocations{ 0 }; while (!_promoteQueue.empty()) { // Grab the first item off the demote queue PROFILE_RANGE(render_gpu_gl, __FUNCTION__); auto entry = _promoteQueue.top(); _promoteQueue.pop(); auto texture = entry.first.lock(); if (!texture) { continue; } GLTexture* gltexture = Backend::getGPUObject(*texture); GLVariableAllocationSupport* vartexture = dynamic_cast(gltexture); auto originalSize = gltexture->size(); vartexture->promote(); auto allocationDelta = gltexture->size() - originalSize; if (vartexture->canPromote()) { // Promote smallest first _promoteQueue.push({ texture, 1.0f / (float)gltexture->size() }); } allocatedBytes += allocationDelta; if (++allocations >= MAX_ALLOCATIONS_PER_FRAME) { break; } if (allocatedBytes >= MAX_ALLOCATED_BYTES_PER_FRAME) { break; } } // Get the front of the work queue to perform work if (_promoteQueue.empty()) { // Force rebuild of work queue _memoryPressureState = MemoryPressureState::Idle; } } void GLTextureTransferEngineDefault::processDemotes(size_t reliefRequired, const std::vector& strongTextures) { // Demote largest first ImmediateWorkQueue demoteQueue; for (const auto& texture : strongTextures) { GLTexture* gltexture = Backend::getGPUObject(*texture); GLVariableAllocationSupport* vargltexture = dynamic_cast(gltexture); if (vargltexture->canDemote()) { demoteQueue.push({ texture, (float)gltexture->size() }); } } size_t relieved = 0; while (!demoteQueue.empty() && relieved < reliefRequired) { { const auto& target = demoteQueue.top(); const auto& texture = target.first; GLTexture* gltexture = Backend::getGPUObject(*texture); auto oldSize = gltexture->size(); GLVariableAllocationSupport* vargltexture = dynamic_cast(gltexture); vargltexture->demote(); auto newSize = gltexture->size(); relieved += (oldSize - newSize); } demoteQueue.pop(); } } // FIXME hack for stats display QString getTextureMemoryPressureModeString() { switch (_memoryPressureState) { case MemoryPressureState::Undersubscribed: return "Undersubscribed"; case MemoryPressureState::Transfer: return "Transfer"; case MemoryPressureState::Idle: return "Idle"; } Q_UNREACHABLE(); return "Unknown"; }