// overte-JulianGro/libraries/gpu-gl-common/src/gpu/gl/GLTextureTransfer.cpp

//
// Created by Bradley Austin Davis on 2016/05/15
// Copyright 2013-2016 High Fidelity, Inc.
//
// Distributed under the Apache License, Version 2.0.
// See the accompanying file LICENSE or http://www.apache.org/licenses/LICENSE-2.0.html
//
#include "GLTexture.h"
#include <QtCore/QThread>
#include <NumericalConstants.h>
#include "GLBackend.h"
#define OVERSUBSCRIBED_PRESSURE_VALUE 0.95f
#define UNDERSUBSCRIBED_PRESSURE_VALUE 0.85f
#define DEFAULT_ALLOWED_TEXTURE_MEMORY_MB ((size_t)2048)
#define MAX_RESOURCE_TEXTURES_PER_FRAME 2
#define NO_BUFFER_WORK_SLEEP_TIME_MS 2
#define THREADED_TEXTURE_BUFFERING 1
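// Note: the gap between the two pressure values above provides hysteresis. Allocations
// only grow while usage is below 85% of the allowed budget and only shrink once usage
// exceeds 95%, so textures sitting near a single threshold do not thrash between
// promotion and demotion (see updateMemoryPressure below).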
static const size_t DEFAULT_ALLOWED_TEXTURE_MEMORY = MB_TO_BYTES(DEFAULT_ALLOWED_TEXTURE_MEMORY_MB);
namespace gpu { namespace gl {
enum class MemoryPressureState {
Idle,
Transfer,
Undersubscribed,
};
static MemoryPressureState _memoryPressureState{ MemoryPressureState::Idle };
template <typename T>
struct LessPairSecond {
bool operator()(const T& a, const T& b) const { return a.second < b.second; }
};
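// std::priority_queue is a max-heap: with the comparator above, the entry with the
// largest 'second' value is always on top. Pushing a priority of 1/size therefore
// pops the smallest texture first (used for promotion below), while pushing the raw
// size pops the largest first (used for demotion).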
using QueuePair = std::pair<TextureWeakPointer, float>;
// Contains a priority sorted list of textures on which work is to be done over many frames
// Uses a weak pointer to the texture to avoid keeping it in scope if the client stops using it
using WorkQueue = std::priority_queue<QueuePair, std::vector<QueuePair>, LessPairSecond<QueuePair>>;
using ImmediateQueuePair = std::pair<TexturePointer, float>;
// Contains a priority sorted list of textures on which work is to be done in the current frame
using ImmediateWorkQueue = std::priority_queue<ImmediateQueuePair, std::vector<ImmediateQueuePair>, LessPairSecond<ImmediateQueuePair>>;
// A map of weak texture pointers to queues of work to be done to transfer their data from the backing store to the GPU
using TransferMap = std::map<TextureWeakPointer, TransferQueue, std::owner_less<TextureWeakPointer>>;
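// std::owner_less compares the control blocks owned by the weak pointers rather than
// the raw pointer values, which keeps the map ordering stable even after a texture
// expires, so expired keys can still be located and erased.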
class GLTextureTransferEngineDefault : public GLTextureTransferEngine {
using Parent = GLTextureTransferEngine;
public:
// Called once per frame by the GLBackend to manage texture memory
// Will deallocate textures if oversubscribed, grow allocations if undersubscribed,
// and otherwise process any pending transfers
void manageMemory() override;
void shutdown() override;
protected:
class TextureBufferThread : public QThread {
public:
TextureBufferThread(GLTextureTransferEngineDefault& parent) : _parent(parent) { start(); }
protected:
void run() override {
while (!_parent._shutdown) {
if (!_parent.processActiveBufferQueue()) {
QThread::msleep(NO_BUFFER_WORK_SLEEP_TIME_MS);
}
}
}
GLTextureTransferEngineDefault& _parent;
};
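// The buffering thread starts itself in its constructor and spins until the engine's
// _shutdown flag is set; shutdown() below waits on it with QThread::wait() before
// deleting it, so the engine must outlive the thread.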
using ActiveTransferJob = std::pair<TexturePointer, TransferJobPointer>;
using ActiveTransferQueue = std::list<ActiveTransferJob>;
void populateActiveBufferQueue();
bool processActiveBufferQueue();
void processTransferQueues();
void populateTransferQueue(const TexturePointer& texturePointer);
void updateMemoryPressure();
void processDemotes(size_t relief, const std::vector<TexturePointer>& strongTextures);
void processPromotes();
private:
std::atomic<bool> _shutdown{ false };
// Contains a priority sorted list of weak texture pointers that have been determined to be eligible for additional allocation
// While the memory state is 'undersubscribed', items will be removed from this list and processed, allocating additional memory
// per frame
WorkQueue _promoteQueue;
// This queue contains jobs that will buffer data from the texture backing store (ideally a memory mapped KTX file)
// to a CPU memory buffer. This queue is populated on the main GPU thread, and drained on a dedicated thread.
// When an item on the _activeBufferQueue is completed it is put into the _activeTransferQueue
ActiveTransferQueue _activeBufferQueue;
// This queue contains jobs that will upload data from a CPU buffer into a GPU. This queue is populated on the background
// thread that process the _activeBufferQueue and drained on the main GPU thread
ActiveTransferQueue _activeTransferQueue;
// Mutex protecting the _activeTransferQueue & _activeBufferQueue since they are each accessed both from the main GPU thread
// and the buffering thread
Mutex _bufferMutex;
// The buffering thread which drains the _activeBufferQueue and populates the _activeTransferQueue
TextureBufferThread* _transferThread{ nullptr };
// The amount of buffering work currently represented by the _activeBufferQueue
std::atomic<size_t> _queuedBufferSize{ 0 };
// This contains a map of all textures to queues of pending transfer jobs. While in the transfer state, this map is used to
// populate the _activeBufferQueue up to the limit specified in GLVariableAllocationTexture::MAX_BUFFER_SIZE
TransferMap _pendingTransfersMap;
};
}} // namespace gpu::gl
using namespace gpu;
using namespace gpu::gl;
void GLBackend::initTextureManagementStage() {
_textureManagement._transferEngine = std::make_shared<GLTextureTransferEngineDefault>();
}
void GLBackend::killTextureManagementStage() {
_textureManagement._transferEngine->shutdown();
_textureManagement._transferEngine.reset();
}
std::vector<TexturePointer> GLTextureTransferEngine::getAllTextures() {
auto expiredBegin = std::remove_if(_registeredTextures.begin(), _registeredTextures.end(), [&](const std::weak_ptr<Texture>& weak) -> bool {
return weak.expired();
});
_registeredTextures.erase(expiredBegin, _registeredTextures.end());
std::vector<TexturePointer> result;
result.reserve(_registeredTextures.size());
for (const auto& weak : _registeredTextures) {
auto strong = weak.lock();
if (!strong) {
continue;
}
result.push_back(strong);
}
return result;
}
void GLTextureTransferEngine::addMemoryManagedTexture(const TexturePointer& texturePointer) {
++_frameTexturesCreated;
_registeredTextures.push_back(texturePointer);
}
void GLTextureTransferEngineDefault::shutdown() {
_shutdown = true;
#if THREADED_TEXTURE_BUFFERING
if (_transferThread) {
_transferThread->wait();
delete _transferThread;
_transferThread = nullptr;
}
#endif
}
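// Per-frame flow, as implemented below:
//   manageMemory()               - called once per frame by the GLBackend
//     updateMemoryPressure()     - choose Idle / Undersubscribed / Transfer
//     processPromotes()          - Undersubscribed: grow the smallest eligible textures
//     processTransferQueues()    - Transfer: buffer on the worker thread, upload on the GPU thread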
void GLTextureTransferEngineDefault::manageMemory() {
PROFILE_RANGE(render_gpu_gl, __FUNCTION__);
// reset the count used to limit the number of textures created per frame
resetFrameTextureCreated();
// Determine the current memory management state. It will be either idle (no work to do),
// undersubscribed (need to do more allocation) or transfer (need to upload content from
// the backing store to the GPU).
updateMemoryPressure();
if (MemoryPressureState::Undersubscribed == _memoryPressureState) {
// If we're undersubscribed, we need to process some of the textures that can have additional allocation
processPromotes();
} else if (MemoryPressureState::Transfer == _memoryPressureState) {
// If we're in transfer mode we need to manage the buffering and upload queues
processTransferQueues();
}
}
// Each frame we will check if our memory pressure state has changed.
void GLTextureTransferEngineDefault::updateMemoryPressure() {
PROFILE_RANGE(render_gpu_gl, __FUNCTION__);
size_t allowedMemoryAllocation = gpu::Texture::getAllowedGPUMemoryUsage();
if (0 == allowedMemoryAllocation) {
allowedMemoryAllocation = GLBackend::getTotalMemory();
if (0 == allowedMemoryAllocation) {
// Last resort, if we failed to detect the available GPU memory
allowedMemoryAllocation = DEFAULT_ALLOWED_TEXTURE_MEMORY;
}
}
// Clear any defunct textures (weak pointers that no longer have a valid texture)
auto strongTextures = getAllTextures();
size_t totalVariableMemoryAllocation = 0;
size_t idealMemoryAllocation = 0;
bool canDemote = false;
bool canPromote = false;
bool hasTransfers = false;
for (const auto& texture : strongTextures) {
GLTexture* gltexture = Backend::getGPUObject<GLTexture>(*texture);
GLVariableAllocationSupport* vartexture = dynamic_cast<GLVariableAllocationSupport*>(gltexture);
vartexture->sanityCheck();
// Track how much the texture thinks it should be using
idealMemoryAllocation += texture->evalTotalSize();
// Track how much we're actually using
totalVariableMemoryAllocation += gltexture->size();
canDemote |= vartexture->canDemote();
canPromote |= vartexture->canPromote();
hasTransfers |= vartexture->hasPendingTransfers();
}
Backend::textureResourceIdealGPUMemSize.set(idealMemoryAllocation);
size_t unallocated = idealMemoryAllocation - totalVariableMemoryAllocation;
float pressure = (float)totalVariableMemoryAllocation / (float)allowedMemoryAllocation;
// If we're oversubscribed we need to demote textures IMMEDIATELY
if (pressure > OVERSUBSCRIBED_PRESSURE_VALUE && canDemote) {
auto overPressure = pressure - OVERSUBSCRIBED_PRESSURE_VALUE;
size_t relief = (size_t)(overPressure * totalVariableMemoryAllocation);
processDemotes(relief, strongTextures);
return;
}
auto newState = MemoryPressureState::Idle;
if (pressure < UNDERSUBSCRIBED_PRESSURE_VALUE && (unallocated != 0 && canPromote)) {
newState = MemoryPressureState::Undersubscribed;
} else if (hasTransfers) {
newState = MemoryPressureState::Transfer;
} else {
Lock lock(_bufferMutex);
if (!_activeBufferQueue.empty() || !_activeTransferQueue.empty() || !_pendingTransfersMap.empty()) {
newState = MemoryPressureState::Transfer;
}
}
// If we've changed state then we have to populate the appropriate structure with the work to be done
if (newState != _memoryPressureState) {
_memoryPressureState = newState;
_promoteQueue = WorkQueue();
_pendingTransfersMap.clear();
if (MemoryPressureState::Idle == _memoryPressureState) {
return;
}
// For each texture, if it's eligible for work in the current state, put it into the appropriate structure
for (const auto& texture : strongTextures) {
GLTexture* gltexture = Backend::getGPUObject<GLTexture>(*texture);
GLVariableAllocationSupport* vargltexture = dynamic_cast<GLVariableAllocationSupport*>(gltexture);
if (MemoryPressureState::Undersubscribed == _memoryPressureState && vargltexture->canPromote()) {
// Promote smallest first
_promoteQueue.push({ texture, 1.0f / (float)gltexture->size() });
} else if (MemoryPressureState::Transfer == _memoryPressureState && vargltexture->hasPendingTransfers()) {
populateTransferQueue(texture);
}
}
}
}
// Manage the _activeBufferQueue and _activeTransferQueue queues
void GLTextureTransferEngineDefault::processTransferQueues() {
#if THREADED_TEXTURE_BUFFERING
if (!_transferThread) {
_transferThread = new TextureBufferThread(*this);
}
#endif
// From the pendingTransferMap, queue jobs into the _activeBufferQueue
// Doing so will lock the weak texture pointer so that it can't be destroyed
// while the background thread is working.
//
// This will queue jobs until _queuedBufferSize can't be increased without exceeding
// GLVariableAllocationTexture::MAX_BUFFER_SIZE or there is no more work to be done
populateActiveBufferQueue();
#if !THREADED_TEXTURE_BUFFERING
processActiveBufferQueue();
#endif
// Take any tasks which have completed buffering and process them, uploading the buffered
// data to the GPU. Drains the _activeTransferQueue
{
ActiveTransferQueue activeTransferQueue;
{
Lock lock(_bufferMutex);
activeTransferQueue.swap(_activeTransferQueue);
}
while (!activeTransferQueue.empty()) {
const auto& activeTransferJob = activeTransferQueue.front();
const auto& texturePointer = activeTransferJob.first;
GLTexture* gltexture = Backend::getGPUObject<GLTexture>(*texturePointer);
GLVariableAllocationSupport* vargltexture = dynamic_cast<GLVariableAllocationSupport*>(gltexture);
const auto& transferJob = activeTransferJob.second;
if (transferJob->sourceMip() < vargltexture->populatedMip()) {
transferJob->transfer(texturePointer);
}
// The pop_front MUST be the last call since all of these variables in scope are
// references that will be invalid after the pop
activeTransferQueue.pop_front();
}
}
// If we have no more work in any of the structures, reset the memory state to idle to
// force reconstruction of the _pendingTransfersMap if necessary
{
Lock lock(_bufferMutex);
if (_activeTransferQueue.empty() && _activeBufferQueue.empty() && _pendingTransfersMap.empty()) {
_memoryPressureState = MemoryPressureState::Idle;
}
}
}
void GLTextureTransferEngineDefault::populateActiveBufferQueue() {
size_t queuedBufferSize = _queuedBufferSize;
static const auto& MAX_BUFFER_SIZE = GLVariableAllocationSupport::MAX_BUFFER_SIZE;
Q_ASSERT(queuedBufferSize <= MAX_BUFFER_SIZE);
size_t availableBufferSize = MAX_BUFFER_SIZE - queuedBufferSize;
// Queue up buffering jobs
ActiveTransferQueue newBufferJobs;
size_t newTransferSize{ 0 };
for (auto itr = _pendingTransfersMap.begin(); itr != _pendingTransfersMap.end();) {
const auto& weakTexture = itr->first;
const auto texture = weakTexture.lock();
// Texture no longer exists, remove from the transfer map and move on
if (!texture) {
itr = _pendingTransfersMap.erase(itr);
continue;
}
GLTexture* gltexture = Backend::getGPUObject<GLTexture>(*texture);
GLVariableAllocationSupport* vargltexture = dynamic_cast<GLVariableAllocationSupport*>(gltexture);
auto& textureTransferQueue = itr->second;
// Can't find any pending transfers, so move on
if (textureTransferQueue.empty()) {
if (vargltexture->hasPendingTransfers()) {
qWarning(gpugllogging) << "Texture " << gltexture->_id << "(" << texture->source().c_str() << ") has no transfer jobs, but has pending transfers";
}
itr = _pendingTransfersMap.erase(itr);
continue;
}
const auto& transferJob = textureTransferQueue.front();
const auto& transferSize = transferJob->size();
// If there's not enough space for the buffering, then break out of the loop
if (transferSize > availableBufferSize) {
break;
}
availableBufferSize -= transferSize;
Q_ASSERT(availableBufferSize <= MAX_BUFFER_SIZE);
Q_ASSERT(newTransferSize <= MAX_BUFFER_SIZE);
newTransferSize += transferSize;
Q_ASSERT(newTransferSize <= MAX_BUFFER_SIZE);
newBufferJobs.emplace_back(texture, transferJob);
textureTransferQueue.pop();
++itr;
}
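// std::list::splice moves the new jobs into the shared queue in O(1) by relinking
// nodes, without copying the texture or job pointers, so the lock is held only briefly.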
{
Lock lock(_bufferMutex);
_activeBufferQueue.splice(_activeBufferQueue.end(), newBufferJobs);
Q_ASSERT(_queuedBufferSize <= MAX_BUFFER_SIZE);
_queuedBufferSize += newTransferSize;
Q_ASSERT(_queuedBufferSize <= MAX_BUFFER_SIZE);
}
}
bool GLTextureTransferEngineDefault::processActiveBufferQueue() {
ActiveTransferQueue activeBufferQueue;
{
Lock lock(_bufferMutex);
_activeBufferQueue.swap(activeBufferQueue);
}
if (activeBufferQueue.empty()) {
return false;
}
for (const auto& activeJob : activeBufferQueue) {
const auto& texture = activeJob.first;
const auto& transferJob = activeJob.second;
// Some jobs don't require buffering, but they pass through this queue to ensure that we don't re-order
// the buffering & transfer operations. All jobs in the queue must be processed in order.
if (!transferJob->bufferingRequired()) {
continue;
}
const auto& transferSize = transferJob->size();
transferJob->buffer(texture);
Q_ASSERT(_queuedBufferSize >= transferSize);
_queuedBufferSize -= transferSize;
}
{
Lock lock(_bufferMutex);
_activeTransferQueue.splice(_activeTransferQueue.end(), activeBufferQueue);
}
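// Close any KTX backing-file handles opened while buffering, so memory-mapped files
// are not held between batches (an assumption based on the call name; the backing
// store is described above as ideally a memory-mapped KTX file).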
Texture::KtxStorage::releaseOpenKtxFiles();
return true;
}
void GLTextureTransferEngineDefault::populateTransferQueue(const TexturePointer& texturePointer) {
TextureWeakPointer weakTexture = texturePointer;
GLTexture* gltexture = Backend::getGPUObject<GLTexture>(*texturePointer);
GLVariableAllocationSupport* vargltexture = dynamic_cast<GLVariableAllocationSupport*>(gltexture);
TransferJob::Queue pendingTransfers;
PROFILE_RANGE(render_gpu_gl, __FUNCTION__);
vargltexture->populateTransferQueue(pendingTransfers);
if (!pendingTransfers.empty()) {
_pendingTransfersMap[weakTexture] = pendingTransfers;
}
}
// Drain the queue of textures eligible for promotion, allocating additional memory per frame
void GLTextureTransferEngineDefault::processPromotes() {
// FIXME use max allocated memory per frame instead of promotion count
static const size_t MAX_ALLOCATED_BYTES_PER_FRAME = GLVariableAllocationSupport::MAX_BUFFER_SIZE;
static const size_t MAX_ALLOCATIONS_PER_FRAME = 8;
size_t allocatedBytes{ 0 };
size_t allocations{ 0 };
while (!_promoteQueue.empty()) {
// Grab the highest-priority item off the promote queue
PROFILE_RANGE(render_gpu_gl, __FUNCTION__);
auto entry = _promoteQueue.top();
_promoteQueue.pop();
auto texture = entry.first.lock();
if (!texture) {
continue;
}
GLTexture* gltexture = Backend::getGPUObject<GLTexture>(*texture);
GLVariableAllocationSupport* vartexture = dynamic_cast<GLVariableAllocationSupport*>(gltexture);
auto originalSize = gltexture->size();
vartexture->promote();
auto allocationDelta = gltexture->size() - originalSize;
if (vartexture->canPromote()) {
// Promote smallest first
_promoteQueue.push({ texture, 1.0f / (float)gltexture->size() });
}
allocatedBytes += allocationDelta;
if (++allocations >= MAX_ALLOCATIONS_PER_FRAME) {
break;
}
if (allocatedBytes >= MAX_ALLOCATED_BYTES_PER_FRAME) {
break;
}
}
// If the promote queue has been drained, reset to idle to force a rebuild of the work queue
if (_promoteQueue.empty()) {
_memoryPressureState = MemoryPressureState::Idle;
}
}
void GLTextureTransferEngineDefault::processDemotes(size_t reliefRequired, const std::vector<TexturePointer>& strongTextures) {
// Demote largest first
ImmediateWorkQueue demoteQueue;
for (const auto& texture : strongTextures) {
GLTexture* gltexture = Backend::getGPUObject<GLTexture>(*texture);
GLVariableAllocationSupport* vargltexture = dynamic_cast<GLVariableAllocationSupport*>(gltexture);
if (vargltexture->canDemote()) {
demoteQueue.push({ texture, (float)gltexture->size() });
}
}
size_t relieved = 0;
while (!demoteQueue.empty() && relieved < reliefRequired) {
{
const auto& target = demoteQueue.top();
const auto& texture = target.first;
GLTexture* gltexture = Backend::getGPUObject<GLTexture>(*texture);
auto oldSize = gltexture->size();
GLVariableAllocationSupport* vargltexture = dynamic_cast<GLVariableAllocationSupport*>(gltexture);
vargltexture->demote();
auto newSize = gltexture->size();
relieved += (oldSize - newSize);
}
demoteQueue.pop();
}
}
// FIXME hack for stats display
QString getTextureMemoryPressureModeString() {
switch (_memoryPressureState) {
case MemoryPressureState::Undersubscribed:
return "Undersubscribed";
case MemoryPressureState::Transfer:
return "Transfer";
case MemoryPressureState::Idle:
return "Idle";
}
Q_UNREACHABLE();
return "Unknown";
}