Faster texture transfers

Brad Davis 2018-05-10 12:08:20 -07:00
parent 40ff3f966f
commit fb81cf927a
15 changed files with 866 additions and 816 deletions

View file

@ -44,9 +44,9 @@ GLBackend::CommandCall GLBackend::_commandCalls[Batch::NUM_COMMANDS] =
(&::gpu::gl::GLBackend::do_setModelTransform),
(&::gpu::gl::GLBackend::do_setViewTransform),
(&::gpu::gl::GLBackend::do_setProjectionTransform),
(&::gpu::gl::GLBackend::do_setProjectionJitter),
(&::gpu::gl::GLBackend::do_setViewportTransform),
(&::gpu::gl::GLBackend::do_setDepthRangeTransform),
(&::gpu::gl::GLBackend::do_setPipeline),
@ -118,12 +118,6 @@ void GLBackend::init() {
#if !defined(USE_GLES)
qCDebug(gpugllogging, "V-Sync is %s\n", (::gl::getSwapInterval() > 0 ? "ON" : "OFF"));
#endif
#if THREADED_TEXTURE_BUFFERING
// This has to happen on the main thread in order to give the thread
// pool a reasonable parent object
GLVariableAllocationSupport::TransferJob::startBufferingThread();
#endif
});
}
@ -136,6 +130,7 @@ GLBackend::GLBackend() {
GLBackend::~GLBackend() {
killInput();
killTransform();
killTextureManagementStage();
}
void GLBackend::renderPassTransfer(const Batch& batch) {
@ -167,18 +162,18 @@ void GLBackend::renderPassTransfer(const Batch& batch) {
case Batch::COMMAND_drawIndexedInstanced:
case Batch::COMMAND_multiDrawIndirect:
case Batch::COMMAND_multiDrawIndexedIndirect:
{
Vec2u outputSize{ 1,1 };
if (_output._framebuffer) {
outputSize.x = _output._framebuffer->getWidth();
outputSize.y = _output._framebuffer->getHeight();
} else if (glm::dot(_transform._projectionJitter, _transform._projectionJitter) > 0.0f) {
qCWarning(gpugllogging) << "Jittering requires a framebuffer to be set";
}
_transform.preUpdate(_commandIndex, _stereo, outputSize);
}
break;
case Batch::COMMAND_disableContextStereo:
@ -191,10 +186,10 @@ void GLBackend::renderPassTransfer(const Batch& batch) {
case Batch::COMMAND_setViewportTransform:
case Batch::COMMAND_setViewTransform:
case Batch::COMMAND_setProjectionTransform:
case Batch::COMMAND_setProjectionJitter:
{
CommandCall call = _commandCalls[(*command)];
(this->*(call))(batch, *offset);
break;
}
@ -268,8 +263,8 @@ void GLBackend::render(const Batch& batch) {
if (!batch.isStereoEnabled()) {
_stereo._enable = false;
}
// Reset jitter
_transform._projectionJitter = Vec2(0.0f, 0.0f);
{
PROFILE_RANGE(render_gpu_gl_detail, "Transfer");
@ -729,9 +724,8 @@ void GLBackend::recycle() const {
glDeleteQueries((GLsizei)ids.size(), ids.data());
}
}
GLVariableAllocationSupport::manageMemory();
GLVariableAllocationSupport::_frameTexturesCreated = 0;
_textureManagement._transferEngine->manageMemory();
Texture::KtxStorage::releaseOpenKtxFiles();
}
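Taken together, the GLBackend changes above replace the static GLVariableAllocationSupport entry points with a transfer-engine instance owned by the backend. A minimal sketch of the resulting lifecycle, stitched together from the hunks in this file (bodies abbreviated):

// Sketch only: the engine is created with the backend, driven once per frame, and torn down with it.
void GLBackend::initTextureManagementStage() {
    _textureManagement._transferEngine = std::make_shared<GLTextureTransferEngineDefault>();
}
void GLBackend::recycle() const {
    // ... release dead GL objects ...
    _textureManagement._transferEngine->manageMemory();  // replaces the old static manageMemory()
}
GLBackend::~GLBackend() {
    killTextureManagementStage();  // shutdown() the engine, then drop the shared pointer
}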

View file

@ -491,8 +491,10 @@ protected:
struct TextureManagementStageState {
bool _sparseCapable { false };
GLTextureTransferEnginePointer _transferEngine;
} _textureManagement;
virtual void initTextureManagementStage() {}
virtual void initTextureManagementStage();
virtual void killTextureManagementStage();
typedef void (GLBackend::*CommandCall)(const Batch&, size_t);
static CommandCall _commandCalls[Batch::NUM_COMMANDS];

View file

@ -137,6 +137,8 @@ class GLQuery;
class GLState;
class GLShader;
class GLTexture;
class GLTextureTransferEngine;
using GLTextureTransferEnginePointer = std::shared_ptr<GLTextureTransferEngine>;
struct ShaderObject;
} } // namespace gpu::gl

View file

@ -48,6 +48,14 @@ const GLFilterMode GLTexture::FILTER_MODES[Sampler::NUM_FILTERS] = {
{ GL_LINEAR_MIPMAP_LINEAR, GL_LINEAR } //FILTER_ANISOTROPIC,
};
static constexpr size_t MAX_PIXEL_BYTE_SIZE{ 4 };
static constexpr size_t MAX_TRANSFER_DIMENSION{ 1024 };
const uvec3 GLVariableAllocationSupport::MAX_TRANSFER_DIMENSIONS{ MAX_TRANSFER_DIMENSION, MAX_TRANSFER_DIMENSION, 1 };
const uvec3 GLVariableAllocationSupport::INITIAL_MIP_TRANSFER_DIMENSIONS{ 64, 64, 1 };
const size_t GLVariableAllocationSupport::MAX_TRANSFER_SIZE = MAX_TRANSFER_DIMENSION * MAX_TRANSFER_DIMENSION * MAX_PIXEL_BYTE_SIZE;
const size_t GLVariableAllocationSupport::MAX_BUFFER_SIZE = MAX_TRANSFER_SIZE;
GLenum GLTexture::getGLTextureType(const Texture& texture) {
switch (texture.getType()) {
case Texture::TEX_2D:
@ -131,7 +139,6 @@ Size GLTexture::copyMipFaceFromTexture(uint16_t sourceMip, uint16_t targetMip, u
return 0;
}
GLExternalTexture::GLExternalTexture(const std::weak_ptr<GLBackend>& backend, const Texture& texture, GLuint id)
: Parent(backend, texture, id) {
Backend::textureExternalCount.increment();
@ -151,65 +158,58 @@ GLExternalTexture::~GLExternalTexture() {
Backend::textureExternalCount.decrement();
}
// Variable sized textures
using MemoryPressureState = GLVariableAllocationSupport::MemoryPressureState;
using WorkQueue = GLVariableAllocationSupport::WorkQueue;
using TransferJobPointer = GLVariableAllocationSupport::TransferJobPointer;
std::list<TextureWeakPointer> GLVariableAllocationSupport::_memoryManagedTextures;
MemoryPressureState GLVariableAllocationSupport::_memoryPressureState { MemoryPressureState::Idle };
std::atomic<bool> GLVariableAllocationSupport::_memoryPressureStateStale { false };
const uvec3 GLVariableAllocationSupport::INITIAL_MIP_TRANSFER_DIMENSIONS { 64, 64, 1 };
WorkQueue GLVariableAllocationSupport::_transferQueue;
WorkQueue GLVariableAllocationSupport::_promoteQueue;
WorkQueue GLVariableAllocationSupport::_demoteQueue;
size_t GLVariableAllocationSupport::_frameTexturesCreated { 0 };
#define OVERSUBSCRIBED_PRESSURE_VALUE 0.95f
#define UNDERSUBSCRIBED_PRESSURE_VALUE 0.85f
#define DEFAULT_ALLOWED_TEXTURE_MEMORY_MB ((size_t)1024)
static const size_t DEFAULT_ALLOWED_TEXTURE_MEMORY = MB_TO_BYTES(DEFAULT_ALLOWED_TEXTURE_MEMORY_MB);
using TransferJob = GLVariableAllocationSupport::TransferJob;
const uvec3 GLVariableAllocationSupport::MAX_TRANSFER_DIMENSIONS { 1024, 1024, 1 };
const size_t GLVariableAllocationSupport::MAX_TRANSFER_SIZE = GLVariableAllocationSupport::MAX_TRANSFER_DIMENSIONS.x * GLVariableAllocationSupport::MAX_TRANSFER_DIMENSIONS.y * 4;
#if THREADED_TEXTURE_BUFFERING
TexturePointer GLVariableAllocationSupport::_currentTransferTexture;
TransferJobPointer GLVariableAllocationSupport::_currentTransferJob;
QThreadPool* TransferJob::_bufferThreadPool { nullptr };
void TransferJob::startBufferingThread() {
static std::once_flag once;
std::call_once(once, [&] {
_bufferThreadPool = new QThreadPool(qApp);
_bufferThreadPool->setMaxThreadCount(1);
});
}
#endif
GLVariableAllocationSupport::GLVariableAllocationSupport() {
}
GLVariableAllocationSupport::~GLVariableAllocationSupport() {
}
TransferJob::TransferJob(const GLTexture& parent, uint16_t sourceMip, uint16_t targetMip, uint8_t face, uint32_t lines, uint32_t lineOffset)
: _parent(parent) {
auto transferDimensions = _parent._gpuObject.evalMipDimensions(sourceMip);
void GLVariableAllocationSupport::incrementPopulatedSize(Size delta) const {
_populatedSize += delta;
// Keep the two code paths separate to make debugging easier
if (_size < _populatedSize) {
Backend::textureResourcePopulatedGPUMemSize.update(0, delta);
} else {
Backend::textureResourcePopulatedGPUMemSize.update(0, delta);
}
}
void GLVariableAllocationSupport::decrementPopulatedSize(Size delta) const {
_populatedSize -= delta;
// Keep the two code paths separate to make debugging easier
if (_size < _populatedSize) {
Backend::textureResourcePopulatedGPUMemSize.update(delta, 0);
} else {
Backend::textureResourcePopulatedGPUMemSize.update(delta, 0);
}
}
void GLVariableAllocationSupport::sanityCheck() const {
if (_populatedMip < _allocatedMip) {
qCWarning(gpugllogging) << "Invalid mip levels";
}
}
TransferJob::TransferJob(const Texture& texture,
uint16_t sourceMip,
uint16_t targetMip,
uint8_t face,
uint32_t lines,
uint32_t lineOffset) {
auto transferDimensions = texture.evalMipDimensions(sourceMip);
GLenum format;
GLenum internalFormat;
GLenum type;
GLTexelFormat texelFormat = GLTexelFormat::evalGLTexelFormat(_parent._gpuObject.getTexelFormat(), _parent._gpuObject.getStoredMipFormat());
GLTexelFormat texelFormat = GLTexelFormat::evalGLTexelFormat(texture.getTexelFormat(), texture.getStoredMipFormat());
format = texelFormat.format;
internalFormat = texelFormat.internalFormat;
type = texelFormat.type;
_transferSize = _parent._gpuObject.getStoredMipFaceSize(sourceMip, face);
_transferSize = texture.getStoredMipFaceSize(sourceMip, face);
// If we're copying a subsection of the mip, do additional calculations to find the size and offset of the segment
if (0 != lines) {
transferDimensions.y = lines;
auto dimensions = _parent._gpuObject.evalMipDimensions(sourceMip);
auto dimensions = texture.evalMipDimensions(sourceMip);
auto bytesPerLine = (uint32_t)_transferSize / dimensions.y;
_transferOffset = bytesPerLine * lineOffset;
_transferSize = bytesPerLine * lines;
@ -222,481 +222,34 @@ TransferJob::TransferJob(const GLTexture& parent, uint16_t sourceMip, uint16_t t
}
// Buffering can invoke disk IO, so it should be kept off the main and render threads
_bufferingLambda = [=] {
auto mipStorage = _parent._gpuObject.accessStoredMipFace(sourceMip, face);
_bufferingLambda = [=](const TexturePointer& texture) {
auto mipStorage = texture->accessStoredMipFace(sourceMip, face);
if (mipStorage) {
_mipData = mipStorage->createView(_transferSize, _transferOffset);
} else {
qCWarning(gpugllogging) << "Buffering failed because mip could not be retrieved from texture " << _parent._source.c_str() ;
qCWarning(gpugllogging) << "Buffering failed because mip could not be retrieved from texture "
<< texture->source().c_str();
}
};
_transferLambda = [=] {
_transferLambda = [=](const TexturePointer& texture) {
if (_mipData) {
_parent.copyMipFaceLinesFromTexture(targetMip, face, transferDimensions, lineOffset, internalFormat, format, type, _mipData->size(), _mipData->readData());
auto gltexture = Backend::getGPUObject<GLTexture>(*texture);
gltexture->copyMipFaceLinesFromTexture(targetMip, face, transferDimensions, lineOffset, internalFormat, format,
type, _mipData->size(), _mipData->readData());
_mipData.reset();
} else {
qCWarning(gpugllogging) << "Transfer failed because mip could not be retrieved from texture " << _parent._source.c_str();
qCWarning(gpugllogging) << "Transfer failed because mip could not be retrieved from texture "
<< texture->source().c_str();
}
};
}
TransferJob::TransferJob(const GLTexture& parent, std::function<void()> transferLambda)
: _parent(parent), _bufferingRequired(false), _transferLambda(transferLambda) {
}
TransferJob::TransferJob(const std::function<void()>& transferLambda) :
_bufferingRequired(false), _transferLambda([=](const TexturePointer&) { transferLambda(); }) {}
TransferJob::~TransferJob() {
Backend::texturePendingGPUTransferMemSize.update(_transferSize, 0);
}
bool TransferJob::tryTransfer() {
#if THREADED_TEXTURE_BUFFERING
// Are we ready to transfer
if (!bufferingCompleted()) {
startBuffering();
return false;
}
#else
if (_bufferingRequired) {
_bufferingLambda();
}
#endif
_transferLambda();
return true;
}
#if THREADED_TEXTURE_BUFFERING
bool TransferJob::bufferingRequired() const {
if (!_bufferingRequired) {
return false;
}
// The default state of a QFuture is with status Canceled | Started | Finished,
// so we have to check isCanceled before we check the actual state
if (_bufferingStatus.isCanceled()) {
return true;
}
return !_bufferingStatus.isStarted();
}
bool TransferJob::bufferingCompleted() const {
if (!_bufferingRequired) {
return true;
}
// The default state of a QFuture is with status Canceled | Started | Finished,
// so we have to check isCanceled before we check the actual state
if (_bufferingStatus.isCanceled()) {
return false;
}
return _bufferingStatus.isFinished();
}
void TransferJob::startBuffering() {
if (bufferingRequired()) {
assert(_bufferingStatus.isCanceled());
_bufferingStatus = QtConcurrent::run(_bufferThreadPool, [=] {
_bufferingLambda();
});
assert(!_bufferingStatus.isCanceled());
assert(_bufferingStatus.isStarted());
}
}
#endif
GLVariableAllocationSupport::GLVariableAllocationSupport() {
_memoryPressureStateStale = true;
}
GLVariableAllocationSupport::~GLVariableAllocationSupport() {
_memoryPressureStateStale = true;
}
void GLVariableAllocationSupport::addMemoryManagedTexture(const TexturePointer& texturePointer) {
_memoryManagedTextures.push_back(texturePointer);
if (MemoryPressureState::Idle != _memoryPressureState) {
addToWorkQueue(texturePointer);
}
}
void GLVariableAllocationSupport::addToWorkQueue(const TexturePointer& texturePointer) {
GLTexture* gltexture = Backend::getGPUObject<GLTexture>(*texturePointer);
GLVariableAllocationSupport* vargltexture = dynamic_cast<GLVariableAllocationSupport*>(gltexture);
switch (_memoryPressureState) {
case MemoryPressureState::Oversubscribed:
if (vargltexture->canDemote()) {
// Demote largest first
_demoteQueue.push({ texturePointer, (float)gltexture->size() });
}
break;
case MemoryPressureState::Undersubscribed:
if (vargltexture->canPromote()) {
// Promote smallest first
_promoteQueue.push({ texturePointer, 1.0f / (float)gltexture->size() });
}
break;
case MemoryPressureState::Transfer:
if (vargltexture->hasPendingTransfers()) {
// Transfer priority given to smaller mips first
_transferQueue.push({ texturePointer, 1.0f / (float)gltexture->_gpuObject.evalMipSize(vargltexture->_populatedMip) });
}
break;
case MemoryPressureState::Idle:
Q_UNREACHABLE();
break;
}
}
WorkQueue& GLVariableAllocationSupport::getActiveWorkQueue() {
static WorkQueue empty;
switch (_memoryPressureState) {
case MemoryPressureState::Oversubscribed:
return _demoteQueue;
case MemoryPressureState::Undersubscribed:
return _promoteQueue;
case MemoryPressureState::Transfer:
return _transferQueue;
case MemoryPressureState::Idle:
Q_UNREACHABLE();
break;
}
return empty;
}
// FIXME hack for stats display
QString getTextureMemoryPressureModeString() {
switch (GLVariableAllocationSupport::_memoryPressureState) {
case MemoryPressureState::Oversubscribed:
return "Oversubscribed";
case MemoryPressureState::Undersubscribed:
return "Undersubscribed";
case MemoryPressureState::Transfer:
return "Transfer";
case MemoryPressureState::Idle:
return "Idle";
}
Q_UNREACHABLE();
return "Unknown";
}
void GLVariableAllocationSupport::updateMemoryPressure() {
static size_t lastAllowedMemoryAllocation = gpu::Texture::getAllowedGPUMemoryUsage();
size_t allowedMemoryAllocation = gpu::Texture::getAllowedGPUMemoryUsage();
if (0 == allowedMemoryAllocation) {
allowedMemoryAllocation = DEFAULT_ALLOWED_TEXTURE_MEMORY;
}
// If the user explicitly changed the allowed memory usage, we need to mark ourselves stale
// so that we react
if (allowedMemoryAllocation != lastAllowedMemoryAllocation) {
_memoryPressureStateStale = true;
lastAllowedMemoryAllocation = allowedMemoryAllocation;
}
if (!_memoryPressureStateStale.exchange(false)) {
return;
}
PROFILE_RANGE(render_gpu_gl, __FUNCTION__);
// Clear any defunct textures (weak pointers that no longer have a valid texture)
_memoryManagedTextures.remove_if([&](const TextureWeakPointer& weakPointer) {
return weakPointer.expired();
});
// Convert weak pointers to strong. This new list may still contain nulls if a texture was
// deleted on another thread between the previous line and this one
std::vector<TexturePointer> strongTextures; {
strongTextures.reserve(_memoryManagedTextures.size());
std::transform(
_memoryManagedTextures.begin(), _memoryManagedTextures.end(),
std::back_inserter(strongTextures),
[](const TextureWeakPointer& p) { return p.lock(); });
}
size_t totalVariableMemoryAllocation = 0;
size_t idealMemoryAllocation = 0;
bool canDemote = false;
bool canPromote = false;
bool hasTransfers = false;
for (const auto& texture : strongTextures) {
// Race conditions can still leave nulls in the list, so we need to check
if (!texture) {
continue;
}
GLTexture* gltexture = Backend::getGPUObject<GLTexture>(*texture);
GLVariableAllocationSupport* vartexture = dynamic_cast<GLVariableAllocationSupport*>(gltexture);
// Track how much the texture thinks it should be using
idealMemoryAllocation += texture->evalTotalSize();
// Track how much we're actually using
totalVariableMemoryAllocation += gltexture->size();
canDemote |= vartexture->canDemote();
canPromote |= vartexture->canPromote();
hasTransfers |= vartexture->hasPendingTransfers();
}
size_t unallocated = idealMemoryAllocation - totalVariableMemoryAllocation;
float pressure = (float)totalVariableMemoryAllocation / (float)allowedMemoryAllocation;
auto newState = MemoryPressureState::Idle;
if (pressure < UNDERSUBSCRIBED_PRESSURE_VALUE && (unallocated != 0 && canPromote)) {
newState = MemoryPressureState::Undersubscribed;
} else if (pressure > OVERSUBSCRIBED_PRESSURE_VALUE && canDemote) {
newState = MemoryPressureState::Oversubscribed;
} else if (hasTransfers) {
newState = MemoryPressureState::Transfer;
}
if (newState != _memoryPressureState) {
_memoryPressureState = newState;
// Clear the existing queue
_transferQueue = WorkQueue();
_promoteQueue = WorkQueue();
_demoteQueue = WorkQueue();
// Populate the existing textures into the queue
if (_memoryPressureState != MemoryPressureState::Idle) {
for (const auto& texture : strongTextures) {
// Race conditions can still leave nulls in the list, so we need to check
if (!texture) {
continue;
}
addToWorkQueue(texture);
}
}
}
}
TexturePointer GLVariableAllocationSupport::getNextWorkQueueItem(WorkQueue& workQueue) {
while (!workQueue.empty()) {
auto workTarget = workQueue.top();
auto texture = workTarget.first.lock();
if (!texture) {
workQueue.pop();
continue;
}
// Check whether the resulting texture can actually have work performed
GLTexture* gltexture = Backend::getGPUObject<GLTexture>(*texture);
GLVariableAllocationSupport* vartexture = dynamic_cast<GLVariableAllocationSupport*>(gltexture);
switch (_memoryPressureState) {
case MemoryPressureState::Oversubscribed:
if (vartexture->canDemote()) {
return texture;
}
break;
case MemoryPressureState::Undersubscribed:
if (vartexture->canPromote()) {
return texture;
}
break;
case MemoryPressureState::Transfer:
if (vartexture->hasPendingTransfers()) {
return texture;
}
break;
case MemoryPressureState::Idle:
Q_UNREACHABLE();
break;
}
// If we got here, then the texture has no work to do in the current state,
// so pop it off the queue and continue
workQueue.pop();
}
return TexturePointer();
}
void GLVariableAllocationSupport::processWorkQueue(WorkQueue& workQueue) {
if (workQueue.empty()) {
return;
}
// Get the front of the work queue to perform work
auto texture = getNextWorkQueueItem(workQueue);
if (!texture) {
return;
}
// Grab the first item off the demote queue
PROFILE_RANGE(render_gpu_gl, __FUNCTION__);
GLTexture* gltexture = Backend::getGPUObject<GLTexture>(*texture);
GLVariableAllocationSupport* vartexture = dynamic_cast<GLVariableAllocationSupport*>(gltexture);
switch (_memoryPressureState) {
case MemoryPressureState::Oversubscribed:
vartexture->demote();
workQueue.pop();
addToWorkQueue(texture);
_memoryPressureStateStale = true;
break;
case MemoryPressureState::Undersubscribed:
vartexture->promote();
workQueue.pop();
addToWorkQueue(texture);
_memoryPressureStateStale = true;
break;
case MemoryPressureState::Transfer:
if (vartexture->executeNextTransfer(texture)) {
workQueue.pop();
addToWorkQueue(texture);
#if THREADED_TEXTURE_BUFFERING
// Eagerly start the next buffering job if possible
texture = getNextWorkQueueItem(workQueue);
if (texture) {
gltexture = Backend::getGPUObject<GLTexture>(*texture);
vartexture = dynamic_cast<GLVariableAllocationSupport*>(gltexture);
vartexture->executeNextBuffer(texture);
}
#endif
}
break;
case MemoryPressureState::Idle:
Q_UNREACHABLE();
break;
}
}
void GLVariableAllocationSupport::processWorkQueues() {
if (MemoryPressureState::Idle == _memoryPressureState) {
return;
}
auto& workQueue = getActiveWorkQueue();
// Do work on the front of the queue
processWorkQueue(workQueue);
if (workQueue.empty()) {
_memoryPressureState = MemoryPressureState::Idle;
_memoryPressureStateStale = true;
}
}
void GLVariableAllocationSupport::manageMemory() {
PROFILE_RANGE(render_gpu_gl, __FUNCTION__);
updateMemoryPressure();
processWorkQueues();
}
bool GLVariableAllocationSupport::executeNextTransfer(const TexturePointer& currentTexture) {
#if THREADED_TEXTURE_BUFFERING
// If a transfer job is active on the buffering thread, but has not completed its buffering lambda,
// then we need to exit early, since we don't want to have the transfer job leave scope while it's
// being used in another thread -- See https://highfidelity.fogbugz.com/f/cases/4626
if (_currentTransferJob && !_currentTransferJob->bufferingCompleted()) {
return false;
}
#endif
if (_populatedMip <= _allocatedMip) {
#if THREADED_TEXTURE_BUFFERING
_currentTransferJob.reset();
_currentTransferTexture.reset();
#endif
return true;
}
// If the transfer queue is empty, rebuild it
if (_pendingTransfers.empty()) {
populateTransferQueue();
}
bool result = false;
if (!_pendingTransfers.empty()) {
#if THREADED_TEXTURE_BUFFERING
// If there is a current transfer, but it's not the top of the pending transfer queue, then it's an orphan, so we want to abandon it.
if (_currentTransferJob && _currentTransferJob != _pendingTransfers.front()) {
_currentTransferJob.reset();
}
if (!_currentTransferJob) {
// Keeping hold of a strong pointer to the transfer job ensures that if the pending transfer queue is rebuilt, the transfer job
// doesn't leave scope, causing a crash in the buffering thread
_currentTransferJob = _pendingTransfers.front();
// Keeping hold of a strong pointer during the transfer ensures that the transfer thread cannot try to access a destroyed texture
_currentTransferTexture = currentTexture;
}
// Transfer jobs buffer the texture data asynchronously because doing so may involve disk IO, so we attempt the
// transfer here and bail out if buffering has not yet completed
if (_currentTransferJob->tryTransfer()) {
_pendingTransfers.pop();
// Once a given job is finished, release the shared pointers keeping them alive
_currentTransferTexture.reset();
_currentTransferJob.reset();
result = true;
}
#else
if (_pendingTransfers.front()->tryTransfer()) {
_pendingTransfers.pop();
result = true;
}
#endif
}
return result;
}
#if THREADED_TEXTURE_BUFFERING
void GLVariableAllocationSupport::executeNextBuffer(const TexturePointer& currentTexture) {
if (_currentTransferJob && !_currentTransferJob->bufferingCompleted()) {
return;
}
// If the transfer queue is empty, rebuild it
if (_pendingTransfers.empty()) {
populateTransferQueue();
}
if (!_pendingTransfers.empty()) {
if (!_currentTransferJob) {
_currentTransferJob = _pendingTransfers.front();
_currentTransferTexture = currentTexture;
}
_currentTransferJob->startBuffering();
}
}
#endif
void GLVariableAllocationSupport::incrementPopulatedSize(Size delta) const {
_populatedSize += delta;
// Keep the two code paths separate to make debugging easier
if (_size < _populatedSize) {
Backend::textureResourcePopulatedGPUMemSize.update(0, delta);
} else {
Backend::textureResourcePopulatedGPUMemSize.update(0, delta);
}
}
void GLVariableAllocationSupport::decrementPopulatedSize(Size delta) const {
_populatedSize -= delta;
// Keep the two code paths separate to make debugging easier
if (_size < _populatedSize) {
Backend::textureResourcePopulatedGPUMemSize.update(delta, 0);
} else {
Backend::textureResourcePopulatedGPUMemSize.update(delta, 0);
}
}

View file

@ -16,8 +16,6 @@
#include "GLTexelFormat.h"
#include <thread>
#define THREADED_TEXTURE_BUFFERING 1
namespace gpu { namespace gl {
struct GLFilterMode {
@ -25,107 +23,92 @@ struct GLFilterMode {
GLint magFilter;
};
class GLTextureTransferEngine {
public:
using Pointer = std::shared_ptr<GLTextureTransferEngine>;
/// Called once per frame to perform any required memory management or transfer work
virtual void manageMemory() = 0;
virtual void shutdown() = 0;
/// Called whenever a client wants to create a new texture. This allows the transfer engine to
/// potentially limit the number of GL textures created per frame
bool allowCreate() const { return _frameTexturesCreated < MAX_RESOURCE_TEXTURES_PER_FRAME; }
/// Called whenever a client creates a new resource texture that should use managed memory
/// and incremental transfer
void addMemoryManagedTexture(const TexturePointer& texturePointer);
protected:
// Fetch all the currently active textures as strong pointers, while clearing the
// empty weak pointers out of _registeredTextures
std::vector<TexturePointer> getAllTextures();
void resetFrameTextureCreated() { _frameTexturesCreated = 0; }
private:
static const size_t MAX_RESOURCE_TEXTURES_PER_FRAME{ 2 };
size_t _frameTexturesCreated{ 0 };
std::list<TextureWeakPointer> _registeredTextures;
};
/**
A transfer job encapsulates an individual piece of work required to upload texture data to the GPU.
The work can be broken down into two parts, expressed as lambdas. The buffering lambda is responsible
for putting the data to be uploaded into a CPU memory buffer. The transfer lambda is responsible for
uploading the data from the CPU memory buffer to the GPU using OpenGL calls. Ideally the buffering lambda
will be executed on a separate thread from the OpenGL work to ensure that disk IO operations do not block
OpenGL calls.
Additionally, a TransferJob can encapsulate some kind of post-upload work that changes the state of the
GLTexture derived object wrapping the actual texture ID, such as changing the _populatedMip value once
a given mip level has been completely uploaded.
*/
class TransferJob {
public:
using Pointer = std::shared_ptr<TransferJob>;
using Queue = std::queue<Pointer>;
using Lambda = std::function<void(const TexturePointer&)>;
private:
Texture::PixelsPointer _mipData;
size_t _transferOffset{ 0 };
size_t _transferSize{ 0 };
bool _bufferingRequired{ true };
Lambda _transferLambda{ [](const TexturePointer&) {} };
Lambda _bufferingLambda{ [](const TexturePointer&) {} };
public:
TransferJob(const TransferJob& other) = delete;
TransferJob(const std::function<void()>& transferLambda);
TransferJob(const Texture& texture, uint16_t sourceMip, uint16_t targetMip, uint8_t face, uint32_t lines = 0, uint32_t lineOffset = 0);
~TransferJob();
const size_t& size() const { return _transferSize; }
bool bufferingRequired() const { return _bufferingRequired; }
void buffer(const TexturePointer& texture) { _bufferingLambda(texture); }
void transfer(const TexturePointer& texture) { _transferLambda(texture); }
};
using TransferJobPointer = std::shared_ptr<TransferJob>;
using TransferQueue = std::queue<TransferJobPointer>;
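To make the two-lambda split concrete, here is a minimal sketch of how a single job is meant to be driven; the thread attribution is illustrative, and the real scheduling lives in the transfer engine:

// Illustrative sketch only: buffering off the GL thread, upload on it.
TransferJobPointer job = std::make_shared<TransferJob>(*texture, sourceMip, targetMip, face);
if (job->bufferingRequired()) {
    job->buffer(texture);   // buffering thread: may read the KTX backing store, no GL calls
}
job->transfer(texture);     // GL thread: uploads the buffered CPU data to the GPU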
class GLVariableAllocationSupport {
friend class GLBackend;
public:
GLVariableAllocationSupport();
virtual ~GLVariableAllocationSupport();
virtual void populateTransferQueue(TransferQueue& pendingTransfers) = 0;
enum class MemoryPressureState {
Idle,
Transfer,
Oversubscribed,
Undersubscribed,
};
using QueuePair = std::pair<TextureWeakPointer, float>;
struct QueuePairLess {
bool operator()(const QueuePair& a, const QueuePair& b) {
return a.second < b.second;
}
};
using WorkQueue = std::priority_queue<QueuePair, std::vector<QueuePair>, QueuePairLess>;
class TransferJob {
using VoidLambda = std::function<void()>;
using VoidLambdaQueue = std::queue<VoidLambda>;
const GLTexture& _parent;
Texture::PixelsPointer _mipData;
size_t _transferOffset { 0 };
size_t _transferSize { 0 };
bool _bufferingRequired { true };
VoidLambda _transferLambda;
VoidLambda _bufferingLambda;
#if THREADED_TEXTURE_BUFFERING
// Indicates if a transfer from backing storage to internal storage has started
QFuture<void> _bufferingStatus;
static QThreadPool* _bufferThreadPool;
#endif
public:
TransferJob(const TransferJob& other) = delete;
TransferJob(const GLTexture& parent, std::function<void()> transferLambda);
TransferJob(const GLTexture& parent, uint16_t sourceMip, uint16_t targetMip, uint8_t face, uint32_t lines = 0, uint32_t lineOffset = 0);
~TransferJob();
bool tryTransfer();
#if THREADED_TEXTURE_BUFFERING
void startBuffering();
bool bufferingRequired() const;
bool bufferingCompleted() const;
static void startBufferingThread();
#endif
private:
void transfer();
};
using TransferJobPointer = std::shared_ptr<TransferJob>;
using TransferQueue = std::queue<TransferJobPointer>;
static MemoryPressureState _memoryPressureState;
public:
static void addMemoryManagedTexture(const TexturePointer& texturePointer);
protected:
static size_t _frameTexturesCreated;
static std::atomic<bool> _memoryPressureStateStale;
static std::list<TextureWeakPointer> _memoryManagedTextures;
static WorkQueue _transferQueue;
static WorkQueue _promoteQueue;
static WorkQueue _demoteQueue;
#if THREADED_TEXTURE_BUFFERING
static TexturePointer _currentTransferTexture;
static TransferJobPointer _currentTransferJob;
#endif
static const uvec3 INITIAL_MIP_TRANSFER_DIMENSIONS;
static const uvec3 MAX_TRANSFER_DIMENSIONS;
static const size_t MAX_TRANSFER_SIZE;
static void updateMemoryPressure();
static void processWorkQueues();
static void processWorkQueue(WorkQueue& workQueue);
static TexturePointer getNextWorkQueueItem(WorkQueue& workQueue);
static void addToWorkQueue(const TexturePointer& texture);
static WorkQueue& getActiveWorkQueue();
static void manageMemory();
//bool canPromoteNoAllocate() const { return _allocatedMip < _populatedMip; }
void sanityCheck() const;
bool canPromote() const { return _allocatedMip > _minAllocatedMip; }
bool canDemote() const { return _allocatedMip < _maxAllocatedMip; }
bool hasPendingTransfers() const { return _populatedMip > _allocatedMip; }
#if THREADED_TEXTURE_BUFFERING
void executeNextBuffer(const TexturePointer& currentTexture);
#endif
bool executeNextTransfer(const TexturePointer& currentTexture);
virtual void populateTransferQueue() = 0;
virtual void promote() = 0;
virtual void demote() = 0;
virtual size_t promote() = 0;
virtual size_t demote() = 0;
static const uvec3 MAX_TRANSFER_DIMENSIONS;
static const uvec3 INITIAL_MIP_TRANSFER_DIMENSIONS;
static const size_t MAX_TRANSFER_SIZE;
static const size_t MAX_BUFFER_SIZE;
protected:
// The amount of memory currently allocated
Size _size { 0 };
@ -148,10 +131,6 @@ protected:
// The lowest (highest resolution) mip that we will support, relative to the number
// of mips in the gpu::Texture object
uint16 _minAllocatedMip { 0 };
// Contains a series of lambdas that when executed will transfer data to the GPU, modify
// the _populatedMip and update the sampler in order to fully populate the allocated texture
// until _populatedMip == _allocatedMip
TransferQueue _pendingTransfers;
};
class GLTexture : public GLObject<Texture> {
@ -172,6 +151,9 @@ public:
static const std::vector<GLenum>& getFaceTargets(GLenum textureType);
static uint8_t getFaceCount(GLenum textureType);
static GLenum getGLTextureType(const Texture& texture);
virtual Size size() const = 0;
virtual Size copyMipFaceLinesFromTexture(uint16_t mip, uint8_t face, const uvec3& size, uint32_t yOffset, GLenum internalFormat, GLenum format, GLenum type, Size sourceSize, const void* sourcePointer) const = 0;
virtual Size copyMipFaceFromTexture(uint16_t sourceMip, uint16_t targetMip, uint8_t face) const final;
static const uint8_t TEXTURE_2D_NUM_FACES = 1;
static const uint8_t TEXTURE_CUBE_NUM_FACES = 6;
@ -180,12 +162,9 @@ public:
static const GLenum WRAP_MODES[Sampler::NUM_WRAP_MODES];
protected:
virtual Size size() const = 0;
virtual void generateMips() const = 0;
virtual void syncSampler() const = 0;
virtual Size copyMipFaceLinesFromTexture(uint16_t mip, uint8_t face, const uvec3& size, uint32_t yOffset, GLenum internalFormat, GLenum format, GLenum type, Size sourceSize, const void* sourcePointer) const = 0;
virtual Size copyMipFaceFromTexture(uint16_t sourceMip, uint16_t targetMip, uint8_t face) const final;
virtual void copyTextureMipsInGPUMem(GLuint srcId, GLuint destId, uint16_t srcMipOffset, uint16_t destMipOffset, uint16_t populatedMips) {} // Only relevant for Variable Allocation textures
GLTexture(const std::weak_ptr<gl::GLBackend>& backend, const Texture& texture, GLuint id);
@ -205,7 +184,6 @@ protected:
Size size() const override { return 0; }
};
} }
#endif
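The signature changes above define the new contract for concrete variable-allocation textures: promote() and demote() report the byte delta they produced, so the engine can budget work per frame, and populateTransferQueue() fills a caller-owned queue instead of a member. A hypothetical subclass sketch (not part of the commit) illustrating that contract:

// Hypothetical subclass, for illustration only.
class MyVariableTexture : public GLTexture, public GLVariableAllocationSupport {
    size_t promote() override {
        auto oldSize = _size;
        // ... reallocate one mip level higher and copy resident mips across in GPU memory ...
        return _size - oldSize;   // bytes gained
    }
    size_t demote() override {
        auto oldSize = _size;
        // ... reallocate one mip level lower ...
        return oldSize - _size;   // bytes freed
    }
    void populateTransferQueue(TransferQueue& pendingTransfers) override {
        // emit one TransferJob per mip/face, splitting large mips into line ranges
    }
};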

View file

@ -0,0 +1,502 @@
//
// Created by Bradley Austin Davis on 2016/05/15
// Copyright 2013-2016 High Fidelity, Inc.
//
// Distributed under the Apache License, Version 2.0.
// See the accompanying file LICENSE or http://www.apache.org/licenses/LICENSE-2.0.html
//
#include "GLTexture.h"
#include <QtCore/QThread>
#include <NumericalConstants.h>
#include "GLBackend.h"
#define OVERSUBSCRIBED_PRESSURE_VALUE 0.95f
#define UNDERSUBSCRIBED_PRESSURE_VALUE 0.85f
#define DEFAULT_ALLOWED_TEXTURE_MEMORY_MB ((size_t)1024)
#define MAX_RESOURCE_TEXTURES_PER_FRAME 2
#define NO_BUFFER_WORK_SLEEP_TIME_MS 2
#define THREADED_TEXTURE_BUFFERING 1
static const size_t DEFAULT_ALLOWED_TEXTURE_MEMORY = MB_TO_BYTES(DEFAULT_ALLOWED_TEXTURE_MEMORY_MB);
namespace gpu { namespace gl {
enum class MemoryPressureState
{
Idle,
Transfer,
Undersubscribed,
};
static MemoryPressureState _memoryPressureState{ MemoryPressureState::Idle };
template <typename T>
struct LessPairSecond {
bool operator()(const T& a, const T& b) { return a.second < b.second; }
};
using QueuePair = std::pair<TextureWeakPointer, float>;
// Contains a priority sorted list of textures on which work is to be done over many frames
// Uses a weak pointer to the texture to avoid keeping it in scope if the client stops using it
using WorkQueue = std::priority_queue<QueuePair, std::vector<QueuePair>, LessPairSecond<QueuePair>>;
using ImmediateQueuePair = std::pair<TexturePointer, float>;
// Contains a priority sorted list of textures on which work is to be done in the current frame
using ImmediateWorkQueue = std::priority_queue<ImmediateQueuePair, std::vector<ImmediateQueuePair>, LessPairSecond<ImmediateQueuePair>>;
// A map of weak texture pointers to queues of work to be done to transfer their data from the backing store to the GPU
using TransferMap = std::map<TextureWeakPointer, TransferQueue, std::owner_less<TextureWeakPointer>>;
class GLTextureTransferEngineDefault : public GLTextureTransferEngine {
using Parent = GLTextureTransferEngine;
public:
// Called once per frame by the GLBackend to manage texture memory
// Will demote textures immediately if oversubscribed, promote if undersubscribed, and otherwise process pending transfers
void manageMemory() override;
void shutdown() override;
protected:
class TextureBufferThread : public QThread {
public:
TextureBufferThread(GLTextureTransferEngineDefault& parent) : _parent(parent) { start(); }
protected:
void run() override {
while (!_parent._shutdown) {
if (!_parent.processActiveBufferQueue()) {
QThread::msleep(NO_BUFFER_WORK_SLEEP_TIME_MS);
}
}
}
GLTextureTransferEngineDefault& _parent;
};
using ActiveTransferJob = std::pair<TexturePointer, TransferJobPointer>;
using ActiveTransferQueue = std::list<ActiveTransferJob>;
void populateActiveBufferQueue();
bool processActiveBufferQueue();
void processTransferQueues();
void populateTransferQueue(const TexturePointer& texturePointer);
//void addToWorkQueue(const TexturePointer& texturePointer);
void updateMemoryPressure();
void processDemotes(size_t relief, const std::vector<TexturePointer>& strongTextures);
void processPromotes();
private:
std::atomic<bool> _shutdown{ false };
// Contains a priority sorted list of weak texture pointers that have been determined to be eligible for additional allocation
// While the memory state is 'undersubscribed', items will be removed from this list and processed, allocating additional memory
// per frame
WorkQueue _promoteQueue;
// This queue contains jobs that will buffer data from the texture backing store (ideally a memory mapped KTX file)
// to a CPU memory buffer. This queue is populated on the main GPU thread, and drained on a dedicated thread.
// When an item on the _activeBufferQueue is completed it is put into the _activeTransferQueue
ActiveTransferQueue _activeBufferQueue;
// This queue contains jobs that will upload data from a CPU buffer to the GPU. This queue is populated on the background
// thread that processes the _activeBufferQueue and drained on the main GPU thread
ActiveTransferQueue _activeTransferQueue;
// Mutex protecting the _activeTransferQueue & _activeBufferQueue since they are each accessed both from the main GPU thread
// and the buffering thread
Mutex _bufferMutex;
// The buffering thread which drains the _activeBufferQueue and populates the _activeTransferQueue
TextureBufferThread* _transferThread{ nullptr };
// The amount of buffering work currently represented by the _activeBufferQueue
size_t _queuedBufferSize{ 0 };
// This contains a map of all textures to queues of pending transfer jobs. While in the transfer state, this map is used to
// populate the _activeBufferQueue up to the limit specified in GLVariableAllocationSupport::MAX_BUFFER_SIZE
TransferMap _pendingTransfersMap;
};
}} // namespace gpu::gl
using namespace gpu;
using namespace gpu::gl;
void GLBackend::initTextureManagementStage() {
_textureManagement._transferEngine = std::make_shared<GLTextureTransferEngineDefault>();
}
void GLBackend::killTextureManagementStage() {
_textureManagement._transferEngine->shutdown();
_textureManagement._transferEngine.reset();
}
std::vector<TexturePointer> GLTextureTransferEngine::getAllTextures() {
std::vector<TexturePointer> result;
result.reserve(_registeredTextures.size());
// Collect strong pointers to every live texture, and drop expired entries from the registry.
// (std::list::remove_if actually erases; a bare std::remove_if would discard its result here.)
_registeredTextures.remove_if([&](const std::weak_ptr<Texture>& weak) -> bool {
auto strong = weak.lock();
if (strong) {
result.push_back(strong);
return false;
}
return true;
});
return result;
}
void GLTextureTransferEngine::addMemoryManagedTexture(const TexturePointer& texturePointer) {
++_frameTexturesCreated;
_registeredTextures.push_back(texturePointer);
}
void GLTextureTransferEngineDefault::shutdown() {
_shutdown = true;
#if THREADED_TEXTURE_BUFFERING
if (_transferThread) {
_transferThread->wait();
delete _transferThread;
_transferThread = nullptr;
}
#endif
}
void GLTextureTransferEngineDefault::manageMemory() {
PROFILE_RANGE(render_gpu_gl, __FUNCTION__);
// reset the count used to limit the number of textures created per frame
resetFrameTextureCreated();
// Determine the current memory management state. It will be either idle (no work to do),
// undersubscribed (need to do more allocation) or transfer (need to upload content from the
// backing store to the GPU)
updateMemoryPressure();
if (MemoryPressureState::Undersubscribed == _memoryPressureState) {
// If we're undersubscribed, we need to process some of the textures that can have additional allocation
processPromotes();
} else if (MemoryPressureState::Transfer == _memoryPressureState) {
// If we're in transfer mode we need to manage the buffering and upload queues
processTransferQueues();
}
}
// Each frame we will check if our memory pressure state has changed.
void GLTextureTransferEngineDefault::updateMemoryPressure() {
PROFILE_RANGE(render_gpu_gl, __FUNCTION__);
size_t allowedMemoryAllocation = gpu::Texture::getAllowedGPUMemoryUsage();
if (0 == allowedMemoryAllocation) {
allowedMemoryAllocation = DEFAULT_ALLOWED_TEXTURE_MEMORY;
}
// Clear any defunct textures (weak pointers that no longer have a valid texture)
auto strongTextures = getAllTextures();
size_t totalVariableMemoryAllocation = 0;
size_t idealMemoryAllocation = 0;
bool canDemote = false;
bool canPromote = false;
bool hasTransfers = false;
for (const auto& texture : strongTextures) {
GLTexture* gltexture = Backend::getGPUObject<GLTexture>(*texture);
GLVariableAllocationSupport* vartexture = dynamic_cast<GLVariableAllocationSupport*>(gltexture);
vartexture->sanityCheck();
// Track how much the texture thinks it should be using
idealMemoryAllocation += texture->evalTotalSize();
// Track how much we're actually using
totalVariableMemoryAllocation += gltexture->size();
if (vartexture->canDemote()) {
canDemote |= true;
}
if (vartexture->canPromote()) {
canPromote |= true;
}
if (vartexture->hasPendingTransfers()) {
hasTransfers |= true;
}
}
size_t unallocated = idealMemoryAllocation - totalVariableMemoryAllocation;
float pressure = (float)totalVariableMemoryAllocation / (float)allowedMemoryAllocation;
// If we're oversubscribed we need to demote textures IMMEDIATELY
if (pressure > OVERSUBSCRIBED_PRESSURE_VALUE && canDemote) {
auto overPressure = pressure - OVERSUBSCRIBED_PRESSURE_VALUE;
size_t relief = (size_t)(overPressure * totalVariableMemoryAllocation);
processDemotes(relief, strongTextures);
return;
}
auto newState = MemoryPressureState::Idle;
if (pressure < UNDERSUBSCRIBED_PRESSURE_VALUE && (unallocated != 0 && canPromote)) {
newState = MemoryPressureState::Undersubscribed;
} else if (hasTransfers) {
newState = MemoryPressureState::Transfer;
} else {
Lock lock(_bufferMutex);
if (!_activeBufferQueue.empty() || !_activeTransferQueue.empty() || !_pendingTransfersMap.empty()) {
newState = MemoryPressureState::Transfer;
}
}
// If we've changed state then we have to populate the appropriate structure with the work to be done
if (newState != _memoryPressureState) {
_memoryPressureState = newState;
_promoteQueue = WorkQueue();
_pendingTransfersMap.clear();
if (MemoryPressureState::Idle == _memoryPressureState) {
return;
}
// For each texture, if it's eligible for work in the current state, put it into the appropriate structure
for (const auto& texture : strongTextures) {
GLTexture* gltexture = Backend::getGPUObject<GLTexture>(*texture);
GLVariableAllocationSupport* vargltexture = dynamic_cast<GLVariableAllocationSupport*>(gltexture);
if (MemoryPressureState::Undersubscribed == _memoryPressureState && vargltexture->canPromote()) {
// Promote smallest first
_promoteQueue.push({ texture, 1.0f / (float)gltexture->size() });
} else if (MemoryPressureState::Transfer == _memoryPressureState && vargltexture->hasPendingTransfers()) {
populateTransferQueue(texture);
}
}
}
}
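A worked example of the thresholds above, assuming the default 1024 MB budget: with 1024 MB of variable texture allocation, pressure = 1.0 > 0.95, so demotion happens immediately with a relief target of roughly (1.0 - 0.95) * 1024 MB, about 51 MB; with 512 MB allocated, pressure = 0.5 < 0.85, so the engine enters Undersubscribed and promotes textures (smallest first) as long as any texture still has unallocated mips.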
// Manage the _activeBufferQueue and _activeTransferQueue queues
void GLTextureTransferEngineDefault::processTransferQueues() {
#if THREADED_TEXTURE_BUFFERING
if (!_transferThread) {
_transferThread = new TextureBufferThread(*this);
}
#endif
// From the pendingTransferMap, queue jobs into the _activeBufferQueue
// Doing so will lock the weak texture pointer so that it can't be destroyed
// while the background thread is working.
//
// This will queue jobs until _queuedBufferSize can't be increased without exceeding
// GLVariableAllocationSupport::MAX_BUFFER_SIZE or there is no more work to be done
populateActiveBufferQueue();
#if !THREADED_TEXTURE_BUFFERING
processActiveBufferQueue();
#endif
// Take any tasks which have completed buffering and process them, uploading the buffered
// data to the GPU. Drains the _activeTransferQueue
{
ActiveTransferQueue activeTransferQueue;
{
Lock lock(_bufferMutex);
activeTransferQueue.swap(_activeTransferQueue);
}
while (!activeTransferQueue.empty()) {
const auto& activeTransferJob = activeTransferQueue.front();
const auto& texturePointer = activeTransferJob.first;
const auto& transferJob = activeTransferJob.second;
transferJob->transfer(texturePointer);
// The pop_front MUST be the last call, since all of these variables in scope are
// references that will be invalid after the pop
activeTransferQueue.pop_front();
}
}
// If we have no more work in any of the structures, reset the memory state to idle to
// force reconstruction of the _pendingTransfersMap if necessary
{
Lock lock(_bufferMutex);
if (_activeTransferQueue.empty() && _activeBufferQueue.empty() && _pendingTransfersMap.empty()) {
_memoryPressureState = MemoryPressureState::Idle;
}
}
}
void GLTextureTransferEngineDefault::populateActiveBufferQueue() {
size_t queuedBufferSize = _queuedBufferSize;
static const auto& MAX_BUFFER_SIZE = GLVariableAllocationSupport::MAX_BUFFER_SIZE;
Q_ASSERT(queuedBufferSize <= MAX_BUFFER_SIZE);
size_t availableBufferSize = MAX_BUFFER_SIZE - queuedBufferSize;
// Queue up buffering jobs
ActiveTransferQueue newBufferJobs;
ActiveTransferQueue newTransferJobs;
size_t newTransferSize{ 0 };
for (auto itr = _pendingTransfersMap.begin(); itr != _pendingTransfersMap.end(); ) {
const auto& weakTexture = itr->first;
const auto texture = weakTexture.lock();
// Texture no longer exists, remove from the transfer map and move on
if (!texture) {
itr = _pendingTransfersMap.erase(itr);
continue;
}
GLTexture* gltexture = Backend::getGPUObject<GLTexture>(*texture);
GLVariableAllocationSupport* vargltexture = dynamic_cast<GLVariableAllocationSupport*>(gltexture);
auto& textureTransferQueue = itr->second;
// Can't find any pending transfers, so move on
if (textureTransferQueue.empty()) {
if (vargltexture->hasPendingTransfers()) {
qWarning(gpugllogging) << "Texture has no transfer jobs, but has pending transfers";
}
itr = _pendingTransfersMap.erase(itr);
continue;
}
const auto& transferJob = textureTransferQueue.front();
if (!transferJob->bufferingRequired()) {
newTransferJobs.emplace_back(texture, transferJob);
} else {
const auto& transferSize = transferJob->size();
// If there's not enough space for the buffering, then break out of the loop
if (transferSize > availableBufferSize) {
break;
}
availableBufferSize -= transferSize;
Q_ASSERT(availableBufferSize <= MAX_BUFFER_SIZE);
Q_ASSERT(newTransferSize <= MAX_BUFFER_SIZE);
newTransferSize += transferSize;
Q_ASSERT(newTransferSize <= MAX_BUFFER_SIZE);
newBufferJobs.emplace_back(texture, transferJob);
}
textureTransferQueue.pop();
++itr;
}
{
Lock lock(_bufferMutex);
_activeBufferQueue.splice(_activeBufferQueue.end(), newBufferJobs);
Q_ASSERT(_queuedBufferSize <= MAX_BUFFER_SIZE);
_queuedBufferSize += newTransferSize;
Q_ASSERT(_queuedBufferSize <= MAX_BUFFER_SIZE);
_activeTransferQueue.splice(_activeTransferQueue.end(), newTransferJobs);
}
}
bool GLTextureTransferEngineDefault::processActiveBufferQueue() {
ActiveTransferQueue activeBufferQueue;
{
Lock lock(_bufferMutex);
_activeBufferQueue.swap(activeBufferQueue);
}
if (activeBufferQueue.empty()) {
return false;
}
for (const auto& activeJob : activeBufferQueue) {
const auto& texture = activeJob.first;
const auto& transferJob = activeJob.second;
const auto& transferSize = transferJob->size();
transferJob->buffer(texture);
Q_ASSERT(_queuedBufferSize >= transferSize);
_queuedBufferSize -= transferSize;
}
{
Lock lock(_bufferMutex);
_activeTransferQueue.splice(_activeTransferQueue.end(), activeBufferQueue);
}
return true;
}
void GLTextureTransferEngineDefault::populateTransferQueue(const TexturePointer& texturePointer) {
TextureWeakPointer weakTexture = texturePointer;
GLTexture* gltexture = Backend::getGPUObject<GLTexture>(*texturePointer);
GLVariableAllocationSupport* vargltexture = dynamic_cast<GLVariableAllocationSupport*>(gltexture);
TransferJob::Queue pendingTransfers;
PROFILE_RANGE(render_gpu_gl, __FUNCTION__);
vargltexture->populateTransferQueue(pendingTransfers);
if (!pendingTransfers.empty()) {
_pendingTransfersMap[weakTexture] = pendingTransfers;
}
}
// Promote textures from the queue of eligible textures, up to the per-frame budget
void GLTextureTransferEngineDefault::processPromotes() {
// FIXME use max allocated memory per frame instead of promotion count
static const size_t MAX_ALLOCATED_BYTES_PER_FRAME = GLVariableAllocationSupport::MAX_BUFFER_SIZE;
static const size_t MAX_ALLOCATIONS_PER_FRAME = 8;
size_t allocatedBytes{ 0 };
size_t allocations{ 0 };
while (!_promoteQueue.empty()) {
// Grab the first item off the promote queue
PROFILE_RANGE(render_gpu_gl, __FUNCTION__);
auto entry = _promoteQueue.top();
_promoteQueue.pop();
auto texture = entry.first.lock();
if (!texture) {
continue;
}
GLTexture* gltexture = Backend::getGPUObject<GLTexture>(*texture);
GLVariableAllocationSupport* vartexture = dynamic_cast<GLVariableAllocationSupport*>(gltexture);
auto originalSize = gltexture->size();
vartexture->promote();
auto allocationDelta = gltexture->size() - originalSize;
if (vartexture->canPromote()) {
// Promote smallest first
_promoteQueue.push({ texture, 1.0f / (float)gltexture->size() });
}
allocatedBytes += allocationDelta;
if (++allocations >= MAX_ALLOCATIONS_PER_FRAME) {
break;
}
if (allocatedBytes >= MAX_ALLOCATED_BYTES_PER_FRAME) {
break;
}
}
// If the queue is drained, reset to idle so the work structures get rebuilt
if (_promoteQueue.empty()) {
// Force rebuild of work queue
_memoryPressureState = MemoryPressureState::Idle;
}
}
void GLTextureTransferEngineDefault::processDemotes(size_t reliefRequired, const std::vector<TexturePointer>& strongTextures) {
// Demote largest first
ImmediateWorkQueue demoteQueue;
for (const auto& texture : strongTextures) {
GLTexture* gltexture = Backend::getGPUObject<GLTexture>(*texture);
GLVariableAllocationSupport* vargltexture = dynamic_cast<GLVariableAllocationSupport*>(gltexture);
if (vargltexture->canDemote()) {
demoteQueue.push({ texture, (float)gltexture->size() });
}
}
size_t relieved = 0;
while (!demoteQueue.empty() && relieved < reliefRequired) {
{
const auto& target = demoteQueue.top();
const auto& texture = target.first;
GLTexture* gltexture = Backend::getGPUObject<GLTexture>(*texture);
auto oldSize = gltexture->size();
GLVariableAllocationSupport* vargltexture = dynamic_cast<GLVariableAllocationSupport*>(gltexture);
vargltexture->demote();
auto newSize = gltexture->size();
relieved += (oldSize - newSize);
}
demoteQueue.pop();
}
}
// FIXME hack for stats display
QString getTextureMemoryPressureModeString() {
switch (_memoryPressureState) {
case MemoryPressureState::Undersubscribed:
return "Undersubscribed";
case MemoryPressureState::Transfer:
return "Transfer";
case MemoryPressureState::Idle:
return "Idle";
}
Q_UNREACHABLE();
return "Unknown";
}
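The concurrency pattern at the heart of this file is a pair of queues handed between the GL thread and the buffering thread: each side swaps a queue out under the lock, does its work with the lock released, and splices results back. A stripped-down sketch with hypothetical names (Job, toBuffer, toTransfer):

// Stripped-down sketch of the double-queue handoff; all names here are hypothetical.
#include <list>
#include <mutex>
struct Job { void buffer() {} void transfer() {} };
std::mutex mutex;
std::list<Job> toBuffer;    // filled on the GL thread, drained by the worker
std::list<Job> toTransfer;  // filled by the worker, drained on the GL thread

void workerLoop() {         // cf. TextureBufferThread::run / processActiveBufferQueue
    std::list<Job> local;
    { std::lock_guard<std::mutex> lock(mutex); local.swap(toBuffer); }
    for (auto& job : local) { job.buffer(); }       // disk/CPU work, lock not held
    std::lock_guard<std::mutex> lock(mutex);
    toTransfer.splice(toTransfer.end(), local);
}

void glThreadTick() {       // cf. processTransferQueues
    std::list<Job> local;
    { std::lock_guard<std::mutex> lock(mutex); local.swap(toTransfer); }
    for (auto& job : local) { job.transfer(); }     // GL upload, lock not held
}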

View file

@ -114,9 +114,9 @@ public:
void allocateStorage(uint16 allocatedMip);
void syncSampler() const override;
void promote() override;
void demote() override;
void populateTransferQueue() override;
size_t promote() override;
size_t demote() override;
void populateTransferQueue(TransferQueue& pendingTransfers) override;
Size copyMipFaceLinesFromTexture(uint16_t mip, uint8_t face, const uvec3& size, uint32_t yOffset, GLenum internalFormat, GLenum format, GLenum type, Size sourceSize, const void* sourcePointer) const override;
Size copyMipsFromTexture();

View file

@ -72,7 +72,7 @@ GLTexture* GL41Backend::syncGPUObject(const TexturePointer& texturePointer) {
case TextureUsageType::RESOURCE:
qCDebug(gpugllogging) << "variable / Strict texture " << texture.source().c_str();
object = new GL41ResourceTexture(shared_from_this(), texture);
GLVariableAllocationSupport::addMemoryManagedTexture(texturePointer);
_textureManagement._transferEngine->addMemoryManagedTexture(texturePointer);
break;
default:
@ -86,7 +86,6 @@ GLTexture* GL41Backend::syncGPUObject(const TexturePointer& texturePointer) {
auto minAvailableMip = texture.minAvailableMipLevel();
if (minAvailableMip < varTex->_minAllocatedMip) {
varTex->_minAllocatedMip = minAvailableMip;
GL41VariableAllocationTexture::_memoryPressureStateStale = true;
}
}
}
@ -299,9 +298,7 @@ GL41VariableAllocationTexture::GL41VariableAllocationTexture(const std::weak_ptr
uint16_t allocatedMip = std::max<uint16_t>(_minAllocatedMip, targetMip);
allocateStorage(allocatedMip);
_memoryPressureStateStale = true;
copyMipsFromTexture();
syncSampler();
}
@ -496,7 +493,7 @@ void GL41VariableAllocationTexture::copyTextureMipsInGPUMem(GLuint srcId, GLuint
});
}
void GL41VariableAllocationTexture::promote() {
size_t GL41VariableAllocationTexture::promote() {
PROFILE_RANGE(render_gpu_gl, __FUNCTION__);
Q_ASSERT(_allocatedMip > 0);
@ -524,12 +521,11 @@ void GL41VariableAllocationTexture::promote() {
// update the memory usage
Backend::textureResourceGPUMemSize.update(oldSize, 0);
return (_size - oldSize);
// no change to Backend::textureResourcePopulatedGPUMemSize
populateTransferQueue();
}
void GL41VariableAllocationTexture::demote() {
size_t GL41VariableAllocationTexture::demote() {
PROFILE_RANGE(render_gpu_gl, __FUNCTION__);
Q_ASSERT(_allocatedMip < _maxAllocatedMip);
auto oldId = _id;
@ -563,16 +559,16 @@ void GL41VariableAllocationTexture::demote() {
}
decrementPopulatedSize(amountUnpopulated);
}
populateTransferQueue();
return oldSize - _size;
}
void GL41VariableAllocationTexture::populateTransferQueue() {
void GL41VariableAllocationTexture::populateTransferQueue(TransferQueue& pendingTransfers) {
PROFILE_RANGE(render_gpu_gl, __FUNCTION__);
if (_populatedMip <= _allocatedMip) {
return;
}
_pendingTransfers = TransferQueue();
const uint8_t maxFace = GLTexture::getFaceCount(_target);
uint16_t sourceMip = _populatedMip;
@ -588,7 +584,7 @@ void GL41VariableAllocationTexture::populateTransferQueue() {
// If the mip is less than the max transfer size, then just do it in one transfer
if (glm::all(glm::lessThanEqual(mipDimensions, MAX_TRANSFER_DIMENSIONS))) {
// Can the mip be transferred in one go
_pendingTransfers.emplace(new TransferJob(*this, sourceMip, targetMip, face));
pendingTransfers.emplace(new TransferJob(_gpuObject, sourceMip, targetMip, face));
continue;
}
@ -605,13 +601,13 @@ void GL41VariableAllocationTexture::populateTransferQueue() {
uint32_t lineOffset = 0;
while (lineOffset < lines) {
uint32_t linesToCopy = std::min<uint32_t>(lines - lineOffset, linesPerTransfer);
_pendingTransfers.emplace(new TransferJob(*this, sourceMip, targetMip, face, linesToCopy, lineOffset));
pendingTransfers.emplace(new TransferJob(_gpuObject, sourceMip, targetMip, face, linesToCopy, lineOffset));
lineOffset += linesToCopy;
}
}
// queue up the sampler and populated mip change for after the transfer has completed
_pendingTransfers.emplace(new TransferJob(*this, [=] {
pendingTransfers.emplace(new TransferJob([=] {
_populatedMip = sourceMip;
syncSampler();
}));
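To make the chunking above concrete: with MAX_TRANSFER_SIZE at 4 MiB (1024 * 1024 * 4 bytes), and assuming linesPerTransfer is derived as MAX_TRANSFER_SIZE / bytesPerLine (as the elided lines suggest), a 2048x2048 RGBA8 mip has bytesPerLine = 2048 * 4 = 8 KiB, giving linesPerTransfer = 512; the mip is queued as 2048 / 512 = 4 TransferJobs of 512 lines each, followed by the closing job above that advances _populatedMip and re-syncs the sampler.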

View file

@ -187,9 +187,9 @@ public:
GL45ResourceTexture(const std::weak_ptr<GLBackend>& backend, const Texture& texture);
void syncSampler() const override;
void promote() override;
void demote() override;
void populateTransferQueue() override;
size_t promote() override;
size_t demote() override;
void populateTransferQueue(TransferQueue& pendingTransfers) override;
void allocateStorage(uint16 mip);

View file

@ -28,7 +28,6 @@ using namespace gpu;
using namespace gpu::gl;
using namespace gpu::gl45;
#define MAX_RESOURCE_TEXTURES_PER_FRAME 2
#define FORCE_STRICT_TEXTURE 0
#define ENABLE_SPARSE_TEXTURE 0
@ -82,7 +81,8 @@ GLTexture* GL45Backend::syncGPUObject(const TexturePointer& texturePointer) {
#if !FORCE_STRICT_TEXTURE
case TextureUsageType::RESOURCE: {
if (GL45VariableAllocationTexture::_frameTexturesCreated < MAX_RESOURCE_TEXTURES_PER_FRAME) {
auto& transferEngine = _textureManagement._transferEngine;
if (transferEngine->allowCreate()) {
#if ENABLE_SPARSE_TEXTURE
if (isTextureManagementSparseEnabled() && GL45Texture::isSparseEligible(texture)) {
object = new GL45SparseResourceTexture(shared_from_this(), texture);
@ -92,7 +92,7 @@ GLTexture* GL45Backend::syncGPUObject(const TexturePointer& texturePointer) {
#else
object = new GL45ResourceTexture(shared_from_this(), texture);
#endif
GLVariableAllocationSupport::addMemoryManagedTexture(texturePointer);
transferEngine->addMemoryManagedTexture(texturePointer);
} else {
auto fallback = texturePointer->getFallbackTexture();
if (fallback) {
@ -114,7 +114,6 @@ GLTexture* GL45Backend::syncGPUObject(const TexturePointer& texturePointer) {
auto minAvailableMip = texture.minAvailableMipLevel();
if (minAvailableMip < varTex->_minAllocatedMip) {
varTex->_minAllocatedMip = minAvailableMip;
GL45VariableAllocationTexture::_memoryPressureStateStale = true;
}
}
}
@ -124,6 +123,7 @@ GLTexture* GL45Backend::syncGPUObject(const TexturePointer& texturePointer) {
}
void GL45Backend::initTextureManagementStage() {
GLBackend::initTextureManagementStage();
// enable the Sparse Texture on gl45
_textureManagement._sparseCapable = true;

View file

@ -31,7 +31,6 @@ using GL45Texture = GL45Backend::GL45Texture;
using GL45VariableAllocationTexture = GL45Backend::GL45VariableAllocationTexture;
GL45VariableAllocationTexture::GL45VariableAllocationTexture(const std::weak_ptr<GLBackend>& backend, const Texture& texture) : GL45Texture(backend, texture) {
++_frameTexturesCreated;
Backend::textureResourceCount.increment();
}
@ -104,7 +103,6 @@ GL45ResourceTexture::GL45ResourceTexture(const std::weak_ptr<GLBackend>& backend
uint16_t allocatedMip = std::max<uint16_t>(_minAllocatedMip, targetMip);
allocateStorage(allocatedMip);
_memoryPressureStateStale = true;
copyMipsFromTexture();
syncSampler();
}
@ -148,7 +146,7 @@ void GL45ResourceTexture::syncSampler() const {
#endif
}
void GL45ResourceTexture::promote() {
size_t GL45ResourceTexture::promote() {
PROFILE_RANGE(render_gpu_gl, __FUNCTION__);
Q_ASSERT(_allocatedMip > 0);
@ -191,11 +189,10 @@ void GL45ResourceTexture::promote() {
// update the memory usage
Backend::textureResourceGPUMemSize.update(oldSize, 0);
// no change to Backend::textureResourcePopulatedGPUMemSize
populateTransferQueue();
return (_size - oldSize);
}
void GL45ResourceTexture::demote() {
size_t GL45ResourceTexture::demote() {
PROFILE_RANGE(render_gpu_gl, __FUNCTION__);
Q_ASSERT(_allocatedMip < _maxAllocatedMip);
auto oldId = _id;
@ -242,16 +239,16 @@ void GL45ResourceTexture::demote() {
}
decrementPopulatedSize(amountUnpopulated);
}
populateTransferQueue();
return (oldSize - _size);
}
void GL45ResourceTexture::populateTransferQueue() {
void GL45ResourceTexture::populateTransferQueue(TransferQueue& pendingTransfers) {
PROFILE_RANGE(render_gpu_gl, __FUNCTION__);
sanityCheck();
if (_populatedMip <= _allocatedMip) {
return;
}
_pendingTransfers = TransferQueue();
const uint8_t maxFace = GLTexture::getFaceCount(_target);
uint16_t sourceMip = _populatedMip;
@ -267,7 +264,7 @@ void GL45ResourceTexture::populateTransferQueue() {
// If the mip is less than the max transfer size, then just do it in one transfer
if (glm::all(glm::lessThanEqual(mipDimensions, MAX_TRANSFER_DIMENSIONS))) {
// Can the mip be transferred in one go?
_pendingTransfers.emplace(new TransferJob(*this, sourceMip, targetMip, face));
pendingTransfers.emplace(new TransferJob(_gpuObject, sourceMip, targetMip, face));
continue;
}
@ -284,14 +281,15 @@ void GL45ResourceTexture::populateTransferQueue() {
uint32_t lineOffset = 0;
while (lineOffset < lines) {
uint32_t linesToCopy = std::min<uint32_t>(lines - lineOffset, linesPerTransfer);
_pendingTransfers.emplace(new TransferJob(*this, sourceMip, targetMip, face, linesToCopy, lineOffset));
pendingTransfers.emplace(new TransferJob(_gpuObject, sourceMip, targetMip, face, linesToCopy, lineOffset));
lineOffset += linesToCopy;
}
}
// queue up the sampler and populated mip change for after the transfer has completed
_pendingTransfers.emplace(new TransferJob(*this, [=] {
pendingTransfers.emplace(new TransferJob([=] {
_populatedMip = sourceMip;
sanityCheck();
syncSampler();
}));
} while (sourceMip != _allocatedMip);
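
promote() and demote() no longer repopulate their own transfer queues; instead they return the byte delta they produced, leaving requeueing and memory accounting to the transfer engine. A hypothetical consumer sketch (reclaimTextureMemory and the canDemote() guard are illustrative names, not APIs confirmed by this commit):

// Sketch (not part of this commit): demote textures until enough bytes are freed.
#include <cstddef>
#include <vector>

size_t reclaimTextureMemory(std::vector<GLVariableAllocationSupport*>& textures, size_t bytesToFree) {
    size_t freed = 0;
    for (auto* texture : textures) {
        if (freed >= bytesToFree) {
            break;
        }
        if (texture->canDemote()) { // hypothetical guard; demote() asserts it can still drop a mip
            // demote() drops one mip level and reports how many bytes were released
            freed += texture->demote();
        }
    }
    return freed;
}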

View file

@ -105,9 +105,9 @@ public:
void allocateStorage(uint16 allocatedMip);
void syncSampler() const override;
void promote() override;
void demote() override;
void populateTransferQueue() override;
size_t promote() override;
size_t demote() override;
void populateTransferQueue(TransferJob::Queue& queue) override;
Size copyMipFaceLinesFromTexture(uint16_t mip, uint8_t face, const uvec3& size, uint32_t yOffset, GLenum internalFormat, GLenum format, GLenum type, Size sourceSize, const void* sourcePointer) const override;
Size copyMipsFromTexture();

View file

@ -90,7 +90,6 @@ GLTexture* GLESBackend::syncGPUObject(const TexturePointer& texturePointer) {
auto minAvailableMip = texture.minAvailableMipLevel();
if (minAvailableMip < varTex->_minAllocatedMip) {
varTex->_minAllocatedMip = minAvailableMip;
GLESVariableAllocationTexture::_memoryPressureStateStale = true;
}
}
}
@ -361,7 +360,6 @@ GLESVariableAllocationTexture::GLESVariableAllocationTexture(const std::weak_ptr
uint16_t allocatedMip = std::max<uint16_t>(_minAllocatedMip, targetMip);
allocateStorage(allocatedMip);
_memoryPressureStateStale = true;
copyMipsFromTexture();
syncSampler();
@ -559,7 +557,7 @@ void GLESVariableAllocationTexture::copyTextureMipsInGPUMem(GLuint srcId, GLuint
});
}
void GLESVariableAllocationTexture::promote() {
size_t GLESVariableAllocationTexture::promote() {
PROFILE_RANGE(render_gpu_gl, __FUNCTION__);
Q_ASSERT(_allocatedMip > 0);
@ -587,12 +585,11 @@ void GLESVariableAllocationTexture::promote() {
// update the memory usage
Backend::textureResourceGPUMemSize.update(oldSize, 0);
// no change to Backend::textureResourcePopulatedGPUMemSize
populateTransferQueue();
return _size - oldSize;
}
void GLESVariableAllocationTexture::demote() {
size_t GLESVariableAllocationTexture::demote() {
PROFILE_RANGE(render_gpu_gl, __FUNCTION__);
Q_ASSERT(_allocatedMip < _maxAllocatedMip);
auto oldId = _id;
@ -626,16 +623,16 @@ void GLESVariableAllocationTexture::demote() {
}
decrementPopulatedSize(amountUnpopulated);
}
populateTransferQueue();
return oldSize - _size;
}
void GLESVariableAllocationTexture::populateTransferQueue() {
void GLESVariableAllocationTexture::populateTransferQueue(TransferJob::Queue& queue) {
PROFILE_RANGE(render_gpu_gl, __FUNCTION__);
if (_populatedMip <= _allocatedMip) {
return;
}
_pendingTransfers = TransferQueue();
const uint8_t maxFace = GLTexture::getFaceCount(_target);
uint16_t sourceMip = _populatedMip;
@ -651,7 +648,7 @@ void GLESVariableAllocationTexture::populateTransferQueue() {
// If the mip is less than the max transfer size, then just do it in one transfer
if (glm::all(glm::lessThanEqual(mipDimensions, MAX_TRANSFER_DIMENSIONS))) {
// Can the mip be transferred in one go?
_pendingTransfers.emplace(new TransferJob(*this, sourceMip, targetMip, face));
queue.emplace(new TransferJob(_gpuObject, sourceMip, targetMip, face));
continue;
}
@ -668,13 +665,13 @@ void GLESVariableAllocationTexture::populateTransferQueue() {
uint32_t lineOffset = 0;
while (lineOffset < lines) {
uint32_t linesToCopy = std::min<uint32_t>(lines - lineOffset, linesPerTransfer);
_pendingTransfers.emplace(new TransferJob(*this, sourceMip, targetMip, face, linesToCopy, lineOffset));
queue.emplace(new TransferJob(_gpuObject, sourceMip, targetMip, face, linesToCopy, lineOffset));
lineOffset += linesToCopy;
}
}
// queue up the sampler and populated mip change for after the transfer has completed
_pendingTransfers.emplace(new TransferJob(*this, [=] {
queue.emplace(new TransferJob([=] {
_populatedMip = sourceMip;
syncSampler();
}));

View file

@ -25,13 +25,14 @@
QTEST_MAIN(TextureTest)
#define LOAD_TEXTURE_COUNT 40
#define LOAD_TEXTURE_COUNT 100
#define FAIL_AFTER_SECONDS 30
static const QString TEST_DATA("https://hifi-public.s3.amazonaws.com/austin/test_data/test_ktx.zip");
static const QString TEST_DIR_NAME("{630b8f02-52af-4cdf-a896-24e472b94b28}");
static const QString KTX_TEST_DIR_ENV("HIFI_KTX_TEST_DIR");
std::string vertexShaderSource = R"SHADER(
#line 14
layout(location = 0) out vec2 outTexCoord0;
const vec4 VERTICES[] = vec4[](
@ -50,8 +51,6 @@ void main() {
)SHADER";
std::string fragmentShaderSource = R"SHADER(
#line 28
uniform sampler2D tex;
layout(location = 0) in vec2 inTexCoord0;
@ -87,21 +86,29 @@ void TextureTest::initTestCase() {
gpu::Context::init<gpu::gl::GLBackend>();
_gpuContext = std::make_shared<gpu::Context>();
_resourcesPath = QStandardPaths::writableLocation(QStandardPaths::TempLocation) + "/" + TEST_DIR_NAME;
if (!QFileInfo(_resourcesPath).exists()) {
QDir(_resourcesPath).mkpath(".");
FileDownloader(TEST_DATA,
[&](const QByteArray& data) {
QTemporaryFile zipFile;
if (zipFile.open()) {
zipFile.write(data);
zipFile.close();
}
JlCompress::extractDir(zipFile.fileName(), _resourcesPath);
})
.waitForDownload();
if (QProcessEnvironment::systemEnvironment().contains(KTX_TEST_DIR_ENV)) {
// For local testing with larger data sets
_resourcesPath = QProcessEnvironment::systemEnvironment().value(KTX_TEST_DIR_ENV);
} else {
_resourcesPath = QStandardPaths::writableLocation(QStandardPaths::TempLocation) + "/" + TEST_DIR_NAME;
if (!QFileInfo(_resourcesPath).exists()) {
QDir(_resourcesPath).mkpath(".");
FileDownloader(TEST_DATA,
[&](const QByteArray& data) {
QTemporaryFile zipFile;
if (zipFile.open()) {
zipFile.write(data);
zipFile.close();
}
JlCompress::extractDir(zipFile.fileName(), _resourcesPath);
})
.waitForDownload();
}
}
QVERIFY(!_resourcesPath.isEmpty());
_canvas.makeCurrent();
{
auto VS = gpu::Shader::createVertex(vertexShaderSource);
@ -130,15 +137,7 @@ void TextureTest::initTestCase() {
}
}
// Load the test textures
{
size_t newTextureCount = std::min<size_t>(_textureFiles.size(), LOAD_TEXTURE_COUNT);
for (size_t i = 0; i < newTextureCount; ++i) {
const auto& textureFile = _textureFiles[i];
auto texture = gpu::Texture::unserialize(textureFile);
_textures.push_back(texture);
}
}
QVERIFY(!_textureFiles.empty());
}
void TextureTest::cleanupTestCase() {
@ -148,6 +147,18 @@ void TextureTest::cleanupTestCase() {
_gpuContext.reset();
}
std::vector<gpu::TexturePointer> TextureTest::loadTestTextures() const {
// Load the test textures
std::vector<gpu::TexturePointer> result;
size_t newTextureCount = std::min<size_t>(_textureFiles.size(), LOAD_TEXTURE_COUNT);
for (size_t i = 0; i < newTextureCount; ++i) {
const auto& textureFile = _textureFiles[i];
auto texture = gpu::Texture::unserialize(textureFile);
result.push_back(texture);
}
return result;
}
void TextureTest::beginFrame() {
_gpuContext->recycle();
_gpuContext->beginFrame();
@ -169,116 +180,130 @@ void TextureTest::endFrame() {
QThread::msleep(10);
}
void TextureTest::renderFrame(const std::function<void(gpu::Batch&)>& renderLambda) {
beginFrame();
gpu::doInBatch("Test::body", _gpuContext, renderLambda);
endFrame();
++_frameCount;
}
extern QString getTextureMemoryPressureModeString();
void TextureTest::testTextureLoading() {
QVERIFY(_textures.size() > 0);
auto renderTexturesLamdba = [this](gpu::Batch& batch) {
batch.setPipeline(_pipeline);
for (const auto& texture : _textures) {
batch.setResourceTexture(0, texture);
batch.draw(gpu::TRIANGLE_STRIP, 4, 0);
QBENCHMARK{
_frameCount = 0;
auto textures = loadTestTextures();
QVERIFY(textures.size() > 0);
auto renderTexturesLamdba = [&](gpu::Batch& batch) {
batch.setPipeline(_pipeline);
for (const auto& texture : textures) {
batch.setResourceTexture(0, texture);
batch.draw(gpu::TRIANGLE_STRIP, 4, 0);
}
};
size_t expectedAllocation = 0;
for (const auto& texture : textures) {
expectedAllocation += texture->evalTotalSize();
}
};
QVERIFY(textures.size() > 0);
size_t expectedAllocation = 0;
for (const auto& texture : _textures) {
expectedAllocation += texture->evalTotalSize();
auto reportLambda = [=] {
qDebug() << "Allowed " << gpu::Texture::getAllowedGPUMemoryUsage();
qDebug() << "Allocated " << gpu::Context::getTextureResourceGPUMemSize();
qDebug() << "Populated " << gpu::Context::getTextureResourcePopulatedGPUMemSize();
qDebug() << "Pending " << gpu::Context::getTexturePendingGPUTransferMemSize();
qDebug() << "State " << getTextureMemoryPressureModeString();
};
auto allocatedMemory = gpu::Context::getTextureResourceGPUMemSize();
auto populatedMemory = gpu::Context::getTextureResourcePopulatedGPUMemSize();
// Cycle frames until we're fully allocated
// We need to use the texture rendering lambda
auto lastReport = usecTimestampNow();
auto start = usecTimestampNow();
qDebug() << "Awaiting texture allocation";
while (expectedAllocation != allocatedMemory) {
doEvery(lastReport, 4, reportLambda);
failAfter(start, FAIL_AFTER_SECONDS, "Failed to allocate texture memory");
renderFrame(renderTexturesLamdba);
allocatedMemory = gpu::Context::getTextureResourceGPUMemSize();
populatedMemory = gpu::Context::getTextureResourcePopulatedGPUMemSize();
}
reportLambda();
QCOMPARE(allocatedMemory, expectedAllocation);
// Restart the timer
start = usecTimestampNow();
// Cycle frames until we're fully populated
qDebug() << "Awaiting texture population";
while (allocatedMemory != populatedMemory || 0 != gpu::Context::getTexturePendingGPUTransferMemSize()) {
doEvery(lastReport, 4, reportLambda);
failAfter(start, FAIL_AFTER_SECONDS, "Failed to populate texture memory");
renderFrame();
allocatedMemory = gpu::Context::getTextureResourceGPUMemSize();
populatedMemory = gpu::Context::getTextureResourcePopulatedGPUMemSize();
}
reportLambda();
QCOMPARE(populatedMemory, allocatedMemory);
// FIXME: work around a race condition between the reported populated size and the texture's actual _populatedMip value
for (size_t i = 0; i < textures.size(); ++i) {
renderFrame();
}
// Test on-demand deallocation of memory
auto maxMemory = allocatedMemory / 2;
gpu::Texture::setAllowedGPUMemoryUsage(maxMemory);
// Restart the timer
start = usecTimestampNow();
// Cycle frames until the allocated memory is below the max memory
qDebug() << "Awaiting texture deallocation";
while (allocatedMemory > maxMemory || allocatedMemory != populatedMemory) {
doEvery(lastReport, 4, reportLambda);
failAfter(start, FAIL_AFTER_SECONDS, "Failed to deallocate texture memory");
renderFrame(renderTexturesLamdba);
allocatedMemory = gpu::Context::getTextureResourceGPUMemSize();
populatedMemory = gpu::Context::getTextureResourcePopulatedGPUMemSize();
}
reportLambda();
// Verify that the allocation is now below the target
QVERIFY(allocatedMemory <= maxMemory);
// Verify that populated memory is the same as allocated memory
QCOMPARE(populatedMemory, allocatedMemory);
// Restart the timer
start = usecTimestampNow();
// Reset the max memory to automatic
gpu::Texture::setAllowedGPUMemoryUsage(0);
// Cycle frames until we're fully reallocated and repopulated
qDebug() << "Awaiting texture reallocation and repopulation";
while (allocatedMemory != expectedAllocation || allocatedMemory != populatedMemory) {
doEvery(lastReport, 4, reportLambda);
failAfter(start, FAIL_AFTER_SECONDS, "Failed to populate texture memory");
renderFrame();
allocatedMemory = gpu::Context::getTextureResourceGPUMemSize();
populatedMemory = gpu::Context::getTextureResourcePopulatedGPUMemSize();
}
reportLambda();
QCOMPARE(allocatedMemory, expectedAllocation);
QCOMPARE(populatedMemory, allocatedMemory);
textures.clear();
// Cycle frames until all texture memory has been released
qDebug() << "Awaiting texture deallocation";
while (allocatedMemory != 0) {
failAfter(start, FAIL_AFTER_SECONDS, "Failed to clear texture memory");
renderFrame();
allocatedMemory = gpu::Context::getTextureResourceGPUMemSize();
populatedMemory = gpu::Context::getTextureResourcePopulatedGPUMemSize();
}
reportLambda();
QCOMPARE(allocatedMemory, 0);
QCOMPARE(populatedMemory, 0);
qDebug() << "Test took " << _frameCount << "frame";
}
QVERIFY(_textures.size() > 0);
auto reportLambda = [=] {
qDebug() << "Allowed " << gpu::Texture::getAllowedGPUMemoryUsage();
qDebug() << "Allocated " << gpu::Context::getTextureResourceGPUMemSize();
qDebug() << "Populated " << gpu::Context::getTextureResourcePopulatedGPUMemSize();
qDebug() << "Pending " << gpu::Context::getTexturePendingGPUTransferMemSize();
};
auto allocatedMemory = gpu::Context::getTextureResourceGPUMemSize();
auto populatedMemory = gpu::Context::getTextureResourcePopulatedGPUMemSize();
// Cycle frames until we're fully allocated
// We need to use the texture rendering lambda
auto lastReport = usecTimestampNow();
auto start = usecTimestampNow();
while (expectedAllocation != allocatedMemory) {
doEvery(lastReport, 4, reportLambda);
failAfter(start, 10, "Failed to allocate texture memory after 10 seconds");
renderFrame(renderTexturesLamdba);
allocatedMemory = gpu::Context::getTextureResourceGPUMemSize();
populatedMemory = gpu::Context::getTextureResourcePopulatedGPUMemSize();
}
QCOMPARE(allocatedMemory, expectedAllocation);
// Restart the timer
start = usecTimestampNow();
// Cycle frames until we're fully populated
while (allocatedMemory != populatedMemory || 0 != gpu::Context::getTexturePendingGPUTransferMemSize()) {
doEvery(lastReport, 4, reportLambda);
failAfter(start, 10, "Failed to populate texture memory after 10 seconds");
renderFrame();
allocatedMemory = gpu::Context::getTextureResourceGPUMemSize();
populatedMemory = gpu::Context::getTextureResourcePopulatedGPUMemSize();
}
reportLambda();
QCOMPARE(populatedMemory, allocatedMemory);
// FIXME: work around a race condition between the reported populated size and the texture's actual _populatedMip value
for (size_t i = 0; i < _textures.size(); ++i) {
renderFrame();
}
// Test on-demand deallocation of memory
auto maxMemory = allocatedMemory / 2;
gpu::Texture::setAllowedGPUMemoryUsage(maxMemory);
// Restart the timer
start = usecTimestampNow();
// Cycle frames until the allocated memory is below the max memory
while (allocatedMemory > maxMemory || allocatedMemory != populatedMemory) {
doEvery(lastReport, 4, reportLambda);
failAfter(start, 10, "Failed to deallocate texture memory after 10 seconds");
renderFrame(renderTexturesLamdba);
allocatedMemory = gpu::Context::getTextureResourceGPUMemSize();
populatedMemory = gpu::Context::getTextureResourcePopulatedGPUMemSize();
}
reportLambda();
// Verify that the allocation is now below the target
QVERIFY(allocatedMemory <= maxMemory);
// Verify that populated memory is the same as allocated memory
QCOMPARE(populatedMemory, allocatedMemory);
// Restart the timer
start = usecTimestampNow();
// Reset the max memory to automatic
gpu::Texture::setAllowedGPUMemoryUsage(0);
// Cycle frames until we're fully reallocated and repopulated
while (allocatedMemory != expectedAllocation || allocatedMemory != populatedMemory) {
doEvery(lastReport, 4, reportLambda);
failAfter(start, 10, "Failed to populate texture memory after 10 seconds");
renderFrame();
allocatedMemory = gpu::Context::getTextureResourceGPUMemSize();
populatedMemory = gpu::Context::getTextureResourcePopulatedGPUMemSize();
}
reportLambda();
QCOMPARE(allocatedMemory, expectedAllocation);
QCOMPARE(populatedMemory, allocatedMemory);
_textures.clear();
// Cycle frames until all texture memory has been released
while (allocatedMemory != 0) {
failAfter(start, 10, "Failed to clear texture memory after 10 seconds");
renderFrame();
allocatedMemory = gpu::Context::getTextureResourceGPUMemSize();
populatedMemory = gpu::Context::getTextureResourcePopulatedGPUMemSize();
}
QCOMPARE(allocatedMemory, 0);
QCOMPARE(populatedMemory, 0);
qDebug() << "Done";
}
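
The wait loops in testTextureLoading lean on two helpers, doEvery and failAfter, that are not defined in this diff. Plausible shapes for them, assuming the engine's usecTimestampNow()/USECS_PER_SECOND utilities are in scope:

// Sketch (not part of this commit): plausible implementations of the test helpers.
#include <cstdint>
#include <functional>
#include <QtTest/QtTest>

void doEvery(uint64_t& lastTime, uint64_t intervalSecs, const std::function<void()>& f) {
    const uint64_t now = usecTimestampNow();
    if (now - lastTime >= intervalSecs * USECS_PER_SECOND) {
        lastTime = now;
        f(); // e.g. print the memory report every few seconds
    }
}

void failAfter(uint64_t startTime, uint64_t timeoutSecs, const char* message) {
    if (usecTimestampNow() - startTime >= timeoutSecs * USECS_PER_SECOND) {
        QFAIL(message); // abort the test if a wait loop has been spinning too long
    }
}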

View file

@ -21,12 +21,15 @@ private:
void beginFrame();
void endFrame();
void renderFrame(const std::function<void(gpu::Batch&)>& = [](gpu::Batch&) {});
std::vector<gpu::TexturePointer> loadTestTextures() const;
private slots:
void initTestCase();
void cleanupTestCase();
void testTextureLoading();
private:
QString _resourcesPath;
OffscreenGLCanvas _canvas;
@ -36,5 +39,5 @@ private:
gpu::TexturePointer _colorBuffer, _depthBuffer;
const glm::uvec2 _size{ 640, 480 };
std::vector<std::string> _textureFiles;
std::vector<gpu::TexturePointer> _textures;
size_t _frameCount { 0 };
};