diff --git a/libraries/gpu-gl-common/src/gpu/gl/GLBackend.cpp b/libraries/gpu-gl-common/src/gpu/gl/GLBackend.cpp index 501e59f38e..f484de57f1 100644 --- a/libraries/gpu-gl-common/src/gpu/gl/GLBackend.cpp +++ b/libraries/gpu-gl-common/src/gpu/gl/GLBackend.cpp @@ -44,9 +44,9 @@ GLBackend::CommandCall GLBackend::_commandCalls[Batch::NUM_COMMANDS] = (&::gpu::gl::GLBackend::do_setModelTransform), (&::gpu::gl::GLBackend::do_setViewTransform), - (&::gpu::gl::GLBackend::do_setProjectionTransform), - (&::gpu::gl::GLBackend::do_setProjectionJitter), - (&::gpu::gl::GLBackend::do_setViewportTransform), + (&::gpu::gl::GLBackend::do_setProjectionTransform), + (&::gpu::gl::GLBackend::do_setProjectionJitter), + (&::gpu::gl::GLBackend::do_setViewportTransform), (&::gpu::gl::GLBackend::do_setDepthRangeTransform), (&::gpu::gl::GLBackend::do_setPipeline), @@ -118,12 +118,6 @@ void GLBackend::init() { #if !defined(USE_GLES) qCDebug(gpugllogging, "V-Sync is %s\n", (::gl::getSwapInterval() > 0 ? "ON" : "OFF")); #endif -#if THREADED_TEXTURE_BUFFERING - // This has to happen on the main thread in order to give the thread - // pool a reasonable parent object - GLVariableAllocationSupport::TransferJob::startBufferingThread(); -#endif - }); } @@ -136,6 +130,7 @@ GLBackend::GLBackend() { GLBackend::~GLBackend() { killInput(); killTransform(); + killTextureManagementStage(); } void GLBackend::renderPassTransfer(const Batch& batch) { @@ -167,18 +162,18 @@ void GLBackend::renderPassTransfer(const Batch& batch) { case Batch::COMMAND_drawIndexedInstanced: case Batch::COMMAND_multiDrawIndirect: case Batch::COMMAND_multiDrawIndexedIndirect: - { - Vec2u outputSize{ 1,1 }; + { + Vec2u outputSize{ 1,1 }; - if (_output._framebuffer) { - outputSize.x = _output._framebuffer->getWidth(); - outputSize.y = _output._framebuffer->getHeight(); - } else if (glm::dot(_transform._projectionJitter, _transform._projectionJitter)>0.0f) { - qCWarning(gpugllogging) << "Jittering needs to have a frame buffer to be set"; 
- } + if (_output._framebuffer) { + outputSize.x = _output._framebuffer->getWidth(); + outputSize.y = _output._framebuffer->getHeight(); + } else if (glm::dot(_transform._projectionJitter, _transform._projectionJitter)>0.0f) { + qCWarning(gpugllogging) << "Jittering needs to have a frame buffer to be set"; + } - _transform.preUpdate(_commandIndex, _stereo, outputSize); - } + _transform.preUpdate(_commandIndex, _stereo, outputSize); + } break; case Batch::COMMAND_disableContextStereo: @@ -191,10 +186,10 @@ void GLBackend::renderPassTransfer(const Batch& batch) { case Batch::COMMAND_setViewportTransform: case Batch::COMMAND_setViewTransform: - case Batch::COMMAND_setProjectionTransform: - case Batch::COMMAND_setProjectionJitter: - { - CommandCall call = _commandCalls[(*command)]; + case Batch::COMMAND_setProjectionTransform: + case Batch::COMMAND_setProjectionJitter: + { + CommandCall call = _commandCalls[(*command)]; (this->*(call))(batch, *offset); break; } @@ -268,8 +263,8 @@ void GLBackend::render(const Batch& batch) { if (!batch.isStereoEnabled()) { _stereo._enable = false; } - // Reset jitter - _transform._projectionJitter = Vec2(0.0f, 0.0f); + // Reset jitter + _transform._projectionJitter = Vec2(0.0f, 0.0f); { PROFILE_RANGE(render_gpu_gl_detail, "Transfer"); @@ -729,9 +724,8 @@ void GLBackend::recycle() const { glDeleteQueries((GLsizei)ids.size(), ids.data()); } } - - GLVariableAllocationSupport::manageMemory(); - GLVariableAllocationSupport::_frameTexturesCreated = 0; + + _textureManagement._transferEngine->manageMemory(); Texture::KtxStorage::releaseOpenKtxFiles(); } diff --git a/libraries/gpu-gl-common/src/gpu/gl/GLBackend.h b/libraries/gpu-gl-common/src/gpu/gl/GLBackend.h index 314bbee387..32c75d0363 100644 --- a/libraries/gpu-gl-common/src/gpu/gl/GLBackend.h +++ b/libraries/gpu-gl-common/src/gpu/gl/GLBackend.h @@ -491,8 +491,10 @@ protected: struct TextureManagementStageState { bool _sparseCapable { false }; + GLTextureTransferEnginePointer 
_transferEngine; } _textureManagement; - virtual void initTextureManagementStage() {} + virtual void initTextureManagementStage(); + virtual void killTextureManagementStage(); typedef void (GLBackend::*CommandCall)(const Batch&, size_t); static CommandCall _commandCalls[Batch::NUM_COMMANDS]; diff --git a/libraries/gpu-gl-common/src/gpu/gl/GLShared.h b/libraries/gpu-gl-common/src/gpu/gl/GLShared.h index ccdf0a5c41..f67439f96a 100644 --- a/libraries/gpu-gl-common/src/gpu/gl/GLShared.h +++ b/libraries/gpu-gl-common/src/gpu/gl/GLShared.h @@ -137,6 +137,8 @@ class GLQuery; class GLState; class GLShader; class GLTexture; +class GLTextureTransferEngine; +using GLTextureTransferEnginePointer = std::shared_ptr; struct ShaderObject; } } // namespace gpu::gl diff --git a/libraries/gpu-gl-common/src/gpu/gl/GLTexture.cpp b/libraries/gpu-gl-common/src/gpu/gl/GLTexture.cpp index 943b8148ef..394b44166f 100644 --- a/libraries/gpu-gl-common/src/gpu/gl/GLTexture.cpp +++ b/libraries/gpu-gl-common/src/gpu/gl/GLTexture.cpp @@ -48,6 +48,14 @@ const GLFilterMode GLTexture::FILTER_MODES[Sampler::NUM_FILTERS] = { { GL_LINEAR_MIPMAP_LINEAR, GL_LINEAR } //FILTER_ANISOTROPIC, }; +static constexpr size_t MAX_PIXEL_BYTE_SIZE{ 4 }; +static constexpr size_t MAX_TRANSFER_DIMENSION{ 1024 }; + +const uvec3 GLVariableAllocationSupport::MAX_TRANSFER_DIMENSIONS{ MAX_TRANSFER_DIMENSION, MAX_TRANSFER_DIMENSION, 1 }; +const uvec3 GLVariableAllocationSupport::INITIAL_MIP_TRANSFER_DIMENSIONS{ 64, 64, 1 }; +const size_t GLVariableAllocationSupport::MAX_TRANSFER_SIZE = MAX_TRANSFER_DIMENSION * MAX_TRANSFER_DIMENSION * MAX_PIXEL_BYTE_SIZE; +const size_t GLVariableAllocationSupport::MAX_BUFFER_SIZE = MAX_TRANSFER_SIZE; + GLenum GLTexture::getGLTextureType(const Texture& texture) { switch (texture.getType()) { case Texture::TEX_2D: @@ -131,7 +139,6 @@ Size GLTexture::copyMipFaceFromTexture(uint16_t sourceMip, uint16_t targetMip, u return 0; } - GLExternalTexture::GLExternalTexture(const std::weak_ptr& backend, 
const Texture& texture, GLuint id) : Parent(backend, texture, id) { Backend::textureExternalCount.increment(); @@ -151,65 +158,58 @@ GLExternalTexture::~GLExternalTexture() { Backend::textureExternalCount.decrement(); } - -// Variable sized textures -using MemoryPressureState = GLVariableAllocationSupport::MemoryPressureState; -using WorkQueue = GLVariableAllocationSupport::WorkQueue; -using TransferJobPointer = GLVariableAllocationSupport::TransferJobPointer; - -std::list GLVariableAllocationSupport::_memoryManagedTextures; -MemoryPressureState GLVariableAllocationSupport::_memoryPressureState { MemoryPressureState::Idle }; -std::atomic GLVariableAllocationSupport::_memoryPressureStateStale { false }; -const uvec3 GLVariableAllocationSupport::INITIAL_MIP_TRANSFER_DIMENSIONS { 64, 64, 1 }; -WorkQueue GLVariableAllocationSupport::_transferQueue; -WorkQueue GLVariableAllocationSupport::_promoteQueue; -WorkQueue GLVariableAllocationSupport::_demoteQueue; -size_t GLVariableAllocationSupport::_frameTexturesCreated { 0 }; - -#define OVERSUBSCRIBED_PRESSURE_VALUE 0.95f -#define UNDERSUBSCRIBED_PRESSURE_VALUE 0.85f -#define DEFAULT_ALLOWED_TEXTURE_MEMORY_MB ((size_t)1024) - -static const size_t DEFAULT_ALLOWED_TEXTURE_MEMORY = MB_TO_BYTES(DEFAULT_ALLOWED_TEXTURE_MEMORY_MB); - -using TransferJob = GLVariableAllocationSupport::TransferJob; - -const uvec3 GLVariableAllocationSupport::MAX_TRANSFER_DIMENSIONS { 1024, 1024, 1 }; -const size_t GLVariableAllocationSupport::MAX_TRANSFER_SIZE = GLVariableAllocationSupport::MAX_TRANSFER_DIMENSIONS.x * GLVariableAllocationSupport::MAX_TRANSFER_DIMENSIONS.y * 4; - -#if THREADED_TEXTURE_BUFFERING - -TexturePointer GLVariableAllocationSupport::_currentTransferTexture; -TransferJobPointer GLVariableAllocationSupport::_currentTransferJob; -QThreadPool* TransferJob::_bufferThreadPool { nullptr }; - -void TransferJob::startBufferingThread() { - static std::once_flag once; - std::call_once(once, [&] { - _bufferThreadPool = new 
QThreadPool(qApp); - _bufferThreadPool->setMaxThreadCount(1); - }); +GLVariableAllocationSupport::GLVariableAllocationSupport() { } -#endif +GLVariableAllocationSupport::~GLVariableAllocationSupport() { +} -TransferJob::TransferJob(const GLTexture& parent, uint16_t sourceMip, uint16_t targetMip, uint8_t face, uint32_t lines, uint32_t lineOffset) - : _parent(parent) { +void GLVariableAllocationSupport::incrementPopulatedSize(Size delta) const { + _populatedSize += delta; + // Keep the 2 code paths to be able to debug + if (_size < _populatedSize) { + Backend::textureResourcePopulatedGPUMemSize.update(0, delta); + } else { + Backend::textureResourcePopulatedGPUMemSize.update(0, delta); + } +} - auto transferDimensions = _parent._gpuObject.evalMipDimensions(sourceMip); +void GLVariableAllocationSupport::decrementPopulatedSize(Size delta) const { + _populatedSize -= delta; + // Keep the 2 code paths to be able to debug + if (_size < _populatedSize) { + Backend::textureResourcePopulatedGPUMemSize.update(delta, 0); + } else { + Backend::textureResourcePopulatedGPUMemSize.update(delta, 0); + } +} + +void GLVariableAllocationSupport::sanityCheck() const { + if (_populatedMip < _allocatedMip) { + qCWarning(gpugllogging) << "Invalid mip levels"; + } +} + +TransferJob::TransferJob(const Texture& texture, + uint16_t sourceMip, + uint16_t targetMip, + uint8_t face, + uint32_t lines, + uint32_t lineOffset) { + auto transferDimensions = texture.evalMipDimensions(sourceMip); GLenum format; GLenum internalFormat; GLenum type; - GLTexelFormat texelFormat = GLTexelFormat::evalGLTexelFormat(_parent._gpuObject.getTexelFormat(), _parent._gpuObject.getStoredMipFormat()); + GLTexelFormat texelFormat = GLTexelFormat::evalGLTexelFormat(texture.getTexelFormat(), texture.getStoredMipFormat()); format = texelFormat.format; internalFormat = texelFormat.internalFormat; type = texelFormat.type; - _transferSize = _parent._gpuObject.getStoredMipFaceSize(sourceMip, face); + _transferSize = 
texture.getStoredMipFaceSize(sourceMip, face); // If we're copying a subsection of the mip, do additional calculations to find the size and offset of the segment if (0 != lines) { transferDimensions.y = lines; - auto dimensions = _parent._gpuObject.evalMipDimensions(sourceMip); + auto dimensions = texture.evalMipDimensions(sourceMip); auto bytesPerLine = (uint32_t)_transferSize / dimensions.y; _transferOffset = bytesPerLine * lineOffset; _transferSize = bytesPerLine * lines; @@ -222,481 +222,34 @@ TransferJob::TransferJob(const GLTexture& parent, uint16_t sourceMip, uint16_t t } // Buffering can invoke disk IO, so it should be off of the main and render threads - _bufferingLambda = [=] { - auto mipStorage = _parent._gpuObject.accessStoredMipFace(sourceMip, face); + _bufferingLambda = [=](const TexturePointer& texture) { + auto mipStorage = texture->accessStoredMipFace(sourceMip, face); if (mipStorage) { _mipData = mipStorage->createView(_transferSize, _transferOffset); } else { - qCWarning(gpugllogging) << "Buffering failed because mip could not be retrieved from texture " << _parent._source.c_str() ; + qCWarning(gpugllogging) << "Buffering failed because mip could not be retrieved from texture " + << texture->source().c_str(); } }; - _transferLambda = [=] { + _transferLambda = [=](const TexturePointer& texture) { if (_mipData) { - _parent.copyMipFaceLinesFromTexture(targetMip, face, transferDimensions, lineOffset, internalFormat, format, type, _mipData->size(), _mipData->readData()); + auto gltexture = Backend::getGPUObject(*texture); + ; + gltexture->copyMipFaceLinesFromTexture(targetMip, face, transferDimensions, lineOffset, internalFormat, format, + type, _mipData->size(), _mipData->readData()); _mipData.reset(); } else { - qCWarning(gpugllogging) << "Transfer failed because mip could not be retrieved from texture " << _parent._source.c_str(); + qCWarning(gpugllogging) << "Transfer failed because mip could not be retrieved from texture " + << 
texture->source().c_str(); } }; } -TransferJob::TransferJob(const GLTexture& parent, std::function transferLambda) - : _parent(parent), _bufferingRequired(false), _transferLambda(transferLambda) { -} +TransferJob::TransferJob(const std::function& transferLambda) : + _bufferingRequired(false), _transferLambda([=](const TexturePointer&) { transferLambda(); }) {} TransferJob::~TransferJob() { Backend::texturePendingGPUTransferMemSize.update(_transferSize, 0); } -bool TransferJob::tryTransfer() { -#if THREADED_TEXTURE_BUFFERING - // Are we ready to transfer - if (!bufferingCompleted()) { - startBuffering(); - return false; - } -#else - if (_bufferingRequired) { - _bufferingLambda(); - } -#endif - _transferLambda(); - return true; -} - -#if THREADED_TEXTURE_BUFFERING -bool TransferJob::bufferingRequired() const { - if (!_bufferingRequired) { - return false; - } - - // The default state of a QFuture is with status Canceled | Started | Finished, - // so we have to check isCancelled before we check the actual state - if (_bufferingStatus.isCanceled()) { - return true; - } - - return !_bufferingStatus.isStarted(); -} - -bool TransferJob::bufferingCompleted() const { - if (!_bufferingRequired) { - return true; - } - - // The default state of a QFuture is with status Canceled | Started | Finished, - // so we have to check isCancelled before we check the actual state - if (_bufferingStatus.isCanceled()) { - return false; - } - - return _bufferingStatus.isFinished(); -} - -void TransferJob::startBuffering() { - if (bufferingRequired()) { - assert(_bufferingStatus.isCanceled()); - _bufferingStatus = QtConcurrent::run(_bufferThreadPool, [=] { - _bufferingLambda(); - }); - assert(!_bufferingStatus.isCanceled()); - assert(_bufferingStatus.isStarted()); - } -} -#endif - -GLVariableAllocationSupport::GLVariableAllocationSupport() { - _memoryPressureStateStale = true; -} - -GLVariableAllocationSupport::~GLVariableAllocationSupport() { - _memoryPressureStateStale = true; -} - -void 
GLVariableAllocationSupport::addMemoryManagedTexture(const TexturePointer& texturePointer) { - _memoryManagedTextures.push_back(texturePointer); - if (MemoryPressureState::Idle != _memoryPressureState) { - addToWorkQueue(texturePointer); - } -} - -void GLVariableAllocationSupport::addToWorkQueue(const TexturePointer& texturePointer) { - GLTexture* gltexture = Backend::getGPUObject(*texturePointer); - GLVariableAllocationSupport* vargltexture = dynamic_cast(gltexture); - switch (_memoryPressureState) { - case MemoryPressureState::Oversubscribed: - if (vargltexture->canDemote()) { - // Demote largest first - _demoteQueue.push({ texturePointer, (float)gltexture->size() }); - } - break; - - case MemoryPressureState::Undersubscribed: - if (vargltexture->canPromote()) { - // Promote smallest first - _promoteQueue.push({ texturePointer, 1.0f / (float)gltexture->size() }); - } - break; - - case MemoryPressureState::Transfer: - if (vargltexture->hasPendingTransfers()) { - // Transfer priority given to smaller mips first - _transferQueue.push({ texturePointer, 1.0f / (float)gltexture->_gpuObject.evalMipSize(vargltexture->_populatedMip) }); - } - break; - - case MemoryPressureState::Idle: - Q_UNREACHABLE(); - break; - } -} - -WorkQueue& GLVariableAllocationSupport::getActiveWorkQueue() { - static WorkQueue empty; - switch (_memoryPressureState) { - case MemoryPressureState::Oversubscribed: - return _demoteQueue; - - case MemoryPressureState::Undersubscribed: - return _promoteQueue; - - case MemoryPressureState::Transfer: - return _transferQueue; - - case MemoryPressureState::Idle: - Q_UNREACHABLE(); - break; - } - return empty; -} - -// FIXME hack for stats display -QString getTextureMemoryPressureModeString() { - switch (GLVariableAllocationSupport::_memoryPressureState) { - case MemoryPressureState::Oversubscribed: - return "Oversubscribed"; - - case MemoryPressureState::Undersubscribed: - return "Undersubscribed"; - - case MemoryPressureState::Transfer: - return 
"Transfer"; - - case MemoryPressureState::Idle: - return "Idle"; - } - Q_UNREACHABLE(); - return "Unknown"; -} - -void GLVariableAllocationSupport::updateMemoryPressure() { - static size_t lastAllowedMemoryAllocation = gpu::Texture::getAllowedGPUMemoryUsage(); - - size_t allowedMemoryAllocation = gpu::Texture::getAllowedGPUMemoryUsage(); - if (0 == allowedMemoryAllocation) { - allowedMemoryAllocation = DEFAULT_ALLOWED_TEXTURE_MEMORY; - } - - // If the user explicitly changed the allowed memory usage, we need to mark ourselves stale - // so that we react - if (allowedMemoryAllocation != lastAllowedMemoryAllocation) { - _memoryPressureStateStale = true; - lastAllowedMemoryAllocation = allowedMemoryAllocation; - } - - if (!_memoryPressureStateStale.exchange(false)) { - return; - } - - PROFILE_RANGE(render_gpu_gl, __FUNCTION__); - - // Clear any defunct textures (weak pointers that no longer have a valid texture) - _memoryManagedTextures.remove_if([&](const TextureWeakPointer& weakPointer) { - return weakPointer.expired(); - }); - - // Convert weak pointers to strong. 
This new list may still contain nulls if a texture was - // deleted on another thread between the previous line and this one - std::vector strongTextures; { - strongTextures.reserve(_memoryManagedTextures.size()); - std::transform( - _memoryManagedTextures.begin(), _memoryManagedTextures.end(), - std::back_inserter(strongTextures), - [](const TextureWeakPointer& p) { return p.lock(); }); - } - - size_t totalVariableMemoryAllocation = 0; - size_t idealMemoryAllocation = 0; - bool canDemote = false; - bool canPromote = false; - bool hasTransfers = false; - for (const auto& texture : strongTextures) { - // Race conditions can still leave nulls in the list, so we need to check - if (!texture) { - continue; - } - GLTexture* gltexture = Backend::getGPUObject(*texture); - GLVariableAllocationSupport* vartexture = dynamic_cast(gltexture); - // Track how much the texture thinks it should be using - idealMemoryAllocation += texture->evalTotalSize(); - // Track how much we're actually using - totalVariableMemoryAllocation += gltexture->size(); - canDemote |= vartexture->canDemote(); - canPromote |= vartexture->canPromote(); - hasTransfers |= vartexture->hasPendingTransfers(); - } - - size_t unallocated = idealMemoryAllocation - totalVariableMemoryAllocation; - float pressure = (float)totalVariableMemoryAllocation / (float)allowedMemoryAllocation; - - auto newState = MemoryPressureState::Idle; - if (pressure < UNDERSUBSCRIBED_PRESSURE_VALUE && (unallocated != 0 && canPromote)) { - newState = MemoryPressureState::Undersubscribed; - } else if (pressure > OVERSUBSCRIBED_PRESSURE_VALUE && canDemote) { - newState = MemoryPressureState::Oversubscribed; - } else if (hasTransfers) { - newState = MemoryPressureState::Transfer; - } - - if (newState != _memoryPressureState) { - _memoryPressureState = newState; - // Clear the existing queue - _transferQueue = WorkQueue(); - _promoteQueue = WorkQueue(); - _demoteQueue = WorkQueue(); - - // Populate the existing textures into the queue - if 
(_memoryPressureState != MemoryPressureState::Idle) { - for (const auto& texture : strongTextures) { - // Race conditions can still leave nulls in the list, so we need to check - if (!texture) { - continue; - } - addToWorkQueue(texture); - } - } - } -} - -TexturePointer GLVariableAllocationSupport::getNextWorkQueueItem(WorkQueue& workQueue) { - while (!workQueue.empty()) { - auto workTarget = workQueue.top(); - - auto texture = workTarget.first.lock(); - if (!texture) { - workQueue.pop(); - continue; - } - - // Check whether the resulting texture can actually have work performed - GLTexture* gltexture = Backend::getGPUObject(*texture); - GLVariableAllocationSupport* vartexture = dynamic_cast(gltexture); - switch (_memoryPressureState) { - case MemoryPressureState::Oversubscribed: - if (vartexture->canDemote()) { - return texture; - } - break; - - case MemoryPressureState::Undersubscribed: - if (vartexture->canPromote()) { - return texture; - } - break; - - case MemoryPressureState::Transfer: - if (vartexture->hasPendingTransfers()) { - return texture; - } - break; - - case MemoryPressureState::Idle: - Q_UNREACHABLE(); - break; - } - - // If we got here, then the texture has no work to do in the current state, - // so pop it off the queue and continue - workQueue.pop(); - } - - return TexturePointer(); -} - -void GLVariableAllocationSupport::processWorkQueue(WorkQueue& workQueue) { - if (workQueue.empty()) { - return; - } - - // Get the front of the work queue to perform work - auto texture = getNextWorkQueueItem(workQueue); - if (!texture) { - return; - } - - // Grab the first item off the demote queue - PROFILE_RANGE(render_gpu_gl, __FUNCTION__); - - GLTexture* gltexture = Backend::getGPUObject(*texture); - GLVariableAllocationSupport* vartexture = dynamic_cast(gltexture); - switch (_memoryPressureState) { - case MemoryPressureState::Oversubscribed: - vartexture->demote(); - workQueue.pop(); - addToWorkQueue(texture); - _memoryPressureStateStale = true; - break; - 
- case MemoryPressureState::Undersubscribed: - vartexture->promote(); - workQueue.pop(); - addToWorkQueue(texture); - _memoryPressureStateStale = true; - break; - - case MemoryPressureState::Transfer: - if (vartexture->executeNextTransfer(texture)) { - workQueue.pop(); - addToWorkQueue(texture); - -#if THREADED_TEXTURE_BUFFERING - // Eagerly start the next buffering job if possible - texture = getNextWorkQueueItem(workQueue); - if (texture) { - gltexture = Backend::getGPUObject(*texture); - vartexture = dynamic_cast(gltexture); - vartexture->executeNextBuffer(texture); - } -#endif - } - break; - - case MemoryPressureState::Idle: - Q_UNREACHABLE(); - break; - } -} - -void GLVariableAllocationSupport::processWorkQueues() { - if (MemoryPressureState::Idle == _memoryPressureState) { - return; - } - - auto& workQueue = getActiveWorkQueue(); - // Do work on the front of the queue - processWorkQueue(workQueue); - - if (workQueue.empty()) { - _memoryPressureState = MemoryPressureState::Idle; - _memoryPressureStateStale = true; - } -} - -void GLVariableAllocationSupport::manageMemory() { - PROFILE_RANGE(render_gpu_gl, __FUNCTION__); - updateMemoryPressure(); - processWorkQueues(); -} - -bool GLVariableAllocationSupport::executeNextTransfer(const TexturePointer& currentTexture) { -#if THREADED_TEXTURE_BUFFERING - // If a transfer job is active on the buffering thread, but has not completed it's buffering lambda, - // then we need to exit early, since we don't want to have the transfer job leave scope while it's - // being used in another thread -- See https://highfidelity.fogbugz.com/f/cases/4626 - if (_currentTransferJob && !_currentTransferJob->bufferingCompleted()) { - return false; - } -#endif - - if (_populatedMip <= _allocatedMip) { -#if THREADED_TEXTURE_BUFFERING - _currentTransferJob.reset(); - _currentTransferTexture.reset(); -#endif - return true; - } - - // If the transfer queue is empty, rebuild it - if (_pendingTransfers.empty()) { - populateTransferQueue(); - } 
- - bool result = false; - if (!_pendingTransfers.empty()) { -#if THREADED_TEXTURE_BUFFERING - // If there is a current transfer, but it's not the top of the pending transfer queue, then it's an orphan, so we want to abandon it. - if (_currentTransferJob && _currentTransferJob != _pendingTransfers.front()) { - _currentTransferJob.reset(); - } - - if (!_currentTransferJob) { - // Keeping hold of a strong pointer to the transfer job ensures that if the pending transfer queue is rebuilt, the transfer job - // doesn't leave scope, causing a crash in the buffering thread - _currentTransferJob = _pendingTransfers.front(); - - // Keeping hold of a strong pointer during the transfer ensures that the transfer thread cannot try to access a destroyed texture - _currentTransferTexture = currentTexture; - } - - // transfer jobs use asynchronous buffering of the texture data because it may involve disk IO, so we execute a try here to determine if the buffering - // is complete - if (_currentTransferJob->tryTransfer()) { - _pendingTransfers.pop(); - // Once a given job is finished, release the shared pointers keeping them alive - _currentTransferTexture.reset(); - _currentTransferJob.reset(); - result = true; - } -#else - if (_pendingTransfers.front()->tryTransfer()) { - _pendingTransfers.pop(); - result = true; - } -#endif - } - return result; -} - -#if THREADED_TEXTURE_BUFFERING -void GLVariableAllocationSupport::executeNextBuffer(const TexturePointer& currentTexture) { - if (_currentTransferJob && !_currentTransferJob->bufferingCompleted()) { - return; - } - - // If the transfer queue is empty, rebuild it - if (_pendingTransfers.empty()) { - populateTransferQueue(); - } - - if (!_pendingTransfers.empty()) { - if (!_currentTransferJob) { - _currentTransferJob = _pendingTransfers.front(); - _currentTransferTexture = currentTexture; - } - - _currentTransferJob->startBuffering(); - } -} -#endif - -void GLVariableAllocationSupport::incrementPopulatedSize(Size delta) const { - 
_populatedSize += delta; - // Keep the 2 code paths to be able to debug - if (_size < _populatedSize) { - Backend::textureResourcePopulatedGPUMemSize.update(0, delta); - } else { - Backend::textureResourcePopulatedGPUMemSize.update(0, delta); - } -} -void GLVariableAllocationSupport::decrementPopulatedSize(Size delta) const { - _populatedSize -= delta; - // Keep the 2 code paths to be able to debug - if (_size < _populatedSize) { - Backend::textureResourcePopulatedGPUMemSize.update(delta, 0); - } else { - Backend::textureResourcePopulatedGPUMemSize.update(delta, 0); - } -} - - diff --git a/libraries/gpu-gl-common/src/gpu/gl/GLTexture.h b/libraries/gpu-gl-common/src/gpu/gl/GLTexture.h index c2483eb2a1..5ace804683 100644 --- a/libraries/gpu-gl-common/src/gpu/gl/GLTexture.h +++ b/libraries/gpu-gl-common/src/gpu/gl/GLTexture.h @@ -16,8 +16,6 @@ #include "GLTexelFormat.h" #include -#define THREADED_TEXTURE_BUFFERING 1 - namespace gpu { namespace gl { struct GLFilterMode { @@ -25,107 +23,92 @@ struct GLFilterMode { GLint magFilter; }; +class GLTextureTransferEngine { +public: + using Pointer = std::shared_ptr; + /// Called once per frame to perform any required memory management or transfer work + virtual void manageMemory() = 0; + virtual void shutdown() = 0; + + /// Called whenever a client wants to create a new texture. 
This allows the transfer engine to + /// potentially limit the number of GL textures created per frame + bool allowCreate() const { return _frameTexturesCreated < MAX_RESOURCE_TEXTURES_PER_FRAME; } + /// Called whenever a client creates a new resource texture that should use managed memory + /// and incremental transfer + void addMemoryManagedTexture(const TexturePointer& texturePointer); + +protected: + // Fetch all the currently active textures as strong pointers, while clearing the + // empty weak pointers out of _registeredTextures + std::vector getAllTextures(); + void resetFrameTextureCreated() { _frameTexturesCreated = 0; } + +private: + static const size_t MAX_RESOURCE_TEXTURES_PER_FRAME{ 2 }; + size_t _frameTexturesCreated{ 0 }; + std::list _registeredTextures; +}; + +/** + A transfer job encapsulates an individual piece of work required to upload texture data to the GPU. + The work can be broken down into two parts, expressed as lambdas. The buffering lambda is responsible + for putting the data to be uploaded into a CPU memory buffer. The transfer lambda is responsible for + uploading the data from the CPU memory buffer to the GPU using OpenGL calls. 
Ideally the buffering lambda + will be executed on a separate thread from the OpenGL work to ensure that disk IO operations do not block + OpenGL calls + + Additionally, a TransferJob can encapsulate some kind of post-upload work that changes the state of the + GLTexture derived object wrapping the actual texture ID, such as changing the _populatedMip value once + a given mip level has been completely uploaded + */ +class TransferJob { +public: + using Pointer = std::shared_ptr; + using Queue = std::queue; + using Lambda = std::function; +private: + Texture::PixelsPointer _mipData; + size_t _transferOffset{ 0 }; + size_t _transferSize{ 0 }; + bool _bufferingRequired{ true }; + Lambda _transferLambda{ [](const TexturePointer&) {} }; + Lambda _bufferingLambda{ [](const TexturePointer&) {} }; +public: + TransferJob(const TransferJob& other) = delete; + TransferJob(const std::function& transferLambda); + TransferJob(const Texture& texture, uint16_t sourceMip, uint16_t targetMip, uint8_t face, uint32_t lines = 0, uint32_t lineOffset = 0); + ~TransferJob(); + const size_t& size() const { return _transferSize; } + bool bufferingRequired() const { return _bufferingRequired; } + void buffer(const TexturePointer& texture) { _bufferingLambda(texture); } + void transfer(const TexturePointer& texture) { _transferLambda(texture); } +}; + +using TransferJobPointer = std::shared_ptr; +using TransferQueue = std::queue; + class GLVariableAllocationSupport { friend class GLBackend; public: GLVariableAllocationSupport(); virtual ~GLVariableAllocationSupport(); + virtual void populateTransferQueue(TransferQueue& pendingTransfers) = 0; - enum class MemoryPressureState { - Idle, - Transfer, - Oversubscribed, - Undersubscribed, - }; - - using QueuePair = std::pair; - struct QueuePairLess { - bool operator()(const QueuePair& a, const QueuePair& b) { - return a.second < b.second; - } - }; - using WorkQueue = std::priority_queue, QueuePairLess>; - - class TransferJob { - using VoidLambda = 
std::function; - using VoidLambdaQueue = std::queue; - const GLTexture& _parent; - Texture::PixelsPointer _mipData; - size_t _transferOffset { 0 }; - size_t _transferSize { 0 }; - - bool _bufferingRequired { true }; - VoidLambda _transferLambda; - VoidLambda _bufferingLambda; - -#if THREADED_TEXTURE_BUFFERING - // Indicates if a transfer from backing storage to interal storage has started - QFuture _bufferingStatus; - static QThreadPool* _bufferThreadPool; -#endif - - public: - TransferJob(const TransferJob& other) = delete; - TransferJob(const GLTexture& parent, std::function transferLambda); - TransferJob(const GLTexture& parent, uint16_t sourceMip, uint16_t targetMip, uint8_t face, uint32_t lines = 0, uint32_t lineOffset = 0); - ~TransferJob(); - bool tryTransfer(); - -#if THREADED_TEXTURE_BUFFERING - void startBuffering(); - bool bufferingRequired() const; - bool bufferingCompleted() const; - static void startBufferingThread(); -#endif - - private: - void transfer(); - }; - - using TransferJobPointer = std::shared_ptr; - using TransferQueue = std::queue; - static MemoryPressureState _memoryPressureState; - -public: - static void addMemoryManagedTexture(const TexturePointer& texturePointer); - -protected: - static size_t _frameTexturesCreated; - static std::atomic _memoryPressureStateStale; - static std::list _memoryManagedTextures; - static WorkQueue _transferQueue; - static WorkQueue _promoteQueue; - static WorkQueue _demoteQueue; -#if THREADED_TEXTURE_BUFFERING - static TexturePointer _currentTransferTexture; - static TransferJobPointer _currentTransferJob; -#endif - static const uvec3 INITIAL_MIP_TRANSFER_DIMENSIONS; - static const uvec3 MAX_TRANSFER_DIMENSIONS; - static const size_t MAX_TRANSFER_SIZE; - - - static void updateMemoryPressure(); - static void processWorkQueues(); - static void processWorkQueue(WorkQueue& workQueue); - static TexturePointer getNextWorkQueueItem(WorkQueue& workQueue); - static void addToWorkQueue(const TexturePointer& texture); 
- static WorkQueue& getActiveWorkQueue(); - - static void manageMemory(); - - //bool canPromoteNoAllocate() const { return _allocatedMip < _populatedMip; } + void sanityCheck() const; bool canPromote() const { return _allocatedMip > _minAllocatedMip; } bool canDemote() const { return _allocatedMip < _maxAllocatedMip; } bool hasPendingTransfers() const { return _populatedMip > _allocatedMip; } -#if THREADED_TEXTURE_BUFFERING - void executeNextBuffer(const TexturePointer& currentTexture); -#endif - bool executeNextTransfer(const TexturePointer& currentTexture); - virtual void populateTransferQueue() = 0; - virtual void promote() = 0; - virtual void demote() = 0; + + virtual size_t promote() = 0; + virtual size_t demote() = 0; + + static const uvec3 MAX_TRANSFER_DIMENSIONS; + static const uvec3 INITIAL_MIP_TRANSFER_DIMENSIONS; + static const size_t MAX_TRANSFER_SIZE; + static const size_t MAX_BUFFER_SIZE; + +protected: // THe amount of memory currently allocated Size _size { 0 }; @@ -148,10 +131,6 @@ protected: // The lowest (highest resolution) mip that we will support, relative to the number // of mips in the gpu::Texture object uint16 _minAllocatedMip { 0 }; - // Contains a series of lambdas that when executed will transfer data to the GPU, modify - // the _populatedMip and update the sampler in order to fully populate the allocated texture - // until _populatedMip == _allocatedMip - TransferQueue _pendingTransfers; }; class GLTexture : public GLObject { @@ -172,6 +151,9 @@ public: static const std::vector& getFaceTargets(GLenum textureType); static uint8_t getFaceCount(GLenum textureType); static GLenum getGLTextureType(const Texture& texture); + virtual Size size() const = 0; + virtual Size copyMipFaceLinesFromTexture(uint16_t mip, uint8_t face, const uvec3& size, uint32_t yOffset, GLenum internalFormat, GLenum format, GLenum type, Size sourceSize, const void* sourcePointer) const = 0; + virtual Size copyMipFaceFromTexture(uint16_t sourceMip, uint16_t targetMip, 
uint8_t face) const final; static const uint8_t TEXTURE_2D_NUM_FACES = 1; static const uint8_t TEXTURE_CUBE_NUM_FACES = 6; @@ -180,12 +162,9 @@ public: static const GLenum WRAP_MODES[Sampler::NUM_WRAP_MODES]; protected: - virtual Size size() const = 0; virtual void generateMips() const = 0; virtual void syncSampler() const = 0; - virtual Size copyMipFaceLinesFromTexture(uint16_t mip, uint8_t face, const uvec3& size, uint32_t yOffset, GLenum internalFormat, GLenum format, GLenum type, Size sourceSize, const void* sourcePointer) const = 0; - virtual Size copyMipFaceFromTexture(uint16_t sourceMip, uint16_t targetMip, uint8_t face) const final; virtual void copyTextureMipsInGPUMem(GLuint srcId, GLuint destId, uint16_t srcMipOffset, uint16_t destMipOffset, uint16_t populatedMips) {} // Only relevant for Variable Allocation textures GLTexture(const std::weak_ptr& backend, const Texture& texture, GLuint id); @@ -205,7 +184,6 @@ protected: Size size() const override { return 0; } }; - } } #endif diff --git a/libraries/gpu-gl-common/src/gpu/gl/GLTextureTransfer.cpp b/libraries/gpu-gl-common/src/gpu/gl/GLTextureTransfer.cpp new file mode 100644 index 0000000000..ae0a68e1e9 --- /dev/null +++ b/libraries/gpu-gl-common/src/gpu/gl/GLTextureTransfer.cpp @@ -0,0 +1,502 @@ +// +// Created by Bradley Austin Davis on 2016/05/15 +// Copyright 2013-2016 High Fidelity, Inc. +// +// Distributed under the Apache License, Version 2.0. 
+// See the accompanying file LICENSE or http://www.apache.org/licenses/LICENSE-2.0.html +// + +#include "GLTexture.h" + +#include +#include + +#include "GLBackend.h" + +#define OVERSUBSCRIBED_PRESSURE_VALUE 0.95f +#define UNDERSUBSCRIBED_PRESSURE_VALUE 0.85f +#define DEFAULT_ALLOWED_TEXTURE_MEMORY_MB ((size_t)1024) +#define MAX_RESOURCE_TEXTURES_PER_FRAME 2 +#define NO_BUFFER_WORK_SLEEP_TIME_MS 2 +#define THREADED_TEXTURE_BUFFERING 1 + +static const size_t DEFAULT_ALLOWED_TEXTURE_MEMORY = MB_TO_BYTES(DEFAULT_ALLOWED_TEXTURE_MEMORY_MB); + +namespace gpu { namespace gl { + +enum class MemoryPressureState +{ + Idle, + Transfer, + Undersubscribed, +}; + +static MemoryPressureState _memoryPressureState{ MemoryPressureState::Idle }; + +template +struct LessPairSecond { + bool operator()(const T& a, const T& b) { return a.second < b.second; } +}; + +using QueuePair = std::pair; +// Contains a priority sorted list of textures on which work is to be done over many frames +// Uses a weak pointer to the texture to avoid keeping it in scope if the client stops using it +using WorkQueue = std::priority_queue, LessPairSecond>; + + +using ImmediateQueuePair = std::pair; +// Contains a priority sorted list of textures on which work is to be done in the current frame +using ImmediateWorkQueue = std::priority_queue, LessPairSecond>; + +// A map of weak texture pointers to queues of work to be done to transfer their data from the backing store to the GPU +using TransferMap = std::map>; + +class GLTextureTransferEngineDefault : public GLTextureTransferEngine { + using Parent = GLTextureTransferEngine; +public: + // Called once per frame by the GLBackend to manage texture memory + // Will deallocate textures if oversubscribed, + void manageMemory() override; + void shutdown() override; + +protected: + class TextureBufferThread : public QThread { + public: + TextureBufferThread(GLTextureTransferEngineDefault& parent) : _parent(parent) { start(); } + + protected: + void run() override { 
+ while (!_parent._shutdown) { + if (!_parent.processActiveBufferQueue()) { + QThread::msleep(NO_BUFFER_WORK_SLEEP_TIME_MS); + } + } + } + + GLTextureTransferEngineDefault& _parent; + }; + + using ActiveTransferJob = std::pair; + using ActiveTransferQueue = std::list; + + void populateActiveBufferQueue(); + bool processActiveBufferQueue(); + void processTransferQueues(); + void populateTransferQueue(const TexturePointer& texturePointer); + //void addToWorkQueue(const TexturePointer& texturePointer); + void updateMemoryPressure(); + + void processDemotes(size_t relief, const std::vector& strongTextures); + void processPromotes(); + +private: + std::atomic _shutdown{ false }; + // Contains a priority sorted list of weak texture pointers that have been determined to be eligible for additional allocation + // While the memory state is 'undersubscribed', items will be removed from this list and processed, allocating additional memory + // per frame + WorkQueue _promoteQueue; + // This queue contains jobs that will buffer data from the texture backing store (ideally a memory mapped KTX file) + // to a CPU memory buffer. This queue is populated on the main GPU thread, and drained on a dedicated thread. + // When an item on the _activeBufferQueue is completed it is put into the _activeTransferQueue + ActiveTransferQueue _activeBufferQueue; + // This queue contains jobs that will upload data from a CPU buffer into a GPU. 
This queue is populated on the background + // thread that process the _activeBufferQueue and drained on the main GPU thread + ActiveTransferQueue _activeTransferQueue; + // Mutex protecting the _activeTransferQueue & _activeBufferQueue since they are each accessed both from the main GPU thread + // and the buffering thread + Mutex _bufferMutex; + // The buffering thread which drains the _activeBufferQueue and populates the _activeTransferQueue + TextureBufferThread* _transferThread{ nullptr }; + // The amount of buffering work currently represented by the _activeBufferQueue + size_t _queuedBufferSize{ 0 }; + // This contains a map of all textures to queues of pending transfer jobs. While in the transfer state, this map is used to + // populate the _activeBufferQueue up to the limit specified in GLVariableAllocationTexture::MAX_BUFFER_SIZE + TransferMap _pendingTransfersMap; +}; + +}} // namespace gpu::gl + +using namespace gpu; +using namespace gpu::gl; + +void GLBackend::initTextureManagementStage() { + _textureManagement._transferEngine = std::make_shared(); +} + +void GLBackend::killTextureManagementStage() { + _textureManagement._transferEngine->shutdown(); + _textureManagement._transferEngine.reset(); +} + +std::vector GLTextureTransferEngine::getAllTextures() { + std::vector result; + result.reserve(_registeredTextures.size()); + std::remove_if(_registeredTextures.begin(), _registeredTextures.end(), [&](const std::weak_ptr& weak)->bool { + auto strong = weak.lock(); + bool strongResult = strong.operator bool(); + if (strongResult) { + result.push_back(strong); + } + return strongResult; + }); + return result; +} + +void GLTextureTransferEngine::addMemoryManagedTexture(const TexturePointer& texturePointer) { + ++_frameTexturesCreated; + _registeredTextures.push_back(texturePointer); +} + +void GLTextureTransferEngineDefault::shutdown() { + _shutdown = true; +#if THREADED_TEXTURE_BUFFERING + if (_transferThread) { + _transferThread->wait(); + delete 
_transferThread; + _transferThread = nullptr; + } +#endif +} + + +void GLTextureTransferEngineDefault::manageMemory() { + PROFILE_RANGE(render_gpu_gl, __FUNCTION__); + // reset the count used to limit the number of textures created per frame + resetFrameTextureCreated(); + // Determine the current memory management state. It will be either idle (no work to do), + // undersubscribed (need to do more allocation) or transfer (need to upload content from the + // backing store to the GPU + updateMemoryPressure(); + if (MemoryPressureState::Undersubscribed == _memoryPressureState) { + // If we're undersubscribed, we need to process some of the textures that can have additional allocation + processPromotes(); + } else if (MemoryPressureState::Transfer == _memoryPressureState) { + // If we're in transfer mode we need to manage the buffering and upload queues + processTransferQueues(); + } +} + +// Each frame we will check if our memory pressure state has changed. +void GLTextureTransferEngineDefault::updateMemoryPressure() { + PROFILE_RANGE(render_gpu_gl, __FUNCTION__); + + size_t allowedMemoryAllocation = gpu::Texture::getAllowedGPUMemoryUsage(); + if (0 == allowedMemoryAllocation) { + allowedMemoryAllocation = DEFAULT_ALLOWED_TEXTURE_MEMORY; + } + + // Clear any defunct textures (weak pointers that no longer have a valid texture) + auto strongTextures = getAllTextures(); + + size_t totalVariableMemoryAllocation = 0; + size_t idealMemoryAllocation = 0; + bool canDemote = false; + bool canPromote = false; + bool hasTransfers = false; + for (const auto& texture : strongTextures) { + GLTexture* gltexture = Backend::getGPUObject(*texture); + GLVariableAllocationSupport* vartexture = dynamic_cast(gltexture); + vartexture->sanityCheck(); + + // Track how much the texture thinks it should be using + idealMemoryAllocation += texture->evalTotalSize(); + // Track how much we're actually using + totalVariableMemoryAllocation += gltexture->size(); + if (vartexture->canDemote()) { + 
canDemote |= true; + } + if (vartexture->canPromote()) { + canPromote |= true; + } + if (vartexture->hasPendingTransfers()) { + hasTransfers |= true; + } + } + + size_t unallocated = idealMemoryAllocation - totalVariableMemoryAllocation; + float pressure = (float)totalVariableMemoryAllocation / (float)allowedMemoryAllocation; + + // If we're oversubscribed we need to demote textures IMMEDIATELY + if (pressure > OVERSUBSCRIBED_PRESSURE_VALUE && canDemote) { + auto overPressure = pressure - OVERSUBSCRIBED_PRESSURE_VALUE; + size_t relief = (size_t)(overPressure * totalVariableMemoryAllocation); + processDemotes(relief, strongTextures); + return; + } + + + auto newState = MemoryPressureState::Idle; + if (pressure < UNDERSUBSCRIBED_PRESSURE_VALUE && (unallocated != 0 && canPromote)) { + newState = MemoryPressureState::Undersubscribed; + } else if (hasTransfers) { + newState = MemoryPressureState::Transfer; + } else { + Lock lock(_bufferMutex); + if (!_activeBufferQueue.empty() || !_activeTransferQueue.empty() || !_pendingTransfersMap.empty()) { + newState = MemoryPressureState::Transfer; + } + } + + // If we've changed state then we have to populate the appropriate structure with the work to be done + if (newState != _memoryPressureState) { + _memoryPressureState = newState; + _promoteQueue = WorkQueue(); + _pendingTransfersMap.clear(); + + if (MemoryPressureState::Idle == _memoryPressureState) { + return; + } + + // For each texture, if it's eligible for work in the current state, put it into the appropriate structure + for (const auto& texture : strongTextures) { + GLTexture* gltexture = Backend::getGPUObject(*texture); + GLVariableAllocationSupport* vargltexture = dynamic_cast(gltexture); + if (MemoryPressureState::Undersubscribed == _memoryPressureState && vargltexture->canPromote()) { + // Promote smallest first + _promoteQueue.push({ texture, 1.0f / (float)gltexture->size() }); + } else if (MemoryPressureState::Transfer == _memoryPressureState && 
vargltexture->hasPendingTransfers()) { + populateTransferQueue(texture); + } + } + } +} + +// Manage the _activeBufferQueue and _activeTransferQueue queues +void GLTextureTransferEngineDefault::processTransferQueues() { +#if THREADED_TEXTURE_BUFFERING + if (!_transferThread) { + _transferThread = new TextureBufferThread(*this); + } +#endif + + + // From the pendingTransferMap, queue jobs into the _activeBufferQueue + // Doing so will lock the weak texture pointer so that it can't be destroyed + // while the background thread is working. + // + // This will queue jobs until _queuedBufferSize can't be increased without exceeding + // GLVariableAllocationTexture::MAX_BUFFER_SIZE or there is no more work to be done + populateActiveBufferQueue(); +#if !THREADED_TEXTURE_BUFFERING + processActiveBufferQueue(); +#endif + + // Take any tasks which have completed buffering and process them, uploading the buffered + // data to the GPU. Drains the _activeTransferQueue + { + ActiveTransferQueue activeTransferQueue; + { + Lock lock(_bufferMutex); + activeTransferQueue.swap(_activeTransferQueue); + } + + while (!activeTransferQueue.empty()) { + const auto& activeTransferJob = activeTransferQueue.front(); + const auto& texturePointer = activeTransferJob.first; + const auto& tranferJob = activeTransferJob.second; + tranferJob->transfer(texturePointer); + // The pop_front MUST be the last call since all of these varaibles in scope are + // references that will be invalid after the pop + activeTransferQueue.pop_front(); + } + } + + // If we have no more work in any of the structures, reset the memory state to idle to + // force reconstruction of the _pendingTransfersMap if necessary + { + Lock lock(_bufferMutex); + if (_activeTransferQueue.empty() && _activeBufferQueue.empty() && _pendingTransfersMap.empty()) { + _memoryPressureState = MemoryPressureState::Idle; + } + } +} + +void GLTextureTransferEngineDefault::populateActiveBufferQueue() { + size_t queuedBufferSize = 
_queuedBufferSize; + static const auto& MAX_BUFFER_SIZE = GLVariableAllocationSupport::MAX_BUFFER_SIZE; + Q_ASSERT(queuedBufferSize <= MAX_BUFFER_SIZE); + size_t availableBufferSize = MAX_BUFFER_SIZE - queuedBufferSize; + + // Queue up buffering jobs + ActiveTransferQueue newBufferJobs; + ActiveTransferQueue newTransferJobs; + size_t newTransferSize{ 0 }; + + for (auto itr = _pendingTransfersMap.begin(); itr != _pendingTransfersMap.end(); ) { + const auto& weakTexture = itr->first; + const auto texture = weakTexture.lock(); + + // Texture no longer exists, remove from the transfer map and move on + if (!texture) { + itr = _pendingTransfersMap.erase(itr); + continue; + } + + GLTexture* gltexture = Backend::getGPUObject(*texture); + GLVariableAllocationSupport* vargltexture = dynamic_cast(gltexture); + + auto& textureTransferQueue = itr->second; + // Can't find any pending transfers, so move on + if (textureTransferQueue.empty()) { + if (vargltexture->hasPendingTransfers()) { + qWarning(gpugllogging) << "Texture has no transfer jobs, but has pending transfers"; + } + itr = _pendingTransfersMap.erase(itr); + continue; + } + + const auto& transferJob = textureTransferQueue.front(); + if (!transferJob->bufferingRequired()) { + newTransferJobs.emplace_back(texture, transferJob); + } else { + const auto& transferSize = transferJob->size(); + // If there's not enough space for the buffering, then break out of the loop + if (transferSize > availableBufferSize) { + break; + } + availableBufferSize -= transferSize; + Q_ASSERT(availableBufferSize <= MAX_BUFFER_SIZE); + Q_ASSERT(newTransferSize <= MAX_BUFFER_SIZE); + newTransferSize += transferSize; + Q_ASSERT(newTransferSize <= MAX_BUFFER_SIZE); + newBufferJobs.emplace_back(texture, transferJob); + } + textureTransferQueue.pop(); + ++itr; + } + + { + Lock lock(_bufferMutex); + _activeBufferQueue.splice(_activeBufferQueue.end(), newBufferJobs); + Q_ASSERT(_queuedBufferSize <= MAX_BUFFER_SIZE); + _queuedBufferSize += 
newTransferSize; + Q_ASSERT(_queuedBufferSize <= MAX_BUFFER_SIZE); + _activeTransferQueue.splice(_activeTransferQueue.end(), newTransferJobs); + } +} + +bool GLTextureTransferEngineDefault::processActiveBufferQueue() { + ActiveTransferQueue activeBufferQueue; + { + Lock lock(_bufferMutex); + _activeBufferQueue.swap(activeBufferQueue); + } + + if (activeBufferQueue.empty()) { + return false; + } + + for (const auto& activeJob : activeBufferQueue) { + const auto& texture = activeJob.first; + const auto& transferJob = activeJob.second; + const auto& transferSize = transferJob->size(); + transferJob->buffer(texture); + Q_ASSERT(_queuedBufferSize >= transferSize); + _queuedBufferSize -= transferSize; + } + + { + Lock lock(_bufferMutex); + _activeTransferQueue.splice(_activeTransferQueue.end(), activeBufferQueue); + } + + return true; +} + +void GLTextureTransferEngineDefault::populateTransferQueue(const TexturePointer& texturePointer) { + TextureWeakPointer weakTexture = texturePointer; + GLTexture* gltexture = Backend::getGPUObject(*texturePointer); + GLVariableAllocationSupport* vargltexture = dynamic_cast(gltexture); + TransferJob::Queue pendingTransfers; + PROFILE_RANGE(render_gpu_gl, __FUNCTION__); + vargltexture->populateTransferQueue(pendingTransfers); + if (!pendingTransfers.empty()) { + _pendingTransfersMap[weakTexture] = pendingTransfers; + } +} + +// From the queue of textures to be promited +void GLTextureTransferEngineDefault::processPromotes() { + // FIXME use max allocated memory per frame instead of promotion count + static const size_t MAX_ALLOCATED_BYTES_PER_FRAME = GLVariableAllocationSupport::MAX_BUFFER_SIZE; + static const size_t MAX_ALLOCATIONS_PER_FRAME = 8; + size_t allocatedBytes{ 0 }; + size_t allocations{ 0 }; + + while (!_promoteQueue.empty()) { + // Grab the first item off the demote queue + PROFILE_RANGE(render_gpu_gl, __FUNCTION__); + auto entry = _promoteQueue.top(); + _promoteQueue.pop(); + auto texture = entry.first.lock(); + if 
(!texture) { + continue; + } + + GLTexture* gltexture = Backend::getGPUObject(*texture); + GLVariableAllocationSupport* vartexture = dynamic_cast(gltexture); + auto originalSize = gltexture->size(); + vartexture->promote(); + auto allocationDelta = gltexture->size() - originalSize; + if (vartexture->canPromote()) { + // Promote smallest first + _promoteQueue.push({ texture, 1.0f / (float)gltexture->size() }); + } + allocatedBytes += allocationDelta; + if (++allocations >= MAX_ALLOCATIONS_PER_FRAME) { + break; + } + if (allocatedBytes >= MAX_ALLOCATED_BYTES_PER_FRAME) { + break; + } + } + + // Get the front of the work queue to perform work + if (_promoteQueue.empty()) { + // Force rebuild of work queue + _memoryPressureState = MemoryPressureState::Idle; + } +} + +void GLTextureTransferEngineDefault::processDemotes(size_t reliefRequired, const std::vector& strongTextures) { + // Demote largest first + ImmediateWorkQueue demoteQueue; + for (const auto& texture : strongTextures) { + GLTexture* gltexture = Backend::getGPUObject(*texture); + GLVariableAllocationSupport* vargltexture = dynamic_cast(gltexture); + if (vargltexture->canDemote()) { + demoteQueue.push({ texture, (float)gltexture->size() }); + } + } + + size_t relieved = 0; + while (!demoteQueue.empty() && relieved < reliefRequired) { + { + const auto& target = demoteQueue.top(); + const auto& texture = target.first; + GLTexture* gltexture = Backend::getGPUObject(*texture); + auto oldSize = gltexture->size(); + GLVariableAllocationSupport* vargltexture = dynamic_cast(gltexture); + vargltexture->demote(); + auto newSize = gltexture->size(); + relieved += (oldSize - newSize); + } + demoteQueue.pop(); + } +} + +// FIXME hack for stats display +QString getTextureMemoryPressureModeString() { + switch (_memoryPressureState) { + case MemoryPressureState::Undersubscribed: + return "Undersubscribed"; + + case MemoryPressureState::Transfer: + return "Transfer"; + + case MemoryPressureState::Idle: + return "Idle"; + } + 
Q_UNREACHABLE(); + return "Unknown"; +} diff --git a/libraries/gpu-gl/src/gpu/gl41/GL41Backend.h b/libraries/gpu-gl/src/gpu/gl41/GL41Backend.h index f3b452b1f9..23dcac0d8d 100644 --- a/libraries/gpu-gl/src/gpu/gl41/GL41Backend.h +++ b/libraries/gpu-gl/src/gpu/gl41/GL41Backend.h @@ -114,9 +114,9 @@ public: void allocateStorage(uint16 allocatedMip); void syncSampler() const override; - void promote() override; - void demote() override; - void populateTransferQueue() override; + size_t promote() override; + size_t demote() override; + void populateTransferQueue(TransferQueue& pendingTransfers) override; Size copyMipFaceLinesFromTexture(uint16_t mip, uint8_t face, const uvec3& size, uint32_t yOffset, GLenum internalFormat, GLenum format, GLenum type, Size sourceSize, const void* sourcePointer) const override; Size copyMipsFromTexture(); diff --git a/libraries/gpu-gl/src/gpu/gl41/GL41BackendTexture.cpp b/libraries/gpu-gl/src/gpu/gl41/GL41BackendTexture.cpp index 0298b8b892..624cb4f656 100644 --- a/libraries/gpu-gl/src/gpu/gl41/GL41BackendTexture.cpp +++ b/libraries/gpu-gl/src/gpu/gl41/GL41BackendTexture.cpp @@ -72,7 +72,7 @@ GLTexture* GL41Backend::syncGPUObject(const TexturePointer& texturePointer) { case TextureUsageType::RESOURCE: qCDebug(gpugllogging) << "variable / Strict texture " << texture.source().c_str(); object = new GL41ResourceTexture(shared_from_this(), texture); - GLVariableAllocationSupport::addMemoryManagedTexture(texturePointer); + _textureManagement._transferEngine->addMemoryManagedTexture(texturePointer); break; default: @@ -86,7 +86,6 @@ GLTexture* GL41Backend::syncGPUObject(const TexturePointer& texturePointer) { auto minAvailableMip = texture.minAvailableMipLevel(); if (minAvailableMip < varTex->_minAllocatedMip) { varTex->_minAllocatedMip = minAvailableMip; - GL41VariableAllocationTexture::_memoryPressureStateStale = true; } } } @@ -299,9 +298,7 @@ GL41VariableAllocationTexture::GL41VariableAllocationTexture(const std::weak_ptr uint16_t 
allocatedMip = std::max(_minAllocatedMip, targetMip); allocateStorage(allocatedMip); - _memoryPressureStateStale = true; copyMipsFromTexture(); - syncSampler(); } @@ -496,7 +493,7 @@ void GL41VariableAllocationTexture::copyTextureMipsInGPUMem(GLuint srcId, GLuint }); } -void GL41VariableAllocationTexture::promote() { +size_t GL41VariableAllocationTexture::promote() { PROFILE_RANGE(render_gpu_gl, __FUNCTION__); Q_ASSERT(_allocatedMip > 0); @@ -524,12 +521,11 @@ void GL41VariableAllocationTexture::promote() { // update the memory usage Backend::textureResourceGPUMemSize.update(oldSize, 0); + return (_size - oldSize); // no change to Backend::textureResourcePopulatedGPUMemSize - - populateTransferQueue(); } -void GL41VariableAllocationTexture::demote() { +size_t GL41VariableAllocationTexture::demote() { PROFILE_RANGE(render_gpu_gl, __FUNCTION__); Q_ASSERT(_allocatedMip < _maxAllocatedMip); auto oldId = _id; @@ -563,16 +559,16 @@ void GL41VariableAllocationTexture::demote() { } decrementPopulatedSize(amountUnpopulated); } - populateTransferQueue(); + + return oldSize - _size; } -void GL41VariableAllocationTexture::populateTransferQueue() { +void GL41VariableAllocationTexture::populateTransferQueue(TransferQueue& pendingTransfers) { PROFILE_RANGE(render_gpu_gl, __FUNCTION__); if (_populatedMip <= _allocatedMip) { return; } - _pendingTransfers = TransferQueue(); const uint8_t maxFace = GLTexture::getFaceCount(_target); uint16_t sourceMip = _populatedMip; @@ -588,7 +584,7 @@ void GL41VariableAllocationTexture::populateTransferQueue() { // If the mip is less than the max transfer size, then just do it in one transfer if (glm::all(glm::lessThanEqual(mipDimensions, MAX_TRANSFER_DIMENSIONS))) { // Can the mip be transferred in one go - _pendingTransfers.emplace(new TransferJob(*this, sourceMip, targetMip, face)); + pendingTransfers.emplace(new TransferJob(_gpuObject, sourceMip, targetMip, face)); continue; } @@ -605,13 +601,13 @@ void 
GL41VariableAllocationTexture::populateTransferQueue() { uint32_t lineOffset = 0; while (lineOffset < lines) { uint32_t linesToCopy = std::min(lines - lineOffset, linesPerTransfer); - _pendingTransfers.emplace(new TransferJob(*this, sourceMip, targetMip, face, linesToCopy, lineOffset)); + pendingTransfers.emplace(new TransferJob(_gpuObject, sourceMip, targetMip, face, linesToCopy, lineOffset)); lineOffset += linesToCopy; } } // queue up the sampler and populated mip change for after the transfer has completed - _pendingTransfers.emplace(new TransferJob(*this, [=] { + pendingTransfers.emplace(new TransferJob([=] { _populatedMip = sourceMip; syncSampler(); })); diff --git a/libraries/gpu-gl/src/gpu/gl45/GL45Backend.h b/libraries/gpu-gl/src/gpu/gl45/GL45Backend.h index 616b6d1075..0db9271f57 100644 --- a/libraries/gpu-gl/src/gpu/gl45/GL45Backend.h +++ b/libraries/gpu-gl/src/gpu/gl45/GL45Backend.h @@ -187,9 +187,9 @@ public: GL45ResourceTexture(const std::weak_ptr& backend, const Texture& texture); void syncSampler() const override; - void promote() override; - void demote() override; - void populateTransferQueue() override; + size_t promote() override; + size_t demote() override; + void populateTransferQueue(TransferQueue& pendingTransfers) override; void allocateStorage(uint16 mip); diff --git a/libraries/gpu-gl/src/gpu/gl45/GL45BackendTexture.cpp b/libraries/gpu-gl/src/gpu/gl45/GL45BackendTexture.cpp index 6b3c99ccc3..b0ae1296e9 100644 --- a/libraries/gpu-gl/src/gpu/gl45/GL45BackendTexture.cpp +++ b/libraries/gpu-gl/src/gpu/gl45/GL45BackendTexture.cpp @@ -28,7 +28,6 @@ using namespace gpu; using namespace gpu::gl; using namespace gpu::gl45; -#define MAX_RESOURCE_TEXTURES_PER_FRAME 2 #define FORCE_STRICT_TEXTURE 0 #define ENABLE_SPARSE_TEXTURE 0 @@ -82,7 +81,8 @@ GLTexture* GL45Backend::syncGPUObject(const TexturePointer& texturePointer) { #if !FORCE_STRICT_TEXTURE case TextureUsageType::RESOURCE: { - if (GL45VariableAllocationTexture::_frameTexturesCreated < 
MAX_RESOURCE_TEXTURES_PER_FRAME) { + auto& transferEngine = _textureManagement._transferEngine; + if (transferEngine->allowCreate()) { #if ENABLE_SPARSE_TEXTURE if (isTextureManagementSparseEnabled() && GL45Texture::isSparseEligible(texture)) { object = new GL45SparseResourceTexture(shared_from_this(), texture); @@ -92,7 +92,7 @@ GLTexture* GL45Backend::syncGPUObject(const TexturePointer& texturePointer) { #else object = new GL45ResourceTexture(shared_from_this(), texture); #endif - GLVariableAllocationSupport::addMemoryManagedTexture(texturePointer); + transferEngine->addMemoryManagedTexture(texturePointer); } else { auto fallback = texturePointer->getFallbackTexture(); if (fallback) { @@ -114,7 +114,6 @@ GLTexture* GL45Backend::syncGPUObject(const TexturePointer& texturePointer) { auto minAvailableMip = texture.minAvailableMipLevel(); if (minAvailableMip < varTex->_minAllocatedMip) { varTex->_minAllocatedMip = minAvailableMip; - GL45VariableAllocationTexture::_memoryPressureStateStale = true; } } } @@ -124,6 +123,7 @@ GLTexture* GL45Backend::syncGPUObject(const TexturePointer& texturePointer) { } void GL45Backend::initTextureManagementStage() { + GLBackend::initTextureManagementStage(); // enable the Sparse Texture on gl45 _textureManagement._sparseCapable = true; diff --git a/libraries/gpu-gl/src/gpu/gl45/GL45BackendVariableTexture.cpp b/libraries/gpu-gl/src/gpu/gl45/GL45BackendVariableTexture.cpp index 08d077605d..fe74336b2a 100644 --- a/libraries/gpu-gl/src/gpu/gl45/GL45BackendVariableTexture.cpp +++ b/libraries/gpu-gl/src/gpu/gl45/GL45BackendVariableTexture.cpp @@ -31,7 +31,6 @@ using GL45Texture = GL45Backend::GL45Texture; using GL45VariableAllocationTexture = GL45Backend::GL45VariableAllocationTexture; GL45VariableAllocationTexture::GL45VariableAllocationTexture(const std::weak_ptr& backend, const Texture& texture) : GL45Texture(backend, texture) { - ++_frameTexturesCreated; Backend::textureResourceCount.increment(); } @@ -104,7 +103,6 @@ 
GL45ResourceTexture::GL45ResourceTexture(const std::weak_ptr& backend uint16_t allocatedMip = std::max(_minAllocatedMip, targetMip); allocateStorage(allocatedMip); - _memoryPressureStateStale = true; copyMipsFromTexture(); syncSampler(); } @@ -148,7 +146,7 @@ void GL45ResourceTexture::syncSampler() const { #endif } -void GL45ResourceTexture::promote() { +size_t GL45ResourceTexture::promote() { PROFILE_RANGE(render_gpu_gl, __FUNCTION__); Q_ASSERT(_allocatedMip > 0); @@ -191,11 +189,10 @@ void GL45ResourceTexture::promote() { // update the memory usage Backend::textureResourceGPUMemSize.update(oldSize, 0); // no change to Backend::textureResourcePopulatedGPUMemSize - - populateTransferQueue(); + return (_size - oldSize); } -void GL45ResourceTexture::demote() { +size_t GL45ResourceTexture::demote() { PROFILE_RANGE(render_gpu_gl, __FUNCTION__); Q_ASSERT(_allocatedMip < _maxAllocatedMip); auto oldId = _id; @@ -242,16 +239,16 @@ void GL45ResourceTexture::demote() { } decrementPopulatedSize(amountUnpopulated); } - - populateTransferQueue(); + return (oldSize - _size); } -void GL45ResourceTexture::populateTransferQueue() { +void GL45ResourceTexture::populateTransferQueue(TransferQueue& pendingTransfers) { PROFILE_RANGE(render_gpu_gl, __FUNCTION__); + sanityCheck(); + if (_populatedMip <= _allocatedMip) { return; } - _pendingTransfers = TransferQueue(); const uint8_t maxFace = GLTexture::getFaceCount(_target); uint16_t sourceMip = _populatedMip; @@ -267,7 +264,7 @@ void GL45ResourceTexture::populateTransferQueue() { // If the mip is less than the max transfer size, then just do it in one transfer if (glm::all(glm::lessThanEqual(mipDimensions, MAX_TRANSFER_DIMENSIONS))) { // Can the mip be transferred in one go - _pendingTransfers.emplace(new TransferJob(*this, sourceMip, targetMip, face)); + pendingTransfers.emplace(new TransferJob(_gpuObject, sourceMip, targetMip, face)); continue; } @@ -284,14 +281,15 @@ void GL45ResourceTexture::populateTransferQueue() { uint32_t 
lineOffset = 0; while (lineOffset < lines) { uint32_t linesToCopy = std::min(lines - lineOffset, linesPerTransfer); - _pendingTransfers.emplace(new TransferJob(*this, sourceMip, targetMip, face, linesToCopy, lineOffset)); + pendingTransfers.emplace(new TransferJob(_gpuObject, sourceMip, targetMip, face, linesToCopy, lineOffset)); lineOffset += linesToCopy; } } // queue up the sampler and populated mip change for after the transfer has completed - _pendingTransfers.emplace(new TransferJob(*this, [=] { + pendingTransfers.emplace(new TransferJob([=] { _populatedMip = sourceMip; + sanityCheck(); syncSampler(); })); } while (sourceMip != _allocatedMip); diff --git a/libraries/gpu-gles/src/gpu/gles/GLESBackend.h b/libraries/gpu-gles/src/gpu/gles/GLESBackend.h index 47a123718a..cb8e4abb29 100644 --- a/libraries/gpu-gles/src/gpu/gles/GLESBackend.h +++ b/libraries/gpu-gles/src/gpu/gles/GLESBackend.h @@ -105,9 +105,9 @@ public: void allocateStorage(uint16 allocatedMip); void syncSampler() const override; - void promote() override; - void demote() override; - void populateTransferQueue() override; + size_t promote() override; + size_t demote() override; + void populateTransferQueue(TransferJob::Queue& queue) override; Size copyMipFaceLinesFromTexture(uint16_t mip, uint8_t face, const uvec3& size, uint32_t yOffset, GLenum internalFormat, GLenum format, GLenum type, Size sourceSize, const void* sourcePointer) const override; Size copyMipsFromTexture(); diff --git a/libraries/gpu-gles/src/gpu/gles/GLESBackendTexture.cpp b/libraries/gpu-gles/src/gpu/gles/GLESBackendTexture.cpp index 2009dc5dc9..2ffa421bc8 100644 --- a/libraries/gpu-gles/src/gpu/gles/GLESBackendTexture.cpp +++ b/libraries/gpu-gles/src/gpu/gles/GLESBackendTexture.cpp @@ -90,7 +90,6 @@ GLTexture* GLESBackend::syncGPUObject(const TexturePointer& texturePointer) { auto minAvailableMip = texture.minAvailableMipLevel(); if (minAvailableMip < varTex->_minAllocatedMip) { varTex->_minAllocatedMip = minAvailableMip; - 
GLESVariableAllocationTexture::_memoryPressureStateStale = true; } } } @@ -361,7 +360,6 @@ GLESVariableAllocationTexture::GLESVariableAllocationTexture(const std::weak_ptr uint16_t allocatedMip = std::max(_minAllocatedMip, targetMip); allocateStorage(allocatedMip); - _memoryPressureStateStale = true; copyMipsFromTexture(); syncSampler(); @@ -559,7 +557,7 @@ void GLESVariableAllocationTexture::copyTextureMipsInGPUMem(GLuint srcId, GLuint }); } -void GLESVariableAllocationTexture::promote() { +size_t GLESVariableAllocationTexture::promote() { PROFILE_RANGE(render_gpu_gl, __FUNCTION__); Q_ASSERT(_allocatedMip > 0); @@ -587,12 +585,11 @@ void GLESVariableAllocationTexture::promote() { // update the memory usage Backend::textureResourceGPUMemSize.update(oldSize, 0); - // no change to Backend::textureResourcePopulatedGPUMemSize - populateTransferQueue(); + return _size - oldSize; } -void GLESVariableAllocationTexture::demote() { +size_t GLESVariableAllocationTexture::demote() { PROFILE_RANGE(render_gpu_gl, __FUNCTION__); Q_ASSERT(_allocatedMip < _maxAllocatedMip); auto oldId = _id; @@ -626,16 +623,16 @@ void GLESVariableAllocationTexture::demote() { } decrementPopulatedSize(amountUnpopulated); } - populateTransferQueue(); + + return oldSize - _size; } -void GLESVariableAllocationTexture::populateTransferQueue() { +void GLESVariableAllocationTexture::populateTransferQueue(TransferJob::Queue& queue) { PROFILE_RANGE(render_gpu_gl, __FUNCTION__); if (_populatedMip <= _allocatedMip) { return; } - _pendingTransfers = TransferQueue(); const uint8_t maxFace = GLTexture::getFaceCount(_target); uint16_t sourceMip = _populatedMip; @@ -651,7 +648,7 @@ void GLESVariableAllocationTexture::populateTransferQueue() { // If the mip is less than the max transfer size, then just do it in one transfer if (glm::all(glm::lessThanEqual(mipDimensions, MAX_TRANSFER_DIMENSIONS))) { // Can the mip be transferred in one go - _pendingTransfers.emplace(new TransferJob(*this, sourceMip, targetMip, 
face)); + queue.emplace(new TransferJob(_gpuObject, sourceMip, targetMip, face)); continue; } @@ -668,13 +665,13 @@ void GLESVariableAllocationTexture::populateTransferQueue() { uint32_t lineOffset = 0; while (lineOffset < lines) { uint32_t linesToCopy = std::min(lines - lineOffset, linesPerTransfer); - _pendingTransfers.emplace(new TransferJob(*this, sourceMip, targetMip, face, linesToCopy, lineOffset)); + queue.emplace(new TransferJob(_gpuObject, sourceMip, targetMip, face, linesToCopy, lineOffset)); lineOffset += linesToCopy; } } // queue up the sampler and populated mip change for after the transfer has completed - _pendingTransfers.emplace(new TransferJob(*this, [=] { + queue.emplace(new TransferJob([=] { _populatedMip = sourceMip; syncSampler(); })); diff --git a/tests/gpu/src/TextureTest.cpp b/tests/gpu/src/TextureTest.cpp index 18361af791..c9229fb826 100644 --- a/tests/gpu/src/TextureTest.cpp +++ b/tests/gpu/src/TextureTest.cpp @@ -25,13 +25,14 @@ QTEST_MAIN(TextureTest) -#define LOAD_TEXTURE_COUNT 40 +#define LOAD_TEXTURE_COUNT 100 +#define FAIL_AFTER_SECONDS 30 static const QString TEST_DATA("https://hifi-public.s3.amazonaws.com/austin/test_data/test_ktx.zip"); static const QString TEST_DIR_NAME("{630b8f02-52af-4cdf-a896-24e472b94b28}"); +static const QString KTX_TEST_DIR_ENV("HIFI_KTX_TEST_DIR"); std::string vertexShaderSource = R"SHADER( -#line 14 layout(location = 0) out vec2 outTexCoord0; const vec4 VERTICES[] = vec4[]( @@ -50,8 +51,6 @@ void main() { )SHADER"; std::string fragmentShaderSource = R"SHADER( -#line 28 - uniform sampler2D tex; layout(location = 0) in vec2 inTexCoord0; @@ -87,21 +86,29 @@ void TextureTest::initTestCase() { gpu::Context::init(); _gpuContext = std::make_shared(); - _resourcesPath = QStandardPaths::writableLocation(QStandardPaths::TempLocation) + "/" + TEST_DIR_NAME; - if (!QFileInfo(_resourcesPath).exists()) { - QDir(_resourcesPath).mkpath("."); - FileDownloader(TEST_DATA, - [&](const QByteArray& data) { - QTemporaryFile 
zipFile; - if (zipFile.open()) { - zipFile.write(data); - zipFile.close(); - } - JlCompress::extractDir(zipFile.fileName(), _resourcesPath); - }) - .waitForDownload(); + + if (QProcessEnvironment::systemEnvironment().contains(KTX_TEST_DIR_ENV)) { + // For local testing with larger data sets + _resourcesPath = QProcessEnvironment::systemEnvironment().value(KTX_TEST_DIR_ENV); + } else { + _resourcesPath = QStandardPaths::writableLocation(QStandardPaths::TempLocation) + "/" + TEST_DIR_NAME; + if (!QFileInfo(_resourcesPath).exists()) { + QDir(_resourcesPath).mkpath("."); + FileDownloader(TEST_DATA, + [&](const QByteArray& data) { + QTemporaryFile zipFile; + if (zipFile.open()) { + zipFile.write(data); + zipFile.close(); + } + JlCompress::extractDir(zipFile.fileName(), _resourcesPath); + }) + .waitForDownload(); + } } + QVERIFY(!_resourcesPath.isEmpty()); + _canvas.makeCurrent(); { auto VS = gpu::Shader::createVertex(vertexShaderSource); @@ -130,15 +137,7 @@ void TextureTest::initTestCase() { } } - // Load the test textures - { - size_t newTextureCount = std::min(_textureFiles.size(), LOAD_TEXTURE_COUNT); - for (size_t i = 0; i < newTextureCount; ++i) { - const auto& textureFile = _textureFiles[i]; - auto texture = gpu::Texture::unserialize(textureFile); - _textures.push_back(texture); - } - } + QVERIFY(!_textureFiles.empty()); } void TextureTest::cleanupTestCase() { @@ -148,6 +147,18 @@ void TextureTest::cleanupTestCase() { _gpuContext.reset(); } +std::vector TextureTest::loadTestTextures() const { + // Load the test textures + std::vector result; + size_t newTextureCount = std::min(_textureFiles.size(), LOAD_TEXTURE_COUNT); + for (size_t i = 0; i < newTextureCount; ++i) { + const auto& textureFile = _textureFiles[i]; + auto texture = gpu::Texture::unserialize(textureFile); + result.push_back(texture); + } + return result; +} + void TextureTest::beginFrame() { _gpuContext->recycle(); _gpuContext->beginFrame(); @@ -169,116 +180,130 @@ void TextureTest::endFrame() { 
QThread::msleep(10); } + void TextureTest::renderFrame(const std::function& renderLambda) { beginFrame(); gpu::doInBatch("Test::body", _gpuContext, renderLambda); endFrame(); + ++_frameCount; } +extern QString getTextureMemoryPressureModeString(); void TextureTest::testTextureLoading() { - QVERIFY(_textures.size() > 0); - auto renderTexturesLamdba = [this](gpu::Batch& batch) { - batch.setPipeline(_pipeline); - for (const auto& texture : _textures) { - batch.setResourceTexture(0, texture); - batch.draw(gpu::TRIANGLE_STRIP, 4, 0); + QBENCHMARK{ + _frameCount = 0; + auto textures = loadTestTextures(); + QVERIFY(textures.size() > 0); + auto renderTexturesLamdba = [&](gpu::Batch& batch) { + batch.setPipeline(_pipeline); + for (const auto& texture : textures) { + batch.setResourceTexture(0, texture); + batch.draw(gpu::TRIANGLE_STRIP, 4, 0); + } + }; + + size_t expectedAllocation = 0; + for (const auto& texture : textures) { + expectedAllocation += texture->evalTotalSize(); } - }; + QVERIFY(textures.size() > 0); - size_t expectedAllocation = 0; - for (const auto& texture : _textures) { - expectedAllocation += texture->evalTotalSize(); + auto reportLambda = [=] { + qDebug() << "Allowed " << gpu::Texture::getAllowedGPUMemoryUsage(); + qDebug() << "Allocated " << gpu::Context::getTextureResourceGPUMemSize(); + qDebug() << "Populated " << gpu::Context::getTextureResourcePopulatedGPUMemSize(); + qDebug() << "Pending " << gpu::Context::getTexturePendingGPUTransferMemSize(); + qDebug() << "State " << getTextureMemoryPressureModeString(); + }; + + auto allocatedMemory = gpu::Context::getTextureResourceGPUMemSize(); + auto populatedMemory = gpu::Context::getTextureResourcePopulatedGPUMemSize(); + + // Cycle frames we're fully allocated + // We need to use the texture rendering lambda + auto lastReport = usecTimestampNow(); + auto start = usecTimestampNow(); + qDebug() << "Awaiting texture allocation"; + while (expectedAllocation != allocatedMemory) { + doEvery(lastReport, 4, 
reportLambda); + failAfter(start, FAIL_AFTER_SECONDS, "Failed to allocate texture memory"); + renderFrame(renderTexturesLamdba); + allocatedMemory = gpu::Context::getTextureResourceGPUMemSize(); + populatedMemory = gpu::Context::getTextureResourcePopulatedGPUMemSize(); + } + reportLambda(); + QCOMPARE(allocatedMemory, expectedAllocation); + + // Restart the timer + start = usecTimestampNow(); + // Cycle frames we're fully populated + qDebug() << "Awaiting texture population"; + while (allocatedMemory != populatedMemory || 0 != gpu::Context::getTexturePendingGPUTransferMemSize()) { + doEvery(lastReport, 4, reportLambda); + failAfter(start, FAIL_AFTER_SECONDS, "Failed to populate texture memory"); + renderFrame(); + allocatedMemory = gpu::Context::getTextureResourceGPUMemSize(); + populatedMemory = gpu::Context::getTextureResourcePopulatedGPUMemSize(); + } + reportLambda(); + QCOMPARE(populatedMemory, allocatedMemory); + // FIXME workaround a race condition in the difference between populated size and the actual _populatedMip value in the texture + for (size_t i = 0; i < textures.size(); ++i) { + renderFrame(); + } + + // Test on-demand deallocation of memory + auto maxMemory = allocatedMemory / 2; + gpu::Texture::setAllowedGPUMemoryUsage(maxMemory); + + // Restart the timer + start = usecTimestampNow(); + // Cycle frames until the allocated memory is below the max memory + qDebug() << "Awaiting texture deallocation"; + while (allocatedMemory > maxMemory || allocatedMemory != populatedMemory) { + doEvery(lastReport, 4, reportLambda); + failAfter(start, FAIL_AFTER_SECONDS, "Failed to deallocate texture memory"); + renderFrame(renderTexturesLamdba); + allocatedMemory = gpu::Context::getTextureResourceGPUMemSize(); + populatedMemory = gpu::Context::getTextureResourcePopulatedGPUMemSize(); + } + reportLambda(); + + // Verify that the allocation is now below the target + QVERIFY(allocatedMemory <= maxMemory); + // Verify that populated memory is the same as allocated 
memory + QCOMPARE(populatedMemory, allocatedMemory); + + // Restart the timer + start = usecTimestampNow(); + // Reset the max memory to automatic + gpu::Texture::setAllowedGPUMemoryUsage(0); + // Cycle frames we're fully populated + qDebug() << "Awaiting texture reallocation and repopulation"; + while (allocatedMemory != expectedAllocation || allocatedMemory != populatedMemory) { + doEvery(lastReport, 4, reportLambda); + failAfter(start, FAIL_AFTER_SECONDS, "Failed to populate texture memory"); + renderFrame(); + allocatedMemory = gpu::Context::getTextureResourceGPUMemSize(); + populatedMemory = gpu::Context::getTextureResourcePopulatedGPUMemSize(); + } + reportLambda(); + QCOMPARE(allocatedMemory, expectedAllocation); + QCOMPARE(populatedMemory, allocatedMemory); + + textures.clear(); + // Cycle frames we're fully populated + qDebug() << "Awaiting texture deallocation"; + while (allocatedMemory != 0) { + failAfter(start, FAIL_AFTER_SECONDS, "Failed to clear texture memory"); + renderFrame(); + allocatedMemory = gpu::Context::getTextureResourceGPUMemSize(); + populatedMemory = gpu::Context::getTextureResourcePopulatedGPUMemSize(); + } + reportLambda(); + QCOMPARE(allocatedMemory, 0); + QCOMPARE(populatedMemory, 0); + qDebug() << "Test took " << _frameCount << "frame"; } - QVERIFY(_textures.size() > 0); - - auto reportLambda = [=] { - qDebug() << "Allowed " << gpu::Texture::getAllowedGPUMemoryUsage(); - qDebug() << "Allocated " << gpu::Context::getTextureResourceGPUMemSize(); - qDebug() << "Populated " << gpu::Context::getTextureResourcePopulatedGPUMemSize(); - qDebug() << "Pending " << gpu::Context::getTexturePendingGPUTransferMemSize(); - }; - - auto allocatedMemory = gpu::Context::getTextureResourceGPUMemSize(); - auto populatedMemory = gpu::Context::getTextureResourcePopulatedGPUMemSize(); - - // Cycle frames we're fully allocated - // We need to use the texture rendering lambda - auto lastReport = usecTimestampNow(); - auto start = usecTimestampNow(); - while 
(expectedAllocation != allocatedMemory) { - doEvery(lastReport, 4, reportLambda); - failAfter(start, 10, "Failed to allocate texture memory after 10 seconds"); - renderFrame(renderTexturesLamdba); - allocatedMemory = gpu::Context::getTextureResourceGPUMemSize(); - populatedMemory = gpu::Context::getTextureResourcePopulatedGPUMemSize(); - } - QCOMPARE(allocatedMemory, expectedAllocation); - - // Restart the timer - start = usecTimestampNow(); - // Cycle frames we're fully populated - while (allocatedMemory != populatedMemory || 0 != gpu::Context::getTexturePendingGPUTransferMemSize()) { - doEvery(lastReport, 4, reportLambda); - failAfter(start, 10, "Failed to populate texture memory after 10 seconds"); - renderFrame(); - allocatedMemory = gpu::Context::getTextureResourceGPUMemSize(); - populatedMemory = gpu::Context::getTextureResourcePopulatedGPUMemSize(); - } - reportLambda(); - QCOMPARE(populatedMemory, allocatedMemory); - - // FIXME workaround a race condition in the difference between populated size and the actual _populatedMip value in the texture - for (size_t i = 0; i < _textures.size(); ++i) { - renderFrame(); - } - - // Test on-demand deallocation of memory - auto maxMemory = allocatedMemory / 2; - gpu::Texture::setAllowedGPUMemoryUsage(maxMemory); - - // Restart the timer - start = usecTimestampNow(); - // Cycle frames until the allocated memory is below the max memory - while (allocatedMemory > maxMemory || allocatedMemory != populatedMemory) { - doEvery(lastReport, 4, reportLambda); - failAfter(start, 10, "Failed to deallocate texture memory after 10 seconds"); - renderFrame(renderTexturesLamdba); - allocatedMemory = gpu::Context::getTextureResourceGPUMemSize(); - populatedMemory = gpu::Context::getTextureResourcePopulatedGPUMemSize(); - } - reportLambda(); - - // Verify that the allocation is now below the target - QVERIFY(allocatedMemory <= maxMemory); - // Verify that populated memory is the same as allocated memory - QCOMPARE(populatedMemory, 
allocatedMemory); - - // Restart the timer - start = usecTimestampNow(); - // Reset the max memory to automatic - gpu::Texture::setAllowedGPUMemoryUsage(0); - // Cycle frames we're fully populated - while (allocatedMemory != expectedAllocation || allocatedMemory != populatedMemory) { - doEvery(lastReport, 4, reportLambda); - failAfter(start, 10, "Failed to populate texture memory after 10 seconds"); - renderFrame(); - allocatedMemory = gpu::Context::getTextureResourceGPUMemSize(); - populatedMemory = gpu::Context::getTextureResourcePopulatedGPUMemSize(); - } - reportLambda(); - QCOMPARE(allocatedMemory, expectedAllocation); - QCOMPARE(populatedMemory, allocatedMemory); - - _textures.clear(); - // Cycle frames we're fully populated - while (allocatedMemory != 0) { - failAfter(start, 10, "Failed to clear texture memory after 10 seconds"); - renderFrame(); - allocatedMemory = gpu::Context::getTextureResourceGPUMemSize(); - populatedMemory = gpu::Context::getTextureResourcePopulatedGPUMemSize(); - } - QCOMPARE(allocatedMemory, 0); - QCOMPARE(populatedMemory, 0); qDebug() << "Done"; - } diff --git a/tests/gpu/src/TextureTest.h b/tests/gpu/src/TextureTest.h index 91f8a358ea..2175300406 100644 --- a/tests/gpu/src/TextureTest.h +++ b/tests/gpu/src/TextureTest.h @@ -21,12 +21,15 @@ private: void beginFrame(); void endFrame(); void renderFrame(const std::function& = [](gpu::Batch&) {}); + std::vector loadTestTextures() const; + private slots: void initTestCase(); void cleanupTestCase(); void testTextureLoading(); + private: QString _resourcesPath; OffscreenGLCanvas _canvas; @@ -36,5 +39,5 @@ private: gpu::TexturePointer _colorBuffer, _depthBuffer; const glm::uvec2 _size{ 640, 480 }; std::vector _textureFiles; - std::vector _textures; + size_t _frameCount { 0 }; };