From f18b8f80fe4327b2bc9bcb7c02947233df23834e Mon Sep 17 00:00:00 2001 From: Brad Davis Date: Fri, 2 Sep 2016 12:17:35 -0700 Subject: [PATCH 1/7] Sparse texture transfers Conflicts: libraries/gpu-gl/src/gpu/gl/GLTextureTransfer.cpp --- libraries/gpu-gl/CMakeLists.txt | 1 + libraries/gpu-gl/src/gpu/gl/GLBackend.cpp | 1 - libraries/gpu-gl/src/gpu/gl/GLTexture.cpp | 11 + libraries/gpu-gl/src/gpu/gl/GLTexture.h | 13 +- .../gpu-gl/src/gpu/gl/GLTextureTransfer.cpp | 170 ++++++++---- .../gpu-gl/src/gpu/gl/GLTextureTransfer.h | 43 ++- libraries/gpu-gl/src/gpu/gl41/GL41Backend.h | 4 +- .../src/gpu/gl41/GL41BackendTexture.cpp | 54 +--- libraries/gpu-gl/src/gpu/gl45/GL45Backend.h | 34 ++- .../src/gpu/gl45/GL45BackendTexture.cpp | 251 ++++++++++++------ 10 files changed, 388 insertions(+), 194 deletions(-) diff --git a/libraries/gpu-gl/CMakeLists.txt b/libraries/gpu-gl/CMakeLists.txt index 320f9b3c71..65df5ed9dc 100644 --- a/libraries/gpu-gl/CMakeLists.txt +++ b/libraries/gpu-gl/CMakeLists.txt @@ -4,6 +4,7 @@ link_hifi_libraries(shared gl gpu) GroupSources("src") target_opengl() +target_nsight() if (NOT ANDROID) target_glew() diff --git a/libraries/gpu-gl/src/gpu/gl/GLBackend.cpp b/libraries/gpu-gl/src/gpu/gl/GLBackend.cpp index 7c369f4124..98a073e283 100644 --- a/libraries/gpu-gl/src/gpu/gl/GLBackend.cpp +++ b/libraries/gpu-gl/src/gpu/gl/GLBackend.cpp @@ -315,7 +315,6 @@ void GLBackend::render(const Batch& batch) { void GLBackend::syncCache() { - recycle(); syncTransformStateCache(); syncPipelineStateCache(); syncInputStateCache(); diff --git a/libraries/gpu-gl/src/gpu/gl/GLTexture.cpp b/libraries/gpu-gl/src/gpu/gl/GLTexture.cpp index d90ca3bbd6..255258d762 100644 --- a/libraries/gpu-gl/src/gpu/gl/GLTexture.cpp +++ b/libraries/gpu-gl/src/gpu/gl/GLTexture.cpp @@ -292,3 +292,14 @@ void GLTexture::postTransfer() { void GLTexture::initTextureTransferHelper() { _textureTransferHelper = std::make_shared(); } + +void GLTexture::startTransfer() { + createTexture(); +} + +void GLTexture::finishTransfer() { + if (_gpuObject.isAutogenerateMips()) { + generateMips(); + } +} + diff --git a/libraries/gpu-gl/src/gpu/gl/GLTexture.h b/libraries/gpu-gl/src/gpu/gl/GLTexture.h index 4f67039aa8..742b223e36 100644 --- a/libraries/gpu-gl/src/gpu/gl/GLTexture.h +++ b/libraries/gpu-gl/src/gpu/gl/GLTexture.h @@ -11,6 +11,7 @@ #include "GLShared.h" #include "GLTextureTransfer.h" #include "GLBackend.h" +#include "GLTexelFormat.h" namespace gpu { namespace gl { @@ -19,6 +20,7 @@ struct GLFilterMode { GLint magFilter; }; + class GLTexture : public GLObject { public: static const uint16_t INVALID_MIP { (uint16_t)-1 }; @@ -162,11 +164,13 @@ public: bool isOverMaxMemory() const; -protected: + uint16 usedMipLevels() const { return (_maxMip - _minMip) + 1; } + static const size_t CUBE_NUM_FACES = 6; static const GLenum CUBE_FACE_LAYOUT[6]; static const GLFilterMode FILTER_MODES[Sampler::NUM_FILTERS]; static const GLenum WRAP_MODES[Sampler::NUM_WRAP_MODES]; +protected: static const std::vector& getFaceTargets(GLenum textureType); @@ -185,13 +189,11 @@ protected: GLTexture(const std::weak_ptr& backend, const Texture& texture, GLuint id, GLTexture* originalTexture); void setSyncState(GLSyncState syncState) { _syncState = syncState; } - uint16 usedMipLevels() const { return (_maxMip - _minMip) + 1; } void createTexture(); virtual void allocateStorage() const = 0; virtual void updateSize() const = 0; - virtual void transfer() const = 0; virtual void syncSampler() const = 0; virtual void generateMips() const = 0; virtual void withPreservedTexture(std::function f) const = 0; @@ -199,6 +201,11 @@ protected: protected: void setSize(GLuint size) const; + virtual void startTransfer(); + // Returns true if this is the last block required to complete transfer + virtual bool continueTransfer() { return false; } + virtual void finishTransfer(); + private: GLTexture(const std::weak_ptr& backend, const gpu::Texture& gpuTexture, GLuint id, GLTexture* originalTexture, bool transferrable); diff --git a/libraries/gpu-gl/src/gpu/gl/GLTextureTransfer.cpp b/libraries/gpu-gl/src/gpu/gl/GLTextureTransfer.cpp index 1d22ae7a52..0f64ea1182 100644 --- a/libraries/gpu-gl/src/gpu/gl/GLTextureTransfer.cpp +++ b/libraries/gpu-gl/src/gpu/gl/GLTextureTransfer.cpp @@ -13,6 +13,18 @@ #include "GLShared.h" #include "GLTexture.h" +#ifdef HAVE_NSIGHT +#include "nvToolsExt.h" +std::unordered_map _map; +#endif + +//#define TEXTURE_TRANSFER_PBOS + +#ifdef TEXTURE_TRANSFER_PBOS +#define TEXTURE_TRANSFER_BLOCK_SIZE (64 * 1024) +#define TEXTURE_TRANSFER_PBO_COUNT 128 +#endif + using namespace gpu; using namespace gpu::gl; @@ -36,82 +48,126 @@ GLTextureTransferHelper::~GLTextureTransferHelper() { void GLTextureTransferHelper::transferTexture(const gpu::TexturePointer& texturePointer) { GLTexture* object = Backend::getGPUObject(*texturePointer); - Backend::incrementTextureGPUTransferCount(); -#ifdef THREADED_TEXTURE_TRANSFER - GLsync fence { 0 }; - //fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); - //glFlush(); - TextureTransferPackage package { texturePointer, fence }; +#ifdef THREADED_TEXTURE_TRANSFER + Backend::incrementTextureGPUTransferCount(); object->setSyncState(GLSyncState::Pending); - queueItem(package); + Lock lock(_mutex); + _pendingTextures.push_back(texturePointer); #else - object->withPreservedTexture([&] { - do_transfer(*object); - }); + for (object->startTransfer(); object->continueTransfer(); ) { } + object->finishTransfer(); object->_contentStamp = texturePointer->getDataStamp(); object->setSyncState(GLSyncState::Transferred); #endif } void GLTextureTransferHelper::setup() { +#ifdef THREADED_TEXTURE_TRANSFER + _context.makeCurrent(); + glCreateRenderbuffers(1, &_drawRenderbuffer); + glNamedRenderbufferStorage(_drawRenderbuffer, GL_RGBA8, 128, 128); + glCreateFramebuffers(1, &_drawFramebuffer); + glNamedFramebufferRenderbuffer(_drawFramebuffer, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, _drawRenderbuffer); + glCreateFramebuffers(1, &_readFramebuffer); +#ifdef TEXTURE_TRANSFER_PBOS + std::array pbos; + glCreateBuffers(TEXTURE_TRANSFER_PBO_COUNT, &pbos[0]); + for (uint32_t i = 0; i < TEXTURE_TRANSFER_PBO_COUNT; ++i) { + TextureTransferBlock newBlock; + newBlock._pbo = pbos[i]; + glNamedBufferStorage(newBlock._pbo, TEXTURE_TRANSFER_BLOCK_SIZE, 0, GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT); + newBlock._mapped = glMapNamedBufferRange(newBlock._pbo, 0, TEXTURE_TRANSFER_BLOCK_SIZE, GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT); + _readyQueue.push(newBlock); + } +#endif +#endif } void GLTextureTransferHelper::shutdown() { -} - -void GLTextureTransferHelper::do_transfer(GLTexture& texture) { - texture.createTexture(); - texture.transfer(); - texture.updateSize(); - Backend::decrementTextureGPUTransferCount(); -} - -bool GLTextureTransferHelper::processQueueItems(const Queue& messages) { #ifdef THREADED_TEXTURE_TRANSFER _context.makeCurrent(); + + glNamedFramebufferRenderbuffer(_drawFramebuffer, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, 0); + glDeleteFramebuffers(1, &_drawFramebuffer); + _drawFramebuffer = 0; + glDeleteFramebuffers(1, &_readFramebuffer); + _readFramebuffer = 0; + + glNamedFramebufferTexture(_readFramebuffer, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0); + glDeleteRenderbuffers(1, &_drawRenderbuffer); + _drawRenderbuffer = 0; #endif - for (auto package : messages) { - TexturePointer texturePointer = package.texture.lock(); - // Texture no longer exists, move on to the next - if (!texturePointer) { +} + +bool GLTextureTransferHelper::process() { +#ifdef THREADED_TEXTURE_TRANSFER + // Take any new textures off the queue + TextureList newTransferTextures; + { + Lock lock(_mutex); + newTransferTextures.swap(_pendingTextures); + } + + if (!newTransferTextures.empty()) { + for (auto& texturePointer : newTransferTextures) { +#ifdef HAVE_NSIGHT + _map[texturePointer] = nvtxRangeStart("TextureTansfer"); +#endif + GLTexture* object = Backend::getGPUObject(*texturePointer); + object->startTransfer(); + _transferringTextures.push_back(texturePointer); + _textureIterator = _transferringTextures.begin(); + } + } + + // No transfers in progress, sleep + if (_transferringTextures.empty()) { + QThread::usleep(1); + return true; + } + + static auto lastReport = usecTimestampNow(); + auto now = usecTimestampNow(); + auto lastReportInterval = now - lastReport; + if (lastReportInterval > USECS_PER_SECOND * 4) { + lastReport = now; + qDebug() << "Texture list " << _transferringTextures.size(); + } + + for (auto _textureIterator = _transferringTextures.begin(); _textureIterator != _transferringTextures.end();) { + auto texture = *_textureIterator; + GLTexture* gltexture = Backend::getGPUObject(*texture); + if (gltexture->continueTransfer()) { + ++_textureIterator; continue; } - if (package.fence) { - auto result = glClientWaitSync(package.fence, 0, 0); - while (GL_TIMEOUT_EXPIRED == result || GL_WAIT_FAILED == result) { - // Minimum sleep - QThread::usleep(1); - result = glClientWaitSync(package.fence, 0, 0); - } - assert(GL_CONDITION_SATISFIED == result || GL_ALREADY_SIGNALED == result); - glDeleteSync(package.fence); - package.fence = 0; - } - - GLTexture* object = Backend::getGPUObject(*texturePointer); - - do_transfer(*object); - glBindTexture(object->_target, 0); - - { - auto fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); - assert(fence); - auto result = glClientWaitSync(fence, GL_SYNC_FLUSH_COMMANDS_BIT, 0); - while (GL_TIMEOUT_EXPIRED == result || GL_WAIT_FAILED == result) { - // Minimum sleep - QThread::usleep(1); - result = glClientWaitSync(fence, 0, 0); - } - glDeleteSync(fence); - } - - object->_contentStamp = texturePointer->getDataStamp(); - object->setSyncState(GLSyncState::Transferred); + gltexture->finishTransfer(); + glNamedFramebufferTexture(_readFramebuffer, GL_COLOR_ATTACHMENT0, gltexture->_id, 0); + glBlitNamedFramebuffer(_readFramebuffer, _drawFramebuffer, 0, 0, 1, 1, 0, 0, 1, 1, GL_COLOR_BUFFER_BIT, GL_NEAREST); + glFinish(); + gltexture->_contentStamp = gltexture->_gpuObject.getDataStamp(); + gltexture->updateSize(); + gltexture->setSyncState(gpu::gl::GLSyncState::Transferred); + Backend::decrementTextureGPUTransferCount(); +#ifdef HAVE_NSIGHT + // Mark the texture as transferred + nvtxRangeEnd(_map[texture]); + _map.erase(texture); +#endif + _textureIterator = _transferringTextures.erase(_textureIterator); } -#ifdef THREADED_TEXTURE_TRANSFER - _context.doneCurrent(); + + if (!_transferringTextures.empty()) { + // Don't saturate the GPU + glFinish(); + } else { + // Don't saturate the CPU + QThread::msleep(1); + } +#else + QThread::msleep(1); #endif return true; } diff --git a/libraries/gpu-gl/src/gpu/gl/GLTextureTransfer.h b/libraries/gpu-gl/src/gpu/gl/GLTextureTransfer.h index e64e204af3..f88dddc5ff 100644 --- a/libraries/gpu-gl/src/gpu/gl/GLTextureTransfer.h +++ b/libraries/gpu-gl/src/gpu/gl/GLTextureTransfer.h @@ -23,27 +23,62 @@ namespace gpu { namespace gl { +using VoidLambda = std::function; + +#if 0 struct TextureTransferPackage { std::weak_ptr texture; GLsync fence; }; -class GLTextureTransferHelper : public GenericQueueThread { +struct TextureTransferBlock { + GLuint _pbo { 0 }; + void* _mapped { nullptr }; + GLsync _fence; + std::function _transferCallback; + bool isSignaled(); + void transfer(); +}; + +using CommandQueue = std::list; +struct FencedLambda { + GLsync _fence { 0 }; + VoidLambda _callback; +}; +#endif + +using TextureList = std::list; +using TextureListIterator = TextureList::iterator; + +class GLTextureTransferHelper : public GenericThread { public: using Pointer = std::shared_ptr; GLTextureTransferHelper(); ~GLTextureTransferHelper(); void transferTexture(const gpu::TexturePointer& texturePointer); - void postTransfer(const gpu::TexturePointer& texturePointer); protected: void setup() override; void shutdown() override; - bool processQueueItems(const Queue& messages) override; - void do_transfer(GLTexture& texturePointer); + bool process() override; private: +#ifdef THREADED_TEXTURE_TRANSFER ::gl::OffscreenContext _context; + // A mutex for protecting items access on the render and transfer threads + Mutex _mutex; + // Textures that have been submitted for transfer + TextureList _pendingTextures; + // Textures currently in the transfer process + // Only used on the transfer thread + TextureList _transferringTextures; + TextureListIterator _textureIterator; + + // Framebuffers / renderbuffers for forcing access to the texture on the transfer thread + GLuint _drawRenderbuffer { 0 }; + GLuint _drawFramebuffer { 0 }; + GLuint _readFramebuffer { 0 }; +#endif }; } } diff --git a/libraries/gpu-gl/src/gpu/gl41/GL41Backend.h b/libraries/gpu-gl/src/gpu/gl41/GL41Backend.h index 6e15542310..c89024b7e8 100644 --- a/libraries/gpu-gl/src/gpu/gl41/GL41Backend.h +++ b/libraries/gpu-gl/src/gpu/gl41/GL41Backend.h @@ -46,10 +46,10 @@ public: GL41Texture(const std::weak_ptr& backend, const Texture& buffer, GL41Texture* original); protected: - void transferMip(uint16_t mipLevel, uint8_t face = 0) const; + void transferMip(uint16_t mipLevel, uint8_t face) const; + void startTransfer() override; void allocateStorage() const override; void updateSize() const override; - void transfer() const override; void syncSampler() const override; void generateMips() const override; void withPreservedTexture(std::function f) const override; diff --git a/libraries/gpu-gl/src/gpu/gl41/GL41BackendTexture.cpp b/libraries/gpu-gl/src/gpu/gl41/GL41BackendTexture.cpp index ff1a790ba5..3d55802ec2 100644 --- a/libraries/gpu-gl/src/gpu/gl41/GL41BackendTexture.cpp +++ b/libraries/gpu-gl/src/gpu/gl41/GL41BackendTexture.cpp @@ -42,7 +42,7 @@ GL41Texture::GL41Texture(const std::weak_ptr& backend, const Texture& GL41Texture::GL41Texture(const std::weak_ptr& backend, const Texture& texture, GL41Texture* original) : GLTexture(backend, texture, allocate(), original) {} -void GL41Backend::GL41Texture::withPreservedTexture(std::function f) const { +void GL41Texture::withPreservedTexture(std::function f) const { GLint boundTex = -1; switch (_target) { case GL_TEXTURE_2D: @@ -64,14 +64,14 @@ void GL41Backend::GL41Texture::withPreservedTexture(std::function f) con (void)CHECK_GL_ERROR(); } -void GL41Backend::GL41Texture::generateMips() const { +void GL41Texture::generateMips() const { withPreservedTexture([&] { glGenerateMipmap(_target); }); (void)CHECK_GL_ERROR(); } -void GL41Backend::GL41Texture::allocateStorage() const { +void GL41Texture::allocateStorage() const { GLTexelFormat texelFormat = GLTexelFormat::evalGLTexelFormat(_gpuObject.getTexelFormat()); glTexParameteri(_target, GL_TEXTURE_BASE_LEVEL, 0); (void)CHECK_GL_ERROR(); @@ -94,7 +94,7 @@ void GL41Backend::GL41Texture::allocateStorage() const { } } -void GL41Backend::GL41Texture::updateSize() const { +void GL41Texture::updateSize() const { setSize(_virtualSize); if (!_id) { return; @@ -130,7 +130,7 @@ void GL41Backend::GL41Texture::updateSize() const { } // Move content bits from the CPU to the GPU for a given mip / face -void GL41Backend::GL41Texture::transferMip(uint16_t mipLevel, uint8_t face) const { +void GL41Texture::transferMip(uint16_t mipLevel, uint8_t face) const { auto mip = _gpuObject.accessStoredMipFace(mipLevel, face); GLTexelFormat texelFormat = GLTexelFormat::evalGLTexelFormat(_gpuObject.getTexelFormat(), mip->getFormat()); //GLenum target = getFaceTargets()[face]; @@ -140,15 +140,9 @@ void GL41Backend::GL41Texture::transferMip(uint16_t mipLevel, uint8_t face) cons (void)CHECK_GL_ERROR(); } -// This should never happen on the main thread -// Move content bits from the CPU to the GPU -void GL41Backend::GL41Texture::transfer() const { +void GL41Texture::startTransfer() { PROFILE_RANGE(__FUNCTION__); - //qDebug() << "Transferring texture: " << _privateTexture; - // Need to update the content of the GPU object from the source sysmem of the texture - if (_contentStamp >= _gpuObject.getDataStamp()) { - return; - } + Parent::startTransfer(); glBindTexture(_target, _id); (void)CHECK_GL_ERROR(); @@ -175,38 +169,16 @@ void GL41Backend::GL41Texture::transfer() const { glBindFramebuffer(GL_READ_FRAMEBUFFER, 0); glDeleteFramebuffers(1, &fbo); } else { - // GO through the process of allocating the correct storage and/or update the content - switch (_gpuObject.getType()) { - case Texture::TEX_2D: - { - for (uint16_t i = _minMip; i <= _maxMip; ++i) { - if (_gpuObject.isStoredMipFaceAvailable(i)) { - transferMip(i); - } + // transfer pixels from each faces + uint8_t numFaces = (Texture::TEX_CUBE == _gpuObject.getType()) ? CUBE_NUM_FACES : 1; + for (uint8_t f = 0; f < numFaces; f++) { + for (uint16_t i = 0; i < Sampler::MAX_MIP_LEVEL; ++i) { + if (_gpuObject.isStoredMipFaceAvailable(i, f)) { + transferMip(i, f); } } - break; - - case Texture::TEX_CUBE: - // transfer pixels from each faces - for (uint8_t f = 0; f < CUBE_NUM_FACES; f++) { - for (uint16_t i = 0; i < Sampler::MAX_MIP_LEVEL; ++i) { - if (_gpuObject.isStoredMipFaceAvailable(i, f)) { - transferMip(i, f); - } - } - } - break; - - default: - qCWarning(gpugl41logging) << __FUNCTION__ << " case for Texture Type " << _gpuObject.getType() << " not supported"; - break; } } - if (_gpuObject.isAutogenerateMips()) { - glGenerateMipmap(_target); - (void)CHECK_GL_ERROR(); - } } void GL41Backend::GL41Texture::syncSampler() const { diff --git a/libraries/gpu-gl/src/gpu/gl45/GL45Backend.h b/libraries/gpu-gl/src/gpu/gl45/GL45Backend.h index 0d737ef3ba..22e1a87719 100644 --- a/libraries/gpu-gl/src/gpu/gl45/GL45Backend.h +++ b/libraries/gpu-gl/src/gpu/gl45/GL45Backend.h @@ -18,6 +18,29 @@ namespace gpu { namespace gl45 { using namespace gpu::gl; +struct TransferState { + GLTexture& _texture; + GLenum _internalFormat { GL_RGBA8 }; + GLTexelFormat _texelFormat; + uint8_t _face { 0 }; + uint16_t _mipLevel { 0 }; + uint32_t _bytesPerLine { 0 }; + uint32_t _bytesPerPixel { 0 }; + uint32_t _bytesPerPage { 0 }; + GLuint _maxSparseLevel { 0 }; + + uvec3 _mipDimensions; + uvec3 _mipOffset; + uvec3 _pageSize; + const uint8_t* _srcPointer { nullptr }; + uvec3 currentPageSize() const; + void updateSparse(); + void updateMip(); + void populatePage(uint8_t* dest); + bool increment(); + TransferState(GLTexture& texture); +}; + class GL45Backend : public GLBackend { using Parent = GLBackend; // Context Backend static interface required @@ -29,19 +52,26 @@ public: class GL45Texture : public GLTexture { using Parent = GLTexture; - GLuint allocate(const Texture& texture); + static GLuint allocate(const Texture& texture); public: GL45Texture(const std::weak_ptr& backend, const Texture& texture, bool transferrable); GL45Texture(const std::weak_ptr& backend, const Texture& texture, GLTexture* original); + ~GL45Texture(); protected: + void startTransfer() override; + bool continueTransfer() override; + void incrementalTransfer(const uvec3& size, const gpu::Texture::PixelsPointer& mip, std::function f) const; void transferMip(uint16_t mipLevel, uint8_t face = 0) const; + void allocateMip(uint16_t mipLevel, uint8_t face = 0) const; void allocateStorage() const override; void updateSize() const override; - void transfer() const override; +// void transfer() const override; void syncSampler() const override; void generateMips() const override; void withPreservedTexture(std::function f) const override; + + TransferState _transferState; }; diff --git a/libraries/gpu-gl/src/gpu/gl45/GL45BackendTexture.cpp b/libraries/gpu-gl/src/gpu/gl45/GL45BackendTexture.cpp index d3222b9acf..3b512b3278 100644 --- a/libraries/gpu-gl/src/gpu/gl45/GL45BackendTexture.cpp +++ b/libraries/gpu-gl/src/gpu/gl45/GL45BackendTexture.cpp @@ -10,8 +10,11 @@ // #include "GL45Backend.h" +#include +#include #include #include +#include #include @@ -21,10 +24,114 @@ using namespace gpu; using namespace gpu::gl; using namespace gpu::gl45; +#define SPARSE_TEXTURES 1 + using GL45Texture = GL45Backend::GL45Texture; +GLTexture* GL45Backend::syncGPUObject(const TexturePointer& texture, bool transfer) { + return GL45Texture::sync(*this, texture, transfer); +} + +void serverWait() { + auto fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); + assert(fence); + glWaitSync(fence, 0, GL_TIMEOUT_IGNORED); + glDeleteSync(fence); +} + +void clientWait() { + auto fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); + assert(fence); + auto result = glClientWaitSync(fence, GL_SYNC_FLUSH_COMMANDS_BIT, 0); + while (GL_TIMEOUT_EXPIRED == result || GL_WAIT_FAILED == result) { + // Minimum sleep + QThread::usleep(1); + result = glClientWaitSync(fence, 0, 0); + } + glDeleteSync(fence); +} + +TransferState::TransferState(GLTexture& texture) : _texture(texture) { +} + +void TransferState::updateSparse() { + glGetTextureParameterIuiv(_texture._id, GL_NUM_SPARSE_LEVELS_ARB, &_maxSparseLevel); + _internalFormat = gl::GLTexelFormat::evalGLTexelFormat(_texture._gpuObject.getTexelFormat(), _texture._gpuObject.getTexelFormat()).internalFormat; + ivec3 pageSize; + glGetInternalformativ(_texture._target, _internalFormat, GL_VIRTUAL_PAGE_SIZE_X_ARB, 1, &pageSize.x); + glGetInternalformativ(_texture._target, _internalFormat, GL_VIRTUAL_PAGE_SIZE_Y_ARB, 1, &pageSize.y); + glGetInternalformativ(_texture._target, _internalFormat, GL_VIRTUAL_PAGE_SIZE_Z_ARB, 1, &pageSize.z); + _pageSize = uvec3(pageSize); +} + +void TransferState::updateMip() { + _mipDimensions = _texture._gpuObject.evalMipDimensions(_mipLevel); + _mipOffset = uvec3(); + if (!_texture._gpuObject.isStoredMipFaceAvailable(_mipLevel, _face)) { + _srcPointer = nullptr; + return; + } + + auto mip = _texture._gpuObject.accessStoredMipFace(_mipLevel, _face); + _texelFormat = gl::GLTexelFormat::evalGLTexelFormat(_texture._gpuObject.getTexelFormat(), mip->getFormat()); + _srcPointer = mip->readData(); + _bytesPerLine = (uint32_t)mip->getSize() / _mipDimensions.y; + _bytesPerPixel = _bytesPerLine / _mipDimensions.x; +} + +bool TransferState::increment() { + if ((_mipOffset.x + _pageSize.x) < _mipDimensions.x) { + _mipOffset.x += _pageSize.x; + return true; + } + + if ((_mipOffset.y + _pageSize.y) < _mipDimensions.y) { + _mipOffset.x = 0; + _mipOffset.y += _pageSize.y; + return true; + } + + if (_mipOffset.z + _pageSize.z < _mipDimensions.z) { + _mipOffset.x = 0; + _mipOffset.y = 0; + ++_mipOffset.z; + return true; + } + + // Done with this mip?, move on to the next mip + if (_mipLevel + 1 < _texture.usedMipLevels()) { + _mipOffset = uvec3(0); + ++_mipLevel; + updateMip(); + return true; + } + + // Done with this face? Move on to the next + if (_face + 1 < ((_texture._target == GL_TEXTURE_CUBE_MAP) ? GLTexture::CUBE_NUM_FACES : 1)) { + ++_face; + _mipOffset = uvec3(0); + _mipLevel = 0; + updateMip(); + return true; + } + + return false; +} + +void TransferState::populatePage(uint8_t* dst) { + uvec3 pageSize = currentPageSize(); + for (uint32_t y = 0; y < pageSize.y; ++y) { + uint32_t srcOffset = (_bytesPerLine * (_mipOffset.y + y)) + (_bytesPerPixel * _mipOffset.x); + uint32_t dstOffset = (_bytesPerPixel * pageSize.x) * y; + memcpy(dst + dstOffset, _srcPointer + srcOffset, pageSize.x * _bytesPerPixel); + } +} + +uvec3 TransferState::currentPageSize() const { + return glm::clamp(_mipDimensions - _mipOffset, uvec3(1), _pageSize); +} + GLuint GL45Texture::allocate(const Texture& texture) { - Backend::incrementTextureGPUCount(); GLuint result; glCreateTextures(getGLTextureType(texture), 1, &result); return result; @@ -34,26 +141,38 @@ GLuint GL45Backend::getTextureID(const TexturePointer& texture, bool transfer) { return GL45Texture::getId(*this, texture, transfer); } -GLTexture* GL45Backend::syncGPUObject(const TexturePointer& texture, bool transfer) { - return GL45Texture::sync(*this, texture, transfer); +GL45Texture::GL45Texture(const std::weak_ptr& backend, const Texture& texture, bool transferrable) + : GLTexture(backend, texture, allocate(texture), transferrable), _transferState(*this) { + +#if SPARSE_TEXTURES + if (transferrable) { + glTextureParameteri(_id, GL_TEXTURE_SPARSE_ARB, GL_TRUE); + } +#endif } -GL45Backend::GL45Texture::GL45Texture(const std::weak_ptr& backend, const Texture& texture, bool transferrable) - : GLTexture(backend, texture, allocate(texture), transferrable) {} +GL45Texture::GL45Texture(const std::weak_ptr& backend, const Texture& texture, GLTexture* original) + : GLTexture(backend, texture, allocate(texture), original), _transferState(*this) { } -GL45Backend::GL45Texture::GL45Texture(const std::weak_ptr& backend, const Texture& texture, GLTexture* original) - : GLTexture(backend, texture, allocate(texture), original) {} +GL45Texture::~GL45Texture() { + // FIXME do we need to explicitly deallocate the virtual memory here? + //if (_transferrable) { + // for (uint16_t mipLevel = 0; mipLevel < usedMipLevels(); ++i) { + // glTexturePageCommitmentEXT(_id, mipLevel, offset.x, offset.y, offset.z, size.x, size.y, size.z, GL_TRUE); + // } + //} +} -void GL45Backend::GL45Texture::withPreservedTexture(std::function f) const { +void GL45Texture::withPreservedTexture(std::function f) const { f(); } -void GL45Backend::GL45Texture::generateMips() const { +void GL45Texture::generateMips() const { glGenerateTextureMipmap(_id); (void)CHECK_GL_ERROR(); } -void GL45Backend::GL45Texture::allocateStorage() const { +void GL45Texture::allocateStorage() const { GLTexelFormat texelFormat = GLTexelFormat::evalGLTexelFormat(_gpuObject.getTexelFormat()); glTextureParameteri(_id, GL_TEXTURE_BASE_LEVEL, 0); glTextureParameteri(_id, GL_TEXTURE_MAX_LEVEL, _maxMip - _minMip); @@ -66,7 +185,7 @@ void GL45Backend::GL45Texture::allocateStorage() const { (void)CHECK_GL_ERROR(); } -void GL45Backend::GL45Texture::updateSize() const { +void GL45Texture::updateSize() const { setSize(_virtualSize); if (!_id) { return; @@ -77,86 +196,50 @@ void GL45Backend::GL45Texture::updateSize() const { } } -// Move content bits from the CPU to the GPU for a given mip / face -void GL45Backend::GL45Texture::transferMip(uint16_t mipLevel, uint8_t face) const { - auto mip = _gpuObject.accessStoredMipFace(mipLevel, face); - GLTexelFormat texelFormat = GLTexelFormat::evalGLTexelFormat(_gpuObject.getTexelFormat(), mip->getFormat()); - auto size = _gpuObject.evalMipDimensions(mipLevel); - if (GL_TEXTURE_2D == _target) { - glTextureSubImage2D(_id, mipLevel, 0, 0, size.x, size.y, texelFormat.format, texelFormat.type, mip->readData()); - } else if (GL_TEXTURE_CUBE_MAP == _target) { - // DSA ARB does not work on AMD, so use EXT - // glTextureSubImage3D(_id, mipLevel, 0, 0, face, size.x, size.y, 1, texelFormat.format, texelFormat.type, mip->readData()); - auto target = CUBE_FACE_LAYOUT[face]; - glTextureSubImage2DEXT(_id, target, mipLevel, 0, 0, size.x, size.y, texelFormat.format, texelFormat.type, mip->readData()); - } else { - Q_ASSERT(false); - } - (void)CHECK_GL_ERROR(); +void GL45Texture::startTransfer() { + Parent::startTransfer(); + _transferState.updateSparse(); + _transferState.updateMip(); } -// This should never happen on the main thread -// Move content bits from the CPU to the GPU -void GL45Backend::GL45Texture::transfer() const { - PROFILE_RANGE(__FUNCTION__); - //qDebug() << "Transferring texture: " << _privateTexture; - // Need to update the content of the GPU object from the source sysmem of the texture - if (_contentStamp >= _gpuObject.getDataStamp()) { - return; +bool GL45Texture::continueTransfer() { + static std::vector buffer; + if (buffer.empty()) { + buffer.resize(1024 * 1024); } + uvec3 pageSize = _transferState.currentPageSize(); + uvec3 offset = _transferState._mipOffset; - if (_downsampleSource._texture) { - GLuint fbo { 0 }; - glCreateFramebuffers(1, &fbo); - glBindFramebuffer(GL_READ_FRAMEBUFFER, fbo); - // Find the distance between the old min mip and the new one - uint16 mipOffset = _minMip - _downsampleSource._minMip; - for (uint16 i = _minMip; i <= _maxMip; ++i) { - uint16 targetMip = i - _minMip; - uint16 sourceMip = targetMip + mipOffset; - Vec3u dimensions = _gpuObject.evalMipDimensions(i); - for (GLenum target : getFaceTargets(_target)) { - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, target, _downsampleSource._texture, sourceMip); - (void)CHECK_GL_ERROR(); - glCopyTextureSubImage2D(_id, targetMip, 0, 0, 0, 0, dimensions.x, dimensions.y); - (void)CHECK_GL_ERROR(); - } - } - glBindFramebuffer(GL_READ_FRAMEBUFFER, 0); - glDeleteFramebuffers(1, &fbo); - } else { - // GO through the process of allocating the correct storage and/or update the content - switch (_gpuObject.getType()) { - case Texture::TEX_2D: - { - for (uint16_t i = _minMip; i <= _maxMip; ++i) { - if (_gpuObject.isStoredMipFaceAvailable(i)) { - transferMip(i); - } - } - } - break; +#if SPARSE_TEXTURES + if (_transferState._mipLevel <= _transferState._maxSparseLevel) { + glTexturePageCommitmentEXT(_id, _transferState._mipLevel, + offset.x, offset.y, _transferState._face, + pageSize.x, pageSize.y, pageSize.z, + GL_TRUE); + } +#endif - case Texture::TEX_CUBE: - // transfer pixels from each faces - for (uint8_t f = 0; f < CUBE_NUM_FACES; f++) { - for (uint16_t i = 0; i < Sampler::MAX_MIP_LEVEL; ++i) { - if (_gpuObject.isStoredMipFaceAvailable(i, f)) { - transferMip(i, f); - } - } - } - break; - - default: - qCWarning(gpugl45logging) << __FUNCTION__ << " case for Texture Type " << _gpuObject.getType() << " not supported"; - break; + if (_transferState._srcPointer) { + // Transfer the mip data + _transferState.populatePage(&buffer[0]); + if (GL_TEXTURE_2D == _target) { + glTextureSubImage2D(_id, _transferState._mipLevel, + offset.x, offset.y, + pageSize.x, pageSize.y, + _transferState._texelFormat.format, _transferState._texelFormat.type, &buffer[0]); + } else if (GL_TEXTURE_CUBE_MAP == _target) { + auto target = CUBE_FACE_LAYOUT[_transferState._face]; + // DSA ARB does not work on AMD, so use EXT + // glTextureSubImage3D(_id, mipLevel, 0, 0, face, size.x, size.y, 1, texelFormat.format, texelFormat.type, mip->readData()); + glTextureSubImage2DEXT(_id, target, _transferState._mipLevel, + offset.x, offset.y, + pageSize.x, pageSize.y, + _transferState._texelFormat.format, _transferState._texelFormat.type, &buffer[0]); } } - if (_gpuObject.isAutogenerateMips()) { - glGenerateTextureMipmap(_id); - (void)CHECK_GL_ERROR(); - } + + serverWait(); + return _transferState.increment(); } void GL45Backend::GL45Texture::syncSampler() const { From a9ba92f16c3ca325de870892615747ea48aa490a Mon Sep 17 00:00:00 2001 From: Brad Davis Date: Tue, 6 Sep 2016 13:22:27 -0700 Subject: [PATCH 2/7] Fix alignment of small pages --- libraries/gpu-gl/src/gpu/gl45/GL45BackendTexture.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/libraries/gpu-gl/src/gpu/gl45/GL45BackendTexture.cpp b/libraries/gpu-gl/src/gpu/gl45/GL45BackendTexture.cpp index 3b512b3278..ce359ad6c2 100644 --- a/libraries/gpu-gl/src/gpu/gl45/GL45BackendTexture.cpp +++ b/libraries/gpu-gl/src/gpu/gl45/GL45BackendTexture.cpp @@ -118,11 +118,17 @@ bool TransferState::increment() { return false; } +#define DEFAULT_GL_PIXEL_ALIGNMENT 4 void TransferState::populatePage(uint8_t* dst) { uvec3 pageSize = currentPageSize(); + auto bytesPerPageLine = _bytesPerPixel * pageSize.x; + if (0 != (bytesPerPageLine % DEFAULT_GL_PIXEL_ALIGNMENT)) { + bytesPerPageLine += DEFAULT_GL_PIXEL_ALIGNMENT - (bytesPerPageLine % DEFAULT_GL_PIXEL_ALIGNMENT); + assert(0 == (bytesPerPageLine % DEFAULT_GL_PIXEL_ALIGNMENT)); + } for (uint32_t y = 0; y < pageSize.y; ++y) { uint32_t srcOffset = (_bytesPerLine * (_mipOffset.y + y)) + (_bytesPerPixel * _mipOffset.x); - uint32_t dstOffset = (_bytesPerPixel * pageSize.x) * y; + uint32_t dstOffset = bytesPerPageLine * y; memcpy(dst + dstOffset, _srcPointer + srcOffset, pageSize.x * _bytesPerPixel); } } From e03b0b5825ed8d0309c8b9aacfe00d97adeb44d2 Mon Sep 17 00:00:00 2001 From: Brad Davis Date: Wed, 7 Sep 2016 12:45:44 -0700 Subject: [PATCH 3/7] Cleanup dead code --- .../gpu-gl/src/gpu/gl/GLTextureTransfer.h | 24 ------------------- libraries/gpu-gl/src/gpu/gl45/GL45Backend.h | 1 - 2 files changed, 25 deletions(-) diff --git a/libraries/gpu-gl/src/gpu/gl/GLTextureTransfer.h b/libraries/gpu-gl/src/gpu/gl/GLTextureTransfer.h index f88dddc5ff..d2207df7e6 100644 --- a/libraries/gpu-gl/src/gpu/gl/GLTextureTransfer.h +++ b/libraries/gpu-gl/src/gpu/gl/GLTextureTransfer.h @@ -23,30 +23,6 @@ namespace gpu { namespace gl { -using VoidLambda = std::function; - -#if 0 -struct TextureTransferPackage { - std::weak_ptr texture; - GLsync fence; -}; - -struct TextureTransferBlock { - GLuint _pbo { 0 }; - void* _mapped { nullptr }; - GLsync _fence; - std::function _transferCallback; - bool isSignaled(); - void transfer(); -}; - -using CommandQueue = std::list; -struct FencedLambda { - GLsync _fence { 0 }; - VoidLambda _callback; -}; -#endif - using TextureList = std::list; using TextureListIterator = TextureList::iterator; diff --git a/libraries/gpu-gl/src/gpu/gl45/GL45Backend.h b/libraries/gpu-gl/src/gpu/gl45/GL45Backend.h index 22e1a87719..eab1aa07d8 100644 --- a/libraries/gpu-gl/src/gpu/gl45/GL45Backend.h +++ b/libraries/gpu-gl/src/gpu/gl45/GL45Backend.h @@ -66,7 +66,6 @@ public: void allocateMip(uint16_t mipLevel, uint8_t face = 0) const; void allocateStorage() const override; void updateSize() const override; -// void transfer() const override; void syncSampler() const override; void generateMips() const override; void withPreservedTexture(std::function f) const override; From bc6b43b7bea23c9581175f4eca00794b5ff25e7e Mon Sep 17 00:00:00 2001 From: Brad Davis Date: Thu, 8 Sep 2016 11:20:37 -0700 Subject: [PATCH 4/7] Add debug option to lower texture resolution --- interface/src/Application.cpp | 2 +- interface/src/Menu.cpp | 10 ++++++++++ libraries/model/src/model/TextureMap.cpp | 23 +++++++++++++++-------- 3 files changed, 26 insertions(+), 9 deletions(-) diff --git a/interface/src/Application.cpp b/interface/src/Application.cpp index 904a6c5b65..f475bf18e2 100644 --- a/interface/src/Application.cpp +++ b/interface/src/Application.cpp @@ -2006,7 +2006,7 @@ void Application::resizeGL() { static qreal lastDevicePixelRatio = 0; qreal devicePixelRatio = _window->devicePixelRatio(); if (offscreenUi->size() != fromGlm(uiSize) || devicePixelRatio != lastDevicePixelRatio) { - qDebug() << "Device pixel ratio changed, triggering resize"; + qDebug() << "Device pixel ratio changed, triggering resize to " << uiSize; offscreenUi->resize(fromGlm(uiSize), true); _offscreenContext->makeCurrent(); lastDevicePixelRatio = devicePixelRatio; diff --git a/interface/src/Menu.cpp b/interface/src/Menu.cpp index d3caa4a092..08abbf63d2 100644 --- a/interface/src/Menu.cpp +++ b/interface/src/Menu.cpp @@ -47,6 +47,8 @@ #include "Menu.h" +extern bool DEV_DECIMATE_TEXTURES; + Menu* Menu::getInstance() { return dynamic_cast(qApp->getWindow()->menuBar()); } @@ -390,6 +392,14 @@ Menu::Menu() { // Developer > Render > LOD Tools addActionToQMenuAndActionHash(renderOptionsMenu, MenuOption::LodTools, 0, dialogsManager.data(), SLOT(lodTools())); + // HACK enable texture decimation + { + auto action = addCheckableActionToQMenuAndActionHash(renderOptionsMenu, "Decimate Textures"); + connect(action, &QAction::triggered, [&](bool checked) { + DEV_DECIMATE_TEXTURES = checked; + }); + } + // Developer > Assets >>> MenuWrapper* assetDeveloperMenu = developerMenu->addMenu("Assets"); auto& atpMigrator = ATPAssetMigrator::getInstance(); diff --git a/libraries/model/src/model/TextureMap.cpp b/libraries/model/src/model/TextureMap.cpp index 754862aa4a..9345124d54 100755 --- a/libraries/model/src/model/TextureMap.cpp +++ b/libraries/model/src/model/TextureMap.cpp @@ -22,6 +22,13 @@ using namespace gpu; // FIXME: Declare this to enable compression //#define COMPRESS_TEXTURES +bool DEV_DECIMATE_TEXTURES = false; +QImage processSourceImage(const QImage& srcImage) { + if (DEV_DECIMATE_TEXTURES) { + return srcImage.scaled(srcImage.size() * 0.5f); + } + return srcImage; +} void TextureMap::setTextureSource(TextureSourcePointer& textureSource) { _textureSource = textureSource; @@ -53,7 +60,7 @@ void TextureMap::setLightmapOffsetScale(float offset, float scale) { } const QImage TextureUsage::process2DImageColor(const QImage& srcImage, bool& validAlpha, bool& alphaAsMask) { - QImage image = srcImage; + QImage image = processSourceImage(srcImage); validAlpha = false; alphaAsMask = true; const uint8 OPAQUE_ALPHA = 255; @@ -221,7 +228,7 @@ gpu::Texture* TextureUsage::createLightmapTextureFromImage(const QImage& srcImag gpu::Texture* TextureUsage::createNormalTextureFromNormalImage(const QImage& srcImage, const std::string& srcImageName) { - QImage image = srcImage; + QImage image = processSourceImage(srcImage); if (image.format() != QImage::Format_RGB888) { image = image.convertToFormat(QImage::Format_RGB888); @@ -254,8 +261,8 @@ double mapComponent(double sobelValue) { } gpu::Texture* TextureUsage::createNormalTextureFromBumpImage(const QImage& srcImage, const std::string& srcImageName) { - QImage image = srcImage; - + QImage image = processSourceImage(srcImage); + if (image.format() != QImage::Format_RGB888) { image = image.convertToFormat(QImage::Format_RGB888); } @@ -325,7 +332,7 @@ gpu::Texture* TextureUsage::createNormalTextureFromBumpImage(const QImage& srcIm } gpu::Texture* TextureUsage::createRoughnessTextureFromImage(const QImage& srcImage, const std::string& srcImageName) { - QImage image = srcImage; + QImage image = processSourceImage(srcImage); if (!image.hasAlphaChannel()) { if (image.format() != QImage::Format_RGB888) { image = image.convertToFormat(QImage::Format_RGB888); @@ -358,7 +365,7 @@ gpu::Texture* TextureUsage::createRoughnessTextureFromImage(const QImage& srcIma } gpu::Texture* TextureUsage::createRoughnessTextureFromGlossImage(const QImage& srcImage, const std::string& srcImageName) { - QImage image = srcImage; + QImage image = processSourceImage(srcImage); if (!image.hasAlphaChannel()) { if (image.format() != QImage::Format_RGB888) { image = image.convertToFormat(QImage::Format_RGB888); @@ -395,7 +402,7 @@ gpu::Texture* TextureUsage::createRoughnessTextureFromGlossImage(const QImage& s } gpu::Texture* TextureUsage::createMetallicTextureFromImage(const QImage& srcImage, const std::string& srcImageName) { - QImage image = srcImage; + QImage image = processSourceImage(srcImage); if (!image.hasAlphaChannel()) { if (image.format() != QImage::Format_RGB888) { image = image.convertToFormat(QImage::Format_RGB888); @@ -687,7 +694,7 @@ const int CubeLayout::NUM_CUBEMAP_LAYOUTS = sizeof(CubeLayout::CUBEMAP_LAYOUTS) gpu::Texture* TextureUsage::processCubeTextureColorFromImage(const QImage& srcImage, const std::string& srcImageName, bool isLinear, bool doCompress, bool generateMips, bool generateIrradiance) { gpu::Texture* theTexture = nullptr; if ((srcImage.width() > 0) && (srcImage.height() > 0)) { - QImage image = srcImage; + QImage image = processSourceImage(srcImage); if (image.format() != QImage::Format_RGB888) { image = image.convertToFormat(QImage::Format_RGB888); } From a13450b36f6a5dfe3121ad344aab4ff598c0c4ab Mon Sep 17 00:00:00 2001 From: Brad Davis Date: Thu, 8 Sep 2016 22:25:04 -0700 Subject: [PATCH 5/7] Fix ubuntu warning --- libraries/gpu-gl/src/gpu/gl45/GL45BackendTexture.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/libraries/gpu-gl/src/gpu/gl45/GL45BackendTexture.cpp b/libraries/gpu-gl/src/gpu/gl45/GL45BackendTexture.cpp index ce359ad6c2..db8186221c 100644 --- a/libraries/gpu-gl/src/gpu/gl45/GL45BackendTexture.cpp +++ b/libraries/gpu-gl/src/gpu/gl45/GL45BackendTexture.cpp @@ -106,8 +106,10 @@ bool TransferState::increment() { return true; } + uint8_t maxFace = (uint8_t)((_texture._target == GL_TEXTURE_CUBE_MAP) ? GLTexture::CUBE_NUM_FACES : 1); + uint8_t nextFace = _face + 1; // Done with this face? Move on to the next - if (_face + 1 < ((_texture._target == GL_TEXTURE_CUBE_MAP) ? GLTexture::CUBE_NUM_FACES : 1)) { + if (nextFace < maxFace) { ++_face; _mipOffset = uvec3(0); _mipLevel = 0; From 27ddd39a22058b28cf057ed5aae9433278670b81 Mon Sep 17 00:00:00 2001 From: Brad Davis Date: Fri, 9 Sep 2016 09:30:38 -0700 Subject: [PATCH 6/7] PR comments --- libraries/gpu-gl/src/gpu/gl/GLShared.cpp | 21 +++++++++++++++++++ libraries/gpu-gl/src/gpu/gl/GLShared.h | 6 ++++++ .../gpu-gl/src/gpu/gl/GLTextureTransfer.cpp | 4 ++-- .../src/gpu/gl45/GL45BackendTexture.cpp | 19 ----------------- 4 files changed, 29 insertions(+), 21 deletions(-) diff --git a/libraries/gpu-gl/src/gpu/gl/GLShared.cpp b/libraries/gpu-gl/src/gpu/gl/GLShared.cpp index fd6857c4c0..d59be0d9de 100644 --- a/libraries/gpu-gl/src/gpu/gl/GLShared.cpp +++ b/libraries/gpu-gl/src/gpu/gl/GLShared.cpp @@ -9,6 +9,8 @@ #include +#include + #include #include #include @@ -933,9 +935,28 @@ void makeProgramBindings(ShaderObject& shaderObject) { (void)CHECK_GL_ERROR(); } +void serverWait() { + auto fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); + assert(fence); + glWaitSync(fence, 0, GL_TIMEOUT_IGNORED); + glDeleteSync(fence); +} + +void clientWait() { + auto fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); + assert(fence); + auto result = glClientWaitSync(fence, GL_SYNC_FLUSH_COMMANDS_BIT, 0); + while (GL_TIMEOUT_EXPIRED == result || GL_WAIT_FAILED == result) { + // Minimum sleep + QThread::usleep(1); + result = glClientWaitSync(fence, 0, 0); + } + glDeleteSync(fence); +} } } + using namespace gpu; diff --git a/libraries/gpu-gl/src/gpu/gl/GLShared.h b/libraries/gpu-gl/src/gpu/gl/GLShared.h index 676d3910ff..7ec6deeb9d 100644 --- a/libraries/gpu-gl/src/gpu/gl/GLShared.h +++ b/libraries/gpu-gl/src/gpu/gl/GLShared.h @@ -18,6 +18,12 @@ Q_DECLARE_LOGGING_CATEGORY(gpugllogging) namespace gpu { namespace gl { +// Create a fence and inject a GPU wait on the fence +void serverWait(); + +// Create a fence and synchronously wait on the fence +void clientWait(); + gpu::Size getDedicatedMemory(); ComparisonFunction comparisonFuncFromGL(GLenum func); State::StencilOp stencilOpFromGL(GLenum stencilOp); diff --git a/libraries/gpu-gl/src/gpu/gl/GLTextureTransfer.cpp b/libraries/gpu-gl/src/gpu/gl/GLTextureTransfer.cpp index 0f64ea1182..ae8739bb3b 100644 --- a/libraries/gpu-gl/src/gpu/gl/GLTextureTransfer.cpp +++ b/libraries/gpu-gl/src/gpu/gl/GLTextureTransfer.cpp @@ -146,7 +146,7 @@ bool GLTextureTransferHelper::process() { gltexture->finishTransfer(); glNamedFramebufferTexture(_readFramebuffer, GL_COLOR_ATTACHMENT0, gltexture->_id, 0); glBlitNamedFramebuffer(_readFramebuffer, _drawFramebuffer, 0, 0, 1, 1, 0, 0, 1, 1, GL_COLOR_BUFFER_BIT, GL_NEAREST); - glFinish(); + clientWait(); gltexture->_contentStamp = gltexture->_gpuObject.getDataStamp(); gltexture->updateSize(); gltexture->setSyncState(gpu::gl::GLSyncState::Transferred); @@ -161,7 +161,7 @@ bool GLTextureTransferHelper::process() { if (!_transferringTextures.empty()) { // Don't saturate the GPU - glFinish(); + clientWait(); } else { // Don't saturate the CPU QThread::msleep(1); diff --git a/libraries/gpu-gl/src/gpu/gl45/GL45BackendTexture.cpp b/libraries/gpu-gl/src/gpu/gl45/GL45BackendTexture.cpp index db8186221c..871a2c8a03 100644 --- a/libraries/gpu-gl/src/gpu/gl45/GL45BackendTexture.cpp +++ b/libraries/gpu-gl/src/gpu/gl45/GL45BackendTexture.cpp @@ -32,25 +32,6 @@ GLTexture* GL45Backend::syncGPUObject(const TexturePointer& texture, bool transf return GL45Texture::sync(*this, texture, transfer); } -void serverWait() { - auto fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); - assert(fence); - glWaitSync(fence, 0, GL_TIMEOUT_IGNORED); - glDeleteSync(fence); -} - -void clientWait() { - auto fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); - assert(fence); - auto result = glClientWaitSync(fence, GL_SYNC_FLUSH_COMMANDS_BIT, 0); - while (GL_TIMEOUT_EXPIRED == result || GL_WAIT_FAILED == result) { - // Minimum sleep - QThread::usleep(1); - result = glClientWaitSync(fence, 0, 0); - } - glDeleteSync(fence); -} - TransferState::TransferState(GLTexture& texture) : _texture(texture) { } From 017181f0203b24b5bdc6ef4307f240daaf6bcfb9 Mon Sep 17 00:00:00 2001 From: Brad Davis Date: Fri, 9 Sep 2016 09:46:54 -0700 Subject: [PATCH 7/7] Remove magic numbers, ensure proper buffer size for page transfers --- libraries/gpu-gl/src/gpu/gl45/GL45Backend.h | 2 +- .../gpu-gl/src/gpu/gl45/GL45BackendTexture.cpp | 14 +++++++++++--- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/libraries/gpu-gl/src/gpu/gl45/GL45Backend.h b/libraries/gpu-gl/src/gpu/gl45/GL45Backend.h index eab1aa07d8..db297e77fd 100644 --- a/libraries/gpu-gl/src/gpu/gl45/GL45Backend.h +++ b/libraries/gpu-gl/src/gpu/gl45/GL45Backend.h @@ -36,7 +36,7 @@ struct TransferState { uvec3 currentPageSize() const; void updateSparse(); void updateMip(); - void populatePage(uint8_t* dest); + void populatePage(std::vector& dest); bool increment(); TransferState(GLTexture& texture); }; diff --git a/libraries/gpu-gl/src/gpu/gl45/GL45BackendTexture.cpp b/libraries/gpu-gl/src/gpu/gl45/GL45BackendTexture.cpp index 871a2c8a03..b511ed7811 100644 --- a/libraries/gpu-gl/src/gpu/gl45/GL45BackendTexture.cpp +++ b/libraries/gpu-gl/src/gpu/gl45/GL45BackendTexture.cpp @@ -26,6 +26,9 @@ using namespace gpu::gl45; #define SPARSE_TEXTURES 1 +// Allocate 1 MB of buffer space for paged transfers +#define DEFAULT_PAGE_BUFFER_SIZE (1024*1024) + using GL45Texture = GL45Backend::GL45Texture; GLTexture* GL45Backend::syncGPUObject(const TexturePointer& texture, bool transfer) { @@ -102,13 +105,18 @@ bool TransferState::increment() { } #define DEFAULT_GL_PIXEL_ALIGNMENT 4 -void TransferState::populatePage(uint8_t* dst) { +void TransferState::populatePage(std::vector& buffer) { uvec3 pageSize = currentPageSize(); auto bytesPerPageLine = _bytesPerPixel * pageSize.x; if (0 != (bytesPerPageLine % DEFAULT_GL_PIXEL_ALIGNMENT)) { bytesPerPageLine += DEFAULT_GL_PIXEL_ALIGNMENT - (bytesPerPageLine % DEFAULT_GL_PIXEL_ALIGNMENT); assert(0 == (bytesPerPageLine % DEFAULT_GL_PIXEL_ALIGNMENT)); } + auto totalPageSize = bytesPerPageLine * pageSize.y; + if (totalPageSize > buffer.size()) { + buffer.resize(totalPageSize); + } + uint8_t* dst = &buffer[0]; for (uint32_t y = 0; y < pageSize.y; ++y) { uint32_t srcOffset = (_bytesPerLine * (_mipOffset.y + y)) + (_bytesPerPixel * _mipOffset.x); uint32_t dstOffset = bytesPerPageLine * y; @@ -194,7 +202,7 @@ void GL45Texture::startTransfer() { bool GL45Texture::continueTransfer() { static std::vector buffer; if (buffer.empty()) { - buffer.resize(1024 * 1024); + buffer.resize(DEFAULT_PAGE_BUFFER_SIZE); } uvec3 pageSize = _transferState.currentPageSize(); uvec3 offset = _transferState._mipOffset; @@ -210,7 +218,7 @@ bool GL45Texture::continueTransfer() { if (_transferState._srcPointer) { // Transfer the mip data - _transferState.populatePage(&buffer[0]); + _transferState.populatePage(buffer); if (GL_TEXTURE_2D == _target) { glTextureSubImage2D(_id, _transferState._mipLevel, offset.x, offset.y,