diff --git a/libraries/gpu-gl/CMakeLists.txt b/libraries/gpu-gl/CMakeLists.txt index 320f9b3c71..65df5ed9dc 100644 --- a/libraries/gpu-gl/CMakeLists.txt +++ b/libraries/gpu-gl/CMakeLists.txt @@ -4,6 +4,7 @@ link_hifi_libraries(shared gl gpu) GroupSources("src") target_opengl() +target_nsight() if (NOT ANDROID) target_glew() diff --git a/libraries/gpu-gl/src/gpu/gl/GLBackend.cpp b/libraries/gpu-gl/src/gpu/gl/GLBackend.cpp index 7c369f4124..98a073e283 100644 --- a/libraries/gpu-gl/src/gpu/gl/GLBackend.cpp +++ b/libraries/gpu-gl/src/gpu/gl/GLBackend.cpp @@ -315,7 +315,6 @@ void GLBackend::render(const Batch& batch) { void GLBackend::syncCache() { - recycle(); syncTransformStateCache(); syncPipelineStateCache(); syncInputStateCache(); diff --git a/libraries/gpu-gl/src/gpu/gl/GLTexture.cpp b/libraries/gpu-gl/src/gpu/gl/GLTexture.cpp index d90ca3bbd6..255258d762 100644 --- a/libraries/gpu-gl/src/gpu/gl/GLTexture.cpp +++ b/libraries/gpu-gl/src/gpu/gl/GLTexture.cpp @@ -292,3 +292,14 @@ void GLTexture::postTransfer() { void GLTexture::initTextureTransferHelper() { _textureTransferHelper = std::make_shared(); } + +void GLTexture::startTransfer() { + createTexture(); +} + +void GLTexture::finishTransfer() { + if (_gpuObject.isAutogenerateMips()) { + generateMips(); + } +} + diff --git a/libraries/gpu-gl/src/gpu/gl/GLTexture.h b/libraries/gpu-gl/src/gpu/gl/GLTexture.h index 4f67039aa8..742b223e36 100644 --- a/libraries/gpu-gl/src/gpu/gl/GLTexture.h +++ b/libraries/gpu-gl/src/gpu/gl/GLTexture.h @@ -11,6 +11,7 @@ #include "GLShared.h" #include "GLTextureTransfer.h" #include "GLBackend.h" +#include "GLTexelFormat.h" namespace gpu { namespace gl { @@ -19,6 +20,7 @@ struct GLFilterMode { GLint magFilter; }; + class GLTexture : public GLObject { public: static const uint16_t INVALID_MIP { (uint16_t)-1 }; @@ -162,11 +164,13 @@ public: bool isOverMaxMemory() const; -protected: + uint16 usedMipLevels() const { return (_maxMip - _minMip) + 1; } + static const size_t CUBE_NUM_FACES = 6; static const GLenum CUBE_FACE_LAYOUT[6]; static const GLFilterMode FILTER_MODES[Sampler::NUM_FILTERS]; static const GLenum WRAP_MODES[Sampler::NUM_WRAP_MODES]; +protected: static const std::vector& getFaceTargets(GLenum textureType); @@ -185,13 +189,11 @@ protected: GLTexture(const std::weak_ptr& backend, const Texture& texture, GLuint id, GLTexture* originalTexture); void setSyncState(GLSyncState syncState) { _syncState = syncState; } - uint16 usedMipLevels() const { return (_maxMip - _minMip) + 1; } void createTexture(); virtual void allocateStorage() const = 0; virtual void updateSize() const = 0; - virtual void transfer() const = 0; virtual void syncSampler() const = 0; virtual void generateMips() const = 0; virtual void withPreservedTexture(std::function f) const = 0; @@ -199,6 +201,11 @@ protected: protected: void setSize(GLuint size) const; + virtual void startTransfer(); + // Returns true if this is the last block required to complete transfer + virtual bool continueTransfer() { return false; } + virtual void finishTransfer(); + private: GLTexture(const std::weak_ptr& backend, const gpu::Texture& gpuTexture, GLuint id, GLTexture* originalTexture, bool transferrable); diff --git a/libraries/gpu-gl/src/gpu/gl/GLTextureTransfer.cpp b/libraries/gpu-gl/src/gpu/gl/GLTextureTransfer.cpp index 1d22ae7a52..0f64ea1182 100644 --- a/libraries/gpu-gl/src/gpu/gl/GLTextureTransfer.cpp +++ b/libraries/gpu-gl/src/gpu/gl/GLTextureTransfer.cpp @@ -13,6 +13,18 @@ #include "GLShared.h" #include "GLTexture.h" +#ifdef HAVE_NSIGHT +#include "nvToolsExt.h" +std::unordered_map _map; +#endif + +//#define TEXTURE_TRANSFER_PBOS + +#ifdef TEXTURE_TRANSFER_PBOS +#define TEXTURE_TRANSFER_BLOCK_SIZE (64 * 1024) +#define TEXTURE_TRANSFER_PBO_COUNT 128 +#endif + using namespace gpu; using namespace gpu::gl; @@ -36,82 +48,126 @@ GLTextureTransferHelper::~GLTextureTransferHelper() { void GLTextureTransferHelper::transferTexture(const gpu::TexturePointer& texturePointer) { GLTexture* object = Backend::getGPUObject(*texturePointer); - Backend::incrementTextureGPUTransferCount(); -#ifdef THREADED_TEXTURE_TRANSFER - GLsync fence { 0 }; - //fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); - //glFlush(); - TextureTransferPackage package { texturePointer, fence }; +#ifdef THREADED_TEXTURE_TRANSFER + Backend::incrementTextureGPUTransferCount(); object->setSyncState(GLSyncState::Pending); - queueItem(package); + Lock lock(_mutex); + _pendingTextures.push_back(texturePointer); #else - object->withPreservedTexture([&] { - do_transfer(*object); - }); + for (object->startTransfer(); object->continueTransfer(); ) { } + object->finishTransfer(); object->_contentStamp = texturePointer->getDataStamp(); object->setSyncState(GLSyncState::Transferred); #endif } void GLTextureTransferHelper::setup() { +#ifdef THREADED_TEXTURE_TRANSFER + _context.makeCurrent(); + glCreateRenderbuffers(1, &_drawRenderbuffer); + glNamedRenderbufferStorage(_drawRenderbuffer, GL_RGBA8, 128, 128); + glCreateFramebuffers(1, &_drawFramebuffer); + glNamedFramebufferRenderbuffer(_drawFramebuffer, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, _drawRenderbuffer); + glCreateFramebuffers(1, &_readFramebuffer); +#ifdef TEXTURE_TRANSFER_PBOS + std::array pbos; + glCreateBuffers(TEXTURE_TRANSFER_PBO_COUNT, &pbos[0]); + for (uint32_t i = 0; i < TEXTURE_TRANSFER_PBO_COUNT; ++i) { + TextureTransferBlock newBlock; + newBlock._pbo = pbos[i]; + glNamedBufferStorage(newBlock._pbo, TEXTURE_TRANSFER_BLOCK_SIZE, 0, GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT); + newBlock._mapped = glMapNamedBufferRange(newBlock._pbo, 0, TEXTURE_TRANSFER_BLOCK_SIZE, GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT); + _readyQueue.push(newBlock); + } +#endif +#endif } void GLTextureTransferHelper::shutdown() { -} - -void GLTextureTransferHelper::do_transfer(GLTexture& texture) { - texture.createTexture(); - texture.transfer(); - texture.updateSize(); - Backend::decrementTextureGPUTransferCount(); -} - -bool GLTextureTransferHelper::processQueueItems(const Queue& messages) { #ifdef THREADED_TEXTURE_TRANSFER _context.makeCurrent(); + + glNamedFramebufferRenderbuffer(_drawFramebuffer, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, 0); + glDeleteFramebuffers(1, &_drawFramebuffer); + _drawFramebuffer = 0; + glDeleteFramebuffers(1, &_readFramebuffer); + _readFramebuffer = 0; + + glNamedFramebufferTexture(_readFramebuffer, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0); + glDeleteRenderbuffers(1, &_drawRenderbuffer); + _drawRenderbuffer = 0; #endif - for (auto package : messages) { - TexturePointer texturePointer = package.texture.lock(); - // Texture no longer exists, move on to the next - if (!texturePointer) { +} + +bool GLTextureTransferHelper::process() { +#ifdef THREADED_TEXTURE_TRANSFER + // Take any new textures off the queue + TextureList newTransferTextures; + { + Lock lock(_mutex); + newTransferTextures.swap(_pendingTextures); + } + + if (!newTransferTextures.empty()) { + for (auto& texturePointer : newTransferTextures) { +#ifdef HAVE_NSIGHT + _map[texturePointer] = nvtxRangeStart("TextureTansfer"); +#endif + GLTexture* object = Backend::getGPUObject(*texturePointer); + object->startTransfer(); + _transferringTextures.push_back(texturePointer); + _textureIterator = _transferringTextures.begin(); + } + } + + // No transfers in progress, sleep + if (_transferringTextures.empty()) { + QThread::usleep(1); + return true; + } + + static auto lastReport = usecTimestampNow(); + auto now = usecTimestampNow(); + auto lastReportInterval = now - lastReport; + if (lastReportInterval > USECS_PER_SECOND * 4) { + lastReport = now; + qDebug() << "Texture list " << _transferringTextures.size(); + } + + for (auto _textureIterator = _transferringTextures.begin(); _textureIterator != _transferringTextures.end();) { + auto texture = *_textureIterator; + GLTexture* gltexture = Backend::getGPUObject(*texture); + if (gltexture->continueTransfer()) { + ++_textureIterator; continue; } - if (package.fence) { - auto result = glClientWaitSync(package.fence, 0, 0); - while (GL_TIMEOUT_EXPIRED == result || GL_WAIT_FAILED == result) { - // Minimum sleep - QThread::usleep(1); - result = glClientWaitSync(package.fence, 0, 0); - } - assert(GL_CONDITION_SATISFIED == result || GL_ALREADY_SIGNALED == result); - glDeleteSync(package.fence); - package.fence = 0; - } - - GLTexture* object = Backend::getGPUObject(*texturePointer); - - do_transfer(*object); - glBindTexture(object->_target, 0); - - { - auto fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); - assert(fence); - auto result = glClientWaitSync(fence, GL_SYNC_FLUSH_COMMANDS_BIT, 0); - while (GL_TIMEOUT_EXPIRED == result || GL_WAIT_FAILED == result) { - // Minimum sleep - QThread::usleep(1); - result = glClientWaitSync(fence, 0, 0); - } - glDeleteSync(fence); - } - - object->_contentStamp = texturePointer->getDataStamp(); - object->setSyncState(GLSyncState::Transferred); + gltexture->finishTransfer(); + glNamedFramebufferTexture(_readFramebuffer, GL_COLOR_ATTACHMENT0, gltexture->_id, 0); + glBlitNamedFramebuffer(_readFramebuffer, _drawFramebuffer, 0, 0, 1, 1, 0, 0, 1, 1, GL_COLOR_BUFFER_BIT, GL_NEAREST); + glFinish(); + gltexture->_contentStamp = gltexture->_gpuObject.getDataStamp(); + gltexture->updateSize(); + gltexture->setSyncState(gpu::gl::GLSyncState::Transferred); + Backend::decrementTextureGPUTransferCount(); +#ifdef HAVE_NSIGHT + // Mark the texture as transferred + nvtxRangeEnd(_map[texture]); + _map.erase(texture); +#endif + _textureIterator = _transferringTextures.erase(_textureIterator); } -#ifdef THREADED_TEXTURE_TRANSFER - _context.doneCurrent(); + + if (!_transferringTextures.empty()) { + // Don't saturate the GPU + glFinish(); + } else { + // Don't saturate the CPU + QThread::msleep(1); + } +#else + QThread::msleep(1); #endif return true; } diff --git a/libraries/gpu-gl/src/gpu/gl/GLTextureTransfer.h b/libraries/gpu-gl/src/gpu/gl/GLTextureTransfer.h index e64e204af3..f88dddc5ff 100644 --- a/libraries/gpu-gl/src/gpu/gl/GLTextureTransfer.h +++ b/libraries/gpu-gl/src/gpu/gl/GLTextureTransfer.h @@ -23,27 +23,62 @@ namespace gpu { namespace gl { +using VoidLambda = std::function; + +#if 0 struct TextureTransferPackage { std::weak_ptr texture; GLsync fence; }; -class GLTextureTransferHelper : public GenericQueueThread { +struct TextureTransferBlock { + GLuint _pbo { 0 }; + void* _mapped { nullptr }; + GLsync _fence; + std::function _transferCallback; + bool isSignaled(); + void transfer(); +}; + +using CommandQueue = std::list; +struct FencedLambda { + GLsync _fence { 0 }; + VoidLambda _callback; +}; +#endif + +using TextureList = std::list; +using TextureListIterator = TextureList::iterator; + +class GLTextureTransferHelper : public GenericThread { public: using Pointer = std::shared_ptr; GLTextureTransferHelper(); ~GLTextureTransferHelper(); void transferTexture(const gpu::TexturePointer& texturePointer); - void postTransfer(const gpu::TexturePointer& texturePointer); protected: void setup() override; void shutdown() override; - bool processQueueItems(const Queue& messages) override; - void do_transfer(GLTexture& texturePointer); + bool process() override; private: +#ifdef THREADED_TEXTURE_TRANSFER ::gl::OffscreenContext _context; + // A mutex for protecting items access on the render and transfer threads + Mutex _mutex; + // Textures that have been submitted for transfer + TextureList _pendingTextures; + // Textures currently in the transfer process + // Only used on the transfer thread + TextureList _transferringTextures; + TextureListIterator _textureIterator; + + // Framebuffers / renderbuffers for forcing access to the texture on the transfer thread + GLuint _drawRenderbuffer { 0 }; + GLuint _drawFramebuffer { 0 }; + GLuint _readFramebuffer { 0 }; +#endif }; } } diff --git a/libraries/gpu-gl/src/gpu/gl41/GL41Backend.h b/libraries/gpu-gl/src/gpu/gl41/GL41Backend.h index 6e15542310..c89024b7e8 100644 --- a/libraries/gpu-gl/src/gpu/gl41/GL41Backend.h +++ b/libraries/gpu-gl/src/gpu/gl41/GL41Backend.h @@ -46,10 +46,10 @@ public: GL41Texture(const std::weak_ptr& backend, const Texture& buffer, GL41Texture* original); protected: - void transferMip(uint16_t mipLevel, uint8_t face = 0) const; + void transferMip(uint16_t mipLevel, uint8_t face) const; + void startTransfer() override; void allocateStorage() const override; void updateSize() const override; - void transfer() const override; void syncSampler() const override; void generateMips() const override; void withPreservedTexture(std::function f) const override; diff --git a/libraries/gpu-gl/src/gpu/gl41/GL41BackendTexture.cpp b/libraries/gpu-gl/src/gpu/gl41/GL41BackendTexture.cpp index ff1a790ba5..3d55802ec2 100644 --- a/libraries/gpu-gl/src/gpu/gl41/GL41BackendTexture.cpp +++ b/libraries/gpu-gl/src/gpu/gl41/GL41BackendTexture.cpp @@ -42,7 +42,7 @@ GL41Texture::GL41Texture(const std::weak_ptr& backend, const Texture& GL41Texture::GL41Texture(const std::weak_ptr& backend, const Texture& texture, GL41Texture* original) : GLTexture(backend, texture, allocate(), original) {} -void GL41Backend::GL41Texture::withPreservedTexture(std::function f) const { +void GL41Texture::withPreservedTexture(std::function f) const { GLint boundTex = -1; switch (_target) { case GL_TEXTURE_2D: @@ -64,14 +64,14 @@ void GL41Backend::GL41Texture::withPreservedTexture(std::function f) con (void)CHECK_GL_ERROR(); } -void GL41Backend::GL41Texture::generateMips() const { +void GL41Texture::generateMips() const { withPreservedTexture([&] { glGenerateMipmap(_target); }); (void)CHECK_GL_ERROR(); } -void GL41Backend::GL41Texture::allocateStorage() const { +void GL41Texture::allocateStorage() const { GLTexelFormat texelFormat = GLTexelFormat::evalGLTexelFormat(_gpuObject.getTexelFormat()); glTexParameteri(_target, GL_TEXTURE_BASE_LEVEL, 0); (void)CHECK_GL_ERROR(); @@ -94,7 +94,7 @@ void GL41Backend::GL41Texture::allocateStorage() const { } } -void GL41Backend::GL41Texture::updateSize() const { +void GL41Texture::updateSize() const { setSize(_virtualSize); if (!_id) { return; @@ -130,7 +130,7 @@ void GL41Backend::GL41Texture::updateSize() const { } // Move content bits from the CPU to the GPU for a given mip / face -void GL41Backend::GL41Texture::transferMip(uint16_t mipLevel, uint8_t face) const { +void GL41Texture::transferMip(uint16_t mipLevel, uint8_t face) const { auto mip = _gpuObject.accessStoredMipFace(mipLevel, face); GLTexelFormat texelFormat = GLTexelFormat::evalGLTexelFormat(_gpuObject.getTexelFormat(), mip->getFormat()); //GLenum target = getFaceTargets()[face]; @@ -140,15 +140,9 @@ void GL41Backend::GL41Texture::transferMip(uint16_t mipLevel, uint8_t face) cons (void)CHECK_GL_ERROR(); } -// This should never happen on the main thread -// Move content bits from the CPU to the GPU -void GL41Backend::GL41Texture::transfer() const { +void GL41Texture::startTransfer() { PROFILE_RANGE(__FUNCTION__); - //qDebug() << "Transferring texture: " << _privateTexture; - // Need to update the content of the GPU object from the source sysmem of the texture - if (_contentStamp >= _gpuObject.getDataStamp()) { - return; - } + Parent::startTransfer(); glBindTexture(_target, _id); (void)CHECK_GL_ERROR(); @@ -175,38 +169,16 @@ void GL41Backend::GL41Texture::transfer() const { glBindFramebuffer(GL_READ_FRAMEBUFFER, 0); glDeleteFramebuffers(1, &fbo); } else { - // GO through the process of allocating the correct storage and/or update the content - switch (_gpuObject.getType()) { - case Texture::TEX_2D: - { - for (uint16_t i = _minMip; i <= _maxMip; ++i) { - if (_gpuObject.isStoredMipFaceAvailable(i)) { - transferMip(i); - } + // transfer pixels from each faces + uint8_t numFaces = (Texture::TEX_CUBE == _gpuObject.getType()) ? CUBE_NUM_FACES : 1; + for (uint8_t f = 0; f < numFaces; f++) { + for (uint16_t i = 0; i < Sampler::MAX_MIP_LEVEL; ++i) { + if (_gpuObject.isStoredMipFaceAvailable(i, f)) { + transferMip(i, f); } } - break; - - case Texture::TEX_CUBE: - // transfer pixels from each faces - for (uint8_t f = 0; f < CUBE_NUM_FACES; f++) { - for (uint16_t i = 0; i < Sampler::MAX_MIP_LEVEL; ++i) { - if (_gpuObject.isStoredMipFaceAvailable(i, f)) { - transferMip(i, f); - } - } - } - break; - - default: - qCWarning(gpugl41logging) << __FUNCTION__ << " case for Texture Type " << _gpuObject.getType() << " not supported"; - break; } } - if (_gpuObject.isAutogenerateMips()) { - glGenerateMipmap(_target); - (void)CHECK_GL_ERROR(); - } } void GL41Backend::GL41Texture::syncSampler() const { diff --git a/libraries/gpu-gl/src/gpu/gl45/GL45Backend.h b/libraries/gpu-gl/src/gpu/gl45/GL45Backend.h index 0d737ef3ba..22e1a87719 100644 --- a/libraries/gpu-gl/src/gpu/gl45/GL45Backend.h +++ b/libraries/gpu-gl/src/gpu/gl45/GL45Backend.h @@ -18,6 +18,29 @@ namespace gpu { namespace gl45 { using namespace gpu::gl; +struct TransferState { + GLTexture& _texture; + GLenum _internalFormat { GL_RGBA8 }; + GLTexelFormat _texelFormat; + uint8_t _face { 0 }; + uint16_t _mipLevel { 0 }; + uint32_t _bytesPerLine { 0 }; + uint32_t _bytesPerPixel { 0 }; + uint32_t _bytesPerPage { 0 }; + GLuint _maxSparseLevel { 0 }; + + uvec3 _mipDimensions; + uvec3 _mipOffset; + uvec3 _pageSize; + const uint8_t* _srcPointer { nullptr }; + uvec3 currentPageSize() const; + void updateSparse(); + void updateMip(); + void populatePage(uint8_t* dest); + bool increment(); + TransferState(GLTexture& texture); +}; + class GL45Backend : public GLBackend { using Parent = GLBackend; // Context Backend static interface required @@ -29,19 +52,26 @@ public: class GL45Texture : public GLTexture { using Parent = GLTexture; - GLuint allocate(const Texture& texture); + static GLuint allocate(const Texture& texture); public: GL45Texture(const std::weak_ptr& backend, const Texture& texture, bool transferrable); GL45Texture(const std::weak_ptr& backend, const Texture& texture, GLTexture* original); + ~GL45Texture(); protected: + void startTransfer() override; + bool continueTransfer() override; + void incrementalTransfer(const uvec3& size, const gpu::Texture::PixelsPointer& mip, std::function f) const; void transferMip(uint16_t mipLevel, uint8_t face = 0) const; + void allocateMip(uint16_t mipLevel, uint8_t face = 0) const; void allocateStorage() const override; void updateSize() const override; - void transfer() const override; +// void transfer() const override; void syncSampler() const override; void generateMips() const override; void withPreservedTexture(std::function f) const override; + + TransferState _transferState; }; diff --git a/libraries/gpu-gl/src/gpu/gl45/GL45BackendTexture.cpp b/libraries/gpu-gl/src/gpu/gl45/GL45BackendTexture.cpp index d3222b9acf..3b512b3278 100644 --- a/libraries/gpu-gl/src/gpu/gl45/GL45BackendTexture.cpp +++ b/libraries/gpu-gl/src/gpu/gl45/GL45BackendTexture.cpp @@ -10,8 +10,11 @@ // #include "GL45Backend.h" +#include +#include #include #include +#include #include @@ -21,10 +24,114 @@ using namespace gpu; using namespace gpu::gl; using namespace gpu::gl45; +#define SPARSE_TEXTURES 1 + using GL45Texture = GL45Backend::GL45Texture; +GLTexture* GL45Backend::syncGPUObject(const TexturePointer& texture, bool transfer) { + return GL45Texture::sync(*this, texture, transfer); +} + +void serverWait() { + auto fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); + assert(fence); + glWaitSync(fence, 0, GL_TIMEOUT_IGNORED); + glDeleteSync(fence); +} + +void clientWait() { + auto fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); + assert(fence); + auto result = glClientWaitSync(fence, GL_SYNC_FLUSH_COMMANDS_BIT, 0); + while (GL_TIMEOUT_EXPIRED == result || GL_WAIT_FAILED == result) { + // Minimum sleep + QThread::usleep(1); + result = glClientWaitSync(fence, 0, 0); + } + glDeleteSync(fence); +} + +TransferState::TransferState(GLTexture& texture) : _texture(texture) { +} + +void TransferState::updateSparse() { + glGetTextureParameterIuiv(_texture._id, GL_NUM_SPARSE_LEVELS_ARB, &_maxSparseLevel); + _internalFormat = gl::GLTexelFormat::evalGLTexelFormat(_texture._gpuObject.getTexelFormat(), _texture._gpuObject.getTexelFormat()).internalFormat; + ivec3 pageSize; + glGetInternalformativ(_texture._target, _internalFormat, GL_VIRTUAL_PAGE_SIZE_X_ARB, 1, &pageSize.x); + glGetInternalformativ(_texture._target, _internalFormat, GL_VIRTUAL_PAGE_SIZE_Y_ARB, 1, &pageSize.y); + glGetInternalformativ(_texture._target, _internalFormat, GL_VIRTUAL_PAGE_SIZE_Z_ARB, 1, &pageSize.z); + _pageSize = uvec3(pageSize); +} + +void TransferState::updateMip() { + _mipDimensions = _texture._gpuObject.evalMipDimensions(_mipLevel); + _mipOffset = uvec3(); + if (!_texture._gpuObject.isStoredMipFaceAvailable(_mipLevel, _face)) { + _srcPointer = nullptr; + return; + } + + auto mip = _texture._gpuObject.accessStoredMipFace(_mipLevel, _face); + _texelFormat = gl::GLTexelFormat::evalGLTexelFormat(_texture._gpuObject.getTexelFormat(), mip->getFormat()); + _srcPointer = mip->readData(); + _bytesPerLine = (uint32_t)mip->getSize() / _mipDimensions.y; + _bytesPerPixel = _bytesPerLine / _mipDimensions.x; +} + +bool TransferState::increment() { + if ((_mipOffset.x + _pageSize.x) < _mipDimensions.x) { + _mipOffset.x += _pageSize.x; + return true; + } + + if ((_mipOffset.y + _pageSize.y) < _mipDimensions.y) { + _mipOffset.x = 0; + _mipOffset.y += _pageSize.y; + return true; + } + + if (_mipOffset.z + _pageSize.z < _mipDimensions.z) { + _mipOffset.x = 0; + _mipOffset.y = 0; + ++_mipOffset.z; + return true; + } + + // Done with this mip?, move on to the next mip + if (_mipLevel + 1 < _texture.usedMipLevels()) { + _mipOffset = uvec3(0); + ++_mipLevel; + updateMip(); + return true; + } + + // Done with this face? Move on to the next + if (_face + 1 < ((_texture._target == GL_TEXTURE_CUBE_MAP) ? GLTexture::CUBE_NUM_FACES : 1)) { + ++_face; + _mipOffset = uvec3(0); + _mipLevel = 0; + updateMip(); + return true; + } + + return false; +} + +void TransferState::populatePage(uint8_t* dst) { + uvec3 pageSize = currentPageSize(); + for (uint32_t y = 0; y < pageSize.y; ++y) { + uint32_t srcOffset = (_bytesPerLine * (_mipOffset.y + y)) + (_bytesPerPixel * _mipOffset.x); + uint32_t dstOffset = (_bytesPerPixel * pageSize.x) * y; + memcpy(dst + dstOffset, _srcPointer + srcOffset, pageSize.x * _bytesPerPixel); + } +} + +uvec3 TransferState::currentPageSize() const { + return glm::clamp(_mipDimensions - _mipOffset, uvec3(1), _pageSize); +} + GLuint GL45Texture::allocate(const Texture& texture) { - Backend::incrementTextureGPUCount(); GLuint result; glCreateTextures(getGLTextureType(texture), 1, &result); return result; @@ -34,26 +141,38 @@ GLuint GL45Backend::getTextureID(const TexturePointer& texture, bool transfer) { return GL45Texture::getId(*this, texture, transfer); } -GLTexture* GL45Backend::syncGPUObject(const TexturePointer& texture, bool transfer) { - return GL45Texture::sync(*this, texture, transfer); +GL45Texture::GL45Texture(const std::weak_ptr& backend, const Texture& texture, bool transferrable) + : GLTexture(backend, texture, allocate(texture), transferrable), _transferState(*this) { + +#if SPARSE_TEXTURES + if (transferrable) { + glTextureParameteri(_id, GL_TEXTURE_SPARSE_ARB, GL_TRUE); + } +#endif } -GL45Backend::GL45Texture::GL45Texture(const std::weak_ptr& backend, const Texture& texture, bool transferrable) - : GLTexture(backend, texture, allocate(texture), transferrable) {} +GL45Texture::GL45Texture(const std::weak_ptr& backend, const Texture& texture, GLTexture* original) + : GLTexture(backend, texture, allocate(texture), original), _transferState(*this) { } -GL45Backend::GL45Texture::GL45Texture(const std::weak_ptr& backend, const Texture& texture, GLTexture* original) - : GLTexture(backend, texture, allocate(texture), original) {} +GL45Texture::~GL45Texture() { + // FIXME do we need to explicitly deallocate the virtual memory here? + //if (_transferrable) { + // for (uint16_t mipLevel = 0; mipLevel < usedMipLevels(); ++i) { + // glTexturePageCommitmentEXT(_id, mipLevel, offset.x, offset.y, offset.z, size.x, size.y, size.z, GL_TRUE); + // } + //} +} -void GL45Backend::GL45Texture::withPreservedTexture(std::function f) const { +void GL45Texture::withPreservedTexture(std::function f) const { f(); } -void GL45Backend::GL45Texture::generateMips() const { +void GL45Texture::generateMips() const { glGenerateTextureMipmap(_id); (void)CHECK_GL_ERROR(); } -void GL45Backend::GL45Texture::allocateStorage() const { +void GL45Texture::allocateStorage() const { GLTexelFormat texelFormat = GLTexelFormat::evalGLTexelFormat(_gpuObject.getTexelFormat()); glTextureParameteri(_id, GL_TEXTURE_BASE_LEVEL, 0); glTextureParameteri(_id, GL_TEXTURE_MAX_LEVEL, _maxMip - _minMip); @@ -66,7 +185,7 @@ void GL45Backend::GL45Texture::allocateStorage() const { (void)CHECK_GL_ERROR(); } -void GL45Backend::GL45Texture::updateSize() const { +void GL45Texture::updateSize() const { setSize(_virtualSize); if (!_id) { return; @@ -77,86 +196,50 @@ void GL45Backend::GL45Texture::updateSize() const { } } -// Move content bits from the CPU to the GPU for a given mip / face -void GL45Backend::GL45Texture::transferMip(uint16_t mipLevel, uint8_t face) const { - auto mip = _gpuObject.accessStoredMipFace(mipLevel, face); - GLTexelFormat texelFormat = GLTexelFormat::evalGLTexelFormat(_gpuObject.getTexelFormat(), mip->getFormat()); - auto size = _gpuObject.evalMipDimensions(mipLevel); - if (GL_TEXTURE_2D == _target) { - glTextureSubImage2D(_id, mipLevel, 0, 0, size.x, size.y, texelFormat.format, texelFormat.type, mip->readData()); - } else if (GL_TEXTURE_CUBE_MAP == _target) { - // DSA ARB does not work on AMD, so use EXT - // glTextureSubImage3D(_id, mipLevel, 0, 0, face, size.x, size.y, 1, texelFormat.format, texelFormat.type, mip->readData()); - auto target = CUBE_FACE_LAYOUT[face]; - glTextureSubImage2DEXT(_id, target, mipLevel, 0, 0, size.x, size.y, texelFormat.format, texelFormat.type, mip->readData()); - } else { - Q_ASSERT(false); - } - (void)CHECK_GL_ERROR(); +void GL45Texture::startTransfer() { + Parent::startTransfer(); + _transferState.updateSparse(); + _transferState.updateMip(); } -// This should never happen on the main thread -// Move content bits from the CPU to the GPU -void GL45Backend::GL45Texture::transfer() const { - PROFILE_RANGE(__FUNCTION__); - //qDebug() << "Transferring texture: " << _privateTexture; - // Need to update the content of the GPU object from the source sysmem of the texture - if (_contentStamp >= _gpuObject.getDataStamp()) { - return; +bool GL45Texture::continueTransfer() { + static std::vector buffer; + if (buffer.empty()) { + buffer.resize(1024 * 1024); } + uvec3 pageSize = _transferState.currentPageSize(); + uvec3 offset = _transferState._mipOffset; - if (_downsampleSource._texture) { - GLuint fbo { 0 }; - glCreateFramebuffers(1, &fbo); - glBindFramebuffer(GL_READ_FRAMEBUFFER, fbo); - // Find the distance between the old min mip and the new one - uint16 mipOffset = _minMip - _downsampleSource._minMip; - for (uint16 i = _minMip; i <= _maxMip; ++i) { - uint16 targetMip = i - _minMip; - uint16 sourceMip = targetMip + mipOffset; - Vec3u dimensions = _gpuObject.evalMipDimensions(i); - for (GLenum target : getFaceTargets(_target)) { - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, target, _downsampleSource._texture, sourceMip); - (void)CHECK_GL_ERROR(); - glCopyTextureSubImage2D(_id, targetMip, 0, 0, 0, 0, dimensions.x, dimensions.y); - (void)CHECK_GL_ERROR(); - } - } - glBindFramebuffer(GL_READ_FRAMEBUFFER, 0); - glDeleteFramebuffers(1, &fbo); - } else { - // GO through the process of allocating the correct storage and/or update the content - switch (_gpuObject.getType()) { - case Texture::TEX_2D: - { - for (uint16_t i = _minMip; i <= _maxMip; ++i) { - if (_gpuObject.isStoredMipFaceAvailable(i)) { - transferMip(i); - } - } - } - break; +#if SPARSE_TEXTURES + if (_transferState._mipLevel <= _transferState._maxSparseLevel) { + glTexturePageCommitmentEXT(_id, _transferState._mipLevel, + offset.x, offset.y, _transferState._face, + pageSize.x, pageSize.y, pageSize.z, + GL_TRUE); + } +#endif - case Texture::TEX_CUBE: - // transfer pixels from each faces - for (uint8_t f = 0; f < CUBE_NUM_FACES; f++) { - for (uint16_t i = 0; i < Sampler::MAX_MIP_LEVEL; ++i) { - if (_gpuObject.isStoredMipFaceAvailable(i, f)) { - transferMip(i, f); - } - } - } - break; - - default: - qCWarning(gpugl45logging) << __FUNCTION__ << " case for Texture Type " << _gpuObject.getType() << " not supported"; - break; + if (_transferState._srcPointer) { + // Transfer the mip data + _transferState.populatePage(&buffer[0]); + if (GL_TEXTURE_2D == _target) { + glTextureSubImage2D(_id, _transferState._mipLevel, + offset.x, offset.y, + pageSize.x, pageSize.y, + _transferState._texelFormat.format, _transferState._texelFormat.type, &buffer[0]); + } else if (GL_TEXTURE_CUBE_MAP == _target) { + auto target = CUBE_FACE_LAYOUT[_transferState._face]; + // DSA ARB does not work on AMD, so use EXT + // glTextureSubImage3D(_id, mipLevel, 0, 0, face, size.x, size.y, 1, texelFormat.format, texelFormat.type, mip->readData()); + glTextureSubImage2DEXT(_id, target, _transferState._mipLevel, + offset.x, offset.y, + pageSize.x, pageSize.y, + _transferState._texelFormat.format, _transferState._texelFormat.type, &buffer[0]); } } - if (_gpuObject.isAutogenerateMips()) { - glGenerateTextureMipmap(_id); - (void)CHECK_GL_ERROR(); - } + + serverWait(); + return _transferState.increment(); } void GL45Backend::GL45Texture::syncSampler() const {