diff --git a/libraries/gpu/src/gpu/GLBackend.h b/libraries/gpu/src/gpu/GLBackend.h
index d9ead354ea..3a1a204d01 100644
--- a/libraries/gpu/src/gpu/GLBackend.h
+++ b/libraries/gpu/src/gpu/GLBackend.h
@@ -65,15 +65,26 @@ public:
 
     class GLBuffer : public GPUObject {
     public:
-        Stamp _stamp;
-        GLuint _buffer;
-        GLuint _size;
+        const GLuint _buffer;
+        const GLuint _size;
+        const Stamp _stamp;
 
-        GLBuffer();
+        GLBuffer(const Buffer& buffer);
         ~GLBuffer();
 
-        void setSize(GLuint size);
+        // Uploads the sysmem contents of the owning buffer to the GL buffer object.
+        // forceAll skips the dirty-page analysis and always sends the whole store.
+        void transfer(bool forceAll = false);
+
+    private:
+        // Finds the next run of contiguous dirty pages at or after currentPage and
+        // reports it as a byte range; returns false once no dirty page remains.
+        bool getNextTransferBlock(GLintptr& outOffset, GLsizeiptr& outSize, size_t& currentPage) const;
+
+        // The owning buffer
+        const Buffer& _gpuBuffer;
     };
+
 
     static GLBuffer* syncGPUObject(const Buffer& buffer);
     static GLuint getBufferID(const Buffer& buffer);
diff --git a/libraries/gpu/src/gpu/GLBackendBuffer.cpp b/libraries/gpu/src/gpu/GLBackendBuffer.cpp
index 080d743104..486f4cee8e 100755
--- a/libraries/gpu/src/gpu/GLBackendBuffer.cpp
+++ b/libraries/gpu/src/gpu/GLBackendBuffer.cpp
@@ -12,14 +12,46 @@
 
 using namespace gpu;
 
-GLBackend::GLBuffer::GLBuffer() :
-    _stamp(0),
-    _buffer(0),
-    _size(0)
-{
-    Backend::incrementBufferGPUCount();
+static std::once_flag check_dsa;
+static bool DSA_SUPPORTED { false };
+
+// Creates one GL buffer name: glCreateBuffers when direct state access is available,
+// glGenBuffers otherwise. DSA support is probed exactly once, on the first call.
+GLuint allocateSingleBuffer() {
+    std::call_once(check_dsa, [&] {
+        DSA_SUPPORTED = (GLEW_VERSION_4_5 || GLEW_ARB_direct_state_access);
+    });
+    GLuint result;
+    if (DSA_SUPPORTED) {
+        glCreateBuffers(1, &result);
+    } else {
+        glGenBuffers(1, &result);
+    }
+    return result;
 }
 
+// Allocates a GL store sized to the buffer's sysmem and attaches this object to the buffer
+GLBackend::GLBuffer::GLBuffer(const Buffer& buffer) :
+    _buffer(allocateSingleBuffer()),
+    _size(buffer._sysmem.getSize()),
+    _stamp(buffer._sysmem.getStamp()),
+    _gpuBuffer(buffer) {
+    (void)CHECK_GL_ERROR();
+    Backend::setGPUObject(buffer, this);
+    if (DSA_SUPPORTED) {
+        glNamedBufferStorage(_buffer, _size, nullptr, GL_DYNAMIC_STORAGE_BIT);
+    } else {
+        glBindBuffer(GL_ARRAY_BUFFER, _buffer);
+        if (GLEW_VERSION_4_4 || GLEW_ARB_buffer_storage) {
+            glBufferStorage(GL_ARRAY_BUFFER, _size, nullptr, GL_DYNAMIC_STORAGE_BIT);
+        } else {
+            glBufferData(GL_ARRAY_BUFFER, buffer.getSysmem().getSize(), buffer.getSysmem().readData(), GL_DYNAMIC_DRAW);
+        }
+        glBindBuffer(GL_ARRAY_BUFFER, 0);
+    }
+    Backend::incrementBufferGPUCount();
+}
+
 GLBackend::GLBuffer::~GLBuffer() {
     if (_buffer != 0) {
         glDeleteBuffers(1, &_buffer);
@@ -28,37 +60,108 @@ GLBackend::GLBuffer::~GLBuffer() {
     Backend::decrementBufferGPUCount();
 }
 
-void GLBackend::GLBuffer::setSize(GLuint size) {
-    Backend::updateBufferGPUMemoryUsage(_size, size);
-    _size = size;
+// Uploads the sysmem contents of the owning buffer to the GL buffer object: either the
+// whole store, or only the runs of dirty pages when that is likely to be cheaper.
+void GLBackend::GLBuffer::transfer(bool forceAll) {
+    const auto& pageFlags = _gpuBuffer._pages;
+    if (!forceAll) {
+        size_t transitions = 0;
+        if (pageFlags.size()) {
+            bool lastDirty = (0 != (pageFlags[0] & Buffer::DIRTY));
+            for (size_t i = 1; i < pageFlags.size(); ++i) {
+                bool newDirty = (0 != (pageFlags[i] & Buffer::DIRTY));
+                if (newDirty != lastDirty) {
+                    ++transitions;
+                    lastDirty = newDirty;
+                }
+            }
+        }
+
+        // If there are no transitions (implying the whole buffer is dirty)
+        // or more than 20 transitions, then just transfer the whole buffer
+        if (transitions == 0 || transitions > 20) {
+            forceAll = true;
+        }
+    }
+
+    // Are we transferring the whole buffer?
+    if (forceAll) {
+        if (DSA_SUPPORTED) {
+            glNamedBufferSubData(_buffer, 0, _size, _gpuBuffer.getSysmem().readData());
+        } else {
+            // The store may be immutable (created with glBufferStorage), where glBufferData
+            // fails with GL_INVALID_OPERATION, so rewrite the existing store in place
+            glBindBuffer(GL_ARRAY_BUFFER, _buffer);
+            glBufferSubData(GL_ARRAY_BUFFER, 0, _size, _gpuBuffer.getSysmem().readData());
+            glBindBuffer(GL_ARRAY_BUFFER, 0);
+        }
+    } else {
+        if (!DSA_SUPPORTED) {
+            glBindBuffer(GL_ARRAY_BUFFER, _buffer);
+        }
+        GLintptr offset;
+        GLsizeiptr size;
+        size_t currentPage { 0 };
+        auto data = _gpuBuffer.getSysmem().readData();
+        while (getNextTransferBlock(offset, size, currentPage)) {
+            if (DSA_SUPPORTED) {
+                glNamedBufferSubData(_buffer, offset, size, data + offset);
+            } else {
+                glBufferSubData(GL_ARRAY_BUFFER, offset, size, data + offset);
+            }
+        }
+
+        if (!DSA_SUPPORTED) {
+            glBindBuffer(GL_ARRAY_BUFFER, 0);
+        }
+    }
+    // Everything dirty has been uploaded: reset the per-page flags as well, otherwise
+    // the same pages would be sent again by every later transfer
+    for (auto& flags : _gpuBuffer._pages) {
+        flags &= ~Buffer::DIRTY;
+    }
+    _gpuBuffer._flags &= ~Buffer::DIRTY;
+    (void)CHECK_GL_ERROR();
+}
+
+// Scans forward from currentPage for the next run of contiguous dirty pages and reports it
+// as a byte offset/size pair. Returns false once no dirty page remains.
+bool GLBackend::GLBuffer::getNextTransferBlock(GLintptr& outOffset, GLsizeiptr& outSize, size_t& currentPage) const {
+    size_t pageCount = _gpuBuffer._pages.size();
+    // Advance to the first dirty page
+    while (currentPage < pageCount && (0 == (Buffer::DIRTY & _gpuBuffer._pages[currentPage]))) {
+        ++currentPage;
+    }
+
+    // If we got to the end, we're done
+    if (currentPage >= pageCount) {
+        return false;
+    }
+
+    // Advance to the next clean page
+    outOffset = static_cast<GLintptr>(currentPage * _gpuBuffer._pageSize);
+    while (currentPage < pageCount && (0 != (Buffer::DIRTY & _gpuBuffer._pages[currentPage]))) {
+        ++currentPage;
+    }
+    outSize = static_cast<GLsizeiptr>((currentPage * _gpuBuffer._pageSize) - outOffset);
+    return true;
 }
 
 GLBackend::GLBuffer* GLBackend::syncGPUObject(const Buffer& buffer) {
     GLBuffer* object = Backend::getGPUObject<GLBackend::GLBuffer>(buffer);
 
-    if (object && (object->_stamp == buffer.getSysmem().getStamp())) {
-        return object;
+    bool forceTransferAll = false;
+    // Has the storage size changed?
+    if (!object || object->_stamp != buffer.getSysmem().getStamp()) {
+        // NOTE(review): presumably setGPUObject() (called by the constructor) releases the previous GLBuffer - confirm
+        object = new GLBuffer(buffer);
+        forceTransferAll = true;
     }
 
-    // need to have a gpu object?
-    if (!object) {
-        object = new GLBuffer();
-        glGenBuffers(1, &object->_buffer);
-        (void) CHECK_GL_ERROR();
-        Backend::setGPUObject(buffer, object);
+    if (forceTransferAll || (0 != (buffer._flags & Buffer::DIRTY))) {
+        object->transfer(forceTransferAll);
     }
 
-    // Now let's update the content of the bo with the sysmem version
-    // TODO: in the future, be smarter about when to actually upload the glBO version based on the data that did change
-    //if () {
-    glBindBuffer(GL_ARRAY_BUFFER, object->_buffer);
-    glBufferData(GL_ARRAY_BUFFER, buffer.getSysmem().getSize(), buffer.getSysmem().readData(), GL_DYNAMIC_DRAW);
-    glBindBuffer(GL_ARRAY_BUFFER, 0);
-    object->_stamp = buffer.getSysmem().getStamp();
-    object->setSize((GLuint)buffer.getSysmem().getSize());
-    //}
-    (void) CHECK_GL_ERROR();
-
     return object;
 }
 
diff --git a/libraries/gpu/src/gpu/Resource.cpp b/libraries/gpu/src/gpu/Resource.cpp
index deb17300c3..add9b1df3c 100644
--- a/libraries/gpu/src/gpu/Resource.cpp
+++ b/libraries/gpu/src/gpu/Resource.cpp
@@ -109,40 +109,19 @@ void Resource::Sysmem::deallocateMemory(Byte* dataAllocated, Size size) {
     }
 }
 
-Resource::Sysmem::Sysmem() :
-    _stamp(0),
-    _size(0),
-    _data(NULL)
-{
-}
+Resource::Sysmem::Sysmem() {}
 
-Resource::Sysmem::Sysmem(Size size, const Byte* bytes) :
-    _stamp(0),
-    _size(0),
-    _data(NULL)
-{
-    if (size > 0) {
-        _size = allocateMemory(&_data, size);
-        if (_size >= size) {
-            if (bytes) {
-                memcpy(_data, bytes, size);
-            }
-        }
+Resource::Sysmem::Sysmem(Size size, const Byte* bytes) {
+    // setData() allocates 'size' bytes and copies from 'bytes' only when it is non-null
+    if (size > 0) {
+        setData(size, bytes);
     }
 }
 
-Resource::Sysmem::Sysmem(const Sysmem& sysmem) :
-    _stamp(0),
-    _size(0),
-    _data(NULL)
-{
+Resource::Sysmem::Sysmem(const Sysmem& sysmem) {
     if (sysmem.getSize() > 0) {
-        _size = allocateMemory(&_data, sysmem.getSize());
-        if (_size >= sysmem.getSize()) {
-            if (sysmem.readData()) {
-                memcpy(_data, sysmem.readData(), sysmem.getSize());
-            }
-        }
+        allocate(sysmem._size);
+        setData(_size, sysmem._data);
     }
 }
 
@@ -208,7 +187,6 @@ Resource::Size Resource::Sysmem::setData( Size size, const Byte* bytes ) {
     if (allocate(size) == size) {
         if (size && bytes) {
             memcpy( _data, bytes, _size );
-            _stamp++;
         }
     }
     return _size;
@@ -217,7 +195,6 @@ Resource::Size Resource::Sysmem::setData( Size size, const Byte* bytes ) {
 Resource::Size Resource::Sysmem::setSubData( Size offset, Size size, const Byte* bytes) {
     if (size && ((offset + size) <= getSize()) && bytes) {
         memcpy( _data + offset, bytes, size );
-        _stamp++;
         return size;
     }
     return 0;
@@ -264,65 +241,108 @@ Buffer::Size Buffer::getBufferGPUMemoryUsage() {
     return Context::getBufferGPUMemoryUsage();
 }
 
-Buffer::Buffer() :
-    Resource(),
-    _sysmem(new Sysmem()) {
+Buffer::Buffer(Size pageSize) :
+    _pageSize(pageSize) {
     _bufferCPUCount++;
-
 }
 
-Buffer::Buffer(Size size, const Byte* bytes) :
-    Resource(),
-    _sysmem(new Sysmem(size, bytes)) {
-    _bufferCPUCount++;
-    Buffer::updateBufferCPUMemoryUsage(0, _sysmem->getSize());
+Buffer::Buffer(Size size, const Byte* bytes, Size pageSize) : Buffer(pageSize) {
+    setData(size, bytes);
 }
 
-Buffer::Buffer(const Buffer& buf) :
-    Resource(),
-    _sysmem(new Sysmem(buf.getSysmem())) {
-    _bufferCPUCount++;
-    Buffer::updateBufferCPUMemoryUsage(0, _sysmem->getSize());
+Buffer::Buffer(const Buffer& buf) : Buffer(buf._pageSize) {
+    setData(buf.getSize(), buf.getData());
 }
 
 Buffer& Buffer::operator=(const Buffer& buf) {
-    (*_sysmem) = buf.getSysmem();
+    // Only the contents are copied: _pageSize is const and _pages is laid out for it, so it is left untouched
+    setData(buf.getSize(), buf.getData());
     return (*this);
 }
 
 Buffer::~Buffer() {
     _bufferCPUCount--;
-
-    if (_sysmem) {
-        Buffer::updateBufferCPUMemoryUsage(_sysmem->getSize(), 0);
-        delete _sysmem;
-        _sysmem = NULL;
-    }
+    Buffer::updateBufferCPUMemoryUsage(_sysmem.getSize(), 0);
 }
 
 Buffer::Size Buffer::resize(Size size) {
+    _end = size;
     auto prevSize = editSysmem().getSize();
-    auto newSize = editSysmem().resize(size);
-    Buffer::updateBufferCPUMemoryUsage(prevSize, newSize);
-    return newSize;
+    if (prevSize < size) {
+        auto newPages = getRequiredPageCount();
+        auto newSize = newPages * _pageSize;
+        // Allocate whole pages, matching the size reported to the memory accounting below
+        editSysmem().resize(newSize);
+        // All new pages start off as clean, because they haven't been populated by data
+        _pages.resize(newPages, 0);
+        Buffer::updateBufferCPUMemoryUsage(prevSize, newSize);
+    }
+    return _end;
 }
 
+// Flags the buffer, and every page overlapping [offset, offset + bytes), as dirty
+void Buffer::dirtyPages(Size offset, Size bytes) {
+    if (!bytes) {
+        return;
+    }
+    _flags |= DIRTY;
+    // Find the starting page
+    Size startPage = (offset / _pageSize);
+    // Non-zero byte count, so at least one page is dirty
+    Size pageCount = 1;
+    // How much of the page is after the offset?
+    Size remainder = _pageSize - (offset % _pageSize);
+    // If there are more bytes than page space remaining, we need to increase the page count
+    if (bytes > remainder) {
+        // Get rid of the amount that will fit in the current page
+        bytes -= remainder;
+
+        pageCount += (bytes / _pageSize);
+        if (bytes % _pageSize) {
+            ++pageCount;
+        }
+    }
+
+    // Mark the pages dirty
+    for (Size i = 0; i < pageCount; ++i) {
+        _pages[i + startPage] |= DIRTY;
+    }
+}
+
+
 Buffer::Size Buffer::setData(Size size, const Byte* data) {
-    auto prevSize = editSysmem().getSize();
-    auto newSize = editSysmem().setData(size, data);
-    Buffer::updateBufferCPUMemoryUsage(prevSize, newSize);
-    return newSize;
+    resize(size);
+    setSubData(0, size, data);
+    return _end;
 }
 
 Buffer::Size Buffer::setSubData(Size offset, Size size, const Byte* data) {
-    return editSysmem().setSubData( offset, size, data);
+    auto changedBytes = editSysmem().setSubData(offset, size, data);
+    if (changedBytes) {
+        dirtyPages(offset, changedBytes);
+    }
+    return changedBytes;
 }
 
 Buffer::Size Buffer::append(Size size, const Byte* data) {
-    auto prevSize = editSysmem().getSize();
-    auto newSize = editSysmem().append( size, data);
-    Buffer::updateBufferCPUMemoryUsage(prevSize, newSize);
-    return newSize;
+    auto offset = _end;
+    resize(_end + size);
+    setSubData(offset, size, data);
+    return _end;
+}
+
+Buffer::Size Buffer::getSize() const {
+    Q_ASSERT(getSysmem().getSize() >= _end);
+    return _end;
+}
+
+// Number of pages needed to hold _end bytes, rounded up to a whole page
+Buffer::Size Buffer::getRequiredPageCount() const {
+    Size result = _end / _pageSize;
+    if (_end % _pageSize) {
+        ++result;
+    }
+    return result;
 }
 
 const Element BufferView::DEFAULT_ELEMENT = Element( gpu::SCALAR, gpu::UINT8, gpu::RAW );
diff --git a/libraries/gpu/src/gpu/Resource.h b/libraries/gpu/src/gpu/Resource.h
index 570aff00fc..3f9a87b52c 100644
--- a/libraries/gpu/src/gpu/Resource.h
+++ b/libraries/gpu/src/gpu/Resource.h
@@ -88,10 +88,10 @@ protected:
         // Access the byte array.
         // The edit version allow to map data.
         const Byte* readData() const { return _data; }
-        Byte* editData() { _stamp++; return _data; }
+        Byte* editData() { return _data; }
 
         template< typename T > const T* read() const { return reinterpret_cast< T* > ( _data ); }
-        template< typename T > T* edit() { _stamp++; return reinterpret_cast< T* > ( _data ); }
+        template< typename T > T* edit() { return reinterpret_cast< T* > ( _data ); }
 
         // Access the current version of the sysmem, used to compare if copies are in sync
         Stamp getStamp() const { return _stamp; }
@@ -102,9 +102,9 @@ protected:
         bool isAvailable() const { return (_data != 0); }
 
     private:
-        Stamp _stamp;
-        Size  _size;
-        Byte* _data;
+        Stamp _stamp { 0 };
+        Size  _size { 0 };
+        Byte* _data { nullptr };
     };
 
 };
@@ -115,19 +115,27 @@ class Buffer : public Resource {
     static void updateBufferCPUMemoryUsage(Size prevObjectSize, Size newObjectSize);
 
 public:
+    enum Flag {
+        DIRTY = 0x01,
+    };
+
+    // Currently only one flag... 'dirty'
+    using PageFlags = std::vector<uint8_t>;
+    static const Size DEFAULT_PAGE_SIZE = 4096;
     static uint32_t getBufferCPUCount();
     static Size getBufferCPUMemoryUsage();
     static uint32_t getBufferGPUCount();
     static Size getBufferGPUMemoryUsage();
 
-    Buffer();
-    Buffer(Size size, const Byte* bytes);
+    Buffer(Size pageSize = DEFAULT_PAGE_SIZE);
+    Buffer(Size size, const Byte* bytes, Size pageSize = DEFAULT_PAGE_SIZE);
     Buffer(const Buffer& buf); // deep copy of the sysmem buffer
     Buffer& operator=(const Buffer& buf); // deep copy of the sysmem buffer
     ~Buffer();
 
     // The size in bytes of data stored in the buffer
-    Size getSize() const { return getSysmem().getSize(); }
+    Size getSize() const;
     const Byte* getData() const { return getSysmem().readData(); }
-    Byte* editData() { return editSysmem().editData(); }
+    // Raw write access: the caller may touch any byte, so conservatively mark the whole buffer dirty
+    Byte* editData() { dirtyPages(0, _end); return editSysmem().editData(); }
 
@@ -143,6 +151,22 @@ public:
     // \return the number of bytes copied
     Size setSubData(Size offset, Size size, const Byte* data);
 
+    // Writes one T at element index 'index' (byte offset index * sizeof(T))
+    template <typename T>
+    Size setSubData(Size index, const T& t) {
+        Size offset = index * sizeof(T);
+        Size size = sizeof(T);
+        return setSubData(offset, size, reinterpret_cast<const Byte*>(&t));
+    }
+
+    // Writes every element of 't', starting at element index 'index'
+    template <typename T>
+    Size setSubData(Size index, const std::vector<T>& t) {
+        Size offset = index * sizeof(T);
+        Size size = t.size() * sizeof(T);
+        return setSubData(offset, size, reinterpret_cast<const Byte*>(t.data()));
+    }
+
     // Append new data at the end of the current buffer
     // do a resize( size + getSize) and copy the new data
     // \return the number of bytes copied
@@ -158,15 +182,24 @@ public:
         return append(sizeof(T) * t.size(), reinterpret_cast<const Byte*>(&t[0]));
     }
 
-    // Access the sysmem object.
-    const Sysmem& getSysmem() const { assert(_sysmem); return (*_sysmem); }
-    Sysmem& editSysmem() { assert(_sysmem); return (*_sysmem); }
-
     const GPUObjectPointer gpuObject {};
 
 protected:
+    // Access the sysmem object, limited to ourselves and GPUObject derived classes
+    const Sysmem& getSysmem() const { return _sysmem; }
+    Sysmem& editSysmem() { return _sysmem; }
 
-    Sysmem* _sysmem = NULL;
+    Size getRequiredPageCount() const;
+    void dirtyPages(Size offset, Size bytes);
+
+    Size _end { 0 };
+    mutable uint8_t _flags { 0 };
+    mutable PageFlags _pages;
+    const Size _pageSize;
+    Sysmem _sysmem;
+
+    // FIXME find a more generic way to do this.
+    friend class GLBackend;
 };
 
 typedef std::shared_ptr<Buffer> BufferPointer;