mirror of
https://github.com/JulianGro/overte.git
synced 2025-04-25 14:53:01 +02:00
Support partial CPU->GPU buffer transfers
This commit is contained in:
parent
a5f99ba370
commit
eb84459f03
4 changed files with 254 additions and 110 deletions
|
@ -65,15 +65,22 @@ public:
|
|||
|
||||
class GLBuffer : public GPUObject {
|
||||
public:
|
||||
Stamp _stamp;
|
||||
GLuint _buffer;
|
||||
GLuint _size;
|
||||
const GLuint _buffer;
|
||||
const GLuint _size;
|
||||
const Stamp _stamp;
|
||||
|
||||
GLBuffer();
|
||||
GLBuffer(const Buffer& buffer);
|
||||
~GLBuffer();
|
||||
|
||||
void setSize(GLuint size);
|
||||
void transfer(bool forceAll = false);
|
||||
|
||||
private:
|
||||
bool getNextTransferBlock(GLintptr& outOffset, GLsizeiptr& outSize, size_t& currentPage) const;
|
||||
|
||||
// The owning texture
|
||||
const Buffer& _gpuBuffer;
|
||||
};
|
||||
|
||||
static GLBuffer* syncGPUObject(const Buffer& buffer);
|
||||
static GLuint getBufferID(const Buffer& buffer);
|
||||
|
||||
|
|
|
@ -12,11 +12,40 @@
|
|||
|
||||
using namespace gpu;
|
||||
|
||||
GLBackend::GLBuffer::GLBuffer() :
|
||||
_stamp(0),
|
||||
_buffer(0),
|
||||
_size(0)
|
||||
{
|
||||
static std::once_flag check_dsa;
|
||||
static bool DSA_SUPPORTED { false };
|
||||
|
||||
// Creates one GL buffer name and returns it.
//
// The first call probes (once, via std::call_once) whether direct state access
// is available and caches the answer in DSA_SUPPORTED. With DSA the buffer is
// created with glCreateBuffers, otherwise with glGenBuffers.
//
// Returns 0 if the GL call did not produce a name.
GLuint allocateSingleBuffer() {
    std::call_once(check_dsa, [] {
        DSA_SUPPORTED = (GLEW_VERSION_4_5 || GLEW_ARB_direct_state_access);
    });

    // Start from 0 so a failed GL call never hands back an indeterminate value
    GLuint result { 0 };
    if (DSA_SUPPORTED) {
        glCreateBuffers(1, &result);
    } else {
        glGenBuffers(1, &result);
    }
    return result;
}
|
||||
|
||||
// Builds the GL object for `buffer`: allocates a buffer name, records the
// sysmem size and stamp, registers itself as the Buffer's GPU object and
// allocates the GL storage.
//
// Storage allocation depends on what the driver offers:
//  - DSA:                 glNamedBufferStorage (no initial data)
//  - buffer storage only: glBufferStorage on GL_ARRAY_BUFFER (no initial data)
//  - neither:             glBufferData, which also uploads the current sysmem content
GLBackend::GLBuffer::GLBuffer(const Buffer& buffer) :
    _buffer(allocateSingleBuffer()),
    _size(buffer._sysmem.getSize()),
    _stamp(buffer._sysmem.getStamp()),
    _gpuBuffer(buffer) {
    (void)CHECK_GL_ERROR();
    Backend::setGPUObject(buffer, this);

    if (!DSA_SUPPORTED) {
        // Without DSA the buffer has to be bound to a target to be configured
        glBindBuffer(GL_ARRAY_BUFFER, _buffer);
        if (GLEW_VERSION_4_4 || GLEW_ARB_buffer_storage) {
            glBufferStorage(GL_ARRAY_BUFFER, _size, nullptr, GL_DYNAMIC_STORAGE_BIT);
        } else {
            const auto& sysmem = buffer.getSysmem();
            glBufferData(GL_ARRAY_BUFFER, sysmem.getSize(), sysmem.readData(), GL_DYNAMIC_DRAW);
        }
        glBindBuffer(GL_ARRAY_BUFFER, 0);
    } else {
        glNamedBufferStorage(_buffer, _size, nullptr, GL_DYNAMIC_STORAGE_BIT);
    }

    Backend::incrementBufferGPUCount();
}
|
||||
|
||||
|
@ -28,37 +57,99 @@ GLBackend::GLBuffer::~GLBuffer() {
|
|||
Backend::decrementBufferGPUCount();
|
||||
}
|
||||
|
||||
void GLBackend::GLBuffer::setSize(GLuint size) {
|
||||
Backend::updateBufferGPUMemoryUsage(_size, size);
|
||||
_size = size;
|
||||
void GLBackend::GLBuffer::transfer(bool forceAll) {
|
||||
const auto& pageFlags = _gpuBuffer._pages;
|
||||
if (!forceAll) {
|
||||
size_t transitions = 0;
|
||||
if (pageFlags.size()) {
|
||||
bool lastDirty = (0 != (pageFlags[0] & Buffer::DIRTY));
|
||||
for (size_t i = 1; i < pageFlags.size(); ++i) {
|
||||
bool newDirty = (0 != (pageFlags[0] & Buffer::DIRTY));
|
||||
if (newDirty != lastDirty) {
|
||||
++transitions;
|
||||
lastDirty = newDirty;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If there are no transitions (implying the whole buffer is dirty)
|
||||
// or more than 20 transitions, then just transfer the whole buffer
|
||||
if (transitions == 0 || transitions > 20) {
|
||||
forceAll = true;
|
||||
}
|
||||
}
|
||||
|
||||
// Are we transferring the whole buffer?
|
||||
if (forceAll) {
|
||||
if (DSA_SUPPORTED) {
|
||||
glNamedBufferSubData(_buffer, 0, _size, _gpuBuffer.getSysmem().readData());
|
||||
} else {
|
||||
// Now let's update the content of the bo with the sysmem version
|
||||
// TODO: in the future, be smarter about when to actually upload the glBO version based on the data that did change
|
||||
//if () {
|
||||
glBindBuffer(GL_ARRAY_BUFFER, _buffer);
|
||||
glBufferData(GL_ARRAY_BUFFER, _gpuBuffer.getSysmem().getSize(), _gpuBuffer.getSysmem().readData(), GL_DYNAMIC_DRAW);
|
||||
glBindBuffer(GL_ARRAY_BUFFER, 0);
|
||||
}
|
||||
} else {
|
||||
if (!DSA_SUPPORTED) {
|
||||
glBindBuffer(GL_ARRAY_BUFFER, _buffer);
|
||||
}
|
||||
GLintptr offset;
|
||||
GLsizeiptr size;
|
||||
size_t currentPage { 0 };
|
||||
auto data = _gpuBuffer.getSysmem().readData();
|
||||
while (getNextTransferBlock(offset, size, currentPage)) {
|
||||
if (DSA_SUPPORTED) {
|
||||
glNamedBufferSubData(_buffer, offset, size, data + offset);
|
||||
} else {
|
||||
glBufferSubData(GL_ARRAY_BUFFER, offset, size, data + offset);
|
||||
}
|
||||
}
|
||||
|
||||
if (!DSA_SUPPORTED) {
|
||||
glBindBuffer(GL_ARRAY_BUFFER, 0);
|
||||
}
|
||||
}
|
||||
_gpuBuffer._flags &= ~Buffer::DIRTY;
|
||||
(void)CHECK_GL_ERROR();
|
||||
}
|
||||
|
||||
// Locates the next contiguous run of dirty pages at or after currentPage.
//
// outOffset:   receives the byte offset of the first dirty page of the run
// outSize:     receives the byte length of the run, clamped to the GL storage size
// currentPage: in: page index to start searching from;
//              out: index of the first page after the run (pass it back in to continue)
//
// Returns true when a block was found, false when no uploadable dirty page remains.
bool GLBackend::GLBuffer::getNextTransferBlock(GLintptr& outOffset, GLsizeiptr& outSize, size_t& currentPage) const {
    const size_t pageCount = _gpuBuffer._pages.size();

    // Advance to the first dirty page
    while (currentPage < pageCount && (0 == (Buffer::DIRTY & _gpuBuffer._pages[currentPage]))) {
        ++currentPage;
    }

    // If we got to the end, we're done
    if (currentPage >= pageCount) {
        return false;
    }

    // Advance to the next clean page
    outOffset = static_cast<GLintptr>(currentPage * _gpuBuffer._pageSize);
    while (currentPage < pageCount && (0 != (Buffer::DIRTY & _gpuBuffer._pages[currentPage]))) {
        ++currentPage;
    }

    // The page table covers whole pages, but the GL storage (and the sysmem it
    // mirrors) may end part-way through the last page. Never report a block
    // that extends past the storage.
    const GLintptr storageEnd = static_cast<GLintptr>(_size);
    GLintptr blockEnd = static_cast<GLintptr>(currentPage * _gpuBuffer._pageSize);
    if (blockEnd > storageEnd) {
        blockEnd = storageEnd;
    }
    if (blockEnd <= outOffset) {
        // The remaining dirty pages lie entirely beyond the GL storage
        return false;
    }

    outSize = static_cast<GLsizeiptr>(blockEnd - outOffset);
    return true;
}
|
||||
|
||||
// Returns the GL object backing `buffer`, creating and/or updating it as needed.
//
// A new GLBuffer is created when the Buffer has no GPU object yet or when the
// sysmem stamp no longer matches the one captured by the existing object; in
// that case the whole content is uploaded. Otherwise an upload only happens
// when the Buffer is flagged DIRTY.
GLBackend::GLBuffer* GLBackend::syncGPUObject(const Buffer& buffer) {
    GLBuffer* glBuffer = Backend::getGPUObject<GLBackend::GLBuffer>(buffer);

    // Has the storage size changed?
    const bool needsNewStorage = !glBuffer || (glBuffer->_stamp != buffer.getSysmem().getStamp());
    if (needsNewStorage) {
        // The GLBuffer constructor registers itself with the Buffer
        glBuffer = new GLBuffer(buffer);
    }

    const bool isDirty = (0 != (buffer._flags & Buffer::DIRTY));
    if (needsNewStorage || isDirty) {
        glBuffer->transfer(needsNewStorage);
    }

    return glBuffer;
}
|
||||
|
||||
|
|
|
@ -109,40 +109,18 @@ void Resource::Sysmem::deallocateMemory(Byte* dataAllocated, Size size) {
|
|||
}
|
||||
}
|
||||
|
||||
Resource::Sysmem::Sysmem() :
|
||||
_stamp(0),
|
||||
_size(0),
|
||||
_data(NULL)
|
||||
{
|
||||
}
|
||||
// Empty sysmem: all state comes from the in-class member initializers.
Resource::Sysmem::Sysmem() = default;
|
||||
|
||||
Resource::Sysmem::Sysmem(Size size, const Byte* bytes) :
|
||||
_stamp(0),
|
||||
_size(0),
|
||||
_data(NULL)
|
||||
{
|
||||
if (size > 0) {
|
||||
_size = allocateMemory(&_data, size);
|
||||
if (_size >= size) {
|
||||
if (bytes) {
|
||||
memcpy(_data, bytes, size);
|
||||
}
|
||||
}
|
||||
// Creates a sysmem block holding a copy of `size` bytes read from `bytes`.
// When `size` is 0 or `bytes` is null the block is left empty.
Resource::Sysmem::Sysmem(Size size, const Byte* bytes) {
    if (size > 0 && bytes) {
        // Pass the requested size: the _size member is still 0 at this point,
        // so using it would allocate and copy nothing.
        setData(size, bytes);
    }
}
|
||||
|
||||
Resource::Sysmem::Sysmem(const Sysmem& sysmem) :
|
||||
_stamp(0),
|
||||
_size(0),
|
||||
_data(NULL)
|
||||
{
|
||||
// Copy constructor: allocates storage matching the source and copies its bytes.
// An empty source yields an empty sysmem.
Resource::Sysmem::Sysmem(const Sysmem& sysmem) {
    if (!(sysmem.getSize() > 0)) {
        return;
    }

    allocate(sysmem._size);
    // Copy as many bytes as were actually allocated
    setData(_size, sysmem._data);
}
|
||||
|
||||
|
@ -208,7 +186,6 @@ Resource::Size Resource::Sysmem::setData( Size size, const Byte* bytes ) {
|
|||
if (allocate(size) == size) {
|
||||
if (size && bytes) {
|
||||
memcpy( _data, bytes, _size );
|
||||
_stamp++;
|
||||
}
|
||||
}
|
||||
return _size;
|
||||
|
@ -217,7 +194,6 @@ Resource::Size Resource::Sysmem::setData( Size size, const Byte* bytes ) {
|
|||
Resource::Size Resource::Sysmem::setSubData( Size offset, Size size, const Byte* bytes) {
|
||||
if (size && ((offset + size) <= getSize()) && bytes) {
|
||||
memcpy( _data + offset, bytes, size );
|
||||
_stamp++;
|
||||
return size;
|
||||
}
|
||||
return 0;
|
||||
|
@ -264,65 +240,105 @@ Buffer::Size Buffer::getBufferGPUMemoryUsage() {
|
|||
return Context::getBufferGPUMemoryUsage();
|
||||
}
|
||||
|
||||
Buffer::Buffer() :
|
||||
Resource(),
|
||||
_sysmem(new Sysmem()) {
|
||||
// Creates an empty buffer whose dirty tracking works in pages of `pageSize` bytes.
// Also counts the new instance in the CPU buffer counter.
Buffer::Buffer(Size pageSize) :
    // _flags has no in-class initializer and is read by the GL backend to
    // decide whether an upload is needed, so it must start out cleared
    _flags(0),
    _pageSize(pageSize) {
    _bufferCPUCount++;
}
|
||||
|
||||
Buffer::Buffer(Size size, const Byte* bytes) :
|
||||
Resource(),
|
||||
_sysmem(new Sysmem(size, bytes)) {
|
||||
_bufferCPUCount++;
|
||||
Buffer::updateBufferCPUMemoryUsage(0, _sysmem->getSize());
|
||||
// Creates a buffer with the given page size (via the page-size constructor)
// and then hands `size` / `bytes` to setData to populate it.
Buffer::Buffer(Size size, const Byte* bytes, Size pageSize) :
    Buffer(pageSize) {
    setData(size, bytes);
}
|
||||
|
||||
Buffer::Buffer(const Buffer& buf) :
|
||||
Resource(),
|
||||
_sysmem(new Sysmem(buf.getSysmem())) {
|
||||
_bufferCPUCount++;
|
||||
Buffer::updateBufferCPUMemoryUsage(0, _sysmem->getSize());
|
||||
// Deep copy: adopts the source's page size, then copies its data through setData.
Buffer::Buffer(const Buffer& buf) :
    Buffer(buf._pageSize) {
    const Size sourceSize = buf.getSize();
    setData(sourceSize, buf.getData());
}
|
||||
|
||||
// Deep-copy assignment: adopts the source's page size and copies its data.
// Assigning a buffer to itself is a no-op.
Buffer& Buffer::operator=(const Buffer& buf) {
    // Self-assignment would copy the storage onto itself
    if (this == &buf) {
        return (*this);
    }

    if (_pageSize != buf._pageSize) {
        // NOTE(review): _pageSize is declared const, so writing through
        // const_cast is undefined behaviour; the member should be made
        // non-const. Kept here because the declaration lives in the header.
        const_cast<Size&>(_pageSize) = buf._pageSize;

        // The existing page flags were laid out for the old page size. Rebuild
        // the table so it covers the current storage with the new page size;
        // otherwise dirtyPages could index past the end of _pages.
        const Size storageSize = getSysmem().getSize();
        _pages.assign((storageSize + _pageSize - 1) / _pageSize, 0);
    }

    setData(buf.getSize(), buf.getData());
    return (*this);
}
|
||||
|
||||
// Removes this instance from the CPU buffer statistics: one fewer buffer, and
// the sysmem size is subtracted from the tracked CPU memory usage.
Buffer::~Buffer() {
    --_bufferCPUCount;

    const auto releasedBytes = _sysmem.getSize();
    Buffer::updateBufferCPUMemoryUsage(releasedBytes, 0);
}
|
||||
|
||||
// Sets the logical size of the buffer to `size` bytes and returns it.
//
// The backing sysmem only ever grows. When `size` exceeds the current storage,
// the storage is enlarged to a whole number of pages and the page table is
// extended to match. Shrinking just lowers the logical end.
Buffer::Size Buffer::resize(Size size) {
    _end = size;
    const auto prevSize = editSysmem().getSize();
    if (prevSize < size) {
        const auto newPages = getRequiredPageCount();
        // Size the storage to the page-rounded amount and account for what the
        // sysmem reports back, so the usage counter matches what the destructor
        // later subtracts (the sysmem's own size).
        const auto newSize = editSysmem().resize(newPages * _pageSize);
        // All new pages start off as clean, because they haven't been populated by data
        _pages.resize(newPages, 0);
        Buffer::updateBufferCPUMemoryUsage(prevSize, newSize);
    }
    return _end;
}
|
||||
|
||||
// Flags as DIRTY every page touched by the byte range [offset, offset + bytes)
// and sets the buffer-level DIRTY flag. A zero-length range changes nothing.
void Buffer::dirtyPages(Size offset, Size bytes) {
    if (bytes == 0) {
        return;
    }

    _flags |= DIRTY;

    // Pages containing the first and the last byte of the range
    const Size firstPage = offset / _pageSize;
    const Size lastPage = (offset + bytes - 1) / _pageSize;

    for (Size page = firstPage; page <= lastPage; ++page) {
        _pages[page] |= DIRTY;
    }
}
|
||||
|
||||
|
||||
// Replaces the buffer content: the logical size becomes `size` and the bytes
// are written from offset 0 through setSubData.
// Returns the logical size of the buffer afterwards.
Buffer::Size Buffer::setData(Size size, const Byte* data) {
    resize(size);

    const Size startOffset = 0;
    setSubData(startOffset, size, data);

    return _end;
}
|
||||
|
||||
// Writes `size` bytes from `data` at `offset` through the sysmem and marks the
// pages covering the written range dirty.
// Returns the number of bytes the sysmem reports as written (0 when nothing was written).
Buffer::Size Buffer::setSubData(Size offset, Size size, const Byte* data) {
    const auto written = editSysmem().setSubData(offset, size, data);
    if (written != 0) {
        dirtyPages(offset, written);
    }
    return written;
}
|
||||
|
||||
// Grows the buffer by `size` bytes and writes `data` into the newly added tail.
// Returns the logical size of the buffer afterwards.
Buffer::Size Buffer::append(Size size, const Byte* data) {
    // Remember where the current content ends; resize() moves _end
    const auto writeOffset = _end;
    resize(writeOffset + size);
    setSubData(writeOffset, size, data);
    return _end;
}
|
||||
|
||||
// Logical size of the buffer in bytes (_end). The backing sysmem is expected to
// be at least that large, which is asserted.
Buffer::Size Buffer::getSize() const {
    Q_ASSERT(getSysmem().getSize() >= _end);
    const Size logicalSize = _end;
    return logicalSize;
}
|
||||
|
||||
// Number of pages needed to cover the logical size: _end divided by the page
// size, rounded up.
Buffer::Size Buffer::getRequiredPageCount() const {
    const Size fullPages = _end / _pageSize;
    const bool hasPartialPage = (_end % _pageSize) != 0;
    return hasPartialPage ? (fullPages + 1) : fullPages;
}
|
||||
|
||||
const Element BufferView::DEFAULT_ELEMENT = Element( gpu::SCALAR, gpu::UINT8, gpu::RAW );
|
||||
|
|
|
@ -88,10 +88,10 @@ protected:
|
|||
// Access the byte array.
|
||||
// The edit version allow to map data.
|
||||
const Byte* readData() const { return _data; }
|
||||
Byte* editData() { _stamp++; return _data; }
|
||||
Byte* editData() { return _data; }
|
||||
|
||||
template< typename T > const T* read() const { return reinterpret_cast< T* > ( _data ); }
|
||||
template< typename T > T* edit() { _stamp++; return reinterpret_cast< T* > ( _data ); }
|
||||
template< typename T > T* edit() { return reinterpret_cast< T* > ( _data ); }
|
||||
|
||||
// Access the current version of the sysmem, used to compare if copies are in sync
|
||||
Stamp getStamp() const { return _stamp; }
|
||||
|
@ -102,9 +102,9 @@ protected:
|
|||
bool isAvailable() const { return (_data != 0); }
|
||||
|
||||
private:
|
||||
Stamp _stamp;
|
||||
Size _size;
|
||||
Byte* _data;
|
||||
Stamp _stamp { 0 };
|
||||
Size _size { 0 };
|
||||
Byte* _data { nullptr };
|
||||
};
|
||||
|
||||
};
|
||||
|
@ -115,19 +115,26 @@ class Buffer : public Resource {
|
|||
static void updateBufferCPUMemoryUsage(Size prevObjectSize, Size newObjectSize);
|
||||
|
||||
public:
|
||||
enum Flag {
|
||||
DIRTY = 0x01,
|
||||
};
|
||||
|
||||
// Currently only one flag... 'dirty'
|
||||
using PageFlags = std::vector<uint8_t>;
|
||||
static const Size DEFAULT_PAGE_SIZE = 4096;
|
||||
static uint32_t getBufferCPUCount();
|
||||
static Size getBufferCPUMemoryUsage();
|
||||
static uint32_t getBufferGPUCount();
|
||||
static Size getBufferGPUMemoryUsage();
|
||||
|
||||
Buffer();
|
||||
Buffer(Size size, const Byte* bytes);
|
||||
Buffer(Size pageSize = DEFAULT_PAGE_SIZE);
|
||||
Buffer(Size size, const Byte* bytes, Size pageSize = DEFAULT_PAGE_SIZE);
|
||||
Buffer(const Buffer& buf); // deep copy of the sysmem buffer
|
||||
Buffer& operator=(const Buffer& buf); // deep copy of the sysmem buffer
|
||||
~Buffer();
|
||||
|
||||
// The size in bytes of data stored in the buffer
|
||||
Size getSize() const { return getSysmem().getSize(); }
|
||||
Size getSize() const;
|
||||
const Byte* getData() const { return getSysmem().readData(); }
|
||||
Byte* editData() { return editSysmem().editData(); }
|
||||
|
||||
|
@ -143,6 +150,20 @@ public:
|
|||
// \return the number of bytes copied
|
||||
Size setSubData(Size offset, Size size, const Byte* data);
|
||||
|
||||
template <typename T>
|
||||
Size setSubData(Size index, const T& t) {
|
||||
Size offset = index * sizeof(T);
|
||||
Size size = sizeof(T);
|
||||
return setSubData(offset, size, reinterpret_cast<const Byte*>(&t));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
Size setSubData(Size index, const std::vector<T>& t) {
|
||||
Size offset = index * sizeof(T);
|
||||
Size size = t.size() * sizeof(T);
|
||||
return setSubData(offset, size, reinterpret_cast<const Byte*>(&t[0]));
|
||||
}
|
||||
|
||||
// Append new data at the end of the current buffer
|
||||
// do a resize( size + getSize) and copy the new data
|
||||
// \return the number of bytes copied
|
||||
|
@ -158,15 +179,24 @@ public:
|
|||
return append(sizeof(T) * t.size(), reinterpret_cast<const Byte*>(&t[0]));
|
||||
}
|
||||
|
||||
// Access the sysmem object.
|
||||
const Sysmem& getSysmem() const { assert(_sysmem); return (*_sysmem); }
|
||||
Sysmem& editSysmem() { assert(_sysmem); return (*_sysmem); }
|
||||
|
||||
const GPUObjectPointer gpuObject {};
|
||||
|
||||
protected:
|
||||
// Access the sysmem object, limited to ourselves and GPUObject derived classes
|
||||
const Sysmem& getSysmem() const { return _sysmem; }
|
||||
Sysmem& editSysmem() { return _sysmem; }
|
||||
|
||||
Sysmem* _sysmem = NULL;
|
||||
Size getRequiredPageCount() const;
|
||||
void dirtyPages(Size offset, Size bytes);
|
||||
|
||||
Size _end { 0 };
|
||||
mutable uint8_t _flags;
|
||||
mutable PageFlags _pages;
|
||||
const Size _pageSize;
|
||||
Sysmem _sysmem;
|
||||
|
||||
// FIXME find a more generic way to do this.
|
||||
friend class GLBackend;
|
||||
};
|
||||
|
||||
typedef std::shared_ptr<Buffer> BufferPointer;
|
||||
|
|
Loading…
Reference in a new issue