Support partial CPU->GPU buffer transfers

This commit is contained in:
Brad Davis 2016-05-12 16:32:29 -07:00
parent a5f99ba370
commit eb84459f03
4 changed files with 254 additions and 110 deletions

View file

@ -65,15 +65,22 @@ public:
class GLBuffer : public GPUObject { class GLBuffer : public GPUObject {
public: public:
Stamp _stamp; const GLuint _buffer;
GLuint _buffer; const GLuint _size;
GLuint _size; const Stamp _stamp;
GLBuffer(); GLBuffer(const Buffer& buffer);
~GLBuffer(); ~GLBuffer();
void setSize(GLuint size); void transfer(bool forceAll = false);
private:
bool getNextTransferBlock(GLintptr& outOffset, GLsizeiptr& outSize, size_t& currentPage) const;
// The gpu::Buffer whose contents this GL buffer object mirrors
const Buffer& _gpuBuffer;
}; };
static GLBuffer* syncGPUObject(const Buffer& buffer); static GLBuffer* syncGPUObject(const Buffer& buffer);
static GLuint getBufferID(const Buffer& buffer); static GLuint getBufferID(const Buffer& buffer);

View file

@ -12,14 +12,43 @@
using namespace gpu; using namespace gpu;
GLBackend::GLBuffer::GLBuffer() : static std::once_flag check_dsa;
_stamp(0), static bool DSA_SUPPORTED { false };
_buffer(0),
_size(0) GLuint allocateSingleBuffer() {
{ std::call_once(check_dsa, [&] {
Backend::incrementBufferGPUCount(); DSA_SUPPORTED = (GLEW_VERSION_4_5 || GLEW_ARB_direct_state_access);
});
GLuint result;
if (DSA_SUPPORTED) {
glCreateBuffers(1, &result);
} else {
glGenBuffers(1, &result);
}
return result;
} }
// Builds the GL-side object for `buffer`: obtains a buffer name, hands this object to
// Backend::setGPUObject for `buffer`, and reserves `_size` bytes of GL storage.
// Only the legacy glBufferData path uploads the sysmem contents here; the two
// *Storage paths allocate without initial data.
GLBackend::GLBuffer::GLBuffer(const Buffer& buffer) :
    _buffer(allocateSingleBuffer()),
    _size(buffer._sysmem.getSize()),
    _stamp(buffer._sysmem.getStamp()),
    _gpuBuffer(buffer) {
    (void)CHECK_GL_ERROR();
    Backend::setGPUObject(buffer, this);

    if (!DSA_SUPPORTED) {
        // Without direct state access the buffer has to be bound before it can be sized
        glBindBuffer(GL_ARRAY_BUFFER, _buffer);
        if (!(GLEW_VERSION_4_4 || GLEW_ARB_buffer_storage)) {
            // No buffer-storage support: allocate and fill in a single call
            const auto& sysmem = buffer.getSysmem();
            glBufferData(GL_ARRAY_BUFFER, sysmem.getSize(), sysmem.readData(), GL_DYNAMIC_DRAW);
        } else {
            glBufferStorage(GL_ARRAY_BUFFER, _size, nullptr, GL_DYNAMIC_STORAGE_BIT);
        }
        glBindBuffer(GL_ARRAY_BUFFER, 0);
    } else {
        glNamedBufferStorage(_buffer, _size, nullptr, GL_DYNAMIC_STORAGE_BIT);
    }

    Backend::incrementBufferGPUCount();
}
GLBackend::GLBuffer::~GLBuffer() { GLBackend::GLBuffer::~GLBuffer() {
if (_buffer != 0) { if (_buffer != 0) {
glDeleteBuffers(1, &_buffer); glDeleteBuffers(1, &_buffer);
@ -28,37 +57,99 @@ GLBackend::GLBuffer::~GLBuffer() {
Backend::decrementBufferGPUCount(); Backend::decrementBufferGPUCount();
} }
void GLBackend::GLBuffer::setSize(GLuint size) { void GLBackend::GLBuffer::transfer(bool forceAll) {
Backend::updateBufferGPUMemoryUsage(_size, size); const auto& pageFlags = _gpuBuffer._pages;
_size = size; if (!forceAll) {
// Count how often the dirty state flips between adjacent pages. Each flip is the
// edge of a contiguous dirty run, and every run needs its own upload call.
size_t transitions = 0;
if (!pageFlags.empty()) {
    bool lastDirty = (0 != (pageFlags[0] & Buffer::DIRTY));
    for (size_t i = 1; i < pageFlags.size(); ++i) {
        // Inspect page i: testing page 0 here compared it against itself, so no
        // flip was ever seen and every transfer fell back to a full upload.
        const bool newDirty = (0 != (pageFlags[i] & Buffer::DIRTY));
        if (newDirty != lastDirty) {
            ++transitions;
            lastDirty = newDirty;
        }
    }
}
// With no transitions every page shares the same state, so a partial upload gains nothing;
// with more than 20 the buffer is too fragmented. Either way, transfer the whole buffer.
if (transitions == 0 || transitions > 20) {
    forceAll = true;
}
}
// Are we transferring the whole buffer?
if (forceAll) {
    // Upload the whole sysmem copy into the existing storage (_size is the size the
    // storage was created with). The *SubData entry points are used on both paths
    // because the constructor may have created immutable storage via glBufferStorage,
    // on which glBufferData fails with GL_INVALID_OPERATION.
    const auto* data = _gpuBuffer.getSysmem().readData();
    if (DSA_SUPPORTED) {
        glNamedBufferSubData(_buffer, 0, _size, data);
    } else {
        glBindBuffer(GL_ARRAY_BUFFER, _buffer);
        glBufferSubData(GL_ARRAY_BUFFER, 0, _size, data);
        glBindBuffer(GL_ARRAY_BUFFER, 0);
    }
} else {
    // Upload only the contiguous runs of dirty pages
    if (!DSA_SUPPORTED) {
        glBindBuffer(GL_ARRAY_BUFFER, _buffer);
    }
    const GLsizeiptr bufferSize = static_cast<GLsizeiptr>(_size);
    GLintptr offset { 0 };
    GLsizeiptr size { 0 };
    size_t currentPage { 0 };
    const auto* data = _gpuBuffer.getSysmem().readData();
    while (getNextTransferBlock(offset, size, currentPage)) {
        // Blocks are whole pages, so the final one can extend past the end of the
        // storage when its size is not page-aligned. Never read or write beyond it.
        if (offset >= bufferSize) {
            continue;
        }
        if (size > bufferSize - offset) {
            size = bufferSize - offset;
        }
        if (DSA_SUPPORTED) {
            glNamedBufferSubData(_buffer, offset, size, data + offset);
        } else {
            glBufferSubData(GL_ARRAY_BUFFER, offset, size, data + offset);
        }
    }
    if (!DSA_SUPPORTED) {
        glBindBuffer(GL_ARRAY_BUFFER, 0);
    }
}
_gpuBuffer._flags &= ~Buffer::DIRTY;
(void)CHECK_GL_ERROR();
}
// Finds the next contiguous run of dirty pages at or after `currentPage`.
//   outOffset   - set to the byte offset of the first dirty page of the run
//   outSize     - set to the byte length of the run (a whole number of pages)
//   currentPage - in: page index to start scanning from;
//                 out: index of the first page after the run (or the page count if none was found)
// \return false, leaving outOffset and outSize untouched, when no dirty page remains
bool GLBackend::GLBuffer::getNextTransferBlock(GLintptr& outOffset, GLsizeiptr& outSize, size_t& currentPage) const {
size_t pageCount = _gpuBuffer._pages.size();
// Advance to the first dirty page
while (currentPage < pageCount && (0 == (Buffer::DIRTY & _gpuBuffer._pages[currentPage]))) {
++currentPage;
}
// If we got to the end, we're done
if (currentPage >= pageCount) {
return false;
}
// Record where the dirty run starts, then advance to the next clean page
outOffset = static_cast<GLintptr>(currentPage * _gpuBuffer._pageSize);
while (currentPage < pageCount && (0 != (Buffer::DIRTY & _gpuBuffer._pages[currentPage]))) {
++currentPage;
}
// NOTE(review): the length is a multiple of _pageSize, so it may extend past the end of
// the data when the buffer size is not page-aligned -- confirm the caller accounts for that
outSize = static_cast<GLsizeiptr>((currentPage * _gpuBuffer._pageSize) - outOffset);
return true;
} }
GLBackend::GLBuffer* GLBackend::syncGPUObject(const Buffer& buffer) { GLBackend::GLBuffer* GLBackend::syncGPUObject(const Buffer& buffer) {
GLBuffer* object = Backend::getGPUObject<GLBackend::GLBuffer>(buffer); GLBuffer* object = Backend::getGPUObject<GLBackend::GLBuffer>(buffer);
if (object && (object->_stamp == buffer.getSysmem().getStamp())) { bool forceTransferAll = false;
return object; // Has the storage size changed?
if (!object || object->_stamp != buffer.getSysmem().getStamp()) {
object = new GLBuffer(buffer);
forceTransferAll = true;
} }
// need to have a gpu object? if (forceTransferAll || (0 != (buffer._flags & Buffer::DIRTY))) {
if (!object) { object->transfer(forceTransferAll);
object = new GLBuffer();
glGenBuffers(1, &object->_buffer);
(void) CHECK_GL_ERROR();
Backend::setGPUObject(buffer, object);
} }
// Now let's update the content of the bo with the sysmem version
// TODO: in the future, be smarter about when to actually upload the glBO version based on the data that did change
//if () {
glBindBuffer(GL_ARRAY_BUFFER, object->_buffer);
glBufferData(GL_ARRAY_BUFFER, buffer.getSysmem().getSize(), buffer.getSysmem().readData(), GL_DYNAMIC_DRAW);
glBindBuffer(GL_ARRAY_BUFFER, 0);
object->_stamp = buffer.getSysmem().getStamp();
object->setSize((GLuint)buffer.getSysmem().getSize());
//}
(void) CHECK_GL_ERROR();
return object; return object;
} }

View file

@ -109,40 +109,18 @@ void Resource::Sysmem::deallocateMemory(Byte* dataAllocated, Size size) {
} }
} }
Resource::Sysmem::Sysmem() : Resource::Sysmem::Sysmem() {}
_stamp(0),
_size(0),
_data(NULL)
{
}
Resource::Sysmem::Sysmem(Size size, const Byte* bytes) : Resource::Sysmem::Sysmem(Size size, const Byte* bytes) {
_stamp(0), if (size > 0 && bytes) {
_size(0), setData(_size, bytes);
_data(NULL)
{
if (size > 0) {
_size = allocateMemory(&_data, size);
if (_size >= size) {
if (bytes) {
memcpy(_data, bytes, size);
}
}
} }
} }
Resource::Sysmem::Sysmem(const Sysmem& sysmem) : Resource::Sysmem::Sysmem(const Sysmem& sysmem) {
_stamp(0),
_size(0),
_data(NULL)
{
if (sysmem.getSize() > 0) { if (sysmem.getSize() > 0) {
_size = allocateMemory(&_data, sysmem.getSize()); allocate(sysmem._size);
if (_size >= sysmem.getSize()) { setData(_size, sysmem._data);
if (sysmem.readData()) {
memcpy(_data, sysmem.readData(), sysmem.getSize());
}
}
} }
} }
@ -208,7 +186,6 @@ Resource::Size Resource::Sysmem::setData( Size size, const Byte* bytes ) {
if (allocate(size) == size) { if (allocate(size) == size) {
if (size && bytes) { if (size && bytes) {
memcpy( _data, bytes, _size ); memcpy( _data, bytes, _size );
_stamp++;
} }
} }
return _size; return _size;
@ -217,7 +194,6 @@ Resource::Size Resource::Sysmem::setData( Size size, const Byte* bytes ) {
Resource::Size Resource::Sysmem::setSubData( Size offset, Size size, const Byte* bytes) { Resource::Size Resource::Sysmem::setSubData( Size offset, Size size, const Byte* bytes) {
if (size && ((offset + size) <= getSize()) && bytes) { if (size && ((offset + size) <= getSize()) && bytes) {
memcpy( _data + offset, bytes, size ); memcpy( _data + offset, bytes, size );
_stamp++;
return size; return size;
} }
return 0; return 0;
@ -264,65 +240,105 @@ Buffer::Size Buffer::getBufferGPUMemoryUsage() {
return Context::getBufferGPUMemoryUsage(); return Context::getBufferGPUMemoryUsage();
} }
Buffer::Buffer() : Buffer::Buffer(Size pageSize) :
Resource(), _pageSize(pageSize) {
_sysmem(new Sysmem()) {
_bufferCPUCount++; _bufferCPUCount++;
} }
Buffer::Buffer(Size size, const Byte* bytes) : Buffer::Buffer(Size size, const Byte* bytes, Size pageSize) : Buffer(pageSize) {
Resource(), setData(size, bytes);
_sysmem(new Sysmem(size, bytes)) {
_bufferCPUCount++;
Buffer::updateBufferCPUMemoryUsage(0, _sysmem->getSize());
} }
Buffer::Buffer(const Buffer& buf) : Buffer::Buffer(const Buffer& buf) : Buffer(buf._pageSize) {
Resource(), setData(buf.getSize(), buf.getData());
_sysmem(new Sysmem(buf.getSysmem())) {
_bufferCPUCount++;
Buffer::updateBufferCPUMemoryUsage(0, _sysmem->getSize());
} }
Buffer& Buffer::operator=(const Buffer& buf) { Buffer& Buffer::operator=(const Buffer& buf) {
(*_sysmem) = buf.getSysmem(); const_cast<Size&>(_pageSize) = buf._pageSize;
setData(buf.getSize(), buf.getData());
return (*this); return (*this);
} }
Buffer::~Buffer() { Buffer::~Buffer() {
_bufferCPUCount--; _bufferCPUCount--;
Buffer::updateBufferCPUMemoryUsage(_sysmem.getSize(), 0);
if (_sysmem) {
Buffer::updateBufferCPUMemoryUsage(_sysmem->getSize(), 0);
delete _sysmem;
_sysmem = NULL;
}
} }
Buffer::Size Buffer::resize(Size size) { Buffer::Size Buffer::resize(Size size) {
_end = size;
auto prevSize = editSysmem().getSize(); auto prevSize = editSysmem().getSize();
auto newSize = editSysmem().resize(size); if (prevSize < size) {
Buffer::updateBufferCPUMemoryUsage(prevSize, newSize); auto newPages = getRequiredPageCount();
return newSize; auto newSize = newPages * _pageSize;
editSysmem().resize(size);
// All new pages start off as clean, because they haven't been populated by data
_pages.resize(newPages, 0);
Buffer::updateBufferCPUMemoryUsage(prevSize, newSize);
}
return _end;
} }
// Flags every page overlapped by the byte range [offset, offset + bytes) as DIRTY and
// raises the buffer-level DIRTY flag. A zero-length range is a no-op.
void Buffer::dirtyPages(Size offset, Size bytes) {
    if (bytes == 0) {
        return;
    }
    _flags |= DIRTY;

    const Size firstPage = offset / _pageSize;
    // Bytes available in the first page between `offset` and the page boundary
    const Size roomInFirstPage = _pageSize - (offset % _pageSize);

    // The range always touches the first page; whatever does not fit there
    // spills over into the pages that follow
    Size lastPage = firstPage;
    if (bytes > roomInFirstPage) {
        const Size spill = bytes - roomInFirstPage;
        lastPage += spill / _pageSize;
        if ((spill % _pageSize) != 0) {
            // A partially covered trailing page is dirty too
            ++lastPage;
        }
    }

    for (Size page = firstPage; page <= lastPage; ++page) {
        _pages[page] |= DIRTY;
    }
}
Buffer::Size Buffer::setData(Size size, const Byte* data) { Buffer::Size Buffer::setData(Size size, const Byte* data) {
auto prevSize = editSysmem().getSize(); resize(size);
auto newSize = editSysmem().setData(size, data); setSubData(0, size, data);
Buffer::updateBufferCPUMemoryUsage(prevSize, newSize); return _end;
return newSize;
} }
Buffer::Size Buffer::setSubData(Size offset, Size size, const Byte* data) { Buffer::Size Buffer::setSubData(Size offset, Size size, const Byte* data) {
return editSysmem().setSubData( offset, size, data); auto changedBytes = editSysmem().setSubData(offset, size, data);
if (changedBytes) {
dirtyPages(offset, changedBytes);
}
return changedBytes;
} }
Buffer::Size Buffer::append(Size size, const Byte* data) { Buffer::Size Buffer::append(Size size, const Byte* data) {
auto prevSize = editSysmem().getSize(); auto offset = _end;
auto newSize = editSysmem().append( size, data); resize(_end + size);
Buffer::updateBufferCPUMemoryUsage(prevSize, newSize); setSubData(offset, size, data);
return newSize; return _end;
}
// Returns the logical size of the buffer in bytes (_end). This can be smaller than the
// sysmem allocation; the assertion checks that it never exceeds it.
Buffer::Size Buffer::getSize() const {
Q_ASSERT(getSysmem().getSize() >= _end);
return _end;
}
Buffer::Size Buffer::getRequiredPageCount() const {
Size result = _end / _pageSize;
if (_end % _pageSize) {
++result;
}
return result;
} }
const Element BufferView::DEFAULT_ELEMENT = Element( gpu::SCALAR, gpu::UINT8, gpu::RAW ); const Element BufferView::DEFAULT_ELEMENT = Element( gpu::SCALAR, gpu::UINT8, gpu::RAW );

View file

@ -88,10 +88,10 @@ protected:
// Access the byte array. // Access the byte array.
// The edit version allow to map data. // The edit version allow to map data.
const Byte* readData() const { return _data; } const Byte* readData() const { return _data; }
Byte* editData() { _stamp++; return _data; } Byte* editData() { return _data; }
template< typename T > const T* read() const { return reinterpret_cast< T* > ( _data ); } template< typename T > const T* read() const { return reinterpret_cast< T* > ( _data ); }
template< typename T > T* edit() { _stamp++; return reinterpret_cast< T* > ( _data ); } template< typename T > T* edit() { return reinterpret_cast< T* > ( _data ); }
// Access the current version of the sysmem, used to compare if copies are in sync // Access the current version of the sysmem, used to compare if copies are in sync
Stamp getStamp() const { return _stamp; } Stamp getStamp() const { return _stamp; }
@ -102,9 +102,9 @@ protected:
bool isAvailable() const { return (_data != 0); } bool isAvailable() const { return (_data != 0); }
private: private:
Stamp _stamp; Stamp _stamp { 0 };
Size _size; Size _size { 0 };
Byte* _data; Byte* _data { nullptr };
}; };
}; };
@ -115,19 +115,26 @@ class Buffer : public Resource {
static void updateBufferCPUMemoryUsage(Size prevObjectSize, Size newObjectSize); static void updateBufferCPUMemoryUsage(Size prevObjectSize, Size newObjectSize);
public: public:
enum Flag {
DIRTY = 0x01,
};
// Currently only one flag... 'dirty'
using PageFlags = std::vector<uint8_t>;
static const Size DEFAULT_PAGE_SIZE = 4096;
static uint32_t getBufferCPUCount(); static uint32_t getBufferCPUCount();
static Size getBufferCPUMemoryUsage(); static Size getBufferCPUMemoryUsage();
static uint32_t getBufferGPUCount(); static uint32_t getBufferGPUCount();
static Size getBufferGPUMemoryUsage(); static Size getBufferGPUMemoryUsage();
Buffer(); Buffer(Size pageSize = DEFAULT_PAGE_SIZE);
Buffer(Size size, const Byte* bytes); Buffer(Size size, const Byte* bytes, Size pageSize = DEFAULT_PAGE_SIZE);
Buffer(const Buffer& buf); // deep copy of the sysmem buffer Buffer(const Buffer& buf); // deep copy of the sysmem buffer
Buffer& operator=(const Buffer& buf); // deep copy of the sysmem buffer Buffer& operator=(const Buffer& buf); // deep copy of the sysmem buffer
~Buffer(); ~Buffer();
// The size in bytes of data stored in the buffer // The size in bytes of data stored in the buffer
Size getSize() const { return getSysmem().getSize(); } Size getSize() const;
const Byte* getData() const { return getSysmem().readData(); } const Byte* getData() const { return getSysmem().readData(); }
Byte* editData() { return editSysmem().editData(); } Byte* editData() { return editSysmem().editData(); }
@ -143,6 +150,20 @@ public:
// \return the number of bytes copied // \return the number of bytes copied
Size setSubData(Size offset, Size size, const Byte* data); Size setSubData(Size offset, Size size, const Byte* data);
// Overwrites the element at position `index` of the buffer, viewed as an array of T, with `t`.
// \return the number of bytes copied
template <typename T>
Size setSubData(Size index, const T& t) {
    const Byte* bytes = reinterpret_cast<const Byte*>(&t);
    return setSubData(index * sizeof(T), sizeof(T), bytes);
}
// Overwrites consecutive elements of the buffer, viewed as an array of T and starting
// at element position `index`, with the contents of `t`.
// \return the number of bytes copied (0 for an empty vector)
template <typename T>
Size setSubData(Size index, const std::vector<T>& t) {
    Size offset = index * sizeof(T);
    Size size = t.size() * sizeof(T);
    // data() is well-defined on an empty vector, whereas &t[0] is undefined behavior there
    return setSubData(offset, size, reinterpret_cast<const Byte*>(t.data()));
}
// Append new data at the end of the current buffer // Append new data at the end of the current buffer
// do a resize( size + getSize) and copy the new data // do a resize( size + getSize) and copy the new data
// \return the number of bytes copied // \return the number of bytes copied
@ -158,15 +179,24 @@ public:
return append(sizeof(T) * t.size(), reinterpret_cast<const Byte*>(&t[0])); return append(sizeof(T) * t.size(), reinterpret_cast<const Byte*>(&t[0]));
} }
// Access the sysmem object.
const Sysmem& getSysmem() const { assert(_sysmem); return (*_sysmem); }
Sysmem& editSysmem() { assert(_sysmem); return (*_sysmem); }
const GPUObjectPointer gpuObject {}; const GPUObjectPointer gpuObject {};
protected: protected:
// Access the sysmem object, limited to ourselves and GPUObject derived classes
const Sysmem& getSysmem() const { return _sysmem; }
Sysmem& editSysmem() { return _sysmem; }
Sysmem* _sysmem = NULL; Size getRequiredPageCount() const;
void dirtyPages(Size offset, Size bytes);
// Logical end of the stored data, in bytes
Size _end { 0 };
// Buffer-level flags (see Flag). Zero-initialized so that testing DIRTY on a freshly
// constructed buffer never reads an indeterminate value.
mutable uint8_t _flags { 0 };
// One flag byte per _pageSize-byte page of the buffer
mutable PageFlags _pages;
// Granularity, in bytes, at which modifications are tracked
const Size _pageSize;
Sysmem _sysmem;
// FIXME find a more generic way to do this.
friend class GLBackend;
}; };
typedef std::shared_ptr<Buffer> BufferPointer; typedef std::shared_ptr<Buffer> BufferPointer;