Support partial CPU->GPU buffer transfers

This commit is contained in:
Brad Davis 2016-05-12 16:32:29 -07:00
parent a5f99ba370
commit eb84459f03
4 changed files with 254 additions and 110 deletions

View file

@ -65,15 +65,22 @@ public:
class GLBuffer : public GPUObject {
public:
Stamp _stamp;
GLuint _buffer;
GLuint _size;
const GLuint _buffer;
const GLuint _size;
const Stamp _stamp;
GLBuffer();
GLBuffer(const Buffer& buffer);
~GLBuffer();
void setSize(GLuint size);
void transfer(bool forceAll = false);
private:
bool getNextTransferBlock(GLintptr& outOffset, GLsizeiptr& outSize, size_t& currentPage) const;
// The owning buffer (the gpu::Buffer this GL object was created for)
const Buffer& _gpuBuffer;
};
static GLBuffer* syncGPUObject(const Buffer& buffer);
static GLuint getBufferID(const Buffer& buffer);

View file

@ -12,11 +12,40 @@
using namespace gpu;
GLBackend::GLBuffer::GLBuffer() :
_stamp(0),
_buffer(0),
_size(0)
{
// Guards the one-time direct-state-access capability probe done in allocateSingleBuffer().
static std::once_flag check_dsa;
// Result of that probe (GLEW_VERSION_4_5 || GLEW_ARB_direct_state_access).
// Stays false until allocateSingleBuffer() has been called at least once.
static bool DSA_SUPPORTED { false };
// Creates a single GL buffer object name and returns it.
//
// The first call also records, exactly once, whether direct state access is
// available (GL 4.5 or ARB_direct_state_access) in DSA_SUPPORTED. With DSA the
// name comes from glCreateBuffers, otherwise from glGenBuffers.
GLuint allocateSingleBuffer() {
    std::call_once(check_dsa, [&] {
        DSA_SUPPORTED = (GLEW_VERSION_4_5 || GLEW_ARB_direct_state_access);
    });

    GLuint bufferName;
    if (!DSA_SUPPORTED) {
        glGenBuffers(1, &bufferName);
        return bufferName;
    }
    glCreateBuffers(1, &bufferName);
    return bufferName;
}
// Builds the GL-side object for `buffer`.
//
// Captures the sysmem size and stamp at construction time, registers itself as
// the GPU object of `buffer`, and allocates GL storage of `_size` bytes:
//   - DSA available:            glNamedBufferStorage (no initial data)
//   - buffer storage available: glBufferStorage on GL_ARRAY_BUFFER (no initial data)
//   - otherwise:                glBufferData, which also uploads the current sysmem contents
// NOTE(review): only the GPU buffer *count* is updated here; confirm that GPU
// memory-usage accounting is handled elsewhere.
GLBackend::GLBuffer::GLBuffer(const Buffer& buffer) :
    _buffer(allocateSingleBuffer()),
    _size(buffer._sysmem.getSize()),
    _stamp(buffer._sysmem.getStamp()),
    _gpuBuffer(buffer) {
    (void)CHECK_GL_ERROR();
    Backend::setGPUObject(buffer, this);

    if (!DSA_SUPPORTED) {
        // Without DSA the buffer has to be bound before storage can be defined.
        glBindBuffer(GL_ARRAY_BUFFER, _buffer);
        if (GLEW_VERSION_4_4 || GLEW_ARB_buffer_storage) {
            glBufferStorage(GL_ARRAY_BUFFER, _size, nullptr, GL_DYNAMIC_STORAGE_BIT);
        } else {
            const auto& sysmem = buffer.getSysmem();
            glBufferData(GL_ARRAY_BUFFER, sysmem.getSize(), sysmem.readData(), GL_DYNAMIC_DRAW);
        }
        glBindBuffer(GL_ARRAY_BUFFER, 0);
    } else {
        glNamedBufferStorage(_buffer, _size, nullptr, GL_DYNAMIC_STORAGE_BIT);
    }

    Backend::incrementBufferGPUCount();
}
@ -28,37 +57,99 @@ GLBackend::GLBuffer::~GLBuffer() {
Backend::decrementBufferGPUCount();
}
void GLBackend::GLBuffer::setSize(GLuint size) {
Backend::updateBufferGPUMemoryUsage(_size, size);
_size = size;
// Uploads the sysmem contents of the associated gpu::Buffer into this GL buffer.
//
// forceAll: when true the whole buffer is uploaded without looking at the page
//           flags. When false, the per-page DIRTY flags are scanned first: if
//           the pages show no clean<->dirty transition at all, or more than
//           MAX_TRANSITIONS of them, the whole buffer is uploaded anyway;
//           otherwise only the contiguous runs of dirty pages are uploaded.
//
// On return the buffer-wide DIRTY flag and every per-page DIRTY flag are cleared.
void GLBackend::GLBuffer::transfer(bool forceAll) {
    // Above this many clean<->dirty transitions a single full upload is used
    // instead of many small ones.
    static const size_t MAX_TRANSITIONS = 20;

    if (!forceAll) {
        const auto& pageFlags = _gpuBuffer._pages;
        size_t transitions = 0;
        if (!pageFlags.empty()) {
            bool lastDirty = (0 != (pageFlags[0] & Buffer::DIRTY));
            for (size_t i = 1; i < pageFlags.size(); ++i) {
                // Inspect page i (reading page 0 here would never detect a transition).
                const bool newDirty = (0 != (pageFlags[i] & Buffer::DIRTY));
                if (newDirty != lastDirty) {
                    ++transitions;
                    lastDirty = newDirty;
                }
            }
        }

        // If there are no transitions (implying the whole buffer is dirty)
        // or too many transitions, then just transfer the whole buffer
        if (transitions == 0 || transitions > MAX_TRANSITIONS) {
            forceAll = true;
        }
    }

    const auto data = _gpuBuffer.getSysmem().readData();

    if (forceAll) {
        // Whole-buffer upload. The storage was already sized to _size in the
        // constructor, possibly as immutable storage (glBufferStorage), on which
        // glBufferData is an error; a sub-data update works for both kinds.
        if (DSA_SUPPORTED) {
            glNamedBufferSubData(_buffer, 0, _size, data);
        } else {
            glBindBuffer(GL_ARRAY_BUFFER, _buffer);
            glBufferSubData(GL_ARRAY_BUFFER, 0, _size, data);
            glBindBuffer(GL_ARRAY_BUFFER, 0);
        }
    } else {
        // Partial upload: one sub-data call per contiguous run of dirty pages.
        if (!DSA_SUPPORTED) {
            glBindBuffer(GL_ARRAY_BUFFER, _buffer);
        }

        GLintptr offset;
        GLsizeiptr size;
        size_t currentPage { 0 };
        while (getNextTransferBlock(offset, size, currentPage)) {
            if (DSA_SUPPORTED) {
                glNamedBufferSubData(_buffer, offset, size, data + offset);
            } else {
                glBufferSubData(GL_ARRAY_BUFFER, offset, size, data + offset);
            }
        }

        if (!DSA_SUPPORTED) {
            glBindBuffer(GL_ARRAY_BUFFER, 0);
        }
    }

    // Everything that was dirty is now on the GPU: reset the per-page flags too,
    // otherwise pages stay dirty forever and get re-uploaded on every later transfer.
    for (auto& page : _gpuBuffer._pages) {
        page &= ~Buffer::DIRTY;
    }
    _gpuBuffer._flags &= ~Buffer::DIRTY;
    (void)CHECK_GL_ERROR();
}
// Finds the next contiguous run of dirty pages at or after `currentPage`.
//
// outOffset:   receives the byte offset of the first dirty page of the run.
// outSize:     receives the byte length of the run, clamped so that
//              outOffset + outSize never exceeds this GL buffer's size.
// currentPage: in/out cursor; on return it indexes the first page after the
//              run (or the page count when the scan reached the end).
//
// Returns true when a run was found, false when there is nothing left to
// transfer (outOffset / outSize are left untouched in that case).
bool GLBackend::GLBuffer::getNextTransferBlock(GLintptr& outOffset, GLsizeiptr& outSize, size_t& currentPage) const {
    const auto& pages = _gpuBuffer._pages;
    const size_t pageCount = pages.size();
    const auto pageSize = _gpuBuffer._pageSize;

    // Advance to the first dirty page
    while (currentPage < pageCount && (0 == (Buffer::DIRTY & pages[currentPage]))) {
        ++currentPage;
    }

    // If we got to the end, we're done
    if (currentPage >= pageCount) {
        return false;
    }

    // Advance to the next clean page
    const auto blockBegin = currentPage * pageSize;
    while (currentPage < pageCount && (0 != (Buffer::DIRTY & pages[currentPage]))) {
        ++currentPage;
    }
    auto blockEnd = currentPage * pageSize;

    // Pages are whole multiples of the page size, but the buffer itself need not
    // be: the last page can be only partially backed by data. Clamp the run so
    // the upload neither reads past the sysmem copy nor writes past the GL storage.
    const auto capacity = static_cast<decltype(blockEnd)>(_size);
    if (blockEnd > capacity) {
        blockEnd = capacity;
    }
    if (blockEnd <= blockBegin) {
        // The run starts at or beyond the end of the GL buffer; nothing to upload.
        return false;
    }

    outOffset = static_cast<GLintptr>(blockBegin);
    outSize = static_cast<GLsizeiptr>(blockEnd - blockBegin);
    return true;
}
GLBackend::GLBuffer* GLBackend::syncGPUObject(const Buffer& buffer) {
GLBuffer* object = Backend::getGPUObject<GLBackend::GLBuffer>(buffer);
if (object && (object->_stamp == buffer.getSysmem().getStamp())) {
return object;
bool forceTransferAll = false;
// Has the storage size changed?
if (!object || object->_stamp != buffer.getSysmem().getStamp()) {
object = new GLBuffer(buffer);
forceTransferAll = true;
}
// need to have a gpu object?
if (!object) {
object = new GLBuffer();
glGenBuffers(1, &object->_buffer);
(void) CHECK_GL_ERROR();
Backend::setGPUObject(buffer, object);
if (forceTransferAll || (0 != (buffer._flags & Buffer::DIRTY))) {
object->transfer(forceTransferAll);
}
// Now let's update the content of the bo with the sysmem version
// TODO: in the future, be smarter about when to actually upload the glBO version based on the data that did change
//if () {
glBindBuffer(GL_ARRAY_BUFFER, object->_buffer);
glBufferData(GL_ARRAY_BUFFER, buffer.getSysmem().getSize(), buffer.getSysmem().readData(), GL_DYNAMIC_DRAW);
glBindBuffer(GL_ARRAY_BUFFER, 0);
object->_stamp = buffer.getSysmem().getStamp();
object->setSize((GLuint)buffer.getSysmem().getSize());
//}
(void) CHECK_GL_ERROR();
return object;
}

View file

@ -109,40 +109,18 @@ void Resource::Sysmem::deallocateMemory(Byte* dataAllocated, Size size) {
}
}
Resource::Sysmem::Sysmem() :
_stamp(0),
_size(0),
_data(NULL)
{
}
// Default construction: every member keeps its in-class default initializer.
Resource::Sysmem::Sysmem() = default;
Resource::Sysmem::Sysmem(Size size, const Byte* bytes) :
_stamp(0),
_size(0),
_data(NULL)
{
if (size > 0) {
_size = allocateMemory(&_data, size);
if (_size >= size) {
if (bytes) {
memcpy(_data, bytes, size);
}
}
// Constructs a Sysmem holding a copy of `size` bytes read from `bytes`.
// When `size` is zero or `bytes` is null, setData is not called and the members
// keep their default initializers.
Resource::Sysmem::Sysmem(Size size, const Byte* bytes) {
    if (size > 0 && bytes) {
        // Forward the requested size. The member _size is still 0 at this point,
        // so passing it instead would allocate and copy nothing.
        setData(size, bytes);
    }
}
Resource::Sysmem::Sysmem(const Sysmem& sysmem) :
_stamp(0),
_size(0),
_data(NULL)
{
Resource::Sysmem::Sysmem(const Sysmem& sysmem) {
if (sysmem.getSize() > 0) {
_size = allocateMemory(&_data, sysmem.getSize());
if (_size >= sysmem.getSize()) {
if (sysmem.readData()) {
memcpy(_data, sysmem.readData(), sysmem.getSize());
}
}
allocate(sysmem._size);
setData(_size, sysmem._data);
}
}
@ -208,7 +186,6 @@ Resource::Size Resource::Sysmem::setData( Size size, const Byte* bytes ) {
if (allocate(size) == size) {
if (size && bytes) {
memcpy( _data, bytes, _size );
_stamp++;
}
}
return _size;
@ -217,7 +194,6 @@ Resource::Size Resource::Sysmem::setData( Size size, const Byte* bytes ) {
Resource::Size Resource::Sysmem::setSubData( Size offset, Size size, const Byte* bytes) {
if (size && ((offset + size) <= getSize()) && bytes) {
memcpy( _data + offset, bytes, size );
_stamp++;
return size;
}
return 0;
@ -264,65 +240,105 @@ Buffer::Size Buffer::getBufferGPUMemoryUsage() {
return Context::getBufferGPUMemoryUsage();
}
Buffer::Buffer() :
Resource(),
_sysmem(new Sysmem()) {
// Creates an empty buffer whose dirty tracking uses pages of `pageSize` bytes.
// Also bumps the global count of CPU-side buffers.
Buffer::Buffer(Size pageSize) :
    // _flags has no in-class initializer and is read (tested against DIRTY)
    // before anything is guaranteed to have written it, so start it out clean.
    _flags(0),
    _pageSize(pageSize) {
    _bufferCPUCount++;
}
Buffer::Buffer(Size size, const Byte* bytes) :
Resource(),
_sysmem(new Sysmem(size, bytes)) {
_bufferCPUCount++;
Buffer::updateBufferCPUMemoryUsage(0, _sysmem->getSize());
// Creates a buffer with the given page size (delegating to the page-size
// constructor) and then fills it through setData(size, bytes).
Buffer::Buffer(Size size, const Byte* bytes, Size pageSize) :
    Buffer(pageSize)
{
    setData(size, bytes);
}
Buffer::Buffer(const Buffer& buf) :
Resource(),
_sysmem(new Sysmem(buf.getSysmem())) {
_bufferCPUCount++;
Buffer::updateBufferCPUMemoryUsage(0, _sysmem->getSize());
// Copy constructor: adopts the source's page size, then copies its bytes
// through setData.
Buffer::Buffer(const Buffer& buf) :
    Buffer(buf._pageSize)
{
    const auto sourceSize = buf.getSize();
    const auto sourceBytes = buf.getData();
    setData(sourceSize, sourceBytes);
}
Buffer& Buffer::operator=(const Buffer& buf) {
(*_sysmem) = buf.getSysmem();
const_cast<Size&>(_pageSize) = buf._pageSize;
setData(buf.getSize(), buf.getData());
return (*this);
}
Buffer::~Buffer() {
_bufferCPUCount--;
if (_sysmem) {
Buffer::updateBufferCPUMemoryUsage(_sysmem->getSize(), 0);
delete _sysmem;
_sysmem = NULL;
}
Buffer::updateBufferCPUMemoryUsage(_sysmem.getSize(), 0);
}
Buffer::Size Buffer::resize(Size size) {
_end = size;
auto prevSize = editSysmem().getSize();
auto newSize = editSysmem().resize(size);
Buffer::updateBufferCPUMemoryUsage(prevSize, newSize);
return newSize;
if (prevSize < size) {
auto newPages = getRequiredPageCount();
auto newSize = newPages * _pageSize;
editSysmem().resize(size);
// All new pages start off as clean, because they haven't been populated by data
_pages.resize(newPages, 0);
Buffer::updateBufferCPUMemoryUsage(prevSize, newSize);
}
return _end;
}
// Marks as DIRTY every page touched by the byte range [offset, offset + bytes),
// and raises the buffer-wide DIRTY flag. A zero-length range is a no-op.
void Buffer::dirtyPages(Size offset, Size bytes) {
    if (bytes == 0) {
        return;
    }

    _flags |= DIRTY;

    // Page containing the first byte of the range.
    const Size firstPage = offset / _pageSize;
    // Bytes available in that page from `offset` to the page's end.
    const Size roomInFirstPage = _pageSize - (offset % _pageSize);

    // A non-empty range always touches at least the first page.
    Size touchedPages = 1;
    if (bytes > roomInFirstPage) {
        // Whatever does not fit in the first page spills into following pages:
        // one per full page, plus one more for a trailing partial page.
        const Size spill = bytes - roomInFirstPage;
        touchedPages += spill / _pageSize;
        if ((spill % _pageSize) != 0) {
            touchedPages += 1;
        }
    }

    const Size endPage = firstPage + touchedPages;
    for (Size page = firstPage; page < endPage; ++page) {
        _pages[page] |= DIRTY;
    }
}
Buffer::Size Buffer::setData(Size size, const Byte* data) {
auto prevSize = editSysmem().getSize();
auto newSize = editSysmem().setData(size, data);
Buffer::updateBufferCPUMemoryUsage(prevSize, newSize);
return newSize;
resize(size);
setSubData(0, size, data);
return _end;
}
Buffer::Size Buffer::setSubData(Size offset, Size size, const Byte* data) {
return editSysmem().setSubData( offset, size, data);
auto changedBytes = editSysmem().setSubData(offset, size, data);
if (changedBytes) {
dirtyPages(offset, changedBytes);
}
return changedBytes;
}
Buffer::Size Buffer::append(Size size, const Byte* data) {
auto prevSize = editSysmem().getSize();
auto newSize = editSysmem().append( size, data);
Buffer::updateBufferCPUMemoryUsage(prevSize, newSize);
return newSize;
auto offset = _end;
resize(_end + size);
setSubData(offset, size, data);
return _end;
}
// Returns the logical size of the buffer in bytes (_end).
// The assertion checks that the sysmem allocation is at least that large.
Buffer::Size Buffer::getSize() const {
Q_ASSERT(getSysmem().getSize() >= _end);
return _end;
}
// Number of pages of _pageSize bytes needed to cover _end bytes
// (_end / _pageSize, rounded up).
Buffer::Size Buffer::getRequiredPageCount() const {
    const Size fullPages = _end / _pageSize;
    const bool hasPartialPage = (_end % _pageSize) != 0;
    return hasPartialPage ? fullPages + 1 : fullPages;
}
const Element BufferView::DEFAULT_ELEMENT = Element( gpu::SCALAR, gpu::UINT8, gpu::RAW );

View file

@ -88,10 +88,10 @@ protected:
// Access the byte array.
// The edit version allow to map data.
const Byte* readData() const { return _data; }
Byte* editData() { _stamp++; return _data; }
Byte* editData() { return _data; }
template< typename T > const T* read() const { return reinterpret_cast< T* > ( _data ); }
template< typename T > T* edit() { _stamp++; return reinterpret_cast< T* > ( _data ); }
template< typename T > T* edit() { return reinterpret_cast< T* > ( _data ); }
// Access the current version of the sysmem, used to compare if copies are in sync
Stamp getStamp() const { return _stamp; }
@ -102,9 +102,9 @@ protected:
bool isAvailable() const { return (_data != 0); }
private:
Stamp _stamp;
Size _size;
Byte* _data;
Stamp _stamp { 0 };
Size _size { 0 };
Byte* _data { nullptr };
};
};
@ -115,19 +115,26 @@ class Buffer : public Resource {
static void updateBufferCPUMemoryUsage(Size prevObjectSize, Size newObjectSize);
public:
enum Flag {
DIRTY = 0x01,
};
// Currently only one flag... 'dirty'
using PageFlags = std::vector<uint8_t>;
static const Size DEFAULT_PAGE_SIZE = 4096;
static uint32_t getBufferCPUCount();
static Size getBufferCPUMemoryUsage();
static uint32_t getBufferGPUCount();
static Size getBufferGPUMemoryUsage();
Buffer();
Buffer(Size size, const Byte* bytes);
Buffer(Size pageSize = DEFAULT_PAGE_SIZE);
Buffer(Size size, const Byte* bytes, Size pageSize = DEFAULT_PAGE_SIZE);
Buffer(const Buffer& buf); // deep copy of the sysmem buffer
Buffer& operator=(const Buffer& buf); // deep copy of the sysmem buffer
~Buffer();
// The size in bytes of data stored in the buffer
Size getSize() const { return getSysmem().getSize(); }
Size getSize() const;
const Byte* getData() const { return getSysmem().readData(); }
Byte* editData() { return editSysmem().editData(); }
@ -143,6 +150,20 @@ public:
// \return the number of bytes copied
Size setSubData(Size offset, Size size, const Byte* data);
// Overwrites the element at position `index` of a T-typed view of the buffer,
// i.e. the sizeof(T) bytes starting at byte offset index * sizeof(T).
// \return the number of bytes copied
template <typename T>
Size setSubData(Size index, const T& t) {
    const Size elementBytes = sizeof(T);
    const Size byteOffset = index * elementBytes;
    const Byte* rawBytes = reinterpret_cast<const Byte*>(&t);
    return setSubData(byteOffset, elementBytes, rawBytes);
}
// Overwrites t.size() consecutive elements of a T-typed view of the buffer,
// starting at element position `index` (byte offset index * sizeof(T)).
// \return the number of bytes copied
template <typename T>
Size setSubData(Size index, const std::vector<T>& t) {
    Size offset = index * sizeof(T);
    Size size = t.size() * sizeof(T);
    // data() is valid on an empty vector, whereas &t[0] is undefined behavior there.
    return setSubData(offset, size, reinterpret_cast<const Byte*>(t.data()));
}
// Append new data at the end of the current buffer
// do a resize( size + getSize) and copy the new data
// \return the number of bytes copied
@ -158,15 +179,24 @@ public:
return append(sizeof(T) * t.size(), reinterpret_cast<const Byte*>(&t[0]));
}
// Access the sysmem object.
const Sysmem& getSysmem() const { assert(_sysmem); return (*_sysmem); }
Sysmem& editSysmem() { assert(_sysmem); return (*_sysmem); }
const GPUObjectPointer gpuObject {};
protected:
// Access the sysmem object, limited to ourselves and GPUObject derived classes
const Sysmem& getSysmem() const { return _sysmem; }
Sysmem& editSysmem() { return _sysmem; }
Sysmem* _sysmem = NULL;
Size getRequiredPageCount() const;
void dirtyPages(Size offset, Size bytes);
Size _end { 0 };
mutable uint8_t _flags;
mutable PageFlags _pages;
const Size _pageSize;
Sysmem _sysmem;
// FIXME find a more generic way to do this.
friend class GLBackend;
};
typedef std::shared_ptr<Buffer> BufferPointer;