3
0
Fork 0
mirror of https://github.com/lubosz/overte.git synced 2025-04-27 05:35:37 +02:00

First pass at new texture transfer logic

This commit is contained in:
Brad Davis 2017-02-14 17:58:41 -08:00
parent 75c17e89a2
commit 1f058f069e
3 changed files with 192 additions and 43 deletions

View file

@ -14,6 +14,7 @@
#include "../gl/GLBackend.h"
#include "../gl/GLTexture.h"
#include <thread>
#define INCREMENTAL_TRANSFER 0
@ -39,7 +40,7 @@ public:
GL45Texture(const std::weak_ptr<GLBackend>& backend, const Texture& texture);
void generateMips() const override;
void copyMipFaceFromTexture(uint16_t sourceMip, uint16_t targetMip, uint8_t face) const;
void copyMipFaceLinesFromTexture(uint16_t sourceMip, uint16_t targetMip, uint8_t face, uint32_t lineOffset, uint32_t lines, size_t dataOffset) const;
void copyMipFaceLinesFromTexture(uint16_t mip, uint8_t face, const uvec3& size, uint32_t yOffset, GLenum format, GLenum type, const void* sourcePointer) const;
virtual void syncSampler() const;
};
@ -95,14 +96,50 @@ public:
};
using QueuePair = std::pair<TextureWeakPointer, float>;
class QueuePairLess {
public:
struct QueuePairLess {
bool operator()(const QueuePair& a, const QueuePair& b) {
return a.second < b.second;
}
};
using WorkQueue = std::priority_queue<QueuePair, std::vector<QueuePair>, QueuePairLess>;
class TransferJob {
using VoidLambda = std::function<void()>;
using VoidLambdaQueue = std::queue<VoidLambda>;
using ThreadPointer = std::shared_ptr<std::thread>;
const GL45VariableAllocationTexture& _parent;
const uint16_t _sourceMip;
const uint16_t _targetMip;
const uint8_t _face;
const uint32_t _lines;
const uint32_t _lineOffset;
// Holds the contents to transfer to the GPU in CPU memory
std::vector<uint8_t> _buffer;
// Indicates whether a transfer from backing storage to internal storage has started
bool _bufferingStarted { false };
bool _transferOnly { false };
bool _bufferingCompleted { false };
VoidLambda _transferLambda;
VoidLambda _bufferingLambda;
static ThreadPointer _bufferThread;
static Mutex _mutex;
static VoidLambdaQueue _bufferLambdaQueue;
static std::atomic<bool> _shutdownBufferingThread;
static void bufferLoop();
public:
TransferJob(const GL45VariableAllocationTexture& parent, std::function<void()> transferLambda);
TransferJob(const GL45VariableAllocationTexture& parent, uint16_t sourceMip, uint16_t targetMip, uint8_t face, uint32_t lines = 0, uint32_t lineOffset = 0);
bool tryTransfer();
static void startTransferLoop();
static void stopTransferLoop();
private:
void startBuffering();
void transfer();
};
using TransferQueue = std::queue<TransferJob>;
static MemoryPressureState _memoryPressureState;
protected:
static std::atomic<bool> _memoryPressureStateStale;
@ -110,6 +147,7 @@ public:
static WorkQueue _transferQueue;
static WorkQueue _promoteQueue;
static WorkQueue _demoteQueue;
static TexturePointer _currentTransferTexture;
static const uvec3 INITIAL_MIP_TRANSFER_DIMENSIONS;
@ -128,7 +166,7 @@ public:
bool canPromote() const { return _allocatedMip > 0; }
bool canDemote() const { return _allocatedMip < _maxAllocatedMip; }
bool hasPendingTransfers() const { return !_pendingTransfers.empty(); }
void executeNextTransfer();
void executeNextTransfer(const TexturePointer& currentTexture);
uint32 size() const override { return _size; }
virtual void populateTransferQueue() = 0;
virtual void promote() = 0;
@ -148,7 +186,7 @@ public:
// Contains a series of lambdas that when executed will transfer data to the GPU, modify
// the _populatedMip and update the sampler in order to fully populate the allocated texture
// until _populatedMip == _allocatedMip
std::queue<PromoteLambda> _pendingTransfers;
TransferQueue _pendingTransfers;
};
class GL45ResourceTexture : public GL45VariableAllocationTexture {

View file

@ -118,26 +118,17 @@ void GL45Texture::generateMips() const {
(void)CHECK_GL_ERROR();
}
void GL45Texture::copyMipFaceLinesFromTexture(uint16_t sourceMip, uint16_t targetMip, uint8_t face, uint32_t lineOffset, uint32_t lines, size_t dataOffset) const {
const auto& texture = _gpuObject;
if (!texture.isStoredMipFaceAvailable(sourceMip)) {
return;
}
auto mipDimensions = texture.evalMipDimensions(sourceMip);
glm::uvec3 size = { mipDimensions.x, lines, mipDimensions.z };
auto mipData = texture.accessStoredMipFace(sourceMip, face);
auto sourcePointer = mipData->readData() + dataOffset;
GLTexelFormat texelFormat = GLTexelFormat::evalGLTexelFormat(texture.getTexelFormat(), mipData->getFormat());
void GL45Texture::copyMipFaceLinesFromTexture(uint16_t mip, uint8_t face, const uvec3& size, uint32_t yOffset, GLenum format, GLenum type, const void* sourcePointer) const {
if (GL_TEXTURE_2D == _target) {
glTextureSubImage2D(_id, targetMip, 0, lineOffset, size.x, size.y, texelFormat.format, texelFormat.type, sourcePointer);
glTextureSubImage2D(_id, mip, 0, yOffset, size.x, size.y, format, type, sourcePointer);
} else if (GL_TEXTURE_CUBE_MAP == _target) {
// DSA ARB does not work on AMD, so use EXT
// unless EXT is not available on the driver
if (glTextureSubImage2DEXT) {
auto target = GLTexture::CUBE_FACE_LAYOUT[face];
glTextureSubImage2DEXT(_id, target, targetMip, 0, lineOffset, size.x, size.y, texelFormat.format, texelFormat.type, sourcePointer);
glTextureSubImage2DEXT(_id, target, mip, 0, yOffset, size.x, size.y, format, type, sourcePointer);
} else {
glTextureSubImage3D(_id, targetMip, 0, lineOffset, face, size.x, size.y, 1, texelFormat.format, texelFormat.type, sourcePointer);
glTextureSubImage3D(_id, mip, 0, yOffset, face, size.x, size.y, 1, format, type, sourcePointer);
}
} else {
Q_ASSERT(false);
@ -146,8 +137,13 @@ void GL45Texture::copyMipFaceLinesFromTexture(uint16_t sourceMip, uint16_t targe
}
void GL45Texture::copyMipFaceFromTexture(uint16_t sourceMip, uint16_t targetMip, uint8_t face) const {
if (!_gpuObject.isStoredMipFaceAvailable(sourceMip)) {
return;
}
auto size = _gpuObject.evalMipDimensions(sourceMip);
copyMipFaceLinesFromTexture(sourceMip, targetMip, face, 0, size.y, 0);
auto mipData = _gpuObject.accessStoredMipFace(sourceMip, face);
GLTexelFormat texelFormat = GLTexelFormat::evalGLTexelFormat(_gpuObject.getTexelFormat(), mipData->getFormat());
copyMipFaceLinesFromTexture(targetMip, face, size, 0, texelFormat.format, texelFormat.type, mipData->readData());
}
void GL45Texture::syncSampler() const {

View file

@ -39,6 +39,7 @@ const uvec3 GL45VariableAllocationTexture::INITIAL_MIP_TRANSFER_DIMENSIONS { 64,
WorkQueue GL45VariableAllocationTexture::_transferQueue;
WorkQueue GL45VariableAllocationTexture::_promoteQueue;
WorkQueue GL45VariableAllocationTexture::_demoteQueue;
TexturePointer GL45VariableAllocationTexture::_currentTransferTexture;
#define OVERSUBSCRIBED_PRESSURE_VALUE 0.95f
#define UNDERSUBSCRIBED_PRESSURE_VALUE 0.85f
@ -46,6 +47,123 @@ WorkQueue GL45VariableAllocationTexture::_demoteQueue;
static const size_t DEFAULT_ALLOWED_TEXTURE_MEMORY = MB_TO_BYTES(DEFAULT_ALLOWED_TEXTURE_MEMORY_MB);
using TransferJob = GL45VariableAllocationTexture::TransferJob;
static const uvec3 MAX_TRANSFER_DIMENSIONS { 1024, 1024, 1 };
static const size_t MAX_TRANSFER_SIZE = MAX_TRANSFER_DIMENSIONS.x * MAX_TRANSFER_DIMENSIONS.y * 4;
std::shared_ptr<std::thread> TransferJob::_bufferThread { nullptr };
std::atomic<bool> TransferJob::_shutdownBufferingThread { false };
Mutex TransferJob::_mutex;
TransferJob::VoidLambdaQueue TransferJob::_bufferLambdaQueue;
// Spin up the shared background buffering thread, if it is not already running.
// Safe to call repeatedly; subsequent calls are no-ops while the thread lives.
void TransferJob::startTransferLoop() {
    if (_bufferThread) {
        // Thread already exists; nothing to start.
        return;
    }
    // Clear the shutdown flag before the thread starts polling it.
    _shutdownBufferingThread = false;
    _bufferThread = std::make_shared<std::thread>(&TransferJob::bufferLoop);
}
// Ask the background buffering thread to exit, wait for it to finish, and
// reset state so the loop can be started again later. No-op if never started.
void TransferJob::stopTransferLoop() {
    if (_bufferThread) {
        _shutdownBufferingThread = true;
        _bufferThread->join();
        _bufferThread.reset();
        // Re-arm the flag so a future startTransferLoop() begins cleanly.
        _shutdownBufferingThread = false;
    }
}
// Build a job that stages mip data on the buffering thread (_bufferingLambda)
// and later uploads it to the GPU on the render thread (_transferLambda).
// lines == 0 means "transfer the whole mip face"; otherwise only `lines`
// scanlines starting at `lineOffset` are staged and uploaded.
TransferJob::TransferJob(const GL45VariableAllocationTexture& parent, uint16_t sourceMip, uint16_t targetMip, uint8_t face, uint32_t lines, uint32_t lineOffset)
    : _parent(parent), _sourceMip(sourceMip), _targetMip(targetMip), _face(face), _lines(lines), _lineOffset(lineOffset) {

    if (0 == lines) {
        // Whole-face transfer: copy the complete stored mip into the CPU
        // staging buffer.
        _bufferingLambda = [this] {
            auto mipData = _parent._gpuObject.accessStoredMipFace(_sourceMip, _face);
            auto size = mipData->getSize();
            _buffer.resize(size);
            memcpy(_buffer.data(), mipData->readData(), size);
            _bufferingCompleted = true;
        };
    } else {
        // Partial transfer: stage only `_lines` rows starting at `_lineOffset`.
        // Assumes tightly packed rows (mip size divisible by height) — matches
        // the bytesPerLine computation used by the queue-population code.
        _bufferingLambda = [this] {
            auto mipData = _parent._gpuObject.accessStoredMipFace(_sourceMip, _face);
            auto dimensions = _parent._gpuObject.evalMipDimensions(_sourceMip);
            auto mipSize = mipData->getSize();
            auto bytesPerLine = (uint32_t)mipSize / dimensions.y;
            auto transferSize = bytesPerLine * _lines;
            auto sourceOffset = bytesPerLine * _lineOffset;
            _buffer.resize(transferSize);
            memcpy(_buffer.data(), mipData->readData() + sourceOffset, transferSize);
            _bufferingCompleted = true;
        };
    }

    // Runs on the render thread once buffering is done: upload the staged
    // bytes, then release the staging memory.
    _transferLambda = [this] {
        auto mipData = _parent._gpuObject.accessStoredMipFace(_sourceMip, _face);
        auto dimensions = _parent._gpuObject.evalMipDimensions(_sourceMip);
        // FIX: for a partial transfer the upload height must be the number of
        // buffered lines, not the full mip height — _buffer only holds
        // _lines * bytesPerLine bytes, so uploading dimensions.y rows would
        // over-read the staging buffer.
        if (0 != _lines) {
            dimensions.y = _lines;
        }
        GLTexelFormat texelFormat = GLTexelFormat::evalGLTexelFormat(_parent._gpuObject.getTexelFormat(), mipData->getFormat());
        _parent.copyMipFaceLinesFromTexture(_targetMip, _face, dimensions, _lineOffset, texelFormat.format, texelFormat.type, _buffer.data());
        // FIX: the original `_buffer.swap(std::vector<uint8_t>())` binds a
        // temporary to a non-const lvalue reference — ill-formed standard C++
        // (compiles only as an MSVC extension). Move-assigning an empty vector
        // frees the capacity portably.
        _buffer = std::vector<uint8_t>();
    };
}
// Build a transfer-only job (no CPU-side buffering): the supplied lambda is
// executed directly on the render thread. Used for post-transfer bookkeeping
// such as updating _populatedMip and re-syncing the sampler.
TransferJob::TransferJob(const GL45VariableAllocationTexture& parent, std::function<void()> transferLambda)
    // _bufferingCompleted starts true so tryTransfer() runs the lambda immediately.
    : _parent(parent), _sourceMip(0), _targetMip(0), _face(0), _lines(0), _lineOffset(0),
      _bufferingCompleted(true), _transferLambda(std::move(transferLambda)) {
    // FIX: reuse the canonical startup path instead of duplicating the
    // thread-creation code inline — the original copy skipped resetting
    // _shutdownBufferingThread before launching the thread.
    startTransferLoop();
}
// Attempt to run this job's GPU upload. Returns true when the transfer
// executed; returns false — after ensuring the background staging copy has
// been queued — when the CPU-side buffer is not ready yet.
bool TransferJob::tryTransfer() {
    if (!_bufferingCompleted) {
        // Data not staged yet; kick off (or keep waiting on) the buffering.
        startBuffering();
        return false;
    }
    _transferLambda();
    return true;
}
// Queue this job's buffering lambda for the background thread, exactly once.
// Subsequent calls are no-ops thanks to the _bufferingStarted latch.
void TransferJob::startBuffering() {
    if (!_bufferingStarted) {
        _bufferingStarted = true;
        // Push under the lock; the buffering thread swaps the queue out.
        Lock lock(_mutex);
        _bufferLambdaQueue.push(_bufferingLambda);
    }
}
// Background thread entry point: drains queued buffering lambdas until
// _shutdownBufferingThread is set (by stopTransferLoop).
void TransferJob::bufferLoop() {
    while (!_shutdownBufferingThread) {
        VoidLambdaQueue workingQueue;
        {
            // Swap the shared queue out under the lock so the (potentially
            // slow) lambdas below execute without holding the mutex.
            Lock lock(_mutex);
            _bufferLambdaQueue.swap(workingQueue);
        }
        if (workingQueue.empty()) {
            // Nothing pending; sleep briefly to avoid busy-waiting.
            QThread::msleep(5);
            continue;
        }
        // Run every staged buffering task that was queued.
        while (!workingQueue.empty()) {
            workingQueue.front()();
            workingQueue.pop();
        }
    }
}
void GL45VariableAllocationTexture::addMemoryManagedTexture(const TexturePointer& texturePointer) {
_memoryManagedTextures.push_back(texturePointer);
addToWorkQueue(texturePointer);
@ -190,7 +308,14 @@ void GL45VariableAllocationTexture::updateMemoryPressure() {
}
if (newState != _memoryPressureState) {
if (MemoryPressureState::Transfer == _memoryPressureState) {
TransferJob::stopTransferLoop();
}
_memoryPressureState = newState;
if (MemoryPressureState::Transfer == _memoryPressureState) {
TransferJob::startTransferLoop();
}
// Clear the existing queue
_transferQueue = WorkQueue();
_promoteQueue = WorkQueue();
@ -223,20 +348,17 @@ void GL45VariableAllocationTexture::processWorkQueues() {
if (!object->canDemote()) {
continue;
}
//qDebug() << "QQQ executing demote for " << texture->source().c_str();
object->demote();
} else if (MemoryPressureState::Undersubscribed == _memoryPressureState) {
if (!object->canPromote()) {
continue;
}
//qDebug() << "QQQ executing promote for " << texture->source().c_str();
object->promote();
} else if (MemoryPressureState::Transfer == _memoryPressureState) {
if (!object->hasPendingTransfers()) {
continue;
}
//qDebug() << "QQQ executing transfer for " << texture->source().c_str();
object->executeNextTransfer();
object->executeNextTransfer(texture);
} else {
Q_UNREACHABLE();
}
@ -265,10 +387,14 @@ GL45VariableAllocationTexture::~GL45VariableAllocationTexture() {
Backend::updateTextureGPUMemoryUsage(_size, 0);
}
void GL45VariableAllocationTexture::executeNextTransfer() {
void GL45VariableAllocationTexture::executeNextTransfer(const TexturePointer& currentTexture) {
if (!_pendingTransfers.empty()) {
_pendingTransfers.front()();
_pendingTransfers.pop();
// Keeping hold of a strong pointer during the transfer ensures that the transfer thread cannot try to access a destroyed texture
_currentTransferTexture = currentTexture;
if (_pendingTransfers.front().tryTransfer()) {
_pendingTransfers.pop();
_currentTransferTexture.reset();
}
}
}
@ -394,17 +520,15 @@ void GL45ResourceTexture::demote() {
populateTransferQueue();
}
void GL45ResourceTexture::populateTransferQueue() {
PROFILE_RANGE(render_gpu_gl, __FUNCTION__);
_pendingTransfers = std::queue<PromoteLambda>();
if (_populatedMip <= _allocatedMip) {
return;
}
_pendingTransfers = TransferQueue();
static const uvec3 MAX_TRANSFER_DIMENSIONS { 1024, 1024, 1 };
static const size_t MAX_TRANSFER_SIZE = MAX_TRANSFER_DIMENSIONS.x * MAX_TRANSFER_DIMENSIONS.y * 4;
const uint8_t maxFace = GLTexture::getFaceCount(_target);
uint16_t sourceMip = _populatedMip;
do {
--sourceMip;
@ -418,11 +542,7 @@ void GL45ResourceTexture::populateTransferQueue() {
// If the mip is less than the max transfer size, then just do it in one transfer
if (glm::all(glm::lessThanEqual(mipDimensions, MAX_TRANSFER_DIMENSIONS))) {
// Can the mip be transferred in one go
_pendingTransfers.push([=] {
Q_ASSERT(sourceMip >= _allocatedMip);
// FIXME modify the copy mechanism to be incremental
copyMipFaceFromTexture(sourceMip, targetMip, face);
});
_pendingTransfers.emplace(*this, sourceMip, targetMip, face);
continue;
}
@ -433,24 +553,19 @@ void GL45ResourceTexture::populateTransferQueue() {
auto bytesPerLine = (uint32_t)mipData->getSize() / lines;
Q_ASSERT(0 == (mipData->getSize() % lines));
uint32_t linesPerTransfer = (uint32_t)(MAX_TRANSFER_SIZE / bytesPerLine);
size_t offset = 0;
uint32_t lineOffset = 0;
while (lineOffset < lines) {
uint32_t linesToCopy = std::min<uint32_t>(lines - lineOffset, linesPerTransfer);
uvec3 size { mipDimensions.x, linesToCopy, 1 };
_pendingTransfers.push([=] {
copyMipFaceLinesFromTexture(sourceMip, targetMip, face, lineOffset, linesToCopy, offset);
});
_pendingTransfers.emplace(TransferJob(*this, sourceMip, targetMip, face, linesToCopy, lineOffset));
lineOffset += linesToCopy;
offset += (linesToCopy * bytesPerLine);
}
}
// queue up the sampler and populated mip change for after the transfer has completed
_pendingTransfers.push([=] {
_pendingTransfers.emplace(TransferJob(*this, [=] {
_populatedMip = sourceMip;
syncSampler();
});
}));
} while (sourceMip != _allocatedMip);
}