Merge pull request #9010 from samcake/orange

Improve Input Stage Format switching in GL45Backend
This commit is contained in:
Chris Collins 2016-11-16 14:00:17 -08:00 committed by GitHub
commit 44b2b090df
21 changed files with 491 additions and 259 deletions

View file

@ -83,9 +83,7 @@ public:
const Vec4i& region, QImage& destImage) final override;
static const int MAX_NUM_ATTRIBUTES = Stream::NUM_INPUT_SLOTS;
static const int MAX_NUM_INPUT_BUFFERS = 16;
// this is the maximum numeber of available input buffers
size_t getNumInputBuffers() const { return _input._invalidBuffers.size(); }
// this is the maximum per shader stage on the low end apple
@ -147,6 +145,10 @@ public:
virtual void do_startNamedCall(const Batch& batch, size_t paramOffset) final;
virtual void do_stopNamedCall(const Batch& batch, size_t paramOffset) final;
static const int MAX_NUM_ATTRIBUTES = Stream::NUM_INPUT_SLOTS;
// The drawcall Info attribute channel is reserved and is the upper bound for the number of availables Input buffers
static const int MAX_NUM_INPUT_BUFFERS = Stream::DRAW_CALL_INFO;
virtual void do_pushProfileRange(const Batch& batch, size_t paramOffset) final;
virtual void do_popProfileRange(const Batch& batch, size_t paramOffset) final;
@ -235,18 +237,21 @@ protected:
virtual void initInput() final;
virtual void killInput() final;
virtual void syncInputStateCache() final;
virtual void resetInputStage() final;
virtual void updateInput();
virtual void resetInputStage();
virtual void updateInput() = 0;
struct InputStageState {
bool _invalidFormat { true };
Stream::FormatPointer _format;
std::string _formatKey;
typedef std::bitset<MAX_NUM_ATTRIBUTES> ActivationCache;
ActivationCache _attributeActivation { 0 };
typedef std::bitset<MAX_NUM_INPUT_BUFFERS> BuffersState;
BuffersState _invalidBuffers { 0 };
BuffersState _invalidBuffers{ 0 };
BuffersState _attribBindingBuffers{ 0 };
Buffers _buffers;
Offsets _bufferOffsets;
@ -266,7 +271,11 @@ protected:
GLuint _defaultVAO { 0 };
InputStageState() :
_buffers(_invalidBuffers.size()),
_invalidFormat(true),
_format(0),
_formatKey(),
_attributeActivation(0),
_buffers(_invalidBuffers.size(), BufferPointer(0)),
_bufferOffsets(_invalidBuffers.size(), 0),
_bufferStrides(_invalidBuffers.size(), 0),
_bufferVBOs(_invalidBuffers.size(), 0) {}
@ -276,8 +285,8 @@ protected:
void killTransform();
// Synchronize the state cache of this Backend with the actual real state of the GL Context
void syncTransformStateCache();
void updateTransform(const Batch& batch);
void resetTransformStage();
virtual void updateTransform(const Batch& batch) = 0;
virtual void resetTransformStage();
// Allows for correction of the camera pose to account for changes
// between the time when a was recorded and the time(s) when it is
@ -325,6 +334,8 @@ protected:
bool _invalidProj { false };
bool _invalidViewport { false };
bool _enabledDrawcallInfoBuffer{ false };
using Pair = std::pair<size_t, size_t>;
using List = std::list<Pair>;
List _cameraOffsets;
@ -399,8 +410,8 @@ protected:
void resetQueryStage();
struct QueryStageState {
};
uint32_t _rangeQueryDepth { 0 };
} _queryStage;
void resetStages();

View file

@ -10,16 +10,26 @@
//
#include "GLBackend.h"
#include "GLShared.h"
#include "GLInputFormat.h"
using namespace gpu;
using namespace gpu::gl;
void GLBackend::do_setInputFormat(const Batch& batch, size_t paramOffset) {
Stream::FormatPointer format = batch._streamFormats.get(batch._params[paramOffset]._uint);
if (format != _input._format) {
_input._format = format;
_input._invalidFormat = true;
if (format) {
auto inputFormat = GLInputFormat::sync((*format));
assert(inputFormat);
if (_input._formatKey != inputFormat->key) {
_input._formatKey = inputFormat->key;
_input._invalidFormat = true;
}
} else {
_input._formatKey.clear();
_input._invalidFormat = true;
}
}
}
@ -93,16 +103,9 @@ void GLBackend::resetInputStage() {
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0);
(void) CHECK_GL_ERROR();
glBindBuffer(GL_ARRAY_BUFFER, 0);
for (uint32_t i = 0; i < _input._attributeActivation.size(); i++) {
glDisableVertexAttribArray(i);
glVertexAttribPointer(i, 4, GL_FLOAT, GL_FALSE, 0, 0);
}
// Reset vertex buffer and format
_input._format.reset();
_input._formatKey.clear();
_input._invalidFormat = false;
_input._attributeActivation.reset();
@ -114,6 +117,7 @@ void GLBackend::resetInputStage() {
}
_input._invalidBuffers.reset();
// THe vertex array binding MUST be reset in the specific Backend versions as they use different techniques
}
void GLBackend::do_setIndexBuffer(const Batch& batch, size_t paramOffset) {
@ -151,183 +155,3 @@ void GLBackend::do_setIndirectBuffer(const Batch& batch, size_t paramOffset) {
(void)CHECK_GL_ERROR();
}
// Core 41 doesn't expose the features to really separate the vertex format from the vertex buffers binding
// Core 43 does :)
// FIXME crashing problem with glVertexBindingDivisor / glVertexAttribFormat
// Once resolved, break this up into the GL 4.1 and 4.5 backends
#if 1 || (GPU_INPUT_PROFILE == GPU_CORE_41)
#define NO_SUPPORT_VERTEX_ATTRIB_FORMAT
#else
#define SUPPORT_VERTEX_ATTRIB_FORMAT
#endif
void GLBackend::updateInput() {
#if defined(SUPPORT_VERTEX_ATTRIB_FORMAT)
if (_input._invalidFormat) {
InputStageState::ActivationCache newActivation;
// Assign the vertex format required
if (_input._format) {
for (auto& it : _input._format->getAttributes()) {
const Stream::Attribute& attrib = (it).second;
GLuint slot = attrib._slot;
GLuint count = attrib._element.getLocationScalarCount();
uint8_t locationCount = attrib._element.getLocationCount();
GLenum type = _elementTypeToGL41Type[attrib._element.getType()];
GLuint offset = attrib._offset;;
GLboolean isNormalized = attrib._element.isNormalized();
GLenum perLocationSize = attrib._element.getLocationSize();
for (size_t locNum = 0; locNum < locationCount; ++locNum) {
newActivation.set(slot + locNum);
glVertexAttribFormat(slot + locNum, count, type, isNormalized, offset + locNum * perLocationSize);
glVertexAttribBinding(slot + locNum, attrib._channel);
}
#ifdef GPU_STEREO_DRAWCALL_INSTANCED
glVertexBindingDivisor(attrib._channel, attrib._frequency * (isStereo() ? 2 : 1));
#else
glVertexBindingDivisor(attrib._channel, attrib._frequency);
#endif
}
(void)CHECK_GL_ERROR();
}
// Manage Activation what was and what is expected now
for (size_t i = 0; i < newActivation.size(); i++) {
bool newState = newActivation[i];
if (newState != _input._attributeActivation[i]) {
if (newState) {
glEnableVertexAttribArray(i);
} else {
glDisableVertexAttribArray(i);
}
_input._attributeActivation.flip(i);
}
}
(void)CHECK_GL_ERROR();
_input._invalidFormat = false;
_stats._ISNumFormatChanges++;
}
if (_input._invalidBuffers.any()) {
int numBuffers = _input._buffers.size();
auto buffer = _input._buffers.data();
auto vbo = _input._bufferVBOs.data();
auto offset = _input._bufferOffsets.data();
auto stride = _input._bufferStrides.data();
for (int bufferNum = 0; bufferNum < numBuffers; bufferNum++) {
if (_input._invalidBuffers.test(bufferNum)) {
glBindVertexBuffer(bufferNum, (*vbo), (*offset), (*stride));
}
buffer++;
vbo++;
offset++;
stride++;
}
_input._invalidBuffers.reset();
(void)CHECK_GL_ERROR();
}
#else
if (_input._invalidFormat || _input._invalidBuffers.any()) {
if (_input._invalidFormat) {
InputStageState::ActivationCache newActivation;
_stats._ISNumFormatChanges++;
// Check expected activation
if (_input._format) {
for (auto& it : _input._format->getAttributes()) {
const Stream::Attribute& attrib = (it).second;
uint8_t locationCount = attrib._element.getLocationCount();
for (int i = 0; i < locationCount; ++i) {
newActivation.set(attrib._slot + i);
}
}
}
// Manage Activation what was and what is expected now
for (unsigned int i = 0; i < newActivation.size(); i++) {
bool newState = newActivation[i];
if (newState != _input._attributeActivation[i]) {
if (newState) {
glEnableVertexAttribArray(i);
} else {
glDisableVertexAttribArray(i);
}
(void)CHECK_GL_ERROR();
_input._attributeActivation.flip(i);
}
}
}
// now we need to bind the buffers and assign the attrib pointers
if (_input._format) {
const Buffers& buffers = _input._buffers;
const Offsets& offsets = _input._bufferOffsets;
const Offsets& strides = _input._bufferStrides;
const Stream::Format::AttributeMap& attributes = _input._format->getAttributes();
auto& inputChannels = _input._format->getChannels();
_stats._ISNumInputBufferChanges++;
GLuint boundVBO = 0;
for (auto& channelIt : inputChannels) {
const Stream::Format::ChannelMap::value_type::second_type& channel = (channelIt).second;
if ((channelIt).first < buffers.size()) {
int bufferNum = (channelIt).first;
if (_input._invalidBuffers.test(bufferNum) || _input._invalidFormat) {
// GLuint vbo = gpu::GL41Backend::getBufferID((*buffers[bufferNum]));
GLuint vbo = _input._bufferVBOs[bufferNum];
if (boundVBO != vbo) {
glBindBuffer(GL_ARRAY_BUFFER, vbo);
(void)CHECK_GL_ERROR();
boundVBO = vbo;
}
_input._invalidBuffers[bufferNum] = false;
for (unsigned int i = 0; i < channel._slots.size(); i++) {
const Stream::Attribute& attrib = attributes.at(channel._slots[i]);
GLuint slot = attrib._slot;
GLuint count = attrib._element.getLocationScalarCount();
uint8_t locationCount = attrib._element.getLocationCount();
GLenum type = gl::ELEMENT_TYPE_TO_GL[attrib._element.getType()];
// GLenum perLocationStride = strides[bufferNum];
GLenum perLocationStride = attrib._element.getLocationSize();
GLuint stride = (GLuint)strides[bufferNum];
GLuint pointer = (GLuint)(attrib._offset + offsets[bufferNum]);
GLboolean isNormalized = attrib._element.isNormalized();
for (size_t locNum = 0; locNum < locationCount; ++locNum) {
glVertexAttribPointer(slot + (GLuint)locNum, count, type, isNormalized, stride,
reinterpret_cast<GLvoid*>(pointer + perLocationStride * (GLuint)locNum));
#ifdef GPU_STEREO_DRAWCALL_INSTANCED
glVertexAttribDivisor(slot + (GLuint)locNum, attrib._frequency * (isStereo() ? 2 : 1));
#else
glVertexAttribDivisor(slot + (GLuint)locNum, attrib._frequency);
#endif
}
// TODO: Support properly the IAttrib version
(void)CHECK_GL_ERROR();
}
}
}
}
}
// everything format related should be in sync now
_input._invalidFormat = false;
}
#endif
}

View file

@ -16,8 +16,10 @@ using namespace gpu::gl;
// Eventually, we want to test with TIME_ELAPSED instead of TIMESTAMP
#ifdef Q_OS_MAC
const uint32_t MAX_RANGE_QUERY_DEPTH = 1;
static bool timeElapsed = true;
#else
const uint32_t MAX_RANGE_QUERY_DEPTH = 10000;
static bool timeElapsed = false;
#endif
@ -25,12 +27,16 @@ void GLBackend::do_beginQuery(const Batch& batch, size_t paramOffset) {
auto query = batch._queries.get(batch._params[paramOffset]._uint);
GLQuery* glquery = syncGPUObject(*query);
if (glquery) {
++_queryStage._rangeQueryDepth;
glGetInteger64v(GL_TIMESTAMP, (GLint64*)&glquery->_batchElapsedTime);
if (timeElapsed) {
glBeginQuery(GL_TIME_ELAPSED, glquery->_endqo);
if (_queryStage._rangeQueryDepth <= MAX_RANGE_QUERY_DEPTH) {
glBeginQuery(GL_TIME_ELAPSED, glquery->_endqo);
}
} else {
glQueryCounter(glquery->_beginqo, GL_TIMESTAMP);
}
glquery->_rangeQueryDepth = _queryStage._rangeQueryDepth;
(void)CHECK_GL_ERROR();
}
}
@ -40,10 +46,13 @@ void GLBackend::do_endQuery(const Batch& batch, size_t paramOffset) {
GLQuery* glquery = syncGPUObject(*query);
if (glquery) {
if (timeElapsed) {
glEndQuery(GL_TIME_ELAPSED);
if (_queryStage._rangeQueryDepth <= MAX_RANGE_QUERY_DEPTH) {
glEndQuery(GL_TIME_ELAPSED);
}
} else {
glQueryCounter(glquery->_endqo, GL_TIMESTAMP);
}
--_queryStage._rangeQueryDepth;
GLint64 now;
glGetInteger64v(GL_TIMESTAMP, &now);
glquery->_batchElapsedTime = now - glquery->_batchElapsedTime;
@ -55,20 +64,24 @@ void GLBackend::do_endQuery(const Batch& batch, size_t paramOffset) {
void GLBackend::do_getQuery(const Batch& batch, size_t paramOffset) {
auto query = batch._queries.get(batch._params[paramOffset]._uint);
GLQuery* glquery = syncGPUObject(*query);
if (glquery) {
glGetQueryObjectui64v(glquery->_endqo, GL_QUERY_RESULT_AVAILABLE, &glquery->_result);
if (glquery->_result == GL_TRUE) {
if (timeElapsed) {
glGetQueryObjectui64v(glquery->_endqo, GL_QUERY_RESULT, &glquery->_result);
} else {
GLuint64 start, end;
glGetQueryObjectui64v(glquery->_beginqo, GL_QUERY_RESULT, &start);
glGetQueryObjectui64v(glquery->_endqo, GL_QUERY_RESULT, &end);
glquery->_result = end - start;
}
if (glquery) {
if (glquery->_rangeQueryDepth > MAX_RANGE_QUERY_DEPTH) {
query->triggerReturnHandler(glquery->_result, glquery->_batchElapsedTime);
} else {
glGetQueryObjectui64v(glquery->_endqo, GL_QUERY_RESULT_AVAILABLE, &glquery->_result);
if (glquery->_result == GL_TRUE) {
if (timeElapsed) {
glGetQueryObjectui64v(glquery->_endqo, GL_QUERY_RESULT, &glquery->_result);
} else {
GLuint64 start, end;
glGetQueryObjectui64v(glquery->_beginqo, GL_QUERY_RESULT, &start);
glGetQueryObjectui64v(glquery->_endqo, GL_QUERY_RESULT, &end);
glquery->_result = end - start;
}
query->triggerReturnHandler(glquery->_result, glquery->_batchElapsedTime);
}
(void)CHECK_GL_ERROR();
}
(void)CHECK_GL_ERROR();
}
}

View file

@ -85,6 +85,9 @@ void GLBackend::syncTransformStateCache() {
Mat4 modelView;
auto modelViewInv = glm::inverse(modelView);
_transform._view.evalFromRawMatrix(modelViewInv);
glDisableVertexAttribArray(gpu::Stream::DRAW_CALL_INFO);
_transform._enabledDrawcallInfoBuffer = false;
}
void GLBackend::TransformStageState::preUpdate(size_t commandIndex, const StereoState& stereo) {
@ -162,29 +165,7 @@ void GLBackend::TransformStageState::bindCurrentCamera(int eye) const {
}
}
void GLBackend::updateTransform(const Batch& batch) {
_transform.update(_commandIndex, _stereo);
auto& drawCallInfoBuffer = batch.getDrawCallInfoBuffer();
if (batch._currentNamedCall.empty()) {
auto& drawCallInfo = drawCallInfoBuffer[_currentDraw];
glDisableVertexAttribArray(gpu::Stream::DRAW_CALL_INFO); // Make sure attrib array is disabled
glVertexAttribI2i(gpu::Stream::DRAW_CALL_INFO, drawCallInfo.index, drawCallInfo.unused);
} else {
glEnableVertexAttribArray(gpu::Stream::DRAW_CALL_INFO); // Make sure attrib array is enabled
glBindBuffer(GL_ARRAY_BUFFER, _transform._drawCallInfoBuffer);
glVertexAttribIPointer(gpu::Stream::DRAW_CALL_INFO, 2, GL_UNSIGNED_SHORT, 0,
_transform._drawCallInfoOffsets[batch._currentNamedCall]);
#ifdef GPU_STEREO_DRAWCALL_INSTANCED
glVertexAttribDivisor(gpu::Stream::DRAW_CALL_INFO, (isStereo() ? 2 : 1));
#else
glVertexAttribDivisor(gpu::Stream::DRAW_CALL_INFO, 1);
#endif
}
(void)CHECK_GL_ERROR();
}
void GLBackend::resetTransformStage() {
glDisableVertexAttribArray(gpu::Stream::DRAW_CALL_INFO);
_transform._enabledDrawcallInfoBuffer = false;
}

View file

@ -0,0 +1,33 @@
//
// Created by Sam Gateau on 2016/07/21
// Copyright 2013-2016 High Fidelity, Inc.
//
// Distributed under the Apache License, Version 2.0.
// See the accompanying file LICENSE or http://www.apache.org/licenses/LICENSE-2.0.html
//
#include "GLInputFormat.h"
#include "GLBackend.h"
using namespace gpu;
using namespace gpu::gl;
GLInputFormat::GLInputFormat() {
}
GLInputFormat:: ~GLInputFormat() {
}
GLInputFormat* GLInputFormat::sync(const Stream::Format& inputFormat) {
GLInputFormat* object = Backend::getGPUObject<GLInputFormat>(inputFormat);
if (!object) {
object = new GLInputFormat();
object->key = inputFormat.getKey();
Backend::setGPUObject(inputFormat, object);
}
return object;
}

View file

@ -0,0 +1,29 @@
//
// Created by Sam Gateau on 2016/07/21
// Copyright 2013-2016 High Fidelity, Inc.
//
// Distributed under the Apache License, Version 2.0.
// See the accompanying file LICENSE or http://www.apache.org/licenses/LICENSE-2.0.html
//
#ifndef hifi_gpu_gl_GLInputFormat_h
#define hifi_gpu_gl_GLInputFormat_h
#include "GLShared.h"
namespace gpu {
namespace gl {
class GLInputFormat : public GPUObject {
public:
static GLInputFormat* sync(const Stream::Format& inputFormat);
GLInputFormat();
~GLInputFormat();
std::string key;
};
}
}
#endif

View file

@ -49,6 +49,7 @@ public:
const GLuint _beginqo = { 0 };
GLuint64 _result { (GLuint64)-1 };
GLuint64 _batchElapsedTime { (GLuint64) 0 };
uint32_t _rangeQueryDepth { 0 };
protected:
GLQuery(const std::weak_ptr<GLBackend>& backend, const Query& query, GLuint endId, GLuint beginId) : Parent(backend, query, endId), _beginqo(beginId) {}

View file

@ -77,13 +77,13 @@ protected:
void do_multiDrawIndexedIndirect(const Batch& batch, size_t paramOffset) override;
// Input Stage
void resetInputStage() override;
void updateInput() override;
// Synchronize the state cache of this Backend with the actual real state of the GL Context
void transferTransformState(const Batch& batch) const override;
void initTransform() override;
void updateTransform(const Batch& batch);
void resetTransformStage();
void updateTransform(const Batch& batch) override;
// Output stage
void do_blit(const Batch& batch, size_t paramOffset) override;

View file

@ -13,7 +13,111 @@
using namespace gpu;
using namespace gpu::gl41;
void GL41Backend::updateInput() {
Parent::updateInput();
void GL41Backend::resetInputStage() {
Parent::resetInputStage();
glBindBuffer(GL_ARRAY_BUFFER, 0);
for (uint32_t i = 0; i < _input._attributeActivation.size(); i++) {
glDisableVertexAttribArray(i);
glVertexAttribPointer(i, 4, GL_FLOAT, GL_FALSE, 0, 0);
}
}
void GL41Backend::updateInput() {
if (_input._invalidFormat || _input._invalidBuffers.any()) {
if (_input._invalidFormat) {
InputStageState::ActivationCache newActivation;
_stats._ISNumFormatChanges++;
// Check expected activation
if (_input._format) {
for (auto& it : _input._format->getAttributes()) {
const Stream::Attribute& attrib = (it).second;
uint8_t locationCount = attrib._element.getLocationCount();
for (int i = 0; i < locationCount; ++i) {
newActivation.set(attrib._slot + i);
}
}
}
// Manage Activation what was and what is expected now
for (unsigned int i = 0; i < newActivation.size(); i++) {
bool newState = newActivation[i];
if (newState != _input._attributeActivation[i]) {
if (newState) {
glEnableVertexAttribArray(i);
} else {
glDisableVertexAttribArray(i);
}
(void)CHECK_GL_ERROR();
_input._attributeActivation.flip(i);
}
}
}
// now we need to bind the buffers and assign the attrib pointers
if (_input._format) {
const Buffers& buffers = _input._buffers;
const Offsets& offsets = _input._bufferOffsets;
const Offsets& strides = _input._bufferStrides;
const Stream::Format::AttributeMap& attributes = _input._format->getAttributes();
auto& inputChannels = _input._format->getChannels();
_stats._ISNumInputBufferChanges++;
GLuint boundVBO = 0;
for (auto& channelIt : inputChannels) {
const Stream::Format::ChannelMap::value_type::second_type& channel = (channelIt).second;
if ((channelIt).first < buffers.size()) {
int bufferNum = (channelIt).first;
if (_input._invalidBuffers.test(bufferNum) || _input._invalidFormat) {
// GLuint vbo = gpu::GL41Backend::getBufferID((*buffers[bufferNum]));
GLuint vbo = _input._bufferVBOs[bufferNum];
if (boundVBO != vbo) {
glBindBuffer(GL_ARRAY_BUFFER, vbo);
(void)CHECK_GL_ERROR();
boundVBO = vbo;
}
_input._invalidBuffers[bufferNum] = false;
for (unsigned int i = 0; i < channel._slots.size(); i++) {
const Stream::Attribute& attrib = attributes.at(channel._slots[i]);
GLuint slot = attrib._slot;
GLuint count = attrib._element.getLocationScalarCount();
uint8_t locationCount = attrib._element.getLocationCount();
GLenum type = gl::ELEMENT_TYPE_TO_GL[attrib._element.getType()];
// GLenum perLocationStride = strides[bufferNum];
GLenum perLocationStride = attrib._element.getLocationSize();
GLuint stride = (GLuint)strides[bufferNum];
GLuint pointer = (GLuint)(attrib._offset + offsets[bufferNum]);
GLboolean isNormalized = attrib._element.isNormalized();
for (size_t locNum = 0; locNum < locationCount; ++locNum) {
glVertexAttribPointer(slot + (GLuint)locNum, count, type, isNormalized, stride,
reinterpret_cast<GLvoid*>(pointer + perLocationStride * (GLuint)locNum));
#ifdef GPU_STEREO_DRAWCALL_INSTANCED
glVertexAttribDivisor(slot + (GLuint)locNum, attrib._frequency * (isStereo() ? 2 : 1));
#else
glVertexAttribDivisor(slot + (GLuint)locNum, attrib._frequency);
#endif
}
// TODO: Support properly the IAttrib version
(void)CHECK_GL_ERROR();
}
}
}
}
}
// everything format related should be in sync now
_input._invalidFormat = false;
}
}

View file

@ -79,3 +79,32 @@ void GL41Backend::transferTransformState(const Batch& batch) const {
// Make sure the current Camera offset is unknown before render Draw
_transform._currentCameraOffset = INVALID_OFFSET;
}
void GL41Backend::updateTransform(const Batch& batch) {
_transform.update(_commandIndex, _stereo);
auto& drawCallInfoBuffer = batch.getDrawCallInfoBuffer();
if (batch._currentNamedCall.empty()) {
auto& drawCallInfo = drawCallInfoBuffer[_currentDraw];
if (_transform._enabledDrawcallInfoBuffer) {
glDisableVertexAttribArray(gpu::Stream::DRAW_CALL_INFO); // Make sure attrib array is disabled
_transform._enabledDrawcallInfoBuffer = false;
}
glVertexAttribI2i(gpu::Stream::DRAW_CALL_INFO, drawCallInfo.index, drawCallInfo.unused);
} else {
if (!_transform._enabledDrawcallInfoBuffer) {
glEnableVertexAttribArray(gpu::Stream::DRAW_CALL_INFO); // Make sure attrib array is enabled
glBindBuffer(GL_ARRAY_BUFFER, _transform._drawCallInfoBuffer);
#ifdef GPU_STEREO_DRAWCALL_INSTANCED
glVertexAttribDivisor(gpu::Stream::DRAW_CALL_INFO, (isStereo() ? 2 : 1));
#else
glVertexAttribDivisor(gpu::Stream::DRAW_CALL_INFO, 1);
#endif
_transform._enabledDrawcallInfoBuffer = true;
}
glVertexAttribIPointer(gpu::Stream::DRAW_CALL_INFO, 2, GL_UNSIGNED_SHORT, 0, _transform._drawCallInfoOffsets[batch._currentNamedCall]);
}
(void)CHECK_GL_ERROR();
}

View file

@ -130,13 +130,13 @@ protected:
void do_multiDrawIndexedIndirect(const Batch& batch, size_t paramOffset) override;
// Input Stage
void resetInputStage() override;
void updateInput() override;
// Synchronize the state cache of this Backend with the actual real state of the GL Context
void transferTransformState(const Batch& batch) const override;
void initTransform() override;
void updateTransform(const Batch& batch);
void resetTransformStage();
void updateTransform(const Batch& batch) override;
// Output stage
void do_blit(const Batch& batch, size_t paramOffset) override;

View file

@ -9,10 +9,112 @@
// See the accompanying file LICENSE or http://www.apache.org/licenses/LICENSE-2.0.html
//
#include "GL45Backend.h"
#include "../gl/GLShared.h"
using namespace gpu;
using namespace gpu::gl45;
void GL45Backend::updateInput() {
Parent::updateInput();
void GL45Backend::resetInputStage() {
Parent::resetInputStage();
glBindBuffer(GL_ARRAY_BUFFER, 0);
for (uint32_t i = 0; i < _input._attributeActivation.size(); i++) {
glDisableVertexAttribArray(i);
}
for (uint32_t i = 0; i < _input._attribBindingBuffers.size(); i++) {
glBindVertexBuffer(i, 0, 0, 0);
}
}
void GL45Backend::updateInput() {
if (_input._invalidFormat) {
InputStageState::ActivationCache newActivation;
// Assign the vertex format required
if (_input._format) {
_input._attribBindingBuffers.reset();
const Stream::Format::AttributeMap& attributes = _input._format->getAttributes();
auto& inputChannels = _input._format->getChannels();
for (auto& channelIt : inputChannels) {
auto bufferChannelNum = (channelIt).first;
const Stream::Format::ChannelMap::value_type::second_type& channel = (channelIt).second;
_input._attribBindingBuffers.set(bufferChannelNum);
GLuint frequency = 0;
for (unsigned int i = 0; i < channel._slots.size(); i++) {
const Stream::Attribute& attrib = attributes.at(channel._slots[i]);
GLuint slot = attrib._slot;
GLuint count = attrib._element.getLocationScalarCount();
uint8_t locationCount = attrib._element.getLocationCount();
GLenum type = gl::ELEMENT_TYPE_TO_GL[attrib._element.getType()];
GLuint offset = (GLuint)attrib._offset;;
GLboolean isNormalized = attrib._element.isNormalized();
GLenum perLocationSize = attrib._element.getLocationSize();
for (GLuint locNum = 0; locNum < locationCount; ++locNum) {
GLuint attriNum = (GLuint)(slot + locNum);
newActivation.set(attriNum);
if (!_input._attributeActivation[attriNum]) {
_input._attributeActivation.set(attriNum);
glEnableVertexAttribArray(attriNum);
}
glVertexAttribFormat(attriNum, count, type, isNormalized, offset + locNum * perLocationSize);
// TODO: Support properly the IAttrib version
glVertexAttribBinding(attriNum, attrib._channel);
}
if (i == 0) {
frequency = attrib._frequency;
} else {
assert(frequency == attrib._frequency);
}
(void)CHECK_GL_ERROR();
}
#ifdef GPU_STEREO_DRAWCALL_INSTANCED
glVertexBindingDivisor(bufferChannelNum, frequency * (isStereo() ? 2 : 1));
#else
glVertexBindingDivisor(bufferChannelNum, frequency);
#endif
}
// Manage Activation what was and what is expected now
// This should only disable VertexAttribs since the one in use have been disabled above
for (GLuint i = 0; i < (GLuint)newActivation.size(); i++) {
bool newState = newActivation[i];
if (newState != _input._attributeActivation[i]) {
if (newState) {
glEnableVertexAttribArray(i);
} else {
glDisableVertexAttribArray(i);
}
_input._attributeActivation.flip(i);
}
}
(void)CHECK_GL_ERROR();
}
_input._invalidFormat = false;
_stats._ISNumFormatChanges++;
}
if (_input._invalidBuffers.any()) {
auto vbo = _input._bufferVBOs.data();
auto offset = _input._bufferOffsets.data();
auto stride = _input._bufferStrides.data();
for (GLuint buffer = 0; buffer < _input._buffers.size(); buffer++, vbo++, offset++, stride++) {
if (_input._invalidBuffers.test(buffer)) {
glBindVertexBuffer(buffer, (*vbo), (*offset), (GLsizei)(*stride));
}
}
_input._invalidBuffers.reset();
(void)CHECK_GL_ERROR();
}
}

View file

@ -66,3 +66,36 @@ void GL45Backend::transferTransformState(const Batch& batch) const {
// Make sure the current Camera offset is unknown before render Draw
_transform._currentCameraOffset = INVALID_OFFSET;
}
void GL45Backend::updateTransform(const Batch& batch) {
_transform.update(_commandIndex, _stereo);
auto& drawCallInfoBuffer = batch.getDrawCallInfoBuffer();
if (batch._currentNamedCall.empty()) {
auto& drawCallInfo = drawCallInfoBuffer[_currentDraw];
if (_transform._enabledDrawcallInfoBuffer) {
glDisableVertexAttribArray(gpu::Stream::DRAW_CALL_INFO); // Make sure attrib array is disabled
_transform._enabledDrawcallInfoBuffer = false;
}
glVertexAttribI2i(gpu::Stream::DRAW_CALL_INFO, drawCallInfo.index, drawCallInfo.unused);
} else {
if (!_transform._enabledDrawcallInfoBuffer) {
glEnableVertexAttribArray(gpu::Stream::DRAW_CALL_INFO); // Make sure attrib array is enabled
glVertexAttribIFormat(gpu::Stream::DRAW_CALL_INFO, 2, GL_UNSIGNED_SHORT, 0);
glVertexAttribBinding(gpu::Stream::DRAW_CALL_INFO, gpu::Stream::DRAW_CALL_INFO);
#ifdef GPU_STEREO_DRAWCALL_INSTANCED
glVertexBindingDivisor(gpu::Stream::DRAW_CALL_INFO, (isStereo() ? 2 : 1));
#else
glVertexBindingDivisor(gpu::Stream::DRAW_CALL_INFO, 1);
#endif
_transform._enabledDrawcallInfoBuffer = true;
}
// NOTE: A stride of zero in BindVertexBuffer signifies that all elements are sourced from the same location,
// so we must provide a stride.
// This is in contrast to VertexAttrib*Pointer, where a zero signifies tightly-packed elements.
glBindVertexBuffer(gpu::Stream::DRAW_CALL_INFO, _transform._drawCallInfoBuffer, (GLintptr)_transform._drawCallInfoOffsets[batch._currentNamedCall], 2 * sizeof(GLushort));
}
(void)CHECK_GL_ERROR();
}

View file

@ -13,6 +13,23 @@
#include "GPULogging.h"
using namespace gpu;
void ContextStats::evalDelta(const ContextStats& begin, const ContextStats& end) {
_ISNumFormatChanges = end._ISNumFormatChanges - begin._ISNumFormatChanges;
_ISNumInputBufferChanges = end._ISNumInputBufferChanges - begin._ISNumInputBufferChanges;
_ISNumIndexBufferChanges = end._ISNumIndexBufferChanges - begin._ISNumIndexBufferChanges;
_RSNumTextureBounded = end._RSNumTextureBounded - begin._RSNumTextureBounded;
_RSAmountTextureMemoryBounded = end._RSAmountTextureMemoryBounded - begin._RSAmountTextureMemoryBounded;
_DSNumAPIDrawcalls = end._DSNumAPIDrawcalls - begin._DSNumAPIDrawcalls;
_DSNumDrawcalls = end._DSNumDrawcalls - begin._DSNumDrawcalls;
_DSNumTriangles= end._DSNumTriangles - begin._DSNumTriangles;
_PSNumSetPipelines = end._PSNumSetPipelines - begin._PSNumSetPipelines;
}
Context::CreateBackend Context::_createBackendCallback = nullptr;
Context::MakeProgram Context::_makeProgramCallback = nullptr;
std::once_flag Context::_initialized;
@ -73,6 +90,10 @@ void Context::consumeFrameUpdates(const FramePointer& frame) const {
}
void Context::executeFrame(const FramePointer& frame) const {
// Grab the stats at the around the frame and delta to have a consistent sampling
ContextStats beginStats;
getStats(beginStats);
// FIXME? probably not necessary, but safe
consumeFrameUpdates(frame);
_backend->setStereoState(frame->stereoState);
@ -90,6 +111,10 @@ void Context::executeFrame(const FramePointer& frame) const {
_frameRangeTimer->end(endBatch);
_backend->render(endBatch);
}
ContextStats endStats;
getStats(endStats);
_frameStats.evalDelta(beginStats, endStats);
}
bool Context::makeProgram(Shader& shader, const Shader::BindingSet& bindings) {
@ -135,10 +160,18 @@ void Context::downloadFramebuffer(const FramebufferPointer& srcFramebuffer, cons
_backend->downloadFramebuffer(srcFramebuffer, region, destImage);
}
void Context::resetStats() const {
_backend->resetStats();
}
void Context::getStats(ContextStats& stats) const {
_backend->getStats(stats);
}
void Context::getFrameStats(ContextStats& stats) const {
stats = _frameStats;
}
double Context::getFrameTimerGPUAverage() const {
if (_frameRangeTimer) {
return _frameRangeTimer->getGPUAverage();

View file

@ -45,6 +45,8 @@ public:
ContextStats() {}
ContextStats(const ContextStats& stats) = default;
void evalDelta(const ContextStats& begin, const ContextStats& end);
};
class Backend {
@ -83,6 +85,7 @@ public:
return reinterpret_cast<T*>(object.gpuObject.getGPUObject());
}
void resetStats() const { _stats = ContextStats(); }
void getStats(ContextStats& stats) const { stats = _stats; }
virtual bool isTextureManagementSparseEnabled() const = 0;
@ -123,7 +126,7 @@ protected:
}
friend class Context;
ContextStats _stats;
mutable ContextStats _stats;
StereoState _stereo;
};
@ -201,8 +204,11 @@ public:
void downloadFramebuffer(const FramebufferPointer& srcFramebuffer, const Vec4i& region, QImage& destImage);
// Repporting stats of the context
void resetStats() const;
void getStats(ContextStats& stats) const;
// Same as above but grabbed at every end of a frame
void getFrameStats(ContextStats& stats) const;
double getFrameTimerGPUAverage() const;
double getFrameTimerBatchAverage() const;
@ -229,8 +235,8 @@ protected:
RangeTimerPointer _frameRangeTimer;
StereoState _stereo;
double getGPUAverage() const;
double getBatchAverage() const;
// Sampled at the end of every frame, the stats of all the counters
mutable ContextStats _frameStats;
// This function can only be called by "static Shader::makeProgram()"
// makeProgramShader(...) make a program shader ready to be used in a Batch.

View file

@ -12,6 +12,8 @@
#include "Stream.h"
#include <algorithm> //min max and more
#include <sstream>
#include <iomanip>
using namespace gpu;
@ -39,9 +41,21 @@ const ElementArray& getDefaultElements() {
return defaultElements;
}
std::string Stream::Attribute::getKey() const {
std::stringstream skey;
skey << std::hex;
skey << std::setw(8) << std::setfill('0') << (uint32)((((uint32)_slot) << 24) | (((uint32)_channel) << 16) | ((uint32)_element.getRaw()));
skey << _offset;
skey << _frequency;
return skey.str();
}
void Stream::Format::evaluateCache() {
_key.clear();
_channels.clear();
_elementTotalSize = 0;
for(AttributeMap::iterator it = _attributes.begin(); it != _attributes.end(); it++) {
Attribute& attrib = (*it).second;
ChannelInfo& channel = _channels[attrib._channel];
@ -49,6 +63,8 @@ void Stream::Format::evaluateCache() {
channel._stride = std::max(channel._stride, attrib.getSize() + attrib._offset);
channel._netSize += attrib.getSize();
_elementTotalSize += attrib.getSize();
_key += attrib.getKey();
}
}

View file

@ -14,6 +14,7 @@
#include <vector>
#include <map>
#include <array>
#include <string>
#include <assert.h>
@ -73,6 +74,9 @@ public:
// Size of the
uint32 getSize() const { return _element.getSize(); }
// Generate a string key describing the attribute uniquely
std::string getKey() const;
};
// Stream Format is describing how to feed a list of attributes from a bunch of stream buffer channels
@ -106,10 +110,15 @@ public:
bool hasAttribute(Slot slot) const { return (_attributes.find(slot) != _attributes.end()); }
const std::string& getKey() const { return _key; }
const GPUObjectPointer gpuObject{};
protected:
AttributeMap _attributes;
ChannelMap _channels;
uint32 _elementTotalSize { 0 };
std::string _key;
void evaluateCache();
};

View file

@ -35,21 +35,21 @@ void EngineStats::run(const SceneContextPointer& sceneContext, const RenderConte
config->textureGPUVirtualMemoryUsage = gpu::Texture::getTextureGPUVirtualMemoryUsage();
config->textureGPUTransferCount = gpu::Texture::getTextureGPUTransferCount();
gpu::ContextStats gpuStats(_gpuStats);
renderContext->args->_context->getStats(_gpuStats);
renderContext->args->_context->getFrameStats(_gpuStats);
config->frameAPIDrawcallCount = _gpuStats._DSNumAPIDrawcalls - gpuStats._DSNumAPIDrawcalls;
config->frameDrawcallCount = _gpuStats._DSNumDrawcalls - gpuStats._DSNumDrawcalls;
config->frameAPIDrawcallCount = _gpuStats._DSNumAPIDrawcalls;
config->frameDrawcallCount = _gpuStats._DSNumDrawcalls;
config->frameDrawcallRate = config->frameDrawcallCount * frequency;
config->frameTriangleCount = _gpuStats._DSNumTriangles - gpuStats._DSNumTriangles;
config->frameTriangleCount = _gpuStats._DSNumTriangles;
config->frameTriangleRate = config->frameTriangleCount * frequency;
config->frameTextureCount = _gpuStats._RSNumTextureBounded - gpuStats._RSNumTextureBounded;
config->frameTextureCount = _gpuStats._RSNumTextureBounded;
config->frameTextureRate = config->frameTextureCount * frequency;
config->frameTextureMemoryUsage = _gpuStats._RSAmountTextureMemoryBounded - gpuStats._RSAmountTextureMemoryBounded;
config->frameTextureMemoryUsage = _gpuStats._RSAmountTextureMemoryBounded;
config->frameSetPipelineCount = _gpuStats._PSNumSetPipelines - gpuStats._PSNumSetPipelines;
config->frameSetPipelineCount = _gpuStats._PSNumSetPipelines;
config->frameSetInputFormatCount = _gpuStats._ISNumFormatChanges;
config->emitDirty();
}

View file

@ -48,6 +48,7 @@ namespace render {
Q_PROPERTY(quint32 frameTextureMemoryUsage MEMBER frameTextureMemoryUsage NOTIFY dirty)
Q_PROPERTY(quint32 frameSetPipelineCount MEMBER frameSetPipelineCount NOTIFY dirty)
Q_PROPERTY(quint32 frameSetInputFormatCount MEMBER frameSetInputFormatCount NOTIFY dirty)
public:
@ -78,6 +79,8 @@ namespace render {
quint32 frameSetPipelineCount{ 0 };
quint32 frameSetInputFormatCount{ 0 };
void emitDirty() { emit dirty(); }

View file

@ -197,7 +197,7 @@ GPUIdent* GPUIdent::ensureQuery(const QString& vendor, const QString& renderer)
ULONG uNumOfInstances = 0;
CComPtr<IWbemClassObject> spInstance = NULL;
hr = spEnumInst->Next(WBEM_INFINITE, 1, &spInstance, &uNumOfInstances);
hr = spEnumInst->Next(WBEM_INFINITE, 1, &spInstance.p, &uNumOfInstances);
while (hr == S_OK && spInstance && uNumOfInstances) {
// Get properties from the object
CComVariant var;

View file

@ -173,6 +173,11 @@ Item {
prop: "frameSetPipelineCount",
label: "Pipelines",
color: "#E2334D"
},
{
prop: "frameSetInputFormatCount",
label: "Input Formats",
color: "#1AC567"
}
]
}