Merge pull request #7636 from samcake/red

Stereo drawcall amplification
This commit is contained in:
Chris Collins 2016-04-22 15:46:18 -07:00
commit 9bd9de5710
15 changed files with 145 additions and 83 deletions

View file

@ -241,7 +241,6 @@ Item {
color: "#E2334D"
}
]
}
}
}
}

View file

@ -281,7 +281,7 @@ void ApplicationOverlay::buildFramebufferObject() {
_overlayFramebuffer->setRenderBuffer(0, newColorAttachment);
}
}
// If the overlay framebuffer still has no color attachment, no textures were available for rendering, so build a new one
if (!_overlayFramebuffer->getRenderBuffer(0)) {
const gpu::Sampler OVERLAY_SAMPLER(gpu::Sampler::FILTER_MIN_MAG_LINEAR, gpu::Sampler::WRAP_CLAMP);

View file

@ -207,6 +207,7 @@ protected:
static void incrementBufferGPUCount();
static void decrementBufferGPUCount();
static void updateBufferGPUMemoryUsage(Size prevObjectSize, Size newObjectSize);
static void incrementTextureGPUCount();
static void decrementTextureGPUCount();
static void updateTextureGPUMemoryUsage(Size prevObjectSize, Size newObjectSize);

View file

@ -125,7 +125,7 @@ void Framebuffer::resize(uint16 width, uint16 height, uint16 numSamples) {
if (_depthStencilBuffer) {
_depthStencilBuffer._texture->resize2D(width, height, numSamples);
_numSamples = _depthStencilBuffer._texture->getNumSamples();
++_depthStamp;
++_depthStamp;
}
_width = width;

View file

@ -194,6 +194,7 @@ void GLBackend::renderPassTransfer(Batch& batch) {
const Batch::Commands::value_type* command = batch.getCommands().data();
const Batch::CommandOffsets::value_type* offset = batch.getCommandOffsets().data();
_inRenderTransferPass = true;
{ // Sync all the buffers
PROFILE_RANGE("syncGPUBuffer");
@ -241,7 +242,7 @@ void GLBackend::renderPassTransfer(Batch& batch) {
_transform.transfer(batch);
}
_inRenderTransferPass = false;
}
void GLBackend::renderPassDraw(Batch& batch) {
@ -300,24 +301,17 @@ void GLBackend::render(Batch& batch) {
if (!batch.isStereoEnabled()) {
_stereo._enable = false;
}
{
PROFILE_RANGE("Transfer");
renderPassTransfer(batch);
}
{
PROFILE_RANGE(_stereo._enable ? "LeftRender" : "Render");
PROFILE_RANGE(_stereo._enable ? "Render Stereo" : "Render");
renderPassDraw(batch);
}
if (_stereo._enable) {
PROFILE_RANGE("RightRender");
_stereo._pass = 1;
renderPassDraw(batch);
_stereo._pass = 0;
}
// Restore the saved stereo state for the next batch
_stereo._enable = savedStereo;
}
@ -373,17 +367,38 @@ void GLBackend::syncCache() {
glEnable(GL_LINE_SMOOTH);
}
void GLBackend::setupStereoSide(int side) {
ivec4 vp = _transform._viewport;
vp.z /= 2;
glViewport(vp.x + side * vp.z, vp.y, vp.z, vp.w);
_transform.bindCurrentCamera(side);
}
// Executes a non-indexed draw command recorded in a batch.
// The command parameters are read from batch._params starting at paramOffset:
//   +0 start vertex, +1 vertex count, +2 primitive type.
// NOTE(review): this listing appears to be a diff whose +/- markers were stripped, so the
// unconditional draw/stat lines and the isStereo() branch below are presumably the old and
// the new version of the same code rather than both being live — confirm against the repository.
void GLBackend::do_draw(Batch& batch, size_t paramOffset) {
Primitive primitiveType = (Primitive)batch._params[paramOffset + 2]._uint;
GLenum mode = _primitiveToGLmode[primitiveType];
uint32 numVertices = batch._params[paramOffset + 1]._uint;
uint32 startVertex = batch._params[paramOffset + 0]._uint;
// Unconditional draw and stats update (presumably the pre-change lines, see note above).
glDrawArrays(mode, startVertex, numVertices);
_stats._DSNumTriangles += numVertices / 3;
_stats._DSNumDrawcalls++;
if (isStereo()) {
// Stereo: issue the same draw once per side, re-targeting viewport and camera before each.
setupStereoSide(0);
glDrawArrays(mode, startVertex, numVertices);
setupStereoSide(1);
glDrawArrays(mode, startVertex, numVertices);
// Both sides are counted in the stats; triangles are counted as vertices / 3 whatever `mode` is.
_stats._DSNumTriangles += 2 * numVertices / 3;
_stats._DSNumDrawcalls += 2;
} else {
glDrawArrays(mode, startVertex, numVertices);
_stats._DSNumTriangles += numVertices / 3;
_stats._DSNumDrawcalls++;
}
// Incremented once per recorded command, independent of how many GL draws were issued above.
_stats._DSNumAPIDrawcalls++;
// NOTE(review): the two error checks differ only in whitespace — likely old/new diff lines.
(void)CHECK_GL_ERROR();
(void) CHECK_GL_ERROR();
}
void GLBackend::do_drawIndexed(Batch& batch, size_t paramOffset) {
@ -397,9 +412,19 @@ void GLBackend::do_drawIndexed(Batch& batch, size_t paramOffset) {
auto typeByteSize = TYPE_SIZE[_input._indexBufferType];
GLvoid* indexBufferByteOffset = reinterpret_cast<GLvoid*>(startIndex * typeByteSize + _input._indexBufferOffset);
glDrawElements(mode, numIndices, glType, indexBufferByteOffset);
_stats._DSNumTriangles += numIndices / 3;
_stats._DSNumDrawcalls++;
if (isStereo()) {
setupStereoSide(0);
glDrawElements(mode, numIndices, glType, indexBufferByteOffset);
setupStereoSide(1);
glDrawElements(mode, numIndices, glType, indexBufferByteOffset);
_stats._DSNumTriangles += 2 * numIndices / 3;
_stats._DSNumDrawcalls += 2;
} else {
glDrawElements(mode, numIndices, glType, indexBufferByteOffset);
_stats._DSNumTriangles += numIndices / 3;
_stats._DSNumDrawcalls++;
}
_stats._DSNumAPIDrawcalls++;
(void) CHECK_GL_ERROR();
@ -412,14 +437,35 @@ void GLBackend::do_drawInstanced(Batch& batch, size_t paramOffset) {
uint32 numVertices = batch._params[paramOffset + 2]._uint;
uint32 startVertex = batch._params[paramOffset + 1]._uint;
glDrawArraysInstancedARB(mode, startVertex, numVertices, numInstances);
_stats._DSNumTriangles += (numInstances * numVertices) / 3;
_stats._DSNumDrawcalls += numInstances;
if (isStereo()) {
GLint trueNumInstances = 2 * numInstances;
setupStereoSide(0);
glDrawArraysInstancedARB(mode, startVertex, numVertices, numInstances);
setupStereoSide(1);
glDrawArraysInstancedARB(mode, startVertex, numVertices, numInstances);
_stats._DSNumTriangles += (trueNumInstances * numVertices) / 3;
_stats._DSNumDrawcalls += trueNumInstances;
} else {
glDrawArraysInstancedARB(mode, startVertex, numVertices, numInstances);
_stats._DSNumTriangles += (numInstances * numVertices) / 3;
_stats._DSNumDrawcalls += numInstances;
}
_stats._DSNumAPIDrawcalls++;
(void) CHECK_GL_ERROR();
}
// Issues an instanced, indexed draw through whichever entry point the build targets.
// When GPU_INPUT_PROFILE equals GPU_CORE_43 every argument is forwarded to
// glDrawElementsInstancedBaseVertexBaseInstance; for any other profile the call falls
// back to glDrawElementsInstanced, and `basevertex` / `baseinstance` are not used.
void glbackend_glDrawElementsInstancedBaseVertexBaseInstance(GLenum mode, GLsizei count, GLenum type, const GLvoid *indices, GLsizei primcount, GLint basevertex, GLuint baseinstance) {
#if !(GPU_INPUT_PROFILE == GPU_CORE_43)
    // Fallback path: no base vertex / base instance support.
    glDrawElementsInstanced(mode, count, type, indices, primcount);
#else
    glDrawElementsInstancedBaseVertexBaseInstance(mode, count, type, indices, primcount, basevertex, baseinstance);
#endif
}
void GLBackend::do_drawIndexedInstanced(Batch& batch, size_t paramOffset) {
GLint numInstances = batch._params[paramOffset + 4]._uint;
GLenum mode = _primitiveToGLmode[(Primitive)batch._params[paramOffset + 3]._uint];
@ -432,15 +478,23 @@ void GLBackend::do_drawIndexedInstanced(Batch& batch, size_t paramOffset) {
auto typeByteSize = TYPE_SIZE[_input._indexBufferType];
GLvoid* indexBufferByteOffset = reinterpret_cast<GLvoid*>(startIndex * typeByteSize + _input._indexBufferOffset);
#if (GPU_INPUT_PROFILE == GPU_CORE_43)
glDrawElementsInstancedBaseVertexBaseInstance(mode, numIndices, glType, indexBufferByteOffset, numInstances, 0, startInstance);
#else
glDrawElementsInstanced(mode, numIndices, glType, indexBufferByteOffset, numInstances);
Q_UNUSED(startInstance);
#endif
_stats._DSNumTriangles += (numInstances * numIndices) / 3;
_stats._DSNumDrawcalls += numInstances;
if (isStereo()) {
GLint trueNumInstances = 2 * numInstances;
setupStereoSide(0);
glbackend_glDrawElementsInstancedBaseVertexBaseInstance(mode, numIndices, glType, indexBufferByteOffset, numInstances, 0, startInstance);
setupStereoSide(1);
glbackend_glDrawElementsInstancedBaseVertexBaseInstance(mode, numIndices, glType, indexBufferByteOffset, numInstances, 0, startInstance);
_stats._DSNumTriangles += (trueNumInstances * numIndices) / 3;
_stats._DSNumDrawcalls += trueNumInstances;
} else {
glbackend_glDrawElementsInstancedBaseVertexBaseInstance(mode, numIndices, glType, indexBufferByteOffset, numInstances, 0, startInstance);
_stats._DSNumTriangles += (numInstances * numIndices) / 3;
_stats._DSNumDrawcalls += numInstances;
}
_stats._DSNumAPIDrawcalls++;
(void)CHECK_GL_ERROR();
@ -515,10 +569,9 @@ void GLBackend::resetStages() {
#define ADD_COMMAND_GL(call) _commands.push_back(COMMAND_##call); _commandOffsets.push_back(_params.size());
// As long as we don't use several versions of shaders we can avoid this more complex code path
// #define GET_UNIFORM_LOCATION(shaderUniformLoc) _pipeline._programShader->getUniformLocation(shaderUniformLoc, isStereo());
#define GET_UNIFORM_LOCATION(shaderUniformLoc) shaderUniformLoc
// This will be used in the next PR
// #define GET_UNIFORM_LOCATION(shaderUniformLoc) _pipeline._programShader->getUniformLocation(shaderUniformLoc)
void Batch::_glActiveBindTexture(GLenum unit, GLenum target, GLuint texture) {
// clean the cache on the texture unit we are going to use so the next call to setResourceTexture() at the same slot works fine
@ -546,6 +599,7 @@ void Batch::_glUniform1i(GLint location, GLint v0) {
_params.push_back(v0);
_params.push_back(location);
}
void GLBackend::do_glUniform1i(Batch& batch, size_t paramOffset) {
if (_pipeline._program == 0) {
// We should call updatePipeline() to bind the program but we are not doing that
@ -553,6 +607,7 @@ void GLBackend::do_glUniform1i(Batch& batch, size_t paramOffset) {
return;
}
updatePipeline();
glUniform1f(
GET_UNIFORM_LOCATION(batch._params[paramOffset + 1]._int),
batch._params[paramOffset + 0]._int);
@ -742,6 +797,7 @@ void GLBackend::do_glUniformMatrix4fv(Batch& batch, size_t paramOffset) {
return;
}
updatePipeline();
glUniformMatrix4fv(
GET_UNIFORM_LOCATION(batch._params[paramOffset + 3]._int),
batch._params[paramOffset + 2]._uint,

View file

@ -17,6 +17,7 @@
#include <queue>
#include <utility>
#include <list>
#include <array>
#include <gl/Config.h>
@ -201,24 +202,24 @@ public:
};
using ShaderObjects = std::array< ShaderObject, NumVersions >;
using UniformMapping = std::map<GLint, GLint>;
using UniformMappingVersions = std::vector<UniformMapping>;
GLShader();
~GLShader();
ShaderObjects _shaderObjects;
UniformMappingVersions _uniformMappings;
GLuint getProgram() const {
return _shaderObjects[Mono].glprogram;
GLuint getProgram(Version version = Mono) const {
return _shaderObjects[version].glprogram;
}
GLint getUniformLocation(GLint srcLoc) {
GLint getUniformLocation(GLint srcLoc, Version version = Mono) {
// THIS will be used in the future PR as we grow the number of versions
// return _uniformMappings[version][srcLoc];
return srcLoc;
// THIS will be used in the next PR
// return _uniformMappings[Mono][srcLoc];
}
};
@ -353,9 +354,15 @@ public:
void do_setStateColorWriteMask(uint32 mask);
protected:
static const size_t INVALID_OFFSET = (size_t)-1;
bool _inRenderTransferPass;
void renderPassTransfer(Batch& batch);
void renderPassDraw(Batch& batch);
void setupStereoSide(int side);
void initTextureTransferHelper();
static void transferGPUObject(const TexturePointer& texture);
@ -438,7 +445,8 @@ protected:
void resetTransformStage();
struct TransformStageState {
using TransformCameras = std::vector<TransformCamera>;
using CameraBufferElement = TransformCamera;
using TransformCameras = std::vector<CameraBufferElement>;
TransformCamera _camera;
TransformCameras _cameras;
@ -462,9 +470,11 @@ protected:
using List = std::list<Pair>;
List _cameraOffsets;
mutable List::const_iterator _camerasItr;
mutable size_t _currentCameraOffset{ INVALID_OFFSET };
void preUpdate(size_t commandIndex, const StereoState& stereo);
void update(size_t commandIndex, const StereoState& stereo) const;
void bindCurrentCamera(int stereoSide) const;
void transfer(const Batch& batch) const;
} _transform;

View file

@ -84,7 +84,9 @@ void GLBackend::do_setPipeline(Batch& batch, size_t paramOffset) {
}
// check the program cache
// pick the program version
GLuint glprogram = pipelineObject->_program->getProgram();
if (_pipeline._program != glprogram) {
_pipeline._program = glprogram;
_pipeline._programShader = pipelineObject->_program;

View file

@ -312,12 +312,10 @@ GLBackend::GLShader* compileBackendShader(const Shader& shader) {
// Domain specific defines
const std::string domainDefines[NUM_SHADER_DOMAINS] = {
"#define VERTEX_SHADER",
"#define PIXEL_SHADER"
"#define GPU_VERTEX_SHADER",
"#define GPU_PIXEL_SHADER"
};
// Versions specific of the shader
const std::string versionDefines[GLBackend::GLShader::NumVersions] = {
""
@ -375,7 +373,6 @@ GLBackend::GLShader* compileBackendProgram(const Shader& program) {
makeProgramBindings(programObject);
}
// So far so good, the program versions have all been created successfully
GLBackend::GLShader* object = new GLBackend::GLShader();
object->_shaderObjects = programObjects;

View file

@ -31,17 +31,10 @@ void GLBackend::do_setProjectionTransform(Batch& batch, size_t paramOffset) {
void GLBackend::do_setViewportTransform(Batch& batch, size_t paramOffset) {
memcpy(&_transform._viewport, batch.editData(batch._params[paramOffset]._uint), sizeof(Vec4i));
ivec4& vp = _transform._viewport;
// Where we assign the GL viewport
if (_stereo._enable) {
vp.z /= 2;
if (_stereo._pass) {
vp.x += vp.z;
}
}
glViewport(vp.x, vp.y, vp.z, vp.w);
if (!_inRenderTransferPass && !isStereo()) {
ivec4& vp = _transform._viewport;
glViewport(vp.x, vp.y, vp.z, vp.w);
}
// The Viewport is tagged invalid because the CameraTransformUBO is not up to date and will need update on next drawcall
_transform._invalidViewport = true;
@ -65,7 +58,7 @@ void GLBackend::initTransform() {
#ifndef GPU_SSBO_DRAW_CALL_INFO
glGenTextures(1, &_transform._objectBufferTexture);
#endif
size_t cameraSize = sizeof(TransformCamera);
size_t cameraSize = sizeof(TransformStageState::CameraBufferElement);
while (_transform._cameraUboSize < cameraSize) {
_transform._cameraUboSize += _uboAlignment;
}
@ -111,15 +104,14 @@ void GLBackend::TransformStageState::preUpdate(size_t commandIndex, const Stereo
if (_invalidView || _invalidProj || _invalidViewport) {
size_t offset = _cameraUboSize * _cameras.size();
_cameraOffsets.push_back(TransformStageState::Pair(commandIndex, offset));
if (stereo._enable) {
_cameraOffsets.push_back(TransformStageState::Pair(commandIndex, offset));
for (int i = 0; i < 2; ++i) {
_cameras.push_back(_camera.getEyeCamera(i, stereo, _view));
}
_cameras.push_back((_camera.getEyeCamera(0, stereo, _view)));
_cameras.push_back((_camera.getEyeCamera(1, stereo, _view)));
} else {
_cameraOffsets.push_back(TransformStageState::Pair(commandIndex, offset));
_cameras.push_back(_camera.recomputeDerived(_view));
_cameras.push_back((_camera.recomputeDerived(_view)));
}
}
// Flags are clean
@ -132,7 +124,7 @@ void GLBackend::TransformStageState::transfer(const Batch& batch) const {
if (!_cameras.empty()) {
bufferData.resize(_cameraUboSize * _cameras.size());
for (size_t i = 0; i < _cameras.size(); ++i) {
memcpy(bufferData.data() + (_cameraUboSize * i), &_cameras[i], sizeof(TransformCamera));
memcpy(bufferData.data() + (_cameraUboSize * i), &_cameras[i], sizeof(CameraBufferElement));
}
glBindBuffer(GL_UNIFORM_BUFFER, _cameraBuffer);
glBufferData(GL_UNIFORM_BUFFER, bufferData.size(), bufferData.data(), GL_DYNAMIC_DRAW);
@ -179,27 +171,33 @@ void GLBackend::TransformStageState::transfer(const Batch& batch) const {
#endif
CHECK_GL_ERROR();
// Make sure the current Camera offset is unknown before render Draw
_currentCameraOffset = INVALID_OFFSET;
}
// Brings the camera binding up to date for the command at `commandIndex`.
// Walks _camerasItr forward over every recorded (command index, buffer offset) pair whose
// command index is <= commandIndex, remembering the last offset seen, and binds the camera
// uniform range if at least one pair was consumed.
// NOTE(review): this listing appears to be a diff whose +/- markers were stripped; the local
// INVALID_OFFSET, and the two consecutive `offset != INVALID_OFFSET` blocks, are presumably
// the old and new versions of the same code — confirm against the repository.
void GLBackend::TransformStageState::update(size_t commandIndex, const StereoState& stereo) const {
static const size_t INVALID_OFFSET = (size_t)-1;
size_t offset = INVALID_OFFSET;
// Consume all camera entries that apply at or before this command; the last one wins.
while ((_camerasItr != _cameraOffsets.end()) && (commandIndex >= (*_camerasItr).first)) {
offset = (*_camerasItr).second;
_currentCameraOffset = offset;
++_camerasItr;
}
// Binding keyed on stereo._pass: when stereo is enabled and the pass is non-zero, the
// range one _cameraUboSize further into the buffer is bound.
if (offset != INVALID_OFFSET) {
// We include both camera offsets for stereo
if (stereo._enable && stereo._pass) {
offset += _cameraUboSize;
}
glBindBufferRange(GL_UNIFORM_BUFFER, TRANSFORM_CAMERA_SLOT,
_cameraBuffer, offset, sizeof(Backend::TransformCamera));
}
// Binding through bindCurrentCamera(): done here only when stereo is disabled.
if (offset != INVALID_OFFSET) {
if (!stereo._enable) {
bindCurrentCamera(0);
}
}
(void)CHECK_GL_ERROR();
}
// Binds one camera element of the camera buffer to the TRANSFORM_CAMERA_SLOT uniform binding.
// `eye` selects the element relative to the current camera offset, in steps of
// _cameraUboSize. Does nothing while the current camera offset is INVALID_OFFSET.
void GLBackend::TransformStageState::bindCurrentCamera(int eye) const {
    if (_currentCameraOffset == INVALID_OFFSET) {
        return;
    }

    const auto eyeOffset = _currentCameraOffset + eye * _cameraUboSize;
    glBindBufferRange(GL_UNIFORM_BUFFER, TRANSFORM_CAMERA_SLOT, _cameraBuffer, eyeOffset, sizeof(CameraBufferElement));
}
void GLBackend::updateTransform(const Batch& batch) {
_transform.update(_commandIndex, _stereo);

View file

@ -23,6 +23,7 @@ struct TransformCamera {
layout(std140) uniform transformCameraBuffer {
TransformCamera _camera;
};
TransformCamera getTransformCamera() {
return _camera;
}
@ -68,8 +69,8 @@ TransformObject getTransformObject() {
<@func declareStandardTransform()@>
<$declareStandardObjectTransform()$>
<$declareStandardCameraTransform()$>
<$declareStandardObjectTransform()$>
<@endfunc@>
<@func transformCameraViewport(cameraTransform, viewport)@>

View file

@ -247,6 +247,7 @@ void MeshPartPayload::bindMaterial(gpu::Batch& batch, const ShapePipeline::Locat
// Texcoord transforms ?
if (locations->texcoordMatrices >= 0) {
batch._glUniformMatrix4fv(locations->texcoordMatrices, 2, false, (const float*)&texcoordTransform);
// batch._glUniformMatrix4fv(locations->texcoordMatrices, (materialKey.isLightmapMap() ? 2 : 1), false, (const float*)&texcoordTransform);
}
}

View file

@ -170,7 +170,7 @@ void RenderDeferredTask::run(const SceneContextPointer& sceneContext, const Rend
for (auto job : _jobs) {
job.run(sceneContext, renderContext);
}
};
}
void DrawDeferred::run(const SceneContextPointer& sceneContext, const RenderContextPointer& renderContext, const ItemBounds& inItems) {
assert(renderContext->args);
@ -244,7 +244,6 @@ void DrawOverlay3D::run(const SceneContextPointer& sceneContext, const RenderCon
auto config = std::static_pointer_cast<Config>(renderContext->jobConfig);
config->setNumDrawn((int)inItems.size());
emit config->numDrawnChanged();

View file

@ -109,7 +109,6 @@ protected:
bool _stateSort;
};
class DrawStencilDeferred {
public:
using JobModel = render::Job::Model<DrawStencilDeferred>;

View file

@ -81,7 +81,6 @@ void render::renderStateSortShapes(const SceneContextPointer& sceneContext, cons
SortedShapes sortedShapes;
std::vector<Item> ownPipelineBucket;
for (auto i = 0; i < numItemsToDraw; ++i) {
auto item = scene->getItem(inItems[i].id);

View file

@ -78,7 +78,7 @@ public:
PerformanceTimer(const QString& name);
~PerformanceTimer();
static bool isActive();
static void setActive(bool active);