Little improvments on the performance side to get the Stencil pass and the masking of the background under reasonable performances

This commit is contained in:
samcake 2015-10-01 18:42:09 -07:00
parent 0398f9429e
commit 3490e08521
7 changed files with 108 additions and 45 deletions

View file

@ -0,0 +1,28 @@
<@include gpu/Config.slh@>
<$VERSION_HEADER$>
// Generated on <$_SCRIBE_DATE$>
//
// Draw the unit quad [-1,-1 -> 1,1] amd pass along the unit texcoords [0, 0 -> 1, 1]. Not transform used.
// Simply draw a Triangle_strip of 2 triangles, no input buffers or index buffer needed
//
// Created by Sam Gateau on 6/22/2015
// Copyright 2015 High Fidelity, Inc.
//
// Distributed under the Apache License, Version 2.0.
// See the accompanying file LICENSE or http://www.apache.org/licenses/LICENSE-2.0.html
//
out vec2 varTexCoord0;
void main(void) {
const vec4 UNIT_QUAD[4] = vec4[4](
vec4(-1.0, -1.0, 0.0, 1.0),
vec4(1.0, -1.0, 0.0, 1.0),
vec4(-1.0, 1.0, 0.0, 1.0),
vec4(1.0, 1.0, 0.0, 1.0)
);
vec4 pos = UNIT_QUAD[gl_VertexID];
varTexCoord0 = (pos.xy + 1) * 0.5;
gl_Position = pos;
}

View file

@ -132,20 +132,26 @@ void GLBackend::renderPassTransfer(Batch& batch) {
const size_t numCommands = batch.getCommands().size(); const size_t numCommands = batch.getCommands().size();
const Batch::Commands::value_type* command = batch.getCommands().data(); const Batch::Commands::value_type* command = batch.getCommands().data();
const Batch::CommandOffsets::value_type* offset = batch.getCommandOffsets().data(); const Batch::CommandOffsets::value_type* offset = batch.getCommandOffsets().data();
for (auto& cached : batch._buffers._items) { { // Sync all the buffers
if (cached._data) { PROFILE_RANGE("syncGPUBuffer");
syncGPUObject(*cached._data);
for (auto& cached : batch._buffers._items) {
if (cached._data) {
syncGPUObject(*cached._data);
}
} }
} }
// Reset the transform buffers
_transform._cameras.resize(0);
_transform._cameraOffsets.clear();
_transform._objects.resize(0);
_transform._objectOffsets.clear();
for (_commandIndex = 0; _commandIndex < numCommands; ++_commandIndex) { { // Sync all the buffers
switch (*command) { PROFILE_RANGE("syncCPUTransform");
_transform._cameras.resize(0);
_transform._cameraOffsets.clear();
_transform._objects.resize(0);
_transform._objectOffsets.clear();
for (_commandIndex = 0; _commandIndex < numCommands; ++_commandIndex) {
switch (*command) {
case Batch::COMMAND_draw: case Batch::COMMAND_draw:
case Batch::COMMAND_drawIndexed: case Batch::COMMAND_drawIndexed:
case Batch::COMMAND_drawInstanced: case Batch::COMMAND_drawInstanced:
@ -164,11 +170,16 @@ void GLBackend::renderPassTransfer(Batch& batch) {
default: default:
break; break;
}
command++;
offset++;
} }
command++;
offset++;
} }
_transform.transfer();
{ // Sync the transform buffers
PROFILE_RANGE("syncGPUTransform");
_transform.transfer();
}
} }
void GLBackend::renderPassDraw(Batch& batch) { void GLBackend::renderPassDraw(Batch& batch) {

View file

@ -642,8 +642,13 @@ void GLBackend::do_setStateStencil(State::StencilActivation activation, State::S
if (activation.isEnabled()) { if (activation.isEnabled()) {
glEnable(GL_STENCIL_TEST); glEnable(GL_STENCIL_TEST);
glStencilMaskSeparate(GL_FRONT, activation.getWriteMaskFront());
glStencilMaskSeparate(GL_BACK, activation.getWriteMaskBack()); if (activation.getWriteMaskFront() != activation.getWriteMaskBack()) {
glStencilMaskSeparate(GL_FRONT, activation.getWriteMaskFront());
glStencilMaskSeparate(GL_BACK, activation.getWriteMaskBack());
} else {
glStencilMask(activation.getWriteMaskFront());
}
static GLenum STENCIL_OPS[] = { static GLenum STENCIL_OPS[] = {
GL_KEEP, GL_KEEP,
@ -655,12 +660,16 @@ void GLBackend::do_setStateStencil(State::StencilActivation activation, State::S
GL_INCR, GL_INCR,
GL_DECR }; GL_DECR };
glStencilOpSeparate(GL_FRONT, STENCIL_OPS[frontTest.getFailOp()], STENCIL_OPS[frontTest.getPassOp()], STENCIL_OPS[frontTest.getDepthFailOp()]); if (frontTest != backTest) {
glStencilFuncSeparate(GL_FRONT, GL_COMPARISON_FUNCTIONS[frontTest.getFunction()], frontTest.getReference(), frontTest.getReadMask()); glStencilOpSeparate(GL_FRONT, STENCIL_OPS[frontTest.getFailOp()], STENCIL_OPS[frontTest.getPassOp()], STENCIL_OPS[frontTest.getDepthFailOp()]);
glStencilFuncSeparate(GL_FRONT, GL_COMPARISON_FUNCTIONS[frontTest.getFunction()], frontTest.getReference(), frontTest.getReadMask());
glStencilOpSeparate(GL_BACK, STENCIL_OPS[backTest.getFailOp()], STENCIL_OPS[backTest.getPassOp()], STENCIL_OPS[backTest.getDepthFailOp()]);
glStencilFuncSeparate(GL_BACK, GL_COMPARISON_FUNCTIONS[backTest.getFunction()], backTest.getReference(), backTest.getReadMask());
glStencilOpSeparate(GL_BACK, STENCIL_OPS[backTest.getFailOp()], STENCIL_OPS[backTest.getPassOp()], STENCIL_OPS[backTest.getDepthFailOp()]);
glStencilFuncSeparate(GL_BACK, GL_COMPARISON_FUNCTIONS[backTest.getFunction()], backTest.getReference(), backTest.getReadMask());
} else {
glStencilOp(STENCIL_OPS[frontTest.getFailOp()], STENCIL_OPS[frontTest.getPassOp()], STENCIL_OPS[frontTest.getDepthFailOp()]);
glStencilFunc(GL_COMPARISON_FUNCTIONS[frontTest.getFunction()], frontTest.getReference(), frontTest.getReadMask());
}
} else { } else {
glDisable(GL_STENCIL_TEST); glDisable(GL_STENCIL_TEST);
} }

View file

@ -130,19 +130,27 @@ void GLBackend::TransformStageState::preUpdate(size_t commandIndex, const Stereo
void GLBackend::TransformStageState::transfer() const { void GLBackend::TransformStageState::transfer() const {
static QByteArray bufferData; static QByteArray bufferData;
glBindBuffer(GL_UNIFORM_BUFFER, _cameraBuffer); if (!_cameras.empty()) {
bufferData.resize(_cameraUboSize * _cameras.size()); glBindBuffer(GL_UNIFORM_BUFFER, _cameraBuffer);
for (size_t i = 0; i < _cameras.size(); ++i) { bufferData.resize(_cameraUboSize * _cameras.size());
memcpy(bufferData.data() + (_cameraUboSize * i), &_cameras[i], sizeof(TransformCamera)); for (size_t i = 0; i < _cameras.size(); ++i) {
memcpy(bufferData.data() + (_cameraUboSize * i), &_cameras[i], sizeof(TransformCamera));
}
glBufferData(GL_UNIFORM_BUFFER, bufferData.size(), bufferData.data(), GL_DYNAMIC_DRAW);
} }
glBufferData(GL_UNIFORM_BUFFER, bufferData.size(), bufferData.data(), GL_DYNAMIC_DRAW);
glBindBuffer(GL_UNIFORM_BUFFER, _objectBuffer); if (!_objects.empty()) {
bufferData.resize(_objectUboSize * _objects.size()); glBindBuffer(GL_UNIFORM_BUFFER, _objectBuffer);
for (size_t i = 0; i < _objects.size(); ++i) { bufferData.resize(_objectUboSize * _objects.size());
memcpy(bufferData.data() + (_objectUboSize * i), &_objects[i], sizeof(TransformObject)); for (size_t i = 0; i < _objects.size(); ++i) {
memcpy(bufferData.data() + (_objectUboSize * i), &_objects[i], sizeof(TransformObject));
}
glBufferData(GL_UNIFORM_BUFFER, bufferData.size(), bufferData.data(), GL_DYNAMIC_DRAW);
}
if (!_cameras.empty() || !_objects.empty()) {
glBindBuffer(GL_UNIFORM_BUFFER, 0);
} }
glBufferData(GL_UNIFORM_BUFFER, bufferData.size(), bufferData.data(), GL_DYNAMIC_DRAW);
glBindBuffer(GL_UNIFORM_BUFFER, 0);
CHECK_GL_ERROR(); CHECK_GL_ERROR();
} }

View file

@ -12,6 +12,7 @@
// //
#include "StandardShaderLib.h" #include "StandardShaderLib.h"
#include "DrawUnitQuadTexcoord_vert.h"
#include "DrawTransformUnitQuad_vert.h" #include "DrawTransformUnitQuad_vert.h"
#include "DrawTexcoordRectTransformUnitQuad_vert.h" #include "DrawTexcoordRectTransformUnitQuad_vert.h"
#include "DrawViewportQuadTransformTexcoord_vert.h" #include "DrawViewportQuadTransformTexcoord_vert.h"
@ -21,6 +22,7 @@
using namespace gpu; using namespace gpu;
ShaderPointer StandardShaderLib::_drawUnitQuadTexcoordVS;
ShaderPointer StandardShaderLib::_drawTransformUnitQuadVS; ShaderPointer StandardShaderLib::_drawTransformUnitQuadVS;
ShaderPointer StandardShaderLib::_drawTexcoordRectTransformUnitQuadVS; ShaderPointer StandardShaderLib::_drawTexcoordRectTransformUnitQuadVS;
ShaderPointer StandardShaderLib::_drawViewportQuadTransformTexcoordVS; ShaderPointer StandardShaderLib::_drawViewportQuadTransformTexcoordVS;
@ -55,6 +57,12 @@ ShaderPointer StandardShaderLib::getProgram(GetShader getVS, GetShader getPS) {
} }
ShaderPointer StandardShaderLib::getDrawUnitQuadTexcoordVS() {
if (!_drawUnitQuadTexcoordVS) {
_drawUnitQuadTexcoordVS = gpu::ShaderPointer(gpu::Shader::createVertex(std::string(DrawUnitQuadTexcoord_vert)));
}
return _drawUnitQuadTexcoordVS;
}
ShaderPointer StandardShaderLib::getDrawTransformUnitQuadVS() { ShaderPointer StandardShaderLib::getDrawTransformUnitQuadVS() {
if (!_drawTransformUnitQuadVS) { if (!_drawTransformUnitQuadVS) {

View file

@ -23,6 +23,9 @@ namespace gpu {
class StandardShaderLib { class StandardShaderLib {
public: public:
// Shader draws the unit quad in the full viewport clipPos = ([(-1,-1),(1,1)]) and the unit texcoord = [(0,0),(1,1)].
static ShaderPointer getDrawUnitQuadTexcoordVS();
// Shader draw the unit quad objectPos = ([(-1,-1),(1,1)]) and transform it by the full model transform stack (Model, View, Proj). // Shader draw the unit quad objectPos = ([(-1,-1),(1,1)]) and transform it by the full model transform stack (Model, View, Proj).
// A texcoord attribute is also generated texcoord = [(0,0),(1,1)] // A texcoord attribute is also generated texcoord = [(0,0),(1,1)]
static ShaderPointer getDrawTransformUnitQuadVS(); static ShaderPointer getDrawTransformUnitQuadVS();
@ -44,6 +47,7 @@ public:
protected: protected:
static ShaderPointer _drawUnitQuadTexcoordVS;
static ShaderPointer _drawTransformUnitQuadVS; static ShaderPointer _drawTransformUnitQuadVS;
static ShaderPointer _drawTexcoordRectTransformUnitQuadVS; static ShaderPointer _drawTexcoordRectTransformUnitQuadVS;
static ShaderPointer _drawViewportQuadTransformTexcoordVS; static ShaderPointer _drawViewportQuadTransformTexcoordVS;

View file

@ -37,15 +37,19 @@ void SetupDeferred::run(const SceneContextPointer& sceneContext, const RenderCon
RenderArgs* args = renderContext->args; RenderArgs* args = renderContext->args;
gpu::doInBatch(args->_context, [=](gpu::Batch& batch) { gpu::doInBatch(args->_context, [=](gpu::Batch& batch) {
auto primaryFboStencil = DependencyManager::get<FramebufferCache>()->getPrimaryFramebufferStencilColor();
auto primaryFbo = DependencyManager::get<FramebufferCache>()->getPrimaryFramebufferDepthColor(); auto primaryFbo = DependencyManager::get<FramebufferCache>()->getPrimaryFramebufferDepthColor();
batch.enableStereo(false); batch.enableStereo(false);
batch.setFramebuffer(nullptr);
batch.setFramebuffer(primaryFbo);
batch.setViewportTransform(args->_viewport); batch.setViewportTransform(args->_viewport);
batch.setStateScissorRect(args->_viewport); batch.setStateScissorRect(args->_viewport);
batch.setFramebuffer(primaryFboStencil);
batch.clearFramebuffer(
gpu::Framebuffer::BUFFER_STENCIL,
vec4(vec3(0), 1), 1.0, 0.0, true);
batch.setFramebuffer(primaryFbo);
batch.clearFramebuffer( batch.clearFramebuffer(
gpu::Framebuffer::BUFFER_COLOR0 | gpu::Framebuffer::BUFFER_COLOR0 |
gpu::Framebuffer::BUFFER_DEPTH, gpu::Framebuffer::BUFFER_DEPTH,
@ -68,7 +72,6 @@ void ResolveDeferred::run(const SceneContextPointer& sceneContext, const RenderC
RenderDeferredTask::RenderDeferredTask() : Task() { RenderDeferredTask::RenderDeferredTask() : Task() {
_jobs.push_back(Job(new SetupDeferred::JobModel("SetupFramebuffer"))); _jobs.push_back(Job(new SetupDeferred::JobModel("SetupFramebuffer")));
// _jobs.push_back(Job(new DrawBackground::JobModel("DrawBackground")));
_jobs.push_back(Job(new PrepareDeferred::JobModel("PrepareDeferred"))); _jobs.push_back(Job(new PrepareDeferred::JobModel("PrepareDeferred")));
_jobs.push_back(Job(new FetchItems::JobModel("FetchOpaque", _jobs.push_back(Job(new FetchItems::JobModel("FetchOpaque",
@ -302,7 +305,7 @@ gpu::PipelinePointer DrawStencilDeferred::_opaquePipeline;
const gpu::PipelinePointer& DrawStencilDeferred::getOpaquePipeline() { const gpu::PipelinePointer& DrawStencilDeferred::getOpaquePipeline() {
if (!_opaquePipeline) { if (!_opaquePipeline) {
const gpu::int8 STENCIL_OPAQUE = 1; const gpu::int8 STENCIL_OPAQUE = 1;
auto vs = gpu::StandardShaderLib::getDrawViewportQuadTransformTexcoordVS(); auto vs = gpu::StandardShaderLib::getDrawUnitQuadTexcoordVS();
auto ps = gpu::ShaderPointer(gpu::Shader::createPixel(std::string(drawOpaqueStencil_frag))); auto ps = gpu::ShaderPointer(gpu::Shader::createPixel(std::string(drawOpaqueStencil_frag)));
auto program = gpu::ShaderPointer(gpu::Shader::createProgram(vs, ps)); auto program = gpu::ShaderPointer(gpu::Shader::createProgram(vs, ps));
@ -329,7 +332,6 @@ void DrawStencilDeferred::run(const SceneContextPointer& sceneContext, const Ren
args->_batch = &batch; args->_batch = &batch;
auto primaryFboColorDepthStencil = DependencyManager::get<FramebufferCache>()->getPrimaryFramebufferStencilColor(); auto primaryFboColorDepthStencil = DependencyManager::get<FramebufferCache>()->getPrimaryFramebufferStencilColor();
auto primaryFboFull = DependencyManager::get<FramebufferCache>()->getPrimaryFramebuffer();
auto primaryDepth = DependencyManager::get<FramebufferCache>()->getPrimaryDepthTexture(); auto primaryDepth = DependencyManager::get<FramebufferCache>()->getPrimaryDepthTexture();
batch.enableStereo(false); batch.enableStereo(false);
@ -337,11 +339,6 @@ void DrawStencilDeferred::run(const SceneContextPointer& sceneContext, const Ren
batch.setFramebuffer(primaryFboColorDepthStencil); batch.setFramebuffer(primaryFboColorDepthStencil);
batch.setViewportTransform(args->_viewport); batch.setViewportTransform(args->_viewport);
batch.setStateScissorRect(args->_viewport); batch.setStateScissorRect(args->_viewport);
batch.clearStencilFramebuffer(0, true);
Transform modelMat;
batch.setModelTransform(modelMat);
batch.setPipeline(getOpaquePipeline()); batch.setPipeline(getOpaquePipeline());
batch.setResourceTexture(0, primaryDepth); batch.setResourceTexture(0, primaryDepth);
@ -349,8 +346,6 @@ void DrawStencilDeferred::run(const SceneContextPointer& sceneContext, const Ren
batch.draw(gpu::TRIANGLE_STRIP, 4); batch.draw(gpu::TRIANGLE_STRIP, 4);
batch.setResourceTexture(0, nullptr); batch.setResourceTexture(0, nullptr);
batch.setFramebuffer(primaryFboFull);
}); });
args->_batch = nullptr; args->_batch = nullptr;
} }