Optimized performance of bloom by reducing blur resolution and merging some downsampling passes with blur passes

This commit is contained in:
Olivier Prat 2017-11-08 11:38:20 +01:00
parent 91d8435a99
commit 39604363ea
8 changed files with 151 additions and 60 deletions

View file

@ -23,5 +23,5 @@ void main(void) {
vec4 blur1 = texture(blurMap1, varTexCoord0);
vec4 blur2 = texture(blurMap2, varTexCoord0);
outFragColor = vec4((blur0.rgb+blur1.rgb+blur2.rgb)/3.0, intensity);
outFragColor = vec4((blur0.rgb+blur1.rgb+blur2.rgb)*intensity, 1.0f);
}

View file

@ -21,8 +21,9 @@
#define BLOOM_BLUR_LEVEL_COUNT 3
BloomThreshold::BloomThreshold() {
BloomThreshold::BloomThreshold(unsigned int downsamplingFactor) :
_downsamplingFactor(downsamplingFactor) {
assert(downsamplingFactor > 0);
}
void BloomThreshold::configure(const Config& config) {
@ -41,9 +42,13 @@ void BloomThreshold::run(const render::RenderContextPointer& renderContext, cons
assert(inputFrameBuffer->hasColor());
auto inputBuffer = inputFrameBuffer->getRenderBuffer(0);
auto bufferSize = glm::ivec2(inputBuffer->getDimensions());
auto bufferSize = gpu::Vec2u(inputBuffer->getDimensions());
if (!_outputBuffer || _outputBuffer->getSize() != inputFrameBuffer->getSize()) {
// Downsample resolution
bufferSize.x /= _downsamplingFactor;
bufferSize.y /= _downsamplingFactor;
if (!_outputBuffer || _outputBuffer->getSize() != bufferSize) {
auto colorTexture = gpu::TexturePointer(gpu::Texture::createRenderBuffer(inputBuffer->getTexelFormat(), bufferSize.x, bufferSize.y,
gpu::Texture::SINGLE_MIP, gpu::Sampler(gpu::Sampler::FILTER_MIN_MAG_LINEAR_MIP_POINT)));
@ -68,13 +73,15 @@ void BloomThreshold::run(const render::RenderContextPointer& renderContext, cons
_pipeline = gpu::Pipeline::create(program, state);
}
glm::ivec4 viewport{ 0, 0, bufferSize.x, bufferSize.y };
gpu::doInBatch(args->_context, [&](gpu::Batch& batch) {
batch.enableStereo(false);
batch.setViewportTransform(args->_viewport);
batch.setViewportTransform(viewport);
batch.setProjectionTransform(glm::mat4());
batch.resetViewTransform();
batch.setModelTransform(gpu::Framebuffer::evalSubregionTexcoordTransform(bufferSize, args->_viewport));
batch.setModelTransform(gpu::Framebuffer::evalSubregionTexcoordTransform(bufferSize, viewport));
batch.setPipeline(_pipeline);
batch.setFramebuffer(_outputBuffer);
@ -117,8 +124,7 @@ void BloomApply::run(const render::RenderContextPointer& renderContext, const In
gpu::Shader::makeProgram(*program, slotBindings);
gpu::StatePointer state = gpu::StatePointer(new gpu::State());
state->setBlendFunction(true, gpu::State::SRC_ALPHA, gpu::State::BLEND_OP_ADD, gpu::State::ONE,
gpu::State::SRC_ALPHA, gpu::State::BLEND_OP_ADD, gpu::State::ONE);
state->setDepthTest(gpu::State::DepthTest(false, false));
_pipeline = gpu::Pipeline::create(program, state);
}
@ -127,26 +133,70 @@ void BloomApply::run(const render::RenderContextPointer& renderContext, const In
const auto blur0FB = inputs.get1();
const auto blur1FB = inputs.get2();
const auto blur2FB = inputs.get3();
const glm::ivec4 viewport{ 0, 0, framebufferSize.x, framebufferSize.y };
gpu::doInBatch(args->_context, [&](gpu::Batch& batch) {
batch.enableStereo(false);
batch.setFramebuffer(frameBuffer);
batch.setViewportTransform(args->_viewport);
batch.setViewportTransform(viewport);
batch.setProjectionTransform(glm::mat4());
batch.resetViewTransform();
batch.setPipeline(_pipeline);
batch.setModelTransform(gpu::Framebuffer::evalSubregionTexcoordTransform(framebufferSize, args->_viewport));
batch.setModelTransform(gpu::Framebuffer::evalSubregionTexcoordTransform(framebufferSize, viewport));
batch.setResourceTexture(BLUR0_SLOT, blur0FB->getRenderBuffer(0));
batch.setResourceTexture(BLUR1_SLOT, blur1FB->getRenderBuffer(0));
batch.setResourceTexture(BLUR2_SLOT, blur2FB->getRenderBuffer(0));
batch._glUniform1f(INTENSITY_SLOT, _intensity);
batch._glUniform1f(INTENSITY_SLOT, _intensity / 3.0f);
batch.draw(gpu::TRIANGLE_STRIP, 4);
});
}
void BloomDraw::run(const render::RenderContextPointer& renderContext, const Inputs& inputs) {
assert(renderContext->args);
assert(renderContext->args->hasViewFrustum());
RenderArgs* args = renderContext->args;
const auto frameBuffer = inputs.get0();
const auto bloomFrameBuffer = inputs.get1();
if (frameBuffer && bloomFrameBuffer) {
const auto framebufferSize = frameBuffer->getSize();
if (!_pipeline) {
auto vs = gpu::StandardShaderLib::getDrawTransformUnitQuadVS();
auto ps = gpu::StandardShaderLib::getDrawTextureOpaquePS();
gpu::ShaderPointer program = gpu::Shader::createProgram(vs, ps);
gpu::Shader::BindingSet slotBindings;
gpu::Shader::makeProgram(*program, slotBindings);
gpu::StatePointer state = gpu::StatePointer(new gpu::State());
state->setDepthTest(gpu::State::DepthTest(false, false));
state->setBlendFunction(true, gpu::State::ONE, gpu::State::BLEND_OP_ADD, gpu::State::ONE,
gpu::State::ZERO, gpu::State::BLEND_OP_ADD, gpu::State::ONE);
_pipeline = gpu::Pipeline::create(program, state);
}
gpu::doInBatch(args->_context, [&](gpu::Batch& batch) {
batch.enableStereo(false);
batch.setFramebuffer(frameBuffer);
batch.setViewportTransform(args->_viewport);
batch.setProjectionTransform(glm::mat4());
batch.resetViewTransform();
batch.setPipeline(_pipeline);
batch.setModelTransform(gpu::Framebuffer::evalSubregionTexcoordTransform(framebufferSize, args->_viewport));
batch.setResourceTexture(0, bloomFrameBuffer->getRenderBuffer(0));
batch.draw(gpu::TRIANGLE_STRIP, 4);
});
}
}
DebugBloom::DebugBloom() {
}
@ -161,6 +211,7 @@ void DebugBloom::run(const render::RenderContextPointer& renderContext, const In
RenderArgs* args = renderContext->args;
const auto frameBuffer = inputs.get0();
const auto combinedBlurBuffer = inputs.get4();
const auto framebufferSize = frameBuffer->getSize();
const auto level0FB = inputs.get1();
const auto level1FB = inputs.get2();
@ -216,6 +267,11 @@ void DebugBloom::run(const render::RenderContextPointer& renderContext, const In
batch.setModelTransform(modelTransform);
batch.setResourceTexture(0, levelTextures[2]);
batch.draw(gpu::TRIANGLE_STRIP, 4);
modelTransform.postTranslate(glm::vec3(2.0f, 0.0f, 0.0f));
batch.setModelTransform(modelTransform);
batch.setResourceTexture(0, combinedBlurBuffer->getRenderBuffer(0));
batch.draw(gpu::TRIANGLE_STRIP, 4);
} else {
auto viewport = args->_viewport;
auto blurLevel = _mode - DebugBloomConfig::MODE_LEVEL0;
@ -258,7 +314,7 @@ void BloomConfig::setSize(float value) {
assert(blurJobIt != task->_jobs.end());
auto& gaussianBlur = blurJobIt->edit<render::BlurGaussian>();
auto gaussianBlurParams = gaussianBlur.getParameters();
gaussianBlurParams->setFilterGaussianTaps((BLUR_MAX_NUM_TAPS - 1) / 2, sigma);
gaussianBlurParams->setFilterGaussianTaps(5, sigma);
// Gaussian blur increases at each level to have a slower rolloff on the edge
// of the response
sigma *= 1.5f;
@ -280,32 +336,24 @@ void Bloom::configure(const Config& config) {
}
void Bloom::build(JobModel& task, const render::Varying& inputs, render::Varying& outputs) {
const auto bloomInputBuffer = task.addJob<BloomThreshold>("BloomThreshold", inputs);
const auto bloomHalfInputBuffer = task.addJob<render::HalfDownsample>("BloomHalf", bloomInputBuffer);
const auto bloomQuarterInputBuffer = task.addJob<render::HalfDownsample>("BloomQuarter", bloomHalfInputBuffer);
// Start by computing threshold of color buffer input at quarter resolution
const auto bloomInputBuffer = task.addJob<BloomThreshold>("BloomThreshold", inputs, 4U);
#if 1
// Multi-scale blur
const auto blurFB0 = task.addJob<render::BlurGaussian>("BloomBlur0", bloomQuarterInputBuffer);
const auto halfBlurFB0 = task.addJob<render::HalfDownsample>("BloomHalfBlur0", blurFB0);
const auto blurFB1 = task.addJob<render::BlurGaussian>("BloomBlur1", halfBlurFB0, true);
const auto halfBlurFB1 = task.addJob<render::HalfDownsample>("BloomHalfBlur1", blurFB1);
const auto blurFB2 = task.addJob<render::BlurGaussian>("BloomBlur2", halfBlurFB1, true);
#else
// Multi-scale downsampling debug
const auto blurFB0 = bloomQuarterInputBuffer;
const auto blurFB1 = task.addJob<render::HalfDownsample>("BloomHalfBlur1", blurFB0);
const auto blurFB2 = task.addJob<render::HalfDownsample>("BloomHalfBlur2", blurFB1);
// This is only needed so as not to crash as we expect to have the three blur jobs
task.addJob<render::BlurGaussian>("BloomBlur0", bloomHalfInputBuffer, true);
task.addJob<render::BlurGaussian>("BloomBlur1", blurFB1, true);
task.addJob<render::BlurGaussian>("BloomBlur2", blurFB2, true);
#endif
// Multi-scale blur, each new blur is half resolution of the previous pass
const auto blurFB0 = task.addJob<render::BlurGaussian>("BloomBlur0", bloomInputBuffer, true);
const auto blurFB1 = task.addJob<render::BlurGaussian>("BloomBlur1", blurFB0, true, 2U);
const auto blurFB2 = task.addJob<render::BlurGaussian>("BloomBlur2", blurFB1, true, 2U);
const auto& input = inputs.get<Inputs>();
const auto& frameBuffer = input[1];
const auto applyInput = DebugBloom::Inputs(frameBuffer, blurFB0, blurFB1, blurFB2).asVarying();
// Mix all blur levels at quarter resolution
const auto applyInput = BloomApply::Inputs(bloomInputBuffer, blurFB0, blurFB1, blurFB2).asVarying();
task.addJob<BloomApply>("BloomApply", applyInput);
task.addJob<DebugBloom>("DebugBloom", applyInput);
// And them blend result in additive manner on top of final color buffer
const auto drawInput = BloomDraw::Inputs(frameBuffer, bloomInputBuffer).asVarying();
task.addJob<BloomDraw>("BloomDraw", drawInput);
const auto debugInput = DebugBloom::Inputs(frameBuffer, blurFB0, blurFB1, blurFB2, bloomInputBuffer).asVarying();
task.addJob<DebugBloom>("DebugBloom", debugInput);
}

View file

@ -54,7 +54,7 @@ public:
using Config = BloomThresholdConfig;
using JobModel = render::Job::ModelIO<BloomThreshold, Inputs, Outputs, Config>;
BloomThreshold();
BloomThreshold(unsigned int downsamplingFactor);
void configure(const Config& config);
void run(const render::RenderContextPointer& renderContext, const Inputs& inputs, Outputs& outputs);
@ -64,6 +64,7 @@ private:
gpu::FramebufferPointer _outputBuffer;
gpu::PipelinePointer _pipeline;
float _threshold;
unsigned int _downsamplingFactor;
};
@ -96,6 +97,20 @@ private:
float _intensity{ 1.0f };
};
class BloomDraw {
public:
using Inputs = render::VaryingSet2<gpu::FramebufferPointer, gpu::FramebufferPointer>;
using JobModel = render::Job::ModelI<BloomDraw, Inputs>;
BloomDraw() {}
void run(const render::RenderContextPointer& renderContext, const Inputs& inputs);
private:
gpu::PipelinePointer _pipeline;
};
class DebugBloomConfig : public render::Job::Config {
Q_OBJECT
Q_PROPERTY(int mode MEMBER mode NOTIFY dirty)
@ -121,7 +136,7 @@ signals:
class DebugBloom {
public:
using Inputs = render::VaryingSet4<gpu::FramebufferPointer, gpu::FramebufferPointer, gpu::FramebufferPointer, gpu::FramebufferPointer>;
using Inputs = render::VaryingSet5<gpu::FramebufferPointer, gpu::FramebufferPointer, gpu::FramebufferPointer, gpu::FramebufferPointer, gpu::FramebufferPointer>;
using Config = DebugBloomConfig;
using JobModel = render::Job::ModelI<DebugBloom, Inputs, Config>;

View file

@ -16,12 +16,30 @@ uniform float threshold;
in vec2 varTexCoord0;
out vec4 outFragColor;
#define DOWNSAMPLING_FACTOR 4
#define SAMPLE_COUNT (DOWNSAMPLING_FACTOR/2)
void main(void) {
vec4 color = texture(colorMap, varTexCoord0);
float luminance = (color.r+color.g+color.b) / 3.0;
float mask = clamp((luminance-threshold)*0.25, 0, 1);
vec2 deltaX = dFdx(varTexCoord0) / SAMPLE_COUNT;
vec2 deltaY = dFdy(varTexCoord0) / SAMPLE_COUNT;
vec2 startUv = varTexCoord0;
vec4 maskedColor = vec4(0,0,0,0);
color *= mask;
for (int y=0 ; y<SAMPLE_COUNT ; y++) {
vec2 uv = startUv;
outFragColor = vec4(color.rgb, 1.0);
for (int x=0 ; x<SAMPLE_COUNT ; x++) {
vec4 color = texture(colorMap, uv);
float luminance = (color.r+color.g+color.b) / 3.0;
float mask = clamp((luminance-threshold)*0.25, 0, 1);
color *= mask;
maskedColor += color;
uv += deltaX;
}
startUv += deltaY;
}
maskedColor /= SAMPLE_COUNT*SAMPLE_COUNT;
outFragColor = vec4(maskedColor.rgb, 1.0);
}

View file

@ -136,17 +136,23 @@ void BlurParams::setLinearDepthPosFar(float farPosDepth) {
}
BlurInOutResource::BlurInOutResource(bool generateOutputFramebuffer) :
_generateOutputFramebuffer(generateOutputFramebuffer)
{
BlurInOutResource::BlurInOutResource(bool generateOutputFramebuffer, unsigned int downsampleFactor) :
_generateOutputFramebuffer(generateOutputFramebuffer),
_downsampleFactor(downsampleFactor) {
assert(downsampleFactor > 0);
}
bool BlurInOutResource::updateResources(const gpu::FramebufferPointer& sourceFramebuffer, Resources& blurringResources) {
if (!sourceFramebuffer) {
return false;
}
if (_blurredFramebuffer && _blurredFramebuffer->getSize() != sourceFramebuffer->getSize()) {
auto blurBufferSize = sourceFramebuffer->getSize();
blurBufferSize.x /= _downsampleFactor;
blurBufferSize.y /= _downsampleFactor;
if (_blurredFramebuffer && _blurredFramebuffer->getSize() != blurBufferSize) {
_blurredFramebuffer.reset();
}
@ -158,7 +164,7 @@ bool BlurInOutResource::updateResources(const gpu::FramebufferPointer& sourceFra
// _blurredFramebuffer->setDepthStencilBuffer(sourceFramebuffer->getDepthStencilBuffer(), sourceFramebuffer->getDepthStencilBufferFormat());
//}
auto blurringSampler = gpu::Sampler(gpu::Sampler::FILTER_MIN_MAG_LINEAR_MIP_POINT);
auto blurringTarget = gpu::Texture::createRenderBuffer(sourceFramebuffer->getRenderBuffer(0)->getTexelFormat(), sourceFramebuffer->getWidth(), sourceFramebuffer->getHeight(), gpu::Texture::SINGLE_MIP, blurringSampler);
auto blurringTarget = gpu::Texture::createRenderBuffer(sourceFramebuffer->getRenderBuffer(0)->getTexelFormat(), blurBufferSize.x, blurBufferSize.y, gpu::Texture::SINGLE_MIP, blurringSampler);
_blurredFramebuffer->setRenderBuffer(0, blurringTarget);
}
@ -167,7 +173,7 @@ bool BlurInOutResource::updateResources(const gpu::FramebufferPointer& sourceFra
blurringResources.blurringTexture = _blurredFramebuffer->getRenderBuffer(0);
if (_generateOutputFramebuffer) {
if (_outputFramebuffer && _outputFramebuffer->getSize() != sourceFramebuffer->getSize()) {
if (_outputFramebuffer && _outputFramebuffer->getSize() != blurBufferSize) {
_outputFramebuffer.reset();
}
@ -181,7 +187,7 @@ bool BlurInOutResource::updateResources(const gpu::FramebufferPointer& sourceFra
_outputFramebuffer->setDepthStencilBuffer(sourceFramebuffer->getDepthStencilBuffer(), sourceFramebuffer->getDepthStencilBufferFormat());
}*/
auto blurringSampler = gpu::Sampler(gpu::Sampler::FILTER_MIN_MAG_LINEAR_MIP_POINT);
auto blurringTarget = gpu::Texture::createRenderBuffer(sourceFramebuffer->getRenderBuffer(0)->getTexelFormat(), sourceFramebuffer->getWidth(), sourceFramebuffer->getHeight(), gpu::Texture::SINGLE_MIP, blurringSampler);
auto blurringTarget = gpu::Texture::createRenderBuffer(sourceFramebuffer->getRenderBuffer(0)->getTexelFormat(), blurBufferSize.x, blurBufferSize.y, gpu::Texture::SINGLE_MIP, blurringSampler);
_outputFramebuffer->setRenderBuffer(0, blurringTarget);
}
@ -195,8 +201,8 @@ bool BlurInOutResource::updateResources(const gpu::FramebufferPointer& sourceFra
return true;
}
BlurGaussian::BlurGaussian(bool generateOutputFramebuffer) :
_inOutResources(generateOutputFramebuffer)
BlurGaussian::BlurGaussian(bool generateOutputFramebuffer, unsigned int downsampleFactor) :
_inOutResources(generateOutputFramebuffer, downsampleFactor)
{
_parameters = std::make_shared<BlurParams>();
}
@ -276,8 +282,8 @@ void BlurGaussian::run(const RenderContextPointer& renderContext, const gpu::Fra
auto blurHPipeline = getBlurHPipeline();
glm::ivec4 viewport { 0, 0, blurredFramebuffer->getWidth(), blurredFramebuffer->getHeight() };
_parameters->setWidthHeight(viewport.z, viewport.w, args->isStereo());
glm::ivec2 textureSize(blurringResources.sourceTexture->getDimensions());
glm::ivec2 textureSize = blurredFramebuffer->getSize();
_parameters->setWidthHeight(blurredFramebuffer->getWidth(), blurredFramebuffer->getHeight(), args->isStereo());
_parameters->setTexcoordTransform(gpu::Framebuffer::evalSubregionTexcoordTransformCoefficients(textureSize, viewport));
gpu::doInBatch(args->_context, [=](gpu::Batch& batch) {
@ -310,7 +316,7 @@ void BlurGaussian::run(const RenderContextPointer& renderContext, const gpu::Fra
BlurGaussianDepthAware::BlurGaussianDepthAware(bool generateOutputFramebuffer, const BlurParamsPointer& params) :
_inOutResources(generateOutputFramebuffer),
_inOutResources(generateOutputFramebuffer, 1U),
_parameters((params ? params : std::make_shared<BlurParams>()))
{
}

View file

@ -72,7 +72,7 @@ using BlurParamsPointer = std::shared_ptr<BlurParams>;
class BlurInOutResource {
public:
BlurInOutResource(bool generateOutputFramebuffer = false);
BlurInOutResource(bool generateOutputFramebuffer, unsigned int downsampleFactor);
struct Resources {
gpu::TexturePointer sourceTexture;
@ -85,8 +85,9 @@ public:
gpu::FramebufferPointer _blurredFramebuffer;
// the output framebuffer defined if the job needs to output the result in a new framebuffer and not in place in th einput buffer
// the output framebuffer defined if the job needs to output the result in a new framebuffer and not in place in the input buffer
gpu::FramebufferPointer _outputFramebuffer;
unsigned int _downsampleFactor{ 1U };
bool _generateOutputFramebuffer{ false };
};
@ -115,7 +116,7 @@ public:
using Config = BlurGaussianConfig;
using JobModel = Job::ModelIO<BlurGaussian, gpu::FramebufferPointer, gpu::FramebufferPointer, Config>;
BlurGaussian(bool generateOutputFramebuffer = false);
BlurGaussian(bool generateOutputFramebuffer = false, unsigned int downsampleFactor = 1U);
void configure(const Config& config);
void run(const RenderContextPointer& renderContext, const gpu::FramebufferPointer& sourceFramebuffer, gpu::FramebufferPointer& blurredFramebuffer);

View file

@ -17,6 +17,8 @@
using namespace render;
gpu::PipelinePointer HalfDownsample::_pipeline;
HalfDownsample::HalfDownsample() {
}
@ -57,7 +59,7 @@ void HalfDownsample::run(const RenderContextPointer& renderContext, const gpu::F
gpu::Shader::makeProgram(*program, slotBindings);
gpu::StatePointer state = gpu::StatePointer(new gpu::State());
state->setDepthTest(gpu::State::DepthTest(false));
state->setDepthTest(gpu::State::DepthTest(false, false));
_pipeline = gpu::Pipeline::create(program, state);
}

View file

@ -30,7 +30,8 @@ namespace render {
protected:
gpu::PipelinePointer _pipeline;
static gpu::PipelinePointer _pipeline;
gpu::FramebufferPointer _destinationFrameBuffer;
gpu::FramebufferPointer getResampledFrameBuffer(const gpu::FramebufferPointer& sourceFramebuffer);