From deaa4a747b57d1996365df5a5d45a1bcfa70942d Mon Sep 17 00:00:00 2001 From: Brad Davis Date: Tue, 22 Sep 2015 10:11:49 -0700 Subject: [PATCH] Batch side implementation of multi-draw indirect --- libraries/gpu/src/gpu/Batch.cpp | 27 +++ libraries/gpu/src/gpu/Batch.h | 36 +++- libraries/gpu/src/gpu/GLBackend.cpp | 56 +++++- libraries/gpu/src/gpu/GLBackend.h | 13 +- libraries/gpu/src/gpu/GLBackendInput.cpp | 37 ++-- libraries/render-utils/src/GeometryCache.cpp | 17 +- libraries/render-utils/src/GeometryCache.h | 14 +- tests/gpu-test/src/main.cpp | 196 +++++++++++++++---- 8 files changed, 318 insertions(+), 78 deletions(-) diff --git a/libraries/gpu/src/gpu/Batch.cpp b/libraries/gpu/src/gpu/Batch.cpp index e6e176be88..15b841dd04 100644 --- a/libraries/gpu/src/gpu/Batch.cpp +++ b/libraries/gpu/src/gpu/Batch.cpp @@ -102,6 +102,19 @@ void Batch::drawIndexedInstanced(uint32 nbInstances, Primitive primitiveType, ui _params.push_back(nbInstances); } + +void Batch::multiDrawIndirect(uint32 nbCommands, Primitive primitiveType) { + ADD_COMMAND(multiDrawIndirect); + _params.push_back(nbCommands); + _params.push_back(primitiveType); +} + +void Batch::multiDrawIndexedIndirect(uint32 nbCommands, Primitive primitiveType) { + ADD_COMMAND(multiDrawIndexedIndirect); + _params.push_back(nbCommands); + _params.push_back(primitiveType); +} + void Batch::setInputFormat(const Stream::FormatPointer& format) { ADD_COMMAND(setInputFormat); @@ -144,6 +157,15 @@ void Batch::setIndexBuffer(const BufferView& buffer) { setIndexBuffer(buffer._element.getType(), buffer._buffer, buffer._offset); } +void Batch::setIndirectBuffer(const BufferPointer& buffer, Offset offset, Offset stride) { + ADD_COMMAND(setIndirectBuffer); + + _params.push_back(_buffers.cache(buffer)); + _params.push_back(offset); + _params.push_back(stride); +} + + void Batch::setModelTransform(const Transform& model) { ADD_COMMAND(setModelTransform); @@ -288,6 +310,11 @@ void Batch::resetStages() { ADD_COMMAND(resetStages); } +void 
Batch::runLambda(std::function f) { + ADD_COMMAND(runLambda); + _params.push_back(_lambdas.cache(f)); +} + void Batch::enableStereo(bool enable) { _enableStereo = enable; } diff --git a/libraries/gpu/src/gpu/Batch.h b/libraries/gpu/src/gpu/Batch.h index ec6fb26c34..6dd92739c5 100644 --- a/libraries/gpu/src/gpu/Batch.h +++ b/libraries/gpu/src/gpu/Batch.h @@ -63,8 +63,8 @@ public: void process(Batch& batch) { if (_function) { - _function(batch, *this); - } + _function(batch, *this); + } } }; @@ -96,12 +96,15 @@ public: void drawIndexed(Primitive primitiveType, uint32 nbIndices, uint32 startIndex = 0); void drawInstanced(uint32 nbInstances, Primitive primitiveType, uint32 nbVertices, uint32 startVertex = 0, uint32 startInstance = 0); void drawIndexedInstanced(uint32 nbInstances, Primitive primitiveType, uint32 nbIndices, uint32 startIndex = 0, uint32 startInstance = 0); + void multiDrawIndirect(uint32 nbCommands, Primitive primitiveType); + void multiDrawIndexedIndirect(uint32 nbCommands, Primitive primitiveType); void setupNamedCalls(const std::string& instanceName, size_t count, NamedBatchData::Function function); void setupNamedCalls(const std::string& instanceName, NamedBatchData::Function function); BufferPointer getNamedBuffer(const std::string& instanceName, uint8_t index = 0); - + void setNamedBuffer(const std::string& instanceName, BufferPointer& buffer, uint8_t index = 0); + // Input Stage @@ -117,6 +120,8 @@ public: void setIndexBuffer(Type type, const BufferPointer& buffer, Offset offset); void setIndexBuffer(const BufferView& buffer); // not a command, just a shortcut from a BufferView + void setIndirectBuffer(const BufferPointer& buffer, Offset offset = 0, Offset stride = 0); + // Transform Stage // Vertex position is transformed by ModelTransform from object space to world space // Then by the inverse of the ViewTransform from world space to eye space @@ -169,6 +174,8 @@ public: // Reset the stage caches and states void resetStages(); + void 
runLambda(std::function f); + // TODO: As long as we have gl calls explicitely issued from interface // code, we need to be able to record and batch these calls. THe long // term strategy is to get rid of any GL calls in favor of the HIFI GPU API @@ -194,10 +201,13 @@ public: COMMAND_drawIndexed, COMMAND_drawInstanced, COMMAND_drawIndexedInstanced, + COMMAND_multiDrawIndirect, + COMMAND_multiDrawIndexedIndirect, COMMAND_setInputFormat, COMMAND_setInputBuffer, COMMAND_setIndexBuffer, + COMMAND_setIndirectBuffer, COMMAND_setModelTransform, COMMAND_setViewTransform, @@ -221,6 +231,8 @@ public: COMMAND_resetStages, + COMMAND_runLambda, + // TODO: As long as we have gl calls explicitely issued from interface // code, we need to be able to record and batch these calls. THe long // term strategy is to get rid of any GL calls in favor of the HIFI GPU API @@ -302,6 +314,7 @@ public: typedef Cache::Vector PipelineCaches; typedef Cache::Vector FramebufferCaches; typedef Cache::Vector QueryCaches; + typedef Cache>::Vector LambdaCache; // Cache Data in a byte array if too big to fit in Param // FOr example Mat4s are going there @@ -327,6 +340,7 @@ public: PipelineCaches _pipelines; FramebufferCaches _framebuffers; QueryCaches _queries; + LambdaCache _lambdas; NamedBatchDataMap _namedData; @@ -336,6 +350,20 @@ public: protected: }; -}; +template +void popVectorParam(Batch::Params& params, uint32& paramOffset, V& v) { + for (size_t i = 0; i < v.length(); ++i) { + v[i] = params[paramOffset++]._float; + } +} + +template +void pushVectorParam(Batch::Params& params, const V& v) { + for (size_t i = 0; i < v.length(); ++i) { + params.push_back(v[i]); + } +} + +} #endif diff --git a/libraries/gpu/src/gpu/GLBackend.cpp b/libraries/gpu/src/gpu/GLBackend.cpp index 62508f273c..79b37ddc0e 100644 --- a/libraries/gpu/src/gpu/GLBackend.cpp +++ b/libraries/gpu/src/gpu/GLBackend.cpp @@ -23,10 +23,13 @@ GLBackend::CommandCall GLBackend::_commandCalls[Batch::NUM_COMMANDS] = 
(&::gpu::GLBackend::do_drawIndexed), (&::gpu::GLBackend::do_drawInstanced), (&::gpu::GLBackend::do_drawIndexedInstanced), - + (&::gpu::GLBackend::do_multiDrawIndirect), + (&::gpu::GLBackend::do_multiDrawIndexedIndirect), + (&::gpu::GLBackend::do_setInputFormat), (&::gpu::GLBackend::do_setInputBuffer), (&::gpu::GLBackend::do_setIndexBuffer), + (&::gpu::GLBackend::do_setIndirectBuffer), (&::gpu::GLBackend::do_setModelTransform), (&::gpu::GLBackend::do_setViewTransform), @@ -50,6 +53,8 @@ GLBackend::CommandCall GLBackend::_commandCalls[Batch::NUM_COMMANDS] = (&::gpu::GLBackend::do_resetStages), + (&::gpu::GLBackend::do_runLambda), + (&::gpu::GLBackend::do_glActiveBindTexture), (&::gpu::GLBackend::do_glUniform1i), @@ -323,6 +328,9 @@ void GLBackend::do_drawInstanced(Batch& batch, uint32 paramOffset) { (void) CHECK_GL_ERROR(); } +// DO NOT MERGE THIS, it will break mac clients +#define GL_430 + void GLBackend::do_drawIndexedInstanced(Batch& batch, uint32 paramOffset) { updateInput(); updateTransform(); @@ -332,17 +340,63 @@ void GLBackend::do_drawIndexedInstanced(Batch& batch, uint32 paramOffset) { GLenum mode = _primitiveToGLmode[(Primitive)batch._params[paramOffset + 3]._uint]; uint32 numIndices = batch._params[paramOffset + 2]._uint; uint32 startIndex = batch._params[paramOffset + 1]._uint; + // FIXME glDrawElementsInstancedBaseVertexBaseInstance is only available in GL 4.3 + // and higher, so currently we ignore this field uint32 startInstance = batch._params[paramOffset + 0]._uint; GLenum glType = _elementTypeToGLType[_input._indexBufferType]; +#ifdef GL_430 + glDrawElementsInstancedBaseVertexBaseInstance(mode, numIndices, glType, reinterpret_cast(startIndex + _input._indexBufferOffset), numInstances, 0, startInstance); +#else glDrawElementsInstanced(mode, numIndices, glType, reinterpret_cast(startIndex + _input._indexBufferOffset), numInstances); +#endif (void)CHECK_GL_ERROR(); } + +void GLBackend::do_multiDrawIndirect(Batch& batch, uint32 paramOffset) { +#ifdef 
GL_430 + updateInput(); + updateTransform(); + updatePipeline(); + + uint commandCount = batch._params[paramOffset + 0]._uint; + GLenum mode = _primitiveToGLmode[(Primitive)batch._params[paramOffset + 1]._uint]; + + glMultiDrawArraysIndirect(mode, reinterpret_cast(_input._indirectBufferOffset), commandCount, _input._indirectBufferStride); +#else + // FIXME implement the slow path +#endif + (void)CHECK_GL_ERROR(); +} + +void GLBackend::do_multiDrawIndexedIndirect(Batch& batch, uint32 paramOffset) { +#ifdef GL_430 + updateInput(); + updateTransform(); + updatePipeline(); + + uint commandCount = batch._params[paramOffset + 0]._uint; + GLenum mode = _primitiveToGLmode[(Primitive)batch._params[paramOffset + 1]._uint]; + GLenum indexType = _elementTypeToGLType[_input._indexBufferType]; + + glMultiDrawElementsIndirect(mode, indexType, reinterpret_cast(_input._indirectBufferOffset), commandCount, _input._indirectBufferStride); +#else + // FIXME implement the slow path +#endif + (void)CHECK_GL_ERROR(); +} + + void GLBackend::do_resetStages(Batch& batch, uint32 paramOffset) { resetStages(); } +void GLBackend::do_runLambda(Batch& batch, uint32 paramOffset) { + std::function f = batch._lambdas.get(batch._params[paramOffset]._uint); + f(); +} + void GLBackend::resetStages() { resetInputStage(); resetPipelineStage(); diff --git a/libraries/gpu/src/gpu/GLBackend.h b/libraries/gpu/src/gpu/GLBackend.h index dabc69dedb..f12cda827a 100644 --- a/libraries/gpu/src/gpu/GLBackend.h +++ b/libraries/gpu/src/gpu/GLBackend.h @@ -252,11 +252,14 @@ protected: void do_drawIndexed(Batch& batch, uint32 paramOffset); void do_drawInstanced(Batch& batch, uint32 paramOffset); void do_drawIndexedInstanced(Batch& batch, uint32 paramOffset); - + void do_multiDrawIndirect(Batch& batch, uint32 paramOffset); + void do_multiDrawIndexedIndirect(Batch& batch, uint32 paramOffset); + // Input Stage void do_setInputFormat(Batch& batch, uint32 paramOffset); void do_setInputBuffer(Batch& batch, uint32 
paramOffset); void do_setIndexBuffer(Batch& batch, uint32 paramOffset); + void do_setIndirectBuffer(Batch& batch, uint32 paramOffset); void initInput(); void killInput(); @@ -284,6 +287,10 @@ protected: Offset _indexBufferOffset; Type _indexBufferType; + BufferPointer _indirectBuffer; + Offset _indirectBufferOffset{ 0 }; + Offset _indirectBufferStride{ 0 }; + GLuint _defaultVAO; InputStageState() : @@ -448,6 +455,9 @@ protected: // Reset stages void do_resetStages(Batch& batch, uint32 paramOffset); + + void do_runLambda(Batch& batch, uint32 paramOffset); + void resetStages(); // TODO: As long as we have gl calls explicitely issued from interface @@ -471,7 +481,6 @@ protected: static CommandCall _commandCalls[Batch::NUM_COMMANDS]; }; - }; #endif diff --git a/libraries/gpu/src/gpu/GLBackendInput.cpp b/libraries/gpu/src/gpu/GLBackendInput.cpp index 7f021fd5c5..2b14e4d7f0 100755 --- a/libraries/gpu/src/gpu/GLBackendInput.cpp +++ b/libraries/gpu/src/gpu/GLBackendInput.cpp @@ -273,21 +273,36 @@ void GLBackend::resetInputStage() { } void GLBackend::do_setIndexBuffer(Batch& batch, uint32 paramOffset) { - _input._indexBufferType = (Type) batch._params[paramOffset + 2]._uint; - BufferPointer indexBuffer = batch._buffers.get(batch._params[paramOffset + 1]._uint); + _input._indexBufferType = (Type)batch._params[paramOffset + 2]._uint; _input._indexBufferOffset = batch._params[paramOffset + 0]._uint; - _input._indexBuffer = indexBuffer; - if (indexBuffer) { - glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, getBufferID(*indexBuffer)); - } else { - glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); + + BufferPointer indexBuffer = batch._buffers.get(batch._params[paramOffset + 1]._uint); + if (indexBuffer != _input._indexBuffer) { + _input._indexBuffer = indexBuffer; + if (indexBuffer) { + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, getBufferID(*indexBuffer)); + } else { + // FIXME do we really need this? Is there ever a draw call where we care that the element buffer is null? 
+ glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); + } } (void) CHECK_GL_ERROR(); } -template -void popParam(Batch::Params& params, uint32& paramOffset, V& v) { - for (size_t i = 0; i < v.length(); ++i) { - v[i] = params[paramOffset++]._float; +void GLBackend::do_setIndirectBuffer(Batch& batch, uint32 paramOffset) { + _input._indirectBufferOffset = batch._params[paramOffset + 1]._uint; + _input._indirectBufferStride = batch._params[paramOffset + 2]._uint; + + BufferPointer buffer = batch._buffers.get(batch._params[paramOffset]._uint); + if (buffer != _input._indirectBuffer) { + _input._indirectBuffer = buffer; + if (buffer) { + glBindBuffer(GL_DRAW_INDIRECT_BUFFER, getBufferID(*buffer)); + } else { + // FIXME do we really need this? Is there ever a draw call where we care that the indirect buffer is null? + glBindBuffer(GL_DRAW_INDIRECT_BUFFER, 0); + } } + + (void)CHECK_GL_ERROR(); } diff --git a/libraries/render-utils/src/GeometryCache.cpp b/libraries/render-utils/src/GeometryCache.cpp index 093434f079..5e04fe867f 100644 --- a/libraries/render-utils/src/GeometryCache.cpp +++ b/libraries/render-utils/src/GeometryCache.cpp @@ -87,37 +87,34 @@ void GeometryCache::ShapeData::setupIndices(gpu::BufferPointer& indexBuffer, con void GeometryCache::ShapeData::setupBatch(gpu::Batch& batch) const { batch.setInputBuffer(gpu::Stream::POSITION, _positionView); batch.setInputBuffer(gpu::Stream::NORMAL, _normalView); + batch.setIndexBuffer(gpu::UINT16, _indices, 0); } void GeometryCache::ShapeData::draw(gpu::Batch& batch) const { if (_indexCount) { setupBatch(batch); - batch.setIndexBuffer(gpu::UINT16, _indices, _indexOffset); - batch.drawIndexed(gpu::TRIANGLES, _indexCount); + batch.drawIndexed(gpu::TRIANGLES, _indexCount, _indexOffset); } } void GeometryCache::ShapeData::drawWire(gpu::Batch& batch) const { if (_wireIndexCount) { setupBatch(batch); - batch.setIndexBuffer(gpu::UINT16, _indices, _wireIndexOffset); - batch.drawIndexed(gpu::LINES, _wireIndexCount); + 
batch.drawIndexed(gpu::LINES, _wireIndexCount, _wireIndexOffset); } } void GeometryCache::ShapeData::drawInstances(gpu::Batch& batch, size_t count) const { if (_indexCount) { setupBatch(batch); - batch.setIndexBuffer(gpu::UINT16, _indices, _indexOffset); - batch.drawIndexedInstanced(count, gpu::TRIANGLES, _indexCount); + batch.drawIndexedInstanced(count, gpu::TRIANGLES, _indexCount, _indexOffset); } } void GeometryCache::ShapeData::drawWireInstances(gpu::Batch& batch, size_t count) const { if (_wireIndexCount) { setupBatch(batch); - batch.setIndexBuffer(gpu::UINT16, _indices, _wireIndexOffset); - batch.drawIndexedInstanced(count, gpu::LINES, _wireIndexCount); + batch.drawIndexedInstanced(count, gpu::LINES, _wireIndexCount, _wireIndexOffset); } } @@ -323,7 +320,7 @@ void GeometryCache::buildShapes() { 20, 21, 21, 22, 22, 23, 23, 20, // back 0, 23, 1, 22, 2, 21, 3, 20 // sides }; - for (int i = 0; i < wireIndices.size(); ++i) { + for (size_t i = 0; i < wireIndices.size(); ++i) { indices[i] += startingIndex; } @@ -374,7 +371,7 @@ void GeometryCache::buildShapes() { 0, 3, 1, 3, 2, 3, }; - for (int i = 0; i < wireIndices.size(); ++i) { + for (size_t i = 0; i < wireIndices.size(); ++i) { wireIndices[i] += startingIndex; } diff --git a/libraries/render-utils/src/GeometryCache.h b/libraries/render-utils/src/GeometryCache.h index 2e0d0a5493..aa1593db78 100644 --- a/libraries/render-utils/src/GeometryCache.h +++ b/libraries/render-utils/src/GeometryCache.h @@ -232,14 +232,6 @@ public: /// Set a batch to the simple pipeline, returning the previous pipeline void useSimpleDrawPipeline(gpu::Batch& batch, bool noBlend = false); -private: - GeometryCache(); - virtual ~GeometryCache(); - void buildShapes(); - - typedef QPair IntPair; - typedef QPair VerticesIndices; - struct ShapeData { size_t _indexOffset{ 0 }; size_t _indexCount{ 0 }; @@ -263,7 +255,13 @@ private: VShape _shapes; +private: + GeometryCache(); + virtual ~GeometryCache(); + void buildShapes(); + typedef QPair 
IntPair; + typedef QPair VerticesIndices; gpu::PipelinePointer _standardDrawPipeline; gpu::PipelinePointer _standardDrawPipelineNoBlend; diff --git a/tests/gpu-test/src/main.cpp b/tests/gpu-test/src/main.cpp index ad9ed9bb4a..0acbbcd725 100644 --- a/tests/gpu-test/src/main.cpp +++ b/tests/gpu-test/src/main.cpp @@ -35,6 +35,7 @@ // Must come after GL headers #include +#include #include #include @@ -101,7 +102,24 @@ float getSeconds(quint64 start = 0) { return seconds; } +struct DrawElementsIndirectCommand { + uint _count{ 0 }; + uint _instanceCount{ 0 }; + uint _firstIndex{ 0 }; + uint _baseVertex{ 0 }; + uint _baseInstance{ 0 }; +}; +static const size_t TYPE_COUNT = 4; +static GeometryCache::Shape SHAPE[TYPE_COUNT] = { + GeometryCache::Icosahedron, + GeometryCache::Cube, + GeometryCache::Sphere, + GeometryCache::Tetrahedron, + //GeometryCache::Line, +}; + +gpu::Stream::FormatPointer& getInstancedSolidStreamFormat(); // Creates an OpenGL window that renders a simple unlit scene using the gpu library and GeometryCache // Should eventually get refactored into something that supports multiple gpu backends. @@ -134,7 +152,7 @@ public: // Qt Quick may need a depth and stencil buffer. Always make sure these are available. format.setDepthBufferSize(16); format.setStencilBufferSize(8); - format.setVersion(4, 1); + format.setVersion(4, 3); format.setProfile(QSurfaceFormat::OpenGLContextProfile::CoreProfile); format.setOption(QSurfaceFormat::DebugContext); format.setSwapInterval(0); @@ -147,6 +165,13 @@ public: show(); makeCurrent(); + QOpenGLDebugLogger *logger = new QOpenGLDebugLogger(this); + logger->initialize(); // initializes in the current context, i.e. 
ctx + connect(logger, &QOpenGLDebugLogger::messageLogged, [](const QOpenGLDebugMessage& message){ + qDebug() << message; + }); + logger->startLogging(QOpenGLDebugLogger::SynchronousLogging); + gpu::Context::init(); _context = std::make_shared(); @@ -177,7 +202,9 @@ public: void draw() { static auto startTime = usecTimestampNow(); - if (!isVisible()) { + // Attempting to draw before we're visible and have a valid size will + // produce GL errors. + if (!isVisible() || _size.width() <= 0 || _size.height() <= 0) { return; } makeCurrent(); @@ -192,7 +219,8 @@ public: glm::vec3 unitscale { 1.0f }; glm::vec3 up { 0.0f, 1.0f, 0.0f }; - glm::vec3 camera_position { 1.5f * sinf(t), 0.0f, 1.5f * cos(t) }; + float distance = 3.0f; + glm::vec3 camera_position{ distance * sinf(t), 0.0f, distance * cos(t) }; static const vec3 camera_focus(0); static const vec3 camera_up(0, 1, 0); @@ -202,57 +230,141 @@ public: batch.setModelTransform(Transform()); auto geometryCache = DependencyManager::get(); - + // Render grid on xz plane (not the optimal way to do things, but w/e) // Note: GeometryCache::renderGrid will *not* work, as it is apparenly unaffected by batch rotations and renders xy only - static const std::string GRID_INSTANCE = "Grid"; - static auto compactColor1 = toCompactColor(vec4{ 0.35f, 0.25f, 0.15f, 1.0f }); - static auto compactColor2 = toCompactColor(vec4{ 0.15f, 0.25f, 0.35f, 1.0f }); - auto transformBuffer = batch.getNamedBuffer(GRID_INSTANCE, 0); - auto colorBuffer = batch.getNamedBuffer(GRID_INSTANCE, 1); - for (int i = 0; i < 100; ++i) { - { - glm::mat4 transform = glm::translate(mat4(), vec3(0, -1, -50 + i)); - transform = glm::scale(transform, vec3(100, 1, 1)); - transformBuffer->append(transform); - colorBuffer->append(compactColor1); - } + { + static const std::string GRID_INSTANCE = "Grid"; + static auto compactColor1 = toCompactColor(vec4{ 0.35f, 0.25f, 0.15f, 1.0f }); + static auto compactColor2 = toCompactColor(vec4{ 0.15f, 0.25f, 0.35f, 1.0f }); + static 
gpu::BufferPointer transformBuffer; + static gpu::BufferPointer colorBuffer; + if (!transformBuffer) { + transformBuffer = std::make_shared(); + colorBuffer = std::make_shared(); + for (int i = 0; i < 100; ++i) { + { + glm::mat4 transform = glm::translate(mat4(), vec3(0, -1, -50 + i)); + transform = glm::scale(transform, vec3(100, 1, 1)); + transformBuffer->append(transform); + colorBuffer->append(compactColor1); + } - { - glm::mat4 transform = glm::mat4_cast(quat(vec3(0, PI / 2.0f, 0))); - transform = glm::translate(transform, vec3(0, -1, -50 + i)); - transform = glm::scale(transform, vec3(100, 1, 1)); - transformBuffer->append(transform); - colorBuffer->append(compactColor2); + { + glm::mat4 transform = glm::mat4_cast(quat(vec3(0, PI / 2.0f, 0))); + transform = glm::translate(transform, vec3(0, -1, -50 + i)); + transform = glm::scale(transform, vec3(100, 1, 1)); + transformBuffer->append(transform); + colorBuffer->append(compactColor2); + } + } } + + batch.setupNamedCalls(GRID_INSTANCE, 200, [=](gpu::Batch& batch, gpu::Batch::NamedBatchData& data) { + batch.setViewTransform(camera); + batch.setModelTransform(Transform()); + batch.setPipeline(_pipeline); + batch._glUniform1i(_instanceLocation, 1); + geometryCache->renderWireShapeInstances(batch, GeometryCache::Line, data._count, transformBuffer, colorBuffer); + batch._glUniform1i(_instanceLocation, 0); + }); } - batch.setupNamedCalls(GRID_INSTANCE, 200, [=](gpu::Batch& batch, gpu::Batch::NamedBatchData& data) { + { + static const size_t ITEM_COUNT = 1000; + static const float SHAPE_INTERVAL = (PI * 2.0f) / ITEM_COUNT; + static const float ITEM_INTERVAL = SHAPE_INTERVAL / TYPE_COUNT; + + static const gpu::Element POSITION_ELEMENT{ gpu::VEC3, gpu::FLOAT, gpu::XYZ }; + static const gpu::Element NORMAL_ELEMENT{ gpu::VEC3, gpu::FLOAT, gpu::XYZ }; + static const gpu::Element COLOR_ELEMENT{ gpu::VEC4, gpu::NUINT8, gpu::RGBA }; + static const gpu::Element TRANSFORM_ELEMENT{ gpu::MAT4, gpu::FLOAT, gpu::XYZW }; + + + static 
std::vector transforms; + static std::vector colors; + static gpu::BufferPointer indirectBuffer; + static gpu::BufferPointer transformBuffer; + static gpu::BufferPointer colorBuffer; + static gpu::BufferView colorView; + static gpu::BufferView instanceXfmView; + + if (!transformBuffer) { + transformBuffer = std::make_shared(); + colorBuffer = std::make_shared(); + indirectBuffer = std::make_shared(); + + static const float ITEM_RADIUS = 20; + static const vec3 ITEM_TRANSLATION{ 0, 0, -ITEM_RADIUS }; + for (size_t i = 0; i < TYPE_COUNT; ++i) { + GeometryCache::Shape shape = SHAPE[i]; + GeometryCache::ShapeData shapeData = geometryCache->_shapes[shape]; + { + DrawElementsIndirectCommand indirectCommand; + indirectCommand._count = shapeData._indexCount; + indirectCommand._instanceCount = ITEM_COUNT; + indirectCommand._baseInstance = i * ITEM_COUNT; + indirectCommand._firstIndex = shapeData._indexOffset / 2; + indirectCommand._baseVertex = 0; + indirectBuffer->append(indirectCommand); + } + + //indirectCommand._count + float startingInterval = ITEM_INTERVAL * i; + for (size_t j = 0; j < ITEM_COUNT; ++j) { + float theta = j * SHAPE_INTERVAL + startingInterval; + auto transform = glm::rotate(mat4(), theta, Vectors::UP); + transform = glm::rotate(transform, (randFloat() - 0.5f) * PI / 4.0f, Vectors::UNIT_X); + transform = glm::translate(transform, ITEM_TRANSLATION); + transform = glm::scale(transform, vec3(randFloat() / 2.0f + 0.5f)); + transformBuffer->append(transform); + transforms.push_back(transform); + auto color = vec4{ randomColorValue(64), randomColorValue(64), randomColorValue(64), 255 }; + color /= 255.0f; + colors.push_back(color); + colorBuffer->append(toCompactColor(color)); + } + } + colorView = gpu::BufferView(colorBuffer, COLOR_ELEMENT); + instanceXfmView = gpu::BufferView(transformBuffer, TRANSFORM_ELEMENT); + } + +#if 1 + GeometryCache::ShapeData shapeData = geometryCache->_shapes[GeometryCache::Icosahedron]; + { + batch.setViewTransform(camera); + 
batch.setModelTransform(Transform()); + batch.setPipeline(_pipeline); + batch._glUniform1i(_instanceLocation, 1); + batch.setInputFormat(getInstancedSolidStreamFormat()); + batch.setInputBuffer(gpu::Stream::COLOR, colorView); + batch.setInputBuffer(gpu::Stream::INSTANCE_XFM, instanceXfmView); + batch.setIndirectBuffer(indirectBuffer); + shapeData.setupBatch(batch); + batch.multiDrawIndexedIndirect(TYPE_COUNT, gpu::TRIANGLES); + batch._glUniform1i(_instanceLocation, 0); + } +#else batch.setViewTransform(camera); - batch.setModelTransform(Transform()); batch.setPipeline(_pipeline); - auto& xfm = data._buffers[0]; - auto& color = data._buffers[1]; - batch._glUniform1i(_instanceLocation, 1); - geometryCache->renderWireShapeInstances(batch, GeometryCache::Line, data._count, xfm, color); - batch._glUniform1i(_instanceLocation, 0); - }); - - + for (size_t i = 0; i < TYPE_COUNT; ++i) { + GeometryCache::Shape shape = SHAPE[i]; + for (size_t j = 0; j < ITEM_COUNT; ++j) { + int index = i * ITEM_COUNT + j; + batch.setModelTransform(transforms[index]); + const vec4& color = colors[index]; + batch._glColor4f(color.r, color.g, color.b, 1.0); + geometryCache->renderShape(batch, shape); + } + } +#endif + } // Render unlit cube + sphere - - static GeometryCache::Shape SHAPE[] = { - GeometryCache::Cube, - GeometryCache::Sphere, - GeometryCache::Tetrahedron, - GeometryCache::Icosahedron, - }; - static auto startUsecs = usecTimestampNow(); float seconds = getSeconds(startUsecs); seconds /= 4.0; - int shapeIndex = ((int)seconds) % 4; + int shapeIndex = ((int)seconds) % TYPE_COUNT; bool wire = seconds - floor(seconds) > 0.5f; batch.setModelTransform(Transform()); batch._glColor4f(0.8f, 0.25f, 0.25f, 1.0f); @@ -263,7 +375,7 @@ public: geometryCache->renderShape(batch, SHAPE[shapeIndex]); } - batch.setModelTransform(Transform().setScale(1.05f)); + batch.setModelTransform(Transform().setScale(2.05f)); batch._glColor4f(1, 1, 1, 1); geometryCache->renderWireCube(batch);