From 1e6d3b7be4128163f9bc18f191c74a79ab8ed467 Mon Sep 17 00:00:00 2001 From: Karol Suprynowicz Date: Fri, 15 Nov 2024 18:20:58 +0100 Subject: [PATCH] Vulkan descriptor sets work --- .../display-plugins/VulkanDisplayPlugin.cpp | 2 +- libraries/gpu-vk/src/gpu/vk/VKBackend.cpp | 380 ++++++++++++++---- libraries/gpu-vk/src/gpu/vk/VKBackend.h | 98 ++++- libraries/gpu-vk/src/gpu/vk/VKFramebuffer.cpp | 4 +- libraries/gpu-vk/src/gpu/vk/VKTexture.cpp | 2 +- libraries/gpu/src/gpu/FrameReader.cpp | 3 +- libraries/shaders/headers/450/header.glsl | 2 +- libraries/vk/src/vk/Context.cpp | 22 +- libraries/vk/src/vk/Context.h | 19 +- libraries/vk/src/vk/VKWindow.cpp | 2 +- libraries/vk/src/vk/VulkanBuffer.cpp | 29 ++ libraries/vk/src/vk/VulkanBuffer.h | 2 + libraries/vk/src/vk/VulkanDevice.cpp | 32 +- libraries/vk/src/vk/VulkanDevice.h | 8 +- libraries/vk/src/vk/VulkanTexture.cpp | 4 +- tools/gpu-frame-player/src/RenderThread.cpp | 6 +- 16 files changed, 488 insertions(+), 127 deletions(-) diff --git a/libraries/display-plugins/src/display-plugins/VulkanDisplayPlugin.cpp b/libraries/display-plugins/src/display-plugins/VulkanDisplayPlugin.cpp index 9c0cc6540b..4c07dea923 100644 --- a/libraries/display-plugins/src/display-plugins/VulkanDisplayPlugin.cpp +++ b/libraries/display-plugins/src/display-plugins/VulkanDisplayPlugin.cpp @@ -755,7 +755,7 @@ void VulkanDisplayPlugin::present(const std::shared_ptr& PROFILE_RANGE_EX(render, "internalPresent", 0xff00ffff, frameId) internalPresent(); } - _vkWindow->_swapchain.queuePresent(_vkWindow->_context.queue, currentImageIndex, _vkWindow->_renderCompleteSemaphore); + _vkWindow->_swapchain.queuePresent(_vkWindow->_context.graphicsQueue, currentImageIndex, _vkWindow->_renderCompleteSemaphore); gpu::Backend::freeGPUMemSize.set(gpu::gl::getFreeDedicatedMemory()); } else if (alwaysPresent()) { diff --git a/libraries/gpu-vk/src/gpu/vk/VKBackend.cpp b/libraries/gpu-vk/src/gpu/vk/VKBackend.cpp index abdb71c5e6..00bb45a6a3 100644 --- 
a/libraries/gpu-vk/src/gpu/vk/VKBackend.cpp +++ b/libraries/gpu-vk/src/gpu/vk/VKBackend.cpp @@ -104,12 +104,15 @@ VKBackend::VKBackend() { qCDebug(gpu_vk_logging) << "VK Device Type: " << _context.device->properties.deviceType; initTransform(); + createDescriptorPool(); + initDefaultTexture(); } VKBackend::~VKBackend() { // FIXME queue up all the trash calls - VK_CHECK_RESULT(vkQueueWaitIdle(_graphicsQueue)); - VK_CHECK_RESULT(vkQueueWaitIdle(_transferQueue)); + // VKTODO: move to context + VK_CHECK_RESULT(vkQueueWaitIdle(_context.graphicsQueue)); + VK_CHECK_RESULT(vkQueueWaitIdle(_context.transferQueue) ); VK_CHECK_RESULT(vkDeviceWaitIdle(_context.device->logicalDevice)); { @@ -206,7 +209,14 @@ struct Cache { gpu::FormatReference format{ GPU_REFERENCE_INIT_VALUE }; gpu::FramebufferReference framebuffer{ GPU_REFERENCE_INIT_VALUE }; - std::unordered_map _layoutMap; + struct PipelineLayout { + VkPipelineLayout pipelineLayout; + VkDescriptorSetLayout uniformLayout; + VkDescriptorSetLayout textureLayout; + VkDescriptorSetLayout storageLayout; + }; + + std::unordered_map _layoutMap; std::unordered_map> _renderPassMap; template @@ -254,7 +264,8 @@ struct Cache { return result; } - VkPipelineLayout getPipelineLayout(const vks::Context& context) { + // Returns structure containing pipeline layout and descriptor set layouts + PipelineLayout getPipelineAndDescriptorLayout(const vks::Context& context) { auto itr = _layoutMap.find(pipeline); if (_layoutMap.end() == itr) { auto pipeline = gpu::acquire(this->pipeline); @@ -271,6 +282,7 @@ struct Cache { auto& texLayout = uniLayout; auto& stoLayout = uniLayout; #endif + PipelineLayout layout {}; for (const auto& entry : getBindingMap(vertexReflection.uniformBuffers, fragmentRefelection.uniformBuffers)) { VkDescriptorSetLayoutBinding binding = vks::initializers::descriptorSetLayoutBinding(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,entry.second,entry.first,1); @@ -292,6 +304,7 @@ struct Cache { VkDescriptorSetLayout descriptorSetLayout; 
VK_CHECK_RESULT(vkCreateDescriptorSetLayout(context.device->logicalDevice, &descriptorSetLayoutCI, nullptr, &descriptorSetLayout)); layouts.push_back(descriptorSetLayout); + layout.uniformLayout = descriptorSetLayout; } #if SEP_DESC if (!texLayout.empty()) { @@ -299,18 +312,23 @@ struct Cache { VkDescriptorSetLayout descriptorSetLayout; VK_CHECK_RESULT(vkCreateDescriptorSetLayout(context.device->logicalDevice, &descriptorSetLayoutCI, nullptr, &descriptorSetLayout)); layouts.push_back(descriptorSetLayout); + layout.textureLayout = descriptorSetLayout; } if (!stoLayout.empty()) { VkDescriptorSetLayoutCreateInfo descriptorSetLayoutCI = vks::initializers::descriptorSetLayoutCreateInfo(stoLayout.data(), stoLayout.size()); VkDescriptorSetLayout descriptorSetLayout; VK_CHECK_RESULT(vkCreateDescriptorSetLayout(context.device->logicalDevice, &descriptorSetLayoutCI, nullptr, &descriptorSetLayout)); layouts.push_back(descriptorSetLayout); + layout.storageLayout = descriptorSetLayout; } #endif VkPipelineLayoutCreateInfo pipelineLayoutCI = vks::initializers::pipelineLayoutCreateInfo(layouts.data(), (uint32_t)layouts.size()); VkPipelineLayout pipelineLayout; VK_CHECK_RESULT(vkCreatePipelineLayout(context.device->logicalDevice, &pipelineLayoutCI, nullptr, &pipelineLayout)); - return _layoutMap[this->pipeline] = pipelineLayout; + + layout.pipelineLayout = pipelineLayout; + + return _layoutMap[this->pipeline] = layout; //return _layoutMap[this->pipeline] = nullptr; } return itr->second; @@ -323,7 +341,9 @@ struct Cache { } else { for (const auto& attachment : framebuffer->getRenderBuffers()) { if (attachment.isValid()) { - result.push_back(evalTexelFormatInternal(attachment._element)); + // VKTODO: why _element often has different format than texture's pixel format, and seemingly wrong one? 
+ //result.push_back(evalTexelFormatInternal(attachment._element)); + result.push_back(evalTexelFormatInternal(attachment._texture->getTexelFormat())); } } if (framebuffer->hasDepthStencil()) { @@ -448,7 +468,7 @@ struct Cache { VkPipeline getPipeline(const vks::Context& context) { auto renderpass = pipelineState.getRenderPass(context); - auto pipelineLayout = pipelineState.getPipelineLayout(context); + auto pipelineLayout = pipelineState.getPipelineAndDescriptorLayout(context); const gpu::Pipeline& pipeline = *gpu::acquire(pipelineState.pipeline); const gpu::State& state = *pipeline.getState(); @@ -457,7 +477,7 @@ struct Cache { // FIXME const gpu::State::Data& stateData = state.getValues(); - vks::pipelines::GraphicsPipelineBuilder builder{ context.device->logicalDevice, pipelineLayout, renderpass }; + vks::pipelines::GraphicsPipelineBuilder builder{ context.device->logicalDevice, pipelineLayout.pipelineLayout, renderpass }; // Input assembly { @@ -579,6 +599,9 @@ Cache _cache; void VKBackend::executeFrame(const FramePointer& frame) { using namespace vks::debugutils; + // Create descriptor pool + // VKTODO: delete descriptor pool after it's not needed + //_frameData._descriptorPool { const auto& commandBuffer = _currentCommandBuffer; for (const auto& batchPtr : frame->batches) { @@ -783,7 +806,7 @@ void VKBackend::TransformStageState::preUpdate(size_t commandIndex, const Stereo _invalidView = _invalidProj = _invalidViewport = false; } -void VKBackend::TransformStageState::update(size_t commandIndex, const StereoState& stereo) const { +void VKBackend::TransformStageState::update(size_t commandIndex, const StereoState& stereo, VKBackend::UniformStageState &uniform) const { size_t offset = INVALID_OFFSET; while ((_camerasItr != _cameraOffsets.end()) && (commandIndex >= (*_camerasItr).first)) { offset = (*_camerasItr).second; @@ -793,7 +816,7 @@ void VKBackend::TransformStageState::update(size_t commandIndex, const StereoSta if (offset != INVALID_OFFSET) { #ifdef 
GPU_STEREO_CAMERA_BUFFER - bindCurrentCamera(0); + bindCurrentCamera(0, uniform); #else if (!stereo.isStereo()) { bindCurrentCamera(0); @@ -802,12 +825,16 @@ void VKBackend::TransformStageState::update(size_t commandIndex, const StereoSta } } -void VKBackend::TransformStageState::bindCurrentCamera(int eye) const { - // VKTODO - /*if (_currentCameraOffset != INVALID_OFFSET) { +void VKBackend::TransformStageState::bindCurrentCamera(int eye, VKBackend::UniformStageState &uniform) const { + if (_currentCameraOffset != INVALID_OFFSET) { static_assert(slot::buffer::Buffer::CameraTransform >= MAX_NUM_UNIFORM_BUFFERS, "TransformCamera may overlap pipeline uniform buffer slots. Invalidate uniform buffer slot cache for safety (call _uniform._buffers[TRANSFORM_CAMERA_SLOT].reset())."); - glBindBufferRange(GL_UNIFORM_BUFFER, slot::buffer::Buffer::CameraTransform, _cameraBuffer, _currentCameraOffset + eye * _cameraUboSize, sizeof(CameraBufferElement)); - }*/ + // VKTODO: add convenience function for this? 
+ auto &buffer = uniform._buffers[slot::buffer::Buffer::CameraTransform]; + buffer.vksBuffer = _cameraBuffer.get(); + buffer.size = sizeof(CameraBufferElement); + buffer.offset = _currentCameraOffset + eye * _cameraUboSize; + //glBindBufferRange(GL_UNIFORM_BUFFER, slot::buffer::Buffer::CameraTransform, _cameraBuffer, _currentCameraOffset + eye * _cameraUboSize, sizeof(CameraBufferElement)); + } } void VKBackend::do_resetStages(const Batch& batch, size_t paramOffset) { @@ -931,6 +958,132 @@ void VKBackend::setCameraCorrection(const Mat4& correction, const Mat4& prevRend _pipeline._cameraCorrectionBuffer._buffer->flush();*/ } +void VKBackend::updateVkDescriptorWriteSetsUniform(VkDescriptorSet target) { + // Each write is submitted inside the loop: pBufferInfo points at a loop-local VkDescriptorBufferInfo, + // so collecting the writes and submitting them after the loop would leave every pBufferInfo dangling. + for (size_t i = 0; i < _uniform._buffers.size(); i++) { + if (_uniform._buffers[i].buffer || _uniform._buffers[i].vksBuffer) { + // These cannot be set at the same time + Q_ASSERT(!(_uniform._buffers[i].buffer && _uniform._buffers[i].vksBuffer)); + // VKTODO: move vulkan buffer creation to the transfer parts and aggregate several buffers together maybe?
+ VkDescriptorBufferInfo bufferInfo{}; + if (_uniform._buffers[i].buffer) { + Q_ASSERT(i != slot::buffer::Buffer::CameraTransform); // Camera buffer slot cannot be occupied by anything else + VKBuffer * buffer = syncGPUObject(*_uniform._buffers[i].buffer); + bufferInfo.buffer = buffer->buffer; + } else if (_uniform._buffers[i].vksBuffer) { + bufferInfo.buffer = _uniform._buffers[i].vksBuffer->buffer; + } + bufferInfo.offset = _uniform._buffers[i].offset; + bufferInfo.range = _uniform._buffers[i].size; + + VkWriteDescriptorSet descriptorWriteSet{}; + descriptorWriteSet.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + descriptorWriteSet.dstSet = target; + descriptorWriteSet.dstBinding = i; + descriptorWriteSet.dstArrayElement = 0; + descriptorWriteSet.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + descriptorWriteSet.descriptorCount = 1; + descriptorWriteSet.pBufferInfo = &bufferInfo; + vkUpdateDescriptorSets(_context.device->logicalDevice, 1, &descriptorWriteSet, 0, nullptr); + } + } +} + +void VKBackend::updateVkDescriptorWriteSetsTexture(VkDescriptorSet target) { + // Each write is submitted inside the loop: pImageInfo points at a loop-local VkDescriptorImageInfo, + // so collecting the writes and submitting them after the loop would leave every pImageInfo dangling. + for (size_t i = 0; i < _resource._textures.size(); i++) { + if (_resource._textures[i].texture) { + // VKTODO: move vulkan texture creation to the transfer parts + // VKTODO: this doesn't work yet + //VKTexture * texture = syncGPUObject(*_resource._textures[i]._texture); + VkDescriptorImageInfo imageInfo{}; + imageInfo.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + imageInfo.imageView = _defaultTexture.view; + imageInfo.sampler = _defaultTexture.sampler; + + VkWriteDescriptorSet descriptorWriteSet{}; + descriptorWriteSet.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + descriptorWriteSet.dstSet = target; + descriptorWriteSet.dstBinding = i; + descriptorWriteSet.dstArrayElement = 0; + descriptorWriteSet.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + descriptorWriteSet.descriptorCount = 1; +
descriptorWriteSet.pImageInfo = &imageInfo; + vkUpdateDescriptorSets(_context.device->logicalDevice, 1, &descriptorWriteSet, 0, nullptr); + } + } +} + +void VKBackend::updateVkDescriptorWriteSetsStorage(VkDescriptorSet target) { + // Each write is submitted inside the loop: pBufferInfo points at a loop-local VkDescriptorBufferInfo, + // so collecting the writes and submitting them after the loop would leave every pBufferInfo dangling. + for (size_t i = 0; i < _resource._buffers.size(); i++) { + if (_resource._buffers[i].buffer || _resource._buffers[i].vksBuffer) { + Q_ASSERT(!(_resource._buffers[i].buffer && _resource._buffers[i].vksBuffer)); + // VKTODO: move vulkan buffer creation to the transfer parts and aggregate several buffers together maybe? + VkDescriptorBufferInfo bufferInfo{}; + if (_resource._buffers[i].buffer) { + VKBuffer* buffer = syncGPUObject(*_resource._buffers[i].buffer); + bufferInfo.buffer = buffer->buffer; + bufferInfo.range = _resource._buffers[i].buffer->getSize(); + } else if (_resource._buffers[i].vksBuffer) + { + bufferInfo.buffer = _resource._buffers[i].vksBuffer->buffer; + bufferInfo.range = _resource._buffers[i].vksBuffer->size; + } + bufferInfo.offset = 0; + + VkWriteDescriptorSet descriptorWriteSet{}; + descriptorWriteSet.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + descriptorWriteSet.dstSet = target; + descriptorWriteSet.dstBinding = i; + descriptorWriteSet.dstArrayElement = 0; + descriptorWriteSet.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + descriptorWriteSet.descriptorCount = 1; + descriptorWriteSet.pBufferInfo = &bufferInfo; + vkUpdateDescriptorSets(_context.device->logicalDevice, 1, &descriptorWriteSet, 0, nullptr); + } + } +} + + +void VKBackend::releaseUniformBuffer(uint32_t slot) { + auto& bufferState = _uniform._buffers[slot]; + if (valid(bufferState.buffer)) { + // VKTODO + //glBindBufferBase(GL_UNIFORM_BUFFER, slot, 0); // RELEASE + //(void)CHECK_GL_ERROR(); + } + bufferState.reset(); +} + +void VKBackend::releaseResourceTexture(uint32_t slot) { + auto& textureState = _resource._textures[slot]; + if
(valid(textureState.texture)) { + // VKTODO + //glActiveTexture(GL_TEXTURE0 + slot); + //glBindTexture(textureState._target, 0); // RELEASE + //(void)CHECK_GL_ERROR(); + reset(textureState.texture); + } +} + +void VKBackend::releaseResourceBuffer(uint32_t slot) { + auto& bufferReference = _resource._buffers[slot].buffer; + auto buffer = acquire(bufferReference); + if (buffer) { + // VKTODO + //glActiveTexture(GL_TEXTURE0 + GLESBackend::RESOURCE_BUFFER_SLOT0_TEX_UNIT + slot); + //glBindTexture(GL_TEXTURE_BUFFER, 0); + reset(bufferReference); + } + if (_resource._buffers[slot].vksBuffer) { + reset(_resource._buffers[slot].vksBuffer); + } +} + void VKBackend::renderPassTransfer(const Batch& batch) { const size_t numCommands = batch.getCommands().size(); const Batch::Commands::value_type* command = batch.getCommands().data(); @@ -1016,6 +1169,9 @@ void VKBackend::renderPassDraw(const Batch& batch) { updateInput(); updateTransform(batch); updatePipeline(); + if (_cache.pipelineState.framebuffer->getRenderBuffers()[0]._texture->getTexelFormat().getSemantic() == gpu::R11G11B10) { + printf("Test"); + } auto renderPassBeginInfo = vks::initializers::renderPassBeginInfo(); renderPassBeginInfo.renderPass = _cache.pipelineState.getRenderPass(_context); Q_ASSERT(_cache.pipelineState.framebuffer); @@ -1034,7 +1190,7 @@ void VKBackend::renderPassDraw(const Batch& batch) { } } renderPassBeginInfo.pClearValues = clearValues.data(); - renderPassBeginInfo.renderArea = VkRect2D{VkOffset2D {_transform._viewport.x, _transform._viewport.y}, VkExtent2D {_transform._viewport.z, _transform._viewport.w}}; + renderPassBeginInfo.renderArea = VkRect2D{VkOffset2D {_transform._viewport.x, _transform._viewport.y}, VkExtent2D {(uint32_t)_transform._viewport.z, (uint32_t)_transform._viewport.w}}; // VKTODO: this is inefficient vkCmdBeginRenderPass(_currentCommandBuffer, &renderPassBeginInfo, VK_SUBPASS_CONTENTS_INLINE); // VKTODO: this is inefficient @@ -1055,6 +1211,51 @@ void 
VKBackend::renderPassDraw(const Batch& batch) { scissor.extent.width = _currentScissorRect.z; scissor.extent.height = _currentScissorRect.w; vkCmdSetScissor(_currentCommandBuffer, 0, 1, &scissor); + auto layout = _cache.pipelineState.getPipelineAndDescriptorLayout(_context); + // VKTODO: Descriptor sets and associated buffers should be set up during pre-pass + // VKTODO: move this to a function + if (layout.uniformLayout) { + // TODO: allocate 3 at once? + VkDescriptorSetAllocateInfo allocInfo{}; + allocInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; + allocInfo.descriptorPool = _frameData._descriptorPool; + allocInfo.descriptorSetCount = 1; + allocInfo.pSetLayouts = &layout.uniformLayout; + VkDescriptorSet descriptorSet; + VK_CHECK_RESULT(vkAllocateDescriptorSets(_context.device->logicalDevice, &allocInfo, &descriptorSet)); + + updateVkDescriptorWriteSetsUniform(descriptorSet); + vkCmdBindDescriptorSets(_currentCommandBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, layout.pipelineLayout, 0, 1, + &descriptorSet, 0, nullptr); + } + if (layout.textureLayout) { + // TODO: allocate 3 at once? + VkDescriptorSetAllocateInfo allocInfo{}; + allocInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; + allocInfo.descriptorPool = _frameData._descriptorPool; + allocInfo.descriptorSetCount = 1; + allocInfo.pSetLayouts = &layout.textureLayout; + VkDescriptorSet descriptorSet; + VK_CHECK_RESULT(vkAllocateDescriptorSets(_context.device->logicalDevice, &allocInfo, &descriptorSet)); + + updateVkDescriptorWriteSetsTexture(descriptorSet); + vkCmdBindDescriptorSets(_currentCommandBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, layout.pipelineLayout, 1, 1, + &descriptorSet, 0, nullptr); + } + if (layout.storageLayout) { + // TODO: allocate 3 at once? 
+ VkDescriptorSetAllocateInfo allocInfo{}; + allocInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; + allocInfo.descriptorPool = _frameData._descriptorPool; + allocInfo.descriptorSetCount = 1; + allocInfo.pSetLayouts = &layout.storageLayout; + VkDescriptorSet descriptorSet; + VK_CHECK_RESULT(vkAllocateDescriptorSets(_context.device->logicalDevice, &allocInfo, &descriptorSet)); + + updateVkDescriptorWriteSetsStorage(descriptorSet); + vkCmdBindDescriptorSets(_currentCommandBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, layout.pipelineLayout, 2, 1, + &descriptorSet, 0, nullptr); + } CommandCall call = _commandCalls[(*command)]; (this->*(call))(batch, *offset); vkCmdEndRenderPass(_currentCommandBuffer); @@ -1127,13 +1328,12 @@ VKBuffer* VKBackend::syncGPUObject(const Buffer& buffer) { return vk::VKBuffer::sync(*this, buffer); } -VKTexture* VKBackend::syncGPUObject(const TexturePointer& texturePointer) { +VKTexture* VKBackend::syncGPUObject(const Texture& texture) { // VKTODO - if (!texturePointer) { + /*if (!texture) { return nullptr; - } + }*/ - const Texture& texture = *texturePointer; if (TextureUsageType::EXTERNAL == texture.getUsageType()) { // VKTODO: return nullptr; @@ -1157,7 +1357,7 @@ VKTexture* VKBackend::syncGPUObject(const TexturePointer& texturePointer) { #endif case TextureUsageType::STRICT_RESOURCE: // VKTODO - //qCDebug(gpugllogging) << "Strict texture " << texture.source().c_str(); + //qCDebug(gpu_vk_logging) << "Strict texture " << texture.source().c_str(); //object = new GL45StrictResourceTexture(shared_from_this(), texture); break; @@ -1324,6 +1524,39 @@ void VKBackend::updateInput() { } } +void VKBackend::createDescriptorPool() { + std::vector poolSizes = { + { VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 100000 }, + { VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 50000 }, + { VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 50000 } + }; + + VkDescriptorPoolCreateInfo descriptorPoolCI = {}; + descriptorPoolCI.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; + 
descriptorPoolCI.flags = 0; + descriptorPoolCI.maxSets = 1000; + descriptorPoolCI.poolSizeCount = (uint32_t)poolSizes.size(); + descriptorPoolCI.pPoolSizes = poolSizes.data(); + + VK_CHECK_RESULT(vkCreateDescriptorPool(_context.device->logicalDevice, &descriptorPoolCI, nullptr, &_frameData._descriptorPool)); +} + +void VKBackend::initDefaultTexture() { + int width = 256; + int height = 256; + std::vector buffer; + buffer.resize(width * height * 4); // 4 bytes per texel (VK_FORMAT_R8G8B8A8_SRGB) + for (int x = 0; x < width; x++) { + for (int y = 0; y < height; y++) { + buffer[(x + y * width) * 4] = x; // texel index scaled by the 4-byte stride + buffer[(x + y * width) * 4 + 1] = y; + buffer[(x + y * width) * 4 + 2] = x + y; + buffer[(x + y * width) * 4 + 3] = x - y; + } + } + _defaultTexture.fromBuffer(buffer.data(), buffer.size(), VK_FORMAT_R8G8B8A8_SRGB, width, height, _context.device.get(), _context.transferQueue); +} + void VKBackend::initTransform() { #ifdef GPU_SSBO_TRANSFORM_OBJECT @@ -1338,11 +1571,11 @@ void VKBackend::initTransform() { void VKBackend::updateTransform(const gpu::Batch& batch) { // VKTODO - _transform.update(_commandIndex, _stereo); + _transform.update(_commandIndex, _stereo, _uniform); auto& drawCallInfoBuffer = batch.getDrawCallInfoBuffer(); if (batch._currentNamedCall.empty()) { - auto& drawCallInfo = drawCallInfoBuffer[_currentDraw]; + //auto& drawCallInfo = drawCallInfoBuffer[_currentDraw]; if (_transform._enabledDrawcallInfoBuffer) { //glDisableVertexAttribArray(gpu::Stream::DRAW_CALL_INFO); // Make sure attrib array is disabled _transform._enabledDrawcallInfoBuffer = false; @@ -1350,7 +1583,8 @@ void VKBackend::updateTransform(const gpu::Batch& batch) { // VKTODO // Since Vulkan has no glVertexAttrib equivalent we need to pass a buffer pointer here //glVertexAttribI2i(gpu::Stream::DRAW_CALL_INFO, drawCallInfo.index, drawCallInfo.unused); - VkDeviceSize vkOffset = _transform._drawCallInfoOffsets[batch._currentNamedCall]; + // Draw call info for unnamed calls starts at the beginning of the buffer, with offset dependent on _currentDraw +
VkDeviceSize vkOffset = _currentDraw * sizeof(gpu::Batch::DrawCallInfo); vkCmdBindVertexBuffers(_currentCommandBuffer, gpu::Stream::DRAW_CALL_INFO, 1, &_transform._drawCallInfoBuffer->buffer, &vkOffset); } else { if (!_transform._enabledDrawcallInfoBuffer) { @@ -1424,7 +1658,7 @@ void VKBackend::transferTransformState(const Batch& batch) { } if (!batch._objects.empty()) { - _transform._objectBuffer = vks::Buffer::createUniform(batch._objects.size() * sizeof(Batch::TransformObject)); + _transform._objectBuffer = vks::Buffer::createStorage(batch._objects.size() * sizeof(Batch::TransformObject)); _frameData._buffers.push_back(_transform._objectBuffer); _transform._objectBuffer->map(); _transform._objectBuffer->copy(batch._objects.size() * sizeof(Batch::TransformObject), batch._objects.data()); @@ -1435,14 +1669,16 @@ void VKBackend::transferTransformState(const Batch& batch) { _transform._objectBuffer.reset(); } - if (!batch._namedData.empty() || batch._drawCallInfos.empty()) { + if (!batch._namedData.empty() || !batch._drawCallInfos.empty()) { bufferData.clear(); bufferData.reserve(batch._drawCallInfos.size() * sizeof(Batch::DrawCallInfo)); // VKTODO - auto bytesToCopy = data.second.drawCallInfos.size() * sizeof(Batch::DrawCallInfo); - bufferData.resize(currentSize + bytesToCopy); - memcpy(bufferData.data() + currentSize, data.second.drawCallInfos.data(), bytesToCopy); - _transform._drawCallInfoOffsets[data.first] = currentSize; + { + auto currentSize = bufferData.size(); + auto bytesToCopy = batch._drawCallInfos.size() * sizeof(Batch::DrawCallInfo); + bufferData.resize(currentSize + bytesToCopy); + memcpy(bufferData.data() + currentSize, batch._drawCallInfos.data(), bytesToCopy); + } for (auto& data : batch._namedData) { auto currentSize = bufferData.size(); auto bytesToCopy = data.second.drawCallInfos.size() * sizeof(Batch::DrawCallInfo); @@ -1452,7 +1688,7 @@ void VKBackend::transferTransformState(const Batch& batch) { } //_transform._drawCallInfoBuffer = 
std::make_shared(); //_frameData._buffers.push_back(_transform._drawCallInfoBuffer); - _transform._drawCallInfoBuffer = vks::Buffer::createUniform(bufferData.size()); + _transform._drawCallInfoBuffer = vks::Buffer::createVertex(bufferData.size()); _frameData._buffers.push_back(_transform._drawCallInfoBuffer); _transform._drawCallInfoBuffer->map(); _transform._drawCallInfoBuffer->copy(bufferData.size(), bufferData.data()); @@ -1464,6 +1700,9 @@ void VKBackend::transferTransformState(const Batch& batch) { } // VKTODO + if (_transform._objectBuffer) { + _resource._buffers[slot::storage::ObjectTransforms].vksBuffer = _transform._objectBuffer.get(); + } //glBindBufferBase(GL_SHADER_STORAGE_BUFFER, slot::storage::ObjectTransforms, _transform._objectBuffer); // Make sure the current Camera offset is unknown before render Draw @@ -2066,10 +2305,12 @@ void VKBackend::do_setStateBlendFactor(const Batch& batch, size_t paramOffset) { void VKBackend::do_setUniformBuffer(const Batch& batch, size_t paramOffset) { //VKTODO - /*VKuint slot = batch._params[paramOffset + 3]._uint; + uint32_t slot = batch._params[paramOffset + 3]._uint; BufferPointer uniformBuffer = batch._buffers.get(batch._params[paramOffset + 2]._uint); - VKintptr rangeStart = batch._params[paramOffset + 1]._uint; - VKsizeiptr rangeSize = batch._params[paramOffset + 0]._uint; + uint32_t rangeStart = batch._params[paramOffset + 1]._uint; + uint32_t rangeSize = batch._params[paramOffset + 0]._uint; + + // Create descriptor if (!uniformBuffer) { releaseUniformBuffer(slot); @@ -2077,29 +2318,28 @@ void VKBackend::do_setUniformBuffer(const Batch& batch, size_t paramOffset) { } // check cache before thinking - if (_uniform._buffers[slot] == uniformBuffer) { + if (_uniform._buffers[slot].buffer == uniformBuffer.get()) { return; } // Sync BufferObject auto* object = syncGPUObject(*uniformBuffer); if (object) { - glBindBufferRange(VK_UNIFORM_BUFFER, slot, object->_buffer, rangeStart, rangeSize); + 
//glBindBufferRange(VK_UNIFORM_BUFFER, slot, object->_buffer, rangeStart, rangeSize); - _uniform._buffers[slot] = uniformBuffer; - (void)CHECK_VK_ERROR(); + _uniform._buffers[slot].buffer = uniformBuffer.get(); } else { releaseResourceTexture(slot); return; - }*/ + } } void VKBackend::do_setResourceBuffer(const Batch& batch, size_t paramOffset) { //VKTODO: - /*GLuint slot = batch._params[paramOffset + 1]._uint; - if (slot >= (GLuint)MAX_NUM_RESOURCE_BUFFERS) { - qCDebug(gpugllogging) << "GLBackend::do_setResourceBuffer: Trying to set a resource Buffer at slot #" << slot - << " which doesn't exist. MaxNumResourceBuffers = " << getMaxNumResourceBuffers(); + uint32_t slot = batch._params[paramOffset + 1]._uint; + if (slot >= (uint32_t)MAX_NUM_RESOURCE_BUFFERS) { + qCDebug(gpu_vk_logging) << "GLBackend::do_setResourceBuffer: Trying to set a resource Buffer at slot #" << slot + << " which doesn't exist. MaxNumResourceBuffers = " << MAX_NUM_RESOURCE_BUFFERS; return; } @@ -2110,33 +2350,38 @@ void VKBackend::do_setResourceBuffer(const Batch& batch, size_t paramOffset) { return; } // check cache before thinking - if (compare(_resource._buffers[slot], resourceBuffer)) { + if (compare(_resource._buffers[slot].buffer, resourceBuffer)) { return; } // One more True Buffer bound _stats._RSNumResourceBufferBounded++; - // If successful bind then cache it - if (bindResourceBuffer(slot, resourceBuffer)) { - assign(_resource._buffers[slot], resourceBuffer); + // If successful then cache it + auto* object = syncGPUObject(*resourceBuffer); + if (object) { + assign(_resource._buffers[slot].buffer, resourceBuffer); } else { // else clear slot and cache releaseResourceBuffer(slot); return; - }*/ + } } void VKBackend::do_setResourceTexture(const Batch& batch, size_t paramOffset) { // VKTODO: - /*VKuint slot = batch._params[paramOffset + 1]._uint; + uint32_t slot = batch._params[paramOffset + 1]._uint; TexturePointer resourceTexture = batch._textures.get(batch._params[paramOffset + 
0]._uint); + if (slot == 2) { + printf("break"); + } + if (!resourceTexture) { releaseResourceTexture(slot); return; } // check cache before thinking - if (_resource._textures[slot] == resourceTexture) { + if (_resource._textures[slot].texture == resourceTexture.get()) { return; } @@ -2144,31 +2389,30 @@ void VKBackend::do_setResourceTexture(const Batch& batch, size_t paramOffset) { _stats._RSNumTextureBounded++; // Always make sure the VKObject is in sync - VKTexture* object = syncGPUObject(resourceTexture); - if (object) { - VKuint to = object->_texture; - VKuint target = object->_target; - glActiveTexture(VK_TEXTURE0 + slot); - glBindTexture(target, to); + // VKTODO + //VKTexture* object = syncGPUObject(resourceTexture); + //if (object) { + //uint32_t to = object->_texture; + //uint32_t target = object->_target; + //glActiveTexture(VK_TEXTURE0 + slot); + //glBindTexture(target, to); - (void)CHECK_VK_ERROR(); + _resource._textures[slot].texture = resourceTexture.get(); - _resource._textures[slot] = resourceTexture; + //_stats._RSAmountTextureMemoryBounded += object->size(); - _stats._RSAmountTextureMemoryBounded += object->size(); - - } else { - releaseResourceTexture(slot); - return; - }*/ + //} else { + // releaseResourceTexture(slot); + // return; + //} } void VKBackend::do_setResourceFramebufferSwapChainTexture(const Batch& batch, size_t paramOffset) { - /*GLuint slot = batch._params[paramOffset + 1]._uint; - if (slot >= (GLuint)MAX_NUM_RESOURCE_TEXTURES) { - qCDebug(gpugllogging) + /*uint32_t slot = batch._params[paramOffset + 1]._uint; + if (slot >= MAX_NUM_RESOURCE_TEXTURES) { + qCDebug(gpu_vk_logging) << "GLBackend::do_setResourceFramebufferSwapChainTexture: Trying to set a resource Texture at slot #" << slot - << " which doesn't exist. MaxNumResourceTextures = " << getMaxNumResourceTextures(); + << " which doesn't exist. 
MaxNumResourceTextures = " << MAX_NUM_RESOURCE_TEXTURES; return; } diff --git a/libraries/gpu-vk/src/gpu/vk/VKBackend.h b/libraries/gpu-vk/src/gpu/vk/VKBackend.h index 9157d3727d..c2b3b8732f 100644 --- a/libraries/gpu-vk/src/gpu/vk/VKBackend.h +++ b/libraries/gpu-vk/src/gpu/vk/VKBackend.h @@ -23,6 +23,7 @@ #include #include #include +#include #include #include "VKForward.h" @@ -48,6 +49,17 @@ namespace gpu { namespace vk { +static const int MAX_NUM_UNIFORM_BUFFERS = 14; // There's also camera buffer at slot 15 + +static const int32_t MIN_REQUIRED_TEXTURE_IMAGE_UNITS = 16; +static const int32_t MIN_REQUIRED_COMBINED_UNIFORM_BLOCKS = 70; +static const int32_t MIN_REQUIRED_COMBINED_TEXTURE_IMAGE_UNITS = 48; +static const int32_t MIN_REQUIRED_UNIFORM_BUFFER_BINDINGS = 36; +static const int32_t MIN_REQUIRED_UNIFORM_LOCATIONS = 1024; + +static const int MAX_NUM_RESOURCE_BUFFERS = 16; +static const int MAX_NUM_RESOURCE_TEXTURES = 16; + class VKInputFormat : public GPUObject { public: static VKInputFormat* sync(const Stream::Format& inputFormat); @@ -76,6 +88,8 @@ protected: mat4 prevViewInverse; }; + struct UniformStageState; + struct TransformStageState { #ifdef GPU_STEREO_CAMERA_BUFFER struct Cameras { @@ -128,8 +142,8 @@ protected: mutable size_t _currentCameraOffset{ INVALID_OFFSET }; void preUpdate(size_t commandIndex, const StereoState& stereo, Vec2u framebufferSize); - void update(size_t commandIndex, const StereoState& stereo) const; - void bindCurrentCamera(int stereoSide) const; + void update(size_t commandIndex, const StereoState& stereo, VKBackend::UniformStageState &uniform) const; + void bindCurrentCamera(int stereoSide, VKBackend::UniformStageState &uniform) const; } _transform; static const int MAX_NUM_ATTRIBUTES = Stream::NUM_INPUT_SLOTS; @@ -166,9 +180,77 @@ protected: uint32_t _defaultVAO { 0 }; } _input; + struct UniformStageState { + struct BufferState { + // Only one of buffer or vksBuffer may be not NULL + BufferReference buffer{}; + vks::Buffer 
*vksBuffer{}; + uint32_t offset{ 0 }; // VKTODO: is it correct type + uint32_t size{ 0 }; // VKTODO: is it correct type + + BufferState& operator=(const BufferState& other) = delete; + void reset() { + gpu::reset(buffer); + gpu::reset(vksBuffer); + offset = 0; + size = 0; + } + + /*bool compare(const BufferPointer& buffer, uint32_t offset, uint32_t size) { + const auto& self = *this; + return (self.offset == offset && self.size == size && gpu::compare(self.buffer, buffer)); + }*/ + }; + + // MAX_NUM_UNIFORM_BUFFERS-1 is the max uniform index BATCHES are allowed to set, but + // MIN_REQUIRED_UNIFORM_BUFFER_BINDINGS is used here because the backend sets some + // internal UBOs for things like camera correction + std::array _buffers; + } _uniform; + + void updateVkDescriptorWriteSetsUniform(VkDescriptorSet target); + void releaseUniformBuffer(uint32_t slot); + + // VKTODO + struct ResourceStageState { + struct TextureState { + TextureReference texture{}; + }; + struct BufferState { + BufferReference buffer{}; + vks::Buffer *vksBuffer{}; + }; + std::array _buffers{}; + std::array _textures{}; + //int findEmptyTextureSlot() const; + } _resource; + + void updateVkDescriptorWriteSetsTexture(VkDescriptorSet target); + void releaseResourceTexture(uint32_t slot); + + void updateVkDescriptorWriteSetsStorage(VkDescriptorSet target); + void releaseResourceBuffer(uint32_t slot); + + // VKTODO + struct OutputStageState { + FramebufferReference _framebuffer{}; + int _drawFBO{ 0 }; + } _output; + + // VKTODO + void resetQueryStage(); + struct QueryStageState { + uint32_t _rangeQueryDepth{ 0 }; + } _queryStage; + + // VKTODO: one instance per each frame // Contains objects that are created per frame and need to be deleted after the frame is rendered struct FrameData { + std::vector uniformDescriptorSets; + std::vector textureDescriptorSets; + std::vector storageDescriptorSets; + VkDescriptorPool _descriptorPool; std::vector> _buffers; std::vector _renderPasses; void reset() {}; // 
VKTODO @@ -184,7 +266,7 @@ protected: vk::VKFramebuffer* syncGPUObject(const Framebuffer& framebuffer); VKBuffer* syncGPUObject(const Buffer& buffer); - VKTexture* syncGPUObject(const TexturePointer& texturePointer); + VKTexture* syncGPUObject(const Texture& texture); VKQuery* syncGPUObject(const Query& query); public: @@ -241,7 +323,7 @@ public: // Resource Stage virtual void do_setResourceBuffer(const Batch& batch, size_t paramOffset) final; virtual void do_setResourceTexture(const Batch& batch, size_t paramOffset) final; - virtual void do_setResourceTextureTable(const Batch& batch, size_t paramOffset) {}; // VKTODO: not needed currently, to be implemented in the future + virtual void do_setResourceTextureTable(const Batch& batch, size_t paramOffset) {}; virtual void do_setResourceFramebufferSwapChainTexture(const Batch& batch, size_t paramOffset) final; // Pipeline Stage @@ -278,14 +360,18 @@ public: virtual void do_popProfileRange(const Batch& batch, size_t paramOffset) final; protected: + // Creates descriptor pool for current frame + void createDescriptorPool(); void initTransform(); + void initDefaultTexture(); // Logical device, application's view of the physical device (GPU) // VkPipeline cache object VkPipelineCache _pipelineCache; vks::Context& _context{ vks::Context::get() }; - VkQueue _graphicsQueue; //TODO: initialize from device - VkQueue _transferQueue; //TODO: initialize from device + //VkQueue _graphicsQueue; //TODO: initialize from device + //VkQueue _transferQueue; //TODO: initialize from device + vks::Texture2D _defaultTexture; friend class VKBuffer; friend class VKFramebuffer; VkCommandBuffer _currentCommandBuffer; diff --git a/libraries/gpu-vk/src/gpu/vk/VKFramebuffer.cpp b/libraries/gpu-vk/src/gpu/vk/VKFramebuffer.cpp index 2c5f26e0e9..3eb9addb03 100644 --- a/libraries/gpu-vk/src/gpu/vk/VKFramebuffer.cpp +++ b/libraries/gpu-vk/src/gpu/vk/VKFramebuffer.cpp @@ -48,7 +48,7 @@ void gpu::vk::VKFramebuffer::update() { surface = b._texture; if 
(surface) { Q_ASSERT(TextureUsageType::RENDERBUFFER == surface->getUsageType()); - vkTexture = backend->syncGPUObject(surface); + vkTexture = backend->syncGPUObject(*surface.get()); } else { vkTexture = nullptr; } @@ -99,7 +99,7 @@ void gpu::vk::VKFramebuffer::update() { auto backend = _backend.lock(); if (_gpuObject.hasDepthStencil() && surface) { Q_ASSERT(TextureUsageType::RENDERBUFFER == surface->getUsageType()); - vkTexture = backend->syncGPUObject(surface); + vkTexture = backend->syncGPUObject(*surface.get()); } if (vkTexture) { diff --git a/libraries/gpu-vk/src/gpu/vk/VKTexture.cpp b/libraries/gpu-vk/src/gpu/vk/VKTexture.cpp index aa4e17e1d3..5b6c7c7214 100644 --- a/libraries/gpu-vk/src/gpu/vk/VKTexture.cpp +++ b/libraries/gpu-vk/src/gpu/vk/VKTexture.cpp @@ -195,7 +195,7 @@ void VKAttachmentTexture::createTexture() { imageCI.arrayLayers = _gpuObject.isArray() ? _gpuObject.getNumSlices() : 1; imageCI.samples = VK_SAMPLE_COUNT_1_BIT; imageCI.tiling = VK_IMAGE_TILING_OPTIMAL; - if (_gpuObject.isColorRenderTarget()) { + if (_gpuObject.isColorRenderTarget() || _gpuObject.getTexelFormat().getSemantic() == gpu::R11G11B10) { imageCI.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; } else if (_gpuObject.isDepthStencilRenderTarget()) { imageCI.usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; diff --git a/libraries/gpu/src/gpu/FrameReader.cpp b/libraries/gpu/src/gpu/FrameReader.cpp index 25c9499091..c24b925e53 100644 --- a/libraries/gpu/src/gpu/FrameReader.cpp +++ b/libraries/gpu/src/gpu/FrameReader.cpp @@ -631,7 +631,8 @@ TextureView Deserializer::readTextureView(const json& node) { uint32_t textureIndex = node; return textures[textureIndex]; }; - readOptionalTransformed(result._texture, node, keys::texture, texturePointerReader); + bool textureFound = readOptionalTransformed(result._texture, node, keys::texture, texturePointerReader); + Q_ASSERT(textureFound); readOptionalTransformed(result._element, node, keys::element, &readElement); 
readOptional(result._subresource, node, keys::subresource); return result; diff --git a/libraries/shaders/headers/450/header.glsl b/libraries/shaders/headers/450/header.glsl index b0f3625082..cf840dc3ac 100644 --- a/libraries/shaders/headers/450/header.glsl +++ b/libraries/shaders/headers/450/header.glsl @@ -7,7 +7,7 @@ #define gl_VertexID gl_VertexIndex #define UNIFORM_BUFFER(SLOT, NAME) layout(std140, set=0, binding=SLOT) uniform NAME #define TEXTURE(SLOT, TYPE, NAME) layout(set=1, binding=SLOT) uniform TYPE NAME - #define RESOURCE_BUFFER(SLOT, NAME) layout(set=2, binding=SLOT) buffer NAME + #define RESOURCE_BUFFER(SLOT, NAME) layout(set=2, binding=SLOT) readonly buffer NAME #else #define UNIFORM_BUFFER(SLOT, NAME) layout(std140, binding=SLOT) uniform NAME #define TEXTURE(SLOT, TYPE, NAME) layout(binding=SLOT) uniform TYPE NAME diff --git a/libraries/vk/src/vk/Context.cpp b/libraries/vk/src/vk/Context.cpp index 5a4e6070de..2d429656f9 100644 --- a/libraries/vk/src/vk/Context.cpp +++ b/libraries/vk/src/vk/Context.cpp @@ -189,7 +189,7 @@ void Context::createInstance() { } void Context::destroyContext() { - VK_CHECK_RESULT(vkQueueWaitIdle(queue)); + VK_CHECK_RESULT(vkQueueWaitIdle(graphicsQueue)); for (const auto& trash : dumpster) { trash(); } @@ -233,9 +233,8 @@ void Context::destroyContext() { }*/ void Context::trashCommandBuffers(const std::vector& cmdBuffers, VkCommandPool commandPool) const { - if (!commandPool) { - commandPool = getCommandPool(); - } + Q_ASSERT(commandPool); + using DtorLambda = std::function&)>; DtorLambda destructor = [=](const std::vector& cmdBuffers) { @@ -331,7 +330,8 @@ void Context::createDevice() { #endif // Get the graphics queue - vkGetDeviceQueue(device->logicalDevice, device->queueFamilyIndices.graphics, 0, &queue); + vkGetDeviceQueue(device->logicalDevice, device->queueFamilyIndices.graphics, 0, &graphicsQueue); + vkGetDeviceQueue(device->logicalDevice, device->queueFamilyIndices.transfer, 0, &transferQueue); //queue = 
device.getQueue(queueIndices.graphics, 0); } @@ -428,9 +428,9 @@ void Context::buildDevice() { return result; }*/ -VkCommandBuffer Context::createCommandBuffer(VkCommandBufferLevel level) const { +VkCommandBuffer Context::createCommandBuffer(VkCommandPool commandPool, VkCommandBufferLevel level) const { VkCommandBuffer cmdBuffer; - VkCommandBufferAllocateInfo cmdBufAllocateInfo = vks::initializers::commandBufferAllocateInfo(getCommandPool(), level, 1); + VkCommandBufferAllocateInfo cmdBufAllocateInfo = vks::initializers::commandBufferAllocateInfo(commandPool, level, 1); VK_CHECK_RESULT(vkAllocateCommandBuffers(device->logicalDevice, &cmdBufAllocateInfo, &cmdBuffer)); return cmdBuffer; } @@ -444,10 +444,6 @@ VkCommandBuffer Context::createCommandBuffer(VkCommandBufferLevel level) const { device.waitIdle(); }*/ -const VkCommandPool& Context::getCommandPool() const { - return device->commandPool; -} - Image Context::createImage(const VkImageCreateInfo& imageCreateInfo, const VkMemoryPropertyFlags& memoryPropertyFlags) const { Image result; result.device = device->logicalDevice; @@ -512,7 +508,7 @@ Image Context::stageToDeviceImage(VkImageCreateInfo imageCreateInfo, // Prepare for shader read setImageLayout(copyCmd, result.image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, range); - }); + }, device->transferCommandPool); staging.destroy(); return result; } @@ -580,7 +576,7 @@ Buffer Context::stageToDeviceBuffer(const VkBufferUsageFlags& usage, size_t size [&](VkCommandBuffer copyCmd) { VkBufferCopy bufferCopy{ 0, 0, size }; vkCmdCopyBuffer(copyCmd, staging.buffer, result.buffer, 1, &bufferCopy); - }); + }, device->transferCommandPool); staging.destroy(); return result; } diff --git a/libraries/vk/src/vk/Context.h b/libraries/vk/src/vk/Context.h index aa740ff51a..d5b09fc8ac 100644 --- a/libraries/vk/src/vk/Context.h +++ b/libraries/vk/src/vk/Context.h @@ -185,22 +185,24 @@ public: VkImageLayout newImageLayout) const; void 
setImageLayout(VkImage image, + VkCommandPool pool, VkImageLayout oldImageLayout, VkImageLayout newImageLayout, VkImageSubresourceRange subresourceRange) const { withPrimaryCommandBuffer([&](const auto& commandBuffer) { setImageLayout(commandBuffer, image, oldImageLayout, newImageLayout, subresourceRange); - }); + }, pool); } // Fixed sub resource on first mip level and layer void setImageLayout(VkImage image, + VkCommandPool pool, VkImageAspectFlags aspectMask, VkImageLayout oldImageLayout, VkImageLayout newImageLayout) const { withPrimaryCommandBuffer([&](const auto& commandBuffer) { setImageLayout(commandBuffer, image, aspectMask, oldImageLayout, newImageLayout); - }); + }, pool); } void createDevice(); @@ -223,23 +225,24 @@ public: std::shared_ptr device; - VkQueue queue; + VkQueue graphicsQueue; + VkQueue transferQueue; - const VkCommandPool& getCommandPool() const; + //const VkCommandPool& getCommandPool() const; /*std::vector allocateCommandBuffers( uint32_t count, VkCommandBufferLevel level = VK_COMMAND_BUFFER_LEVEL_PRIMARY) const;*/ - VkCommandBuffer createCommandBuffer(VkCommandBufferLevel level = VK_COMMAND_BUFFER_LEVEL_PRIMARY) const; + VkCommandBuffer createCommandBuffer(VkCommandPool commandPool, VkCommandBufferLevel level = VK_COMMAND_BUFFER_LEVEL_PRIMARY) const; //void flushCommandBuffer(VkCommandBuffer& commandBuffer) const; // Create a short-lived command buffer which is immediately executed and released // This function is intended for initialization only. 
It incurs a queue and device // flush and may impact performance if used in non-setup code - void withPrimaryCommandBuffer(const std::function& f) const { - VkCommandBuffer commandBuffer = device->createCommandBuffer(VK_COMMAND_BUFFER_LEVEL_PRIMARY); + void withPrimaryCommandBuffer(const std::function& f, VkCommandPool commandPool) const { + VkCommandBuffer commandBuffer = device->createCommandBuffer(commandPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY); VkCommandBufferBeginInfo vkCommandBufferBeginInfo { .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, .pNext = nullptr, @@ -249,7 +252,7 @@ public: vkBeginCommandBuffer(commandBuffer, &vkCommandBufferBeginInfo); f(commandBuffer); vkEndCommandBuffer(commandBuffer); - device->flushCommandBuffer(commandBuffer, queue, true); + device->flushCommandBuffer(commandBuffer, graphicsQueue, commandPool, true); } Image createImage(const VkImageCreateInfo& imageCreateInfo, const VkMemoryPropertyFlags& memoryPropertyFlags) const; diff --git a/libraries/vk/src/vk/VKWindow.cpp b/libraries/vk/src/vk/VKWindow.cpp index aa1bdf99a9..f48912b4f4 100644 --- a/libraries/vk/src/vk/VKWindow.cpp +++ b/libraries/vk/src/vk/VKWindow.cpp @@ -74,7 +74,7 @@ void VKWindow::createCommandBuffers() { VK_CHECK_RESULT(vkCreateSemaphore(_device, &semaphoreCreateInfo, nullptr, &_renderCompleteSemaphore)); // Create one command buffer for each swap chain image _drawCommandBuffers.resize(_swapchain.imageCount); - VkCommandBufferAllocateInfo cmdBufAllocateInfo = vks::initializers::commandBufferAllocateInfo(_context.device->commandPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY, static_cast(_drawCommandBuffers.size())); + VkCommandBufferAllocateInfo cmdBufAllocateInfo = vks::initializers::commandBufferAllocateInfo(_context.device->graphicsCommandPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY, static_cast(_drawCommandBuffers.size())); VK_CHECK_RESULT(vkAllocateCommandBuffers(_device, &cmdBufAllocateInfo, _drawCommandBuffers.data())); } diff --git 
a/libraries/vk/src/vk/VulkanBuffer.cpp b/libraries/vk/src/vk/VulkanBuffer.cpp index 1b14d83286..2adaaf1d95 100644 --- a/libraries/vk/src/vk/VulkanBuffer.cpp +++ b/libraries/vk/src/vk/VulkanBuffer.cpp @@ -93,6 +93,35 @@ namespace vks return newBuffer; } + std::shared_ptr Buffer::createStorage(VkDeviceSize bufferSize) { + std::shared_ptr newBuffer = std::make_shared(); + newBuffer->size = bufferSize; + VkBufferCreateInfo bufferCI = { }; + bufferCI.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; + bufferCI.size = newBuffer->size; + bufferCI.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; + VmaAllocationCreateInfo allocationCI{}; + allocationCI.usage = VMA_MEMORY_USAGE_CPU_TO_GPU; + allocationCI.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT; + vmaCreateBuffer(vks::Allocation::getAllocator(), &bufferCI, &allocationCI, &newBuffer->buffer, &newBuffer->allocation, nullptr); + return newBuffer; + } + + std::shared_ptr Buffer::createVertex(VkDeviceSize bufferSize) { + //VKTODO: This needs to be on GPU-only memory in the future + std::shared_ptr newBuffer = std::make_shared(); + newBuffer->size = bufferSize; + VkBufferCreateInfo bufferCI = { }; + bufferCI.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; + bufferCI.size = newBuffer->size; + bufferCI.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT; + VmaAllocationCreateInfo allocationCI{}; + allocationCI.usage = VMA_MEMORY_USAGE_CPU_TO_GPU; + allocationCI.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT; + vmaCreateBuffer(vks::Allocation::getAllocator(), &bufferCI, &allocationCI, &newBuffer->buffer, &newBuffer->allocation, nullptr); + return newBuffer; + } + /** * Copies the specified data to the mapped buffer * diff --git a/libraries/vk/src/vk/VulkanBuffer.h b/libraries/vk/src/vk/VulkanBuffer.h index ae3550ba57..51ade4f895 100644 --- a/libraries/vk/src/vk/VulkanBuffer.h +++ b/libraries/vk/src/vk/VulkanBuffer.h @@ -44,6 +44,8 @@ namespace vks VkResult bind(VkDeviceSize offset = 0); void 
setupDescriptor(VkDeviceSize size = VK_WHOLE_SIZE, VkDeviceSize offset = 0); static std::shared_ptr createUniform(VkDeviceSize bufferSize); + static std::shared_ptr createStorage(VkDeviceSize bufferSize); + static std::shared_ptr createVertex(VkDeviceSize bufferSize); void copyTo(void* data, VkDeviceSize size); //VkResult flush(VkDeviceSize size = VK_WHOLE_SIZE, VkDeviceSize offset = 0); //VkResult invalidate(VkDeviceSize size = VK_WHOLE_SIZE, VkDeviceSize offset = 0); diff --git a/libraries/vk/src/vk/VulkanDevice.cpp b/libraries/vk/src/vk/VulkanDevice.cpp index a5dd0499d0..487c984f26 100644 --- a/libraries/vk/src/vk/VulkanDevice.cpp +++ b/libraries/vk/src/vk/VulkanDevice.cpp @@ -64,9 +64,17 @@ namespace vks */ VulkanDevice::~VulkanDevice() { - if (commandPool) + if (graphicsCommandPool) { - vkDestroyCommandPool(logicalDevice, commandPool, nullptr); + vkDestroyCommandPool(logicalDevice, graphicsCommandPool, nullptr); + } + if (transferCommandPool) + { + vkDestroyCommandPool(logicalDevice, transferCommandPool, nullptr); + } + if (computeCommandPool) + { + vkDestroyCommandPool(logicalDevice, computeCommandPool, nullptr); } if (logicalDevice) { @@ -299,7 +307,9 @@ namespace vks } // Create a default command pool for graphics command buffers - commandPool = createCommandPool(queueFamilyIndices.graphics); + graphicsCommandPool = createCommandPool(queueFamilyIndices.graphics); + transferCommandPool = createCommandPool(queueFamilyIndices.transfer); + //computeCommandPool = createCommandPool(queueFamilyIndices.compute); // VKTODO return result; } @@ -454,7 +464,7 @@ namespace vks { assert(dst->size <= src->size); assert(src->buffer); - VkCommandBuffer copyCmd = createCommandBuffer(VK_COMMAND_BUFFER_LEVEL_PRIMARY, true); + VkCommandBuffer copyCmd = createCommandBuffer(transferCommandPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY, true); VkBufferCopy bufferCopy{}; if (copyRegion == nullptr) { @@ -467,7 +477,7 @@ namespace vks vkCmdCopyBuffer(copyCmd, src->buffer, dst->buffer, 1, 
&bufferCopy); - flushCommandBuffer(copyCmd, queue); + flushCommandBuffer(copyCmd, queue, transferCommandPool); } /** @@ -500,7 +510,7 @@ namespace vks * * @return A handle to the allocated command buffer */ - VkCommandBuffer VulkanDevice::createCommandBuffer(VkCommandBufferLevel level, VkCommandPool pool, bool begin) + VkCommandBuffer VulkanDevice::createCommandBuffer(VkCommandPool pool, VkCommandBufferLevel level, bool begin) { VkCommandBufferAllocateInfo cmdBufAllocateInfo = vks::initializers::commandBufferAllocateInfo(pool, level, 1); VkCommandBuffer cmdBuffer; @@ -514,11 +524,6 @@ namespace vks return cmdBuffer; } - VkCommandBuffer VulkanDevice::createCommandBuffer(VkCommandBufferLevel level, bool begin) - { - return createCommandBuffer(level, commandPool, begin); - } - /** * Finish command buffer recording and submit it to a queue * @@ -557,11 +562,6 @@ namespace vks } } - void VulkanDevice::flushCommandBuffer(VkCommandBuffer commandBuffer, VkQueue queue, bool free) - { - return flushCommandBuffer(commandBuffer, queue, commandPool, free); - } - /** * Check if an extension is supported by the (physical device) * diff --git a/libraries/vk/src/vk/VulkanDevice.h b/libraries/vk/src/vk/VulkanDevice.h index 8c2fc39e38..94e285ba2d 100644 --- a/libraries/vk/src/vk/VulkanDevice.h +++ b/libraries/vk/src/vk/VulkanDevice.h @@ -39,7 +39,9 @@ struct VulkanDevice /** @brief List of extensions supported by the device */ std::vector supportedExtensions; /** @brief Default command pool for the graphics queue family index */ - VkCommandPool commandPool = VK_NULL_HANDLE; + VkCommandPool graphicsCommandPool = VK_NULL_HANDLE; + VkCommandPool transferCommandPool = VK_NULL_HANDLE; + VkCommandPool computeCommandPool = VK_NULL_HANDLE; // VKTODO: this is not assigned yet /** @brief Contains queue family indices */ struct { @@ -60,10 +62,8 @@ struct VulkanDevice VkResult createBuffer(VkBufferUsageFlags usageFlags, VkMemoryPropertyFlags memoryPropertyFlags, vks::Buffer *buffer, 
VkDeviceSize size, void *data = nullptr); void copyBuffer(vks::Buffer *src, vks::Buffer *dst, VkQueue queue, VkBufferCopy *copyRegion = nullptr); VkCommandPool createCommandPool(uint32_t queueFamilyIndex, VkCommandPoolCreateFlags createFlags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT); - VkCommandBuffer createCommandBuffer(VkCommandBufferLevel level, VkCommandPool pool, bool begin = false); - VkCommandBuffer createCommandBuffer(VkCommandBufferLevel level, bool begin = false); + VkCommandBuffer createCommandBuffer(VkCommandPool pool, VkCommandBufferLevel level, bool begin = false); void flushCommandBuffer(VkCommandBuffer commandBuffer, VkQueue queue, VkCommandPool pool, bool free = true); - void flushCommandBuffer(VkCommandBuffer commandBuffer, VkQueue queue, bool free = true); bool extensionSupported(std::string extension); VkFormat getSupportedDepthFormat(bool checkSamplingSupport); }; diff --git a/libraries/vk/src/vk/VulkanTexture.cpp b/libraries/vk/src/vk/VulkanTexture.cpp index 025cb13267..83ae6a1861 100644 --- a/libraries/vk/src/vk/VulkanTexture.cpp +++ b/libraries/vk/src/vk/VulkanTexture.cpp @@ -357,7 +357,7 @@ namespace vks VkMemoryRequirements memReqs; // Use a separate command buffer for texture loading - VkCommandBuffer copyCmd = device->createCommandBuffer(VK_COMMAND_BUFFER_LEVEL_PRIMARY, true); + VkCommandBuffer copyCmd = device->createCommandBuffer(device->transferCommandPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY, true); // Create a host-visible staging buffer that contains the raw image data VkBuffer stagingBuffer; @@ -458,7 +458,7 @@ namespace vks imageLayout, subresourceRange); - device->flushCommandBuffer(copyCmd, copyQueue); + device->flushCommandBuffer(copyCmd, copyQueue, device->transferCommandPool); // Clean up staging resources vkDestroyBuffer(device->logicalDevice, stagingBuffer, nullptr); diff --git a/tools/gpu-frame-player/src/RenderThread.cpp b/tools/gpu-frame-player/src/RenderThread.cpp index 0bc8ad994e..6d47dda2eb 100644 --- 
a/tools/gpu-frame-player/src/RenderThread.cpp +++ b/tools/gpu-frame-player/src/RenderThread.cpp @@ -174,7 +174,7 @@ void RenderThread::renderFrame(gpu::FramePointer& frame) { uint32_t swapchainIndex; VK_CHECK_RESULT(_swapchain.acquireNextImage(acquireComplete, &swapchainIndex)); auto framebuffer = _framebuffers[swapchainIndex]; - const auto& commandBuffer = _vkcontext.createCommandBuffer(); + const auto& commandBuffer = _vkcontext.createCommandBuffer(_vkcontext.device->graphicsCommandPool); //auto vkBackend = dynamic_pointer_cast(getBackend()); //Q_ASSERT(vkBackend); @@ -262,8 +262,8 @@ void RenderThread::renderFrame(gpu::FramePointer& frame) { VkFenceCreateInfo fenceCI = vks::initializers::fenceCreateInfo(); VkFence frameFence; vkCreateFence(_vkcontext.device->logicalDevice, &fenceCI, nullptr, &frameFence); - vkQueueSubmit(_vkcontext.queue, 1, &submitInfo, frameFence); - _swapchain.queuePresent(_vkcontext.queue, swapchainIndex, renderComplete); + vkQueueSubmit(_vkcontext.graphicsQueue, 1, &submitInfo, frameFence); + _swapchain.queuePresent(_vkcontext.graphicsQueue, swapchainIndex, renderComplete); _vkcontext.trashCommandBuffers({ commandBuffer }); _vkcontext.emptyDumpster(frameFence); _vkcontext.recycle();