overte-org
diff --git a/libraries/display-plugins/src/display-plugins/VulkanDisplayPlugin.cpp b/libraries/display-plugins/src/display-plugins/VulkanDisplayPlugin.cpp
Lines changed: 21 additions & 1 deletion
diff --git a/libraries/gpu-vk/src/gpu/vk/VKBackend.cpp b/libraries/gpu-vk/src/gpu/vk/VKBackend.cpp
Lines changed: 13 additions & 6 deletions
diff --git a/libraries/gpu-vk/src/gpu/vk/VKFramebuffer.cpp b/libraries/gpu-vk/src/gpu/vk/VKFramebuffer.cpp
Lines changed: 3 additions & 3 deletions
diff --git a/libraries/gpu-vk/src/gpu/vk/VKPipelineCache.cpp b/libraries/gpu-vk/src/gpu/vk/VKPipelineCache.cpp
Lines changed: 55 additions & 8 deletions
diff --git a/libraries/gpu-vk/src/gpu/vk/VKPipelineCache.h b/libraries/gpu-vk/src/gpu/vk/VKPipelineCache.h
Lines changed: 2 additions & 2 deletions
diff --git a/libraries/gpu-vk/src/gpu/vk/VKShared.cpp b/libraries/gpu-vk/src/gpu/vk/VKShared.cpp
Lines changed: 3 additions & 2 deletions
diff --git a/libraries/gpu-vk/src/gpu/vk/VKShared.h b/libraries/gpu-vk/src/gpu/vk/VKShared.h
Lines changed: 1 addition & 1 deletion
@@ -433,7 +433,7 @@ void VulkanDisplayPlugin::customizeContext() {
 
         _SRGBToLinearPipeline = gpu::Pipeline::create(gpu::Shader::createProgram(DrawTextureSRGBToLinear), scissorState);
 
-        _hudPipeline = gpu::Pipeline::create(gpu::Shader::createProgram(DrawTextureSRGBToLinear), blendState);
+        _hudPipeline = gpu::Pipeline::create(gpu::Shader::createProgram(DrawTexturePremultipliedSRGBToLinear), blendState);
 
         _cursorPipeline = gpu::Pipeline::create(gpu::Shader::createProgram(DrawTransformedTexture), blendState);
     }
@@ -750,6 +750,11 @@ void VulkanDisplayPlugin::present(const std::shared_ptr<RefreshRateController>&
 
         uint32_t currentImageIndex = UINT32_MAX;
         //VK_CHECK_RESULT(_vkWindow->_swapchain.acquireNextImage(_vkWindow->_acquireCompleteSemaphore, &currentImageIndex));
+        if (!_vkWindow->_acquireCompleteSemaphore) {
+            VkSemaphoreCreateInfo semaphoreCreateInfo = vks::initializers::semaphoreCreateInfo();
+            VK_CHECK_RESULT(vkCreateSemaphore(_vkWindow->_context.device->logicalDevice, &semaphoreCreateInfo, nullptr, &_vkWindow->_acquireCompleteSemaphore));
+        }
+
         if(_vkWindow->_swapchain.acquireNextImage(_vkWindow->_acquireCompleteSemaphore, &currentImageIndex) != VK_SUCCESS) {
             qDebug() << "_vkWindow->_swapchain.acquireNextImage fail";
             _vkWindow->resizeFramebuffer(); //VKTODO: workaround
@@ -898,6 +903,11 @@ void VulkanDisplayPlugin::present(const std::shared_ptr<RefreshRateController>&
             cmdEndLabel(commandBuffer);
             VK_CHECK_RESULT(vkEndCommandBuffer(commandBuffer));
 
+            if (!_vkWindow->_renderCompleteSemaphore) {
+                VkSemaphoreCreateInfo semaphoreCreateInfo = vks::initializers::semaphoreCreateInfo();
+                VK_CHECK_RESULT(vkCreateSemaphore(_vkWindow->_context.device->logicalDevice, &semaphoreCreateInfo, nullptr, &_vkWindow->_renderCompleteSemaphore));
+            }
+
             static const VkPipelineStageFlags waitFlags{ VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT };
             VkSubmitInfo submitInfo = vks::initializers::submitInfo();
             submitInfo.waitSemaphoreCount = 1;
@@ -925,8 +935,18 @@ void VulkanDisplayPlugin::present(const std::shared_ptr<RefreshRateController>&
 
             _vkWindow->_previousFrameFence = frameFence;
             _vkWindow->_previousCommandBuffer = commandBuffer;
+            if (_vkWindow->_previousAcquireCompleteSemaphore) {
+                _vkWindow->_context.recycler.trashVkSemaphore(_vkWindow->_previousAcquireCompleteSemaphore);
+            }
+            if (_vkWindow->_previousRenderCompleteSemaphore) {
+                _vkWindow->_context.recycler.trashVkSemaphore(_vkWindow->_previousRenderCompleteSemaphore);
+            }
         }
         _vkWindow->_swapchain.queuePresent(_vkWindow->_context.graphicsQueue, currentImageIndex, _vkWindow->_renderCompleteSemaphore);
+        _vkWindow->_previousAcquireCompleteSemaphore = _vkWindow->_acquireCompleteSemaphore;
+        _vkWindow->_previousRenderCompleteSemaphore = _vkWindow->_renderCompleteSemaphore;
+        _vkWindow->_acquireCompleteSemaphore = VK_NULL_HANDLE;
+        _vkWindow->_renderCompleteSemaphore = VK_NULL_HANDLE;
 
         // VKTODO
         gpu::Backend::freeGPUMemSize.set(gpu::gl::getFreeDedicatedMemory());
 
@@ -1486,8 +1486,8 @@ VKTexture* VKBackend::syncGPUObject(const Texture *texture) {
                         qDebug() << " mismatch, stored: " << storedFormat.getType() << " texel: " << texelFormat.getType();
                         return nullptr;
                     }
-                    auto storedVkFormat = evalTexelFormatInternal(texture->getStoredMipFormat());
-                    auto texelVkFormat = evalTexelFormatInternal(texture->getTexelFormat());
+                    auto storedVkFormat = evalTexelFormatInternal(texture->getStoredMipFormat(), _context);
+                    auto texelVkFormat = evalTexelFormatInternal(texture->getTexelFormat(), _context);
                     if (storedVkFormat != texelVkFormat) {
                         if (!(storedVkFormat == VK_FORMAT_R8G8B8_UNORM && texelVkFormat == VK_FORMAT_R8G8B8A8_UNORM) // Adding alpha channel needed
                             && !(storedVkFormat == VK_FORMAT_R8G8B8_UNORM && texelVkFormat == VK_FORMAT_R8G8B8A8_SRGB) // Adding alpha channel needed and maybe SRGB conversion?
@@ -1795,9 +1795,16 @@ void VKBackend::FrameData::createDescriptorPool() {
 }
 
 void VKBackend::FrameData::addGlUniform(size_t size, const void* data, size_t commandIndex) {
-    _glUniformData.resize(_glUniformBufferPosition + size);
+    size_t alignment = _backend->_context.device->properties.limits.minUniformBufferOffsetAlignment;
+    size_t newSizeUnaligned = _glUniformBufferPosition + size;
+    size_t newSizeAligned = newSizeUnaligned - (newSizeUnaligned % alignment);
+    if (newSizeAligned < newSizeUnaligned) {
+        newSizeAligned += alignment;
+    }
+    _glUniformData.resize(newSizeAligned);
     memcpy(_glUniformData.data()+_glUniformBufferPosition, data, size);
-    _glUniformBufferPosition += size;
+    _glUniformOffsetMap.insert({commandIndex, _glUniformBufferPosition});
+    _glUniformBufferPosition = newSizeAligned;
 }
 
 VKBackend::FrameData::FrameData(VKBackend *backend) : _backend(backend) {
@@ -2616,7 +2623,7 @@ void VKBackend::do_clearFramebuffer(const Batch& batch, size_t paramOffset) {
     auto gpuFramebuffer = syncGPUObject(framebuffer);
     auto &renderBuffers = framebuffer->getRenderBuffers();
 
-    Cache::Pipeline::RenderpassKey key = _cache.pipelineState.getRenderPassKey(framebuffer);
+    Cache::Pipeline::RenderpassKey key = _cache.pipelineState.getRenderPassKey(framebuffer, _context);
     std::vector<VkAttachmentDescription> attachments;
     std::vector<VkClearValue> clearValues;
     attachments.reserve(key.size());
@@ -2796,7 +2803,7 @@ void VKBackend::do_clearFramebuffer(const Batch& batch, size_t paramOffset) {
 
         VkImageSubresourceRange mipSubRange = {};
         mipSubRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
-        if (formatHasStencil(evalTexelFormatInternal(texture->getTexelFormat()))) {
+        if (formatHasStencil(evalTexelFormatInternal(texture->getTexelFormat(), _context))) {
             mipSubRange.aspectMask |= VK_IMAGE_ASPECT_STENCIL_BIT;
         }
         mipSubRange.baseMipLevel = 0;
 
@@ -55,7 +55,7 @@ void gpu::vk::VKFramebuffer::update() {
                         attachmentCI.width = vkTexture->_gpuObject.getWidth();
                         attachmentCI.height = vkTexture->_gpuObject.getHeight();
                         attachmentCI.layerCount = 1;
-                        attachmentCI.format = gpu::vk::evalTexelFormatInternal(vkTexture->_gpuObject.getTexelFormat());
+                        attachmentCI.format = gpu::vk::evalTexelFormatInternal(vkTexture->_gpuObject.getTexelFormat(), backend->getContext());
                         attachmentCI.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT;
                         attachmentCI.imageSampleCount = VK_SAMPLE_COUNT_1_BIT;
                         addAttachment(attachmentCI, vkTexture);
@@ -101,7 +101,7 @@ void gpu::vk::VKFramebuffer::update() {
                 attachmentCI.width = vkTexture->_gpuObject.getWidth();
                 attachmentCI.height = vkTexture->_gpuObject.getHeight();
                 attachmentCI.layerCount = 1;
-                attachmentCI.format = gpu::vk::evalTexelFormatInternal(vkTexture->_gpuObject.getTexelFormat());
+                attachmentCI.format = gpu::vk::evalTexelFormatInternal(vkTexture->_gpuObject.getTexelFormat(), backend->getContext());
                 attachmentCI.usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT;
                 attachmentCI.imageSampleCount = VK_SAMPLE_COUNT_1_BIT;
                 addAttachment(attachmentCI, vkTexture);
@@ -112,7 +112,7 @@ void gpu::vk::VKFramebuffer::update() {
                 attachmentCI.width = vkTexture->_gpuObject.getWidth();
                 attachmentCI.height = vkTexture->_gpuObject.getHeight();
                 attachmentCI.layerCount = vkTexture->_gpuObject.getNumSlices();
-                attachmentCI.format = gpu::vk::evalTexelFormatInternal(vkTexture->_gpuObject.getTexelFormat());
+                attachmentCI.format = gpu::vk::evalTexelFormatInternal(vkTexture->_gpuObject.getTexelFormat(), backend->getContext());
                 attachmentCI.usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT;
                 attachmentCI.imageSampleCount = VK_SAMPLE_COUNT_1_BIT;
                 addAttachment(attachmentCI, vkTexture);
 
@@ -158,7 +158,7 @@ Cache::PipelineLayout Cache::Pipeline::getPipelineAndDescriptorLayout(const vks:
     return layout;
 }
 
-Cache::Pipeline::RenderpassKey Cache::Pipeline::getRenderPassKey(gpu::Framebuffer* framebuffer) const {
+Cache::Pipeline::RenderpassKey Cache::Pipeline::getRenderPassKey(gpu::Framebuffer* framebuffer, const vks::Context &context) const {
     RenderpassKey result;
     if (!framebuffer) {
         result.emplace_back(VK_FORMAT_R8G8B8A8_SRGB, VK_IMAGE_LAYOUT_UNDEFINED); // VKTODO: this is definitely wrong, why is it that way?
@@ -176,7 +176,7 @@ Cache::Pipeline::RenderpassKey Cache::Pipeline::getRenderPassKey(gpu::Framebuffe
                         layout = attachmentTexture->getVkImageLayout();
                     }
                 }
-                result.emplace_back(evalTexelFormatInternal(attachment._texture->getTexelFormat()), layout);
+                result.emplace_back(evalTexelFormatInternal(attachment._texture->getTexelFormat(), context), layout);
             }
         }
         if (framebuffer->hasDepthStencil()) {
@@ -190,7 +190,7 @@ Cache::Pipeline::RenderpassKey Cache::Pipeline::getRenderPassKey(gpu::Framebuffe
                     }
                 }
             }
-            result.emplace_back(evalTexelFormatInternal(framebuffer->getDepthStencilBufferFormat()), layout);
+            result.emplace_back(evalTexelFormatInternal(framebuffer->getDepthStencilBufferFormat(), context), layout);
         }
     }
     return result;
@@ -199,7 +199,7 @@ Cache::Pipeline::RenderpassKey Cache::Pipeline::getRenderPassKey(gpu::Framebuffe
 VkRenderPass Cache::Pipeline::getRenderPass(const vks::Context& context) {
     const auto framebuffer = gpu::acquire(this->framebuffer);
 
-    RenderpassKey key = getRenderPassKey(framebuffer);
+    RenderpassKey key = getRenderPassKey(framebuffer, context);
     auto itr = _renderPassMap.find(key);
     if (itr == _renderPassMap.end()) {
         auto &renderBuffers = framebuffer->getRenderBuffers();
@@ -357,9 +357,9 @@ std::string Cache::Pipeline::getStridesKey() const {
 }
 
 // VKTODO: use binary key if performance with text key is not good enough
-std::string Cache::Pipeline::getKey() const {
+std::string Cache::Pipeline::getKey(const vks::Context& context) const {
     const auto framebuffer = gpu::acquire(this->framebuffer);
-    RenderpassKey renderpassKey = getRenderPassKey(framebuffer);
+    RenderpassKey renderpassKey = getRenderPassKey(framebuffer, context);
     const gpu::Pipeline& pipeline = *gpu::acquire(this->pipeline);
     const gpu::State& state = *pipeline.getState();
     const auto& vertexShader = pipeline.getProgram()->getShaders()[0]->getSource();
@@ -406,7 +406,7 @@ VkShaderModule Cache::getShaderModule(const vks::Context& context, const shader:
 }
 
 Cache::PipelineLayout Cache::getPipeline(const vks::Context& context) {
-    auto key = pipelineState.getKey();
+    auto key = pipelineState.getKey(context);
     auto pipelineIterator = pipelineMap.find(key);
     if (pipelineIterator != pipelineMap.end()) {
         return pipelineIterator->second;
@@ -541,13 +541,31 @@ Cache::PipelineLayout Cache::getPipeline(const vks::Context& context) {
             isAttributeSlotOccupied[slot] = true;
 
             attributeDescriptions.push_back(
-                { slot, attribute._channel, evalTexelFormatInternal(attribute._element), (uint32_t)attribute._offset });
+                { slot, attribute._channel, evalTexelFormatInternal(attribute._element, context), (uint32_t)attribute._offset });
         }
 
         if (!colorFound && vertexReflection.validInput(Stream::COLOR)) {
             attributeDescriptions.push_back({ Stream::COLOR, 0, VK_FORMAT_R8G8B8A8_UNORM, 0 });
         }
 
+        if (!isAttributeSlotOccupied[Stream::TANGENT] && vertexReflection.validInput(Stream::TANGENT)) {
+            attributeDescriptions.push_back({ Stream::TANGENT, 0, VK_FORMAT_R8G8B8A8_UNORM, 0 });
+        }
+
+        // VKTODO: is it safe to provide empty skin cluster data?
+        // It would be used for meshes with blendshapes but no skinning.
+        // Since blendshapes and skinning use the same shader, currently I added
+        // a workaround that disables blendshapes on meshes with blendshapes but no skinning.
+        /*if (!isAttributeSlotOccupied[Stream::SKIN_CLUSTER_INDEX] && vertexReflection.validInput(Stream::SKIN_CLUSTER_INDEX)) {
+            // VKTODO: provide valid format here.
+            attributeDescriptions.push_back({ Stream::SKIN_CLUSTER_INDEX, 0, VK_FORMAT_R8G8B8A8_UNORM, 0 });
+        }
+
+        if (!isAttributeSlotOccupied[Stream::SKIN_CLUSTER_WEIGHT] && vertexReflection.validInput(Stream::SKIN_CLUSTER_WEIGHT)) {
+            // VKTODO: provide valid format here.
+            attributeDescriptions.push_back({ Stream::SKIN_CLUSTER_WEIGHT, 0, VK_FORMAT_R8G8B8A8_UNORM, 0 });
+        }*/
+
         if (!isAttributeSlotOccupied[Stream::TEXCOORD0] && vertexReflection.validInput(Stream::TEXCOORD0)) {
             attributeDescriptions.push_back({ Stream::TEXCOORD0, 0, VK_FORMAT_R8G8B8A8_UNORM, 0 });
         }
@@ -556,6 +574,35 @@ Cache::PipelineLayout Cache::getPipeline(const vks::Context& context) {
             attributeDescriptions.push_back({ Stream::TEXCOORD1, 0, VK_FORMAT_R8G8B8A8_UNORM, 0 });
         }
 
+        if (!isAttributeSlotOccupied[Stream::TEXCOORD2] && vertexReflection.validInput(Stream::TEXCOORD2)) {
+            attributeDescriptions.push_back({ Stream::TEXCOORD2, 0, VK_FORMAT_R8G8B8A8_UNORM, 0 });
+        }
+
+        if (!isAttributeSlotOccupied[Stream::TEXCOORD3] && vertexReflection.validInput(Stream::TEXCOORD3)) {
+            attributeDescriptions.push_back({ Stream::TEXCOORD3, 0, VK_FORMAT_R8G8B8A8_UNORM, 0 });
+        }
+
+        if (!isAttributeSlotOccupied[Stream::TEXCOORD4] && vertexReflection.validInput(Stream::TEXCOORD4)) {
+            attributeDescriptions.push_back({ Stream::TEXCOORD4, 0, VK_FORMAT_R8G8B8A8_UNORM, 0 });
+        }
+
+        // VKTODO: Fade inputs are not set for shapes drawn with drawIndexed.
+        // I currently don't know how to fix it, so I added a workaround.
+        if (!isAttributeSlotOccupied[GPU_ATTR_FADEDATA5] && vertexReflection.validInput(GPU_ATTR_FADEDATA5)) {
+            // VKTODO: provide valid format here.
+            attributeDescriptions.push_back({ GPU_ATTR_FADEDATA5, 0, VK_FORMAT_R8G8B8A8_UNORM, 0 });
+        }
+
+        if (!isAttributeSlotOccupied[GPU_ATTR_FADEDATA6] && vertexReflection.validInput(GPU_ATTR_FADEDATA6)) {
+            // VKTODO: provide valid format here.
+            attributeDescriptions.push_back({ GPU_ATTR_FADEDATA6, 0, VK_FORMAT_R8G8B8A8_UNORM, 0 });
+        }
+
+        if (!isAttributeSlotOccupied[GPU_ATTR_FADEDATA7] && vertexReflection.validInput(GPU_ATTR_FADEDATA7)) {
+            // VKTODO: provide valid format here.
+            attributeDescriptions.push_back({ GPU_ATTR_FADEDATA7, 0, VK_FORMAT_R8G8B8A8_UNORM, 0 });
+        }
+
         // Explicitly add the draw call info slot if required
         if (vertexReflection.validInput(gpu::slot::attr::DrawCallInfo)) {
             attributeDescriptions.push_back(
 
@@ -115,12 +115,12 @@ struct Cache {
         // Returns structure containing pipeline layout and descriptor set layouts
         PipelineLayout getPipelineAndDescriptorLayout(const vks::Context& context);
 
-        RenderpassKey getRenderPassKey(gpu::Framebuffer* framebuffer) const;
+        RenderpassKey getRenderPassKey(gpu::Framebuffer* framebuffer, const vks::Context &context) const;
 
         VkRenderPass getRenderPass(const vks::Context& context);
         static std::string getRenderpassKeyString(const RenderpassKey& renderpassKey);
         std::string getStridesKey() const;
-        std::string getKey() const;
+        std::string getKey(const vks::Context& context) const;
     } pipelineState;
 
     static VkStencilOpState getStencilOp(const gpu::State::StencilTest& stencil);
 
@@ -14,13 +14,14 @@ Q_LOGGING_CATEGORY(gpu_vk_logging, "hifi.gpu.vk")
 Q_LOGGING_CATEGORY(trace_gpu_vk, "trace.gpu.vk")
 Q_LOGGING_CATEGORY(trace_gpu_vk_detail, "trace.gpu.vk.detail")
 
-VkFormat gpu::vk::evalTexelFormatInternal(const gpu::Element& dstFormat) {
+VkFormat gpu::vk::evalTexelFormatInternal(const gpu::Element& dstFormat, const vks::Context &context) {
     // VKTODO: add BGRA and SBGRA
     VkFormat result = VK_FORMAT_R8G8B8_UNORM;
     switch (dstFormat.getDimension()) {
         case gpu::SCALAR:
         {
             switch (dstFormat.getSemantic()) {
+                case gpu::RAW:
                 case gpu::RED:
                 case gpu::RGB:
                 case gpu::RGBA:
@@ -123,7 +124,7 @@ VkFormat gpu::vk::evalTexelFormatInternal(const gpu::Element& dstFormat) {
 
                 case gpu::DEPTH_STENCIL:
-                    // The only possible depth stencil format
+                    // Use whichever depth-stencil format the context reports as best
-                    result = VK_FORMAT_D24_UNORM_S8_UINT;
+                    result = context.getBestDepthStencilFormat();
                     break;
 
                 default:
 
@@ -30,7 +30,7 @@ State::StencilOp stencilOpFromGL(VkStencilOp stencilOp);
 State::BlendOp blendOpFromGL(VkBlendOp blendOp);
 State::BlendArg blendArgFromGL(VkBlendFactor blendArg);
 
-VkFormat evalTexelFormatInternal(const Element& dstFormat);
+VkFormat evalTexelFormatInternal(const Element& dstFormat, const vks::Context &context);
 
 //bool isDepthStencilFormat(VkFormat format);
 bool formatHasStencil(VkFormat format);