@@ -503,7 +503,8 @@ namespace dxvk {
503503 VkImageSubresourceLayers srcSubresource,
504504 VkOffset3D srcOffset,
505505 VkExtent3D extent) {
506- if (this ->copyImageClear (dstImage, dstSubresource, dstOffset, extent, srcImage, srcSubresource))
506+ if (this ->copyImageClear (dstImage, dstSubresource, dstOffset, extent, srcImage, srcSubresource)
507+ || this ->copyImageInline (*dstImage, dstSubresource, dstOffset, *srcImage, srcSubresource, srcOffset, extent))
507508 return ;
508509
509510 bool useFb = !formatsAreImageCopyCompatible (dstImage->info ().format , srcImage->info ().format );
@@ -4561,8 +4562,6 @@ namespace dxvk {
45614562 VkExtent3D dstExtent,
45624563 const Rc<DxvkImage>& srcImage,
45634564 VkImageSubresourceLayers srcSubresource) {
4564- this ->endCurrentPass (true );
4565-
45664565 // If the source image has a pending deferred clear, we can
45674566 // implement the copy by clearing the destination image to
45684567 // the same clear value.
@@ -4606,9 +4605,263 @@ namespace dxvk {
46064605 if (dstImage->mipLevelExtent (dstSubresource.mipLevel , dstSubresource.aspectMask ) != dstExtent)
46074606 return false ;
46084607
4609- auto view = dstImage->createView (viewInfo);
4608+ clearRenderTarget (dstImage->createView (viewInfo),
4609+ srcSubresource.aspectMask , clear->clearValue , 0u );
4610+ return true ;
4611+ }
4612+
4613+
4614+ bool DxvkContext::copyImageInline (
4615+ DxvkImage& dstImage,
4616+ VkImageSubresourceLayers dstSubresource,
4617+ VkOffset3D dstOffset,
4618+ DxvkImage& srcImage,
4619+ VkImageSubresourceLayers srcSubresource,
4620+ VkOffset3D srcOffset,
4621+ VkExtent3D extent) {
4622+ if (!m_flags.test (DxvkContextFlag::GpRenderPassActive))
4623+ return false ;
4624+
4625+ // Ignore non-2D image due to extra complexity
4626+ if (dstImage.info ().type != VK_IMAGE_TYPE_2D
4627+ || srcImage.info ().type != VK_IMAGE_TYPE_2D )
4628+ return false ;
4629+
4630+ // We need to write a storage image, so ignore non-color images
4631+ if (dstSubresource.aspectMask != VK_IMAGE_ASPECT_COLOR_BIT
4632+ || srcSubresource.aspectMask != VK_IMAGE_ASPECT_COLOR_BIT )
4633+ return false ;
4634+
4635+ // Check whether the source image is bound as a color attachment
4636+ auto srcSubresourceRange = vk::makeSubresourceRange (srcSubresource);
4637+ int32_t colorAttachmentIndex = findColorAttachmentIndex (srcImage, srcSubresourceRange);
4638+
4639+ if (colorAttachmentIndex < 0 )
4640+ return false ;
4641+
4642+ // Destination must not be bound as a render target. We could technically
4643+ // support this by drawing to that render target, but things would get
4644+ // complicated real fast and no game actually seems to do that.
4645+ if (isBoundAsRenderTarget (dstImage, vk::makeSubresourceRange (dstSubresource)))
4646+ return false ;
4647+
4648+ // Ignore images with feedback loop usage since there are weird interactions.
4649+ if (srcImage.info ().usage & VK_IMAGE_USAGE_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT )
4650+ return false ;
4651+
4652+ // Make sure we can actually hit all the fast paths
4653+ if (!m_device->features ().khrDynamicRenderingLocalRead .dynamicRenderingLocalRead
4654+ || !m_device->features ().khrMaintenance10 .maintenance10
4655+ || !srcImage.hasUnifiedLayout () || !dstImage.hasUnifiedLayout ())
4656+ return false ;
4657+
4658+ // We fake unified layouts on some GPUs, so we still need to ensure
4659+ // that we don't use the input attachment path with invalid layouts.
4660+ // That could happen with the feedback loop layout in some cases.
4661+ Rc<DxvkImageView> srcView = m_state.om .framebufferInfo .getColorTarget (colorAttachmentIndex).view ;
4662+
4663+ if (srcView->getLayout () != VK_IMAGE_LAYOUT_GENERAL )
4664+ return false ;
4665+
4666+ // Verify that the source region fits within the framebuffer
4667+ DxvkFramebufferSize fbSize = m_state.om .framebufferInfo .size ();
46104668
4611- deferClear (view, srcSubresource.aspectMask , clear->clearValue );
4669+ if (uint32_t (srcOffset.x + extent.width ) > fbSize.width
4670+ || uint32_t (srcOffset.y + extent.height ) > fbSize.height
4671+ || srcSubresource.baseArrayLayer + srcSubresource.layerCount > srcView->info ().layerIndex + fbSize.layers )
4672+ return false ;
4673+
4674+ // Modern hardware tends to not suffer from adding STORAGE_IMAGE
4675+ // usage to images, so just do that if unified layouts are supported
4676+ VkFormat srcFormat = srcView->info ().format ;
4677+ VkFormat dstFormat = getLinearFormat (srcFormat);
4678+
4679+ DxvkImageUsageInfo srcUsage = { };
4680+ srcUsage.usage |= VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT ;
4681+ srcUsage.stages |= VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT ;
4682+ srcUsage.access |= VK_ACCESS_INPUT_ATTACHMENT_READ_BIT ;
4683+
4684+ DxvkImageUsageInfo dstUsage = { };
4685+ dstUsage.usage |= VK_IMAGE_USAGE_STORAGE_BIT ;
4686+ dstUsage.stages |= VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT ;
4687+ dstUsage.access |= VK_ACCESS_SHADER_WRITE_BIT ;
4688+ dstUsage.viewFormatCount = 1u ;
4689+ dstUsage.viewFormats = &dstFormat;
4690+
4691+ if (dstImage.formatInfo ()->flags .test (DxvkFormatFlag::BlockCompressed)) {
4692+ dstUsage.flags |= VK_IMAGE_CREATE_BLOCK_TEXEL_VIEW_COMPATIBLE_BIT
4693+ | VK_IMAGE_CREATE_EXTENDED_USAGE_BIT_KHR ;
4694+
4695+ if (dstSubresource.layerCount > 1u && !m_device->properties ().khrMaintenance6 .blockTexelViewCompatibleMultipleLayers )
4696+ return false ;
4697+ }
4698+
4699+ if (!(dstImage.info ().usage & VK_IMAGE_USAGE_STORAGE_BIT )) {
4700+ auto formatFeatures = m_device->adapter ()->getFormatFeatures (dstFormat);
4701+ auto features = dstImage.info ().tiling == VK_IMAGE_TILING_LINEAR ? formatFeatures.linear : formatFeatures.optimal ;
4702+
4703+ if (!(features & VK_FORMAT_FEATURE_2_STORAGE_IMAGE_BIT ))
4704+ return false ;
4705+ }
4706+
4707+ if (!ensureImageCompatibility (&dstImage, dstUsage)
4708+ || !ensureImageCompatibility (&srcImage, srcUsage))
4709+ return false ;
4710+
4711+ // Track access to the destination resource since it may be bound
4712+ // as a resource to graphics staders as well
4713+ if (!dstImage.trackGfxStores ())
4714+ return false ;
4715+
4716+ // Might have ended the render pass in the meantime. This is fine,
4717+ // just means that we'll end up hitting the fast path next time.
4718+ if (!m_flags.test (DxvkContextFlag::GpRenderPassActive))
4719+ return false ;
4720+
4721+ // Create actual storage image view to bind for the copy
4722+ DxvkImageViewKey key = { };
4723+ key.viewType = dstSubresource.layerCount > 1u
4724+ ? VK_IMAGE_VIEW_TYPE_2D_ARRAY
4725+ : VK_IMAGE_VIEW_TYPE_2D ;
4726+ key.usage = VK_IMAGE_USAGE_STORAGE_BIT ;
4727+ key.layout = VK_IMAGE_LAYOUT_GENERAL ;
4728+ key.format = dstFormat;
4729+ key.aspects = dstSubresource.aspectMask ;
4730+ key.layerIndex = dstSubresource.baseArrayLayer ;
4731+ key.layerCount = dstSubresource.layerCount ;
4732+ key.mipIndex = dstSubresource.mipLevel ;
4733+ key.mipCount = 1u ;
4734+
4735+ Rc<DxvkImageView> dstView = dstImage.createView (key);
4736+
4737+ // Check whether there are any hazards for the destination image,
4738+ // and track the write access as necessary.
4739+ if (resourceHasAccess (dstImage, dstSubresource, dstOffset, extent, DxvkAccess::Write, DxvkAccessOp::None)
4740+ || resourceHasAccess (dstImage, dstSubresource, dstOffset, extent, DxvkAccess::Read, DxvkAccessOp::None))
4741+ return false ;
4742+
4743+ accessImageRegion (DxvkCmdBuffer::ExecBuffer, dstImage, dstSubresource,
4744+ dstOffset, extent, dstView->info ().layout , VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT ,
4745+ VK_ACCESS_2_SHADER_WRITE_BIT , DxvkAccessOp::None);
4746+
4747+ m_cmd->track (&dstImage, DxvkAccess::Write);
4748+
4749+ // Flush pending clears for the source image that we want to copy from
4750+ if (findOverlappingDeferredClear (srcImage, srcSubresourceRange))
4751+ flushClearsInline ();
4752+
4753+ if (unlikely (m_features.test (DxvkContextFeature::DebugUtils))) {
4754+ const char * dstName = dstImage.info ().debugName ;
4755+ const char * srcName = srcImage.info ().debugName ;
4756+
4757+ m_cmd->cmdBeginDebugUtilsLabel (DxvkCmdBuffer::ExecBuffer,
4758+ vk::makeLabel (0xf0dcdc , str::format (" Copy image (" ,
4759+ dstName ? dstName : " unknown" , " , " ,
4760+ srcName ? srcName : " unknown" , " )" ).c_str ()));
4761+ }
4762+
4763+ // Get pipeline for the current render pass setup
4764+ DxvkMetaInputAttachmentImageCopy::Key pipelineKey = { };
4765+ pipelineKey.srcViewType = srcView->info ().viewType ;
4766+ pipelineKey.dstViewType = key.viewType ;
4767+ pipelineKey.dstFormat = key.format ;
4768+ pipelineKey.srcAttachment = colorAttachmentIndex;
4769+ pipelineKey.depthFormat = m_state.om .framebufferInfo .getDepthFormat ();
4770+
4771+ for (uint32_t i = 0u ; i < MaxNumRenderTargets; i++)
4772+ pipelineKey.colorFormats [i] = m_state.om .framebufferInfo .getColorFormat (i);
4773+
4774+ DxvkMetaInputAttachmentImageCopy pipeline = m_common->metaCopy ().getPipeline (pipelineKey);
4775+
4776+ if (!pipeline.pipeline )
4777+ return false ;
4778+
4779+ // Issue by-region barrier before the copy
4780+ VkImageMemoryBarrier2 barrier = { VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2 };
4781+ barrier.image = srcImage.handle ();
4782+ barrier.srcStageMask = VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT ;
4783+ barrier.srcAccessMask = VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT
4784+ | VK_ACCESS_2_COLOR_ATTACHMENT_READ_BIT ;
4785+ barrier.dstStageMask = VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT ;
4786+ barrier.dstAccessMask = VK_ACCESS_2_INPUT_ATTACHMENT_READ_BIT ;
4787+ barrier.oldLayout = srcView->getLayout ();
4788+ barrier.newLayout = srcView->getLayout ();
4789+ barrier.subresourceRange = srcSubresourceRange;
4790+
4791+ VkDependencyInfo depInfo = { VK_STRUCTURE_TYPE_DEPENDENCY_INFO };
4792+ depInfo.dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT ;
4793+ depInfo.imageMemoryBarrierCount = 1u ;
4794+ depInfo.pImageMemoryBarriers = &barrier;
4795+
4796+ m_cmd->cmdPipelineBarrier (DxvkCmdBuffer::ExecBuffer, &depInfo);
4797+
4798+ // Invalidate pipeline state and perform the actual draw
4799+ unbindGraphicsPipeline ();
4800+
4801+ VkViewport viewport = { };
4802+ viewport.x = float (srcOffset.x );
4803+ viewport.y = float (srcOffset.y );
4804+ viewport.width = float (extent.width );
4805+ viewport.height = float (extent.height );
4806+ viewport.maxDepth = 1 .0f ;
4807+
4808+ VkRect2D scissor = { };
4809+ scissor.offset .x = srcOffset.x ;
4810+ scissor.offset .y = srcOffset.y ;
4811+ scissor.extent .width = extent.width ;
4812+ scissor.extent .height = extent.height ;
4813+
4814+ m_cmd->cmdSetViewport (1 , &viewport);
4815+ m_cmd->cmdSetScissor (1 , &scissor);
4816+
4817+ adjustRenderArea (scissor);
4818+
4819+ std::array<DxvkDescriptorWrite, 2u > descriptors = { };
4820+ descriptors[0 ].descriptorType = VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT ;
4821+ descriptors[0 ].descriptor = srcView->getDescriptor ();
4822+
4823+ descriptors[1 ].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE ;
4824+ descriptors[1 ].descriptor = dstView->getDescriptor ();
4825+
4826+ DxvkMetaInputAttachmentImageCopy::Args copyArgs = { };
4827+ copyArgs.srcOffset = VkOffset2D { srcOffset.x , srcOffset.y };
4828+ copyArgs.dstOffset = VkOffset2D { dstOffset.x , dstOffset.y };
4829+
4830+ if (dstImage.formatInfo ()->flags .test (DxvkFormatFlag::BlockCompressed)) {
4831+ copyArgs.dstOffset .x /= dstImage.formatInfo ()->blockSize .width ;
4832+ copyArgs.dstOffset .y /= dstImage.formatInfo ()->blockSize .height ;
4833+ }
4834+
4835+ m_cmd->cmdBindPipeline (DxvkCmdBuffer::ExecBuffer,
4836+ VK_PIPELINE_BIND_POINT_GRAPHICS , pipeline.pipeline );
4837+
4838+ m_cmd->bindResources (DxvkCmdBuffer::ExecBuffer, pipeline.layout ,
4839+ descriptors.size (), descriptors.data (), 0u , nullptr );
4840+
4841+ for (uint32_t i = 0u ; i < dstSubresource.layerCount ; i++) {
4842+ copyArgs.srcLayer = i + srcSubresource.baseArrayLayer - srcView->info ().layerIndex ;
4843+ copyArgs.dstLayer = i + dstSubresource.baseArrayLayer ;
4844+
4845+ m_cmd->bindResources (DxvkCmdBuffer::ExecBuffer, pipeline.layout ,
4846+ 0u , nullptr , sizeof (copyArgs), ©Args);
4847+ m_cmd->cmdDraw (3u , srcSubresource.layerCount , 0u , 1u );
4848+ }
4849+
4850+ // Issue by-region barrier after the copy and before subsequent rendering
4851+ std::swap (barrier.srcStageMask , barrier.dstStageMask );
4852+ std::swap (barrier.srcAccessMask , barrier.dstAccessMask );
4853+
4854+ m_cmd->cmdPipelineBarrier (DxvkCmdBuffer::ExecBuffer, &depInfo);
4855+
4856+ if (unlikely (m_features.test (DxvkContextFeature::DebugUtils)))
4857+ m_cmd->cmdEndDebugUtilsLabel (DxvkCmdBuffer::ExecBuffer);
4858+
4859+ m_renderPassBarrierSrc.stages |= VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT ;
4860+ m_renderPassBarrierSrc.access |= VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_INPUT_ATTACHMENT_READ_BIT ;
4861+
4862+ m_state.om .attachmentMask .trackColorRead (colorAttachmentIndex);
4863+
4864+ m_flags.set (DxvkContextFlag::GpRenderPassSideEffects);
46124865 return true ;
46134866 }
46144867
@@ -6339,7 +6592,8 @@ namespace dxvk {
63396592 DxvkContextFlag::GpDirtyDepthBias,
63406593 DxvkContextFlag::GpDirtyDepthBounds,
63416594 DxvkContextFlag::GpDirtyDepthClip,
6342- DxvkContextFlag::GpDirtyDepthTest);
6595+ DxvkContextFlag::GpDirtyDepthTest,
6596+ DxvkContextFlag::GpDirtySpecConstants);
63436597
63446598 m_flags.clr (DxvkContextFlag::GpHasPushData);
63456599
@@ -7261,6 +7515,26 @@ namespace dxvk {
72617515 }
72627516
72637517
7518+ int32_t DxvkContext::findColorAttachmentIndex (
7519+ const DxvkImage& image,
7520+ const VkImageSubresourceRange& subresources) {
7521+ for (uint32_t i = 0u ; i < MaxNumRenderTargets; i++) {
7522+ const auto & attachment = m_state.om .framebufferInfo .getColorTarget (i).view ;
7523+
7524+ if (!attachment || attachment->image () != &image)
7525+ continue ;
7526+
7527+ auto viewSubresources = attachment->imageSubresources ();
7528+
7529+ if ((viewSubresources.aspectMask & subresources.aspectMask ) == subresources.aspectMask
7530+ && vk::checkSubresourceRangeSuperset (viewSubresources, subresources))
7531+ return int32_t (i);
7532+ }
7533+
7534+ return -1 ;
7535+ }
7536+
7537+
72647538 void DxvkContext::updateIndexBufferBinding () {
72657539 m_flags.clr (DxvkContextFlag::GpDirtyIndexBuffer);
72667540
0 commit comments