@@ -503,7 +503,8 @@ namespace dxvk {
503503 VkImageSubresourceLayers srcSubresource,
504504 VkOffset3D srcOffset,
505505 VkExtent3D extent) {
506- if (this ->copyImageClear (dstImage, dstSubresource, dstOffset, extent, srcImage, srcSubresource))
506+ if (this ->copyImageClear (dstImage, dstSubresource, dstOffset, extent, srcImage, srcSubresource)
507+ || this ->copyImageInline (*dstImage, dstSubresource, dstOffset, *srcImage, srcSubresource, srcOffset, extent))
507508 return ;
508509
509510 bool useFb = !formatsAreImageCopyCompatible (dstImage->info ().format , srcImage->info ().format );
@@ -4572,8 +4573,6 @@ namespace dxvk {
45724573 VkExtent3D dstExtent,
45734574 const Rc<DxvkImage>& srcImage,
45744575 VkImageSubresourceLayers srcSubresource) {
4575- this ->endCurrentPass (true );
4576-
45774576 // If the source image has a pending deferred clear, we can
45784577 // implement the copy by clearing the destination image to
45794578 // the same clear value.
@@ -4617,9 +4616,263 @@ namespace dxvk {
46174616 if (dstImage->mipLevelExtent (dstSubresource.mipLevel , dstSubresource.aspectMask ) != dstExtent)
46184617 return false ;
46194618
4620- auto view = dstImage->createView (viewInfo);
4619+ clearRenderTarget (dstImage->createView (viewInfo),
4620+ srcSubresource.aspectMask , clear->clearValue , 0u );
4621+ return true ;
4622+ }
4623+
4624+
4625+ bool DxvkContext::copyImageInline (
4626+ DxvkImage& dstImage,
4627+ VkImageSubresourceLayers dstSubresource,
4628+ VkOffset3D dstOffset,
4629+ DxvkImage& srcImage,
4630+ VkImageSubresourceLayers srcSubresource,
4631+ VkOffset3D srcOffset,
4632+ VkExtent3D extent) {
4633+ if (!m_flags.test (DxvkContextFlag::GpRenderPassActive))
4634+ return false ;
4635+
4636+ // Ignore non-2D image due to extra complexity
4637+ if (dstImage.info ().type != VK_IMAGE_TYPE_2D
4638+ || srcImage.info ().type != VK_IMAGE_TYPE_2D )
4639+ return false ;
4640+
4641+ // We need to write a storage image, so ignore non-color images
4642+ if (dstSubresource.aspectMask != VK_IMAGE_ASPECT_COLOR_BIT
4643+ || srcSubresource.aspectMask != VK_IMAGE_ASPECT_COLOR_BIT )
4644+ return false ;
4645+
4646+ // Check whether the source image is bound as a color attachment
4647+ auto srcSubresourceRange = vk::makeSubresourceRange (srcSubresource);
4648+ int32_t colorAttachmentIndex = findColorAttachmentIndex (srcImage, srcSubresourceRange);
4649+
4650+ if (colorAttachmentIndex < 0 )
4651+ return false ;
4652+
4653+ // Destination must not be bound as a render target. We could technically
4654+ // support this by drawing to that render target, but things would get
4655+ // complicated real fast and no game actually seems to do that.
4656+ if (isBoundAsRenderTarget (dstImage, vk::makeSubresourceRange (dstSubresource)))
4657+ return false ;
4658+
4659+ // Ignore images with feedback loop usage since there are weird interactions.
4660+ if (srcImage.info ().usage & VK_IMAGE_USAGE_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT )
4661+ return false ;
4662+
4663+ // Make sure we can actually hit all the fast paths
4664+ if (!m_device->features ().khrDynamicRenderingLocalRead .dynamicRenderingLocalRead
4665+ || !m_device->features ().khrMaintenance10 .maintenance10
4666+ || !srcImage.hasUnifiedLayout () || !dstImage.hasUnifiedLayout ())
4667+ return false ;
4668+
4669+ // We fake unified layouts on some GPUs, so we still need to ensure
4670+ // that we don't use the input attachment path with invalid layouts.
4671+ // That could happen with the feedback loop layout in some cases.
4672+ Rc<DxvkImageView> srcView = m_state.om .framebufferInfo .getColorTarget (colorAttachmentIndex).view ;
4673+
4674+ if (srcView->getLayout () != VK_IMAGE_LAYOUT_GENERAL )
4675+ return false ;
4676+
4677+ // Verify that the source region fits within the framebuffer
4678+ DxvkFramebufferSize fbSize = m_state.om .framebufferInfo .size ();
46214679
4622- deferClear (view, srcSubresource.aspectMask , clear->clearValue );
4680+ if (uint32_t (srcOffset.x + extent.width ) > fbSize.width
4681+ || uint32_t (srcOffset.y + extent.height ) > fbSize.height
4682+ || srcSubresource.baseArrayLayer + srcSubresource.layerCount > srcView->info ().layerIndex + fbSize.layers )
4683+ return false ;
4684+
4685+ // Modern hardware tends to not suffer from adding STORAGE_IMAGE
4686+ // usage to images, so just do that if unified layouts are supported
4687+ VkFormat srcFormat = srcView->info ().format ;
4688+ VkFormat dstFormat = getLinearFormat (srcFormat);
4689+
4690+ DxvkImageUsageInfo srcUsage = { };
4691+ srcUsage.usage |= VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT ;
4692+ srcUsage.stages |= VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT ;
4693+ srcUsage.access |= VK_ACCESS_INPUT_ATTACHMENT_READ_BIT ;
4694+
4695+ DxvkImageUsageInfo dstUsage = { };
4696+ dstUsage.usage |= VK_IMAGE_USAGE_STORAGE_BIT ;
4697+ dstUsage.stages |= VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT ;
4698+ dstUsage.access |= VK_ACCESS_SHADER_WRITE_BIT ;
4699+ dstUsage.viewFormatCount = 1u ;
4700+ dstUsage.viewFormats = &dstFormat;
4701+
4702+ if (dstImage.formatInfo ()->flags .test (DxvkFormatFlag::BlockCompressed)) {
4703+ dstUsage.flags |= VK_IMAGE_CREATE_BLOCK_TEXEL_VIEW_COMPATIBLE_BIT
4704+ | VK_IMAGE_CREATE_EXTENDED_USAGE_BIT_KHR ;
4705+
4706+ if (dstSubresource.layerCount > 1u && !m_device->properties ().khrMaintenance6 .blockTexelViewCompatibleMultipleLayers )
4707+ return false ;
4708+ }
4709+
4710+ if (!(dstImage.info ().usage & VK_IMAGE_USAGE_STORAGE_BIT )) {
4711+ auto formatFeatures = m_device->adapter ()->getFormatFeatures (dstFormat);
4712+ auto features = dstImage.info ().tiling == VK_IMAGE_TILING_LINEAR ? formatFeatures.linear : formatFeatures.optimal ;
4713+
4714+ if (!(features & VK_FORMAT_FEATURE_2_STORAGE_IMAGE_BIT ))
4715+ return false ;
4716+ }
4717+
4718+ if (!ensureImageCompatibility (&dstImage, dstUsage)
4719+ || !ensureImageCompatibility (&srcImage, srcUsage))
4720+ return false ;
4721+
4722+ // Track access to the destination resource since it may be bound
4723+ // as a resource to graphics staders as well
4724+ if (!dstImage.trackGfxStores ())
4725+ return false ;
4726+
4727+ // Might have ended the render pass in the meantime. This is fine,
4728+ // just means that we'll end up hitting the fast path next time.
4729+ if (!m_flags.test (DxvkContextFlag::GpRenderPassActive))
4730+ return false ;
4731+
4732+ // Create actual storage image view to bind for the copy
4733+ DxvkImageViewKey key = { };
4734+ key.viewType = dstSubresource.layerCount > 1u
4735+ ? VK_IMAGE_VIEW_TYPE_2D_ARRAY
4736+ : VK_IMAGE_VIEW_TYPE_2D ;
4737+ key.usage = VK_IMAGE_USAGE_STORAGE_BIT ;
4738+ key.layout = VK_IMAGE_LAYOUT_GENERAL ;
4739+ key.format = dstFormat;
4740+ key.aspects = dstSubresource.aspectMask ;
4741+ key.layerIndex = dstSubresource.baseArrayLayer ;
4742+ key.layerCount = dstSubresource.layerCount ;
4743+ key.mipIndex = dstSubresource.mipLevel ;
4744+ key.mipCount = 1u ;
4745+
4746+ Rc<DxvkImageView> dstView = dstImage.createView (key);
4747+
4748+ // Check whether there are any hazards for the destination image,
4749+ // and track the write access as necessary.
4750+ if (resourceHasAccess (dstImage, dstSubresource, dstOffset, extent, DxvkAccess::Write, DxvkAccessOp::None)
4751+ || resourceHasAccess (dstImage, dstSubresource, dstOffset, extent, DxvkAccess::Read, DxvkAccessOp::None))
4752+ return false ;
4753+
4754+ accessImageRegion (DxvkCmdBuffer::ExecBuffer, dstImage, dstSubresource,
4755+ dstOffset, extent, dstView->info ().layout , VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT ,
4756+ VK_ACCESS_2_SHADER_WRITE_BIT , DxvkAccessOp::None);
4757+
4758+ m_cmd->track (&dstImage, DxvkAccess::Write);
4759+
4760+ // Flush pending clears for the source image that we want to copy from
4761+ if (findOverlappingDeferredClear (srcImage, srcSubresourceRange))
4762+ flushClearsInline ();
4763+
4764+ if (unlikely (m_features.test (DxvkContextFeature::DebugUtils))) {
4765+ const char * dstName = dstImage.info ().debugName ;
4766+ const char * srcName = srcImage.info ().debugName ;
4767+
4768+ m_cmd->cmdBeginDebugUtilsLabel (DxvkCmdBuffer::ExecBuffer,
4769+ vk::makeLabel (0xf0dcdc , str::format (" Copy image (" ,
4770+ dstName ? dstName : " unknown" , " , " ,
4771+ srcName ? srcName : " unknown" , " )" ).c_str ()));
4772+ }
4773+
4774+ // Get pipeline for the current render pass setup
4775+ DxvkMetaInputAttachmentImageCopy::Key pipelineKey = { };
4776+ pipelineKey.srcViewType = srcView->info ().viewType ;
4777+ pipelineKey.dstViewType = key.viewType ;
4778+ pipelineKey.dstFormat = key.format ;
4779+ pipelineKey.srcAttachment = colorAttachmentIndex;
4780+ pipelineKey.depthFormat = m_state.om .framebufferInfo .getDepthFormat ();
4781+
4782+ for (uint32_t i = 0u ; i < MaxNumRenderTargets; i++)
4783+ pipelineKey.colorFormats [i] = m_state.om .framebufferInfo .getColorFormat (i);
4784+
4785+ DxvkMetaInputAttachmentImageCopy pipeline = m_common->metaCopy ().getPipeline (pipelineKey);
4786+
4787+ if (!pipeline.pipeline )
4788+ return false ;
4789+
4790+ // Issue by-region barrier before the copy
4791+ VkImageMemoryBarrier2 barrier = { VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2 };
4792+ barrier.image = srcImage.handle ();
4793+ barrier.srcStageMask = VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT ;
4794+ barrier.srcAccessMask = VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT
4795+ | VK_ACCESS_2_COLOR_ATTACHMENT_READ_BIT ;
4796+ barrier.dstStageMask = VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT ;
4797+ barrier.dstAccessMask = VK_ACCESS_2_INPUT_ATTACHMENT_READ_BIT ;
4798+ barrier.oldLayout = srcView->getLayout ();
4799+ barrier.newLayout = srcView->getLayout ();
4800+ barrier.subresourceRange = srcSubresourceRange;
4801+
4802+ VkDependencyInfo depInfo = { VK_STRUCTURE_TYPE_DEPENDENCY_INFO };
4803+ depInfo.dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT ;
4804+ depInfo.imageMemoryBarrierCount = 1u ;
4805+ depInfo.pImageMemoryBarriers = &barrier;
4806+
4807+ m_cmd->cmdPipelineBarrier (DxvkCmdBuffer::ExecBuffer, &depInfo);
4808+
4809+ // Invalidate pipeline state and perform the actual draw
4810+ unbindGraphicsPipeline ();
4811+
4812+ VkViewport viewport = { };
4813+ viewport.x = float (srcOffset.x );
4814+ viewport.y = float (srcOffset.y );
4815+ viewport.width = float (extent.width );
4816+ viewport.height = float (extent.height );
4817+ viewport.maxDepth = 1 .0f ;
4818+
4819+ VkRect2D scissor = { };
4820+ scissor.offset .x = srcOffset.x ;
4821+ scissor.offset .y = srcOffset.y ;
4822+ scissor.extent .width = extent.width ;
4823+ scissor.extent .height = extent.height ;
4824+
4825+ m_cmd->cmdSetViewport (1 , &viewport);
4826+ m_cmd->cmdSetScissor (1 , &scissor);
4827+
4828+ adjustRenderArea (scissor);
4829+
4830+ std::array<DxvkDescriptorWrite, 2u > descriptors = { };
4831+ descriptors[0 ].descriptorType = VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT ;
4832+ descriptors[0 ].descriptor = srcView->getDescriptor ();
4833+
4834+ descriptors[1 ].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE ;
4835+ descriptors[1 ].descriptor = dstView->getDescriptor ();
4836+
4837+ DxvkMetaInputAttachmentImageCopy::Args copyArgs = { };
4838+ copyArgs.srcOffset = VkOffset2D { srcOffset.x , srcOffset.y };
4839+ copyArgs.dstOffset = VkOffset2D { dstOffset.x , dstOffset.y };
4840+
4841+ if (dstImage.formatInfo ()->flags .test (DxvkFormatFlag::BlockCompressed)) {
4842+ copyArgs.dstOffset .x /= dstImage.formatInfo ()->blockSize .width ;
4843+ copyArgs.dstOffset .y /= dstImage.formatInfo ()->blockSize .height ;
4844+ }
4845+
4846+ m_cmd->cmdBindPipeline (DxvkCmdBuffer::ExecBuffer,
4847+ VK_PIPELINE_BIND_POINT_GRAPHICS , pipeline.pipeline );
4848+
4849+ m_cmd->bindResources (DxvkCmdBuffer::ExecBuffer, pipeline.layout ,
4850+ descriptors.size (), descriptors.data (), 0u , nullptr );
4851+
4852+ for (uint32_t i = 0u ; i < dstSubresource.layerCount ; i++) {
4853+ copyArgs.srcLayer = i + srcSubresource.baseArrayLayer - srcView->info ().layerIndex ;
4854+ copyArgs.dstLayer = i + dstSubresource.baseArrayLayer ;
4855+
4856+ m_cmd->bindResources (DxvkCmdBuffer::ExecBuffer, pipeline.layout ,
4857+ 0u , nullptr , sizeof (copyArgs), ©Args);
4858+ m_cmd->cmdDraw (3u , srcSubresource.layerCount , 0u , 1u );
4859+ }
4860+
4861+ // Issue by-region barrier after the copy and before subsequent rendering
4862+ std::swap (barrier.srcStageMask , barrier.dstStageMask );
4863+ std::swap (barrier.srcAccessMask , barrier.dstAccessMask );
4864+
4865+ m_cmd->cmdPipelineBarrier (DxvkCmdBuffer::ExecBuffer, &depInfo);
4866+
4867+ if (unlikely (m_features.test (DxvkContextFeature::DebugUtils)))
4868+ m_cmd->cmdEndDebugUtilsLabel (DxvkCmdBuffer::ExecBuffer);
4869+
4870+ m_renderPassBarrierSrc.stages |= VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT ;
4871+ m_renderPassBarrierSrc.access |= VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_INPUT_ATTACHMENT_READ_BIT ;
4872+
4873+ m_state.om .attachmentMask .trackColorRead (colorAttachmentIndex);
4874+
4875+ m_flags.set (DxvkContextFlag::GpRenderPassSideEffects);
46234876 return true ;
46244877 }
46254878
@@ -6350,7 +6603,8 @@ namespace dxvk {
63506603 DxvkContextFlag::GpDirtyDepthBias,
63516604 DxvkContextFlag::GpDirtyDepthBounds,
63526605 DxvkContextFlag::GpDirtyDepthClip,
6353- DxvkContextFlag::GpDirtyDepthTest);
6606+ DxvkContextFlag::GpDirtyDepthTest,
6607+ DxvkContextFlag::GpDirtySpecConstants);
63546608
63556609 m_flags.clr (DxvkContextFlag::GpHasPushData);
63566610
@@ -7272,6 +7526,26 @@ namespace dxvk {
72727526 }
72737527
72747528
7529+ int32_t DxvkContext::findColorAttachmentIndex (
7530+ const DxvkImage& image,
7531+ const VkImageSubresourceRange& subresources) {
7532+ for (uint32_t i = 0u ; i < MaxNumRenderTargets; i++) {
7533+ const auto & attachment = m_state.om .framebufferInfo .getColorTarget (i).view ;
7534+
7535+ if (!attachment || attachment->image () != &image)
7536+ continue ;
7537+
7538+ auto viewSubresources = attachment->imageSubresources ();
7539+
7540+ if ((viewSubresources.aspectMask & subresources.aspectMask ) == subresources.aspectMask
7541+ && vk::checkSubresourceRangeSuperset (viewSubresources, subresources))
7542+ return int32_t (i);
7543+ }
7544+
7545+ return -1 ;
7546+ }
7547+
7548+
72757549 void DxvkContext::updateIndexBufferBinding () {
72767550 m_flags.clr (DxvkContextFlag::GpDirtyIndexBuffer);
72777551
0 commit comments