Skip to content

Commit 6fc422b

Browse files
committed
[dxvk] Try to use draw with input attachments to implement image copies
Potential tiler optimization. Many games run into this in one way or another. Needs maintenance10 to not regress performance.
1 parent 64e42ea commit 6fc422b

3 files changed

Lines changed: 317 additions & 6 deletions

File tree

src/dxvk/dxvk_context.cpp

Lines changed: 280 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -503,7 +503,8 @@ namespace dxvk {
503503
VkImageSubresourceLayers srcSubresource,
504504
VkOffset3D srcOffset,
505505
VkExtent3D extent) {
506-
if (this->copyImageClear(dstImage, dstSubresource, dstOffset, extent, srcImage, srcSubresource))
506+
if (this->copyImageClear(dstImage, dstSubresource, dstOffset, extent, srcImage, srcSubresource)
507+
|| this->copyImageInline(*dstImage, dstSubresource, dstOffset, *srcImage, srcSubresource, srcOffset, extent))
507508
return;
508509

509510
bool useFb = !formatsAreImageCopyCompatible(dstImage->info().format, srcImage->info().format);
@@ -4572,8 +4573,6 @@ namespace dxvk {
45724573
VkExtent3D dstExtent,
45734574
const Rc<DxvkImage>& srcImage,
45744575
VkImageSubresourceLayers srcSubresource) {
4575-
this->endCurrentPass(true);
4576-
45774576
// If the source image has a pending deferred clear, we can
45784577
// implement the copy by clearing the destination image to
45794578
// the same clear value.
@@ -4617,9 +4616,263 @@ namespace dxvk {
46174616
if (dstImage->mipLevelExtent(dstSubresource.mipLevel, dstSubresource.aspectMask) != dstExtent)
46184617
return false;
46194618

4620-
auto view = dstImage->createView(viewInfo);
4619+
clearRenderTarget(dstImage->createView(viewInfo),
4620+
srcSubresource.aspectMask, clear->clearValue, 0u);
4621+
return true;
4622+
}
4623+
4624+
4625+
bool DxvkContext::copyImageInline(
4626+
DxvkImage& dstImage,
4627+
VkImageSubresourceLayers dstSubresource,
4628+
VkOffset3D dstOffset,
4629+
DxvkImage& srcImage,
4630+
VkImageSubresourceLayers srcSubresource,
4631+
VkOffset3D srcOffset,
4632+
VkExtent3D extent) {
4633+
if (!m_flags.test(DxvkContextFlag::GpRenderPassActive))
4634+
return false;
4635+
4636+
// Ignore non-2D image due to extra complexity
4637+
if (dstImage.info().type != VK_IMAGE_TYPE_2D
4638+
|| srcImage.info().type != VK_IMAGE_TYPE_2D)
4639+
return false;
4640+
4641+
// We need to write a storage image, so ignore non-color images
4642+
if (dstSubresource.aspectMask != VK_IMAGE_ASPECT_COLOR_BIT
4643+
|| srcSubresource.aspectMask != VK_IMAGE_ASPECT_COLOR_BIT)
4644+
return false;
4645+
4646+
// Check whether the source image is bound as a color attachment
4647+
auto srcSubresourceRange = vk::makeSubresourceRange(srcSubresource);
4648+
int32_t colorAttachmentIndex = findColorAttachmentIndex(srcImage, srcSubresourceRange);
4649+
4650+
if (colorAttachmentIndex < 0)
4651+
return false;
4652+
4653+
// Destination must not be bound as a render target. We could technically
4654+
// support this by drawing to that render target, but things would get
4655+
// complicated real fast and no game actually seems to do that.
4656+
if (isBoundAsRenderTarget(dstImage, vk::makeSubresourceRange(dstSubresource)))
4657+
return false;
4658+
4659+
// Ignore images with feedback loop usage since there are weird interactions.
4660+
if (srcImage.info().usage & VK_IMAGE_USAGE_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT)
4661+
return false;
4662+
4663+
// Make sure we can actually hit all the fast paths
4664+
if (!m_device->features().khrDynamicRenderingLocalRead.dynamicRenderingLocalRead
4665+
|| !m_device->features().khrMaintenance10.maintenance10
4666+
|| !srcImage.hasUnifiedLayout() || !dstImage.hasUnifiedLayout())
4667+
return false;
4668+
4669+
// We fake unified layouts on some GPUs, so we still need to ensure
4670+
// that we don't use the input attachment path with invalid layouts.
4671+
// That could happen with the feedback loop layout in some cases.
4672+
Rc<DxvkImageView> srcView = m_state.om.framebufferInfo.getColorTarget(colorAttachmentIndex).view;
4673+
4674+
if (srcView->getLayout() != VK_IMAGE_LAYOUT_GENERAL)
4675+
return false;
4676+
4677+
// Verify that the source region fits within the framebuffer
4678+
DxvkFramebufferSize fbSize = m_state.om.framebufferInfo.size();
46214679

4622-
deferClear(view, srcSubresource.aspectMask, clear->clearValue);
4680+
if (uint32_t(srcOffset.x + extent.width) > fbSize.width
4681+
|| uint32_t(srcOffset.y + extent.height) > fbSize.height
4682+
|| srcSubresource.baseArrayLayer + srcSubresource.layerCount > srcView->info().layerIndex + fbSize.layers)
4683+
return false;
4684+
4685+
// Modern hardware tends to not suffer from adding STORAGE_IMAGE
4686+
// usage to images, so just do that if unified layouts are supported
4687+
VkFormat srcFormat = srcView->info().format;
4688+
VkFormat dstFormat = getLinearFormat(srcFormat);
4689+
4690+
DxvkImageUsageInfo srcUsage = { };
4691+
srcUsage.usage |= VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT;
4692+
srcUsage.stages |= VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
4693+
srcUsage.access |= VK_ACCESS_INPUT_ATTACHMENT_READ_BIT;
4694+
4695+
DxvkImageUsageInfo dstUsage = { };
4696+
dstUsage.usage |= VK_IMAGE_USAGE_STORAGE_BIT;
4697+
dstUsage.stages |= VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
4698+
dstUsage.access |= VK_ACCESS_SHADER_WRITE_BIT;
4699+
dstUsage.viewFormatCount = 1u;
4700+
dstUsage.viewFormats = &dstFormat;
4701+
4702+
if (dstImage.formatInfo()->flags.test(DxvkFormatFlag::BlockCompressed)) {
4703+
dstUsage.flags |= VK_IMAGE_CREATE_BLOCK_TEXEL_VIEW_COMPATIBLE_BIT
4704+
| VK_IMAGE_CREATE_EXTENDED_USAGE_BIT_KHR;
4705+
4706+
if (dstSubresource.layerCount > 1u && !m_device->properties().khrMaintenance6.blockTexelViewCompatibleMultipleLayers)
4707+
return false;
4708+
}
4709+
4710+
if (!(dstImage.info().usage & VK_IMAGE_USAGE_STORAGE_BIT)) {
4711+
auto formatFeatures = m_device->adapter()->getFormatFeatures(dstFormat);
4712+
auto features = dstImage.info().tiling == VK_IMAGE_TILING_LINEAR ? formatFeatures.linear : formatFeatures.optimal;
4713+
4714+
if (!(features & VK_FORMAT_FEATURE_2_STORAGE_IMAGE_BIT))
4715+
return false;
4716+
}
4717+
4718+
if (!ensureImageCompatibility(&dstImage, dstUsage)
4719+
|| !ensureImageCompatibility(&srcImage, srcUsage))
4720+
return false;
4721+
4722+
// Track access to the destination resource since it may be bound
4723+
// as a resource to graphics shaders as well
4724+
if (!dstImage.trackGfxStores())
4725+
return false;
4726+
4727+
// Might have ended the render pass in the meantime. This is fine,
4728+
// just means that we'll end up hitting the fast path next time.
4729+
if (!m_flags.test(DxvkContextFlag::GpRenderPassActive))
4730+
return false;
4731+
4732+
// Create actual storage image view to bind for the copy
4733+
DxvkImageViewKey key = { };
4734+
key.viewType = dstSubresource.layerCount > 1u
4735+
? VK_IMAGE_VIEW_TYPE_2D_ARRAY
4736+
: VK_IMAGE_VIEW_TYPE_2D;
4737+
key.usage = VK_IMAGE_USAGE_STORAGE_BIT;
4738+
key.layout = VK_IMAGE_LAYOUT_GENERAL;
4739+
key.format = dstFormat;
4740+
key.aspects = dstSubresource.aspectMask;
4741+
key.layerIndex = dstSubresource.baseArrayLayer;
4742+
key.layerCount = dstSubresource.layerCount;
4743+
key.mipIndex = dstSubresource.mipLevel;
4744+
key.mipCount = 1u;
4745+
4746+
Rc<DxvkImageView> dstView = dstImage.createView(key);
4747+
4748+
// Check whether there are any hazards for the destination image,
4749+
// and track the write access as necessary.
4750+
if (resourceHasAccess(dstImage, dstSubresource, dstOffset, extent, DxvkAccess::Write, DxvkAccessOp::None)
4751+
|| resourceHasAccess(dstImage, dstSubresource, dstOffset, extent, DxvkAccess::Read, DxvkAccessOp::None))
4752+
return false;
4753+
4754+
accessImageRegion(DxvkCmdBuffer::ExecBuffer, dstImage, dstSubresource,
4755+
dstOffset, extent, dstView->info().layout, VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT,
4756+
VK_ACCESS_2_SHADER_WRITE_BIT, DxvkAccessOp::None);
4757+
4758+
m_cmd->track(&dstImage, DxvkAccess::Write);
4759+
4760+
// Flush pending clears for the source image that we want to copy from
4761+
if (findOverlappingDeferredClear(srcImage, srcSubresourceRange))
4762+
flushClearsInline();
4763+
4764+
if (unlikely(m_features.test(DxvkContextFeature::DebugUtils))) {
4765+
const char* dstName = dstImage.info().debugName;
4766+
const char* srcName = srcImage.info().debugName;
4767+
4768+
m_cmd->cmdBeginDebugUtilsLabel(DxvkCmdBuffer::ExecBuffer,
4769+
vk::makeLabel(0xf0dcdc, str::format("Copy image (",
4770+
dstName ? dstName : "unknown", ", ",
4771+
srcName ? srcName : "unknown", ")").c_str()));
4772+
}
4773+
4774+
// Get pipeline for the current render pass setup
4775+
DxvkMetaInputAttachmentImageCopy::Key pipelineKey = { };
4776+
pipelineKey.srcViewType = srcView->info().viewType;
4777+
pipelineKey.dstViewType = key.viewType;
4778+
pipelineKey.dstFormat = key.format;
4779+
pipelineKey.srcAttachment = colorAttachmentIndex;
4780+
pipelineKey.depthFormat = m_state.om.framebufferInfo.getDepthFormat();
4781+
4782+
for (uint32_t i = 0u; i < MaxNumRenderTargets; i++)
4783+
pipelineKey.colorFormats[i] = m_state.om.framebufferInfo.getColorFormat(i);
4784+
4785+
DxvkMetaInputAttachmentImageCopy pipeline = m_common->metaCopy().getPipeline(pipelineKey);
4786+
4787+
if (!pipeline.pipeline)
4788+
return false;
4789+
4790+
// Issue by-region barrier before the copy
4791+
VkImageMemoryBarrier2 barrier = { VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2 };
4792+
barrier.image = srcImage.handle();
4793+
barrier.srcStageMask = VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT;
4794+
barrier.srcAccessMask = VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT
4795+
| VK_ACCESS_2_COLOR_ATTACHMENT_READ_BIT;
4796+
barrier.dstStageMask = VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT;
4797+
barrier.dstAccessMask = VK_ACCESS_2_INPUT_ATTACHMENT_READ_BIT;
4798+
barrier.oldLayout = srcView->getLayout();
4799+
barrier.newLayout = srcView->getLayout();
4800+
barrier.subresourceRange = srcSubresourceRange;
4801+
4802+
VkDependencyInfo depInfo = { VK_STRUCTURE_TYPE_DEPENDENCY_INFO };
4803+
depInfo.dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT;
4804+
depInfo.imageMemoryBarrierCount = 1u;
4805+
depInfo.pImageMemoryBarriers = &barrier;
4806+
4807+
m_cmd->cmdPipelineBarrier(DxvkCmdBuffer::ExecBuffer, &depInfo);
4808+
4809+
// Invalidate pipeline state and perform the actual draw
4810+
unbindGraphicsPipeline();
4811+
4812+
VkViewport viewport = { };
4813+
viewport.x = float(srcOffset.x);
4814+
viewport.y = float(srcOffset.y);
4815+
viewport.width = float(extent.width);
4816+
viewport.height = float(extent.height);
4817+
viewport.maxDepth = 1.0f;
4818+
4819+
VkRect2D scissor = { };
4820+
scissor.offset.x = srcOffset.x;
4821+
scissor.offset.y = srcOffset.y;
4822+
scissor.extent.width = extent.width;
4823+
scissor.extent.height = extent.height;
4824+
4825+
m_cmd->cmdSetViewport(1, &viewport);
4826+
m_cmd->cmdSetScissor(1, &scissor);
4827+
4828+
adjustRenderArea(scissor);
4829+
4830+
std::array<DxvkDescriptorWrite, 2u> descriptors = { };
4831+
descriptors[0].descriptorType = VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT;
4832+
descriptors[0].descriptor = srcView->getDescriptor();
4833+
4834+
descriptors[1].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
4835+
descriptors[1].descriptor = dstView->getDescriptor();
4836+
4837+
DxvkMetaInputAttachmentImageCopy::Args copyArgs = { };
4838+
copyArgs.srcOffset = VkOffset2D { srcOffset.x, srcOffset.y };
4839+
copyArgs.dstOffset = VkOffset2D { dstOffset.x, dstOffset.y };
4840+
4841+
if (dstImage.formatInfo()->flags.test(DxvkFormatFlag::BlockCompressed)) {
4842+
copyArgs.dstOffset.x /= dstImage.formatInfo()->blockSize.width;
4843+
copyArgs.dstOffset.y /= dstImage.formatInfo()->blockSize.height;
4844+
}
4845+
4846+
m_cmd->cmdBindPipeline(DxvkCmdBuffer::ExecBuffer,
4847+
VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline.pipeline);
4848+
4849+
m_cmd->bindResources(DxvkCmdBuffer::ExecBuffer, pipeline.layout,
4850+
descriptors.size(), descriptors.data(), 0u, nullptr);
4851+
4852+
for (uint32_t i = 0u; i < dstSubresource.layerCount; i++) {
4853+
copyArgs.srcLayer = i + srcSubresource.baseArrayLayer - srcView->info().layerIndex;
4854+
copyArgs.dstLayer = i + dstSubresource.baseArrayLayer;
4855+
4856+
m_cmd->bindResources(DxvkCmdBuffer::ExecBuffer, pipeline.layout,
4857+
0u, nullptr, sizeof(copyArgs), &copyArgs);
4858+
m_cmd->cmdDraw(3u, srcSubresource.layerCount, 0u, 1u);
4859+
}
4860+
4861+
// Issue by-region barrier after the copy and before subsequent rendering
4862+
std::swap(barrier.srcStageMask, barrier.dstStageMask);
4863+
std::swap(barrier.srcAccessMask, barrier.dstAccessMask);
4864+
4865+
m_cmd->cmdPipelineBarrier(DxvkCmdBuffer::ExecBuffer, &depInfo);
4866+
4867+
if (unlikely(m_features.test(DxvkContextFeature::DebugUtils)))
4868+
m_cmd->cmdEndDebugUtilsLabel(DxvkCmdBuffer::ExecBuffer);
4869+
4870+
m_renderPassBarrierSrc.stages |= VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
4871+
m_renderPassBarrierSrc.access |= VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_INPUT_ATTACHMENT_READ_BIT;
4872+
4873+
m_state.om.attachmentMask.trackColorRead(colorAttachmentIndex);
4874+
4875+
m_flags.set(DxvkContextFlag::GpRenderPassSideEffects);
46234876
return true;
46244877
}
46254878

@@ -6350,7 +6603,8 @@ namespace dxvk {
63506603
DxvkContextFlag::GpDirtyDepthBias,
63516604
DxvkContextFlag::GpDirtyDepthBounds,
63526605
DxvkContextFlag::GpDirtyDepthClip,
6353-
DxvkContextFlag::GpDirtyDepthTest);
6606+
DxvkContextFlag::GpDirtyDepthTest,
6607+
DxvkContextFlag::GpDirtySpecConstants);
63546608

63556609
m_flags.clr(DxvkContextFlag::GpHasPushData);
63566610

@@ -7272,6 +7526,26 @@ namespace dxvk {
72727526
}
72737527

72747528

7529+
int32_t DxvkContext::findColorAttachmentIndex(
7530+
const DxvkImage& image,
7531+
const VkImageSubresourceRange& subresources) {
7532+
for (uint32_t i = 0u; i < MaxNumRenderTargets; i++) {
7533+
const auto& attachment = m_state.om.framebufferInfo.getColorTarget(i).view;
7534+
7535+
if (!attachment || attachment->image() != &image)
7536+
continue;
7537+
7538+
auto viewSubresources = attachment->imageSubresources();
7539+
7540+
if ((viewSubresources.aspectMask & subresources.aspectMask) == subresources.aspectMask
7541+
&& vk::checkSubresourceRangeSuperset(viewSubresources, subresources))
7542+
return int32_t(i);
7543+
}
7544+
7545+
return -1;
7546+
}
7547+
7548+
72757549
void DxvkContext::updateIndexBufferBinding() {
72767550
m_flags.clr(DxvkContextFlag::GpDirtyIndexBuffer);
72777551

src/dxvk/dxvk_context.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1506,6 +1506,15 @@ namespace dxvk {
15061506
const Rc<DxvkImage>& srcImage,
15071507
VkImageSubresourceLayers srcSubresource);
15081508

1509+
bool copyImageInline(
1510+
DxvkImage& dstImage,
1511+
VkImageSubresourceLayers dstSubresource,
1512+
VkOffset3D dstOffset,
1513+
DxvkImage& srcImage,
1514+
VkImageSubresourceLayers srcSubresource,
1515+
VkOffset3D srcOffset,
1516+
VkExtent3D extent);
1517+
15091518
template<bool ToBuffer>
15101519
void copySparsePages(
15111520
const Rc<DxvkPagedResource>& sparse,
@@ -1743,6 +1752,10 @@ namespace dxvk {
17431752
const DxvkImage& image,
17441753
const VkImageSubresourceRange& subresources);
17451754

1755+
int32_t findColorAttachmentIndex(
1756+
const DxvkImage& image,
1757+
const VkImageSubresourceRange& subresources);
1758+
17461759
void updateIndexBufferBinding();
17471760
void updateVertexBufferBindings();
17481761

src/dxvk/dxvk_framebuffer.h

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,18 @@ namespace dxvk {
125125
return m_renderTargets.depth;
126126
}
127127

128+
/**
129+
* \brief Queries depth-stencil format
130+
*
131+
* Yields \c VK_FORMAT_UNDEFINED when the depth target has no view.
132+
* \returns The depth-stencil format
133+
*/
134+
VkFormat getDepthFormat() const {
135+
return getDepthTarget().view
136+
? getDepthTarget().view->info().format
137+
: VK_FORMAT_UNDEFINED;
138+
}
139+
128140
/**
129141
* \brief Color target
130142
*
@@ -135,6 +147,18 @@ namespace dxvk {
135147
return m_renderTargets.color[id];
136148
}
137149

150+
/**
151+
* \brief Queries color format
152+
*
153+
* \param [in] id Target Index
154+
* \returns The color target format, or \c VK_FORMAT_UNDEFINED if the target has no view
155+
*/
156+
VkFormat getColorFormat(uint32_t id) const {
157+
return getColorTarget(id).view
158+
? getColorTarget(id).view->info().format
159+
: VK_FORMAT_UNDEFINED;
160+
}
161+
138162
/**
139163
* \brief Number of framebuffer attachment
140164
* \returns Total attachment count

0 commit comments

Comments
 (0)