Skip to content

Commit f1bf932

Browse files
committed
[Vulkan] Implement initial support for sparse resources
This commit introduces the ability to create and test sparse resources in the Vulkan backend. Key changes:
- **Vulkan Backend**: Updated the Vulkan implementation to support sparse binding and sparse residency for buffers.
- **Test Infrastructure**: Enhanced test/lit.cfg.py to automatically detect all boolean Vulkan features reported by api-query and expose them as lit features. This lets tests use `REQUIRES:` for specific hardware capabilities such as sparseBinding and sparseResidencyBuffer.
- **New Tests**: Added a suite of feature tests in test/Feature/Sparse/ that verify sparse resource behavior across different buffer types, including residency checks and loads from partially mapped resources.

Together these changes provide the framework needed to validate sparse resource handling and residency behavior in HLSL-to-Vulkan pipelines.
1 parent 9cdb2b0 commit f1bf932

File tree

8 files changed

+673
-41
lines changed

8 files changed

+673
-41
lines changed

lib/API/VK/Device.cpp

Lines changed: 212 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,24 @@
1414
#include "llvm/ADT/DenseSet.h"
1515
#include "llvm/Support/Error.h"
1616

17+
#include <limits>
1718
#include <memory>
1819
#include <numeric>
1920
#include <system_error>
2021
#include <vulkan/vulkan.h>
2122

2223
using namespace offloadtest;
2324

25+
// We use 64KB tile size because DX has a fixed tile size, and the offload test
// suite must work for all APIs.
constexpr uint32_t SparseBufferTileSize = 65536;

/// Returns the number of sparse tiles to map for a buffer.
///
/// \param NumTiles Explicit tile count; when set it is returned unchanged.
/// \param Size     Buffer size in bytes, used only when \p NumTiles is empty.
/// \returns \p *NumTiles if provided, otherwise the number of
///          SparseBufferTileSize-sized tiles needed to cover \p Size bytes
///          (rounded up), saturated to the largest uint32_t value.
static uint32_t getNumTiles(std::optional<uint32_t> NumTiles, size_t Size) {
  if (NumTiles)
    return *NumTiles;

  // Divide before rounding up: `Size + SparseBufferTileSize - 1` wraps around
  // for sizes close to the maximum size_t value and would yield 0 tiles.
  const size_t Tiles = Size / SparseBufferTileSize +
                       (Size % SparseBufferTileSize != 0 ? 1 : 0);

  // Saturate rather than silently truncating when the count does not fit in
  // the 32-bit return type.
  constexpr uint32_t MaxTiles = std::numeric_limits<uint32_t>::max();
  return Tiles > MaxTiles ? MaxTiles : static_cast<uint32_t>(Tiles);
}
34+
2435
#define VKFormats(FMT, BITS) \
2536
if (Channels == 1) \
2637
return VK_FORMAT_R##BITS##_##FMT; \
@@ -58,6 +69,7 @@ static VkFormat getVKFormat(DataFormat Format, int Channels) {
5869
static VkDescriptorType getDescriptorType(const ResourceKind RK) {
5970
switch (RK) {
6071
case ResourceKind::Buffer:
72+
return VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER;
6173
case ResourceKind::RWBuffer:
6274
return VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER;
6375
case ResourceKind::Texture2D:
@@ -343,6 +355,7 @@ class VKDevice : public offloadtest::Device {
343355
struct InvocationState {
344356
VkDevice Device;
345357
VkQueue Queue;
358+
VkQueue SparseQueue = VK_NULL_HANDLE;
346359
VkCommandPool CmdPool;
347360
VkCommandBuffer CmdBuffer;
348361
VkPipelineLayout PipelineLayout;
@@ -537,6 +550,18 @@ class VKDevice : public offloadtest::Device {
537550
Extensions.data());
538551
}
539552

553+
static constexpr uint32_t InvalidQueueIndex =
554+
std::numeric_limits<uint32_t>::max();
555+
556+
uint32_t findQueue(const VkQueueFamilyProperties *Props, uint32_t Count,
557+
VkQueueFlags Flags) {
558+
for (uint32_t I = 0; I < Count; ++I) {
559+
if ((Props[I].queueFlags & Flags) == Flags)
560+
return I;
561+
}
562+
return InvalidQueueIndex;
563+
}
564+
540565
public:
541566
llvm::Error createDevice(InvocationState &IS) {
542567

@@ -552,33 +577,52 @@ class VKDevice : public offloadtest::Device {
552577
vkGetPhysicalDeviceQueueFamilyProperties(Device, &QueueCount,
553578
QueueFamilyProps.get());
554579

555-
int SelectedIdx = -1;
556-
for (uint32_t I = 0; I < QueueCount; ++I) {
557-
const VkQueueFlags Flags = QueueFamilyProps[I].queueFlags;
558-
// Prefer family supporting both GRAPHICS and COMPUTE
559-
if ((Flags & VK_QUEUE_GRAPHICS_BIT) && (Flags & VK_QUEUE_COMPUTE_BIT)) {
560-
SelectedIdx = static_cast<int>(I);
561-
break;
562-
}
580+
uint32_t MainQueueIdx =
581+
findQueue(QueueFamilyProps.get(), QueueCount,
582+
VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT |
583+
VK_QUEUE_SPARSE_BINDING_BIT);
584+
uint32_t SparseQueueIdx = MainQueueIdx;
585+
586+
// If not found, find separate queues
587+
if (MainQueueIdx == InvalidQueueIndex) {
588+
MainQueueIdx = findQueue(QueueFamilyProps.get(), QueueCount,
589+
VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT);
590+
SparseQueueIdx = findQueue(QueueFamilyProps.get(), QueueCount,
591+
VK_QUEUE_SPARSE_BINDING_BIT);
563592
}
564593

565-
if (SelectedIdx == -1)
566-
return llvm::createStringError(std::errc::no_such_device,
567-
"No suitable queue family found.");
594+
if (MainQueueIdx == InvalidQueueIndex)
595+
return llvm::createStringError(
596+
std::errc::no_such_device,
597+
"No suitable queue family found for graphics and compute.");
568598

569-
const uint32_t QueueIdx = static_cast<uint32_t>(SelectedIdx);
599+
if (SparseQueueIdx == InvalidQueueIndex)
600+
return llvm::createStringError(
601+
std::errc::no_such_device,
602+
"No suitable queue family found for sparse "
603+
"binding.");
570604

571-
VkDeviceQueueCreateInfo QueueInfo = {};
572605
const float QueuePriority = 1.0f;
573-
QueueInfo.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
574-
QueueInfo.queueFamilyIndex = QueueIdx;
575-
QueueInfo.queueCount = 1;
576-
QueueInfo.pQueuePriorities = &QueuePriority;
606+
std::vector<VkDeviceQueueCreateInfo> QueueCreateInfos;
607+
608+
auto AddQueueCreateInfo = [&](uint32_t QFamilyIndex) {
609+
VkDeviceQueueCreateInfo QueueCreateInfo = {};
610+
QueueCreateInfo.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
611+
QueueCreateInfo.queueFamilyIndex = QFamilyIndex;
612+
QueueCreateInfo.queueCount = 1;
613+
QueueCreateInfo.pQueuePriorities = &QueuePriority;
614+
QueueCreateInfos.push_back(QueueCreateInfo);
615+
};
616+
617+
AddQueueCreateInfo(MainQueueIdx);
618+
if (MainQueueIdx != SparseQueueIdx)
619+
AddQueueCreateInfo(SparseQueueIdx);
577620

578621
VkDeviceCreateInfo DeviceInfo = {};
579622
DeviceInfo.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
580-
DeviceInfo.queueCreateInfoCount = 1;
581-
DeviceInfo.pQueueCreateInfos = &QueueInfo;
623+
DeviceInfo.queueCreateInfoCount =
624+
static_cast<uint32_t>(QueueCreateInfos.size());
625+
DeviceInfo.pQueueCreateInfos = QueueCreateInfos.data();
582626

583627
VkPhysicalDeviceFeatures2 Features{};
584628
Features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
@@ -610,11 +654,12 @@ class VKDevice : public offloadtest::Device {
610654
if (vkCreateDevice(Device, &DeviceInfo, nullptr, &IS.Device))
611655
return llvm::createStringError(std::errc::no_such_device,
612656
"Could not create Vulkan logical device.");
613-
vkGetDeviceQueue(IS.Device, QueueIdx, 0, &IS.Queue);
657+
vkGetDeviceQueue(IS.Device, MainQueueIdx, 0, &IS.Queue);
658+
vkGetDeviceQueue(IS.Device, SparseQueueIdx, 0, &IS.SparseQueue);
614659

615660
VkCommandPoolCreateInfo CmdPoolInfo = {};
616661
CmdPoolInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
617-
CmdPoolInfo.queueFamilyIndex = QueueIdx;
662+
CmdPoolInfo.queueFamilyIndex = MainQueueIdx;
618663
CmdPoolInfo.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;
619664

620665
if (vkCreateCommandPool(IS.Device, &CmdPoolInfo, nullptr, &IS.CmdPool))
@@ -640,38 +685,58 @@ class VKDevice : public offloadtest::Device {
640685
return llvm::Error::success();
641686
}
642687

643-
llvm::Expected<BufferRef> createBuffer(InvocationState &IS,
644-
VkBufferUsageFlags Usage,
645-
VkMemoryPropertyFlags MemoryFlags,
646-
size_t Size, void *Data = nullptr) {
688+
/// Creates a VkBuffer handle without allocating or binding any memory.
///
/// \param Device Logical device that creates the buffer.
/// \param Size   Size of the buffer in bytes.
/// \param Usage  Buffer usage flags.
/// \param Flags  Buffer creation flags; defaults to none.
/// \returns The new buffer handle, or an error if vkCreateBuffer fails.
llvm::Expected<VkBuffer> createVkBuffer(VkDevice Device, size_t Size,
                                        VkBufferUsageFlags Usage,
                                        VkBufferCreateFlags Flags = 0) {
  VkBufferCreateInfo CreateInfo = {};
  CreateInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
  CreateInfo.flags = Flags;
  CreateInfo.size = Size;
  CreateInfo.usage = Usage;
  CreateInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE;

  VkBuffer Handle;
  const VkResult Result =
      vkCreateBuffer(Device, &CreateInfo, nullptr, &Handle);
  if (Result != VK_SUCCESS)
    return llvm::createStringError(std::errc::not_enough_memory,
                                   "Could not create buffer.");
  return Handle;
}
658704

659-
VkMemoryRequirements MemReqs;
660-
vkGetBufferMemoryRequirements(IS.Device, Buffer, &MemReqs);
705+
/// Allocates a block of device memory.
///
/// \param Device          Logical device that performs the allocation.
/// \param Size            Allocation size in bytes.
/// \param MemoryTypeIndex Index of the memory type to allocate from.
/// \returns The allocation handle, or an error if vkAllocateMemory fails.
llvm::Expected<VkDeviceMemory>
allocateMemory(VkDevice Device, VkDeviceSize Size, uint32_t MemoryTypeIndex) {
  VkMemoryAllocateInfo Request = {};
  Request.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
  Request.memoryTypeIndex = MemoryTypeIndex;
  Request.allocationSize = Size;

  VkDeviceMemory Handle;
  const VkResult Result = vkAllocateMemory(Device, &Request, nullptr, &Handle);
  if (Result != VK_SUCCESS)
    return llvm::createStringError(std::errc::not_enough_memory,
                                   "Memory allocation failed.");
  return Handle;
}
717+
718+
llvm::Expected<BufferRef> createBuffer(InvocationState &IS,
719+
VkBufferUsageFlags Usage,
720+
VkMemoryPropertyFlags MemoryFlags,
721+
size_t Size, void *Data = nullptr) {
722+
auto ExBuffer = createVkBuffer(IS.Device, Size, Usage);
723+
if (!ExBuffer)
724+
return ExBuffer.takeError();
725+
VkBuffer Buffer = *ExBuffer;
726+
727+
VkMemoryRequirements MemReqs;
728+
vkGetBufferMemoryRequirements(IS.Device, Buffer, &MemReqs);
664729

665730
llvm::Expected<uint32_t> MemIdx =
666731
getMemoryIndex(Device, MemReqs.memoryTypeBits, MemoryFlags);
667732
if (!MemIdx)
668733
return MemIdx.takeError();
669734

670-
AllocInfo.memoryTypeIndex = *MemIdx;
735+
auto ExMemory = allocateMemory(IS.Device, MemReqs.size, *MemIdx);
736+
if (!ExMemory)
737+
return ExMemory.takeError();
738+
VkDeviceMemory Memory = *ExMemory;
671739

672-
if (vkAllocateMemory(IS.Device, &AllocInfo, nullptr, &Memory))
673-
return llvm::createStringError(std::errc::not_enough_memory,
674-
"Memory allocation failed.");
675740
if (Data) {
676741
void *Dst = nullptr;
677742
if (vkMapMemory(IS.Device, Memory, 0, Size, 0, &Dst))
@@ -777,6 +842,18 @@ class VKDevice : public offloadtest::Device {
777842
return ResourceRef(Host, ImageRef{0, Sampler, 0});
778843
}
779844

845+
/// Computes how many bytes to copy into the device buffer for \p R.
///
/// For a non-reserved resource this is R.size(). For a reserved (sparse)
/// resource the result is capped at the size of the mapped tiles, i.e.
/// getNumTiles(R.TilesMapped, R.size()) * SparseBufferTileSize.
///
/// \p IS and \p Buffer are not read by this function.
VkDeviceSize getCopySize(InvocationState &IS, Resource &R, VkBuffer Buffer) {
  const VkDeviceSize FullSize = R.size();
  if (!R.IsReserved)
    return FullSize;

  // Widen the tile count before multiplying so the product is computed in
  // VkDeviceSize rather than in 32 bits.
  const VkDeviceSize TileCount = getNumTiles(R.TilesMapped, R.size());
  const VkDeviceSize ResidentBytes = TileCount * SparseBufferTileSize;
  return FullSize > ResidentBytes ? ResidentBytes : FullSize;
}
856+
780857
llvm::Error createResource(Resource &R, InvocationState &IS) {
781858
// Samplers don't have backing data buffers, so handle them separately
782859
if (R.isSampler()) {
@@ -790,6 +867,11 @@ class VKDevice : public offloadtest::Device {
790867
return llvm::Error::success();
791868
}
792869

870+
if (!R.BufferPtr)
871+
return llvm::createStringError(std::errc::invalid_argument,
872+
"Resource '%s' has no backing buffer.",
873+
R.Name.c_str());
874+
793875
ResourceBundle Bundle{getDescriptorType(R.Kind), R.size(), R.BufferPtr};
794876
for (auto &ResData : R.BufferPtr->Data) {
795877
auto ExHostBuf = createBuffer(
@@ -805,15 +887,25 @@ class VKDevice : public offloadtest::Device {
805887
return ExImageRef.takeError();
806888
Bundle.ResourceRefs.push_back(*ExImageRef);
807889
} else {
808-
auto ExDeviceBuf = createBuffer(
809-
IS,
810-
getFlagBits(R.Kind) | VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
811-
VK_BUFFER_USAGE_TRANSFER_DST_BIT,
812-
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, R.size());
890+
llvm::Expected<BufferRef> ExDeviceBuf =
891+
R.IsReserved
892+
? createSparseBuffer(
893+
IS,
894+
getFlagBits(R.Kind) | VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
895+
VK_BUFFER_USAGE_TRANSFER_DST_BIT,
896+
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, R.size(),
897+
getNumTiles(R.TilesMapped, R.size()))
898+
: createBuffer(IS,
899+
getFlagBits(R.Kind) |
900+
VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
901+
VK_BUFFER_USAGE_TRANSFER_DST_BIT,
902+
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, R.size());
903+
813904
if (!ExDeviceBuf)
814905
return ExDeviceBuf.takeError();
906+
815907
VkBufferCopy Copy = {};
816-
Copy.size = R.size();
908+
Copy.size = getCopySize(IS, R, ExDeviceBuf->Buffer);
817909
vkCmdCopyBuffer(IS.CmdBuffer, ExHostBuf->Buffer, ExDeviceBuf->Buffer, 1,
818910
&Copy);
819911
Bundle.ResourceRefs.emplace_back(*ExHostBuf, *ExDeviceBuf);
@@ -848,6 +940,85 @@ class VKDevice : public offloadtest::Device {
848940
return llvm::Error::success();
849941
}
850942

943+
/// Creates a buffer with sparse binding and sparse residency enabled and backs
/// only its leading tiles with device memory.
///
/// \param IS          Invocation state; supplies the logical device and the
///                    queue (IS.SparseQueue) the sparse bind is submitted to.
/// \param Usage       Buffer usage flags.
/// \param MemoryFlags Required properties of the backing memory type.
/// \param Size        Size of the buffer in bytes.
/// \param TilesMapped Number of SparseBufferTileSize-sized tiles, counted from
///                    offset 0, to bind memory to.
/// \returns The buffer together with its backing allocation, or an error. When
///          no tiles are mapped the memory handle is VK_NULL_HANDLE. Every
///          Vulkan object created here is released before an error is
///          returned.
llvm::Expected<BufferRef>
createSparseBuffer(InvocationState &IS, VkBufferUsageFlags Usage,
                   VkMemoryPropertyFlags MemoryFlags, size_t Size,
                   uint32_t TilesMapped) {
  auto ExBuffer = createVkBuffer(IS.Device, Size, Usage,
                                 VK_BUFFER_CREATE_SPARSE_BINDING_BIT |
                                     VK_BUFFER_CREATE_SPARSE_RESIDENCY_BIT);
  if (!ExBuffer)
    return ExBuffer.takeError();
  VkBuffer Buffer = *ExBuffer;

  VkDeviceMemory Memory = VK_NULL_HANDLE;
  VkFence Fence = VK_NULL_HANDLE;

  // Releases everything created so far. Vulkan ignores VK_NULL_HANDLE in these
  // destroy/free calls, so this is safe at any point after buffer creation.
  auto ReleaseAll = [&]() {
    vkDestroyFence(IS.Device, Fence, nullptr);
    vkFreeMemory(IS.Device, Memory, nullptr);
    vkDestroyBuffer(IS.Device, Buffer, nullptr);
  };

  VkMemoryRequirements MemReqs;
  vkGetBufferMemoryRequirements(IS.Device, Buffer, &MemReqs);

  // Tile boundaries must also be valid bind boundaries for this buffer.
  if (SparseBufferTileSize % MemReqs.alignment != 0) {
    ReleaseAll();
    return llvm::createStringError(
        std::errc::not_supported,
        "Sparse buffer alignment must be a factor of 64KB.");
  }

  // Size of the mapped region. Widen before multiplying: a 32-bit product
  // wraps to 0 once TilesMapped reaches 65536.
  VkDeviceSize MappedSize =
      static_cast<VkDeviceSize>(TilesMapped) * SparseBufferTileSize;
  if (MappedSize > MemReqs.size)
    MappedSize = MemReqs.size;

  // Nothing to map: leave the buffer fully unbound. A zero-byte allocation or
  // a zero-size sparse bind is not valid Vulkan usage.
  if (MappedSize == 0)
    return BufferRef{Buffer, Memory};

  llvm::Expected<uint32_t> MemIdx =
      getMemoryIndex(Device, MemReqs.memoryTypeBits, MemoryFlags);
  if (!MemIdx) {
    ReleaseAll();
    return MemIdx.takeError();
  }

  auto ExMemory = allocateMemory(IS.Device, MappedSize, *MemIdx);
  if (!ExMemory) {
    ReleaseAll();
    return ExMemory.takeError();
  }
  Memory = *ExMemory;

  // Bind the allocated memory to the start of the buffer.
  VkSparseMemoryBind Bind = {};
  Bind.resourceOffset = 0;
  Bind.size = MappedSize;
  Bind.memory = Memory;
  Bind.memoryOffset = 0;
  Bind.flags = 0;

  VkSparseBufferMemoryBindInfo BufferBindInfo = {};
  BufferBindInfo.buffer = Buffer;
  BufferBindInfo.bindCount = 1;
  BufferBindInfo.pBinds = &Bind;

  VkBindSparseInfo BindInfo = {};
  BindInfo.sType = VK_STRUCTURE_TYPE_BIND_SPARSE_INFO;
  BindInfo.bufferBindCount = 1;
  BindInfo.pBufferBinds = &BufferBindInfo;

  // The bind is submitted on IS.SparseQueue and waited on with a fence, so the
  // binding is complete before this function returns.
  VkFenceCreateInfo FenceInfo = {};
  FenceInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
  if (vkCreateFence(IS.Device, &FenceInfo, nullptr, &Fence) != VK_SUCCESS) {
    // Do not rely on the output handle after a failed create.
    Fence = VK_NULL_HANDLE;
    ReleaseAll();
    return llvm::createStringError(std::errc::device_or_resource_busy,
                                   "Failed to create fence for sparse bind");
  }

  if (vkQueueBindSparse(IS.SparseQueue, 1, &BindInfo, Fence) != VK_SUCCESS) {
    ReleaseAll();
    return llvm::createStringError(std::errc::io_error,
                                   "vkQueueBindSparse failed");
  }

  if (vkWaitForFences(IS.Device, 1, &Fence, VK_TRUE, UINT64_MAX) !=
      VK_SUCCESS) {
    ReleaseAll();
    return llvm::createStringError(std::errc::device_or_resource_busy,
                                   "Failed to wait for sparse bind fence");
  }

  vkDestroyFence(IS.Device, Fence, nullptr);

  return BufferRef{Buffer, Memory};
}
1021+
8511022
llvm::Error createDepthStencil(Pipeline &P, InvocationState &IS) {
8521023
// Create an optimal image used as the depth stencil attachment
8531024
VkImageCreateInfo ImageCi = {};
@@ -991,7 +1162,7 @@ class VKDevice : public offloadtest::Device {
9911162
// Submit to the queue
9921163
if (vkQueueSubmit(IS.Queue, 1, &SubmitInfo, Fence))
9931164
return llvm::createStringError(std::errc::device_or_resource_busy,
994-
"Failed to submit to queue.");
1165+
"Failed to submit command buffer.");
9951166
if (vkWaitForFences(IS.Device, 1, &Fence, VK_TRUE, UINT64_MAX))
9961167
return llvm::createStringError(std::errc::device_or_resource_busy,
9971168
"Failed waiting for fence.");

0 commit comments

Comments
 (0)