1414#include " llvm/ADT/DenseSet.h"
1515#include " llvm/Support/Error.h"
1616
17+ #include < limits>
1718#include < memory>
1819#include < numeric>
1920#include < system_error>
2021#include < vulkan/vulkan.h>
2122
2223using namespace offloadtest ;
2324
// Sparse (reserved) buffers are handled in 64KB tiles. DX has a fixed tile
// size, and the offload test suite must work for all APIs, so the same size is
// used here.
constexpr uint32_t SparseBufferTileSize = 65536;

/// Returns the number of tiles to map for a buffer of \p Size bytes.
///
/// \param NumTiles Explicit tile count; returned unchanged when it has a value.
/// \param Size Buffer size in bytes; only consulted when \p NumTiles is empty.
/// \returns \p NumTiles when set, otherwise ceil(Size / SparseBufferTileSize),
/// saturated to the largest value representable in uint32_t.
static uint32_t getNumTiles(std::optional<uint32_t> NumTiles, size_t Size) {
  if (NumTiles)
    return *NumTiles;

  // Divide first and round up afterwards: `Size + TileSize - 1` wraps around
  // for sizes close to SIZE_MAX and would produce a bogus, tiny tile count.
  const size_t Tiles =
      Size / SparseBufferTileSize + (Size % SparseBufferTileSize != 0 ? 1 : 0);

  // Saturate instead of silently truncating when narrowing to uint32_t.
  constexpr uint32_t MaxTiles = std::numeric_limits<uint32_t>::max();
  return Tiles > MaxTiles ? MaxTiles : static_cast<uint32_t>(Tiles);
}
34+
2435#define VKFormats (FMT, BITS ) \
2536 if (Channels == 1 ) \
2637 return VK_FORMAT_R##BITS##_##FMT; \
@@ -58,6 +69,7 @@ static VkFormat getVKFormat(DataFormat Format, int Channels) {
5869static VkDescriptorType getDescriptorType (const ResourceKind RK) {
5970 switch (RK) {
6071 case ResourceKind::Buffer:
72+ return VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER;
6173 case ResourceKind::RWBuffer:
6274 return VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER;
6375 case ResourceKind::Texture2D:
@@ -343,6 +355,7 @@ class VKDevice : public offloadtest::Device {
343355 struct InvocationState {
344356 VkDevice Device;
345357 VkQueue Queue;
358+ VkQueue SparseQueue = VK_NULL_HANDLE;
346359 VkCommandPool CmdPool;
347360 VkCommandBuffer CmdBuffer;
348361 VkPipelineLayout PipelineLayout;
@@ -537,6 +550,18 @@ class VKDevice : public offloadtest::Device {
537550 Extensions.data ());
538551 }
539552
553+ static constexpr uint32_t InvalidQueueIndex =
554+ std::numeric_limits<uint32_t >::max();
555+
556+ uint32_t findQueue (const VkQueueFamilyProperties *Props, uint32_t Count,
557+ VkQueueFlags Flags) {
558+ for (uint32_t I = 0 ; I < Count; ++I) {
559+ if ((Props[I].queueFlags & Flags) == Flags)
560+ return I;
561+ }
562+ return InvalidQueueIndex;
563+ }
564+
540565public:
541566 llvm::Error createDevice (InvocationState &IS) {
542567
@@ -552,33 +577,52 @@ class VKDevice : public offloadtest::Device {
552577 vkGetPhysicalDeviceQueueFamilyProperties (Device, &QueueCount,
553578 QueueFamilyProps.get ());
554579
555- int SelectedIdx = -1 ;
556- for (uint32_t I = 0 ; I < QueueCount; ++I) {
557- const VkQueueFlags Flags = QueueFamilyProps[I].queueFlags ;
558- // Prefer family supporting both GRAPHICS and COMPUTE
559- if ((Flags & VK_QUEUE_GRAPHICS_BIT) && (Flags & VK_QUEUE_COMPUTE_BIT)) {
560- SelectedIdx = static_cast <int >(I);
561- break ;
562- }
580+ uint32_t MainQueueIdx =
581+ findQueue (QueueFamilyProps.get (), QueueCount,
582+ VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT |
583+ VK_QUEUE_SPARSE_BINDING_BIT);
584+ uint32_t SparseQueueIdx = MainQueueIdx;
585+
586+ // If not found, find separate queues
587+ if (MainQueueIdx == InvalidQueueIndex) {
588+ MainQueueIdx = findQueue (QueueFamilyProps.get (), QueueCount,
589+ VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT);
590+ SparseQueueIdx = findQueue (QueueFamilyProps.get (), QueueCount,
591+ VK_QUEUE_SPARSE_BINDING_BIT);
563592 }
564593
565- if (SelectedIdx == -1 )
566- return llvm::createStringError (std::errc::no_such_device,
567- " No suitable queue family found." );
594+ if (MainQueueIdx == InvalidQueueIndex)
595+ return llvm::createStringError (
596+ std::errc::no_such_device,
597+ " No suitable queue family found for graphics and compute." );
568598
569- const uint32_t QueueIdx = static_cast <uint32_t >(SelectedIdx);
599+ if (SparseQueueIdx == InvalidQueueIndex)
600+ return llvm::createStringError (
601+ std::errc::no_such_device,
602+ " No suitable queue family found for sparse "
603+ " binding." );
570604
571- VkDeviceQueueCreateInfo QueueInfo = {};
572605 const float QueuePriority = 1 .0f ;
573- QueueInfo.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
574- QueueInfo.queueFamilyIndex = QueueIdx;
575- QueueInfo.queueCount = 1 ;
576- QueueInfo.pQueuePriorities = &QueuePriority;
606+ std::vector<VkDeviceQueueCreateInfo> QueueCreateInfos;
607+
608+ auto AddQueueCreateInfo = [&](uint32_t QFamilyIndex) {
609+ VkDeviceQueueCreateInfo QueueCreateInfo = {};
610+ QueueCreateInfo.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
611+ QueueCreateInfo.queueFamilyIndex = QFamilyIndex;
612+ QueueCreateInfo.queueCount = 1 ;
613+ QueueCreateInfo.pQueuePriorities = &QueuePriority;
614+ QueueCreateInfos.push_back (QueueCreateInfo);
615+ };
616+
617+ AddQueueCreateInfo (MainQueueIdx);
618+ if (MainQueueIdx != SparseQueueIdx)
619+ AddQueueCreateInfo (SparseQueueIdx);
577620
578621 VkDeviceCreateInfo DeviceInfo = {};
579622 DeviceInfo.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
580- DeviceInfo.queueCreateInfoCount = 1 ;
581- DeviceInfo.pQueueCreateInfos = &QueueInfo;
623+ DeviceInfo.queueCreateInfoCount =
624+ static_cast <uint32_t >(QueueCreateInfos.size ());
625+ DeviceInfo.pQueueCreateInfos = QueueCreateInfos.data ();
582626
583627 VkPhysicalDeviceFeatures2 Features{};
584628 Features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
@@ -610,11 +654,12 @@ class VKDevice : public offloadtest::Device {
610654 if (vkCreateDevice (Device, &DeviceInfo, nullptr , &IS.Device ))
611655 return llvm::createStringError (std::errc::no_such_device,
612656 " Could not create Vulkan logical device." );
613- vkGetDeviceQueue (IS.Device , QueueIdx, 0 , &IS.Queue );
657+ vkGetDeviceQueue (IS.Device , MainQueueIdx, 0 , &IS.Queue );
658+ vkGetDeviceQueue (IS.Device , SparseQueueIdx, 0 , &IS.SparseQueue );
614659
615660 VkCommandPoolCreateInfo CmdPoolInfo = {};
616661 CmdPoolInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
617- CmdPoolInfo.queueFamilyIndex = QueueIdx ;
662+ CmdPoolInfo.queueFamilyIndex = MainQueueIdx ;
618663 CmdPoolInfo.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;
619664
620665 if (vkCreateCommandPool (IS.Device , &CmdPoolInfo, nullptr , &IS.CmdPool ))
@@ -640,38 +685,58 @@ class VKDevice : public offloadtest::Device {
640685 return llvm::Error::success ();
641686 }
642687
643- llvm::Expected<BufferRef> createBuffer (InvocationState &IS,
644- VkBufferUsageFlags Usage,
645- VkMemoryPropertyFlags MemoryFlags,
646- size_t Size, void *Data = nullptr ) {
688+ llvm::Expected<VkBuffer> createVkBuffer (VkDevice Device, size_t Size,
689+ VkBufferUsageFlags Usage,
690+ VkBufferCreateFlags Flags = 0 ) {
647691 VkBuffer Buffer;
648- VkDeviceMemory Memory;
649692 VkBufferCreateInfo BufferInfo = {};
650693 BufferInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
651694 BufferInfo.size = Size;
652695 BufferInfo.usage = Usage;
653696 BufferInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
697+ BufferInfo.flags = Flags;
654698
655- if (vkCreateBuffer (IS. Device , &BufferInfo, nullptr , &Buffer))
699+ if (vkCreateBuffer (Device, &BufferInfo, nullptr , &Buffer))
656700 return llvm::createStringError (std::errc::not_enough_memory,
657701 " Could not create buffer." );
702+ return Buffer;
703+ }
658704
659- VkMemoryRequirements MemReqs;
660- vkGetBufferMemoryRequirements (IS. Device , Buffer, &MemReqs);
705+ llvm::Expected<VkDeviceMemory>
706+ allocateMemory (VkDevice Device, VkDeviceSize Size, uint32_t MemoryTypeIndex) {
661707 VkMemoryAllocateInfo AllocInfo = {};
662708 AllocInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
663- AllocInfo.allocationSize = MemReqs.size ;
709+ AllocInfo.allocationSize = Size;
710+ AllocInfo.memoryTypeIndex = MemoryTypeIndex;
711+ VkDeviceMemory Memory;
712+ if (vkAllocateMemory (Device, &AllocInfo, nullptr , &Memory))
713+ return llvm::createStringError (std::errc::not_enough_memory,
714+ " Memory allocation failed." );
715+ return Memory;
716+ }
717+
718+ llvm::Expected<BufferRef> createBuffer (InvocationState &IS,
719+ VkBufferUsageFlags Usage,
720+ VkMemoryPropertyFlags MemoryFlags,
721+ size_t Size, void *Data = nullptr ) {
722+ auto ExBuffer = createVkBuffer (IS.Device , Size, Usage);
723+ if (!ExBuffer)
724+ return ExBuffer.takeError ();
725+ VkBuffer Buffer = *ExBuffer;
726+
727+ VkMemoryRequirements MemReqs;
728+ vkGetBufferMemoryRequirements (IS.Device , Buffer, &MemReqs);
664729
665730 llvm::Expected<uint32_t > MemIdx =
666731 getMemoryIndex (Device, MemReqs.memoryTypeBits , MemoryFlags);
667732 if (!MemIdx)
668733 return MemIdx.takeError ();
669734
670- AllocInfo.memoryTypeIndex = *MemIdx;
735+ auto ExMemory = allocateMemory (IS.Device , MemReqs.size , *MemIdx);
736+ if (!ExMemory)
737+ return ExMemory.takeError ();
738+ VkDeviceMemory Memory = *ExMemory;
671739
672- if (vkAllocateMemory (IS.Device , &AllocInfo, nullptr , &Memory))
673- return llvm::createStringError (std::errc::not_enough_memory,
674- " Memory allocation failed." );
675740 if (Data) {
676741 void *Dst = nullptr ;
677742 if (vkMapMemory (IS.Device , Memory, 0 , Size, 0 , &Dst))
@@ -777,6 +842,18 @@ class VKDevice : public offloadtest::Device {
777842 return ResourceRef (Host, ImageRef{0 , Sampler, 0 });
778843 }
779844
845+ VkDeviceSize getCopySize (InvocationState &IS, Resource &R, VkBuffer Buffer) {
846+ VkDeviceSize CopySize = R.size ();
847+ if (R.IsReserved ) {
848+ VkDeviceSize MappedSize =
849+ static_cast <VkDeviceSize>(getNumTiles (R.TilesMapped , R.size ())) *
850+ SparseBufferTileSize;
851+ if (CopySize > MappedSize)
852+ CopySize = MappedSize;
853+ }
854+ return CopySize;
855+ }
856+
780857 llvm::Error createResource (Resource &R, InvocationState &IS) {
781858 // Samplers don't have backing data buffers, so handle them separately
782859 if (R.isSampler ()) {
@@ -790,6 +867,11 @@ class VKDevice : public offloadtest::Device {
790867 return llvm::Error::success ();
791868 }
792869
870+ if (!R.BufferPtr )
871+ return llvm::createStringError (std::errc::invalid_argument,
872+ " Resource '%s' has no backing buffer." ,
873+ R.Name .c_str ());
874+
793875 ResourceBundle Bundle{getDescriptorType (R.Kind ), R.size (), R.BufferPtr };
794876 for (auto &ResData : R.BufferPtr ->Data ) {
795877 auto ExHostBuf = createBuffer (
@@ -805,15 +887,25 @@ class VKDevice : public offloadtest::Device {
805887 return ExImageRef.takeError ();
806888 Bundle.ResourceRefs .push_back (*ExImageRef);
807889 } else {
808- auto ExDeviceBuf = createBuffer (
809- IS,
810- getFlagBits (R.Kind ) | VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
811- VK_BUFFER_USAGE_TRANSFER_DST_BIT,
812- VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, R.size ());
890+ llvm::Expected<BufferRef> ExDeviceBuf =
891+ R.IsReserved
892+ ? createSparseBuffer (
893+ IS,
894+ getFlagBits (R.Kind ) | VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
895+ VK_BUFFER_USAGE_TRANSFER_DST_BIT,
896+ VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, R.size (),
897+ getNumTiles (R.TilesMapped , R.size ()))
898+ : createBuffer (IS,
899+ getFlagBits (R.Kind ) |
900+ VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
901+ VK_BUFFER_USAGE_TRANSFER_DST_BIT,
902+ VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, R.size ());
903+
813904 if (!ExDeviceBuf)
814905 return ExDeviceBuf.takeError ();
906+
815907 VkBufferCopy Copy = {};
816- Copy.size = R. size ( );
908+ Copy.size = getCopySize (IS, R, ExDeviceBuf-> Buffer );
817909 vkCmdCopyBuffer (IS.CmdBuffer , ExHostBuf->Buffer , ExDeviceBuf->Buffer , 1 ,
818910 &Copy);
819911 Bundle.ResourceRefs .emplace_back (*ExHostBuf, *ExDeviceBuf);
@@ -848,6 +940,85 @@ class VKDevice : public offloadtest::Device {
848940 return llvm::Error::success ();
849941 }
850942
943+ llvm::Expected<BufferRef>
944+ createSparseBuffer (InvocationState &IS, VkBufferUsageFlags Usage,
945+ VkMemoryPropertyFlags MemoryFlags, size_t Size,
946+ uint32_t TilesMapped) {
947+ auto ExBuffer = createVkBuffer (IS.Device , Size, Usage,
948+ VK_BUFFER_CREATE_SPARSE_BINDING_BIT |
949+ VK_BUFFER_CREATE_SPARSE_RESIDENCY_BIT);
950+ if (!ExBuffer)
951+ return ExBuffer.takeError ();
952+ VkBuffer Buffer = *ExBuffer;
953+
954+ VkMemoryRequirements MemReqs;
955+ vkGetBufferMemoryRequirements (IS.Device , Buffer, &MemReqs);
956+
957+ if (SparseBufferTileSize % MemReqs.alignment != 0 )
958+ return llvm::createStringError (
959+ std::errc::not_supported,
960+ " Sparse buffer alignment must be a factor of 64KB." );
961+
962+ // Calculate size for the mapped region (TilesMapped * SparseBufferTileSize)
963+ VkDeviceSize MappedSize = TilesMapped * SparseBufferTileSize;
964+
965+ if (MappedSize > MemReqs.size )
966+ MappedSize = MemReqs.size ;
967+
968+ llvm::Expected<uint32_t > MemIdx =
969+ getMemoryIndex (Device, MemReqs.memoryTypeBits , MemoryFlags);
970+ if (!MemIdx)
971+ return MemIdx.takeError ();
972+
973+ auto ExMemory = allocateMemory (IS.Device , MappedSize, *MemIdx);
974+ if (!ExMemory)
975+ return ExMemory.takeError ();
976+ VkDeviceMemory Memory = *ExMemory;
977+
978+ // Bind the allocated memory to the start of the buffer
979+ VkSparseMemoryBind Bind = {};
980+ Bind.resourceOffset = 0 ;
981+ Bind.size = MappedSize;
982+ Bind.memory = Memory;
983+ Bind.memoryOffset = 0 ;
984+ Bind.flags = 0 ;
985+
986+ VkSparseBufferMemoryBindInfo BufferBindInfo = {};
987+ BufferBindInfo.buffer = Buffer;
988+ BufferBindInfo.bindCount = 1 ;
989+ BufferBindInfo.pBinds = &Bind;
990+
991+ VkBindSparseInfo BindInfo = {};
992+ BindInfo.sType = VK_STRUCTURE_TYPE_BIND_SPARSE_INFO;
993+ BindInfo.bufferBindCount = 1 ;
994+ BindInfo.pBufferBinds = &BufferBindInfo;
995+
996+ // Use a fence to ensure binding is complete before use, though for simple
997+ // cases strict ordering might suffice if on same queue. Ideally we should
998+ // wait, but here we just submit. IS.Queue is used for commands. Note:
999+ // vkQueueBindSparse requires the queue to support SPARSI_BINDING. We assume
1000+ // the main queue supports it.
1001+ VkFence Fence;
1002+ VkFenceCreateInfo FenceInfo = {};
1003+ FenceInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
1004+ if (vkCreateFence (IS.Device , &FenceInfo, nullptr , &Fence))
1005+ return llvm::createStringError (std::errc::device_or_resource_busy,
1006+ " Failed to create fence for sparse bind" );
1007+
1008+ if (vkQueueBindSparse (IS.SparseQueue , 1 , &BindInfo, Fence) != VK_SUCCESS)
1009+ return llvm::createStringError (std::errc::io_error,
1010+ " vkQueueBindSparse failed" );
1011+
1012+ if (vkWaitForFences (IS.Device , 1 , &Fence, VK_TRUE, UINT64_MAX) !=
1013+ VK_SUCCESS)
1014+ return llvm::createStringError (std::errc::device_or_resource_busy,
1015+ " Failed to wait for sparse bind fence" );
1016+
1017+ vkDestroyFence (IS.Device , Fence, nullptr );
1018+
1019+ return BufferRef{Buffer, Memory};
1020+ }
1021+
8511022 llvm::Error createDepthStencil (Pipeline &P, InvocationState &IS) {
8521023 // Create an optimal image used as the depth stencil attachment
8531024 VkImageCreateInfo ImageCi = {};
@@ -991,7 +1162,7 @@ class VKDevice : public offloadtest::Device {
9911162 // Submit to the queue
9921163 if (vkQueueSubmit (IS.Queue , 1 , &SubmitInfo, Fence))
9931164 return llvm::createStringError (std::errc::device_or_resource_busy,
994- " Failed to submit to queue ." );
1165+ " Failed to submit command buffer ." );
9951166 if (vkWaitForFences (IS.Device , 1 , &Fence, VK_TRUE, UINT64_MAX))
9961167 return llvm::createStringError (std::errc::device_or_resource_busy,
9971168 " Failed waiting for fence." );
0 commit comments