Skip to content

Commit 6db0603

Browse files
committed
feat(vulkan): add resource state tracking for Vulkan images
- Introduced a new resource state tracker to manage Vulkan image layouts, reducing redundant pipeline barriers during layout transitions.
- Implemented functions for initializing, shutting down, and managing image layout states, enhancing performance and efficiency in rendering.
- Updated existing Vulkan files to utilize the new resource state tracker, improving synchronization and reducing unnecessary operations.
- Enhanced comments and documentation to clarify the purpose and functionality of the resource state management system.

This commit significantly improves the Vulkan renderer's handling of image states, contributing to better performance and maintainability.
1 parent cb60395 commit 6db0603

9 files changed

Lines changed: 551 additions & 115 deletions

File tree

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1546,6 +1546,7 @@ SET(RENDERER_VK_SRCS
15461546
src/renderers/vulkan/vk_flares.c
15471547
src/renderers/vulkan/vk_utils.c
15481548
src/renderers/vulkan/vk_commands.c
1549+
src/renderers/vulkan/vk_resource_state.c
15491550
src/renderers/vulkan/vk_pipeline.c
15501551
src/renderers/vulkan/vk_shader_manager.cpp
15511552
src/renderers/vulkan/vk_shader_validation.c

src/renderers/vulkan/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ if(VK_METRICS_ENABLED)
1010
endif()
1111
endif()
1212
endif()
13-
target_sources(idtech3_vulkan_x86_64 PRIVATE vk_rtx_acceleration.cpp vk_bindings_direct_final.c vk_sync.c)
13+
target_sources(idtech3_vulkan_x86_64 PRIVATE vk_rtx_acceleration.cpp vk_bindings_direct_final.c vk_sync.c vk_resource_state.c)
1414
# Always include lightweight Vulkan command-buffer stubs to ease testing
1515
option(VK_METRICS_ENABLED "Enable Vulkan metrics instrumentation" OFF)
1616

src/renderers/vulkan/tr_scene.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,13 +32,13 @@ static int r_firstSceneLitSurf;
3232
int r_numdlights;
3333
static int r_firstSceneDlight;
3434

35-
static int r_numentities;
35+
int r_numentities;
3636
static int r_firstSceneEntity;
3737

38-
static int r_numpolys;
38+
int r_numpolys;
3939
static int r_firstScenePoly;
4040

41-
static int r_numpolyverts;
41+
int r_numpolyverts;
4242

4343
// CPU-side particle buffer for RE_AddParticle
4444
#define MAX_CPU_PARTICLES 1024

src/renderers/vulkan/vk.c

Lines changed: 28 additions & 92 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,7 @@ void *Sys_LoadFunction(void *handle, const char *name) {
109109
#include "vk_surface_sprites.h"
110110
#include "vk_world_effects.h"
111111
#include "vk_frame.h"
112+
#include "vk_resource_state.h"
112113

113114
#include "vk_post_process.h"
114115
#ifdef USE_VULKAN_RAY_TRACING
@@ -629,6 +630,7 @@ PFN_vkResetFences qvkResetFences;
629630
PFN_vkUnmapMemory qvkUnmapMemory;
630631
PFN_vkUpdateDescriptorSets qvkUpdateDescriptorSets;
631632
PFN_vkWaitForFences qvkWaitForFences;
633+
PFN_vkGetFenceStatus qvkGetFenceStatus;
632634
PFN_vkAcquireNextImageKHR qvkAcquireNextImageKHR;
633635
PFN_vkCreateSwapchainKHR qvkCreateSwapchainKHR;
634636
PFN_vkDestroySwapchainKHR qvkDestroySwapchainKHR;
@@ -1055,6 +1057,7 @@ void VK_EndImmediateCommands( VkCommandBuffer command_buffer, const char *locati
10551057

10561058

10571059
// Image layout transition helper
1060+
// Now uses resource state tracker to avoid redundant barriers
10581061
static void record_image_layout_transition(
10591062
VkCommandBuffer command_buffer,
10601063
VkImage image,
@@ -1064,101 +1067,26 @@ static void record_image_layout_transition(
10641067
uint32_t src_stage_override,
10651068
uint32_t dst_stage_override )
10661069
{
1067-
(void)dst_stage_override; // Suppress unused parameter warning
1068-
1069-
// Determine source stage and access mask
1070-
uint32_t src_stage;
1071-
VkAccessFlags src_access;
1072-
1073-
switch ( old_layout ) {
1074-
case VK_IMAGE_LAYOUT_UNDEFINED:
1075-
src_stage = (src_stage_override != 0) ? src_stage_override : VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
1076-
src_access = VK_ACCESS_NONE;
1077-
break;
1078-
case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
1079-
src_stage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
1080-
src_access = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
1081-
break;
1082-
case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
1083-
src_stage = VK_PIPELINE_STAGE_TRANSFER_BIT;
1084-
src_access = VK_ACCESS_TRANSFER_WRITE_BIT;
1085-
break;
1086-
case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL:
1087-
src_stage = VK_PIPELINE_STAGE_TRANSFER_BIT;
1088-
src_access = VK_ACCESS_TRANSFER_READ_BIT;
1089-
break;
1090-
case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL:
1091-
src_stage = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
1092-
src_access = VK_ACCESS_SHADER_READ_BIT;
1093-
break;
1094-
case VK_IMAGE_LAYOUT_PRESENT_SRC_KHR:
1095-
src_stage = VK_PIPELINE_STAGE_TRANSFER_BIT;
1096-
src_access = VK_ACCESS_NONE;
1097-
break;
1098-
default:
1099-
ri.Error( ERR_DROP, "unsupported old layout %i", old_layout );
1100-
src_stage = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
1101-
src_access = VK_ACCESS_NONE;
1102-
break;
1070+
// Use resource state tracker to avoid redundant barriers
1071+
// If old_layout is UNDEFINED, try to get actual layout from state tracker
1072+
VkImageLayout actual_old_layout = old_layout;
1073+
if (old_layout == VK_IMAGE_LAYOUT_UNDEFINED) {
1074+
VkImageLayout tracked_layout = vk_resource_state_get_image_layout(image);
1075+
if (tracked_layout != VK_IMAGE_LAYOUT_UNDEFINED) {
1076+
actual_old_layout = tracked_layout;
1077+
}
11031078
}
1104-
1105-
// Determine destination stage and access mask
1106-
uint32_t dst_stage;
1107-
VkAccessFlags dst_access;
11081079

1109-
switch ( new_layout ) {
1110-
case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
1111-
dst_stage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
1112-
dst_access = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
1113-
break;
1114-
case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
1115-
dst_stage = VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT;
1116-
dst_access = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
1117-
break;
1118-
case VK_IMAGE_LAYOUT_PRESENT_SRC_KHR:
1119-
dst_stage = VK_PIPELINE_STAGE_TRANSFER_BIT;
1120-
dst_access = VK_ACCESS_NONE;
1121-
break;
1122-
case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL:
1123-
dst_stage = VK_PIPELINE_STAGE_TRANSFER_BIT;
1124-
dst_access = VK_ACCESS_TRANSFER_READ_BIT;
1125-
break;
1126-
case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
1127-
dst_stage = VK_PIPELINE_STAGE_TRANSFER_BIT;
1128-
dst_access = VK_ACCESS_TRANSFER_WRITE_BIT;
1129-
break;
1130-
case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL:
1131-
dst_stage = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
1132-
dst_access = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_INPUT_ATTACHMENT_READ_BIT;
1133-
break;
1134-
default:
1135-
ri.Error( ERR_DROP, "unsupported new layout %i", new_layout);
1136-
dst_stage = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
1137-
dst_access = VK_ACCESS_NONE;
1138-
break;
1080+
// Use state tracker transition (will skip if layout hasn't changed)
1081+
if (actual_old_layout == VK_IMAGE_LAYOUT_UNDEFINED) {
1082+
// First use - use automatic transition
1083+
vk_resource_state_transition_image(command_buffer, image, image_aspect_flags,
1084+
new_layout, src_stage_override, dst_stage_override);
1085+
} else {
1086+
// Known old layout - use explicit transition
1087+
vk_resource_state_transition_image_explicit(command_buffer, image, image_aspect_flags,
1088+
actual_old_layout, new_layout, src_stage_override, dst_stage_override);
11391089
}
1140-
1141-
// Use C23 designated initializer for better performance and type safety
1142-
const VkImageMemoryBarrier barrier = {
1143-
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
1144-
.pNext = NULL,
1145-
.srcAccessMask = src_access,
1146-
.dstAccessMask = dst_access,
1147-
.oldLayout = old_layout,
1148-
.newLayout = new_layout,
1149-
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
1150-
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
1151-
.image = image,
1152-
.subresourceRange = {
1153-
.aspectMask = image_aspect_flags,
1154-
.baseMipLevel = 0,
1155-
.levelCount = VK_REMAINING_MIP_LEVELS,
1156-
.baseArrayLayer = 0,
1157-
.layerCount = VK_REMAINING_ARRAY_LAYERS
1158-
}
1159-
};
1160-
1161-
qvkCmdPipelineBarrier( command_buffer, src_stage, dst_stage, 0, 0, NULL, 0, NULL, 1, &barrier );
11621090
}
11631091

11641092

@@ -3891,6 +3819,7 @@ static void init_vulkan_library( void )
38913819
INIT_DEVICE_FUNCTION(vkResetCommandBuffer)
38923820
INIT_DEVICE_FUNCTION(vkResetDescriptorPool)
38933821
INIT_DEVICE_FUNCTION(vkResetFences)
3822+
INIT_DEVICE_FUNCTION(vkGetFenceStatus)
38943823
INIT_DEVICE_FUNCTION(vkUnmapMemory)
38953824
INIT_DEVICE_FUNCTION(vkUpdateDescriptorSets)
38963825
INIT_DEVICE_FUNCTION(vkWaitForFences)
@@ -4462,6 +4391,7 @@ qvkGetPipelineCacheData = NULL;
44624391
qvkResetFences = NULL;
44634392
qvkResetQueryPool = NULL;
44644393
qvkCmdResetQueryPool = NULL;
4394+
qvkGetFenceStatus = NULL;
44654395
qvkUnmapMemory = NULL;
44664396
qvkUpdateDescriptorSets = NULL;
44674397
qvkWaitForFences = NULL;
@@ -4809,6 +4739,9 @@ void vk_initialize( void )
48094739
if (vk.device != (VkDevice)0x20000000) {
48104740
vk_create_sync_primitives();
48114741
ri.Printf(PRINT_ALL, "Vulkan: Sync primitives created\n");
4742+
4743+
// Initialize resource state tracker
4744+
vk_resource_state_init();
48124745
// Optional: initialize timeline semaphore if available
48134746
#ifdef VK_KHR_TIMELINE_SEMAPHORE
48144747
if (qvkCreateSemaphore) {
@@ -9858,6 +9791,9 @@ void vk_shutdown( refShutdownCode_t code ) {
98589791
// Shutdown world effects system
98599792
vk_world_effects_shutdown();
98609793

9794+
// Shutdown resource state tracker
9795+
vk_resource_state_shutdown();
9796+
98619797
// Ray tracing and raymarching moved to RTX renderer only
98629798

98639799
// Shutdown async compute

src/renderers/vulkan/vk_command_buffers.cpp

Lines changed: 43 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -177,22 +177,48 @@ extern "C" VkCommandBuffer vk_begin_command_buffer(void) {
177177

178178
current_command_buffer = command_buffers[0];
179179

180-
// Wait for previous frame to complete before reusing command buffer
181-
// Check device loss during fence wait
182-
if (!vk.device_lost) {
183-
VkResult fence_result = qvkWaitForFences(vk.device, 1, &command_fence, VK_TRUE, UINT64_MAX);
184-
if (fence_result == VK_ERROR_DEVICE_LOST) {
185-
vk.device_lost = qtrue;
186-
vk_reset_memory_tracking_on_device_lost();
187-
ri.Printf(PRINT_ERROR, "Vulkan: Device lost during command buffer fence wait\n");
188-
return VK_NULL_HANDLE;
189-
} else if (fence_result != VK_SUCCESS) {
190-
ri.Printf(PRINT_WARNING, "vk_begin_command_buffer: Fence wait failed: %d\n", fence_result);
180+
// Wait for all frame command buffers to complete before reusing immediate command buffer
181+
// This prevents conflicts between immediate commands and frame rendering
182+
// Use frame-based synchronization to align with the main rendering pipeline
183+
if (!vk.device_lost && vk.device != VK_NULL_HANDLE) {
184+
// Collect all active frame fences to ensure no frame is still in flight
185+
VkFence frame_fences[NUM_COMMAND_BUFFERS];
186+
uint32_t fence_count = 0;
187+
188+
// Add all valid frame fences
189+
for (uint32_t i = 0; i < NUM_COMMAND_BUFFERS; i++) {
190+
if (vk.tess[i].rendering_finished_fence != VK_NULL_HANDLE) {
191+
frame_fences[fence_count++] = vk.tess[i].rendering_finished_fence;
192+
}
191193
}
192194

193-
// Reset fence after waiting (must be done before reuse)
194-
if (!vk.device_lost) {
195-
qvkResetFences(vk.device, 1, &command_fence);
195+
// Wait for all frame fences if any exist
196+
if (fence_count > 0) {
197+
VkResult fence_result = qvkWaitForFences(vk.device, fence_count, frame_fences, VK_TRUE, UINT64_MAX);
198+
if (fence_result == VK_ERROR_DEVICE_LOST) {
199+
vk.device_lost = qtrue;
200+
vk_reset_memory_tracking_on_device_lost();
201+
ri.Printf(PRINT_ERROR, "Vulkan: Device lost during command buffer fence wait\n");
202+
return VK_NULL_HANDLE;
203+
} else if (fence_result != VK_SUCCESS) {
204+
ri.Printf(PRINT_WARNING, "vk_begin_command_buffer: Fence wait failed: %d\n", fence_result);
205+
}
206+
} else {
207+
// Fallback to static fence if no frame fences are available (e.g., during initialization)
208+
if (command_fence != VK_NULL_HANDLE) {
209+
VkResult fence_result = qvkWaitForFences(vk.device, 1, &command_fence, VK_TRUE, UINT64_MAX);
210+
if (fence_result == VK_ERROR_DEVICE_LOST) {
211+
vk.device_lost = qtrue;
212+
vk_reset_memory_tracking_on_device_lost();
213+
ri.Printf(PRINT_ERROR, "Vulkan: Device lost during command buffer fence wait\n");
214+
return VK_NULL_HANDLE;
215+
} else if (fence_result != VK_SUCCESS) {
216+
ri.Printf(PRINT_WARNING, "vk_begin_command_buffer: Fence wait failed: %d\n", fence_result);
217+
} else {
218+
// Reset static fence after waiting (frame fences are managed by frame system)
219+
qvkResetFences(vk.device, 1, &command_fence);
220+
}
221+
}
196222
}
197223
}
198224

@@ -208,12 +234,12 @@ extern "C" VkCommandBuffer vk_begin_command_buffer(void) {
208234
}
209235

210236
// Begin recording
211-
// For reusable command buffers, we can omit ONE_TIME_SUBMIT_BIT since we reset before reuse
212-
// However, keeping it is safe and indicates the buffer is used once per submission
237+
// This is a reusable command buffer - it's reset before reuse
238+
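// ONE_TIME_SUBMIT_BIT is omitted. NOTE(review): the bit only promises a reset/re-record between submissions, which this buffer already does - confirm omitting it is an actual performance gain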
213239
VkCommandBufferBeginInfo beginInfo = {
214240
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
215241
.pNext = nullptr,
216-
.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
242+
.flags = 0, // Reusable buffer - no ONE_TIME_SUBMIT_BIT
217243
.pInheritanceInfo = nullptr
218244
};
219245

src/renderers/vulkan/vk_frame.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -406,10 +406,12 @@ extern "C" void vk_begin_frame(void) {
406406
vk_update_performance_stats();
407407

408408
// Begin command buffer
409+
// Frame command buffers are reusable - they're reset and reused each frame
410+
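// ONE_TIME_SUBMIT_BIT is omitted. NOTE(review): the bit only promises a reset/re-record between submissions, which frame buffers already do - confirm omitting it is an actual performance gain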
409411
VkCommandBufferBeginInfo begin_info = {
410412
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
411413
.pNext = nullptr,
412-
.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
414+
.flags = 0, // Reusable frame buffer - no ONE_TIME_SUBMIT_BIT
413415
.pInheritanceInfo = nullptr
414416
};
415417

0 commit comments

Comments
 (0)