From c77c00cfe1115b8c112842ee0da1b893f0b8e2ff Mon Sep 17 00:00:00 2001
From: sergeypdev
Date: Sun, 29 Sep 2024 14:59:38 +0400
Subject: [PATCH] Add sync abstraction to automate memory barriers

---
Review notes (this section is ignored by git am):
- AssetManager: the queue drain before destroyPipeline is kept. maintenance4
  only relaxes VkPipelineLayout lifetime, not VkPipeline lifetime.
- Render2: the acquire semaphore is now waited on at color attachment output,
  and the swapchain image tracker is seeded with that stage so the first
  layout transition is ordered after the wait.
- Render2: FrameData.init destroys already created objects when a later
  creation step fails.
- Commit id and blob ids on the index lines are those of the original commit.

 src/AssetManager.zig    |   3 +
 src/GraphicsContext.zig | 182 ++++++++++++++++++++++++++++++++++++++++
 src/Render2.zig         | 140 ++++++++++++++++++++-----------
 3 files changed, 243 insertions(+), 82 deletions(-)

diff --git a/src/AssetManager.zig b/src/AssetManager.zig
index 6aeef07..470ca0f 100644
--- a/src/AssetManager.zig
+++ b/src/AssetManager.zig
@@ -1571,7 +1571,10 @@ fn freeAsset(self: *AssetManager, asset: *LoadedAsset) void {
             self.allocator.free(shader.source);
         },
         .shaderProgram => |*program| {
+            // NOTE: maintenance4 only permits early destruction of VkPipelineLayout.
+            // A VkPipeline must still outlive every submitted command that uses it,
+            // so drain the graphics queue before destroying it.
             self.gc.queues.graphics.mu.lock();
             defer self.gc.queues.graphics.mu.unlock();
 
             self.gc.device.queueWaitIdle(self.gc.queues.graphics.handle) catch @panic("Wait Idle failed");
diff --git a/src/GraphicsContext.zig b/src/GraphicsContext.zig
index 4f65131..ea5340c 100644
--- a/src/GraphicsContext.zig
+++ b/src/GraphicsContext.zig
@@ -55,6 +55,187 @@ pub const CommandPool = struct {
         }, &cmd_bufs);
         return CommandBuffer.init(cmd_bufs[0], self.device.wrapper);
     }
+
+    /// Returns `command_buffer` to this pool.
+    pub fn freeCommandBuffer(self: *const CommandPool, command_buffer: CommandBuffer) void {
+        self.device.freeCommandBuffers(self.handle, 1, &.{command_buffer.handle});
+    }
+};
+
+// Simple sync barrier tracking without a render graph
+// https://vulkan.org/user/pages/09.events/vulkanised-2024/vulkanised-2024-grigory-dzhavadyan.pdf
+/// Access and stage flags describing a single use of a resource.
+pub const SyncBarrierMasks = struct {
+    access_mask: vk.AccessFlags2 = .{},
+    stage_mask: vk.PipelineStageFlags2 = .{},
+};
+
+/// A Vulkan image together with the state used to derive its barriers.
+pub const Image = struct {
+    handle: vk.Image,
+    mip_count: u32,
+    layer_count: u32,
+    format: vk.Format,
+
+    /// Masks of the most recent write or layout transition.
+    last_writer: SyncBarrierMasks = .{},
+    /// Stages that have read the image since `last_writer`.
+    per_stage_readers: vk.PipelineStageFlags2 = .{},
+    /// Current layout
+    layout: vk.ImageLayout = .undefined,
+
+    /// Creates a 2D view covering every mip level and array layer.
+    pub fn createView(self: *const Image, device: Device, aspect_mask: vk.ImageAspectFlags) !vk.ImageView {
+        return device.createImageView(&vk.ImageViewCreateInfo{
+            .format = self.format,
+            .components = .{ .r = .r, .g = .g, .b = .b, .a = .a },
+            .image = self.handle,
+            .view_type = .@"2d",
+            .subresource_range = .{
+                .aspect_mask = aspect_mask,
+                .base_array_layer = 0,
+                .layer_count = self.layer_count,
+                .base_mip_level = 0,
+                .level_count = self.mip_count,
+            },
+        }, null);
+    }
+
+    /// Records the barrier (if any) needed before using the image with `masks`
+    /// in `layout`, then updates the tracked state. A layout change is treated
+    /// as a write. Barriers always cover the color aspect of all mips and layers.
+    pub fn sync(self: *Image, cmds: CommandBuffer, masks: SyncBarrierMasks, layout: vk.ImageLayout) !void {
+        const is_read = isRead(masks.access_mask);
+        const is_write = layout != self.layout or isWrite(masks.access_mask);
+
+        // Read only
+        if (is_read and !is_write) {
+            if (self.last_writer.access_mask.toInt() != 0) {
+                if (!self.per_stage_readers.contains(masks.stage_mask)) {
+                    // These stages haven't seen previous writes yet
+
+                    const new_stages = masks.stage_mask.subtract(self.per_stage_readers);
+                    self.per_stage_readers = masks.stage_mask.merge(self.per_stage_readers);
+
+                    const barrier = vk.ImageMemoryBarrier2{
+                        .image = self.handle,
+                        .old_layout = self.layout,
+                        .new_layout = layout,
+                        .src_stage_mask = self.last_writer.stage_mask,
+                        .dst_stage_mask = new_stages,
+                        .src_access_mask = self.last_writer.access_mask,
+                        .dst_access_mask = masks.access_mask,
+                        .src_queue_family_index = 0,
+                        .dst_queue_family_index = 0,
+                        .subresource_range = .{
+                            .base_mip_level = 0,
+                            .base_array_layer = 0,
+                            .layer_count = self.layer_count,
+                            .level_count = self.mip_count,
+                            .aspect_mask = .{ .color_bit = true },
+                        },
+                    };
+                    cmds.pipelineBarrier2(&vk.DependencyInfo{
+                        .image_memory_barrier_count = 1,
+                        .p_image_memory_barriers = &.{barrier},
+                    });
+                }
+            } else {
+                self.per_stage_readers = self.per_stage_readers.merge(masks.stage_mask);
+            }
+        } else if (is_write) {
+            // If there are any reads
+            const earlier_stages = self.last_writer.stage_mask.merge(self.per_stage_readers);
+
+            if (earlier_stages.toInt() != 0 or self.layout != layout) {
+                // Emit barrier for earlier stages and last writer mask
+
+                const barrier = vk.ImageMemoryBarrier2{
+                    .image = self.handle,
+                    .old_layout = self.layout,
+                    .new_layout = layout,
+                    .src_stage_mask = earlier_stages,
+                    .dst_stage_mask = masks.stage_mask,
+                    .src_access_mask = self.last_writer.access_mask,
+                    .dst_access_mask = masks.access_mask,
+                    .src_queue_family_index = 0,
+                    .dst_queue_family_index = 0,
+                    .subresource_range = .{
+                        .base_mip_level = 0,
+                        .base_array_layer = 0,
+                        .layer_count = self.layer_count,
+                        .level_count = self.mip_count,
+                        .aspect_mask = .{ .color_bit = true },
+                    },
+                };
+                cmds.pipelineBarrier2(&vk.DependencyInfo{
+                    .image_memory_barrier_count = 1,
+                    .p_image_memory_barriers = &.{barrier},
+                });
+            }
+            self.last_writer = masks;
+            self.per_stage_readers = .{};
+            self.layout = layout;
+        }
+    }
+
+    /// Access flags that `sync` treats as reads.
+    const read_access_mask = vk.AccessFlags2{
+        .indirect_command_read_bit = true,
+        .index_read_bit = true,
+        .vertex_attribute_read_bit = true,
+        .uniform_read_bit = true,
+        .input_attachment_read_bit = true,
+        .shader_read_bit = true,
+        .color_attachment_read_bit = true,
+        .depth_stencil_attachment_read_bit = true,
+        .transfer_read_bit = true,
+        .host_read_bit = true,
+        .memory_read_bit = true,
+        .command_preprocess_read_bit_nv = true,
+        .color_attachment_read_noncoherent_bit_ext = true,
+        .conditional_rendering_read_bit_ext = true,
+        .acceleration_structure_read_bit_khr = true,
+        .fragment_shading_rate_attachment_read_bit_khr = true,
+        .fragment_density_map_read_bit_ext = true,
+        .transform_feedback_counter_read_bit_ext = true,
+        .shader_sampled_read_bit = true,
+        .shader_storage_read_bit = true,
+        .video_decode_read_bit_khr = true,
+        .video_encode_read_bit_khr = true,
+        .invocation_mask_read_bit_huawei = true,
+        .shader_binding_table_read_bit_khr = true,
+        .descriptor_buffer_read_bit_ext = true,
+        .optical_flow_read_bit_nv = true,
+        .micromap_read_bit_ext = true,
+    };
+
+    /// Access flags that `sync` treats as writes.
+    const write_access_mask = vk.AccessFlags2{
+        .shader_write_bit = true,
+        .color_attachment_write_bit = true,
+        .depth_stencil_attachment_write_bit = true,
+        .transfer_write_bit = true,
+        .host_write_bit = true,
+        .memory_write_bit = true,
+        .command_preprocess_write_bit_nv = true,
+        .acceleration_structure_write_bit_khr = true,
+        .transform_feedback_write_bit_ext = true,
+        .transform_feedback_counter_write_bit_ext = true,
+        .shader_storage_write_bit = true,
+        .video_decode_write_bit_khr = true,
+        .video_encode_write_bit_khr = true,
+        .optical_flow_write_bit_nv = true,
+        .micromap_write_bit_ext = true,
+    };
+
+    fn isRead(access_mask: vk.AccessFlags2) bool {
+        return access_mask.intersect(read_access_mask).toInt() != 0;
+    }
+
+    fn isWrite(access_mask: vk.AccessFlags2) bool {
+        return access_mask.intersect(write_access_mask).toInt() != 0;
+    }
 };
 
 pub fn init(self: *GraphicsContext, allocator: std.mem.Allocator, window: *c.SDL_Window) !void {
@@ -113,6 +294,7 @@ pub fn init(self: *GraphicsContext, allocator: std.mem.Allocator, window: *c.SDL
         .p_next = &vk.PhysicalDeviceVulkan13Features{
             .dynamic_rendering = vk.TRUE,
             .synchronization_2 = vk.TRUE,
+            .maintenance_4 = vk.TRUE,
         },
         .p_queue_create_infos = &queue_config.queue_create_info,
         .queue_create_info_count = queue_config.queue_count,
diff --git a/src/Render2.zig b/src/Render2.zig
index 5e012eb..2f5c20f 100644
--- a/src/Render2.zig
+++ b/src/Render2.zig
@@ -14,76 +14,47 @@ command_pool: GraphicsContext.CommandPool,
 // NOTE: TEST
 frame: u32 = 0,
-frame_syncs: [MAX_FRAME_LAG]Sync = [1]Sync{.{}} ** MAX_FRAME_LAG,
+frame_data: [MAX_FRAME_LAG]FrameData = undefined,
 
 pub fn init(assetman: *AssetManager, gc: *GraphicsContext) !Render2 {
     var self = Render2{
         .assetman = assetman,
         .gc = gc,
-        .command_pool = try gc.queues.graphics.createCommandPool(.{}),
+        .command_pool = try gc.queues.graphics.createCommandPool(.{ .reset_command_buffer_bit = true }),
     };
 
     // NOTE: TEST
     for (0..MAX_FRAME_LAG) |i| {
-        self.frame_syncs[i].acquire_swapchain_image = try self.gc.device.createSemaphore(&.{}, null);
-        self.frame_syncs[i].draw_sema = try self.gc.device.createSemaphore(&.{}, null);
-        self.frame_syncs[i].draw_fence = try self.gc.device.createFence(&.{ .flags = .{ .signaled_bit = true } }, null);
+        self.frame_data[i] = try FrameData.init(gc, self.command_pool);
     }
 
     return self;
 }
 
 pub fn draw(self: *Render2) !void {
-    const sync = &self.frame_syncs[self.frame];
+    const frame = &self.frame_data[self.frame];
 
-    try sync.waitForDrawAndReset(self.gc.device);
+    try frame.waitForDrawAndReset(self.gc.device);
 
     // Move this out into a separate func
-    const swapchain_image_index: u32 = try self.gc.acquireSwapchainImage(sync.acquire_swapchain_image);
+    const swapchain_image_index: u32 = try self.gc.acquireSwapchainImage(frame.acquire_swapchain_image);
 
-    const current_image = self.gc.swapchain_images[swapchain_image_index];
-    const current_image_view = try self.gc.device.createImageView(&.{
-        .components = .{ .r = .r, .g = .g, .b = .b, .a = .a },
-        .format = .r8g8b8a8_unorm,
-        .view_type = .@"2d",
-        .subresource_range = .{
-            .aspect_mask = .{ .color_bit = true },
-            .base_array_layer = 0,
-            .base_mip_level = 0,
-            .layer_count = 1,
-            .level_count = 1,
-        },
-        .image = current_image,
-    }, null);
+    var current_image = GraphicsContext.Image{
+        .handle = self.gc.swapchain_images[swapchain_image_index],
+        .mip_count = 1,
+        .layer_count = 1,
+        .format = .r8g8b8a8_unorm,
+        // The acquire semaphore is waited on at color attachment output, so
+        // the first layout transition must chain from that stage.
+        .last_writer = .{ .stage_mask = .{ .color_attachment_output_bit = true } },
+    };
+    const current_image_view = try current_image.createView(self.gc.device, .{ .color_bit = true });
     defer self.gc.device.destroyImageView(current_image_view, null);
 
-    const cmds = try self.command_pool.allocateCommandBuffer();
+    const cmds = frame.command_buffer;
 
     try cmds.beginCommandBuffer(&.{});
     {
-        {
-            const img_barrier = vk.ImageMemoryBarrier2{
-                .image = current_image,
-                .old_layout = .undefined,
-                .new_layout = .color_attachment_optimal,
-                .src_access_mask = .{},
-                .dst_access_mask = .{ .color_attachment_write_bit = true },
-                .dst_stage_mask = .{ .color_attachment_output_bit = true },
-                .src_queue_family_index = self.gc.queues.graphics.family,
-                .dst_queue_family_index = self.gc.queues.graphics.family,
-                .subresource_range = .{
-                    .aspect_mask = .{ .color_bit = true },
-                    .base_array_layer = 0,
-                    .base_mip_level = 0,
-                    .layer_count = 1,
-                    .level_count = 1,
-                },
-            };
-            cmds.pipelineBarrier2(&.{
-                .p_image_memory_barriers = &.{img_barrier},
-                .image_memory_barrier_count = 1,
-            });
-        }
-
+        try current_image.sync(cmds, .{ .stage_mask = .{ .color_attachment_output_bit = true }, .access_mask = .{ .color_attachment_write_bit = true } }, .attachment_optimal);
         {
             cmds.beginRendering(&.{
                 .render_area = vk.Rect2D{ .offset = .{ .x = 0, .y = 0 }, .extent = self.gc.swapchain_extent },
@@ -95,9 +66,9 @@ pub fn draw(self: *Render2) !void {
                         .clear_value = .{ .color = .{ .float_32 = .{ 0.8, 0.7, 0.6, 1.0 } } },
                         .load_op = .clear,
                         .store_op = .store,
-                        .image_layout = .color_attachment_optimal,
+                        .image_layout = .attachment_optimal,
                         .image_view = current_image_view,
-                        .resolve_image_layout = .color_attachment_optimal,
+                        .resolve_image_layout = .attachment_optimal,
                         .resolve_mode = .{},
                     },
                 },
@@ -122,46 +93,24 @@ pub fn draw(self: *Render2) !void {
             cmds.draw(3, 1, 0, 0);
         }
 
-        {
-            const img_barrier = vk.ImageMemoryBarrier2{
-                .image = current_image,
-                .old_layout = .color_attachment_optimal,
-                .new_layout = .present_src_khr,
-                .src_access_mask = .{ .color_attachment_write_bit = true },
-                .dst_access_mask = .{},
-                .src_stage_mask = .{ .color_attachment_output_bit = true },
-                .src_queue_family_index = self.gc.queues.graphics.family,
-                .dst_queue_family_index = self.gc.queues.graphics.family,
-                .subresource_range = .{
-                    .aspect_mask = .{ .color_bit = true },
-                    .base_array_layer = 0,
-                    .base_mip_level = 0,
-                    .layer_count = 1,
-                    .level_count = 1,
-                },
-            };
-            cmds.pipelineBarrier2(&.{
-                .p_image_memory_barriers = &.{img_barrier},
-                .image_memory_barrier_count = 1,
-            });
-        }
+        try current_image.sync(cmds, .{}, .present_src_khr);
     }
     try cmds.endCommandBuffer();
 
     try self.gc.queues.graphics.submit(
         &GraphicsContext.SubmitInfo{
-            .wait_semaphores = &.{sync.acquire_swapchain_image},
-            .wait_dst_stage_mask = &.{.{ .transfer_bit = true }},
+            .wait_semaphores = &.{frame.acquire_swapchain_image},
+            .wait_dst_stage_mask = &.{vk.PipelineStageFlags{ .color_attachment_output_bit = true }},
             .command_buffers = &.{cmds.handle},
-            .signal_semaphores = &.{sync.draw_sema},
+            .signal_semaphores = &.{frame.draw_sema},
         },
-        sync.draw_fence,
+        frame.draw_fence,
     );
 
     _ = try self.gc.device.queuePresentKHR(self.gc.queues.graphics.handle, &.{
         .swapchain_count = 1,
         .wait_semaphore_count = 1,
-        .p_wait_semaphores = &.{sync.draw_sema},
+        .p_wait_semaphores = &.{frame.draw_sema},
         .p_swapchains = &.{self.gc.swapchain},
         .p_image_indices = &.{swapchain_image_index},
     });
@@ -169,13 +118,40 @@ pub fn draw(self: *Render2) !void {
     self.frame = (self.frame + 1) % MAX_FRAME_LAG;
 }
 
-const Sync = struct {
-    acquire_swapchain_image: vk.Semaphore = .null_handle,
-    draw_sema: vk.Semaphore = .null_handle,
-    draw_fence: vk.Fence = .null_handle,
+// Per frame stuff
+const FrameData = struct {
+    // Sync
+    acquire_swapchain_image: vk.Semaphore,
+    draw_sema: vk.Semaphore,
+    draw_fence: vk.Fence,
 
-    pub fn waitForDrawAndReset(self: *Sync, device: GraphicsContext.Device) !void {
+    command_buffer: GraphicsContext.CommandBuffer,
+
+    /// Creates the sync objects and the command buffer for one in-flight frame.
+    pub fn init(gc: *GraphicsContext, command_pool: GraphicsContext.CommandPool) !FrameData {
+        const acquire_swapchain_image = try gc.device.createSemaphore(&.{}, null);
+        errdefer gc.device.destroySemaphore(acquire_swapchain_image, null);
+
+        const draw_sema = try gc.device.createSemaphore(&.{}, null);
+        errdefer gc.device.destroySemaphore(draw_sema, null);
+
+        // Created signaled so the first waitForDrawAndReset does not block.
+        const draw_fence = try gc.device.createFence(&.{ .flags = .{ .signaled_bit = true } }, null);
+        errdefer gc.device.destroyFence(draw_fence, null);
+
+        return FrameData{
+            .acquire_swapchain_image = acquire_swapchain_image,
+            .draw_sema = draw_sema,
+            .draw_fence = draw_fence,
+            .command_buffer = try command_pool.allocateCommandBuffer(),
+        };
+    }
+
+    /// Waits for this frame's previous submission, then resets its fence and command buffer.
+    pub fn waitForDrawAndReset(self: *FrameData, device: GraphicsContext.Device) !void {
         _ = try device.waitForFences(1, &.{self.draw_fence}, vk.TRUE, std.math.maxInt(u64));
         try device.resetFences(1, &.{self.draw_fence});
+
+        try self.command_buffer.resetCommandBuffer(.{ .release_resources_bit = true });
     }
 };