Add sync abstraction to automate memory barriers

This commit is contained in:
sergeypdev 2024-09-29 14:59:38 +04:00
parent fe69fa1f51
commit c77c00cfe1
3 changed files with 212 additions and 87 deletions

View File

@ -1571,11 +1571,8 @@ fn freeAsset(self: *AssetManager, asset: *LoadedAsset) void {
self.allocator.free(shader.source); self.allocator.free(shader.source);
}, },
.shaderProgram => |*program| { .shaderProgram => |*program| {
self.gc.queues.graphics.mu.lock(); // NOTE: We use maintenance4 extension, no need to wait for pipeline
defer self.gc.queues.graphics.mu.unlock(); // usage to end
self.gc.device.queueWaitIdle(self.gc.queues.graphics.handle) catch @panic("Wait Idle failed");
self.gc.device.destroyPipeline(program.pipeline, null); self.gc.device.destroyPipeline(program.pipeline, null);
}, },
.texture => |*texture| { .texture => |*texture| {

View File

@ -55,6 +55,176 @@ pub const CommandPool = struct {
}, &cmd_bufs); }, &cmd_bufs);
return CommandBuffer.init(cmd_bufs[0], self.device.wrapper); return CommandBuffer.init(cmd_bufs[0], self.device.wrapper);
} }
/// Frees a single command buffer that was allocated from this pool.
///
/// Forwards `command_buffer.handle` to the device's `freeCommandBuffers` call
/// together with this pool's handle.
pub fn freeCommandBuffer(self: *const CommandPool, command_buffer: CommandBuffer) void {
    // One-element array so the count and the pointer come from the same place.
    const handles = [_]vk.CommandBuffer{command_buffer.handle};
    self.device.freeCommandBuffers(self.handle, handles.len, &handles);
}
};
// Simple sync barrier tracking without a render graph
// https://vulkan.org/user/pages/09.events/vulkanised-2024/vulkanised-2024-grigory-dzhavadyan.pdf
/// Stage/access mask pair used to describe one side of a synchronization2
/// barrier. Both masks default to empty.
pub const SyncBarrierMasks = struct {
    /// Pipeline stages involved in the access.
    stage_mask: vk.PipelineStageFlags2 = vk.PipelineStageFlags2{},
    /// Memory access types involved in the access.
    access_mask: vk.AccessFlags2 = vk.AccessFlags2{},
};
pub const Image = struct {
handle: vk.Image,
mip_count: u32,
layer_count: u32,
format: vk.Format,
last_writer: SyncBarrierMasks = .{},
per_stage_readers: vk.PipelineStageFlags2 = .{},
/// Current layout
layout: vk.ImageLayout = .undefined,
/// Creates an image view spanning every mip level and array layer of this image.
///
/// `aspect_mask` selects which aspects of the image the view exposes.
/// The view type is `2d` for single-layer images and `2d_array` when
/// `layer_count > 1`: Vulkan requires a `2d` view to reference exactly one
/// array layer, so a multi-layer image cannot be viewed as plain `2d`.
///
/// Returns the error reported by `device.createImageView` on failure.
/// The returned view is not tracked by `Image`; callers destroy it themselves.
pub fn createView(self: *const Image, device: Device, aspect_mask: vk.ImageAspectFlags) !vk.ImageView {
    const view_type: vk.ImageViewType = if (self.layer_count > 1) .@"2d_array" else .@"2d";

    return device.createImageView(&vk.ImageViewCreateInfo{
        .image = self.handle,
        .format = self.format,
        .view_type = view_type,
        // Identity swizzle.
        .components = .{ .r = .r, .g = .g, .b = .b, .a = .a },
        .subresource_range = .{
            .aspect_mask = aspect_mask,
            .base_mip_level = 0,
            .level_count = self.mip_count,
            .base_array_layer = 0,
            .layer_count = self.layer_count,
        },
    }, null);
}
/// Records whatever image barrier is needed before the access described by
/// `masks` can happen with the image in `layout`, and updates the tracked state.
///
/// An access counts as a write when it has any write access bit set or when it
/// changes the image layout; otherwise it counts as a read when it has any read
/// access bit set. An access that is neither is ignored.
///
/// * Write: a barrier is emitted against the last writer and every stage that
///   has read since (or unconditionally when the layout changes). Afterwards
///   `masks` becomes the last writer and the reader set is cleared.
/// * Read: if there is a last writer, a barrier is emitted only for the stages
///   in `masks.stage_mask` that have not yet been synchronized against it.
///   Without a last writer the stages are only recorded.
///
/// The barrier always covers all mips and layers. Its aspect mask is derived
/// from the image format so depth/stencil images get a valid barrier.
/// No error is currently produced; the error union is kept for callers using `try`.
pub fn sync(self: *Image, cmds: CommandBuffer, masks: SyncBarrierMasks, layout: vk.ImageLayout) !void {
    const is_read = isRead(masks.access_mask);
    const is_write = layout != self.layout or isWrite(masks.access_mask);

    if (is_write) {
        // Everything that touched the image before: the last writer plus all readers since.
        const earlier_stages = self.last_writer.stage_mask.merge(self.per_stage_readers);
        if (earlier_stages.toInt() != 0 or self.layout != layout) {
            self.emitBarrier(
                cmds,
                .{ .stage_mask = earlier_stages, .access_mask = self.last_writer.access_mask },
                masks,
                layout,
            );
        }
        self.last_writer = masks;
        self.per_stage_readers = .{};
        self.layout = layout;
        return;
    }

    if (!is_read) return;

    // Read-only access from here on; the layout is unchanged.
    if (self.last_writer.access_mask.toInt() == 0) {
        // No write access recorded to wait on; just remember who is reading.
        self.per_stage_readers = self.per_stage_readers.merge(masks.stage_mask);
        return;
    }

    // Every requested stage has already been synchronized against the last writer.
    if (self.per_stage_readers.contains(masks.stage_mask)) return;

    // These stages haven't seen the previous writes yet.
    const new_stages = masks.stage_mask.subtract(self.per_stage_readers);
    self.per_stage_readers = self.per_stage_readers.merge(masks.stage_mask);
    self.emitBarrier(
        cmds,
        self.last_writer,
        .{ .stage_mask = new_stages, .access_mask = masks.access_mask },
        layout,
    );
}

/// Records one image memory barrier from `src` to `dst` covering the whole
/// image, transitioning from the currently tracked layout to `new_layout`.
fn emitBarrier(
    self: *const Image,
    cmds: CommandBuffer,
    src: SyncBarrierMasks,
    dst: SyncBarrierMasks,
    new_layout: vk.ImageLayout,
) void {
    const barrier = vk.ImageMemoryBarrier2{
        .image = self.handle,
        .old_layout = self.layout,
        .new_layout = new_layout,
        .src_stage_mask = src.stage_mask,
        .dst_stage_mask = dst.stage_mask,
        .src_access_mask = src.access_mask,
        .dst_access_mask = dst.access_mask,
        // Equal indices: no queue family ownership transfer.
        .src_queue_family_index = 0,
        .dst_queue_family_index = 0,
        .subresource_range = .{
            .aspect_mask = aspectMaskForFormat(self.format),
            .base_mip_level = 0,
            .level_count = self.mip_count,
            .base_array_layer = 0,
            .layer_count = self.layer_count,
        },
    };
    cmds.pipelineBarrier2(&vk.DependencyInfo{
        .image_memory_barrier_count = 1,
        .p_image_memory_barriers = &.{barrier},
    });
}

/// Picks the barrier aspect mask for `format`: depth and/or stencil for the
/// core depth/stencil formats, color for everything else.
fn aspectMaskForFormat(format: vk.Format) vk.ImageAspectFlags {
    return switch (format) {
        .d16_unorm, .x8_d24_unorm_pack32, .d32_sfloat => .{ .depth_bit = true },
        .s8_uint => .{ .stencil_bit = true },
        .d16_unorm_s8_uint, .d24_unorm_s8_uint, .d32_sfloat_s8_uint => .{ .depth_bit = true, .stencil_bit = true },
        else => .{ .color_bit = true },
    };
}
/// Union of every access flag that `isRead` treats as a read access.
const read_access_mask = vk.AccessFlags2{
    // Generic, host and transfer reads.
    .memory_read_bit = true,
    .host_read_bit = true,
    .transfer_read_bit = true,

    // Draw/dispatch inputs.
    .indirect_command_read_bit = true,
    .index_read_bit = true,
    .vertex_attribute_read_bit = true,
    .uniform_read_bit = true,

    // Shader reads.
    .shader_read_bit = true,
    .shader_sampled_read_bit = true,
    .shader_storage_read_bit = true,

    // Attachment reads.
    .input_attachment_read_bit = true,
    .color_attachment_read_bit = true,
    .depth_stencil_attachment_read_bit = true,
    .color_attachment_read_noncoherent_bit_ext = true,
    .fragment_shading_rate_attachment_read_bit_khr = true,
    .fragment_density_map_read_bit_ext = true,

    // Remaining extension reads.
    .command_preprocess_read_bit_nv = true,
    .conditional_rendering_read_bit_ext = true,
    .acceleration_structure_read_bit_khr = true,
    .transform_feedback_counter_read_bit_ext = true,
    .video_decode_read_bit_khr = true,
    .video_encode_read_bit_khr = true,
    .invocation_mask_read_bit_huawei = true,
    .shader_binding_table_read_bit_khr = true,
    .descriptor_buffer_read_bit_ext = true,
    .optical_flow_read_bit_nv = true,
    .micromap_read_bit_ext = true,
};
/// Union of every access flag that `isWrite` treats as a write access.
const write_access_mask = vk.AccessFlags2{
    // Generic, host and transfer writes.
    .memory_write_bit = true,
    .host_write_bit = true,
    .transfer_write_bit = true,

    // Shader writes.
    .shader_write_bit = true,
    .shader_storage_write_bit = true,

    // Attachment writes.
    .color_attachment_write_bit = true,
    .depth_stencil_attachment_write_bit = true,

    // Remaining extension writes.
    .command_preprocess_write_bit_nv = true,
    .acceleration_structure_write_bit_khr = true,
    .transform_feedback_write_bit_ext = true,
    .transform_feedback_counter_write_bit_ext = true,
    .video_decode_write_bit_khr = true,
    .video_encode_write_bit_khr = true,
    .optical_flow_write_bit_nv = true,
    .micromap_write_bit_ext = true,
};
/// Reports whether `access_mask` shares at least one bit with `read_access_mask`.
fn isRead(access_mask: vk.AccessFlags2) bool {
    const read_bits = access_mask.intersect(read_access_mask);
    return read_bits.toInt() != 0;
}
/// Reports whether `access_mask` shares at least one bit with `write_access_mask`.
fn isWrite(access_mask: vk.AccessFlags2) bool {
    const write_bits = access_mask.intersect(write_access_mask);
    return write_bits.toInt() != 0;
}
}; };
pub fn init(self: *GraphicsContext, allocator: std.mem.Allocator, window: *c.SDL_Window) !void { pub fn init(self: *GraphicsContext, allocator: std.mem.Allocator, window: *c.SDL_Window) !void {
@ -113,6 +283,7 @@ pub fn init(self: *GraphicsContext, allocator: std.mem.Allocator, window: *c.SDL
.p_next = &vk.PhysicalDeviceVulkan13Features{ .p_next = &vk.PhysicalDeviceVulkan13Features{
.dynamic_rendering = vk.TRUE, .dynamic_rendering = vk.TRUE,
.synchronization_2 = vk.TRUE, .synchronization_2 = vk.TRUE,
.maintenance_4 = vk.TRUE,
}, },
.p_queue_create_infos = &queue_config.queue_create_info, .p_queue_create_infos = &queue_config.queue_create_info,
.queue_create_info_count = queue_config.queue_count, .queue_create_info_count = queue_config.queue_count,

View File

@ -14,76 +14,39 @@ command_pool: GraphicsContext.CommandPool,
// NOTE: TEST // NOTE: TEST
frame: u32 = 0, frame: u32 = 0,
frame_syncs: [MAX_FRAME_LAG]Sync = [1]Sync{.{}} ** MAX_FRAME_LAG, frame_data: [MAX_FRAME_LAG]FrameData = undefined,
pub fn init(assetman: *AssetManager, gc: *GraphicsContext) !Render2 { pub fn init(assetman: *AssetManager, gc: *GraphicsContext) !Render2 {
var self = Render2{ var self = Render2{
.assetman = assetman, .assetman = assetman,
.gc = gc, .gc = gc,
.command_pool = try gc.queues.graphics.createCommandPool(.{}), .command_pool = try gc.queues.graphics.createCommandPool(.{ .reset_command_buffer_bit = true }),
}; };
// NOTE: TEST // NOTE: TEST
for (0..MAX_FRAME_LAG) |i| { for (0..MAX_FRAME_LAG) |i| {
self.frame_syncs[i].acquire_swapchain_image = try self.gc.device.createSemaphore(&.{}, null); self.frame_data[i] = try FrameData.init(gc, self.command_pool);
self.frame_syncs[i].draw_sema = try self.gc.device.createSemaphore(&.{}, null);
self.frame_syncs[i].draw_fence = try self.gc.device.createFence(&.{ .flags = .{ .signaled_bit = true } }, null);
} }
return self; return self;
} }
pub fn draw(self: *Render2) !void { pub fn draw(self: *Render2) !void {
const sync = &self.frame_syncs[self.frame]; const frame = &self.frame_data[self.frame];
try sync.waitForDrawAndReset(self.gc.device); try frame.waitForDrawAndReset(self.gc.device);
// Move this out into a separate func // Move this out into a separate func
const swapchain_image_index: u32 = try self.gc.acquireSwapchainImage(sync.acquire_swapchain_image); const swapchain_image_index: u32 = try self.gc.acquireSwapchainImage(frame.acquire_swapchain_image);
const current_image = self.gc.swapchain_images[swapchain_image_index]; var current_image = GraphicsContext.Image{ .handle = self.gc.swapchain_images[swapchain_image_index], .mip_count = 1, .layer_count = 1, .format = .r8g8b8a8_unorm };
const current_image_view = try self.gc.device.createImageView(&.{ const current_image_view = try current_image.createView(self.gc.device, .{ .color_bit = true });
.components = .{ .r = .r, .g = .g, .b = .b, .a = .a },
.format = .r8g8b8a8_unorm,
.view_type = .@"2d",
.subresource_range = .{
.aspect_mask = .{ .color_bit = true },
.base_array_layer = 0,
.base_mip_level = 0,
.layer_count = 1,
.level_count = 1,
},
.image = current_image,
}, null);
defer self.gc.device.destroyImageView(current_image_view, null); defer self.gc.device.destroyImageView(current_image_view, null);
const cmds = try self.command_pool.allocateCommandBuffer(); const cmds = frame.command_buffer;
try cmds.beginCommandBuffer(&.{}); try cmds.beginCommandBuffer(&.{});
{ {
{ try current_image.sync(cmds, .{ .stage_mask = .{ .color_attachment_output_bit = true }, .access_mask = .{ .color_attachment_write_bit = true } }, .attachment_optimal);
const img_barrier = vk.ImageMemoryBarrier2{
.image = current_image,
.old_layout = .undefined,
.new_layout = .color_attachment_optimal,
.src_access_mask = .{},
.dst_access_mask = .{ .color_attachment_write_bit = true },
.dst_stage_mask = .{ .color_attachment_output_bit = true },
.src_queue_family_index = self.gc.queues.graphics.family,
.dst_queue_family_index = self.gc.queues.graphics.family,
.subresource_range = .{
.aspect_mask = .{ .color_bit = true },
.base_array_layer = 0,
.base_mip_level = 0,
.layer_count = 1,
.level_count = 1,
},
};
cmds.pipelineBarrier2(&.{
.p_image_memory_barriers = &.{img_barrier},
.image_memory_barrier_count = 1,
});
}
{ {
cmds.beginRendering(&.{ cmds.beginRendering(&.{
.render_area = vk.Rect2D{ .offset = .{ .x = 0, .y = 0 }, .extent = self.gc.swapchain_extent }, .render_area = vk.Rect2D{ .offset = .{ .x = 0, .y = 0 }, .extent = self.gc.swapchain_extent },
@ -95,9 +58,9 @@ pub fn draw(self: *Render2) !void {
.clear_value = .{ .color = .{ .float_32 = .{ 0.8, 0.7, 0.6, 1.0 } } }, .clear_value = .{ .color = .{ .float_32 = .{ 0.8, 0.7, 0.6, 1.0 } } },
.load_op = .clear, .load_op = .clear,
.store_op = .store, .store_op = .store,
.image_layout = .color_attachment_optimal, .image_layout = .attachment_optimal,
.image_view = current_image_view, .image_view = current_image_view,
.resolve_image_layout = .color_attachment_optimal, .resolve_image_layout = .attachment_optimal,
.resolve_mode = .{}, .resolve_mode = .{},
}, },
}, },
@ -122,46 +85,24 @@ pub fn draw(self: *Render2) !void {
cmds.draw(3, 1, 0, 0); cmds.draw(3, 1, 0, 0);
} }
{ try current_image.sync(cmds, .{}, .present_src_khr);
const img_barrier = vk.ImageMemoryBarrier2{
.image = current_image,
.old_layout = .color_attachment_optimal,
.new_layout = .present_src_khr,
.src_access_mask = .{ .color_attachment_write_bit = true },
.dst_access_mask = .{},
.src_stage_mask = .{ .color_attachment_output_bit = true },
.src_queue_family_index = self.gc.queues.graphics.family,
.dst_queue_family_index = self.gc.queues.graphics.family,
.subresource_range = .{
.aspect_mask = .{ .color_bit = true },
.base_array_layer = 0,
.base_mip_level = 0,
.layer_count = 1,
.level_count = 1,
},
};
cmds.pipelineBarrier2(&.{
.p_image_memory_barriers = &.{img_barrier},
.image_memory_barrier_count = 1,
});
}
} }
try cmds.endCommandBuffer(); try cmds.endCommandBuffer();
try self.gc.queues.graphics.submit( try self.gc.queues.graphics.submit(
&GraphicsContext.SubmitInfo{ &GraphicsContext.SubmitInfo{
.wait_semaphores = &.{sync.acquire_swapchain_image}, .wait_semaphores = &.{frame.acquire_swapchain_image},
.wait_dst_stage_mask = &.{.{ .transfer_bit = true }}, .wait_dst_stage_mask = &.{vk.PipelineStageFlags{}},
.command_buffers = &.{cmds.handle}, .command_buffers = &.{cmds.handle},
.signal_semaphores = &.{sync.draw_sema}, .signal_semaphores = &.{frame.draw_sema},
}, },
sync.draw_fence, frame.draw_fence,
); );
_ = try self.gc.device.queuePresentKHR(self.gc.queues.graphics.handle, &.{ _ = try self.gc.device.queuePresentKHR(self.gc.queues.graphics.handle, &.{
.swapchain_count = 1, .swapchain_count = 1,
.wait_semaphore_count = 1, .wait_semaphore_count = 1,
.p_wait_semaphores = &.{sync.draw_sema}, .p_wait_semaphores = &.{frame.draw_sema},
.p_swapchains = &.{self.gc.swapchain}, .p_swapchains = &.{self.gc.swapchain},
.p_image_indices = &.{swapchain_image_index}, .p_image_indices = &.{swapchain_image_index},
}); });
@ -169,13 +110,29 @@ pub fn draw(self: *Render2) !void {
self.frame = (self.frame + 1) % MAX_FRAME_LAG; self.frame = (self.frame + 1) % MAX_FRAME_LAG;
} }
const Sync = struct { // Per frame stuff
acquire_swapchain_image: vk.Semaphore = .null_handle, const FrameData = struct {
draw_sema: vk.Semaphore = .null_handle, // Sync
draw_fence: vk.Fence = .null_handle, acquire_swapchain_image: vk.Semaphore,
draw_sema: vk.Semaphore,
draw_fence: vk.Fence,
pub fn waitForDrawAndReset(self: *Sync, device: GraphicsContext.Device) !void { command_buffer: GraphicsContext.CommandBuffer,
/// Creates the per-frame objects: two semaphores, a fence that starts out
/// signaled, and one command buffer allocated from `command_pool`.
///
/// The fence is created signaled so that the first `waitForFences` on it
/// returns immediately.
///
/// On failure, every object already created by this call is destroyed before
/// the error is returned, so a partially built frame does not leak.
pub fn init(gc: *GraphicsContext, command_pool: GraphicsContext.CommandPool) !FrameData {
    const acquire_swapchain_image = try gc.device.createSemaphore(&.{}, null);
    errdefer gc.device.destroySemaphore(acquire_swapchain_image, null);

    const draw_sema = try gc.device.createSemaphore(&.{}, null);
    errdefer gc.device.destroySemaphore(draw_sema, null);

    const draw_fence = try gc.device.createFence(&.{ .flags = .{ .signaled_bit = true } }, null);
    errdefer gc.device.destroyFence(draw_fence, null);

    // Last fallible step, so nothing needs to be undone after it.
    const command_buffer = try command_pool.allocateCommandBuffer();

    return FrameData{
        .acquire_swapchain_image = acquire_swapchain_image,
        .draw_sema = draw_sema,
        .draw_fence = draw_fence,
        .command_buffer = command_buffer,
    };
}
pub fn waitForDrawAndReset(self: *FrameData, device: GraphicsContext.Device) !void {
_ = try device.waitForFences(1, &.{self.draw_fence}, vk.TRUE, std.math.maxInt(u64)); _ = try device.waitForFences(1, &.{self.draw_fence}, vk.TRUE, std.math.maxInt(u64));
try device.resetFences(1, &.{self.draw_fence}); try device.resetFences(1, &.{self.draw_fence});
try self.command_buffer.resetCommandBuffer(.{ .release_resources_bit = true });
} }
}; };