From 44bd479bb1692ca214e0b2f962dfd47701a73c95 Mon Sep 17 00:00:00 2001 From: sergeypdev Date: Sat, 14 Dec 2024 00:11:11 +0400 Subject: [PATCH] Refactor and simplify per frame mem arena, use vkUpdateBuffer and Device Local memory without HOST_VISIBLE --- src/GraphicsContext.zig | 25 +++++++++++++++++-- src/Render2.zig | 54 +++++++++++++++++------------------------ src/common.zig | 3 +++ 3 files changed, 48 insertions(+), 34 deletions(-) create mode 100644 src/common.zig diff --git a/src/GraphicsContext.zig b/src/GraphicsContext.zig index 50989b4..a5fd3a1 100644 --- a/src/GraphicsContext.zig +++ b/src/GraphicsContext.zig @@ -361,6 +361,7 @@ pub fn init(self: *GraphicsContext, allocator: std.mem.Allocator, window: *c.SDL const physical_devices = try self.instance.enumeratePhysicalDevicesAlloc(fba.allocator()); self.device_info = try selectPhysicalDevice(self.instance, self.surface, physical_devices); + std.debug.print("Selected Physical Device: {s}\n", .{@as([]u8, @alignCast(&self.device_info.properties.device_name))}); const queue_config = try selectQueues(self.instance, self.device_info.physical_device); self.memory_config = try selectMemoryPools(self.instance, self.device_info.physical_device); @@ -628,6 +629,11 @@ pub const VulkanMemoryType = struct { }; const DeviceMemoryConfig = struct { + /// Device Local + gpu: VulkanMemoryType = .{}, + /// Host + cpu: VulkanMemoryType = .{}, + /// PCIe memory cpu_to_gpu: VulkanMemoryType = .{}, }; @@ -740,10 +746,23 @@ fn selectMemoryPools(instance: Instance, device: vk.PhysicalDevice) !DeviceMemor const mem_props = instance.getPhysicalDeviceMemoryProperties(device); var result: DeviceMemoryConfig = .{}; + var found_gpu = false; + var found_cpu_cached = false; var found_cpu_to_gpu = false; for (mem_props.memory_types[0..mem_props.memory_type_count], 0..) |mem_type, i| { + if (!found_gpu and mem_type.property_flags.device_local_bit and !mem_type.property_flags.host_visible_bit) { + found_gpu = true; + result.gpu.type_index = @intCast(i); + result.gpu.size = mem_props.memory_heaps[mem_type.heap_index].size; + } + if (!found_cpu_cached and !mem_type.property_flags.device_local_bit and mem_type.property_flags.host_visible_bit and mem_type.property_flags.host_coherent_bit and mem_type.property_flags.host_cached_bit) { + // It might also be device local on intel, but oh well + found_cpu_cached = true; + result.cpu.type_index = @intCast(i); + result.cpu.size = mem_props.memory_heaps[mem_type.heap_index].size; + } - // CPU->GPU Memory, likely a small buffer of 256mb or less + // GPU Memory mapped over PCIe, likely a small buffer of 256mb or less. Very slow to write if (!mem_type.property_flags.host_cached_bit and mem_type.property_flags.contains(.{ .device_local_bit = true, .host_visible_bit = true })) { found_cpu_to_gpu = true; result.cpu_to_gpu.type_index = @intCast(i); @@ -751,7 +770,9 @@ fn selectMemoryPools(instance: Instance, device: vk.PhysicalDevice) !DeviceMemor } } - if (!found_cpu_to_gpu) { + std.debug.print("Device Memory Config: {}\n", .{result}); + + if (!found_gpu or !found_cpu_cached or !found_cpu_to_gpu) { return error.UnsupportedMemoryTypes; } diff --git a/src/Render2.zig b/src/Render2.zig index 0271cb1..9cefed4 100644 --- a/src/Render2.zig +++ b/src/Render2.zig @@ -7,6 +7,7 @@ const a = @import("asset_manifest"); const za = @import("zalgebra"); const Vec3 = za.Vec3; const Mat4 = za.Mat4; +const common = @import("common.zig"); const Render2 = @This(); @@ -29,7 +30,7 @@ pub const Camera = struct { var default_camera: Camera = .{}; const MAX_FRAME_LAG = 3; -const PER_FRAME_ARENA_SIZE = 64 * 1024 * 1024; // 64mb TODO: should I handle cases when even 64mb is not available +const PER_FRAME_ARENA_SIZE = 64 * common.MB; gc: *GraphicsContext, shaderman: *ShaderManager, @@ -227,8 +228,9 @@ pub const VulkanPerFrameArena = struct { }; pub fn init(self: *Render2, gc: *GraphicsContext, shaderman: *ShaderManager, assetman: *AssetManager) !void { + // Allocated in device local mem const per_frame_upload_memory = try gc.device.allocateMemory(&.{ - .memory_type_index = gc.memory_config.cpu_to_gpu.type_index, + .memory_type_index = gc.memory_config.gpu.type_index, .allocation_size = PER_FRAME_ARENA_SIZE, }, null); @@ -277,30 +279,24 @@ pub fn draw(self: *Render2) !void { self.vulkan_frame_arena.resetFrame(self.frame); self.vulkan_frame_arena.startFrame(self.gc.device, self.frame); - const frame_arena_mem: []u8 = @as([*c]u8, @ptrCast((try device.mapMemory(self.vulkan_frame_arena.memory, 0, self.vulkan_frame_arena.size, .{})).?))[0..self.vulkan_frame_arena.size]; - var global_buffer_addr: u64 = 0; - const global_uniform_buffer = try self.createPerFrameBuffer(.{ .uniform_buffer_bit = true }, @sizeOf(GlobalUniform), &global_buffer_addr); + const global_uniform_buffer = try self.createPerFrameBuffer(.{ .uniform_buffer_bit = true, .transfer_dst_bit = true }, @sizeOf(GlobalUniform), &global_buffer_addr); - { - const global_uniform: *align(1) GlobalUniform = std.mem.bytesAsValue(GlobalUniform, frame_arena_mem[global_buffer_addr .. global_buffer_addr + @sizeOf(GlobalUniform)]); + const global_uniform = blk: { + const view = self.camera.view_mat; + // const fwidth: f32 = @floatFromInt(self.gc.swapchain_extent.width); + // const fheight: f32 = @floatFromInt(self.gc.swapchain_extent.height); + const projection = self.camera.projection(); + const view_projection = projection.mul(view); - { - const view = self.camera.view_mat; - // const fwidth: f32 = @floatFromInt(self.gc.swapchain_extent.width); - // const fheight: f32 = @floatFromInt(self.gc.swapchain_extent.height); - const projection = self.camera.projection(); - const view_projection = projection.mul(view); - - global_uniform.* = .{ - .view = .{ - .world_to_view = view, - .view_to_clip = projection, - .world_to_clip = view_projection, - }, - }; - } - } + break :blk GlobalUniform{ + .view = .{ + .world_to_view = view, + .view_to_clip = projection, + .world_to_clip = view_projection, + }, + }; + }; // Move this out into a separate func const swapchain_image_index: u32 = try self.gc.acquireSwapchainImage(frame.acquire_swapchain_image); @@ -313,14 +309,15 @@ pub fn draw(self: *Render2) !void { try cmds.beginCommandBuffer(&.{}); { + cmds.updateBuffer(global_uniform_buffer, 0, @sizeOf(GlobalUniform), &global_uniform); // Transition global uniform buffer cmds.pipelineBarrier2(&vk.DependencyInfo{ .buffer_memory_barrier_count = 1, .p_buffer_memory_barriers = &.{ vk.BufferMemoryBarrier2{ .buffer = global_uniform_buffer, - .src_stage_mask = .{ .host_bit = true }, - .src_access_mask = .{ .host_write_bit = true }, + .src_stage_mask = .{ .copy_bit = true }, + .src_access_mask = .{ .transfer_write_bit = true }, .dst_stage_mask = .{ .vertex_shader_bit = true }, .dst_access_mask = .{ .shader_read_bit = true }, .offset = 0, @@ -397,13 +394,6 @@ pub fn draw(self: *Render2) !void { } try cmds.endCommandBuffer(); - var vulkan_frame_arena_modified_ranges_buf: [2]vk.MappedMemoryRange = undefined; - const vulkan_frame_arena_modified_ranges = self.vulkan_frame_arena.getModifiedMemoryRanges(&vulkan_frame_arena_modified_ranges_buf); - try device.flushMappedMemoryRanges(@intCast(vulkan_frame_arena_modified_ranges.len), vulkan_frame_arena_modified_ranges.ptr); - - // NOTE: Unmap DEVICE_LOCAL, HOST_VISIBLE memory before submit as it can be slow on Windows (according to Reddit...) - device.unmapMemory(self.vulkan_frame_arena.memory); - try self.gc.queues.graphics.submit( &GraphicsContext.SubmitInfo{ .wait_semaphores = &.{frame.acquire_swapchain_image}, diff --git a/src/common.zig b/src/common.zig new file mode 100644 index 0000000..d362343 --- /dev/null +++ b/src/common.zig @@ -0,0 +1,3 @@ +pub const KB = 1024; +pub const MB = 1024 * KB; +pub const GB = 1024 * MB;