Refactor and simplify the per-frame memory arena; use vkCmdUpdateBuffer and device-local memory without HOST_VISIBLE

This commit is contained in:
sergeypdev 2024-12-14 00:11:11 +04:00
parent 8cf7df0a90
commit 44bd479bb1
3 changed files with 48 additions and 34 deletions

View File

@ -361,6 +361,7 @@ pub fn init(self: *GraphicsContext, allocator: std.mem.Allocator, window: *c.SDL
const physical_devices = try self.instance.enumeratePhysicalDevicesAlloc(fba.allocator()); const physical_devices = try self.instance.enumeratePhysicalDevicesAlloc(fba.allocator());
self.device_info = try selectPhysicalDevice(self.instance, self.surface, physical_devices); self.device_info = try selectPhysicalDevice(self.instance, self.surface, physical_devices);
std.debug.print("Selected Physical Device: {s}\n", .{@as([]u8, @alignCast(&self.device_info.properties.device_name))});
const queue_config = try selectQueues(self.instance, self.device_info.physical_device); const queue_config = try selectQueues(self.instance, self.device_info.physical_device);
self.memory_config = try selectMemoryPools(self.instance, self.device_info.physical_device); self.memory_config = try selectMemoryPools(self.instance, self.device_info.physical_device);
@ -628,6 +629,11 @@ pub const VulkanMemoryType = struct {
}; };
const DeviceMemoryConfig = struct { const DeviceMemoryConfig = struct {
/// Device Local
gpu: VulkanMemoryType = .{},
/// Host
cpu: VulkanMemoryType = .{},
/// PCIe memory
cpu_to_gpu: VulkanMemoryType = .{}, cpu_to_gpu: VulkanMemoryType = .{},
}; };
@ -740,10 +746,23 @@ fn selectMemoryPools(instance: Instance, device: vk.PhysicalDevice) !DeviceMemor
const mem_props = instance.getPhysicalDeviceMemoryProperties(device); const mem_props = instance.getPhysicalDeviceMemoryProperties(device);
var result: DeviceMemoryConfig = .{}; var result: DeviceMemoryConfig = .{};
var found_gpu = false;
var found_cpu_cached = false;
var found_cpu_to_gpu = false; var found_cpu_to_gpu = false;
for (mem_props.memory_types[0..mem_props.memory_type_count], 0..) |mem_type, i| { for (mem_props.memory_types[0..mem_props.memory_type_count], 0..) |mem_type, i| {
if (!found_gpu and mem_type.property_flags.device_local_bit and !mem_type.property_flags.host_visible_bit) {
found_gpu = true;
result.gpu.type_index = @intCast(i);
result.gpu.size = mem_props.memory_heaps[mem_type.heap_index].size;
}
if (!found_cpu_cached and !mem_type.property_flags.device_local_bit and mem_type.property_flags.host_visible_bit and mem_type.property_flags.host_coherent_bit and mem_type.property_flags.host_cached_bit) {
// It might also be device local on intel, but oh well
found_cpu_cached = true;
result.cpu.type_index = @intCast(i);
result.cpu.size = mem_props.memory_heaps[mem_type.heap_index].size;
}
// CPU->GPU Memory, likely a small buffer of 256mb or less // GPU Memory mapped over PCIe, likely a small buffer of 256mb or less. Very slow to write
if (!mem_type.property_flags.host_cached_bit and mem_type.property_flags.contains(.{ .device_local_bit = true, .host_visible_bit = true })) { if (!mem_type.property_flags.host_cached_bit and mem_type.property_flags.contains(.{ .device_local_bit = true, .host_visible_bit = true })) {
found_cpu_to_gpu = true; found_cpu_to_gpu = true;
result.cpu_to_gpu.type_index = @intCast(i); result.cpu_to_gpu.type_index = @intCast(i);
@ -751,7 +770,9 @@ fn selectMemoryPools(instance: Instance, device: vk.PhysicalDevice) !DeviceMemor
} }
} }
if (!found_cpu_to_gpu) { std.debug.print("Device Memory Config: {}\n", .{result});
if (!found_gpu or !found_cpu_cached or !found_cpu_to_gpu) {
return error.UnsupportedMemoryTypes; return error.UnsupportedMemoryTypes;
} }

View File

@ -7,6 +7,7 @@ const a = @import("asset_manifest");
const za = @import("zalgebra"); const za = @import("zalgebra");
const Vec3 = za.Vec3; const Vec3 = za.Vec3;
const Mat4 = za.Mat4; const Mat4 = za.Mat4;
const common = @import("common.zig");
const Render2 = @This(); const Render2 = @This();
@ -29,7 +30,7 @@ pub const Camera = struct {
var default_camera: Camera = .{}; var default_camera: Camera = .{};
const MAX_FRAME_LAG = 3; const MAX_FRAME_LAG = 3;
const PER_FRAME_ARENA_SIZE = 64 * 1024 * 1024; // 64mb TODO: should I handle cases when even 64mb is not available const PER_FRAME_ARENA_SIZE = 64 * common.MB;
gc: *GraphicsContext, gc: *GraphicsContext,
shaderman: *ShaderManager, shaderman: *ShaderManager,
@ -227,8 +228,9 @@ pub const VulkanPerFrameArena = struct {
}; };
pub fn init(self: *Render2, gc: *GraphicsContext, shaderman: *ShaderManager, assetman: *AssetManager) !void { pub fn init(self: *Render2, gc: *GraphicsContext, shaderman: *ShaderManager, assetman: *AssetManager) !void {
// Allocated in device local mem
const per_frame_upload_memory = try gc.device.allocateMemory(&.{ const per_frame_upload_memory = try gc.device.allocateMemory(&.{
.memory_type_index = gc.memory_config.cpu_to_gpu.type_index, .memory_type_index = gc.memory_config.gpu.type_index,
.allocation_size = PER_FRAME_ARENA_SIZE, .allocation_size = PER_FRAME_ARENA_SIZE,
}, null); }, null);
@ -277,30 +279,24 @@ pub fn draw(self: *Render2) !void {
self.vulkan_frame_arena.resetFrame(self.frame); self.vulkan_frame_arena.resetFrame(self.frame);
self.vulkan_frame_arena.startFrame(self.gc.device, self.frame); self.vulkan_frame_arena.startFrame(self.gc.device, self.frame);
const frame_arena_mem: []u8 = @as([*c]u8, @ptrCast((try device.mapMemory(self.vulkan_frame_arena.memory, 0, self.vulkan_frame_arena.size, .{})).?))[0..self.vulkan_frame_arena.size];
var global_buffer_addr: u64 = 0; var global_buffer_addr: u64 = 0;
const global_uniform_buffer = try self.createPerFrameBuffer(.{ .uniform_buffer_bit = true }, @sizeOf(GlobalUniform), &global_buffer_addr); const global_uniform_buffer = try self.createPerFrameBuffer(.{ .uniform_buffer_bit = true, .transfer_dst_bit = true }, @sizeOf(GlobalUniform), &global_buffer_addr);
{ const global_uniform = blk: {
const global_uniform: *align(1) GlobalUniform = std.mem.bytesAsValue(GlobalUniform, frame_arena_mem[global_buffer_addr .. global_buffer_addr + @sizeOf(GlobalUniform)]); const view = self.camera.view_mat;
// const fwidth: f32 = @floatFromInt(self.gc.swapchain_extent.width);
// const fheight: f32 = @floatFromInt(self.gc.swapchain_extent.height);
const projection = self.camera.projection();
const view_projection = projection.mul(view);
{ break :blk GlobalUniform{
const view = self.camera.view_mat; .view = .{
// const fwidth: f32 = @floatFromInt(self.gc.swapchain_extent.width); .world_to_view = view,
// const fheight: f32 = @floatFromInt(self.gc.swapchain_extent.height); .view_to_clip = projection,
const projection = self.camera.projection(); .world_to_clip = view_projection,
const view_projection = projection.mul(view); },
};
global_uniform.* = .{ };
.view = .{
.world_to_view = view,
.view_to_clip = projection,
.world_to_clip = view_projection,
},
};
}
}
// Move this out into a separate func // Move this out into a separate func
const swapchain_image_index: u32 = try self.gc.acquireSwapchainImage(frame.acquire_swapchain_image); const swapchain_image_index: u32 = try self.gc.acquireSwapchainImage(frame.acquire_swapchain_image);
@ -313,14 +309,15 @@ pub fn draw(self: *Render2) !void {
try cmds.beginCommandBuffer(&.{}); try cmds.beginCommandBuffer(&.{});
{ {
cmds.updateBuffer(global_uniform_buffer, 0, @sizeOf(GlobalUniform), &global_uniform);
// Transition global uniform buffer // Transition global uniform buffer
cmds.pipelineBarrier2(&vk.DependencyInfo{ cmds.pipelineBarrier2(&vk.DependencyInfo{
.buffer_memory_barrier_count = 1, .buffer_memory_barrier_count = 1,
.p_buffer_memory_barriers = &.{ .p_buffer_memory_barriers = &.{
vk.BufferMemoryBarrier2{ vk.BufferMemoryBarrier2{
.buffer = global_uniform_buffer, .buffer = global_uniform_buffer,
.src_stage_mask = .{ .host_bit = true }, .src_stage_mask = .{ .copy_bit = true },
.src_access_mask = .{ .host_write_bit = true }, .src_access_mask = .{ .transfer_write_bit = true },
.dst_stage_mask = .{ .vertex_shader_bit = true }, .dst_stage_mask = .{ .vertex_shader_bit = true },
.dst_access_mask = .{ .shader_read_bit = true }, .dst_access_mask = .{ .shader_read_bit = true },
.offset = 0, .offset = 0,
@ -397,13 +394,6 @@ pub fn draw(self: *Render2) !void {
} }
try cmds.endCommandBuffer(); try cmds.endCommandBuffer();
var vulkan_frame_arena_modified_ranges_buf: [2]vk.MappedMemoryRange = undefined;
const vulkan_frame_arena_modified_ranges = self.vulkan_frame_arena.getModifiedMemoryRanges(&vulkan_frame_arena_modified_ranges_buf);
try device.flushMappedMemoryRanges(@intCast(vulkan_frame_arena_modified_ranges.len), vulkan_frame_arena_modified_ranges.ptr);
// NOTE: Unmap DEVICE_LOCAL, HOST_VISIBLE memory before submit as it can be slow on Windows (according to Reddit...)
device.unmapMemory(self.vulkan_frame_arena.memory);
try self.gc.queues.graphics.submit( try self.gc.queues.graphics.submit(
&GraphicsContext.SubmitInfo{ &GraphicsContext.SubmitInfo{
.wait_semaphores = &.{frame.acquire_swapchain_image}, .wait_semaphores = &.{frame.acquire_swapchain_image},

3
src/common.zig Normal file
View File

@ -0,0 +1,3 @@
/// Number of bytes in one kibibyte (2^10).
pub const KB = 1 << 10;
/// Number of bytes in one mebibyte (2^20), expressed in terms of `KB`.
pub const MB = KB << 10;
/// Number of bytes in one gibibyte (2^30), expressed in terms of `MB`.
pub const GB = MB << 10;