Refactor and simplify the per-frame memory arena: upload with vkCmdUpdateBuffer into DEVICE_LOCAL memory that is not HOST_VISIBLE

This commit is contained in:
sergeypdev 2024-12-14 00:11:11 +04:00
parent 8cf7df0a90
commit 44bd479bb1
3 changed files with 48 additions and 34 deletions

View File

@ -361,6 +361,7 @@ pub fn init(self: *GraphicsContext, allocator: std.mem.Allocator, window: *c.SDL
const physical_devices = try self.instance.enumeratePhysicalDevicesAlloc(fba.allocator());
self.device_info = try selectPhysicalDevice(self.instance, self.surface, physical_devices);
std.debug.print("Selected Physical Device: {s}\n", .{@as([]u8, @alignCast(&self.device_info.properties.device_name))});
const queue_config = try selectQueues(self.instance, self.device_info.physical_device);
self.memory_config = try selectMemoryPools(self.instance, self.device_info.physical_device);
@ -628,6 +629,11 @@ pub const VulkanMemoryType = struct {
};
/// Memory types picked for the selected physical device, as filled in by
/// `selectMemoryPools`. Every field defaults to an empty `VulkanMemoryType`.
const DeviceMemoryConfig = struct {
/// Device-local memory that is not host-visible.
gpu: VulkanMemoryType = .{},
/// Host memory: host-visible, host-coherent and host-cached, and not device-local.
cpu: VulkanMemoryType = .{},
/// PCIe-mapped memory: device-local and host-visible, but not host-cached.
cpu_to_gpu: VulkanMemoryType = .{},
};
@ -740,10 +746,23 @@ fn selectMemoryPools(instance: Instance, device: vk.PhysicalDevice) !DeviceMemor
const mem_props = instance.getPhysicalDeviceMemoryProperties(device);
var result: DeviceMemoryConfig = .{};
var found_gpu = false;
var found_cpu_cached = false;
var found_cpu_to_gpu = false;
for (mem_props.memory_types[0..mem_props.memory_type_count], 0..) |mem_type, i| {
if (!found_gpu and mem_type.property_flags.device_local_bit and !mem_type.property_flags.host_visible_bit) {
found_gpu = true;
result.gpu.type_index = @intCast(i);
result.gpu.size = mem_props.memory_heaps[mem_type.heap_index].size;
}
if (!found_cpu_cached and !mem_type.property_flags.device_local_bit and mem_type.property_flags.host_visible_bit and mem_type.property_flags.host_coherent_bit and mem_type.property_flags.host_cached_bit) {
// It might also be device local on intel, but oh well
found_cpu_cached = true;
result.cpu.type_index = @intCast(i);
result.cpu.size = mem_props.memory_heaps[mem_type.heap_index].size;
}
// CPU->GPU Memory, likely a small buffer of 256mb or less
// GPU Memory mapped over PCIe, likely a small buffer of 256mb or less. Very slow to write
if (!mem_type.property_flags.host_cached_bit and mem_type.property_flags.contains(.{ .device_local_bit = true, .host_visible_bit = true })) {
found_cpu_to_gpu = true;
result.cpu_to_gpu.type_index = @intCast(i);
@ -751,7 +770,9 @@ fn selectMemoryPools(instance: Instance, device: vk.PhysicalDevice) !DeviceMemor
}
}
if (!found_cpu_to_gpu) {
std.debug.print("Device Memory Config: {}\n", .{result});
if (!found_gpu or !found_cpu_cached or !found_cpu_to_gpu) {
return error.UnsupportedMemoryTypes;
}

View File

@ -7,6 +7,7 @@ const a = @import("asset_manifest");
const za = @import("zalgebra");
const Vec3 = za.Vec3;
const Mat4 = za.Mat4;
const common = @import("common.zig");
const Render2 = @This();
@ -29,7 +30,7 @@ pub const Camera = struct {
var default_camera: Camera = .{};
const MAX_FRAME_LAG = 3;
const PER_FRAME_ARENA_SIZE = 64 * 1024 * 1024; // 64mb TODO: should I handle cases when even 64mb is not available
const PER_FRAME_ARENA_SIZE = 64 * common.MB;
gc: *GraphicsContext,
shaderman: *ShaderManager,
@ -227,8 +228,9 @@ pub const VulkanPerFrameArena = struct {
};
pub fn init(self: *Render2, gc: *GraphicsContext, shaderman: *ShaderManager, assetman: *AssetManager) !void {
// Allocated in device local mem
const per_frame_upload_memory = try gc.device.allocateMemory(&.{
.memory_type_index = gc.memory_config.cpu_to_gpu.type_index,
.memory_type_index = gc.memory_config.gpu.type_index,
.allocation_size = PER_FRAME_ARENA_SIZE,
}, null);
@ -277,30 +279,24 @@ pub fn draw(self: *Render2) !void {
self.vulkan_frame_arena.resetFrame(self.frame);
self.vulkan_frame_arena.startFrame(self.gc.device, self.frame);
const frame_arena_mem: []u8 = @as([*c]u8, @ptrCast((try device.mapMemory(self.vulkan_frame_arena.memory, 0, self.vulkan_frame_arena.size, .{})).?))[0..self.vulkan_frame_arena.size];
var global_buffer_addr: u64 = 0;
const global_uniform_buffer = try self.createPerFrameBuffer(.{ .uniform_buffer_bit = true }, @sizeOf(GlobalUniform), &global_buffer_addr);
const global_uniform_buffer = try self.createPerFrameBuffer(.{ .uniform_buffer_bit = true, .transfer_dst_bit = true }, @sizeOf(GlobalUniform), &global_buffer_addr);
{
const global_uniform: *align(1) GlobalUniform = std.mem.bytesAsValue(GlobalUniform, frame_arena_mem[global_buffer_addr .. global_buffer_addr + @sizeOf(GlobalUniform)]);
const global_uniform = blk: {
const view = self.camera.view_mat;
// const fwidth: f32 = @floatFromInt(self.gc.swapchain_extent.width);
// const fheight: f32 = @floatFromInt(self.gc.swapchain_extent.height);
const projection = self.camera.projection();
const view_projection = projection.mul(view);
{
const view = self.camera.view_mat;
// const fwidth: f32 = @floatFromInt(self.gc.swapchain_extent.width);
// const fheight: f32 = @floatFromInt(self.gc.swapchain_extent.height);
const projection = self.camera.projection();
const view_projection = projection.mul(view);
global_uniform.* = .{
.view = .{
.world_to_view = view,
.view_to_clip = projection,
.world_to_clip = view_projection,
},
};
}
}
break :blk GlobalUniform{
.view = .{
.world_to_view = view,
.view_to_clip = projection,
.world_to_clip = view_projection,
},
};
};
// Move this out into a separate func
const swapchain_image_index: u32 = try self.gc.acquireSwapchainImage(frame.acquire_swapchain_image);
@ -313,14 +309,15 @@ pub fn draw(self: *Render2) !void {
try cmds.beginCommandBuffer(&.{});
{
cmds.updateBuffer(global_uniform_buffer, 0, @sizeOf(GlobalUniform), &global_uniform);
// Transition global uniform buffer
cmds.pipelineBarrier2(&vk.DependencyInfo{
.buffer_memory_barrier_count = 1,
.p_buffer_memory_barriers = &.{
vk.BufferMemoryBarrier2{
.buffer = global_uniform_buffer,
.src_stage_mask = .{ .host_bit = true },
.src_access_mask = .{ .host_write_bit = true },
.src_stage_mask = .{ .copy_bit = true },
.src_access_mask = .{ .transfer_write_bit = true },
.dst_stage_mask = .{ .vertex_shader_bit = true },
.dst_access_mask = .{ .shader_read_bit = true },
.offset = 0,
@ -397,13 +394,6 @@ pub fn draw(self: *Render2) !void {
}
try cmds.endCommandBuffer();
var vulkan_frame_arena_modified_ranges_buf: [2]vk.MappedMemoryRange = undefined;
const vulkan_frame_arena_modified_ranges = self.vulkan_frame_arena.getModifiedMemoryRanges(&vulkan_frame_arena_modified_ranges_buf);
try device.flushMappedMemoryRanges(@intCast(vulkan_frame_arena_modified_ranges.len), vulkan_frame_arena_modified_ranges.ptr);
// NOTE: Unmap DEVICE_LOCAL, HOST_VISIBLE memory before submit as it can be slow on Windows (according to Reddit...)
device.unmapMemory(self.vulkan_frame_arena.memory);
try self.gc.queues.graphics.submit(
&GraphicsContext.SubmitInfo{
.wait_semaphores = &.{frame.acquire_swapchain_image},

3
src/common.zig Normal file
View File

@ -0,0 +1,3 @@
// Binary size multipliers (powers of 1024), intended for expressions such as
// `64 * MB` when sizing buffers.

/// 1024.
pub const KB = 1024;
/// 1024 * KB (1_048_576).
pub const MB = 1024 * KB;
/// 1024 * MB (1_073_741_824).
pub const GB = 1024 * MB;