Refactor and simplify the per-frame memory arena: upload via vkCmdUpdateBuffer into DEVICE_LOCAL memory that is not HOST_VISIBLE
This commit is contained in:
parent
8cf7df0a90
commit
44bd479bb1
@ -361,6 +361,7 @@ pub fn init(self: *GraphicsContext, allocator: std.mem.Allocator, window: *c.SDL
|
|||||||
|
|
||||||
const physical_devices = try self.instance.enumeratePhysicalDevicesAlloc(fba.allocator());
|
const physical_devices = try self.instance.enumeratePhysicalDevicesAlloc(fba.allocator());
|
||||||
self.device_info = try selectPhysicalDevice(self.instance, self.surface, physical_devices);
|
self.device_info = try selectPhysicalDevice(self.instance, self.surface, physical_devices);
|
||||||
|
std.debug.print("Selected Physical Device: {s}\n", .{@as([]u8, @alignCast(&self.device_info.properties.device_name))});
|
||||||
const queue_config = try selectQueues(self.instance, self.device_info.physical_device);
|
const queue_config = try selectQueues(self.instance, self.device_info.physical_device);
|
||||||
self.memory_config = try selectMemoryPools(self.instance, self.device_info.physical_device);
|
self.memory_config = try selectMemoryPools(self.instance, self.device_info.physical_device);
|
||||||
|
|
||||||
@ -628,6 +629,11 @@ pub const VulkanMemoryType = struct {
|
|||||||
};
|
};
|
||||||
|
|
||||||
/// Vulkan memory types picked per usage class; populated by selectMemoryPools.
const DeviceMemoryConfig = struct {
|
const DeviceMemoryConfig = struct {
|
||||||
|
/// Device-local memory type that is not host-visible (GPU-only).
|
||||||
|
gpu: VulkanMemoryType = .{},
|
||||||
|
/// Host memory type: host-visible, host-coherent and host-cached, not device-local.
|
||||||
|
cpu: VulkanMemoryType = .{},
|
||||||
|
/// Device-local memory type that is also host-visible and not host-cached (GPU memory mapped over PCIe).
|
||||||
cpu_to_gpu: VulkanMemoryType = .{},
|
cpu_to_gpu: VulkanMemoryType = .{},
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -740,10 +746,23 @@ fn selectMemoryPools(instance: Instance, device: vk.PhysicalDevice) !DeviceMemor
|
|||||||
const mem_props = instance.getPhysicalDeviceMemoryProperties(device);
|
const mem_props = instance.getPhysicalDeviceMemoryProperties(device);
|
||||||
|
|
||||||
var result: DeviceMemoryConfig = .{};
|
var result: DeviceMemoryConfig = .{};
|
||||||
|
var found_gpu = false;
|
||||||
|
var found_cpu_cached = false;
|
||||||
var found_cpu_to_gpu = false;
|
var found_cpu_to_gpu = false;
|
||||||
for (mem_props.memory_types[0..mem_props.memory_type_count], 0..) |mem_type, i| {
|
for (mem_props.memory_types[0..mem_props.memory_type_count], 0..) |mem_type, i| {
|
||||||
|
if (!found_gpu and mem_type.property_flags.device_local_bit and !mem_type.property_flags.host_visible_bit) {
|
||||||
|
found_gpu = true;
|
||||||
|
result.gpu.type_index = @intCast(i);
|
||||||
|
result.gpu.size = mem_props.memory_heaps[mem_type.heap_index].size;
|
||||||
|
}
|
||||||
|
if (!found_cpu_cached and !mem_type.property_flags.device_local_bit and mem_type.property_flags.host_visible_bit and mem_type.property_flags.host_coherent_bit and mem_type.property_flags.host_cached_bit) {
|
||||||
|
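// NOTE(review): on integrated GPUs (e.g. Intel) host memory may also be DEVICE_LOCAL; the condition above rejects such types, so selection can end in error.UnsupportedMemoryTypes — confirm on such hardware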
|
||||||
|
found_cpu_cached = true;
|
||||||
|
result.cpu.type_index = @intCast(i);
|
||||||
|
result.cpu.size = mem_props.memory_heaps[mem_type.heap_index].size;
|
||||||
|
}
|
||||||
|
|
||||||
// CPU->GPU Memory, likely a small buffer of 256mb or less
|
// GPU Memory mapped over PCIe, likely a small buffer of 256mb or less. Very slow to write
|
||||||
if (!mem_type.property_flags.host_cached_bit and mem_type.property_flags.contains(.{ .device_local_bit = true, .host_visible_bit = true })) {
|
if (!mem_type.property_flags.host_cached_bit and mem_type.property_flags.contains(.{ .device_local_bit = true, .host_visible_bit = true })) {
|
||||||
found_cpu_to_gpu = true;
|
found_cpu_to_gpu = true;
|
||||||
result.cpu_to_gpu.type_index = @intCast(i);
|
result.cpu_to_gpu.type_index = @intCast(i);
|
||||||
@ -751,7 +770,9 @@ fn selectMemoryPools(instance: Instance, device: vk.PhysicalDevice) !DeviceMemor
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!found_cpu_to_gpu) {
|
std.debug.print("Device Memory Config: {}\n", .{result});
|
||||||
|
|
||||||
|
if (!found_gpu or !found_cpu_cached or !found_cpu_to_gpu) {
|
||||||
return error.UnsupportedMemoryTypes;
|
return error.UnsupportedMemoryTypes;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -7,6 +7,7 @@ const a = @import("asset_manifest");
|
|||||||
const za = @import("zalgebra");
|
const za = @import("zalgebra");
|
||||||
const Vec3 = za.Vec3;
|
const Vec3 = za.Vec3;
|
||||||
const Mat4 = za.Mat4;
|
const Mat4 = za.Mat4;
|
||||||
|
const common = @import("common.zig");
|
||||||
|
|
||||||
const Render2 = @This();
|
const Render2 = @This();
|
||||||
|
|
||||||
@ -29,7 +30,7 @@ pub const Camera = struct {
|
|||||||
var default_camera: Camera = .{};
|
var default_camera: Camera = .{};
|
||||||
|
|
||||||
const MAX_FRAME_LAG = 3;
|
const MAX_FRAME_LAG = 3;
|
||||||
const PER_FRAME_ARENA_SIZE = 64 * 1024 * 1024; // 64mb TODO: should I handle cases when even 64mb is not available
|
const PER_FRAME_ARENA_SIZE = 64 * common.MB;
|
||||||
|
|
||||||
gc: *GraphicsContext,
|
gc: *GraphicsContext,
|
||||||
shaderman: *ShaderManager,
|
shaderman: *ShaderManager,
|
||||||
@ -227,8 +228,9 @@ pub const VulkanPerFrameArena = struct {
|
|||||||
};
|
};
|
||||||
|
|
||||||
pub fn init(self: *Render2, gc: *GraphicsContext, shaderman: *ShaderManager, assetman: *AssetManager) !void {
|
pub fn init(self: *Render2, gc: *GraphicsContext, shaderman: *ShaderManager, assetman: *AssetManager) !void {
|
||||||
|
// Allocated in device local mem
|
||||||
const per_frame_upload_memory = try gc.device.allocateMemory(&.{
|
const per_frame_upload_memory = try gc.device.allocateMemory(&.{
|
||||||
.memory_type_index = gc.memory_config.cpu_to_gpu.type_index,
|
.memory_type_index = gc.memory_config.gpu.type_index,
|
||||||
.allocation_size = PER_FRAME_ARENA_SIZE,
|
.allocation_size = PER_FRAME_ARENA_SIZE,
|
||||||
}, null);
|
}, null);
|
||||||
|
|
||||||
@ -277,30 +279,24 @@ pub fn draw(self: *Render2) !void {
|
|||||||
self.vulkan_frame_arena.resetFrame(self.frame);
|
self.vulkan_frame_arena.resetFrame(self.frame);
|
||||||
self.vulkan_frame_arena.startFrame(self.gc.device, self.frame);
|
self.vulkan_frame_arena.startFrame(self.gc.device, self.frame);
|
||||||
|
|
||||||
const frame_arena_mem: []u8 = @as([*c]u8, @ptrCast((try device.mapMemory(self.vulkan_frame_arena.memory, 0, self.vulkan_frame_arena.size, .{})).?))[0..self.vulkan_frame_arena.size];
|
|
||||||
|
|
||||||
var global_buffer_addr: u64 = 0;
|
var global_buffer_addr: u64 = 0;
|
||||||
const global_uniform_buffer = try self.createPerFrameBuffer(.{ .uniform_buffer_bit = true }, @sizeOf(GlobalUniform), &global_buffer_addr);
|
const global_uniform_buffer = try self.createPerFrameBuffer(.{ .uniform_buffer_bit = true, .transfer_dst_bit = true }, @sizeOf(GlobalUniform), &global_buffer_addr);
|
||||||
|
|
||||||
{
|
const global_uniform = blk: {
|
||||||
const global_uniform: *align(1) GlobalUniform = std.mem.bytesAsValue(GlobalUniform, frame_arena_mem[global_buffer_addr .. global_buffer_addr + @sizeOf(GlobalUniform)]);
|
const view = self.camera.view_mat;
|
||||||
|
// const fwidth: f32 = @floatFromInt(self.gc.swapchain_extent.width);
|
||||||
|
// const fheight: f32 = @floatFromInt(self.gc.swapchain_extent.height);
|
||||||
|
const projection = self.camera.projection();
|
||||||
|
const view_projection = projection.mul(view);
|
||||||
|
|
||||||
{
|
break :blk GlobalUniform{
|
||||||
const view = self.camera.view_mat;
|
.view = .{
|
||||||
// const fwidth: f32 = @floatFromInt(self.gc.swapchain_extent.width);
|
.world_to_view = view,
|
||||||
// const fheight: f32 = @floatFromInt(self.gc.swapchain_extent.height);
|
.view_to_clip = projection,
|
||||||
const projection = self.camera.projection();
|
.world_to_clip = view_projection,
|
||||||
const view_projection = projection.mul(view);
|
},
|
||||||
|
};
|
||||||
global_uniform.* = .{
|
};
|
||||||
.view = .{
|
|
||||||
.world_to_view = view,
|
|
||||||
.view_to_clip = projection,
|
|
||||||
.world_to_clip = view_projection,
|
|
||||||
},
|
|
||||||
};
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Move this out into a separate func
|
// Move this out into a separate func
|
||||||
const swapchain_image_index: u32 = try self.gc.acquireSwapchainImage(frame.acquire_swapchain_image);
|
const swapchain_image_index: u32 = try self.gc.acquireSwapchainImage(frame.acquire_swapchain_image);
|
||||||
@ -313,14 +309,15 @@ pub fn draw(self: *Render2) !void {
|
|||||||
|
|
||||||
try cmds.beginCommandBuffer(&.{});
|
try cmds.beginCommandBuffer(&.{});
|
||||||
{
|
{
|
||||||
|
cmds.updateBuffer(global_uniform_buffer, 0, @sizeOf(GlobalUniform), &global_uniform);
|
||||||
// Transition global uniform buffer
|
// Transition global uniform buffer
|
||||||
cmds.pipelineBarrier2(&vk.DependencyInfo{
|
cmds.pipelineBarrier2(&vk.DependencyInfo{
|
||||||
.buffer_memory_barrier_count = 1,
|
.buffer_memory_barrier_count = 1,
|
||||||
.p_buffer_memory_barriers = &.{
|
.p_buffer_memory_barriers = &.{
|
||||||
vk.BufferMemoryBarrier2{
|
vk.BufferMemoryBarrier2{
|
||||||
.buffer = global_uniform_buffer,
|
.buffer = global_uniform_buffer,
|
||||||
.src_stage_mask = .{ .host_bit = true },
|
.src_stage_mask = .{ .copy_bit = true },
|
||||||
.src_access_mask = .{ .host_write_bit = true },
|
.src_access_mask = .{ .transfer_write_bit = true },
|
||||||
.dst_stage_mask = .{ .vertex_shader_bit = true },
|
.dst_stage_mask = .{ .vertex_shader_bit = true },
|
||||||
.dst_access_mask = .{ .shader_read_bit = true },
|
.dst_access_mask = .{ .shader_read_bit = true },
|
||||||
.offset = 0,
|
.offset = 0,
|
||||||
@ -397,13 +394,6 @@ pub fn draw(self: *Render2) !void {
|
|||||||
}
|
}
|
||||||
try cmds.endCommandBuffer();
|
try cmds.endCommandBuffer();
|
||||||
|
|
||||||
var vulkan_frame_arena_modified_ranges_buf: [2]vk.MappedMemoryRange = undefined;
|
|
||||||
const vulkan_frame_arena_modified_ranges = self.vulkan_frame_arena.getModifiedMemoryRanges(&vulkan_frame_arena_modified_ranges_buf);
|
|
||||||
try device.flushMappedMemoryRanges(@intCast(vulkan_frame_arena_modified_ranges.len), vulkan_frame_arena_modified_ranges.ptr);
|
|
||||||
|
|
||||||
// NOTE: Unmap DEVICE_LOCAL, HOST_VISIBLE memory before submit as it can be slow on Windows (according to Reddit...)
|
|
||||||
device.unmapMemory(self.vulkan_frame_arena.memory);
|
|
||||||
|
|
||||||
try self.gc.queues.graphics.submit(
|
try self.gc.queues.graphics.submit(
|
||||||
&GraphicsContext.SubmitInfo{
|
&GraphicsContext.SubmitInfo{
|
||||||
.wait_semaphores = &.{frame.acquire_swapchain_image},
|
.wait_semaphores = &.{frame.acquire_swapchain_image},
|
||||||
|
3
src/common.zig
Normal file
3
src/common.zig
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
/// Number of bytes in one kibibyte (2^10).
pub const KB = 1024;
|
||||||
|
/// Number of bytes in one mebibyte (1024 * KB).
pub const MB = 1024 * KB;
|
||||||
|
/// Number of bytes in one gibibyte (1024 * MB).
pub const GB = 1024 * MB;
|
Loading…
x
Reference in New Issue
Block a user