- Ditch VMA for per frame data
- Add basic global descriptor set and all the boilerplate to manage that
- Add global uniform buffer that only has camera matrices right now
- Implement a per frame GPU memory arena, one large buffer that wraps around holding data for all frames in flight
- Get free look camera working again
const std = @import("std");
const GraphicsContext = @import("GraphicsContext.zig");
const AssetManager = @import("AssetManager.zig");
const ShaderManager = @import("ShaderManager.zig");
const vk = @import("vk");
const a = @import("asset_manifest");
const za = @import("zalgebra");
const Vec3 = za.Vec3;
const Mat4 = za.Mat4;

const Render2 = @This();

// TODO: support ortho
pub const Camera = struct {
    pos: Vec3 = Vec3.zero(),

    // NOTE: fovy is in degrees, which is what za.perspective expects
    fovy: f32 = 60,
    aspect: f32 = 1,
    near: f32 = 0.1,
    far: f32 = 10,

    view_mat: Mat4 = Mat4.identity(),

    pub fn projection(self: *const Camera) Mat4 {
        return za.perspective(self.fovy, self.aspect, self.near, self.far);
    }
};
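
test "Camera defaults produce a usable projection" {
    // Minimal sanity sketch: with the default identity view, world-to-clip
    // equals the projection matrix itself.
    const cam = Camera{ .aspect = 16.0 / 9.0 };
    const clip_from_world = cam.projection().mul(cam.view_mat);
    try std.testing.expectEqual(cam.projection().data, clip_from_world.data);
}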

var default_camera: Camera = .{};

const MAX_FRAME_LAG = 3;
const PER_FRAME_ARENA_SIZE = 64 * 1024 * 1024; // 64 MiB. TODO: handle devices where even 64 MiB is not available?

gc: *GraphicsContext,
shaderman: *ShaderManager,
assetman: *AssetManager,
command_pool: GraphicsContext.CommandPool,
vulkan_frame_arena: VulkanPerFrameArena,
camera: *Camera = &default_camera,

frame: u32 = 0,
frame_data: [MAX_FRAME_LAG]FrameData = undefined,

// Ring buffer/arena for per frame data
pub const VulkanPerFrameArena = struct {
    const Self = @This();

    pub const FrameRegion = struct {
        start: u64 = 0,
        end: u64 = 0,

        pub fn init(start: u64, end: u64) FrameRegion {
            return FrameRegion{ .start = start, .end = end };
        }

        // If region is wrapping (end < start), returns 2 non-wrapping regions
        pub fn unwrap(self: *const FrameRegion, len: u64, out_non_wrapping_regions: []FrameRegion) []FrameRegion {
            std.debug.assert(out_non_wrapping_regions.len >= 2);

            if (self.end < self.start) {
                out_non_wrapping_regions[0].start = self.start;
                out_non_wrapping_regions[0].end = len;
                out_non_wrapping_regions[1].start = 0;
                out_non_wrapping_regions[1].end = self.end;
                return out_non_wrapping_regions[0..2];
            } else {
                out_non_wrapping_regions[0] = self.*;
                return out_non_wrapping_regions[0..1];
            }
        }

        pub fn intersectsNonWrapping(self: *const FrameRegion, other: *const FrameRegion) bool {
            return !(other.start > self.end or self.start > other.end);
        }

        pub fn intersectsWrapping(self: *const FrameRegion, other: *const FrameRegion, len: u64) bool {
            var buf_a: [2]FrameRegion = undefined;
            var buf_b: [2]FrameRegion = undefined;
            const non_wrapping_regions_a = self.unwrap(len, &buf_a);
            const non_wrapping_regions_b = other.unwrap(len, &buf_b);

            for (non_wrapping_regions_a) |region_a| {
                for (non_wrapping_regions_b) |region_b| {
                    if (region_a.intersectsNonWrapping(&region_b)) {
                        return true;
                    }
                }
            }

            return false;
        }
    };
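
    // NOTE: quick sanity checks (a sketch) for the wrap-around region math.
    // FrameRegion is pure arithmetic, so this runs under `zig test` without a
    // Vulkan device.
    test "FrameRegion unwrap and intersection" {
        var buf: [2]FrameRegion = undefined;

        // A non-wrapping region stays a single region.
        const simple = FrameRegion.init(10, 20);
        try std.testing.expectEqual(@as(usize, 1), simple.unwrap(100, &buf).len);

        // A wrapping region (end < start) splits into [start, len) and [0, end).
        const wrapping = FrameRegion.init(90, 20);
        const parts = wrapping.unwrap(100, &buf);
        try std.testing.expectEqual(@as(usize, 2), parts.len);
        try std.testing.expectEqual(@as(u64, 100), parts[0].end);
        try std.testing.expectEqual(@as(u64, 0), parts[1].start);

        // The wrapped region covers offset 95 but not the middle gap.
        const probe = FrameRegion.init(94, 96);
        try std.testing.expect(wrapping.intersectsWrapping(&probe, 100));
        const gap = FrameRegion.init(40, 50);
        try std.testing.expect(!wrapping.intersectsWrapping(&gap, 100));
    }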

    memory: vk.DeviceMemory,
    size: u64,
    tail: u64 = 0,
    frame: u32 = 0,

    // Tracks the region of the arena each in-flight frame occupies.
    // Allocations fail if they would overlap the oldest in-flight frame's
    // region (see findSlotChecked).
    // NOTE: bug in zig? Tried to use [MAX_FRAME_LAG]?u64 here, but optional checks pass even when value is null, wtf??
    frame_regions: [MAX_FRAME_LAG]?FrameRegion = [_]?FrameRegion{null} ** MAX_FRAME_LAG,

    // Tracking allocated resources per frame, unfortunately have to wait for frame to finish before we can destroy them :(
    buffers: [MAX_FRAME_LAG][1024]vk.Buffer = undefined,
    buffer_counts: [MAX_FRAME_LAG]u16 = [_]u16{0} ** MAX_FRAME_LAG,

    pub fn init(memory: vk.DeviceMemory, size: u64) Self {
        return Self{
            .memory = memory,
            .size = size,
        };
    }

    pub fn startFrame(self: *VulkanPerFrameArena, device: GraphicsContext.Device, frame_index: u32) void {
        // TODO: tail pointer should be aligned to nonCoherentAtomSize to avoid accidentally flushing memory being used by previous frames
        // if we end up allocating right up until the previous frame's head

        // Close off the previous frame's region, then record where this frame starts
        if (self.frame_regions[self.frame]) |*cur_region| {
            cur_region.end = self.tail;
        }
        self.frame = frame_index;
        self.frame_regions[self.frame] = FrameRegion.init(self.tail, self.tail);

        // Destroy buffers this frame slot allocated last time around (the caller
        // has already waited on its fence)
        for (self.buffers[self.frame][0..self.buffer_counts[self.frame]]) |buf| {
            device.destroyBuffer(buf, null);
        }
        self.buffer_counts[self.frame] = 0;
    }

    // Caller guarantees that memory from given frame can be safely stomped, buffers destroyed etc.
    pub fn resetFrame(self: *VulkanPerFrameArena, frame_index: u32) void {
        self.frame_regions[frame_index] = null;
    }

    pub fn getModifiedMemoryRanges(self: *VulkanPerFrameArena, out_ranges: []vk.MappedMemoryRange) []const vk.MappedMemoryRange {
        std.debug.assert(out_ranges.len >= 2);
        std.debug.assert(self.frame_regions[self.frame] != null);

        const region = self.frame_regions[self.frame].?;

        // We wrapped, use two ranges: [region.start, size) and [0, tail)
        if (self.tail < region.start) {
            out_ranges[0] = vk.MappedMemoryRange{
                .memory = self.memory,
                .offset = region.start,
                .size = self.size - region.start,
            };
            out_ranges[1] = vk.MappedMemoryRange{
                .memory = self.memory,
                .offset = 0,
                .size = self.tail,
            };

            return out_ranges[0..2];
        } else {
            out_ranges[0] = vk.MappedMemoryRange{
                .memory = self.memory,
                .offset = region.start,
                .size = self.tail - region.start,
            };

            return out_ranges[0..1];
        }
    }

    // Finds offset where memory can be put, handles wrapping, doesn't handle inter-frame stomping
    fn findSlotOptimistic(self: *const Self, size: u64, alignment: u64) !u64 {
        const offset = std.mem.alignForward(u64, self.tail, alignment);

        if (offset + size <= self.size) {
            return offset;
        } else if (size <= self.size) {
            // Doesn't fit at the end, wrap around to the start
            return 0;
        } else {
            return error.OutOfMemory;
        }
    }
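
    test "findSlotOptimistic wraps when the aligned slot would overflow" {
        // Sketch: exercises only the offset math, no real Vulkan memory is bound.
        var arena = init(.null_handle, 100);
        arena.tail = 90;
        // 16 bytes no longer fit at aligned offset 96, so we wrap to 0...
        try std.testing.expectEqual(@as(u64, 0), try arena.findSlotOptimistic(16, 16));
        // ...but an allocation larger than the whole arena still fails.
        try std.testing.expectError(error.OutOfMemory, arena.findSlotOptimistic(200, 16));
    }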

    fn findSlotChecked(self: *const Self, size: u64, alignment: u64) !u64 {
        // The next frame slot is the oldest frame still potentially in flight
        const next_frame = (self.frame + 1) % MAX_FRAME_LAG;
        const offset = try self.findSlotOptimistic(size, alignment);

        if (self.frame_regions[next_frame]) |next_frame_region| {
            const allocated_region = FrameRegion.init(offset, offset + size);

            if (next_frame_region.intersectsWrapping(&allocated_region, self.size)) {
                return error.OverlapsPreviousFrame;
            }
        }

        return offset;
    }
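
    test "findSlotChecked refuses to stomp the oldest in-flight frame" {
        // Sketch: frame 1 (the next slot to be reused) still owns [20, 40).
        var arena = init(.null_handle, 100);
        arena.tail = 10;
        arena.frame_regions[1] = FrameRegion.init(20, 40);
        // A 32-byte allocation at offset 10 would reach into [20, 40) -> rejected.
        try std.testing.expectError(error.OverlapsPreviousFrame, arena.findSlotChecked(32, 1));
        // An 8-byte allocation ends before offset 20 -> fine.
        try std.testing.expectEqual(@as(u64, 10), try arena.findSlotChecked(8, 1));
    }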

    pub fn allocate(self: *Self, size: u64, alignment: u64) !u64 {
        const offset = try self.findSlotChecked(size, alignment);

        self.tail = offset + size;

        return offset;
    }

    pub fn createBufferRaw(self: *Self, device: GraphicsContext.Device, usage: vk.BufferUsageFlags, size: u64, out_addr: *u64) !vk.Buffer {
        // NOTE: Allocating buffers just in time, hopefully vulkan impl is smart about allocation here and not doing new each time...
        const buffer = try device.createBuffer(&vk.BufferCreateInfo{
            .flags = .{},
            .usage = usage,
            .size = size,
            .sharing_mode = .exclusive,
        }, null);
        errdefer device.destroyBuffer(buffer, null);
        const mem_reqs = device.getBufferMemoryRequirements(buffer);

        out_addr.* = try self.allocate(mem_reqs.size, mem_reqs.alignment);

        try device.bindBufferMemory(buffer, self.memory, out_addr.*);

        // Remember the buffer so startFrame can destroy it once this frame slot comes around again
        std.debug.assert(self.buffer_counts[self.frame] < self.buffers[self.frame].len);
        self.buffers[self.frame][self.buffer_counts[self.frame]] = buffer;
        self.buffer_counts[self.frame] += 1;

        return buffer;
    }

    pub fn reset(self: *Self) void {
        self.tail = 0;
    }
};
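
// A typical frame with the arena, as draw() below does it (sketch; `arena`,
// `device` and `frame_index` come from the surrounding renderer):
//
//   arena.resetFrame(frame_index);          // safe: this frame's fence has signaled
//   arena.startFrame(device, frame_index);  // reclaims old buffers, opens a new region
//   var addr: u64 = 0;
//   const buf = try arena.createBufferRaw(device, .{ .uniform_buffer_bit = true }, 256, &addr);
//   // ... write into the mapped arena memory at `addr` ...
//   var range_buf: [2]vk.MappedMemoryRange = undefined;
//   const ranges = arena.getModifiedMemoryRanges(&range_buf);
//   try device.flushMappedMemoryRanges(@intCast(ranges.len), ranges.ptr);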

pub fn init(self: *Render2, gc: *GraphicsContext, shaderman: *ShaderManager, assetman: *AssetManager) !void {
    const per_frame_upload_memory = try gc.device.allocateMemory(&.{
        .memory_type_index = gc.memory_config.cpu_to_gpu.type_index,
        .allocation_size = PER_FRAME_ARENA_SIZE,
    }, null);

    self.* = Render2{
        .gc = gc,
        .shaderman = shaderman,
        .assetman = assetman,
        .command_pool = try gc.queues.graphics.createCommandPool(.{ .reset_command_buffer_bit = true }),
        .vulkan_frame_arena = VulkanPerFrameArena.init(per_frame_upload_memory, PER_FRAME_ARENA_SIZE),
    };
    errdefer self.command_pool.deinit();

    // NOTE: TEST
    for (0..MAX_FRAME_LAG) |i| {
        self.frame_data[i] = try FrameData.init(gc, self.command_pool);
    }
}

fn createPerFrameBuffer(self: *Render2, usage: vk.BufferUsageFlags, size: u64, out_addr: *u64) !vk.Buffer {
    while (true) {
        if (self.vulkan_frame_arena.createBufferRaw(self.gc.device, usage, size, out_addr)) |buffer| {
            return buffer;
        } else |err| switch (err) {
            error.OverlapsPreviousFrame => {
                // The arena ran into memory still owned by the oldest in-flight
                // frame; wait for that frame to finish, release its region, retry
                const overlapped_frame = (self.frame + 1) % MAX_FRAME_LAG;

                std.debug.print("Vulkan Frame Allocator overlapped frame {}, waiting for it to finish...\n", .{overlapped_frame});

                try self.frame_data[overlapped_frame].waitForDrawAndReset(self.gc.device);
                self.vulkan_frame_arena.resetFrame(overlapped_frame);
            },
            else => return err,
        }
    }
}

fn frameAllocMemReqs(self: *Render2, mem_reqs: vk.MemoryRequirements) !u64 {
    return self.vulkan_frame_arena.allocate(mem_reqs.size, mem_reqs.alignment);
}

pub fn draw(self: *Render2) !void {
    const device = self.gc.device;
    const frame = &self.frame_data[self.frame];

    try frame.waitForDrawAndReset(self.gc.device);
    self.vulkan_frame_arena.resetFrame(self.frame);
    self.vulkan_frame_arena.startFrame(self.gc.device, self.frame);

    // Map the whole arena for this frame; modified ranges are flushed before submit
    const frame_arena_mem: []u8 = @as([*c]u8, @ptrCast((try device.mapMemory(self.vulkan_frame_arena.memory, 0, self.vulkan_frame_arena.size, .{})).?))[0..self.vulkan_frame_arena.size];

    var global_buffer_addr: u64 = 0;
    const global_uniform_buffer = try self.createPerFrameBuffer(.{ .uniform_buffer_bit = true }, @sizeOf(GlobalUniform), &global_buffer_addr);

    // Write the camera matrices into the arena-backed global uniform buffer
    {
        const global_uniform: *align(1) GlobalUniform = std.mem.bytesAsValue(GlobalUniform, frame_arena_mem[global_buffer_addr .. global_buffer_addr + @sizeOf(GlobalUniform)]);

        {
            const view = self.camera.view_mat;
            // const fwidth: f32 = @floatFromInt(self.gc.swapchain_extent.width);
            // const fheight: f32 = @floatFromInt(self.gc.swapchain_extent.height);
            const projection = self.camera.projection();
            const view_projection = projection.mul(view);

            global_uniform.* = .{
                .view = .{
                    .world_to_view = view,
                    .view_to_clip = projection,
                    .world_to_clip = view_projection,
                },
            };
        }
    }

    // TODO: Move this out into a separate func
    const swapchain_image_index: u32 = try self.gc.acquireSwapchainImage(frame.acquire_swapchain_image);

    var current_image = GraphicsContext.Image{ .handle = self.gc.swapchain_images[swapchain_image_index], .mip_count = 1, .layer_count = 1, .format = .r8g8b8a8_unorm };
    const current_image_view = try current_image.createView(self.gc.device, .{ .color_bit = true });
    defer self.gc.device.destroyImageView(current_image_view, null);

    const cmds = frame.command_buffer;

    try cmds.beginCommandBuffer(&.{});
    {
        // Make host writes to the global uniform buffer visible to vertex shader reads
        cmds.pipelineBarrier2(&vk.DependencyInfo{
            .buffer_memory_barrier_count = 1,
            .p_buffer_memory_barriers = &.{
                vk.BufferMemoryBarrier2{
                    .buffer = global_uniform_buffer,
                    .src_stage_mask = .{ .host_bit = true },
                    .src_access_mask = .{ .host_write_bit = true },
                    .dst_stage_mask = .{ .vertex_shader_bit = true },
                    .dst_access_mask = .{ .shader_read_bit = true },
                    .offset = 0,
                    .size = @sizeOf(GlobalUniform),
                    .src_queue_family_index = vk.QUEUE_FAMILY_IGNORED,
                    .dst_queue_family_index = vk.QUEUE_FAMILY_IGNORED,
                },
            },
        });

        const global_descriptor_set = try frame.allocateDescriptorSet(device, self.shaderman.descriptor_set_layouts.global);
        device.updateDescriptorSets(1, &.{
            vk.WriteDescriptorSet{
                .dst_set = global_descriptor_set,
                .dst_binding = 0,
                .dst_array_element = 0,
                .descriptor_type = .uniform_buffer,
                .descriptor_count = 1,
                .p_buffer_info = &.{
                    vk.DescriptorBufferInfo{
                        .buffer = global_uniform_buffer,
                        .offset = 0,
                        .range = @sizeOf(GlobalUniform),
                    },
                },
                .p_image_info = &[_]vk.DescriptorImageInfo{},
                .p_texel_buffer_view = &[_]vk.BufferView{},
            },
        }, 0, null);

        try current_image.sync(cmds, .{ .stage_mask = .{ .color_attachment_output_bit = true }, .access_mask = .{ .color_attachment_write_bit = true } }, .attachment_optimal);
        {
            cmds.beginRendering(&.{
                .render_area = vk.Rect2D{ .offset = .{ .x = 0, .y = 0 }, .extent = self.gc.swapchain_extent },
                .layer_count = 1,
                .view_mask = 0,
                .color_attachment_count = 1,
                .p_color_attachments = &.{
                    vk.RenderingAttachmentInfo{
                        .clear_value = .{ .color = .{ .float_32 = .{ 0.8, 0.7, 0.6, 1.0 } } },
                        .load_op = .clear,
                        .store_op = .store,
                        .image_layout = .attachment_optimal,
                        .image_view = current_image_view,
                        .resolve_image_layout = .attachment_optimal,
                        .resolve_mode = .{},
                    },
                },
            });
            defer cmds.endRendering();

            const triangle = self.assetman.resolveShaderProgram(a.ShaderPrograms.shaders.triangle);

            cmds.bindPipeline(.graphics, triangle.pipeline);
            cmds.bindDescriptorSets(.graphics, triangle.layout, 0, 1, &.{global_descriptor_set}, 0, null);

            cmds.setViewportWithCount(1, &.{vk.Viewport{
                .x = 0,
                .y = 0,
                .width = @floatFromInt(self.gc.swapchain_extent.width),
                .height = @floatFromInt(self.gc.swapchain_extent.height),
                .min_depth = 0,
                .max_depth = 1,
            }});
            cmds.setScissorWithCount(1, &.{vk.Rect2D{
                .offset = .{ .x = 0, .y = 0 },
                .extent = self.gc.swapchain_extent,
            }});

            cmds.draw(3, 1, 0, 0);
        }

        try current_image.sync(cmds, .{}, .present_src_khr);
    }
    try cmds.endCommandBuffer();

    var vulkan_frame_arena_modified_ranges_buf: [2]vk.MappedMemoryRange = undefined;
    const vulkan_frame_arena_modified_ranges = self.vulkan_frame_arena.getModifiedMemoryRanges(&vulkan_frame_arena_modified_ranges_buf);
    try device.flushMappedMemoryRanges(@intCast(vulkan_frame_arena_modified_ranges.len), vulkan_frame_arena_modified_ranges.ptr);

    // NOTE: Unmap DEVICE_LOCAL, HOST_VISIBLE memory before submit as it can be slow on Windows (according to Reddit...)
    device.unmapMemory(self.vulkan_frame_arena.memory);

    try self.gc.queues.graphics.submit(
        &GraphicsContext.SubmitInfo{
            .wait_semaphores = &.{frame.acquire_swapchain_image},
            // Wait at color-attachment-output so rendering doesn't start before the image is acquired
            .wait_dst_stage_mask = &.{vk.PipelineStageFlags{ .color_attachment_output_bit = true }},
            .command_buffers = &.{cmds.handle},
            .signal_semaphores = &.{frame.draw_sema},
        },
        frame.draw_fence,
    );

    _ = try self.gc.device.queuePresentKHR(self.gc.queues.graphics.handle, &.{
        .swapchain_count = 1,
        .wait_semaphore_count = 1,
        .p_wait_semaphores = &.{frame.draw_sema},
        .p_swapchains = &.{self.gc.swapchain},
        .p_image_indices = &.{swapchain_image_index},
    });

    self.frame = (self.frame + 1) % MAX_FRAME_LAG;
}

fn uploadData(self: *Render2, cmds: GraphicsContext.CommandBuffer, dst: GraphicsContext.Buffer, dst_offset: usize, len: usize) !void {
    // NOTE: still references `upload_buffer`/`upload_buffer_cursor`, which this
    // file no longer declares; compiles only because nothing references this function yet
    cmds.copyBuffer2(&.{
        .src_buffer = self.upload_buffer.handle,
        .dst_buffer = dst.handle,
        .region_count = 1,
        .p_regions = &.{
            vk.BufferCopy2{
                .src_offset = self.upload_buffer_cursor,
                .dst_offset = dst_offset,
                .size = len,
            },
        },
    });
    self.upload_buffer_cursor += len;
}

// Per frame stuff
const FrameData = struct {
    // Sync
    acquire_swapchain_image: vk.Semaphore,
    draw_sema: vk.Semaphore,
    draw_fence: vk.Fence,

    command_buffer: GraphicsContext.CommandBuffer,
    descriptor_pool: vk.DescriptorPool = .null_handle,

    pub fn init(gc: *GraphicsContext, command_pool: GraphicsContext.CommandPool) !FrameData {
        return FrameData{
            .acquire_swapchain_image = try gc.device.createSemaphore(&.{}, null),
            .draw_sema = try gc.device.createSemaphore(&.{}, null),
            // Fence starts signaled so the first waitForDrawAndReset doesn't block forever
            .draw_fence = try gc.device.createFence(&.{ .flags = .{ .signaled_bit = true } }, null),

            .command_buffer = try command_pool.allocateCommandBuffer(),
            .descriptor_pool = try gc.device.createDescriptorPool(&vk.DescriptorPoolCreateInfo{
                .max_sets = 1024,
                .p_pool_sizes = &.{
                    vk.DescriptorPoolSize{
                        .type = .uniform_buffer,
                        .descriptor_count = 8,
                    },
                },
                .pool_size_count = 1,
            }, null),

            // TODO: maybe cache memory requirements?
        };
    }

    pub fn allocateDescriptorSet(self: *FrameData, device: GraphicsContext.Device, layout: vk.DescriptorSetLayout) !vk.DescriptorSet {
        var result: [1]vk.DescriptorSet = .{.null_handle};
        try device.allocateDescriptorSets(&vk.DescriptorSetAllocateInfo{
            .descriptor_pool = self.descriptor_pool,
            .descriptor_set_count = 1,
            .p_set_layouts = &.{layout},
        }, &result);
        return result[0];
    }

    pub fn waitForDrawAndReset(self: *FrameData, device: GraphicsContext.Device) !void {
        _ = try device.waitForFences(1, &.{self.draw_fence}, vk.TRUE, std.math.maxInt(u64));
        try device.resetFences(1, &.{self.draw_fence});

        try self.command_buffer.resetCommandBuffer(.{ .release_resources_bit = true });

        // The whole pool is reset each frame; descriptor sets are allocated fresh every draw
        try device.resetDescriptorPool(self.descriptor_pool, .{});
    }
};

const GlobalUniform = extern struct {
    pub const View = extern struct {
        world_to_clip: Mat4,
        view_to_clip: Mat4,
        world_to_view: Mat4,
    };

    view: View,
};
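
// NOTE: sketch of a layout guard; with extern layout the three mat4s pack
// back-to-back, which is what a std140 uniform block of three mat4s expects.
comptime {
    std.debug.assert(@sizeOf(GlobalUniform) == 3 * @sizeOf(Mat4));
}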