const std = @import("std");
const GraphicsContext = @import("GraphicsContext.zig");
const AssetManager = @import("AssetManager.zig");
const ShaderManager = @import("ShaderManager.zig");
const vk = @import("vk");
const a = @import("asset_manifest");
const za = @import("zalgebra");
const Vec3 = za.Vec3;
const Mat4 = za.Mat4;

const Render2 = @This();

// TODO: support ortho
pub const Camera = struct {
    pos: Vec3 = Vec3.zero(),
    fovy: f32 = 60,
    aspect: f32 = 1,
    near: f32 = 0.1,
    far: f32 = 10,
    view_mat: Mat4 = Mat4.identity(),

    pub fn projection(self: *const Camera) Mat4 {
        return za.perspective(self.fovy, self.aspect, self.near, self.far);
    }
};

var default_camera: Camera = .{};

const MAX_FRAME_LAG = 3;
const PER_FRAME_ARENA_SIZE = 64 * 1024 * 1024; // 64mb. TODO: should I handle cases when even 64mb is not available?

gc: *GraphicsContext,
shaderman: *ShaderManager,
assetman: *AssetManager,
command_pool: GraphicsContext.CommandPool,
vulkan_frame_arena: VulkanPerFrameArena,
camera: *Camera = &default_camera,
frame: u32 = 0,
frame_data: [MAX_FRAME_LAG]FrameData = undefined,

// Ring buffer/arena for per-frame data
pub const VulkanPerFrameArena = struct {
    const Self = @This();

    pub const FrameRegion = struct {
        start: u64 = 0,
        end: u64 = 0,

        pub fn init(start: u64, end: u64) FrameRegion {
            return FrameRegion{ .start = start, .end = end };
        }

        // If the region wraps (end < start), returns 2 non-wrapping regions
        pub fn unwrap(self: *const FrameRegion, len: u64, out_non_wrapping_regions: []FrameRegion) []FrameRegion {
            std.debug.assert(out_non_wrapping_regions.len >= 2);

            if (self.end < self.start) {
                out_non_wrapping_regions[0].start = self.start;
                out_non_wrapping_regions[0].end = len;
                out_non_wrapping_regions[1].start = 0;
                out_non_wrapping_regions[1].end = self.end;
                return out_non_wrapping_regions[0..2];
            } else {
                out_non_wrapping_regions[0] = self.*;
                return out_non_wrapping_regions[0..1];
            }
        }

        pub fn intersectsNonWrapping(self: *const FrameRegion, other: *const FrameRegion) bool {
            return !(other.start > self.end or self.start > other.end);
        }

        pub fn intersectsWrapping(self: *const FrameRegion, other: *const FrameRegion, len: u64) bool {
            var buf_a: [2]FrameRegion = undefined;
            var buf_b: [2]FrameRegion = undefined;
            const non_wrapping_regions_a = self.unwrap(len, &buf_a);
            const non_wrapping_regions_b = other.unwrap(len, &buf_b);

            for (non_wrapping_regions_a) |region_a| {
                for (non_wrapping_regions_b) |region_b| {
                    if (region_a.intersectsNonWrapping(&region_b)) {
                        return true;
                    }
                }
            }
            return false;
        }
    };

    memory: vk.DeviceMemory,
    size: u64,
    tail: u64 = 0,
    frame: u32 = 0,

    // Tracks where the start offset for each frame is.
    // Allocations will fail if they would overlap a region still owned by an in-flight frame (see findSlotChecked).
    // NOTE: bug in zig? Tried to use [MAX_FRAME_LAG]?u64 here, but optional checks pass even when value is null, wtf??
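    // For illustration (hypothetical numbers): with size = 100, a frame whose allocations start at
    // offset 90 and wrap around to end at offset 20 owns the region [90, 20); unwrap() splits it
    // into [90, 100) and [0, 20) so the overlap check against other frames stays simple.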
    frame_regions: [MAX_FRAME_LAG]?FrameRegion = [_]?FrameRegion{null} ** MAX_FRAME_LAG,

    // Tracking allocated resources per frame; unfortunately we have to wait for the frame to finish before we can destroy them :(
    buffers: [MAX_FRAME_LAG][1024]vk.Buffer = undefined,
    buffer_counts: [MAX_FRAME_LAG]u16 = [_]u16{0} ** MAX_FRAME_LAG,

    pub fn init(memory: vk.DeviceMemory, size: u64) Self {
        return Self{
            .memory = memory,
            .size = size,
        };
    }

    pub fn startFrame(self: *VulkanPerFrameArena, device: GraphicsContext.Device, frame_index: u32) void {
        // TODO: tail pointer should be aligned to nonCoherentAtomSize to avoid accidentally flushing memory being used by previous frames
        // if we end up allocating right up until the previous frame's head

        // Record where the previous frame's region ends and where this frame's region starts
        if (self.frame_regions[self.frame]) |*cur_region| {
            cur_region.end = self.tail;
        }
        self.frame = frame_index;
        self.frame_regions[self.frame] = FrameRegion.init(self.tail, self.tail);

        for (self.buffers[self.frame][0..self.buffer_counts[self.frame]]) |buf| {
            device.destroyBuffer(buf, null);
        }
        self.buffer_counts[self.frame] = 0;
    }

    // Caller guarantees that memory from the given frame can be safely stomped, buffers destroyed etc.
    pub fn resetFrame(self: *VulkanPerFrameArena, frame_index: u32) void {
        self.frame_regions[frame_index] = null;
    }

    pub fn getModifiedMemoryRanges(self: *VulkanPerFrameArena, out_ranges: []vk.MappedMemoryRange) []const vk.MappedMemoryRange {
        std.debug.assert(out_ranges.len >= 2);
        std.debug.assert(self.frame_regions[self.frame] != null);

        const region = self.frame_regions[self.frame].?;

        // We wrapped, use two ranges
        if (self.tail < region.start) {
            out_ranges[0] = vk.MappedMemoryRange{
                .memory = self.memory,
                .offset = region.start,
                .size = self.size - region.start,
            };
            out_ranges[1] = vk.MappedMemoryRange{
                .memory = self.memory,
                .offset = 0,
                .size = self.tail,
            };
            return out_ranges[0..2];
        } else {
            out_ranges[0] = vk.MappedMemoryRange{
                .memory = self.memory,
                .offset = region.start,
                .size = self.tail - region.start,
            };
            return out_ranges[0..1];
        }
    }

    // Finds an offset where the allocation can be placed; handles wrapping, doesn't handle inter-frame stomping
    fn findSlotOptimistic(self: *const Self, size: u64, alignment: u64) !u64 {
        const offset = std.mem.alignForward(u64, self.tail, alignment);
        if (offset + size <= self.size) {
            return offset;
        } else if (size <= self.size) {
            // Wrap around to the start of the arena
            return 0;
        } else {
            return error.OutOfMemory;
        }
    }

    fn findSlotChecked(self: *const Self, size: u64, alignment: u64) !u64 {
        const next_frame = (self.frame + 1) % MAX_FRAME_LAG;
        const offset = try self.findSlotOptimistic(size, alignment);

        if (self.frame_regions[next_frame]) |next_frame_region| {
            const allocated_region = FrameRegion.init(offset, offset + size);
            if (next_frame_region.intersectsWrapping(&allocated_region, self.size)) {
                return error.OverlapsPreviousFrame;
            }
        }

        return offset;
    }

    pub fn allocate(self: *Self, size: u64, alignment: u64) !u64 {
        const offset = try self.findSlotChecked(size, alignment);
        self.tail = offset + size;
        return offset;
    }

    pub fn createBufferRaw(self: *Self, device: GraphicsContext.Device, usage: vk.BufferUsageFlags, size: u64, out_addr: *u64) !vk.Buffer {
        // NOTE: Allocating buffers just in time, hopefully the Vulkan impl is smart about allocation here and not doing a new one each time...
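        // The buffer is recorded in buffers[frame] below and destroyed the next time startFrame()
        // runs for this frame slot, i.e. after the frame's fence has been waited on.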
        const buffer = try device.createBuffer(&vk.BufferCreateInfo{
            .flags = .{},
            .usage = usage,
            .size = size,
            .sharing_mode = .exclusive,
        }, null);
        errdefer device.destroyBuffer(buffer, null);

        const mem_reqs = device.getBufferMemoryRequirements(buffer);
        out_addr.* = try self.allocate(mem_reqs.size, mem_reqs.alignment);
        try device.bindBufferMemory(buffer, self.memory, out_addr.*);

        self.buffers[self.frame][self.buffer_counts[self.frame]] = buffer;
        self.buffer_counts[self.frame] += 1;

        return buffer;
    }

    pub fn reset(self: *Self) void {
        self.tail = 0;
    }
};

pub fn init(self: *Render2, gc: *GraphicsContext, shaderman: *ShaderManager, assetman: *AssetManager) !void {
    const per_frame_upload_memory = try gc.device.allocateMemory(&.{
        .memory_type_index = gc.memory_config.cpu_to_gpu.type_index,
        .allocation_size = PER_FRAME_ARENA_SIZE,
    }, null);

    self.* = Render2{
        .gc = gc,
        .shaderman = shaderman,
        .assetman = assetman,
        .command_pool = try gc.queues.graphics.createCommandPool(.{ .reset_command_buffer_bit = true }),
        .vulkan_frame_arena = VulkanPerFrameArena.init(per_frame_upload_memory, PER_FRAME_ARENA_SIZE),
    };
    errdefer self.command_pool.deinit(); // NOTE: TEST

    for (0..MAX_FRAME_LAG) |i| {
        self.frame_data[i] = try FrameData.init(gc, self.command_pool);
    }
}

fn createPerFrameBuffer(self: *Render2, usage: vk.BufferUsageFlags, size: u64, out_addr: *u64) !vk.Buffer {
    while (true) {
        if (self.vulkan_frame_arena.createBufferRaw(self.gc.device, usage, size, out_addr)) |buffer| {
            return buffer;
        } else |err| switch (err) {
            error.OverlapsPreviousFrame => {
                const overlapped_frame = (self.frame + 1) % MAX_FRAME_LAG;
                std.debug.print("Vulkan Frame Allocator overlapped frame {}, waiting for it to finish...\n", .{overlapped_frame});
                try self.frame_data[overlapped_frame].waitForDrawAndReset(self.gc.device);
                self.vulkan_frame_arena.resetFrame(overlapped_frame);
            },
            else => return err,
        }
    }
}

fn frameAllocMemReqs(self: *Render2, mem_reqs: vk.MemoryRequirements) !u64 {
    return self.vulkan_frame_arena.allocate(mem_reqs.size, mem_reqs.alignment);
}

pub fn draw(self: *Render2) !void {
    const device = self.gc.device;
    const frame = &self.frame_data[self.frame];

    try frame.waitForDrawAndReset(self.gc.device);
    self.vulkan_frame_arena.resetFrame(self.frame);
    self.vulkan_frame_arena.startFrame(self.gc.device, self.frame);

    const frame_arena_mem: []u8 = @as(
        [*c]u8,
        @ptrCast((try device.mapMemory(self.vulkan_frame_arena.memory, 0, self.vulkan_frame_arena.size, .{})).?),
    )[0..self.vulkan_frame_arena.size];

    var global_buffer_addr: u64 = 0;
    const global_uniform_buffer = try self.createPerFrameBuffer(.{ .uniform_buffer_bit = true }, @sizeOf(GlobalUniform), &global_buffer_addr);
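    // global_buffer_addr is the offset inside the arena that the buffer was bound at, so
    // writing to the mapped frame_arena_mem at that offset fills the uniform buffer's contents.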
    {
        const global_uniform: *align(1) GlobalUniform = std.mem.bytesAsValue(
            GlobalUniform,
            frame_arena_mem[global_buffer_addr .. global_buffer_addr + @sizeOf(GlobalUniform)],
        );
        {
            const view = self.camera.view_mat;
            // const fwidth: f32 = @floatFromInt(self.gc.swapchain_extent.width);
            // const fheight: f32 = @floatFromInt(self.gc.swapchain_extent.height);
            const projection = self.camera.projection();
            const view_projection = projection.mul(view);

            global_uniform.* = .{
                .view = .{
                    .world_to_view = view,
                    .view_to_clip = projection,
                    .world_to_clip = view_projection,
                },
            };
        }
    }

    // Move this out into a separate func
    const swapchain_image_index: u32 = try self.gc.acquireSwapchainImage(frame.acquire_swapchain_image);
    var current_image = GraphicsContext.Image{
        .handle = self.gc.swapchain_images[swapchain_image_index],
        .mip_count = 1,
        .layer_count = 1,
        .format = .r8g8b8a8_unorm,
    };
    const current_image_view = try current_image.createView(self.gc.device, .{ .color_bit = true });
    defer self.gc.device.destroyImageView(current_image_view, null);

    const cmds = frame.command_buffer;
    try cmds.beginCommandBuffer(&.{});
    {
        // Transition global uniform buffer
        cmds.pipelineBarrier2(&vk.DependencyInfo{
            .buffer_memory_barrier_count = 1,
            .p_buffer_memory_barriers = &.{
                vk.BufferMemoryBarrier2{
                    .buffer = global_uniform_buffer,
                    .src_stage_mask = .{ .host_bit = true },
                    .src_access_mask = .{ .host_write_bit = true },
                    .dst_stage_mask = .{ .vertex_shader_bit = true },
                    .dst_access_mask = .{ .shader_read_bit = true },
                    .offset = 0,
                    .size = @sizeOf(GlobalUniform),
                    .src_queue_family_index = vk.QUEUE_FAMILY_IGNORED,
                    .dst_queue_family_index = vk.QUEUE_FAMILY_IGNORED,
                },
            },
        });

        const global_descriptor_set = try frame.allocateDescriptorSet(device, self.shaderman.descriptor_set_layouts.global);
        device.updateDescriptorSets(1, &.{
            vk.WriteDescriptorSet{
                .dst_set = global_descriptor_set,
                .dst_binding = 0,
                .dst_array_element = 0,
                .descriptor_type = .uniform_buffer,
                .descriptor_count = 1,
                .p_buffer_info = &.{
                    vk.DescriptorBufferInfo{
                        .buffer = global_uniform_buffer,
                        .offset = 0,
                        .range = @sizeOf(GlobalUniform),
                    },
                },
                .p_image_info = &[_]vk.DescriptorImageInfo{},
                .p_texel_buffer_view = &[_]vk.BufferView{},
            },
        }, 0, null);

        try current_image.sync(
            cmds,
            .{ .stage_mask = .{ .color_attachment_output_bit = true }, .access_mask = .{ .color_attachment_write_bit = true } },
            .attachment_optimal,
        );

        {
            cmds.beginRendering(&.{
                .render_area = vk.Rect2D{ .offset = .{ .x = 0, .y = 0 }, .extent = self.gc.swapchain_extent },
                .layer_count = 1,
                .view_mask = 0,
                .color_attachment_count = 1,
                .p_color_attachments = &.{
                    vk.RenderingAttachmentInfo{
                        .clear_value = .{ .color = .{ .float_32 = .{ 0.8, 0.7, 0.6, 1.0 } } },
                        .load_op = .clear,
                        .store_op = .store,
                        .image_layout = .attachment_optimal,
                        .image_view = current_image_view,
                        .resolve_image_layout = .attachment_optimal,
                        .resolve_mode = .{},
                    },
                },
            });
            defer cmds.endRendering();

            const triangle = self.assetman.resolveShaderProgram(a.ShaderPrograms.shaders.triangle);
            cmds.bindPipeline(.graphics, triangle.pipeline);
            cmds.bindDescriptorSets(.graphics, triangle.layout, 0, 1, &.{global_descriptor_set}, 0, null);
            cmds.setViewportWithCount(1, &.{vk.Viewport{
                .x = 0,
                .y = 0,
                .width = @floatFromInt(self.gc.swapchain_extent.width),
                .height = @floatFromInt(self.gc.swapchain_extent.height),
                .min_depth = 0,
                .max_depth = 1,
            }});
            cmds.setScissorWithCount(1, &.{vk.Rect2D{
                .offset = .{ .x = 0, .y = 0 },
                .extent = self.gc.swapchain_extent,
            }});
            cmds.draw(3, 1, 0, 0);
        }

        try current_image.sync(cmds, .{}, .present_src_khr);
    }
    try cmds.endCommandBuffer();
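    // The arena memory is HOST_VISIBLE but presumably not HOST_COHERENT (the nonCoherentAtomSize
    // TODO above suggests so), hence only the ranges written this frame are flushed before submit.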
    var vulkan_frame_arena_modified_ranges_buf: [2]vk.MappedMemoryRange = undefined;
    const vulkan_frame_arena_modified_ranges = self.vulkan_frame_arena.getModifiedMemoryRanges(&vulkan_frame_arena_modified_ranges_buf);
    try device.flushMappedMemoryRanges(@intCast(vulkan_frame_arena_modified_ranges.len), vulkan_frame_arena_modified_ranges.ptr);
    // NOTE: Unmap DEVICE_LOCAL, HOST_VISIBLE memory before submit as it can be slow on Windows (according to Reddit...)
    device.unmapMemory(self.vulkan_frame_arena.memory);

    try self.gc.queues.graphics.submit(
        &GraphicsContext.SubmitInfo{
            .wait_semaphores = &.{frame.acquire_swapchain_image},
            .wait_dst_stage_mask = &.{vk.PipelineStageFlags{}},
            .command_buffers = &.{cmds.handle},
            .signal_semaphores = &.{frame.draw_sema},
        },
        frame.draw_fence,
    );

    _ = try self.gc.device.queuePresentKHR(self.gc.queues.graphics.handle, &.{
        .swapchain_count = 1,
        .wait_semaphore_count = 1,
        .p_wait_semaphores = &.{frame.draw_sema},
        .p_swapchains = &.{self.gc.swapchain},
        .p_image_indices = &.{swapchain_image_index},
    });

    self.frame = (self.frame + 1) % MAX_FRAME_LAG;
}

// TODO: needs upload_buffer/upload_buffer_cursor fields on Render2 (not defined here yet)
fn uploadData(self: *Render2, cmds: GraphicsContext.CommandBuffer, dst: GraphicsContext.Buffer, dst_offset: usize, len: usize) !void {
    cmds.copyBuffer2(&.{
        .src_buffer = self.upload_buffer.handle,
        .dst_buffer = dst.handle,
        .region_count = 1,
        .p_regions = &.{
            vk.BufferCopy2{
                .src_offset = self.upload_buffer_cursor,
                .dst_offset = dst_offset,
                .size = len,
            },
        },
    });
    self.upload_buffer_cursor += len;
}

// Per frame stuff
const FrameData = struct {
    // Sync
    acquire_swapchain_image: vk.Semaphore,
    draw_sema: vk.Semaphore,
    draw_fence: vk.Fence,

    command_buffer: GraphicsContext.CommandBuffer,
    descriptor_pool: vk.DescriptorPool = .null_handle,

    pub fn init(gc: *GraphicsContext, command_pool: GraphicsContext.CommandPool) !FrameData {
        return FrameData{
            .acquire_swapchain_image = try gc.device.createSemaphore(&.{}, null),
            .draw_sema = try gc.device.createSemaphore(&.{}, null),
            .draw_fence = try gc.device.createFence(&.{ .flags = .{ .signaled_bit = true } }, null),
            .command_buffer = try command_pool.allocateCommandBuffer(),
            .descriptor_pool = try gc.device.createDescriptorPool(&vk.DescriptorPoolCreateInfo{
                .max_sets = 1024,
                .p_pool_sizes = &.{
                    vk.DescriptorPoolSize{
                        .type = .uniform_buffer,
                        .descriptor_count = 8,
                    },
                },
                .pool_size_count = 1,
            }, null),
            // TODO: maybe cache memory requirements?
        };
    }

    pub fn allocateDescriptorSet(self: *FrameData, device: GraphicsContext.Device, layout: vk.DescriptorSetLayout) !vk.DescriptorSet {
        var result: [1]vk.DescriptorSet = .{.null_handle};
        try device.allocateDescriptorSets(&vk.DescriptorSetAllocateInfo{
            .descriptor_pool = self.descriptor_pool,
            .descriptor_set_count = 1,
            .p_set_layouts = &.{layout},
        }, &result);
        return result[0];
    }

    pub fn waitForDrawAndReset(self: *FrameData, device: GraphicsContext.Device) !void {
        _ = try device.waitForFences(1, &.{self.draw_fence}, vk.TRUE, std.math.maxInt(u64));
        try device.resetFences(1, &.{self.draw_fence});
        try self.command_buffer.resetCommandBuffer(.{ .release_resources_bit = true });
        try device.resetDescriptorPool(self.descriptor_pool, .{});
    }
};

const GlobalUniform = extern struct {
    pub const View = extern struct {
        world_to_clip: Mat4,
        view_to_clip: Mat4,
        world_to_view: Mat4,
    };

    view: View,
};
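// Usage sketch (hypothetical call site; assumes `gc`, `shaderman` and `assetman` are initialized elsewhere):
//
//     var renderer: Render2 = undefined;
//     try renderer.init(gc, shaderman, assetman);
//     while (running) {
//         try renderer.draw();
//     }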