From baf3e2fee827565d86e65cee39122d0e009b4d66 Mon Sep 17 00:00:00 2001 From: sergeypdev Date: Sun, 15 Dec 2024 18:45:32 +0400 Subject: [PATCH] Add spirv reflection to figure out push constant ranges, add support for bindless textures and a global persistent descriptor set --- assets/shaders/global.glsl | 5 +- assets/shaders/out.spv.d | 1 + assets/shaders/triangle.glsl | 9 + build.zig | 6 +- build.zig.zon | 4 + src/AssetManager.zig | 73 ++++- src/DescriptorManager.zig | 230 ++++++++++++++++ src/GraphicsContext.zig | 58 ++-- src/Render2.zig | 499 ++++++++++++++++++++++++++--------- src/ShaderManager.zig | 48 ---- src/formats.zig | 23 +- src/game.zig | 8 +- src/globals.zig | 6 +- tools/asset_compiler.zig | 84 +++++- 14 files changed, 817 insertions(+), 237 deletions(-) create mode 100644 assets/shaders/out.spv.d create mode 100644 src/DescriptorManager.zig delete mode 100644 src/ShaderManager.zig diff --git a/assets/shaders/global.glsl b/assets/shaders/global.glsl index 1776655..6e6e526 100644 --- a/assets/shaders/global.glsl +++ b/assets/shaders/global.glsl @@ -8,8 +8,11 @@ struct View mat4 world_to_view; }; -layout(binding = 0, std430, row_major) uniform GlobalUniform { +layout(set = 0, binding = 0, std430, row_major) uniform GlobalUniform { View view; } Global; +layout(set = 0, binding = 1) uniform sampler global_samplers[]; +layout(set = 0, binding = 2) uniform texture2D global_textures2d[]; + #endif // GLOBAL_GLSL diff --git a/assets/shaders/out.spv.d b/assets/shaders/out.spv.d new file mode 100644 index 0000000..784f2bb --- /dev/null +++ b/assets/shaders/out.spv.d @@ -0,0 +1 @@ +out.spv: post_process.glsl diff --git a/assets/shaders/triangle.glsl b/assets/shaders/triangle.glsl index 3072e29..4d87bd2 100644 --- a/assets/shaders/triangle.glsl +++ b/assets/shaders/triangle.glsl @@ -19,6 +19,15 @@ vec3 colors[3] = vec3[]( layout(location = 0) out vec3 VertexColor; +layout(push_constant) uniform constants { + vec3 my_vec; + float my_float; + mat4x4 my_mat; + uint 
tex_index1; + uint tex_index2; + uint tex_index3; +} PushConstants; + void main() { VertexColor = colors[gl_VertexIndex]; diff --git a/build.zig b/build.zig index 185f832..2f9376e 100644 --- a/build.zig +++ b/build.zig @@ -282,12 +282,12 @@ fn buildAssetCompiler(b: *Build, optimize: std.builtin.OptimizeMode, assets_mod: .skip_tests = true, }); const zalgebra_dep = b.dependency("zalgebra", .{}); - const spirv_cross_dep = b.dependency("spirv-cross", .{ + const spirv_reflect_dep = b.dependency("SPIRV-Reflect", .{ .target = b.host, .optimize = optimize, }); const assimp_lib = assimp_dep.artifact("assimp"); - const spirv_cross_lib = spirv_cross_dep.artifact("spirv-cross"); + const spirv_reflect_lib = spirv_reflect_dep.artifact("spirv-reflect"); const assetc = b.addExecutable(.{ .name = "assetc", @@ -317,7 +317,7 @@ fn buildAssetCompiler(b: *Build, optimize: std.builtin.OptimizeMode, assets_mod: assetc.root_module.addImport("assets", assets_mod); assetc.linkLibrary(assimp_lib); - assetc.linkLibrary(spirv_cross_lib); + assetc.linkLibrary(spirv_reflect_lib); assetc.linkLibC(); assetc.linkLibCpp(); diff --git a/build.zig.zon b/build.zig.zon index 1e73f1b..5c4d63d 100644 --- a/build.zig.zon +++ b/build.zig.zon @@ -51,6 +51,10 @@ .url = "https://github.com/hexops/spirv-cross/tarball/872bd405fece4bf6388abdea916356e26cb8fed9", .hash = "12207bebf82eef06f4f80a7e54c91e4402c0055d04167fdbcf1f350846a350266976", }, + .@"SPIRV-Reflect" = .{ + .url = "https://github.com/sergeypdev/SPIRV-Reflect/tarball/bb3e8b8d5dee32b65e1d16598c526415470fc863", + .hash = "122048fdee255a7ac992068d5ded4bfa88927ddc8af12a5068c69198153a60dbe779", + }, }, .paths = .{ // This makes *all* files, recursively, included in this package. 
It is generally diff --git a/src/AssetManager.zig b/src/AssetManager.zig index 27f1f8e..b789bdf 100644 --- a/src/AssetManager.zig +++ b/src/AssetManager.zig @@ -29,7 +29,7 @@ const sdl = @import("sdl.zig"); const tracy = @import("tracy"); const vk = @import("vk"); const GraphicsContext = @import("GraphicsContext.zig"); -const ShaderManager = @import("ShaderManager.zig"); +const DescriptorManager = @import("DescriptorManager.zig"); pub const AssetId = assets.AssetId; pub const Handle = assets.Handle; @@ -60,7 +60,7 @@ asset_watcher: AssetWatcher = undefined, vertex_heap: VertexBufferHeap, gc: *GraphicsContext, -shaderman: *ShaderManager, +descriptorman: *DescriptorManager, const AssetWatcher = struct { assetman: *AssetManager, @@ -150,7 +150,7 @@ const AssetWatcher = struct { } }; -pub fn init(allocator: std.mem.Allocator, frame_arena: std.mem.Allocator, gc: *GraphicsContext, shaderman: *ShaderManager) AssetManager { +pub fn init(allocator: std.mem.Allocator, frame_arena: std.mem.Allocator, gc: *GraphicsContext, descriptorman: *DescriptorManager) AssetManager { var buf: [std.fs.MAX_PATH_BYTES]u8 = undefined; const exe_dir_path = std.fs.selfExeDirPath(&buf) catch @panic("can't find self exe dir path"); const exe_dir = std.fs.openDirAbsolute(exe_dir_path, .{}) catch @panic("can't open self exe dir path"); @@ -161,7 +161,7 @@ pub fn init(allocator: std.mem.Allocator, frame_arena: std.mem.Allocator, gc: *G .exe_dir = exe_dir, .vertex_heap = VertexBufferHeap.init(allocator) catch @panic("OOM"), .gc = gc, - .shaderman = shaderman, + .descriptorman = descriptorman, }; } @@ -320,16 +320,62 @@ pub fn loadShaderProgram(self: *AssetManager, handle: Handle.ShaderProgram) Load }; } +fn getPushConstantRanges(program: formats.ShaderProgram, buffer: []vk.PushConstantRange) []vk.PushConstantRange { + var len: usize = 0; + switch (program) { + .graphics => { + std.debug.assert(buffer.len >= 2); + if (program.graphics.vertex.push_constant_range.size > 0) { + buffer[len] = 
vk.PushConstantRange{ + .offset = program.graphics.vertex.push_constant_range.offset, + .size = program.graphics.vertex.push_constant_range.size, + .stage_flags = .{ .vertex_bit = true }, + }; + + len += 1; + } + if (program.graphics.fragment.push_constant_range.size > 0) { + buffer[len] = vk.PushConstantRange{ + .offset = program.graphics.fragment.push_constant_range.offset, + .size = program.graphics.fragment.push_constant_range.size, + .stage_flags = .{ .fragment_bit = true }, + }; + + len += 1; + } + }, + .compute => { + std.debug.assert(buffer.len >= 1); + if (program.compute.compute.push_constant_range.size > 0) { + buffer[len] = vk.PushConstantRange{ + .offset = program.compute.compute.push_constant_range.offset, + .size = program.compute.compute.push_constant_range.size, + .stage_flags = .{ .compute_bit = true }, + }; + + len += 1; + } + }, + } + + return buffer[0..len]; +} + fn loadShaderProgramErr(self: *AssetManager, id: AssetId) !LoadedShaderProgram { const data = try self.loadFile(self.frame_arena, asset_manifest.getPath(id), SHADER_MAX_BYTES); var serializer = formats.Serializer{ .write = false, .endian = formats.native_endian, .stream = .{ .buffer = std.io.fixedBufferStream(data.bytes) } }; var program: formats.ShaderProgram = undefined; try program.serialize(&serializer); + var push_constant_ranges_buf: [2]vk.PushConstantRange = undefined; + const push_constant_ranges = getPushConstantRanges(program, &push_constant_ranges_buf); + // TODO: parse from shaders or something const pipeline_layout = try self.gc.device.createPipelineLayout(&.{ - .p_set_layouts = &.{self.shaderman.descriptor_set_layouts.global}, + .p_set_layouts = &.{self.descriptorman.descriptor_set_layouts.global}, .set_layout_count = 1, + .push_constant_range_count = @intCast(push_constant_ranges.len), + .p_push_constant_ranges = push_constant_ranges.ptr, }, null); const pipeline = blk: { @@ -349,6 +395,9 @@ fn loadShaderProgramErr(self: *AssetManager, id: AssetId) !LoadedShaderProgram {
const dynamic_states = [_]vk.DynamicState{ .viewport_with_count, .scissor_with_count, + .depth_compare_op, + .depth_test_enable, + .depth_write_enable, }; var pipelines = [1]vk.Pipeline{.null_handle}; @@ -356,7 +405,7 @@ fn loadShaderProgramErr(self: *AssetManager, id: AssetId) !LoadedShaderProgram { vk.GraphicsPipelineCreateInfo{ .p_next = &vk.PipelineRenderingCreateInfo{ .color_attachment_count = 1, - .p_color_attachment_formats = &[_]vk.Format{.r8g8b8a8_unorm}, + .p_color_attachment_formats = &[_]vk.Format{.r16g16b16a16_sfloat}, .depth_attachment_format = .d24_unorm_s8_uint, .stencil_attachment_format = .d24_unorm_s8_uint, .view_mask = 0, @@ -382,15 +431,17 @@ fn loadShaderProgramErr(self: *AssetManager, id: AssetId) !LoadedShaderProgram { .p_viewport_state = &vk.PipelineViewportStateCreateInfo{}, .layout = pipeline_layout, .p_depth_stencil_state = &vk.PipelineDepthStencilStateCreateInfo{ - .depth_test_enable = vk.TRUE, - .depth_write_enable = vk.TRUE, - .depth_compare_op = .greater, - .depth_bounds_test_enable = vk.TRUE, + // Dynamic States + .depth_test_enable = vk.FALSE, + .depth_write_enable = vk.FALSE, + .depth_compare_op = .never, + // ~Dynamic States + .depth_bounds_test_enable = vk.FALSE, .stencil_test_enable = vk.FALSE, .front = std.mem.zeroes(vk.StencilOpState), .back = std.mem.zeroes(vk.StencilOpState), .min_depth_bounds = 0.0, - .max_depth_bounds = 1.0, + .max_depth_bounds = 0.0, }, .p_dynamic_state = &vk.PipelineDynamicStateCreateInfo{ .dynamic_state_count = @intCast(dynamic_states.len), diff --git a/src/DescriptorManager.zig b/src/DescriptorManager.zig new file mode 100644 index 0000000..ecf5edf --- /dev/null +++ b/src/DescriptorManager.zig @@ -0,0 +1,230 @@ +const std = @import("std"); +const vk = @import("vk"); +const GraphicsContext = @import("GraphicsContext.zig"); + +pub const DescriptorManager = @This(); + +pub const MAX_SAMPLERS = 128; +pub const MAX_TEXTURES = 1024; + +pub const DescriptorSets = struct { + const Global = struct { + pub 
const Bindings = enum(u32) { + GlobalUniform = 0, + GlobalSamplers = 1, + GlobalTextures2D = 2, + + pub fn value(self: Bindings) u32 { + return @intFromEnum(self); + } + }; + }; +}; + +pub const DescriptorSetLayouts = struct { + global: vk.DescriptorSetLayout = .null_handle, +}; + +pub const DescriptorHandle = struct { + index: u32 = 0, + generation: u32 = 0, +}; + +pub fn DescriptorT(comptime T: type) type { + return struct { + const Self = @This(); + + handle: DescriptorHandle = .{}, + value: T = std.mem.zeroes(T), + next: ?*Self = null, + + pub fn index(self: *const Self) u32 { + return self.handle.index; + } + }; +} + +pub const SampledImageDescriptorData = struct { + view: vk.ImageView, + layout: vk.ImageLayout, +}; + +pub const SampledImageDescriptor = DescriptorT(SampledImageDescriptorData); + +pub fn DescriptorArray(comptime T: type, comptime MAX_RESOURCES: usize) type { + return struct { + const Self = @This(); + + pub const Resource = DescriptorT(T); + resources: [MAX_RESOURCES]Resource = [_]Resource{.{}} ** MAX_RESOURCES, + first_free: ?*Resource = null, + last_index: u32 = 1, // index of the last allocated image + + resources_to_update: std.PackedIntArray(u1, MAX_RESOURCES) = undefined, + resources_to_update_count: u32 = 0, + + pub fn init(self: *Self, default_value: T) void { + self.* = .{}; + + self.resources_to_update.setAll(0); + + self.resources[0].value = default_value; + } + + pub fn alloc(self: *Self, value: T) *Resource { + if (self.first_free) |image| { + self.first_free = image.next; + + image.value = value; + image.handle.generation += 1; + image.next = null; + self.addUpdate(image.handle.index); + + return image; + } else { + const image = &self.resources[self.last_index]; + image.* = Resource{ + .handle = .{ .index = self.last_index, .generation = 1 }, + .value = value, + }; + self.addUpdate(image.handle.index); + self.last_index += 1; + return image; + } + } + + pub fn get(self: *Self, handle: DescriptorHandle) *Resource { + if 
(handle.index == 0) { + // Invalid handle + return &self.resources[0]; + } + + const image = &self.resources[handle.index]; + if (image.handle.generation != handle.generation) { + return &self.resources[0]; + } + + return image; + } + + pub fn free(self: *Self, handle: DescriptorHandle) void { + const image = self.get(handle); + if (image.handle.index != 0) { + image.handle.generation += 1; + image.next = self.first_free; + self.first_free = image; + + self.addUpdate(image.handle.index); + } + } + + pub const UpdatedResourceIterator = struct { + array: *Self, + offset: u32 = 0, + + pub fn next(self: *UpdatedResourceIterator) ?*Resource { + for (self.offset..MAX_RESOURCES) |i| { + if (self.array.resources_to_update.get(i) == 1) { + self.offset = @intCast(i + 1); + return &self.array.resources[i]; + } + } + + return null; + } + }; + + pub fn iterator(self: *Self) UpdatedResourceIterator { + return UpdatedResourceIterator{ .array = self }; + } + + pub fn resetUpdates(self: *Self) void { + self.resources_to_update.setAll(0); + self.resources_to_update_count = 0; + } + + fn addUpdate(self: *Self, index: u32) void { + if (self.resources_to_update.get(index) == 0) { + self.resources_to_update.set(index, 1); + self.resources_to_update_count += 1; + } + } + }; +} + +gc: *GraphicsContext, +descriptor_set_layouts: DescriptorSetLayouts = .{}, +image_descriptor_array_2d: DescriptorArray(SampledImageDescriptorData, 1024) = .{}, + +pub fn init(gc: *GraphicsContext) !DescriptorManager { + var self = DescriptorManager{ + .gc = gc, + }; + + self.image_descriptor_array_2d.init(.{ .view = .null_handle, .layout = .undefined }); + + // Global Descriptor Set Layout + { + const descriptor_set_layout_bindings = [_]vk.DescriptorSetLayoutBinding{ + vk.DescriptorSetLayoutBinding{ + .binding = DescriptorSets.Global.Bindings.GlobalUniform.value(), + .descriptor_type = .uniform_buffer, + .descriptor_count = 1, + .stage_flags = vk.ShaderStageFlags.fromInt(0x7FFFFFFF), // SHADER_STAGE_ALL + }, + 
vk.DescriptorSetLayoutBinding{ + .binding = DescriptorSets.Global.Bindings.GlobalSamplers.value(), + .descriptor_type = .sampler, + .descriptor_count = MAX_SAMPLERS, + .stage_flags = vk.ShaderStageFlags.fromInt(0x7FFFFFFF), // SHADER_STAGE_ALL + }, + vk.DescriptorSetLayoutBinding{ + .binding = DescriptorSets.Global.Bindings.GlobalTextures2D.value(), + .descriptor_type = .sampled_image, + .descriptor_count = MAX_TEXTURES, + .stage_flags = vk.ShaderStageFlags.fromInt(0x7FFFFFFF), // SHADER_STAGE_ALL + }, + }; + const flags = [_]vk.DescriptorBindingFlags{ + .{ .update_after_bind_bit = true, .update_unused_while_pending_bit = true }, + .{ .partially_bound_bit = true, .update_after_bind_bit = true, .update_unused_while_pending_bit = true }, + .{ .partially_bound_bit = true, .update_after_bind_bit = true, .update_unused_while_pending_bit = true }, + }; + self.descriptor_set_layouts.global = try self.gc.device.createDescriptorSetLayout(&.{ + .p_next = &vk.DescriptorSetLayoutBindingFlagsCreateInfo{ + .binding_count = flags.len, + .p_binding_flags = &flags, + }, + .flags = .{ .update_after_bind_pool_bit = true }, + .p_bindings = &descriptor_set_layout_bindings, + .binding_count = descriptor_set_layout_bindings.len, + }, null); + } + + // Post Process Pass Descriptor Set Layout + // { + // const descriptor_set_layout_bindings = [_]vk.DescriptorSetLayoutBinding{ + // vk.DescriptorSetLayoutBinding{ + // .binding = DescriptorSets.Passes.PostProcess.Bindings.ScreenSampler.value(), + // .descriptor_type = .sampler, + // .descriptor_count = 1, + // .stage_flags = vk.ShaderStageFlags.fromInt(0x7FFFFFFF), // SHADER_STAGE_ALL + // }, + // vk.DescriptorSetLayoutBinding{ + // .binding = DescriptorSets.Passes.PostProcess.Bindings.ScreenTexture.value(), + // .descriptor_type = .sampled_image, + // .descriptor_count = 1, + // .stage_flags = vk.ShaderStageFlags.fromInt(0x7FFFFFFF), // SHADER_STAGE_ALL + // }, + // }; + // self.descriptor_set_layouts.passes.post_process = try 
self.gc.device.createDescriptorSetLayout(&.{ + // .flags = .{ + // .push_descriptor_bit_khr = true, + // }, + // .p_bindings = &descriptor_set_layout_bindings, + // .binding_count = descriptor_set_layout_bindings.len, + // }, null); + // } + + return self; +} diff --git a/src/GraphicsContext.zig b/src/GraphicsContext.zig index 61b87b5..7843b31 100644 --- a/src/GraphicsContext.zig +++ b/src/GraphicsContext.zig @@ -207,6 +207,7 @@ pub const ImageSyncState = struct { .req = req, .layout = layout, }; + self.layout = layout; } return result; @@ -215,6 +216,7 @@ pub const ImageSyncState = struct { pub const Image = struct { handle: vk.Image = .null_handle, + view: vk.ImageView = .null_handle, mip_count: u32 = 0, layer_count: u32 = 0, format: vk.Format = .undefined, @@ -270,8 +272,7 @@ pub const Image = struct { pub const Buffer = struct { gc: *GraphicsContext, handle: vk.Buffer, - allocation: vma.Allocation, - allocation_info: vma.c.VmaAllocationInfo, + size: u64, sync_state: SyncState = .{}, @@ -287,7 +288,7 @@ pub const Buffer = struct { .dst_stage_mask = req.dst_stage_mask, .dst_access_mask = req.dst_access_mask, .offset = 0, - .size = self.allocation_info.size, + .size = self.size, .src_queue_family_index = vk.QUEUE_FAMILY_IGNORED, .dst_queue_family_index = vk.QUEUE_FAMILY_IGNORED, }, @@ -295,20 +296,6 @@ pub const Buffer = struct { }); } } - - pub fn flush(self: *Buffer, offset: vk.DeviceSize, size: vk.DeviceSize) !void { - try vma.flushAllocation(self.gc.vma_allocator, self.allocation, offset, size); - } - - pub fn getAllocationMemoryProperties(self: *Buffer) vk.MemoryPropertyFlags { - var mem_prop_flags = vk.MemoryPropertyFlags{}; - vma.getAllocationMemoryProperties(self.gc.vma_allocator, self.allocation, &mem_prop_flags); - return mem_prop_flags; - } - - pub fn deinit(self: *const Buffer, gc: *GraphicsContext) void { - vma.destroyBuffer(gc.vma_allocator, self.handle, self.allocation); - } }; pub fn init(self: *GraphicsContext, allocator: std.mem.Allocator, 
window: *c.SDL_Window) !void { @@ -362,6 +349,15 @@ pub fn init(self: *GraphicsContext, allocator: std.mem.Allocator, window: *c.SDL const physical_devices = try self.instance.enumeratePhysicalDevicesAlloc(fba.allocator()); self.device_info = try selectPhysicalDevice(self.instance, self.surface, physical_devices); std.debug.print("Selected Physical Device: {s}\n", .{@as([]u8, @alignCast(&self.device_info.properties.device_name))}); + + // Required for bindless texturing + std.debug.assert(self.device_info.features.descriptor_indexing_features.runtime_descriptor_array == vk.TRUE); + std.debug.assert(self.device_info.features.descriptor_indexing_features.descriptor_binding_partially_bound == vk.TRUE); + std.debug.assert(self.device_info.features.descriptor_indexing_features.descriptor_binding_sampled_image_update_after_bind == vk.TRUE); + std.debug.assert(self.device_info.features.descriptor_indexing_features.descriptor_binding_update_unused_while_pending == vk.TRUE); + std.debug.assert(self.device_info.features.descriptor_indexing_features.descriptor_binding_variable_descriptor_count == vk.TRUE); + std.debug.assert(self.device_info.features.descriptor_indexing_features.descriptor_binding_uniform_buffer_update_after_bind == vk.TRUE); + const queue_config = try selectQueues(self.instance, self.device_info.physical_device); self.memory_config = try selectMemoryPools(self.instance, self.device_info.physical_device); @@ -375,13 +371,19 @@ pub fn init(self: *GraphicsContext, allocator: std.mem.Allocator, window: *c.SDL const device_create_config = vk.DeviceCreateInfo{ .p_queue_create_infos = &queue_config.queue_create_info, .queue_create_info_count = queue_config.queue_count, - .p_enabled_features = &self.device_info.features, + .p_enabled_features = &self.device_info.features.features, .pp_enabled_layer_names = @ptrCast((&vk_layers).ptr), .enabled_layer_count = @intCast(vk_layers.len), .pp_enabled_extension_names = @ptrCast((&device_extensions).ptr), 
.enabled_extension_count = @intCast(device_extensions.len), .p_next = &vk.PhysicalDeviceVulkan12Features{ .buffer_device_address = vk.TRUE, + .runtime_descriptor_array = vk.TRUE, + .descriptor_binding_partially_bound = vk.TRUE, + .descriptor_binding_sampled_image_update_after_bind = vk.TRUE, + .descriptor_binding_update_unused_while_pending = vk.TRUE, + .descriptor_binding_variable_descriptor_count = vk.TRUE, + .descriptor_binding_uniform_buffer_update_after_bind = vk.TRUE, .p_next = &vulkan13_device_features, }, }; @@ -609,18 +611,34 @@ pub const QueueInstance = struct { } }; +pub const PhysicalDeviceFeatures = struct { + features: vk.PhysicalDeviceFeatures, + descriptor_indexing_features: vk.PhysicalDeviceDescriptorIndexingFeatures, +}; + const SelectedPhysicalDevice = struct { physical_device: vk.PhysicalDevice, properties: vk.PhysicalDeviceProperties, - features: vk.PhysicalDeviceFeatures, + features: PhysicalDeviceFeatures, surface_capabilities: vk.SurfaceCapabilitiesKHR, }; +fn getDeviceFeatures(vki: Instance, physical_device: vk.PhysicalDevice) PhysicalDeviceFeatures { + var descriptor_indexing_features: vk.PhysicalDeviceDescriptorIndexingFeatures = .{}; + var device_features: vk.PhysicalDeviceFeatures2 = .{ .p_next = &descriptor_indexing_features, .features = std.mem.zeroes(vk.PhysicalDeviceFeatures) }; + vki.getPhysicalDeviceFeatures2(physical_device, &device_features); + + return PhysicalDeviceFeatures{ + .features = device_features.features, + .descriptor_indexing_features = descriptor_indexing_features, + }; +} + fn selectPhysicalDevice(vki: Instance, surface: vk.SurfaceKHR, devices: []vk.PhysicalDevice) !SelectedPhysicalDevice { // TODO: select suitable physical device, allow overriding using some user config for (devices) |device| { const props = vki.getPhysicalDeviceProperties(device); - const features = vki.getPhysicalDeviceFeatures(device); + const features = getDeviceFeatures(vki, device); const surface_caps = try 
vki.getPhysicalDeviceSurfaceCapabilitiesKHR(device, surface); return SelectedPhysicalDevice{ .physical_device = device, diff --git a/src/Render2.zig b/src/Render2.zig index 4adfdcd..45bccbb 100644 --- a/src/Render2.zig +++ b/src/Render2.zig @@ -1,7 +1,7 @@ const std = @import("std"); const GraphicsContext = @import("GraphicsContext.zig"); const AssetManager = @import("AssetManager.zig"); -const ShaderManager = @import("ShaderManager.zig"); +const DescriptorManager = @import("DescriptorManager.zig"); const vk = @import("vk"); const a = @import("asset_manifest"); const za = @import("zalgebra"); @@ -32,16 +32,23 @@ var default_camera: Camera = .{}; const MAX_FRAME_LAG = 3; const PER_FRAME_ARENA_SIZE = 64 * common.MB; +frame_allocator: std.mem.Allocator, gc: *GraphicsContext, -shaderman: *ShaderManager, +descriptorman: *DescriptorManager, assetman: *AssetManager, command_pool: GraphicsContext.CommandPool, vulkan_frame_arena: VulkanPerFrameArena, camera: *Camera = &default_camera, +global_descriptor_pool: vk.DescriptorPool = .null_handle, +global_descriptor_set: vk.DescriptorSet = .null_handle, + frame: u32 = 0, frame_data: [MAX_FRAME_LAG]FrameData = undefined, +// Global sampler to use for reading screen color in post processing +screen_color_sampler: vk.Sampler = .null_handle, + // Ring buffer/arena for per frame data pub const VulkanPerFrameArena = struct { const Self = @This(); @@ -92,6 +99,7 @@ pub const VulkanPerFrameArena = struct { } }; + memory_type_index: u32, memory: vk.DeviceMemory, size: u64, tail: u64 = 0, @@ -102,18 +110,7 @@ pub const VulkanPerFrameArena = struct { // NOTE: bug in zig? Tried to use [MAX_FRAME_LAG]?u64 here, but optional checks pass even when value is null, wtf?? 
frame_regions: [MAX_FRAME_LAG]?FrameRegion = [_]?FrameRegion{null} ** MAX_FRAME_LAG, - // Tracking allocated resources per frame, unfortunately have to wait for frame to finish before we can destroy them :( - buffers: [MAX_FRAME_LAG][1024]vk.Buffer = undefined, - buffer_counts: [MAX_FRAME_LAG]u16 = [_]u16{0} ** MAX_FRAME_LAG, - - pub fn init(memory: vk.DeviceMemory, size: u64) Self { - return Self{ - .memory = memory, - .size = size, - }; - } - - pub fn startFrame(self: *VulkanPerFrameArena, device: GraphicsContext.Device, frame_index: u32) void { + pub fn startFrame(self: *VulkanPerFrameArena, frame_index: u32) void { // TODO: tail pointer should be aligned to nonCoherentAtomSize to avoid accidentally flushing memory being used by previous frames // if we end up allocating right up until the previous frame's head // Record start position of this frame @@ -122,11 +119,6 @@ pub const VulkanPerFrameArena = struct { } self.frame = frame_index; self.frame_regions[self.frame] = FrameRegion.init(self.tail, self.tail); - - for (self.buffers[self.frame][0..self.buffer_counts[self.frame]]) |buf| { - device.destroyBuffer(buf, null); - } - self.buffer_counts[self.frame] = 0; } // Caller guarantees that memory from given frame can be safely stomped, buffers destroyed etc. @@ -201,7 +193,7 @@ pub const VulkanPerFrameArena = struct { return offset; } - pub fn createBufferRaw(self: *Self, device: GraphicsContext.Device, usage: vk.BufferUsageFlags, size: u64, out_addr: *u64) !vk.Buffer { + pub fn createBufferRaw(self: *Self, frame: *FrameData, device: GraphicsContext.Device, usage: vk.BufferUsageFlags, size: u64, out_addr: *u64) !vk.Buffer { // NOTE: Allocating buffers just in time, hopefully vulkan impl is smart about allocation here and not doing new each time... 
const buffer = try device.createBuffer(&vk.BufferCreateInfo{ .flags = .{}, @@ -216,18 +208,35 @@ pub const VulkanPerFrameArena = struct { try device.bindBufferMemory(buffer, self.memory, out_addr.*); - self.buffers[self.frame][self.buffer_counts[self.frame]] = buffer; - self.buffer_counts[self.frame] += 1; + frame.addBuffer(buffer); return buffer; } + pub fn createImageRaw(self: *Self, frame: *FrameData, gc: *GraphicsContext, create_info: *const vk.ImageCreateInfo, out_addr: *u64) !vk.Image { + const image = try gc.device.createImage(create_info, null); + errdefer gc.device.destroyImage(image, null); + + const mem_reqs = gc.getImageMemoryRequirements(image); + + std.debug.assert(!mem_reqs.requires_dedicated); + std.debug.assert(mem_reqs.memory_type_bits & (@as(u32, 1) << @intCast(self.memory_type_index)) != 0); + + out_addr.* = try self.allocate(mem_reqs.size, mem_reqs.alignment); + + try gc.device.bindImageMemory(image, self.memory, out_addr.*); + + frame.addImage(image); + + return image; + } + pub fn reset(self: *Self) void { self.head = 0; } }; -pub fn init(self: *Render2, gc: *GraphicsContext, shaderman: *ShaderManager, assetman: *AssetManager) !void { +pub fn init(self: *Render2, frame_allocator: std.mem.Allocator, gc: *GraphicsContext, descriptorman: *DescriptorManager, assetman: *AssetManager) !void { // Allocated in device local mem const per_frame_upload_memory = try gc.device.allocateMemory(&.{ .memory_type_index = gc.memory_config.gpu.type_index, @@ -235,79 +244,169 @@ pub fn init(self: *Render2, gc: *GraphicsContext, shaderman: *ShaderManager, ass }, null); self.* = Render2{ + .frame_allocator = frame_allocator, .gc = gc, - .shaderman = shaderman, + .descriptorman = descriptorman, .assetman = assetman, .command_pool = try gc.queues.graphics.createCommandPool(.{ .reset_command_buffer_bit = true }), - .vulkan_frame_arena = VulkanPerFrameArena.init(per_frame_upload_memory, PER_FRAME_ARENA_SIZE), + .vulkan_frame_arena = VulkanPerFrameArena{ + 
.memory_type_index = gc.memory_config.gpu.type_index, + .memory = per_frame_upload_memory, + .size = PER_FRAME_ARENA_SIZE, + }, }; errdefer self.command_pool.deinit(); + self.screen_color_sampler = try gc.device.createSampler(&.{ + .flags = .{}, + .mag_filter = .linear, + .min_filter = .linear, + .mipmap_mode = .nearest, + .address_mode_u = .clamp_to_edge, + .address_mode_v = .clamp_to_edge, + .address_mode_w = .clamp_to_edge, + .mip_lod_bias = 0, + .anisotropy_enable = vk.FALSE, + .max_anisotropy = 0, + .compare_enable = vk.FALSE, + .compare_op = .always, + .min_lod = 0, + .max_lod = vk.LOD_CLAMP_NONE, + .border_color = .int_transparent_black, + .unnormalized_coordinates = vk.FALSE, + }, null); + + self.global_descriptor_pool = try gc.device.createDescriptorPool( + &vk.DescriptorPoolCreateInfo{ + .flags = .{ + .update_after_bind_bit = true, + }, + .max_sets = 1, + .p_pool_sizes = &.{ + vk.DescriptorPoolSize{ + .type = .uniform_buffer, + .descriptor_count = 8, + }, + vk.DescriptorPoolSize{ + .type = .sampled_image, + .descriptor_count = 1024, // TODO: don't hardcode + }, + vk.DescriptorPoolSize{ + .type = .sampler, + .descriptor_count = 256, // TODO: dont hardcode + }, + }, + .pool_size_count = 3, + }, + null, + ); + + var descriptor_set_buf = [_]vk.DescriptorSet{.null_handle}; + try gc.device.allocateDescriptorSets(&.{ + .descriptor_pool = self.global_descriptor_pool, + .descriptor_set_count = 1, + .p_set_layouts = &.{descriptorman.descriptor_set_layouts.global}, + }, &descriptor_set_buf); + self.global_descriptor_set = descriptor_set_buf[0]; + for (0..MAX_FRAME_LAG) |i| { self.frame_data[i] = try FrameData.init(gc, self.command_pool); - - try self.maybeResizeFrameBuffer(&self.frame_data[i]); } } -fn maybeResizeFrameBuffer(self: *Render2, frame: *FrameData) !void { +pub const MainRenderTarget = struct { + color: GraphicsContext.Image, + depth: GraphicsContext.Image, + + color_descriptor: *DescriptorManager.SampledImageDescriptor, +}; + +fn 
allocateRenderTarget(self: *Render2) !MainRenderTarget { const gc = self.gc; - if ((gc.swapchain_extent.width == frame.depth_buffer_extent.width or gc.swapchain_extent.height == frame.depth_buffer_extent.height) and frame.depth_buffer.handle != .null_handle) { - return; - } - if (frame.depth_buffer.handle != .null_handle) { - gc.device.destroyImage(frame.depth_buffer.handle, null); - gc.device.freeMemory(frame.depth_buffer_memory, null); - frame.depth_buffer = .{}; - frame.depth_buffer_memory = .null_handle; - } const swapchain_width = gc.swapchain_extent.width; const swapchain_height = gc.swapchain_extent.height; - const depth_image = try gc.device.createImage(&.{ + var color_img_address: u64 = 0; + var depth_img_address: u64 = 0; + + const color_image = try self.createPerFrameImage(&.{ .image_type = .@"2d", .array_layers = 1, .extent = .{ .width = swapchain_width, .height = swapchain_height, .depth = 1 }, .mip_levels = 1, .sharing_mode = .exclusive, .tiling = .optimal, - .usage = .{ .depth_stencil_attachment_bit = true }, + .usage = .{ .color_attachment_bit = true, .transfer_src_bit = true, .transfer_dst_bit = true, .sampled_bit = true }, + .format = .r16g16b16a16_sfloat, + .samples = .{ .@"1_bit" = true }, + .initial_layout = .undefined, + }, &color_img_address); + + const color_image_view = try self.createPerFrameImageView(&vk.ImageViewCreateInfo{ + .image = color_image, + .view_type = .@"2d", + .format = .r16g16b16a16_sfloat, + .components = .{ .r = .r, .g = .g, .b = .b, .a = .a }, + .subresource_range = .{ + .aspect_mask = .{ .color_bit = true }, + .base_array_layer = 0, + .base_mip_level = 0, + .layer_count = 1, + .level_count = 1, + }, + }); + + const depth_image = try self.createPerFrameImage(&.{ + .image_type = .@"2d", + .array_layers = 1, + .extent = .{ .width = swapchain_width, .height = swapchain_height, .depth = 1 }, + .mip_levels = 1, + .sharing_mode = .exclusive, + .tiling = .optimal, + .usage = .{ .depth_stencil_attachment_bit = true, 
.transfer_src_bit = true, .transfer_dst_bit = true, .sampled_bit = true }, .format = .d24_unorm_s8_uint, .samples = .{ .@"1_bit" = true }, .initial_layout = .undefined, - }, null); - - const mem_reqs = gc.getImageMemoryRequirements(depth_image); - std.debug.assert(mem_reqs.memory_type_bits & (@as(u32, 1) << @intCast(gc.memory_config.gpu.type_index)) != 0); - // Nvidia apparently doesn't prefer dedicated allocation... Figure out what to do here - // std.debug.assert(mem_reqs.prefers_dedicated); - - frame.depth_buffer_memory = try gc.device.allocateMemory(&.{ - .p_next = @ptrCast(&vk.MemoryDedicatedAllocateInfo{ - .image = depth_image, - }), - .allocation_size = mem_reqs.size, - .memory_type_index = gc.memory_config.gpu.type_index, - }, null); - - try gc.device.bindImageMemory(depth_image, frame.depth_buffer_memory, 0); - - frame.depth_buffer = GraphicsContext.Image{ - .handle = depth_image, + }, &depth_img_address); + const depth_image_view = try self.createPerFrameImageView(&vk.ImageViewCreateInfo{ + .image = depth_image, + .view_type = .@"2d", .format = .d24_unorm_s8_uint, - .layer_count = 1, - .mip_count = 1, - .sync_state = .{ - .layout = .undefined, + .components = .{ .r = .r, .g = .g, .b = .b, .a = .a }, + .subresource_range = .{ + .aspect_mask = .{ .depth_bit = true, .stencil_bit = true }, + .base_array_layer = 0, + .base_mip_level = 0, + .layer_count = 1, + .level_count = 1, }, + }); + + return MainRenderTarget{ + .color = GraphicsContext.Image{ + .handle = color_image, + .view = color_image_view, + .format = .r16g16b16a16_sfloat, + .layer_count = 1, + .mip_count = 1, + .sync_state = .{}, + }, + .depth = GraphicsContext.Image{ + .handle = depth_image, + .view = depth_image_view, + .format = .d24_unorm_s8_uint, + .layer_count = 1, + .mip_count = 1, + .sync_state = .{}, + }, + .color_descriptor = self.createPerFrameImageDescriptor(color_image_view, .read_only_optimal), }; - frame.depth_buffer_extent = .{ .width = swapchain_width, .height = swapchain_height }; 
} fn createPerFrameBuffer(self: *Render2, usage: vk.BufferUsageFlags, size: u64, out_addr: *u64) !vk.Buffer { while (true) { - if (self.vulkan_frame_arena.createBufferRaw(self.gc.device, usage, size, out_addr)) |buffer| { + if (self.vulkan_frame_arena.createBufferRaw(&self.frame_data[self.frame], self.gc.device, usage, size, out_addr)) |buffer| { return buffer; } else |err| switch (err) { error.OverlapsPreviousFrame => { @@ -315,7 +414,7 @@ fn createPerFrameBuffer(self: *Render2, usage: vk.BufferUsageFlags, size: u64, o std.debug.print("Vulkan Frame Allocator Overlapped frame {}, waiting for it to finish...", .{overlapped_frame}); - try self.frame_data[overlapped_frame].waitForDrawAndReset(self.gc.device); + try self.frame_data[overlapped_frame].waitForDrawAndReset(self); self.vulkan_frame_arena.resetFrame(overlapped_frame); }, else => return err, @@ -323,8 +422,35 @@ fn createPerFrameBuffer(self: *Render2, usage: vk.BufferUsageFlags, size: u64, o } } -fn frameAllocMemReqs(self: *Render2, mem_reqs: vk.MemoryRequirements) !u64 { - return self.frameAlloc(mem_reqs.size, mem_reqs.alignment); +fn createPerFrameImage(self: *Render2, create_info: *const vk.ImageCreateInfo, out_addr: *u64) !vk.Image { + while (true) { + if (self.vulkan_frame_arena.createImageRaw(&self.frame_data[self.frame], self.gc, create_info, out_addr)) |image| { + return image; + } else |err| switch (err) { + error.OverlapsPreviousFrame => { + const overlapped_frame = (self.frame + 1) % MAX_FRAME_LAG; + + std.debug.print("Vulkan Frame Allocator Overlapped frame {}, waiting for it to finish...", .{overlapped_frame}); + + try self.frame_data[overlapped_frame].waitForDrawAndReset(self); + self.vulkan_frame_arena.resetFrame(overlapped_frame); + }, + else => return err, + } + } +} + +fn createPerFrameImageView(self: *Render2, create_info: *const vk.ImageViewCreateInfo) !vk.ImageView { + const view = try self.gc.device.createImageView(create_info, null); + self.frame_data[self.frame].addImageView(view); + + 
return view; +} + +fn createPerFrameImageDescriptor(self: *Render2, view: vk.ImageView, layout: vk.ImageLayout) *DescriptorManager.SampledImageDescriptor { + const result = self.descriptorman.image_descriptor_array_2d.alloc(.{ .view = view, .layout = layout }); + self.frame_data[self.frame].addImageDescriptor(result.handle); + return result; } pub fn draw(self: *Render2) !void { @@ -332,12 +458,13 @@ pub fn draw(self: *Render2) !void { const device = gc.device; const frame = &self.frame_data[self.frame]; - try frame.waitForDrawAndReset(gc.device); + try frame.waitForDrawAndReset(self); self.vulkan_frame_arena.resetFrame(self.frame); - self.vulkan_frame_arena.startFrame(gc.device, self.frame); + self.vulkan_frame_arena.startFrame(self.frame); var global_buffer_addr: u64 = 0; - const global_uniform_buffer = try self.createPerFrameBuffer(.{ .uniform_buffer_bit = true, .transfer_dst_bit = true }, @sizeOf(GlobalUniform), &global_buffer_addr); + const global_uniform_buffer_handle = try self.createPerFrameBuffer(.{ .uniform_buffer_bit = true, .transfer_dst_bit = true }, @sizeOf(GlobalUniform), &global_buffer_addr); + var global_uniform_buffer = GraphicsContext.Buffer{ .gc = gc, .handle = global_uniform_buffer_handle, .size = @sizeOf(GlobalUniform) }; const global_uniform = blk: { const view = self.camera.view_mat; @@ -358,10 +485,10 @@ pub fn draw(self: *Render2) !void { // Move this out into a separate func const swapchain_image_index: u32 = try gc.acquireSwapchainImage(frame.acquire_swapchain_image); - try self.maybeResizeFrameBuffer(frame); - const depth_image: *GraphicsContext.Image = &frame.depth_buffer; - const depth_image_view = try depth_image.createView(gc.device, .{ .depth_bit = true, .stencil_bit = true }); - defer gc.device.destroyImageView(depth_image_view, null); + var main_render_target = try self.allocateRenderTarget(); + + const color_image: *GraphicsContext.Image = &main_render_target.color; + const depth_image: *GraphicsContext.Image = 
&main_render_target.depth; var swapchain_image = GraphicsContext.Image{ .handle = gc.swapchain_images[swapchain_image_index], .mip_count = 1, .layer_count = 1, .format = .r8g8b8a8_unorm }; const swapchain_image_view = try swapchain_image.createView(gc.device, .{ .color_bit = true }); @@ -371,26 +498,12 @@ pub fn draw(self: *Render2) !void { try cmds.beginCommandBuffer(&.{}); { - cmds.updateBuffer(global_uniform_buffer, 0, @sizeOf(GlobalUniform), &global_uniform); - // Transition global uniform buffer - cmds.pipelineBarrier2(&vk.DependencyInfo{ - .buffer_memory_barrier_count = 1, - .p_buffer_memory_barriers = &.{ - vk.BufferMemoryBarrier2{ - .buffer = global_uniform_buffer, - .src_stage_mask = .{ .copy_bit = true }, - .src_access_mask = .{ .transfer_write_bit = true }, - .dst_stage_mask = .{ .vertex_shader_bit = true }, - .dst_access_mask = .{ .shader_read_bit = true }, - .offset = 0, - .size = @sizeOf(GlobalUniform), - .src_queue_family_index = vk.QUEUE_FAMILY_IGNORED, - .dst_queue_family_index = vk.QUEUE_FAMILY_IGNORED, - }, - }, - }); + try global_uniform_buffer.sync(cmds, .{ .stage_mask = .{ .copy_bit = true }, .access_mask = .{ .transfer_write_bit = true } }); + cmds.updateBuffer(global_uniform_buffer.handle, 0, @sizeOf(GlobalUniform), &global_uniform); + try global_uniform_buffer.sync(cmds, .{ .stage_mask = .{ .vertex_shader_bit = true }, .access_mask = .{ .shader_read_bit = true } }); + + const global_descriptor_set = self.global_descriptor_set; - const global_descriptor_set = try frame.allocateDescriptorSet(device, self.shaderman.descriptor_set_layouts.global); device.updateDescriptorSets(1, &.{ vk.WriteDescriptorSet{ .dst_set = global_descriptor_set, @@ -400,7 +513,7 @@ pub fn draw(self: *Render2) !void { .descriptor_count = 1, .p_buffer_info = &.{ vk.DescriptorBufferInfo{ - .buffer = global_uniform_buffer, + .buffer = global_uniform_buffer.handle, .offset = 0, .range = @sizeOf(GlobalUniform), }, @@ -410,13 +523,46 @@ pub fn draw(self: *Render2) !void { }, 
}, 0, null); - try swapchain_image.sync( + // TODO: move this into descriptorman? + // Update image descriptors + { + var offset: u32 = 0; + var writes: []vk.WriteDescriptorSet = try self.frame_allocator.alloc(vk.WriteDescriptorSet, self.descriptorman.image_descriptor_array_2d.resources_to_update_count); + var iter = self.descriptorman.image_descriptor_array_2d.iterator(); + while (iter.next()) |image| { + // TODO: merge WriteDescriptorSets by ranges maybe? + const write = &writes[offset]; + + write.* = .{ + .dst_set = global_descriptor_set, + .dst_binding = 2, + .dst_array_element = image.handle.index, + .descriptor_type = .sampled_image, + .descriptor_count = 1, + .p_image_info = &[_]vk.DescriptorImageInfo{ + vk.DescriptorImageInfo{ + .image_layout = image.value.layout, + .image_view = image.value.view, + .sampler = .null_handle, + }, + }, + .p_buffer_info = &[_]vk.DescriptorBufferInfo{}, + .p_texel_buffer_view = &[_]vk.BufferView{}, + }; + offset += 1; + } + self.descriptorman.image_descriptor_array_2d.resetUpdates(); + + device.updateDescriptorSets(@intCast(writes.len), writes.ptr, 0, null); + } + + try color_image.sync( cmds, .{ .stage_mask = .{ .color_attachment_output_bit = true }, .access_mask = .{ .color_attachment_write_bit = true }, }, - .attachment_optimal, + .color_attachment_optimal, .{ .color_bit = true }, ); try depth_image.sync( @@ -431,6 +577,8 @@ pub fn draw(self: *Render2) !void { .depth_stencil_attachment_optimal, .{ .depth_bit = true, .stencil_bit = true }, ); + + // Actual draws { cmds.beginRendering(&vk.RenderingInfo{ .render_area = vk.Rect2D{ .offset = .{ .x = 0, .y = 0 }, .extent = gc.swapchain_extent }, @@ -442,9 +590,9 @@ pub fn draw(self: *Render2) !void { .clear_value = .{ .color = .{ .float_32 = .{ 0.8, 0.7, 0.6, 1.0 } } }, .load_op = .clear, .store_op = .store, - .image_layout = .attachment_optimal, - .image_view = swapchain_image_view, - .resolve_image_layout = .attachment_optimal, + .image_layout = .color_attachment_optimal, + 
.image_view = color_image.view, + .resolve_image_layout = .color_attachment_optimal, .resolve_mode = .{}, }, }, @@ -453,7 +601,7 @@ pub fn draw(self: *Render2) !void { .load_op = .clear, .store_op = .store, .image_layout = .depth_stencil_attachment_optimal, - .image_view = depth_image_view, + .image_view = depth_image.view, .resolve_image_layout = .depth_stencil_attachment_optimal, .resolve_mode = .{}, }, @@ -462,13 +610,17 @@ pub fn draw(self: *Render2) !void { .load_op = .clear, .store_op = .none, .image_layout = .depth_stencil_attachment_optimal, - .image_view = depth_image_view, + .image_view = depth_image.view, .resolve_image_layout = .depth_stencil_attachment_optimal, .resolve_mode = .{}, }, }); defer cmds.endRendering(); + cmds.setDepthTestEnable(vk.TRUE); + cmds.setDepthWriteEnable(vk.TRUE); + cmds.setDepthCompareOp(.greater); + const triangle = self.assetman.resolveShaderProgram(a.ShaderPrograms.shaders.triangle); cmds.bindPipeline(.graphics, triangle.pipeline); @@ -490,7 +642,39 @@ pub fn draw(self: *Render2) !void { cmds.draw(3, 2, 0, 0); } - try swapchain_image.sync(cmds, .{}, .present_src_khr, .{ .color_bit = true }); + try color_image.sync(cmds, .{ .stage_mask = .{ .blit_bit = true }, .access_mask = .{ .transfer_read_bit = true } }, .transfer_src_optimal, .{ .color_bit = true }); + try swapchain_image.sync(cmds, .{ .stage_mask = .{ .blit_bit = true }, .access_mask = .{ .transfer_write_bit = true } }, .transfer_dst_optimal, .{ .color_bit = true }); + cmds.blitImage( + color_image.handle, + color_image.sync_state.layout, + swapchain_image.handle, + swapchain_image.sync_state.layout, + 1, + &.{vk.ImageBlit{ + .src_subresource = vk.ImageSubresourceLayers{ + .aspect_mask = .{ .color_bit = true }, + .mip_level = 0, + .base_array_layer = 0, + .layer_count = 1, + }, + .src_offsets = .{ + vk.Offset3D{ .x = 0, .y = 0, .z = 0 }, + vk.Offset3D{ .x = @intCast(gc.swapchain_extent.width), .y = @intCast(gc.swapchain_extent.height), .z = 1 }, + }, + .dst_subresource = 
vk.ImageSubresourceLayers{ + .aspect_mask = .{ .color_bit = true }, + .mip_level = 0, + .base_array_layer = 0, + .layer_count = 1, + }, + .dst_offsets = .{ + vk.Offset3D{ .x = 0, .y = 0, .z = 0 }, + vk.Offset3D{ .x = @intCast(gc.swapchain_extent.width), .y = @intCast(gc.swapchain_extent.height), .z = 1 }, + }, + }}, + .nearest, + ); + try swapchain_image.sync(cmds, .{ .stage_mask = .{}, .access_mask = .{} }, .present_src_khr, .{ .color_bit = true }); } try cmds.endCommandBuffer(); @@ -538,10 +722,19 @@ const FrameData = struct { draw_fence: vk.Fence, command_buffer: GraphicsContext.CommandBuffer, - descriptor_pool: vk.DescriptorPool = .null_handle, - depth_buffer_extent: vk.Extent2D = .{ .width = 0, .height = 0 }, - depth_buffer_memory: vk.DeviceMemory = .null_handle, - depth_buffer: GraphicsContext.Image = .{}, + + // Store references to per frame allocated objects here, they will be cleaned up after frame finished rendering + buffers: [1024]vk.Buffer = undefined, + buffer_count: u32 = 0, + + images: [1024]vk.Image = undefined, + image_count: u32 = 0, + + image_views: [1024]vk.ImageView = undefined, + image_view_count: u32 = 0, + + image_descriptors: [1024]DescriptorManager.DescriptorHandle = undefined, + image_descriptor_count: u32 = 0, pub fn init(gc: *GraphicsContext, command_pool: GraphicsContext.CommandPool) !FrameData { return FrameData{ @@ -550,38 +743,88 @@ const FrameData = struct { .draw_fence = try gc.device.createFence(&.{ .flags = .{ .signaled_bit = true } }, null), .command_buffer = try command_pool.allocateCommandBuffer(), - .descriptor_pool = try gc.device.createDescriptorPool(&vk.DescriptorPoolCreateInfo{ - .max_sets = 1024, - .p_pool_sizes = &.{ - vk.DescriptorPoolSize{ - .type = .uniform_buffer, - .descriptor_count = 8, - }, - }, - .pool_size_count = 1, - }, null), - - // TODO: maybe cache memory requirements? 
}; } - pub fn allocateDescriptorSet(self: *FrameData, device: GraphicsContext.Device, layout: vk.DescriptorSetLayout) !vk.DescriptorSet { - var result: [1]vk.DescriptorSet = .{.null_handle}; - try device.allocateDescriptorSets(&vk.DescriptorSetAllocateInfo{ - .descriptor_pool = self.descriptor_pool, - .descriptor_set_count = 1, - .p_set_layouts = &.{layout}, - }, &result); - return result[0]; + pub fn addBuffer(self: *FrameData, buffer: vk.Buffer) void { + self.buffers[self.buffer_count] = buffer; + self.buffer_count += 1; } - pub fn waitForDrawAndReset(self: *FrameData, device: GraphicsContext.Device) !void { + pub fn addImage(self: *FrameData, image: vk.Image) void { + self.images[self.image_count] = image; + self.image_count += 1; + } + + pub fn addImageView(self: *FrameData, view: vk.ImageView) void { + self.image_views[self.image_view_count] = view; + self.image_view_count += 1; + } + + pub fn addImageDescriptor(self: *FrameData, descriptor: DescriptorManager.DescriptorHandle) void { + self.image_descriptors[self.image_descriptor_count] = descriptor; + self.image_descriptor_count += 1; + } + + pub fn waitForDrawAndReset(self: *FrameData, render: *Render2) !void { + const device = render.gc.device; _ = try device.waitForFences(1, &.{self.draw_fence}, vk.TRUE, std.math.maxInt(u64)); try device.resetFences(1, &.{self.draw_fence}); try self.command_buffer.resetCommandBuffer(.{ .release_resources_bit = true }); - try device.resetDescriptorPool(self.descriptor_pool, .{}); + for (self.image_descriptors[0..self.image_descriptor_count]) |desc| { + render.descriptorman.image_descriptor_array_2d.free(desc); + } + self.image_descriptor_count = 0; + + // TODO: move this into descriptorman? 
+ // Update image descriptors + { + var offset: u32 = 0; + var writes: []vk.WriteDescriptorSet = try render.frame_allocator.alloc(vk.WriteDescriptorSet, render.descriptorman.image_descriptor_array_2d.resources_to_update_count); + var iter = render.descriptorman.image_descriptor_array_2d.iterator(); + while (iter.next()) |image| { + // TODO: merge WriteDescriptorSets by ranges maybe? + const write = &writes[offset]; + + write.* = .{ + .dst_set = render.global_descriptor_set, + .dst_binding = 2, + .dst_array_element = image.handle.index, + .descriptor_type = .sampled_image, + .descriptor_count = 1, + .p_image_info = &[_]vk.DescriptorImageInfo{ + vk.DescriptorImageInfo{ + .image_layout = image.value.layout, + .image_view = image.value.view, + .sampler = .null_handle, + }, + }, + .p_buffer_info = &[_]vk.DescriptorBufferInfo{}, + .p_texel_buffer_view = &[_]vk.BufferView{}, + }; + offset += 1; + } + render.descriptorman.image_descriptor_array_2d.resetUpdates(); + + device.updateDescriptorSets(@intCast(writes.len), writes.ptr, 0, null); + } + + for (self.buffers[0..self.buffer_count]) |buf| { + device.destroyBuffer(buf, null); + } + self.buffer_count = 0; + + for (self.image_views[0..self.image_view_count]) |view| { + device.destroyImageView(view, null); + } + self.image_view_count = 0; + + for (self.images[0..self.image_count]) |img| { + device.destroyImage(img, null); + } + self.image_count = 0; } }; diff --git a/src/ShaderManager.zig b/src/ShaderManager.zig deleted file mode 100644 index 8ad5c6d..0000000 --- a/src/ShaderManager.zig +++ /dev/null @@ -1,48 +0,0 @@ -const std = @import("std"); -const vk = @import("vk"); -const GraphicsContext = @import("GraphicsContext.zig"); - -pub const ShaderManager = @This(); - -pub const DescriptorSets = struct { - const Global = struct { - pub const UBO = enum(u32) { - GlobalUniform = 0, - - pub fn value(self: UBO) u32 { - return @intFromEnum(self); - } - }; - }; -}; - -pub const DescriptorSetLayouts = struct { - global: 
vk.DescriptorSetLayout = .null_handle, -}; - -gc: *GraphicsContext, -descriptor_set_layouts: DescriptorSetLayouts = .{}, - -pub fn init(gc: *GraphicsContext) !ShaderManager { - var self = ShaderManager{ - .gc = gc, - }; - - // Global Descriptor Set Layout - { - const descriptor_set_layout_bindings = [_]vk.DescriptorSetLayoutBinding{ - vk.DescriptorSetLayoutBinding{ - .descriptor_type = .uniform_buffer, - .binding = DescriptorSets.Global.UBO.GlobalUniform.value(), - .descriptor_count = 1, - .stage_flags = vk.ShaderStageFlags.fromInt(0x7FFFFFFF), // SHADER_STAGE_ALL - }, - }; - self.descriptor_set_layouts.global = try self.gc.device.createDescriptorSetLayout(&.{ - .p_bindings = &descriptor_set_layout_bindings, - .binding_count = descriptor_set_layout_bindings.len, - }, null); - } - - return self; -} diff --git a/src/formats.zig b/src/formats.zig index eeeea61..731b84c 100644 --- a/src/formats.zig +++ b/src/formats.zig @@ -123,8 +123,21 @@ pub fn writeMesh(writer: anytype, value: Mesh, endian: std.builtin.Endian) !void pub const ShaderProgramPipelineType = enum(u8) { graphics, compute }; pub const ShaderProgram = union(ShaderProgramPipelineType) { + pub const PushConstantRange = struct { + offset: u32, + size: u32, + }; + pub const ShaderStage = struct { source: []u8, + push_constant_range: PushConstantRange, + + pub fn serialize(self: *ShaderStage, serializer: *Serializer) !void { + try serializer.skipAlign(4); + try serializer.serializeByteSlice(&self.source); + try serializer.serializeInt(u32, &self.push_constant_range.offset); + try serializer.serializeInt(u32, &self.push_constant_range.size); + } }; pub const GraphicsPipeline = struct { @@ -153,17 +166,15 @@ pub const ShaderProgram = union(ShaderProgramPipelineType) { if (!serializer.write) { self.* = .{ .graphics = undefined }; } - try serializer.skipAlign(4); - try serializer.serializeByteSlice(&self.graphics.vertex.source); - try serializer.skipAlign(4); - try 
serializer.serializeByteSlice(&self.graphics.fragment.source); + + try self.graphics.vertex.serialize(serializer); + try self.graphics.fragment.serialize(serializer); }, .compute => { if (!serializer.write) { self.* = .{ .compute = undefined }; } - try serializer.skipAlign(4); - try serializer.serializeByteSlice(&self.compute.compute.source); + try self.compute.compute.serialize(serializer); }, } } diff --git a/src/game.zig b/src/game.zig index 0e3ff81..7d0ba8f 100644 --- a/src/game.zig +++ b/src/game.zig @@ -4,7 +4,7 @@ const InitMemory = globals.InitMemory; const GameMemory = globals.GameMemory; const c = @import("sdl.zig"); // const gl = @import("gl.zig"); -const ShaderManager = @import("ShaderManager.zig"); +const DescriptorManager = @import("DescriptorManager.zig"); const AssetManager = @import("AssetManager.zig"); const Render = @import("Render.zig"); const Render2 = @import("Render2.zig"); @@ -175,12 +175,12 @@ export fn game_init(global_allocator: *std.mem.Allocator) void { globals.g_mem.* = .{ .global_allocator = global_allocator.*, .frame_fba = std.heap.FixedBufferAllocator.init(frame_arena_buffer), - .shaderman = ShaderManager.init(&globals.g_init.gc) catch @panic("ShaderManager.init"), - .assetman = AssetManager.init(global_allocator.*, globals.g_mem.frame_fba.allocator(), &globals.g_init.gc, &globals.g_mem.shaderman), + .descriptorman = DescriptorManager.init(&globals.g_init.gc) catch @panic("DescriptorManager.init"), + .assetman = AssetManager.init(global_allocator.*, globals.g_mem.frame_fba.allocator(), &globals.g_init.gc, &globals.g_mem.descriptorman), // .render = Render.init(global_allocator.*, globals.g_mem.frame_fba.allocator(), &globals.g_mem.assetman), .world = .{ .frame_arena = globals.g_mem.frame_fba.allocator() }, }; - globals.g_mem.render2.init(&globals.g_init.gc, &globals.g_mem.shaderman, &globals.g_mem.assetman) catch @panic("OOM"); + globals.g_mem.render2.init(globals.g_mem.frame_fba.allocator(), &globals.g_init.gc, 
&globals.g_mem.descriptorman, &globals.g_mem.assetman) catch @panic("OOM"); globals.g_mem.render2.camera = &globals.g_mem.free_cam.camera; std.log.debug("actual ptr: {}, correct ptr {}", .{ globals.g_mem.assetman.frame_arena.ptr, globals.g_mem.frame_fba.allocator().ptr }); globals.g_assetman = &globals.g_mem.assetman; diff --git a/src/globals.zig b/src/globals.zig index f7b9a50..82c3b66 100644 --- a/src/globals.zig +++ b/src/globals.zig @@ -3,7 +3,7 @@ const c = @import("sdl.zig"); const Render = @import("Render.zig"); const Render2 = @import("Render2.zig"); const AssetManager = @import("AssetManager.zig"); -const ShaderManager = @import("ShaderManager.zig"); +const DescriptorManager = @import("DescriptorManager.zig"); const World = @import("entity.zig").World; const GraphicsContext = @import("GraphicsContext.zig"); @@ -36,7 +36,7 @@ pub const InitMemory = struct { pub const GameMemory = struct { global_allocator: std.mem.Allocator, frame_fba: std.heap.FixedBufferAllocator, - shaderman: ShaderManager, + descriptorman: DescriptorManager, assetman: AssetManager, render: Render = undefined, render2: Render2 = undefined, @@ -69,7 +69,7 @@ pub const FreeLookCamera = struct { pub fn update(self: *FreeLookCamera, dt: f32, move: Vec3, look: Vec2) void { self.yaw += look.x(); - self.pitch += look.y(); + self.pitch -= look.y(); // First rotate pitch, then yaw const rot = Mat4.fromRotation(self.pitch, Vec3.right()).mul(Mat4.fromRotation(self.yaw, Vec3.up())); diff --git a/tools/asset_compiler.zig b/tools/asset_compiler.zig index 35be608..f7017fe 100644 --- a/tools/asset_compiler.zig +++ b/tools/asset_compiler.zig @@ -21,6 +21,8 @@ const c = @cImport({ @cInclude("stb_image.h"); @cInclude("ispc_texcomp.h"); + + @cInclude("spirv_reflect.h"); }); const ASSET_MAX_BYTES = 1024 * 1024 * 1024; @@ -583,14 +585,24 @@ fn readFileContents(allocator: std.mem.Allocator, path: []const u8) ![]u8 { }; } +const PushConstantRange = struct { + offset: u32 = 0, + size: u32 = 0, +}; + +const 
ProcessedShader = struct { + spirv: []u8, + push_constant_range: PushConstantRange = .{}, +}; + // Returns spirv binary source // Caller owns memory -fn processShader(allocator: std.mem.Allocator, flags: []const []const u8, input: []const u8, maybe_dep_file: ?[]const u8) ![]u8 { +fn processShader(allocator: std.mem.Allocator, flags: []const []const u8, input: []const u8, maybe_dep_file: ?[]const u8) !ProcessedShader { // const old_depfile_contents = if (maybe_dep_file) |dep| try readFileContents(allocator, dep) else try allocator.alloc(u8, 0); // defer allocator.free(old_depfile_contents); // TODO: make sure output is stdout - const result = try std.process.Child.run(.{ + const compile_result = try std.process.Child.run(.{ .allocator = allocator, .argv = try std.mem.concat(allocator, []const u8, &.{ &.{ "glslc", "--target-env=vulkan1.3", "-std=460core", "-g", "-o", "-" }, @@ -599,13 +611,13 @@ fn processShader(allocator: std.mem.Allocator, flags: []const []const u8, input: &.{input}, }), }); - defer allocator.free(result.stderr); - errdefer allocator.free(result.stdout); + defer allocator.free(compile_result.stderr); + errdefer allocator.free(compile_result.stdout); - switch (result.term) { + switch (compile_result.term) { .Exited => |status| { if (status != 0) { - std.log.debug("Shader compilation failed with status {}:\n{s}\n", .{ result.term.Exited, result.stderr }); + std.log.debug("Shader compilation failed with status {}:\n{s}\n", .{ compile_result.term.Exited, compile_result.stderr }); return error.ShaderCompileError; } }, @@ -614,6 +626,21 @@ fn processShader(allocator: std.mem.Allocator, flags: []const []const u8, input: }, } + var result = ProcessedShader{ .spirv = compile_result.stdout }; + + { + var shader_module: c.SpvReflectShaderModule = std.mem.zeroes(c.SpvReflectShaderModule); + try spvReflectTry(c.spvReflectCreateShaderModule(compile_result.stdout.len, compile_result.stdout.ptr, &shader_module)); + defer 
c.spvReflectDestroyShaderModule(&shader_module); + + if (shader_module.push_constant_blocks != null) { + // Assuming single push constant block per stage, this is what glslc enforces + std.debug.assert(shader_module.push_constant_block_count == 1); + const block = shader_module.push_constant_blocks[0]; + result.push_constant_range = .{ .offset = block.offset, .size = block.size }; + } + } + // NOTE: Dep file is technically incorrect, but zig build system doesn't care, it will collect all dependencies after colon // even if they are not for the same file it's processing @@ -625,7 +652,35 @@ fn processShader(allocator: std.mem.Allocator, flags: []const []const u8, input: // try file.writeAll(old_depfile_contents); // } - return result.stdout; + return result; +} + +fn spvReflectTry(result: c.SpvReflectResult) !void { + switch (result) { + c.SPV_REFLECT_RESULT_SUCCESS => {}, + c.SPV_REFLECT_RESULT_NOT_READY => return error.SPV_REFLECT_RESULT_NOT_READY, + c.SPV_REFLECT_RESULT_ERROR_PARSE_FAILED => return error.SPV_REFLECT_RESULT_ERROR_PARSE_FAILED, + c.SPV_REFLECT_RESULT_ERROR_ALLOC_FAILED => return error.SPV_REFLECT_RESULT_ERROR_ALLOC_FAILED, + c.SPV_REFLECT_RESULT_ERROR_RANGE_EXCEEDED => return error.SPV_REFLECT_RESULT_ERROR_RANGE_EXCEEDED, + c.SPV_REFLECT_RESULT_ERROR_NULL_POINTER => return error.SPV_REFLECT_RESULT_ERROR_NULL_POINTER, + c.SPV_REFLECT_RESULT_ERROR_INTERNAL_ERROR => return error.SPV_REFLECT_RESULT_ERROR_INTERNAL_ERROR, + c.SPV_REFLECT_RESULT_ERROR_COUNT_MISMATCH => return error.SPV_REFLECT_RESULT_ERROR_COUNT_MISMATCH, + c.SPV_REFLECT_RESULT_ERROR_ELEMENT_NOT_FOUND => return error.SPV_REFLECT_RESULT_ERROR_ELEMENT_NOT_FOUND, + c.SPV_REFLECT_RESULT_ERROR_SPIRV_INVALID_CODE_SIZE => return error.SPV_REFLECT_RESULT_ERROR_SPIRV_INVALID_CODE_SIZE, + c.SPV_REFLECT_RESULT_ERROR_SPIRV_INVALID_MAGIC_NUMBER => return error.SPV_REFLECT_RESULT_ERROR_SPIRV_INVALID_MAGIC_NUMBER, + c.SPV_REFLECT_RESULT_ERROR_SPIRV_UNEXPECTED_EOF => return 
error.SPV_REFLECT_RESULT_ERROR_SPIRV_UNEXPECTED_EOF, + c.SPV_REFLECT_RESULT_ERROR_SPIRV_INVALID_ID_REFERENCE => return error.SPV_REFLECT_RESULT_ERROR_SPIRV_INVALID_ID_REFERENCE, + c.SPV_REFLECT_RESULT_ERROR_SPIRV_SET_NUMBER_OVERFLOW => return error.SPV_REFLECT_RESULT_ERROR_SPIRV_SET_NUMBER_OVERFLOW, + c.SPV_REFLECT_RESULT_ERROR_SPIRV_INVALID_STORAGE_CLASS => return error.SPV_REFLECT_RESULT_ERROR_SPIRV_INVALID_STORAGE_CLASS, + c.SPV_REFLECT_RESULT_ERROR_SPIRV_RECURSION => return error.SPV_REFLECT_RESULT_ERROR_SPIRV_RECURSION, + c.SPV_REFLECT_RESULT_ERROR_SPIRV_INVALID_INSTRUCTION => return error.SPV_REFLECT_RESULT_ERROR_SPIRV_INVALID_INSTRUCTION, + c.SPV_REFLECT_RESULT_ERROR_SPIRV_UNEXPECTED_BLOCK_DATA => return error.SPV_REFLECT_RESULT_ERROR_SPIRV_UNEXPECTED_BLOCK_DATA, + c.SPV_REFLECT_RESULT_ERROR_SPIRV_INVALID_BLOCK_MEMBER_REFERENCE => return error.SPV_REFLECT_RESULT_ERROR_SPIRV_INVALID_BLOCK_MEMBER_REFERENCE, + c.SPV_REFLECT_RESULT_ERROR_SPIRV_INVALID_ENTRY_POINT => return error.SPV_REFLECT_RESULT_ERROR_SPIRV_INVALID_ENTRY_POINT, + c.SPV_REFLECT_RESULT_ERROR_SPIRV_INVALID_EXECUTION_MODE => return error.SPV_REFLECT_RESULT_ERROR_SPIRV_INVALID_EXECUTION_MODE, + c.SPV_REFLECT_RESULT_ERROR_SPIRV_MAX_RECURSIVE_EXCEEDED => return error.SPV_REFLECT_RESULT_ERROR_SPIRV_MAX_RECURSIVE_EXCEEDED, + else => unreachable, + } } fn processShaderProgram(allocator: std.mem.Allocator, input: []const u8, output_dir: std.fs.Dir, dep_file: ?[]const u8, asset_list_writer: anytype) !void { @@ -668,16 +723,18 @@ fn processShaderProgram(allocator: std.mem.Allocator, input: []const u8, output_ const shader_source_path = try std.fs.path.resolve(allocator, &.{ input_dir, stage }); const relative_path = try std.fs.path.relative(allocator, try std.fs.cwd().realpathAlloc(allocator, "."), shader_source_path); - const shader_source = try processShader(allocator, &.{ "-DVERTEX_SHADER=1", "-fshader-stage=vert" }, relative_path, dep_file); - result.graphics.vertex.source = shader_source; + const 
shader = try processShader(allocator, &.{ "-DVERTEX_SHADER=1", "-fshader-stage=vert" }, relative_path, dep_file); + result.graphics.vertex.source = shader.spirv; + result.graphics.vertex.push_constant_range = .{ .offset = shader.push_constant_range.offset, .size = shader.push_constant_range.size }; } { const stage = program.value.fragment.?; const shader_source_path = try std.fs.path.resolve(allocator, &.{ input_dir, stage }); const relative_path = try std.fs.path.relative(allocator, try std.fs.cwd().realpathAlloc(allocator, "."), shader_source_path); - const shader_source = try processShader(allocator, &.{ "-DFRAGMENT_SHADER=1", "-fshader-stage=frag" }, relative_path, dep_file); - result.graphics.fragment.source = shader_source; + const shader = try processShader(allocator, &.{ "-DFRAGMENT_SHADER=1", "-fshader-stage=frag" }, relative_path, dep_file); + result.graphics.fragment.source = shader.spirv; + result.graphics.fragment.push_constant_range = .{ .offset = shader.push_constant_range.offset, .size = shader.push_constant_range.size }; } } else if (program.value.compute != null) { result = .{ .compute = undefined }; @@ -686,8 +743,9 @@ fn processShaderProgram(allocator: std.mem.Allocator, input: []const u8, output_ const shader_source_path = try std.fs.path.resolve(allocator, &.{ input_dir, stage }); const relative_path = try std.fs.path.relative(allocator, try std.fs.cwd().realpathAlloc(allocator, "."), shader_source_path); - const shader_source = try processShader(allocator, &.{ "-DCOMPUTE_SHADER=1", "-fshader-stage=compute" }, relative_path, dep_file); - result.compute.compute.source = shader_source; + const shader = try processShader(allocator, &.{ "-DCOMPUTE_SHADER=1", "-fshader-stage=compute" }, relative_path, dep_file); + result.compute.compute.source = shader.spirv; + result.compute.compute.push_constant_range = .{ .offset = shader.push_constant_range.offset, .size = shader.push_constant_range.size }; } else { std.log.err("Provide vertex and fragment 
shaders for a graphics pipeline or a compute shader for a compute pipeline\n", .{}); return error.InvalidPipelines;