//! GraphicsContext.zig — Vulkan device/instance/swapchain bootstrap built on
//! SDL's Vulkan surface support and the vulkan-zig bindings.
const std = @import("std");
const vk = @import("vk");
const c = @import("sdl.zig");
pub const GraphicsContext = @This();
const apis: []const vk.ApiInfo = &.{
vk.features.version_1_0,
vk.features.version_1_1,
vk.features.version_1_2,
vk.features.version_1_3,
vk.extensions.khr_surface,
vk.extensions.khr_swapchain,
};
pub const Instance = vk.InstanceProxy(apis);
pub const Device = vk.DeviceProxy(apis);
pub const CommandBuffer = vk.CommandBufferProxy(apis);
const BaseDispatch = vk.BaseWrapper(apis);
const InstanceDispatch = vk.InstanceWrapper(apis);
const DeviceDispatch = Device.Wrapper;
const device_extensions = [_][:0]const u8{
vk.extensions.khr_swapchain.name,
};
const vk_layers = [_][:0]const u8{"VK_LAYER_KHRONOS_validation"};
allocator: std.mem.Allocator = undefined,
window: *c.SDL_Window = undefined,
vkb: BaseDispatch = undefined,
vki: InstanceDispatch = undefined,
vkd: DeviceDispatch = undefined,
device_info: SelectedPhysicalDevice = undefined,
instance: Instance = undefined,
device: Device = undefined,
queues: DeviceQueues = undefined,
surface: vk.SurfaceKHR = .null_handle,
swapchain: vk.SwapchainKHR = .null_handle,
swapchain_extent: vk.Extent2D = .{ .width = 0, .height = 0 },
swapchain_images: []vk.Image = &.{},
pipeline_cache: vk.PipelineCache = .null_handle,
pub const CommandPool = struct {
device: Device,
handle: vk.CommandPool,
pub fn allocateCommandBuffer(self: *const CommandPool) !CommandBuffer {
var cmd_bufs = [_]vk.CommandBuffer{.null_handle};
try self.device.allocateCommandBuffers(&.{
.command_pool = self.handle,
.level = .primary,
.command_buffer_count = cmd_bufs.len,
}, &cmd_bufs);
return CommandBuffer.init(cmd_bufs[0], self.device.wrapper);
}
};
pub fn init(self: *GraphicsContext, allocator: std.mem.Allocator, window: *c.SDL_Window) !void {
self.allocator = allocator;
self.window = window;
var scratch: [4096]u8 = undefined;
var fba = std.heap.FixedBufferAllocator.init(&scratch);
const vkGetInstanceProcAddr: vk.PfnGetInstanceProcAddr = @ptrCast(c.SDL_Vulkan_GetVkGetInstanceProcAddr());
var sdl_instance_ext_count: c_uint = 0;
if (c.SDL_Vulkan_GetInstanceExtensions(window, &sdl_instance_ext_count, null) == c.SDL_FALSE) {
std.debug.print("SDL_Vulkan_GetInstanceExtensions: get count {s}\n", .{c.SDL_GetError()});
return error.GetSDLExtensions;
}
const sdl_instance_ext_names = try fba.allocator().alloc([*:0]const u8, sdl_instance_ext_count);
if (c.SDL_Vulkan_GetInstanceExtensions(window, &sdl_instance_ext_count, @ptrCast(sdl_instance_ext_names.ptr)) == c.SDL_FALSE) {
std.debug.print("SDL_Vulkan_GetInstanceExtensions: get names {s}\n", .{c.SDL_GetError()});
return error.GetSDLExtensions;
}
std.debug.print("SDL Extensions: {s}\n", .{sdl_instance_ext_names});
self.vkb = try BaseDispatch.load(vkGetInstanceProcAddr);
const instance_handle = try self.vkb.createInstance(&vk.InstanceCreateInfo{
.p_application_info = &vk.ApplicationInfo{
.api_version = vk.API_VERSION_1_3,
.application_version = 0,
.engine_version = 0,
},
.pp_enabled_layer_names = @ptrCast((&vk_layers).ptr),
.enabled_layer_count = @intCast(vk_layers.len),
.enabled_extension_count = @intCast(sdl_instance_ext_names.len),
.pp_enabled_extension_names = sdl_instance_ext_names.ptr,
}, null);
self.vki = try InstanceDispatch.load(instance_handle, vkGetInstanceProcAddr);
errdefer self.vki.destroyInstance(instance_handle, null);
self.instance = Instance.init(instance_handle, &self.vki);
var sdl_vksurface: c.VkSurfaceKHR = null;
if (c.SDL_Vulkan_CreateSurface(window, @as(*c.VkInstance, @ptrCast(&self.instance.handle)).*, &sdl_vksurface) == c.SDL_FALSE) {
std.debug.print("SDL_Vulkan_CreateSurface: {s}\n", .{c.SDL_GetError()});
return error.SDLVulkanCreateSurface;
}
std.debug.assert(sdl_vksurface != null);
self.surface = @as(*vk.SurfaceKHR, @ptrCast(&sdl_vksurface)).*;
const physical_devices = try self.instance.enumeratePhysicalDevicesAlloc(fba.allocator());
self.device_info = try selectPhysicalDevice(self.instance, self.surface, physical_devices);
const queue_config = try selectQueues(self.instance, self.device_info.physical_device);
const device_create_config = vk.DeviceCreateInfo{
.p_next = &vk.PhysicalDeviceVulkan13Features{
.dynamic_rendering = vk.TRUE,
.synchronization_2 = vk.TRUE,
},
.p_queue_create_infos = &queue_config.queue_create_info,
.queue_create_info_count = queue_config.queue_count,
.p_enabled_features = &self.device_info.features,
.pp_enabled_layer_names = @ptrCast((&vk_layers).ptr),
.enabled_layer_count = @intCast(vk_layers.len),
.pp_enabled_extension_names = @ptrCast((&device_extensions).ptr),
.enabled_extension_count = @intCast(device_extensions.len),
};
const device_handle = try self.instance.createDevice(self.device_info.physical_device, &device_create_config, null);
self.vkd = try DeviceDispatch.load(device_handle, self.instance.wrapper.dispatch.vkGetDeviceProcAddr);
errdefer self.vkd.destroyDevice(device_handle, null);
self.device = Device.init(device_handle, &self.vkd);
try self.maybeResizeSwapchain();
errdefer self.device.destroySwapchainKHR(self.swapchain, null);
// TODO: handle the case when different queue instance map to the same queue
const graphics_queue = QueueInstance{
.device = self.device,
.family = queue_config.graphics.family,
.handle = self.device.getDeviceQueue(queue_config.graphics.family, queue_config.graphics.index),
};
const compute_queue = QueueInstance{
.device = self.device,
.family = queue_config.compute.family,
.handle = self.device.getDeviceQueue(queue_config.graphics.family, queue_config.compute.index),
};
const host_to_device_queue = QueueInstance{
.device = self.device,
.family = queue_config.host_to_device.family,
.handle = self.device.getDeviceQueue(queue_config.graphics.family, queue_config.host_to_device.index),
};
const device_to_host_queue = QueueInstance{
.device = self.device,
.family = queue_config.device_to_host.family,
.handle = self.device.getDeviceQueue(queue_config.graphics.family, queue_config.device_to_host.index),
};
self.queues = DeviceQueues{
.graphics = graphics_queue,
.compute = compute_queue,
.host_to_device = host_to_device_queue,
.device_to_host = device_to_host_queue,
};
self.pipeline_cache = try self.device.createPipelineCache(&.{}, null);
}
pub fn acquireSwapchainImage(self: *GraphicsContext, acuire_semaphore: vk.Semaphore) !u32 {
var found = false;
var swapchain_img: u32 = 0;
try self.maybeResizeSwapchain();
while (!found) {
const acquire_result = try self.device.acquireNextImageKHR(self.swapchain, std.math.maxInt(u64), acuire_semaphore, .null_handle);
switch (acquire_result.result) {
.success, .suboptimal_khr => {
swapchain_img = acquire_result.image_index;
found = true;
},
.error_out_of_date_khr => {
// TODO: resize swapchain
std.debug.print("Out of date swapchain\n", .{});
try self.maybeResizeSwapchain();
},
.error_surface_lost_khr => {
// TODO: recreate surface
return error.SurfaceLost;
},
.not_ready => return error.SwapchainImageNotReady,
.timeout => return error.SwapchainImageTimeout,
else => {
std.debug.print("Unexpected value: {}\n", .{acquire_result.result});
@panic("Unexpected");
},
}
}
return swapchain_img;
}
fn maybeResizeSwapchain(self: *GraphicsContext) !void {
var width: c_int = 0;
var height: c_int = 0;
c.SDL_Vulkan_GetDrawableSize(self.window, &width, &height);
const new_extent = vk.Extent2D{ .width = @intCast(width), .height = @intCast(height) };
if (self.swapchain_extent.width == new_extent.width and self.swapchain_extent.height == new_extent.height) {
return;
}
if (self.swapchain_images.len > 0) {
self.allocator.free(self.swapchain_images);
self.swapchain_images = &.{};
}
self.swapchain_extent = new_extent;
const surface_caps = self.device_info.surface_capabilities;
self.swapchain = try self.device.createSwapchainKHR(&.{
.surface = self.surface,
.min_image_count = std.math.clamp(3, surface_caps.min_image_count, if (surface_caps.max_image_count == 0) std.math.maxInt(u32) else surface_caps.max_image_count),
.image_format = .r8g8b8a8_unorm, // tonemapping handles srgb
.image_color_space = .srgb_nonlinear_khr,
.image_extent = self.swapchain_extent,
.image_array_layers = 1,
.image_usage = .{
.color_attachment_bit = true,
.transfer_dst_bit = true,
},
.image_sharing_mode = .exclusive,
.present_mode = .fifo_khr, // required to be supported
.pre_transform = surface_caps.current_transform,
.composite_alpha = .{ .opaque_bit_khr = true },
.clipped = vk.TRUE,
.old_swapchain = self.swapchain,
}, null);
self.swapchain_images = try self.device.getSwapchainImagesAllocKHR(self.swapchain, self.allocator);
}
pub const DeviceQueues = struct {
graphics: QueueInstance,
compute: QueueInstance,
host_to_device: QueueInstance,
device_to_host: QueueInstance,
};
pub const SubmitInfo = struct {
wait_semaphores: []const vk.Semaphore = &.{},
wait_dst_stage_mask: []const vk.PipelineStageFlags = &.{},
command_buffers: []const vk.CommandBuffer = &.{},
signal_semaphores: []const vk.Semaphore = &.{},
};
pub const QueueInstance = struct {
const Self = @This();
mu: std.Thread.Mutex = .{},
device: Device,
handle: vk.Queue,
family: u32,
pub fn createCommandPool(self: *Self, flags: vk.CommandPoolCreateFlags) !CommandPool {
return .{
.handle = try self.device.createCommandPool(&.{
.flags = flags,
.queue_family_index = self.family,
}, null),
.device = self.device,
};
}
pub fn submit(self: *Self, info: *const SubmitInfo, fence: vk.Fence) Device.QueueSubmitError!void {
std.debug.assert(info.wait_semaphores.len == info.wait_dst_stage_mask.len);
var vk_submit_info = vk.SubmitInfo{};
if (info.wait_semaphores.len > 0) {
vk_submit_info.p_wait_semaphores = info.wait_semaphores.ptr;
vk_submit_info.p_wait_dst_stage_mask = info.wait_dst_stage_mask.ptr;
vk_submit_info.wait_semaphore_count = @intCast(info.wait_semaphores.len);
}
if (info.command_buffers.len > 0) {
vk_submit_info.p_command_buffers = info.command_buffers.ptr;
vk_submit_info.command_buffer_count = @intCast(info.command_buffers.len);
}
if (info.signal_semaphores.len > 0) {
vk_submit_info.p_signal_semaphores = info.signal_semaphores.ptr;
vk_submit_info.signal_semaphore_count = @intCast(info.signal_semaphores.len);
}
try self.submitVK(&.{vk_submit_info}, fence);
}
pub fn submitVK(self: *Self, infos: []const vk.SubmitInfo, fence: vk.Fence) Device.QueueSubmitError!void {
self.mu.lock();
defer self.mu.unlock();
try self.device.queueSubmit(self.handle, @intCast(infos.len), infos.ptr, fence);
}
};
const SelectedPhysicalDevice = struct {
physical_device: vk.PhysicalDevice,
properties: vk.PhysicalDeviceProperties,
features: vk.PhysicalDeviceFeatures,
surface_capabilities: vk.SurfaceCapabilitiesKHR,
};
fn selectPhysicalDevice(vki: Instance, surface: vk.SurfaceKHR, devices: []vk.PhysicalDevice) !SelectedPhysicalDevice {
// TODO: select suitable physical device, allow overriding using some user config
for (devices) |device| {
const props = vki.getPhysicalDeviceProperties(device);
const features = vki.getPhysicalDeviceFeatures(device);
const surface_caps = try vki.getPhysicalDeviceSurfaceCapabilitiesKHR(device, surface);
return SelectedPhysicalDevice{
.physical_device = device,
.properties = props,
.features = features,
.surface_capabilities = surface_caps,
};
}
return error.NoDeviceFound;
}
const DeviceQueueConfig = struct {
const Config = struct {
family: u32,
index: u32,
};
queue_create_info: [4]vk.DeviceQueueCreateInfo = undefined,
queue_count: u32 = 0,
graphics: Config,
compute: Config,
host_to_device: Config,
device_to_host: Config,
};
// Hardcode queue priorities, no idea why I would need to use them
const queue_priorities = [_]f32{ 1.0, 1.0, 1.0, 1.0, 1.0 };
fn selectQueues(instance: Instance, device: vk.PhysicalDevice) !DeviceQueueConfig {
var scratch: [1024]u8 = undefined;
var fba = std.heap.FixedBufferAllocator.init(&scratch);
const queue_family_props = try instance.getPhysicalDeviceQueueFamilyPropertiesAlloc(device, fba.allocator());
if (queue_family_props.len == 0) {
return error.NoQueues;
}
var queue_create_info: [4]vk.DeviceQueueCreateInfo = undefined;
var queue_count: u32 = 0;
var graphics: ?DeviceQueueConfig.Config = null;
var compute: ?DeviceQueueConfig.Config = null;
var host_to_device: ?DeviceQueueConfig.Config = null;
var device_to_host: ?DeviceQueueConfig.Config = null;
// We're on Intel most likely, just a single queue for everything :(
if (queue_family_props.len == 1) {
if (!queue_family_props[0].queue_flags.contains(.{ .graphics_bit = true, .compute_bit = true, .transfer_bit = true })) {
return error.InvalidQueue;
}
graphics = .{ .family = 0, .index = 0 };
compute = graphics;
device_to_host = graphics;
host_to_device = graphics;
queue_create_info[0] = .{
.queue_family_index = 0,
.queue_count = 1,
.p_queue_priorities = &queue_priorities,
};
queue_count = 1;
} else {
for (queue_family_props, 0..) |props, family_idx| {
// Jackpot, generous Jensen provided us with an all powerfull queue family, use it for everything
// TODO: actually, still need to use the dedicated transfer queue for CPU->GPU transfers to be async for sure
if (props.queue_flags.contains(.{ .graphics_bit = true, .compute_bit = true, .transfer_bit = true }) and props.queue_count >= 4) {
graphics = .{
.family = @intCast(family_idx),
.index = 0,
};
compute = .{
.family = @intCast(family_idx),
.index = 1,
};
host_to_device = .{
.family = @intCast(family_idx),
.index = 2,
};
device_to_host = .{
.family = @intCast(family_idx),
.index = 3,
};
queue_create_info[0] = .{
.queue_family_index = 0,
.queue_count = 4,
.p_queue_priorities = &queue_priorities,
};
queue_count = 1;
break;
}
// TODO: make queue create info for AMD
// Probably AMD, one graphics+compute queue, 2 separate compute queues, one pure transfer queue
if (props.queue_flags.graphics_bit) {
graphics = .{
.family = @intCast(family_idx),
.index = 0,
};
}
if (props.queue_flags.compute_bit and (compute == null or !props.queue_flags.graphics_bit)) {
compute = .{
.family = @intCast(family_idx),
.index = 0,
};
}
if (props.queue_flags.transfer_bit and (host_to_device == null or !props.queue_flags.graphics_bit or !props.queue_flags.compute_bit)) {
device_to_host = .{
.family = @intCast(family_idx),
.index = 0,
};
host_to_device = .{
.family = @intCast(family_idx),
.index = 0,
};
}
}
}
if (graphics == null or compute == null or device_to_host == null or host_to_device == null) {
return error.MissingQueueFeatures;
}
return .{
.queue_create_info = queue_create_info,
.queue_count = queue_count,
.graphics = graphics.?,
.compute = compute.?,
.host_to_device = host_to_device.?,
.device_to_host = device_to_host.?,
};
}