diff --git a/examples/Cargo.toml b/examples/Cargo.toml index 2408b4aa0d4..5616e093d25 100644 --- a/examples/Cargo.toml +++ b/examples/Cargo.toml @@ -34,6 +34,10 @@ path = "mesh-shading/main.rs" name = "bench" path = "bench/main.rs" +[[bin]] +name = "ray-tracing" +path = "ray-tracing/main.rs" + [dependencies] image = "0.23.12" log = "0.4" @@ -41,6 +45,7 @@ hal = { path = "../src/hal", version = "0.8", package = "gfx-hal" } auxil = { path = "../src/auxil/auxil", version = "0.9", package = "gfx-auxil" } gfx-backend-empty = { path = "../src/backend/empty", version = "0.8" } winit = { version = "0.24", features = ["web-sys"] } +cgmath = "0.18.0" [target.'cfg(not(target_arch = "wasm32"))'.dependencies] env_logger = "0.8" diff --git a/examples/ray-tracing/README.md b/examples/ray-tracing/README.md new file mode 100644 index 00000000000..a4f0d734b28 --- /dev/null +++ b/examples/ray-tracing/README.md @@ -0,0 +1,7 @@ +# Ray Tracing + +TODO + +- Screenshot +- Explain which backends this supports +- Explain what hardware this supports diff --git a/examples/ray-tracing/build-shaders.ps1 b/examples/ray-tracing/build-shaders.ps1 new file mode 100644 index 00000000000..c05701c8942 --- /dev/null +++ b/examples/ray-tracing/build-shaders.ps1 @@ -0,0 +1,11 @@ +$shaders = @( + "$PSScriptRoot\data\simple.rchit" + "$PSScriptRoot\data\simple.rgen" + "$PSScriptRoot\data\simple.rmiss" +) + +Remove-Item $PSScriptRoot\data\*.spv + +foreach ($shader in $shaders) { + & glslangValidator --target-env vulkan1.2 --entry-point main $shader -o "$shader.spv" +} diff --git a/examples/ray-tracing/data/simple.rchit b/examples/ray-tracing/data/simple.rchit new file mode 100644 index 00000000000..de15f399f39 --- /dev/null +++ b/examples/ray-tracing/data/simple.rchit @@ -0,0 +1,9 @@ +#version 460 +#extension GL_EXT_ray_tracing : enable + +layout(location = 0) rayPayloadInEXT vec3 out_color; +hitAttributeEXT vec3 attribs; + +void main() { + out_color = vec3(1.0, 0.0, 0.0); +} diff --git 
a/examples/ray-tracing/data/simple.rchit.spv b/examples/ray-tracing/data/simple.rchit.spv new file mode 100644 index 00000000000..8d798b6e7de Binary files /dev/null and b/examples/ray-tracing/data/simple.rchit.spv differ diff --git a/examples/ray-tracing/data/simple.rgen b/examples/ray-tracing/data/simple.rgen new file mode 100644 index 00000000000..e2e301bda3c --- /dev/null +++ b/examples/ray-tracing/data/simple.rgen @@ -0,0 +1,34 @@ +#version 460 +#extension GL_EXT_ray_tracing : enable + +layout(binding = 0, set = 0) uniform accelerationStructureEXT accel_struct; +layout(binding = 1, set = 0, rgba8) uniform image2D storage_image; +layout(binding = 2, set = 0) uniform CameraProperties { + mat4 view_inverse; + mat4 proj_inverse; +} +cam; + +layout(location = 0) rayPayloadEXT vec3 out_color; + +void main() { + const vec2 pixel_center = vec2(gl_LaunchIDEXT.xy) + vec2(0.5); + const vec2 in_uv = pixel_center / vec2(gl_LaunchSizeEXT.xy); + vec2 d = in_uv * 2.0 - 1.0; + + vec4 origin = cam.view_inverse * vec4(0, 0, 0, 1); + vec4 target = cam.proj_inverse * vec4(d.x, d.y, 1, 1); + vec4 direction = cam.view_inverse * vec4(normalize(target.xyz), 0); + + out_color = vec3(0.0); + + origin.xyz = vec3(0, 0, 1); + direction.xyz = vec3(0, 0, -1); + + float tmin = 0.001; + float tmax = 10000.0; + traceRayEXT(accel_struct, gl_RayFlagsOpaqueEXT, 0xff, 0, 0, 0, origin.xyz, + tmin, direction.xyz, tmax, 0); + + imageStore(storage_image, ivec2(gl_LaunchIDEXT.xy), vec4(out_color, 1.0)); +} diff --git a/examples/ray-tracing/data/simple.rgen.spv b/examples/ray-tracing/data/simple.rgen.spv new file mode 100644 index 00000000000..58b8585874b Binary files /dev/null and b/examples/ray-tracing/data/simple.rgen.spv differ diff --git a/examples/ray-tracing/data/simple.rmiss b/examples/ray-tracing/data/simple.rmiss new file mode 100644 index 00000000000..0ba6479ea65 --- /dev/null +++ b/examples/ray-tracing/data/simple.rmiss @@ -0,0 +1,6 @@ +#version 460 +#extension GL_EXT_ray_tracing : enable + 
+layout(location = 0) rayPayloadInEXT vec3 out_color; + +void main() { out_color = vec3(0.8, 0.8, 0.8); } \ No newline at end of file diff --git a/examples/ray-tracing/data/simple.rmiss.spv b/examples/ray-tracing/data/simple.rmiss.spv new file mode 100644 index 00000000000..25928ab702b Binary files /dev/null and b/examples/ray-tracing/data/simple.rmiss.spv differ diff --git a/examples/ray-tracing/main.rs b/examples/ray-tracing/main.rs new file mode 100644 index 00000000000..ce5898570a8 --- /dev/null +++ b/examples/ray-tracing/main.rs @@ -0,0 +1,1233 @@ +#[cfg(feature = "dx11")] +extern crate gfx_backend_dx11 as back; +#[cfg(feature = "dx12")] +extern crate gfx_backend_dx12 as back; +#[cfg(not(any( + feature = "vulkan", + feature = "d offset: (), size: ()x11", + feature = "dx12", + feature = "metal", + feature = "gl", +)))] +extern crate gfx_backend_empty as back; +#[cfg(feature = "gl")] +extern crate gfx_backend_gl as back; +#[cfg(feature = "metal")] +extern crate gfx_backend_metal as back; +#[cfg(feature = "vulkan")] +extern crate gfx_backend_vulkan as back; + +use cgmath::SquareMatrix; +#[cfg(target_arch = "wasm32")] +use wasm_bindgen::prelude::*; + +#[cfg(target_arch = "wasm32")] +#[wasm_bindgen(start)] +pub fn wasm_main() { + std::panic::set_hook(Box::new(console_error_panic_hook::hook)); + main(); +} + +use hal::{ + acceleration_structure as accel, adapter, buffer, command, format, image, memory, pool, + prelude::*, pso, window, IndexType, PhysicalDeviceProperties, +}; + +use std::{ + borrow::Borrow, + io::Cursor, + iter, + marker::PhantomData, + mem::{self, ManuallyDrop}, + ops::{self, Deref}, + ptr, +}; + +#[cfg_attr(rustfmt, rustfmt_skip)] +const DIMS: window::Extent2D = window::Extent2D { width: 1024, height: 768 }; + +#[derive(Debug, Clone, Copy)] +#[allow(non_snake_case)] +struct Vertex { + a_Pos: [f32; 3], +} + +#[derive(Debug, Clone)] +struct CameraProperties { + view_inverse: [[f32; 4]; 4], + proj_inverse: [[f32; 4]; 4], +} + +impl Default for 
CameraProperties { + fn default() -> Self { + use cgmath::{Matrix, Transform}; + + CameraProperties { + view_inverse: cgmath::conv::array4x4( + cgmath::Matrix4::from_translation(cgmath::Vector3::unit_z() * -2.5) + .inverse_transform() + .unwrap(), + ), + proj_inverse: cgmath::conv::array4x4( + cgmath::perspective(cgmath::Deg(60.0), 1024.0 / 768.0, 0.1, 512.0) + .inverse_transform() + .unwrap(), + ), + } + } +} + +fn main() { + #[cfg(target_arch = "wasm32")] + console_log::init_with_level(log::Level::Debug).unwrap(); + + #[cfg(not(target_arch = "wasm32"))] + env_logger::init(); + + #[cfg(not(any( + feature = "vulkan", + feature = "dx11", + feature = "dx12", + feature = "metal", + feature = "gl", + )))] + eprintln!( + "You are running the example with the empty backend, no graphical output is to be expected" + ); + + let event_loop = winit::event_loop::EventLoop::new(); + + let wb = winit::window::WindowBuilder::new() + .with_min_inner_size(winit::dpi::Size::Logical(winit::dpi::LogicalSize::new( + 64.0, 64.0, + ))) + .with_inner_size(winit::dpi::Size::Physical(winit::dpi::PhysicalSize::new( + DIMS.width, + DIMS.height, + ))) + .with_title("ray-tracing".to_string()); + + // instantiate backend + let window = wb.build(&event_loop).unwrap(); + + #[cfg(target_arch = "wasm32")] + web_sys::window() + .unwrap() + .document() + .unwrap() + .body() + .unwrap() + .append_child(&winit::platform::web::WindowExtWebSys::canvas(&window)) + .unwrap(); + + let instance = + back::Instance::create("gfx-rs ray-tracing", 1).expect("Failed to create an instance!"); + + let surface = unsafe { + instance + .create_surface(&window) + .expect("Failed to create a surface!") + }; + + let adapter = { + let mut adapters = instance.enumerate_adapters(); + for adapter in &adapters { + println!("{:?}", adapter.info); + } + adapters.remove(0) + }; + + let mut renderer = Renderer::new(instance, surface, adapter); + + renderer.render(); + + // It is important that the closure move captures the 
Renderer, + // otherwise it will not be dropped when the event loop exits. + event_loop.run(move |event, _, control_flow| { + *control_flow = winit::event_loop::ControlFlow::Wait; + + match event { + winit::event::Event::WindowEvent { event, .. } => match event { + winit::event::WindowEvent::CloseRequested => { + *control_flow = winit::event_loop::ControlFlow::Exit + } + winit::event::WindowEvent::KeyboardInput { + input: + winit::event::KeyboardInput { + virtual_keycode: Some(winit::event::VirtualKeyCode::Escape), + .. + }, + .. + } => *control_flow = winit::event_loop::ControlFlow::Exit, + winit::event::WindowEvent::Resized(dims) => { + println!("resized to {:?}", dims); + // renderer.dimensions = window::Extent2D { + // width: dims.width, + // height: dims.height, + // }; + renderer.recreate_swapchain(); + } + _ => {} + }, + winit::event::Event::RedrawEventsCleared => { + renderer.render(); + } + _ => {} + } + }); +} + +struct Renderer { + properties: PhysicalDeviceProperties, + desc_pool: ManuallyDrop, + surface: ManuallyDrop, + format: hal::format::Format, + dimensions: window::Extent2D, + viewport: pso::Viewport, + + bottom_level_accel_struct: ManuallyDrop>, + top_level_accel_struct: ManuallyDrop>, + storage_image: ManuallyDrop, + storage_image_view: ManuallyDrop, + uniform_buffer: ManuallyDrop, + uniform_buffer_memory: ManuallyDrop, + + pipeline: ManuallyDrop, + pipeline_layout: ManuallyDrop, + raygen_shader_binding_table: ManuallyDrop, + raygen_shader_binding_table_memory: ManuallyDrop, + miss_shader_binding_table: ManuallyDrop, + miss_shader_binding_table_memory: ManuallyDrop, + closest_hit_shader_binding_table: ManuallyDrop, + closest_hit_shader_binding_table_memory: ManuallyDrop, + + submission_complete_semaphores: Vec, + submission_complete_fences: Vec, + cmd_pools: Vec, + cmd_buffers: Vec, + desc_set: B::DescriptorSet, + frames_in_flight: usize, + frame: u64, + // These members are dropped in the declaration order. 
+ device: B::Device, + adapter: hal::adapter::Adapter, + queue_group: hal::queue::QueueGroup, + instance: B::Instance, +} + +impl Renderer +where + B: hal::Backend, +{ + fn new( + instance: B::Instance, + mut surface: B::Surface, + adapter: hal::adapter::Adapter, + ) -> Renderer { + // Create device + let required_features = + hal::Features::ACCELERATION_STRUCTURE | hal::Features::RAY_TRACING_PIPELINE; + + // TODO search through all adapters in case the non-first one supports our required features? + assert!(adapter + .physical_device + .features() + .contains(required_features)); + + let memory_types = adapter.physical_device.memory_properties().memory_types; + let properties = adapter.physical_device.properties(); + + // Build a new device and associated command queues + let family = adapter + .queue_families + .iter() + .find(|family| { + surface.supports_queue_family(family) && family.queue_type().supports_graphics() + }) + .expect("No queue family supports presentation"); + let mut gpu = unsafe { + adapter + .physical_device + .open(&[(family, &[1.0])], required_features) + .unwrap() + }; + let mut queue_group = gpu.queue_groups.pop().unwrap(); + let device = gpu.device; + + let caps = surface.capabilities(&adapter.physical_device); + let format = { + let formats = surface.supported_formats(&adapter.physical_device); + formats.map_or(format::Format::Rgba8Srgb, |formats| { + formats + .iter() + .find(|format| format.base_format().1 == format::ChannelType::Srgb) + .map(|format| *format) + .unwrap_or(formats[0]) + }) + }; + + let swap_config = { + let mut swap_config = window::SwapchainConfig::from_caps(&caps, format, DIMS); + swap_config.image_usage |= image::Usage::TRANSFER_DST; + swap_config + }; + println!("{:?}", swap_config); + let extent = swap_config.extent; + // Define maximum number of frames we want to be able to be "in flight" (being computed simultaneously) at once + let frames_in_flight = swap_config.image_count as usize; + unsafe { + surface + 
.configure_swapchain(&device, swap_config) + .expect("Can't configure swapchain"); + }; + + let mut command_pool = unsafe { + device.create_command_pool(queue_group.family, pool::CommandPoolCreateFlags::empty()) + } + .expect("Can't create command pool"); + + unsafe { + // Create storage image + let mut storage_image = device + .create_image( + image::Kind::D2(extent.width, extent.height, 1, 1), + 1, + format::Format::Bgra8Unorm, + image::Tiling::Optimal, + image::Usage::TRANSFER_SRC | image::Usage::STORAGE, + memory::SparseFlags::empty(), + image::ViewCapabilities::empty(), + ) + .unwrap(); + + let memory_requirements = device.get_image_requirements(&storage_image); + let memory_type = memory_types + .iter() + .enumerate() + .position(|(id, memory_type)| { + memory_requirements.type_mask & (1 << id) != 0 + && memory_type + .properties + .contains(hal::memory::Properties::DEVICE_LOCAL) + }) + .unwrap() + .into(); + let storage_image_memory = device + .allocate_memory(memory_type, memory_requirements.size) + .unwrap(); + device + .bind_image_memory(&storage_image_memory, 0, &mut storage_image) + .unwrap(); + + let storage_image_view = device + .create_image_view( + &storage_image, + image::ViewKind::D2, + format::Format::Bgra8Unorm, + format::Swizzle::NO, + image::Usage::STORAGE, + image::SubresourceRange { + aspects: format::Aspects::COLOR, + level_start: 0, + level_count: Some(1), + layer_start: 0, + layer_count: Some(1), + }, + ) + .unwrap(); + + let mut build_fence = device.create_fence(false).unwrap(); + let mut cmd_buffer = command_pool.allocate_one(command::Level::Primary); + cmd_buffer.begin_primary(command::CommandBufferFlags::ONE_TIME_SUBMIT); + cmd_buffer.pipeline_barrier( + pso::PipelineStage::TRANSFER..pso::PipelineStage::TRANSFER, + memory::Dependencies::empty(), + iter::once(memory::Barrier::Image { + states: (image::Access::empty(), image::Layout::Undefined) + ..(image::Access::empty(), image::Layout::General), + target: &storage_image, + range: 
image::SubresourceRange { + aspects: format::Aspects::COLOR, + level_start: 0, + level_count: Some(1), + layer_start: 0, + layer_count: Some(1), + }, + families: None, + }), + ); + cmd_buffer.finish(); + queue_group.queues[0].submit( + iter::once(&cmd_buffer), + iter::empty(), + iter::empty(), + Some(&mut build_fence), + ); + + // Create uniform buffer + let uniform_data: CameraProperties = Default::default(); + let (uniform_buffer, uniform_buffer_memory) = upload_to_buffer::( + &device, + properties.limits.non_coherent_atom_size as u64, + &memory_types, + buffer::Usage::UNIFORM, + &[uniform_data], + ); + + // Create blas + let triangle_vertices = &[ + Vertex { + a_Pos: [1.0, 1.0, 0.0], + }, + Vertex { + a_Pos: [-1.0, 1.0, 0.0], + }, + Vertex { + a_Pos: [0.0, -1.0, 0.0], + }, + ]; + + let triangle_indices: &[u16] = &[0, 1, 2]; + let triangle_indices: &[u16] = &[0, 1, 2, 0, 2, 1]; // todo + + let vertex_buffer = upload_to_buffer::( + &device, + properties.limits.non_coherent_atom_size as u64, + &memory_types, + buffer::Usage::ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY + | buffer::Usage::SHADER_DEVICE_ADDRESS, + triangle_vertices, + ); + + let index_buffer = upload_to_buffer::( + &device, + properties.limits.non_coherent_atom_size as u64, + &memory_types, + buffer::Usage::ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY + | buffer::Usage::SHADER_DEVICE_ADDRESS, + triangle_indices, + ); + + let geometry_desc = accel::GeometryDesc { + flags: accel::Flags::ALLOW_COMPACTION, + ty: accel::Type::BottomLevel, + geometries: &[&accel::Geometry { + flags: accel::GeometryFlags::OPAQUE, + geometry: accel::GeometryData::Triangles(accel::GeometryTriangles { + vertex_format: format::Format::Rgb32Sfloat, + vertex_buffer: &vertex_buffer.0, + vertex_buffer_offset: 0, + vertex_buffer_stride: std::mem::size_of::() as u32, + max_vertex: triangle_vertices.len() as u64, + index_buffer: Some((&index_buffer.0, 0, IndexType::U16)), + transform: None, + }), + }], + }; + + let 
triangle_primitive_count = (triangle_indices.len() / 3) as u32; + let triangle_blas_requirements = device.get_acceleration_structure_build_requirements( + &geometry_desc, + &[triangle_primitive_count], + ); + + let scratch_buffer = create_empty_buffer::( + &device, + properties.limits.non_coherent_atom_size as u64, + &memory_types, + buffer::Usage::ACCELERATION_STRUCTURE_STORAGE + | buffer::Usage::SHADER_DEVICE_ADDRESS, + triangle_blas_requirements.build_scratch_size, + ); + + let accel_struct_bottom_buffer = create_empty_buffer::( + &device, + properties.limits.non_coherent_atom_size as u64, + &memory_types, + buffer::Usage::ACCELERATION_STRUCTURE_STORAGE + | buffer::Usage::SHADER_DEVICE_ADDRESS, + triangle_blas_requirements.acceleration_structure_size, + ); + + let mut triangle_blas = AccelerationStructure:: { + accel_struct: device + .create_acceleration_structure(&accel::CreateDesc { + buffer: &accel_struct_bottom_buffer.0, + buffer_offset: 0, + size: triangle_blas_requirements.acceleration_structure_size, + ty: accel::Type::BottomLevel, + }) + .unwrap(), + backing: accel_struct_bottom_buffer, + }; + + device.set_acceleration_structure_name(&mut triangle_blas.accel_struct, "triangle"); + + let mut build_fence = device.create_fence(false).unwrap(); + let mut cmd_buffer = command_pool.allocate_one(command::Level::Primary); + cmd_buffer.begin_primary(command::CommandBufferFlags::ONE_TIME_SUBMIT); + cmd_buffer.build_acceleration_structure( + &accel::BuildDesc { + src: None, + dst: &triangle_blas.accel_struct, + geometry: &geometry_desc, + scratch: &scratch_buffer.0, + scratch_offset: 0, + }, + &[accel::BuildRangeDesc { + primitive_count: triangle_primitive_count, + primitive_offset: 0, + first_vertex: 0, + transform_offset: 0, + }][..], + ); + // cmd_buffer.pipeline_barrier( + // pso::PipelineStage::ACCELERATION_STRUCTURE_BUILD + // ..pso::PipelineStage::ACCELERATION_STRUCTURE_BUILD, + // memory::Dependencies::empty(), + // iter::once(memory::Barrier::AllBuffers( + 
// buffer::Access::ACCELERATION_STRUCTURE_WRITE + // ..buffer::Access::ACCELERATION_STRUCTURE_READ, + // )), + // ); + cmd_buffer.finish(); + queue_group.queues[0].submit( + iter::once(&cmd_buffer), + iter::empty(), + iter::empty(), + Some(&mut build_fence), + ); + device + .wait_for_fence(&build_fence, !0) + .expect("Can't wait for fence"); + device.free_memory(scratch_buffer.1); + device.destroy_buffer(scratch_buffer.0); + + // Create tlas + let instances = [accel::Instance::new( + device.get_acceleration_structure_address(&triangle_blas.accel_struct), + )]; + + let instances_buffer = upload_to_buffer::( + &device, + properties.limits.non_coherent_atom_size as u64, + &memory_types, + buffer::Usage::ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY + | buffer::Usage::SHADER_DEVICE_ADDRESS, + &instances, + ); + + let top_level_geometry_desc = accel::GeometryDesc { + flags: accel::Flags::ALLOW_COMPACTION, + ty: accel::Type::TopLevel, + geometries: &[&accel::Geometry { + flags: accel::GeometryFlags::OPAQUE, + geometry: accel::GeometryData::Instances(accel::GeometryInstances { + buffer: &instances_buffer.0, + buffer_offset: 0, + }), + }], + }; + + let tlas_requirements = device + .get_acceleration_structure_build_requirements(&top_level_geometry_desc, &[1]); + + let tlas_scratch_buffer = create_empty_buffer::( + &device, + properties.limits.non_coherent_atom_size as u64, + &memory_types, + buffer::Usage::ACCELERATION_STRUCTURE_STORAGE + | buffer::Usage::SHADER_DEVICE_ADDRESS, + tlas_requirements.build_scratch_size, + ); + + let tlas_buffer = create_empty_buffer::( + &device, + properties.limits.non_coherent_atom_size as u64, + &memory_types, + buffer::Usage::ACCELERATION_STRUCTURE_STORAGE + | buffer::Usage::SHADER_DEVICE_ADDRESS, + tlas_requirements.acceleration_structure_size, + ); + + let mut tlas = AccelerationStructure:: { + accel_struct: device + .create_acceleration_structure(&accel::CreateDesc { + buffer: &tlas_buffer.0, + buffer_offset: 0, + size: 
tlas_requirements.acceleration_structure_size, + ty: accel::Type::TopLevel, + }) + .unwrap(), + backing: tlas_buffer, + }; + + device.set_acceleration_structure_name(&mut tlas.accel_struct, "tlas"); + + let mut build_fence = device.create_fence(false).unwrap(); + let mut cmd_buffer = command_pool.allocate_one(command::Level::Primary); + cmd_buffer.begin_primary(command::CommandBufferFlags::ONE_TIME_SUBMIT); + cmd_buffer.build_acceleration_structure( + &accel::BuildDesc { + src: None, + dst: &tlas.accel_struct, + geometry: &top_level_geometry_desc, + scratch: &tlas_scratch_buffer.0, + scratch_offset: 0, + }, + &[accel::BuildRangeDesc { + primitive_count: 1, + primitive_offset: 0, + first_vertex: 0, + transform_offset: 0, + }][..], + ); + // cmd_buffer.pipeline_barrier( + // pso::PipelineStage::ACCELERATION_STRUCTURE_BUILD + // ..pso::PipelineStage::ACCELERATION_STRUCTURE_BUILD, + // memory::Dependencies::empty(), + // iter::once(memory::Barrier::AllBuffers( + // buffer::Access::ACCELERATION_STRUCTURE_WRITE + // ..buffer::Access::ACCELERATION_STRUCTURE_READ, + // )), + // ); + cmd_buffer.finish(); + queue_group.queues[0].submit( + iter::once(&cmd_buffer), + iter::empty(), + iter::empty(), + Some(&mut build_fence), + ); + device + .wait_for_fence(&build_fence, !0) + .expect("Can't wait for fence"); + device.free_memory(tlas_scratch_buffer.1); + device.destroy_buffer(tlas_scratch_buffer.0); + + // Create uniform buffer + // TODO + + // Create rt pipeline + let desc_set_layout = device + .create_descriptor_set_layout( + vec![ + pso::DescriptorSetLayoutBinding { + binding: 0, + ty: pso::DescriptorType::AccelerationStructure, + count: 1, + stage_flags: pso::ShaderStageFlags::RAYGEN, + immutable_samplers: false, + }, + pso::DescriptorSetLayoutBinding { + binding: 1, + ty: pso::DescriptorType::Image { + ty: pso::ImageDescriptorType::Storage { read_only: false }, + }, + count: 1, + stage_flags: pso::ShaderStageFlags::RAYGEN, + immutable_samplers: false, + }, + 
pso::DescriptorSetLayoutBinding { + binding: 2, + ty: pso::DescriptorType::Buffer { + ty: pso::BufferDescriptorType::Uniform, + format: pso::BufferDescriptorFormat::Structured { + dynamic_offset: false, + }, + }, + count: 1, + stage_flags: pso::ShaderStageFlags::RAYGEN, + immutable_samplers: false, + }, + ] + .into_iter(), + iter::empty(), + ) + .unwrap(); + + let pipeline_layout = device + .create_pipeline_layout(iter::once(&desc_set_layout), iter::empty()) + .unwrap(); + + let raygen_module = device + .create_shader_module( + &auxil::read_spirv(Cursor::new(&include_bytes!("./data/simple.rgen.spv")[..])) + .unwrap(), + ) + .unwrap(); + + let miss_module = device + .create_shader_module( + &auxil::read_spirv(Cursor::new(&include_bytes!("./data/simple.rmiss.spv")[..])) + .unwrap(), + ) + .unwrap(); + + let closest_hit_module = device + .create_shader_module( + &auxil::read_spirv(Cursor::new(&include_bytes!("./data/simple.rchit.spv")[..])) + .unwrap(), + ) + .unwrap(); + + let stages = vec![ + pso::ShaderStageDesc { + stage: pso::ShaderStageFlags::RAYGEN, + entry_point: pso::EntryPoint { + entry: "main", + module: &raygen_module, + specialization: pso::Specialization::EMPTY, + }, + }, + pso::ShaderStageDesc { + stage: pso::ShaderStageFlags::MISS, + entry_point: pso::EntryPoint { + entry: "main", + module: &miss_module, + specialization: pso::Specialization::EMPTY, + }, + }, + pso::ShaderStageDesc { + stage: pso::ShaderStageFlags::CLOSEST_HIT, + entry_point: pso::EntryPoint { + entry: "main", + module: &closest_hit_module, + specialization: pso::Specialization::EMPTY, + }, + }, + ]; + + let groups = vec![ + pso::ShaderGroupDesc::General { general_shader: 0 }, + pso::ShaderGroupDesc::General { general_shader: 1 }, + pso::ShaderGroupDesc::TrianglesHitGroup { + closest_hit_shader: Some(2), + any_hit_shader: None, + }, + ]; + + let pipeline = device + .create_ray_tracing_pipeline( + &pso::RayTracingPipelineDesc::new(&stages, &groups, 1, &pipeline_layout), + None, + ) + 
.unwrap(); + + // Create sbt + // inline uint32_t aligned_size(uint32_t value, uint32_t alignment) + // return (value + alignment - 1) & ~(alignment - 1); + + let handle_size = properties.ray_tracing_pipeline.shader_group_handle_size as usize; + + let shader_handle_data = device + .get_ray_tracing_shader_group_handles( + &pipeline, + 0, + groups.len() as u32, + groups.len() * handle_size, + ) + .unwrap(); + + let raygen_shader_binding_table = upload_to_buffer::( + &device, + properties + .ray_tracing_pipeline + .shader_group_handle_alignment as u64, + &memory_types, + buffer::Usage::SHADER_BINDING_TABLE + | buffer::Usage::TRANSFER_SRC // todo needed? + | buffer::Usage::SHADER_DEVICE_ADDRESS, + &shader_handle_data[0..handle_size], + ); + + let miss_shader_binding_table = upload_to_buffer::( + &device, + properties + .ray_tracing_pipeline + .shader_group_handle_alignment as u64, + &memory_types, + buffer::Usage::SHADER_BINDING_TABLE + | buffer::Usage::TRANSFER_SRC // todo needed? + | buffer::Usage::SHADER_DEVICE_ADDRESS, + &shader_handle_data[handle_size..handle_size * 2], + ); + + let closest_hit_shader_binding_table = upload_to_buffer::( + &device, + properties + .ray_tracing_pipeline + .shader_group_handle_alignment as u64, + &memory_types, + buffer::Usage::SHADER_BINDING_TABLE + | buffer::Usage::TRANSFER_SRC // todo needed? 
+ | buffer::Usage::SHADER_DEVICE_ADDRESS, + &shader_handle_data[handle_size * 2..handle_size * 3], + ); + + // Create desc sets + // TODO + let mut desc_pool = device + .create_descriptor_pool( + 1, + vec![ + pso::DescriptorRangeDesc { + ty: pso::DescriptorType::AccelerationStructure, + count: 1, + }, + pso::DescriptorRangeDesc { + ty: pso::DescriptorType::Image { + ty: pso::ImageDescriptorType::Storage { read_only: false }, + }, + count: 1, + }, + pso::DescriptorRangeDesc { + ty: pso::DescriptorType::Buffer { + ty: pso::BufferDescriptorType::Uniform, + format: pso::BufferDescriptorFormat::Structured { + dynamic_offset: false, + }, + }, + count: 1, + }, + ] + .into_iter(), + pso::DescriptorPoolCreateFlags::empty(), + ) + .unwrap(); + + let mut desc_set = desc_pool.allocate_one(&desc_set_layout).unwrap(); + + device.write_descriptor_set(pso::DescriptorSetWrite { + set: &mut desc_set, + binding: 0, + array_offset: 0, + descriptors: vec![ + pso::Descriptor::AccelerationStructure(&tlas.accel_struct), + pso::Descriptor::Image(&storage_image_view, image::Layout::General), + pso::Descriptor::Buffer(&uniform_buffer, buffer::SubRange::WHOLE), + ] + .into_iter(), + }); + + // Create cmd buffer + + // The number of the rest of the resources is based on the frames in flight. 
+ let mut submission_complete_semaphores = Vec::with_capacity(frames_in_flight); + let mut submission_complete_fences = Vec::with_capacity(frames_in_flight); + let mut cmd_pools = Vec::with_capacity(frames_in_flight); + let mut cmd_buffers = Vec::with_capacity(frames_in_flight); + + cmd_pools.push(command_pool); + for _ in 1..frames_in_flight { + cmd_pools.push( + device + .create_command_pool( + queue_group.family, + pool::CommandPoolCreateFlags::empty(), + ) + .expect("Can't create command pool"), + ); + } + + for i in 0..frames_in_flight { + submission_complete_semaphores.push( + device + .create_semaphore() + .expect("Could not create semaphore"), + ); + submission_complete_fences + .push(device.create_fence(true).expect("Could not create fence")); + cmd_buffers.push(cmd_pools[i].allocate_one(command::Level::Primary)); + } + + Self { + properties, + desc_pool: ManuallyDrop::new(desc_pool), + surface: ManuallyDrop::new(surface), + format, + dimensions: extent, + viewport: pso::Viewport { + rect: pso::Rect { + x: 0, + y: 0, + w: extent.width as _, + h: extent.height as _, + }, + depth: 0.0..1.0, + }, + + bottom_level_accel_struct: ManuallyDrop::new(triangle_blas), + top_level_accel_struct: ManuallyDrop::new(tlas), + storage_image: ManuallyDrop::new(storage_image), + storage_image_view: ManuallyDrop::new(storage_image_view), + uniform_buffer: ManuallyDrop::new(uniform_buffer), + uniform_buffer_memory: ManuallyDrop::new(uniform_buffer_memory), + + pipeline: ManuallyDrop::new(pipeline), + pipeline_layout: ManuallyDrop::new(pipeline_layout), + raygen_shader_binding_table: ManuallyDrop::new(raygen_shader_binding_table.0), + raygen_shader_binding_table_memory: ManuallyDrop::new( + raygen_shader_binding_table.1, + ), + miss_shader_binding_table: ManuallyDrop::new(miss_shader_binding_table.0), + miss_shader_binding_table_memory: ManuallyDrop::new(miss_shader_binding_table.1), + closest_hit_shader_binding_table: ManuallyDrop::new( + closest_hit_shader_binding_table.0, + 
), + closest_hit_shader_binding_table_memory: ManuallyDrop::new( + closest_hit_shader_binding_table.1, + ), + submission_complete_semaphores, + submission_complete_fences, + cmd_pools, + cmd_buffers, + desc_set, + frames_in_flight, + frame: 0, + device, + adapter, + queue_group, + instance, + } + } + } + + fn recreate_swapchain(&mut self) { + // let caps = self.surface.capabilities(&self.adapter.physical_device); + // let swap_config = window::SwapchainConfig::from_caps(&caps, self.format, self.dimensions); + // println!("{:?}", swap_config); + + // let extent = swap_config.extent.to_extent(); + // self.viewport.rect.w = extent.width as _; + // self.viewport.rect.h = extent.height as _; + + // unsafe { + // self.device.wait_idle().unwrap(); + // self.device + // .destroy_framebuffer(ManuallyDrop::into_inner(ptr::read(&self.framebuffer))); + // self.framebuffer = ManuallyDrop::new( + // self.device + // .create_framebuffer( + // &self.render_pass, + // iter::once(swap_config.framebuffer_attachment()), + // extent, + // ) + // .unwrap(), + // ) + // }; + + // unsafe { + // self.surface + // .configure_swapchain(&self.device, swap_config) + // .expect("Can't create swapchain"); + // } + } + + fn render(&mut self) { + unsafe { + let surface_image = match self.surface.acquire_image(!0) { + Ok((image, _)) => image, + Err(_) => { + self.recreate_swapchain(); + return; + } + }; + + // Compute index into our resource ring buffers based on the frame number + // and number of frames in flight. Pay close attention to where this index is needed + // versus when the swapchain image index we got from acquire_image is needed. + let frame_idx = self.frame as usize % self.frames_in_flight; + + // Wait for the fence of the previous submission of this frame and reset it; ensures we are + // submitting only up to maximum number of frames_in_flight if we are submitting faster than + // the gpu can keep up with. 
This would also guarantee that any resources which need to be + // updated with a CPU->GPU data copy are not in use by the GPU, so we can perform those updates. + // In this case there are none to be done, however. + let fence = &mut self.submission_complete_fences[frame_idx]; + self.device + .wait_for_fence(fence, !0) + .expect("Failed to wait for fence"); + self.device + .reset_fence(fence) + .expect("Failed to reset fence"); + self.cmd_pools[frame_idx].reset(false); + + // Rendering + let cmd_buffer = &mut self.cmd_buffers[frame_idx]; + cmd_buffer.begin_primary(command::CommandBufferFlags::ONE_TIME_SUBMIT); + + // Trace the rays + cmd_buffer.bind_ray_tracing_pipeline(&self.pipeline); + cmd_buffer.bind_ray_tracing_descriptor_sets( + &self.pipeline_layout, + 0, + iter::once(&self.desc_set), + iter::empty(), + ); + let handle_size = self + .properties + .ray_tracing_pipeline + .shader_group_handle_size; + cmd_buffer.trace_rays( + Some(pso::ShaderBindingTable { + buffer: &self.raygen_shader_binding_table, + offset: 0, + stride: handle_size, + size: handle_size as u64, + }), + Some(pso::ShaderBindingTable { + buffer: &self.miss_shader_binding_table, + offset: 0, + stride: handle_size, + size: handle_size as u64, + }), + Some(pso::ShaderBindingTable { + buffer: &self.closest_hit_shader_binding_table, + offset: 0, + stride: handle_size, + size: handle_size as u64, + }), + None, + [self.dimensions.width, self.dimensions.height, 1], + ); + + // Copy storage image to output + cmd_buffer.pipeline_barrier( + pso::PipelineStage::TRANSFER..pso::PipelineStage::TRANSFER, + memory::Dependencies::empty(), + iter::once(memory::Barrier::Image { + states: (image::Access::empty(), image::Layout::Undefined) + ..( + image::Access::TRANSFER_WRITE, + image::Layout::TransferDstOptimal, + ), + target: surface_image.borrow(), + range: image::SubresourceRange { + aspects: format::Aspects::COLOR, + level_start: 0, + level_count: Some(1), + layer_start: 0, + layer_count: Some(1), + }, + 
families: None, + }), + ); + cmd_buffer.pipeline_barrier( + pso::PipelineStage::TRANSFER..pso::PipelineStage::TRANSFER, + memory::Dependencies::empty(), + iter::once(memory::Barrier::Image { + states: (image::Access::empty(), image::Layout::General) + ..( + image::Access::TRANSFER_READ, + image::Layout::TransferSrcOptimal, + ), + target: self.storage_image.deref(), + range: image::SubresourceRange { + aspects: format::Aspects::COLOR, + level_start: 0, + level_count: Some(1), + layer_start: 0, + layer_count: Some(1), + }, + families: None, + }), + ); + cmd_buffer.copy_image( + &self.storage_image, + image::Layout::TransferSrcOptimal, + surface_image.borrow(), + image::Layout::TransferDstOptimal, + iter::once(command::ImageCopy { + src_subresource: image::SubresourceLayers { + aspects: format::Aspects::COLOR, + level: 0, + layers: 0..1, + }, + src_offset: image::Offset::ZERO, + dst_subresource: image::SubresourceLayers { + aspects: format::Aspects::COLOR, + level: 0, + layers: 0..1, + }, + dst_offset: image::Offset::ZERO, + extent: image::Extent { + width: self.dimensions.width, + height: self.dimensions.height, + depth: 1, + }, + }), + ); + cmd_buffer.pipeline_barrier( + pso::PipelineStage::TRANSFER..pso::PipelineStage::TRANSFER, + memory::Dependencies::empty(), + iter::once(memory::Barrier::Image { + states: ( + image::Access::TRANSFER_WRITE, + image::Layout::TransferDstOptimal, + )..(image::Access::empty(), image::Layout::Present), + target: surface_image.borrow(), + range: image::SubresourceRange { + aspects: format::Aspects::COLOR, + level_start: 0, + level_count: Some(1), + layer_start: 0, + layer_count: Some(1), + }, + families: None, + }), + ); + cmd_buffer.pipeline_barrier( + pso::PipelineStage::TRANSFER..pso::PipelineStage::TRANSFER, + memory::Dependencies::empty(), + iter::once(memory::Barrier::Image { + states: ( + image::Access::TRANSFER_READ, + image::Layout::TransferSrcOptimal, + )..(image::Access::empty(), image::Layout::General), + target: 
self.storage_image.deref(), + range: image::SubresourceRange { + aspects: format::Aspects::COLOR, + level_start: 0, + level_count: Some(1), + layer_start: 0, + layer_count: Some(1), + }, + families: None, + }), + ); + + cmd_buffer.finish(); + + self.queue_group.queues[0].submit( + iter::once(&*cmd_buffer), + iter::empty(), + iter::once(&self.submission_complete_semaphores[frame_idx]), + Some(&mut self.submission_complete_fences[frame_idx]), + ); + + // present frame + let result = self.queue_group.queues[0].present( + &mut self.surface, + surface_image, + Some(&mut self.submission_complete_semaphores[frame_idx]), + ); + + if result.is_err() { + self.recreate_swapchain(); + } + + // Increment our frame + self.frame += 1; + } + } +} + +impl Drop for Renderer +where + B: hal::Backend, +{ + fn drop(&mut self) { + unsafe { + // let _ = self.desc_set.take(); + self.device + .destroy_descriptor_pool(ManuallyDrop::take(&mut self.desc_pool)); + // self.device + // .destroy_descriptor_set_layout(ManuallyDrop::into_inner(ptr::read( + // &self.set_layout, + // ))); + + for p in self.cmd_pools.drain(..) { + self.device.destroy_command_pool(p); + } + for s in self.submission_complete_semaphores.drain(..) { + self.device.destroy_semaphore(s); + } + for f in self.submission_complete_fences.drain(..) 
{ + self.device.destroy_fence(f); + } + + self.device + .destroy_ray_tracing_pipeline(ManuallyDrop::take(&mut self.pipeline)); + self.device + .destroy_pipeline_layout(ManuallyDrop::take(&mut self.pipeline_layout)); + self.instance + .destroy_surface(ManuallyDrop::take(&mut self.surface)); + } + } +} + +#[derive(Debug)] +struct AccelerationStructure { + pub accel_struct: B::AccelerationStructure, + pub backing: (B::Buffer, B::Memory), +} + +fn create_empty_buffer( + device: &B::Device, + alignment: u64, + memory_types: &[adapter::MemoryType], + usage: buffer::Usage, + size: u64, +) -> (B::Buffer, B::Memory) { + assert_ne!(size, 0); + let padded_buffer_len = ((size + alignment - 1) / alignment) * alignment; + + let mut buffer = + unsafe { device.create_buffer(padded_buffer_len, usage, memory::SparseFlags::empty()) } + .unwrap(); + + let buffer_req = unsafe { device.get_buffer_requirements(&buffer) }; + + let upload_type = memory_types + .iter() + .enumerate() + .position(|(id, mem_type)| { + // type_mask is a bit field where each bit represents a memory type. If the bit is set + // to 1 it means we can use that type for our buffer. So this code finds the first + // memory type that has a `1` (or, is allowed), and is visible to the CPU. 
+ buffer_req.type_mask & (1 << id) != 0 + && mem_type + .properties + .contains(memory::Properties::CPU_VISIBLE) + }) + .unwrap() + .into(); + + // TODO: check transitions: read/write mapping and buffer read + let buffer_memory = unsafe { + let memory = device + .allocate_memory(upload_type, buffer_req.size) + .unwrap(); + device.bind_buffer_memory(&memory, 0, &mut buffer).unwrap(); + memory + }; + + (buffer, buffer_memory) +} + +fn upload_to_buffer( + device: &B::Device, + alignment: u64, + memory_types: &[adapter::MemoryType], + usage: buffer::Usage, + data: &[T], +) -> (B::Buffer, B::Memory) { + let buffer_stride = mem::size_of::() as u64; + let buffer_len = data.len() as u64 * buffer_stride; + + let (buffer, mut buffer_memory) = + create_empty_buffer::(device, alignment, memory_types, usage, buffer_len); + + unsafe { + let mapping = device + .map_memory(&mut buffer_memory, memory::Segment::ALL) + .unwrap(); + ptr::copy_nonoverlapping(data.as_ptr() as *const u8, mapping, buffer_len as usize); + device + .flush_mapped_memory_ranges(iter::once((&buffer_memory, memory::Segment::ALL))) + .unwrap(); + device.unmap_memory(&mut buffer_memory); + } + + (buffer, buffer_memory) +} diff --git a/src/backend/empty/src/lib.rs b/src/backend/empty/src/lib.rs index f440add70ee..e387b531bc9 100644 --- a/src/backend/empty/src/lib.rs +++ b/src/backend/empty/src/lib.rs @@ -50,6 +50,7 @@ impl hal::Backend for Backend { type ComputePipeline = (); type GraphicsPipeline = (); + type RayTracingPipeline = (); type PipelineCache = (); type PipelineLayout = (); type DescriptorSetLayout = DescriptorSetLayout; @@ -63,6 +64,8 @@ impl hal::Backend for Backend { type Display = (); type DisplayMode = (); + + type AccelerationStructure = (); } /// Dummy physical device. 
diff --git a/src/backend/vulkan/src/command.rs b/src/backend/vulkan/src/command.rs index e7ab354b1ba..43131cc686f 100644 --- a/src/backend/vulkan/src/command.rs +++ b/src/backend/vulkan/src/command.rs @@ -661,6 +661,33 @@ impl com::CommandBuffer for CommandBuffer { ); } + unsafe fn bind_ray_tracing_pipeline(&mut self, pipeline: &n::RayTracingPipeline) { + self.device.raw.cmd_bind_pipeline( + self.raw, + vk::PipelineBindPoint::RAY_TRACING_KHR, + pipeline.0, + ) + } + + unsafe fn bind_ray_tracing_descriptor_sets<'a, I, J>( + &mut self, + layout: &n::PipelineLayout, + first_set: usize, + sets: I, + offsets: J, + ) where + I: Iterator, + J: Iterator, + { + self.bind_descriptor_sets( + vk::PipelineBindPoint::RAY_TRACING_KHR, + layout, + first_set, + sets, + offsets, + ); + } + unsafe fn dispatch(&mut self, count: WorkGroupCount) { self.device .raw @@ -1048,6 +1075,227 @@ impl com::CommandBuffer for CommandBuffer { ) } + unsafe fn build_acceleration_structure<'a>( + &self, + desc: &'a hal::acceleration_structure::BuildDesc<'a, Backend>, + ranges: &'a [hal::acceleration_structure::BuildRangeDesc], + ) { + let geometries = conv::map_geometries(&self.device, desc.geometry.geometries.iter()); + self.device + .extension_fns + .acceleration_structure + .as_ref() + .expect("Feature ACCELERATION_STRUCTURE must be enabled to call build_acceleration_structure").unwrap_extension() + .cmd_build_acceleration_structures( + self.raw, + &[conv::map_geometry_info_without_geometries(&self.device, desc).geometries(&geometries).build()], + &[conv::map_build_ranges_infos(ranges)], + ); + } + + unsafe fn build_acceleration_structure_indirect<'a>( + &self, + desc: &'a hal::acceleration_structure::BuildDesc<'a, Backend>, + buffer: &'a n::Buffer, + offset: buffer::Offset, + stride: buffer::Stride, + max_primitive_counts: &'a [u32], + ) { + let geometries = conv::map_geometries(&self.device, desc.geometry.geometries.iter()); + self.device + .extension_fns + .acceleration_structure + .as_ref() + 
.expect("Feature ACCELERATION_STRUCTURE must be enabled to call build_acceleration_structure_indirect").unwrap_extension() + .cmd_build_acceleration_structures_indirect( + self.raw, + &[conv::map_geometry_info_without_geometries(&self.device, desc).geometries(&geometries).build()], + &[self.device.get_buffer_device_address(buffer, offset)], + &[stride], + &[max_primitive_counts], + ); + } + + unsafe fn copy_acceleration_structure( + &self, + src: &n::AccelerationStructure, + dst: &n::AccelerationStructure, + mode: hal::acceleration_structure::CopyMode, + ) { + self.device + .extension_fns + .acceleration_structure + .as_ref() + .expect("Feature ACCELERATION_STRUCTURE must be enabled to call copy_acceleration_structure").unwrap_extension() + .cmd_copy_acceleration_structure( + self.raw, + &vk::CopyAccelerationStructureInfoKHR::builder() + .src(src.0) + .dst(dst.0) + .mode(conv::map_acceleration_structure_copy_mode(mode)) + .build(), + ); + } + + unsafe fn serialize_acceleration_structure_to_memory( + &self, + src: &n::AccelerationStructure, + dst_buffer: &n::Buffer, + dst_offset: buffer::Offset, + ) { + self.device + .extension_fns + .acceleration_structure + .as_ref() + .expect("Feature ACCELERATION_STRUCTURE must be enabled to call serialize_acceleration_structure_to_memory").unwrap_extension() + .cmd_copy_acceleration_structure_to_memory( + self.raw, + &vk::CopyAccelerationStructureToMemoryInfoKHR::builder() + .src(src.0) + .dst(vk::DeviceOrHostAddressKHR { + device_address: self + .device + .get_buffer_device_address(dst_buffer, dst_offset), + }) + .mode(vk::CopyAccelerationStructureModeKHR::SERIALIZE) + .build(), + ); + } + + unsafe fn deserialize_memory_to_acceleration_structure( + &self, + src_buffer: &n::Buffer, + src_offset: buffer::Offset, + dst: &n::AccelerationStructure, + ) { + self.device + .extension_fns + .acceleration_structure + .as_ref() + .expect("Feature ACCELERATION_STRUCTURE must be enabled to call 
deserialize_memory_to_acceleration_structure").unwrap_extension() + .cmd_copy_memory_to_acceleration_structure( + self.raw, + &vk::CopyMemoryToAccelerationStructureInfoKHR::builder() + .src(vk::DeviceOrHostAddressConstKHR { + device_address: self + .device + .get_buffer_device_address(src_buffer, src_offset), + }) + .dst(dst.0) + .mode(vk::CopyAccelerationStructureModeKHR::DESERIALIZE) + .build(), + ); + } + + unsafe fn write_acceleration_structures_properties( + &self, + accel_structs: &[&n::AccelerationStructure], + query_type: query::Type, + pool: &n::QueryPool, + first_query: u32, + ) { + self.device + .extension_fns + .acceleration_structure + .as_ref() + .expect("Feature ACCELERATION_STRUCTURE must be enabled to call write_acceleration_structures_properties").unwrap_extension() + .cmd_write_acceleration_structures_properties( + self.raw, + accel_structs + .iter() + .map(|a| a.0) + .collect::>() + .as_slice(), + match query_type { + query::Type::AccelerationStructureCompactedSize => { + vk::QueryType::ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR + } + query::Type::AccelerationStructureSerializationSize => { + vk::QueryType::ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR + } + _ => { + panic!("Unsupported query type") + } + }, + pool.0, + first_query, + ); + } + + unsafe fn set_ray_tracing_pipeline_stack_size(&self, pipeline_stack_size: u32) { + self.device + .extension_fns + .ray_tracing_pipeline + .as_ref() + .expect("Feature RAY_TRACING_PIPELINE must be enabled to call set_ray_tracing_pipeline_stack_size") + .unwrap_extension() + .cmd_set_ray_tracing_pipeline_stack_size(self.raw, pipeline_stack_size); + } + + unsafe fn trace_rays( + &self, + raygen_shader_binding_table: Option>, + miss_shader_binding_table: Option>, + hit_shader_binding_table: Option>, + callable_shader_binding_table: Option>, + count: WorkGroupCount, + ) { + self.device + .extension_fns + .ray_tracing_pipeline + .as_ref() + .expect("Feature RAY_TRACING_PIPELINE must be enabled to call 
trace_rays") + .unwrap_extension() + .cmd_trace_rays( + self.raw, + &conv::map_shader_binding_table(&self.device, raygen_shader_binding_table), + &conv::map_shader_binding_table(&self.device, miss_shader_binding_table), + &conv::map_shader_binding_table(&self.device, hit_shader_binding_table), + &conv::map_shader_binding_table(&self.device, callable_shader_binding_table), + count[0], + count[1], + count[2], + ); + } + + /// `buffer` points to a `WorkGroupCount`. + unsafe fn trace_rays_indirect<'a>( + &self, + raygen_shader_binding_table: Option>, + miss_shader_binding_table: Option>, + hit_shader_binding_table: Option>, + callable_shader_binding_table: Option>, + buffer: &'a n::Buffer, + offset: buffer::Offset, + ) { + self.device + .extension_fns + .ray_tracing_pipeline + .as_ref() + .expect("Feature RAY_TRACING_PIPELINE must be enabled to call trace_rays_indirect") + .unwrap_extension() + .cmd_trace_rays_indirect( + self.raw, + &[conv::map_shader_binding_table( + &self.device, + raygen_shader_binding_table, + )], + &[conv::map_shader_binding_table( + &self.device, + miss_shader_binding_table, + )], + &[conv::map_shader_binding_table( + &self.device, + hit_shader_binding_table, + )], + &[conv::map_shader_binding_table( + &self.device, + callable_shader_binding_table, + )], + self.device.get_buffer_device_address(buffer, offset), + ); + } + unsafe fn push_compute_constants( &mut self, layout: &n::PipelineLayout, diff --git a/src/backend/vulkan/src/conv.rs b/src/backend/vulkan/src/conv.rs index 69b96c55b2f..89731278ff8 100644 --- a/src/backend/vulkan/src/conv.rs +++ b/src/backend/vulkan/src/conv.rs @@ -9,6 +9,7 @@ use hal::{ window::{CompositeAlphaMode, PresentMode}, IndexType, }; +use pso::{BasePipeline, ShaderBindingTable, ShaderStageFlags}; use std::mem; @@ -199,6 +200,9 @@ pub fn map_descriptor_type(ty: pso::DescriptorType) -> vk::DescriptorType { }, }, pso::DescriptorType::InputAttachment => vk::DescriptorType::INPUT_ATTACHMENT, + 
pso::DescriptorType::AccelerationStructure => { + vk::DescriptorType::ACCELERATION_STRUCTURE_KHR + } } } @@ -623,3 +627,346 @@ pub fn map_vk_memory_heap_flags(flags: vk::MemoryHeapFlags) -> hal::memory::Heap hal_flags } + +pub fn map_acceleration_structure_type( + ty: hal::acceleration_structure::Type, +) -> vk::AccelerationStructureTypeKHR { + match ty { + hal::acceleration_structure::Type::TopLevel => vk::AccelerationStructureTypeKHR::TOP_LEVEL, + hal::acceleration_structure::Type::BottomLevel => { + vk::AccelerationStructureTypeKHR::BOTTOM_LEVEL + } + hal::acceleration_structure::Type::Generic => vk::AccelerationStructureTypeKHR::GENERIC, + } +} + +pub fn map_acceleration_structure_copy_mode( + ty: hal::acceleration_structure::CopyMode, +) -> vk::CopyAccelerationStructureModeKHR { + match ty { + hal::acceleration_structure::CopyMode::Copy => vk::CopyAccelerationStructureModeKHR::CLONE, + hal::acceleration_structure::CopyMode::Compact => { + vk::CopyAccelerationStructureModeKHR::COMPACT + } + } +} + +pub fn map_acceleration_structure_flags( + accel_flags: hal::acceleration_structure::Flags, +) -> vk::BuildAccelerationStructureFlagsKHR { + let mut flags = vk::BuildAccelerationStructureFlagsKHR::empty(); + if accel_flags.contains(hal::acceleration_structure::Flags::ALLOW_UPDATE) { + flags |= vk::BuildAccelerationStructureFlagsKHR::ALLOW_UPDATE; + } + if accel_flags.contains(hal::acceleration_structure::Flags::ALLOW_COMPACTION) { + flags |= vk::BuildAccelerationStructureFlagsKHR::ALLOW_COMPACTION; + } + if accel_flags.contains(hal::acceleration_structure::Flags::PREFER_FAST_TRACE) { + flags |= vk::BuildAccelerationStructureFlagsKHR::PREFER_FAST_TRACE; + } + if accel_flags.contains(hal::acceleration_structure::Flags::PREFER_FAST_BUILD) { + flags |= vk::BuildAccelerationStructureFlagsKHR::PREFER_FAST_BUILD; + } + if accel_flags.contains(hal::acceleration_structure::Flags::LOW_MEMORY) { + flags |= vk::BuildAccelerationStructureFlagsKHR::LOW_MEMORY; + } + flags +} + 
+pub fn map_geometry_flags( + geometry_flags: hal::acceleration_structure::GeometryFlags, +) -> vk::GeometryFlagsKHR { + let mut flags = vk::GeometryFlagsKHR::empty(); + if geometry_flags.contains(hal::acceleration_structure::GeometryFlags::OPAQUE) { + flags |= vk::GeometryFlagsKHR::OPAQUE; + } + if geometry_flags + .contains(hal::acceleration_structure::GeometryFlags::NO_DUPLICATE_ANY_HIT_INVOCATION) + { + flags |= vk::GeometryFlagsKHR::NO_DUPLICATE_ANY_HIT_INVOCATION; + } + flags +} + +pub fn map_geometry_type( + geometry_data: &hal::acceleration_structure::GeometryData, +) -> vk::GeometryTypeKHR { + match geometry_data { + hal::acceleration_structure::GeometryData::Triangles(_) => vk::GeometryTypeKHR::TRIANGLES, + hal::acceleration_structure::GeometryData::Aabbs(_) => vk::GeometryTypeKHR::AABBS, + hal::acceleration_structure::GeometryData::Instances(_) => vk::GeometryTypeKHR::INSTANCES, + } +} + +pub unsafe fn map_geometry( + device: &crate::RawDevice, + geometry: &hal::acceleration_structure::Geometry, +) -> vk::AccelerationStructureGeometryKHR { + vk::AccelerationStructureGeometryKHR::builder() + .geometry_type(map_geometry_type(&geometry.geometry)) + .geometry(match geometry.geometry { + hal::acceleration_structure::GeometryData::Triangles(ref triangles) => { + vk::AccelerationStructureGeometryDataKHR { + triangles: vk::AccelerationStructureGeometryTrianglesDataKHR::builder() + .vertex_format(map_format(triangles.vertex_format)) + .vertex_data(vk::DeviceOrHostAddressConstKHR { + device_address: device.get_buffer_device_address( + triangles.vertex_buffer, + triangles.vertex_buffer_offset, + ), + }) + .vertex_stride(triangles.vertex_buffer_stride as u64) + .max_vertex(triangles.max_vertex as u32) + .index_type( + triangles + .index_buffer + .map(|index_buffer| map_index_type(index_buffer.2)) + .unwrap_or(vk::IndexType::NONE_KHR), + ) + .index_data( + triangles + .index_buffer + .map(|index_buffer| vk::DeviceOrHostAddressConstKHR { + device_address: device + 
.get_buffer_device_address(index_buffer.0, index_buffer.1), + }) + .unwrap_or_default(), + ) + .transform_data( + triangles + .transform + .map(|transform| vk::DeviceOrHostAddressConstKHR { + device_address: device + .get_buffer_device_address(transform.0, transform.1), + }) + .unwrap_or_default(), + ) + .build(), + } + } + hal::acceleration_structure::GeometryData::Aabbs(ref aabbs) => { + vk::AccelerationStructureGeometryDataKHR { + aabbs: vk::AccelerationStructureGeometryAabbsDataKHR::builder() + .data(vk::DeviceOrHostAddressConstKHR { + device_address: device + .get_buffer_device_address(aabbs.buffer, aabbs.buffer_offset), + }) + .stride(aabbs.buffer_stride as u64) + .build(), + } + } + hal::acceleration_structure::GeometryData::Instances(ref instances) => { + vk::AccelerationStructureGeometryDataKHR { + instances: vk::AccelerationStructureGeometryInstancesDataKHR::builder() + .array_of_pointers(false) + .data(vk::DeviceOrHostAddressConstKHR { + device_address: device.get_buffer_device_address( + instances.buffer, + instances.buffer_offset, + ), + }) + .build(), + } + } + }) + .flags(map_geometry_flags(geometry.flags)) + .build() +} + +pub unsafe fn map_geometries<'a>( + device: &crate::RawDevice, + geometries: impl Iterator>, +) -> Vec { + geometries + .map(|geometry| map_geometry(device, geometry)) + .collect::>() +} + +/// Convert all fields of `desc`, except `geometries`. The caller should call `map_geometries` and add it to the builder manually to ensure the lifetime of the resulting collection lives long enough. 
+pub unsafe fn map_geometry_info_without_geometries<'a>( + device: &crate::RawDevice, + desc: &'a hal::acceleration_structure::BuildDesc, +) -> vk::AccelerationStructureBuildGeometryInfoKHRBuilder<'a> { + vk::AccelerationStructureBuildGeometryInfoKHR::builder() + .ty(map_acceleration_structure_type(desc.geometry.ty)) + .flags(map_acceleration_structure_flags(desc.geometry.flags)) + .mode(if desc.src.is_some() { + vk::BuildAccelerationStructureModeKHR::UPDATE + } else { + vk::BuildAccelerationStructureModeKHR::BUILD + }) + .src_acceleration_structure(desc.src.map(|a| a.0).unwrap_or_default()) + .dst_acceleration_structure(desc.dst.0) + .scratch_data(vk::DeviceOrHostAddressKHR { + device_address: device.get_buffer_device_address(desc.scratch, desc.scratch_offset), + }) +} + +pub unsafe fn map_build_ranges_infos( + build_ranges: &[hal::acceleration_structure::BuildRangeDesc], +) -> &[vk::AccelerationStructureBuildRangeInfoKHR] { + // Safe because `BuildRangeDesc` and `AccelerationStructureBuildRangeInfoKHR` have the same layout. + mem::transmute(build_ranges) +} + +pub fn map_group_shader(group_shader: pso::GroupShader) -> vk::ShaderGroupShaderKHR { + match group_shader { + pso::GroupShader::General => vk::ShaderGroupShaderKHR::GENERAL, + pso::GroupShader::ClosestHit => vk::ShaderGroupShaderKHR::CLOSEST_HIT, + pso::GroupShader::AnyHit => vk::ShaderGroupShaderKHR::ANY_HIT, + pso::GroupShader::Intersection => vk::ShaderGroupShaderKHR::INTERSECTION, + } +} + +// TODO reuse for other pipelines? 
+pub fn map_pipeline_create_flags<'a, P: 'a>( + pipeline_create_flags: pso::PipelineCreationFlags, + parent: &BasePipeline<'a, P>, +) -> vk::PipelineCreateFlags { + let mut flags = vk::PipelineCreateFlags::empty(); + match parent { + pso::BasePipeline::None => (), + _ => { + flags |= vk::PipelineCreateFlags::DERIVATIVE; + } + } + if pipeline_create_flags.contains(pso::PipelineCreationFlags::DISABLE_OPTIMIZATION) { + flags |= vk::PipelineCreateFlags::DISABLE_OPTIMIZATION; + } + if pipeline_create_flags.contains(pso::PipelineCreationFlags::ALLOW_DERIVATIVES) { + flags |= vk::PipelineCreateFlags::ALLOW_DERIVATIVES; + } + if pipeline_create_flags + .contains(pso::PipelineCreationFlags::RAY_TRACING_NO_NULL_ANY_HIT_SHADERS) + { + flags |= vk::PipelineCreateFlags::RAY_TRACING_NO_NULL_ANY_HIT_SHADERS_KHR; + } + if pipeline_create_flags + .contains(pso::PipelineCreationFlags::RAY_TRACING_NO_NULL_CLOSEST_HIT_SHADERS) + { + flags |= vk::PipelineCreateFlags::RAY_TRACING_NO_NULL_CLOSEST_HIT_SHADERS_KHR; + } + if pipeline_create_flags.contains(pso::PipelineCreationFlags::RAY_TRACING_NO_NULL_MISS_SHADERS) + { + flags |= vk::PipelineCreateFlags::RAY_TRACING_NO_NULL_MISS_SHADERS_KHR; + } + if pipeline_create_flags + .contains(pso::PipelineCreationFlags::RAY_TRACING_NO_NULL_INTERSECTION_SHADERS) + { + flags |= vk::PipelineCreateFlags::RAY_TRACING_NO_NULL_INTERSECTION_SHADERS_KHR; + } + if pipeline_create_flags.contains(pso::PipelineCreationFlags::RAY_TRACING_SKIP_TRIANGLES) { + flags |= vk::PipelineCreateFlags::RAY_TRACING_SKIP_TRIANGLES_KHR; + } + if pipeline_create_flags.contains(pso::PipelineCreationFlags::RAY_TRACING_SKIP_AABBS) { + flags |= vk::PipelineCreateFlags::RAY_TRACING_SKIP_AABBS_KHR; + } + if pipeline_create_flags + .contains(pso::PipelineCreationFlags::RAY_TRACING_SHADER_GROUP_HANDLE_CAPTURE_REPLAY) + { + flags |= vk::PipelineCreateFlags::RAY_TRACING_SHADER_GROUP_HANDLE_CAPTURE_REPLAY_KHR; + } + flags +} + +pub fn map_shader_stage(stage: pso::ShaderStageFlags) -> 
vk::ShaderStageFlags { + let mut flags = vk::ShaderStageFlags::empty(); + if stage.contains(pso::ShaderStageFlags::VERTEX) { + flags |= vk::ShaderStageFlags::VERTEX; + } + if stage.contains(pso::ShaderStageFlags::HULL) { + flags |= vk::ShaderStageFlags::TESSELLATION_CONTROL; + } + if stage.contains(pso::ShaderStageFlags::DOMAIN) { + flags |= vk::ShaderStageFlags::TESSELLATION_EVALUATION; + } + if stage.contains(pso::ShaderStageFlags::GEOMETRY) { + flags |= vk::ShaderStageFlags::GEOMETRY; + } + if stage.contains(pso::ShaderStageFlags::FRAGMENT) { + flags |= vk::ShaderStageFlags::FRAGMENT; + } + if stage.contains(pso::ShaderStageFlags::COMPUTE) { + flags |= vk::ShaderStageFlags::COMPUTE; + } + if stage.contains(pso::ShaderStageFlags::TASK) { + flags |= vk::ShaderStageFlags::TASK_NV; + } + if stage.contains(pso::ShaderStageFlags::MESH) { + flags |= vk::ShaderStageFlags::MESH_NV; + } + if stage.contains(pso::ShaderStageFlags::RAYGEN) { + flags |= vk::ShaderStageFlags::RAYGEN_KHR; + } + if stage.contains(pso::ShaderStageFlags::ANY_HIT) { + flags |= vk::ShaderStageFlags::ANY_HIT_KHR; + } + if stage.contains(pso::ShaderStageFlags::CLOSEST_HIT) { + flags |= vk::ShaderStageFlags::CLOSEST_HIT_KHR; + } + if stage.contains(pso::ShaderStageFlags::MISS) { + flags |= vk::ShaderStageFlags::MISS_KHR; + } + if stage.contains(pso::ShaderStageFlags::INTERSECTION) { + flags |= vk::ShaderStageFlags::INTERSECTION_KHR; + } + if stage.contains(pso::ShaderStageFlags::CALLABLE) { + flags |= vk::ShaderStageFlags::CALLABLE_KHR; + } + flags +} + +pub fn map_shader_group_desc( + desc: &pso::ShaderGroupDesc, +) -> vk::RayTracingShaderGroupCreateInfoKHR { + match desc { + pso::ShaderGroupDesc::General { general_shader } => { + vk::RayTracingShaderGroupCreateInfoKHR::builder() + .ty(vk::RayTracingShaderGroupTypeKHR::GENERAL) + .general_shader(*general_shader) + .closest_hit_shader(vk::SHADER_UNUSED_KHR) + .any_hit_shader(vk::SHADER_UNUSED_KHR) + .intersection_shader(vk::SHADER_UNUSED_KHR) + 
.build() + } + + pso::ShaderGroupDesc::TrianglesHitGroup { + closest_hit_shader, + any_hit_shader, + } => vk::RayTracingShaderGroupCreateInfoKHR::builder() + .ty(vk::RayTracingShaderGroupTypeKHR::TRIANGLES_HIT_GROUP) + .general_shader(vk::SHADER_UNUSED_KHR) + .closest_hit_shader(closest_hit_shader.unwrap_or(vk::SHADER_UNUSED_KHR)) + .any_hit_shader(any_hit_shader.unwrap_or(vk::SHADER_UNUSED_KHR)) + .intersection_shader(vk::SHADER_UNUSED_KHR) + .build(), + + pso::ShaderGroupDesc::ProceduralHitGroup { + closest_hit_shader, + any_hit_shader, + intersection_shader, + } => vk::RayTracingShaderGroupCreateInfoKHR::builder() + .ty(vk::RayTracingShaderGroupTypeKHR::PROCEDURAL_HIT_GROUP) + .general_shader(vk::SHADER_UNUSED_KHR) + .closest_hit_shader(closest_hit_shader.unwrap_or(vk::SHADER_UNUSED_KHR)) + .any_hit_shader(any_hit_shader.unwrap_or(vk::SHADER_UNUSED_KHR)) + .intersection_shader(*intersection_shader) + .build(), + } +} + +pub unsafe fn map_shader_binding_table( + device: &crate::RawDevice, + table: Option>, +) -> vk::StridedDeviceAddressRegionKHR { + table.map_or_else( + || vk::StridedDeviceAddressRegionKHR::default(), + |table| { + vk::StridedDeviceAddressRegionKHR::builder() + .device_address(device.get_buffer_device_address(table.buffer, table.offset)) + .stride(table.stride as u64) + .size(table.size) + .build() + }, + ) +} diff --git a/src/backend/vulkan/src/device.rs b/src/backend/vulkan/src/device.rs index 54c2bb51825..45598f4809c 100644 --- a/src/backend/vulkan/src/device.rs +++ b/src/backend/vulkan/src/device.rs @@ -454,15 +454,58 @@ impl<'a> ComputePipelineInfoBuf<'a> { } } +#[derive(Debug, Default)] +struct RayTracingPipelineInfoBuf<'a> { + shader_groups: Vec>, +} +impl<'a> RayTracingPipelineInfoBuf<'a> { + unsafe fn new(desc: &pso::RayTracingPipelineDesc<'a, B>) -> Self { + let mut this = Self::default(); + this.shader_groups = desc + .stages + .iter() + .map(|stage_desc| { + let mut buf = ComputePipelineInfoBuf::default(); + buf.c_string = 
CString::new(stage_desc.entry_point.entry).unwrap(); + buf.entries = stage_desc + .entry_point + .specialization + .constants + .iter() + .map(|c| vk::SpecializationMapEntry { + constant_id: c.id, + offset: c.range.start as _, + size: (c.range.end - c.range.start) as _, + }) + .collect(); + buf.specialization = vk::SpecializationInfo { + map_entry_count: buf.entries.len() as _, + p_map_entries: buf.entries.as_ptr(), + data_size: stage_desc.entry_point.specialization.data.len() as _, + p_data: stage_desc.entry_point.specialization.data.as_ptr() as _, + }; + buf + }) + .collect(); + this + } +} + impl d::Device for super::Device { unsafe fn allocate_memory( &self, mem_type: MemoryTypeId, size: u64, ) -> Result { + let mut flags_info = vk::MemoryAllocateFlagsInfo::builder().flags( + // TODO needs Vulkan 1.2? Also either expose in hal or infer from usage? + vk::MemoryAllocateFlags::DEVICE_ADDRESS, + ); + let info = vk::MemoryAllocateInfo::builder() .allocation_size(size) - .memory_type_index(self.get_ash_memory_type_index(mem_type)); + .memory_type_index(self.get_ash_memory_type_index(mem_type)) + .push_next(&mut flags_info); let result = self.shared.raw.allocate_memory(&info, None); @@ -879,6 +922,85 @@ impl d::Device for super::Device { } } + unsafe fn create_ray_tracing_pipeline<'a>( + &self, + desc: &pso::RayTracingPipelineDesc<'a, B>, + cache: Option<&n::PipelineCache>, + ) -> Result { + let buf = RayTracingPipelineInfoBuf::new(desc); + + let stages = desc + .stages + .iter() + .zip(&buf.shader_groups) + .map(|(stage_desc, buf)| { + vk::PipelineShaderStageCreateInfo::builder() + .flags(vk::PipelineShaderStageCreateFlags::empty()) + .stage(conv::map_shader_stage(stage_desc.stage)) + .module(stage_desc.entry_point.module.raw) + .name(buf.c_string.as_c_str()) + .specialization_info(&buf.specialization) + .build() + }) + .collect::>(); + let groups = desc + .groups + .iter() + .map(conv::map_shader_group_desc) + .collect::>(); + + let info = { + let (base_handle, 
base_index) = match desc.parent { + pso::BasePipeline::Pipeline(pipeline) => (pipeline.0, -1), + pso::BasePipeline::Index(index) => (vk::Pipeline::null(), index as _), + pso::BasePipeline::None => (vk::Pipeline::null(), -1), + }; + + vk::RayTracingPipelineCreateInfoKHR::builder() + .flags(conv::map_pipeline_create_flags(desc.flags, &desc.parent)) + .stages(&stages) + .groups(&groups) + .max_pipeline_ray_recursion_depth(desc.max_pipeline_ray_recursion_depth) + // .library_info() + // .library_interface() + // .dynamic_state() + .layout(desc.layout.raw) + .base_pipeline_handle(base_handle) + .base_pipeline_index(base_index) + }; + + // TODO create_ray_tracing_pipelines also returns VK_OPERATION_DEFERRED_KHR, VK_OPERATION_NOT_DEFERRED_KHR, VK_PIPELINE_COMPILE_REQUIRED_EXT on success, but ash does not support this. + match self + .shared + .extension_fns + .ray_tracing_pipeline + .as_ref() + .expect( + "Feature RAY_TRACING_PIPELINE must be enabled to call create_ray_tracing_pipeline", + ) + .unwrap_extension() + .create_ray_tracing_pipelines( + vk::DeferredOperationKHR::null(), + cache.map_or(vk::PipelineCache::null(), |cache| cache.raw), + &[info.build()], + None, + ) { + Ok(pipelines) => { + // if let Some(name) = desc.label { + // self.shared + // .set_object_name(vk::ObjectType::PIPELINE, pipeline, name); + // } + Ok(n::RayTracingPipeline(pipelines[0])) + } + Err(vk::Result::ERROR_OUT_OF_HOST_MEMORY) => Err(d::OutOfMemory::Host.into()), + Err(vk::Result::ERROR_OUT_OF_DEVICE_MEMORY) => Err(d::OutOfMemory::Device.into()), + Err(vk::Result::ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS) => { + todo!() + } + _ => Err(pso::CreationError::Other), + } + } + unsafe fn create_framebuffer( &self, renderpass: &n::RenderPass, @@ -1313,12 +1435,23 @@ impl d::Device for super::Device { where I: Iterator>, { + /// Stores the + struct RawWriteOffsets { + image_info: usize, + buffer_info: usize, + texel_buffer_view: usize, + accel_struct: usize, + } + let descriptors = op.descriptors; let 
mut raw_writes = Vec::::with_capacity(descriptors.size_hint().0); + // Parallel array to `raw_writes`. + let mut raw_writes_indices = Vec::::new(); let mut image_infos = Vec::new(); let mut buffer_infos = Vec::new(); let mut texel_buffer_views = Vec::new(); + let mut accel_structs = Vec::new(); // gfx-hal allows the type and stages to be different between the descriptor // in a single write, while Vulkan requires them to be the same. @@ -1358,9 +1491,15 @@ impl d::Device for super::Device { }, descriptor_count: 1, descriptor_type, - p_image_info: image_infos.len() as _, - p_buffer_info: buffer_infos.len() as _, - p_texel_buffer_view: texel_buffer_views.len() as _, + p_image_info: ptr::null(), + p_buffer_info: ptr::null(), + p_texel_buffer_view: ptr::null(), + }); + raw_writes_indices.push(RawWriteOffsets { + image_info: image_infos.len(), + buffer_info: buffer_infos.len(), + texel_buffer_view: texel_buffer_views.len(), + accel_struct: accel_structs.len(), }); } @@ -1404,11 +1543,18 @@ impl d::Device for super::Device { pso::Descriptor::TexelBuffer(view) => { texel_buffer_views.push(view.raw); } + pso::Descriptor::AccelerationStructure(accel_struct) => { + accel_structs.push(accel_struct.0); + } } } + let mut accel_structure_writes = + Vec::::new(); + // Patch the pointers now that we have all the storage allocated. 
- for raw in raw_writes.iter_mut() { + debug_assert_eq!(raw_writes.len(), raw_writes_indices.len()); + for (raw, offsets) in raw_writes.iter_mut().zip(raw_writes_indices) { use crate::vk::DescriptorType as Dt; match raw.descriptor_type { Dt::SAMPLER @@ -1416,23 +1562,27 @@ impl d::Device for super::Device { | Dt::STORAGE_IMAGE | Dt::COMBINED_IMAGE_SAMPLER | Dt::INPUT_ATTACHMENT => { - raw.p_buffer_info = ptr::null(); - raw.p_texel_buffer_view = ptr::null(); - raw.p_image_info = image_infos[raw.p_image_info as usize..].as_ptr(); + raw.p_image_info = image_infos[offsets.image_info..].as_ptr(); } Dt::UNIFORM_TEXEL_BUFFER | Dt::STORAGE_TEXEL_BUFFER => { - raw.p_buffer_info = ptr::null(); - raw.p_image_info = ptr::null(); raw.p_texel_buffer_view = - texel_buffer_views[raw.p_texel_buffer_view as usize..].as_ptr(); + texel_buffer_views[offsets.texel_buffer_view..].as_ptr(); } Dt::UNIFORM_BUFFER | Dt::STORAGE_BUFFER | Dt::STORAGE_BUFFER_DYNAMIC | Dt::UNIFORM_BUFFER_DYNAMIC => { - raw.p_image_info = ptr::null(); - raw.p_texel_buffer_view = ptr::null(); - raw.p_buffer_info = buffer_infos[raw.p_buffer_info as usize..].as_ptr(); + raw.p_buffer_info = buffer_infos[offsets.buffer_info..].as_ptr(); + } + Dt::ACCELERATION_STRUCTURE_KHR => { + accel_structure_writes.push(vk::WriteDescriptorSetAccelerationStructureKHR { + s_type: vk::StructureType::WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR, + p_next: ptr::null(), + acceleration_structure_count: 1, + p_acceleration_structures: accel_structs[offsets.accel_struct..].as_ptr(), + }); + + raw.p_next = accel_structure_writes.last_mut().unwrap() as *mut _ as *mut _; } _ => panic!("unknown descriptor type"), } @@ -1658,6 +1808,14 @@ impl d::Device for super::Device { vk::QueryType::TIMESTAMP, vk::QueryPipelineStatisticFlags::empty(), ), + query::Type::AccelerationStructureCompactedSize => ( + vk::QueryType::ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR, + vk::QueryPipelineStatisticFlags::empty(), + ), + 
query::Type::AccelerationStructureSerializationSize => ( + vk::QueryType::ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR, + vk::QueryPipelineStatisticFlags::empty(), + ), }; let info = vk::QueryPoolCreateInfo::builder() @@ -1705,6 +1863,169 @@ impl d::Device for super::Device { } } + unsafe fn create_acceleration_structure( + &self, + desc: &hal::acceleration_structure::CreateDesc, + ) -> Result { + let result = self + .shared + .extension_fns + .acceleration_structure + .as_ref() + .expect("Feature ACCELERATION_STRUCTURE must be enabled to call create_acceleration_structure").unwrap_extension() + .create_acceleration_structure( + &vk::AccelerationStructureCreateInfoKHR::builder() + .buffer(desc.buffer.raw) + .offset(desc.buffer_offset) + .size(desc.size) + .ty(match desc.ty { + hal::acceleration_structure::Type::TopLevel => { + vk::AccelerationStructureTypeKHR::TOP_LEVEL + } + hal::acceleration_structure::Type::BottomLevel => { + vk::AccelerationStructureTypeKHR::BOTTOM_LEVEL + } + hal::acceleration_structure::Type::Generic => { + vk::AccelerationStructureTypeKHR::GENERIC + } + }) + // TODO(capture-replay) + // .create_flags(vk::AccelerationStructureCreateFlagsKHR::empty()) + // .device_address() + .build(), + None, + ); + + match result { + Ok(acceleration_structure) => Ok(n::AccelerationStructure(acceleration_structure)), + Err(vk::Result::ERROR_OUT_OF_HOST_MEMORY) => Err(d::OutOfMemory::Host), + // TODO(capture-replay) + Err(vk::Result::ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS) => todo!(), + _ => unreachable!(), + } + } + + unsafe fn get_acceleration_structure_build_requirements( + &self, + desc: &hal::acceleration_structure::GeometryDesc, + max_primitive_counts: &[u32], + ) -> hal::acceleration_structure::SizeRequirements { + let geometries = desc + .geometries + .iter() + .map(|&geometry| conv::map_geometry(&self.shared, geometry)) + .collect::>(); + + let build_info = vk::AccelerationStructureBuildGeometryInfoKHR::builder() + 
.ty(conv::map_acceleration_structure_type(desc.ty)) + .flags(conv::map_acceleration_structure_flags(desc.flags)) + .geometries(geometries.as_slice()); + + let build_size_info = self + .shared + .extension_fns + .acceleration_structure + .as_ref() + .expect( + // TODO: this string prevents rustfmt from running? + "Feature ACCELERATION_STRUCTURE must be enabled to call get_acceleration_structure_build_requirements", + ).unwrap_extension() + .get_acceleration_structure_build_sizes( + vk::AccelerationStructureBuildTypeKHR::DEVICE, + &build_info, + max_primitive_counts, + ); + + hal::acceleration_structure::SizeRequirements { + acceleration_structure_size: build_size_info.acceleration_structure_size, + update_scratch_size: build_size_info.update_scratch_size, + build_scratch_size: build_size_info.build_scratch_size, + } + } + + unsafe fn get_acceleration_structure_address( + &self, + accel_struct: &n::AccelerationStructure, + ) -> hal::acceleration_structure::DeviceAddress { + hal::acceleration_structure::DeviceAddress( + self.shared + .extension_fns + .acceleration_structure + .as_ref() + .expect("Feature ACCELERATION_STRUCTURE must be enabled to call get_acceleration_structure_address").unwrap_extension() + .get_acceleration_structure_device_address( + &vk::AccelerationStructureDeviceAddressInfoKHR::builder() + .acceleration_structure(accel_struct.0) + .build(), + ), + ) + } + + unsafe fn get_device_acceleration_structure_compatibility( + &self, + serialized_accel_struct: &[u8; 32], + ) -> hal::acceleration_structure::Compatibility { + match self + .shared + .extension_fns + .acceleration_structure + .as_ref() + .expect("Feature ACCELERATION_STRUCTURE must be enabled to call get_device_acceleration_structure_compatibility").unwrap_extension() + .get_device_acceleration_structure_compatibility( + &vk::AccelerationStructureVersionInfoKHR::builder() + .version_data(serialized_accel_struct), + ) { + vk::AccelerationStructureCompatibilityKHR::COMPATIBLE => { + 
hal::acceleration_structure::Compatibility::Compatible + } + vk::AccelerationStructureCompatibilityKHR::INCOMPATIBLE => { + hal::acceleration_structure::Compatibility::Incompatible + } + _ => unreachable!(), + } + } + + unsafe fn get_ray_tracing_shader_group_handles<'a>( + &self, + pipeline: &'a n::RayTracingPipeline, + first_group: u32, + group_count: u32, + data_size: usize, + ) -> Result, d::OutOfMemory> { + let result = self + .shared + .extension_fns + .ray_tracing_pipeline + .as_ref() + .expect("Feature RAY_TRACING_PIPELINE must be enabled to call get_ray_tracing_shader_group_handles").unwrap_extension() + .get_ray_tracing_shader_group_handles(pipeline.0, first_group, group_count, data_size); + + match result { + Ok(data) => Ok(data), + Err(vk::Result::ERROR_OUT_OF_HOST_MEMORY) => Err(d::OutOfMemory::Host), + _ => unreachable!(), + } + } + + unsafe fn get_ray_tracing_shader_group_stack_size<'a>( + &self, + pipeline: &'a n::RayTracingPipeline, + group: u32, + group_shader: pso::GroupShader, + ) -> u64 { + self.shared + .extension_fns + .ray_tracing_pipeline + .as_ref() + .expect("Feature RAY_TRACING_PIPELINE must be enabled to call get_ray_tracing_shader_group_stack_size") + .unwrap_extension() + .get_ray_tracing_shader_group_stack_size( + pipeline.0, + group, + conv::map_group_shader(group_shader), + ) + } + unsafe fn destroy_query_pool(&self, pool: n::QueryPool) { self.shared.raw.destroy_query_pool(pool.0, None); } @@ -1729,6 +2050,10 @@ impl d::Device for super::Device { self.shared.raw.destroy_pipeline(pipeline.0, None); } + unsafe fn destroy_ray_tracing_pipeline(&self, pipeline: n::RayTracingPipeline) { + self.shared.raw.destroy_pipeline(pipeline.0, None); + } + unsafe fn destroy_framebuffer(&self, fb: n::Framebuffer) { match fb { n::Framebuffer::ImageLess(raw) => { @@ -1784,6 +2109,15 @@ impl d::Device for super::Device { self.shared.raw.destroy_event(event.0, None); } + unsafe fn destroy_acceleration_structure(&self, accel_struct: 
n::AccelerationStructure) { + self.shared + .extension_fns + .acceleration_structure + .as_ref() + .expect("Feature ACCELERATION_STRUCTURE must be enabled to call destroy_acceleration_structure").unwrap_extension() + .destroy_acceleration_structure(accel_struct.0, None); + } + fn wait_idle(&self) -> Result<(), d::OutOfMemory> { match unsafe { self.shared.raw.device_wait_idle() } { Ok(()) => Ok(()), @@ -1866,6 +2200,18 @@ impl d::Device for super::Device { .set_object_name(vk::ObjectType::PIPELINE_LAYOUT, pipeline_layout.raw, name) } + unsafe fn set_acceleration_structure_name( + &self, + accel_struct: &mut n::AccelerationStructure, + name: &str, + ) { + self.shared.set_object_name( + vk::ObjectType::ACCELERATION_STRUCTURE_KHR, + accel_struct.0, + name, + ) + } + unsafe fn set_display_power_state( &self, display: &hal::display::Display, diff --git a/src/backend/vulkan/src/lib.rs b/src/backend/vulkan/src/lib.rs index 45911225461..da589e933a5 100644 --- a/src/backend/vulkan/src/lib.rs +++ b/src/backend/vulkan/src/lib.rs @@ -32,7 +32,7 @@ use ash::Entry; type Entry = ash::EntryCustom<()>; use ash::{ extensions::{ext, khr, nv::MeshShader}, - version::{DeviceV1_0, EntryV1_0, InstanceV1_0}, + version::{DeviceV1_0, DeviceV1_2, EntryV1_0, InstanceV1_0}, vk, }; @@ -796,6 +796,9 @@ struct DeviceExtensionFunctions { mesh_shaders: Option>, draw_indirect_count: Option>, display_control: Option, + buffer_device_address: Option>, + acceleration_structure: Option>, + ray_tracing_pipeline: Option>, } // TODO there's no reason why this can't be unified--the function pointers should all be the same--it's not clear how to do this with `ash`. 
@@ -899,6 +902,28 @@ impl RawDevice { ); } } + + pub(crate) unsafe fn get_buffer_device_address( + &self, + buffer: &native::Buffer, + offset: hal::buffer::Offset, + ) -> vk::DeviceAddress { + let info = vk::BufferDeviceAddressInfo::builder() + .buffer(buffer.raw) + .build(); + + let buffer_base_address = match self + .extension_fns + .buffer_device_address + .as_ref() + .expect("Feature DRAW_INDIRECT_COUNT must be enabled to call draw_indirect_count") + { + ExtensionFn::Extension(t) => t.get_buffer_device_address_khr(self.raw.handle(), &info), + ExtensionFn::Promoted => self.raw.get_buffer_device_address(&info), + }; + + buffer_base_address + offset + } } // Need to explicitly synchronize on submission and present. @@ -1185,6 +1210,7 @@ impl hal::Backend for Backend { type ComputePipeline = native::ComputePipeline; type GraphicsPipeline = native::GraphicsPipeline; + type RayTracingPipeline = native::RayTracingPipeline; type PipelineLayout = native::PipelineLayout; type PipelineCache = native::PipelineCache; type DescriptorSetLayout = native::DescriptorSetLayout; @@ -1198,4 +1224,6 @@ impl hal::Backend for Backend { type Display = native::Display; type DisplayMode = native::DisplayMode; + + type AccelerationStructure = native::AccelerationStructure; } diff --git a/src/backend/vulkan/src/native.rs b/src/backend/vulkan/src/native.rs index e2520a6b8b7..040910d30a0 100644 --- a/src/backend/vulkan/src/native.rs +++ b/src/backend/vulkan/src/native.rs @@ -25,6 +25,9 @@ pub struct GraphicsPipeline(pub vk::Pipeline); #[derive(Debug, Hash)] pub struct ComputePipeline(pub vk::Pipeline); +#[derive(Debug, Hash)] +pub struct RayTracingPipeline(pub vk::Pipeline); + #[derive(Debug, Hash)] pub struct Memory { pub(crate) raw: vk::DeviceMemory, @@ -240,3 +243,6 @@ pub struct Display(pub vk::DisplayKHR); #[derive(Debug, Hash)] pub struct DisplayMode(pub vk::DisplayModeKHR); + +#[derive(Debug, Hash)] +pub struct AccelerationStructure(pub vk::AccelerationStructureKHR); diff --git 
a/src/backend/vulkan/src/physical_device.rs b/src/backend/vulkan/src/physical_device.rs index 51f7694a9a2..082e1b32d9a 100644 --- a/src/backend/vulkan/src/physical_device.rs +++ b/src/backend/vulkan/src/physical_device.rs @@ -1,5 +1,8 @@ use ash::{ - extensions::{khr::DrawIndirectCount, khr::Swapchain, nv::MeshShader}, + extensions::{ + khr::AccelerationStructure, khr::DeferredHostOperations, khr::DrawIndirectCount, + khr::RayTracingPipeline, khr::Swapchain, nv::MeshShader, + }, version::{DeviceV1_0, InstanceV1_0}, vk, }; @@ -28,6 +31,10 @@ pub struct PhysicalDeviceFeatures { descriptor_indexing: Option, mesh_shader: Option, imageless_framebuffer: Option, + buffer_device_address: Option, + acceleration_structure: Option, + ray_query: Option, + ray_tracing_pipeline: Option, } // This is safe because the structs have `p_next: *mut c_void`, which we null out/never read. @@ -54,6 +61,18 @@ impl PhysicalDeviceFeatures { if let Some(ref mut feature) = self.imageless_framebuffer { info = info.push_next(feature); } + if let Some(ref mut feature) = self.buffer_device_address { + info = info.push_next(feature); + } + if let Some(ref mut feature) = self.acceleration_structure { + info = info.push_next(feature); + } + if let Some(ref mut feature) = self.ray_query { + info = info.push_next(feature); + } + if let Some(ref mut feature) = self.ray_tracing_pipeline { + info = info.push_next(feature); + } info } @@ -184,6 +203,7 @@ impl PhysicalDeviceFeatures { ) .sampler_filter_minmax(features.contains(Features::SAMPLER_REDUCTION)) .imageless_framebuffer(supports_vulkan12_imageless_framebuffer) + .buffer_device_address(true) // TODO, either this or the extension .build(), ) } else { @@ -235,6 +255,51 @@ impl PhysicalDeviceFeatures { } else { None }, + // TODO add bit vector for dependencies? like "wants buffer device address", etc? 
+ buffer_device_address: if enabled_extensions + .contains(&vk::KhrBufferDeviceAddressFn::name()) + { + Some( + vk::PhysicalDeviceBufferDeviceAddressFeaturesKHR::builder() + .buffer_device_address(true) + .build(), + ) + } else { + None + }, + acceleration_structure: if enabled_extensions.contains(&AccelerationStructure::name()) { + Some( + vk::PhysicalDeviceAccelerationStructureFeaturesKHR::builder() + .acceleration_structure(features.contains(Features::ACCELERATION_STRUCTURE)) + .acceleration_structure_indirect_build( + features.contains(Features::ACCELERATION_STRUCTURE_INDIRECT_BUILD), + ) + .build(), + ) + } else { + None + }, + ray_query: if enabled_extensions.contains(&vk::KhrRayQueryFn::name()) { + Some( + vk::PhysicalDeviceRayQueryFeaturesKHR::builder() + .ray_query(true) + .build(), + ) + } else { + None + }, + ray_tracing_pipeline: if enabled_extensions.contains(&RayTracingPipeline::name()) { + Some( + vk::PhysicalDeviceRayTracingPipelineFeaturesKHR::builder() + .ray_tracing_pipeline(true) + .ray_tracing_pipeline_trace_rays_indirect( + features.contains(Features::TRACE_RAYS_INDIRECT), + ) + .build(), + ) + } else { + None + }, } } @@ -493,6 +558,34 @@ impl PhysicalDeviceFeatures { } } + if let Some(acceleration_structure) = self.acceleration_structure { + if acceleration_structure.acceleration_structure == vk::TRUE { + bits |= Features::ACCELERATION_STRUCTURE; + } + if acceleration_structure.acceleration_structure_indirect_build == vk::TRUE { + bits |= Features::ACCELERATION_STRUCTURE_INDIRECT_BUILD; + } + } + + if let Some(ray_query) = self.ray_query { + if ray_query.ray_query == vk::TRUE { + bits |= Features::RAY_QUERY; + } + } + + if let Some(ray_tracing_pipeline) = self.ray_tracing_pipeline { + if ray_tracing_pipeline.ray_tracing_pipeline == vk::TRUE { + bits |= Features::RAY_TRACING_PIPELINE; + } + if ray_tracing_pipeline.ray_tracing_pipeline_trace_rays_indirect == vk::TRUE { + bits |= Features::TRACE_RAYS_INDIRECT; + } + } + + if let 
Some(buffer_device_address) = self.buffer_device_address { + // TODO there's no hal feature for this + } + bits } } @@ -582,10 +675,43 @@ impl PhysicalDeviceInfo { requested_extensions.push(vk::KhrGetDisplayProperties2Fn::name()); // TODO NOT NEEDED, RIGHT? } - if self.supports_extension(vk::ExtDisplayControlFn::name()){ + if self.supports_extension(vk::ExtDisplayControlFn::name()) { requested_extensions.push(vk::ExtDisplayControlFn::name()); } + if requested_features.intersects(Features::ACCELERATION_STRUCTURE_MASK) { + requested_extensions.push(AccelerationStructure::name()); + + if self.api_version() < Version::V1_2 { + requested_extensions.push(vk::ExtDescriptorIndexingFn::name()); + // `VK_KHR_acceleration_structure` requires 1.1, which means we don't have to request `VK_KHR_maintenance3`. + requested_extensions.push(vk::KhrBufferDeviceAddressFn::name()); + } + requested_extensions.push(DeferredHostOperations::name()); + } + + if requested_features.contains(Features::RAY_QUERY) { + requested_extensions.push(vk::KhrRayQueryFn::name()); + + if self.api_version() < Version::V1_2 { + requested_extensions.push(vk::KhrSpirv14Fn::name()); + requested_extensions.push(vk::KhrShaderFloatControlsFn::name()); + } + } + + if requested_features.intersects( + Features::RAY_TRACING_PIPELINE + | Features::TRACE_RAYS_INDIRECT + | Features::RAY_TRAVERSAL_PRIMITIVE_CULLING, + ) { + requested_extensions.push(RayTracingPipeline::name()); + + if self.api_version() < Version::V1_2 { + requested_extensions.push(vk::KhrSpirv14Fn::name()); + requested_extensions.push(vk::KhrShaderFloatControlsFn::name()); + } + } + requested_extensions } @@ -644,6 +770,37 @@ impl PhysicalDeviceInfo { mut_ref.p_next = mem::replace(&mut features2.p_next, mut_ref as *mut _ as *mut _); } + if device_properties.supports_extension(vk::KhrBufferDeviceAddressFn::name()) { + features.buffer_device_address = + Some(vk::PhysicalDeviceBufferDeviceAddressFeaturesKHR::builder().build()); + + let mut_ref =
features.buffer_device_address.as_mut().unwrap(); + mut_ref.p_next = mem::replace(&mut features2.p_next, mut_ref as *mut _ as *mut _); + } + + if device_properties.supports_extension(AccelerationStructure::name()) { + features.acceleration_structure = + Some(vk::PhysicalDeviceAccelerationStructureFeaturesKHR::builder().build()); + + let mut_ref = features.acceleration_structure.as_mut().unwrap(); + mut_ref.p_next = mem::replace(&mut features2.p_next, mut_ref as *mut _ as *mut _); + } + + if device_properties.supports_extension(vk::KhrRayQueryFn::name()) { + features.ray_query = Some(vk::PhysicalDeviceRayQueryFeaturesKHR::builder().build()); + + let mut_ref = features.ray_query.as_mut().unwrap(); + mut_ref.p_next = mem::replace(&mut features2.p_next, mut_ref as *mut _ as *mut _); + } + + if device_properties.supports_extension(RayTracingPipeline::name()) { + features.ray_tracing_pipeline = + Some(vk::PhysicalDeviceRayTracingPipelineFeaturesKHR::builder().build()); + + let mut_ref = features.ray_tracing_pipeline.as_mut().unwrap(); + mut_ref.p_next = mem::replace(&mut features2.p_next, mut_ref as *mut _ as *mut _); + } + unsafe { get_device_properties .get_physical_device_features2_khr(device, &mut features2 as *mut _); @@ -662,11 +819,17 @@ impl PhysicalDeviceInfo { } } + // Null out all of the `pNext` fields in `features` to prevent any accidental unsafe derefs later on. + // We need to do this because `features` has internal pointers to itself, but we're moving it out of this function. 
unsafe { null_p_next(&mut features.vulkan_1_2); null_p_next(&mut features.descriptor_indexing); null_p_next(&mut features.mesh_shader); null_p_next(&mut features.imageless_framebuffer); + null_p_next(&mut features.buffer_device_address); + null_p_next(&mut features.acceleration_structure); + null_p_next(&mut features.ray_query); + null_p_next(&mut features.ray_tracing_pipeline); } (device_properties, features) @@ -755,15 +918,49 @@ impl PhysicalDevice { }; let display_control = if enabled_extensions.contains(&vk::ExtDisplayControlFn::name()) { - Some(vk::ExtDisplayControlFn::load( - |name| { - std::mem::transmute( - self.instance - .inner - .get_device_proc_addr(device_raw.handle(), name.as_ptr()), - ) - }, - )) + Some(vk::ExtDisplayControlFn::load(|name| { + std::mem::transmute( + self.instance + .inner + .get_device_proc_addr(device_raw.handle(), name.as_ptr()), + ) + })) + } else { + None + }; + + let buffer_device_address_fn = + if enabled_extensions.contains(&vk::KhrBufferDeviceAddressFn::name()) { + Some(ExtensionFn::Extension(vk::KhrBufferDeviceAddressFn::load( + |name| { + mem::transmute( + self.instance + .inner + .get_device_proc_addr(device_raw.handle(), name.as_ptr()), + ) + }, + ))) + } else if self.device_info.api_version() >= Version::V1_2 { + Some(ExtensionFn::Promoted) + } else { + None + }; + + let acceleration_structure_fn = + if enabled_extensions.contains(&AccelerationStructure::name()) { + Some(ExtensionFn::Extension(AccelerationStructure::new( + &self.instance.inner, + &device_raw, + ))) + } else { + None + }; + + let ray_tracing_pipeline_fn = if enabled_extensions.contains(&RayTracingPipeline::name()) { + Some(ExtensionFn::Extension(RayTracingPipeline::new( + &self.instance.inner, + &device_raw, + ))) } else { None }; @@ -806,6 +1003,9 @@ impl PhysicalDevice { mesh_shaders: mesh_fn, draw_indirect_count: indirect_count_fn, display_control, + buffer_device_address: buffer_device_address_fn, + acceleration_structure: acceleration_structure_fn, 
+ ray_tracing_pipeline: ray_tracing_pipeline_fn, }, flip_y_requires_shift: self.device_info.api_version() >= Version::V1_1 || self @@ -1273,6 +1473,9 @@ impl adapter::PhysicalDevice for PhysicalDevice { let mut descriptor_indexing_capabilities = hal::DescriptorIndexingProperties::default(); let mut mesh_shader_capabilities = hal::MeshShaderProperties::default(); let mut sampler_reduction_capabilities = hal::SamplerReductionProperties::default(); + let mut acceleration_structure_capabilities = + hal::AccelerationStructureProperties::default(); + let mut ray_tracing_pipeline_capabilities = hal::RayTracingPipelineProperties::default(); if let Some(get_physical_device_properties) = self.instance.get_physical_device_properties.as_ref() @@ -1282,6 +1485,10 @@ impl adapter::PhysicalDevice for PhysicalDevice { let mut mesh_shader_properties = vk::PhysicalDeviceMeshShaderPropertiesNV::builder(); let mut sampler_reduction_properties = vk::PhysicalDeviceSamplerFilterMinmaxProperties::builder(); + let mut acceleration_structure_properties = + vk::PhysicalDeviceAccelerationStructurePropertiesKHR::builder(); + let mut ray_tracing_pipeline_properties = + vk::PhysicalDeviceRayTracingPipelinePropertiesKHR::builder(); unsafe { get_physical_device_properties.get_physical_device_properties2_khr( @@ -1290,6 +1497,8 @@ impl adapter::PhysicalDevice for PhysicalDevice { .push_next(&mut descriptor_indexing_properties) .push_next(&mut mesh_shader_properties) .push_next(&mut sampler_reduction_properties) + .push_next(&mut acceleration_structure_properties) + .push_next(&mut ray_tracing_pipeline_properties) .build() as *mut _, ); } @@ -1348,6 +1557,36 @@ impl adapter::PhysicalDevice for PhysicalDevice { .filter_minmax_image_component_mapping == vk::TRUE, }; + + acceleration_structure_capabilities = hal::AccelerationStructureProperties { + max_acceleration_structure_bottom_level_geometry_count: + acceleration_structure_properties.max_geometry_count, + 
max_acceleration_structure_top_level_instance_count: + acceleration_structure_properties.max_instance_count, + max_acceleration_structure_bottom_level_total_primitive_count: + acceleration_structure_properties.max_primitive_count, + max_per_stage_descriptor_acceleration_structures: acceleration_structure_properties + .max_per_stage_descriptor_acceleration_structures, + max_descriptor_set_acceleration_structures: acceleration_structure_properties + .max_descriptor_set_acceleration_structures, + min_acceleration_structure_scratch_offset_alignment: + acceleration_structure_properties + .min_acceleration_structure_scratch_offset_alignment, + }; + + ray_tracing_pipeline_capabilities = hal::RayTracingPipelineProperties { + shader_group_handle_size: ray_tracing_pipeline_properties.shader_group_handle_size, + max_ray_recursion_depth: ray_tracing_pipeline_properties.max_ray_recursion_depth, + max_shader_group_stride: ray_tracing_pipeline_properties.max_shader_group_stride, + shader_group_base_alignment: ray_tracing_pipeline_properties + .shader_group_base_alignment, + max_ray_dispatch_invocation_count: ray_tracing_pipeline_properties + .max_ray_dispatch_invocation_count, + shader_group_handle_alignment: ray_tracing_pipeline_properties + .shader_group_handle_alignment, + max_ray_hit_attribute_size: ray_tracing_pipeline_properties + .max_ray_hit_attribute_size, + }; } PhysicalDeviceProperties { @@ -1355,6 +1594,8 @@ impl adapter::PhysicalDevice for PhysicalDevice { descriptor_indexing: descriptor_indexing_capabilities, mesh_shader: mesh_shader_capabilities, sampler_reduction: sampler_reduction_capabilities, + acceleration_structure: acceleration_structure_capabilities, + ray_tracing_pipeline: ray_tracing_pipeline_capabilities, performance_caveats: Default::default(), dynamic_pipeline_states: DynamicStates::all(), downlevel: DownlevelProperties::all_enabled(), @@ -1415,9 +1656,7 @@ impl adapter::PhysicalDevice for PhysicalDevice { true } - unsafe fn enumerate_displays( - 
&self, - ) -> Vec> { + unsafe fn enumerate_displays(&self) -> Vec> { let display_extension = match self.instance.display { Some(ref display_extension) => display_extension, None => { @@ -1426,23 +1665,31 @@ impl adapter::PhysicalDevice for PhysicalDevice { } }; - let display_properties = match display_extension - .get_physical_device_display_properties(self.handle) - { - Ok(display_properties) => display_properties, - Err(err)=>{ - match err { - vk::Result::ERROR_OUT_OF_HOST_MEMORY | vk::Result::ERROR_OUT_OF_DEVICE_MEMORY => - error!("Error returned on `get_physical_device_display_properties`: {:#?}",err), - err=>error!("Unexpected error on `get_physical_device_display_properties`: {:#?}",err) + let display_properties = + match display_extension.get_physical_device_display_properties(self.handle) { + Ok(display_properties) => display_properties, + Err(err) => { + match err { + vk::Result::ERROR_OUT_OF_HOST_MEMORY + | vk::Result::ERROR_OUT_OF_DEVICE_MEMORY => error!( + "Error returned on `get_physical_device_display_properties`: {:#?}", + err + ), + err => error!( + "Unexpected error on `get_physical_device_display_properties`: {:#?}", + err + ), + } + return Vec::new(); } - return Vec::new(); - } - }; + }; let mut displays = Vec::new(); for display_property in display_properties { - let supported_transforms = hal::display::SurfaceTransformFlags::from_bits(display_property.supported_transforms.as_raw()).unwrap(); + let supported_transforms = hal::display::SurfaceTransformFlags::from_bits( + display_property.supported_transforms.as_raw(), + ) + .unwrap(); let display_name = if display_property.display_name.is_null() { None } else { @@ -1475,11 +1722,17 @@ impl adapter::PhysicalDevice for PhysicalDevice { .get_display_mode_properties(self.handle, display_property.display) { Ok(display_modes) => display_modes, - Err(err)=>{ + Err(err) => { match err { - vk::Result::ERROR_OUT_OF_HOST_MEMORY | vk::Result::ERROR_OUT_OF_DEVICE_MEMORY => - error!("Error returned on 
`get_display_mode_properties`: {:#?}",err), - err=>error!("Unexpected error on `get_display_mode_properties`: {:#?}",err) + vk::Result::ERROR_OUT_OF_HOST_MEMORY + | vk::Result::ERROR_OUT_OF_DEVICE_MEMORY => error!( + "Error returned on `get_display_mode_properties`: {:#?}", + err + ), + err => error!( + "Unexpected error on `get_display_mode_properties`: {:#?}", + err + ), } return Vec::new(); } @@ -1526,7 +1779,7 @@ impl adapter::PhysicalDevice for PhysicalDevice { .get_display_plane_supported_displays(self.handle, index as u32) { Ok(compatible_displays) => compatible_displays, - Err(err)=>{ + Err(err) => { match err { vk::Result::ERROR_OUT_OF_HOST_MEMORY | vk::Result::ERROR_OUT_OF_DEVICE_MEMORY => error!("Error returned on `get_display_plane_supported_displays`: {:#?}",err), @@ -1544,11 +1797,17 @@ impl adapter::PhysicalDevice for PhysicalDevice { } planes } - Err(err)=>{ + Err(err) => { match err { - vk::Result::ERROR_OUT_OF_HOST_MEMORY | vk::Result::ERROR_OUT_OF_DEVICE_MEMORY => - error!("Error returned on `get_physical_device_display_plane_properties`: {:#?}",err), - err=>error!("Unexpected error on `get_physical_device_display_plane_properties`: {:#?}",err) + vk::Result::ERROR_OUT_OF_HOST_MEMORY + | vk::Result::ERROR_OUT_OF_DEVICE_MEMORY => error!( + "Error returned on `get_physical_device_display_plane_properties`: {:#?}", + err + ), + err => error!( + "Unexpected error on `get_physical_device_display_plane_properties`: {:#?}", + err + ), } Vec::new() } @@ -1648,49 +1907,50 @@ impl adapter::PhysicalDevice for PhysicalDevice { start: ( display_plane_capabilities.min_src_position.x, display_plane_capabilities.min_src_position.x, - ) + ) .into(), end: ( display_plane_capabilities.max_src_position.x, display_plane_capabilities.max_src_position.x, - ).into() + ) + .into(), }, src_extent: std::ops::Range { start: ( display_plane_capabilities.min_src_extent.width, display_plane_capabilities.min_src_extent.height, ) - .into(), + .into(), end: ( 
display_plane_capabilities.max_src_extent.width, display_plane_capabilities.max_src_extent.height, ) - .into(), + .into(), }, dst_position: std::ops::Range { start: ( display_plane_capabilities.min_dst_position.x, display_plane_capabilities.min_dst_position.x, ) - .into(), + .into(), end: ( display_plane_capabilities.max_dst_position.x, display_plane_capabilities.max_dst_position.x, ) - .into(), + .into(), }, dst_extent: std::ops::Range { start: ( display_plane_capabilities.min_dst_extent.width, display_plane_capabilities.min_dst_extent.height, ) - .into(), + .into(), end: ( display_plane_capabilities.max_dst_extent.width, display_plane_capabilities.max_dst_extent.height, ) - .into(), - } + .into(), + }, }) } } diff --git a/src/hal/src/acceleration_structure.rs b/src/hal/src/acceleration_structure.rs new file mode 100644 index 00000000000..92ee99571a3 --- /dev/null +++ b/src/hal/src/acceleration_structure.rs @@ -0,0 +1,608 @@ +//! Types to describe and handle acceleration structures. + +use crate::{ + buffer::{Offset, Stride}, + format::Format, + Backend, IndexType, +}; + +/// Denotes the type of acceleration structure. +#[derive(Debug, Copy, Clone)] +pub enum Type { + /// A top-level acceleration structure containing [`GeometryData::Instances`] pointing to bottom-level acceleration structures. + TopLevel, + /// A bottom-level acceleration structure containing [`GeometryData::Triangles`] or [`GeometryData::Aabbs`]. + BottomLevel, + /// An acceleration structure whose type is not known until build time. [`Self::TopLevel`] and [`Self::BottomLevel`] should be preferred over [`Self::Generic`]. + /// + /// This is not valid during any of the acceleration structure build commands. + Generic, +} + +/// A description of the data needed to create an acceleration structure. +#[derive(Debug)] +pub struct CreateDesc<'a, B: Backend> { + /// The buffer to store the acceleration structure in. 
+ pub buffer: &'a B::Buffer, + + /// The offset into `buffer` where the acceleration structure will be written. Must be a multiple of 256. + pub buffer_offset: Offset, + + /// The size required for the acceleration structure. + pub size: u64, + + /// The type of acceleration structure to build. + pub ty: Type, + // TODO(capture-replay) + // /// currently only has `accelerationStructureCaptureReplay` + // create_flags: VkAccelerationStructureCreateFlagsKHR, + // /// used for `accelerationStructureCaptureReplay` + // device_address: VkDeviceAddress, +} + +/// A description of the data needed to build or update an acceleration structure with geometry data. +#[derive(Debug)] +pub struct BuildDesc<'a, B: Backend> { + /// The original acceleration structure to base an update from. + /// + /// If `Some`, implies that we will do an update from `src` rather than a build from scratch. + pub src: Option<&'a B::AccelerationStructure>, + + /// The acceleration structure to be built or updated. + pub dst: &'a B::AccelerationStructure, + + /// The geometry data that will be written into this acceleration structure. + pub geometry: &'a GeometryDesc<'a, B>, + + // TODO(cpu-repr) + /// The buffer containing scratch space used to construct an acceleration structure. + pub scratch: &'a B::Buffer, + /// The offset into `scratch` which should be used for the scratch data. + pub scratch_offset: Offset, +} + +bitflags! { + /// Option flags for acceleration structure builds. + pub struct Flags: u32 { + /// The acceleration structure can be updated during builds. + const ALLOW_UPDATE = 0x1; + /// The acceleration structure can be compacted during copies with [`CopyMode::Compact`]. + const ALLOW_COMPACTION = 0x2; + /// The acceleration structure build should prioritize trace performance over build time. + const PREFER_FAST_TRACE = 0x4; + /// The acceleration structure build should prioritize build time over trace performance.
+ const PREFER_FAST_BUILD = 0x8; + /// The acceleration structure build should minimize scratch memory usage and final build size, potentially at the cost of build time or performance. + const LOW_MEMORY = 0x10; + } +} + +/// A description of the geometry data needed to populate an acceleration structure. +#[derive(Debug)] +pub struct GeometryDesc<'a, B: Backend> { + /// Acceleration structure build flags. + pub flags: Flags, + + /// The type of acceleration structure to build. + pub ty: Type, + + /// List of geometries to be stored in an acceleration structure. + /// + /// All geometries in this list must have the same variant. + /// - For bottom-level structures, the geometries must be triangles or AABBs. + /// - For top-level structures, the geometries must be instances. + pub geometries: &'a [&'a Geometry<'a, B>], +} + +bitflags! { + /// Option flags for various acceleration structure geometry settings. + pub struct GeometryFlags: u32 { + /// This geometry will not invoke the any-hit shaders, even if present in a hit group. + const OPAQUE = 0x1; + /// The any-hit shader will only be called once per primitive in this geometry. + const NO_DUPLICATE_ANY_HIT_INVOCATION = 0x2; + } +} + +/// Geometry data that can be used in an acceleration structure. +#[derive(Debug)] +pub struct Geometry<'a, B: Backend> { + /// Flags to describe how this geometry will be intersected. + pub flags: GeometryFlags, + + /// The data contained in this geometry. + pub geometry: GeometryData<'a, B>, +} + +/// TODO docs +#[derive(Debug)] +pub enum GeometryData<'a, B: Backend> { + /// TODO docs + Triangles(GeometryTriangles<'a, B>), + /// TODO docs + Aabbs(GeometryAabbs<'a, B>), + /// TODO docs + Instances(GeometryInstances<'a, B>), +} + +/// Geometry data containing triangle data. +#[derive(Debug)] +pub struct GeometryTriangles<'a, B: Backend> { + // TODO: VK could support more by querying `VK_FORMAT_FEATURE_ACCELERATION_STRUCTURE_VERTEX_BUFFER_BIT_KHR`, DX12 is not queryable? 
Note [the DX12 ray tracing spec](https://microsoft.github.io/DirectX-Specs/d3d/Raytracing.html#d3d12_raytracing_geometry_triangles_desc) says it supports more than [the Win32 docs](https://docs.microsoft.com/en-us/windows/win32/api/d3d12/ns-d3d12-d3d12_raytracing_geometry_triangles_desc). + /// The format of the vertex data in `vertex_buffer`. + /// + /// At least the following formats are supported: + /// - `(R32_G32, Float)`: The Z component is implied to be 0. + /// - `(R32_G32_B32, Float)` + /// - `(R16_G16, Float)`: The Z component is implied to be 0. + /// - `(R16_G16_B16_A16, Float)`: The A component is ignored. + /// - `(R16_G16, Inorm)`: The Z component is implied to be 0. + /// - `(R16_G16_B16_A16, Inorm)`: The A component is ignored. + pub vertex_format: Format, + + // TODO(cpu-repr) + /// The buffer containing the vertex data. + pub vertex_buffer: &'a B::Buffer, + /// The offset into `vertex_buffer` pointing to the start of the vertex data. + pub vertex_buffer_offset: Offset, + /// The space between vertices in `vertex_buffer`. + pub vertex_buffer_stride: Stride, + + /// The index of the last vertex addressed by a build command using this geometry. + pub max_vertex: Offset, + + // TODO(cpu-repr) + /// The buffer and offset containing the index data and the type of the indices. + pub index_buffer: Option<(&'a B::Buffer, Offset, IndexType)>, + + /// TODO(cpu-repr) + /// The buffer and offset containing a list of transform data. + /// + /// The buffer must contain a list of `TransformMatrix`. + pub transform: Option<(&'a B::Buffer, Offset)>, +} + +/// A 3x4 row-major affine transformation matrix. +#[derive(Debug, Copy, Clone)] +#[repr(transparent)] +pub struct TransformMatrix([[f32; 4]; 3]); + +impl TransformMatrix { + /// The identity transform. + pub fn identity() -> Self { + Self([ + [1.0, 0.0, 0.0, 0.0], + [0.0, 1.0, 0.0, 0.0], + [0.0, 0.0, 1.0, 0.0], + ]) + } +} + +/// Geometry data containing axis-aligned bounding box data. 
+#[derive(Debug)] +pub struct GeometryAabbs<'a, B: Backend> { + // TODO(cpu-repr) + /// The buffer containing the AABB data. + /// + /// The buffer must contain a list of `AabbPositions`. + pub buffer: &'a B::Buffer, + + /// The offset into `buffer`. + pub buffer_offset: Offset, + + /// The stride of the AABB data in `buffer`. + pub buffer_stride: Stride, +} + +/// An axis-aligned bounding box. +#[derive(Debug, Copy, Clone)] +#[repr(C)] +pub struct AabbPositions { + /// A 3D position containing the minimum corner of the AABB. + pub min: [f32; 3], + + /// A 3D position containing the maximum corner of the AABB. + pub max: [f32; 3], +} + +/// Geometry data containing instance data. +#[derive(Debug)] +pub struct GeometryInstances<'a, B: Backend> { + // TODO this struct also allows passing an array of pointers, idk if that makes sense outside the host operations case + // TODO(cpu-repr) + /// The buffer containing the instance data. + /// + /// The buffer must contain a list of `Instance`. + pub buffer: &'a B::Buffer, + + /// The offset into `buffer`. + pub buffer_offset: Offset, +} + +bitflags! { + /// Option flags for an acceleration structure instance. + pub struct InstanceFlags: u8 { + /// Disables face culling for this instance. + const TRIANGLE_FACING_CULL_DISABLE = 0x1; + /// Reverses front and back sides of geometry's triangles. + /// + /// Note the winding direction is calculated in object space, is not affected by instance transforms. + const TRIANGLE_FRONT_COUNTERCLOCKWISE = 0x2; + /// Override the `GeometryFlags` bottom-level acceleration structures to act as if `GeometryFlags::OPAQUE` was set. + /// + /// This flag can be overridden by the ray flags (TODO reference which flags when they are added) + const FORCE_OPAQUE = 0x4; + /// Override the `GeometryFlags` bottom-level acceleration structures to act as if `GeometryFlags::OPAQUE` was not set. 
+ /// + /// This flag can be overridden by the ray flags (TODO reference which flags when they are added) + const FORCE_NO_OPAQUE = 0x8; + } +} + +/// The device address for an acceleration structure. +/// +/// This is only used to refer to bottom-level acceleration structure in [`Instances`] written to device buffers by the user and referenced by [`GeometryInstances`] to create top-level acceleration structures. +/// +/// Note: The inner value is `pub` to allow for backend implementations, but should be otherwise treated as opaque. +#[derive(Copy, Clone)] +#[repr(transparent)] +pub struct DeviceAddress(pub u64); + +impl std::fmt::Debug for DeviceAddress { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + struct DebugAsHex(u64); + + impl std::fmt::Debug for DebugAsHex { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + std::fmt::LowerHex::fmt(&self.0, f) + } + } + + f.debug_tuple("DeviceAddress") + .field(&DebugAsHex(self.0)) + .finish() + } +} + +impl std::fmt::Pointer for DeviceAddress { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + std::fmt::LowerHex::fmt(&self.0, f) + } +} + +/// An instance pointing to some bottom-level acceleration structure data. +/// +/// Note: there are fields that are combined because driver APIs require this struct to have a specific layout and to be written, tightly packed, into a GPU buffer to be consumed. Consider using the helper methods on this type to assign to those fields. +#[derive(Clone)] +#[repr(C)] +pub struct Instance { + /// The instance transform matrix that should be applied to the referenced acceleration structure. + pub transform: TransformMatrix, + + /// Combined instance custom index and mask into a single field. + /// - Top 24 bits are the custom index + /// - Bottom 8 bits are the visibility mask for the geometry. 
The instance may only be hit if rayMask & instance.mask != 0 + pub instance_custom_index_24_and_mask_8: u32, + + /// Combined instance shader binding table record offset and flags into a single field. + /// - Top 24 bits are the SBT record offset + /// - Bottom 8 bits are `InstanceFlags` + pub instance_shader_binding_table_record_offset_24_and_flags_8: u32, + + /// The bottom-level acceleration structure this `Instance` refers to. + pub acceleration_structure_reference: DeviceAddress, +} + +impl std::fmt::Debug for Instance { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("Instance") + .field("transform", &self.transform) + .field("instance_custom_index", &self.instance_custom_index()) + .field("mask", &self.mask()) + .field( + "instance_shader_binding_table_record_offset", + &self.instance_shader_binding_table_record_offset(), + ) + .field("flags", &self.flags()) + .field( + "acceleration_structure_reference", + &self.acceleration_structure_reference, + ) + .finish() + } +} + +impl Instance { + /// Create an reference to a bottom-level acceleration structure. + pub fn new(blas: DeviceAddress) -> Self { + Self { + transform: TransformMatrix::identity(), + instance_custom_index_24_and_mask_8: 0, + instance_shader_binding_table_record_offset_24_and_flags_8: 0, + acceleration_structure_reference: blas, + } + } + + const TOP_24_MASK: u32 = 0xFFFFFF00; + const BOTTOM_8_MASK: u32 = 0xFF; + + fn fits_in_24_bits(n: u32) -> bool { + n < 1 << 24 + } + + fn get_top_24_bits(n: u32) -> u32 { + (n & Self::TOP_24_MASK) >> 8 + } + + fn get_bottom_8_bits(n: u32) -> u8 { + (n & Self::BOTTOM_8_MASK) as u8 + } + + fn replace_bits(destination: u32, new_bits: u32, new_bits_mask: u32) -> u32 { + destination ^ ((destination ^ new_bits) & new_bits_mask) + } + + /// Get the instance custom index portion of `self.instance_custom_index_24_and_mask_8`. 
+ pub fn instance_custom_index(&self) -> u32 { + Self::get_top_24_bits(self.instance_custom_index_24_and_mask_8) + } + + /// Set the instance custom index portion of `self.instance_custom_index_24_and_mask_8`. + pub fn set_instance_custom_index(&mut self, instance_custom_index: u32) { + assert!(Self::fits_in_24_bits(instance_custom_index)); + self.instance_custom_index_24_and_mask_8 = Self::replace_bits( + self.instance_custom_index_24_and_mask_8, + instance_custom_index << 8, + Self::TOP_24_MASK, + ); + } + + /// Get the mask portion of `self.instance_custom_index_24_and_mask_8`. + pub fn mask(&self) -> u8 { + Self::get_bottom_8_bits(self.instance_custom_index_24_and_mask_8) + } + + /// Set the mask portion of `self.instance_custom_index_24_and_mask_8`. + pub fn set_mask(&mut self, mask: u8) { + self.instance_custom_index_24_and_mask_8 = Self::replace_bits( + self.instance_custom_index_24_and_mask_8, + mask as u32, + Self::BOTTOM_8_MASK, + ); + } + + /// Get the instance shader binding table record offset portion of `self.instance_shader_binding_table_record_offset_24_and_flags_8`. + pub fn instance_shader_binding_table_record_offset(&self) -> u32 { + Self::get_top_24_bits(self.instance_shader_binding_table_record_offset_24_and_flags_8) + } + + /// Set the instance shader binding table record offset portion of `self.instance_shader_binding_table_record_offset_24_and_flags_8`. + pub fn set_instance_shader_binding_table_record_offset( + &mut self, + instance_shader_binding_table_record_offset: u32, + ) { + assert!(Self::fits_in_24_bits( + instance_shader_binding_table_record_offset + )); + self.instance_shader_binding_table_record_offset_24_and_flags_8 = Self::replace_bits( + self.instance_shader_binding_table_record_offset_24_and_flags_8, + instance_shader_binding_table_record_offset << 8, + Self::TOP_24_MASK, + ); + } + + /// Get the flags portion of `self.instance_shader_binding_table_record_offset_24_and_flags_8`. 
+ pub fn set_flags(&mut self, flags: InstanceFlags) { + self.instance_shader_binding_table_record_offset_24_and_flags_8 = Self::replace_bits( + self.instance_shader_binding_table_record_offset_24_and_flags_8, + flags.bits() as u32, + Self::BOTTOM_8_MASK, + ); + } + + /// Set the flags portion of `self.instance_shader_binding_table_record_offset_24_and_flags_8`. + /// + /// If the flags value is not valid (i.e. if it were set directly), returns `Err` with the raw bits. + pub fn flags(&self) -> Result { + let bits = Self::get_bottom_8_bits( + self.instance_shader_binding_table_record_offset_24_and_flags_8, + ); + InstanceFlags::from_bits(bits).ok_or(bits) + } +} + +#[cfg(test)] +mod instance_tests { + use super::*; + + #[test] + fn debug_fmt() { + let mut instance = Instance::new(DeviceAddress(12)); + instance.set_instance_custom_index(2); + instance.set_mask(3); + instance.set_instance_shader_binding_table_record_offset(4); + instance.set_flags(InstanceFlags::FORCE_OPAQUE); + + assert_eq!(format!("{:?}", instance), "Instance { transform: TransformMatrix([[1.0, 0.0, 0.0, 0.0], [0.0, 1.0, 0.0, 0.0], [0.0, 0.0, 1.0, 0.0]]), instance_custom_index: 2, mask: 3, instance_shader_binding_table_record_offset: 4, flags: Ok(FORCE_OPAQUE), acceleration_structure_reference: DeviceAddress(c) }"); + + assert_eq!( + format!("{:#?}", instance), + r"Instance { + transform: TransformMatrix( + [ + [ + 1.0, + 0.0, + 0.0, + 0.0, + ], + [ + 0.0, + 1.0, + 0.0, + 0.0, + ], + [ + 0.0, + 0.0, + 1.0, + 0.0, + ], + ], + ), + instance_custom_index: 2, + mask: 3, + instance_shader_binding_table_record_offset: 4, + flags: Ok( + FORCE_OPAQUE, + ), + acceleration_structure_reference: DeviceAddress( + 0xc, + ), +}" + ); + } + + #[test] + fn simple() { + let mut instance = Instance::new(DeviceAddress(1)); + instance.set_instance_custom_index(2); + instance.set_mask(3); + instance.set_instance_shader_binding_table_record_offset(4); + instance.set_flags(InstanceFlags::FORCE_NO_OPAQUE); + + 
assert_eq!(instance.acceleration_structure_reference.0, 1); + assert_eq!(instance.instance_custom_index(), 2); + assert_eq!(instance.mask(), 3); + assert_eq!(instance.instance_shader_binding_table_record_offset(), 4); + assert_eq!(instance.flags(), Ok(InstanceFlags::FORCE_NO_OPAQUE)); + } + + #[test] + fn flags_getter() { + // Ensure that `0xFF` is not a valid value. If it is, `Instance.flags()` doesn't need to return `Result`. + assert!(InstanceFlags::from_bits(0xFF).is_none()); + + let mut instance = Instance::new(DeviceAddress(1)); + instance.instance_shader_binding_table_record_offset_24_and_flags_8 = 0xFF; + assert_eq!(instance.flags(), Err(0xFF)); + } + + const LARGEST_24_BIT_NUMBER: u32 = (1 << 24) - 1; + + #[test] + fn set_instance_custom_index_largest_value() { + let mut instance = Instance::new(DeviceAddress(1)); + instance.set_instance_custom_index(LARGEST_24_BIT_NUMBER); + assert_eq!(instance.instance_custom_index(), LARGEST_24_BIT_NUMBER); + } + + #[test] + #[should_panic] + fn set_instance_custom_index_panic_on_too_large() { + Instance::new(DeviceAddress(1)).set_instance_custom_index(LARGEST_24_BIT_NUMBER + 1); + } + + #[test] + fn set_instance_shader_binding_table_record_offset_largest_value() { + let mut instance = Instance::new(DeviceAddress(1)); + instance.set_instance_shader_binding_table_record_offset(LARGEST_24_BIT_NUMBER); + assert_eq!( + instance.instance_shader_binding_table_record_offset(), + LARGEST_24_BIT_NUMBER + ); + } + + #[test] + #[should_panic] + fn set_instance_shader_binding_table_record_offset_panic_on_too_large() { + Instance::new(DeviceAddress(1)) + .set_instance_shader_binding_table_record_offset(LARGEST_24_BIT_NUMBER + 1); + } +} + +/// The size requirements describing how big to make the buffers needed to create an acceleration structure. +#[derive(Debug, Copy, Clone)] +pub struct SizeRequirements { + /// The required size for the acceleration structure buffer. 
+ pub acceleration_structure_size: u64, + /// The required size for the scratch buffer used in the build step if an incremental update was requested. + pub update_scratch_size: u64, + /// The required size for the scratch buffer used in the build step. + pub build_scratch_size: u64, +} + +/// Denotes how an acceleration structure should be copied. +#[derive(Debug, Copy, Clone)] +pub enum CopyMode { + /// Creates a copy of the source acceleration structure to the destination. Both must have been created with the same parameters. + Copy, + /// Creates a more compact version of the source acceleration structure into the destination. The destination acceleration structure must be at least large enough, as queried by `query::Type::AccelerationStructureCompactedSize`. + Compact, +} + +/// Indexes and offsets into a [`GeometryDesc`] from which an acceleration structure should be built. +#[derive(Debug)] +#[repr(C)] +pub struct BuildRangeDesc { + /// The number of primitives for the given acceleration structure. + /// + /// - For [`GeometryTriangles`], this refers to the number of triangles to be built. + /// - For [`GeometryAabbs`], this refers to the number of bounding boxes to be built. + /// - For [`GeometryInstances`], this refers to the number of instances to be built. + pub primitive_count: u32, + /// The offset in bytes into the memory where the primitives are defined. + /// + /// - For [`GeometryTriangles`] + /// - If indices are used, this must be a multiple of the index type size. + /// - If not, this must be a multiple of the component size of the vertex format. + /// - For [`GeometryAabbs`], this must be a multiple of 8. + /// - For [`GeometryInstances`], this must be a multiple of 16. + pub primitive_offset: u32, + /// The index of the first triangle to build from. + /// + /// Only used by [`GeometryTriangles`]. 
+ pub first_vertex: u32, + /// The offset in bytes into the memory where the transform is defined from which a single transformation matrix will be read. It must be a multiple of 16. + /// + /// Only used by [`GeometryTriangles`]. + pub transform_offset: u32, +} + +/// Serialized acceleration structure compatibility. +#[derive(Debug)] +pub enum Compatibility { + /// The serialized acceleration structure is compatible with the current device. + Compatible, + /// The serialized acceleration structure is not compatible with the current device. + Incompatible, +} + +#[cfg(test)] +mod struct_size_tests { + use super::*; + + #[test] + fn transform_matrix() { + assert_eq!(std::mem::size_of::(), 48); + assert_eq!(std::mem::size_of::<[TransformMatrix; 2]>(), 96); + } + + #[test] + fn aabb_positions() { + assert_eq!(std::mem::size_of::(), 24); + assert_eq!(std::mem::size_of::<[AabbPositions; 2]>(), 48); + } + + #[test] + fn instance() { + assert_eq!(std::mem::size_of::(), 64); + assert_eq!(std::mem::size_of::<[Instance; 2]>(), 128); + } + + #[test] + fn build_range_desc() { + assert_eq!(std::mem::size_of::(), 16); + assert_eq!(std::mem::size_of::<[BuildRangeDesc; 2]>(), 32); + } +} diff --git a/src/hal/src/buffer.rs b/src/hal/src/buffer.rs index 14d670af45f..8dd90fab8fd 100644 --- a/src/hal/src/buffer.rs +++ b/src/hal/src/buffer.rs @@ -85,6 +85,14 @@ bitflags!( const VERTEX = 0x80; /// const INDIRECT = 0x100; + /// + const SHADER_DEVICE_ADDRESS = 0x20000; + /// + const ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY = 0x80000; + /// + const ACCELERATION_STRUCTURE_STORAGE = 0x100000; + /// + const SHADER_BINDING_TABLE = 0x400; } ); @@ -144,5 +152,9 @@ bitflags!( const MEMORY_READ = 0x8000; /// const MEMORY_WRITE = 0x10000; + /// + const ACCELERATION_STRUCTURE_READ = 0x200000; + /// + const ACCELERATION_STRUCTURE_WRITE = 0x400000; } ); diff --git a/src/hal/src/command/mod.rs b/src/hal/src/command/mod.rs index 81521b513e2..3a6d208b880 100644 --- a/src/hal/src/command/mod.rs +++ 
b/src/hal/src/command/mod.rs @@ -17,7 +17,7 @@ mod clear; mod structs; use crate::{ - buffer, + acceleration_structure, buffer, image::{Filter, Layout, SubresourceRange}, memory::{Barrier, Dependencies}, pass, pso, query, Backend, DrawCount, IndexCount, IndexType, InstanceCount, TaskCount, @@ -353,6 +353,25 @@ pub trait CommandBuffer: fmt::Debug + Any + Send + Sync { I: Iterator, J: Iterator; + /// TODO docs + unsafe fn bind_ray_tracing_pipeline(&mut self, _pipeline: &B::RayTracingPipeline) { + unimplemented!(); + } + + /// TODO docs + unsafe fn bind_ray_tracing_descriptor_sets<'a, I, J>( + &mut self, + _layout: &B::PipelineLayout, + _first_set: usize, + _sets: I, + _offsets: J, + ) where + I: Iterator, + J: Iterator, + { + unimplemented!(); + } + /// Execute a workgroup in the compute pipeline. `x`, `y` and `z` are the /// number of local workgroups to dispatch along each "axis"; a total of `x`*`y`*`z` /// local workgroups will be created. @@ -574,6 +593,103 @@ pub trait CommandBuffer: fmt::Debug + Any + Send + Sync { /// Requests a timestamp to be written. unsafe fn write_timestamp(&mut self, stage: pso::PipelineStage, query: query::Query); + /// Build an acceleration structure. + /// + /// `ranges` must contain a number of entries equal to the number of geometries described in `desc`. + unsafe fn build_acceleration_structure<'a>( + &self, + _desc: &'a acceleration_structure::BuildDesc<'a, B>, + _ranges: &'a [acceleration_structure::BuildRangeDesc], + ) { + unimplemented!() + } + + /// Functions identically to `build_acceleration_structure()`, except the parameters are read from the given buffer, starting at `offset` and increasing `stride` bytes for each geometry in `desc`. + /// + /// `max_primitive_counts` must contain a number of entries equal to the number of geometries described in `desc`. 
+ unsafe fn build_acceleration_structure_indirect<'a>( + &self, + _desc: &'a acceleration_structure::BuildDesc<'a, B>, + _buffer: &'a B::Buffer, + _offset: buffer::Offset, + _stride: buffer::Stride, + _max_primitive_counts: &'a [u32], + ) { + unimplemented!() + } + + /// Copy an acceleration structure from `src` to `dst`. + unsafe fn copy_acceleration_structure( + &self, + _src: &B::AccelerationStructure, + _dst: &B::AccelerationStructure, + _mode: acceleration_structure::CopyMode, + ) { + unimplemented!() + } + + /// Serialize acceleration structure from `src` to `dst`. + unsafe fn serialize_acceleration_structure_to_memory( + &self, + _src: &B::AccelerationStructure, + _dst_buffer: &B::Buffer, + _dst_offset: buffer::Offset, + ) { + unimplemented!() + } + + /// Deserialize acceleration structure from `src` to `dst`. + unsafe fn deserialize_memory_to_acceleration_structure( + &self, + _src_buffer: &B::Buffer, + _src_offset: buffer::Offset, + _dst: &B::AccelerationStructure, + ) { + unimplemented!() + } + + /// Write some property `query_type` about `accel_structs` to `pool`. + unsafe fn write_acceleration_structures_properties( + &self, + _accel_structs: &[&B::AccelerationStructure], + _query_type: query::Type, + _pool: &B::QueryPool, + _first_query: u32, + ) { + unimplemented!() + } + + /// TODO docs + unsafe fn set_ray_tracing_pipeline_stack_size(&self, _pipeline_stack_size: u32) { + unimplemented!() + } + + /// TODO docs + unsafe fn trace_rays( + &self, + _raygen_shader_binding_table: Option>, + _miss_shader_binding_table: Option>, + _hit_shader_binding_table: Option>, + _callable_shader_binding_table: Option>, + _count: WorkGroupCount, + ) { + unimplemented!() + } + + /// TODO docs + /// `buffer` points to a `WorkGroupCount`. 
+ unsafe fn trace_rays_indirect<'a>( + &self, + _raygen_shader_binding_table: Option>, + _miss_shader_binding_table: Option>, + _hit_shader_binding_table: Option>, + _callable_shader_binding_table: Option>, + _buffer: &'a B::Buffer, + _offset: buffer::Offset, + ) { + unimplemented!() + } + /// Modify constant data in a graphics pipeline. Push constants are intended to modify data in a /// pipeline more quickly than a updating the values inside a descriptor set. /// diff --git a/src/hal/src/device.rs b/src/hal/src/device.rs index eb2750a2bfc..10e4ba5ea7c 100644 --- a/src/hal/src/device.rs +++ b/src/hal/src/device.rs @@ -12,7 +12,7 @@ //! and is used to actually do things. use crate::{ - buffer, display, format, image, memory, + acceleration_structure, buffer, display, format, image, memory, memory::{Requirements, Segment}, pass, pool::CommandPoolCreateFlags, @@ -23,7 +23,7 @@ use crate::{ Backend, MemoryTypeId, }; -use std::{any::Any, fmt, iter, ops::Range}; +use std::{any::Any, fmt, iter, ops::Range, unimplemented}; /// Error occurred caused device to be lost. #[derive(Clone, Debug, PartialEq, thiserror::Error)] @@ -329,6 +329,22 @@ pub trait Device: fmt::Debug + Any + Send + Sync { /// which references the compute pipeline, has finished execution. unsafe fn destroy_compute_pipeline(&self, pipeline: B::ComputePipeline); + /// TODO docs + // TODO(capture-replay) can return VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS + // TODO(host-operations) deferredOperation + unsafe fn create_ray_tracing_pipeline<'a>( + &self, + _desc: &pso::RayTracingPipelineDesc<'a, B>, + _cache: Option<&B::PipelineCache>, + ) -> Result { + unimplemented!() + } + + /// TODO docs + unsafe fn destroy_ray_tracing_pipeline(&self, _pipeline: B::RayTracingPipeline) { + unimplemented!() + } + /// Create a new framebuffer object. /// /// # Safety @@ -673,6 +689,82 @@ pub trait Device: fmt::Debug + Any + Send + Sync { flags: query::ResultFlags, ) -> Result; + /// Create an acceleration structure object. 
+ unsafe fn create_acceleration_structure( + &self, + _desc: &acceleration_structure::CreateDesc, + ) -> Result { + unimplemented!() + } + + /// Destroy an acceleration structure object. + unsafe fn destroy_acceleration_structure(&self, _accel_struct: B::AccelerationStructure) { + unimplemented!() + } + + /// Get the size requirements for the buffers needed to build an acceleration structure. + /// + /// `max_primitive_counts` must contain a number of entries equal to the number of geometries described in `desc`. + unsafe fn get_acceleration_structure_build_requirements( + &self, + _desc: &acceleration_structure::GeometryDesc, + _max_primitive_counts: &[u32], + ) -> acceleration_structure::SizeRequirements { + unimplemented!() + } + + /// Get the device address of a bottom-level acceleration structure for use in top-level acceleration structures `acceleration_structure::Instance`s. + unsafe fn get_acceleration_structure_address( + &self, + _accel_struct: &B::AccelerationStructure, + ) -> acceleration_structure::DeviceAddress { + unimplemented!() + } + + /// Determine if a previously serialized acceleration structure (e.g. loaded from disk) is compatible with the current device. + /// + /// `version_header` is the first 32 bytes of a serialized acceleration struct. If you have a `&[u8]` from loading an acceleration structure, consider using `try_into()` to convert to `&[u8; 32]`. 
+ unsafe fn get_device_acceleration_structure_compatibility( + &self, + _version_header: &[u8; 32], + ) -> acceleration_structure::Compatibility { + unimplemented!() + } + + // // TODO(capture-replay) + // // TODO return a buffer of size shaderGroupHandleCaptureReplaySize * groupCount + // unsafe fn get_ray_tracing_capture_replay_shader_group_handles<'a>( + // &self, + // _pipeline: &'a B::RayTracingPipeline, + // _first_group: u32, + // _group_count: u32, + // _data: &mut [u8] + // ) -> Result<(), OutOfMemory> { + // unimplemented!() + // } + + /// TODO docs + // `data_size` must be at least `shaderGroupHandleSize * groupCount` + unsafe fn get_ray_tracing_shader_group_handles<'a>( + &self, + _pipeline: &'a B::RayTracingPipeline, + _first_group: u32, + _group_count: u32, + _data_size: usize, + ) -> Result, OutOfMemory> { + unimplemented!() + } + + /// TODO docs + unsafe fn get_ray_tracing_shader_group_stack_size<'a>( + &self, + _pipeline: &'a B::RayTracingPipeline, + _group: u32, + _group_shader: pso::GroupShader, + ) -> u64 { + unimplemented!() + } + /// Wait for all queues associated with this device to idle. /// /// Host access to all queues needs to be **externally** sycnhronized! 
@@ -713,6 +805,15 @@ pub trait Device: fmt::Debug + Any + Send + Sync { /// Associate a name with a pipeline layout, for easier debugging in external tools or with /// validation layers that can print a friendly name when referring to objects in error messages unsafe fn set_pipeline_layout_name(&self, pipeline_layout: &mut B::PipelineLayout, name: &str); + /// Associate a name with an acceleration structure, for easier debugging in external tools or with + /// validation layers that can print a friendly name when referring to objects in error messages + unsafe fn set_acceleration_structure_name( + &self, + _accel_struct: &mut B::AccelerationStructure, + _name: &str, + ) { + unimplemented!() + } /// Control the power state of the provided display unsafe fn set_display_power_state( diff --git a/src/hal/src/lib.rs b/src/hal/src/lib.rs index 82b11e97060..f12b72faabb 100644 --- a/src/hal/src/lib.rs +++ b/src/hal/src/lib.rs @@ -48,6 +48,7 @@ extern crate serde; use std::{any::Any, fmt, hash::Hash}; +pub mod acceleration_structure; pub mod adapter; pub mod buffer; pub mod command; @@ -283,13 +284,32 @@ bitflags! { // Bits for Extensions /// Supports task shader stage. - const TASK_SHADER = 0x0001 << 96; + const TASK_SHADER = 0x0000_0001 << 96; /// Supports mesh shader stage. - const MESH_SHADER = 0x0002 << 96; + const MESH_SHADER = 0x0000_0002 << 96; /// Mask for all the features associated with mesh shader stages. const MESH_SHADER_MASK = Features::TASK_SHADER.bits | Features::MESH_SHADER.bits; /// Support sampler min/max reduction mode. - const SAMPLER_REDUCTION = 0x0004 << 96; + const SAMPLER_REDUCTION = 0x0000_0004 << 96; + + /// Supports acceleration structures. + /// + /// Requires `RAY_TRACING_PIPELINE` or `RAY_QUERY` to also be enabled. + const ACCELERATION_STRUCTURE = 0x0000_0008 << 96; + /// Supports a command to indirectly build an acceleration structure. + // TODO should this be part of `AccelerationStructureProperties`? 
The diff would be if app can depend on this feature vs. check for its availability. + const ACCELERATION_STRUCTURE_INDIRECT_BUILD = 0x0000_0010 << 96; + /// Mask for all the features associated with acceleration structures. + const ACCELERATION_STRUCTURE_MASK = Features::ACCELERATION_STRUCTURE.bits | Features::ACCELERATION_STRUCTURE_INDIRECT_BUILD.bits; + + /// Support ray query functionality in shaders. + const RAY_QUERY = 0x0000_0020 << 96; + /// Supports ray tracing pipelines. + const RAY_TRACING_PIPELINE = 0x0000_0040 << 96; + /// Supports the indirect trace rays call. + const TRACE_RAYS_INDIRECT = 0x0000_0080 << 96; + /// TODO docs + const RAY_TRAVERSAL_PRIMITIVE_CULLING = 0x0000_0100 << 96; } } @@ -327,6 +347,8 @@ bitflags! { const STENCIL_WRITE_MASK = 0x0200; /// Supports `StencilTest::reference_values == State::Dynamic(_)` const STENCIL_REFERENCE = 0x0400; + /// TODO docs + const RAY_TRACING_PIPELINE_STACK_SIZE = 1000347000; } } @@ -347,6 +369,10 @@ pub struct PhysicalDeviceProperties { pub sampler_reduction: SamplerReductionProperties, /// Downlevel properties. pub downlevel: DownlevelProperties, + /// Acceleration Structure properties. + pub acceleration_structure: AccelerationStructureProperties, + /// Ray Tracing Pipeline properties. + pub ray_tracing_pipeline: RayTracingPipelineProperties, /// Performance caveats. pub performance_caveats: PerformanceCaveats, /// Dynamic pipeline states. @@ -576,6 +602,50 @@ pub struct MeshShaderProperties { pub mesh_output_per_primitive_granularity: u32, } +/// Resource limits related to the Acceleration Structure. +#[derive(Clone, Copy, Debug, Default, PartialEq)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub struct AccelerationStructureProperties { + /// The maximum number of geometries in a bottom level acceleration structure. + pub max_acceleration_structure_bottom_level_geometry_count: u64, + /// The maximum number of instances in a top level acceleration structure. 
+ pub max_acceleration_structure_top_level_instance_count: u64, + /// The maximum total number of triangles or AABBs in all geometries in a bottom level acceleration structure. + pub max_acceleration_structure_bottom_level_total_primitive_count: u64, + /// The maximum number of acceleration structure bindings that can be accessible to a single shader stage in a pipeline layout. + pub max_per_stage_descriptor_acceleration_structures: u32, + /// + pub max_descriptor_set_acceleration_structures: u32, + /// The minimum alignment in bytes for scratch data passed in to an acceleration structure build command. + pub min_acceleration_structure_scratch_offset_alignment: u32, +} + +/// Resource limits related to the Ray Tracing Pipeline. +#[derive(Clone, Copy, Debug, Default, PartialEq)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub struct RayTracingPipelineProperties { + // TODO since we don't use `vk::DescriptorSetLayoutCreateFlags`, I'm leaving this out + // pub max_per_stage_descriptor_update_after_bind_acceleration_structures: u32, + // pub max_descriptor_set_update_after_bind_acceleration_structures: u32, + /// TODO docs + pub shader_group_handle_size: u32, + /// TODO docs + pub max_ray_recursion_depth: u32, + /// TODO docs + pub max_shader_group_stride: u32, + /// TODO docs + pub shader_group_base_alignment: u32, + // TODO(capture-replay) + // /// TODO docs + // pub shader_group_handle_capture_replay_size: u32, + /// TODO docs + pub max_ray_dispatch_invocation_count: u32, + /// TODO docs + pub shader_group_handle_alignment: u32, + /// TODO docs + pub max_ray_hit_attribute_size: u32, +} + /// Resource limits related to the reduction samplers. 
#[derive(Clone, Copy, Debug, Default, PartialEq)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] @@ -801,6 +871,8 @@ pub trait Backend: 'static + Sized + Eq + Clone + Hash + fmt::Debug + Any + Send type ComputePipeline: fmt::Debug + Any + Send + Sync; /// The corresponding graphics pipeline type for this backend. type GraphicsPipeline: fmt::Debug + Any + Send + Sync; + /// The corresponding ray tracing pipeline type for this backend. + type RayTracingPipeline: fmt::Debug + Any + Send + Sync; /// The corresponding pipeline cache type for this backend. type PipelineCache: fmt::Debug + Any + Send + Sync; /// The corresponding pipeline layout type for this backend. @@ -820,8 +892,12 @@ pub trait Backend: 'static + Sized + Eq + Clone + Hash + fmt::Debug + Any + Send type Event: fmt::Debug + Any + Send + Sync; /// The corresponding query pool type for this backend. type QueryPool: fmt::Debug + Any + Send + Sync; + /// The corresponding display type for this backend. type Display: fmt::Debug + Any + Send + Sync; /// The corresponding display mode type for this backend type DisplayMode: fmt::Debug + Any + Send + Sync; + + /// The corresponding acceleration structure type for this backend. + type AccelerationStructure: fmt::Debug + Any + Send + Sync; } diff --git a/src/hal/src/pso/descriptor.rs b/src/hal/src/pso/descriptor.rs index 5e023e14a84..aff04b1e25b 100644 --- a/src/hal/src/pso/descriptor.rs +++ b/src/hal/src/pso/descriptor.rs @@ -94,6 +94,8 @@ pub enum DescriptorType { }, /// A descriptor associated with an input attachment. InputAttachment, + /// A descriptor associated with an acceleration structure. 
+ AccelerationStructure, } /// Information about the contents of and in which stages descriptors may be bound to a descriptor @@ -249,6 +251,7 @@ pub enum Descriptor<'a, B: Backend> { CombinedImageSampler(&'a B::ImageView, Layout, &'a B::Sampler), Buffer(&'a B::Buffer, SubRange), TexelBuffer(&'a B::BufferView), + AccelerationStructure(&'a B::AccelerationStructure), } /// Copies a range of descriptors to be bound from one descriptor set to another. diff --git a/src/hal/src/pso/mod.rs b/src/hal/src/pso/mod.rs index e5ab4c2bab7..66e18cbab03 100644 --- a/src/hal/src/pso/mod.rs +++ b/src/hal/src/pso/mod.rs @@ -9,10 +9,12 @@ mod descriptor; mod graphics; mod input_assembler; mod output_merger; +mod ray_tracing; mod specialization; pub use self::{ - compute::*, descriptor::*, graphics::*, input_assembler::*, output_merger::*, specialization::*, + compute::*, descriptor::*, graphics::*, input_assembler::*, output_merger::*, ray_tracing::*, + specialization::*, }; /// Error types happening upon PSO creation on the device side. @@ -48,6 +50,8 @@ bitflags!( /// Some stages are queue type dependent. #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] pub struct PipelineStage: u32 { + // NOTE: these values follow [VkPipelineStageFlagBits](https://www.khronos.org/registry/vulkan/specs/1.2-extensions/man/html/VkPipelineStageFlagBits.html). + /// Beginning of the command queue. const TOP_OF_PIPE = 0x1; /// Indirect data consumption. @@ -79,6 +83,10 @@ bitflags!( /// Read/Write access from host. /// (Not a real pipeline stage) const HOST = 0x4000; + /// Acceleration structure building stage. + const ACCELERATION_STRUCTURE_BUILD = 0x02000000; + /// Ray tracing shader execution. + const RAY_TRACING_SHADER = 0x00200000; /// Task shader stage. const TASK_SHADER = 0x80000; /// Mesh shader stage. 
@@ -91,6 +99,8 @@ bitflags!(
     #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
     #[derive(Default)]
     pub struct ShaderStageFlags: u32 {
+        // NOTE: these values follow [VkShaderStageFlagBits](https://www.khronos.org/registry/vulkan/specs/1.2-extensions/man/html/VkShaderStageFlagBits.html).
+
         /// Vertex shader stage.
         const VERTEX = 0x1;
         /// Hull (tessellation) shader stage.
@@ -110,6 +120,18 @@ bitflags!(
         /// All graphics pipeline shader stages.
         const GRAPHICS = Self::VERTEX.bits | Self::HULL.bits | Self::DOMAIN.bits | Self::GEOMETRY.bits | Self::FRAGMENT.bits;
+        /// Ray generation shader stage.
+        const RAYGEN = 0x100;
+        /// Any-hit shader stage.
+        const ANY_HIT = 0x200;
+        /// Closest-hit shader stage.
+        const CLOSEST_HIT = 0x400;
+        /// Miss shader stage.
+        const MISS = 0x800;
+        /// Intersection shader stage.
+        const INTERSECTION = 0x1000;
+        /// Callable shader stage.
+        const CALLABLE = 0x2000;
         /// All shader stages (matches Vulkan).
         const ALL = 0x7FFFFFFF;
     }
@@ -159,6 +181,21 @@ bitflags!(
         ///
         /// Must be set when pipelines set the pipeline as base.
        const ALLOW_DERIVATIVES = 0x2;
+
+        /// TODO docs
+        const RAY_TRACING_NO_NULL_ANY_HIT_SHADERS = 0x4000;
+        /// TODO docs
+        const RAY_TRACING_NO_NULL_CLOSEST_HIT_SHADERS = 0x8000;
+        /// TODO docs
+        const RAY_TRACING_NO_NULL_MISS_SHADERS = 0x10000;
+        /// TODO docs
+        const RAY_TRACING_NO_NULL_INTERSECTION_SHADERS = 0x20000;
+        /// TODO docs
+        const RAY_TRACING_SKIP_TRIANGLES = 0x1000;
+        /// TODO docs
+        const RAY_TRACING_SKIP_AABBS = 0x2000;
+        /// TODO docs
+        const RAY_TRACING_SHADER_GROUP_HANDLE_CAPTURE_REPLAY = 0x80000;
     }
 );
diff --git a/src/hal/src/pso/ray_tracing.rs b/src/hal/src/pso/ray_tracing.rs
new file mode 100644
index 00000000000..7d4cfcfadca
--- /dev/null
+++ b/src/hal/src/pso/ray_tracing.rs
@@ -0,0 +1,136 @@
+//! Ray tracing pipeline descriptor.
+ +use crate::{ + buffer::{Offset, Stride}, + pso::{PipelineCreationFlags, ShaderStageFlags}, + Backend, +}; + +use super::{BasePipeline, EntryPoint}; + +/// TODO docs +pub const SHADER_UNUSED: u32 = !0; + +/// TODO docs +// https://www.khronos.org/registry/vulkan/specs/1.2-extensions/man/html/VkStridedDeviceAddressRegionKHR.html +#[derive(Debug)] +pub struct ShaderBindingTable<'a, B: Backend> { + /// TODO docs + pub buffer: &'a B::Buffer, + /// TODO docs + pub offset: Offset, + /// TODO docs + pub stride: Stride, + /// TODO docs + pub size: u64, +} + +/// TODO docs +// https://www.khronos.org/registry/vulkan/specs/1.2-extensions/man/html/VkShaderGroupShaderKHR.html +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub enum GroupShader { + /// TODO docs + General, + /// TODO docs + ClosestHit, + /// TODO docs + AnyHit, + /// TODO docs + Intersection, +} + +/// A description of the data needed to construct a ray tracing pipeline. +// https://www.khronos.org/registry/vulkan/specs/1.2-extensions/man/html/VkRayTracingPipelineCreateInfoKHR.html +#[derive(Debug)] +pub struct RayTracingPipelineDesc<'a, B: Backend> { + /// Pipeline label + pub label: Option<&'a str>, + + /// TODO docs + pub flags: PipelineCreationFlags, + + /// TODO docs + // todo shaderstagecreatedesc instead + pub stages: &'a [ShaderStageDesc<'a, B>], + + /// TODO docs + pub groups: &'a [ShaderGroupDesc], + + /// TODO docs + pub max_pipeline_ray_recursion_depth: u32, + + // const VkPipelineLibraryCreateInfoKHR* pLibraryInfo; + // const VkRayTracingPipelineInterfaceCreateInfoKHR* pLibraryInterface; + // const VkPipelineDynamicStateCreateInfo* pDynamicState; + /// TODO docs + pub layout: &'a B::PipelineLayout, + + /// TODO docs + pub parent: BasePipeline<'a, B::RayTracingPipeline>, +} + +impl<'a, B: Backend> RayTracingPipelineDesc<'a, B> { + /// Create a new empty PSO descriptor. 
+    pub fn new(
+        stages: &'a [ShaderStageDesc<'a, B>],
+        groups: &'a [ShaderGroupDesc],
+        max_pipeline_ray_recursion_depth: u32,
+        layout: &'a B::PipelineLayout,
+    ) -> Self {
+        Self {
+            label: None,
+            flags: PipelineCreationFlags::empty(),
+            stages,
+            groups,
+            max_pipeline_ray_recursion_depth,
+            layout,
+            parent: BasePipeline::None,
+        }
+    }
+}
+
+/// TODO docs
+// https://www.khronos.org/registry/vulkan/specs/1.2-extensions/man/html/VkPipelineShaderStageCreateInfo.html
+#[derive(Debug)]
+pub struct ShaderStageDesc<'a, B: Backend> {
+    /// TODO docs
+    pub stage: ShaderStageFlags,
+    /// TODO docs
+    pub entry_point: EntryPoint<'a, B>,
+}
+
+/// TODO docs
+// https://www.khronos.org/registry/vulkan/specs/1.2-extensions/man/html/VkRayTracingShaderGroupCreateInfoKHR.html
+// https://www.khronos.org/registry/vulkan/specs/1.2-extensions/man/html/VkRayTracingShaderGroupTypeKHR.html
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum ShaderGroupDesc {
+    /// Specifies a shader group with a single shader in it.
+    General {
+        /// The index into the ray generation, miss, or callable shader from [`RayTracingPipelineDesc::stages`].
+        general_shader: u32,
+    },
+    /// Specifies a shader group that only hits triangles.
+    TrianglesHitGroup {
+        /// The optional index into the closest hit shader from [`RayTracingPipelineDesc::stages`].
+        closest_hit_shader: Option<u32>,
+        /// The optional index into the any hit shader from [`RayTracingPipelineDesc::stages`].
+        any_hit_shader: Option<u32>,
+    },
+    /// Specifies a shader group that only intersects with custom geometry.
+    ProceduralHitGroup {
+        /// The optional index into the closest hit shader from [`RayTracingPipelineDesc::stages`].
+        closest_hit_shader: Option<u32>,
+        /// The optional index into the any hit shader from [`RayTracingPipelineDesc::stages`].
+        any_hit_shader: Option<u32>,
+        /// The index into the intersection shader from [`RayTracingPipelineDesc::stages`].
+ intersection_shader: u32, + }, +} + +/// TODO docs +// https://www.khronos.org/registry/vulkan/specs/1.2-extensions/man/html/VkRayTracingPipelineInterfaceCreateInfoKHR.html +#[derive(Debug)] +pub struct PipelineInterfaceDesc { + max_pipeline_ray_payload_size: u32, + max_pipeline_ray_hit_attribute_size: u32, +} diff --git a/src/hal/src/query.rs b/src/hal/src/query.rs index 5b42c3a3a05..fc5c80f1026 100644 --- a/src/hal/src/query.rs +++ b/src/hal/src/query.rs @@ -70,6 +70,10 @@ pub enum Type { /// Timestamp query. Timestamps can be recorded to the /// query pool by calling `write_timestamp()`. Timestamp, + /// The required size of the destination acceleration structure when copied with `CopyMode::Compact`. + AccelerationStructureCompactedSize, + /// The required size of the destination buffer when serialized. + AccelerationStructureSerializationSize, } bitflags!(