diff --git a/wgpu-core/src/command/ray_tracing.rs b/wgpu-core/src/command/ray_tracing.rs index f8bba308b98..71c8bd59ff4 100644 --- a/wgpu-core/src/command/ray_tracing.rs +++ b/wgpu-core/src/command/ray_tracing.rs @@ -247,6 +247,7 @@ pub(crate) fn build_acceleration_structures( custom_data: instance.custom_data, mask: instance.mask, blas_address: blas.handle, + pipeline_intersection_data_offset: 0, }, )); diff --git a/wgpu-hal/src/dx12/adapter.rs b/wgpu-hal/src/dx12/adapter.rs index 00b8edbb5f9..5e7c63b5481 100644 --- a/wgpu-hal/src/dx12/adapter.rs +++ b/wgpu-hal/src/dx12/adapter.rs @@ -1016,6 +1016,11 @@ impl super::Adapter { max_binding_array_acceleration_structure_elements_per_shader_stage: max_acceleration_structures_per_shader_stage, max_multiview_view_count, + + // not yet implemented + max_intersection_group_count: 0, + max_ray_dispatch_count: 0, + max_ray_recursion_depth: 0, }), alignments: crate::Alignments { buffer_copy_offset: wgt::BufferSize::new( @@ -1035,6 +1040,10 @@ impl super::Adapter { .unwrap(), ray_tracing_scratch_buffer_alignment: Direct3D12::D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BYTE_ALIGNMENT, + // Not yet implemented + ray_tracing_pipeline_group_data_size: 0, + ray_tracing_pipeline_group_data_alignment: 0, + ray_tracing_pipeline_data_offset_alignment: 0, }, downlevel, cooperative_matrix_properties: Vec::new(), diff --git a/wgpu-hal/src/dx12/command.rs b/wgpu-hal/src/dx12/command.rs index 3d218efdf07..c4fbaf48713 100644 --- a/wgpu-hal/src/dx12/command.rs +++ b/wgpu-hal/src/dx12/command.rs @@ -1857,4 +1857,29 @@ impl crate::CommandEncoder for super::CommandEncoder { _dependencies: &[&super::AccelerationStructure], ) { } + + unsafe fn begin_ray_tracing_pass(&mut self, _desc: &crate::RayTracingPassDescriptor) { + unreachable!("Ray tracing pipelines not supported") + } + + unsafe fn end_ray_tracing_pass(&mut self) { + unreachable!("Ray tracing pipelines not supported") + } + + unsafe fn set_ray_tracing_pipeline( + &mut self, + _pipeline: 
&::RayTracingPipeline, + ) { + unreachable!("Ray tracing pipelines not supported") + } + + unsafe fn trace_rays( + &mut self, + _count: [u32; 3], + _ray_generation_group_data: crate::PipelineGroupData, + _miss_group_data: crate::PipelineGroupData, + _intersection_group_data: crate::PipelineGroupData, + ) { + unreachable!("Ray tracing pipelines not supported") + } } diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs index 359a89fc054..33dfcc9c27c 100644 --- a/wgpu-hal/src/dx12/device.rs +++ b/wgpu-hal/src/dx12/device.rs @@ -2208,6 +2208,29 @@ impl crate::Device for super::Device { self.counters.compute_pipelines.sub(1); } + unsafe fn create_ray_tracing_pipeline( + &self, + _desc: &crate::RayTracingPipelineDescriptor< + super::PipelineLayout, + super::ShaderModule, + super::PipelineCache, + >, + ) -> Result<::RayTracingPipeline, crate::PipelineError> { + unreachable!("ray tracing pipelines not yet implemented") + } + + unsafe fn destroy_ray_tracing_pipeline(&self, _pipeline: super::RayTracingPipeline) { + unreachable!("ray tracing pipelines not yet implemented") + } + + unsafe fn get_raytracing_pipeline_group_data( + &self, + _pipeline: &super::RayTracingPipeline, + _groups: core::ops::Range, + ) -> Result, crate::DeviceError> { + unimplemented!("ray tracing pipelines not yet implemented") + } + unsafe fn create_pipeline_cache( &self, _desc: &crate::PipelineCacheDescriptor<'_>, @@ -2591,7 +2614,7 @@ impl crate::Device for super::Device { let temp = Direct3D12::D3D12_RAYTRACING_INSTANCE_DESC { Transform: instance.transform, _bitfield1: (instance.custom_data & MAX_U24) | (u32::from(instance.mask) << 24), - _bitfield2: 0, + _bitfield2: (instance.pipeline_intersection_data_offset & MAX_U24), AccelerationStructure: instance.blas_address, }; diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs index 304da8c8868..6753cfffcc1 100644 --- a/wgpu-hal/src/dx12/mod.rs +++ b/wgpu-hal/src/dx12/mod.rs @@ -478,6 +478,7 @@ impl crate::Api for Api { type 
ShaderModule = ShaderModule; type RenderPipeline = RenderPipeline; type ComputePipeline = ComputePipeline; + type RayTracingPipeline = RayTracingPipeline; type PipelineCache = PipelineCache; type AccelerationStructure = AccelerationStructure; @@ -499,6 +500,7 @@ crate::impl_dyn_resource!( PipelineLayout, QuerySet, Queue, + RayTracingPipeline, RenderPipeline, Sampler, ShaderModule, @@ -1293,6 +1295,11 @@ impl crate::DynComputePipeline for ComputePipeline {} unsafe impl Send for ComputePipeline {} unsafe impl Sync for ComputePipeline {} +#[derive(Debug)] +pub struct RayTracingPipeline {} + +impl crate::DynRayTracingPipeline for RayTracingPipeline {} + #[derive(Debug)] pub struct PipelineCache; diff --git a/wgpu-hal/src/dynamic/command.rs b/wgpu-hal/src/dynamic/command.rs index 6b4e4fdb040..aca5bbcb3e4 100644 --- a/wgpu-hal/src/dynamic/command.rs +++ b/wgpu-hal/src/dynamic/command.rs @@ -4,8 +4,9 @@ use core::ops::Range; use crate::{ AccelerationStructureBarrier, Api, Attachment, BufferBarrier, BufferBinding, BufferCopy, BufferTextureCopy, BuildAccelerationStructureDescriptor, ColorAttachment, CommandEncoder, - ComputePassDescriptor, DepthStencilAttachment, DeviceError, Label, MemoryRange, - PassTimestampWrites, Rect, RenderPassDescriptor, TextureBarrier, TextureCopy, + ComputePassDescriptor, DepthStencilAttachment, DeviceError, DynRayTracingPipeline, Label, + MemoryRange, PassTimestampWrites, RayTracingPassDescriptor, Rect, RenderPassDescriptor, + TextureBarrier, TextureCopy, }; use super::{ @@ -190,6 +191,19 @@ pub trait DynCommandEncoder: DynResource + core::fmt::Debug { offset: wgt::BufferAddress, ); + unsafe fn begin_ray_tracing_pass(&mut self, desc: &RayTracingPassDescriptor); + unsafe fn end_ray_tracing_pass(&mut self); + + unsafe fn trace_rays( + &mut self, + count: [u32; 3], + ray_generation_group_data: crate::PipelineGroupData, + miss_group_data: crate::PipelineGroupData, + intersection_group_data: crate::PipelineGroupData, + ); + + unsafe fn 
set_ray_tracing_pipeline(&mut self, pipeline: &dyn DynRayTracingPipeline); + unsafe fn build_acceleration_structures<'a>( &mut self, descriptors: &'a [BuildAccelerationStructureDescriptor< @@ -646,6 +660,46 @@ impl DynCommandEncoder for C { unsafe { self.set_vertex_buffer(index, binding) }; } + unsafe fn begin_ray_tracing_pass(&mut self, desc: &RayTracingPassDescriptor) { + let desc = RayTracingPassDescriptor { label: desc.label }; + unsafe { C::begin_ray_tracing_pass(self, &desc) }; + } + + unsafe fn end_ray_tracing_pass(&mut self) { + unsafe { C::end_ray_tracing_pass(self) }; + } + + unsafe fn set_ray_tracing_pipeline(&mut self, pipeline: &dyn DynRayTracingPipeline) { + let pipeline = pipeline.expect_downcast_ref(); + unsafe { C::set_ray_tracing_pipeline(self, pipeline) }; + } + + unsafe fn trace_rays<'a>( + &mut self, + count: [u32; 3], + ray_generation_group_data: crate::PipelineGroupData<'a, dyn DynBuffer>, + miss_group_data: crate::PipelineGroupData<'a, dyn DynBuffer>, + intersection_group_data: crate::PipelineGroupData<'a, dyn DynBuffer>, + ) { + let downcast_group_data = + |data: crate::PipelineGroupData<'a, dyn DynBuffer>| crate::PipelineGroupData { + buffer: data.buffer.expect_downcast_ref(), + offset: data.offset, + stride: data.stride, + count: data.count, + }; + + unsafe { + C::trace_rays( + self, + count, + downcast_group_data(ray_generation_group_data), + downcast_group_data(miss_group_data), + downcast_group_data(intersection_group_data), + ); + } + } + unsafe fn build_acceleration_structures<'a>( &mut self, descriptors: &'a [BuildAccelerationStructureDescriptor< diff --git a/wgpu-hal/src/dynamic/device.rs b/wgpu-hal/src/dynamic/device.rs index b5ff5904520..f9b9fe682cc 100644 --- a/wgpu-hal/src/dynamic/device.rs +++ b/wgpu-hal/src/dynamic/device.rs @@ -5,16 +5,16 @@ use crate::{ BindGroupLayoutDescriptor, BufferDescriptor, BufferMapping, CommandEncoderDescriptor, ComputePipelineDescriptor, Device, DeviceError, FenceValue, 
GetAccelerationStructureBuildSizesDescriptor, Label, MemoryRange, PipelineCacheDescriptor, - PipelineCacheError, PipelineError, PipelineLayoutDescriptor, RenderPipelineDescriptor, - SamplerDescriptor, ShaderError, ShaderInput, ShaderModuleDescriptor, TextureDescriptor, - TextureViewDescriptor, TlasInstance, + PipelineCacheError, PipelineError, PipelineLayoutDescriptor, RayObjectIntersectionState, + RayTracingPipelineDescriptor, RenderPipelineDescriptor, SamplerDescriptor, ShaderError, + ShaderInput, ShaderModuleDescriptor, TextureDescriptor, TextureViewDescriptor, TlasInstance, }; use super::{ DynAccelerationStructure, DynBindGroup, DynBindGroupLayout, DynBuffer, DynCommandEncoder, DynComputePipeline, DynFence, DynPipelineCache, DynPipelineLayout, DynQuerySet, DynQueue, - DynRenderPipeline, DynResource, DynResourceExt as _, DynSampler, DynShaderModule, DynTexture, - DynTextureView, + DynRayTracingPipeline, DynRenderPipeline, DynResource, DynResourceExt as _, DynSampler, + DynShaderModule, DynTexture, DynTextureView, }; pub trait DynDevice: DynResource { @@ -112,6 +112,21 @@ pub trait DynDevice: DynResource { ) -> Result, PipelineError>; unsafe fn destroy_compute_pipeline(&self, pipeline: Box); + unsafe fn create_ray_tracing_pipeline( + &self, + desc: &RayTracingPipelineDescriptor< + dyn DynPipelineLayout, + dyn DynShaderModule, + dyn DynPipelineCache, + >, + ) -> Result, PipelineError>; + unsafe fn destroy_ray_tracing_pipeline(&self, pipeline: Box); + unsafe fn get_raytracing_pipeline_group_data( + &self, + pipeline: &dyn DynRayTracingPipeline, + groups: core::ops::Range, + ) -> Result, DeviceError>; + unsafe fn create_pipeline_cache( &self, desc: &PipelineCacheDescriptor<'_>, @@ -442,6 +457,55 @@ impl DynDevice for D { unsafe { D::destroy_compute_pipeline(self, pipeline.unbox()) }; } + unsafe fn create_ray_tracing_pipeline( + &self, + desc: &RayTracingPipelineDescriptor< + dyn DynPipelineLayout, + dyn DynShaderModule, + dyn DynPipelineCache, + >, + ) -> Result, 
PipelineError> { + let ray_intersection: Vec<_> = desc + .intersection + .iter() + .map(|stage| RayObjectIntersectionState { + closest_hit: stage.closest_hit.clone().expect_downcast(), + any_hit: stage + .any_hit + .as_ref() + .map(|stage| stage.clone().expect_downcast()), + }) + .collect(); + + let desc = RayTracingPipelineDescriptor { + label: desc.label, + layout: desc.layout.expect_downcast_ref(), + ray_generation: desc.ray_generation.clone().expect_downcast(), + miss: desc.miss.clone().expect_downcast(), + intersection: &ray_intersection, + max_recursion_depth: desc.max_recursion_depth, + cache: desc.cache.as_ref().map(|c| c.expect_downcast_ref()), + }; + + unsafe { D::create_ray_tracing_pipeline(self, &desc) } + .map(|b| -> Box { Box::new(b) }) + } + + unsafe fn destroy_ray_tracing_pipeline(&self, pipeline: Box) { + unsafe { + D::destroy_ray_tracing_pipeline(self, pipeline.unbox()); + }; + } + unsafe fn get_raytracing_pipeline_group_data( + &self, + pipeline: &dyn DynRayTracingPipeline, + groups: core::ops::Range, + ) -> Result, DeviceError> { + unsafe { + D::get_raytracing_pipeline_group_data(self, pipeline.expect_downcast_ref(), groups) + } + } + unsafe fn create_pipeline_cache( &self, desc: &PipelineCacheDescriptor<'_>, diff --git a/wgpu-hal/src/dynamic/mod.rs b/wgpu-hal/src/dynamic/mod.rs index 85d8ca00450..8284300df92 100644 --- a/wgpu-hal/src/dynamic/mod.rs +++ b/wgpu-hal/src/dynamic/mod.rs @@ -116,6 +116,7 @@ pub trait DynPipelineCache: DynResource + fmt::Debug {} pub trait DynPipelineLayout: DynResource + fmt::Debug {} pub trait DynQuerySet: DynResource + fmt::Debug {} pub trait DynRenderPipeline: DynResource + fmt::Debug {} +pub trait DynRayTracingPipeline: DynResource + fmt::Debug {} pub trait DynSampler: DynResource + fmt::Debug {} pub trait DynShaderModule: DynResource + fmt::Debug {} pub trait DynSurfaceTexture: diff --git a/wgpu-hal/src/gles/adapter.rs b/wgpu-hal/src/gles/adapter.rs index d8f14d42e47..53aa43b27c2 100644 --- 
a/wgpu-hal/src/gles/adapter.rs +++ b/wgpu-hal/src/gles/adapter.rs @@ -846,6 +846,10 @@ impl super::Adapter { max_acceleration_structures_per_shader_stage: 0, max_multiview_view_count: 0, + + max_intersection_group_count: 0, + max_ray_dispatch_count: 0, + max_ray_recursion_depth: 0, }); let mut workarounds = super::Workarounds::empty(); @@ -920,6 +924,9 @@ impl super::Adapter { uniform_bounds_check_alignment: wgt::BufferSize::new(1).unwrap(), raw_tlas_instance_size: 0, ray_tracing_scratch_buffer_alignment: 0, + ray_tracing_pipeline_group_data_size: 0, + ray_tracing_pipeline_group_data_alignment: 0, + ray_tracing_pipeline_data_offset_alignment: 0, }, cooperative_matrix_properties: Vec::new(), }, diff --git a/wgpu-hal/src/gles/command.rs b/wgpu-hal/src/gles/command.rs index 409481b682a..427551cec85 100644 --- a/wgpu-hal/src/gles/command.rs +++ b/wgpu-hal/src/gles/command.rs @@ -1317,4 +1317,29 @@ impl crate::CommandEncoder for super::CommandEncoder { ) { unimplemented!() } + + unsafe fn begin_ray_tracing_pass(&mut self, _desc: &crate::RayTracingPassDescriptor) { + unimplemented!() + } + + unsafe fn end_ray_tracing_pass(&mut self) { + unimplemented!() + } + + unsafe fn set_ray_tracing_pipeline( + &mut self, + _pipeline: &::RayTracingPipeline, + ) { + unimplemented!() + } + + unsafe fn trace_rays( + &mut self, + _count: [u32; 3], + _ray_generation_group_data: crate::PipelineGroupData, + _miss_group_data: crate::PipelineGroupData, + _intersection_group_data: crate::PipelineGroupData, + ) { + unimplemented!() + } } diff --git a/wgpu-hal/src/gles/device.rs b/wgpu-hal/src/gles/device.rs index 1f63a808100..46472f05d65 100644 --- a/wgpu-hal/src/gles/device.rs +++ b/wgpu-hal/src/gles/device.rs @@ -1553,6 +1553,29 @@ impl crate::Device for super::Device { self.counters.compute_pipelines.sub(1); } + unsafe fn create_ray_tracing_pipeline( + &self, + _desc: &crate::RayTracingPipelineDescriptor< + super::PipelineLayout, + super::ShaderModule, + super::PipelineCache, + >, + ) -> 
Result { + unimplemented!("Ray tracing is unsupported on GL") + } + + unsafe fn destroy_ray_tracing_pipeline(&self, _pipeline: super::RayTracingPipeline) { + unimplemented!("Ray tracing is unsupported on GL") + } + + unsafe fn get_raytracing_pipeline_group_data( + &self, + _pipeline: &super::RayTracingPipeline, + _groups: core::ops::Range, + ) -> Result, crate::DeviceError> { + unimplemented!("Ray tracing is unsupported on GL") + } + unsafe fn create_pipeline_cache( &self, _: &crate::PipelineCacheDescriptor<'_>, diff --git a/wgpu-hal/src/gles/mod.rs b/wgpu-hal/src/gles/mod.rs index 1a0292d4a0f..dd034f3b836 100644 --- a/wgpu-hal/src/gles/mod.rs +++ b/wgpu-hal/src/gles/mod.rs @@ -168,6 +168,7 @@ impl crate::Api for Api { type ShaderModule = ShaderModule; type RenderPipeline = RenderPipeline; type ComputePipeline = ComputePipeline; + type RayTracingPipeline = RayTracingPipeline; } crate::impl_dyn_resource!( @@ -187,6 +188,7 @@ crate::impl_dyn_resource!( QuerySet, Queue, RenderPipeline, + RayTracingPipeline, Sampler, ShaderModule, Surface, @@ -752,6 +754,11 @@ pub struct ComputePipeline { impl crate::DynComputePipeline for ComputePipeline {} +#[derive(Debug)] +pub struct RayTracingPipeline {} + +impl crate::DynRayTracingPipeline for RayTracingPipeline {} + #[cfg(send_sync)] unsafe impl Sync for ComputePipeline {} #[cfg(send_sync)] diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs index 51815774dec..c92293d5223 100644 --- a/wgpu-hal/src/lib.rs +++ b/wgpu-hal/src/lib.rs @@ -288,8 +288,9 @@ pub use dynamic::{ DynAccelerationStructure, DynAcquiredSurfaceTexture, DynAdapter, DynBindGroup, DynBindGroupLayout, DynBuffer, DynCommandBuffer, DynCommandEncoder, DynComputePipeline, DynDevice, DynExposedAdapter, DynFence, DynInstance, DynOpenDevice, DynPipelineCache, - DynPipelineLayout, DynQuerySet, DynQueue, DynRenderPipeline, DynResource, DynSampler, - DynShaderModule, DynSurface, DynSurfaceTexture, DynTexture, DynTextureView, + DynPipelineLayout, DynQuerySet, DynQueue, 
DynRayTracingPipeline, DynRenderPipeline, + DynResource, DynSampler, DynShaderModule, DynSurface, DynSurfaceTexture, DynTexture, + DynTextureView, }; #[allow(unused)] @@ -640,6 +641,7 @@ pub trait Api: Clone + fmt::Debug + Sized + WasmNotSendSync + 'static { type ShaderModule: DynShaderModule; type RenderPipeline: DynRenderPipeline; type ComputePipeline: DynComputePipeline; + type RayTracingPipeline: DynRayTracingPipeline; type PipelineCache: DynPipelineCache; type AccelerationStructure: DynAccelerationStructure + 'static; @@ -1064,6 +1066,24 @@ pub trait Device: WasmNotSendSync { ) -> Result<::ComputePipeline, PipelineError>; unsafe fn destroy_compute_pipeline(&self, pipeline: ::ComputePipeline); + #[allow(clippy::type_complexity)] + unsafe fn create_ray_tracing_pipeline( + &self, + desc: &RayTracingPipelineDescriptor< + ::PipelineLayout, + ::ShaderModule, + ::PipelineCache, + >, + ) -> Result<::RayTracingPipeline, PipelineError>; + unsafe fn destroy_ray_tracing_pipeline(&self, pipeline: ::RayTracingPipeline); + /// Obtain the opaque data from each group; behaves as if group 0 is the ray generation shader, + /// group 1 is the miss shader, and groups 2.. are the intersection groups. + unsafe fn get_raytracing_pipeline_group_data( + &self, + pipeline: &::RayTracingPipeline, + groups: Range, + ) -> Result, DeviceError>; + unsafe fn create_pipeline_cache( &self, desc: &PipelineCacheDescriptor<'_>, @@ -1581,10 +1601,15 @@ pub trait CommandEncoder: WasmNotSendSync + fmt::Debug { /// - All prior calls to [`begin_render_pass`] on this [`CommandEncoder`] must have been followed /// by a call to [`end_render_pass`]. /// /// - All prior calls to [`begin_compute_pass`] on this [`CommandEncoder`] must have been followed /// by a call to [`end_compute_pass`]. /// + /// - All prior calls to [`begin_ray_tracing_pass`] on this [`CommandEncoder`] must have been followed + /// by a call to [`end_ray_tracing_pass`].
+ /// /// [`begin_render_pass`]: CommandEncoder::begin_render_pass /// [`begin_compute_pass`]: CommandEncoder::begin_compute_pass + /// [`begin_ray_tracing_pass`]: CommandEncoder::begin_ray_tracing_pass /// [`end_render_pass`]: CommandEncoder::end_render_pass /// [`end_compute_pass`]: CommandEncoder::end_compute_pass + /// [`end_ray_tracing_pass`]: CommandEncoder::end_ray_tracing_pass unsafe fn begin_render_pass( &mut self, desc: &RenderPassDescriptor<::QuerySet, ::TextureView>, @@ -1701,10 +1726,15 @@ pub trait CommandEncoder: WasmNotSendSync + fmt::Debug { /// - All prior calls to [`begin_compute_pass`] on this [`CommandEncoder`] must have been followed /// by a call to [`end_compute_pass`]. /// + /// - All prior calls to [`begin_ray_tracing_pass`] on this [`CommandEncoder`] must have been followed + /// by a call to [`end_ray_tracing_pass`]. + /// /// [`begin_render_pass`]: CommandEncoder::begin_render_pass /// [`begin_compute_pass`]: CommandEncoder::begin_compute_pass + /// [`begin_ray_tracing_pass`]: CommandEncoder::begin_ray_tracing_pass /// [`end_render_pass`]: CommandEncoder::end_render_pass /// [`end_compute_pass`]: CommandEncoder::end_compute_pass + /// [`end_ray_tracing_pass`]: CommandEncoder::end_ray_tracing_pass unsafe fn begin_compute_pass( &mut self, desc: &ComputePassDescriptor<::QuerySet>, @@ -1730,6 +1760,58 @@ pub trait CommandEncoder: WasmNotSendSync + fmt::Debug { offset: wgt::BufferAddress, ); + /// Begin a new ray tracing pass, clearing all active bindings. + /// + /// This clears any bindings established by the following calls: + /// + /// - [`set_bind_group`](CommandEncoder::set_bind_group) + /// - [`set_immediates`](CommandEncoder::set_immediates) + /// - [`begin_query`](CommandEncoder::begin_query) + /// - [`set_ray_tracing_pipeline`](CommandEncoder::set_ray_tracing_pipeline) + /// + /// # Safety + /// + /// - All prior calls to [`begin_render_pass`] on this [`CommandEncoder`] must have been followed + /// by a call to [`end_render_pass`].
+ /// + /// - All prior calls to [`begin_compute_pass`] on this [`CommandEncoder`] must have been followed + /// by a call to [`end_compute_pass`]. + /// + /// - All prior calls to [`begin_ray_tracing_pass`] on this [`CommandEncoder`] must have been followed + /// by a call to [`end_ray_tracing_pass`]. + /// + /// [`begin_render_pass`]: CommandEncoder::begin_render_pass + /// [`begin_compute_pass`]: CommandEncoder::begin_compute_pass + /// [`begin_ray_tracing_pass`]: CommandEncoder::begin_ray_tracing_pass + /// [`end_render_pass`]: CommandEncoder::end_render_pass + /// [`end_compute_pass`]: CommandEncoder::end_compute_pass + /// [`end_ray_tracing_pass`]: CommandEncoder::end_ray_tracing_pass + unsafe fn begin_ray_tracing_pass(&mut self, desc: &RayTracingPassDescriptor); + + /// End the current ray tracing pass. + /// + /// # Safety + /// + /// - There must have been a prior call to [`begin_ray_tracing_pass`] on this [`CommandEncoder`] + /// that has not been followed by a call to [`end_ray_tracing_pass`].
+ /// + /// [`begin_ray_tracing_pass`]: CommandEncoder::begin_ray_tracing_pass + /// [`end_ray_tracing_pass`]: CommandEncoder::end_ray_tracing_pass + unsafe fn end_ray_tracing_pass(&mut self); + + /// # Safety + /// + /// - Pipeline must not be destroyed + unsafe fn set_ray_tracing_pipeline(&mut self, pipeline: &::RayTracingPipeline); + + unsafe fn trace_rays<'a>( + &mut self, + count: [u32; 3], + ray_generation_group_data: PipelineGroupData<'a, ::Buffer>, + miss_group_data: PipelineGroupData<'a, ::Buffer>, + intersection_group_data: PipelineGroupData<'a, ::Buffer>, + ); + /// To get the required sizes for the buffer allocations use `get_acceleration_structure_build_sizes` per descriptor /// All buffers must be synchronized externally /// All buffer regions, which are written to may only be passed once per function call, @@ -1969,6 +2051,20 @@ pub struct Alignments { /// What the scratch buffer for building an acceleration structure must be aligned to pub ray_tracing_scratch_buffer_alignment: u32, + + /// How large a single piece of group data is. That is, how large the vector returned + /// from `device.get_raytracing_pipeline_group_data(&pipeline, n..(n+1))` is. + /// + /// If ray tracing pipelines are implemented, this must be non zero. + pub ray_tracing_pipeline_group_data_size: u32, + + /// If ray tracing pipelines are implemented, this must be a power of two (and non zero). + pub ray_tracing_pipeline_group_data_alignment: u32, + + /// If ray tracing pipelines are implemented, this must be a power of two (and non zero). 
+ /// + /// The offset within `PipelineGroupData` must be a multiple of this + pub ray_tracing_pipeline_data_offset_alignment: u32, } #[derive(Clone, Debug)] @@ -2535,6 +2631,35 @@ pub struct RenderPipelineDescriptor< pub cache: Option<&'a Pc>, } +#[derive(Clone, Debug)] +pub struct RayObjectIntersectionState<'a, M: DynShaderModule + ?Sized> { + pub closest_hit: ProgrammableStage<'a, M>, + pub any_hit: Option>, +} + +/// Describes a ray tracing pipeline. +#[derive(Clone, Debug)] +pub struct RayTracingPipelineDescriptor< + 'a, + Pl: DynPipelineLayout + ?Sized, + M: DynShaderModule + ?Sized, + Pc: DynPipelineCache + ?Sized, +> { + pub label: Label<'a>, + /// The layout of bind groups for this pipeline. + pub layout: &'a Pl, + /// The ray generation stage. + pub ray_generation: ProgrammableStage<'a, M>, + /// The miss stage. + pub miss: ProgrammableStage<'a, M>, + /// All the object intersection stages. + pub intersection: &'a [RayObjectIntersectionState<'a, M>], + /// The maximum recursion depth allowed for the ray tracing (ray_generation shader counts as depth 0). + pub max_recursion_depth: u32, + /// The cache which will be used and filled when compiling this pipeline + pub cache: Option<&'a Pc>, +} + #[derive(Debug, Clone)] pub struct SurfaceConfiguration { /// Maximum number of queued frames. 
Must be in @@ -2701,6 +2826,11 @@ pub struct ComputePassDescriptor<'a, Q: DynQuerySet + ?Sized> { pub timestamp_writes: Option>, } +#[derive(Clone, Debug)] +pub struct RayTracingPassDescriptor<'a> { + pub label: Label<'a>, +} + #[test] fn test_default_limits() { let limits = wgt::Limits::default(); @@ -2860,6 +2990,7 @@ pub struct TlasInstance { pub custom_data: u32, pub mask: u8, pub blas_address: u64, + pub pipeline_intersection_data_offset: u32, } #[cfg(dx12)] @@ -2881,3 +3012,10 @@ pub struct Telemetry { result: D3D12ExposeAdapterResult, ), } + +pub struct PipelineGroupData<'a, B: DynBuffer + ?Sized> { + pub buffer: &'a B, + pub offset: wgt::BufferAddress, + pub stride: u64, + pub count: u64, +} diff --git a/wgpu-hal/src/metal/adapter.rs b/wgpu-hal/src/metal/adapter.rs index 6223c139718..6fd554e2936 100644 --- a/wgpu-hal/src/metal/adapter.rs +++ b/wgpu-hal/src/metal/adapter.rs @@ -1402,6 +1402,10 @@ impl super::CapabilitiesQuery { max_mesh_output_primitives: 256, max_mesh_output_layers: self.max_texture_layers as u32, max_mesh_multiview_view_count: 0, + // unimplemented + max_intersection_group_count: 0, + max_ray_dispatch_count: 0, + max_ray_recursion_depth: 0, }); crate::Capabilities { @@ -1418,6 +1422,10 @@ impl super::CapabilitiesQuery { >()) .unwrap(), ray_tracing_scratch_buffer_alignment: 1, + // Not yet supported + ray_tracing_pipeline_group_data_size: 0, + ray_tracing_pipeline_group_data_alignment: 0, + ray_tracing_pipeline_data_offset_alignment: 0, }, downlevel, cooperative_matrix_properties: self.cooperative_matrix_properties(), diff --git a/wgpu-hal/src/metal/command.rs b/wgpu-hal/src/metal/command.rs index 9f2560df72f..67a215f37a2 100644 --- a/wgpu-hal/src/metal/command.rs +++ b/wgpu-hal/src/metal/command.rs @@ -1883,6 +1883,31 @@ impl crate::CommandEncoder for super::CommandEncoder { } residency_set.commit(); } + + unsafe fn begin_ray_tracing_pass(&mut self, _desc: &crate::RayTracingPassDescriptor) { + unreachable!("Ray tracing pipelines not 
supported") + } + + unsafe fn end_ray_tracing_pass(&mut self) { + unreachable!("Ray tracing pipelines not supported") + } + + unsafe fn set_ray_tracing_pipeline( + &mut self, + _pipeline: &::RayTracingPipeline, + ) { + unreachable!("Ray tracing pipelines not supported") + } + + unsafe fn trace_rays( + &mut self, + _count: [u32; 3], + _ray_generation_group_data: crate::PipelineGroupData, + _miss_group_data: crate::PipelineGroupData, + _intersection_group_data: crate::PipelineGroupData, + ) { + unreachable!("Ray tracing pipelines not supported") + } } impl Drop for super::CommandEncoder { diff --git a/wgpu-hal/src/metal/device.rs b/wgpu-hal/src/metal/device.rs index f13bb24c3b1..525eefbabf3 100644 --- a/wgpu-hal/src/metal/device.rs +++ b/wgpu-hal/src/metal/device.rs @@ -1770,6 +1770,29 @@ impl crate::Device for super::Device { self.counters.compute_pipelines.sub(1); } + unsafe fn create_ray_tracing_pipeline( + &self, + _desc: &crate::RayTracingPipelineDescriptor< + super::PipelineLayout, + super::ShaderModule, + super::PipelineCache, + >, + ) -> Result { + unimplemented!("Ray tracing pipelines are unsupported on Metal") + } + + unsafe fn destroy_ray_tracing_pipeline(&self, _pipeline: super::RayTracingPipeline) { + unimplemented!("Ray tracing pipelines are unsupported on Metal") + } + + unsafe fn get_raytracing_pipeline_group_data( + &self, + _pipeline: &super::RayTracingPipeline, + _groups: core::ops::Range, + ) -> Result, crate::DeviceError> { + unimplemented!("Ray tracing pipelines are unsupported on Metal") + } + unsafe fn create_pipeline_cache( &self, _desc: &crate::PipelineCacheDescriptor<'_>, @@ -2019,7 +2042,7 @@ impl crate::Device for super::Device { }, options: MTLAccelerationStructureInstanceOptions::None, mask: instance.mask as u32, - intersectionFunctionTableOffset: 0, + intersectionFunctionTableOffset: instance.pipeline_intersection_data_offset, userID: instance.custom_data, accelerationStructureID: unsafe { MTLResourceID::from_raw(instance.blas_address) 
}, }; diff --git a/wgpu-hal/src/metal/mod.rs b/wgpu-hal/src/metal/mod.rs index d7edd7ce6e3..01aa6ffc159 100644 --- a/wgpu-hal/src/metal/mod.rs +++ b/wgpu-hal/src/metal/mod.rs @@ -87,6 +87,7 @@ impl crate::Api for Api { type PipelineLayout = PipelineLayout; type ShaderModule = ShaderModule; type RenderPipeline = RenderPipeline; + type RayTracingPipeline = RayTracingPipeline; type ComputePipeline = ComputePipeline; type PipelineCache = PipelineCache; @@ -110,6 +111,7 @@ crate::impl_dyn_resource!( QuerySet, Queue, RenderPipeline, + RayTracingPipeline, Sampler, ShaderModule, Surface, @@ -1005,6 +1007,11 @@ unsafe impl Sync for ComputePipeline {} impl crate::DynComputePipeline for ComputePipeline {} +#[derive(Debug)] +pub struct RayTracingPipeline {} + +impl crate::DynRayTracingPipeline for RayTracingPipeline {} + #[derive(Debug, Clone)] pub struct QuerySet { raw_buffer: Retained>, diff --git a/wgpu-hal/src/noop/command.rs b/wgpu-hal/src/noop/command.rs index 4150a1bc6f9..16cca58d436 100644 --- a/wgpu-hal/src/noop/command.rs +++ b/wgpu-hal/src/noop/command.rs @@ -300,6 +300,25 @@ impl crate::CommandEncoder for CommandBuffer { dependencies: &[&Resource], ) { } + + unsafe fn begin_ray_tracing_pass(&mut self, _desc: &crate::RayTracingPassDescriptor) {} + + unsafe fn end_ray_tracing_pass(&mut self) {} + + unsafe fn set_ray_tracing_pipeline( + &mut self, + _pipeline: &::RayTracingPipeline, + ) { + } + + unsafe fn trace_rays( + &mut self, + _count: [u32; 3], + _ray_generation_group_data: crate::PipelineGroupData, + _miss_group_data: crate::PipelineGroupData, + _intersection_group_data: crate::PipelineGroupData, + ) { + } } impl Command { diff --git a/wgpu-hal/src/noop/mod.rs b/wgpu-hal/src/noop/mod.rs index 563b1b27de9..45491701137 100644 --- a/wgpu-hal/src/noop/mod.rs +++ b/wgpu-hal/src/noop/mod.rs @@ -60,6 +60,7 @@ impl crate::Api for Api { type PipelineLayout = Resource; type ShaderModule = Resource; type RenderPipeline = Resource; + type RayTracingPipeline = Resource; 
type ComputePipeline = Resource; } @@ -76,6 +77,7 @@ impl crate::DynPipelineCache for Resource {} impl crate::DynPipelineLayout for Resource {} impl crate::DynQuerySet for Resource {} impl crate::DynRenderPipeline for Resource {} +impl crate::DynRayTracingPipeline for Resource {} impl crate::DynSampler for Resource {} impl crate::DynShaderModule for Resource {} impl crate::DynSurfaceTexture for Resource {} @@ -178,6 +180,9 @@ pub const CAPABILITIES: crate::Capabilities = { uniform_bounds_check_alignment: wgt::BufferSize::MIN, raw_tlas_instance_size: 0, ray_tracing_scratch_buffer_alignment: 1, + ray_tracing_pipeline_group_data_size: 1, + ray_tracing_pipeline_group_data_alignment: 1, + ray_tracing_pipeline_data_offset_alignment: 1, }, downlevel: wgt::DownlevelCapabilities { flags: wgt::DownlevelFlags::all(), @@ -389,6 +394,20 @@ impl crate::Device for Context { Ok(Resource) } unsafe fn destroy_compute_pipeline(&self, pipeline: Resource) {} + unsafe fn create_ray_tracing_pipeline( + &self, + desc: &crate::RayTracingPipelineDescriptor, + ) -> Result { + Ok(Resource) + } + unsafe fn destroy_ray_tracing_pipeline(&self, pipeline: Resource) {} + unsafe fn get_raytracing_pipeline_group_data( + &self, + pipeline: &Resource, + groups: core::ops::Range, + ) -> Result, crate::DeviceError> { + Ok(vec![0; groups.count()]) + } unsafe fn create_pipeline_cache( &self, desc: &crate::PipelineCacheDescriptor<'_>, diff --git a/wgpu-hal/src/vulkan/adapter.rs b/wgpu-hal/src/vulkan/adapter.rs index 8bc23a534c1..dc705ece17a 100644 --- a/wgpu-hal/src/vulkan/adapter.rs +++ b/wgpu-hal/src/vulkan/adapter.rs @@ -100,6 +100,9 @@ pub struct PhysicalDeviceFeatures { /// [`Instance::expose_adapter`]: super::Instance::expose_adapter ray_query: Option>, + /// Features provided by `VK_KHR_ray_tracing_pipeline`. + ray_tracing_pipeline: Option>, + /// Features provided by `VK_KHR_zero_initialize_workgroup_memory`, promoted /// to Vulkan 1.3. 
zero_initialize_workgroup_memory: @@ -192,6 +195,9 @@ impl PhysicalDeviceFeatures { if let Some(ref mut feature) = self.ray_query { info = info.push_next(feature); } + if let Some(ref mut feature) = self.ray_tracing_pipeline { + info = info.push_next(feature); + } if let Some(ref mut feature) = self.shader_atomic_int64 { info = info.push_next(feature); } @@ -476,6 +482,14 @@ impl PhysicalDeviceFeatures { } else { None }, + ray_tracing_pipeline: if enabled_extensions.contains(&khr::ray_tracing_pipeline::NAME) { + Some( + vk::PhysicalDeviceRayTracingPipelineFeaturesKHR::default() + .ray_tracing_pipeline(true), + ) + } else { + None + }, zero_initialize_workgroup_memory: if device_api_version >= vk::API_VERSION_1_3 || enabled_extensions.contains(&khr::zero_initialize_workgroup_memory::NAME) { @@ -959,6 +973,14 @@ impl PhysicalDeviceFeatures { supports_acceleration_structure_binding_array, ); + features.set( + F::EXPERIMENTAL_RAY_TRACING_PIPELINES + // Ditto. + | F::EXTENDED_ACCELERATION_STRUCTURE_VERTEX_FORMATS, + supports_acceleration_structures + && caps.supports_extension(khr::ray_tracing_pipeline::NAME), + ); + let rg11b10ufloat_renderable = supports_format( instance, phd, @@ -1124,6 +1146,10 @@ pub struct PhysicalDeviceProperties { /// `VK_KHR_acceleration_structure` extension. acceleration_structure: Option>, + /// Additional `vk::PhysicalDevice` properties from the + /// `VK_KHR_ray_tracing_pipeline` extension. + ray_tracing_pipeline: Option>, + /// Additional `vk::PhysicalDevice` properties from the /// `VK_KHR_driver_properties` extension, promoted to Vulkan 1.2. 
driver: Option>, @@ -1330,14 +1356,26 @@ impl PhysicalDeviceProperties { extensions.push(khr::draw_indirect_count::NAME); } - // Require `VK_KHR_deferred_host_operations`, `VK_KHR_acceleration_structure` `VK_KHR_buffer_device_address` (for acceleration structures) and`VK_KHR_ray_query` if `EXPERIMENTAL_RAY_QUERY` was requested - if requested_features.contains(wgt::Features::EXPERIMENTAL_RAY_QUERY) { + // Require `VK_KHR_deferred_host_operations`, `VK_KHR_acceleration_structure` `VK_KHR_buffer_device_address` (for acceleration structures) if either `EXPERIMENTAL_RAY_QUERY` or `EXPERIMENTAL_RAY_TRACING_PIPELINES` were requested. + if requested_features.intersects( + wgt::Features::EXPERIMENTAL_RAY_QUERY + | wgt::Features::EXPERIMENTAL_RAY_TRACING_PIPELINES, + ) { extensions.push(khr::deferred_host_operations::NAME); extensions.push(khr::acceleration_structure::NAME); extensions.push(khr::buffer_device_address::NAME); + } + + // Require `VK_KHR_ray_query` if `EXPERIMENTAL_RAY_QUERY` was requested + if requested_features.contains(wgt::Features::EXPERIMENTAL_RAY_QUERY) { extensions.push(khr::ray_query::NAME); } + // Require `VK_KHR_ray_tracing_pipeline` if `EXPERIMENTAL_RAY_TRACING_PIPELINES` was requested + if requested_features.contains(wgt::Features::EXPERIMENTAL_RAY_TRACING_PIPELINES) { + extensions.push(khr::ray_tracing_pipeline::NAME); + } + if requested_features.contains(wgt::Features::EXPERIMENTAL_RAY_HIT_VERTEX_RETURN) { extensions.push(khr::ray_tracing_position_fetch::NAME) } @@ -1625,6 +1663,16 @@ impl PhysicalDeviceProperties { let max_color_attachment_bytes_per_sample = max_color_attachments * wgt::TextureFormat::MAX_TARGET_PIXEL_BYTE_COST; + let mut max_intersection_group_count = 0; + let mut max_ray_dispatch_count = 0; + let mut max_ray_recursion_depth = 0; + + if let Some(properties) = self.ray_tracing_pipeline { + max_intersection_group_count = (1 << 24) / properties.max_ray_hit_attribute_size; + max_ray_dispatch_count = 
properties.max_ray_dispatch_invocation_count; + max_ray_recursion_depth = properties.max_ray_recursion_depth; + } + let max_multiview_view_count = self .multiview .map(|a| a.max_multiview_view_count.min(32)) @@ -1725,6 +1773,10 @@ impl PhysicalDeviceProperties { max_acceleration_structures_per_shader_stage, max_multiview_view_count, + + max_intersection_group_count, + max_ray_dispatch_count, + max_ray_recursion_depth, }) } @@ -1767,6 +1819,21 @@ impl PhysicalDeviceProperties { acceleration_structure.min_acceleration_structure_scratch_offset_alignment }, ), + ray_tracing_pipeline_group_data_size: self + .ray_tracing_pipeline + .map_or(0, |ray_tracing_pipeline| { + ray_tracing_pipeline.shader_group_handle_size + }), + ray_tracing_pipeline_group_data_alignment: self + .ray_tracing_pipeline + .map_or(0, |ray_tracing_pipeline| { + ray_tracing_pipeline.shader_group_handle_alignment + }), + ray_tracing_pipeline_data_offset_alignment: self + .ray_tracing_pipeline + .map_or(0, |ray_tracing_pipeline| { + ray_tracing_pipeline.shader_group_base_alignment + }), } } } @@ -1808,6 +1875,9 @@ impl super::InstanceShared { let supports_acceleration_structure = capabilities.supports_extension(khr::acceleration_structure::NAME); + let supports_ray_tracing_pipeline = + capabilities.supports_extension(khr::ray_tracing_pipeline::NAME); + let supports_mesh_shader = capabilities.supports_extension(ext::mesh_shader::NAME); let mut properties2 = vk::PhysicalDeviceProperties2KHR::default(); @@ -1839,6 +1909,13 @@ impl super::InstanceShared { properties2 = properties2.push_next(next); } + if supports_ray_tracing_pipeline { + let next = capabilities + .ray_tracing_pipeline + .insert(vk::PhysicalDeviceRayTracingPipelinePropertiesKHR::default()); + properties2 = properties2.push_next(next); + } + if supports_driver_properties { let next = capabilities .driver @@ -2346,6 +2423,7 @@ impl super::Instance { .map(|a| a.max_multiview_instance_index) .unwrap_or(0), scratch_buffer_alignment: 
alignments.ray_tracing_scratch_buffer_alignment, + ray_tracing_pipeline_group_data_size: alignments.ray_tracing_pipeline_group_data_size, }; let capabilities = crate::Capabilities { limits: phd_capabilities.to_wgpu_limits(), @@ -2526,6 +2604,15 @@ impl super::Adapter { } else { None }; + let ray_tracing_pipeline_fns = + if enabled_extensions.contains(&khr::ray_tracing_pipeline::NAME) { + Some(khr::ray_tracing_pipeline::Device::new( + &self.instance.raw, + &raw_device, + )) + } else { + None + }; let mesh_shading_fns = if enabled_extensions.contains(&ext::mesh_shader::NAME) { Some(ext::mesh_shader::Device::new( &self.instance.raw, @@ -2665,7 +2752,8 @@ impl super::Adapter { true, // could check `super::Workarounds::SEPARATE_ENTRY_POINTS` ); flags.set( - spv::WriterFlags::PRINT_ON_RAY_QUERY_INITIALIZATION_FAIL, + spv::WriterFlags::PRINT_ON_RAY_QUERY_INITIALIZATION_FAIL + | spv::WriterFlags::PRINT_ON_TRACE_RAYS_FAIL, self.instance.flags.contains(wgt::InstanceFlags::DEBUG) && (self.instance.instance_api_version >= vk::API_VERSION_1_3 || enabled_extensions.contains(&khr::shader_non_semantic_info::NAME)), @@ -2673,6 +2761,9 @@ impl super::Adapter { if features.contains(wgt::Features::EXPERIMENTAL_RAY_QUERY) { capabilities.push(spv::Capability::RayQueryKHR); } + if features.contains(wgt::Features::EXPERIMENTAL_RAY_TRACING_PIPELINES) { + capabilities.push(spv::Capability::RayTracingKHR); + } if features.contains(wgt::Features::EXPERIMENTAL_RAY_HIT_VERTEX_RETURN) { capabilities.push(spv::Capability::RayQueryPositionFetchKHR) } @@ -2789,6 +2880,7 @@ impl super::Adapter { draw_indirect_count: indirect_count_fn, timeline_semaphore: timeline_semaphore_fn, ray_tracing: ray_tracing_fns, + ray_tracing_pipelines: ray_tracing_pipeline_fns, mesh_shading: mesh_shading_fns, external_memory_fd: external_memory_fd_fn, }, diff --git a/wgpu-hal/src/vulkan/command.rs b/wgpu-hal/src/vulkan/command.rs index 0bdc5eaedfe..77db8a88743 100644 --- a/wgpu-hal/src/vulkan/command.rs +++ 
b/wgpu-hal/src/vulkan/command.rs @@ -1359,6 +1359,94 @@ impl crate::CommandEncoder for super::CommandEncoder { } } + // ray tracing + + unsafe fn begin_ray_tracing_pass(&mut self, desc: &crate::RayTracingPassDescriptor<'_>) { + self.bind_point = vk::PipelineBindPoint::RAY_TRACING_KHR; + if let Some(label) = desc.label { + unsafe { self.begin_debug_marker(label) }; + self.rpass_debug_marker_active = true; + } + } + unsafe fn end_ray_tracing_pass(&mut self) { + if self.rpass_debug_marker_active { + unsafe { self.end_debug_marker() }; + self.rpass_debug_marker_active = false + } + } + + unsafe fn trace_rays( + &mut self, + count: [u32; 3], + ray_generation_group_data: crate::PipelineGroupData, + miss_group_data: crate::PipelineGroupData, + intersection_group_data: crate::PipelineGroupData, + ) { + let ray_tracing_functions = self + .device + .extension_fns + .ray_tracing + .as_ref() + .expect("Feature `RAY_TRACING` not enabled"); + + let ray_tracing_pipeline_functions = self + .device + .extension_fns + .ray_tracing_pipelines + .as_ref() + .expect("Feature `RAY_TRACING_PIPELINES` not enabled"); + + let get_device_address = |buffer: &super::Buffer| unsafe { + ray_tracing_functions + .buffer_device_address + .get_buffer_device_address( + &vk::BufferDeviceAddressInfo::default().buffer(buffer.raw), + ) + }; + + unsafe { + ray_tracing_pipeline_functions.cmd_trace_rays( + self.raw_handle(), + &vk::StridedDeviceAddressRegionKHR { + device_address: get_device_address(ray_generation_group_data.buffer) + + ray_generation_group_data.offset, + stride: ray_generation_group_data.stride, + size: ray_generation_group_data.stride /* no need for multiplying by count, vulkan requires the to be just one group */, + }, + &vk::StridedDeviceAddressRegionKHR { + device_address: get_device_address(miss_group_data.buffer) + + miss_group_data.offset, + stride: miss_group_data.stride, + size: miss_group_data.stride * miss_group_data.count, + }, + &vk::StridedDeviceAddressRegionKHR { + 
device_address: get_device_address(intersection_group_data.buffer) + + intersection_group_data.offset, + stride: intersection_group_data.stride, + size: intersection_group_data.stride * intersection_group_data.count, + }, + &vk::StridedDeviceAddressRegionKHR { + device_address: 0, + stride: 0, + size: 0, + }, + count[0], + count[1], + count[2], + ) + }; + } + + unsafe fn set_ray_tracing_pipeline(&mut self, pipeline: &super::RayTracingPipeline) { + unsafe { + self.device.raw.cmd_bind_pipeline( + self.active, + vk::PipelineBindPoint::RAY_TRACING_KHR, + pipeline.raw, + ) + }; + } + unsafe fn copy_acceleration_structure_to_acceleration_structure( &mut self, src: &super::AccelerationStructure, diff --git a/wgpu-hal/src/vulkan/conv.rs b/wgpu-hal/src/vulkan/conv.rs index 24c3a5ba408..e47c01d5ba8 100644 --- a/wgpu-hal/src/vulkan/conv.rs +++ b/wgpu-hal/src/vulkan/conv.rs @@ -567,6 +567,10 @@ pub fn map_buffer_usage(usage: wgt::BufferUses) -> vk::BufferUsageFlags { if usage.intersects(wgt::BufferUses::ACCELERATION_STRUCTURE_QUERY) { flags |= vk::BufferUsageFlags::TRANSFER_DST; } + if usage.intersects(wgt::BufferUses::RAY_TRACING_PIPELINE_SHADER_DATA) { + flags |= vk::BufferUsageFlags::SHADER_BINDING_TABLE_KHR + | vk::BufferUsageFlags::SHADER_DEVICE_ADDRESS; + } flags } @@ -767,6 +771,18 @@ pub fn map_shader_stage(stage: wgt::ShaderStages) -> vk::ShaderStageFlags { if stage.contains(wgt::ShaderStages::MESH) { flags |= vk::ShaderStageFlags::MESH_EXT; } + if stage.contains(wgt::ShaderStages::RAY_GENERATION) { + flags |= vk::ShaderStageFlags::RAYGEN_KHR; + } + if stage.contains(wgt::ShaderStages::MISS) { + flags |= vk::ShaderStageFlags::MISS_KHR; + } + if stage.contains(wgt::ShaderStages::ANY_HIT) { + flags |= vk::ShaderStageFlags::ANY_HIT_KHR; + } + if stage.contains(wgt::ShaderStages::CLOSEST_HIT) { + flags |= vk::ShaderStageFlags::CLOSEST_HIT_KHR; + } flags } @@ -1024,6 +1040,12 @@ pub fn map_acceleration_structure_usage_to_barrier( | vk::PipelineStageFlags::COMPUTE_SHADER; 
access |= vk::AccessFlags::ACCELERATION_STRUCTURE_READ_KHR; } + if usage.contains(crate::AccelerationStructureUses::SHADER_INPUT) + && features.contains(wgt::Features::EXPERIMENTAL_RAY_TRACING_PIPELINES) + { + stages |= vk::PipelineStageFlags::RAY_TRACING_SHADER_KHR; + access |= vk::AccessFlags::ACCELERATION_STRUCTURE_READ_KHR; + } if usage.contains(crate::AccelerationStructureUses::COPY_SRC) { stages |= vk::PipelineStageFlags::ACCELERATION_STRUCTURE_BUILD_KHR; access |= vk::AccessFlags::ACCELERATION_STRUCTURE_READ_KHR; diff --git a/wgpu-hal/src/vulkan/device.rs b/wgpu-hal/src/vulkan/device.rs index 6f7d8b3c8fa..151e97a4b50 100644 --- a/wgpu-hal/src/vulkan/device.rs +++ b/wgpu-hal/src/vulkan/device.rs @@ -2324,6 +2324,178 @@ impl crate::Device for super::Device { self.counters.compute_pipelines.sub(1); } + unsafe fn create_ray_tracing_pipeline( + &self, + desc: &crate::RayTracingPipelineDescriptor< + super::PipelineLayout, + super::ShaderModule, + super::PipelineCache, + >, + ) -> Result { + let mut stages = Vec::new(); + let mut groups = Vec::new(); + + let compiled_ray_gen = self.compile_stage( + &desc.ray_generation, + naga::ShaderStage::RayGeneration, + &desc.layout.binding_map, + )?; + + groups.push( + vk::RayTracingShaderGroupCreateInfoKHR::default() + .closest_hit_shader(vk::SHADER_UNUSED_KHR) + .any_hit_shader(vk::SHADER_UNUSED_KHR) + .intersection_shader(vk::SHADER_UNUSED_KHR) + .general_shader(stages.len() as _) + .ty(vk::RayTracingShaderGroupTypeKHR::GENERAL), + ); + + stages.push(compiled_ray_gen.create_info); + + let compiled_miss = self.compile_stage( + &desc.miss, + naga::ShaderStage::Miss, + &desc.layout.binding_map, + )?; + + groups.push( + vk::RayTracingShaderGroupCreateInfoKHR::default() + .closest_hit_shader(vk::SHADER_UNUSED_KHR) + .any_hit_shader(vk::SHADER_UNUSED_KHR) + .intersection_shader(vk::SHADER_UNUSED_KHR) + .general_shader(stages.len() as _) + .ty(vk::RayTracingShaderGroupTypeKHR::GENERAL), + ); + + 
stages.push(compiled_miss.create_info); + + // This is to keep alive the CStrings, as the ones in the loop would be deallocated + // causing UB otherwise. + let mut compiled_stages = Vec::new(); + + for group in desc.intersection { + let compiled_closest_hits = self.compile_stage( + &group.closest_hit, + naga::ShaderStage::ClosestHit, + &desc.layout.binding_map, + )?; + + let closest_idx = stages.len(); + + stages.push(compiled_closest_hits.create_info); + + compiled_stages.push(compiled_closest_hits); + + let mut raw_hit: vk::RayTracingShaderGroupCreateInfoKHR<'_> = + vk::RayTracingShaderGroupCreateInfoKHR::default() + .closest_hit_shader(closest_idx as _) + .any_hit_shader(vk::SHADER_UNUSED_KHR) + .intersection_shader(vk::SHADER_UNUSED_KHR) + .general_shader(vk::SHADER_UNUSED_KHR) + .ty(vk::RayTracingShaderGroupTypeKHR::TRIANGLES_HIT_GROUP); + + if let Some(any_hit) = &group.any_hit { + let compiled_any_hit = self.compile_stage( + any_hit, + naga::ShaderStage::AnyHit, + &desc.layout.binding_map, + )?; + + let any_idx = stages.len(); + + stages.push(compiled_any_hit.create_info); + + compiled_stages.push(compiled_any_hit); + + raw_hit = raw_hit.any_hit_shader(any_idx as _); + } + + groups.push(raw_hit); + } + + let create_infos = [{ + vk::RayTracingPipelineCreateInfoKHR::default() + .layout(desc.layout.raw) + .max_pipeline_ray_recursion_depth(desc.max_recursion_depth) + .stages(&stages) + .groups(&groups) + }]; + + let pipeline_cache = desc + .cache + .map(|it| it.raw) + .unwrap_or(vk::PipelineCache::null()); + + let fns = self + .shared + .extension_fns + .ray_tracing_pipelines + .as_ref() + .unwrap(); + let pipelines = unsafe { + fns.create_ray_tracing_pipelines( + vk::DeferredOperationKHR::null(), + pipeline_cache, + &create_infos, + None, + ) + .map_err(|(_, e)| super::map_pipeline_err(e)) + }?; + + if let Some(raw_module) = compiled_ray_gen.temp_raw_module { + unsafe { self.shared.raw.destroy_shader_module(raw_module, None) }; + } + + if let Some(raw_module) 
= compiled_miss.temp_raw_module { + unsafe { self.shared.raw.destroy_shader_module(raw_module, None) }; + } + + for raw_module in compiled_stages + .into_iter() + .flat_map(|stage| stage.temp_raw_module) + { + unsafe { self.shared.raw.destroy_shader_module(raw_module, None) }; + } + + self.counters.ray_tracing_pipelines.add(1); + + Ok(super::RayTracingPipeline { raw: pipelines[0] }) + } + + unsafe fn destroy_ray_tracing_pipeline(&self, pipeline: super::RayTracingPipeline) { + unsafe { self.shared.raw.destroy_pipeline(pipeline.raw, None) }; + + self.counters.ray_tracing_pipelines.sub(1); + } + + unsafe fn get_raytracing_pipeline_group_data( + &self, + pipeline: &super::RayTracingPipeline, + groups: core::ops::Range, + ) -> Result, crate::DeviceError> { + let fns = self + .shared + .extension_fns + .ray_tracing_pipelines + .as_ref() + .unwrap(); + + let num = groups.end - groups.start; + + unsafe { + fns.get_ray_tracing_shader_group_handles( + pipeline.raw, + groups.start, + num, + (num * self + .shared + .private_caps + .ray_tracing_pipeline_group_data_size) as usize, + ) + } + .map_err(super::map_host_device_oom_err) + } + unsafe fn create_pipeline_cache( &self, desc: &crate::PipelineCacheDescriptor<'_>, @@ -2847,7 +3019,7 @@ impl crate::Device for super::Device { transform: instance.transform, custom_data_and_mask: (instance.custom_data & MAX_U24) | (u32::from(instance.mask) << 24), - shader_binding_table_record_offset_and_flags: 0, + shader_binding_table_record_offset_and_flags: (instance.custom_data & MAX_U24), acceleration_structure_reference: instance.blas_address, }; bytemuck::bytes_of(&temp).to_vec() diff --git a/wgpu-hal/src/vulkan/mod.rs b/wgpu-hal/src/vulkan/mod.rs index f90ad45d4b4..71ac1e8ac24 100644 --- a/wgpu-hal/src/vulkan/mod.rs +++ b/wgpu-hal/src/vulkan/mod.rs @@ -92,6 +92,7 @@ impl crate::Api for Api { type ShaderModule = ShaderModule; type RenderPipeline = RenderPipeline; type ComputePipeline = ComputePipeline; + type RayTracingPipeline = 
RayTracingPipeline; } crate::impl_dyn_resource!( @@ -111,6 +112,7 @@ crate::impl_dyn_resource!( QuerySet, Queue, RenderPipeline, + RayTracingPipeline, Sampler, ShaderModule, Surface, @@ -298,6 +300,7 @@ struct DeviceExtensionFunctions { draw_indirect_count: Option, timeline_semaphore: Option>, ray_tracing: Option, + ray_tracing_pipelines: Option, mesh_shading: Option, #[cfg_attr(not(unix), allow(dead_code))] external_memory_fd: Option, @@ -396,6 +399,11 @@ struct PrivateCapabilities { /// these usages do not have as high of an alignment requirement using the buffer as /// a scratch buffer when building acceleration structures. scratch_buffer_alignment: u32, + + /// `get_raytracing_pipeline_group_data` requires both a group count and a data size. + /// The data size parameter is just this * the group count, so we store this to not + /// require an unnecessary parameter. + ray_tracing_pipeline_group_data_size: u32, } bitflags::bitflags!( @@ -1077,6 +1085,13 @@ pub struct ComputePipeline { impl crate::DynComputePipeline for ComputePipeline {} +#[derive(Debug)] +pub struct RayTracingPipeline { + raw: vk::Pipeline, +} + +impl crate::DynRayTracingPipeline for RayTracingPipeline {} + #[derive(Debug)] pub struct PipelineCache { raw: vk::PipelineCache, diff --git a/wgpu-info/src/human.rs b/wgpu-info/src/human.rs index 1e642c49ce4..00de404f576 100644 --- a/wgpu-info/src/human.rs +++ b/wgpu-info/src/human.rs @@ -203,6 +203,10 @@ fn print_adapter(output: &mut impl io::Write, report: &AdapterReport, idx: usize max_acceleration_structures_per_shader_stage, max_multiview_view_count, + + max_intersection_group_count, + max_ray_dispatch_count, + max_ray_recursion_depth, } = limits; writeln!(output, "\t\t Max Texture Dimension 1d: {max_texture_dimension_1d}")?; writeln!(output, "\t\t Max Texture Dimension 2d: {max_texture_dimension_2d}")?; @@ -261,6 +265,9 @@ fn print_adapter(output: &mut impl io::Write, report: &AdapterReport, idx: usize writeln!(output, "\t\t Max Acceleration 
Structures Per Shader Stage: {max_acceleration_structures_per_shader_stage}")?; writeln!(output, "\t\t Max Multiview View Count: {max_multiview_view_count}")?; + writeln!(output, "\t\t Max Intersection Group Count: {max_intersection_group_count}")?; + writeln!(output, "\t\t Max Ray Dispatch Count: {max_ray_dispatch_count}")?; + writeln!(output, "\t\t Max Ray Recursion Depth: {max_ray_recursion_depth}")?; // This one reflects more of a wgpu implementation limitations than a hardware limit // so don't show it here. let _ = max_non_sampler_bindings; diff --git a/wgpu-naga-bridge/src/lib.rs b/wgpu-naga-bridge/src/lib.rs index b04bf1e4a40..d2e265bca4c 100644 --- a/wgpu-naga-bridge/src/lib.rs +++ b/wgpu-naga-bridge/src/lib.rs @@ -179,6 +179,10 @@ pub fn features_to_naga_capabilities( Caps::MEMORY_DECORATION_VOLATILE, features.contains(wgt::Features::MEMORY_DECORATION_VOLATILE), ); + caps.set( + Caps::RAY_TRACING_PIPELINE, + features.intersects(wgt::Features::EXPERIMENTAL_RAY_TRACING_PIPELINES), + ); caps } diff --git a/wgpu-types/src/buffer.rs b/wgpu-types/src/buffer.rs index 46d4c044c0e..b1d856f720f 100644 --- a/wgpu-types/src/buffer.rs +++ b/wgpu-types/src/buffer.rs @@ -138,10 +138,12 @@ bitflags::bitflags! { const TOP_LEVEL_ACCELERATION_STRUCTURE_INPUT = 1 << 13; /// A buffer used to store the compacted size of an acceleration structure const ACCELERATION_STRUCTURE_QUERY = 1 << 14; + /// Buffer used for storing opaque shader data + const RAY_TRACING_PIPELINE_SHADER_DATA = 1 << 15; /// The combination of states that a buffer may be in _at the same time_. 
const INCLUSIVE = Self::MAP_READ.bits() | Self::COPY_SRC.bits() | Self::INDEX.bits() | Self::VERTEX.bits() | Self::UNIFORM.bits() | - Self::STORAGE_READ_ONLY.bits() | Self::INDIRECT.bits() | Self::BOTTOM_LEVEL_ACCELERATION_STRUCTURE_INPUT.bits() | Self::TOP_LEVEL_ACCELERATION_STRUCTURE_INPUT.bits(); + Self::STORAGE_READ_ONLY.bits() | Self::INDIRECT.bits() | Self::BOTTOM_LEVEL_ACCELERATION_STRUCTURE_INPUT.bits() | Self::TOP_LEVEL_ACCELERATION_STRUCTURE_INPUT.bits() | Self::RAY_TRACING_PIPELINE_SHADER_DATA.bits(); /// The combination of states that a buffer must exclusively be in. const EXCLUSIVE = Self::MAP_WRITE.bits() | Self::COPY_DST.bits() | Self::STORAGE_READ_WRITE.bits() | Self::ACCELERATION_STRUCTURE_SCRATCH.bits(); } diff --git a/wgpu-types/src/counters.rs b/wgpu-types/src/counters.rs index 147a96c45ee..8302a8e22b4 100644 --- a/wgpu-types/src/counters.rs +++ b/wgpu-types/src/counters.rs @@ -114,6 +114,7 @@ pub struct HalCounters { pub bind_group_layouts: InternalCounter, pub render_pipelines: InternalCounter, pub compute_pipelines: InternalCounter, + pub ray_tracing_pipelines: InternalCounter, pub pipeline_layouts: InternalCounter, pub samplers: InternalCounter, pub command_encoders: InternalCounter, diff --git a/wgpu-types/src/features.rs b/wgpu-types/src/features.rs index 4048a139ed9..4959b7e680a 100644 --- a/wgpu-types/src/features.rs +++ b/wgpu-types/src/features.rs @@ -1474,6 +1474,10 @@ bitflags_array! { #[name("wgpu-memory-decoration-volatile")] const MEMORY_DECORATION_VOLATILE = 1 << 62; + /// Allows for constructing ray tracing pipelines. + #[name("wgpu-ray-tracing-pipelines")] + const EXPERIMENTAL_RAY_TRACING_PIPELINES = 1 << 24; + // Adding a new feature? All bits in the first u64 are used. Use the second u64 (bits 64+). 
} @@ -1846,7 +1850,8 @@ impl Features { | FeaturesWGPU::EXPERIMENTAL_MESH_SHADER_POINTS.bits() | FeaturesWGPU::EXPERIMENTAL_RAY_QUERY.bits() | FeaturesWGPU::EXPERIMENTAL_RAY_HIT_VERTEX_RETURN.bits() - | FeaturesWGPU::EXPERIMENTAL_COOPERATIVE_MATRIX.bits(), + | FeaturesWGPU::EXPERIMENTAL_COOPERATIVE_MATRIX.bits() + | FeaturesWGPU::EXPERIMENTAL_RAY_TRACING_PIPELINES.bits(), FeaturesWebGPU::empty().bits(), ])) } @@ -1855,7 +1860,8 @@ impl Features { #[must_use] pub fn allowed_vertex_formats_for_blas(&self) -> Vec { let mut formats = Vec::new(); - if self.intersects(Self::EXPERIMENTAL_RAY_QUERY) { + if self.intersects(Self::EXPERIMENTAL_RAY_QUERY | Self::EXPERIMENTAL_RAY_TRACING_PIPELINES) + { formats.push(VertexFormat::Float32x3); } if self.contains(Self::EXTENDED_ACCELERATION_STRUCTURE_VERTEX_FORMATS) { diff --git a/wgpu-types/src/limits.rs b/wgpu-types/src/limits.rs index 7a994571f24..ea6641f9834 100644 --- a/wgpu-types/src/limits.rs +++ b/wgpu-types/src/limits.rs @@ -89,6 +89,10 @@ macro_rules! with_limits { $macro_name!(max_acceleration_structures_per_shader_stage, Ordering::Less); $macro_name!(max_multiview_view_count, Ordering::Less); + + $macro_name!(max_intersection_group_count, Ordering::Less); + $macro_name!(max_ray_dispatch_count, Ordering::Less); + $macro_name!(max_ray_recursion_depth, Ordering::Less); }; } @@ -316,6 +320,22 @@ pub struct Limits { /// The maximum number of views that can be used in multiview rendering pub max_multiview_view_count: u32, + + /// The maximum number of intersection groups in a ray tracing pipeline. + /// Currently only affects wgpu-hal + pub max_intersection_group_count: u32, + /// The maximum total number (`x*y*z`) of rays able to be dispatched by a trace rays call in a ray + /// tracing pass. Requesting more than 0 during device creation only makes sense if [`Features::EXPERIMENTAL_RAY_TRACING_PIPELINES`] + /// is enabled. 
+ /// + /// Currently only affects wgpu-hal + pub max_ray_dispatch_count: u32, + /// The maximum number that one can pass into a ray tracing pipeline creation to be the maximum ray + /// recursion depth. (the maximum of the max ray recursion depth) Requesting more than 0 during device + /// creation only makes sense if [`Features::EXPERIMENTAL_RAY_TRACING_PIPELINES`] is enabled. + /// + /// Currently only affects wgpu-hal + pub max_ray_recursion_depth: u32, } impl Default for Limits { @@ -386,6 +406,9 @@ impl Limits { /// max_tlas_instance_count: 0, /// max_acceleration_structures_per_shader_stage: 0, /// max_multiview_view_count: 0, + /// max_intersection_group_count: 0, + /// max_ray_dispatch_count: 0, + /// max_ray_recursion_depth: 0, /// }); /// ``` /// @@ -451,6 +474,10 @@ impl Limits { max_acceleration_structures_per_shader_stage: 0, max_multiview_view_count: 0, + + max_intersection_group_count: 0, + max_ray_dispatch_count: 0, + max_ray_recursion_depth: 0, } } @@ -517,6 +544,10 @@ impl Limits { /// max_acceleration_structures_per_shader_stage: 0, /// /// max_multiview_view_count: 0, + /// + /// max_intersection_group_count: 0, + /// max_ray_dispatch_count: 0, + /// max_ray_recursion_depth: 0, /// }); /// ``` #[must_use] @@ -599,6 +630,10 @@ impl Limits { /// max_acceleration_structures_per_shader_stage: 0, /// /// max_multiview_view_count: 0, + /// + /// max_intersection_group_count: 0, + /// max_ray_dispatch_count: 0, + /// max_ray_recursion_depth: 0, /// }); /// ``` #[must_use] @@ -697,6 +732,9 @@ impl Limits { max_acceleration_structures_per_shader_stage: ALLOC_MAX_U32, max_multiview_view_count: ALLOC_MAX_U32, + max_intersection_group_count: ALLOC_MAX_U32, + max_ray_dispatch_count: ALLOC_MAX_U32, + max_ray_recursion_depth: ALLOC_MAX_U32, } } @@ -754,6 +792,18 @@ impl Limits { } } + /// The minimum guaranteed limits for acceleration structures if you enable [`Features::EXPERIMENTAL_RAY_TRACING_PIPELINES`] + /// These may change in the future (including 
downwards). + #[must_use] + pub const fn using_minimum_supported_ray_tracing_pipeline_values(self) -> Self { + Self { + max_intersection_group_count: 524288, // Vulkan has an exact size of each intersection group being 32, (2 ^ 24 - intersection bytes) / 32 = 524288 + max_ray_dispatch_count: 1 << 30, + max_ray_recursion_depth: 1, + ..self + } + } + /// The recommended minimum limits for mesh shaders if you enable [`Features::EXPERIMENTAL_MESH_SHADER`] /// /// These are chosen somewhat arbitrarily. They are small enough that they should cover all physical devices, diff --git a/wgpu/src/backend/webgpu.rs b/wgpu/src/backend/webgpu.rs index 364158a09ac..00ec425ff98 100644 --- a/wgpu/src/backend/webgpu.rs +++ b/wgpu/src/backend/webgpu.rs @@ -877,6 +877,10 @@ fn map_wgt_limits(limits: webgpu_sys::GpuSupportedLimits) -> wgt::Limits { .max_acceleration_structures_per_shader_stage, max_multiview_view_count: wgt::Limits::default().max_multiview_view_count, + + max_intersection_group_count: wgt::Limits::default().max_intersection_group_count, + max_ray_dispatch_count: wgt::Limits::default().max_ray_dispatch_count, + max_ray_recursion_depth: wgt::Limits::default().max_ray_recursion_depth, } }