Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion wgpu-core/src/device/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ pub mod ray_tracing;
pub mod resource;
#[cfg(any(feature = "trace", feature = "replay"))]
pub mod trace;
pub(crate) use resource::{FenceReadGuard, FenceWriteGuard};
pub(crate) use resource::FenceReadGuard;
pub use {life::WaitIdleError, resource::Device};

pub const SHADER_STAGE_COUNT: usize = hal::MAX_CONCURRENT_SHADER_STAGES;
Expand Down
12 changes: 6 additions & 6 deletions wgpu-core/src/device/queue.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ use crate::{
CommandAllocator, CommandBuffer, CommandEncoder, CommandEncoderError, CopySide,
TransferError,
},
device::{DeviceError, FenceReadGuard, FenceWriteGuard, WaitIdleError},
device::{DeviceError, FenceReadGuard, WaitIdleError},
get_lowest_common_denom,
global::Global,
hal_label,
Expand Down Expand Up @@ -566,7 +566,7 @@ impl WebGpuError for QueueSubmitError {
pub(crate) struct PendingSubmission<'a> {
queue: &'a Queue,
snatch_guard: SnatchGuard<'a>,
fence: FenceWriteGuard<'a>,
fence: FenceReadGuard<'a>,
command_index_guard: RwLockWriteGuard<'a, CommandIndices>,
// Command buffers to be executed, along with trackers for the resources they use.
pub executions: Vec<EncoderInFlight>,
Expand Down Expand Up @@ -1557,7 +1557,7 @@ impl Queue {
) -> (PendingSubmission<'a>, SubmissionIndex) {
// Lock ordering requires that the fence lock be acquired after the snatch lock and
// before the command index lock.
let fence = self.device.fence.write();
let fence = self.device.fence.read();
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't particularly like that things are set up this way (or at least, I think it should be documented more clearly what is going on), but I believe that the fence RwLock is being used not just to protect &mut self methods on the fence, but also to ensure mutual exclusion between submit and other things that don't want a submit to happen concurrently — which will no longer be the case if submit only acquires a read lock.

I also don't particularly like that there are separate locks for the fence and command indices (I feel like the Fence could also have responsibility for giving out command indices), nor do I like that the protection against concurrent submits (see https://github.com/gfx-rs/wgpu/pull/9307/changes#diff-150156a37cf3627465ceb22096ed995ee26ae640007c421e726134bafd499dbeR1679-R1683) is more pessimistic than necessary (the validate_command_buffers processing probably could be done concurrently). But looking for solutions that don't bite off too much refactoring, one strategy might be to switch to using the command indices lock rather than the fence lock to provide mutual exclusion with concurrent submits (and document this, since it's non-obvious). If we do that, then I think we could get rid of the fence lock in wgpu_core entirely and rely on the locking in hal.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe that the fence RwLock is being used not just to protect &mut self methods on the fence, but also to ensure mutual exclusion between submit and other things that don't want a submit to happen concurrently. Which will no longer be the case if submit only acquires a read lock.

I had assumed (at least for submission) that the command indices lock was held for that purpose. The only thing that appeared to use the fence for exclusion was `present`, which should still be mutually exclusive with submit because it takes a write lock.


let mut command_index_guard = self.device.command_indices.write();
command_index_guard.active_submission_index += 1;
Expand Down Expand Up @@ -1600,7 +1600,7 @@ impl Queue {
let PendingSubmission {
queue: _,
snatch_guard,
mut fence,
fence,
command_index_guard,
mut executions,
mut surface_textures,
Expand Down Expand Up @@ -1671,7 +1671,7 @@ impl Queue {
self.raw().submit(
&hal_command_buffers,
&submit_surface_textures,
(fence.as_mut(), submit_index),
(fence.as_ref(), submit_index),
)
}
.map_err(|e| self.device.handle_hal_error(e))?;
Expand All @@ -1692,7 +1692,7 @@ impl Queue {
self.lock_life().track_submission(submit_index, executions);

Ok(SubmissionResult {
fence: RwLockWriteGuard::downgrade(fence),
fence,
snatch_guard,
})
}
Expand Down
3 changes: 1 addition & 2 deletions wgpu-core/src/device/resource.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ use crate::{
TextureInitTrackerAction,
},
instance::{Adapter, RequestDeviceError},
lock::{rank, Mutex, RwLock, RwLockReadGuard, RwLockWriteGuard},
lock::{rank, Mutex, RwLock, RwLockReadGuard},
pipeline::{self, ColorStateError},
pool::ResourcePool,
present,
Expand Down Expand Up @@ -289,7 +289,6 @@ pub struct Device {
}

pub(crate) type FenceReadGuard<'a> = RwLockReadGuard<'a, ManuallyDrop<Box<dyn hal::DynFence>>>;
pub(crate) type FenceWriteGuard<'a> = RwLockWriteGuard<'a, ManuallyDrop<Box<dyn hal::DynFence>>>;

pub(crate) enum DeferredDestroy {
TextureViews(WeakVec<TextureView>),
Expand Down
19 changes: 5 additions & 14 deletions wgpu-core/src/lock/observing.rs
Original file line number Diff line number Diff line change
Expand Up @@ -185,15 +185,6 @@ impl<'a, T> RwLockReadGuard<'a, T> {
}
}

impl<'a, T> RwLockWriteGuard<'a, T> {
pub fn downgrade(this: Self) -> RwLockReadGuard<'a, T> {
RwLockReadGuard {
inner: parking_lot::RwLockWriteGuard::downgrade(this.inner),
_state: this._state,
}
}
}

impl<T: core::fmt::Debug> core::fmt::Debug for RwLock<T> {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
self.inner.fmt(f)
Expand Down Expand Up @@ -226,11 +217,11 @@ impl<'a, T> core::ops::DerefMut for RwLockWriteGuard<'a, T> {
///
/// This type serves two purposes:
///
/// - Operations like `RwLockWriteGuard::downgrade` would like to be able to
/// destructure lock guards and reassemble their pieces into new guards, but
/// if the guard type itself implements `Drop`, we can't destructure it
/// without unsafe code or pointless `Option`s whose state is almost always
/// statically known.
/// - Operations would like to be able to destructure lock guards and
/// reassemble their pieces into new guards, but if the guard type
/// itself implements `Drop`, we can't destructure it without unsafe
/// code or pointless `Option`s whose state is almost always statically
/// known.
///
/// - We can just implement `Drop` for this type once, and then use it in lock
/// guards, rather than implementing `Drop` separately for each guard type.
Expand Down
19 changes: 5 additions & 14 deletions wgpu-core/src/lock/ranked.rs
Original file line number Diff line number Diff line change
Expand Up @@ -110,11 +110,11 @@ impl LockState {
///
/// This type serves two purposes:
///
/// - Operations like `RwLockWriteGuard::downgrade` would like to be able to
/// destructure lock guards and reassemble their pieces into new guards, but
/// if the guard type itself implements `Drop`, we can't destructure it
/// without unsafe code or pointless `Option`s whose state is almost always
/// statically known.
/// - Operations would like to be able to destructure lock guards and
/// reassemble their pieces into new guards, but if the guard type
/// itself implements `Drop`, we can't destructure it without unsafe
/// code or pointless `Option`s whose state is almost always statically
/// known.
///
/// - We can just implement `Drop` for this type once, and then use it in lock
/// guards, rather than implementing `Drop` separately for each guard type.
Expand Down Expand Up @@ -299,15 +299,6 @@ impl<'a, T> RwLockReadGuard<'a, T> {
}
}

impl<'a, T> RwLockWriteGuard<'a, T> {
pub fn downgrade(this: Self) -> RwLockReadGuard<'a, T> {
RwLockReadGuard {
inner: parking_lot::RwLockWriteGuard::downgrade(this.inner),
saved: this.saved,
}
}
}

impl<T: fmt::Debug> fmt::Debug for RwLock<T> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
self.inner.fmt(f)
Expand Down
6 changes: 0 additions & 6 deletions wgpu-core/src/lock/vanilla.rs
Original file line number Diff line number Diff line change
Expand Up @@ -116,12 +116,6 @@ impl<'a, T> RwLockReadGuard<'a, T> {
}
}

impl<'a, T> RwLockWriteGuard<'a, T> {
pub fn downgrade(this: Self) -> RwLockReadGuard<'a, T> {
RwLockReadGuard(parking_lot::RwLockWriteGuard::downgrade(this.0))
}
}

impl<T: fmt::Debug> fmt::Debug for RwLock<T> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
self.0.fmt(f)
Expand Down
2 changes: 1 addition & 1 deletion wgpu-hal/src/dx12/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1670,7 +1670,7 @@ impl crate::Queue for Queue {
&self,
command_buffers: &[&CommandBuffer],
_surface_textures: &[&Texture],
(signal_fence, signal_value): (&mut Fence, crate::FenceValue),
(signal_fence, signal_value): (&Fence, crate::FenceValue),
) -> Result<(), crate::DeviceError> {
let mut temp_lists = self.temp_lists.lock();
temp_lists.clear();
Expand Down
10 changes: 0 additions & 10 deletions wgpu-hal/src/dynamic/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,10 +57,6 @@ trait DynResourceExt {
///
/// - Panics if `self` is not downcastable to `T`.
fn expect_downcast_ref<T: DynResource>(&self) -> &T;
/// # Panics
///
/// - Panics if `self` is not downcastable to `T`.
fn expect_downcast_mut<T: DynResource>(&mut self) -> &mut T;

/// Unboxes a `Box<dyn DynResource>` to a concrete type.
///
Expand All @@ -77,12 +73,6 @@ impl<R: DynResource + ?Sized> DynResourceExt for R {
.expect("Resource doesn't have the expected backend type.")
}

fn expect_downcast_mut<'a, T: DynResource>(&'a mut self) -> &'a mut T {
self.as_any_mut()
.downcast_mut()
.expect("Resource doesn't have the expected backend type.")
}

unsafe fn unbox<T: DynResource + 'static>(self: Box<Self>) -> T {
debug_assert!(
<Self as Any>::type_id(self.as_ref()) == TypeId::of::<T>(),
Expand Down
6 changes: 3 additions & 3 deletions wgpu-hal/src/dynamic/queue.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ pub trait DynQueue: DynResource {
&self,
command_buffers: &[&dyn DynCommandBuffer],
surface_textures: &[&dyn DynSurfaceTexture],
signal_fence: (&mut dyn DynFence, FenceValue),
signal_fence: (&dyn DynFence, FenceValue),
) -> Result<(), DeviceError>;
unsafe fn present(
&self,
Expand All @@ -28,7 +28,7 @@ impl<Q: Queue + DynResource> DynQueue for Q {
&self,
command_buffers: &[&dyn DynCommandBuffer],
surface_textures: &[&dyn DynSurfaceTexture],
signal_fence: (&mut dyn DynFence, FenceValue),
signal_fence: (&dyn DynFence, FenceValue),
) -> Result<(), DeviceError> {
let command_buffers = command_buffers
.iter()
Expand All @@ -38,7 +38,7 @@ impl<Q: Queue + DynResource> DynQueue for Q {
.iter()
.map(|surface| (*surface).expect_downcast_ref())
.collect::<Vec<_>>();
let signal_fence = (signal_fence.0.expect_downcast_mut(), signal_fence.1);
let signal_fence = (signal_fence.0.expect_downcast_ref(), signal_fence.1);
unsafe { Q::submit(self, &command_buffers, &surface_textures, signal_fence) }
}

Expand Down
71 changes: 52 additions & 19 deletions wgpu-hal/src/gles/fence.rs
Original file line number Diff line number Diff line change
@@ -1,20 +1,29 @@
use alloc::vec::Vec;
use alloc::{sync::Arc, vec::Vec};
use core::sync::atomic::Ordering;
use parking_lot::RwLock;

use glow::HasContext;

use crate::AtomicFenceValue;

#[derive(Debug, Copy, Clone)]
#[derive(Debug)]
struct GLFence {
sync: glow::Fence,
// Since a fence can be `Copy`ed, there can exist some
// cases where (without proper synchronisation),
// a fence could be destroyed while something else is
// still using it. Therefore, while a function is
// using this fence, it should read this. (write
// should be done when destroying it)
//
// The arc should not be kept after a function has finished
sync: Arc<RwLock<glow::Fence>>,
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Instead of layering additional locks here, I think Fence::pending could be RwLock<Vec<Arc<GLFence>>> (or maybe just a Mutex). Then, wait can do something like:

        // Find a matching fence
        let gl_fence = self
            .pending
            .read() // or `.lock()`, if a mutex
            .iter()
            // Greater or equal as an abundance of caution, but there should be one fence per value
            .find(|gl_fence| gl_fence.value >= wait_value)
            .map(Arc::clone);

i.e., drop the lock before actually waiting. For Metal, Retained is already like an Arc, so similar can be done with Retained::clone.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tried that, but I was worried about the possibility of destroying a fence while it was being waited on. The Arc would be unnecessary if it weren't for the RwLock, since fences are Copy.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For Metal, Retained is already like an Arc, so similar can be done with Retained::clone.

I believe Metal already does this (in this PR), but on Metal the resource is destroyed when the ref count goes to zero, rather than through an explicit destroy method.

value: crate::FenceValue,
}

#[derive(Debug)]
pub struct Fence {
last_completed: AtomicFenceValue,
pending: Vec<GLFence>,
pending: RwLock<Vec<GLFence>>,
fence_behavior: wgt::GlFenceBehavior,
}

Expand All @@ -29,24 +38,27 @@ impl Fence {
pub fn new(options: &wgt::GlBackendOptions) -> Self {
Self {
last_completed: AtomicFenceValue::new(0),
pending: Vec::new(),
pending: RwLock::new(Vec::new()),
fence_behavior: options.fence_behavior,
}
}

pub fn signal(
&mut self,
&self,
gl: &glow::Context,
value: crate::FenceValue,
) -> Result<(), crate::DeviceError> {
if self.fence_behavior.is_auto_finish() {
*self.last_completed.get_mut() = value;
self.last_completed.store(value, Ordering::Release);
return Ok(());
}

let sync = unsafe { gl.fence_sync(glow::SYNC_GPU_COMMANDS_COMPLETE, 0) }
.map_err(|_| crate::DeviceError::OutOfMemory)?;
self.pending.push(GLFence { sync, value });
self.pending.write().push(GLFence {
sync: Arc::new(RwLock::new(sync)),
value,
});

Ok(())
}
Expand All @@ -62,12 +74,15 @@ impl Fence {
return max_value;
}

for gl_fence in self.pending.iter() {
let pending = self.pending.read();

for gl_fence in pending.iter() {
let fence = gl_fence.sync.read();
if gl_fence.value <= max_value {
// We already know this was good, no need to check again
continue;
}
let status = unsafe { gl.get_sync_status(gl_fence.sync) };
let status = unsafe { gl.get_sync_status(*fence) };
if status == glow::SIGNALED {
max_value = gl_fence.value;
} else {
Expand All @@ -82,20 +97,27 @@ impl Fence {
max_value
}

pub fn maintain(&mut self, gl: &glow::Context) {
pub fn maintain(&self, gl: &glow::Context) {
if self.fence_behavior.is_auto_finish() {
return;
}

let latest = self.get_latest(gl);
for &gl_fence in self.pending.iter() {
let mut pending = self.pending.write();
for gl_fence in pending.iter() {
// We don't need to keep around this lock until after the retain - we need to make
// sure nothing is using it by writing to it, but any new references must come
// from `self.pending`, which is write-locked, so nothing else can take a
// copy of this value
let sync = *gl_fence.sync.write();

if gl_fence.value <= latest {
unsafe {
gl.delete_sync(gl_fence.sync);
gl.delete_sync(sync);
}
}
}
self.pending.retain(|&gl_fence| gl_fence.value > latest);
pending.retain(|gl_fence| gl_fence.value > latest);
}

pub fn wait(
Expand All @@ -115,9 +137,10 @@ impl Fence {
return Ok(true);
}

let pending = self.pending.read();

// Find a matching fence
let gl_fence = self
.pending
let gl_fence = pending
.iter()
// Greater or equal as an abundance of caution, but there should be one fence per value
.find(|gl_fence| gl_fence.value >= wait_value);
Expand All @@ -130,14 +153,20 @@ impl Fence {
// We should have found a fence with the exact value.
debug_assert_eq!(gl_fence.value, wait_value);

let sync = gl_fence.sync.clone();

drop(pending);

let status = unsafe {
gl.client_wait_sync(
gl_fence.sync,
*sync.read(),
glow::SYNC_FLUSH_COMMANDS_BIT,
timeout_ns.min(i32::MAX as u32) as i32,
)
};

drop(sync);

let signalled = match status {
glow::ALREADY_SIGNALED | glow::CONDITION_SATISFIED => true,
glow::TIMEOUT_EXPIRED | glow::WAIT_FAILED => false,
Expand All @@ -159,9 +188,13 @@ impl Fence {
return;
}

for gl_fence in self.pending {
for gl_fence in self.pending.into_inner() {
unsafe {
gl.delete_sync(gl_fence.sync);
gl.delete_sync(
Arc::into_inner(gl_fence.sync)
.expect("A function has failed to drop all its references to this")
.into_inner(),
);
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion wgpu-hal/src/gles/queue.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1901,7 +1901,7 @@ impl crate::Queue for super::Queue {
&self,
command_buffers: &[&super::CommandBuffer],
_surface_textures: &[&super::Texture],
(signal_fence, signal_value): (&mut super::Fence, crate::FenceValue),
(signal_fence, signal_value): (&super::Fence, crate::FenceValue),
) -> Result<(), crate::DeviceError> {
let shared = Arc::clone(&self.shared);
let gl = &shared.context.lock();
Expand Down
Loading