Skip to content

Commit 14a7978

Browse files
Benoît Rouleau (benface)
authored and committed
metal: collapse consecutive same-target passes into one encoder
Higher-level engines (e.g. macroquad's draw-call loop) issue one `begin_pass` / `end_render_pass` pair per draw call so they can stream batched draws to whatever the active camera's render target is. Each pair was becoming its own `MTLRenderCommandEncoder`, which on tile-based GPUs forces a full color-attachment store + (for MSAA) resolve on every draw — heavy memory bandwidth that visibly tanks real-device frame rate at retina resolution with `Conf.sample_count > 1`.

OpenGL absorbs this pattern transparently because re-binding an FBO doesn't trigger a resolve; Metal's per-encoder load/store actions make the cost explicit. Apple's own guidance is to minimize encoder count for exactly this reason.

Defer `endEncoding` until something actually requires the encoder to be closed: a `begin_pass` with a different target or a `Clear` action, or `commit_frame`. If the next `begin_pass` continues the same target with `PassAction::Nothing`, keep encoding into the existing encoder so the whole sequence collapses into one Metal pass with a single store / resolve at the real end.

Each existing draw call's `apply_pipeline` / `apply_bindings` / `apply_uniforms` already re-sends its full state, so reusing the encoder is correctness-safe.

Tested on iPhone running iOS 27 with macroquad's per-draw-call begin/end pattern at 4x MSAA — drops the per-frame encoder count from the draw-call count down to one per render target.
1 parent 1140738 commit 14a7978

1 file changed

Lines changed: 48 additions & 4 deletions

File tree

src/graphics/metal.rs

Lines changed: 48 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -282,6 +282,10 @@ pub struct MetalContext {
282282
/// for `apply_scissor_rect`'s Y-flip + clamp.
283283
current_pass_width: u32,
284284
current_pass_height: u32,
285+
// Bookkeeping for the deferred-end pass-merge in `begin_pass`
286+
// / `end_render_pass`.
287+
current_pass_target: Option<Option<RenderPass>>,
288+
pending_end_encoder: bool,
285289
view: ObjcId,
286290
device: ObjcId,
287291
current_frame_index: usize,
@@ -375,6 +379,8 @@ impl MetalContext {
375379
render_encoder: None,
376380
current_pass_width: 0,
377381
current_pass_height: 0,
382+
current_pass_target: None,
383+
pending_end_encoder: false,
378384
view,
379385
device,
380386
buffers: vec![],
@@ -390,6 +396,17 @@ impl MetalContext {
390396
}
391397
}
392398
}
399+
400+
/// Actually call `endEncoding` and clear deferred-end state.
401+
/// `end_render_pass` defers this so consecutive same-target
402+
/// begin/end pairs can collapse into one Metal pass.
403+
fn really_end_encoder(&mut self) {
404+
if let Some(render_encoder) = self.render_encoder.take() {
405+
unsafe { msg_send_!(render_encoder, endEncoding) };
406+
}
407+
self.current_pass_target = None;
408+
self.pending_end_encoder = false;
409+
}
393410
}
394411

395412
impl RenderingBackend for MetalContext {
@@ -525,6 +542,10 @@ impl RenderingBackend for MetalContext {
525542
unimplemented!()
526543
}
527544
fn texture_generate_mipmaps(&mut self, texture: TextureId) {
545+
// Close any deferred render encoder before opening a blit
546+
// encoder on the same command buffer — Metal asserts
547+
// "encoding in progress" otherwise.
548+
self.really_end_encoder();
528549
unsafe {
529550
if self.command_buffer.is_none() {
530551
self.command_buffer = Some(msg_send![self.command_queue, commandBuffer]);
@@ -1205,6 +1226,22 @@ impl RenderingBackend for MetalContext {
12051226

12061227
fn begin_pass(&mut self, pass: Option<RenderPass>, action: PassAction) {
12071228
unsafe {
1229+
// Reuse the deferred encoder if this begin continues the
1230+
// same target with a load-style action — collapses the
1231+
// per-draw-call begin/end pattern (e.g. macroquad's) into
1232+
// one Metal pass, avoiding a store + (for MSAA) resolve
1233+
// per draw on tile-based GPUs.
1234+
if self.pending_end_encoder
1235+
&& self.current_pass_target == Some(pass)
1236+
&& matches!(action, PassAction::Nothing)
1237+
{
1238+
self.pending_end_encoder = false;
1239+
return;
1240+
}
1241+
if self.pending_end_encoder {
1242+
self.really_end_encoder();
1243+
}
1244+
12081245
if self.command_buffer.is_none() {
12091246
self.command_buffer = Some(msg_send![self.command_queue, commandBuffer]);
12101247
}
@@ -1288,6 +1325,8 @@ impl RenderingBackend for MetalContext {
12881325
// });
12891326

12901327
self.render_encoder = Some(render_encoder);
1328+
self.current_pass_target = Some(pass);
1329+
self.pending_end_encoder = false;
12911330
}
12921331
}
12931332

@@ -1297,10 +1336,10 @@ impl RenderingBackend for MetalContext {
12971336
"end_render_pass unpaired with begin_render_pass!"
12981337
);
12991338

1300-
let render_encoder = self.render_encoder.unwrap();
1301-
unsafe { msg_send_!(render_encoder, endEncoding) };
1302-
1303-
self.render_encoder = None;
1339+
// Defer `endEncoding`; see `begin_pass` for the reuse rule.
1340+
// Anything that breaks the merge invariant runs
1341+
// `really_end_encoder` first.
1342+
self.pending_end_encoder = true;
13041343
self.index_buffer = None;
13051344
}
13061345

@@ -1332,6 +1371,11 @@ impl RenderingBackend for MetalContext {
13321371
}
13331372

13341373
fn commit_frame(&mut self) {
1374+
// Flush the deferred `endEncoding` from the last
1375+
// `end_render_pass`.
1376+
if self.pending_end_encoder {
1377+
self.really_end_encoder();
1378+
}
13351379
unsafe {
13361380
assert!(!self.command_queue.is_null());
13371381
let drawable: ObjcId = msg_send!(self.view, currentDrawable);

0 commit comments

Comments
 (0)