Skip to content

Commit c7b615e

Browse files
authored
Allow deeper blend stacks by spilling to a buffer (#657)
This brings in support for blend spilling (which was supported in the old piet-gpu). I don't have a good heuristic for how big to make the buffer. That is something which will need to be addressed in #606 (or its successors). I just guessed that 256 spills would be fine. I think this is probably too small - I suspect we'll get feedback from @TrueDoctor about this. I have confirmed that the robustness works as expected with the GPU shaders.
1 parent 0808fa0 commit c7b615e

File tree

12 files changed

+123
-12
lines changed

12 files changed

+123
-12
lines changed

Diff for: CHANGELOG.md

+3
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@ This release has an [MSRV][] of 1.75.
1717

1818
### Added
1919

20+
- Support blends more than four layers deep ([#657][] by [@DJMcNab][])
21+
2022
### Changed
2123

2224
- Breaking: Updated `wgpu` to 22.1.0. ([#635] by [@waywardmonkeys])
@@ -119,6 +121,7 @@ This release has an [MSRV][] of 1.75.
119121
[#630]: https://github.com/linebender/vello/pull/630
120122
[#631]: https://github.com/linebender/vello/pull/631
121123
[#635]: https://github.com/linebender/vello/pull/635
124+
[#657]: https://github.com/linebender/vello/pull/657
122125

123126
<!-- Note that this is still comparing against 0.2.0, because 0.2.1 is a cherry-picked patch -->
124127
[Unreleased]: https://github.com/linebender/vello/compare/v0.2.0...HEAD

Diff for: examples/scenes/src/test_scenes.rs

+37
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ export_scenes!(
6767
two_point_radial(two_point_radial),
6868
brush_transform(brush_transform: animated),
6969
blend_grid(blend_grid),
70+
deep_blend(deep_blend),
7071
conflation_artifacts(conflation_artifacts),
7172
labyrinth(labyrinth),
7273
robust_paths(robust_paths),
@@ -1057,6 +1058,42 @@ mod impls {
10571058
}
10581059
}
10591060

1061+
/// Test scene that nests several partially transparent layers inside one another.
///
/// Sets the scene resolution to 1000x1000, fills a 900x900 rectangle at
/// (10, 10) in red, then pushes a sequence of `Mix::Normal` layers at alpha
/// 0.9, each shaped by a progressively smaller square anchored at (10, 10)
/// and each filling the full `main_rect` with its own colour. The number of
/// layers pushed is bounded by `params.complexity`. Every pushed layer is
/// popped again before returning.
pub(super) fn deep_blend(scene: &mut Scene, params: &mut SceneParams) {
1062+
params.resolution = Some(Vec2::new(1000., 1000.));
1063+
let main_rect = Rect::from_origin_size((10., 10.), (900., 900.));
1064+
scene.fill(
1065+
Fill::EvenOdd,
1066+
Affine::IDENTITY,
1067+
Color::RED,
1068+
None,
1069+
&main_rect,
1070+
);
1071+
// (square side length, fill colour) for each nested layer, in push order
// (largest square first).
let options = [
1072+
(800., Color::AQUA),
1073+
(700., Color::RED),
1074+
(600., Color::ALICE_BLUE),
1075+
(500., Color::YELLOW),
1076+
(400., Color::GREEN),
1077+
(300., Color::BLUE),
1078+
(200., Color::ORANGE),
1079+
(100., Color::WHITE),
1080+
];
1081+
// Counts the layers actually pushed so the same number can be popped below.
let mut depth = 0;
1082+
// NOTE(review): the slice end is capped at `options.len() - 1`, so the final
// entry (100., WHITE) is never pushed, whatever `params.complexity` is.
// Confirm whether that is intended before changing it; the reference
// snapshot presumably reflects the current behaviour.
for (width, colour) in &options[..params.complexity.min(options.len() - 1)] {
1083+
scene.push_layer(
1084+
Mix::Normal,
1085+
0.9,
1086+
Affine::IDENTITY,
1087+
&Rect::from_origin_size((10., 10.), (*width, *width)),
1088+
);
1089+
// The fill covers all of `main_rect`, not just the layer's smaller square.
scene.fill(Fill::EvenOdd, Affine::IDENTITY, colour, None, &main_rect);
1090+
depth += 1;
1091+
}
1092+
// Pop exactly as many layers as were pushed.
for _ in 0..depth {
1093+
scene.pop_layer();
1094+
}
1095+
}
1096+
10601097
// Support functions
10611098

10621099
pub(super) fn render_cardioid(scene: &mut Scene) {

Diff for: vello/src/render.rs

+8
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ struct FineResources {
4646
gradient_image: ResourceProxy,
4747
info_bin_data_buf: ResourceProxy,
4848
image_atlas: ResourceProxy,
49+
blend_spill_buf: ResourceProxy,
4950

5051
out_image: ImageProxy,
5152
}
@@ -450,6 +451,10 @@ impl Render {
450451
recording.free_resource(bin_header_buf);
451452
recording.free_resource(path_buf);
452453
let out_image = ImageProxy::new(params.width, params.height, ImageFormat::Rgba8);
454+
let blend_spill_buf = BufferProxy::new(
455+
buffer_sizes.blend_spill.size_in_bytes().into(),
456+
"blend_spill",
457+
);
453458
self.fine_wg_count = Some(wg_counts.fine);
454459
self.fine_resources = Some(FineResources {
455460
aa_config: params.antialiasing_method,
@@ -460,6 +465,7 @@ impl Render {
460465
ptcl_buf,
461466
gradient_image,
462467
info_bin_data_buf,
468+
blend_spill_buf: ResourceProxy::Buffer(blend_spill_buf),
463469
image_atlas: ResourceProxy::Image(image_atlas),
464470
out_image,
465471
});
@@ -510,6 +516,7 @@ impl Render {
510516
fine.segments_buf,
511517
fine.ptcl_buf,
512518
fine.info_bin_data_buf,
519+
fine.blend_spill_buf,
513520
ResourceProxy::Image(fine.out_image),
514521
fine.gradient_image,
515522
fine.image_atlas,
@@ -543,6 +550,7 @@ impl Render {
543550
fine.segments_buf,
544551
fine.ptcl_buf,
545552
fine.info_bin_data_buf,
553+
fine.blend_spill_buf,
546554
ResourceProxy::Image(fine.out_image),
547555
fine.gradient_image,
548556
fine.image_atlas,

Diff for: vello/src/shaders.rs

+1
Original file line numberDiff line numberDiff line change
@@ -211,6 +211,7 @@ pub(crate) fn full_shaders(
211211
BindType::BufReadOnly,
212212
BindType::BufReadOnly,
213213
BindType::BufReadOnly,
214+
BindType::Buffer,
214215
BindType::Image(ImageFormat::Rgba8),
215216
BindType::ImageRead(ImageFormat::Rgba8),
216217
BindType::ImageRead(ImageFormat::Rgba8),

Diff for: vello_encoding/src/config.rs

+8
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,9 @@ pub struct ConfigUniform {
147147
pub seg_counts_size: u32,
148148
/// Size of segment buffer allocation (in [`PathSegment`]s).
149149
pub segments_size: u32,
150+
/// Size of blend spill buffer (in `u32` pixels).
151+
// TODO: Maybe store in TILE_WIDTH * TILE_HEIGHT blocks of pixels instead?
152+
pub blend_size: u32,
150153
/// Size of per-tile command list buffer allocation (in `u32`s).
151154
pub ptcl_size: u32,
152155
}
@@ -184,6 +187,7 @@ impl RenderConfig {
184187
tiles_size: buffer_sizes.tiles.len(),
185188
seg_counts_size: buffer_sizes.seg_counts.len(),
186189
segments_size: buffer_sizes.segments.len(),
190+
blend_size: buffer_sizes.blend_spill.len(),
187191
ptcl_size: buffer_sizes.ptcl.len(),
188192
layout: *layout,
189193
},
@@ -352,6 +356,7 @@ pub struct BufferSizes {
352356
pub tiles: BufferSize<Tile>,
353357
pub seg_counts: BufferSize<SegmentCount>,
354358
pub segments: BufferSize<PathSegment>,
359+
pub blend_spill: BufferSize<u32>,
355360
pub ptcl: BufferSize<u32>,
356361
}
357362

@@ -395,6 +400,8 @@ impl BufferSizes {
395400
let lines = BufferSize::new(1 << 21);
396401
let seg_counts = BufferSize::new(1 << 21);
397402
let segments = BufferSize::new(1 << 21);
403+
// 16 * 16 (1 << 8) is one blend spill, so this allows for 4096 spills.
404+
let blend_spill = BufferSize::new(1 << 20);
398405
let ptcl = BufferSize::new(1 << 23);
399406
Self {
400407
path_reduced,
@@ -419,6 +426,7 @@ impl BufferSizes {
419426
tiles,
420427
seg_counts,
421428
segments,
429+
blend_spill,
422430
ptcl,
423431
}
424432
}

Diff for: vello_shaders/shader/coarse.wgsl

+4-1
Original file line numberDiff line numberDiff line change
@@ -444,8 +444,11 @@ fn main(
444444
ptcl[cmd_offset] = CMD_END;
445445
var blend_ix = 0u;
446446
if max_blend_depth > BLEND_STACK_SPLIT {
447-
let scratch_size = max_blend_depth * TILE_WIDTH * TILE_HEIGHT;
447+
let scratch_size = (max_blend_depth - BLEND_STACK_SPLIT) * TILE_WIDTH * TILE_HEIGHT;
448448
blend_ix = atomicAdd(&bump.blend, scratch_size);
449+
if blend_ix + scratch_size > config.blend_size {
450+
atomicOr(&bump.failed, STAGE_COARSE);
451+
}
449452
}
450453
ptcl[blend_offset] = blend_ix;
451454
}

Diff for: vello_shaders/shader/fine.wgsl

+18-6
Original file line numberDiff line numberDiff line change
@@ -39,27 +39,30 @@ var<storage> ptcl: array<u32>;
3939
var<storage> info: array<u32>;
4040

4141
@group(0) @binding(4)
42+
var<storage, read_write> blend_spill: array<u32>;
43+
44+
@group(0) @binding(5)
4245
#ifdef r8
4346
var output: texture_storage_2d<r8unorm, write>;
4447
#else
4548
var output: texture_storage_2d<rgba8unorm, write>;
4649
#endif
4750

4851
#ifdef full
49-
@group(0) @binding(5)
52+
@group(0) @binding(6)
5053
var gradients: texture_2d<f32>;
5154

52-
@group(0) @binding(6)
55+
@group(0) @binding(7)
5356
var image_atlas: texture_2d<f32>;
5457
#endif
5558

5659
// MSAA-only bindings and utilities
5760
#ifdef msaa
5861

5962
#ifdef full
60-
const MASK_LUT_INDEX: u32 = 7;
63+
const MASK_LUT_INDEX: u32 = 8;
6164
#else
62-
const MASK_LUT_INDEX: u32 = 5;
65+
const MASK_LUT_INDEX: u32 = 6;
6366
#endif
6467

6568
#ifdef msaa8
@@ -947,7 +950,13 @@ fn main(
947950
rgba[i] = vec4(0.0);
948951
}
949952
} else {
950-
// TODO: spill to memory
953+
let blend_in_scratch = clip_depth - BLEND_STACK_SPLIT;
954+
let local_tile_ix = local_id.x * PIXELS_PER_THREAD + local_id.y * TILE_WIDTH;
955+
let local_blend_start = blend_offset + blend_in_scratch * TILE_WIDTH * TILE_HEIGHT + local_tile_ix;
956+
for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) {
957+
blend_spill[local_blend_start + i] = pack4x8unorm(rgba[i]);
958+
rgba[i] = vec4(0.0);
959+
}
951960
}
952961
clip_depth += 1u;
953962
cmd_ix += 1u;
@@ -960,7 +969,10 @@ fn main(
960969
if clip_depth < BLEND_STACK_SPLIT {
961970
bg_rgba = blend_stack[clip_depth][i];
962971
} else {
963-
// load from memory
972+
let blend_in_scratch = clip_depth - BLEND_STACK_SPLIT;
973+
let local_tile_ix = local_id.x * PIXELS_PER_THREAD + local_id.y * TILE_WIDTH;
974+
let local_blend_start = blend_offset + blend_in_scratch * TILE_WIDTH * TILE_HEIGHT + local_tile_ix;
975+
bg_rgba = blend_spill[local_blend_start + i];
964976
}
965977
let bg = unpack4x8unorm(bg_rgba);
966978
let fg = rgba[i] * area[i] * end_clip.alpha;

Diff for: vello_shaders/shader/shared/config.wgsl

+5-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
// Copyright 2022 the Vello Authors
22
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
33

4-
// This must be kept in sync with the struct in src/encoding/resolve.rs
4+
// This must be kept in sync with `ConfigUniform` in `vello_encoding/src/config.rs`
55
struct Config {
66
width_in_tiles: u32,
77
height_in_tiles: u32,
@@ -38,6 +38,7 @@ struct Config {
3838
tiles_size: u32,
3939
seg_counts_size: u32,
4040
segments_size: u32,
41+
blend_size: u32,
4142
ptcl_size: u32,
4243
}
4344

@@ -54,6 +55,9 @@ let N_TILE = 256u;
5455
// Not currently supporting non-square tiles
5556
let TILE_SCALE = 0.0625;
5657

58+
// The "split" point between using local memory in fine for the blend stack and spilling to the blend_spill buffer.
59+
// A higher value will increase vgpr ("register") pressure in fine, but decrease required dynamic memory allocation.
60+
// If changing, also change in vello_shaders/src/cpu/coarse.rs.
5761
let BLEND_STACK_SPLIT = 4u;
5862

5963
// The following are computed in draw_leaf from the generic gradient parameters

Diff for: vello_shaders/src/cpu/coarse.rs

+19-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
// Copyright 2023 the Vello Authors
22
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
33

4+
use std::cmp::max;
5+
46
use vello_encoding::{
57
BinHeader, BumpAllocators, ConfigUniform, DrawMonoid, DrawTag, Path, Tile,
68
DRAW_INFO_FLAGS_FILL_RULE_BIT,
@@ -11,10 +13,18 @@ use super::{
1113
CMD_LIN_GRAD, CMD_RAD_GRAD, CMD_SOLID, CMD_SWEEP_GRAD, PTCL_INITIAL_ALLOC,
1214
};
1315

16+
// Tiles per bin
1417
const N_TILE_X: usize = 16;
1518
const N_TILE_Y: usize = 16;
1619
const N_TILE: usize = N_TILE_X * N_TILE_Y;
1720

21+
// If changing, also change in vello_shaders/shader/shared/config.wgsl.
22+
const BLEND_STACK_SPLIT: u32 = 4;
23+
24+
// Pixels per tile
25+
const TILE_WIDTH: u32 = 16;
26+
const TILE_HEIGHT: u32 = 16;
27+
1828
const PTCL_INCREMENT: u32 = 256;
1929
const PTCL_HEADROOM: u32 = 2;
2030

@@ -219,6 +229,8 @@ fn coarse_main(
219229
let blend_offset = tile_state.cmd_offset;
220230
tile_state.cmd_offset += 1;
221231
let mut clip_depth = 0;
232+
let mut render_blend_depth = 0;
233+
let mut max_blend_depth = 0_u32;
222234
let mut clip_zero_depth = 0;
223235
for drawobj_ix in &compacted[tile_ix] {
224236
let drawtag = scene[(drawtag_base + drawobj_ix) as usize];
@@ -306,7 +318,10 @@ fn coarse_main(
306318
clip_zero_depth = clip_depth + 1;
307319
} else {
308320
tile_state.write_begin_clip(config, bump, ptcl);
309-
// TODO: update blend depth
321+
// TODO: Do we need to track this separately? It seems like it
322+
// is always the same as clip_depth in this code path
323+
render_blend_depth += 1;
324+
max_blend_depth = max(render_blend_depth, max_blend_depth);
310325
}
311326
clip_depth += 1;
312327
}
@@ -317,6 +332,7 @@ fn coarse_main(
317332
let blend = scene[dd as usize];
318333
let alpha = f32::from_bits(scene[dd as usize + 1]);
319334
tile_state.write_end_clip(config, bump, ptcl, blend, alpha);
335+
render_blend_depth -= 1;
320336
}
321337
_ => todo!(),
322338
}
@@ -338,7 +354,8 @@ fn coarse_main(
338354

339355
if bin_tile_x + tile_x < width_in_tiles && bin_tile_y + tile_y < height_in_tiles {
340356
ptcl[tile_state.cmd_offset as usize] = CMD_END;
341-
let scratch_size = 0; // TODO: actually compute blend depth
357+
let scratch_size =
358+
(max_blend_depth.saturating_sub(BLEND_STACK_SPLIT)) * TILE_WIDTH * TILE_HEIGHT;
342359
ptcl[blend_offset as usize] = bump.blend;
343360
bump.blend += scratch_size;
344361
}

Diff for: vello_tests/snapshots/deep_blend.png

+3
Loading

Diff for: vello_tests/tests/compare_gpu_cpu.rs

+9-1
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,15 @@ fn compare_tricky_strokes() {
7777
#[cfg_attr(skip_gpu_tests, ignore)]
7878
fn compare_fill_types() {
7979
let test_scene = test_scenes::fill_types();
80-
assert_eq!(test_scene.config.name, "fill_types");
8180
let params = TestParams::new("compare_fill_types", 1400, 700);
8281
compare_test_scene(test_scene, params);
8382
}
83+
84+
/// Runs the `deep_blend` test scene through `compare_test_scene`.
///
/// Uses `TestParams` named "compare_deep_blend" with 150 and 150 as the two
/// numeric arguments (presumably width and height in pixels; confirm against
/// `TestParams::new`). The test is ignored when the `skip_gpu_tests` cfg is set.
#[test]
85+
#[cfg_attr(skip_gpu_tests, ignore)]
86+
fn compare_deep_blend() {
87+
let test_scene = test_scenes::deep_blend();
88+
// Sanity check that the exported scene carries the expected name.
assert_eq!(test_scene.config.name, "deep_blend");
89+
let params = TestParams::new("compare_deep_blend", 150, 150);
90+
compare_test_scene(test_scene, params);
91+
}

Diff for: vello_tests/tests/snapshots.rs

+8-1
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,14 @@ fn snapshot_tricky_strokes() {
7171
#[cfg_attr(skip_gpu_tests, ignore)]
7272
fn snapshot_fill_types() {
7373
let test_scene = test_scenes::fill_types();
74-
assert_eq!(test_scene.config.name, "fill_types");
7574
let params = TestParams::new("fill_types", 700, 350);
7675
snapshot_test_scene(test_scene, params);
7776
}
77+
78+
/// Runs the `deep_blend` test scene through `snapshot_test_scene`.
///
/// Uses `TestParams` named "deep_blend" with 200 and 200 as the two numeric
/// arguments (presumably width and height in pixels; confirm against
/// `TestParams::new`). The test is ignored when the `skip_gpu_tests` cfg is set.
#[test]
79+
#[cfg_attr(skip_gpu_tests, ignore)]
80+
fn snapshot_deep_blend() {
81+
let test_scene = test_scenes::deep_blend();
82+
let params = TestParams::new("deep_blend", 200, 200);
83+
snapshot_test_scene(test_scene, params);
84+
}

0 commit comments

Comments
 (0)