Skip to content

Commit 0c242cb

Browse files
authored
CPU: feature flag u8 and f32 pipelines (#1294)
Adds feature flags to `vello_cpu`'s `u8` and `f32` pipelines so that you can save on binary size if you're only planning on using one of them. Disabling the `f32` pipeline knocks ~60kb off the binary size for me (~250kb -> ~190kb). --------- Signed-off-by: Nico Burns <[email protected]>
1 parent add2fda commit 0c242cb

File tree

6 files changed

+76
-15
lines changed

6 files changed

+76
-15
lines changed

.github/workflows/ci.yml

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -146,13 +146,19 @@ jobs:
146146
save-if: ${{ github.event_name != 'merge_group' }}
147147

148148
- name: cargo clippy (no_std)
149-
run: cargo hack clippy ${{ env.RUST_NO_STD_PKGS }} --locked --optional-deps --each-feature --ignore-unknown-features --features libm --exclude-features ${{ env.FEATURES_DEPENDING_ON_STD }} --target x86_64-unknown-none -- -D warnings
149+
run: cargo hack clippy ${{ env.RUST_NO_STD_PKGS }} --locked --optional-deps --each-feature --ignore-unknown-features --features libm,u8_pipeline,f32_pipeline --exclude-features ${{ env.FEATURES_DEPENDING_ON_STD }} --target x86_64-unknown-none -- -D warnings
150150

151151
- name: cargo clippy
152-
run: cargo hack clippy --workspace --locked --optional-deps --each-feature --ignore-unknown-features --features std -- -D warnings
152+
run: cargo hack clippy --workspace --locked --optional-deps --each-feature --ignore-unknown-features --features std,u8_pipeline,f32_pipeline -- -D warnings
153+
154+
- name: cargo clippy (vello_cpu - u8 pipeline)
155+
run: cargo clippy -p vello_cpu --locked --no-default-features --features std,text,png,u8_pipeline
156+
157+
- name: cargo clippy (vello_cpu - f32 pipeline)
158+
run: cargo clippy -p vello_cpu --locked --no-default-features --features std,text,png,f32_pipeline
153159

154160
- name: cargo clippy (auxiliary)
155-
run: cargo hack clippy --workspace --locked --optional-deps --each-feature --ignore-unknown-features --features std --tests --benches --examples -- -D warnings
161+
run: cargo hack clippy --workspace --locked --optional-deps --each-feature --ignore-unknown-features --features std,u8_pipeline,f32_pipeline --tests --benches --examples -- -D warnings
156162

157163
clippy-stable-wasm:
158164
name: cargo clippy (wasm32)
@@ -178,13 +184,13 @@ jobs:
178184
save-if: ${{ github.event_name != 'merge_group' }}
179185

180186
- name: cargo clippy (no_std)
181-
run: cargo hack clippy ${{ env.RUST_NO_STD_PKGS }} --locked --target wasm32-unknown-unknown --optional-deps --each-feature --ignore-unknown-features --features libm --exclude-features ${{ env.FEATURES_DEPENDING_ON_STD }} -- -D warnings
187+
run: cargo hack clippy ${{ env.RUST_NO_STD_PKGS }} --locked --target wasm32-unknown-unknown --optional-deps --each-feature --ignore-unknown-features --features libm,u8_pipeline,f32_pipeline --exclude-features ${{ env.FEATURES_DEPENDING_ON_STD }} -- -D warnings
182188

183189
- name: cargo clippy
184-
run: cargo hack clippy --workspace ${{ env.NO_WASM_PKGS }} --locked --target wasm32-unknown-unknown --optional-deps --each-feature --ignore-unknown-features --features std -- -D warnings
190+
run: cargo hack clippy --workspace ${{ env.NO_WASM_PKGS }} --locked --target wasm32-unknown-unknown --optional-deps --each-feature --ignore-unknown-features --features std,u8_pipeline,f32_pipeline -- -D warnings
185191

186192
- name: cargo clippy (auxiliary)
187-
run: cargo hack clippy --workspace ${{ env.NO_WASM_PKGS }} --locked --target wasm32-unknown-unknown --optional-deps --each-feature --ignore-unknown-features --features std --tests --benches --examples -- -D warnings
193+
run: cargo hack clippy --workspace ${{ env.NO_WASM_PKGS }} --locked --target wasm32-unknown-unknown --optional-deps --each-feature --ignore-unknown-features --features std,u8_pipeline,f32_pipeline --tests --benches --examples -- -D warnings
188194

189195
prime-lfs-cache:
190196
name: Prime LFS Cache
@@ -422,10 +428,10 @@ jobs:
422428
save-if: ${{ github.event_name != 'merge_group' }}
423429

424430
- name: cargo check (no_std)
425-
run: cargo hack check ${{ env.RUST_NO_STD_PKGS }} --locked --optional-deps --each-feature --ignore-unknown-features --features libm --exclude-features ${{ env.FEATURES_DEPENDING_ON_STD }} --target x86_64-unknown-none
431+
run: cargo hack check ${{ env.RUST_NO_STD_PKGS }} --locked --optional-deps --each-feature --ignore-unknown-features --features libm,u8_pipeline,f32_pipeline --exclude-features ${{ env.FEATURES_DEPENDING_ON_STD }} --target x86_64-unknown-none
426432

427433
- name: cargo check
428-
run: cargo hack check ${{ env.RUST_MIN_VER_PKGS }} --locked --optional-deps --each-feature --ignore-unknown-features --features std
434+
run: cargo hack check ${{ env.RUST_MIN_VER_PKGS }} --locked --optional-deps --each-feature --ignore-unknown-features --features std,u8_pipeline,f32_pipeline
429435

430436
check-msrv-wasm:
431437
name: cargo check (msrv) (wasm32)
@@ -450,7 +456,7 @@ jobs:
450456
save-if: ${{ github.event_name != 'merge_group' }}
451457

452458
- name: cargo check
453-
run: cargo hack check ${{ env.RUST_MIN_VER_PKGS }} ${{ env.NO_WASM_PKGS }} --locked --target wasm32-unknown-unknown --optional-deps --each-feature --ignore-unknown-features --features std
459+
run: cargo hack check ${{ env.RUST_MIN_VER_PKGS }} ${{ env.NO_WASM_PKGS }} --locked --target wasm32-unknown-unknown --optional-deps --each-feature --ignore-unknown-features --features std,u8_pipeline,f32_pipeline
454460

455461
doc:
456462
name: cargo doc

sparse_strips/vello_cpu/Cargo.toml

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ rayon = { workspace = true, optional = true }
2626
thread_local = { workspace = true, optional = true }
2727

2828
[features]
29-
default = ["std", "png", "text"]
29+
default = ["std", "png", "text", "u8_pipeline"]
3030
# Get floating point functions from the standard library (likely using your target’s libc).
3131
std = ["vello_common/std"]
3232
# Use floating point implementations from libm.
@@ -45,6 +45,11 @@ multithreading = [
4545
# Add support for text rendering
4646
text = ["vello_common/text"]
4747

48+
# Speed focussed rendering using u8 math
49+
u8_pipeline = []
50+
# Quality focussed rendering using f32 math
51+
f32_pipeline = []
52+
4853
[lints]
4954
workspace = true
5055

sparse_strips/vello_cpu/src/dispatch/multi_threaded.rs

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ use crate::RenderMode;
55
use crate::dispatch::Dispatcher;
66
use crate::dispatch::multi_threaded::cost::{COST_THRESHOLD, estimate_render_task_cost};
77
use crate::dispatch::multi_threaded::worker::Worker;
8-
use crate::fine::{F32Kernel, Fine, FineKernel, U8Kernel};
8+
use crate::fine::{Fine, FineKernel};
99
use crate::kurbo::{Affine, BezPath, PathEl, Stroke};
1010
use crate::peniko::{BlendMode, Fill};
1111
use crate::region::Regions;
@@ -163,23 +163,27 @@ impl MultiThreadedDispatcher {
163163
dispatcher
164164
}
165165

166+
#[cfg(feature = "f32_pipeline")]
166167
fn rasterize_f32(
167168
&self,
168169
buffer: &mut [u8],
169170
width: u16,
170171
height: u16,
171172
encoded_paints: &[EncodedPaint],
172173
) {
174+
use crate::fine::F32Kernel;
173175
dispatch!(self.level, simd => self.rasterize_with::<_, F32Kernel>(simd, buffer, width, height, encoded_paints));
174176
}
175177

178+
#[cfg(feature = "u8_pipeline")]
176179
fn rasterize_u8(
177180
&self,
178181
buffer: &mut [u8],
179182
width: u16,
180183
height: u16,
181184
encoded_paints: &[EncodedPaint],
182185
) {
186+
use crate::fine::U8Kernel;
183187
dispatch!(self.level, simd => self.rasterize_with::<_, U8Kernel>(simd, buffer, width, height, encoded_paints));
184188
}
185189

@@ -569,6 +573,21 @@ impl Dispatcher for MultiThreadedDispatcher {
569573
) {
570574
assert!(self.flushed, "attempted to rasterize before flushing");
571575

576+
// Only u8 pipeline enabled
577+
#[cfg(all(feature = "u8_pipeline", not(feature = "f32_pipeline")))]
578+
{
579+
let _ = render_mode;
580+
self.rasterize_u8(buffer, width, height, encoded_paints);
581+
}
582+
// Only f32 pipeline enabled
583+
#[cfg(all(feature = "f32_pipeline", not(feature = "u8_pipeline")))]
584+
{
585+
let _ = render_mode;
586+
self.rasterize_f32(buffer, width, height, encoded_paints);
587+
}
588+
589+
// Both pipelines enabled
590+
#[cfg(all(feature = "f32_pipeline", feature = "u8_pipeline"))]
572591
match render_mode {
573592
RenderMode::OptimizeSpeed => self.rasterize_u8(buffer, width, height, encoded_paints),
574593
RenderMode::OptimizeQuality => {

sparse_strips/vello_cpu/src/dispatch/single_threaded.rs

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
use crate::RenderMode;
55
use crate::dispatch::Dispatcher;
6-
use crate::fine::{F32Kernel, Fine, FineKernel, U8Kernel};
6+
use crate::fine::{Fine, FineKernel};
77
use crate::kurbo::{Affine, BezPath, Stroke};
88
use crate::layer_manager::LayerManager;
99
use crate::peniko::{BlendMode, Fill};
@@ -12,7 +12,7 @@ use vello_common::clip::ClipContext;
1212
use vello_common::coarse::{Cmd, LayerKind, MODE_CPU, Wide, WideTilesBbox};
1313
use vello_common::color::palette::css::TRANSPARENT;
1414
use vello_common::encode::EncodedPaint;
15-
use vello_common::fearless_simd::{Level, Simd, dispatch};
15+
use vello_common::fearless_simd::{Level, Simd};
1616
use vello_common::filter_effects::Filter;
1717
use vello_common::mask::Mask;
1818
use vello_common::paint::{Paint, PremulColor};
@@ -87,27 +87,33 @@ impl SingleThreadedDispatcher {
8787
///
8888
/// This dispatches to the appropriate SIMD implementation based on the
8989
/// configured level, using f32 for intermediate calculations.
90+
#[cfg(feature = "f32_pipeline")]
9091
fn rasterize_f32(
9192
&self,
9293
buffer: &mut [u8],
9394
width: u16,
9495
height: u16,
9596
encoded_paints: &[EncodedPaint],
9697
) {
98+
use crate::fine::F32Kernel;
99+
use vello_common::fearless_simd::dispatch;
97100
dispatch!(self.level, simd => self.rasterize_with::<_, F32Kernel>(simd, buffer, width, height, encoded_paints));
98101
}
99102

100103
/// Rasterizes the scene using u8 precision (fast).
101104
///
102105
/// This dispatches to the appropriate SIMD implementation based on the
103106
/// configured level, using u8 for intermediate calculations to maximize speed.
107+
#[cfg(feature = "u8_pipeline")]
104108
fn rasterize_u8(
105109
&self,
106110
buffer: &mut [u8],
107111
width: u16,
108112
height: u16,
109113
encoded_paints: &[EncodedPaint],
110114
) {
115+
use crate::fine::U8Kernel;
116+
use vello_common::fearless_simd::dispatch;
111117
dispatch!(self.level, simd => self.rasterize_with::<_, U8Kernel>(simd, buffer, width, height, encoded_paints));
112118
}
113119

@@ -526,7 +532,22 @@ impl Dispatcher for SingleThreadedDispatcher {
526532
height: u16,
527533
encoded_paints: &[EncodedPaint],
528534
) {
529-
// Select precision based on render mode.
535+
// If only the u8 pipeline is enabled, then use it
536+
#[cfg(all(feature = "u8_pipeline", not(feature = "f32_pipeline")))]
537+
{
538+
let _ = render_mode;
539+
self.rasterize_u8(buffer, width, height, encoded_paints);
540+
}
541+
542+
// If only the f32 pipeline is enabled, then use it
543+
#[cfg(all(feature = "f32_pipeline", not(feature = "u8_pipeline")))]
544+
{
545+
let _ = render_mode;
546+
self.rasterize_f32(buffer, width, height, encoded_paints);
547+
}
548+
549+
// If both pipelines are enabled, select precision based on render mode parameter.
550+
#[cfg(all(feature = "u8_pipeline", feature = "f32_pipeline"))]
530551
match render_mode {
531552
RenderMode::OptimizeSpeed => {
532553
// Use u8 precision for faster rendering.
@@ -537,6 +558,13 @@ impl Dispatcher for SingleThreadedDispatcher {
537558
self.rasterize_f32(buffer, width, height, encoded_paints);
538559
}
539560
}
561+
562+
#[cfg(all(not(feature = "u8_pipeline"), not(feature = "f32_pipeline")))]
563+
{
564+
// This case never gets hit because there is a compile_error in the root.
565+
// But having this code disables some warnings and makes the compile error easier to read
566+
let _ = (buffer, render_mode, width, height, encoded_paints);
567+
}
540568
}
541569

542570
fn generate_wide_cmd(&mut self, strip_buf: &[Strip], paint: Paint, blend_mode: BlendMode) {

sparse_strips/vello_cpu/src/lib.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,9 @@ extern crate core;
120120
#[cfg(feature = "std")]
121121
extern crate std;
122122

123+
#[cfg(all(not(feature = "u8_pipeline"), not(feature = "f32_pipeline")))]
124+
compile_error!("vello_cpu must have at least one of the u8 or f32 pipelines enabled");
125+
123126
mod render;
124127

125128
mod dispatch;

sparse_strips/vello_sparse_tests/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ path = "tests/mod.rs"
1717
[dependencies]
1818
vello_api = { workspace = true }
1919
vello_common = { workspace = true, features = ["std"] }
20-
vello_cpu = { workspace = true, features = ["multithreading", "std"] }
20+
vello_cpu = { workspace = true, features = ["multithreading", "std", "f32_pipeline"] }
2121
vello_hybrid = { workspace = true }
2222
wgpu = { workspace = true, default-features = true }
2323
pollster = { workspace = true }

0 commit comments

Comments
 (0)