Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions crates/resonators/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,11 @@ harness = false

[dependencies]
num-complex = "0.4.6"

# WASM SIMD128 only. `wide::f32x4` is used by the per-sample bank inner
# loop when compiling for wasm32 with `target-feature=+simd128` enabled.
# On every other target the scalar path is used (LLVM auto-vectorises
# it to SSE/NEON just fine, and explicit SIMD there either matches or
# slightly regresses vs auto-vectorised scalar in our benchmarks).
[target.'cfg(all(target_arch = "wasm32", target_feature = "simd128"))'.dependencies]
wide = "0.8"
158 changes: 138 additions & 20 deletions crates/resonators/benches/bank.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use std::hint::black_box;
use std::time::Duration;

use criterion::{BenchmarkId, Criterion, Throughput, criterion_group, criterion_main};
use resonators::{ResonatorBank, ResonatorConfig, heuristic_alpha, midi_to_hz};
Expand All @@ -21,34 +22,151 @@ fn log_spaced_configs(n_bins: usize) -> Vec<ResonatorConfig> {
.collect()
}

const BIN_COUNTS: &[usize] = &[88, 264, 440, 880];

fn bench_bank(c: &mut Criterion) {
let n = SAMPLE_RATE as usize; // 1 second of a 440 Hz sine wave
let signal: Vec<f32> = (0..n)
.map(|i| (2.0 * std::f32::consts::PI * 440.0 * i as f32 / SAMPLE_RATE).sin())
.collect();

let mut group = c.benchmark_group("bank");
group.throughput(Throughput::Elements(n as u64));
group.sample_size(50);

for &n_bins in &[88, 264, 440, 880] {
let configs = log_spaced_configs(n_bins);
group.bench_with_input(
BenchmarkId::from_parameter(n_bins),
&configs,
|bencher, configs| {
let mut bank = ResonatorBank::new(configs, SAMPLE_RATE);
bencher.iter(|| {
bank.reset();
for &sample in &signal {
bank.process_sample(black_box(sample));
}
});
},
);
// Scalar path — forced via process_sample_scalar to bypass the
// x86_64 runtime-dispatch match, so the measurement reflects the
// scalar hot loop only. On non-x86 targets `process_sample` has no
// dispatch so we call it directly; on x86_64 LLVM auto-vectorises
// the scalar loop to whatever target-cpu supports (SSE2 baseline,
// AVX2/AVX-512 if enabled).
{
let mut group = c.benchmark_group("bank/scalar");
group.throughput(Throughput::Elements(n as u64));
group.sample_size(50);
// 10 s covers the largest bin count (880, ~17 ms/iter × 50
// samples ≈ 0.9 s + warmup) with headroom, so criterion won't
// warn about missed sample budget at any of the benched sizes.
group.measurement_time(Duration::from_secs(10));

for &n_bins in BIN_COUNTS {
let configs = log_spaced_configs(n_bins);
group.bench_with_input(
BenchmarkId::from_parameter(n_bins),
&configs,
|bencher, configs| {
let mut bank = ResonatorBank::new(configs, SAMPLE_RATE);
bencher.iter(|| {
bank.reset();
for &sample in &signal {
#[cfg(target_arch = "x86_64")]
bank.process_sample_scalar(black_box(sample));
#[cfg(not(target_arch = "x86_64"))]
bank.process_sample(black_box(sample));
}
});
},
);
}
group.finish();
}

// Runtime-dispatched path — `ResonatorBank::new` picks the widest
// supported backend, then `process_sample` does a per-call match
// and dispatches. The interesting measurement is the delta vs the
// forced `bank/avx512` (or `bank/avx2` on hosts without AVX-512):
// that's the cost of the dispatch match. Expected near-zero
// because the branch is predictable and set once at construction.
#[cfg(target_arch = "x86_64")]
{
let mut group = c.benchmark_group("bank/dispatch");
group.throughput(Throughput::Elements(n as u64));
group.sample_size(50);
group.measurement_time(Duration::from_secs(10));

for &n_bins in BIN_COUNTS {
let configs = log_spaced_configs(n_bins);
group.bench_with_input(
BenchmarkId::from_parameter(n_bins),
&configs,
|bencher, configs| {
let mut bank = ResonatorBank::new(configs, SAMPLE_RATE);
bencher.iter(|| {
bank.reset();
for &sample in &signal {
bank.process_sample(black_box(sample));
}
});
},
);
}
group.finish();
eprintln!("bank/dispatch backend = {:?}", resonators::Backend::detect());
}

group.finish();
// Explicit AVX2 + FMA — 8 bins per iteration via __m256 + vfmadd231ps.
#[cfg(target_arch = "x86_64")]
{
if std::arch::is_x86_feature_detected!("avx2") && std::arch::is_x86_feature_detected!("fma") {
let mut group = c.benchmark_group("bank/avx2");
group.throughput(Throughput::Elements(n as u64));
group.sample_size(50);
group.measurement_time(Duration::from_secs(10));

for &n_bins in BIN_COUNTS {
let configs = log_spaced_configs(n_bins);
group.bench_with_input(
BenchmarkId::from_parameter(n_bins),
&configs,
|bencher, configs| {
let mut bank = ResonatorBank::new(configs, SAMPLE_RATE);
bencher.iter(|| {
bank.reset();
// Safety: we've checked avx2+fma support above.
unsafe {
for &sample in &signal {
bank.process_sample_avx2(black_box(sample));
}
}
});
},
);
}
group.finish();
} else {
eprintln!("SKIPPED bank/avx2 — CPU lacks avx2 or fma");
}
}

// Explicit AVX-512F — 16 bins per iteration via __m512.
#[cfg(target_arch = "x86_64")]
{
if std::arch::is_x86_feature_detected!("avx512f") {
let mut group = c.benchmark_group("bank/avx512");
group.throughput(Throughput::Elements(n as u64));
group.sample_size(50);
group.measurement_time(Duration::from_secs(10));

for &n_bins in BIN_COUNTS {
let configs = log_spaced_configs(n_bins);
group.bench_with_input(
BenchmarkId::from_parameter(n_bins),
&configs,
|bencher, configs| {
let mut bank = ResonatorBank::new(configs, SAMPLE_RATE);
bencher.iter(|| {
bank.reset();
// Safety: we've checked avx512f support above.
unsafe {
for &sample in &signal {
bank.process_sample_avx512(black_box(sample));
}
}
});
},
);
}
group.finish();
} else {
eprintln!("SKIPPED bank/avx512 — CPU lacks avx512f");
}
}
}

criterion_group!(benches, bench_bank);
Expand Down
Loading