Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,12 @@

All notable changes to this project will be documented in this file. Format based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/); this project adheres to [Semantic Versioning](https://semver.org/).

## [0.1.1] - 2026-04-24

### Changed

- WASM: ~13-15x speedup in `ResonatorBank::process_sample` / `process_samples` at 88-880 bins, by recovering autovectorization that was defeated by `f32::mul_add` lowering to per-lane `fmaf` calls on wasm32+simd128. Diagnosis by @pengowray (#1).

## [Python 0.1.1] - 2026-04-22

### Changed
Expand Down
4 changes: 2 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ members = ["crates/*"]
resolver = "3"

[workspace.package]
version = "0.1.0"
version = "0.1.1"
authors = ["John Hartquist <john@hartquist.com>"]
edition = "2024"
license = "MIT OR Apache-2.0"
Expand Down
2 changes: 1 addition & 1 deletion crates/resonators-py/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
name = "resonators-py"
description = "Python bindings for the resonators crate."
publish = false
version = "0.1.1"
version.workspace = true
authors.workspace = true
edition.workspace = true
license.workspace = true
Expand Down
2 changes: 1 addition & 1 deletion crates/resonators-py/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "maturin"

[project]
name = "resonators"
version = "0.1.1"
dynamic = ["version"]
description = "Rust implementation of the Resonate algorithm for low-latency spectral analysis."
readme = "README.md"
requires-python = ">=3.9"
Expand Down
91 changes: 69 additions & 22 deletions crates/resonators/src/bank.rs
Original file line number Diff line number Diff line change
Expand Up @@ -104,26 +104,7 @@ impl ResonatorBank {
/// Updates every resonator with a single input sample.
#[inline]
pub fn process_sample(&mut self, sample: f32) {
for k in 0..self.n_resonators {
let alpha = self.alphas[k];
let beta = self.betas[k];
let alpha_sample = alpha * sample;

// EWMA accumulation
self.r_re[k] = (1.0 - alpha).mul_add(self.r_re[k], alpha_sample * self.z_re[k]);
self.r_im[k] = (1.0 - alpha).mul_add(self.r_im[k], alpha_sample * self.z_im[k]);

// output smoothing
self.rr_re[k] = (1.0 - beta).mul_add(self.rr_re[k], beta * self.r_re[k]);
self.rr_im[k] = (1.0 - beta).mul_add(self.rr_im[k], beta * self.r_im[k]);

// rotate phasor
let zr = self.z_re[k];
let zi = self.z_im[k];
self.z_re[k] = zr * self.w_re[k] - zi * self.w_im[k];
self.z_im[k] = zr * self.w_im[k] + zi * self.w_re[k];
}

self.process_sample_inner(sample);
self.sample_count += 1;
if self.sample_count.is_multiple_of(STABILIZE_EVERY) {
self.stabilize();
Expand All @@ -133,11 +114,64 @@ impl ResonatorBank {
/// Updates every resonator with a block of input samples, in order.
///
/// The input is consumed in chunks. Each chunk is at most
/// `samples_until_stabilize()` samples long, so `sample_count` can only land
/// on a multiple of `STABILIZE_EVERY` at the end of a chunk; that is where
/// `stabilize()` is invoked. An empty `samples` slice does nothing.
#[inline]
pub fn process_samples(&mut self, samples: &[f32]) {
// NOTE(review): the next two lines look like the pre-change per-sample loop
// kept by the diff rendering; the chunked loop below appears to replace them.
// Confirm against the merged file.
for &s in samples {
self.process_sample(s);
// Unprocessed tail of the input; shrinks by one chunk per iteration.
let mut remaining = samples;
while !remaining.is_empty() {
// Never run past the next point at which the stabilize check can fire.
let chunk_len = remaining.len().min(self.samples_until_stabilize());
let (chunk, rest) = remaining.split_at(chunk_len);

// Per-sample update only; the counter is advanced once per chunk below.
for &sample in chunk {
self.process_sample_inner(sample);
}

// Account for the whole chunk at once, then stabilize if the counter is
// now a multiple of STABILIZE_EVERY.
self.sample_count += chunk_len as u64;
if self.sample_count.is_multiple_of(STABILIZE_EVERY) {
self.stabilize();
}
remaining = rest;
}
}

/// Advances every resonator by one input sample.
///
/// Only the per-resonator state (`r_*`, `rr_*`, `z_*`) is updated here; the
/// sample counter and the stabilize check are left to the caller.
#[inline(always)]
fn process_sample_inner(&mut self, sample: f32) {
    // Re-slice every per-resonator buffer to one shared length up front so the
    // indexed loop below can drop bounds checks and vectorize cleanly.
    let len = self.n_resonators;
    let (alphas, betas) = (&self.alphas[..len], &self.betas[..len]);
    let (w_re, w_im) = (&self.w_re[..len], &self.w_im[..len]);
    let (r_re, r_im) = (&mut self.r_re[..len], &mut self.r_im[..len]);
    let (rr_re, rr_im) = (&mut self.rr_re[..len], &mut self.rr_im[..len]);
    let (z_re, z_im) = (&mut self.z_re[..len], &mut self.z_im[..len]);

    for i in 0..len {
        let (a, b) = (alphas[i], betas[i]);
        // Phasor value before this step's rotation; used by both the
        // accumulation and the rotation below.
        let (zr, zi) = (z_re[i], z_im[i]);
        let drive = a * sample;

        // EWMA accumulation
        let acc_re = mul_add(1.0 - a, r_re[i], drive * zr);
        let acc_im = mul_add(1.0 - a, r_im[i], drive * zi);
        r_re[i] = acc_re;
        r_im[i] = acc_im;

        // output smoothing (fed by the freshly updated accumulator)
        rr_re[i] = mul_add(1.0 - b, rr_re[i], b * acc_re);
        rr_im[i] = mul_add(1.0 - b, rr_im[i], b * acc_im);

        // rotate phasor: complex multiply of (zr, zi) by (w_re, w_im)
        let (cr, ci) = (w_re[i], w_im[i]);
        z_re[i] = zr * cr - zi * ci;
        z_im[i] = zr * ci + zi * cr;
    }
}

/// Returns how many more samples may be processed before `sample_count`
/// next becomes a multiple of `STABILIZE_EVERY`.
///
/// The remainder is strictly less than `STABILIZE_EVERY`, so the result is
/// never zero.
fn samples_until_stabilize(&self) -> usize {
    let period = STABILIZE_EVERY as usize;
    let elapsed = (self.sample_count % STABILIZE_EVERY) as usize;
    period - elapsed
}

/// Processes `signal` in hops and returns the complex state of every
/// resonator at the end of each hop.
///
Expand Down Expand Up @@ -231,6 +265,19 @@ impl ResonatorBank {
}
}

/// Computes `a * b + c`.
///
/// Unfused on wasm32+simd128: `f32::mul_add` kills autovectorization there,
/// so a separate multiply and add is used instead. Every other target defers
/// to `f32::mul_add`.
#[inline(always)]
fn mul_add(a: f32, b: f32, c: f32) -> f32 {
    // `cfg!` expands to a compile-time boolean, so only one arm survives.
    if cfg!(all(target_arch = "wasm32", target_feature = "simd128")) {
        a * b + c
    } else {
        a.mul_add(b, c)
    }
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down
Loading