Skip to content

Commit 05fc052

Browse files
authored
Merge pull request #4 from Ameyanagi/feat/optimize-core-pipeline-performance
perf: reduce xafs clone churn and gate bench flamegraph profiling
2 parents 29e442d + 1084119 commit 05fc052

4 files changed

Lines changed: 165 additions & 38 deletions

File tree

crates/xraytsubaki/benches/autobk_stage_benchmark.rs

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
1+
mod perf;
2+
13
use criterion::{black_box, criterion_group, criterion_main, Criterion};
24

5+
use perf::FlamegraphProfiler;
36
use xraytsubaki::xafs::background::{AUTOBKSolver, AUTOBK};
47
use xraytsubaki::xafs::normalization::{NormalizationMethod, PrePostEdge};
58

@@ -53,9 +56,20 @@ fn criterion_benchmark(c: &mut Criterion) {
5356
});
5457
}
5558

59+
fn custom() -> Criterion {
60+
let base = Criterion::default().sample_size(20);
61+
let enable_profiler = std::env::args()
62+
.any(|arg| arg == "--profile-time" || arg.starts_with("--profile-time="));
63+
if enable_profiler {
64+
base.with_profiler(FlamegraphProfiler::new(1000))
65+
} else {
66+
base
67+
}
68+
}
69+
5670
criterion_group! {
5771
name = benches;
58-
config = Criterion::default().sample_size(20);
72+
config = custom();
5973
targets = criterion_benchmark
6074
}
6175

crates/xraytsubaki/doc/profiling.md

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -262,3 +262,64 @@ Notes:
262262

263263
- This slice reduced allocation pressure in both single and parallel allocator-instrumented runs.
264264
- Runtime impact is mixed: parallel center estimate improved while sequential and AUTOBK-stage medians regressed modestly on this machine/run.
265+
266+
## Slice H Metrics (allocation-pressure proposal implementation)
267+
268+
Date: 2026-02-08
269+
270+
Environment:
271+
272+
- `rustc 1.93.0`
273+
- `cargo 1.93.0`
274+
- `Darwin 25.2.0 arm64`
275+
276+
Commands:
277+
278+
```bash
279+
cargo test -p xraytsubaki
280+
cargo bench -p xraytsubaki --bench autobk_stage_benchmark -- --noplot
281+
cargo bench -p xraytsubaki --bench xas_group_benchmark_single -- --noplot
282+
cargo bench -p xraytsubaki --bench xas_group_benchmark_parallel -- --noplot
283+
cargo run -p xraytsubaki --example alloc_baseline --release
284+
bash crates/xraytsubaki/scripts/bench_regression_gate.sh informational
285+
cargo bench -p xraytsubaki --bench autobk_stage_benchmark -- --profile-time 5
286+
```
287+
288+
Pre-change baseline for this slice (captured before edits):
289+
290+
- `autobk_stage_legacy_lm`: `[2.4009 ms 2.4325 ms 2.4624 ms]`
291+
- `autobk_stage_linear_direct`: `[120.13 µs 121.48 µs 122.76 µs]`
292+
- `xas_group_benchmark_single`: `[1.5536 s 1.5717 s 1.5861 s]`
293+
- `xas_group_benchmark_parallel`: `[337.67 ms 344.21 ms 351.27 ms]`
294+
- `xas_group_benchmark_single_alloc`:
295+
- `alloc_calls=258799`
296+
- `alloc_bytes=85411908`
297+
- `xas_group_benchmark_parallel_alloc`:
298+
- `alloc_calls=25800408`
299+
- `alloc_bytes=8439161224`
300+
301+
Post-change results:
302+
303+
- `autobk_stage_legacy_lm` median point estimate: `2.581180 ms`
304+
- `autobk_stage_linear_direct` median point estimate: `123.671 µs`
305+
- `xas_group_benchmark_single` median point estimate: `1.585049 s`
306+
- `xas_group_benchmark_parallel` median point estimate: `392.217 ms`
307+
- `xas_group_benchmark_single_alloc`:
308+
- `alloc_calls=258799` (`0.00%`)
309+
- `alloc_bytes=85411908` (`0.00%`)
310+
- `xas_group_benchmark_parallel_alloc`:
311+
- `alloc_calls=25800407` (`-0.000004%`)
312+
- `alloc_bytes=8439161176` (`-0.000001%`)
313+
314+
Profiler output status:
315+
316+
- `autobk_stage_benchmark` now attaches the flamegraph profiler only when `--profile-time` (or `--profile-time=<value>`) is passed on the command line.
317+
- `--profile-time` run emits flamegraphs at:
318+
- `target/criterion/autobk_stage_legacy_lm/profile/flamegraph.svg`
319+
- `target/criterion/autobk_stage_linear_direct/profile/flamegraph.svg`
320+
- Normal `--noplot` runs do not force profiling overhead.
321+
322+
Notes:
323+
324+
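- Allocation-reduction targets for this slice were not met on this machine/run: allocation counts and bytes are effectively unchanged (single: identical; parallel: 1 fewer call and 48 fewer bytes).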
325+
- Regression gate is green in informational mode for the stored baseline (`10%` threshold).

crates/xraytsubaki/src/xafs/xasgroup.rs

Lines changed: 39 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -256,28 +256,34 @@ impl XASGroup {
256256
where
257257
F: FnMut(&mut XASSpectrum) -> Result<&mut XASSpectrum, XAFSError>,
258258
{
259-
let errors = self
260-
.spectra
261-
.iter_mut()
262-
.enumerate()
263-
.filter_map(|(index, spectrum)| {
264-
op(spectrum)
265-
.err()
266-
.map(|source| BatchSpectrumError { index, source })
267-
})
268-
.collect::<Vec<_>>();
259+
if self.spectra.is_empty() {
260+
return Ok(self);
261+
}
269262

270-
if errors.is_empty() {
271-
Ok(self)
272-
} else {
263+
let mut errors: Option<Vec<BatchSpectrumError>> = None;
264+
for (index, spectrum) in self.spectra.iter_mut().enumerate() {
265+
if let Err(source) = op(spectrum) {
266+
errors
267+
.get_or_insert_with(|| Vec::with_capacity(4))
268+
.push(BatchSpectrumError { index, source });
269+
}
270+
}
271+
272+
if let Some(errors) = errors {
273273
Err(BatchProcessError { errors })
274+
} else {
275+
Ok(self)
274276
}
275277
}
276278

277279
fn collect_par_errors<F>(&mut self, op: F) -> Result<&mut Self, BatchProcessError>
278280
where
279281
F: Fn(&mut XASSpectrum) -> Result<&mut XASSpectrum, XAFSError> + Sync + Send,
280282
{
283+
if self.spectra.is_empty() {
284+
return Ok(self);
285+
}
286+
281287
let mut errors = self
282288
.spectra
283289
.par_iter_mut()
@@ -288,11 +294,12 @@ impl XASGroup {
288294
.map(|source| BatchSpectrumError { index, source })
289295
})
290296
.collect::<Vec<_>>();
291-
errors.sort_by_key(|err| err.index);
292-
293297
if errors.is_empty() {
294298
Ok(self)
295299
} else {
300+
if errors.len() > 1 {
301+
errors.sort_by_key(|err| err.index);
302+
}
296303
Err(BatchProcessError { errors })
297304
}
298305
}
@@ -491,6 +498,23 @@ mod tests {
491498
assert_eq!(par_err.errors[0].index, 1);
492499
}
493500

501+
// The parallel `find_e0` batch must report every failing spectrum, ordered by
// its position in the group, even when several spectra fail.
#[test]
fn test_batch_find_e0_par_multiple_errors_are_sorted_by_index() {
    let data_file = String::from(TOP_DIR) + "/tests/testfiles/Ru_QAS.dat";
    let loaded = io::load_spectrum_QAS_trans(&data_file).unwrap();
    let blank = XASSpectrum::new();

    // Group layout: [blank, loaded, blank]; the test expects indices 0 and 2 to fail.
    let mut group = XASGroup::new();
    group
        .add_spectrum(blank.clone())
        .add_spectrum(loaded)
        .add_spectrum(blank);

    let batch_error = group.find_e0_par().unwrap_err();
    let failed_indices = batch_error
        .errors
        .iter()
        .map(|entry| entry.index)
        .collect::<Vec<_>>();

    assert_eq!(failed_indices, vec![0, 2]);
}
517+
494518
#[test]
495519
fn test_default_and_par_error_semantics_match() {
496520
let path = String::from(TOP_DIR) + "/tests/testfiles/Ru_QAS.dat";

crates/xraytsubaki/src/xafs/xasspectrum.rs

Lines changed: 50 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -145,35 +145,23 @@ impl XASSpectrum {
145145
energy: T,
146146
) -> Result<&mut Self, XAFSError> {
147147
self.energy = Some(energy.into());
148-
149-
let energy = self.energy.clone().ok_or_else(|| DataError::MissingData {
148+
let energy = self.energy.as_ref().ok_or_else(|| DataError::MissingData {
150149
field: "energy".to_string(),
151150
})?;
152-
let mu = self
153-
.raw_mu
154-
.clone()
155-
.ok_or_else(|| DataError::MissingData {
156-
field: "raw_mu".to_string(),
157-
})?
158-
.data
159-
.as_vec()
160-
.to_vec();
161-
let knot = self
162-
.raw_energy
163-
.clone()
164-
.ok_or_else(|| DataError::MissingData {
165-
field: "raw_energy".to_string(),
166-
})?
167-
.data
168-
.as_vec()
169-
.to_vec();
151+
let mu = self.raw_mu.as_ref().ok_or_else(|| DataError::MissingData {
152+
field: "raw_mu".to_string(),
153+
})?;
154+
let knot = self.raw_energy.as_ref().ok_or_else(|| DataError::MissingData {
155+
field: "raw_energy".to_string(),
156+
})?;
170157

171-
self.mu = Some(energy.interpolate(&knot, &mu).map_err(|e| {
158+
let interpolated = energy.interpolate(knot.as_slice(), mu.as_slice()).map_err(|e| {
172159
super::errors::MathError::SplineEvalFailed {
173160
x: 0.0,
174161
reason: e.to_string(),
175162
}
176-
})?);
163+
})?;
164+
self.mu = Some(interpolated);
177165

178166
Ok(self)
179167
}
@@ -649,6 +637,46 @@ pub mod tests {
649637
));
650638
}
651639

640+
// `interpolate_spectrum` must store the requested grid in `energy` and fill
// `mu` with values interpolated from the raw data (here raw mu = 2 * energy).
#[test]
fn test_interpolate_spectrum_updates_energy_and_mu() {
    let mut spectrum = XASSpectrum::new();
    spectrum.set_spectrum(vec![0.0, 1.0, 2.0, 3.0], vec![0.0, 2.0, 4.0, 6.0]);

    spectrum.interpolate_spectrum(vec![0.5, 1.5, 2.5]).unwrap();

    let expected_energy = DVector::from_vec(vec![0.5, 1.5, 2.5]);
    assert_eq!(spectrum.energy.as_ref().unwrap(), &expected_energy);

    // Compare the interpolated values point by point, in order.
    let interpolated = spectrum.mu.as_ref().unwrap();
    for (position, expected) in [1.0, 3.0, 5.0].into_iter().enumerate() {
        assert_abs_diff_eq!(interpolated[position], expected, epsilon = TEST_TOL);
    }
}
659+
660+
// When `raw_mu` is absent, `interpolate_spectrum` must fail with a
// `MissingData { field: "raw_mu" }` error. The new energy grid has already been
// stored by then, while the previously held `mu` is left as it was.
#[test]
fn test_interpolate_spectrum_missing_raw_mu_keeps_existing_mu() {
    let mut spectrum = XASSpectrum::new();
    spectrum.mu = Some(DVector::from_vec(vec![42.0]));
    spectrum.raw_mu = None;
    spectrum.raw_energy = Some(DVector::from_vec(vec![0.0, 1.0]));

    let err = spectrum.interpolate_spectrum(vec![0.25, 0.75]).unwrap_err();
    let is_missing_raw_mu = matches!(
        err,
        XAFSError::Data(DataError::MissingData { ref field }) if field == "raw_mu"
    );
    assert!(is_missing_raw_mu);

    let expected_energy = DVector::from_vec(vec![0.25, 0.75]);
    assert_eq!(spectrum.energy.as_ref().unwrap(), &expected_energy);

    let untouched_mu = DVector::from_vec(vec![42.0]);
    assert_eq!(spectrum.mu.as_ref().unwrap(), &untouched_mu);
}
679+
652680
#[test]
653681
#[cfg(feature = "ndarray-compat")]
654682
fn test_borrowed_k_chi_views_match_owned_getters() -> Result<(), Box<dyn std::error::Error>> {

0 commit comments

Comments
 (0)