Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ unicode-id = { features = ["no_std"], version = "0.3" }
[dev-dependencies]
criterion = "0.5"
env_logger = "0.11"
itertools = "0.13.0"
pretty_assertions = { workspace = true }
serde_json = { version = "1" }
swc_core = { version = "22", features = [
Expand Down
41 changes: 29 additions & 12 deletions benches/bench.rs
Original file line number Diff line number Diff line change
@@ -1,24 +1,41 @@
use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion};
use itertools::Itertools;
use std::fs;

fn tiny_markdown_string(c: &mut Criterion) {
let doc = "A *single* [markdown](/path) string!".to_owned();
Comment on lines +5 to +6
Copy link
Author

@frankharkins frankharkins Feb 7, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I noticed a very small slowdown in this tiny test case (~8.9us -> ~9.1us; ~3%). This seems to even out very quickly as size grows. I tried creating an adaptive data structure that switched from Vec to BTreeMap at a certain size, but the complexity was quite high and the performance improvement was basically negligible.

c.bench_with_input(BenchmarkId::new("tiny", "markdown_string"), &doc, |b, s| {
b.iter(|| markdown::to_html(s));
});
}

fn readme(c: &mut Criterion) {
let doc = fs::read_to_string("readme.md").unwrap();
Comment on lines 12 to 13
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Criterion reported no difference for this benchmark.


c.bench_with_input(BenchmarkId::new("readme", "readme"), &doc, |b, s| {
c.bench_with_input(BenchmarkId::new("medium", "readme"), &doc, |b, s| {
b.iter(|| markdown::to_html(s));
});
}

// fn one_and_a_half_mb(c: &mut Criterion) {
// let doc = fs::read_to_string("../a-dump-of-markdown/markdown.md").unwrap();
// let mut group = c.benchmark_group("giant");
// group.sample_size(10);
// group.bench_with_input(BenchmarkId::new("giant", "1.5 mb"), &doc, |b, s| {
// b.iter(|| markdown::to_html(s));
// });
// group.finish();
// }
// , one_and_a_half_mb
fn large_jsx_expressions(c: &mut Criterion) {
let num_components = 3;
let num_jsx_lines_per_component = 5000;
fn dummy_component(code: String) -> String {
return format!("<DummyComponent code={{`{code}`}} />");
}
fn uuids(len: usize) -> String {
(0..len)
.map(|_| "770f93e8-b4ee-4ce8-ab0f-4ece7d8c1090")
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hardcoding the same UUID seemed to reproduce the scaling behaviour, I left it like this to avoid adding another dependency.

.join("\n")
}
let doc = (0..num_components)
.map(|_| dummy_component(uuids(num_jsx_lines_per_component)))
.join("\n\n");

c.bench_with_input(BenchmarkId::new("large", "jsx_expression"), &doc, |b, s| {
b.iter(|| markdown::to_html(s));
});
}

criterion_group!(benches, readme);
criterion_group!(benches, tiny_markdown_string, readme, large_jsx_expressions);
criterion_main!(benches);
54 changes: 21 additions & 33 deletions src/util/edit_map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
//! through another tokenizer and inject the result.

use crate::event::Event;
use alloc::{vec, vec::Vec};
use alloc::{collections::BTreeMap, vec::Vec};

/// Shift `previous` and `next` links according to `jumps`.
///
Expand Down Expand Up @@ -58,13 +58,15 @@ fn shift_links(events: &mut [Event], jumps: &[(usize, usize, usize)]) {
#[derive(Debug)]
pub struct EditMap {
/// Record of changes.
map: Vec<(usize, usize, Vec<Event>)>,
map: BTreeMap<usize, (usize, Vec<Event>)>,
}

impl EditMap {
/// Create a new edit map.
pub fn new() -> EditMap {
EditMap { map: vec![] }
EditMap {
map: BTreeMap::new(),
}
}
/// Create an edit: a remove and/or add at a certain place.
pub fn add(&mut self, index: usize, remove: usize, add: Vec<Event>) {
Expand All @@ -76,36 +78,28 @@ impl EditMap {
}
/// Done, change the events.
pub fn consume(&mut self, events: &mut Vec<Event>) {
self.map
.sort_unstable_by(|a, b| a.0.partial_cmp(&b.0).unwrap());
Comment on lines -79 to -80
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

BTreeMap.iter() is sorted already


if self.map.is_empty() {
return;
}

// Calculate jumps: where items in the current list move to.
let mut jumps = Vec::with_capacity(self.map.len());
let mut index = 0;
let mut add_acc = 0;
let mut remove_acc = 0;
while index < self.map.len() {
let (at, remove, add) = &self.map[index];
for (at, (remove, add)) in self.map.iter() {
remove_acc += remove;
add_acc += add.len();
jumps.push((*at, remove_acc, add_acc));
index += 1;
}

shift_links(events, &jumps);

let len_before = events.len();
let mut index = self.map.len();
let mut vecs = Vec::with_capacity(index * 2 + 1);
while index > 0 {
index -= 1;
vecs.push(events.split_off(self.map[index].0 + self.map[index].1));
vecs.push(self.map[index].2.split_off(0));
events.truncate(self.map[index].0);
let mut vecs = Vec::with_capacity(self.map.len() * 2 + 1);
for (at, (remove, add)) in self.map.iter_mut().rev() {
vecs.push(events.split_off(at + *remove));
vecs.push(add.split_off(0));
events.truncate(*at);
}
vecs.push(events.split_off(0));

Expand All @@ -115,34 +109,28 @@ impl EditMap {
events.append(&mut slice);
}

self.map.truncate(0);
self.map.clear();
}
}

/// Create an edit.
fn add_impl(edit_map: &mut EditMap, at: usize, remove: usize, mut add: Vec<Event>, before: bool) {
let mut index = 0;

if remove == 0 && add.is_empty() {
return;
}

while index < edit_map.map.len() {
if edit_map.map[index].0 == at {
edit_map.map[index].1 += remove;

match edit_map.map.get_mut(&at) {
Comment on lines -130 to +122
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think was the source of the quadratic behaviour; beforehand we were iterating through the Vec to find elements, which is $O(n)$. With BTreeMap, we can get an element in $O(log(n))$ time.

Some(edit) => {
edit.0 += remove;
if before {
add.append(&mut edit_map.map[index].2);
edit_map.map[index].2 = add;
add.append(&mut edit.1);
edit.1 = add;
} else {
edit_map.map[index].2.append(&mut add);
edit.1.append(&mut add);
}

return;
}

index += 1;
None => {
edit_map.map.insert(at, (remove, add));
}
}

edit_map.map.push((at, remove, add));
}
Loading