-
-
Notifications
You must be signed in to change notification settings - Fork 84
Fix scaling problems #197
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Fix scaling problems #197
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,24 +1,41 @@ | ||
| use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion}; | ||
| use itertools::Itertools; | ||
| use std::fs; | ||
|
|
||
| fn tiny_markdown_string(c: &mut Criterion) { | ||
| let doc = "A *single* [markdown](/path) string!".to_owned(); | ||
| c.bench_with_input(BenchmarkId::new("tiny", "markdown_string"), &doc, |b, s| { | ||
| b.iter(|| markdown::to_html(s)); | ||
| }); | ||
| } | ||
|
|
||
| fn readme(c: &mut Criterion) { | ||
| let doc = fs::read_to_string("readme.md").unwrap(); | ||
|
Comment on lines
12
to
13
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Criterion reported no difference for this benchmark. |
||
|
|
||
| c.bench_with_input(BenchmarkId::new("readme", "readme"), &doc, |b, s| { | ||
| c.bench_with_input(BenchmarkId::new("medium", "readme"), &doc, |b, s| { | ||
| b.iter(|| markdown::to_html(s)); | ||
| }); | ||
| } | ||
|
|
||
| // fn one_and_a_half_mb(c: &mut Criterion) { | ||
| // let doc = fs::read_to_string("../a-dump-of-markdown/markdown.md").unwrap(); | ||
| // let mut group = c.benchmark_group("giant"); | ||
| // group.sample_size(10); | ||
| // group.bench_with_input(BenchmarkId::new("giant", "1.5 mb"), &doc, |b, s| { | ||
| // b.iter(|| markdown::to_html(s)); | ||
| // }); | ||
| // group.finish(); | ||
| // } | ||
| // , one_and_a_half_mb | ||
| fn large_jsx_expressions(c: &mut Criterion) { | ||
| let num_components = 3; | ||
| let num_jsx_lines_per_component = 5000; | ||
| fn dummy_component(code: String) -> String { | ||
| return format!("<DummyComponent code={{`{code}`}} />"); | ||
| } | ||
| fn uuids(len: usize) -> String { | ||
| (0..len) | ||
| .map(|_| "770f93e8-b4ee-4ce8-ab0f-4ece7d8c1090") | ||
|
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hardcoding the same UUID seemed to reproduce the scaling behaviour, I left it like this to avoid adding another dependency. |
||
| .join("\n") | ||
| } | ||
| let doc = (0..num_components) | ||
| .map(|_| dummy_component(uuids(num_jsx_lines_per_component))) | ||
| .join("\n\n"); | ||
|
|
||
| c.bench_with_input(BenchmarkId::new("large", "jsx_expression"), &doc, |b, s| { | ||
| b.iter(|| markdown::to_html(s)); | ||
| }); | ||
| } | ||
|
|
||
| criterion_group!(benches, readme); | ||
| criterion_group!(benches, tiny_markdown_string, readme, large_jsx_expressions); | ||
| criterion_main!(benches); | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -9,7 +9,7 @@ | |
| //! through another tokenizer and inject the result. | ||
|
|
||
| use crate::event::Event; | ||
| use alloc::{vec, vec::Vec}; | ||
| use alloc::{collections::BTreeMap, vec::Vec}; | ||
|
|
||
| /// Shift `previous` and `next` links according to `jumps`. | ||
| /// | ||
|
|
@@ -58,13 +58,15 @@ fn shift_links(events: &mut [Event], jumps: &[(usize, usize, usize)]) { | |
| #[derive(Debug)] | ||
| pub struct EditMap { | ||
| /// Record of changes. | ||
| map: Vec<(usize, usize, Vec<Event>)>, | ||
| map: BTreeMap<usize, (usize, Vec<Event>)>, | ||
| } | ||
|
|
||
| impl EditMap { | ||
| /// Create a new edit map. | ||
| pub fn new() -> EditMap { | ||
| EditMap { map: vec![] } | ||
| EditMap { | ||
| map: BTreeMap::new(), | ||
| } | ||
| } | ||
| /// Create an edit: a remove and/or add at a certain place. | ||
| pub fn add(&mut self, index: usize, remove: usize, add: Vec<Event>) { | ||
|
|
@@ -76,36 +78,28 @@ impl EditMap { | |
| } | ||
| /// Done, change the events. | ||
| pub fn consume(&mut self, events: &mut Vec<Event>) { | ||
| self.map | ||
| .sort_unstable_by(|a, b| a.0.partial_cmp(&b.0).unwrap()); | ||
|
Comment on lines
-79
to
-80
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
|
|
||
| if self.map.is_empty() { | ||
| return; | ||
| } | ||
|
|
||
| // Calculate jumps: where items in the current list move to. | ||
| let mut jumps = Vec::with_capacity(self.map.len()); | ||
| let mut index = 0; | ||
| let mut add_acc = 0; | ||
| let mut remove_acc = 0; | ||
| while index < self.map.len() { | ||
| let (at, remove, add) = &self.map[index]; | ||
| for (at, (remove, add)) in self.map.iter() { | ||
| remove_acc += remove; | ||
frankharkins marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| add_acc += add.len(); | ||
| jumps.push((*at, remove_acc, add_acc)); | ||
| index += 1; | ||
| } | ||
|
|
||
| shift_links(events, &jumps); | ||
|
|
||
| let len_before = events.len(); | ||
| let mut index = self.map.len(); | ||
| let mut vecs = Vec::with_capacity(index * 2 + 1); | ||
| while index > 0 { | ||
| index -= 1; | ||
| vecs.push(events.split_off(self.map[index].0 + self.map[index].1)); | ||
| vecs.push(self.map[index].2.split_off(0)); | ||
| events.truncate(self.map[index].0); | ||
| let mut vecs = Vec::with_capacity(self.map.len() * 2 + 1); | ||
| for (at, (remove, add)) in self.map.iter_mut().rev() { | ||
| vecs.push(events.split_off(at + *remove)); | ||
frankharkins marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| vecs.push(add.split_off(0)); | ||
| events.truncate(*at); | ||
| } | ||
| vecs.push(events.split_off(0)); | ||
|
|
||
|
|
@@ -115,34 +109,28 @@ impl EditMap { | |
| events.append(&mut slice); | ||
| } | ||
|
|
||
| self.map.truncate(0); | ||
| self.map.clear(); | ||
| } | ||
| } | ||
|
|
||
| /// Create an edit. | ||
| fn add_impl(edit_map: &mut EditMap, at: usize, remove: usize, mut add: Vec<Event>, before: bool) { | ||
| let mut index = 0; | ||
|
|
||
| if remove == 0 && add.is_empty() { | ||
| return; | ||
| } | ||
|
|
||
| while index < edit_map.map.len() { | ||
| if edit_map.map[index].0 == at { | ||
| edit_map.map[index].1 += remove; | ||
|
|
||
| match edit_map.map.get_mut(&at) { | ||
|
Comment on lines
-130
to
+122
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think was the source of the quadratic behaviour; beforehand we were iterating through the |
||
| Some(edit) => { | ||
| edit.0 += remove; | ||
| if before { | ||
| add.append(&mut edit_map.map[index].2); | ||
| edit_map.map[index].2 = add; | ||
| add.append(&mut edit.1); | ||
| edit.1 = add; | ||
| } else { | ||
| edit_map.map[index].2.append(&mut add); | ||
| edit.1.append(&mut add); | ||
| } | ||
|
|
||
| return; | ||
| } | ||
|
|
||
| index += 1; | ||
| None => { | ||
| edit_map.map.insert(at, (remove, add)); | ||
| } | ||
| } | ||
|
|
||
| edit_map.map.push((at, remove, add)); | ||
| } | ||
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I noticed a very small slowdown in this tiny test case (~8.9us -> ~9.1us; ~3%). This seems to even out very quickly as size grows. I tried creating an adaptive data structure that switched from
VectoBTreeMapat a certain size, but the complexity was quite high and the performance improvement was basically negligible.