Skip to content

Commit efe5d84

Browse files
crowlKats and claude committed
perf(html): reduce memory usage for HTML generator
Major memory optimizations targeting JSR's per-request rendering pattern (create GenerateCtx, render one page, drop everything).

## Key changes

### DocNodeWithContext clone cost: O(tree) → O(1)

- `parent: Box<DocNodeWithContext>` → `Rc<DocNodeWithContext>` — cloning no longer deep-copies the entire ancestor chain
- `namespace_children: Vec<DocNodeWithContext>` → `Rc<Vec<DocNodeWithContext>>` — cloning no longer deep-copies all children
- Shared parent `Rc` in `get_drilldown_symbols` — N children share one parent instead of cloning it N times

### Document.symbols: Vec<Symbol> → Vec<Arc<Symbol>>

ParseOutput cloning (used by JSR's cache) now does refcount bumps instead of deep-copying every symbol. Clone cost dropped from 4.48 MB to 0.02 MB. `Arc::make_mut` provides copy-on-write for the rare FnOrConstructor mutation in GenerateCtx.

### Lazy reference_index

The reference_index (HashMap cloning every DocNodeWithContext) is now built on first access via OnceCell instead of eagerly in GenerateCtx::new. For simple symbol page renders that don't hit re-exports, this is never triggered. GenerateCtx creation: 4.31 MB → 1.85 MB.

### generate_json returns HashMap<String, String>

Replaced `serde_json::Value` output with pre-serialized JSON strings, eliminating ~4.6x memory overhead from Value's per-field heap allocations.

### Streaming API: generate_json_with

New callback-based API that emits pages as they're generated, allowing callers to process and drop immediately. Deduplicates ~85K duplicate page emissions that were previously silently masked by HashMap insertion.
### Allocation reduction in hot paths

- `lookup_symbol_href`: reusable Vec buffer instead of per-iteration clone
- `compute_namespaced_symbols`: reusable path buffer instead of per-method/property Vec allocation

## Impact (zod fixture: 2049 symbols, 8 modules)

JSR per-request:

- ParseOutput clone: 4.48 MB → 0.02 MB
- GenerateCtx creation: 8.72 MB → 1.85 MB
- Total: ~13 MB → ~2.1 MB (84% reduction)

Full generation:

- Collecting: 1422 MB → 412 MB (71% reduction)
- Streaming: 336 MB peak, 0 MB retained

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 51558a5 commit efe5d84

20 files changed

Lines changed: 852 additions & 26330 deletions

examples/ddoc/main.rs

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -212,7 +212,10 @@ async fn run() -> anyhow::Result<()> {
212212
for (_, doc) in &mut doc_nodes_by_url {
213213
if let Some(filter) = &filter {
214214
let symbols = std::mem::take(&mut doc.symbols);
215-
doc.symbols = find_nodes_by_name_recursively(symbols, filter);
215+
doc.symbols = find_nodes_by_name_recursively(symbols, filter)
216+
.into_iter()
217+
.map(std::sync::Arc::new)
218+
.collect();
216219
}
217220
}
218221

examples/memory_profile.rs

Lines changed: 315 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,315 @@
1+
use deno_doc::html::*;
2+
use deno_doc::html::pages::SymbolPage;
3+
use indexmap::IndexMap;
4+
use std::alloc::{GlobalAlloc, Layout, System};
5+
use std::rc::Rc;
6+
use std::sync::atomic::{AtomicUsize, Ordering};
7+
use std::time::Instant;
8+
9+
struct TrackingAllocator;
10+
11+
static ALLOCATED: AtomicUsize = AtomicUsize::new(0);
12+
static PEAK: AtomicUsize = AtomicUsize::new(0);
13+
14+
unsafe impl GlobalAlloc for TrackingAllocator {
15+
unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
16+
let ret = unsafe { System.alloc(layout) };
17+
if !ret.is_null() {
18+
let current =
19+
ALLOCATED.fetch_add(layout.size(), Ordering::Relaxed) + layout.size();
20+
PEAK.fetch_max(current, Ordering::Relaxed);
21+
}
22+
ret
23+
}
24+
25+
unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
26+
unsafe { System.dealloc(ptr, layout) };
27+
ALLOCATED.fetch_sub(layout.size(), Ordering::Relaxed);
28+
}
29+
}
30+
31+
#[global_allocator]
32+
static GLOBAL: TrackingAllocator = TrackingAllocator;
33+
34+
fn reset_peak() {
35+
PEAK.store(ALLOCATED.load(Ordering::Relaxed), Ordering::Relaxed);
36+
}
37+
38+
fn current_allocated() -> usize {
39+
ALLOCATED.load(Ordering::Relaxed)
40+
}
41+
42+
fn peak_allocated() -> usize {
43+
PEAK.load(Ordering::Relaxed)
44+
}
45+
46+
fn mb(bytes: usize) -> f64 {
47+
bytes as f64 / (1024.0 * 1024.0)
48+
}
49+
50+
struct EmptyResolver;
51+
52+
impl HrefResolver for EmptyResolver {
53+
fn resolve_path(
54+
&self,
55+
current: UrlResolveKind,
56+
target: UrlResolveKind,
57+
) -> String {
58+
href_path_resolve(current, target)
59+
}
60+
fn resolve_global_symbol(&self, _symbol: &[String]) -> Option<String> {
61+
None
62+
}
63+
fn resolve_import_href(
64+
&self,
65+
_symbol: &[String],
66+
_src: &str,
67+
) -> Option<String> {
68+
None
69+
}
70+
fn resolve_source(&self, _location: &deno_doc::Location) -> Option<String> {
71+
None
72+
}
73+
fn resolve_external_jsdoc_module(
74+
&self,
75+
_module: &str,
76+
_symbol: Option<&str>,
77+
) -> Option<(String, String)> {
78+
None
79+
}
80+
}
81+
82+
impl UsageComposer for EmptyResolver {
83+
fn is_single_mode(&self) -> bool {
84+
true
85+
}
86+
fn compose(
87+
&self,
88+
_current_resolve: UrlResolveKind,
89+
_usage_to_md: UsageToMd,
90+
) -> IndexMap<UsageComposerEntry, String> {
91+
IndexMap::new()
92+
}
93+
}
94+
95+
fn make_options() -> GenerateOptions {
96+
GenerateOptions {
97+
package_name: Some("@zod/zod".to_string()),
98+
main_entrypoint: None,
99+
href_resolver: Rc::new(EmptyResolver),
100+
usage_composer: Some(Rc::new(EmptyResolver)),
101+
rewrite_map: None,
102+
category_docs: None,
103+
disable_search: false,
104+
symbol_redirect_map: None,
105+
default_symbol_map: None,
106+
markdown_renderer: comrak::create_renderer(None, None, None),
107+
markdown_stripper: Rc::new(comrak::strip),
108+
head_inject: None,
109+
id_prefix: None,
110+
diff_only: false,
111+
}
112+
}
113+
114+
fn main() {
115+
let raw = std::fs::read_to_string(
116+
std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
117+
.join("tests/testdata/@zod_zod_4.3.6_raw.json"),
118+
)
119+
.unwrap();
120+
121+
eprintln!(
122+
"=== Memory Profile: deno_doc HTML generator (zod fixture) ===\n"
123+
);
124+
125+
let baseline = current_allocated();
126+
eprintln!("Baseline allocation: {:.2} MB", mb(baseline));
127+
128+
// Parse the fixture into ParseOutput
129+
reset_peak();
130+
let before = current_allocated();
131+
let t0 = Instant::now();
132+
133+
let fixture: serde_json::Map<String, serde_json::Value> =
134+
serde_json::from_str(&raw).unwrap();
135+
136+
let mut doc_nodes_by_url: deno_doc::ParseOutput = IndexMap::new();
137+
for (url, v1_nodes) in fixture {
138+
let specifier = deno_ast::ModuleSpecifier::parse(&url).unwrap();
139+
let doc = deno_doc::docnodes_v1_to_v2(v1_nodes);
140+
doc_nodes_by_url.insert(specifier, doc);
141+
}
142+
143+
let parse_time = t0.elapsed();
144+
let after_parse = current_allocated();
145+
let peak_parse = peak_allocated();
146+
eprintln!(
147+
"ParseOutput: retained={:.2} MB, peak={:.2} MB, time={:.1}ms",
148+
mb(after_parse - before),
149+
mb(peak_parse - before),
150+
parse_time.as_secs_f64() * 1000.0
151+
);
152+
153+
let num_modules = doc_nodes_by_url.len();
154+
let num_symbols: usize =
155+
doc_nodes_by_url.values().map(|d| d.symbols.len()).sum();
156+
eprintln!(" modules={num_modules}, symbols={num_symbols}");
157+
158+
// === JSR-like pattern: create GenerateCtx + render single page ===
159+
eprintln!("\n--- JSR-like single page rendering ---");
160+
161+
// Simulate JSR: clone ParseOutput from cache (JSR caches Arc<ParseOutput>)
162+
reset_peak();
163+
let before_clone = current_allocated();
164+
let tc = Instant::now();
165+
let cached_output = doc_nodes_by_url.clone();
166+
let clone_time = tc.elapsed();
167+
eprintln!(
168+
"ParseOutput clone: retained={:.2} MB, peak={:.2} MB, time={:.1}ms",
169+
mb(current_allocated() - before_clone),
170+
mb(peak_allocated() - before_clone),
171+
clone_time.as_secs_f64() * 1000.0
172+
);
173+
174+
reset_peak();
175+
let before_ctx = current_allocated();
176+
let t1 = Instant::now();
177+
178+
let mut options = make_options();
179+
options.main_entrypoint =
180+
Some(cached_output.keys().next().unwrap().clone());
181+
182+
let ctx = GenerateCtx::create_basic(options, cached_output, None).unwrap();
183+
184+
let ctx_time = t1.elapsed();
185+
let after_ctx = current_allocated();
186+
let peak_ctx = peak_allocated();
187+
eprintln!(
188+
"GenerateCtx::create_basic: retained={:.2} MB, peak={:.2} MB, time={:.1}ms",
189+
mb(after_ctx - before_ctx),
190+
mb(peak_ctx - before_ctx),
191+
ctx_time.as_secs_f64() * 1000.0
192+
);
193+
194+
let num_doc_nodes: usize = ctx.doc_nodes.values().map(|v| v.len()).sum();
195+
eprintln!(" doc_nodes entries={num_doc_nodes}");
196+
197+
// Render a single symbol page (like JSR DocsRequest::Symbol)
198+
reset_peak();
199+
let before_render = current_allocated();
200+
let t2 = Instant::now();
201+
202+
let (short_path, doc_nodes) = ctx.doc_nodes.iter().next().unwrap();
203+
let symbol_pages =
204+
generate_symbol_pages_for_module(&ctx, short_path, doc_nodes);
205+
206+
let render_time = t2.elapsed();
207+
let after_render = current_allocated();
208+
let peak_render = peak_allocated();
209+
let num_pages = symbol_pages.len();
210+
eprintln!(
211+
"generate_symbol_pages_for_module: retained={:.2} MB, peak={:.2} MB, time={:.1}ms",
212+
mb(after_render - before_render),
213+
mb(peak_render - before_render),
214+
render_time.as_secs_f64() * 1000.0
215+
);
216+
eprintln!(" symbol pages generated: {num_pages}");
217+
218+
// Count symbol vs redirect pages
219+
let mut symbol_count = 0usize;
220+
let mut redirect_count = 0usize;
221+
for page in &symbol_pages {
222+
match page {
223+
SymbolPage::Symbol { .. } => symbol_count += 1,
224+
SymbolPage::Redirect { .. } => redirect_count += 1,
225+
}
226+
}
227+
eprintln!(
228+
" symbols: {symbol_count}, redirects: {redirect_count}"
229+
);
230+
231+
// Drop everything and show cleanup
232+
drop(symbol_pages);
233+
let after_drop_pages = current_allocated();
234+
235+
drop(ctx);
236+
let after_drop_ctx = current_allocated();
237+
238+
eprintln!("\n--- Cleanup ---");
239+
eprintln!(
240+
"After drop symbol_pages: {:.2} MB",
241+
mb(after_drop_pages)
242+
);
243+
eprintln!(
244+
"After drop GenerateCtx: {:.2} MB",
245+
mb(after_drop_ctx)
246+
);
247+
248+
// Drop everything from previous test to get clean baseline
249+
drop(doc_nodes_by_url);
250+
eprintln!(
251+
"After cleanup: {:.2} MB\n",
252+
mb(current_allocated())
253+
);
254+
255+
// === generate_json_with (streaming) — fresh run ===
256+
eprintln!("--- generate_json_with (streaming) ---");
257+
258+
let raw2 = std::fs::read_to_string(
259+
std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
260+
.join("tests/testdata/@zod_zod_4.3.6_raw.json"),
261+
)
262+
.unwrap();
263+
let fixture2: serde_json::Map<String, serde_json::Value> =
264+
serde_json::from_str(&raw2).unwrap();
265+
let mut doc_nodes2: deno_doc::ParseOutput = IndexMap::new();
266+
for (url, v1_nodes) in fixture2 {
267+
let specifier = deno_ast::ModuleSpecifier::parse(&url).unwrap();
268+
let doc = deno_doc::docnodes_v1_to_v2(v1_nodes);
269+
doc_nodes2.insert(specifier, doc);
270+
}
271+
272+
reset_peak();
273+
let before_full = current_allocated();
274+
let t4 = Instant::now();
275+
276+
let mut options2 = make_options();
277+
options2.main_entrypoint =
278+
Some(doc_nodes2.keys().next().unwrap().clone());
279+
let ctx2 =
280+
GenerateCtx::create_basic(options2, doc_nodes2, None).unwrap();
281+
282+
let mut file_count = 0usize;
283+
let mut total_json_size = 0usize;
284+
let mut largest_file = (String::new(), 0usize);
285+
let mut seen = std::collections::HashSet::new();
286+
let mut dup_count = 0usize;
287+
generate_json_with(ctx2, |name, content| {
288+
let len = content.len();
289+
total_json_size += len;
290+
file_count += 1;
291+
if !seen.insert(name.clone()) {
292+
dup_count += 1;
293+
}
294+
if len > largest_file.1 {
295+
largest_file = (name, len);
296+
}
297+
// Drop content immediately — this is the streaming benefit
298+
})
299+
.unwrap();
300+
301+
let full_time = t4.elapsed();
302+
let peak_full = peak_allocated();
303+
eprintln!(
304+
"generate_json_with (streaming): peak={:.2} MB, time={:.1}ms",
305+
mb(peak_full - before_full),
306+
full_time.as_secs_f64() * 1000.0
307+
);
308+
eprintln!(" output files={file_count} (unique={}, dups={dup_count})", seen.len());
309+
eprintln!(" total JSON output size={:.2} MB", mb(total_json_size));
310+
eprintln!(
311+
" largest file: {:.1} MB {}",
312+
mb(largest_file.1),
313+
largest_file.0,
314+
);
315+
}

lib/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -673,6 +673,7 @@ fn generate_html_inner(
673673
let serializer =
674674
serde_wasm_bindgen::Serializer::new().serialize_maps_as_objects(true);
675675

676+
// files is HashMap<String, String> where values are pre-serialized JSON
676677
files
677678
.serialize(&serializer)
678679
.map_err(|err| anyhow!("{}", err))

src/diagnostics.rs

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,7 @@ use crate::util::symbol::symbol_has_ignorable_js_doc_tag;
1313
use crate::variable::VariableDef;
1414

1515
use deno_ast::ModuleSpecifier;
16+
use std::sync::Arc;
1617
use deno_ast::SourceRange;
1718
use deno_ast::SourceTextInfo;
1819
use deno_ast::diagnostics::Diagnostic;
@@ -270,9 +271,9 @@ impl<'a> DiagnosticsCollector<'a> {
270271
inner
271272
}
272273

273-
pub fn analyze_doc_nodes(&mut self, doc_nodes: &[Symbol]) {
274+
pub fn analyze_doc_nodes(&mut self, doc_nodes: &[Arc<Symbol>]) {
274275
DiagnosticDocNodeVisitor { diagnostics: self }
275-
.visit_doc_nodes(doc_nodes.iter())
276+
.visit_doc_nodes(doc_nodes.iter().map(|s| &**s))
276277
}
277278

278279
fn check_missing_js_doc(&mut self, js_doc: &JsDoc, location: &Location) {

src/diff/mod.rs

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,8 @@ use indexmap::IndexSet;
1616
use serde::Deserialize;
1717
use serde::Serialize;
1818

19+
use std::sync::Arc;
20+
1921
use crate::Symbol;
2022
use crate::node::Declaration;
2123
use crate::node::DeclarationDef;
@@ -199,7 +201,7 @@ pub struct ModuleDiff {
199201
}
200202

201203
impl ModuleDiff {
202-
pub fn diff(old_nodes: &[Symbol], new_nodes: &[Symbol]) -> Self {
204+
pub fn diff(old_nodes: &[Arc<Symbol>], new_nodes: &[Arc<Symbol>]) -> Self {
203205
let old_map = build_name_map(old_nodes);
204206
let new_map = build_name_map(new_nodes);
205207

@@ -321,8 +323,8 @@ fn try_detect_rename(old: &Symbol, new: &Symbol) -> Option<SymbolDiff> {
321323
})
322324
}
323325

324-
fn build_name_map(nodes: &[Symbol]) -> IndexMap<&str, &Symbol> {
325-
nodes.iter().map(|n| (n.name.as_ref(), n)).collect()
326+
fn build_name_map(nodes: &[Arc<Symbol>]) -> IndexMap<&str, &Symbol> {
327+
nodes.iter().map(|n| (n.name.as_ref(), n.as_ref())).collect()
326328
}
327329

328330
#[derive(Debug, Clone, Serialize, Deserialize)]

0 commit comments

Comments
 (0)