s attach to it.
let children = tag.children();
for child_handle in children.top().iter() {
- walk(state, child_handle, parser, Some(list_idx));
+ walk(state, child_handle, parser, Some(list_idx), depth + 1);
}
}
@@ -663,7 +680,7 @@ fn process_tag(
// Recurse into blockquote children under the Quote node.
let children = tag.children();
for child_handle in children.top().iter() {
- walk(state, child_handle, parser, Some(quote_idx));
+ walk(state, child_handle, parser, Some(quote_idx), depth + 1);
}
}
@@ -770,7 +787,7 @@ fn process_tag(
}
let children = tag.children();
for child_handle in children.top().iter() {
- walk(state, child_handle, parser, Some(group_idx));
+ walk(state, child_handle, parser, Some(group_idx), depth + 1);
}
}
@@ -779,7 +796,7 @@ fn process_tag(
| "form" | "fieldset" => {
let children = tag.children();
for child_handle in children.top().iter() {
- walk(state, child_handle, parser, parent_idx);
+ walk(state, child_handle, parser, parent_idx, depth + 1);
}
}
@@ -787,7 +804,7 @@ fn process_tag(
_ => {
let children = tag.children();
for child_handle in children.top().iter() {
- walk(state, child_handle, parser, parent_idx);
+ walk(state, child_handle, parser, parent_idx, depth + 1);
}
}
}
diff --git a/crates/html-to-markdown/tests/deep_nesting_overflow.rs b/crates/html-to-markdown/tests/deep_nesting_overflow.rs
new file mode 100644
index 000000000..bb0137520
--- /dev/null
+++ b/crates/html-to-markdown/tests/deep_nesting_overflow.rs
@@ -0,0 +1,139 @@
+#![allow(missing_docs)]
+
+//! Regression coverage for deeply nested and malformed markup. `tl` preserves
+//! repeated unclosed `<td>` tags as a multi-thousand-level DOM chain, so these
+//! tests run conversion on small thread stacks to catch native-stack recursion
+//! in whole-subtree helpers.
+//!
+//! The `<td>` shape reaches hierarchy recording, metadata extraction, and table
+//! scanning while the main conversion walker remains bounded by its own depth
+//! guard. That keeps failures attributable to the helper traversal under test.
+
+use html_to_markdown_rs::convert;
+use html_to_markdown_rs::options::{ConversionOptions, OutputFormat};
+use std::sync::{Mutex, MutexGuard};
+use std::thread;
+
+static TEST_MUTEX: Mutex<()> = Mutex::new(());
+
+fn test_lock() -> MutexGuard<'static, ()> { // Serializes the tests in this file on the single `TEST_MUTEX`.
+ TEST_MUTEX.lock().unwrap_or_else(std::sync::PoisonError::into_inner) // A failed assert poisons the lock; it guards `()`, so recover the guard instead of failing every later test.
+}
+
+fn converts_without_overflow(html: String, options: ConversionOptions) -> bool { // Convenience wrapper: same check as `converts_without_overflow_on_stack`, with a fixed small stack.
+ converts_without_overflow_on_stack(html, options, 256 * 1024) // 256 KiB thread stack
+}
+
+fn converts_without_overflow_on_stack(html: String, options: ConversionOptions, stack_size: usize) -> bool { // Runs `convert` on a dedicated thread; returns whether it produced `Ok`.
+ thread::Builder::new()
+ .stack_size(stack_size) // requested stack size, in bytes, for the spawned thread
+ .spawn(move || convert(&html, Some(options)).is_ok()) // `html` and `options` are moved into the thread
+ .expect("spawn conversion thread")
+ .join() // NOTE(review): `join` yields `Err` only if the thread panicked; a native stack overflow normally aborts the whole process instead
+ .expect("conversion thread overflowed the stack") // NOTE(review): so this message would actually be shown for any panic inside `convert` — confirm wording
+}
+
+/// Exercises `record_node_hierarchy` (pre-pass) and `scan_table_node` (table
+/// scan); the `<head>` is found without descending the deep chain.
+#[test]
+fn deep_unclosed_table_cells_do_not_overflow_stack() {
+ let _guard = test_lock(); // hold the file-wide test mutex until this test returns
+ let mut html = String::from("t"); // NOTE(review): literal contains no markup — tags look stripped from this patch; restore from the original file
+ for _ in 0..20_000 {
+ html.push_str("| x"); // NOTE(review): presumably an unclosed table-cell tag followed by `x` — confirm
+ }
+ html.push_str(" | ");
+ let options = ConversionOptions::builder().max_depth(Some(200)).build(); // explicit depth cap of 200
+ assert!(converts_without_overflow(html, options)); // runs on the 256 KiB stack
+}
+
+/// No `<head>`, so metadata extraction must search the entire deep chain.
+#[test]
+fn deep_markup_without_head_does_not_overflow_stack() {
+ let _guard = test_lock(); // hold the file-wide test mutex until this test returns
+ let mut html = String::from(""); // NOTE(review): empty literal — opening markup looks stripped from this patch; restore from the original file
+ for _ in 0..20_000 {
+ html.push_str("| x"); // NOTE(review): presumably an unclosed table-cell tag followed by `x` — confirm
+ }
+ html.push_str(" | ");
+ let options = ConversionOptions::builder().max_depth(Some(200)).build(); // explicit depth cap of 200
+ assert!(converts_without_overflow(html, options)); // runs on the 256 KiB stack
+}
+
+#[test]
+fn deep_link_descendant_text_does_not_overflow_stack() {
+ let _guard = test_lock(); // hold the file-wide test mutex until this test returns
+ let mut html = String::from("t");
+ for _ in 0..1_000 {
+ html.push_str(""); // NOTE(review): pushes an empty string, so no nesting is built — the tag text looks stripped from this patch; restore from the original file
+ }
+ html.push_str("deep");
+ html.push_str(""); // NOTE(review): empty literal, same concern as the loop above
+
+ let options = ConversionOptions::builder().max_depth(Some(200)).build(); // explicit depth cap of 200
+ assert!(converts_without_overflow_on_stack(html, options, 8 * 1024 * 1024)); // 8 MiB stack rather than the 256 KiB default of the wrapper
+}
+
+#[test]
+fn default_depth_uses_stack_safe_limit() {
+ let _guard = test_lock(); // hold the file-wide test mutex until this test returns
+ let mut html = String::from("");
+ for _ in 0..1_000 {
+ html.push_str(""); // NOTE(review): pushes an empty string, so no nesting is built — the opening tag looks stripped from this patch; restore from the original file
+ }
+ html.push_str("deep");
+ for _ in 0..1_000 {
+ html.push_str(" "); // NOTE(review): pushes a single space; presumably a closing tag was here — confirm
+ }
+ html.push_str("");
+
+ assert!(converts_without_overflow(html, ConversionOptions::default())); // default options: no explicit max_depth is supplied by this test
+}
+
+#[test]
+fn plain_text_output_does_not_overflow_stack() {
+ let _guard = test_lock(); // hold the file-wide test mutex until this test returns
+ let mut html = String::from("");
+ for _ in 0..1_000 {
+ html.push_str(""); // NOTE(review): pushes an empty string, so no nesting is built — the opening tag looks stripped from this patch; restore from the original file
+ }
+ html.push_str("deep");
+ for _ in 0..1_000 {
+ html.push_str(" "); // NOTE(review): pushes a single space; presumably a closing tag was here — confirm
+ }
+ html.push_str("");
+
+ let options = ConversionOptions {
+ output_format: OutputFormat::Plain, // exercise the plain-text output path
+ max_depth: Some(200), // explicit depth cap of 200
+ ..Default::default()
+ };
+ assert!(converts_without_overflow(html, options)); // runs on the 256 KiB stack
+}
+
+#[test]
+fn document_structure_builder_does_not_overflow_stack() {
+ let _guard = test_lock(); // hold the file-wide test mutex until this test returns
+ let mut html = String::from("");
+ for _ in 0..1_000 {
+ html.push_str(""); // NOTE(review): pushes an empty string, so no nesting is built — the opening tag looks stripped from this patch; restore from the original file
+ }
+ html.push_str("deep ");
+ for _ in 0..1_000 {
+ html.push_str(""); // NOTE(review): empty literal; presumably a closing tag was here — confirm
+ }
+ html.push_str("");
+
+ assert!(
+ thread::Builder::new()
+ .stack_size(8 * 1024 * 1024) // 8 MiB stack for the structure-building thread
+ .spawn(move || {
+ let dom = tl::parse(&html, tl::ParserOptions::default()).expect("parse deep html"); // parse directly with `tl`, bypassing `convert`
+ let document = html_to_markdown_rs::types::build_document_structure(&dom);
+ !document.nodes.is_empty() // success criterion: the builder produced at least one node
+ })
+ .expect("spawn structure thread")
+ .join() // NOTE(review): `join` yields `Err` only if the thread panicked; a native stack overflow normally aborts the process instead
+ .expect("structure thread overflowed the stack")
+ );
+}
diff --git a/crates/html-to-markdown/tests/test_max_depth.rs b/crates/html-to-markdown/tests/test_max_depth.rs
index 76a3b3558..03a94ca57 100644
--- a/crates/html-to-markdown/tests/test_max_depth.rs
+++ b/crates/html-to-markdown/tests/test_max_depth.rs
@@ -11,12 +11,13 @@ fn convert_with_options(html: &str, options: ConversionOptions) -> String {
.unwrap_or_default()
}
-/// With the default `max_depth: None`, deeply nested content should be fully converted.
+/// With the default `max_depth: None`, ordinary nesting below the native stack
+/// safety limit should be fully converted.
#[test]
-fn test_max_depth_none_converts_deeply_nested() {
- // Build 100 levels of nesting around a leaf text node.
+fn test_max_depth_none_converts_reasonably_nested_content() {
+ // Build 32 levels of nesting around a leaf text node.
let mut html = String::from("deep ");
- for _ in 0..100 {
+ for _ in 0..32 {
html = format!("{html} ");
}
|