@@ -18,8 +18,6 @@ static HEADING_PREFIXES: [&str; 6] = ["# ", "## ", "### ", "#### ", "##### ", "#
1818
1919/// Pre-computed blockquote prefixes indexed by depth 0-6; index 0 is the empty prefix (avoids `"> ".repeat()`)
2020static BQ_PREFIXES : [ & str ; 7 ] = [ "" , "> " , "> > " , "> > > " , "> > > > " , "> > > > > " , "> > > > > > " ] ;
21- /// Pre-computed unordered list item prefixes for indent depths 0-5
22- static UL_PREFIXES : [ & str ; 6 ] = [ "- " , " - " , " - " , " - " , " - " , " - " ] ;
2321
2422// Clean mode bitmask flags
2523const CLEAN_EMPTY_LINKS : u8 = 1 ;
@@ -247,6 +245,16 @@ pub struct ConvertState {
247245 in_heading : bool ,
248246 /// Buffer position at heading start (for extracting heading text)
249247 heading_buffer_start : usize ,
248+
249+ /// Cumulative indent string for list-item continuation content. Grows by
250+ /// each ancestor `<li>`'s marker width (`"- "` = 2, `"N. "` = digits(N)+2),
251+ /// so code blocks, paragraphs, and nested blocks inside a list item land
252+ /// in the content column that CommonMark requires. Pushed on `<li>` enter,
253+ /// popped on `<li>` close.
254+ list_indent : String ,
255+ /// Per-`<li>` contribution width stack, parallel to `list_indent`. Used to
256+ /// truncate the correct number of bytes on close without re-walking ancestors.
257+ list_indent_widths : Vec < u8 > ,
250258}
251259
252260impl ConvertState {
@@ -322,6 +330,9 @@ impl ConvertState {
322330 fragment_links : Vec :: new ( ) ,
323331 in_heading : false ,
324332 heading_buffer_start : 0 ,
333+
334+ list_indent : String :: new ( ) ,
335+ list_indent_widths : Vec :: with_capacity ( 8 ) ,
325336 } ;
326337 // Resolve clean config into bitmask
327338 let effective_clean_urls;
@@ -968,36 +979,33 @@ impl ConvertState {
968979 return ;
969980 }
970981
971- // Indent code block content inside a list item so the fenced block stays
972- // within the list item's content column. Skip indent for blank lines and
973- // lines that already begin with whitespace — preserves the original
974- // indentation structure from the HTML and matches the JS engine.
982+ // Indent code block content inside a list item so every line starts at
983+ // the list item's content column. CommonMark closes the list item when
984+ // a line is indented less than that column, so we prepend list_indent
985+ // on top of any existing in-source indentation. Blank lines are left
986+ // alone so they stay blank.
975987 let li_depth = self . depth_map [ TAG_LI as usize ] as usize ;
976988 let indented_storage;
977989 let text = if self . depth_map [ TAG_PRE as usize ] > 0 && li_depth > 0
978990 && ( text. contains ( '\n' ) || last_char == b'\n' ) {
979- let indent = " " . repeat ( li_depth ) ;
991+ let indent = self . list_indent . as_str ( ) ;
980992 let mut out = String :: with_capacity ( text. len ( ) + indent. len ( ) * 2 ) ;
981993 let bytes = text. as_bytes ( ) ;
982994 // Prepend indent for the first line when the buffer ended with a
983- // newline (code fence opener) and this text doesn't already start
984- // with leading whitespace.
995+ // newline (code fence opener). Blank first line stays blank.
985996 if last_char == b'\n' {
986997 let first = bytes. first ( ) . copied ( ) . unwrap_or ( 0 ) ;
987- if first != b' ' && first != b'\t ' && first != b'\n' {
988- out. push_str ( & indent) ;
998+ if first != b'\n ' && first != 0 {
999+ out. push_str ( indent) ;
9891000 }
9901001 }
9911002 let mut prev = 0usize ;
9921003 for ( i, & b) in bytes. iter ( ) . enumerate ( ) {
9931004 if b == b'\n' {
9941005 out. push_str ( & text[ prev..=i] ) ;
9951006 let next = i + 1 ;
996- if next < bytes. len ( ) {
997- let c = bytes[ next] ;
998- if c != b' ' && c != b'\t' && c != b'\n' {
999- out. push_str ( & indent) ;
1000- }
1007+ if next < bytes. len ( ) && bytes[ next] != b'\n' {
1008+ out. push_str ( indent) ;
10011009 }
10021010 prev = next;
10031011 }
@@ -1104,8 +1112,7 @@ impl ConvertState {
11041112 "> " . repeat ( depth)
11051113 } ;
11061114 if self . depth_map [ TAG_LI as usize ] > 0 {
1107- let indent = " " . repeat ( self . depth_map [ TAG_LI as usize ] as usize ) ;
1108- prefix = format ! ( "\n {indent}{prefix}" ) ;
1115+ prefix = format ! ( "\n {}{}" , self . list_indent, prefix) ;
11091116 }
11101117 Some ( Cow :: Owned ( prefix) )
11111118 }
@@ -1115,14 +1122,14 @@ impl ConvertState {
11151122 let lang = Self :: get_language_from_class ( node. attributes . get ( "class" ) ) ;
11161123 let li_depth = self . depth_map [ TAG_LI as usize ] as usize ;
11171124 if li_depth > 0 {
1118- let indent = " " . repeat ( li_depth ) ;
1125+ let indent = self . list_indent . as_str ( ) ;
11191126 let mut s = String :: with_capacity ( 2 + indent. len ( ) * 2 + 4 + lang. len ( ) + 1 ) ;
11201127 s. push_str ( "\n \n " ) ;
1121- s. push_str ( & indent) ;
1128+ s. push_str ( indent) ;
11221129 s. push_str ( "```" ) ;
11231130 s. push_str ( lang) ;
11241131 s. push ( '\n' ) ;
1125- s. push_str ( & indent) ;
1132+ s. push_str ( indent) ;
11261133 Some ( Cow :: Owned ( s) )
11271134 } else if lang. is_empty ( ) {
11281135 Some ( Cow :: Borrowed ( "```\n " ) )
@@ -1170,23 +1177,21 @@ impl ConvertState {
11701177 if self . in_table_cell ( ) {
11711178 return Some ( Cow :: Borrowed ( "<li>" ) ) ;
11721179 }
1173- let ul_depth = self . depth_map [ TAG_UL as usize ] as usize ;
1174- let ol_depth = self . depth_map [ TAG_OL as usize ] as usize ;
1175- let depth = if ul_depth + ol_depth > 0 { ul_depth + ol_depth - 1 } else { 0 } ;
1176- let is_ordered = ol_depth > 0 && _ancestors. last ( ) . is_some_and ( |p| p. tag_id == Some ( TAG_OL ) ) ;
1177- if !is_ordered && depth < UL_PREFIXES . len ( ) {
1178- Some ( Cow :: Borrowed ( UL_PREFIXES [ depth] ) )
1180+ // Parent determines marker: <ol> → "N. " (digits of N + 2
1181+ // columns), else "- " (2 columns). The indent emitted here is
1182+ // the parent's accumulated list_indent — this LI's own marker
1183+ // contribution is pushed onto list_indent AFTER this output
1184+ // is written to the buffer.
1185+ let is_ordered = _ancestors. last ( ) . is_some_and ( |p| p. tag_id == Some ( TAG_OL ) ) ;
1186+ let mut s = String :: with_capacity ( self . list_indent . len ( ) + 6 ) ;
1187+ s. push_str ( & self . list_indent ) ;
1188+ if is_ordered {
1189+ use std:: fmt:: Write ;
1190+ let _ = write ! ( s, "{}. " , node. index + 1 ) ;
11791191 } else {
1180- let mut s = String :: with_capacity ( depth * 2 + 6 ) ;
1181- for _ in 0 ..depth { s. push_str ( " " ) ; }
1182- if is_ordered {
1183- use std:: fmt:: Write ;
1184- let _ = write ! ( s, "{}. " , node. index + 1 ) ;
1185- } else {
1186- s. push_str ( "- " ) ;
1187- }
1188- Some ( Cow :: Owned ( s) )
1192+ s. push_str ( "- " ) ;
11891193 }
1194+ Some ( Cow :: Owned ( s) )
11901195 }
11911196 TAG_A => {
11921197 if node. attributes . contains_key ( "href" ) { Some ( Cow :: Borrowed ( "[" ) ) } else { None }
@@ -1273,12 +1278,12 @@ impl ConvertState {
12731278 if self . depth_map [ TAG_PRE as usize ] > 0 {
12741279 let li_depth = self . depth_map [ TAG_LI as usize ] as usize ;
12751280 if li_depth > 0 {
1276- let indent = " " . repeat ( li_depth ) ;
1281+ let indent = self . list_indent . as_str ( ) ;
12771282 let mut s = String :: with_capacity ( 1 + indent. len ( ) * 2 + 5 ) ;
12781283 s. push ( '\n' ) ;
1279- s. push_str ( & indent) ;
1284+ s. push_str ( indent) ;
12801285 s. push_str ( "```\n \n " ) ;
1281- s. push_str ( & indent) ;
1286+ s. push_str ( indent) ;
12821287 Some ( Cow :: Owned ( s) )
12831288 } else {
12841289 Some ( Cow :: Borrowed ( "\n ```" ) )
@@ -1838,6 +1843,35 @@ impl ConvertState {
18381843 self . emit_enter_element ( ) ;
18391844 }
18401845
1846+ // After the LI prefix is emitted, push this LI's marker-width worth of
1847+ // spaces to list_indent so subsequent continuation content (code blocks,
1848+ // paragraphs, nested blocks) lands in the correct content column. The
1849+ // width depends on the marker: "- " = 2, "N. " = digits(N) + 2.
1850+ // Push for every LI open so close_node can pop unconditionally; width 0
1851+ // when skipped or in a table cell keeps the stack balanced without
1852+ // affecting the indent string.
1853+ if tag_id == Some ( TAG_LI )
1854+ && let Some ( li) = self . stack . last ( )
1855+ {
1856+ let width: usize = if !skip_node && !self . in_table_cell ( ) {
1857+ let stack_len = self . stack . len ( ) ;
1858+ let parent_is_ordered = stack_len >= 2
1859+ && self . stack [ stack_len - 2 ] . tag_id == Some ( TAG_OL ) ;
1860+ if parent_is_ordered {
1861+ let n = li. index + 1 ;
1862+ // n >= 1 so ilog10 never panics; +1 converts floor(log10) to digit count.
1863+ let digits = ( n. ilog10 ( ) + 1 ) as usize ;
1864+ digits + 2
1865+ } else {
1866+ 2
1867+ }
1868+ } else {
1869+ 0
1870+ } ;
1871+ self . list_indent_widths . push ( u8:: try_from ( width) . unwrap_or ( u8:: MAX ) ) ;
1872+ for _ in 0 ..width { self . list_indent . push ( ' ' ) ; }
1873+ }
1874+
18411875 self . has_encoded_html_entity = false ;
18421876
18431877 if self . stack . last ( ) . is_some_and ( |n| n. is_non_nesting ) && !self_closing {
@@ -1924,6 +1958,12 @@ impl ConvertState {
19241958 self . depth_map [ id as usize ] = self . depth_map [ id as usize ] . saturating_sub ( 1 ) ;
19251959 }
19261960 self . update_escape_ctx_on_close ( id) ;
1961+ if id == TAG_LI
1962+ && let Some ( w) = self . list_indent_widths . pop ( )
1963+ {
1964+ let new_len = self . list_indent . len ( ) . saturating_sub ( w as usize ) ;
1965+ self . list_indent . truncate ( new_len) ;
1966+ }
19271967 }
19281968 self . depth -= 1 ;
19291969 self . has_encoded_html_entity = false ;
@@ -1945,6 +1985,12 @@ impl ConvertState {
19451985 self . depth_map [ id as usize ] = self . depth_map [ id as usize ] . saturating_sub ( 1 ) ;
19461986 }
19471987 self . update_escape_ctx_on_close ( id) ;
1988+ if id == TAG_LI
1989+ && let Some ( w) = self . list_indent_widths . pop ( )
1990+ {
1991+ let new_len = self . list_indent . len ( ) . saturating_sub ( w as usize ) ;
1992+ self . list_indent . truncate ( new_len) ;
1993+ }
19481994 }
19491995
19501996 self . in_non_nesting = self . stack . last ( ) . is_some_and ( |n| n. is_non_nesting ) ;
0 commit comments