Skip to content

Commit 8feb09f

Browse files
authored
fix: markdown serialization of hyperlink with code (#434)
* chore: leverage fixtures in tests and add type annotations

  Use fixtures for sample DoclingDocument in tests.
  Add missing function type annotation when the body contains annotations.

  Signed-off-by: Cesar Berrospi Ramis <[email protected]>

* fix(markdown): serialize hyperlink as code always with single backticks

  Signed-off-by: Cesar Berrospi Ramis <[email protected]>

---------

Signed-off-by: Cesar Berrospi Ramis <[email protected]>
1 parent 3fe8b5a commit 8feb09f

15 files changed

+725
-633
lines changed

docling_core/transforms/chunker/code_chunking/_language_code_chunkers.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ def _new_hash(code: str) -> int:
3838
class _RangeTracker:
3939
"""Handles tracking and management of used byte ranges in code."""
4040

41-
def __init__(self):
41+
def __init__(self) -> None:
4242
"""Initialize the range tracker with an empty list of used ranges."""
4343
self.used_ranges: List[Tuple[int, int]] = []
4444

docling_core/transforms/serializer/markdown.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -211,7 +211,9 @@ def serialize(
211211
text_part = f"{num_hashes * '#'} {text}"
212212
elif isinstance(item, CodeItem):
213213
if params.format_code_blocks:
214-
text_part = f"`{text}`" if is_inline_scope else f"```\n{text}\n```"
214+
# inline items and all hyperlinks: use single backticks
215+
bt = is_inline_scope or (params.include_hyperlinks and item.hyperlink)
216+
text_part = f"`{text}`" if bt else f"```\n{text}\n```"
215217
else:
216218
text_part = text
217219
escape_html = False

docling_core/types/doc/document.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6138,7 +6138,7 @@ def check_version_is_compatible(cls, v: str) -> str:
61386138
else:
61396139
return CURRENT_VERSION
61406140

6141-
@model_validator(mode="after") # type: ignore
6141+
@model_validator(mode="after")
61426142
def validate_document(self) -> Self:
61436143
"""validate_document."""
61446144
with warnings.catch_warnings():
@@ -6152,7 +6152,7 @@ def validate_document(self) -> Self:
61526152
return self
61536153

61546154
@model_validator(mode="after")
6155-
def validate_misplaced_list_items(self):
6155+
def validate_misplaced_list_items(self) -> Self:
61566156
"""validate_misplaced_list_items."""
61576157
# find list items without list parent, putting successive ones together
61586158
misplaced_list_items: list[list[ListItem]] = []
@@ -6184,7 +6184,7 @@ def validate_misplaced_list_items(self):
61846184
)
61856185

61866186
# delete list items from document (should not be affected by group addition)
6187-
self.delete_items(node_items=curr_list_items)
6187+
self.delete_items(node_items=list(curr_list_items))
61886188

61896189
# add list items to new group
61906190
for li in curr_list_items:

0 commit comments

Comments
 (0)