Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions obsidiantools/md_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,8 @@ def get_tags(filepath: Path, *, show_nested: bool = False) -> list[str]:
str_transform_func=_transform_md_file_string_for_tag_parsing)
# remove wikilinks so that '#' headers are not caught:
src_txt = _remove_wikilinks_from_source_text(src_txt)
# remove md link URLs so that '#' fragments in them are not caught:
src_txt = _remove_md_link_urls_from_source_text(src_txt)
tags = _get_tags_from_source_text(src_txt, show_nested=show_nested)
return tags

Expand Down Expand Up @@ -447,6 +449,12 @@ def _remove_wikilinks_from_source_text(src_txt: str) -> str:
return re.sub(WIKILINK_REGEX, '', src_txt)


def _remove_md_link_urls_from_source_text(src_txt: str) -> str:
    """Collapse inline markdown links in ``src_txt`` down to their link text.

    Every match of ``INLINE_LINK_AFTER_HTML_PROC_REGEX`` is replaced by the
    pattern's first capture group, so a link written as ``[text](<url>)``
    becomes just ``text``.  This keeps any ``#`` fragment inside the URL
    from being mistaken for a tag, while the visible link text is retained.

    Args:
        src_txt: source text of a note.

    Returns:
        The text with each matched link replaced by its first capture group.
    """
    # NOTE(review): assumes group 1 of the regex (defined elsewhere in this
    # module) is the link text - confirm against the pattern definition.
    return re.sub(INLINE_LINK_AFTER_HTML_PROC_REGEX, r'\1', src_txt)


def _transform_md_file_string_for_tag_parsing(txt: str) -> str:
return txt.replace('\\#', '')

Expand Down
7 changes: 7 additions & 0 deletions tests/general/tags_url-fragment.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Tags vs URL fragments

thanks to [Vite's HMR feature](https://vitejs.dev/guide/features.html#hot-module-replacement).

A real tag: #python

Another link [docs](https://example.com/page#section-two) followed by #data text.
16 changes: 16 additions & 0 deletions tests/test_md_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,22 @@ def test_sussudio_tags_with_nesting_shown():
assert actual_tags == expected_tags


def test_url_fragments_in_md_links_not_parsed_as_tags():
    """URL fragments inside markdown links must not be reported as tags."""
    # the '#' fragment of a markdown link URL must not be read as a tag,
    # while genuine tags in the note are still captured (issue #52):
    actual_tags = get_tags(
        Path('.') / 'tests/general/tags_url-fragment.md')
    # the fixture holds two links with '#...' fragments plus the real tags
    # '#python' and '#data', in that order:
    expected_tags = ['python', 'data']
    assert actual_tags == expected_tags


def test_url_fragments_in_md_links_not_parsed_as_tags_with_nesting():
    """Same expectation as the non-nested case, with ``show_nested=True``."""
    # URL fragments in markdown links must still be ignored when nested
    # tags are requested; the fixture's real tags are '#python' and '#data':
    actual_tags = get_tags(
        Path('.') / 'tests/general/tags_url-fragment.md', show_nested=True)
    expected_tags = ['python', 'data']
    assert actual_tags == expected_tags


def test_embedded_files_alias_scaling():
actual_embedded_images = get_embedded_files(
Path('.') / 'tests/general/embedded-images_in-table.md')
Expand Down
Loading