Skip to content

Commit 797ebf2

Browse files
lpi-tn and Copilot authored
Apply suggestions from code review
Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>
1 parent 62157ee commit 797ebf2

2 files changed

Lines changed: 10 additions & 5 deletions

File tree

welearn_datastack/collectors/atom_collector.py

Lines changed: 9 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -59,16 +59,21 @@ def collect(self) -> List[WeLearnDocument]:
5959
links = XMLExtractor(entry.content).extract_content_attribute_filter(
6060
tag="link", attribute_name="rel", attribute_value="alternate"
6161
)
62-
try:
63-
[link] = links
64-
link_lines.append(link.attributes.get("href", ""))
65-
except ValueError:
62+
if not links:
6663
logger.warning(
6764
"No link found for entry, skipping entry. Entry content: %s",
6865
entry.content,
6966
)
7067
continue
7168

69+
if len(links) > 1:
70+
logger.warning(
71+
"Multiple rel='alternate' links found for entry; using the first. Entry content: %s",
72+
entry.content,
73+
)
74+
75+
link_lines.append(links[0].attributes.get("href", ""))
76+
7277
urls = lines_to_url(domain, link_lines)
7378

7479
ret = extracted_url_to_url_datastore(urls=urls, corpus=self.corpus)

welearn_datastack/collectors/helpers/feed_helpers.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -38,7 +38,7 @@ def lines_to_url(domain: str, link_lines: List[str]) -> List[str]:
3838
return urls
3939

4040

41-
def remove_illegal_character(text: str):
41+
def remove_illegal_character(text: str) -> str:
4242
illegal_char_pos = [
4343
text.find(x) for x in url_illegal_characters if text.find(x) >= 0
4444
]

0 commit comments

Comments
 (0)