Apply suggestions from code review

lpi-tn · Copilot · web-flow · commit 797ebf2fdf0b · 2026-06-02T11:58:14.000+02:00
Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>
diff --git a/welearn_datastack/collectors/atom_collector.py b/welearn_datastack/collectors/atom_collector.py
@@ -59,16 +59,21 @@ def collect(self) -> List[WeLearnDocument]:
             links = XMLExtractor(entry.content).extract_content_attribute_filter(
                 tag="link", attribute_name="rel", attribute_value="alternate"
             )
-            try:
-                [link] = links
-                link_lines.append(link.attributes.get("href", ""))
-            except ValueError:
+            if not links:
                 logger.warning(
                     "No link found for entry, skipping entry. Entry content: %s",
                     entry.content,
                 )
                 continue
 
+            if len(links) > 1:
+                logger.warning(
+                    "Multiple rel='alternate' links found for entry; using the first. Entry content: %s",
+                    entry.content,
+                )
+
+            link_lines.append(links[0].attributes.get("href", ""))
+
         urls = lines_to_url(domain, link_lines)
 
         ret = extracted_url_to_url_datastore(urls=urls, corpus=self.corpus)
diff --git a/welearn_datastack/collectors/helpers/feed_helpers.py b/welearn_datastack/collectors/helpers/feed_helpers.py
@@ -38,7 +38,7 @@ def lines_to_url(domain: str, link_lines: List[str]) -> List[str]:
     return urls
 
 
-def remove_illegal_character(text: str):
+def remove_illegal_character(text: str) -> str:
     illegal_char_pos = [
         text.find(x) for x in url_illegal_characters if text.find(x) >= 0
     ]

Original file line number	Diff line number	Diff line change
`@@ -38,7 +38,7 @@ def lines_to_url(domain: str, link_lines: List[str]) -> List[str]:`
`38`	`38`	`return urls`
`39`	`39`
`40`	`40`
`41`		`-def remove_illegal_character(text: str):`
	`41`	`+def remove_illegal_character(text: str) -> str:`
`42`	`42`	`illegal_char_pos = [`
`43`	`43`	`text.find(x) for x in url_illegal_characters if text.find(x) >= 0`
`44`	`44`	`]`