Skip to content

Commit b9e469f

Browse files
herohemantedivm
authored and committed
fix(history): improve history processing and error handling
- Handle empty history cases by returning an empty list
- Support both dict and list formats for history input
- Safely parse date strings with error handling
- Sort revisions by date, ensuring robustness against missing values
1 parent ac80319 commit b9e469f

File tree

1 file changed

+24
-6
lines changed

1 file changed

+24
-6
lines changed

scp_crawler/postprocessing.py

Lines changed: 24 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -45,11 +45,29 @@ def get_images(html):
4545

4646

4747
def process_history(history):
48-
history = [v for v in history.values()]
49-
for revision in history:
50-
revision["date"] = datetime.strptime(revision["date"], "%d %b %Y %H:%M")
51-
history.sort(key=lambda x: x["date"])
52-
return history
48+
if not history:
49+
return []
50+
51+
if isinstance(history, dict):
52+
revisions = list(history.values())
53+
elif isinstance(history, list):
54+
revisions = history
55+
else:
56+
return []
57+
58+
for revision in revisions:
59+
if not isinstance(revision, dict):
60+
continue
61+
revision_date = revision.get("date")
62+
if isinstance(revision_date, str):
63+
try:
64+
revision["date"] = datetime.strptime(revision_date, "%d %b %Y %H:%M")
65+
except Exception:
66+
# Keep original value if parsing fails.
67+
pass
68+
69+
revisions.sort(key=lambda x: x.get("date") or datetime.min)
70+
return revisions
5371

5472

5573
def get_wiki_source(page_id, domain, attempts=5):
@@ -200,7 +218,7 @@ def run_postproc_tales():
200218
tale["raw_source"] = get_wiki_source(tale["page_id"], tale["domain"])
201219

202220
# Convert history dict to list and sort by date.
203-
tale["history"] = process_history(tale["history"])
221+
tale["history"] = process_history(tale.get("history"))
204222

205223
if len(tale["history"]) > 0:
206224
tale["created_at"] = tale["history"][0]["date"]

0 commit comments

Comments
 (0)