|
| 1 | +"""Main entry point for RSS Friend aggregator.""" |
1 | 2 |
|
2 | | -import json |
3 | | -import os |
4 | | -import time |
5 | 3 | from pathlib import Path |
6 | | -import feedparser |
7 | | -import requests |
| 4 | +from rss_aggregator import aggregate_rss_feeds |
8 | 5 |
|
9 | 6 | # Constants |
10 | 7 | ASSETS_DIR = Path(__file__).parent / 'assets' |
11 | 8 | MAX_POSTS = 30 |
12 | 9 | FRIENDS_JSON_PATH = Path(__file__).parent.parent / '_data' / 'friends.json' |
13 | 10 |
|
14 | | -def ensure_directory_exists(dir_path: Path): |
15 | | - """Ensures a directory exists, creating it if necessary.""" |
16 | | - dir_path.mkdir(parents=True, exist_ok=True) |
17 | | - |
18 | | -def write_json_file(file_path: Path, data): |
19 | | - """Writes data to a JSON file.""" |
20 | | - with open(file_path, 'w', encoding='utf-8') as f: |
21 | | - json.dump(data, f, ensure_ascii=False, indent=2) |
22 | | - |
23 | | -def parse_rss_feed(friend: dict) -> list: |
24 | | - """Parses a single RSS feed.""" |
25 | | - friend_name = friend.get("title", "") |
26 | | - friend_link = friend.get("link", "") |
27 | | - rss_url = friend.get("feed", "") |
28 | | - posts = [] |
29 | | - |
30 | | - if not rss_url or not rss_url.startswith('http'): |
31 | | - print(f"Invalid RSS URL: {rss_url} (from: {friend_name})") |
32 | | - return [] |
33 | 11 |
|
| 12 | +def main(): |
| 13 | + """Main entry point.""" |
34 | 14 | try: |
35 | | - # Use requests to fetch the feed with a timeout and user-agent |
36 | | - response = requests.get(rss_url, timeout=10, headers={'User-Agent': 'RSS Aggregator Bot'}) |
37 | | - response.raise_for_status() # Raise an exception for bad status codes |
38 | | - |
39 | | - # Parse the feed content using feedparser |
40 | | - feed = feedparser.parse(response.content) |
41 | | - |
42 | | - for entry in feed.entries: |
43 | | - # Get date |
44 | | - date_tuple = entry.get("published_parsed") or entry.get("updated_parsed") or time.gmtime() |
45 | | - date = time.strftime('%Y-%m-%dT%H:%M:%SZ', date_tuple) |
46 | | - |
47 | | - posts.append({ |
48 | | - "title": entry.get("title", "No Title"), |
49 | | - "link": entry.get("link", friend_link), |
50 | | - "date": date, |
51 | | - "author": { |
52 | | - "name": friend_name, |
53 | | - "link": friend_link |
54 | | - } |
55 | | - }) |
56 | | - except Exception as e: |
57 | | - print(f"Failed to parse RSS feed ({friend_name} - {rss_url}): {e}") |
58 | | - |
59 | | - return posts |
60 | | - |
61 | | -def aggregate_rss_feeds(): |
62 | | - """Aggregates RSS feeds from a list of friends.""" |
63 | | - try: |
64 | | - ensure_directory_exists(ASSETS_DIR) |
65 | | - |
66 | | - with open(FRIENDS_JSON_PATH, 'r', encoding='utf-8') as f: |
67 | | - friends = json.load(f) |
68 | | - |
69 | | - if not isinstance(friends, list) or not friends: |
70 | | - print("No valid friends data found.") |
71 | | - return |
72 | | - |
73 | | - all_posts = [] |
74 | | - for friend in friends: |
75 | | - all_posts.extend(parse_rss_feed(friend)) |
76 | | - |
77 | | - write_json_file(ASSETS_DIR / 'unsort.json', all_posts) |
78 | | - |
79 | | - # Sort posts by date (newest first) |
80 | | - sorted_posts = sorted(all_posts, key=lambda x: x['date'], reverse=True)[:MAX_POSTS] |
81 | | - write_json_file(ASSETS_DIR / 'sorted.json', sorted_posts) |
82 | | - |
83 | | - # Format posts |
84 | | - formatted_posts = [] |
85 | | - for post in sorted_posts: |
86 | | - t = time.strptime(post['date'], '%Y-%m-%dT%H:%M:%SZ') |
87 | | - formatted_posts.append({ |
88 | | - "title": post["title"], |
89 | | - "link": post["link"], |
90 | | - "year": t.tm_year, |
91 | | - "month": t.tm_mon, |
92 | | - "day": t.tm_mday, |
93 | | - "author": post["author"] |
94 | | - }) |
95 | | - |
96 | | - write_json_file(ASSETS_DIR / 'rss.json', formatted_posts) |
97 | | - |
98 | | - print(f"Processing complete - Aggregated {len(all_posts)} posts, saved the top {len(sorted_posts)}.") |
99 | | - |
| 15 | + aggregate_rss_feeds(FRIENDS_JSON_PATH, ASSETS_DIR, MAX_POSTS) |
100 | 16 | except Exception as e: |
101 | 17 | print(f"Main process failed: {e}") |
102 | 18 |
|
| 19 | + |
103 | 20 | if __name__ == "__main__": |
104 | | - aggregate_rss_feeds() |
| 21 | + main() |
0 commit comments