This repository was archived by the owner on May 25, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path: main.py
82 lines (64 loc) · 2.32 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import configparser
import json
import os
import threading
from concurrent.futures import ThreadPoolExecutor

import feedparser
import requests
def get_final_url(url):
    """Resolve a single HTTP redirect for *url* and return the target URL.

    Performs one non-following GET; if the server answers with any redirect
    status (301/302/303/307/308) the ``Location`` header is returned,
    otherwise the original URL is returned unchanged.
    """
    # timeout keeps a dead server from hanging the whole run;
    # allow_redirects=False lets us read the redirect target ourselves.
    response = requests.get(url, allow_redirects=False, timeout=10)
    # Original code only recognized 307; accept every redirect status.
    if response.is_redirect or response.is_permanent_redirect:
        # .get() guards against a malformed redirect with no Location header.
        return response.headers.get('Location', url)
    return url
def get_article_urls_from_rss(rss_url):
    """Fetch the RSS feed at *rss_url* and return a list of article dicts.

    Each element is ``{"title": ..., "url": ...}`` where the URL has been
    resolved through :func:`get_final_url`.  Results are returned in feed
    order (``executor.map`` preserves input order, unlike the original
    shared-list append which depended on thread scheduling).
    """
    feed = feedparser.parse(rss_url)
    article_count = len(feed.entries)
    # Progress counter shared across worker threads; the lock keeps the
    # increment-and-read atomic so the printed "done/total" is accurate.
    done = 0
    lock = threading.Lock()

    def process_entry(entry):
        # Resolve the final URL, then report progress.
        nonlocal done
        final_url = get_final_url(entry.link)
        with lock:
            done += 1
            progress = f"{done}/{article_count}"
        print(f"正在获取文章URL: {progress} - {entry.title}", end='\r')
        return {"title": entry.title, "url": final_url}

    # Fan the per-article HTTP requests out over a small thread pool;
    # list(...) blocks until every entry has been processed.
    with ThreadPoolExecutor(max_workers=5) as executor:
        article_urls = list(executor.map(process_entry, feed.entries))
    print("获取文章URL完成!")
    return article_urls
# --- Configuration ------------------------------------------------------
# Read the blog URL from config.ini, creating the file on first run.
config = configparser.ConfigParser()
config_file = 'config.ini'

if not os.path.exists(config_file):
    # First run: ask the user for the blog URL and persist it.
    # .strip() keeps an accidental trailing space/newline out of the config.
    url = input("请输入博客 URL:").strip()
    config['Blog'] = {'URL': url}
    with open(config_file, 'w') as file:
        config.write(file)
else:
    # Subsequent runs: reuse the stored blog URL.
    config.read(config_file)
    url = config.get('Blog', 'URL')
    print("已获取博客URL:" + url)

# Build the RSS request address.  rstrip('/') prevents a double slash
# ("https://blog//feed?format=xml") when the stored URL ends with '/'.
rss_url = url.rstrip('/') + "/feed?format=xml"

# Remove any stale result from a previous run so a failed fetch below
# cannot leave outdated data masquerading as fresh output.
if os.path.exists('article.json'):
    os.remove('article.json')

# Fetch the resolved article URL list from the feed.
article_urls = get_article_urls_from_rss(rss_url)

# Persist {"articles": [...]} as UTF-8 JSON; ensure_ascii=False keeps
# non-ASCII (Chinese) titles human-readable in the file.
data = {'articles': article_urls}
with open('article.json', 'w', encoding='utf-8') as file:
    json.dump(data, file, ensure_ascii=False)

# Report how many articles were collected.
article_count = len(article_urls)
if article_count > 0:
    print("获取到的文章数:", article_count)
else:
    print("获取文章URL失败或未找到任何文章。")