Skip to content

Commit 6653bfd

Browse files
committed
修改、删除、重写了部分代码，精简了代码结构
1 parent cfa32ef commit 6653bfd

1 file changed

Lines changed: 12 additions & 28 deletions

File tree

main.py

Lines changed: 12 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -27,7 +27,9 @@ def download(url_, path_):
2727
class OSCourseware:
2828
BASE_URL = "https://jyywiki.cn"
2929
SOURCE_FILE_TYPE = (
30-
".png", ".jpg", ".gif", ".webp", "jpeg", ".js", ".css", ".html", ".c", ".cpp", ".py", ".sh", ".S"
30+
".png", ".jpg", ".gif", ".webp", "jpeg",
31+
".js", ".css", ".html",
32+
".c", ".h", ".cpp", ".py", ".sh", ".S", ".lua", ".txt"
3133
)
3234
COURSEWARE_DIR = "Courseware"
3335
WITHOUT_DOWNLOAD = [
@@ -38,7 +40,6 @@ class OSCourseware:
3840
year = []
3941
year_input = ''
4042
current_dir = ''
41-
index_url_path_pairs = {}
4243
slides_url_path_pairs = {}
4344
sources_url_path_pairs = {} # 字典不用去重
4445

@@ -63,40 +64,23 @@ def build_courseware_url_path(year_):
6364

6465
if self.year_input == "ALL":
6566
for year in ['2021', '2022', '2023']:
66-
self.index_url_path_pairs.update(build_courseware_url_path(year))
67+
self.sources_url_path_pairs.update(build_courseware_url_path(year))
6768
elif self.year_input != "Invalid":
68-
self.index_url_path_pairs.update(build_courseware_url_path(self.year_input))
69+
self.sources_url_path_pairs.update(build_courseware_url_path(self.year_input))
6970
if self.year_input != "2023":
7071
self.WITHOUT_DOWNLOAD.append(f'{self.BASE_URL}/OS/2023/index.html')
7172
else:
7273
print("输入非法,程序退出")
7374
sys.exit()
7475

7576
def file_download(self):
76-
# 按年下载、分析index.html
77-
def _download(_url_path_pairs):
78-
for _url, _path in _url_path_pairs.items():
77+
while self.sources_url_path_pairs:
78+
self.slides_url_path_pairs.update(self.sources_url_path_pairs)
79+
self.sources_url_path_pairs.clear()
80+
for _url, _path in self.slides_url_path_pairs.items():
7981
download(_url, _path)
80-
81-
def _analyse(_url_path_pairs):
82-
for _url, _path in _url_path_pairs.items():
8382
self.file_analyse(_path)
8483

85-
def _analyse_download(_url_path_pairs):
86-
_analyse(_url_path_pairs)
87-
_download(self.sources_url_path_pairs)
88-
89-
# 下载index.html文件,分析index.html后下载课件
90-
_download(self.index_url_path_pairs)
91-
_analyse_download(self.index_url_path_pairs)
92-
93-
# 分析课件后下载课件中的其他文件
94-
self.slides_url_path_pairs.update(self.sources_url_path_pairs)
95-
_analyse_download(self.slides_url_path_pairs)
96-
97-
self.slides_url_path_pairs.update(self.sources_url_path_pairs)
98-
_analyse_download(self.slides_url_path_pairs)
99-
10084
# 提取每个文件中的链接
10185
def file_analyse(self, filepath):
10286
# 对非HTML文件不做分析
@@ -123,9 +107,9 @@ def file_analyse(self, filepath):
123107
path = os.path.normpath(os.path.join(os.path.dirname(filepath), link.replace("/", "\\")))
124108
relative_path = path.split(os.getcwd() + os.sep + self.COURSEWARE_DIR)[1]
125109
url = urljoin(self.BASE_URL, relative_path.replace("\\", "/"))
126-
if url in self.WITHOUT_DOWNLOAD:
127-
continue
128-
self.sources_url_path_pairs.update({url: path})
110+
if url not in self.WITHOUT_DOWNLOAD:
111+
self.sources_url_path_pairs.update({url: path})
112+
self.WITHOUT_DOWNLOAD.append(url)
129113

130114

131115
courseware = OSCourseware()

0 commit comments

Comments (0)