"""
Scrape https://course.ncku.edu.tw/index.php?c=qry_all (成大選課系統, the NCKU
course-selection system) with Selenium.
"""

45import time
56import json
67import os
1415from bs4 import BeautifulSoup
1516from rich import print
1617
18+
def setup_driver():
    """Create the Chrome WebDriver used by the scraper.

    Returns:
        A ``webdriver.Chrome`` instance started with the command-line flags
        listed below. The caller is responsible for calling ``quit()`` on it.
    """
    chrome_options = Options()
    for flag in (
        "--headless",  # headless mode: no browser window is opened
        "--disable-gpu",
        "--no-sandbox",
        "--disable-dev-shm-usage",
    ):
        chrome_options.add_argument(flag)

    # Start the browser with the options assembled above.
    return webdriver.Chrome(options=chrome_options)
2830
31+
def _course_from_cells(cells):
    """Build one exported course record from the cell texts of a table row.

    Args:
        cells: Stripped text of the row's ``<td>`` cells, in page order.

    Returns:
        A dict keyed by the (Chinese) column names written to the JSON file,
        or ``None`` when the row has fewer than the nine cells read here.
    """
    if len(cells) < 9:
        return None
    return {
        "系所名稱": cells[0],
        "系號-序號": cells[1],
        "年級": cells[2],
        "類別": cells[3],
        # Keep only the text before the first space in the subject cell.
        "科目名稱": cells[4].split(" ")[0],
        "學分": cells[5],
        "教師姓名": cells[6],
        "已選課人數/餘額": cells[7],
        "時間/教室": cells[8],
        # True when the enrolment cell does not contain the character "額".
        "是否有餘額": "額" not in cells[7],
    }


def main():
    """Scrape the course table of every department button and save it as JSON.

    Opens the query page, collects the text of every ``btn_dept`` element,
    clicks each matching ``<li>`` in turn, parses the ``A9-table`` table of the
    resulting page and appends one record per data row. The collected records
    are written to ``ncku_courses_11302.json`` in the working directory.

    A failure while handling one department is printed and that department is
    skipped. The browser is closed even if an error escapes the loop.
    """
    url = "https://course.ncku.edu.tw/index.php?c=qry_all"
    data = []

    driver = setup_driver()
    try:
        driver.get(url)
        last_height = driver.execute_script("return document.body.scrollHeight")
        print(last_height)

        # Names of all department buttons that carry visible text.
        nav_texts = [element.text for element in driver.find_elements(By.CLASS_NAME, "btn_dept")]
        course_list = [text for text in nav_texts if text != ""]

        # Never visit more departments than there are <li class="btn_dept">
        # nodes in the initial page source.
        soup = BeautifulSoup(driver.page_source, "lxml")
        limit = len(soup.find_all("li", class_="btn_dept"))

        for course in course_list[:limit]:
            try:
                # Locate the department button by its text.
                button_xpath = f"//li[@class='btn_dept'][contains(text(), '{course}')]"
                button = WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.XPATH, button_xpath))
                )

                # Click through JavaScript, then give the page time to load.
                driver.execute_script("arguments[0].click();", button)
                time.sleep(1)

                # Parse the course table of the page that was just opened.
                soup = BeautifulSoup(driver.page_source, "lxml")
                table = soup.find("table", {"id": "A9-table"})

                if table:
                    for row in table.find_all("tr")[1:]:  # skip the header row
                        cells = [cell.text.strip() for cell in row.find_all("td")]
                        course_data = _course_from_cells(cells)
                        # Short rows are skipped instead of raising IndexError
                        # and abandoning the rest of this department.
                        if course_data is not None:
                            data.append(course_data)

                # NOTE(review): the diff view this code was recovered from
                # elides two lines at this point; they presumably navigate
                # back to the department index before the wait below.
                # Confirm against the full file.
                driver.back()
                WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.XPATH, '//li[@class="btn_dept"]'))
                )

            except Exception as e:
                # Best effort per department: report and move on to the next.
                print(f"處理 {course} 時發生錯誤: {e}")
                continue
    finally:
        # Always close the browser, even when navigation or parsing fails.
        driver.quit()

    # Save the collected records.
    filename = "ncku_courses_11302.json"
    with open(filename, "w", encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False, indent=2)

    print(f"課程資料已保存至: {filename}")
    print(f"總共收集到 {len(data)} 門課程")
111+
# Run the scraper only when this file is executed as a script.
if __name__ == "__main__":
    main()