'''
Scrape https://course.ncku.edu.tw/index.php?c=qry_all (the NCKU course selection system) with Selenium.
'''
4+ import time
5+ import json
6+ import os
7+ import re
8+ from selenium import webdriver
9+ from selenium .webdriver .chrome .service import Service
10+ from selenium .webdriver .chrome .options import Options
11+ from selenium .webdriver .common .by import By
12+ from selenium .webdriver .support .ui import WebDriverWait
13+ from selenium .webdriver .support import expected_conditions as EC
14+ from bs4 import BeautifulSoup
15+ from rich import print
16+
def setup_driver():
    """Create and return the Chrome WebDriver used by the scraper.

    Chrome is started with a fixed set of command-line switches; the first one,
    ``--headless``, keeps a browser window from opening.

    Returns:
        The ``webdriver.Chrome`` instance built from those options.
    """
    launch_flags = (
        '--headless',  # headless mode: no browser window is opened
        '--disable-gpu',
        '--no-sandbox',
        '--disable-dev-shm-usage',
    )

    opts = Options()
    for flag in launch_flags:
        opts.add_argument(flag)

    return webdriver.Chrome(options=opts)
28+
def _course_from_cells(cells):
    """Turn the stripped text of one result row's ``<td>`` cells into a record.

    Args:
        cells: Cell texts in table order; only the first nine are used.

    Returns:
        A dict keyed by the column names written to the JSON output, or
        ``None`` when the row has fewer than nine cells and so cannot be read
        as a course row.
    """
    if len(cells) < 9:
        return None

    enrollment = cells[7]
    return {
        '系所名稱': cells[0],
        '系號-序號': cells[1],
        '年級': cells[2],
        '類別': cells[3],
        # Only the part of the name before the first space is kept.
        '科目名稱': cells[4].split(' ')[0],
        '學分': cells[5],
        '教師姓名': cells[6],
        '已選課人數/餘額': enrollment,
        '時間/教室': cells[8],
        # True when the enrollment cell does not contain '額'.
        # NOTE(review): presumably the site prints "額滿" for a full course - confirm.
        '是否有餘額': '額' not in enrollment,
    }


def _parse_courses(page_source):
    """Extract every course record from the ``A9-table`` table of a result page.

    Args:
        page_source: HTML of the page shown after a department button is clicked.

    Returns:
        A list of course dicts; empty when the table is missing.
    """
    soup = BeautifulSoup(page_source, 'lxml')
    table = soup.find('table', {'id': 'A9-table'})
    if table is None:
        return []

    courses = []
    for row in table.find_all('tr')[1:]:  # the first row is the table header
        cells = [cell.text.strip() for cell in row.find_all('td')]
        record = _course_from_cells(cells)
        # Rows without the full set of cells are skipped instead of raising,
        # so one odd row cannot abort the department and strand the browser
        # on the result page.
        if record is not None:
            courses.append(record)
    return courses


def main():
    """Scrape every department's course table and dump the result to JSON.

    Opens the course query page, collects the text of every ``btn_dept``
    element, clicks each department button in turn, parses the resulting
    ``A9-table`` table, and finally writes all collected rows to
    ``ncku_courses_11302.json`` in the current working directory.

    A failure while handling one department is reported and the loop moves on
    to the next one. The browser is always shut down, even when an error
    escapes the loop.
    """
    url = "https://course.ncku.edu.tw/index.php?c=qry_all"
    data = []

    driver = setup_driver()
    try:
        driver.get(url)
        last_height = driver.execute_script("return document.body.scrollHeight")
        print(last_height)

        # Collect the label of every department button; each label is read
        # once because every ``.text`` access is a round trip to the browser.
        nav_elements = driver.find_elements(By.CLASS_NAME, 'btn_dept')
        labels = (element.text for element in nav_elements)
        course_list = [label for label in labels if label != '']

        # Never visit more departments than there are <li class="btn_dept">
        # entries in the page source.
        soup = BeautifulSoup(driver.page_source, 'lxml')
        limit = len(soup.find_all('li', class_='btn_dept'))

        for course in course_list[:limit]:
            try:
                # Locate the button by its label.
                # NOTE(review): the label is interpolated into the XPath
                # unescaped; a label containing "'" would make the expression
                # invalid and be reported by the handler below.
                button_xpath = f"//li[@class='btn_dept'][contains(text(), '{course}')]"
                button = WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.XPATH, button_xpath))
                )

                # Click through JavaScript, then give the page time to load.
                driver.execute_script("arguments[0].click();", button)
                time.sleep(1)

                data.extend(_parse_courses(driver.page_source))

                print(f"已掃描: {course}")

                # Go back to the department list and wait until it is there.
                driver.back()
                WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.XPATH, '//li[@class="btn_dept"]'))
                )
            except Exception as e:
                # Best effort: report the department and carry on with the rest.
                print(f"處理 {course} 時發生錯誤: {e}")
                continue
    finally:
        # Always close the browser so no headless Chrome process is left behind.
        driver.quit()

    # Save the collected data.
    filename = 'ncku_courses_11302.json'
    with open(filename, 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False, indent=2)

    print(f"課程資料已保存至: {filename}")
    print(f"總共收集到 {len(data)} 門課程")
107+
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
0 commit comments