-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgettimetable.js
More file actions
230 lines (194 loc) · 6.82 KB
/
gettimetable.js
File metadata and controls
230 lines (194 loc) · 6.82 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
// Complete Working University TimeTable Scraper
require('dotenv').config(); // Load Env Vars
const cheerio = require('cheerio');
// Value sent as the 'User-Agent' request header by fetchTimetable.
const userAgent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)';
// Timetable page that fetchTimetable requests with HTTP Basic Authentication.
const url = 'https://apps.uclan.ac.uk/timetables/';
/*
Brief: Building Code Lookup Table
Maps the short building code (e.g., "CM") to the full building name.
Keys are case-sensitive: residences use mixed-case codes (e.g., "Bm").
*/
const buildingCodes = {
  // Campus buildings
  AB: "Adelphi Building",
  AC: "St Peter’s Arts Centre",
  AL: "Allen Building",
  BB: "Brook Building",
  CB: "Chandler Building",
  CM: "Computing & Technology Building",
  DB: "Darwin Building",
  EB: "Edward Building",
  FB: "Foster Building",
  FY: "Fylde Building",
  GR: "Greenbank Building",
  HA: "Harrington Building",
  HB: "Harris Building",
  HR: "Hanover Building",
  JBF: "JB Firth Building",
  KM: "Kirkham Building",
  LE: "Leighton Building",
  LIB: "Library & Learning and Information Services",
  MB: "Maudland Building",
  MC: "Medical Centre",
  ME: "Media Factory",
  MF: "Multi-Faith Centre / Oasis Faith & Spirituality Centre",
  MO: "Moss Building",
  PSC: "Pre-School Centre",
  SB: "Stewart Building",
  STF: "Sir Tom Finney Sports Centre",
  SU: "Students’ Union / 53 Degrees",
  STU: "Student Centre",
  SPG: "St Peter’s Gardens",
  US: "University Square",
  VB: "Victoria Building",
  VE: "Vernon Building",
  WB: "Wharf Building",

  // Residences
  Bm: "Boatsmans Court (Residence)",
  Bw: "Bowran House (Residence)",
  Dr: "Douglas Hall (Residence)",
  Dw: "Derwent Hall (Residence)",
  Er: "Eden Hall (Residence)",
  iQ: "iQ Preston (Residence)",
  LH: "Livesey House (Residence)",
  Pn: "Pendle Hall (Residence)",
  Ri: "Ribble Hall (Residence)",
  Rr: "Roeburn Hall (Residence)",
  Wr: "Whitendale Hall (Residence)",
};
/*
Brief: Turn a raw location string into a readable building / room label
@Param1 rawLocation - Raw location text (e.g., "CM234", "LIB-101", "CM")
@Return: Formatted location string
@ReturnT: "<Building Name> : (<code> <room>)" when a 2-3 letter code is followed by room details,
          or the full building name when the whole string is a known code
@ReturnF: "Unknown Location" for empty input; otherwise the trimmed input unchanged
*/
const parseLocation = (rawLocation) => {
  if (!rawLocation) return "Unknown Location";

  // Trim once so the pattern match and the direct lookup see the same text
  const location = String(rawLocation).trim();
  if (location === "") return "Unknown Location";

  // A bare known code (e.g., "CM", "LIB") must be resolved BEFORE the pattern below,
  // otherwise a 3-letter code is split into a 2-letter code plus a one-letter "room".
  // Object.hasOwn keeps inherited keys such as "__proto__" from being treated as codes.
  if (Object.hasOwn(buildingCodes, location)) return buildingCodes[location];

  // Building code (2-3 letters), optional space/hyphen separator, then the room details
  const locationRegex = /^([A-Za-z]{2,3})[ -]?(.+)$/;
  const match = location.match(locationRegex);
  if (!match) return location;

  const [, code, details] = match;
  // Fall back to the code itself when it is not in the table
  const buildingName = Object.hasOwn(buildingCodes, code) ? buildingCodes[code] : code;
  return `${buildingName} : (${code} ${details})`;
};
// HELPER FUNCTION
/*
Brief: Extract every event from the whitespace-collapsed text of one timetable cell
@Param1 text - Raw event text, expected on a single line in the form
        "HH:MM - HH:MM <module> <location containing 'Building'> <lecturer> <type> (Group: <group>)"
@Param2 events - Array the parsed events are appended to (defaults to a new array)
@Return: events - The same array, with one JSON object pushed per matched event
@ReturnT: Parsed events
@ReturnF: No events found (array returned unchanged; also the result for non-string text)
NOTE(review): the pattern only matches locations that contain the literal word "Building";
cells whose location lacks it produce no event - confirm this is intended.
*/
const parseEvents = (text, events = []) =>
{
  // Nothing to scan: return the accumulator instead of throwing on text.match
  if (typeof text !== 'string') return events;

  // Pattern matches the 5-part structure (collapsed to one line):
  // Time | Module | Location (contains "Building") | Lecturer | Type (Group: group)
  const eventPattern = /(\d{2}:\d{2}) - (\d{2}:\d{2}) (.+?) (.+?Building.+?) (.+?) (.+?) \(Group: (.+?)\)/;

  let remainingText = text;
  for (;;) {
    // Run the pattern once per pass and reuse the result for both logging and parsing
    const match = remainingText.match(eventPattern);
    console.log('Parsing text:', match);
    if (!match) break;

    const [matchedText, startTime, endTime, moduleName, rawLocation, lecturer, type, group] = match;
    events.push({
      startTime,
      endTime,
      moduleName,
      location: parseLocation(rawLocation),
      lecturer,
      type,
      group,
      time: `${startTime} - ${endTime}`,
      // description keeps the raw (unformatted) location text
      description: `${moduleName} - ${rawLocation} - ${lecturer} - ${type} (Group: ${group})`,
    });

    // Drop the event just consumed and scan what is left
    remainingText = remainingText.replace(matchedText, '').trim();
  }
  return events;
}
/*
Brief: Collect the user's events from the raw timetable HTML
Each <tr> whose first <th> contains a DD/MM/YYYY date is treated as a day row;
every <td> in that row containing 'Group' is handed to parseEvents.
@Param1 timetableHTML - Raw HTML of the timetable page
@Return: Result object (the function is async, so callers receive a Promise)
@ReturnT: { success: true, events } - each event tagged with eventDate (YYYY-MM-DD)
@ReturnF: { success: false, error: 'No events found' }
*/
const parseUserEvents = async (timetableHTML) =>
{
  const $ = cheerio.load(timetableHTML);
  const collected = [];
  // Matches dates such as "Monday 19/01/2026" and captures DD, MM, YYYY
  const datePattern = /(\d{2})\/(\d{2})\/(\d{4})/;

  $('tr').each((_rowIndex, row) => {
    const $row = $(row);

    // The date lives in the first th of the row; rows without one are headers
    const headerText = $row.find('th').first().text().trim();
    const dateParts = headerText.match(datePattern);
    if (!dateParts) return;

    const [, day, month, year] = dateParts;
    const isoDate = `${year}-${month}-${day}`;

    $row.find('td').each((_colIndex, cell) => {
      // Collapse all whitespace so the cell reads as a single line
      const flattened = $(cell).text().trim().replace(/\s+/g, ' ');

      // Cells without 'Group' are not event cells
      if (!flattened.includes('Group')) return;

      for (const event of parseEvents(flattened)) {
        event.eventDate = isoDate; // date comes from the row, not the cell
        collected.push(event);
      }
    });
  });

  if (collected.length === 0) {
    return { success: false, error: 'No events found' };
  }

  console.log('Parsed Events:', collected);
  return { success: true, events: collected };
}
// MAIN FUNCTION
/*
Brief: Fetch the timetable page using HTTP Basic Authentication and parse it
@Param1 user - User's Email
@Param2 pass - User's Password
@Return: Result object from parseUserEvents, or { success: false, error }
@ReturnT: Page fetched (HTTP ok) - returns whatever parseUserEvents returns for the page HTML
@ReturnF: Missing credentials, non-ok HTTP status, or an error thrown while fetching/parsing
          (errors are caught and reported in the error field rather than rethrown)
*/
async function fetchTimetable(user, pass) {
  // Fail fast instead of sending "undefined:undefined" to the server
  if (!user || !pass) {
    return { success: false, error: 'Missing username or password' };
  }

  // HTTP Basic Authentication: Base64 to encode "username:password"
  // Source: https://developer.mozilla.org/en-US/docs/Web/HTTP/Authentication#basic_authentication_scheme
  const token = Buffer.from(`${user}:${pass}`, 'utf8').toString('base64');

  try {
    // GET request
    console.log('Fetching timetable with Basic Auth');
    const getRes = await fetch(url, {
      method: 'GET',
      headers: {
        'Authorization': `Basic ${token}`,
        'User-Agent': userAgent,
      },
    });
    console.log('GET Status:', getRes.status);

    if (!getRes.ok) {
      return { success: false, error: `Failed to fetch timetable: ${getRes.status} ${getRes.statusText}` };
    }
    return await parseUserEvents(await getRes.text());
  } catch (error) {
    console.error('Error fetching timetable:', error.message);
    return { success: false, error: error.message };
  }
}
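/*
// Example usage
// Never hard-code real credentials here; read them from the environment (dotenv is loaded above).
// TIMETABLE_USER / TIMETABLE_PASS are placeholder variable names.
// fetchTimetable already parses the page, so it resolves to { success, events } or { success, error }.
fetchTimetable(process.env.TIMETABLE_USER, process.env.TIMETABLE_PASS).then(result =>
{
if (!result.success) {
console.error('Error:', result.error);
return;
}
console.log('Timetable events parsed:', result.events.length);
}).catch(err => {
console.error('Error:', err.message);
});
*/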
module.exports = { fetchTimetable };