diff --git a/flow/api/parse/schedule/schedule.go b/flow/api/parse/schedule/schedule.go
index 97b3756bb..14e7f4bc5 100644
--- a/flow/api/parse/schedule/schedule.go
+++ b/flow/api/parse/schedule/schedule.go
@@ -14,16 +14,23 @@ type Summary struct {
 	// Class numbers are four digits (e.g. 4895)
 	// and uniquely identify a section of a course within a term.
 	ClassNumbers []int
+	// Classrooms identify the location of the class (e.g. DWE 3422, ONLN - Online)
+	Classrooms []string
 }
 
 var (
 	termRegexp = regexp.MustCompile(`(Spring|Fall|Winter)\s+(\d{4})`)
+
 	// Class numbers are *the* four or five digit sequences
 	// which occur on a separate line, perhaps parenthesized.
 	// To be safe, we pre-emptively handle sequences up to length 8.
 	// This should be fine since the only other numbers that appear
 	// on their own line are the course code numbers (length 2 or 3).
 	classNumberRegexp = regexp.MustCompile(`\n\(?(\d{4,8})\)?\n`)
+
+	// Matches room locations that appear on their own line
+	// Building codes (alphanumeric with at least one letter) + space + room numbers, or TBA, or ONLN - Online
+	classroomRegexp = regexp.MustCompile(`(?m)^([A-Z0-9]*[A-Z][A-Z0-9]*\s+\d+|TBA|ONLN - Online)$`)
 )
 
 func extractTerm(text string) (int, error) {
@@ -56,6 +63,16 @@ func extractClassNumbers(text string) ([]int, error) {
 	return classNumbers, nil
 }
 
+func extractClassrooms(text string) ([]string, error) {
+	submatches := classroomRegexp.FindAllStringSubmatchIndex(text, -1)
+	classrooms := make([]string, len(submatches))
+	for i, submatch := range submatches {
+		matchText := text[submatch[2]:submatch[3]]
+		classrooms[i] = matchText
+	}
+	return classrooms, nil
+}
+
 func Parse(text string) (*Summary, error) {
 	term, err := extractTerm(text)
 	if err != nil {
@@ -65,9 +82,14 @@ func Parse(text string) (*Summary, error) {
 	if err != nil {
 		return nil, fmt.Errorf("extracting class numbers: %w", err)
 	}
+	classrooms, err := extractClassrooms(text)
+	if err != nil {
+		return nil, fmt.Errorf("extracting classrooms: %w", err)
+	}
 	summary := &Summary{
 		TermId:       term,
 		ClassNumbers: classNumbers,
+		Classrooms:   classrooms,
 	}
 	return summary, nil
 }
diff --git a/flow/api/parse/schedule/schedule_test.go b/flow/api/parse/schedule/schedule_test.go
index cf311dfc6..2f63d46c6 100644
--- a/flow/api/parse/schedule/schedule_test.go
+++ b/flow/api/parse/schedule/schedule_test.go
@@ -21,6 +21,9 @@ func TestParseSchedule(t *testing.T) {
 				ClassNumbers: []int{
 					4896, 4897, 4899, 4741, 4742, 5003, 4747, 4748, 7993, 7994, 7995, 4751, 4752,
 				},
+				Classrooms: []string{
+					"MC 2038", "MC 4064", "DWE 2527", "E3 2119", "CPH 3681", "CPH 3681", "CPH 3681", "CPH 3681", "CPH 3681", "MC 2034", "CPH 3681", "CPH 1346", "CPH 3681", "CPH 3681",
+				},
 			},
 		},
 		// This schedule does not have parentheses around class numbers.
@@ -31,6 +34,9 @@ func TestParseSchedule(t *testing.T) {
 				ClassNumbers: []int{
 					5211, 8052, 9289, 6394, 5867, 6321, 6205, 7253, 7254,
 				},
+				Classrooms: []string{
+					"E7 2317", "RCH 101", "MC 2034", "TBA", "MC 2017", "TBA", "AL 124", "DC 1351", "DC 1351",
+				},
 			},
 		},
 		// This schedule is old (carried over from Flow 1.0)
@@ -41,6 +47,9 @@ func TestParseSchedule(t *testing.T) {
 				ClassNumbers: []int{
 					3370, 3077, 3078, 3166, 2446, 4106, 4107, 4108, 4111, 4117, 4118, 4110,
 				},
+				Classrooms: []string{
+					"MC 4040", "QNC 1502", "QNC 1502", "TBA", "STP 105", "RCH 307", "MC 2038", "MC 2038", "TBA", "TBA", "MC 2038", "TBA", "TBA", "TBA",
+				},
 			},
 		},
 		// This schedule has an abnormal amount of whitespace
@@ -51,6 +60,9 @@ func TestParseSchedule(t *testing.T) {
 				ClassNumbers: []int{
 					4669, 4658, 4660, 4699, 4655, 4656, 4661, 4662, 4850, 4664, 4666, 4936, 4639, 4668, 7634,
 				},
+				Classrooms: []string{
+					"E5 3102", "E5 3102", "E5 3101", "E5 3101", "E5 3101", "E5 3101", "DWE 3518", "CPH 1346", "E5 3102", "E5 3101", "E5 3101", "MC 4063", "E5 3101", "E5 3102", "E5 3101", "E5 3101", "E3 3164", "E5 3101", "E5 3102", "MC 4060", "E2 2363", "E2 2363", "E2 2363", "E2 2363", "E2 2363", "E5 3101", "E5 3101", "E5 3101", "EV3 4412", "TBA",
+				},
 			},
 		},
 		// This schedule has class codes longer than 4 digits
@@ -61,6 +73,9 @@ func TestParseSchedule(t *testing.T) {
 				ClassNumbers: []int{
 					4262, 11810, 9336, 6336, 6367, 10692, 10310, 8204, 10376,
 				},
+				Classrooms: []string{
+					"ONLN - Online", "ONLN - Online", "ONLN - Online", "ONLN - Online", "ONLN - Online", "ONLN - Online", "ONLN - Online", "ONLN - Online", "ONLN - Online",
+				},
 			},
 		},
 	}