@@ -226,7 +226,10 @@ def resolve_historical_courses(
226226 for code_1 , code_2 in itertools .pairwise (course_codes ):
227227 cross_listed_codes .add_edge (code_1 , code_2 )
228228
229- for code_1 , code_2 in [* code_changes , * build_four_digit_transition (listings ).items ()]:
229+ for code_1 , code_2 in [
230+ * code_changes ,
231+ * build_four_digit_transition (listings ).items (),
232+ ]:
230233 cross_listed_codes .add_edge (code_1 , code_2 )
231234
232235 for subject_1 , subject_2 in subject_changes :
@@ -254,7 +257,7 @@ def resolve_historical_courses(
254257 courses ["description" ].apply (len ) >= MIN_DESCRIPTION_MATCH_LEN
255258 ].to_dict ()
256259
257- same_courses : list [ list [int ]] = []
260+ same_course_to_courses : dict [ int , list [int ]] = {}
258261
259262 for codes in tqdm (
260263 cross_listed_codes ,
@@ -281,7 +284,8 @@ def resolve_historical_courses(
281284 title_components = [(i , t , c ) for i , (t , c ) in enumerate (titles .items ())]
282285 # There's no title variation, nothing to match
283286 if len (title_components ) == 1 :
284- same_courses .append (list (course_set ))
287+ ids = list (course_set )
288+ same_course_to_courses [min (ids )] = ids
285289 continue
286290 same_course_graph = nx .Graph ()
287291 # fill in the nodes first to keep courses with no same-code edges
@@ -349,15 +353,15 @@ def resolve_historical_courses(
349353 log_file .write (
350354 f"[WARNING] { '/' .join (c1 )} and { '/' .join (c2 )} have no code in common\n "
351355 )
352- same_courses .append (list (x ))
356+ ids = list (x )
357+ same_course_to_courses [min (ids )] = ids
353358
354359 for course in set (discussion_course_ids ):
355- same_courses . append ( [course ])
360+ same_course_to_courses [course ] = [ course ]
356361
357362 # map courses to unique same-courses ID, and map same-courses ID to courses
358- connected_courses = pd .Series (same_courses , name = "course_id" )
363+ connected_courses = pd .Series (same_course_to_courses , name = "course_id" )
359364 connected_courses .index .rename ("same_course_id" , inplace = True )
360- same_course_to_courses = connected_courses .to_dict ()
361365
362366 # map course_id to same-course partition ID
363367 same_course_id = (
@@ -409,14 +413,15 @@ def split_same_professors(
409413 .reset_index ()
410414 )
411415
412- professors_grouped .index .rename ("same_course_and_profs_id" , inplace = True )
416+ professors_grouped ["same_course_and_profs_id" ] = professors_grouped [
417+ "course_id"
418+ ].apply (min )
419+ same_prof_course_to_courses = professors_grouped .set_index (
420+ "same_course_and_profs_id"
421+ )["course_id" ].to_dict ()
413422
414- same_prof_course_to_courses = professors_grouped ["course_id" ].to_dict ()
415-
416- same_course_and_profs_id = (
417- professors_grouped .explode ("course_id" )
418- .reset_index (drop = False )
419- .set_index ("course_id" )["same_course_and_profs_id" ]
420- )
423+ same_course_and_profs_id = professors_grouped .explode ("course_id" ).set_index (
424+ "course_id"
425+ )["same_course_and_profs_id" ]
421426
422427 return same_course_and_profs_id , same_prof_course_to_courses
0 commit comments