@@ -76,7 +76,9 @@ def run_clustering(
7676 mod_out_statement_ids = mod_out_statement_ids ,
7777 )
7878
79- participants_df , projected_statements , pca = run_pca (vote_matrix = filtered_vote_matrix )
79+ # Run PCA and generate participant/statement projections.
80+ # DataFrames each have "x" and "y" columns.
81+ participants_df , statements_df , pca = run_pca (vote_matrix = filtered_vote_matrix )
8082
8183 participant_ids_to_cluster = get_clusterable_participant_ids (raw_vote_matrix , vote_threshold = min_user_vote_threshold )
8284 if keep_participant_ids :
@@ -116,15 +118,13 @@ def get_with_default(lst, idx, default=None):
116118 except IndexError :
117119 return default
118120
119- statements_df = pd .DataFrame (index = pd .Index (data = raw_vote_matrix .columns , name = "statement_id" )) # NEW
120- statements_df ["to_zero" ] = statements_df .index .isin (mod_out_statement_ids ) #NEW
121- statements_df ["is_meta" ] = statements_df .index .isin (meta_statement_ids ) #NEW
121+ statements_df ["to_zero" ] = statements_df .index .isin (mod_out_statement_ids )
122+ statements_df ["is_meta" ] = statements_df .index .isin (meta_statement_ids )
122123 statements_df ["mean" ] = pca .mean_
123124 statements_df ["pc1" ] = get_with_default (pca .components_ , 0 )
124125 statements_df ["pc2" ] = get_with_default (pca .components_ , 1 )
125126 statements_df ["pc3" ] = get_with_default (pca .components_ , 2 )
126- statements_df = pd .concat ([statements_df , projected_statements ], axis = 1 ) # NEW
127- statements_df = pd .concat ([statements_df , gac_df ], axis = 1 ) # NEW
127+ statements_df = pd .concat ([statements_df , gac_df ], axis = 1 )
128128 statements_df = populate_priority_calculations_into_statements_df (
129129 statements_df = statements_df ,
130130 vote_matrix = raw_vote_matrix .loc [participant_ids_to_cluster , :],
@@ -134,10 +134,10 @@ def get_with_default(lst, idx, default=None):
134134 raw_vote_matrix = raw_vote_matrix ,
135135 filtered_vote_matrix = filtered_vote_matrix ,
136136 pca = pca ,
137- projected_participants = participants_df .loc [participant_ids_to_cluster , :],
138- projected_statements = projected_statements ,
137+ projected_participants = participants_df .loc [participant_ids_to_cluster , [ "x" , "y" , "cluster_id" ]], # deprecate?
138+ projected_statements = statements_df . loc [:, [ "x" , "y" ]], # deprecate?
139139 kmeans = kmeans ,
140- group_aware_consensus = gac_df ,
140+ group_aware_consensus = gac_df , # deprecate?
141141 group_comment_stats = grouped_stats_df ,
142142 statements_df = statements_df ,
143143 participants_df = participants_df ,
0 commit comments