Skip to content

Commit a2ab643

Browse files
committed
Clean up statements dataframe in polis implementation.
1 parent a012a0a commit a2ab643

1 file changed

Lines changed: 9 additions & 9 deletions

File tree

reddwarf/implementations/polis.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,9 @@ def run_clustering(
7676
mod_out_statement_ids=mod_out_statement_ids,
7777
)
7878

79-
participants_df, projected_statements, pca = run_pca(vote_matrix=filtered_vote_matrix)
79+
# Run PCA and generate participant/statement projections.
80+
# DataFrames each have "x" and "y" columns.
81+
participants_df, statements_df, pca = run_pca(vote_matrix=filtered_vote_matrix)
8082

8183
participant_ids_to_cluster = get_clusterable_participant_ids(raw_vote_matrix, vote_threshold=min_user_vote_threshold)
8284
if keep_participant_ids:
@@ -116,15 +118,13 @@ def get_with_default(lst, idx, default=None):
116118
except IndexError:
117119
return default
118120

119-
statements_df = pd.DataFrame(index=pd.Index(data=raw_vote_matrix.columns, name="statement_id")) # NEW
120-
statements_df["to_zero"] = statements_df.index.isin(mod_out_statement_ids) #NEW
121-
statements_df["is_meta"] = statements_df.index.isin(meta_statement_ids) #NEW
121+
statements_df["to_zero"] = statements_df.index.isin(mod_out_statement_ids)
122+
statements_df["is_meta"] = statements_df.index.isin(meta_statement_ids)
122123
statements_df["mean"] = pca.mean_
123124
statements_df["pc1"] = get_with_default(pca.components_, 0)
124125
statements_df["pc2"] = get_with_default(pca.components_, 1)
125126
statements_df["pc3"] = get_with_default(pca.components_, 2)
126-
statements_df = pd.concat([statements_df, projected_statements], axis=1) # NEW
127-
statements_df = pd.concat([statements_df, gac_df], axis=1) # NEW
127+
statements_df = pd.concat([statements_df, gac_df], axis=1)
128128
statements_df = populate_priority_calculations_into_statements_df(
129129
statements_df=statements_df,
130130
vote_matrix=raw_vote_matrix.loc[participant_ids_to_cluster, :],
@@ -134,10 +134,10 @@ def get_with_default(lst, idx, default=None):
134134
raw_vote_matrix=raw_vote_matrix,
135135
filtered_vote_matrix=filtered_vote_matrix,
136136
pca=pca,
137-
projected_participants=participants_df.loc[participant_ids_to_cluster, :],
138-
projected_statements=projected_statements,
137+
projected_participants=participants_df.loc[participant_ids_to_cluster, ["x", "y", "cluster_id"]], # deprecate?
138+
projected_statements=statements_df.loc[:, ["x", "y"]], # deprecate?
139139
kmeans=kmeans,
140-
group_aware_consensus=gac_df,
140+
group_aware_consensus=gac_df, # deprecate?
141141
group_comment_stats=grouped_stats_df,
142142
statements_df=statements_df,
143143
participants_df=participants_df,

0 commit comments

Comments
 (0)