22import pandas as pd
33import numpy as np
44from reddwarf .utils .matrix import VoteMatrix , generate_virtual_vote_matrix
5- from reddwarf .sklearn .transformers import SparsityAwareCapturer , SparsityAwareScaler , calculate_scaling_factors
5+ from reddwarf .sklearn .transformers import SparsityAwareCapturer , SparsityAwareScaler
66from reddwarf .sklearn .pipeline import PatchedPipeline
77from typing import Tuple
88
@@ -67,99 +67,7 @@ def run_pca(
6767
6868 return projected_participants , projected_statements , pca
6969
70- # TODO: Clean up variables and docs.
71- def sparsity_aware_project_ptpt (participant_votes , statement_components , statement_means ):
72- """
73- Projects a sparse vote vector into PCA space while adjusting for sparsity.
74-
75- Args:
76- participant_votes (list): List of participant votes on each statement
77- statement_components (list[list[float]]): Two lists of floats corresponding to the two principal components
78- statement_means (list[float]): List of floats corresponding to the centers/means of each statement
79-
80- Returns:
81- projected_coords (list[list[float]]): Two lists corresponding to projected xy coordinates.
82- """
83- statement_components = np .array (statement_components ) # Shape: (2, n_features)
84- statement_means = np .array (statement_means ) # Shape: (n_features,)
85-
86- participant_votes = np .array (participant_votes )
87- mask = ~ np .isnan (participant_votes ) # Only consider non-null values
88-
89- # Extract relevant values
90- x_vals = participant_votes [mask ] - statement_means [mask ] # Centered values
91- # TODO: Extend this to work in 3D
92- pc1_vals , pc2_vals = statement_components [:, mask ] # Select only used components
93-
94- # Compute dot product projection
95- p1 = np .dot (x_vals , pc1_vals )
96- p2 = np .dot (x_vals , pc2_vals )
97-
98- scaling_factor = calculate_scaling_factors (participant_votes )
99-
100- coord_projected = np .array ([p1 , p2 ])
101- coord_scaled = coord_projected * scaling_factor
102-
103- return coord_scaled
104-
105- # TODO: Clean up variables and docs.
106- def sparsity_aware_project_ptpts (vote_matrix , statement_components , statement_means ):
107- """
108- Apply sparsity-aware projection to multiple vote vectors.
109- """
110- return np .array ([
111- sparsity_aware_project_ptpt (participant_votes , statement_components , statement_means )
112- for participant_votes in vote_matrix ]
113- )
114-
115- # TODO: Clean up variables and docs.
116- def pca_project_cmnts (statement_components , statement_means ):
117- """
118- Projects unit vectors for each feature into PCA space to understand their placement.
119- """
120-
121- # Create a matrix of virtual participants that each vote once on a single statement.
122-
123- # Build a basic identity matrix
124- n_statements = len (statement_means )
125- virtual_vote_matrix = np .eye (n_statements )
126-
127- # Replace 1s with AGREE_VAL (currently 1) and 0s with NaN
128- # TODO: Why does Polis use -1 (disagree) here? is it the same? BUG?
129- AGREE_VAL = 1
130- MISSING_VAL = np .nan
131- virtual_vote_matrix = np .where (virtual_vote_matrix == 1 , AGREE_VAL , MISSING_VAL )
132-
133- statement_projections = sparsity_aware_project_ptpts (
134- virtual_vote_matrix ,
135- statement_components ,
136- statement_means ,
137- )
138-
139- return statement_projections
140-
14170def calculate_extremity (projections : ArrayLike ):
14271 # Compute extremity as the vector magnitude of each column (norm taken over axis 0).
14372 # vector magnitude = Euclidean norm = hypotenuse of xy
144- return np .linalg .norm (projections , axis = 0 )
145-
146- # TODO: Clean up variables and docs.
147- def with_proj_and_extremity (pca ):
148- """
149- Compute projection and extremity, then merge into PCA results.
150- """
151- statement_projections = pca_project_cmnts (
152- statement_components = pca ["comps" ],
153- statement_means = pca ["center" ],
154- )
155-
156- # Flip the axes to get all x together and y together.
157- # 2 sets of 40. shape (2, 40)
158- statement_projections = statement_projections .transpose ()
159-
160- statement_extremities = calculate_extremity (statement_projections )
161-
162- pca ["comment-projection" ] = statement_projections .tolist ()
163- pca ["comment-extremity" ] = statement_extremities .tolist ()
164-
165- return pca
73+ return np .linalg .norm (projections , axis = 0 )
0 commit comments