-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpca_plot_pc.py
61 lines (51 loc) · 2.18 KB
/
pca_plot_pc.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import pandas as pd
from sklearn.preprocessing import StandardScaler
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
# Load the samples; the first CSV column is used as the row index.
df = pd.read_csv('DataB.csv', index_col=0)

# Split the 'gnd' column (used later as the per-sample class label) off from
# the remaining columns, which form the numeric data matrix.
class_matrix = df['gnd'].values
df.drop(columns=['gnd'], inplace=True)
data_matrix = df.astype(float).values

# Standardize every column, then compute the covariance between columns.
# np.cov treats rows as variables, hence the transpose.
data_matrix_std = StandardScaler().fit_transform(data_matrix)
covariance_matrix = np.cov(data_matrix_std.T)

# Eigen-decomposition of the covariance matrix; column i of eig_vec is the
# eigenvector that belongs to eig_val[i].
# NOTE(review): the covariance matrix is symmetric, so np.linalg.eigh would be
# the numerically preferable routine. It is not swapped in here because it may
# return eigenvectors with opposite signs, which would mirror the plots
# relative to the hard-coded reference lines -- confirm before changing.
eig_val, eig_vec = np.linalg.eig(covariance_matrix)

# Pair each absolute eigenvalue with its eigenvector and order the pairs from
# the largest eigenvalue to the smallest.
eig_pairs = [(np.abs(value), eig_vec[:, i]) for i, value in enumerate(eig_val)]
# Sort on the eigenvalue only. Sorting the bare tuples would fall back to
# comparing the eigenvector arrays whenever two eigenvalues are equal, which
# raises "truth value of an array is ambiguous".
eig_pairs.sort(key=lambda pair: pair[0], reverse=True)
# Enlarge all seaborn fonts for the figures drawn below.
sns.set(font_scale=1.2)

# Project the standardized data onto the first two principal components.
# The two leading eigenvectors become the columns of the projection matrix.
transformation_matrix_pc_1_2 = np.column_stack(
    (eig_pairs[0][1], eig_pairs[1][1])
)
pc_1_2_matrix = data_matrix_std.dot(transformation_matrix_pc_1_2)
pc_1_2_df = pd.DataFrame(pc_1_2_matrix, columns=['PC1', 'PC2']).assign(
    Classes=class_matrix
)

# Scatter plot of the projected samples, coloured by class. The regression
# fit is switched off, so only the points are drawn.
sns.lmplot(
    data=pc_1_2_df,
    x="PC1",
    y="PC2",
    hue='Classes',
    fit_reg=False,
    legend=True,
    scatter_kws={"s": 80},
)

# Fixed reference lines on the current axes: two vertical (red) and two
# horizontal (blue).
# NOTE(review): the positions are hard-coded; presumably chosen by eye from
# the plotted data -- confirm.
for x_position in (-20, 10):
    plt.axvline(x=x_position, color='#d62728')
for y_position in (-15, 10):
    plt.axhline(y=y_position, color='#1f77b4')
# Project the standardized data onto the 5th and 6th principal components.
# eig_pairs is zero-indexed, so these are entries 4 and 5.
transformation_matrix_pc_5_6 = np.column_stack(
    (eig_pairs[4][1], eig_pairs[5][1])
)
pc_5_6_matrix = data_matrix_std.dot(transformation_matrix_pc_5_6)
pc_5_6_df = pd.DataFrame(pc_5_6_matrix, columns=['PC5', 'PC6']).assign(
    Classes=class_matrix
)

# Scatter plot of the projected samples, coloured by class. The regression
# fit is switched off, so only the points are drawn.
sns.lmplot(
    data=pc_5_6_df,
    x="PC5",
    y="PC6",
    hue='Classes',
    fit_reg=False,
    legend=True,
    scatter_kws={"s": 80},
)

# Fixed reference lines on the current axes: two vertical (red) and two
# horizontal (blue).
# NOTE(review): the positions are hard-coded; presumably chosen by eye from
# the plotted data -- confirm.
for x_position in (-12, 11):
    plt.axvline(x=x_position, color='#d62728')
for y_position in (-8, 10):
    plt.axhline(y=y_position, color='#1f77b4')

# Display every figure created so far.
plt.show()