-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathneural.py
More file actions
115 lines (99 loc) · 4.24 KB
/
Copy pathneural.py
File metadata and controls
115 lines (99 loc) · 4.24 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
from scipy.io import mmread
from scipy.sparse import save_npz
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.cluster import KMeans
from sklearn.manifold import TSNE # For 2D visualization
from keras.layers import Input, Dense, Layer
from keras.models import Model
import tensorflow.keras.backend as K
import os
import pickle
def assign_clusters(sample, k):
    """Cluster one sample's expression matrix with an autoencoder and save the result.

    Pipeline:
      1. Load ``data/sample<sample>/matrix.mtx``, densify and transpose it,
         then apply ``log1p`` followed by per-column min-max scaling.
      2. Pretrain a one-hidden-layer autoencoder (32-unit bottleneck).
      3. Run k-means on the encoded features to obtain initial centroids.
      4. Attach a soft-assignment clustering layer to the encoder and refine
         encoder and centroids jointly by minimising the KL divergence
         between the soft assignments and a sharpened target distribution.
      5. Write a t-SNE scatter plot and the pickled hard labels to
         ``compressed/neural/sample<sample>/k<k>/``.

    Args:
        sample: Sample identifier; only used through ``str(sample)`` when
            building the input and output paths.
        k: Number of clusters.

    Returns:
        None. The outputs are ``clusters.png`` and ``cluster_labels.pkl``
        inside the output directory.
    """
    matrix_path = 'data/sample' + str(sample) + '/matrix.mtx'
    sparse_matrix = mmread(matrix_path)
    # Transpose so that each row is one observation to be clustered
    # (presumably cells, given the plot title -- confirm against the data).
    X = sparse_matrix.toarray().transpose()

    # Preprocessing: compress the dynamic range, then scale every feature
    # to [0, 1] so it matches the sigmoid output of the decoder.
    X = np.log1p(X)
    X_scaled = MinMaxScaler().fit_transform(X)

    input_dim = X_scaled.shape[1]
    encoding_dim = 32  # Latent space dimension
    n_clusters = k

    # Step 1: Pretrain the autoencoder on the reconstruction task.
    input_layer = Input(shape=(input_dim,))
    encoder = Dense(encoding_dim, activation='relu')(input_layer)
    decoder = Dense(input_dim, activation='sigmoid')(encoder)
    autoencoder = Model(inputs=input_layer, outputs=decoder)
    autoencoder.compile(optimizer='adam', loss='mse')
    autoencoder.fit(X_scaled, X_scaled, epochs=100, batch_size=32, shuffle=True)

    # Extract encoded features from the pretrained encoder.
    encoder_model = Model(inputs=input_layer, outputs=encoder)
    encoded_data = encoder_model.predict(X_scaled)

    # Step 2: Initialise the centroids with k-means in the latent space.
    kmeans = KMeans(n_clusters=n_clusters, n_init=10)
    kmeans.fit(encoded_data)
    cluster_centers = kmeans.cluster_centers_

    # Step 3: Soft-assignment layer holding the trainable centroids.
    class ClusteringLayer(Layer):
        """Map latent vectors to soft cluster assignments.

        The similarity between a point and each centroid is a Student's
        t kernel with ``alpha`` degrees of freedom, normalised per row so
        that every output row sums to one.
        """

        def __init__(self, n_clusters, weights=None, alpha=1.0, **kwargs):
            super().__init__(**kwargs)
            self.n_clusters = n_clusters
            self.alpha = alpha
            # Optional initial centroids, applied once in build().
            self.initial_weights = weights

        def build(self, input_shape):
            self.clusters = self.add_weight(
                shape=(self.n_clusters, input_shape[1]),
                initializer='glorot_uniform',
                name='clusters'
            )
            if self.initial_weights is not None:
                self.set_weights(self.initial_weights)
                del self.initial_weights
            self.built = True

        def call(self, inputs):
            # Squared distance of every input row to every centroid:
            # (batch, 1, dim) - (n_clusters, dim) -> (batch, n_clusters).
            sq_dist = K.sum(
                K.square(K.expand_dims(inputs, axis=1) - self.clusters),
                axis=2,
            )
            q = 1.0 / (1.0 + (sq_dist / self.alpha))
            q **= (self.alpha + 1.0) / 2.0
            # Row-normalise; the double transpose lets the per-row sums
            # broadcast along the cluster axis.
            q = K.transpose(K.transpose(q) / K.sum(q, axis=1))
            return q

    # Build the clustering model on top of the (shared) encoder output.
    clustering_layer = ClusteringLayer(
        n_clusters, weights=[cluster_centers], name='clustering'
    )(encoder)
    clustering_model = Model(inputs=input_layer, outputs=clustering_layer)
    clustering_model.compile(optimizer='adam', loss='kld')

    # Step 4: Target distribution that sharpens the current assignments.
    def target_distribution(q):
        """Square ``q``, divide by per-cluster mass, and renormalise each row."""
        weight = q ** 2 / q.sum(axis=0)
        return (weight.T / weight.sum(axis=1)).T

    # Alternate between recomputing the target and one epoch of training.
    max_iter = 20
    for epoch in range(max_iter):
        print('epoch ' + str(epoch))
        q = clustering_model.predict(X_scaled, verbose=0)
        p = target_distribution(q)
        clustering_model.fit(X_scaled, p, epochs=1, batch_size=32, verbose=0)

    # Final hard labels: the most probable cluster for each row.
    q_final = clustering_model.predict(X_scaled, verbose=0)
    cluster_labels = np.argmax(q_final, axis=1)

    # Step 5: Project the encoded features to 2D with t-SNE for plotting.
    # NOTE(review): `encoded_data` was computed before the fine-tuning loop,
    # while the labels come from the fine-tuned model -- confirm whether the
    # plot should use re-encoded features instead.
    tsne = TSNE(n_components=2, random_state=42)
    X_tsne = tsne.fit_transform(encoded_data)

    # Step 6: Persist the outputs. makedirs also creates any missing parent
    # directories and does not fail if the directory already exists.
    prefix_path = 'compressed/neural/sample' + str(sample) + '/k' + str(n_clusters)
    os.makedirs(prefix_path, exist_ok=True)

    fig = plt.figure(figsize=(8, 6))
    try:
        plt.scatter(
            X_tsne[:, 0], X_tsne[:, 1],
            c=cluster_labels, cmap='viridis', alpha=0.8
        )
        plt.title(
            't-SNE Visualization of Cell Clusters (k=' + str(n_clusters)
            + ') for scRNA Matrix ' + str(sample)
        )
        plt.xlabel('t-SNE Dimension 1')
        plt.ylabel('t-SNE Dimension 2')
        plt.colorbar(label='Cluster')
        plt.savefig(prefix_path + '/clusters.png')
    finally:
        # Release the figure so repeated calls do not accumulate open figures.
        plt.close(fig)

    # Pickle the label array and save it next to the plot.
    filename = prefix_path + '/cluster_labels.pkl'
    with open(filename, 'wb') as file:
        pickle.dump(cluster_labels, file)