-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsphericalclustering.py
75 lines (57 loc) · 2.1 KB
/
sphericalclustering.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
# -*- coding: utf-8 -*-
"""
This module clusters data with spherical k-means, i.e. hard-assignment EM for a mixture of von Mises-Fisher (VMF) distributions.
GUIDE :
Prof. Anand A Joshi - [email protected]
TEAM:
Bhavana Ganesh - [email protected]
Mahathi Vatsal Salopanthula - [email protected]
Sayali Ghume - [email protected]
Contact any of the members for queries and bugs.
"""
import numpy as np
import mpmath
def sphericalknn(data, no_clusters, deviation=0.01, epsilon=0.01):
    """Cluster the rows of *data* with spherical k-means.

    Every centroid is a unit vector.  A row is assigned to the centroid
    with which it has the largest dot product (E step); each centroid is
    then replaced by the normalised sum of the rows assigned to it
    (M step).  The two steps alternate until the objective, i.e. the sum
    over rows of the best dot product, changes by at most *epsilon*.

    Parameters
    ----------
    data : array_like, shape (n_samples, n_features)
        Samples to cluster, one per row.
        NOTE(review): the dot-product assignment presumably expects rows
        that are already normalised to unit length -- confirm with callers.
    no_clusters : int
        Number of clusters; must be at least 1.
    deviation : float, optional
        Upper bound on the length of the random perturbation added to the
        mean direction when seeding each centroid.
    epsilon : float, optional
        Absolute change of the objective between two consecutive
        iterations at or below which the iteration stops.

    Returns
    -------
    numpy.ndarray, shape (n_samples,)
        Index of the cluster assigned to each row of *data*.

    Raises
    ------
    ValueError
        If *data* is not a non-empty 2-D array or *no_clusters* < 1.

    Notes
    -----
    Centroids are seeded from NumPy's global random state
    (``np.random.rand``), so call ``np.random.seed`` beforehand for
    reproducible results.
    """
    points = np.asarray(data)
    if not np.issubdtype(points.dtype, np.floating):
        # Integer / boolean input: promote once so all later maths is float.
        points = points.astype(float)
    if points.ndim != 2 or points.shape[0] == 0 or points.shape[1] == 0:
        raise ValueError("data must be a non-empty 2-D array")
    if no_clusters < 1:
        raise ValueError("no_clusters must be at least 1")
    n_features = points.shape[1]

    # Direction of the summed samples; every centroid starts close to it.
    total = points.sum(axis=0)
    total_norm = np.linalg.norm(total)
    if total_norm > 0:
        mean_direction = total / total_norm
    else:
        # The samples cancel out, so there is no mean direction.  Seeding
        # from the zero vector yields purely random unit centroids instead
        # of propagating NaN through the whole computation.
        mean_direction = np.zeros(n_features)

    # Seed each centroid as the mean direction plus a small random offset,
    # projected back onto the unit sphere.  The order of the two random
    # draws is kept so that seeded runs consume the same random stream.
    centroids = np.zeros((no_clusters, n_features))
    for k in range(no_clusters):
        direction = np.random.rand(1, n_features) - 0.5
        radius = deviation * np.random.rand()
        offset = radius * direction[0] / np.linalg.norm(direction)
        seed_point = mean_direction + offset
        centroids[k] = seed_point / np.linalg.norm(seed_point)

    # Start from -inf (not an arbitrary finite number) so that the first
    # iteration can never be mistaken for convergence.
    previous_objective = -np.inf
    while True:
        # E step: assign every sample to the centroid with the largest
        # dot product.
        similarity = np.dot(points, centroids.T)
        labels = np.argmax(similarity, axis=1)
        objective = similarity.max(axis=1).sum()

        # M step: move each centroid to the normalised sum of its members.
        # Empty clusters and clusters whose members cancel out keep their
        # previous centroid.
        for k in range(no_clusters):
            members = points[labels == k]
            if members.shape[0] == 0:
                continue
            resultant = members.sum(axis=0)
            resultant_norm = np.linalg.norm(resultant)
            if resultant_norm != 0:
                centroids[k] = resultant / resultant_norm

        # Written as "not (... > epsilon)" so that a NaN objective also
        # terminates the loop instead of spinning forever.
        if not abs(objective - previous_objective) > epsilon:
            break
        previous_objective = objective

    return labels