-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathspearman.py
106 lines (87 loc) · 2.37 KB
/
spearman.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
import csv
import numpy as np
import copy
import math
# Module-level state shared by the clustering routines defined in this file.
items = []             # data points: one [x, y] pair of floats per CSV row
centroids = []         # current centroid coordinates: one [x, y] pair per cluster
closest = []           # contains the closest distance
closest_centroid = []  # cluster number

# Fixed seed so the randomly drawn initial centroids are reproducible.
np.random.seed(200)

k = int(input('Enter the value of k you want: '))
if k < 1:
    # Fewer than one cluster leaves no centroid to assign points to; fail
    # here with a clear message instead of an IndexError further down.
    raise ValueError('k must be a positive integer')

# opening csv file and storing the values of x and y
# newline='' is the mode the csv module documents for files given to csv.reader.
with open('xy.csv', newline='') as csv_file:
    csv_reader = csv.reader(csv_file, delimiter=',')
    for line in csv_reader:
        if not line:
            # csv.reader yields an empty list for a blank line; skip it
            # rather than crashing on line[0].
            continue
        items.append([float(line[0]), float(line[1])])

# initialising the centroids
# Each centroid draws x first, then y, as integers from [0, 80).
for _ in range(k):
    centroids.append([np.random.randint(0, 80), np.random.randint(0, 80)])
def _average_ranks(values):
    """Return the 1-based rank of each entry of *values*; ties share their mean rank."""
    order = sorted(range(len(values)), key=values.__getitem__)
    ranks = [0.0] * len(values)
    start = 0
    while start < len(order):
        stop = start
        # Extend the run while the next sorted entry equals the run's value.
        while stop + 1 < len(order) and values[order[stop + 1]] == values[order[start]]:
            stop += 1
        # Sorted positions start..stop hold equal values. Their 1-based ranks
        # are start+1..stop+1, and the mean of that range is the shared rank.
        shared_rank = (start + stop) / 2.0 + 1.0
        for index in order[start:stop + 1]:
            ranks[index] = shared_rank
        start = stop + 1
    return ranks


def SpearmanCorrelation(item, centroid):
    """Return Spearman's rank correlation between *item* and *centroid*.

    Both arguments are sequences of numbers of the same length n. Each is
    converted to ranks (1 = smallest) and the result is computed with the
    rank-difference formula ``1 - 6 * sum(d**2) / (n * (n**2 - 1))``.

    Tied values inside one sequence receive the average of the ranks they
    span. The previous implementation stored ranks in a dict keyed by value,
    so every tied value silently took the highest rank of its group.

    Raises:
        ValueError: if the sequences differ in length, or have fewer than
            two entries (the formula's denominator would be zero).
    """
    item = list(item)
    centroid = list(centroid)
    n = len(item)
    if n != len(centroid):
        raise ValueError('item and centroid must have the same length')
    if n < 2:
        raise ValueError('Spearman correlation needs at least two values')

    item_ranks = _average_ranks(item)
    centroid_ranks = _average_ranks(centroid)
    d_squared = sum((a - b) ** 2 for a, b in zip(item_ranks, centroid_ranks))
    return 1 - 6 * d_squared / (n * (n * n - 1))
def Correlation(items, means):
    """Score every item against every mean.

    Returns a list with one row per entry of *means*; row ``i`` holds
    ``SpearmanCorrelation(item, means[i])`` for each item, in the order the
    items are given.
    """
    return [
        [SpearmanCorrelation(point, mean) for point in items]
        for mean in means
    ]
def assignment():
    """Assign each point to the centroid with the highest correlation.

    Reads the module-level ``Correlations`` table (indexed as
    ``Correlations[centroid][point]``) and writes 1-based cluster numbers
    into the module-level ``closest_centroid`` list in place. Takes no
    arguments and returns nothing.
    """
    for point_index in range(len(closest_centroid)):
        # Starting threshold; an entry is only written when a score exceeds it.
        best = -2
        for centroid_index, row in enumerate(Correlations):
            score = row[point_index]
            # Strict comparison: on equal scores the earlier centroid wins.
            if best < score:
                best = score
                closest_centroid[point_index] = centroid_index + 1
# Initial pass: mark every point as unassigned (0), score all points against
# the starting centroids, pick a cluster for each point and show the result.
closest_centroid = [0] * len(items)
Correlations = Correlation(items, centroids)
assignment()
print(closest_centroid)
def CalculateMean(items, clusters, i, j):
    """Return the mean of coordinate *i* over the items in cluster ``j + 1``.

    *clusters* holds a 1-based cluster number for each item, at the same
    position as the item. When no item belongs to cluster ``j + 1`` the
    function returns -1 instead of a mean.
    """
    wanted = j + 1
    total = 0.0
    members = 0
    for position, row in enumerate(items):
        if clusters[position] != wanted:
            continue
        total += row[i]
        members += 1
    if members == 0:
        return -1
    return total / float(members)
def update():
    """Move each centroid to the mean of the points currently assigned to it.

    Works in place on the module-level ``centroids`` list, using the
    module-level ``items`` and ``closest_centroid``. A coordinate is left
    unchanged when ``CalculateMean`` returns -1 for it.
    """
    dimensions = len(centroids[0])
    for j, centroid in enumerate(centroids):
        for i in range(dimensions):
            num = CalculateMean(items, closest_centroid, i, j)
            if num == -1:
                continue
            centroid[i] = num
# Repeat update/assign until the cluster assignment stops changing.
while True:
    # Despite its name, this holds the previous cluster assignment (not
    # centroid coordinates); it is compared below to detect convergence.
    old_centroids = copy.deepcopy(closest_centroid)
    update()
    # Re-score every point against the centroids update() just moved.
    # assignment() reads the module-level Correlations table, so without
    # this refresh it would reuse the scores computed before the loop and
    # the assignment could never change.
    Correlations = Correlation(items, centroids)
    assignment()
    if old_centroids == closest_centroid:
        break
print(closest_centroid)