-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathrun_demo.py
249 lines (207 loc) · 7.85 KB
/
run_demo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
#Recoded by Dogu Can ELCI, Istanbul Technical University
import warnings
warnings.filterwarnings("ignore")
import numpy as np
from matplotlib import mlab
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from nxviz.plots import CircosPlot
import networkx as nx
# noinspection PyUnresolvedReferences
import matplotlib.pyplot as plt
from ScoreFeaturesAcrossRuns import Score_features
from NAGFS import NAGFS
from simulateData import simulate_data
# * * (1) Simulation parameters.
mu1 = 0.9      # Mean of the first Gaussian distribution (class 1)
sigma1 = 0.4   # Standard deviation of the first Gaussian distribution
mu2 = 0.7      # Mean of the second Gaussian distribution (class 2)
sigma2 = 0.6   # Standard deviation of the second Gaussian distribution
Nf = 5         # Number of selected (most discriminative) features
displayResults = 0
predict_list = []                    # Predictions collected over all leave-one-out iterations
ind_array = np.empty((0, Nf), int)   # Nf selected feature indices, one row per iteration
# * *
# * * (2) Random connectivity matrices are created as user inputs.
# Data = [Featurematrix, X, Label]
Data = simulate_data(mu1, sigma1, mu2, sigma2)  # simulate all data as connectivity matrices
# * *
# * * (3) Separate the samples of each class.
# This part is later used for plotting the Gaussian distribution of each class.
# Data[0] (feature matrix) and Data[2] (label vector) share the same sample order,
# so a boolean mask on the labels selects the per-class feature rows directly.
labels = np.ravel(Data[2])
data_class1 = Data[0][labels == 1, :]
data_class2 = Data[0][labels == -1, :]
# The Gaussian-distribution plots need flat 1 x (N*K) vectors, so flatten each class block.
data_class11 = np.concatenate(data_class1)
data_class22 = np.concatenate(data_class2)
# * *
# * *
# MAIN LOOP --- leave-one-out cross-validation.
# * * (4) Each sample in turn is held out as the test set; the rest form the training set.
for i in range(len(Data[2])):
    print("Iteration number :", i + 1)
    # All sample indices except the held-out sample i.
    # (The original built [0..n-1] and removed i; a filtered comprehension is equivalent.)
    general_index = [j for j in range(len(Data[2])) if j != i]
    train_data = Data[1][general_index, :, :]
    train_feature_data = Data[0][general_index, :]
    train_Labels = Data[2][general_index]
    test_data = Data[1][i, :, :]
    test_feature_data = Data[0][i, :]
    test_Label = Data[2][i]
    # * *
    # * * (5) NAGFS function.
    # Clusters the training samples of each class into 3 clusters by similarity,
    # finds representative local atlases per cluster (AC11, AC12, ...), then fuses
    # them into one representative atlas per class (AC1, AC2). `ind` holds the Nf
    # most discriminative feature indices for this iteration.
    AC1, AC2, ind = NAGFS(train_data, train_Labels, Nf, displayResults)
    # * *
    # * * (6) Record this iteration's Nf most discriminative feature indices.
    ind1 = np.ravel(ind)
    ind_array = np.append(ind_array, [ind1.reshape(Nf)], axis=0)
    # * *
    # * * (7) Keep only the selected feature columns, in ascending column order
    # (same result as the original delete-the-rest filtering, without the
    # flag machinery, the reuse of one delete_list for train and test, or the
    # shadowing of the outer loop variable `i` by the inner loops).
    keep_mask = np.isin(np.arange(train_feature_data.shape[1]), ind1)
    train_set = train_feature_data[:, keep_mask]       # shape (n_train, Nf)
    test_set = test_feature_data[keep_mask].reshape(1, -1)  # shape (1, Nf)
    # * *
    # * * (8) Classify the held-out sample with a linear SVM trained on the
    # reduced training set and store its prediction.
    clf = SVC(kernel="linear", C=1)
    clf.fit(train_set, train_Labels)
    pred = clf.predict(test_set)
    predict_list.append(pred)
    # * *
# * *
# * * (9) Accuracy, Sensitivity and Specificity scores.
# predict_list holds one length-1 array per iteration; flatten it to a 1-D
# label vector before computing the confusion matrix (sklearn rejects / warns
# on column-vector y_pred inputs).
conf = confusion_matrix(Data[2], np.ravel(predict_list))
# sklearn orders rows/columns by sorted label value: index 0 -> label -1, index 1 -> label +1.
TN = conf[0][0]
FN = conf[1][0]
TP = conf[1][1]
FP = conf[0][1]
TPR = TP / (TP + FN)                     # Sensitivity (recall of the positive class)
TNR = TN / (TN + FP)                     # Specificity
ACC = (TP + TN) / (TP + FP + FN + TN)    # Accuracy
print("Confusion Matrix: ")
print(conf)
print("* * * * * ")
print("Accuracy Score: ", ACC)
print("Sensitivity Score: ", TPR)
print("Specificity Score: ", TNR)
# * *
# * * (10) Score_features ranks features across all iterations by scoring them
# and returns the Nf most discriminative feature indices overall.
Score_index = Score_features(ind_array)
# * *
# * *
# * * (11) Plot the Nf most discriminative features across all cross-validation
# runs back in connectivity-matrix form.
aa11 = Data[1][0]   # a sample connectivity matrix, used to locate feature coordinates
aa22 = Data[0][0]   # the matching flattened feature vector
# For each top-scoring feature, find the first (j, k) cell of the connectivity
# matrix holding that feature's value.
last_coor = []
for idx in Score_index:
    found = False
    for j in range(aa11.shape[0]):
        for k in range(aa11.shape[1]):
            if not found and aa22[idx] == aa11[j][k]:
                last_coor.append([j, k])
                found = True
topScoreFeatures = np.zeros((aa11.shape[0], aa11.shape[1]))
# Write each feature value symmetrically at its matrix coordinates.
# NOTE(review): the original advanced its value index `s` only every second
# iteration (via the ss counter), so odd-positioned features were assigned the
# previous feature's value; each coordinate now receives its own feature value,
# matching the equality test that located the coordinate above.
for i in range(len(Score_index)):
    j, k = last_coor[i]
    topScoreFeatures[j][k] = aa22[Score_index[i]]
    topScoreFeatures[k][j] = aa22[Score_index[i]]
# Plot the feature matrix.
plt.imshow(topScoreFeatures)
plt.title('NAGFS Most Discriminative Features Across All Cross-Validation Runs')
plt.colorbar()
plt.show()
# * *
# * *
# * * (11) Plot the Gaussian distribution of both classes.
# matplotlib 3.x removed hist(normed=...) and mlab.normpdf; use density=True
# and an explicit normal-pdf expression (numpy is already imported) instead.
def _norm_pdf(x, mu, sigma):
    # Normal probability density function, evaluated elementwise on x.
    return np.exp(-0.5 * ((x - mu) / sigma) ** 2) / (sigma * np.sqrt(2.0 * np.pi))

# For Class-1
n, bins, patches = plt.hist(data_class11, 15, density=True, facecolor='green', alpha=0.75)
y = _norm_pdf(bins, mu1, sigma1)
l = plt.plot(bins, y, 'g--', linewidth=3)
plt.xlabel('')
plt.ylabel('')
plt.grid(True)
# For Class-2
n, bins, patches = plt.hist(data_class22, 15, density=True, facecolor='blue', alpha=0.75)
y = _norm_pdf(bins, mu2, sigma2)
l = plt.plot(bins, y, 'b--', linewidth=3)
plt.xlabel('')
plt.ylabel('')
plt.title("Class-specific simulated data distribution (2 classes)".title())
plt.grid(True)
plt.show()
# * *
#* *
# * * (12) Circular graph of the top Nf discriminative features across all
# cross-validation runs.
node_list = [i + 1 for i in range(aa11.shape[0])]   # 1-based node ids, one per matrix row
edge_list = []
# Shift the stored (row, col) coordinates to 1-based node ids and use them as edges.
for coor in last_coor:
    coor[0] += 1
    coor[1] += 1
    edge_list.append(coor)
G = nx.Graph()
G.add_nodes_from(node_list)
G.add_edges_from(edge_list)
# networkx >= 2.4 removed Graph.node; the Graph.nodes view is the replacement.
# (The original's unused color_list was dropped.)
for n, d in G.nodes(data=True):
    G.nodes[n]["class"] = node_list[n - 1]
c = CircosPlot(graph=G, node_labels=True,
               node_label_rotation=True,
               fontsize=30,
               group_legend=True,
               figsize=(7, 7), node_color="class")
c.draw()
plt.title("circular graph of top Nf discriminative features across all cross-validation runs".title())
plt.show()
# * *