-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathbayes.py
131 lines (99 loc) · 3.78 KB
/
bayes.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
import glob
import os

import numpy

# Dataset root: one sub-folder per family, each holding that family's images.
os.chdir('/Desktop/malimg_dataset')

# Family names, in whatever order os.listdir() yields them.  The position of
# a name in this list is the numeric class label used by the rest of the
# script.  Plain files are skipped: os.listdir() returns them too, and
# os.chdir() on a non-directory would abort the run.
list_fams = [name for name in os.listdir(os.getcwd()) if os.path.isdir(name)]

# Number of '*.png' samples in each family folder, aligned with list_fams.
no_imgs = []
for fam in list_fams:
    os.chdir(fam)
    no_imgs.append(len(glob.glob('*.png')))
    os.chdir('..')

total = sum(no_imgs)  # total number of samples across all families

# Label vector (float, like the feature matrix): the first no_imgs[0] entries
# are 0, the next no_imgs[1] entries are 1, and so on.
y = numpy.zeros(total)
start = 0
for label, count in enumerate(no_imgs):
    y[start:start + count] = label
    start += count
# Pillow exposes the imaging module only inside the PIL package; the bare
# top-level 'Image' module exists only in the legacy PIL distribution, so it
# is kept as a fallback.
try:
    from PIL import Image
except ImportError:
    import Image
import leargist

# Feature matrix: one row per image, 320 descriptor values per row.
X = numpy.zeros((sum(no_imgs), 320))
row = 0
for fam in list_fams:
    os.chdir(fam)
    # Only 'png' files are read, matching how the samples were counted.
    for png_name in glob.glob('*.png'):
        im = Image.open(png_name)
        # Downscale to 64x64 before computing the descriptor (faster).
        small = im.resize((64, 64), Image.ANTIALIAS)
        des = leargist.color_gist(small)
        # Keep only the first 320 values of the descriptor.
        X[row] = des[0:320]
        row += 1
    os.chdir('..')
import random
from sklearn.utils import shuffle

# StratifiedKFold lives in sklearn.model_selection in current scikit-learn
# (with a different call signature); sklearn.cross_validation is the legacy
# location and has been removed from newer releases.
try:
    from sklearn.model_selection import StratifiedKFold
    _legacy_skf = False
except ImportError:
    from sklearn.cross_validation import StratifiedKFold
    _legacy_skf = True

n_samples, n_features = X.shape

# Shuffle samples and labels with one shared permutation so every row of X
# stays paired with its label.  list() is required because random.shuffle
# works in place and a Python 3 range object cannot be mutated.
p = list(range(n_samples))
random.shuffle(p)
X, y = X[p], y[p]

kfold = 10  # no. of folds (better to have this at the start of the code)

# Stratified k-fold: the data is split into k folds whose class distribution
# follows the distribution of the labels in y.
if _legacy_skf:
    skf = StratifiedKFold(y, kfold)
else:
    skf = StratifiedKFold(n_splits=kfold).split(X, y)

# One (train_indices, test_indices) pair per fold.
skfind = list(skf)
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import confusion_matrix
import time

n_classes = len(no_imgs)
# Fixed label set for every fold.  Without it confusion_matrix() sizes its
# result from the labels present in that fold, so a fold that misses a class
# would yield a smaller matrix that cannot be added to the running total.
class_labels = numpy.arange(n_classes).astype(y.dtype)

conf_mat = numpy.zeros((n_classes, n_classes))  # accumulated over all folds
n_neighbors = 1  # not used by the Naive Bayes classifier below

# k-fold cross validation: train on one part, test on the held-out part.
for train_indices, test_indices in skfind:
    X_train, y_train = X[train_indices], y[train_indices]
    X_test, y_test = X[test_indices], y[test_indices]

    # Training
    clf = MultinomialNB()
    tic = time.time()
    clf.fit(X_train, y_train)
    toc = time.time()
    # Same text the Python 2 print statement produced (note the two spaces).
    print("training time=  %s" % (toc - tic))

    # Testing
    tic = time.time()
    y_predict = clf.predict(X_test)  # output is labels and not indices
    toc = time.time()
    print("testing time =  %s" % (toc - tic))

    # Add this fold's confusion matrix to the running total.
    conf_mat += confusion_matrix(y_test, y_predict, labels=class_labels)

# confusion_matrix() puts true classes on rows and predictions on columns.
# After the transpose, columns index the true class, so dividing by no_imgs
# (broadcast along columns) scales each column by that class's sample count.
conf_mat = conf_mat.T
avg_acc = numpy.trace(conf_mat) / sum(no_imgs)
conf_mat_norm = conf_mat / no_imgs  # Normalizing the confusion matrix
import matplotlib.pyplot as plt

# Figure 1: plain heat map of the normalized confusion matrix.
plt.figure()
plt.imshow(conf_mat_norm, interpolation='nearest')
plt.title('Confusion matrix')
plt.colorbar()
# Save before show(): once the window opened by show() is closed the figure
# is gone, and a later savefig() would write an empty image.
plt.savefig('confusion_matrix.png')
plt.show()

# Figure 2: same matrix rounded to two decimals, with each cell annotated and
# the axes labelled with the family names.
conf_mat2 = numpy.around(conf_mat_norm, decimals=2)  # rounding to display in figure
n_fams = len(list_fams)
plt.figure()
plt.imshow(conf_mat2, interpolation='nearest')
for row in range(n_fams):
    for col in range(n_fams):
        # annotate() takes (x, y) = (column, row) in image coordinates.
        plt.annotate(str(conf_mat2[row][col]), xy=(col, row),
                     ha='center', va='center')
plt.xticks(range(n_fams), list_fams, rotation=90, fontsize=11)
plt.yticks(range(n_fams), list_fams, fontsize=11)
plt.title('multinomial Naive Bayes')
plt.colorbar()
# Same file name as above, so this annotated figure replaces the first one.
plt.savefig('confusion_matrix.png')
plt.show()