Skip to content

Commit 892ff4b

Browse files
Work for 13/3/18
1 parent ed7fd3b commit 892ff4b

File tree

6 files changed

+95
-91
lines changed

6 files changed

+95
-91
lines changed

__init__.py

Whitespace-only changes.

bbox.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
def confidence_filter(result, confidence):
    """Zero out every box whose score at attribute index 4 is not above a threshold.

    Args:
        result: 3-D tensor indexed as ``result[batch, box, attribute]``.
            Index 4 along the last axis is the value compared against
            ``confidence`` (presumably the objectness score -- confirm
            against the producer of ``result``).
        confidence: Threshold. A box is kept only when its score is
            strictly greater than this value.

    Returns:
        A tensor with the same shape and type as ``result``. Boxes that
        fail the test are multiplied by zero; they are not removed, so
        box positions along axis 1 are unchanged.
    """
    # Build the 0/1 mask in the input's own tensor type. A hard-coded
    # .float() mask forces float32 and changes (or clashes with) the dtype
    # of half/double predictions.
    conf_mask = (result[:, :, 4] > confidence).type_as(result).unsqueeze(2)
    return result * conf_mask
1616

darknet.py

+6-6
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import matplotlib.pyplot as plt
1010
from util import count_parameters as count
1111
from util import convert2cpu as cpu
12+
from preprocess import prep_image, prep_batch
1213

1314
class test_net(nn.Module):
1415
def __init__(self, num_layers, input_size):
@@ -397,12 +398,11 @@ def save_weights(self, savedfile, cutoff = 0):
397398
cpu(conv.weight.data).numpy().tofile(fp)
398399

399400

400-
401-
402-
403-
404-
405-
401+
402+
403+
404+
405+
406406

407407

408408

detect.py

+66-83
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,12 @@
77
import numpy as np
88
import cv2
99
import matplotlib.pyplot as plt
10-
from util import count_parameters as count
11-
from util import convert2cpu as cpu
10+
from util import *
1211
import argparse
1312
import os
1413
import os.path as osp
1514
from darknet import Darknet
16-
from preprocess import prep_image, prep_batch
15+
from preprocess import prep_image, prep_batch, inp_to_image
1716
from bbox import confidence_filter
1817
import time
1918

@@ -65,85 +64,74 @@ def arg_parse():
6564

6665
return parser.parse_args()
6766

68-
def predict_transform(prediction, inp_dim, anchors):
69-
batch_size = prediction.size(0)
70-
network_stride = 32
71-
grid_size = inp_dim // network_stride
72-
bbox_attrs = 5 + num_classes
73-
num_anchors = len(anchors)
74-
# #Flatten the grid boxes dimensions
75-
# prediction = prediction.view(batch_size, -1, grid_size*grid_size)
76-
#
77-
# #Flatten w.r.t to different anchors predicted by a grid (depth)
78-
# prediction = prediction.view(batch_size, bbox_attrs, -1)
67+
#def predict_transform(prediction, inp_dim, anchors):
68+
# batch_size = prediction.size(0)
69+
# network_stride = 32
70+
# grid_size = inp_dim // network_stride
71+
# bbox_attrs = 5 + num_classes
72+
# num_anchors = len(anchors)
7973
#
74+
# prediction = prediction.view(batch_size, bbox_attrs*num_anchors, grid_size*grid_size)
8075
# prediction = prediction.transpose(1,2).contiguous()
81-
82-
prediction = prediction.view(batch_size, bbox_attrs, num_anchors, grid_size, grid_size)
83-
prediction = prediction.view(batch_size, num_anchors, bbox_attrs, grid_size*grid_size)
84-
prediction = prediction.view(batch_size, num_anchors * bbox_attrs, grid_size*grid_size)
85-
prediction = prediction.transpose(1,2).contiguous()
86-
prediction = prediction.view(batch_size, grid_size*grid_size*num_anchors, -1)
87-
88-
temp = torch.FloatTensor(prediction.shape)
89-
90-
if CUDA:
91-
temp = temp.cuda()
92-
93-
#Sigmoid the centre_X, centre_Y. and object confidencce
94-
temp[:,:,0].copy_(torch.sigmoid(prediction[:,:,0]).data)
95-
temp[:,:,1].copy_(torch.sigmoid(prediction[:,:,1]).data)
96-
temp[:,:,4].copy_(torch.sigmoid(prediction[:,:,4]).data)
97-
98-
#log space transform height and the width
99-
anchors = torch.FloatTensor(anchors)
100-
101-
if CUDA:
102-
anchors = anchors.cuda()
103-
104-
anchors = anchors.repeat(grid_size*grid_size, 1).unsqueeze(0)
105-
106-
# temp[:,:,3:5] = torch.exp(temp[:,:,3:5])*anchors
107-
108-
109-
#Softmax the class scores
110-
temp[:,:,5: 5 + num_classes].copy_(nn.Softmax(-1)(prediction[:,:, 5 : 5 + num_classes]).data)
111-
112-
113-
#Add the center offsets
114-
grid_len = np.arange(grid_size)
115-
a,b = np.meshgrid(grid_len, grid_len)
116-
117-
#create the grid
118-
x_offset = torch.FloatTensor(a).view(-1,1)
119-
y_offset = torch.FloatTensor(b).view(-1,1)
120-
121-
if CUDA:
122-
x_offset = x_offset.cuda()
123-
y_offset = y_offset.cuda()
124-
125-
x_y_offset = torch.cat((x_offset, y_offset), 1).repeat(1,num_anchors).view(-1,2).unsqueeze(0)
126-
76+
# prediction = prediction.view(batch_size, grid_size*grid_size*num_anchors, bbox_attrs)
77+
#
78+
# #Creating a new Tensor to store the transformed data
79+
# #Just avoiding inplace operations
80+
# temp = torch.FloatTensor(prediction.shape)
81+
#
82+
# if CUDA:
83+
# temp = temp.cuda()
84+
#
85+
# temp1 = temp[:,:,2]
86+
# cacher = torch.FloatTensor(temp1.shape).cuda()
87+
# cacher.copy_(temp1)
88+
#
89+
# #Sigmoid the centre_X, centre_Y. and object confidence
90+
# temp[:,:,0].copy_(torch.sigmoid(prediction[:,:,0]).data)
91+
# temp[:,:,1].copy_(torch.sigmoid(prediction[:,:,1]).data)
92+
# temp[:,:,4].copy_(torch.sigmoid(prediction[:,:,4]).data)
93+
# temp[:,:,2:4].copy_((prediction[:,:,2:4]).data)
94+
#
95+
# #Add the center offsets
96+
# grid_len = np.arange(grid_size)
97+
# a,b = np.meshgrid(grid_len, grid_len)
98+
#
99+
# x_offset = torch.FloatTensor(a).view(-1,1)
100+
# y_offset = torch.FloatTensor(b).view(-1,1)
101+
#
102+
# if CUDA:
103+
# x_offset = x_offset.cuda()
104+
# y_offset = y_offset.cuda()
105+
#
106+
# x_y_offset = torch.cat((x_offset, y_offset), 1).repeat(1,num_anchors).view(-1,2).unsqueeze(0)
107+
#
127108
# temp[:,:,:2] += x_y_offset
128-
129-
return temp
109+
#
110+
# #log space transform height and the width
111+
# anchors = torch.FloatTensor(anchors)
112+
#
113+
# if CUDA:
114+
# anchors = anchors.cuda()
115+
#
116+
# anchors = anchors.repeat(grid_size*grid_size, 1).unsqueeze(0)
117+
# temp[:,:,2:4] = torch.exp(temp[:,:,2:4])*anchors
118+
#
119+
# #Softmax the class scores
120+
# temp[:,:,5: 5 + num_classes].copy_(nn.Softmax(-1)(prediction[:,:, 5 : 5 + num_classes]).data)
121+
#
122+
# return temp
130123

131-
132-
133124
if __name__ == '__main__':
134125
parser = arg_parse()
135126
images = parser.images
136127
cfg = parser.cfg
137128
weightsfile = parser.weightsfile
138129
start = 0
139-
140-
141-
130+
142131
CUDA = torch.cuda.is_available()
143132
network_dim = (416,416)
144133
num_classes = 20 #Will be updated in future to accommodate COCO
145-
146-
134+
147135
#Set up the neural network
148136
print("Loading network.....")
149137
model = Darknet(cfg)
@@ -164,34 +152,29 @@ def predict_transform(prediction, inp_dim, anchors):
164152
print ("No file or directory with the name {}".format(images))
165153
exit()
166154

167-
batch_size = 1
155+
batch_size = 5
168156
im_batches = prep_batch(imlist, batch_size, network_dim)
169157

170158
for batch in im_batches:
171159
#load the image
160+
start = time.time()
172161
inp_dim = batch[0].size(2)
173162
if CUDA:
174163
batch = batch.cuda()
175-
176-
177-
178-
# inp_image = torch.cat((inp_image, inp_image),0)
179-
180-
164+
181165
pred = model(batch)
182-
166+
183167
#Apply offsets to the result predictions
168+
#Transform the predictions as described in the YOLO paper
169+
170+
prediction = predict_transform(pred, inp_dim, model.anchors, num_classes, CUDA)
184171

185-
prediction = predict_transform(pred, inp_dim, model.anchors)
186-
im1 = prediction[0]
187-
b = (batch[0].data.cpu().numpy()*255.0)
188-
cv2.imwrite("f.png", b.transpose(1,2,0)[:,:,::-1])
172+
prediction = confidence_filter(prediction, 0.7)
189173
#flatten the prediction vector
190174
# B x (bbox cord x no. of anchors) x grid_w x grid_h --> B x bbox x (all the boxes)
191175
# Put every proposed box as a row.
192176
#get the boxes with object confidence > threshold
193-
prediction_ = confidence_filter(prediction, 0.5)
194-
assert False
177+
195178

196179
#perform NMS on these boxes
197180

preprocess.py

+20-1
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,25 @@ def prep_image(img, network_dim):
2727
img_ = Variable(img_)
2828
return img_
2929

30+
def prep_image_pil(img, network_dim):
    """Load an image with PIL and convert it to a 1 x 3 x H x W float input.

    Args:
        img: Path (or file object) accepted by ``Image.open``.
        network_dim: Two-element ``(width, height)`` target size, in the
            order ``Image.resize`` expects.

    Returns:
        A ``Variable`` wrapping a float tensor of shape
        ``(1, 3, height, width)`` holding the RGB channels divided by 255.
    """
    width, height = network_dim

    # The context manager releases the file handle; convert()/resize()
    # return fully loaded copies, so they remain valid after the close.
    with Image.open(img) as source:
        resized = source.convert('RGB').resize((width, height))

    pixels = torch.ByteTensor(torch.ByteStorage.from_buffer(resized.tobytes()))

    # tobytes() is row-major: `height` rows of `width` pixels of 3 channels.
    # Viewing it as (width, height, 3) only happens to work for square sizes.
    pixels = pixels.view(height, width, 3)

    # HWC -> WHC -> CHW
    pixels = pixels.transpose(0, 1).transpose(0, 2).contiguous()
    pixels = pixels.view(1, 3, height, width)

    return Variable(pixels.float().div(255.0))
38+
39+
def inp_to_image(inp):
    """Convert a single network-input tensor back into an image array.

    Args:
        inp: Tensor (or Variable) holding one image laid out as
            ``(1, C, H, W)`` or ``(C, H, W)``. Values are multiplied by 255
            here (presumably they are in ``[0, 1]`` as produced by the
            preprocessing step -- confirm against the caller).

    Returns:
        A C-contiguous numpy array of shape ``(H, W, C)`` with the channel
        axis reversed (e.g. RGB -> BGR).
    """
    inp = inp.cpu()

    # Strip only leading batch axes of size 1. A bare squeeze() would also
    # collapse a single-channel or 1-pixel-high/wide image and break the
    # (C, H, W) -> (H, W, C) transpose below.
    while inp.dim() > 3 and inp.size(0) == 1:
        inp = inp.squeeze(0)

    scaled = inp * 255
    image = scaled.data.numpy().transpose(1, 2, 0)

    # Reversing the channel axis yields a negative-stride view; copy() makes
    # it contiguous so it can be handed to libraries that need plain layout.
    return image[:, :, ::-1].copy()
47+
48+
3049
def prep_batch(imlist, batch_size, network_dim):
3150
num_batches = len(imlist)//batch_size + 1
3251
im_batches = []
@@ -38,7 +57,7 @@ def prep_batch(imlist, batch_size, network_dim):
3857
image = imlist[id]
3958
except IndexError:
4059
break
41-
inp_image = prep_image(image, network_dim)
60+
inp_image = prep_image_pil(image, network_dim)
4261
if img == 0:
4362
batchx.copy_(inp_image.data)
4463
else:

util.py

+2
Original file line numberDiff line numberDiff line change
@@ -78,3 +78,5 @@ def predict_transform(prediction, inp_dim, anchors, num_classes, CUDA):
7878

7979
return temp
8080

81+
82+

0 commit comments

Comments
 (0)