import numpy as np
import cv2
import matplotlib.pyplot as plt
- from util import count_parameters as count
- from util import convert2cpu as cpu
+ from util import *
import argparse
import os
import os.path as osp
from darknet import Darknet
- from preprocess import prep_image, prep_batch
+ from preprocess import prep_image, prep_batch, inp_to_image
from bbox import confidence_filter
import time

@@ -65,85 +64,74 @@ def arg_parse():

    return parser.parse_args()

- def predict_transform(prediction, inp_dim, anchors):
-     batch_size = prediction.size(0)
-     network_stride = 32
-     grid_size = inp_dim // network_stride
-     bbox_attrs = 5 + num_classes
-     num_anchors = len(anchors)
-     # #Flatten the grid boxes dimensions
-     # prediction = prediction.view(batch_size, -1, grid_size*grid_size)
-     #
-     # #Flatten w.r.t to different anchors predicted by a grid (depth)
-     # prediction = prediction.view(batch_size, bbox_attrs, -1)
+ #def predict_transform(prediction, inp_dim, anchors):
+ #    batch_size = prediction.size(0)
+ #    network_stride = 32
+ #    grid_size = inp_dim // network_stride
+ #    bbox_attrs = 5 + num_classes
+ #    num_anchors = len(anchors)
    #
+ #    prediction = prediction.view(batch_size, bbox_attrs*num_anchors, grid_size*grid_size)
    # prediction = prediction.transpose(1,2).contiguous()
81
-
-     prediction = prediction.view(batch_size, bbox_attrs, num_anchors, grid_size, grid_size)
-     prediction = prediction.view(batch_size, num_anchors, bbox_attrs, grid_size*grid_size)
-     prediction = prediction.view(batch_size, num_anchors*bbox_attrs, grid_size*grid_size)
-     prediction = prediction.transpose(1,2).contiguous()
-     prediction = prediction.view(batch_size, grid_size*grid_size*num_anchors, -1)
-
-     temp = torch.FloatTensor(prediction.shape)
-
-     if CUDA:
-         temp = temp.cuda()
-
-     #Sigmoid the centre_X, centre_Y, and object confidence
-     temp[:,:,0].copy_(torch.sigmoid(prediction[:,:,0]).data)
-     temp[:,:,1].copy_(torch.sigmoid(prediction[:,:,1]).data)
-     temp[:,:,4].copy_(torch.sigmoid(prediction[:,:,4]).data)
-
-     #log space transform height and the width
-     anchors = torch.FloatTensor(anchors)
-
-     if CUDA:
-         anchors = anchors.cuda()
-
-     anchors = anchors.repeat(grid_size*grid_size, 1).unsqueeze(0)
-
-     # temp[:,:,3:5] = torch.exp(temp[:,:,3:5])*anchors
-
-
-     #Softmax the class scores
-     temp[:,:,5: 5 + num_classes].copy_(nn.Softmax(-1)(prediction[:,:, 5 : 5 + num_classes]).data)
-
-
-     #Add the center offsets
-     grid_len = np.arange(grid_size)
-     a,b = np.meshgrid(grid_len, grid_len)
-
-     #create the grid
-     x_offset = torch.FloatTensor(a).view(-1,1)
-     y_offset = torch.FloatTensor(b).view(-1,1)
-
-     if CUDA:
-         x_offset = x_offset.cuda()
-         y_offset = y_offset.cuda()
-
-     x_y_offset = torch.cat((x_offset, y_offset), 1).repeat(1,num_anchors).view(-1,2).unsqueeze(0)
-
+ #    prediction = prediction.view(batch_size, grid_size*grid_size*num_anchors, bbox_attrs)
+ #
+ #    #Creating a new Tensor to store the transformed data
+ #    #Just avoiding inplace operations
+ #    temp = torch.FloatTensor(prediction.shape)
+ #
+ #    if CUDA:
+ #        temp = temp.cuda()
+ #
+ #    temp1 = temp[:,:,2]
+ #    cacher = torch.FloatTensor(temp1.shape).cuda()
+ #    cacher.copy_(temp1)
+ #
+ #    #Sigmoid the centre_X, centre_Y, and object confidence
+ #    temp[:,:,0].copy_(torch.sigmoid(prediction[:,:,0]).data)
+ #    temp[:,:,1].copy_(torch.sigmoid(prediction[:,:,1]).data)
+ #    temp[:,:,4].copy_(torch.sigmoid(prediction[:,:,4]).data)
+ #    temp[:,:,2:4].copy_((prediction[:,:,2:4]).data)
+ #
+ #    #Add the center offsets
+ #    grid_len = np.arange(grid_size)
+ #    a,b = np.meshgrid(grid_len, grid_len)
+ #
+ #    x_offset = torch.FloatTensor(a).view(-1,1)
+ #    y_offset = torch.FloatTensor(b).view(-1,1)
+ #
+ #    if CUDA:
+ #        x_offset = x_offset.cuda()
+ #        y_offset = y_offset.cuda()
+ #
+ #    x_y_offset = torch.cat((x_offset, y_offset), 1).repeat(1,num_anchors).view(-1,2).unsqueeze(0)
+ #
    # temp[:,:,:2] += x_y_offset
-
-     return temp
+ #
+ #    #log space transform height and the width
+ #    anchors = torch.FloatTensor(anchors)
+ #
+ #    if CUDA:
+ #        anchors = anchors.cuda()
+ #
+ #    anchors = anchors.repeat(grid_size*grid_size, 1).unsqueeze(0)
+ #    temp[:,:,2:4] = torch.exp(temp[:,:,2:4])*anchors
+ #
+ #    #Softmax the class scores
+ #    temp[:,:,5: 5 + num_classes].copy_(nn.Softmax(-1)(prediction[:,:, 5 : 5 + num_classes]).data)
+ #
+ #    return temp

-
-
if __name__ == '__main__':
    parser = arg_parse()
    images = parser.images
    cfg = parser.cfg
    weightsfile = parser.weightsfile
    start = 0
-
-
-
+
    CUDA = torch.cuda.is_available()
    network_dim = (416,416)
    num_classes = 20 #Will be updated in future to accommodate COCO
-
-
+
    #Set up the neural network
    print("Loading network.....")
    model = Darknet(cfg)
@@ -164,34 +152,29 @@ def predict_transform(prediction, inp_dim, anchors):
        print("No file or directory with the name {}".format(images))
        exit()

-     batch_size = 1
+     batch_size = 5
    im_batches = prep_batch(imlist, batch_size, network_dim)

    for batch in im_batches:
        #load the image
+         start = time.time()
        inp_dim = batch[0].size(2)
        if CUDA:
            batch = batch.cuda()
-
-
-
-         # inp_image = torch.cat((inp_image, inp_image),0)
-
-
+
        pred = model(batch)
-
+
        #Apply offsets to the result predictions
+         #Transform the predictions as described in the YOLO paper
+
+         prediction = predict_transform(pred, inp_dim, model.anchors, num_classes, CUDA)

-         prediction = predict_transform(pred, inp_dim, model.anchors)
-         im1 = prediction[0]
-         b = (batch[0].data.cpu().numpy()*255.0)
-         cv2.imwrite("f.png", b.transpose(1,2,0)[:,:,::-1])
+         prediction = confidence_filter(prediction, 0.7)
        #flatten the prediction vector
        # B x (bbox coord x no. of anchors) x grid_w x grid_h --> B x bbox x (all the boxes)
        # Put every proposed box as a row.
        #get the boxes with object confidence > threshold
-         prediction_ = confidence_filter(prediction, 0.5)
-         assert False
+

        #perform NMS on these boxes
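
Note: with this commit the inline predict_transform above is commented out and the loop relies on the version pulled in via `from util import *`, whose signature now also takes num_classes and CUDA. Pieced together from the commented-out code in the diff, a minimal sketch of what such a transform does might look like the following; the helper name below is illustrative, not the actual util implementation.

# Illustrative sketch only -- the real implementation is expected to live in util.predict_transform.
# Intended for inference; wrap the call in torch.no_grad() if gradients are not needed.
import numpy as np
import torch
import torch.nn as nn

def predict_transform_sketch(prediction, inp_dim, anchors, num_classes, CUDA=True):
    batch_size = prediction.size(0)
    stride = 32                      # network stride assumed throughout this script
    grid_size = inp_dim // stride
    bbox_attrs = 5 + num_classes
    num_anchors = len(anchors)

    # B x (A*attrs) x G x G  ->  B x (G*G*A) x attrs, one proposed box per row
    prediction = prediction.view(batch_size, bbox_attrs*num_anchors, grid_size*grid_size)
    prediction = prediction.transpose(1, 2).contiguous()
    prediction = prediction.view(batch_size, grid_size*grid_size*num_anchors, bbox_attrs)

    # Sigmoid the centre_x, centre_y and objectness scores
    prediction[:, :, 0] = torch.sigmoid(prediction[:, :, 0])
    prediction[:, :, 1] = torch.sigmoid(prediction[:, :, 1])
    prediction[:, :, 4] = torch.sigmoid(prediction[:, :, 4])

    # Add the grid cell offsets to the centre coordinates
    grid_len = np.arange(grid_size)
    a, b = np.meshgrid(grid_len, grid_len)
    x_offset = torch.FloatTensor(a).view(-1, 1)
    y_offset = torch.FloatTensor(b).view(-1, 1)
    if CUDA:
        x_offset = x_offset.cuda()
        y_offset = y_offset.cuda()
    x_y_offset = torch.cat((x_offset, y_offset), 1).repeat(1, num_anchors).view(-1, 2).unsqueeze(0)
    prediction[:, :, :2] += x_y_offset

    # Log-space transform of width/height, scaled by the anchor dimensions
    anchors = torch.FloatTensor(anchors)
    if CUDA:
        anchors = anchors.cuda()
    anchors = anchors.repeat(grid_size*grid_size, 1).unsqueeze(0)
    prediction[:, :, 2:4] = torch.exp(prediction[:, :, 2:4])*anchors

    # Softmax the class scores
    prediction[:, :, 5:5 + num_classes] = nn.Softmax(dim=-1)(prediction[:, :, 5:5 + num_classes])

    return prediction

The call in the loop above, prediction = predict_transform(pred, inp_dim, model.anchors, num_classes, CUDA), followed by confidence_filter(prediction, 0.7), assumes exactly this B x (G*G*A) x (5 + num_classes) layout.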