Work for 13/3/18

ayooshkathuria · ayooshkathuria · commit 892ff4bc2974 · 2018-03-13T17:20:01.000+05:30
diff --git a/__init__.py b/__init__.py
diff --git a/bbox.py b/bbox.py
@@ -10,7 +10,7 @@
 def confidence_filter(result, confidence):
 
     conf_mask = (result[:,:,4] > confidence).float().unsqueeze(2)
-    result = result*conf_mask
+    result = result*conf_mask    
     
     return result
 
diff --git a/darknet.py b/darknet.py
@@ -9,6 +9,7 @@
 import matplotlib.pyplot as plt
 from util import count_parameters as count
 from util import convert2cpu as cpu
+from preprocess import prep_image, prep_batch
 
 class test_net(nn.Module):
     def __init__(self, num_layers, input_size):
@@ -397,12 +398,11 @@ def save_weights(self, savedfile, cutoff = 0):
                 cpu(conv.weight.data).numpy().tofile(fp)
                
 
-        
-            
-            
-        
-    
-    
+
+
+
+
+
 
 
 
diff --git a/detect.py b/detect.py
@@ -7,13 +7,12 @@
 import numpy as np
 import cv2 
 import matplotlib.pyplot as plt
-from util import count_parameters as count
-from util import convert2cpu as cpu
+from util import *
 import argparse
 import os 
 import os.path as osp
 from darknet import Darknet
-from preprocess import prep_image, prep_batch
+from preprocess import prep_image, prep_batch, inp_to_image
 from bbox import confidence_filter
 import time
 
@@ -65,85 +64,74 @@ def arg_parse():
     
     return parser.parse_args()
 
-def predict_transform(prediction, inp_dim, anchors):
-    batch_size = prediction.size(0)
-    network_stride = 32
-    grid_size = inp_dim // network_stride
-    bbox_attrs = 5 + num_classes
-    num_anchors = len(anchors)
-#    #Flatten the grid boxes dimensions
-#    prediction = prediction.view(batch_size, -1, grid_size*grid_size)
-#    
-#    #Flatten w.r.t to different anchors predicted by a grid (depth)
-#    prediction = prediction.view(batch_size, bbox_attrs, -1)
+#def predict_transform(prediction, inp_dim, anchors):
+#    batch_size = prediction.size(0)
+#    network_stride = 32
+#    grid_size = inp_dim // network_stride
+#    bbox_attrs = 5 + num_classes
+#    num_anchors = len(anchors)
 #    
+#    prediction = prediction.view(batch_size, bbox_attrs*num_anchors, grid_size*grid_size)
 #    prediction = prediction.transpose(1,2).contiguous()
-    
-    prediction = prediction.view(batch_size, bbox_attrs, num_anchors, grid_size, grid_size)
-    prediction = prediction.view(batch_size, num_anchors, bbox_attrs, grid_size*grid_size)
-    prediction = prediction.view(batch_size, num_anchors * bbox_attrs, grid_size*grid_size)
-    prediction = prediction.transpose(1,2).contiguous()
-    prediction = prediction.view(batch_size, grid_size*grid_size*num_anchors, -1)
-    
-    temp = torch.FloatTensor(prediction.shape)
-
-    if CUDA:
-        temp = temp.cuda()
-    
-    #Sigmoid the  centre_X, centre_Y. and object confidencce
-    temp[:,:,0].copy_(torch.sigmoid(prediction[:,:,0]).data)
-    temp[:,:,1].copy_(torch.sigmoid(prediction[:,:,1]).data)
-    temp[:,:,4].copy_(torch.sigmoid(prediction[:,:,4]).data)
-    
-    #log space transform height and the width
-    anchors = torch.FloatTensor(anchors)
-    
-    if CUDA:
-        anchors = anchors.cuda()
-    
-    anchors = anchors.repeat(grid_size*grid_size, 1).unsqueeze(0)
-    
-#    temp[:,:,3:5] = torch.exp(temp[:,:,3:5])*anchors
-    
-    
-    #Softmax the class scores
-    temp[:,:,5: 5 + num_classes].copy_(nn.Softmax(-1)(prediction[:,:, 5 : 5 + num_classes]).data)
-    
-    
-    #Add the center offsets
-    grid_len = np.arange(grid_size)
-    a,b = np.meshgrid(grid_len, grid_len)
-    
-    #create the grid
-    x_offset = torch.FloatTensor(a).view(-1,1)
-    y_offset = torch.FloatTensor(b).view(-1,1)
-    
-    if CUDA:
-        x_offset = x_offset.cuda()
-        y_offset = y_offset.cuda()
-    
-    x_y_offset = torch.cat((x_offset, y_offset), 1).repeat(1,num_anchors).view(-1,2).unsqueeze(0)
-    
+#    prediction = prediction.view(batch_size, grid_size*grid_size*num_anchors, bbox_attrs)
+#    
+#    #Creating a new Tensor to store the transformed data
+#    #Just avoiding inplace operations    
+#    temp = torch.FloatTensor(prediction.shape)
+#
+#    if CUDA:
+#        temp = temp.cuda()
+#    
+#    temp1 = temp[:,:,2]
+#    cacher = torch.FloatTensor(temp1.shape).cuda()
+#    cacher.copy_(temp1)
+#    
+#    #Sigmoid the centre_X, centre_Y, and object confidence
+#    temp[:,:,0].copy_(torch.sigmoid(prediction[:,:,0]).data)
+#    temp[:,:,1].copy_(torch.sigmoid(prediction[:,:,1]).data)
+#    temp[:,:,4].copy_(torch.sigmoid(prediction[:,:,4]).data)
+#    temp[:,:,2:4].copy_((prediction[:,:,2:4]).data)
+#    
+#    #Add the center offsets
+#    grid_len = np.arange(grid_size)
+#    a,b = np.meshgrid(grid_len, grid_len)
+#    
+#    x_offset = torch.FloatTensor(a).view(-1,1)
+#    y_offset = torch.FloatTensor(b).view(-1,1)
+#    
+#    if CUDA:
+#        x_offset = x_offset.cuda()
+#        y_offset = y_offset.cuda()
+#    
+#    x_y_offset = torch.cat((x_offset, y_offset), 1).repeat(1,num_anchors).view(-1,2).unsqueeze(0)
+#    
 #    temp[:,:,:2] += x_y_offset
-    
-    return temp
+#      
+#    #log space transform height and the width
+#    anchors = torch.FloatTensor(anchors)
+#    
+#    if CUDA:
+#        anchors = anchors.cuda()
+#    
+#    anchors = anchors.repeat(grid_size*grid_size, 1).unsqueeze(0)
+#    temp[:,:,2:4] = torch.exp(temp[:,:,2:4])*anchors
+#
+#    #Softmax the class scores
+#    temp[:,:,5: 5 + num_classes].copy_(nn.Softmax(-1)(prediction[:,:, 5 : 5 + num_classes]).data)
+#
+#    return temp
 
-    
-  
 if __name__ ==  '__main__':
     parser = arg_parse()
     images = parser.images
     cfg = parser.cfg
     weightsfile = parser.weightsfile
     start = 0
-    
-    
-    
+
     CUDA = torch.cuda.is_available()
     network_dim = (416,416)
     num_classes  = 20   #Will be updated in future to accomodate COCO
-    
-    
+
     #Set up the neural network
     print("Loading network.....")
     model = Darknet(cfg)
@@ -164,34 +152,29 @@ def predict_transform(prediction, inp_dim, anchors):
         print ("No file or directory with the name {}".format(images))
         exit()
         
-    batch_size = 1
+    batch_size = 5
     im_batches = prep_batch(imlist, batch_size, network_dim)
 
     for batch in im_batches:
         #load the image 
+        start = time.time()
         inp_dim = batch[0].size(2)
         if CUDA:
             batch = batch.cuda()
-            
-        
-
-#        inp_image = torch.cat((inp_image, inp_image),0)
-
-        
+       
         pred = model(batch)
-        
+
         #Apply offsets to the result predictions
+        #Transform the predictions as described in the YOLO paper
+        
+        prediction = predict_transform(pred, inp_dim, model.anchors, num_classes, CUDA)
         
-        prediction = predict_transform(pred, inp_dim, model.anchors)
-        im1 = prediction[0]
-        b = (batch[0].data.cpu().numpy()*255.0)
-        cv2.imwrite("f.png", b.transpose(1,2,0)[:,:,::-1])
+        prediction = confidence_filter(prediction, 0.7)
         #flatten the prediction vector 
         # B x (bbox cord x no. of anchors) x grid_w x grid_h --> B x bbox x (all the boxes) 
         # Put every proposed box as a row.
         #get the boxes with object confidence > threshold
-        prediction_ = confidence_filter(prediction, 0.5)
-        assert False
+     
 
         #perform NMS on these boxes
         
diff --git a/preprocess.py b/preprocess.py
@@ -27,6 +27,25 @@ def prep_image(img, network_dim):
     img_ = Variable(img_)
     return img_
 
+def prep_image_pil(img, network_dim):
+    img = Image.open(img).convert('RGB')
+    img = img.resize(network_dim)
+    img = torch.ByteTensor(torch.ByteStorage.from_buffer(img.tobytes()))
+    img = img.view(*network_dim, 3).transpose(0,1).transpose(0,2).contiguous()
+    img = img.view(1, 3,*network_dim)
+    img = img.float().div(255.0)
+    return Variable(img)
+
+def inp_to_image(inp):
+    inp = inp.cpu().squeeze()
+    inp = inp*255
+    inp = inp.data.numpy()
+    inp = inp.transpose(1,2,0)
+
+    inp = inp[:,:,::-1]
+    return inp
+
+
 def prep_batch(imlist, batch_size, network_dim):
     num_batches = len(imlist)//batch_size + 1
     im_batches = []
@@ -38,7 +57,7 @@ def prep_batch(imlist, batch_size, network_dim):
                 image = imlist[id]
             except IndexError:
                 break
-            inp_image = prep_image(image, network_dim)
+            inp_image = prep_image_pil(image, network_dim)
             if img == 0:
                 batchx.copy_(inp_image.data)
             else:
diff --git a/util.py b/util.py
@@ -78,3 +78,5 @@ def predict_transform(prediction, inp_dim, anchors, num_classes, CUDA):
 
     return temp
 
+
+

Original file line number	Diff line number	Diff line change
`@@ -78,3 +78,5 @@ def predict_transform(prediction, inp_dim, anchors, num_classes, CUDA):`
`78`	`78`
`79`	`79`	`return temp`
`80`	`80`
	`81`	`+`
	`82`	`+`