Commit e21f8be

Resize image keeping aspect ratio intact and padding
1 parent 201fc56 commit e21f8be

File tree: 4 files changed, +59 -17 lines

detect.py: +18 -5

@@ -69,7 +69,7 @@ def arg_parse():
                         default = "yolov3.weights", type = str)
     parser.add_argument("--reso", dest = 'reso', help =
                         "Input resolution of the network. Increase to increase accuracy. Decrease to increase speed",
-                        default = "320", type = str)
+                        default = "416", type = str)
     parser.add_argument("--scales", dest = "scales", help = "Scales to use for detection",
                         default = "1,2,3", type = str)

@@ -242,11 +242,24 @@ def arg_parse():
         print("No detections were made")
         exit()

-    output_recast = time.time()
-    output[:,1:5] = torch.clamp(output[:,1:5], 0.0, float(inp_dim))
+    im_dim_list = torch.index_select(im_dim_list, 0, output[:,0].long())
+
+    scaling_factor = torch.min(inp_dim/im_dim_list,1)[0].view(-1,1)
+
+    output[:,[1,3]] -= (inp_dim - scaling_factor*im_dim_list[:,0].view(-1,1))/2
+    output[:,[2,4]] -= (inp_dim - scaling_factor*im_dim_list[:,1].view(-1,1))/2

-    im_dim_list = torch.index_select(im_dim_list, 0, output[:,0].long())/inp_dim
-    output[:,1:5] *= im_dim_list
+    output[:,1:5] /= scaling_factor
+
+    for i in range(output.shape[0]):
+        output[i, [1,3]] = torch.clamp(output[i, [1,3]], 0.0, im_dim_list[i,0])
+        output[i, [2,4]] = torch.clamp(output[i, [2,4]], 0.0, im_dim_list[i,1])
+
+    output_recast = time.time()

     class_load = time.time()
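
The detect.py hunk above replaces the old clamp-and-rescale with a proper inverse of the letterbox transform added in preprocess.py below: it looks up each detection's original image dimensions, recomputes the scaling factor the letterbox used, subtracts the padding offset from the x coordinates (columns 1 and 3) and y coordinates (columns 2 and 4), divides by the scaling factor to get back to original-image pixels, and finally clamps each box against its own image's width and height rather than against inp_dim. A small self-contained sketch of the same arithmetic, with illustrative values (a single 1280x720 image and one box; not taken from the repo):

import torch

inp_dim = 416.0                                            # network input size
im_dim_list = torch.tensor([[1280.0, 720.0]])              # original (w, h) per image
output = torch.tensor([[0.0, 10.0, 120.0, 200.0, 300.0]])  # [img_idx, x1, y1, x2, y2] in 416x416 letterbox space

# per-detection image dims and the scale the letterbox applied: min(416/1280, 416/720) = 0.325
dims = torch.index_select(im_dim_list, 0, output[:, 0].long())
scaling_factor = torch.min(inp_dim / dims, 1)[0].view(-1, 1)

# remove the gray padding offset: none in x here, (416 - 0.325*720)/2 = 91 px in y
output[:, [1, 3]] -= (inp_dim - scaling_factor * dims[:, 0].view(-1, 1)) / 2
output[:, [2, 4]] -= (inp_dim - scaling_factor * dims[:, 1].view(-1, 1)) / 2

# rescale to original-image pixels, then clamp each box to its own image bounds
output[:, 1:5] /= scaling_factor
for i in range(output.shape[0]):
    output[i, [1, 3]] = torch.clamp(output[i, [1, 3]], 0.0, dims[i, 0])
    output[i, [2, 4]] = torch.clamp(output[i, [2, 4]], 0.0, dims[i, 1])

print(output)  # roughly [[0.0, 30.8, 89.2, 615.4, 643.1]]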

preprocess.py: +16 -1

@@ -12,6 +12,21 @@
 from PIL import Image, ImageDraw


+def letterbox_image(img, inp_dim):
+    '''resize image with unchanged aspect ratio using padding'''
+    img_w, img_h = img.shape[1], img.shape[0]
+    w, h = inp_dim
+    new_w = int(img_w * min(w/img_w, h/img_h))
+    new_h = int(img_h * min(w/img_w, h/img_h))
+    resized_image = cv2.resize(img, (new_w,new_h), interpolation = cv2.INTER_CUBIC)
+
+    canvas = np.full((inp_dim[1], inp_dim[0], 3), 128)
+
+    canvas[(h-new_h)//2:(h-new_h)//2 + new_h,(w-new_w)//2:(w-new_w)//2 + new_w, :] = resized_image
+
+    return canvas
+

 def prep_image(img, inp_dim):
     """

@@ -22,7 +37,7 @@ def prep_image(img, inp_dim):

     orig_im = cv2.imread(img)
     dim = orig_im.shape[1], orig_im.shape[0]
-    img = cv2.resize(orig_im, (inp_dim, inp_dim))
+    img = (letterbox_image(orig_im, (inp_dim, inp_dim)))
     img_ = img[:,:,::-1].transpose((2,0,1)).copy()
     img_ = torch.from_numpy(img_).float().div(255.0).unsqueeze(0)
     return img_, orig_im, dim
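
letterbox_image scales the input so it fits inside the network resolution without distorting the aspect ratio, then pastes the result onto a gray (value 128) square canvas, which is what prep_image now feeds the network instead of a plain cv2.resize. A minimal standalone sketch of the same idea, runnable on its own (the demo frame size and helper name are illustrative, not part of the repo):

import numpy as np
import cv2

def letterbox_demo(img, inp_dim=(416, 416)):
    # scale so the image fits inside inp_dim without changing its aspect ratio
    img_h, img_w = img.shape[:2]
    w, h = inp_dim
    scale = min(w / img_w, h / img_h)
    new_w, new_h = int(img_w * scale), int(img_h * scale)
    resized = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_CUBIC)

    # gray canvas of the target size, with the resized image centered on it
    canvas = np.full((h, w, 3), 128, dtype=np.uint8)
    top, left = (h - new_h) // 2, (w - new_w) // 2
    canvas[top:top + new_h, left:left + new_w, :] = resized
    return canvas

# a 1280x720 frame: scale = min(416/1280, 416/720) = 0.325, content becomes 416x234,
# leaving ~91 px of gray padding above and below the image
frame = np.zeros((720, 1280, 3), dtype=np.uint8)
print(letterbox_demo(frame).shape)  # (416, 416, 3)

One detail of the committed version: np.full without an explicit dtype yields an integer array rather than uint8, which still works downstream because prep_image converts the result to a float tensor and divides by 255.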

video_demo.py: +12 -6

@@ -7,7 +7,7 @@
 import cv2
 from util import *
 from darknet import Darknet
-from preprocess import prep_image, inp_to_image
+from preprocess import prep_image, inp_to_image, letterbox_image
 import pandas as pd
 import random
 import pickle as pkl

@@ -36,7 +36,7 @@ def prep_image(img, inp_dim):

     orig_im = img
     dim = orig_im.shape[1], orig_im.shape[0]
-    img = cv2.resize(orig_im, (inp_dim, inp_dim))
+    img = (letterbox_image(orig_im, (inp_dim, inp_dim)))
     img_ = img[:,:,::-1].transpose((2,0,1)).copy()
     img_ = torch.from_numpy(img_).float().div(255.0).unsqueeze(0)
     return img_, orig_im, dim

@@ -151,11 +151,17 @@ def arg_parse():


-            output[:,1:5] = torch.clamp(output[:,1:5], 0.0, float(inp_dim))
+            im_dim = im_dim.repeat(output.size(0), 1)
+            scaling_factor = torch.min(inp_dim/im_dim,1)[0].view(-1,1)
+
+            output[:,[1,3]] -= (inp_dim - scaling_factor*im_dim[:,0].view(-1,1))/2
+            output[:,[2,4]] -= (inp_dim - scaling_factor*im_dim[:,1].view(-1,1))/2

-            im_dim = im_dim.repeat(output.size(0), 1)/inp_dim
-            output[:,1:5] *= im_dim
+            output[:,1:5] /= scaling_factor
+
+            for i in range(output.shape[0]):
+                output[i, [1,3]] = torch.clamp(output[i, [1,3]], 0.0, im_dim[i,0])
+                output[i, [2,4]] = torch.clamp(output[i, [2,4]], 0.0, im_dim[i,1])

             classes = load_classes('data/coco.names')
             colors = pkl.load(open("pallete", "rb"))
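
The two video scripts apply the same un-letterboxing as detect.py; the only difference is how the original dimensions are gathered. detect.py handles a batch of images of mixed sizes, so it uses torch.index_select with each detection's image index, while the video demos have a single frame size and simply repeat it per detection. A tiny sketch showing the two forms agree when there is one source image (values are illustrative):

import torch

# two detections; column 0 is the (batch) image index
output = torch.tensor([[0.0, 10.0, 120.0, 200.0, 300.0],
                       [0.0, 50.0, 60.0, 400.0, 500.0]])

# detect.py style: per-image dims gathered with each detection's image index
im_dim_list = torch.tensor([[1280.0, 720.0]])
gathered = torch.index_select(im_dim_list, 0, output[:, 0].long())

# video_demo*.py style: one frame, so its (w, h) is repeated for every detection
im_dim = torch.tensor([1280.0, 720.0])
repeated = im_dim.repeat(output.size(0), 1)

print(torch.equal(gathered, repeated))  # True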

video_demo_half.py: +13 -5

@@ -7,7 +7,7 @@
 import cv2
 from util import *
 from darknet import Darknet
-from preprocess import prep_image, inp_to_image
+from preprocess import prep_image, inp_to_image, letterbox_image
 import pandas as pd
 import random
 import pickle as pkl

@@ -36,7 +36,7 @@ def prep_image(img, inp_dim):

     orig_im = img
     dim = orig_im.shape[1], orig_im.shape[0]
-    img = cv2.resize(orig_im, (inp_dim, inp_dim))
+    img = (letterbox_image(orig_im, (inp_dim, inp_dim)))
     img_ = img[:,:,::-1].transpose((2,0,1)).copy()
     img_ = torch.from_numpy(img_).float().div(255.0).unsqueeze(0)
     return img_, orig_im, dim

@@ -153,10 +153,18 @@ def arg_parse():
                 continue


-            output[:,1:5] = torch.clamp(output[:,1:5], 0.0, float(inp_dim))
+            im_dim = im_dim.repeat(output.size(0), 1)
+            scaling_factor = torch.min(inp_dim/im_dim,1)[0].view(-1,1)
+
+            output[:,[1,3]] -= (inp_dim - scaling_factor*im_dim[:,0].view(-1,1))/2
+            output[:,[2,4]] -= (inp_dim - scaling_factor*im_dim[:,1].view(-1,1))/2
+
+            output[:,1:5] /= scaling_factor
+
+            for i in range(output.shape[0]):
+                output[i, [1,3]] = torch.clamp(output[i, [1,3]], 0.0, im_dim[i,0])
+                output[i, [2,4]] = torch.clamp(output[i, [2,4]], 0.0, im_dim[i,1])

-            im_dim = im_dim.repeat(output.size(0), 1)/inp_dim
-            output[:,1:5] *= im_dim

             classes = load_classes('data/coco.names')
             colors = pkl.load(open("pallete", "rb"))
