diff --git a/requirements.txt b/requirements.txt
index 26cd97f2..7596ec57 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,3 +6,4 @@ tensorboard
 scipy
 calflops
 transformers
+supervision
diff --git a/tools/inference/onnx_inf.py b/tools/inference/onnx_inf.py
index 3fb26c76..4b9f1ab8 100644
--- a/tools/inference/onnx_inf.py
+++ b/tools/inference/onnx_inf.py
@@ -7,7 +7,8 @@
 import onnxruntime as ort
 import torch
 import torchvision.transforms as T
-from PIL import Image, ImageDraw
+from PIL import Image
+import supervision as sv
 
 
 def resize_with_aspect_ratio(image, size, interpolation=Image.BILINEAR):
@@ -27,27 +28,59 @@ def resize_with_aspect_ratio(image, size, interpolation=Image.BILINEAR):
 def draw(images, labels, boxes, scores, ratios, paddings, thrh=0.4):
     result_images = []
     for i, im in enumerate(images):
-        draw = ImageDraw.Draw(im)
+        np_image = np.array(im)
+
         scr = scores[i]
-        lab = labels[i][scr > thrh]
-        box = boxes[i][scr > thrh]
-        scr = scr[scr > thrh]
+        lab = labels[i]
+        box = boxes[i]
+
+        keep_mask = scr > thrh
+        scr = scr[keep_mask]
+        lab = lab[keep_mask]
+        box = box[keep_mask]
 
         ratio = ratios[i]
         pad_w, pad_h = paddings[i]
 
-        for lbl, bb in zip(lab, box):
-            # Adjust bounding boxes according to the resizing and padding
-            bb = [
-                (bb[0] - pad_w) / ratio,
-                (bb[1] - pad_h) / ratio,
-                (bb[2] - pad_w) / ratio,
-                (bb[3] - pad_h) / ratio,
-            ]
-            draw.rectangle(bb, outline="red")
-            draw.text((bb[0], bb[1]), text=str(lbl), fill="blue")
+        adjusted_boxes = []
+        for b in box:
+            x1 = (b[0] - pad_w) / ratio
+            y1 = (b[1] - pad_h) / ratio
+            x2 = (b[2] - pad_w) / ratio
+            y2 = (b[3] - pad_h) / ratio
+            adjusted_boxes.append([x1, y1, x2, y2])
+        adjusted_boxes = np.array(adjusted_boxes)
+
+        detections = sv.Detections(
+            xyxy=adjusted_boxes,
+            confidence=scr,
+            class_id=lab.astype(int),
+        )
+
+        height, width = np_image.shape[:2]
+        resolution_wh = (width, height)
+
+        text_scale = sv.calculate_optimal_text_scale(resolution_wh=resolution_wh)
+        line_thickness = sv.calculate_optimal_line_thickness(resolution_wh=resolution_wh)
+
+        box_annotator = sv.BoxAnnotator(thickness=line_thickness)
+        label_annotator = sv.LabelAnnotator(text_scale=text_scale, smart_position=True)
+
+        label_texts = [
+            f"{class_id} {confidence:.2f}"
+            for class_id, confidence in zip(detections.class_id, detections.confidence)
+        ]
+
+        np_image = box_annotator.annotate(scene=np_image, detections=detections)
+        np_image = label_annotator.annotate(
+            scene=np_image,
+            detections=detections,
+            labels=label_texts,
+        )
+
+        result_im = Image.fromarray(np_image)
+        result_images.append(result_im)
 
-        result_images.append(im)
     return result_images
 
 
diff --git a/tools/inference/openvino_inf.py b/tools/inference/openvino_inf.py
index 054d0edc..3be2e7ac 100644
--- a/tools/inference/openvino_inf.py
+++ b/tools/inference/openvino_inf.py
@@ -5,6 +5,7 @@
 import cv2
 import numpy as np
 import openvino
+import supervision as sv
 from openvino.runtime import Core
 
 
@@ -71,18 +72,39 @@ def get_available_device(self):
 
     def draw_and_save_image(self, infer_result, image_path, score_threshold=0.6):
         draw_image = self.ori_image
+
        scores = infer_result["scores"]
         labels = infer_result["labels"]
         boxes = infer_result["boxes"]
-        for i in range(self.query_num):
-            if scores[0, i] > score_threshold:
-                cx = boxes[0, i, 0] * self.ratio
-                cy = boxes[0, i, 1] * self.ratio
-                bx = boxes[0, i, 2] * self.ratio
-                by = boxes[0, i, 3] * self.ratio
-                cv2.rectangle(
-                    draw_image, (int(cx), int(cy), int(bx - cx), int(by - cy)), (255, 0, 0), 1
-                )
+
+        detections = sv.Detections(
+            xyxy=boxes[0] * self.ratio,
+            confidence=scores[0],
+            class_id=labels[0].astype(int)
+        )
+        detections = detections[detections.confidence > score_threshold]
+
+        height, width = draw_image.shape[:2]
+        resolution_wh = (width, height)
+
+        text_scale = sv.calculate_optimal_text_scale(resolution_wh=resolution_wh)
+        line_thickness = sv.calculate_optimal_line_thickness(resolution_wh=resolution_wh)
+
+        box_annotator = sv.BoxAnnotator(thickness=line_thickness)
+        label_annotator = sv.LabelAnnotator(text_scale=text_scale, smart_position=True)
+
+        label_texts = [
+            f"{class_id} {confidence:.2f}"
+            for class_id, confidence in zip(detections.class_id, detections.confidence)
+        ]
+
+        draw_image = box_annotator.annotate(scene=draw_image, detections=detections)
+        draw_image = label_annotator.annotate(
+            scene=draw_image,
+            detections=detections,
+            labels=label_texts,
+        )
+
         cv2.imwrite(image_path, draw_image)
 
 
diff --git a/tools/inference/torch_inf.py b/tools/inference/torch_inf.py
index c5ba0efb..80d6ff78 100644
--- a/tools/inference/torch_inf.py
+++ b/tools/inference/torch_inf.py
@@ -10,30 +10,42 @@
 import torch
 import torch.nn as nn
 import torchvision.transforms as T
-from PIL import Image, ImageDraw
+import supervision as sv
+from PIL import Image
 
 sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "../../")))
 from src.core import YAMLConfig
 
 
 def draw(images, labels, boxes, scores, thrh=0.4):
-    for i, im in enumerate(images):
-        draw = ImageDraw.Draw(im)
-
-        scr = scores[i]
-        lab = labels[i][scr > thrh]
-        box = boxes[i][scr > thrh]
-        scrs = scr[scr > thrh]
-
-        for j, b in enumerate(box):
-            draw.rectangle(list(b), outline="red")
-            draw.text(
-                (b[0], b[1]),
-                text=f"{lab[j].item()} {round(scrs[j].item(), 2)}",
-                fill="blue",
-            )
-
-        im.save("torch_results.jpg")
+    for i, image in enumerate(images):
+        detections = sv.Detections(
+            xyxy=boxes[i].detach().cpu().numpy(),
+            confidence=scores[i].detach().cpu().numpy(),
+            class_id=labels[i].detach().cpu().numpy().astype(int),
+        )
+        detections = detections[detections.confidence > thrh]
+
+        text_scale = sv.calculate_optimal_text_scale(resolution_wh=image.size)
+        line_thickness = sv.calculate_optimal_line_thickness(resolution_wh=image.size)
+
+        box_annotator = sv.BoxAnnotator(thickness=line_thickness)
+        label_annotator = sv.LabelAnnotator(text_scale=text_scale, smart_position=True)
+
+        label_texts = [
+            f"{class_id} {confidence:.2f}"
+            for class_id, confidence
+            in zip(detections.class_id, detections.confidence)
+        ]
+
+        image = box_annotator.annotate(scene=image, detections=detections)
+        image = label_annotator.annotate(
+            scene=image,
+            detections=detections,
+            labels=label_texts
+        )
+
+        image.save("torch_results.jpg")
 
 
 def process_image(model, device, file_path):
diff --git a/tools/inference/trt_inf.py b/tools/inference/trt_inf.py
index 731e0b55..9a2fa2a7 100644
--- a/tools/inference/trt_inf.py
+++ b/tools/inference/trt_inf.py
@@ -13,7 +13,8 @@
 import tensorrt as trt
 import torch
 import torchvision.transforms as T
-from PIL import Image, ImageDraw
+import supervision as sv
+from PIL import Image
 
 
 class TimeProfiler(contextlib.ContextDecorator):
@@ -122,22 +123,38 @@ def synchronize(self):
 
 
 def draw(images, labels, boxes, scores, thrh=0.4):
-    for i, im in enumerate(images):
-        draw = ImageDraw.Draw(im)
-        scr = scores[i]
-        lab = labels[i][scr > thrh]
-        box = boxes[i][scr > thrh]
-        scrs = scr[scr > thrh]
-
-        for j, b in enumerate(box):
-            draw.rectangle(list(b), outline="red")
-            draw.text(
-                (b[0], b[1]),
-                text=f"{lab[j].item()} {round(scrs[j].item(), 2)}",
-                fill="blue",
-            )
-
-    return images
+    updated_images = []
+
+    for i, image in enumerate(images):
+        detections = sv.Detections(
+            xyxy=boxes[i].detach().cpu().numpy(),
+            confidence=scores[i].detach().cpu().numpy(),
+            class_id=labels[i].detach().cpu().numpy().astype(int),
+        )
+        detections = detections[detections.confidence > thrh]
+
+        text_scale = sv.calculate_optimal_text_scale(resolution_wh=image.size)
+        line_thickness = sv.calculate_optimal_line_thickness(resolution_wh=image.size)
+
+        box_annotator = sv.BoxAnnotator(thickness=line_thickness)
+        label_annotator = sv.LabelAnnotator(text_scale=text_scale, smart_position=True)
+
+        label_texts = [
+            f"{class_id} {confidence:.2f}"
+            for class_id, confidence
+            in zip(detections.class_id, detections.confidence)
+        ]
+
+        image = box_annotator.annotate(scene=image, detections=detections)
+        image = label_annotator.annotate(
+            scene=image,
+            detections=detections,
+            labels=label_texts
+        )
+
+        updated_images.append(image)
+
+    return updated_images
 
 
 def process_image(m, file_path, device):