1 change: 1 addition & 0 deletions requirements.txt
@@ -6,3 +6,4 @@ tensorboard
 scipy
 calflops
 transformers
+supervision
65 changes: 49 additions & 16 deletions tools/inference/onnx_inf.py
@@ -7,7 +7,8 @@
 import onnxruntime as ort
 import torch
 import torchvision.transforms as T
-from PIL import Image, ImageDraw
+from PIL import Image
+import supervision as sv
 
 
 def resize_with_aspect_ratio(image, size, interpolation=Image.BILINEAR):
@@ -27,27 +28,59 @@ def resize_with_aspect_ratio(image, size, interpolation=Image.BILINEAR):
 def draw(images, labels, boxes, scores, ratios, paddings, thrh=0.4):
     result_images = []
     for i, im in enumerate(images):
-        draw = ImageDraw.Draw(im)
+        np_image = np.array(im)
 
         scr = scores[i]
-        lab = labels[i][scr > thrh]
-        box = boxes[i][scr > thrh]
-        scr = scr[scr > thrh]
+        lab = labels[i]
+        box = boxes[i]
+
+        keep_mask = scr > thrh
+        scr = scr[keep_mask]
+        lab = lab[keep_mask]
+        box = box[keep_mask]
 
         ratio = ratios[i]
         pad_w, pad_h = paddings[i]
 
-        for lbl, bb in zip(lab, box):
-            # Adjust bounding boxes according to the resizing and padding
-            bb = [
-                (bb[0] - pad_w) / ratio,
-                (bb[1] - pad_h) / ratio,
-                (bb[2] - pad_w) / ratio,
-                (bb[3] - pad_h) / ratio,
-            ]
-            draw.rectangle(bb, outline="red")
-            draw.text((bb[0], bb[1]), text=str(lbl), fill="blue")
+        # Undo the resize and padding so boxes land on the original image
+        adjusted_boxes = []
+        for b in box:
+            x1 = (b[0] - pad_w) / ratio
+            y1 = (b[1] - pad_h) / ratio
+            x2 = (b[2] - pad_w) / ratio
+            y2 = (b[3] - pad_h) / ratio
+            adjusted_boxes.append([x1, y1, x2, y2])
+        adjusted_boxes = np.array(adjusted_boxes)
+
+        detections = sv.Detections(
+            xyxy=adjusted_boxes,
+            confidence=scr,
+            class_id=lab.astype(int),
+        )
+
+        height, width = np_image.shape[:2]
+        resolution_wh = (width, height)
+
+        text_scale = sv.calculate_optimal_text_scale(resolution_wh=resolution_wh)
+        line_thickness = sv.calculate_optimal_line_thickness(resolution_wh=resolution_wh)
+
+        box_annotator = sv.BoxAnnotator(thickness=line_thickness)
+        label_annotator = sv.LabelAnnotator(text_scale=text_scale, smart_position=True)
+
+        label_texts = [
+            f"{class_id} {confidence:.2f}"
+            for class_id, confidence in zip(detections.class_id, detections.confidence)
+        ]
+
+        np_image = box_annotator.annotate(scene=np_image, detections=detections)
+        np_image = label_annotator.annotate(
+            scene=np_image,
+            detections=detections,
+            labels=label_texts,
+        )
+
+        result_im = Image.fromarray(np_image)
+        result_images.append(result_im)
 
-        result_images.append(im)
     return result_images


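Note: the rewritten draw() above boils down to the supervision flow sketched below. This is a minimal, self-contained example on a synthetic image; the array shapes and values are illustrative stand-ins for the ONNX model outputs, and smart_position assumes a recent supervision release.

import numpy as np
import supervision as sv

# Dummy HxWx3 scene standing in for the decoded input image.
image = np.zeros((480, 640, 3), dtype=np.uint8)

# One fake detection, already mapped back to original-image pixel coordinates.
detections = sv.Detections(
    xyxy=np.array([[50.0, 60.0, 200.0, 220.0]]),
    confidence=np.array([0.87]),
    class_id=np.array([3]),
)

# supervision expects (width, height); numpy shapes are (height, width).
resolution_wh = (image.shape[1], image.shape[0])
box_annotator = sv.BoxAnnotator(
    thickness=sv.calculate_optimal_line_thickness(resolution_wh=resolution_wh)
)
label_annotator = sv.LabelAnnotator(
    text_scale=sv.calculate_optimal_text_scale(resolution_wh=resolution_wh),
    smart_position=True,
)

labels = [f"{c} {s:.2f}" for c, s in zip(detections.class_id, detections.confidence)]
image = box_annotator.annotate(scene=image, detections=detections)
image = label_annotator.annotate(scene=image, detections=detections, labels=labels)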
40 changes: 31 additions & 9 deletions tools/inference/openvino_inf.py
@@ -5,6 +5,7 @@
 import cv2
 import numpy as np
 import openvino
+import supervision as sv
 from openvino.runtime import Core


@@ -71,18 +72,39 @@ def get_available_device(self):
 
     def draw_and_save_image(self, infer_result, image_path, score_threshold=0.6):
         draw_image = self.ori_image
+
         scores = infer_result["scores"]
         labels = infer_result["labels"]
         boxes = infer_result["boxes"]
-        for i in range(self.query_num):
-            if scores[0, i] > score_threshold:
-                cx = boxes[0, i, 0] * self.ratio
-                cy = boxes[0, i, 1] * self.ratio
-                bx = boxes[0, i, 2] * self.ratio
-                by = boxes[0, i, 3] * self.ratio
-                cv2.rectangle(
-                    draw_image, (int(cx), int(cy), int(bx - cx), int(by - cy)), (255, 0, 0), 1
-                )
+
+        detections = sv.Detections(
+            xyxy=boxes[0] * self.ratio,
+            confidence=scores[0],
+            class_id=labels[0].astype(int),
+        )
+        detections = detections[detections.confidence > score_threshold]
+
+        height, width = draw_image.shape[:2]
+        resolution_wh = (width, height)
+
+        text_scale = sv.calculate_optimal_text_scale(resolution_wh=resolution_wh)
+        line_thickness = sv.calculate_optimal_line_thickness(resolution_wh=resolution_wh)
+
+        box_annotator = sv.BoxAnnotator(thickness=line_thickness)
+        label_annotator = sv.LabelAnnotator(text_scale=text_scale, smart_position=True)
+
+        label_texts = [
+            f"{class_id} {confidence:.2f}"
+            for class_id, confidence in zip(detections.class_id, detections.confidence)
+        ]
+
+        draw_image = box_annotator.annotate(scene=draw_image, detections=detections)
+        draw_image = label_annotator.annotate(
+            scene=draw_image,
+            detections=detections,
+            labels=label_texts,
+        )
 
         cv2.imwrite(image_path, draw_image)


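Note: the confidence filter above works because sv.Detections supports numpy-style boolean indexing, which replaces the old per-query if check. A small sketch with made-up numbers:

import numpy as np
import supervision as sv

detections = sv.Detections(
    xyxy=np.array([[10.0, 10.0, 50.0, 50.0], [20.0, 20.0, 80.0, 90.0]]),
    confidence=np.array([0.95, 0.30]),
    class_id=np.array([1, 2]),
)

# A boolean mask keeps only rows above the threshold, across all fields at once.
kept = detections[detections.confidence > 0.6]
print(len(kept))  # -> 1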
48 changes: 30 additions & 18 deletions tools/inference/torch_inf.py
@@ -10,30 +10,42 @@
 import torch
 import torch.nn as nn
 import torchvision.transforms as T
-from PIL import Image, ImageDraw
+import supervision as sv
+from PIL import Image
 
 sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "../../")))
 from src.core import YAMLConfig
 
 
 def draw(images, labels, boxes, scores, thrh=0.4):
-    for i, im in enumerate(images):
-        draw = ImageDraw.Draw(im)
-
-        scr = scores[i]
-        lab = labels[i][scr > thrh]
-        box = boxes[i][scr > thrh]
-        scrs = scr[scr > thrh]
-
-        for j, b in enumerate(box):
-            draw.rectangle(list(b), outline="red")
-            draw.text(
-                (b[0], b[1]),
-                text=f"{lab[j].item()} {round(scrs[j].item(), 2)}",
-                fill="blue",
-            )
-
-        im.save("torch_results.jpg")
+    for i, image in enumerate(images):
+        # Move model outputs to host memory; supervision works on numpy arrays.
+        detections = sv.Detections(
+            xyxy=boxes[i].detach().cpu().numpy(),
+            confidence=scores[i].detach().cpu().numpy(),
+            class_id=labels[i].detach().cpu().numpy().astype(int),
+        )
+        detections = detections[detections.confidence > thrh]
+
+        # PIL's Image.size is already (width, height), as resolution_wh expects.
+        text_scale = sv.calculate_optimal_text_scale(resolution_wh=image.size)
+        line_thickness = sv.calculate_optimal_line_thickness(resolution_wh=image.size)
+
+        box_annotator = sv.BoxAnnotator(thickness=line_thickness)
+        label_annotator = sv.LabelAnnotator(text_scale=text_scale, smart_position=True)
+
+        label_texts = [
+            f"{class_id} {confidence:.2f}"
+            for class_id, confidence in zip(detections.class_id, detections.confidence)
+        ]
+
+        image = box_annotator.annotate(scene=image, detections=detections)
+        image = label_annotator.annotate(
+            scene=image,
+            detections=detections,
+            labels=label_texts,
+        )
+
+        image.save("torch_results.jpg")


def process_image(model, device, file_path):
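Note: unlike the ONNX script, draw() here hands the PIL image straight to the annotators. supervision's annotators accept either numpy arrays or PIL images and should return the matching type, which is why image.save() still works afterwards. A minimal sketch (blank 640x480 canvas and a made-up detection; the PIL pass-through behavior assumes a recent supervision release):

import numpy as np
import supervision as sv
from PIL import Image

image = Image.new("RGB", (640, 480))
detections = sv.Detections(
    xyxy=np.array([[100.0, 100.0, 300.0, 260.0]]),
    confidence=np.array([0.90]),
    class_id=np.array([0]),
)

# Annotators convert PIL to numpy internally and hand back the same type.
annotated = sv.BoxAnnotator().annotate(scene=image, detections=detections)
assert isinstance(annotated, Image.Image)  # PIL in, PIL out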
51 changes: 34 additions & 17 deletions tools/inference/trt_inf.py
@@ -13,7 +13,8 @@
 import tensorrt as trt
 import torch
 import torchvision.transforms as T
-from PIL import Image, ImageDraw
+import supervision as sv
+from PIL import Image
 
 
 class TimeProfiler(contextlib.ContextDecorator):
@@ -122,22 +123,38 @@ def synchronize(self):
 
 
 def draw(images, labels, boxes, scores, thrh=0.4):
-    for i, im in enumerate(images):
-        draw = ImageDraw.Draw(im)
-        scr = scores[i]
-        lab = labels[i][scr > thrh]
-        box = boxes[i][scr > thrh]
-        scrs = scr[scr > thrh]
-
-        for j, b in enumerate(box):
-            draw.rectangle(list(b), outline="red")
-            draw.text(
-                (b[0], b[1]),
-                text=f"{lab[j].item()} {round(scrs[j].item(), 2)}",
-                fill="blue",
-            )
-
-    return images
+    updated_images = []
+
+    for i, image in enumerate(images):
+        detections = sv.Detections(
+            xyxy=boxes[i].detach().cpu().numpy(),
+            confidence=scores[i].detach().cpu().numpy(),
+            class_id=labels[i].detach().cpu().numpy().astype(int),
+        )
+        detections = detections[detections.confidence > thrh]
+
+        text_scale = sv.calculate_optimal_text_scale(resolution_wh=image.size)
+        line_thickness = sv.calculate_optimal_line_thickness(resolution_wh=image.size)
+
+        box_annotator = sv.BoxAnnotator(thickness=line_thickness)
+        label_annotator = sv.LabelAnnotator(text_scale=text_scale, smart_position=True)
+
+        label_texts = [
+            f"{class_id} {confidence:.2f}"
+            for class_id, confidence in zip(detections.class_id, detections.confidence)
+        ]
+
+        image = box_annotator.annotate(scene=image, detections=detections)
+        image = label_annotator.annotate(
+            scene=image,
+            detections=detections,
+            labels=label_texts,
+        )
+
+        updated_images.append(image)
+
+    return updated_images


def process_image(m, file_path, device):
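Note: all four scripts now use the same dynamic styling, so box thickness and label size track the input resolution instead of the old hard-coded 1-pixel cv2/PIL strokes. A quick sketch of how the helpers scale across resolutions (the printed values depend on the installed supervision version):

import supervision as sv

# Larger frames get thicker boxes and bigger label text.
for resolution_wh in [(640, 480), (1920, 1080), (3840, 2160)]:
    thickness = sv.calculate_optimal_line_thickness(resolution_wh=resolution_wh)
    text_scale = sv.calculate_optimal_text_scale(resolution_wh=resolution_wh)
    print(resolution_wh, thickness, text_scale)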