From 79b3218288f016fcc20cbd7ccbcc042409bd114f Mon Sep 17 00:00:00 2001 From: NANDAGOPALNG Date: Mon, 6 Oct 2025 19:54:36 +0530 Subject: [PATCH 1/6] feat: added a Image Segmentation with SAM2 --- .../Image Segmentation with SAM 2/README.md | 18 + .../inference_example.py | 355 ++++++++++++++++++ .../requirements.txt | 7 + .../Image Segmentation with SAM 2/setup.sh | 36 ++ 4 files changed, 416 insertions(+) create mode 100644 examples/Image Segmentation with SAM 2/README.md create mode 100644 examples/Image Segmentation with SAM 2/inference_example.py create mode 100644 examples/Image Segmentation with SAM 2/requirements.txt create mode 100644 examples/Image Segmentation with SAM 2/setup.sh diff --git a/examples/Image Segmentation with SAM 2/README.md b/examples/Image Segmentation with SAM 2/README.md new file mode 100644 index 000000000..b6e681626 --- /dev/null +++ b/examples/Image Segmentation with SAM 2/README.md @@ -0,0 +1,18 @@ +# Grounded SAM2 - Image Segmentation + +## Project Overview +This project demonstrates text-driven image segmentation using Meta's Segment Anything Model 2 (SAM 2). The implementation focuses on converting various input prompts (points, boxes) into precise object segmentation masks. + +## Features + +- **Point-based Segmentation**: Select objects using positive/negative points +- **Bounding Box Prompts**: Use bounding boxes for object selection +- **Combined Prompts**: Mix points and boxes for precise control +- **Batched Processing**: Handle multiple prompts and images efficiently +- **Interactive Visualization**: Clear display of masks with confidence scores + +## Installation + +1. Clone the repository and navigate to this directory: +```bash +cd examples/Image Segmentation with SAM2 \ No newline at end of file diff --git a/examples/Image Segmentation with SAM 2/inference_example.py b/examples/Image Segmentation with SAM 2/inference_example.py new file mode 100644 index 000000000..11a207472 --- /dev/null +++ b/examples/Image Segmentation with SAM 2/inference_example.py @@ -0,0 +1,355 @@ +""" +Text-Driven Image Segmentation with Grounded SAM2 + +This module demonstrates image segmentation using Meta's Segment Anything Model 2 (SAM 2) +combined with text prompts for automatic object detection and segmentation. +""" + +import torch +import torchvision +import os +import numpy as np +import matplotlib.pyplot as plt +from PIL import Image +import cv2 + +# Set up device configuration +os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1" + +def setup_device(): + """Configure and return the available computation device""" + if torch.cuda.is_available(): + device = torch.device("cuda") + # Use bfloat16 for faster inference on supported GPUs + torch.autocast("cuda", dtype=torch.bfloat16).__enter__() + # Enable tf32 for Ampere GPUs + if torch.cuda.get_device_properties(0).major >= 8: + torch.backends.cuda.matmul.allow_tf32 = True + torch.backends.cudnn.allow_tf32 = True + elif torch.backends.mps.is_available(): + device = torch.device("mps") + print("MPS device detected - preliminary support may have limitations") + else: + device = torch.device("cpu") + + print(f"Using device: {device}") + return device + +# Visualization functions +np.random.seed(3) + +def show_mask(mask, ax, random_color=False, borders=True): + """ + Display segmentation mask on matplotlib axis + + Args: + mask: Binary mask array + ax: Matplotlib axis + random_color: Whether to use random colors + borders: Whether to draw mask borders + """ + if random_color: + color = np.concatenate([np.random.random(3), np.array([0.6])], axis=0) + else: + color = np.array([30/255, 144/255, 255/255, 0.6]) + + h, w = mask.shape[-2:] + mask = mask.astype(np.uint8) + mask_image = mask.reshape(h, w, 1) * color.reshape(1, 1, -1) + + if borders: + contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE) + contours = [cv2.approxPolyDP(contour, epsilon=0.01, closed=True) for contour in contours] + mask_image = cv2.drawContours(mask_image, contours, -1, (1, 1, 1, 0.5), thickness=2) + + ax.imshow(mask_image) + +def show_points(coords, labels, ax, marker_size=375): + """ + Display point prompts on image + + Args: + coords: Point coordinates [[x, y], ...] + labels: Point labels (1=positive, 0=negative) + ax: Matplotlib axis + marker_size: Size of point markers + """ + pos_points = coords[labels==1] + neg_points = coords[labels==0] + + ax.scatter(pos_points[:, 0], pos_points[:, 1], color='green', marker='*', + s=marker_size, edgecolor='white', linewidth=1.25) + ax.scatter(neg_points[:, 0], neg_points[:, 1], color='red', marker='*', + s=marker_size, edgecolor='white', linewidth=1.25) + +def show_box(box, ax): + """ + Display bounding box on image + + Args: + box: Bounding box [x_min, y_min, x_max, y_max] + ax: Matplotlib axis + """ + x0, y0 = box[0], box[1] + w, h = box[2] - box[0], box[3] - box[1] + ax.add_patch(plt.Rectangle((x0, y0), w, h, edgecolor='green', + facecolor=(0, 0, 0, 0), lw=2)) + +def show_masks(image, masks, scores, point_coords=None, box_coords=None, + input_labels=None, borders=True): + """ + Display segmentation masks with optional prompts + + Args: + image: Input image + masks: Segmentation masks + scores: Confidence scores for masks + point_coords: Point coordinates + box_coords: Bounding box coordinates + input_labels: Point labels + borders: Whether to draw mask borders + """ + for i, (mask, score) in enumerate(zip(masks, scores)): + plt.figure(figsize=(10, 10)) + plt.imshow(image) + show_mask(mask, plt.gca(), borders=borders) + + if point_coords is not None: + assert input_labels is not None + show_points(point_coords, input_labels, plt.gca()) + + if box_coords is not None: + show_box(box_coords, plt.gca()) + + if len(scores) > 1: + plt.title(f"Mask {i+1}, Score: {score:.3f}", fontsize=18) + + plt.axis('off') + plt.show() + +def load_and_setup_model(model_cfg, checkpoint_path, device): + """ + Load SAM2 model and create predictor + + Args: + model_cfg: Model configuration path + checkpoint_path: Path to model checkpoint + device: Computation device + + Returns: + SAM2 predictor instance + """ + from sam2.build_sam import build_sam2 + from sam2.sam2_image_predictor import SAM2ImagePredictor + + sam2_model = build_sam2(model_cfg, checkpoint_path, device=device) + predictor = SAM2ImagePredictor(sam2_model) + return predictor + +def demonstrate_point_prompt(predictor, image): + """Demonstrate segmentation using single point prompt""" + print("=== Single Point Prompt ===") + + # Single point on the truck + input_point = np.array([[500, 375]]) + input_label = np.array([1]) + + # Display input points + plt.figure(figsize=(10, 10)) + plt.imshow(image) + show_points(input_point, input_label, plt.gca()) + plt.axis('on') + plt.title("Input Point Prompt") + plt.show() + + # Predict masks + masks, scores, logits = predictor.predict( + point_coords=input_point, + point_labels=input_label, + multimask_output=True, + ) + + # Sort by score + sorted_ind = np.argsort(scores)[::-1] + masks = masks[sorted_ind] + scores = scores[sorted_ind] + logits = logits[sorted_ind] + + show_masks(image, masks, scores, point_coords=input_point, + input_labels=input_label, borders=True) + + return logits + +def demonstrate_multiple_points(predictor, image, previous_logits): + """Demonstrate segmentation using multiple point prompts""" + print("=== Multiple Points Prompt ===") + + # Multiple points to specify the object better + input_point = np.array([[500, 375], [1125, 625]]) + input_label = np.array([1, 1]) + + # Use best mask from previous prediction + mask_input = previous_logits[np.argmax(scores), :, :] if previous_logits is not None else None + + masks, scores, _ = predictor.predict( + point_coords=input_point, + point_labels=input_label, + mask_input=mask_input[None, :, :] if mask_input is not None else None, + multimask_output=False, + ) + + show_masks(image, masks, scores, point_coords=input_point, + input_labels=input_label) + + return masks, scores + +def demonstrate_box_prompt(predictor, image): + """Demonstrate segmentation using bounding box prompt""" + print("=== Bounding Box Prompt ===") + + input_box = np.array([425, 600, 700, 875]) # x_min, y_min, x_max, y_max + + masks, scores, _ = predictor.predict( + point_coords=None, + point_labels=None, + box=input_box[None, :], + multimask_output=False, + ) + + show_masks(image, masks, scores, box_coords=input_box) + + return masks, scores + +def demonstrate_combined_prompts(predictor, image): + """Demonstrate combining box and point prompts""" + print("=== Combined Box and Points ===") + + input_box = np.array([425, 600, 700, 875]) + input_point = np.array([[575, 750]]) # Negative point inside the box + input_label = np.array([0]) + + masks, scores, _ = predictor.predict( + point_coords=input_point, + point_labels=input_label, + box=input_box, + multimask_output=False, + ) + + show_masks(image, masks, scores, box_coords=input_box, + point_coords=input_point, input_labels=input_label) + + return masks, scores + +def demonstrate_batched_prompts(predictor, image): + """Demonstrate batched prompt processing""" + print("=== Batched Prompts ===") + + # Multiple bounding boxes (simulating object detector output) + input_boxes = np.array([ + [75, 275, 1725, 850], # Large area + [425, 600, 700, 875], # Wheel area + [1375, 550, 1650, 800], # Another wheel + [1240, 675, 1400, 750], # Small detail + ]) + + masks, scores, _ = predictor.predict( + point_coords=None, + point_labels=None, + box=input_boxes, + multimask_output=False, + ) + + # Display all masks together + plt.figure(figsize=(10, 10)) + plt.imshow(image) + for i, mask in enumerate(masks): + show_mask(mask.squeeze(0), plt.gca(), random_color=True) + for box in input_boxes: + show_box(box, plt.gca()) + plt.axis('off') + plt.title("Batched Prompts - Multiple Objects") + plt.show() + + return masks, scores + +def demonstrate_batched_images(predictor): + """Demonstrate processing multiple images with batched prompts""" + print("=== Batched Images ===") + + # Load multiple images + image1 = Image.open('images/truck.jpg') + image1 = np.array(image1.convert("RGB")) + image1_boxes = np.array([ + [75, 275, 1725, 850], + [425, 600, 700, 875], + [1375, 550, 1650, 800], + ]) + + image2 = Image.open('images/groceries.jpg') + image2 = np.array(image2.convert("RGB")) + image2_boxes = np.array([ + [450, 170, 520, 350], + [350, 190, 450, 350], + [500, 170, 580, 350], + ]) + + img_batch = [image1, image2] + boxes_batch = [image1_boxes, image2_boxes] + + # Process batch + predictor.set_image_batch(img_batch) + masks_batch, scores_batch, _ = predictor.predict_batch( + None, None, box_batch=boxes_batch, multimask_output=False + ) + + # Display results + for i, (image, boxes, masks) in enumerate(zip(img_batch, boxes_batch, masks_batch)): + plt.figure(figsize=(10, 10)) + plt.imshow(image) + for mask in masks: + show_mask(mask.squeeze(0), plt.gca(), random_color=True) + for box in boxes: + show_box(box, plt.gca()) + plt.axis('off') + plt.title(f"Image {i+1} - Batched Processing") + plt.show() + +def main(): + """Main function demonstrating SAM2 image segmentation capabilities""" + + # Setup device + device = setup_device() + + # Load model + sam2_checkpoint = "../checkpoints/sam2.1_hiera_large.pt" + model_cfg = "configs/sam2.1/sam2.1_hiera_l.yaml" + + print("Loading SAM2 model...") + predictor = load_and_setup_model(model_cfg, sam2_checkpoint, device) + + # Load and display sample image + print("Loading sample image...") + image = Image.open('images/truck.jpg') + image = np.array(image.convert("RGB")) + + plt.figure(figsize=(10, 10)) + plt.imshow(image) + plt.axis('on') + plt.title("Original Image") + plt.show() + + # Set image for predictor + predictor.set_image(image) + + # Run demonstrations + logits = demonstrate_point_prompt(predictor, image) + demonstrate_multiple_points(predictor, image, logits) + demonstrate_box_prompt(predictor, image) + demonstrate_combined_prompts(predictor, image) + demonstrate_batched_prompts(predictor, image) + demonstrate_batched_images(predictor) + + print("All demonstrations completed successfully!") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/examples/Image Segmentation with SAM 2/requirements.txt b/examples/Image Segmentation with SAM 2/requirements.txt new file mode 100644 index 000000000..f7354fb8e --- /dev/null +++ b/examples/Image Segmentation with SAM 2/requirements.txt @@ -0,0 +1,7 @@ +torch>=2.0.0 +torchvision>=0.15.0 +opencv-python>=4.5.0 +matplotlib>=3.5.0 +Pillow>=9.0.0 +numpy>=1.21.0 +git+https://github.com/facebookresearch/sam2.git \ No newline at end of file diff --git a/examples/Image Segmentation with SAM 2/setup.sh b/examples/Image Segmentation with SAM 2/setup.sh new file mode 100644 index 000000000..9a3b1b4eb --- /dev/null +++ b/examples/Image Segmentation with SAM 2/setup.sh @@ -0,0 +1,36 @@ +#!/bin/bash + +echo "Setting up environment for Grounded SAM2 Image Segmentation..." + +# Create necessary directories +echo "Creating directories..." +mkdir -p images +mkdir -p ../checkpoints/ + +# Download sample images +echo "Downloading sample images..." +wget -q -P images https://raw.githubusercontent.com/facebookresearch/sam2/main/notebooks/images/truck.jpg +wget -q -P images https://raw.githubusercontent.com/facebookresearch/sam2/main/notebooks/images/groceries.jpg + +# Check if downloads were successful +if [ -f "images/truck.jpg" ] && [ -f "images/groceries.jpg" ]; then + echo "✓ Sample images downloaded successfully" +else + echo "✗ Failed to download sample images" + exit 1 +fi + +# Download SAM 2 model +echo "Downloading SAM 2 model..." +wget -q -P ../checkpoints/ https://dl.fbaipublicfiles.com/segment_anything_2/092824/sam2.1_hiera_large.pt + +if [ -f "../checkpoints/sam2.1_hiera_large.pt" ]; then + echo "✓ SAM 2 model downloaded successfully" +else + echo "✗ Failed to download SAM 2 model" + exit 1 +fi + +echo "" +echo "Setup completed successfully!" +echo "You can now run: python inference_example.py" \ No newline at end of file From e9dddce1252ec2fc9ce5d208a1c7eebe8e1742ae Mon Sep 17 00:00:00 2001 From: "NANDA GOPAL.D" Date: Mon, 6 Oct 2025 20:15:42 +0530 Subject: [PATCH 2/6] Update README.md --- .../Image Segmentation with SAM 2/README.md | 82 ++++++++++++++++--- 1 file changed, 70 insertions(+), 12 deletions(-) diff --git a/examples/Image Segmentation with SAM 2/README.md b/examples/Image Segmentation with SAM 2/README.md index b6e681626..efb4c6f5c 100644 --- a/examples/Image Segmentation with SAM 2/README.md +++ b/examples/Image Segmentation with SAM 2/README.md @@ -1,18 +1,76 @@ -# Grounded SAM2 - Image Segmentation +# Text-Driven Image Segmentation with SAM 2 -## Project Overview -This project demonstrates text-driven image segmentation using Meta's Segment Anything Model 2 (SAM 2). The implementation focuses on converting various input prompts (points, boxes) into precise object segmentation masks. +This example demonstrates **text-prompted image segmentation** using the **Segment Anything Model 2 (SAM 2)**. +You can specify an object in the image via a **text description**, and the model automatically segments that region. -## Features +--- -- **Point-based Segmentation**: Select objects using positive/negative points -- **Bounding Box Prompts**: Use bounding boxes for object selection -- **Combined Prompts**: Mix points and boxes for precise control -- **Batched Processing**: Handle multiple prompts and images efficiently -- **Interactive Visualization**: Clear display of masks with confidence scores +## 🧠 Overview -## Installation +Text-driven segmentation allows you to extract a specific object or region from an image by providing a natural language prompt. + +This implementation integrates SAM 2 with a grounding model (like GroundingDINO/GLIP) to link text to image regions. + +--- + +## ⚙️ Requirements + +Install dependencies before running the script: + +```bash +pip install opencv-python-headless matplotlib pillow tqdm +pip install git+https://github.com/facebookresearch/segment-anything.git@main +pip install git+https://github.com/IDEA-Research/GroundingDINO.git@main +pip install --upgrade roboflow albumentations +``` + +--- + +## 🚀 How to Run + +Run the segmentation example script: -1. Clone the repository and navigate to this directory: ```bash -cd examples/Image Segmentation with SAM2 \ No newline at end of file +python inference_example.py --image-path path/to/image.jpg --text-prompt "segment the person" +``` + +You can also modify the script to test different input images or prompts. + +--- + +## 🖼️ Example Output + +Upload your segmented image result below: + +``` +![Segmented Output](data/segmented_result.jpg) +``` + +download + +--- + +## 📁 File Structure + +``` +examples/ +└── text_driven_segmentation/ + ├── README.md + ├── requirements.txt + ├── inference_example.py + └── setup.sh +``` + +--- + +## 💡 Notes + +* The accuracy depends on the grounding model and SAM’s segmentation mask quality. +* For better results, ensure images are clear and objects are well-separated. + + +This example builds on: + +* [Segment Anything Model (SAM 2)](https://github.com/facebookresearch/segment-anything) +* [GroundingDINO](https://github.com/IDEA-Research/GroundingDINO) +* [Supervision](https://github.com/roboflow/supervision) From 71d62ac32ce972e55de7706ac4cb3c454d00b8c7 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 6 Oct 2025 14:51:03 +0000 Subject: [PATCH 3/6] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20auto=20?= =?UTF-8?q?format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../inference_example.py | 268 +++++++++++------- .../requirements.txt | 2 +- .../Image Segmentation with SAM 2/setup.sh | 2 +- 3 files changed, 167 insertions(+), 105 deletions(-) diff --git a/examples/Image Segmentation with SAM 2/inference_example.py b/examples/Image Segmentation with SAM 2/inference_example.py index 11a207472..b89607cc6 100644 --- a/examples/Image Segmentation with SAM 2/inference_example.py +++ b/examples/Image Segmentation with SAM 2/inference_example.py @@ -5,17 +5,18 @@ combined with text prompts for automatic object detection and segmentation. """ -import torch -import torchvision import os -import numpy as np + +import cv2 import matplotlib.pyplot as plt +import numpy as np +import torch from PIL import Image -import cv2 # Set up device configuration os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1" + def setup_device(): """Configure and return the available computation device""" if torch.cuda.is_available(): @@ -31,17 +32,19 @@ def setup_device(): print("MPS device detected - preliminary support may have limitations") else: device = torch.device("cpu") - + print(f"Using device: {device}") return device + # Visualization functions np.random.seed(3) + def show_mask(mask, ax, random_color=False, borders=True): """ Display segmentation mask on matplotlib axis - + Args: mask: Binary mask array ax: Matplotlib axis @@ -51,55 +54,84 @@ def show_mask(mask, ax, random_color=False, borders=True): if random_color: color = np.concatenate([np.random.random(3), np.array([0.6])], axis=0) else: - color = np.array([30/255, 144/255, 255/255, 0.6]) - + color = np.array([30 / 255, 144 / 255, 255 / 255, 0.6]) + h, w = mask.shape[-2:] mask = mask.astype(np.uint8) mask_image = mask.reshape(h, w, 1) * color.reshape(1, 1, -1) - + if borders: contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE) - contours = [cv2.approxPolyDP(contour, epsilon=0.01, closed=True) for contour in contours] - mask_image = cv2.drawContours(mask_image, contours, -1, (1, 1, 1, 0.5), thickness=2) - + contours = [ + cv2.approxPolyDP(contour, epsilon=0.01, closed=True) for contour in contours + ] + mask_image = cv2.drawContours( + mask_image, contours, -1, (1, 1, 1, 0.5), thickness=2 + ) + ax.imshow(mask_image) + def show_points(coords, labels, ax, marker_size=375): """ Display point prompts on image - + Args: coords: Point coordinates [[x, y], ...] labels: Point labels (1=positive, 0=negative) ax: Matplotlib axis marker_size: Size of point markers """ - pos_points = coords[labels==1] - neg_points = coords[labels==0] - - ax.scatter(pos_points[:, 0], pos_points[:, 1], color='green', marker='*', - s=marker_size, edgecolor='white', linewidth=1.25) - ax.scatter(neg_points[:, 0], neg_points[:, 1], color='red', marker='*', - s=marker_size, edgecolor='white', linewidth=1.25) + pos_points = coords[labels == 1] + neg_points = coords[labels == 0] + + ax.scatter( + pos_points[:, 0], + pos_points[:, 1], + color="green", + marker="*", + s=marker_size, + edgecolor="white", + linewidth=1.25, + ) + ax.scatter( + neg_points[:, 0], + neg_points[:, 1], + color="red", + marker="*", + s=marker_size, + edgecolor="white", + linewidth=1.25, + ) + def show_box(box, ax): """ Display bounding box on image - + Args: box: Bounding box [x_min, y_min, x_max, y_max] ax: Matplotlib axis """ x0, y0 = box[0], box[1] w, h = box[2] - box[0], box[3] - box[1] - ax.add_patch(plt.Rectangle((x0, y0), w, h, edgecolor='green', - facecolor=(0, 0, 0, 0), lw=2)) + ax.add_patch( + plt.Rectangle((x0, y0), w, h, edgecolor="green", facecolor=(0, 0, 0, 0), lw=2) + ) + -def show_masks(image, masks, scores, point_coords=None, box_coords=None, - input_labels=None, borders=True): +def show_masks( + image, + masks, + scores, + point_coords=None, + box_coords=None, + input_labels=None, + borders=True, +): """ Display segmentation masks with optional prompts - + Args: image: Input image masks: Segmentation masks @@ -113,152 +145,175 @@ def show_masks(image, masks, scores, point_coords=None, box_coords=None, plt.figure(figsize=(10, 10)) plt.imshow(image) show_mask(mask, plt.gca(), borders=borders) - + if point_coords is not None: assert input_labels is not None show_points(point_coords, input_labels, plt.gca()) - + if box_coords is not None: show_box(box_coords, plt.gca()) - + if len(scores) > 1: - plt.title(f"Mask {i+1}, Score: {score:.3f}", fontsize=18) - - plt.axis('off') + plt.title(f"Mask {i + 1}, Score: {score:.3f}", fontsize=18) + + plt.axis("off") plt.show() + def load_and_setup_model(model_cfg, checkpoint_path, device): """ Load SAM2 model and create predictor - + Args: model_cfg: Model configuration path checkpoint_path: Path to model checkpoint device: Computation device - + Returns: SAM2 predictor instance """ from sam2.build_sam import build_sam2 from sam2.sam2_image_predictor import SAM2ImagePredictor - + sam2_model = build_sam2(model_cfg, checkpoint_path, device=device) predictor = SAM2ImagePredictor(sam2_model) return predictor + def demonstrate_point_prompt(predictor, image): """Demonstrate segmentation using single point prompt""" print("=== Single Point Prompt ===") - + # Single point on the truck input_point = np.array([[500, 375]]) input_label = np.array([1]) - + # Display input points plt.figure(figsize=(10, 10)) plt.imshow(image) show_points(input_point, input_label, plt.gca()) - plt.axis('on') + plt.axis("on") plt.title("Input Point Prompt") plt.show() - + # Predict masks masks, scores, logits = predictor.predict( point_coords=input_point, point_labels=input_label, multimask_output=True, ) - + # Sort by score sorted_ind = np.argsort(scores)[::-1] masks = masks[sorted_ind] scores = scores[sorted_ind] logits = logits[sorted_ind] - - show_masks(image, masks, scores, point_coords=input_point, - input_labels=input_label, borders=True) - + + show_masks( + image, + masks, + scores, + point_coords=input_point, + input_labels=input_label, + borders=True, + ) + return logits + def demonstrate_multiple_points(predictor, image, previous_logits): """Demonstrate segmentation using multiple point prompts""" print("=== Multiple Points Prompt ===") - + # Multiple points to specify the object better input_point = np.array([[500, 375], [1125, 625]]) input_label = np.array([1, 1]) - + # Use best mask from previous prediction - mask_input = previous_logits[np.argmax(scores), :, :] if previous_logits is not None else None - + mask_input = ( + previous_logits[np.argmax(scores), :, :] + if previous_logits is not None + else None + ) + masks, scores, _ = predictor.predict( point_coords=input_point, point_labels=input_label, mask_input=mask_input[None, :, :] if mask_input is not None else None, multimask_output=False, ) - - show_masks(image, masks, scores, point_coords=input_point, - input_labels=input_label) - + + show_masks(image, masks, scores, point_coords=input_point, input_labels=input_label) + return masks, scores + def demonstrate_box_prompt(predictor, image): """Demonstrate segmentation using bounding box prompt""" print("=== Bounding Box Prompt ===") - + input_box = np.array([425, 600, 700, 875]) # x_min, y_min, x_max, y_max - + masks, scores, _ = predictor.predict( point_coords=None, point_labels=None, box=input_box[None, :], multimask_output=False, ) - + show_masks(image, masks, scores, box_coords=input_box) - + return masks, scores + def demonstrate_combined_prompts(predictor, image): """Demonstrate combining box and point prompts""" print("=== Combined Box and Points ===") - + input_box = np.array([425, 600, 700, 875]) input_point = np.array([[575, 750]]) # Negative point inside the box input_label = np.array([0]) - + masks, scores, _ = predictor.predict( point_coords=input_point, point_labels=input_label, box=input_box, multimask_output=False, ) - - show_masks(image, masks, scores, box_coords=input_box, - point_coords=input_point, input_labels=input_label) - + + show_masks( + image, + masks, + scores, + box_coords=input_box, + point_coords=input_point, + input_labels=input_label, + ) + return masks, scores + def demonstrate_batched_prompts(predictor, image): """Demonstrate batched prompt processing""" print("=== Batched Prompts ===") - + # Multiple bounding boxes (simulating object detector output) - input_boxes = np.array([ - [75, 275, 1725, 850], # Large area - [425, 600, 700, 875], # Wheel area - [1375, 550, 1650, 800], # Another wheel - [1240, 675, 1400, 750], # Small detail - ]) - + input_boxes = np.array( + [ + [75, 275, 1725, 850], # Large area + [425, 600, 700, 875], # Wheel area + [1375, 550, 1650, 800], # Another wheel + [1240, 675, 1400, 750], # Small detail + ] + ) + masks, scores, _ = predictor.predict( point_coords=None, point_labels=None, box=input_boxes, multimask_output=False, ) - + # Display all masks together plt.figure(figsize=(10, 10)) plt.imshow(image) @@ -266,42 +321,47 @@ def demonstrate_batched_prompts(predictor, image): show_mask(mask.squeeze(0), plt.gca(), random_color=True) for box in input_boxes: show_box(box, plt.gca()) - plt.axis('off') + plt.axis("off") plt.title("Batched Prompts - Multiple Objects") plt.show() - + return masks, scores + def demonstrate_batched_images(predictor): """Demonstrate processing multiple images with batched prompts""" print("=== Batched Images ===") - + # Load multiple images - image1 = Image.open('images/truck.jpg') + image1 = Image.open("images/truck.jpg") image1 = np.array(image1.convert("RGB")) - image1_boxes = np.array([ - [75, 275, 1725, 850], - [425, 600, 700, 875], - [1375, 550, 1650, 800], - ]) - - image2 = Image.open('images/groceries.jpg') + image1_boxes = np.array( + [ + [75, 275, 1725, 850], + [425, 600, 700, 875], + [1375, 550, 1650, 800], + ] + ) + + image2 = Image.open("images/groceries.jpg") image2 = np.array(image2.convert("RGB")) - image2_boxes = np.array([ - [450, 170, 520, 350], - [350, 190, 450, 350], - [500, 170, 580, 350], - ]) - + image2_boxes = np.array( + [ + [450, 170, 520, 350], + [350, 190, 450, 350], + [500, 170, 580, 350], + ] + ) + img_batch = [image1, image2] boxes_batch = [image1_boxes, image2_boxes] - + # Process batch predictor.set_image_batch(img_batch) masks_batch, scores_batch, _ = predictor.predict_batch( None, None, box_batch=boxes_batch, multimask_output=False ) - + # Display results for i, (image, boxes, masks) in enumerate(zip(img_batch, boxes_batch, masks_batch)): plt.figure(figsize=(10, 10)) @@ -310,37 +370,38 @@ def demonstrate_batched_images(predictor): show_mask(mask.squeeze(0), plt.gca(), random_color=True) for box in boxes: show_box(box, plt.gca()) - plt.axis('off') - plt.title(f"Image {i+1} - Batched Processing") + plt.axis("off") + plt.title(f"Image {i + 1} - Batched Processing") plt.show() + def main(): """Main function demonstrating SAM2 image segmentation capabilities""" - + # Setup device device = setup_device() - + # Load model sam2_checkpoint = "../checkpoints/sam2.1_hiera_large.pt" model_cfg = "configs/sam2.1/sam2.1_hiera_l.yaml" - + print("Loading SAM2 model...") predictor = load_and_setup_model(model_cfg, sam2_checkpoint, device) - + # Load and display sample image print("Loading sample image...") - image = Image.open('images/truck.jpg') + image = Image.open("images/truck.jpg") image = np.array(image.convert("RGB")) - + plt.figure(figsize=(10, 10)) plt.imshow(image) - plt.axis('on') + plt.axis("on") plt.title("Original Image") plt.show() - + # Set image for predictor predictor.set_image(image) - + # Run demonstrations logits = demonstrate_point_prompt(predictor, image) demonstrate_multiple_points(predictor, image, logits) @@ -348,8 +409,9 @@ def main(): demonstrate_combined_prompts(predictor, image) demonstrate_batched_prompts(predictor, image) demonstrate_batched_images(predictor) - + print("All demonstrations completed successfully!") + if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/examples/Image Segmentation with SAM 2/requirements.txt b/examples/Image Segmentation with SAM 2/requirements.txt index f7354fb8e..13721a2ae 100644 --- a/examples/Image Segmentation with SAM 2/requirements.txt +++ b/examples/Image Segmentation with SAM 2/requirements.txt @@ -4,4 +4,4 @@ opencv-python>=4.5.0 matplotlib>=3.5.0 Pillow>=9.0.0 numpy>=1.21.0 -git+https://github.com/facebookresearch/sam2.git \ No newline at end of file +git+https://github.com/facebookresearch/sam2.git diff --git a/examples/Image Segmentation with SAM 2/setup.sh b/examples/Image Segmentation with SAM 2/setup.sh index 9a3b1b4eb..49d7b0cf3 100644 --- a/examples/Image Segmentation with SAM 2/setup.sh +++ b/examples/Image Segmentation with SAM 2/setup.sh @@ -33,4 +33,4 @@ fi echo "" echo "Setup completed successfully!" -echo "You can now run: python inference_example.py" \ No newline at end of file +echo "You can now run: python inference_example.py" From 88e5a5c2a33bc15228150f1a39140b8dd1a537ad Mon Sep 17 00:00:00 2001 From: "NANDA GOPAL.D" Date: Mon, 6 Oct 2025 20:27:57 +0530 Subject: [PATCH 4/6] Update README.md --- examples/Image Segmentation with SAM 2/README.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/examples/Image Segmentation with SAM 2/README.md b/examples/Image Segmentation with SAM 2/README.md index efb4c6f5c..44813742f 100644 --- a/examples/Image Segmentation with SAM 2/README.md +++ b/examples/Image Segmentation with SAM 2/README.md @@ -1,13 +1,13 @@ # Text-Driven Image Segmentation with SAM 2 -This example demonstrates **text-prompted image segmentation** using the **Segment Anything Model 2 (SAM 2)**. -You can specify an object in the image via a **text description**, and the model automatically segments that region. +This example demonstrates **image segmentation** using the **Segment Anything Model 2**. +You can specify an object in the image via a **Points or Boxes*, and the model automatically segments that region. --- ## 🧠 Overview -Text-driven segmentation allows you to extract a specific object or region from an image by providing a natural language prompt. +Points or Boxes segmentation allows you to extract a specific object or region from an image by providing a natural language prompt. This implementation integrates SAM 2 with a grounding model (like GroundingDINO/GLIP) to link text to image regions. @@ -47,6 +47,7 @@ Upload your segmented image result below: ``` download +download (1) --- From 1addde05eda9bcbc7e2e34da37c0ffdeee32479a Mon Sep 17 00:00:00 2001 From: "NANDA GOPAL.D" Date: Mon, 6 Oct 2025 20:43:54 +0530 Subject: [PATCH 5/6] Update inference_example.py --- .../inference_example.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/examples/Image Segmentation with SAM 2/inference_example.py b/examples/Image Segmentation with SAM 2/inference_example.py index b89607cc6..663431645 100644 --- a/examples/Image Segmentation with SAM 2/inference_example.py +++ b/examples/Image Segmentation with SAM 2/inference_example.py @@ -1,7 +1,7 @@ """ Text-Driven Image Segmentation with Grounded SAM2 -This module demonstrates image segmentation using Meta's Segment Anything Model 2 (SAM 2) +This module demonstrates image segmentation using Meta's Segment Anything Model 2 combined with text prompts for automatic object detection and segmentation. """ @@ -218,10 +218,10 @@ def demonstrate_point_prompt(predictor, image): borders=True, ) - return logits + return masks, scores, logits -def demonstrate_multiple_points(predictor, image, previous_logits): +def demonstrate_multiple_points(predictor, image, previous_masks, previous_scores, previous_logits): """Demonstrate segmentation using multiple point prompts""" print("=== Multiple Points Prompt ===") @@ -231,7 +231,7 @@ def demonstrate_multiple_points(predictor, image, previous_logits): # Use best mask from previous prediction mask_input = ( - previous_logits[np.argmax(scores), :, :] + previous_logits[np.argmax(previous_scores), :, :] if previous_logits is not None else None ) @@ -403,8 +403,8 @@ def main(): predictor.set_image(image) # Run demonstrations - logits = demonstrate_point_prompt(predictor, image) - demonstrate_multiple_points(predictor, image, logits) + masks, scores, logits = demonstrate_point_prompt(predictor, image) + demonstrate_multiple_points(predictor, image, masks, scores, logits) demonstrate_box_prompt(predictor, image) demonstrate_combined_prompts(predictor, image) demonstrate_batched_prompts(predictor, image) From e4564e2d7cecb0f100d376512f644cbc0737266c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 6 Oct 2025 15:14:12 +0000 Subject: [PATCH 6/6] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20auto=20?= =?UTF-8?q?format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/Image Segmentation with SAM 2/inference_example.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/examples/Image Segmentation with SAM 2/inference_example.py b/examples/Image Segmentation with SAM 2/inference_example.py index 663431645..5d2183a4f 100644 --- a/examples/Image Segmentation with SAM 2/inference_example.py +++ b/examples/Image Segmentation with SAM 2/inference_example.py @@ -221,7 +221,9 @@ def demonstrate_point_prompt(predictor, image): return masks, scores, logits -def demonstrate_multiple_points(predictor, image, previous_masks, previous_scores, previous_logits): +def demonstrate_multiple_points( + predictor, image, previous_masks, previous_scores, previous_logits +): """Demonstrate segmentation using multiple point prompts""" print("=== Multiple Points Prompt ===")