
Fix loading dataset #35

Open · wants to merge 6 commits into main
Changes from all commits

98 changes: 43 additions & 55 deletions datasets.py
@@ -28,7 +28,8 @@ def __init__(
         use_train_aug=False,
         train=False,
         no_mosaic=False,
-        square_training=False
+        square_training=False,
+        discard_negative_example=True
     ):
         self.transforms = transforms
         self.use_train_aug = use_train_aug
@@ -41,59 +42,42 @@
         self.square_training = square_training
         self.mosaic_border = [-img_size // 2, -img_size // 2]
         self.image_file_types = ['*.jpg', '*.jpeg', '*.png', '*.ppm', '*.JPG']
-        self.all_image_paths = []
-
-        # get all the image paths in sorted order
-        for file_type in self.image_file_types:
-            self.all_image_paths.extend(glob.glob(os.path.join(self.images_path, file_type)))
+        self.all_images = []
+
         self.all_annot_paths = glob.glob(os.path.join(self.labels_path, '*.xml'))
-        self.all_images = [image_path.split(os.path.sep)[-1] for image_path in self.all_image_paths]
-        self.all_images = sorted(self.all_images)
-        # Remove all annotations and images when no object is present.
-        self.read_and_clean()
-
-    def read_and_clean(self):
+        self.read_and_clean(discard_negative_example)
+
+    def read_and_clean(self, discard_negative_example):
         # Discard any images and labels when the XML
-        # file does not contain any object.
-        for annot_path in self.all_annot_paths:
-            tree = et.parse(annot_path)
+        # file does not contain any object and negative examples are not allowed.
+        def check_path_and_save_image(path):
+            tree = et.parse(path)
             root = tree.getroot()
-            object_present = False
-            for member in root.findall('object'):
-                if member.find('bndbox'):
-                    object_present = True
-            if object_present == False:
-                image_name = annot_path.split(os.path.sep)[-1].split('.xml')[0]
-                image_root = self.all_image_paths[0].split(os.path.sep)[:-1]
-                # remove_image = f"{'/'.join(image_root)}/{image_name}.jpg"
-
-                # TODO Is this code necessary?
-                # for img_type in self.image_file_types:
-                #     remove_image = os.path.join(os.sep.join(image_root), image_name+img_type.replace("*",""))
-                #     if remove_image in self.all_image_paths:
-                #         print(f"Removing {annot_path} and corresponding {remove_image}")
-                #         self.all_annot_paths.remove(annot_path)
-                #         self.all_image_paths.remove(remove_image)
-                #         break
-
-        # Discard any image file when no annotation file
-        # is found for the image.
-        for image_name in self.all_images:
-            possible_xml_name = os.path.join(self.labels_path, os.path.splitext(image_name)[0]+'.xml')
-            if possible_xml_name not in self.all_annot_paths:
-                print(f"{possible_xml_name} not found...")
-                print(f"Removing {image_name} image")
-                # items = [item for item in items if item != element]
-                self.all_images = [image_instance for image_instance in self.all_images if image_instance != image_name]
-                # self.all_images.remove(image_name)
-
-        # for image_path in self.all_image_paths:
-        #     image_name = image_path.split(os.path.sep)[-1].split('.jpg')[0]
-        #     possible_xml_name = f"{self.labels_path}/{image_name.split('.jpg')[0]}.xml"
-        #     if possible_xml_name not in self.all_annot_paths:
-        #         print(f"{possible_xml_name} not found...")
-        #         print(f"Removing {image_name} image")
-        #         self.all_image_paths.remove(image_path)
+            image_name = root.findtext('filename')
+            image_path = os.path.join(self.images_path, image_name)
+            discard_path = False
+            if not os.path.exists(image_path):
+                print(f"Image {image_path} associated to {path} not found...")
+                print(f"Discarding {path}...")
+                discard_path = True
+
+            if not discard_path and discard_negative_example:
+                object_present = False
+                for member in root.findall('object'):
+                    if member.find('bndbox') is not None:
+                        object_present = True
+                        break
+                if not object_present:
+                    print(f"File {path} contains no object. Discarding xml file and image...")
+                    discard_path = True
+            # If the XML file is kept, record its image.
+            if not discard_path:
+                self.all_images.append(image_name)
+            # filter() keeps entries that return True, so return True
+            # for annotations we keep and False for discarded ones.
+            return not discard_path
+
+        self.all_annot_paths = list(filter(check_path_and_save_image, self.all_annot_paths))
 
 
     def resize(self, im, square=False):
         if square:
@@ -159,7 +143,7 @@ def load_image_and_labels(self, index):
             ymax_final = (ymax/image_height)*image_resized.shape[0]
 
             boxes.append([xmin_final, ymin_final, xmax_final, ymax_final])
-
+
         # Bounding box to tensor.
         boxes_length = len(boxes)
         boxes = torch.as_tensor(boxes, dtype=torch.float32)
@@ -320,7 +304,8 @@ def create_train_dataset(
     classes,
     use_train_aug=False,
     no_mosaic=False,
-    square_training=False
+    square_training=False,
+    discard_negative=True
 ):
     train_dataset = CustomDataset(
         train_dir_images,
@@ -331,15 +316,17 @@
         use_train_aug=use_train_aug,
         train=True,
         no_mosaic=no_mosaic,
-        square_training=square_training
+        square_training=square_training,
+        discard_negative_example=discard_negative
     )
     return train_dataset
 def create_valid_dataset(
     valid_dir_images,
     valid_dir_labels,
     img_size,
     classes,
-    square_training=False
+    square_training=False,
+    discard_negative=True
 ):
     valid_dataset = CustomDataset(
         valid_dir_images,
@@ -349,7 +336,8 @@
         get_valid_transform(),
         train=False,
         no_mosaic=True,
-        square_training=square_training
+        square_training=square_training,
+        discard_negative_example=discard_negative
     )
     return valid_dataset

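For reference, here is a minimal standalone sketch of the cleaning pass this diff introduces: parse each XML file, drop it when its image is missing or (optionally) when it contains no objects, and record the kept image names. It assumes a Pascal VOC-style layout with a <filename> tag and <object>/<bndbox> entries; the directory paths and the helper name keep_annotation are illustrative, not part of the PR.

import glob
import os
import xml.etree.ElementTree as et

images_path = 'data/images'  # illustrative paths, not from the PR
labels_path = 'data/labels'
all_images = []

def keep_annotation(path, discard_negative_example=True):
    # Return True to keep this annotation, mirroring check_path_and_save_image.
    root = et.parse(path).getroot()
    image_name = root.findtext('filename')  # assumes <filename> is present
    if not os.path.exists(os.path.join(images_path, image_name)):
        return False  # the XML points at a missing image
    if discard_negative_example:
        # A negative example has no <object> entry with a <bndbox>.
        if not any(m.find('bndbox') is not None for m in root.findall('object')):
            return False
    all_images.append(image_name)
    return True

all_annot_paths = [
    p for p in glob.glob(os.path.join(labels_path, '*.xml'))
    if keep_annotation(p)
]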
13 changes: 11 additions & 2 deletions eval.py
@@ -55,6 +55,12 @@
     default=torch.device('cuda:0' if torch.cuda.is_available() else 'cpu'),
     help='computation/training device, default is GPU if GPU present'
 )
+parser.add_argument(
+    '-dn', '--discard-negative',
+    dest='discard_negative',
+    action='store_true',
+    help='pass this if you want to discard images with no objects'
+)
 parser.add_argument(
     '-v', '--verbose',
     action='store_true',
@@ -105,7 +111,9 @@
     VALID_DIR_LABELS,
     IMAGE_SIZE,
     COCO_91_CLASSES,
-    square_training=args['square_training']
+    square_training=args['square_training'],
+    discard_negative=args['discard_negative']
+
 )
 
 # Load weights.
@@ -118,7 +126,8 @@
     VALID_DIR_LABELS,
     IMAGE_SIZE,
     CLASSES,
-    square_training=args['square_training']
+    square_training=args['square_training'],
+    discard_negative=args['discard_negative']
 )
 model.to(DEVICE).eval()

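One detail worth keeping in mind when reading the call sites above: argparse's action='store_true' makes an option default to False when the flag is absent, so eval.py passes discard_negative=False unless -dn/--discard-negative is given, even though create_valid_dataset itself defaults the parameter to True. A quick self-contained check of that argparse behavior:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('-dn', '--discard-negative',
                    dest='discard_negative', action='store_true')

# store_true options default to False when the flag is not passed.
print(parser.parse_args([]).discard_negative)                      # False
print(parser.parse_args(['--discard-negative']).discard_negative)  # True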
12 changes: 10 additions & 2 deletions train.py
@@ -116,6 +116,12 @@ def parse_opt():
         action='store_true',
         help='pass this to not use mosaic augmentation'
     )
+    parser.add_argument(
+        '-dn', '--discard-negative',
+        dest='discard_negative',
+        action='store_true',
+        help='pass this if you want to discard images with no objects'
+    )
     parser.add_argument(
         '-uta', '--use-train-aug',
         dest='use_train_aug',
@@ -227,14 +233,16 @@ def main(args):
         CLASSES,
         use_train_aug=args['use_train_aug'],
         no_mosaic=args['no_mosaic'],
-        square_training=args['square_training']
+        square_training=args['square_training'],
+        discard_negative=args['discard_negative']
     )
     valid_dataset = create_valid_dataset(
         VALID_DIR_IMAGES,
         VALID_DIR_LABELS,
         IMAGE_SIZE,
         CLASSES,
-        square_training=args['square_training']
+        square_training=args['square_training'],
+        discard_negative=args['discard_negative']
     )
     print('Creating data loaders')
     if args['distributed']:
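Hypothetical invocations showing the new flag; every other argument follows the repository's existing CLI and is elided here.

# Default: keep images whose XML lists no objects.
python train.py ...

# Pass the flag to discard object-free images during training or evaluation.
python train.py ... --discard-negative
python eval.py ... --discard-negative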