-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathconvert_data_to_coco.py
109 lines (93 loc) · 3.62 KB
/
convert_data_to_coco.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author: @rmnoa
"""
import os
from pathlib import Path
import json
import numpy as np
from datetime import datetime
import imageio
import nrrd
import sys
# Command-line configuration: DATA_DIR and OUTPUT_DIR are both required.
if len(sys.argv) < 3:
    # Exit with a readable message instead of an IndexError traceback
    sys.exit('usage: {} DATA_DIR OUTPUT_DIR'.format(sys.argv[0]))
data_dir = sys.argv[1]  # Directory containing mix of images and jsons
# Created if missing; images are only converted when it did not exist before
output_dir = sys.argv[2]
# The JSON is written next to the output directory as "<output_dir>.json".
# Trailing path separators are dropped first so that "out/" yields "out.json"
# rather than the hidden file "out/.json" inside the directory.
output_json_file = (output_dir.rstrip('/' + os.sep) or output_dir) + '.json'
classes = ["RA1", "RA2", "RA3", "RB4", "RC5", "RC6"]  # Labels
boxes_dir_name = 'BOXES'  # Name of the per-image subdirectory with box JSONs
# Dimensions recorded for every image entry in the output JSON
# (stated for the images after the axis swap applied on export)
width, height = 514, 660
# Record whether output_dir was already present before this run, and
# create it when it was not
output_dir_exists = os.path.isdir(output_dir)
if not output_dir_exists:
    os.mkdir(output_dir)
# Map each image id to the filenames found in its BOXES directory
boxes_jsons = {}
image_ids = []
# Extract filenames from directory
for pathname, dirnames, filenames in os.walk(data_dir):
    # Sorting in place makes os.walk descend in a deterministic order
    dirnames.sort()
    walked_dir = Path(pathname)
    # Compare the directory *name* exactly. A substring test on the whole
    # path would also match e.g. "BOXES_old", folders nested below BOXES, or
    # every directory when the data root's own path contains "BOXES".
    if walked_dir.name != boxes_dir_name:
        continue
    # The image id is the name of the directory that contains BOXES
    image_id = walked_dir.parent.name
    image_ids.append(image_id)
    boxes_jsons[image_id] = sorted(filenames)
# Top-level container for the dataset JSON written at the end of the script
output_json = {}
# Prepare the JSON
# Categories: one single-entry {id: label} mapping per class. Ids are the
# 1-based positions in `classes`, the same numbering the annotations use for
# `category_id`, so ids and labels can no longer drift apart from the label
# list (the hard-coded version repeated id '1' and used RA4/RA5/RA6).
# NOTE(review): standard COCO categories look like {'id': int, 'name': str};
# the original single-entry mapping shape is kept - confirm with the consumer.
output_json['categories'] = [
    {str(index): label} for index, label in enumerate(classes, start=1)
]
# Images and annotations list in JSON
images = []
annotations = []
# Markup coordinates are divided by this factor (mm to pixel conversion)
mm_per_pixel = 0.06
for image_id in image_ids:
    image_dir = os.path.join(data_dir, image_id)

    # One annotation per box JSON found for this image
    for boxes_json in boxes_jsons[image_id]:
        # Get label from filename: the part before the first '_' or '.'
        class_id = boxes_json.split('_')[0].split('.')[0]
        category_id = classes.index(class_id) + 1  # Models accept ids > 0
        # Parse JSON
        box_path = os.path.join(image_dir, boxes_dir_name, boxes_json)
        with open(box_path, encoding='utf-8') as f:
            boxes = json.load(f)
        # Calculate bounding box from the first markup's centre and size,
        # using only the first two components of each
        markup = boxes['markups'][0]
        center = [value / mm_per_pixel for value in markup['center'][:2]]
        sizes = [value / mm_per_pixel for value in markup['size'][:2]]
        x1 = center[0] - sizes[0] / 2
        x2 = center[0] + sizes[0] / 2
        y1 = center[1] - sizes[1] / 2
        y2 = center[1] + sizes[1] / 2
        # NOTE(review): corner layout (x1, y1, x2, y2) is kept as-is; COCO's
        # native bbox layout is [x, y, width, height] - confirm the consumer.
        annotations.append({
            'image_id': image_id,
            'category_id': category_id,
            'bbox': [x1, y1, x2, y2],
        })

    # Convert NRRD to JPG and move to output dir
    image_name = os.path.join(image_dir, image_id + '.nrrd')
    image_name_new = os.path.join(output_dir, image_id + '.jpg')
    # To prevent writing images on every run
    if not output_dir_exists:
        pixels = nrrd.read(image_name)[0]
        # Remove extra dim, then switch to float64 before any arithmetic:
        # with integer voxel types (e.g. int16) `pixels - min` and
        # `max - min` can overflow and wrap around silently.
        pixels = np.squeeze(pixels, axis=2).astype(np.float64)
        lowest = pixels.min()
        value_range = pixels.max() - lowest
        if value_range > 0:
            # Normalize to 0 - 255 range
            pixels = (pixels - lowest) / value_range * 255
        else:
            # Constant image: avoid dividing by zero, emit an all-black image
            pixels = np.zeros_like(pixels)
        # Prepare for writing to JPEG (uint8 data type required)
        # Swapaxes is due to the images being rotated originally
        pixels = pixels.astype(np.uint8).swapaxes(-1, -2)
        # Output to file
        imageio.imwrite(image_name_new, pixels)

    # The source file's modification time is recorded as the capture date
    date_captured = str(datetime.fromtimestamp(os.stat(image_name).st_mtime))
    # images JSON list
    images.append({
        'id': image_id,
        'width': width,
        'height': height,
        'file_name': image_name_new,
        'date_captured': date_captured,
    })
# Attach the collected annotation and image lists to the dataset dictionary
# NOTE(review): the key is 'annotation' (singular); the COCO spec names this
# section 'annotations' - left unchanged here, confirm with the consumer.
output_json.update({'annotation': annotations, 'images': images})
# Serialize the whole dataset and write it to the output JSON file
serialized = json.dumps(output_json)
with open(output_json_file, 'w') as f:
    f.write(serialized)