# object_detector.py (forked from paris-saclay-cds/ramp-workflow)
from __future__ import division

import importlib.util

import numpy as np


class ObjectDetector(object):
    """Object detection workflow.

    This workflow is used to train image object detection tasks, typically
    when the dataset cannot be stored in memory.

    Submissions need to contain one file, which by default is named
    object_detector.py (the name can be modified by changing
    `workflow_element_names`). object_detector.py needs an `ObjectDetector`
    class, which implements `fit` and `predict`.

    Parameters
    ==========

    workflow_element_names : list of str
        names of the submission files (without the .py extension);
        by default a single element, 'object_detector'.
    """

    def __init__(self, workflow_element_names=['object_detector']):
        self.element_names = workflow_element_names

    def train_submission(self, module_path, X, y, train_is=None):
        """Train an ObjectDetector.

        module_path : str
            path to the module where the submission is; the folder has
            to contain object_detector.py.
        X : ArrayContainer vector of int
            vector of image data to train on
        y : vector of lists
            vector of object labels corresponding to X
        train_is : vector of int
            indices from X to train on
        """
        if train_is is None:
            train_is = slice(None, None, None)
        # Load the submitted object detector model from its source file.
        submitted_model_file = '{}/{}.py'.format(
            module_path, self.element_names[0])
        spec = importlib.util.spec_from_file_location(
            self.element_names[0], submitted_model_file)
        detector = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(detector)
        clf = detector.ObjectDetector()
        # Train and return the fitted model.
        clf.fit(X[train_is], y[train_is])
        return clf

    def test_submission(self, trained_model, X):
        """Test an ObjectDetector.

        trained_model
            Trained model returned by `train_submission`.
        X : ArrayContainer of int
            Vector of image data to test on.
        """
        clf = trained_model
        y_pred = clf.predict(X)
        return y_pred


class BatchGeneratorBuilder(object):
    """A batch generator builder for generating batches of images on the fly.

    This class is a way to build training and validation generators that
    yield a tuple (X, y) of mini-batches each time they are iterated. The
    generators are built to fit the keras API of `fit_generator`
    (see https://keras.io/models/model/). The fit function of the user
    defined `ObjectDetector` should then use the instance to build train
    and validation generators, using the method
    `get_train_valid_generators`.

    Parameters
    ==========

    X_array : ArrayContainer of int
        vector of image data to train on
    y_array : vector of lists
        vector of object labels corresponding to `X_array`
    """

    def __init__(self, X_array, y_array):
        self.X_array = X_array
        self.y_array = y_array
        self.nb_examples = len(X_array)

    def get_train_valid_generators(self, batch_size=256, valid_ratio=0.1):
        """Build train and valid generators for keras.

        This method is used by the user defined `ObjectDetector` to build
        train and valid generators that will be used in the keras
        `fit_generator` method.

        Parameters
        ==========

        batch_size : int
            size of mini-batches
        valid_ratio : float between 0 and 1
            ratio of validation data

        Returns
        =======

        a 4-tuple (gen_train, gen_valid, nb_train, nb_valid) where:
            - gen_train is a generator function for training data
            - gen_valid is a generator function for validation data
            - nb_train is the number of training examples
            - nb_valid is the number of validation examples

        The numbers of training and validation examples are necessary
        so that keras `fit_generator` knows how many batches make up
        an epoch.
        """
        nb_valid = int(valid_ratio * self.nb_examples)
        nb_train = self.nb_examples - nb_valid
        indices = np.arange(self.nb_examples)
        train_indices = indices[0:nb_train]
        valid_indices = indices[nb_train:]
        gen_train = self._get_generator(
            indices=train_indices, batch_size=batch_size)
        gen_valid = self._get_generator(
            indices=valid_indices, batch_size=batch_size)
        return gen_train, gen_valid, nb_train, nb_valid

    def _get_generator(self, indices=None, batch_size=256):
        if indices is None:
            indices = np.arange(self.nb_examples)
        # Infinite loop, as required by keras `fit_generator`. Training
        # still terminates because the caller specifies the number of
        # steps per epoch and the total number of epochs.
        while True:
            X = self.X_array[indices]
            y = self.y_array[indices]
            # Converting to float may be needed depending on the model:
            # X = np.array(X, dtype='float32')
            # Yield mini-batches; the last batch of an epoch may be
            # smaller than `batch_size`.
            for i in range(0, len(X), batch_size):
                yield X[i:i + batch_size], y[i:i + batch_size]
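

# Hedged usage sketch of BatchGeneratorBuilder inside a submitter's
# `fit` method (assuming a compiled keras model named `model`; all names
# below are illustrative and not defined in this module):
#
#     batch_size = 32
#     gen_builder = BatchGeneratorBuilder(X_array, y_array)
#     gen_train, gen_valid, nb_train, nb_valid = \
#         gen_builder.get_train_valid_generators(
#             batch_size=batch_size, valid_ratio=0.1)
#     model.fit_generator(
#         gen_train,
#         steps_per_epoch=max(1, nb_train // batch_size),
#         epochs=10,
#         validation_data=gen_valid,
#         validation_steps=max(1, nb_valid // batch_size))
#
# The steps_per_epoch * epochs budget is what lets the otherwise
# infinite generators terminate.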