Made randomness deterministic via seeds

YangFei1990 · May 22, 2019 · 91780c9 · 91780c9
1 parent 8e27b61
commit 91780c9
Show file tree

Hide file tree

Showing 14 changed files with 127 additions and 100 deletions.
diff --git a/MaskRCNN/config.py b/MaskRCNN/config.py
@@ -129,6 +129,7 @@ def __ne__(self, _):
 _C.TRAIN.LR_EPOCH_SCHEDULE = [(8, 0.1), (10, 0.01), (12, None)] # "1x" schedule in detectron
 _C.TRAIN.EVAL_PERIOD = 25  # period (epochs) to run evaluation
 _C.TRAIN.BATCH_SIZE_PER_GPU = 1
+_C.TRAIN.SEED = 1234
 
 # preprocessing --------------------
 # Alternative old (worse & faster) setting: 600
@@ -139,7 +140,7 @@ def __ne__(self, _):
 # Un-scaled version: [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]
 _C.PREPROC.PIXEL_MEAN = [123.675, 116.28, 103.53]
 _C.PREPROC.PIXEL_STD = [58.395, 57.12, 57.375]
-_C.PREPROC.PREDEFINED_PADDING = True
+_C.PREPROC.PREDEFINED_PADDING = False
 _C.PREPROC.PADDING_SHAPES = [(800, 1000), (800, 1200), (800, 1350)]    # only add landscape shapes in decreasing h/w aspect ratio order - the corresponding portrait shape will be automatically created
 
 # anchors -------------------------
@@ -169,7 +170,7 @@ def __ne__(self, _):
 _C.RPN.TEST_POST_NMS_TOPK = 1000   # if you encounter OOM in inference, set this to a smaller number
 # for FPN, #proposals per-level and #proposals after merging are (for now) the same
 # if FPN.PROPOSAL_MODE = 'Joint', these options have no effect
-_C.RPN.TRAIN_PER_LEVEL_NMS_TOPK = 2000 
+_C.RPN.TRAIN_PER_LEVEL_NMS_TOPK = 2000
 _C.RPN.TEST_PER_LEVEL_NMS_TOPK = 1000
 _C.RPN.TOPK_PER_IMAGE = True
 

diff --git a/MaskRCNN/data.py b/MaskRCNN/data.py
@@ -29,7 +29,7 @@ def _get_padding_shape(aspect_ratio):
     for shape in cfg.PREPROC.PADDING_SHAPES:
         if aspect_ratio >= float(shape[0])/float(shape[1]):
             return shape
-        
+
     return cfg.PREPROC.PADDING_SHAPES[-1]
 
 def get_padding_shape(h, w):
@@ -52,21 +52,21 @@ def get_next_roidb(roidbs, i, shp, taken):
         return None
 
     for k in range(i+1, len(roidbs)):
-        if get_padding_shape(roidbs[k]['height'], roidbs[k]['width']) == shp and not taken[k]: 
-            return k 
+        if get_padding_shape(roidbs[k]['height'], roidbs[k]['width']) == shp and not taken[k]:
+            return k
         if k - i > 40:    # don't try too hard
             break
 
     # at least try to get one dimension the same
     for k in range(i, len(roidbs)):
         padding_shape = get_padding_shape(roidbs[k]['height'], roidbs[k]['width'])
-        if (padding_shape[0] == shp[0] or padding_shape[1] == shp[1]) and not taken[k]: 
-            return k 
+        if (padding_shape[0] == shp[0] or padding_shape[1] == shp[1]) and not taken[k]:
+            return k
         if k - i > 40:    # don't try too hard
             break
 
     for k in range(i, len(roidbs)):
-        if not taken[k]: 
+        if not taken[k]:
             return k
 
     return None
@@ -157,7 +157,7 @@ def get_all_anchors(stride=None, sizes=None, tile=True):
         shifts = np.vstack((shift_x, shift_y, shift_x, shift_y)).transpose()
         # Kx4, K = field_size * field_size
         K = shifts.shape[0]
-    
+
         A = cell_anchors.shape[0]
         field_of_anchors = (
             cell_anchors.reshape((1, A, 4)) +
@@ -172,7 +172,7 @@ def get_all_anchors(stride=None, sizes=None, tile=True):
     else:
         cell_anchors = cell_anchors.astype('float32')
         cell_anchors[:, [2, 3]] += 1
-        return cell_anchors 
+        return cell_anchors
 
 
 
@@ -210,6 +210,7 @@ def get_anchor_labels(anchors, gt_boxes, crowd_boxes):
     def filter_box_label(labels, value, max_num):
         curr_inds = np.where(labels == value)[0]
         if len(curr_inds) > max_num:
+            np.random.seed(cfg.TRAIN.SEED)
             disable_inds = np.random.choice(
                 curr_inds, size=(len(curr_inds) - max_num),
                 replace=False)
@@ -445,6 +446,7 @@ def preprocess(roidb):
         return ret
 
     if cfg.TRAINER == 'horovod':
+        #ds = MapData(ds, preprocess)
         ds = MultiThreadMapData(ds, 5, preprocess)
         # MPI does not like fork()
     else:
@@ -482,19 +484,21 @@ def get_batch_train_dataflow(batch_size):
     logger.info("Filtered {} images which contain no non-crowd groudtruth boxes. Total #images for training: {}".format(
         num - len(roidbs), len(roidbs)))
 
+    roidbs = sorted(roidbs, key=lambda x: float(x['width']) / float(x['height']), reverse=True)     # will shuffle it later at every rank
+
     print("Batching roidbs")
     batched_roidbs = []
 
     if cfg.PREPROC.PREDEFINED_PADDING:
         taken = [False for _ in roidbs]
         done = False
-    
-        for i, d in enumerate(roidbs): 
+
+        for i, d in enumerate(roidbs):
             batch = []
             if not taken[i]:
                 batch.append(d)
-                padding_shape = get_padding_shape(d['height'], d['width']) 
-                while len(batch) < batch_size: 
+                padding_shape = get_padding_shape(d['height'], d['width'])
+                while len(batch) < batch_size:
                     k = get_next_roidb(roidbs, i, padding_shape, taken)
                     if k == None:
                         done = True
@@ -523,9 +527,6 @@ def get_batch_train_dataflow(batch_size):
     #   - TODO: Fix lack of batch contents shuffling
 
 
-
-
-
     aug = imgaug.AugmentorList(
          [CustomResize(cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE),
           imgaug.Flip(horiz=True)])
@@ -536,7 +537,6 @@ def preprocess(roidb_batch):
         datapoint_list = []
         for roidb in roidb_batch:
             fname, boxes, klass, is_crowd = roidb['file_name'], roidb['boxes'], roidb['class'], roidb['is_crowd']
-            # print(fname)
             boxes = np.copy(boxes)
             im = cv2.imread(fname, cv2.IMREAD_COLOR)
             assert im is not None, fname
@@ -637,14 +637,13 @@ def preprocess(roidb_batch):
 
         if cfg.PREPROC.PREDEFINED_PADDING:
             padding_shapes = [get_padding_shape(*(d["images"].shape[:2])) for d in datapoint_list]
-            max_height = max([shp[0] for shp in padding_shapes])       
-            max_width = max([shp[1] for shp in padding_shapes])       
-
+            max_height = max([shp[0] for shp in padding_shapes])
+            max_width = max([shp[1] for shp in padding_shapes])
         else:
             image_dims = [d["images"].shape for d in datapoint_list]
             heights = [dim[0] for dim in image_dims]
             widths = [dim[1] for dim in image_dims]
-    
+
             max_height = max(heights)
             max_width = max(widths)
 
@@ -814,13 +813,13 @@ def resize_images(inputs):
         org_shapes = [inp[0].shape for inp in inputs]
         scales = [np.sqrt(rimg.shape[0] * 1.0 / org_shape[0] * rimg.shape[1] / org_shape[1]) for rimg, org_shape in zip(resized_imgs, org_shapes)]
 
-        return [[resized_imgs[i], inp[1], scales[i], org_shapes[i][:2]] for i, inp in enumerate(inputs)] 
+        return [[resized_imgs[i], inp[1], scales[i], org_shapes[i][:2]] for i, inp in enumerate(inputs)]
 
     def pad_and_batch(inputs):
         heights, widths, _ = zip(*[inp[0].shape for inp in inputs])
         max_h, max_w = max(heights), max(widths)
         padded_images = np.stack([np.pad(inp[0], [[0, max_h-inp[0].shape[0]], [0, max_w-inp[0].shape[1]], [0,0]], 'constant') for inp in inputs])
-        return [padded_images, [inp[1] for inp in inputs], list(zip(heights, widths)), [inp[2] for inp in inputs], [inp[3] for inp in inputs]] 
+        return [padded_images, [inp[1] for inp in inputs], list(zip(heights, widths)), [inp[2] for inp in inputs], [inp[3] for inp in inputs]]
 
     ds = MapData(ds, decode_images)
     ds = MapData(ds, resize_images)
@@ -836,5 +835,5 @@ def pad_and_batch(inputs):
     ds.reset_state()
     cnt = 0
     for k in ds:
-        print(k) 
+        print(k)
         cnt += 1
diff --git a/MaskRCNN/model/backbone.py b/MaskRCNN/model/backbone.py
@@ -13,7 +13,6 @@
 from config import config as cfg
 from utils.mixed_precision import mixed_precision_scope
 
-
 @layer_register(log_shape=True)
 def GroupNorm(x, group=32, gamma_initializer=tf.constant_initializer(1.)):
     shape = x.get_shape().as_list()
@@ -127,51 +126,51 @@ def norm(x):
     #return lambda x: Norm(layer_name, x, gamma_initializer=tf.zeros_initializer() if zero_init else None)
 
 
-def resnet_shortcut(l, n_out, stride, activation=tf.identity):
+def resnet_shortcut(l, n_out, stride, seed_gen, activation=tf.identity):
     n_in = l.shape[1]
     if n_in != n_out:   # change dimension when channel is not the same
         # TF's SAME mode output ceil(x/stride), which is NOT what we want when x is odd and stride is 2
         # In FPN mode, the images are pre-padded already.
         if not cfg.MODE_FPN and stride == 2:
             l = l[:, :, :-1, :-1]
         return Conv2D('convshortcut', l, n_out, 1,
-                      strides=stride, activation=activation)
+                      strides=stride, activation=activation, seed=seed_gen.next())
     else:
         return l
 
 
-def resnet_bottleneck(l, ch_out, stride):
+def resnet_bottleneck(l, ch_out, stride, seed_gen):
     shortcut = l
     if cfg.BACKBONE.STRIDE_1X1:
         if stride == 2:
             l = l[:, :, :-1, :-1]
-        l = Conv2D('conv1', l, ch_out, 1, strides=stride)
-        l = Conv2D('conv2', l, ch_out, 3, strides=1)
+        l = Conv2D('conv1', l, ch_out, 1, strides=stride, seed=seed_gen.next())
+        l = Conv2D('conv2', l, ch_out, 3, strides=1, seed=seed_gen.next())
     else:
-        l = Conv2D('conv1', l, ch_out, 1, strides=1)
+        l = Conv2D('conv1', l, ch_out, 1, strides=1, seed=seed_gen.next())
         if stride == 2:
             l = tf.pad(l, [[0, 0], [0, 0], maybe_reverse_pad(0, 1), maybe_reverse_pad(0, 1)])
-            l = Conv2D('conv2', l, ch_out, 3, strides=2, padding='VALID')
+            l = Conv2D('conv2', l, ch_out, 3, strides=2, padding='VALID', seed=seed_gen.next())
         else:
-            l = Conv2D('conv2', l, ch_out, 3, strides=stride)
+            l = Conv2D('conv2', l, ch_out, 3, strides=stride, seed=seed_gen.next())
     if cfg.BACKBONE.NORM != 'None':
-        l = Conv2D('conv3', l, ch_out * 4, 1, activation=get_norm(zero_init=True))
+        l = Conv2D('conv3', l, ch_out * 4, 1, activation=get_norm(zero_init=True), seed=seed_gen.next())
     else:
         l = Conv2D('conv3', l, ch_out * 4, 1, activation=tf.identity,
-                   kernel_initializer=tf.constant_initializer())
-    ret = l + resnet_shortcut(shortcut, ch_out * 4, stride, activation=get_norm(zero_init=False))
+                   kernel_initializer=tf.constant_initializer(), seed=seed_gen.next())
+    ret = l + resnet_shortcut(shortcut, ch_out * 4, stride, seed_gen=seed_gen, activation=get_norm(zero_init=False))
     return tf.nn.relu(ret, name='output')
 
 
-def resnet_group(name, l, block_func, features, count, stride):
+def resnet_group(name, l, block_func, features, count, stride, seed_gen):
     with tf.variable_scope(name):
         for i in range(0, count):
             with tf.variable_scope('block{}'.format(i)):
-                l = block_func(l, features, stride if i == 0 else 1)
+                l = block_func(l, features, stride if i == 0 else 1, seed_gen)
     return l
 
 
-def resnet_c4_backbone(image, num_blocks):
+def resnet_c4_backbone(image, num_blocks, seed_gen):
     assert len(num_blocks) == 3
     freeze_at = cfg.BACKBONE.FREEZE_AT
     with backbone_scope(freeze=freeze_at > 0):
@@ -181,10 +180,10 @@ def resnet_c4_backbone(image, num_blocks):
         l = MaxPooling('pool0', l, 3, strides=2, padding='VALID')
 
     with backbone_scope(freeze=freeze_at > 1):
-        c2 = resnet_group('group0', l, resnet_bottleneck, 64, num_blocks[0], 1)
+        c2 = resnet_group('group0', l, resnet_bottleneck, 64, num_blocks[0], 1, seed_gen=seed_gen)
     with backbone_scope(freeze=False):
-        c3 = resnet_group('group1', c2, resnet_bottleneck, 128, num_blocks[1], 2)
-        c4 = resnet_group('group2', c3, resnet_bottleneck, 256, num_blocks[2], 2)
+        c3 = resnet_group('group1', c2, resnet_bottleneck, 128, num_blocks[1], 2, seed_gen=seed_gen)
+        c4 = resnet_group('group2', c3, resnet_bottleneck, 256, num_blocks[2], 2, seed_gen=seed_gen)
     # 16x downsampling up to now
     return c4
 
@@ -196,7 +195,7 @@ def resnet_conv5(image, num_block):
         return l
 
 
-def resnet_fpn_backbone(image, num_blocks, fp16=False):
+def resnet_fpn_backbone(image, num_blocks, seed_gen, fp16=False):
     freeze_at = cfg.BACKBONE.FREEZE_AT
     shape2d = tf.shape(image)[2:]
     mult = float(cfg.FPN.RESOLUTION_REQUIREMENT)
@@ -217,15 +216,15 @@ def resnet_fpn_backbone(image, num_blocks, fp16=False):
                 [pad_base[0], pad_base[1] + pad_shape2d[0]],
                 [pad_base[0], pad_base[1] + pad_shape2d[1]]]))
             l.set_shape([None, chan, None, None])
-            l = Conv2D('conv0', l, 64, 7, strides=2, padding='VALID')
+            l = Conv2D('conv0', l, 64, 7, strides=2, padding='VALID', seed=seed_gen.next())
             l = tf.pad(l, [[0, 0], [0, 0], maybe_reverse_pad(0, 1), maybe_reverse_pad(0, 1)])
             l = MaxPooling('pool0', l, 3, strides=2, padding='VALID')
         with backbone_scope(freeze=freeze_at > 1):
-            c2 = resnet_group('group0', l, resnet_bottleneck, 64, num_blocks[0], 1)
+            c2 = resnet_group('group0', l, resnet_bottleneck, 64, num_blocks[0], 1, seed_gen=seed_gen)
         with backbone_scope(freeze=False):
-            c3 = resnet_group('group1', c2, resnet_bottleneck, 128, num_blocks[1], 2)
-            c4 = resnet_group('group2', c3, resnet_bottleneck, 256, num_blocks[2], 2)
-            c5 = resnet_group('group3', c4, resnet_bottleneck, 512, num_blocks[3], 2)
+            c3 = resnet_group('group1', c2, resnet_bottleneck, 128, num_blocks[1], 2, seed_gen=seed_gen)
+            c4 = resnet_group('group2', c3, resnet_bottleneck, 256, num_blocks[2], 2, seed_gen=seed_gen)
+            c5 = resnet_group('group3', c4, resnet_bottleneck, 512, num_blocks[3], 2, seed_gen=seed_gen)
 
     # 32x downsampling up to now
     # size of c5: ceil(input/32)

diff --git a/MaskRCNN/model/biased_sampler.py b/MaskRCNN/model/biased_sampler.py
@@ -7,7 +7,7 @@
 
 
 @under_name_scope()
-def sample_fast_rcnn_targets(boxes, gt_boxes, gt_labels, orig_gt_counts, batch_size):
+def sample_fast_rcnn_targets(boxes, gt_boxes, gt_labels, orig_gt_counts, batch_size, seed_gen):
     """
     Sample some boxes from all proposals for training.
     #fg(foreground) is guaranteed to be > 0, because ground truth boxes will be added as proposals.
@@ -65,14 +65,14 @@ def sample_fg_bg(iou):
         num_fg = tf.minimum(int(
             cfg.FRCNN.BATCH_PER_IM * cfg.FRCNN.FG_RATIO),
             tf.size(fg_inds))
-        fg_inds = tf.random_shuffle(fg_inds)[:num_fg]
+        fg_inds = tf.random_shuffle(fg_inds, seed=seed_gen.next('sampler'))[:num_fg]
         # fg_inds = fg_inds[:num_fg]
 
         bg_inds = tf.reshape(tf.where(tf.logical_not(fg_mask)), [-1])
         num_bg = tf.minimum(
             cfg.FRCNN.BATCH_PER_IM - num_fg,
             tf.size(bg_inds))
-        bg_inds = tf.random_shuffle(bg_inds)[:num_bg]
+        bg_inds = tf.random_shuffle(bg_inds, seed=seed_gen.next('sampler'))[:num_bg]
         # bg_inds = bg_inds[:num_bg]
 
 

diff --git a/MaskRCNN/model/boxclass_head.py b/MaskRCNN/model/boxclass_head.py
@@ -17,7 +17,7 @@
 
 
 @layer_register(log_shape=True)
-def boxclass_outputs(feature, num_classes, class_agnostic_regression=False):
+def boxclass_outputs(feature, num_classes, seed_gen, class_agnostic_regression=False):
     """
     Args:
         feature (any shape):
@@ -30,11 +30,11 @@ def boxclass_outputs(feature, num_classes, class_agnostic_regression=False):
     """
     classification = FullyConnected(
         'class', feature, num_classes,
-        kernel_initializer=tf.random_normal_initializer(stddev=0.01))
+        kernel_initializer=tf.random_normal_initializer(stddev=0.01, seed=seed_gen.next()))
     num_classes_for_box = 1 if class_agnostic_regression else num_classes
     box_regression = FullyConnected(
         'box', feature, num_classes_for_box * 4,
-        kernel_initializer=tf.random_normal_initializer(stddev=0.001))
+        kernel_initializer=tf.random_normal_initializer(stddev=0.001, seed=seed_gen.next()))
     box_regression = tf.reshape(box_regression, [-1, num_classes_for_box, 4], name='output_box')
     return classification, box_regression
 
@@ -172,7 +172,7 @@ def f(X):
 
 
 @layer_register(log_shape=True)
-def boxclass_2fc_head(feature, fp16=False):
+def boxclass_2fc_head(feature, seed_gen, fp16=False):
     """
     Fully connected layer for the class and box branch
 
@@ -187,7 +187,7 @@ def boxclass_2fc_head(feature, fp16=False):
         feature = tf.cast(feature, tf.float16)
 
     with mixed_precision_scope(mixed=fp16):
-        init = tf.variance_scaling_initializer(dtype=tf.float16 if fp16 else tf.float32)
+        init = tf.variance_scaling_initializer(dtype=tf.float16 if fp16 else tf.float32, seed=seed_gen.next())
         hidden = FullyConnected('fc6', feature, dim, kernel_initializer=init, activation=tf.nn.relu)
         hidden = FullyConnected('fc7', hidden, dim, kernel_initializer=init, activation=tf.nn.relu)
 
@@ -283,7 +283,6 @@ def add_training_info(self,
         self.training_info_available = True
 
 
-
     @memoized_method
     def losses(self, batch_size_per_gpu, shortcut=False):