diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..0755657 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +./ade20k_model +./cityscapes_model +model.ckpt-* +checkpoint diff --git a/evaluate.py b/evaluate.py index 8c53b96..b26674c 100644 --- a/evaluate.py +++ b/evaluate.py @@ -8,8 +8,7 @@ import tensorflow as tf import numpy as np -from model import PSPNet -from tools import decode_labels +from model import PSPNet101 from image_reader import ImageReader IMG_MEAN = np.array((103.939, 116.779, 123.68), dtype=np.float32) @@ -80,13 +79,12 @@ def main(): image_batch, label_batch = tf.expand_dims(image, dim=0), tf.expand_dims(label, dim=0) # Add one batch dimension. # Create network. - net = PSPNet({'data': image_batch}, is_training=False, num_classes=num_classes) + net = PSPNet101({'data': image_batch}, is_training=False, num_classes=num_classes) with tf.variable_scope('', reuse=True): flipped_img = tf.image.flip_left_right(image) flipped_img = tf.expand_dims(flipped_img, dim=0) - net2 = PSPNet({'data': flipped_img}, is_training=False, num_classes=num_classes) - + net2 = PSPNet101({'data': flipped_img}, is_training=False, num_classes=num_classes) # Which variables to load. restore_var = tf.global_variables() @@ -142,9 +140,8 @@ def main(): if step % 10 == 0: print('Finish {0}/{1}'.format(step, num_steps)) - print('step {0} mIoU: {1}'.format(step, sess.run(mIoU))) - print('step {0} mIoU: {1}'.format(step, sess.run(mIoU))) + print('mIoU: {1}'.format(step, sess.run(mIoU))) coord.request_stop() coord.join(threads) diff --git a/inference.py b/inference.py index d79b7d6..5eb2871 100644 --- a/inference.py +++ b/inference.py @@ -4,31 +4,36 @@ import os import sys import time -from PIL import Image import tensorflow as tf import numpy as np +from scipy import misc -from model import PSPNet -from tools import decode_labels +from model import PSPNet101, PSPNet50 +from tools import * -IMG_MEAN = np.array((103.939, 116.779, 123.68), dtype=np.float32) -input_size = [1024, 2048] -num_classes = 19 +ADE20k_param = {'crop_size': [473, 473], + 'num_classes': 150, + 'model': PSPNet50} +cityscapes_param = {'crop_size': [720, 720], + 'num_classes': 19, + 'model': PSPNet101} SAVE_DIR = './output/' SNAPSHOT_DIR = './model/' -crop_size = [720, 720] def get_arguments(): parser = argparse.ArgumentParser(description="Reproduced PSPNet") parser.add_argument("--img-path", type=str, default='', help="Path to the RGB image file.") - parser.add_argument("--model", type=str, default=SNAPSHOT_DIR, + parser.add_argument("--checkpoints", type=str, default=SNAPSHOT_DIR, help="Path to restore weights.") parser.add_argument("--save-dir", type=str, default=SAVE_DIR, help="Path to save output.") parser.add_argument("--flipped-eval", action="store_true", help="whether to evaluate with flipped img.") + parser.add_argument("--dataset", type=str, default='', + choices=['ade20k', 'cityscapes'], + required=True) return parser.parse_args() @@ -45,44 +50,23 @@ def load(saver, sess, ckpt_path): saver.restore(sess, ckpt_path) print("Restored model parameters from {}".format(ckpt_path)) -def load_img(img_path): - if os.path.isfile(img_path): - print('successful load img: {0}'.format(img_path)) - else: - print('not found file: {0}'.format(img_path)) - sys.exit(0) - - filename = img_path.split('/')[-1] - ext = filename.split('.')[-1] - - if ext.lower() == 'png': - img = tf.image.decode_png(tf.read_file(img_path), channels=3) - elif ext.lower() == 'jpg': - img = tf.image.decode_jpeg(tf.read_file(img_path), channels=3) - else: - print('cannot process {0} file.'.format(file_type)) - - return img, filename - -def preprocess(img, h, w): - # Convert RGB to BGR - img_r, img_g, img_b = tf.split(axis=2, num_or_size_splits=3, value=img) - img = tf.cast(tf.concat(axis=2, values=[img_b, img_g, img_r]), dtype=tf.float32) - # Extract mean. - img -= IMG_MEAN - - pad_img = tf.image.pad_to_bounding_box(img, 0, 0, h, w) - pad_img = tf.expand_dims(pad_img, dim=0) - - return pad_img - def main(): args = get_arguments() + # load parameters + if args.dataset == 'ade20k': + param = ADE20k_param + elif args.dataset == 'cityscapes': + param = cityscapes_param + + crop_size = param['crop_size'] + num_classes = param['num_classes'] + PSPNet = param['model'] + + # preprocess images img, filename = load_img(args.img_path) img_shape = tf.shape(img) h, w = (tf.maximum(crop_size[0], img_shape[0]), tf.maximum(crop_size[1], img_shape[1])) - img = preprocess(img, h, w) # Create network. @@ -92,8 +76,9 @@ def main(): flipped_img = tf.expand_dims(flipped_img, dim=0) net2 = PSPNet({'data': flipped_img}, is_training=False, num_classes=num_classes) - raw_output = net.layers['conv6'] + + # Do flipped eval or not if args.flipped_eval: flipped_output = tf.image.flip_left_right(tf.squeeze(net2.layers['conv6'])) flipped_output = tf.expand_dims(flipped_output, dim=0) @@ -103,7 +88,7 @@ def main(): raw_output_up = tf.image.resize_bilinear(raw_output, size=[h, w], align_corners=True) raw_output_up = tf.image.crop_to_bounding_box(raw_output_up, 0, 0, img_shape[0], img_shape[1]) raw_output_up = tf.argmax(raw_output_up, dimension=3) - pred = tf.expand_dims(raw_output_up, dim=3) + pred = decode_labels(raw_output_up, img_shape, num_classes) # Init tf Session config = tf.ConfigProto() @@ -113,11 +98,9 @@ def main(): sess.run(init) - saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=10) - restore_var = tf.global_variables() - - ckpt = tf.train.get_checkpoint_state(args.model) + + ckpt = tf.train.get_checkpoint_state(args.checkpoints) if ckpt and ckpt.model_checkpoint_path: loader = tf.train.Saver(var_list=restore_var) load_step = int(os.path.basename(ckpt.model_checkpoint_path).split('-')[1]) @@ -127,11 +110,9 @@ def main(): preds = sess.run(pred) - msk = decode_labels(preds, num_classes=num_classes) - im = Image.fromarray(msk[0]) if not os.path.exists(args.save_dir): os.makedirs(args.save_dir) - im.save(args.save_dir + filename) + misc.imsave(args.save_dir + filename, preds[0]) if __name__ == '__main__': - main() + main() \ No newline at end of file diff --git a/input/._test_256x512.png b/input/._test_256x512.png deleted file mode 100644 index c7fdfd9..0000000 Binary files a/input/._test_256x512.png and /dev/null differ diff --git a/input/._test_720x720.png b/input/._test_720x720.png deleted file mode 100644 index dddc329..0000000 Binary files a/input/._test_720x720.png and /dev/null differ diff --git a/input/indoor_1.jpg b/input/indoor_1.jpg new file mode 100644 index 0000000..9a4f849 Binary files /dev/null and b/input/indoor_1.jpg differ diff --git a/input/indoor_2.jpg b/input/indoor_2.jpg new file mode 100644 index 0000000..f333f29 Binary files /dev/null and b/input/indoor_2.jpg differ diff --git a/model.py b/model.py index ebfe39b..c9688c8 100644 --- a/model.py +++ b/model.py @@ -1,7 +1,7 @@ from network import Network import tensorflow as tf -class PSPNet(Network): +class PSPNet101(Network): def setup(self, is_training, num_classes): '''Network definition. @@ -469,3 +469,268 @@ def setup(self, is_training, num_classes): .conv(3, 3, 512, 1, 1, biased=False, relu=False, padding='SAME', name='conv5_4') .batch_normalization(relu=True, name='conv5_4_bn') .conv(1, 1, num_classes, 1, 1, biased=True, relu=False, name='conv6')) + +class PSPNet50(Network): + def setup(self, is_training, num_classes): + '''Network definition. + + Args: + is_training: whether to update the running mean and variance of the batch normalisation layer. + If the batch size is small, it is better to keep the running mean and variance of + the-pretrained model frozen. + num_classes: number of classes to predict (including background). + ''' + (self.feed('data') + .conv(3, 3, 64, 2, 2, biased=False, relu=False, padding='SAME', name='conv1_1_3x3_s2') + .batch_normalization(relu=False, name='conv1_1_3x3_s2_bn') + .relu(name='conv1_1_3x3_s2_bn_relu') + .conv(3, 3, 64, 1, 1, biased=False, relu=False, padding='SAME', name='conv1_2_3x3') + .batch_normalization(relu=True, name='conv1_2_3x3_bn') + .conv(3, 3, 128, 1, 1, biased=False, relu=False, padding='SAME', name='conv1_3_3x3') + .batch_normalization(relu=True, name='conv1_3_3x3_bn') + .max_pool(3, 3, 2, 2, padding='SAME', name='pool1_3x3_s2') + .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='conv2_1_1x1_proj') + .batch_normalization(relu=False, name='conv2_1_1x1_proj_bn')) + + (self.feed('pool1_3x3_s2') + .conv(1, 1, 64, 1, 1, biased=False, relu=False, name='conv2_1_1x1_reduce') + .batch_normalization(relu=True, name='conv2_1_1x1_reduce_bn') + .zero_padding(paddings=1, name='padding1') + .conv(3, 3, 64, 1, 1, biased=False, relu=False, name='conv2_1_3x3') + .batch_normalization(relu=True, name='conv2_1_3x3_bn') + .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='conv2_1_1x1_increase') + .batch_normalization(relu=False, name='conv2_1_1x1_increase_bn')) + + (self.feed('conv2_1_1x1_proj_bn', + 'conv2_1_1x1_increase_bn') + .add(name='conv2_1') + .relu(name='conv2_1/relu') + .conv(1, 1, 64, 1, 1, biased=False, relu=False, name='conv2_2_1x1_reduce') + .batch_normalization(relu=True, name='conv2_2_1x1_reduce_bn') + .zero_padding(paddings=1, name='padding2') + .conv(3, 3, 64, 1, 1, biased=False, relu=False, name='conv2_2_3x3') + .batch_normalization(relu=True, name='conv2_2_3x3_bn') + .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='conv2_2_1x1_increase') + .batch_normalization(relu=False, name='conv2_2_1x1_increase_bn')) + + (self.feed('conv2_1/relu', + 'conv2_2_1x1_increase_bn') + .add(name='conv2_2') + .relu(name='conv2_2/relu') + .conv(1, 1, 64, 1, 1, biased=False, relu=False, name='conv2_3_1x1_reduce') + .batch_normalization(relu=True, name='conv2_3_1x1_reduce_bn') + .zero_padding(paddings=1, name='padding3') + .conv(3, 3, 64, 1, 1, biased=False, relu=False, name='conv2_3_3x3') + .batch_normalization(relu=True, name='conv2_3_3x3_bn') + .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='conv2_3_1x1_increase') + .batch_normalization(relu=False, name='conv2_3_1x1_increase_bn')) + + (self.feed('conv2_2/relu', + 'conv2_3_1x1_increase_bn') + .add(name='conv2_3') + .relu(name='conv2_3/relu') + .conv(1, 1, 512, 2, 2, biased=False, relu=False, name='conv3_1_1x1_proj') + .batch_normalization(relu=False, name='conv3_1_1x1_proj_bn')) + + (self.feed('conv2_3/relu') + .conv(1, 1, 128, 2, 2, biased=False, relu=False, name='conv3_1_1x1_reduce') + .batch_normalization(relu=True, name='conv3_1_1x1_reduce_bn') + .zero_padding(paddings=1, name='padding4') + .conv(3, 3, 128, 1, 1, biased=False, relu=False, name='conv3_1_3x3') + .batch_normalization(relu=True, name='conv3_1_3x3_bn') + .conv(1, 1, 512, 1, 1, biased=False, relu=False, name='conv3_1_1x1_increase') + .batch_normalization(relu=False, name='conv3_1_1x1_increase_bn')) + + (self.feed('conv3_1_1x1_proj_bn', + 'conv3_1_1x1_increase_bn') + .add(name='conv3_1') + .relu(name='conv3_1/relu') + .conv(1, 1, 128, 1, 1, biased=False, relu=False, name='conv3_2_1x1_reduce') + .batch_normalization(relu=True, name='conv3_2_1x1_reduce_bn') + .zero_padding(paddings=1, name='padding5') + .conv(3, 3, 128, 1, 1, biased=False, relu=False, name='conv3_2_3x3') + .batch_normalization(relu=True, name='conv3_2_3x3_bn') + .conv(1, 1, 512, 1, 1, biased=False, relu=False, name='conv3_2_1x1_increase') + .batch_normalization(relu=False, name='conv3_2_1x1_increase_bn')) + + (self.feed('conv3_1/relu', + 'conv3_2_1x1_increase_bn') + .add(name='conv3_2') + .relu(name='conv3_2/relu') + .conv(1, 1, 128, 1, 1, biased=False, relu=False, name='conv3_3_1x1_reduce') + .batch_normalization(relu=True, name='conv3_3_1x1_reduce_bn') + .zero_padding(paddings=1, name='padding6') + .conv(3, 3, 128, 1, 1, biased=False, relu=False, name='conv3_3_3x3') + .batch_normalization(relu=True, name='conv3_3_3x3_bn') + .conv(1, 1, 512, 1, 1, biased=False, relu=False, name='conv3_3_1x1_increase') + .batch_normalization(relu=False, name='conv3_3_1x1_increase_bn')) + + (self.feed('conv3_2/relu', + 'conv3_3_1x1_increase_bn') + .add(name='conv3_3') + .relu(name='conv3_3/relu') + .conv(1, 1, 128, 1, 1, biased=False, relu=False, name='conv3_4_1x1_reduce') + .batch_normalization(relu=True, name='conv3_4_1x1_reduce_bn') + .zero_padding(paddings=1, name='padding7') + .conv(3, 3, 128, 1, 1, biased=False, relu=False, name='conv3_4_3x3') + .batch_normalization(relu=True, name='conv3_4_3x3_bn') + .conv(1, 1, 512, 1, 1, biased=False, relu=False, name='conv3_4_1x1_increase') + .batch_normalization(relu=False, name='conv3_4_1x1_increase_bn')) + + (self.feed('conv3_3/relu', + 'conv3_4_1x1_increase_bn') + .add(name='conv3_4') + .relu(name='conv3_4/relu') + .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='conv4_1_1x1_proj') + .batch_normalization(relu=False, name='conv4_1_1x1_proj_bn')) + + (self.feed('conv3_4/relu') + .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='conv4_1_1x1_reduce') + .batch_normalization(relu=True, name='conv4_1_1x1_reduce_bn') + .zero_padding(paddings=2, name='padding8') + .atrous_conv(3, 3, 256, 2, biased=False, relu=False, name='conv4_1_3x3') + .batch_normalization(relu=True, name='conv4_1_3x3_bn') + .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='conv4_1_1x1_increase') + .batch_normalization(relu=False, name='conv4_1_1x1_increase_bn')) + + (self.feed('conv4_1_1x1_proj_bn', + 'conv4_1_1x1_increase_bn') + .add(name='conv4_1') + .relu(name='conv4_1/relu') + .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='conv4_2_1x1_reduce') + .batch_normalization(relu=True, name='conv4_2_1x1_reduce_bn') + .zero_padding(paddings=2, name='padding9') + .atrous_conv(3, 3, 256, 2, biased=False, relu=False, name='conv4_2_3x3') + .batch_normalization(relu=True, name='conv4_2_3x3_bn') + .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='conv4_2_1x1_increase') + .batch_normalization(relu=False, name='conv4_2_1x1_increase_bn')) + + (self.feed('conv4_1/relu', + 'conv4_2_1x1_increase_bn') + .add(name='conv4_2') + .relu(name='conv4_2/relu') + .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='conv4_3_1x1_reduce') + .batch_normalization(relu=True, name='conv4_3_1x1_reduce_bn') + .zero_padding(paddings=2, name='padding10') + .atrous_conv(3, 3, 256, 2, biased=False, relu=False, name='conv4_3_3x3') + .batch_normalization(relu=True, name='conv4_3_3x3_bn') + .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='conv4_3_1x1_increase') + .batch_normalization(relu=False, name='conv4_3_1x1_increase_bn')) + + (self.feed('conv4_2/relu', + 'conv4_3_1x1_increase_bn') + .add(name='conv4_3') + .relu(name='conv4_3/relu') + .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='conv4_4_1x1_reduce') + .batch_normalization(relu=True, name='conv4_4_1x1_reduce_bn') + .zero_padding(paddings=2, name='padding11') + .atrous_conv(3, 3, 256, 2, biased=False, relu=False, name='conv4_4_3x3') + .batch_normalization(relu=True, name='conv4_4_3x3_bn') + .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='conv4_4_1x1_increase') + .batch_normalization(relu=False, name='conv4_4_1x1_increase_bn')) + + (self.feed('conv4_3/relu', + 'conv4_4_1x1_increase_bn') + .add(name='conv4_4') + .relu(name='conv4_4/relu') + .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='conv4_5_1x1_reduce') + .batch_normalization(relu=True, name='conv4_5_1x1_reduce_bn') + .zero_padding(paddings=2, name='padding12') + .atrous_conv(3, 3, 256, 2, biased=False, relu=False, name='conv4_5_3x3') + .batch_normalization(relu=True, name='conv4_5_3x3_bn') + .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='conv4_5_1x1_increase') + .batch_normalization(relu=False, name='conv4_5_1x1_increase_bn')) + + (self.feed('conv4_4/relu', + 'conv4_5_1x1_increase_bn') + .add(name='conv4_5') + .relu(name='conv4_5/relu') + .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='conv4_6_1x1_reduce') + .batch_normalization(relu=True, name='conv4_6_1x1_reduce_bn') + .zero_padding(paddings=2, name='padding13') + .atrous_conv(3, 3, 256, 2, biased=False, relu=False, name='conv4_6_3x3') + .batch_normalization(relu=True, name='conv4_6_3x3_bn') + .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='conv4_6_1x1_increase') + .batch_normalization(relu=False, name='conv4_6_1x1_increase_bn')) + + (self.feed('conv4_5/relu', + 'conv4_6_1x1_increase_bn') + .add(name='conv4_6') + .relu(name='conv4_6/relu') + .conv(1, 1, 2048, 1, 1, biased=False, relu=False, name='conv5_1_1x1_proj') + .batch_normalization(relu=False, name='conv5_1_1x1_proj_bn')) + + (self.feed('conv4_6/relu') + .conv(1, 1, 512, 1, 1, biased=False, relu=False, name='conv5_1_1x1_reduce') + .batch_normalization(relu=True, name='conv5_1_1x1_reduce_bn') + .zero_padding(paddings=4, name='padding31') + .atrous_conv(3, 3, 512, 4, biased=False, relu=False, name='conv5_1_3x3') + .batch_normalization(relu=True, name='conv5_1_3x3_bn') + .conv(1, 1, 2048, 1, 1, biased=False, relu=False, name='conv5_1_1x1_increase') + .batch_normalization(relu=False, name='conv5_1_1x1_increase_bn')) + + (self.feed('conv5_1_1x1_proj_bn', + 'conv5_1_1x1_increase_bn') + .add(name='conv5_1') + .relu(name='conv5_1/relu') + .conv(1, 1, 512, 1, 1, biased=False, relu=False, name='conv5_2_1x1_reduce') + .batch_normalization(relu=True, name='conv5_2_1x1_reduce_bn') + .zero_padding(paddings=4, name='padding32') + .atrous_conv(3, 3, 512, 4, biased=False, relu=False, name='conv5_2_3x3') + .batch_normalization(relu=True, name='conv5_2_3x3_bn') + .conv(1, 1, 2048, 1, 1, biased=False, relu=False, name='conv5_2_1x1_increase') + .batch_normalization(relu=False, name='conv5_2_1x1_increase_bn')) + + (self.feed('conv5_1/relu', + 'conv5_2_1x1_increase_bn') + .add(name='conv5_2') + .relu(name='conv5_2/relu') + .conv(1, 1, 512, 1, 1, biased=False, relu=False, name='conv5_3_1x1_reduce') + .batch_normalization(relu=True, name='conv5_3_1x1_reduce_bn') + .zero_padding(paddings=4, name='padding33') + .atrous_conv(3, 3, 512, 4, biased=False, relu=False, name='conv5_3_3x3') + .batch_normalization(relu=True, name='conv5_3_3x3_bn') + .conv(1, 1, 2048, 1, 1, biased=False, relu=False, name='conv5_3_1x1_increase') + .batch_normalization(relu=False, name='conv5_3_1x1_increase_bn')) + + (self.feed('conv5_2/relu', + 'conv5_3_1x1_increase_bn') + .add(name='conv5_3') + .relu(name='conv5_3/relu')) + + conv5_3 = self.layers['conv5_3/relu'] + shape = tf.shape(conv5_3)[1:3] + + (self.feed('conv5_3/relu') + .avg_pool(60, 60, 60, 60, name='conv5_3_pool1') + .conv(1, 1, 512, 1, 1, biased=False, relu=False, name='conv5_3_pool1_conv') + .batch_normalization(relu=True, name='conv5_3_pool1_conv_bn') + .resize_bilinear(shape, name='conv5_3_pool1_interp')) + + (self.feed('conv5_3/relu') + .avg_pool(30, 30, 30, 30, name='conv5_3_pool2') + .conv(1, 1, 512, 1, 1, biased=False, relu=False, name='conv5_3_pool2_conv') + .batch_normalization(relu=True, name='conv5_3_pool2_conv_bn') + .resize_bilinear(shape, name='conv5_3_pool2_interp')) + + (self.feed('conv5_3/relu') + .avg_pool(20, 20, 20, 20, name='conv5_3_pool3') + .conv(1, 1, 512, 1, 1, biased=False, relu=False, name='conv5_3_pool3_conv') + .batch_normalization(relu=True, name='conv5_3_pool3_conv_bn') + .resize_bilinear(shape, name='conv5_3_pool3_interp')) + + (self.feed('conv5_3/relu') + .avg_pool(10, 10, 10, 10, name='conv5_3_pool6') + .conv(1, 1, 512, 1, 1, biased=False, relu=False, name='conv5_3_pool6_conv') + .batch_normalization(relu=True, name='conv5_3_pool6_conv_bn') + .resize_bilinear(shape, name='conv5_3_pool6_interp')) + + (self.feed('conv5_3/relu', + 'conv5_3_pool6_interp', + 'conv5_3_pool3_interp', + 'conv5_3_pool2_interp', + 'conv5_3_pool1_interp') + .concat(axis=-1, name='conv5_3_concat') + .conv(3, 3, 512, 1, 1, biased=False, relu=False, padding='SAME', name='conv5_4') + .batch_normalization(relu=True, name='conv5_4_bn') + .conv(1, 1, num_classes, 1, 1, biased=True, relu=False, name='conv6')) diff --git a/network.py b/network.py index cc2519d..51cebca 100644 --- a/network.py +++ b/network.py @@ -4,6 +4,11 @@ DEFAULT_PADDING = 'VALID' DEFAULT_DATAFORMAT = 'NHWC' +BN_param_map = {'scale': 'gamma', + 'offset': 'beta', + 'variance': 'moving_variance', + 'mean': 'moving_mean'} + def layer(op): '''Decorator for composable network layers.''' @@ -57,12 +62,16 @@ def load(self, data_path, session, ignore_missing=False): session: The current TensorFlow session ignore_missing: If true, serialized weights for missing layers are ignored. ''' - data_dict = np.load(data_path).item() + data_dict = np.load(data_path, encoding='latin1').item() for op_name in data_dict: with tf.variable_scope(op_name, reuse=True): - for param_name, data in data_dict[op_name].iteritems(): + for param_name, data in data_dict[op_name].items(): try: + if 'bn' in op_name: + param_name = BN_param_map[param_name] + data = np.squeeze(data) + var = tf.get_variable(param_name) session.run(var.assign(data)) except ValueError: @@ -241,43 +250,18 @@ def softmax(self, input, name): @layer def batch_normalization(self, input, name, scale_offset=True, relu=False): - """ - # NOTE: Currently, only inference is supported - with tf.variable_scope(name) as scope: - shape = [input.get_shape()[-1]] - if scale_offset: - scale = self.make_var('scale', shape=shape) - offset = self.make_var('offset', shape=shape) - else: - scale, offset = (None, None) - output = tf.nn.batch_normalization( - input, - mean=self.make_var('mean', shape=shape), - variance=self.make_var('variance', shape=shape), - offset=offset, - scale=scale, - # TODO: This is the default Caffe batch norm eps - # Get the actual eps from parameters - variance_epsilon=1e-5, - name=name) - if relu: - output = tf.nn.relu(output) - return output - """ + output = tf.layers.batch_normalization( + input, + momentum=0.95, + epsilon=1e-5, + training=self.is_training, + name=name + ) - with tf.variable_scope(name) as scope: - output = tf.layers.batch_normalization( - input, - momentum=0.95, - epsilon=1e-5, - training=self.is_training, - name=name - ) - - if relu: - output = tf.nn.relu(output) + if relu: + output = tf.nn.relu(output) - return output + return output @layer def dropout(self, input, keep_prob, name): diff --git a/output/._test_1024x2048.png b/output/._test_1024x2048.png deleted file mode 100644 index 4248363..0000000 Binary files a/output/._test_1024x2048.png and /dev/null differ diff --git a/output/._test_256x512.png b/output/._test_256x512.png deleted file mode 100644 index 2b38d12..0000000 Binary files a/output/._test_256x512.png and /dev/null differ diff --git a/output/._test_720x720.png b/output/._test_720x720.png deleted file mode 100644 index 5192e99..0000000 Binary files a/output/._test_720x720.png and /dev/null differ diff --git a/output/indoor_1.jpg b/output/indoor_1.jpg new file mode 100644 index 0000000..8c801fc Binary files /dev/null and b/output/indoor_1.jpg differ diff --git a/output/indoor_2.jpg b/output/indoor_2.jpg new file mode 100644 index 0000000..3482c6c Binary files /dev/null and b/output/indoor_2.jpg differ diff --git a/tools.py b/tools.py index 13e5c22..896baf0 100644 --- a/tools.py +++ b/tools.py @@ -2,7 +2,9 @@ import numpy as np from PIL import Image import tensorflow as tf +import os +IMG_MEAN = np.array((103.939, 116.779, 123.68), dtype=np.float32) label_colours = [(128, 64, 128), (244, 35, 231), (69, 69, 69) # 0 = road, 1 = sidewalk, 2 = building ,(102, 102, 156), (190, 153, 153), (153, 153, 153) @@ -15,22 +17,31 @@ # 12 = rider, 13 = car, 14 = truck ,(0, 60, 100), (0, 79, 100), (0, 0, 230) # 15 = bus, 16 = train, 17 = motocycle - ,(119, 10, 32), (1, 1, 1)] - # 18 = bicycle, 19 = void label - -def decode_labels(mask, num_images=1, num_classes=19): - n, h, w, c = mask.shape - assert(n >= num_images), 'Batch size %d should be greater or equal than number of images to save %d.' % (n, num_images) - outputs = np.zeros((num_images, h, w, 3), dtype=np.uint8) - for i in range(num_images): - img = Image.new('RGB', (len(mask[i, 0]), len(mask[i]))) - pixels = img.load() - for j_, j in enumerate(mask[i, :, :, 0]): - for k_, k in enumerate(j): - if k < num_classes: - pixels[k_,j_] = label_colours[k] - outputs[i] = np.array(img) - return outputs + ,(119, 10, 32)] + # 18 = bicycle + +matfn = './color150.mat' +def read_labelcolours(matfn): + mat = sio.loadmat(matfn) + color_table = mat['colors'] + shape = color_table.shape + color_list = [tuple(color_table[i]) for i in range(shape[0])] + + return color_list + +def decode_labels(mask, img_shape, num_classes): + if num_classes == 150: + color_table = read_labelcolours(matfn) + else: + color_table = label_colours + + color_mat = tf.constant(color_table, dtype=tf.float32) + onehot_output = tf.one_hot(mask, depth=num_classes) + onehot_output = tf.reshape(onehot_output, (-1, num_classes)) + pred = tf.matmul(onehot_output, color_mat) + pred = tf.reshape(pred, (1, img_shape[0], img_shape[1], 3)) + + return pred def prepare_label(input_batch, new_size, num_classes, one_hot=True): with tf.name_scope('label_encode'): @@ -39,4 +50,36 @@ def prepare_label(input_batch, new_size, num_classes, one_hot=True): if one_hot: input_batch = tf.one_hot(input_batch, depth=num_classes) - return input_batch \ No newline at end of file + return input_batch + + +def load_img(img_path): + if os.path.isfile(img_path): + print('successful load img: {0}'.format(img_path)) + else: + print('not found file: {0}'.format(img_path)) + sys.exit(0) + + filename = img_path.split('/')[-1] + ext = filename.split('.')[-1] + + if ext.lower() == 'png': + img = tf.image.decode_png(tf.read_file(img_path), channels=3) + elif ext.lower() == 'jpg': + img = tf.image.decode_jpeg(tf.read_file(img_path), channels=3) + else: + print('cannot process {0} file.'.format(file_type)) + + return img, filename + +def preprocess(img, h, w): + # Convert RGB to BGR + img_r, img_g, img_b = tf.split(axis=2, num_or_size_splits=3, value=img) + img = tf.cast(tf.concat(axis=2, values=[img_b, img_g, img_r]), dtype=tf.float32) + # Extract mean. + img -= IMG_MEAN + + pad_img = tf.image.pad_to_bounding_box(img, 0, 0, h, w) + pad_img = tf.expand_dims(pad_img, dim=0) + + return pad_img \ No newline at end of file diff --git a/train.py b/train.py index d21df83..bdc3d1d 100644 --- a/train.py +++ b/train.py @@ -12,13 +12,13 @@ import tensorflow as tf import numpy as np -from model import PSPNet -from tools import decode_labels, prepare_label +from model import PSPNet101 +from tools import prepare_label from image_reader import ImageReader IMG_MEAN = np.array((103.939, 116.779, 123.68), dtype=np.float32) -BATCH_SIZE = 4 +BATCH_SIZE = 2 DATA_DIRECTORY = '/SSD_data/cityscapes_dataset/cityscape' DATA_LIST_PATH = './list/train_list.txt' IGNORE_LABEL = 255 @@ -120,7 +120,7 @@ def main(): coord) image_batch, label_batch = reader.dequeue(args.batch_size) - net = PSPNet({'data': image_batch}, is_training=True, num_classes=args.num_classes) + net = PSPNet101({'data': image_batch}, is_training=True, num_classes=args.num_classes) raw_output = net.layers['conv6'] diff --git a/utils/color150.mat b/utils/color150.mat new file mode 100644 index 0000000..c518b64 Binary files /dev/null and b/utils/color150.mat differ