diff --git a/official/vision/beta/projects/yolo/__init__.py b/official/vision/beta/projects/yolo/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/official/vision/beta/projects/yolo/common/registry_imports.py b/official/vision/beta/projects/yolo/common/registry_imports.py index e40d39856a7..56349093624 100644 --- a/official/vision/beta/projects/yolo/common/registry_imports.py +++ b/official/vision/beta/projects/yolo/common/registry_imports.py @@ -17,20 +17,16 @@ # pylint: disable=unused-import # pylint: disable=g-bad-import-order from official.common import registry_imports - # import configs from official.vision.beta.projects.yolo.configs import darknet_classification from official.vision.beta.projects.yolo.configs import yolo as yolo_config - # import modeling components from official.vision.beta.projects.yolo.modeling.backbones import darknet from official.vision.beta.projects.yolo.modeling.decoders import yolo_decoder - +# import optimization packages +from official.vision.beta.projects.yolo.optimization import optimizer_factory +from official.vision.beta.projects.yolo.optimization.configs import ( + optimization_config, optimizer_config) # import tasks from official.vision.beta.projects.yolo.tasks import image_classification from official.vision.beta.projects.yolo.tasks import yolo as yolo_task - -# import optimization packages -from official.vision.beta.projects.yolo.optimization import optimizer_factory -from official.vision.beta.projects.yolo.optimization.configs import optimizer_config -from official.vision.beta.projects.yolo.optimization.configs import optimization_config diff --git a/official/vision/beta/projects/yolo/configs/backbones.py b/official/vision/beta/projects/yolo/configs/backbones.py index 071af5bdef7..476e8e6ee4a 100644 --- a/official/vision/beta/projects/yolo/configs/backbones.py +++ b/official/vision/beta/projects/yolo/configs/backbones.py @@ -14,6 +14,7 @@ """Backbones configurations.""" import dataclasses + from official.modeling import hyperparams from official.vision.beta.configs import backbones diff --git a/official/vision/beta/projects/yolo/configs/decoders.py b/official/vision/beta/projects/yolo/configs/decoders.py index bc96e1b77a6..48a8c542b34 100755 --- a/official/vision/beta/projects/yolo/configs/decoders.py +++ b/official/vision/beta/projects/yolo/configs/decoders.py @@ -15,6 +15,7 @@ """Decoders configurations.""" import dataclasses from typing import Optional + from official.modeling import hyperparams from official.vision.beta.configs import decoders @@ -33,6 +34,8 @@ class YoloDecoder(hyperparams.Config): use_separable_conv: bool = False csp_stack: Optional[bool] = None fpn_depth: Optional[int] = None + max_fpn_depth: Optional[int] = None + max_csp_stack: Optional[int] = None fpn_filter_scale: Optional[int] = None path_process_len: Optional[int] = None max_level_process_len: Optional[int] = None diff --git a/official/vision/beta/projects/yolo/configs/experiments/scaled-yolo/detection-finetune/yolo_csp_640_tpu.yaml b/official/vision/beta/projects/yolo/configs/experiments/scaled-yolo/detection-finetune/yolo_csp_640_tpu.yaml new file mode 100644 index 00000000000..844c1f5b9d0 --- /dev/null +++ b/official/vision/beta/projects/yolo/configs/experiments/scaled-yolo/detection-finetune/yolo_csp_640_tpu.yaml @@ -0,0 +1,80 @@ +# --experiment_type=scaled_yolo +# mAP 47.6 +runtime: + distribution_strategy: 'tpu' + mixed_precision_dtype: 'float32' + tpu_enable_xla_dynamic_padder: false +task: + model: + input_size: [640, 640, 3] + 
backbone: + type: 'darknet' + darknet: + model_id: 'altered_cspdarknet53' + max_level: 5 + min_level: 3 + decoder: + type: yolo_decoder + yolo_decoder: + version: v4 + type: csp + head: + smart_bias: true + detection_generator: + box_type: + 'all': scaled + scale_xy: + 'all': 2.0 + max_boxes: 300 + nms_type: iou + iou_thresh: 0.001 + nms_thresh: 0.65 + loss: + use_scaled_loss: true + update_on_repeat: true + box_loss_type: + 'all': ciou + ignore_thresh: + 'all': 0.0 + iou_normalizer: + 'all': 0.05 + cls_normalizer: + 'all': 0.3 + object_normalizer: + '5': 0.28 + '4': 0.70 + '3': 2.80 + objectness_smooth: + 'all': 1.0 + norm_activation: + use_sync_bn: true + num_classes: 80 + anchor_boxes: + anchors_per_scale: 3 + boxes: [box: [12, 16], box: [19, 36], box: [40, 28], + box: [36, 75], box: [76, 55], box: [72, 146], + box: [142, 110], box: [192, 243], box: [459, 401]] + train_data: + input_path: 'gs://cam2-datasets/coco/train*' + shuffle_buffer_size: 10000 + parser: + mosaic: + mosaic_frequency: 1.0 + mixup_frequency: 0.2 + mosaic_crop_mode: 'scale' + mosaic_center: 0.25 + aug_scale_min: 0.1 + aug_scale_max: 1.9 + max_num_instances: 300 + letter_box: true + random_flip: true + aug_rand_translate: 0.1 + area_thresh: 0.1 + validation_data: + input_path: 'gs://cam2-datasets/coco/val*' +trainer: + train_steps: 831600 # epoch 300 to 450 + optimizer_config: + learning_rate: + cosine: + decay_steps: 831600 # epoch 300 to 450 \ No newline at end of file diff --git a/official/vision/beta/projects/yolo/configs/experiments/scaled-yolo/detection-finetune/yolo_l_p5_896_tpu.yaml b/official/vision/beta/projects/yolo/configs/experiments/scaled-yolo/detection-finetune/yolo_l_p5_896_tpu.yaml new file mode 100644 index 00000000000..a520acf4e91 --- /dev/null +++ b/official/vision/beta/projects/yolo/configs/experiments/scaled-yolo/detection-finetune/yolo_l_p5_896_tpu.yaml @@ -0,0 +1,82 @@ +# --experiment_type=large_yolo_finetune +# mAP 51.1% +runtime: + distribution_strategy: 'tpu' + mixed_precision_dtype: 'float32' + tpu_enable_xla_dynamic_padder: false +task: + model: + input_size: [896, 896, 3] + backbone: + type: 'darknet' + darknet: + model_id: 'csp-large' + max_level: 5 + min_level: 3 + width_scale: 1.00 + depth_scale: 1.00 + decoder: + type: yolo_decoder + yolo_decoder: + version: v4 + type: csp_large + head: + smart_bias: true + detection_generator: + box_type: + 'all': scaled + scale_xy: + 'all': 2.0 + max_boxes: 300 + nms_type: iou + iou_thresh: 0.001 + nms_thresh: 0.65 + loss: + use_scaled_loss: true + update_on_repeat: true + box_loss_type: + 'all': ciou + ignore_thresh: + 'all': 0.0 + iou_normalizer: + 'all': 0.05 + cls_normalizer: + 'all': 0.5 + object_normalizer: + '5': 0.4 + '4': 1.0 + '3': 4.0 + objectness_smooth: + 'all': 1.0 + norm_activation: + use_sync_bn: true + num_classes: 80 + anchor_boxes: + anchors_per_scale: 4 + boxes: [box: [13,17], box: [31,25], box: [24,51], box: [61,45], + box: [48,102], box: [119,96], box: [97,189], box: [217,184], + box: [171,384], box: [324,451], box: [616,618], box: [800,800]] + train_data: + input_path: 'gs://cam2-datasets/coco/train*' + shuffle_buffer_size: 10000 + parser: + mosaic: + mosaic_frequency: 1.0 + mixup_frequency: 0.2 + mosaic_crop_mode: 'scale' + mosaic_center: 0.0 + aug_scale_min: 0.2 + aug_scale_max: 1.8 + max_num_instances: 300 + letter_box: true + random_flip: true + aug_rand_translate: 0.5 + area_thresh: 0.1 + validation_data: + input_path: 'gs://cam2-datasets/coco/val*' +trainer: + train_steps: 831600 # epoch 300 to 450 + optimizer_config: + 
    learning_rate:
+      cosine:
+        decay_steps: 831600 # epoch 300 to 450
\ No newline at end of file
diff --git a/official/vision/beta/projects/yolo/configs/experiments/scaled-yolo/detection-finetune/yolo_l_p6_1280_tpu.yaml b/official/vision/beta/projects/yolo/configs/experiments/scaled-yolo/detection-finetune/yolo_l_p6_1280_tpu.yaml
new file mode 100644
index 00000000000..b10bace12d7
--- /dev/null
+++ b/official/vision/beta/projects/yolo/configs/experiments/scaled-yolo/detection-finetune/yolo_l_p6_1280_tpu.yaml
@@ -0,0 +1,84 @@
+# --experiment_type=large_yolo_finetune
+# mAP 54.4%
+runtime:
+  distribution_strategy: 'tpu'
+  mixed_precision_dtype: 'float32'
+  tpu_enable_xla_dynamic_padder: false
+task:
+  model:
+    input_size: [1280, 1280, 3]
+    backbone:
+      type: 'darknet'
+      darknet:
+        model_id: 'csp-large'
+        max_level: 6
+        min_level: 3
+        width_scale: 1.00
+        depth_scale: 1.00
+    decoder:
+      type: yolo_decoder
+      yolo_decoder:
+        version: v4
+        type: csp_large
+    head:
+      smart_bias: true
+    detection_generator:
+      box_type:
+        'all': scaled
+      scale_xy:
+        'all': 2.0
+      max_boxes: 300
+      nms_type: iou
+      iou_thresh: 0.001
+      nms_thresh: 0.65
+    loss:
+      use_scaled_loss: true
+      update_on_repeat: true
+      box_loss_type:
+        'all': ciou
+      ignore_thresh:
+        'all': 0.0
+      iou_normalizer:
+        'all': 0.05
+      cls_normalizer:
+        'all': 0.5
+      object_normalizer:
+        '6': 0.1
+        '5': 0.4
+        '4': 1.0
+        '3': 4.0
+      objectness_smooth:
+        'all': 1.0
+    norm_activation:
+      use_sync_bn: true
+    num_classes: 80
+    anchor_boxes:
+      anchors_per_scale: 4
+      boxes: [box: [13,17], box: [31,25], box: [24,51], box: [61,45],
+              box: [61,45], box: [48,102], box: [119,96], box: [97,189],
+              box: [97,189], box: [217,184], box: [171,384], box: [324,451],
+              box: [324,451], box: [545,357], box: [616,618], box: [1024,1024]]
+  train_data:
+    input_path: '/readahead/200M/placer/prod/home/tensorflow-performance-data/datasets/coco/train*'
+    shuffle_buffer_size: 10000
+    parser:
+      mosaic:
+        mosaic_frequency: 1.0
+        mixup_frequency: 0.2
+        mosaic_crop_mode: 'scale'
+        mosaic_center: 0.0
+        aug_scale_min: 0.2
+        aug_scale_max: 1.8
+      max_num_instances: 300
+      letter_box: true
+      random_flip: true
+      aug_rand_translate: 0.5
+      area_thresh: 0.1
+  validation_data:
+    input_path: '/readahead/200M/placer/prod/home/tensorflow-performance-data/datasets/coco/val*'
+trainer:
+  train_steps: 831600 # epoch 300 to 450
+  optimizer_config:
+    learning_rate:
+      cosine:
+        decay_steps: 831600 # epoch 300 to 450
\ No newline at end of file
diff --git a/official/vision/beta/projects/yolo/configs/experiments/scaled-yolo/detection-finetune/yolo_l_p7_1536_tpu.yaml b/official/vision/beta/projects/yolo/configs/experiments/scaled-yolo/detection-finetune/yolo_l_p7_1536_tpu.yaml
new file mode 100644
index 00000000000..a28c691683b
--- /dev/null
+++ b/official/vision/beta/projects/yolo/configs/experiments/scaled-yolo/detection-finetune/yolo_l_p7_1536_tpu.yaml
@@ -0,0 +1,86 @@
+# --experiment_type=large_yolo
+# mAP 55.3%
+runtime:
+  distribution_strategy: 'tpu'
+  mixed_precision_dtype: 'float32'
+  tpu_enable_xla_dynamic_padder: false
+task:
+  model:
+    input_size: [1536, 1536, 3]
+    backbone:
+      type: 'darknet'
+      darknet:
+        model_id: 'csp-large'
+        max_level: 7
+        min_level: 3
+        width_scale: 1.25
+        depth_scale: 1.00
+    decoder:
+      type: yolo_decoder
+      yolo_decoder:
+        version: v4
+        type: csp_large
+    head:
+      smart_bias: true
+    detection_generator:
+      box_type:
+        'all': scaled
+      scale_xy:
+        'all': 2.0
+      max_boxes: 300
+      nms_type: iou
+      iou_thresh: 0.001
+      nms_thresh: 0.65
+    loss:
+      use_scaled_loss: true
+      update_on_repeat: true
+      box_loss_type:
+        'all': ciou
+      ignore_thresh:
+        'all': 0.0
+      iou_normalizer:
+        'all': 0.05
+      cls_normalizer:
+        'all': 0.5
+      object_normalizer:
+        '7': 0.1
+        '6': 0.4
+        '5': 0.5
+        '4': 1.0
+        '3': 4.0
+      objectness_smooth:
+        'all': 1.0
+    norm_activation:
+      use_sync_bn: true
+    num_classes: 80
+    anchor_boxes:
+      anchors_per_scale: 4
+      boxes: [box: [13,17], box: [22,25], box: [55,41], box: [27,66],
+              box: [57,88], box: [112,69], box: [69,177], box: [136,138],
+              box: [136,138], box: [287,114], box: [134,275], box: [268,248],
+              box: [268,248], box: [232,504], box: [445,416], box: [640,640],
+              box: [812,393], box: [477,808], box: [1070,908], box: [1408,1408]]
+  train_data:
+    input_path: '/readahead/200M/placer/prod/home/tensorflow-performance-data/datasets/coco/train*'
+    shuffle_buffer_size: 10000
+    parser:
+      mosaic:
+        mosaic_frequency: 1.0
+        mixup_frequency: 0.2
+        mosaic_crop_mode: 'scale'
+        mosaic_center: 0.0
+        aug_scale_min: 0.2
+        aug_scale_max: 1.8
+      max_num_instances: 300
+      letter_box: true
+      random_flip: true
+      aug_rand_translate: 0.5
+      area_thresh: 0.1
+  validation_data:
+    input_path: '/readahead/200M/placer/prod/home/tensorflow-performance-data/datasets/coco/val*'
+trainer:
+  train_steps: 831600 # epoch 300 to 450
+  optimizer_config:
+    learning_rate:
+      cosine:
+        decay_steps: 831600 # epoch 300 to 450
\ No newline at end of file
diff --git a/official/vision/beta/projects/yolo/configs/experiments/scaled-yolo/detection/yolo_csp_640_tpu.yaml b/official/vision/beta/projects/yolo/configs/experiments/scaled-yolo/detection/yolo_csp_640_tpu.yaml
new file mode 100644
index 00000000000..e4a00b1d8c9
--- /dev/null
+++ b/official/vision/beta/projects/yolo/configs/experiments/scaled-yolo/detection/yolo_csp_640_tpu.yaml
@@ -0,0 +1,74 @@
+# --experiment_type=scaled_yolo
+# mAP 47.6
+runtime:
+  distribution_strategy: 'tpu'
+  mixed_precision_dtype: 'float32'
+  tpu_enable_xla_dynamic_padder: false
+task:
+  model:
+    input_size: [640, 640, 3]
+    backbone:
+      type: 'darknet'
+      darknet:
+        model_id: 'altered_cspdarknet53'
+        max_level: 5
+        min_level: 3
+    decoder:
+      type: yolo_decoder
+      yolo_decoder:
+        version: v4
+        type: csp
+    head:
+      smart_bias: true
+    detection_generator:
+      box_type:
+        'all': scaled
+      scale_xy:
+        'all': 2.0
+      max_boxes: 300
+      nms_type: iou
+      iou_thresh: 0.001
+      nms_thresh: 0.65
+    loss:
+      use_scaled_loss: true
+      update_on_repeat: true
+      box_loss_type:
+        'all': ciou
+      ignore_thresh:
+        'all': 0.0
+      iou_normalizer:
+        'all': 0.05
+      cls_normalizer:
+        'all': 0.3
+      object_normalizer:
+        '5': 0.28
+        '4': 0.70
+        '3': 2.80
+      objectness_smooth:
+        'all': 1.0
+    norm_activation:
+      use_sync_bn: true
+    num_classes: 80
+    anchor_boxes:
+      anchors_per_scale: 3
+      boxes: [box: [12, 16], box: [19, 36], box: [40, 28],
+              box: [36, 75], box: [76, 55], box: [72, 146],
+              box: [142, 110], box: [192, 243], box: [459, 401]]
+  train_data:
+    input_path: '/readahead/200M/placer/prod/home/tensorflow-performance-data/datasets/coco/train*'
+    shuffle_buffer_size: 10000
+    parser:
+      mosaic:
+        mosaic_frequency: 1.0
+        mixup_frequency: 0.0
+        mosaic_crop_mode: 'scale'
+        mosaic_center: 0.25
+        aug_scale_min: 0.1
+        aug_scale_max: 1.9
+      max_num_instances: 300
+      letter_box: true
+      random_flip: true
+      aug_rand_translate: 0.1
+      area_thresh: 0.1
+  validation_data:
+    input_path: '/readahead/200M/placer/prod/home/tensorflow-performance-data/datasets/coco/val*'
diff --git a/official/vision/beta/projects/yolo/configs/experiments/scaled-yolo/detection/yolo_l_p5_896_tpu.yaml b/official/vision/beta/projects/yolo/configs/experiments/scaled-yolo/detection/yolo_l_p5_896_tpu.yaml
new file mode 100644
index
00000000000..0d5eb4efa5d --- /dev/null +++ b/official/vision/beta/projects/yolo/configs/experiments/scaled-yolo/detection/yolo_l_p5_896_tpu.yaml @@ -0,0 +1,76 @@ +# --experiment_type=large_yolo +# mAP 50.5% +runtime: + distribution_strategy: 'tpu' + mixed_precision_dtype: 'float32' + tpu_enable_xla_dynamic_padder: false +task: + model: + input_size: [896, 896, 3] + backbone: + type: 'darknet' + darknet: + model_id: 'csp-large' + max_level: 5 + min_level: 3 + width_scale: 1.00 + depth_scale: 1.00 + decoder: + type: yolo_decoder + yolo_decoder: + version: v4 + type: csp_large + head: + smart_bias: true + detection_generator: + box_type: + 'all': scaled + scale_xy: + 'all': 2.0 + max_boxes: 300 + nms_type: iou + iou_thresh: 0.001 + nms_thresh: 0.65 + loss: + use_scaled_loss: true + update_on_repeat: true + box_loss_type: + 'all': ciou + ignore_thresh: + 'all': 0.0 + iou_normalizer: + 'all': 0.05 + cls_normalizer: + 'all': 0.5 + object_normalizer: + '5': 0.4 + '4': 1.0 + '3': 4.0 + objectness_smooth: + 'all': 1.0 + norm_activation: + use_sync_bn: true + num_classes: 80 + anchor_boxes: + anchors_per_scale: 4 + boxes: [box: [13,17], box: [31,25], box: [24,51], box: [61,45], + box: [48,102], box: [119,96], box: [97,189], box: [217,184], + box: [171,384], box: [324,451], box: [616,618], box: [800,800]] + train_data: + input_path: 'gs://cam2-datasets/coco/train*' + shuffle_buffer_size: 10000 + parser: + mosaic: + mosaic_frequency: 1.0 + mixup_frequency: 0.0 + mosaic_crop_mode: 'scale' + mosaic_center: 0.0 + aug_scale_min: 0.5 + aug_scale_max: 1.5 + max_num_instances: 300 + letter_box: true + random_flip: true + aug_rand_translate: 0.5 + area_thresh: 0.1 + validation_data: + input_path: 'gs://cam2-datasets/coco/val*' diff --git a/official/vision/beta/projects/yolo/configs/experiments/scaled-yolo/detection/yolo_l_p6_1280_tpu.yaml b/official/vision/beta/projects/yolo/configs/experiments/scaled-yolo/detection/yolo_l_p6_1280_tpu.yaml new file mode 100644 index 00000000000..f1f8262199d --- /dev/null +++ b/official/vision/beta/projects/yolo/configs/experiments/scaled-yolo/detection/yolo_l_p6_1280_tpu.yaml @@ -0,0 +1,78 @@ +# --experiment_type=large_yolo +# mAP 53.4% +runtime: + distribution_strategy: 'tpu' + mixed_precision_dtype: 'float32' + tpu_enable_xla_dynamic_padder: false +task: + model: + input_size: [1280, 1280, 3] + backbone: + type: 'darknet' + darknet: + model_id: 'csp-large' + max_level: 6 + min_level: 3 + width_scale: 1.00 + depth_scale: 1.00 + decoder: + type: yolo_decoder + yolo_decoder: + version: v4 + type: csp_large + head: + smart_bias: true + detection_generator: + box_type: + 'all': scaled + scale_xy: + 'all': 2.0 + max_boxes: 300 + nms_type: iou + iou_thresh: 0.001 + nms_thresh: 0.65 + loss: + use_scaled_loss: true + update_on_repeat: true + box_loss_type: + 'all': ciou + ignore_thresh: + 'all': 0.0 + iou_normalizer: + 'all': 0.05 + cls_normalizer: + 'all': 0.5 + object_normalizer: + '6': 0.1 + '5': 0.4 + '4': 1.0 + '3': 4.0 + objectness_smooth: + 'all': 1.0 + norm_activation: + use_sync_bn: true + num_classes: 80 + anchor_boxes: + anchors_per_scale: 4 + boxes: [box: [13,17], box: [31,25], box: [24,51], box: [61,45], + box: [61,45], box: [48,102], box: [119,96], box: [97,189], + box: [97,189], box: [217,184], box: [171,384], box: [324,451], + box: [324,451], box: [545,357], box: [616,618], box: [1024,1024]] + train_data: + input_path: '/readahead/200M/placer/prod/home/tensorflow-performance-data/datasets/coco/train*' + shuffle_buffer_size: 10000 + parser: + mosaic: + 
mosaic_frequency: 1.0 + mixup_frequency: 0.0 + mosaic_crop_mode: 'scale' + mosaic_center: 0.0 + aug_scale_min: 0.5 + aug_scale_max: 1.5 + max_num_instances: 300 + letter_box: true + random_flip: true + aug_rand_translate: 0.5 + area_thresh: 0.0 + validation_data: + input_path: '/readahead/200M/placer/prod/home/tensorflow-performance-data/datasets/coco/val*' diff --git a/official/vision/beta/projects/yolo/configs/experiments/scaled-yolo/detection/yolo_l_p7_1536_tpu.yaml b/official/vision/beta/projects/yolo/configs/experiments/scaled-yolo/detection/yolo_l_p7_1536_tpu.yaml new file mode 100644 index 00000000000..4295d1ce828 --- /dev/null +++ b/official/vision/beta/projects/yolo/configs/experiments/scaled-yolo/detection/yolo_l_p7_1536_tpu.yaml @@ -0,0 +1,80 @@ +# --experiment_type=large_yolo +# mAP 54.6% +runtime: + distribution_strategy: 'tpu' + mixed_precision_dtype: 'float32' + tpu_enable_xla_dynamic_padder: false +task: + model: + input_size: [1536, 1536, 3] + backbone: + type: 'darknet' + darknet: + model_id: 'csp-large' + max_level: 7 + min_level: 3 + width_scale: 1.25 + depth_scale: 1.00 + decoder: + type: yolo_decoder + yolo_decoder: + version: v4 + type: csp_large + head: + smart_bias: true + detection_generator: + box_type: + 'all': scaled + scale_xy: + 'all': 2.0 + max_boxes: 300 + nms_type: iou + iou_thresh: 0.001 + nms_thresh: 0.65 + loss: + use_scaled_loss: true + update_on_repeat: true + box_loss_type: + 'all': ciou + ignore_thresh: + 'all': 0.0 + iou_normalizer: + 'all': 0.05 + cls_normalizer: + 'all': 0.5 + object_normalizer: + '7': 0.1 + '6': 0.4 + '5': 0.5 + '4': 1.0 + '3': 4.0 + objectness_smooth: + 'all': 1.0 + norm_activation: + use_sync_bn: true + num_classes: 80 + anchor_boxes: + anchors_per_scale: 4 + boxes: [box: [13,17], box: [22,25], box: [55,41], box: [27,66], + box: [57,88], box: [112,69], box: [69,177], box: [136,138], + box: [136,138], box: [287,114], box: [134,275], box: [268,248], + box: [268,248], box: [232,504], box: [445,416], box: [640,640], + box: [812,393], box: [477,808], box: [1070,908], box: [1408,1408]] + train_data: + input_path: '/readahead/200M/placer/prod/home/tensorflow-performance-data/datasets/coco/train*' + shuffle_buffer_size: 10000 + parser: + mosaic: + mosaic_frequency: 1.0 + mixup_frequency: 0.0 + mosaic_crop_mode: 'scale' + mosaic_center: 0.0 + aug_scale_min: 0.5 + aug_scale_max: 1.5 + max_num_instances: 300 + letter_box: true + random_flip: true + aug_rand_translate: 0.5 + area_thresh: 0.1 + validation_data: + input_path: '/readahead/200M/placer/prod/home/tensorflow-performance-data/datasets/coco/val*' diff --git a/official/vision/beta/projects/yolo/configs/experiments/scaled-yolo/detection/yolo_tiny_416_tpu.yaml b/official/vision/beta/projects/yolo/configs/experiments/scaled-yolo/detection/yolo_tiny_416_tpu.yaml new file mode 100755 index 00000000000..f31ecd0bb16 --- /dev/null +++ b/official/vision/beta/projects/yolo/configs/experiments/scaled-yolo/detection/yolo_tiny_416_tpu.yaml @@ -0,0 +1,75 @@ +# --experiment_type=yolo_darknet +# mAP 43.0 +runtime: + distribution_strategy: 'tpu' + mixed_precision_dtype: 'bfloat16' +task: + model: + input_size: [416, 416, 3] + backbone: + type: 'darknet' + darknet: + model_id: 'cspdarknettiny' + max_level: 5 + min_level: 4 + decoder: + type: yolo_decoder + yolo_decoder: + version: v4 + type: tiny + head: + smart_bias: true + detection_generator: + box_type: + 'all': scaled + scale_xy: + 'all': 2.0 + max_boxes: 300 + nms_type: iou + iou_thresh: 0.001 + nms_thresh: 0.65 + loss: + use_scaled_loss: true 
+ update_on_repeat: true + box_loss_type: + 'all': ciou + ignore_thresh: + 'all': 0.0 + iou_normalizer: + 'all': 0.05 + cls_normalizer: + 'all': 0.3 + object_normalizer: + '5': 0.28 + '4': 0.70 + '3': 2.80 + objectness_smooth: + 'all': 1.0 + norm_activation: + activation: leaky + norm_epsilon: 0.0001 + norm_momentum: 0.97 + use_sync_bn: true + num_classes: 80 + anchor_boxes: + anchors_per_scale: 3 + boxes: [box: [10, 14], box: [23, 27], box: [37, 58], + box: [81, 82], box: [135, 169], box: [344, 319]] + train_data: + input_path: 'gs://cam2-datasets/coco/train*' + shuffle_buffer_size: 10000 + parser: + mosaic: + mosaic_frequency: 1.0 + mixup_frequency: 0.0 + mosaic_crop_mode: 'scale' + mosaic_center: 0.25 + aug_scale_min: 0.1 + aug_scale_max: 1.9 + max_num_instances: 300 + letter_box: true + random_flip: true + aug_rand_translate: 0.1 + area_thresh: 0.1 + validation_data: + input_path: 'gs://cam2-datasets/coco/val*' diff --git a/official/vision/beta/projects/yolo/configs/experiments/yolov4-csp/tpu/640.yaml b/official/vision/beta/projects/yolo/configs/experiments/scaled-yolo/tpu/640.yaml similarity index 100% rename from official/vision/beta/projects/yolo/configs/experiments/yolov4-csp/tpu/640.yaml rename to official/vision/beta/projects/yolo/configs/experiments/scaled-yolo/tpu/640.yaml diff --git a/official/vision/beta/projects/yolo/configs/experiments/yolov4/tpu/512.yaml b/official/vision/beta/projects/yolo/configs/experiments/yolov4/detection/yolov4_512_tpu.yaml similarity index 100% rename from official/vision/beta/projects/yolo/configs/experiments/yolov4/tpu/512.yaml rename to official/vision/beta/projects/yolo/configs/experiments/yolov4/detection/yolov4_512_tpu.yaml diff --git a/official/vision/beta/projects/yolo/configs/experiments/yolov4/detection/yolov4_tiny_416_tpu.yaml b/official/vision/beta/projects/yolo/configs/experiments/yolov4/detection/yolov4_tiny_416_tpu.yaml new file mode 100755 index 00000000000..c992bc5f8e1 --- /dev/null +++ b/official/vision/beta/projects/yolo/configs/experiments/yolov4/detection/yolov4_tiny_416_tpu.yaml @@ -0,0 +1,102 @@ +# --experiment_type=yolo_darknet +# mAP 43.0 +runtime: + distribution_strategy: 'tpu' + mixed_precision_dtype: 'bfloat16' +task: + smart_bias_lr: 0.0 + model: + darknet_based_model: true + input_size: [416, 416, 3] + backbone: + type: 'darknet' + darknet: + model_id: 'cspdarknettiny' + max_level: 5 + min_level: 4 + decoder: + type: yolo_decoder + yolo_decoder: + version: v4 + type: tiny + head: + smart_bias: true + detection_generator: + box_type: + 'all': original + scale_xy: + '5': 1.05 + '4': 1.05 + max_boxes: 300 + nms_type: iou + iou_thresh: 0.001 + nms_thresh: 0.60 + loss: + use_scaled_loss: false + box_loss_type: + 'all': ciou + ignore_thresh: + 'all': 0.7 + iou_normalizer: + 'all': 0.07 + cls_normalizer: + 'all': 1.0 + object_normalizer: + 'all': 1.0 + objectness_smooth: + 'all': 0.0 + max_delta: + 'all': 5.0 + norm_activation: + activation: leaky + norm_epsilon: 0.0001 + norm_momentum: 0.99 + use_sync_bn: true + num_classes: 80 + anchor_boxes: + anchors_per_scale: 3 + boxes: [box: [10, 14], box: [23, 27], box: [37, 58], + box: [81, 82], box: [135, 169], box: [344, 319]] + train_data: + global_batch_size: 256 + shuffle_buffer_size: 10000 + dtype: float32 + input_path: 'gs://cam2-datasets/coco/train*' + is_training: true + drop_remainder: true + parser: + mosaic: + mosaic_frequency: 0.75 + mixup_frequency: 0.0 + mosaic_crop_mode: 'crop' + mosaic_center: 0.2 + aug_scale_min: 0.7 + aug_scale_max: 1.3 + jitter: 0.3 + 
max_num_instances: 200 + letter_box: false + random_flip: true + aug_rand_saturation: 1.5 + aug_rand_brightness: 1.5 + aug_rand_hue: 0.1 + aug_scale_min: 0.5 + aug_scale_max: 1.5 + aug_rand_translate: 0.0 + jitter: 0.3 + area_thresh: 0.1 + random_pad: true + use_tie_breaker: true + anchor_thresh: 0.4 + validation_data: + global_batch_size: 8 + dtype: float32 + input_path: 'gs://cam2-datasets/coco/val*' + is_training: false + drop_remainder: true + parser: + max_num_instances: 200 + letter_box: false + use_tie_breaker: true + anchor_thresh: 0.4 + weight_decay: 0.000 + annotation_file: null diff --git a/official/vision/beta/projects/yolo/configs/experiments/yolov4/imagenet_pretraining/cspdarknet53_256_tpu.yaml b/official/vision/beta/projects/yolo/configs/experiments/yolov4/imagenet_pretraining/cspdarknet53_256_tpu.yaml new file mode 100644 index 00000000000..10dbdc56855 --- /dev/null +++ b/official/vision/beta/projects/yolo/configs/experiments/yolov4/imagenet_pretraining/cspdarknet53_256_tpu.yaml @@ -0,0 +1,51 @@ +runtime: + distribution_strategy: 'mirrored' + mixed_precision_dtype: 'float32' +task: + model: + num_classes: 1001 + input_size: [256, 256, 3] + backbone: + type: 'darknet' + darknet: + model_id: 'cspdarknet53' + norm_activation: + activation: 'mish' + losses: + l2_weight_decay: 0.0005 + one_hot: true + label_smoothing: 0.1 + train_data: + input_path: 'imagenet-2012-tfrecord/train*' + is_training: true + global_batch_size: 128 + dtype: 'float16' + validation_data: + input_path: 'imagenet-2012-tfrecord/valid*' + is_training: true + global_batch_size: 128 + dtype: 'float16' + drop_remainder: false +trainer: + train_steps: 1200000 # epochs: 120 + validation_steps: 400 # size of validation data + validation_interval: 10000 + steps_per_loop: 10000 + summary_interval: 10000 + checkpoint_interval: 10000 + optimizer_config: + optimizer: + type: 'sgd' + sgd: + momentum: 0.9 + learning_rate: + type: 'polynomial' + polynomial: + initial_learning_rate: 0.1 + end_learning_rate: 0.0001 + power: 4.0 + decay_steps: 1200000 + warmup: + type: 'linear' + linear: + warmup_steps: 1000 # learning rate rises from 0 to 0.1 over 1000 steps diff --git a/official/vision/beta/projects/yolo/configs/experiments/yolox/yolov4_512_tpu.yaml b/official/vision/beta/projects/yolo/configs/experiments/yolox/yolov4_512_tpu.yaml new file mode 100644 index 00000000000..e918d34581b --- /dev/null +++ b/official/vision/beta/projects/yolo/configs/experiments/yolox/yolov4_512_tpu.yaml @@ -0,0 +1,135 @@ +# --experiment_type=yolo_darknet +# mAP 43.0 +runtime: + distribution_strategy: 'tpu' + mixed_precision_dtype: 'bfloat16' +task: + smart_bias_lr: 0.0 + model: + darknet_based_model: true + input_size: [512, 512, 3] + backbone: + type: 'darknet' + darknet: + model_id: 'cspdarknet53' + max_level: 5 + min_level: 3 + decoder: + type: yolo_decoder + yolo_decoder: + version: v4 + type: regular + activation: leaky + head: + smart_bias: true + detection_generator: + box_type: + 'all': original + scale_xy: + '5': 1.05 + '4': 1.1 + '3': 1.2 + max_boxes: 200 + nms_type: iou + iou_thresh: 0.001 + nms_thresh: 0.60 + loss: + use_scaled_loss: false + box_loss_type: + 'all': ciou + ignore_thresh: + 'all': 0.7 + iou_normalizer: + 'all': 0.07 + cls_normalizer: + 'all': 1.0 + object_normalizer: + 'all': 1.0 + objectness_smooth: + 'all': 0.0 + max_delta: + 'all': 5.0 + norm_activation: + activation: mish + norm_epsilon: 0.0001 + norm_momentum: 0.99 + use_sync_bn: true + num_classes: 80 + anchor_boxes: + anchors_per_scale: 3 + boxes: [box: [12, 
16], box: [19, 36], box: [40, 28], + box: [36, 75], box: [76, 55], box: [72, 146], + box: [142, 110], box: [192, 243], box: [459, 401]] + train_data: + global_batch_size: 64 + dtype: float32 + input_path: 'gs://cam2-datasets/coco/train*' + is_training: true + drop_remainder: true + seed: 1000 + parser: + mosaic: + mosaic_frequency: 0.75 + mixup_frequency: 0.0 + mosaic_crop_mode: 'crop' + mosaic_center: 0.2 + aug_scale_min: 0.2 + aug_scale_max: 1.6 + jitter: 0.3 + max_num_instances: 200 + letter_box: false + random_flip: true + aug_rand_saturation: 1.5 + aug_rand_brightness: 1.5 + aug_rand_hue: 0.1 + aug_scale_min: 0.1 + aug_scale_max: 1.9 + aug_rand_translate: 0.0 + jitter: 0.3 + area_thresh: 0.1 + random_pad: true + use_tie_breaker: true + anchor_thresh: 0.4 + validation_data: + global_batch_size: 8 + dtype: float32 + input_path: 'gs://cam2-datasets/coco/val*' + is_training: false + drop_remainder: true + parser: + max_num_instances: 200 + letter_box: false + use_tie_breaker: true + anchor_thresh: 0.4 + weight_decay: 0.000 +trainer: + train_steps: 555000 + validation_steps: 625 + steps_per_loop: 1850 + summary_interval: 1850 + validation_interval: 9250 + checkpoint_interval: 1850 + optimizer_config: + ema: + average_decay: 0.9998 + trainable_weights_only: false + dynamic_decay: true + learning_rate: + type: stepwise + stepwise: + boundaries: [400000] + name: PiecewiseConstantDecay + values: [0.00131, 0.000131] + optimizer: + type: sgd_torch + sgd_torch: + momentum: 0.949 + momentum_start: 0.949 + nesterov: true + warmup_steps: 1000 + weight_decay: 0.0005 + name: SGD + warmup: + type: 'linear' + linear: + warmup_steps: 1000 # learning rate rises from 0 to 0.0013 over 1000 steps diff --git a/official/vision/beta/projects/yolo/configs/experiments/yolox/yolox.yaml b/official/vision/beta/projects/yolo/configs/experiments/yolox/yolox.yaml new file mode 100644 index 00000000000..1b3ae51979f --- /dev/null +++ b/official/vision/beta/projects/yolo/configs/experiments/yolox/yolox.yaml @@ -0,0 +1,92 @@ +# --experiment_type=yolox +# mAP +runtime: + distribution_strategy: 'tpu' + mixed_precision_dtype: 'float32' + tpu_enable_xla_dynamic_padder: false +task: + model: + input_size: [640, 640, 3] + backbone: + type: 'darknet' + darknet: + model_id: 'darknet53' + max_level: 5 + min_level: 3 + decoder: + type: yolo_decoder + yolo_decoder: + version: vx + type: regular + head: + smart_bias: true + detection_generator: + box_type: + 'all': anchor_free + scale_xy: + 'all': 2.0 + max_boxes: 300 + nms_type: iou + iou_thresh: 0.001 + nms_thresh: 0.65 + loss: + use_scaled_loss: true + update_on_repeat: true + box_loss_type: + 'all': ciou + ignore_thresh: + 'all': 0.0 + iou_normalizer: + 'all': 0.05 + cls_normalizer: + 'all': 0.3 + object_normalizer: + '5': 0.28 + '4': 0.70 + '3': 2.80 + objectness_smooth: + 'all': 1.0 + norm_activation: + use_sync_bn: true + num_classes: 80 + anchor_boxes: + level_limits: [64, 128] + anchors_per_scale: 1 + train_data: + input_path: 'gs://cam2-datasets/coco/train*' + shuffle_buffer_size: 10000 + parser: + mosaic: + mosaic_frequency: 1.0 + mixup_frequency: 0.2 + mosaic_crop_mode: 'scale' + mosaic_center: 0.25 + aug_scale_min: 0.1 + aug_scale_max: 1.9 + max_num_instances: 300 + letter_box: true + random_flip: true + aug_rand_translate: 0.1 + area_thresh: 0.1 + validation_data: + input_path: 'gs://cam2-datasets/coco/val*' +trainer: + train_steps: 277200 # epoch 300 + optimizer_config: + learning_rate: + cosine: + initial_learning_rate: 0.1 + decay_steps: 277200 # epoch 300 + 
    optimizer:
+      type: sgd_torch
+      sgd_torch:
+        momentum: 0.9
+        momentum_start: 0.9
+        nesterov: true
+        warmup_steps: 4620
+        weight_decay: 0.0005
+        name: SGD
+    warmup:
+      type: 'linear'
+      linear:
+        warmup_steps: 4620 # 5 epochs
diff --git a/official/vision/beta/projects/yolo/configs/head.py b/official/vision/beta/projects/yolo/configs/head.py
new file mode 100644
index 00000000000..0014b3e9a09
--- /dev/null
+++ b/official/vision/beta/projects/yolo/configs/head.py
@@ -0,0 +1,37 @@
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Head configurations."""
+import dataclasses
+from typing import List, Optional
+
+from official.modeling import hyperparams
+
+
+@dataclasses.dataclass
+class YOLOXHead(hyperparams.Config):
+  """Parameterization for the YOLOX head."""
+  num_classes: Optional[int] = None
+  width: float = 1.0
+  strides: List[int] = dataclasses.field(default_factory=lambda: [8, 16, 32])
+  in_channels: List[int] = dataclasses.field(
+      default_factory=lambda: [256, 512, 1024])
+  depthwise: bool = False
+  activation: str = 'silu'
+
+
+@dataclasses.dataclass
+class Head(hyperparams.OneOfConfig):
+  type: Optional[str] = 'yolox_head'
+  yolox_head: YOLOXHead = YOLOXHead()
diff --git a/official/vision/beta/projects/yolo/configs/yolo.py b/official/vision/beta/projects/yolo/configs/yolo.py
index 37af5e73940..6435d09f708 100755
--- a/official/vision/beta/projects/yolo/configs/yolo.py
+++ b/official/vision/beta/projects/yolo/configs/yolo.py
@@ -24,9 +24,7 @@
 from official.modeling import hyperparams
 from official.vision.beta.configs import common
 from official.vision.beta.projects.yolo import optimization
-from official.vision.beta.projects.yolo.configs import backbones
-from official.vision.beta.projects.yolo.configs import decoders
-
+from official.vision.beta.projects.yolo.configs import backbones, decoders
 # pytype: disable=annotation-type-mismatch
@@ -137,6 +135,15 @@ class YoloHead(hyperparams.Config):
   smart_bias: bool = True
 
 
+@dataclasses.dataclass
+class YoloxHead(hyperparams.Config):
+  """Parameterization for the YOLOX Head."""
+  width: float = 1.0
+  depthwise: bool = False
+  activation: str = 'silu'
+  smart_bias: bool = True
+
+
 @dataclasses.dataclass
 class YoloDetectionGenerator(hyperparams.Config):
   box_type: FPNConfig = dataclasses.field(
@@ -173,8 +180,31 @@ class YoloLoss(hyperparams.Config):
   label_smoothing: float = 0.0
   use_scaled_loss: bool = True
   update_on_repeat: bool = True
-
+
+@dataclasses.dataclass
+class YoloxLoss(hyperparams.Config):
+  ignore_thresh: FPNConfig = dataclasses.field(
+      default_factory=_build_dict(MIN_LEVEL, MAX_LEVEL, 0.0))
+  truth_thresh: FPNConfig = dataclasses.field(
+      default_factory=_build_dict(MIN_LEVEL, MAX_LEVEL, 1.0))
+  box_loss_type: FPNConfig = dataclasses.field(
+      default_factory=_build_dict(MIN_LEVEL, MAX_LEVEL, 'iou'))
+  iou_normalizer: FPNConfig = dataclasses.field(
default_factory=_build_dict(MIN_LEVEL, MAX_LEVEL, 1.0)) + cls_normalizer: FPNConfig = dataclasses.field( + default_factory=_build_dict(MIN_LEVEL, MAX_LEVEL, 1.0)) + object_normalizer: FPNConfig = dataclasses.field( + default_factory=_build_dict(MIN_LEVEL, MAX_LEVEL, 1.0)) + max_delta: FPNConfig = dataclasses.field( + default_factory=_build_dict(MIN_LEVEL, MAX_LEVEL, np.inf)) + objectness_smooth: FPNConfig = dataclasses.field( + default_factory=_build_dict(MIN_LEVEL, MAX_LEVEL, 0.0)) + label_smoothing: float = 0.0 + use_scaled_loss: bool = True + update_on_repeat: bool = True + + @dataclasses.dataclass class Box(hyperparams.Config): box: List[int] = dataclasses.field(default=list) @@ -233,6 +263,28 @@ class Yolo(hyperparams.Config): darknet_based_model: bool = False +@dataclasses.dataclass +class Yolox(hyperparams.Config): + input_size: Optional[List[int]] = dataclasses.field( + default_factory=lambda: [640, 640, 3]) + backbone: backbones.Backbone = backbones.Backbone( + type='darknet', darknet=backbones.Darknet(model_id='darknet53')) + decoder: decoders.Decoder = decoders.Decoder( + type='yolo_decoder', + yolo_decoder=decoders.YoloDecoder(version='vx', type='regular')) + head: YoloxHead = YoloxHead() + detection_generator: YoloDetectionGenerator = YoloDetectionGenerator() + loss: YoloLoss = YoloLoss() + norm_activation: common.NormActivation = common.NormActivation( + activation='mish', + use_sync_bn=True, + norm_momentum=0.99, + norm_epsilon=0.001) + num_classes: int = 80 + anchor_boxes: AnchorBoxes = AnchorBoxes() + darknet_based_model: bool = False + + @dataclasses.dataclass class YoloTask(cfg.TaskConfig): per_category_metrics: bool = False @@ -483,7 +535,7 @@ def scaled_yolo() -> cfg.ExperimentConfig: 'momentum_start': 0.8, 'nesterov': True, 'warmup_steps': steps_per_epoch * warmup_epochs, - 'weight_decay': 0.0005 * train_batch_size / 64.0, + 'weight_decay': 0.0005, } }, 'learning_rate': { @@ -508,3 +560,341 @@ def scaled_yolo() -> cfg.ExperimentConfig: ]) return config + + +@exp_factory.register_config_factory('large_yolo') +def large_yolo() -> cfg.ExperimentConfig: + """COCO object detection with YOLOv4-csp and v4.""" + train_batch_size = 64 + eval_batch_size = 8 + train_epochs = 300 + fine_tune_epochs = 450 + warmup_epochs = 3 + + validation_interval = 5 + steps_per_epoch = COCO_TRAIN_EXAMPLES // train_batch_size + + max_num_instances = 300 + + config = cfg.ExperimentConfig( + runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'), + task=YoloTask( + smart_bias_lr=0.1, + init_checkpoint_modules='', + annotation_file=None, + weight_decay=0.0, + model=Yolo( + darknet_based_model=False, + norm_activation=common.NormActivation( + activation='mish', + use_sync_bn=True, + norm_epsilon=0.0001, + norm_momentum=0.97), + head=YoloHead(smart_bias=True), + loss=YoloLoss(use_scaled_loss=True)), + train_data=DataConfig( + input_path=os.path.join(COCO_INPUT_PATH_BASE, 'train*'), + is_training=True, + global_batch_size=train_batch_size, + dtype='float32', + parser=Parser( + aug_rand_saturation=0.7, + aug_rand_brightness=0.4, + aug_rand_hue=0.015, + letter_box=True, + use_tie_breaker=True, + best_match_only=True, + anchor_thresh=4.0, + random_pad=False, + area_thresh=0.1, + max_num_instances=max_num_instances, + mosaic=Mosaic( + mosaic_crop_mode='scale', + mosaic_frequency=1.0, + mixup_frequency=0.0, + ))), + validation_data=DataConfig( + input_path=os.path.join(COCO_INPUT_PATH_BASE, 'val*'), + is_training=False, + global_batch_size=eval_batch_size, + drop_remainder=True, + dtype='float32', + 
parser=Parser( + letter_box=True, + use_tie_breaker=True, + best_match_only=True, + anchor_thresh=4.0, + area_thresh=0.1, + max_num_instances=max_num_instances, + ))), + trainer=cfg.TrainerConfig( + train_steps=train_epochs * steps_per_epoch, + validation_steps=COCO_VAL_EXAMPLES // eval_batch_size, + validation_interval=validation_interval * steps_per_epoch, + steps_per_loop=steps_per_epoch, + summary_interval=steps_per_epoch, + checkpoint_interval=steps_per_epoch, + optimizer_config=optimization.OptimizationConfig({ + 'ema': { + 'average_decay': 0.9999, + 'trainable_weights_only': False, + 'dynamic_decay': True, + }, + 'optimizer': { + 'type': 'sgd_torch', + 'sgd_torch': { + 'momentum': 0.937, + 'momentum_start': 0.9, + 'nesterov': True, + 'warmup_steps': steps_per_epoch * warmup_epochs, + 'weight_decay': 0.0005, + } + }, + 'learning_rate': { + 'type': 'cosine', + 'cosine': { + 'initial_learning_rate': 0.01, + 'alpha': 0.2, + 'decay_steps': fine_tune_epochs * steps_per_epoch, + } + }, + 'warmup': { + 'type': 'linear', + 'linear': { + 'warmup_steps': steps_per_epoch * warmup_epochs, + 'warmup_learning_rate': 0 + } + } + })), + restrictions=[ + 'task.train_data.is_training != None', + 'task.validation_data.is_training != None' + ]) + + return config + + +@exp_factory.register_config_factory('yolo_tiny') +def yolo_tiny() -> cfg.ExperimentConfig: + """COCO object detection with YOLOv3 and v4.""" + train_batch_size = 256 + eval_batch_size = 8 + train_epochs = 600 + steps_per_epoch = COCO_TRAIN_EXAMPLES // train_batch_size + validation_interval = 10 + + max_num_instances = 200 + config = cfg.ExperimentConfig( + runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'), + task=YoloTask( + smart_bias_lr=0.1, + init_checkpoint='', + init_checkpoint_modules='backbone', + annotation_file=None, + weight_decay=0.0, + model=Yolo( + darknet_based_model=True, + norm_activation=common.NormActivation(use_sync_bn=True, + activation="leaky"), + head=YoloHead(smart_bias=True), + loss=YoloLoss(use_scaled_loss=False, update_on_repeat=True)), + train_data=DataConfig( + input_path=os.path.join(COCO_INPUT_PATH_BASE, 'train*'), + is_training=True, + global_batch_size=train_batch_size, + dtype='float32', + parser=Parser( + letter_box=False, + aug_rand_saturation=1.5, + aug_rand_brightness=1.5, + aug_rand_hue=0.1, + use_tie_breaker=True, + best_match_only=False, + anchor_thresh=0.4, + area_thresh=0.1, + max_num_instances=max_num_instances, + mosaic=Mosaic( + mosaic_frequency=0.75, + mixup_frequency=0.0, + mosaic_crop_mode='crop', + mosaic_center=0.2))), + validation_data=DataConfig( + input_path=os.path.join(COCO_INPUT_PATH_BASE, 'val*'), + is_training=False, + global_batch_size=eval_batch_size, + drop_remainder=True, + dtype='float32', + parser=Parser( + letter_box=False, + use_tie_breaker=True, + best_match_only=False, + anchor_thresh=0.4, + area_thresh=0.1, + max_num_instances=max_num_instances, + ))), + trainer=cfg.TrainerConfig( + train_steps=train_epochs * steps_per_epoch, + validation_steps=COCO_VAL_EXAMPLES // eval_batch_size, + validation_interval=validation_interval * steps_per_epoch, + steps_per_loop=steps_per_epoch, + summary_interval=steps_per_epoch, + checkpoint_interval=steps_per_epoch, + optimizer_config=optimization.OptimizationConfig({ + 'ema': { + 'average_decay': 0.9998, + 'trainable_weights_only': False, + 'dynamic_decay': True, + }, + 'optimizer': { + 'type': 'sgd_torch', + 'sgd_torch': { + 'momentum': 0.9, + 'momentum_start': 0.9, + 'nesterov': True, + 'warmup_steps': 1000, + 'weight_decay': 
0.0005, + } + }, + 'learning_rate': { + 'type': 'stepwise', + 'stepwise': { + 'boundaries': [ + 0.8 * train_epochs * steps_per_epoch + ], + 'values': [ + 0.00261 * train_batch_size / 64.0, + 0.000261 * train_batch_size / 64.0, + ] + } + }, + 'warmup': { + 'type': 'linear', + 'linear': { + 'warmup_steps': 1000, + 'warmup_learning_rate': 0 + } + } + })), + restrictions=[ + 'task.train_data.is_training != None', + 'task.validation_data.is_training != None' + ]) + + return config + + +@exp_factory.register_config_factory('yolox_regular') +def yolox_regular() -> cfg.ExperimentConfig: + """COCO object detection with YOLOvx.""" + train_batch_size = 128 + eval_batch_size = 8 + train_epochs = 300 + steps_per_epoch = COCO_TRAIN_EXAMPLES // train_batch_size + validation_interval = 5 + + max_num_instances = 200 + config = cfg.ExperimentConfig( + runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'), + task=YoloTask( + smart_bias_lr=0.1, + init_checkpoint='', + init_checkpoint_modules='backbone', + annotation_file=None, + weight_decay=0.0, + model=Yolox( + darknet_based_model=True, + norm_activation=common.NormActivation(use_sync_bn=True), + head=YoloxHead(smart_bias=True), + loss=YoloLoss(use_scaled_loss=True, update_on_repeat=True), + anchor_boxes=AnchorBoxes( + anchors_per_scale=3, + boxes=[ + Box(box=[12, 16]), + Box(box=[19, 36]), + Box(box=[40, 28]), + Box(box=[36, 75]), + Box(box=[76, 55]), + Box(box=[72, 146]), + Box(box=[142, 110]), + Box(box=[192, 243]), + Box(box=[459, 401]) + ])), + train_data=DataConfig( + input_path=os.path.join(COCO_INPUT_PATH_BASE, 'train*'), + is_training=True, + global_batch_size=train_batch_size, + dtype='float32', + parser=Parser( + letter_box=False, + aug_rand_saturation=1.5, + aug_rand_brightness=1.5, + aug_rand_hue=0.1, + use_tie_breaker=True, + best_match_only=False, + anchor_thresh=0.4, + area_thresh=0.1, + max_num_instances=max_num_instances, + mosaic=Mosaic( + mosaic_frequency=0.75, + mixup_frequency=0.0, + mosaic_crop_mode='crop', + mosaic_center=0.2))), + validation_data=DataConfig( + input_path=os.path.join(COCO_INPUT_PATH_BASE, 'val*'), + is_training=False, + global_batch_size=eval_batch_size, + drop_remainder=True, + dtype='float32', + parser=Parser( + letter_box=False, + use_tie_breaker=True, + best_match_only=False, + anchor_thresh=0.4, + area_thresh=0.1, + max_num_instances=max_num_instances, + ))), + trainer=cfg.TrainerConfig( + train_steps=train_epochs * steps_per_epoch, + validation_steps=COCO_VAL_EXAMPLES // eval_batch_size, + validation_interval=validation_interval * steps_per_epoch, + steps_per_loop=steps_per_epoch, + summary_interval=steps_per_epoch, + checkpoint_interval=steps_per_epoch, + optimizer_config=optimization.OptimizationConfig({ + 'ema': { + 'average_decay': 0.9998, + 'trainable_weights_only': False, + 'dynamic_decay': True, + }, + 'optimizer': { + 'type': 'sgd_torch', + 'sgd_torch': { + 'momentum': 0.949, + 'momentum_start': 0.949, + 'nesterov': True, + 'warmup_steps': 1000, + 'weight_decay': 0.0005, + } + }, + 'learning_rate': { + 'type': 'cosine', + 'cosine': { + 'initial_learning_rate': 0.02, + 'alpha': 0.2, + 'decay_steps': train_epochs * steps_per_epoch, + } + }, + 'warmup': { + 'type': 'linear', + 'linear': { + 'warmup_steps': 5 * steps_per_epoch, + 'warmup_learning_rate': 0 + } + } + })), + restrictions=[ + 'task.train_data.is_training != None', + 'task.validation_data.is_training != None' + ]) + + return config diff --git a/official/vision/beta/projects/yolo/dataloaders/__init__.py 
b/official/vision/beta/projects/yolo/dataloaders/__init__.py index a25710c222e..e04127d3fc8 100644 --- a/official/vision/beta/projects/yolo/dataloaders/__init__.py +++ b/official/vision/beta/projects/yolo/dataloaders/__init__.py @@ -11,5 +11,3 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - - diff --git a/official/vision/beta/projects/yolo/dataloaders/classification_input.py b/official/vision/beta/projects/yolo/dataloaders/classification_input.py index 57d7ec2382a..07498eb6476 100755 --- a/official/vision/beta/projects/yolo/dataloaders/classification_input.py +++ b/official/vision/beta/projects/yolo/dataloaders/classification_input.py @@ -14,6 +14,7 @@ """Classification decoder and parser.""" import tensorflow as tf + from official.vision.beta.dataloaders import classification_input from official.vision.beta.ops import preprocess_ops diff --git a/official/vision/beta/projects/yolo/dataloaders/yolo_input.py b/official/vision/beta/projects/yolo/dataloaders/yolo_input.py index a9953ce1e8d..bda46d15382 100755 --- a/official/vision/beta/projects/yolo/dataloaders/yolo_input.py +++ b/official/vision/beta/projects/yolo/dataloaders/yolo_input.py @@ -15,12 +15,10 @@ """Detection Data parser and processing for YOLO.""" import tensorflow as tf -from official.vision.beta.dataloaders import parser -from official.vision.beta.dataloaders import utils +from official.vision.beta.dataloaders import parser, utils from official.vision.beta.ops import box_ops as bbox_ops from official.vision.beta.ops import preprocess_ops -from official.vision.beta.projects.yolo.ops import anchor -from official.vision.beta.projects.yolo.ops import preprocessing_ops +from official.vision.beta.projects.yolo.ops import anchor, preprocessing_ops class Parser(parser.Parser): @@ -237,14 +235,14 @@ def _parse_train_data(self, data): affine=affine, shuffle_boxes=False, area_thresh=self._area_thresh, - augment=True, + filter_and_clip_boxes=True, seed=self._seed) classes = tf.gather(classes, inds) info = infos[-1] else: image = tf.image.resize( image, (self._image_h, self._image_w), method='nearest') - output_size = tf.cast([640, 640], tf.float32) + output_size = tf.cast([self._image_h, self._image_w], tf.float32) boxes_ = bbox_ops.denormalize_boxes(boxes, output_size) inds = bbox_ops.get_non_empty_box_indices(boxes_) boxes = tf.gather(boxes, inds) @@ -286,7 +284,8 @@ def _parse_eval_data(self, data): # Clip and clean boxes. image = image / 255.0 boxes, inds = preprocessing_ops.transform_and_clip_boxes( - boxes, infos, shuffle_boxes=False, area_thresh=0.0, augment=True) + boxes, infos, shuffle_boxes=False, area_thresh=0.0, + filter_and_clip_boxes=False) classes = tf.gather(classes, inds) info = infos[-1] @@ -342,17 +341,17 @@ def _build_label(self, # Update the labels dictionary. if not is_training: - # Sets up groundtruth data for evaluation. 
      groundtruths = {
           'source_id': labels['source_id'],
-          'height': height,
-          'width': width,
-          'num_detections': tf.shape(gt_boxes)[0],
+          'height': data["height"],
+          'width': data["width"],
+          'num_detections': tf.shape(data["groundtruth_boxes"])[0],
           'image_info': info,
-          'boxes': gt_boxes,
-          'classes': gt_classes,
-          'areas': tf.gather(data['groundtruth_area'], inds),
+          'boxes': bbox_ops.denormalize_boxes(data["groundtruth_boxes"],
+              tf.cast([data["height"], data["width"]], gt_boxes.dtype)),
+          'classes': data["groundtruth_classes"],
+          'areas': data["groundtruth_area"],
           'is_crowds':
               tf.cast(tf.gather(data['groundtruth_is_crowd'], inds), tf.int32),
       }
diff --git a/official/vision/beta/projects/yolo/dataloaders/yolo_input_test.py b/official/vision/beta/projects/yolo/dataloaders/yolo_input_test.py
new file mode 100644
index 00000000000..8c9003a0bfa
--- /dev/null
+++ b/official/vision/beta/projects/yolo/dataloaders/yolo_input_test.py
@@ -0,0 +1,122 @@
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Yolo dataset testing functions."""
+import os
+
+import tensorflow as tf
+from absl.testing import parameterized
+
+from official.core import task_factory, train_utils
+from official.vision.beta.projects.yolo.common import \
+    registry_imports  # pylint: disable=unused-import
+from official.vision.beta.projects.yolo.configs import \
+    darknet_classification as dcfg
+from official.vision.beta.projects.yolo.tasks import \
+    image_classification as imc
+
+PATH_TO_COCO = '/media/vbanna/DATA_SHARE/CV/datasets/COCO_raw/records/'
+
+def test_yolo_input_task(scaled_pipeline=True, batch_size=1):
+  if not scaled_pipeline:
+    experiment = "yolo_darknet"
+    config_path = [
+        "official/vision/beta/projects/yolo/configs/experiments/yolov4/detection/yolov4_512_tpu.yaml"]
+  else:
+    experiment = "large_yolo"
+    # config_path = [
+    #     "official/vision/beta/projects/yolo/configs/experiments/scaled-yolo/detection/yolo_l_p6_1280_tpu.yaml"]
+    config_path = [
+        "official/vision/beta/projects/yolo/configs/experiments/scaled-yolo/detection/yolo_l_p7_1536_tpu.yaml"]
+
+  config = train_utils.ParseConfigOptions(experiment=experiment,
+                                          config_file=config_path)
+  params = train_utils.parse_configuration(config)
+  config = params.task
+  task = task_factory.get_task(params.task)
+
+  config.train_data.global_batch_size = batch_size
+  config.validation_data.global_batch_size = 1
+  config.train_data.dtype = 'float32'
+  config.validation_data.dtype = 'float32'
+  config.validation_data.shuffle_buffer_size = 1
+  config.train_data.shuffle_buffer_size = 1
+  config.train_data.input_path = os.path.join(PATH_TO_COCO, 'train*')
+  config.validation_data.input_path = os.path.join(PATH_TO_COCO, 'val*')
+
+  with tf.device('/CPU:0'):
+    train_data = task.build_inputs(config.train_data)
+    test_data = task.build_inputs(config.validation_data)
+  return train_data, test_data, config
+
+def test_yolo_pipeline_visually(is_training=True, num=30):
+  # Visualize the data pipeline.
+  import matplotlib.pyplot as plt
+  dataset, testing, _ = test_yolo_input_task()
+
+  data = dataset if is_training else testing
+  data = data.take(num)
+  for l, (image, label) in enumerate(data):
+    image = tf.image.draw_bounding_boxes(
+        image, label['bbox'], [[0.0, 1.0, 1.0]])
+
+    gt = label['true_conf']
+
+    obj3 = tf.clip_by_value(gt['3'][..., 0], 0.0, 1.0)
+    obj4 = tf.clip_by_value(gt['4'][..., 0], 0.0, 1.0)
+    obj5 = tf.clip_by_value(gt['5'][..., 0], 0.0, 1.0)
+    obj6 = tf.clip_by_value(gt['6'][..., 0], 0.0, 1.0)
+    obj7 = tf.clip_by_value(gt['7'][..., 0], 0.0, 1.0)
+
+    for shind in range(1):
+      fig, axe = plt.subplots(2, 4)
+
+      image = image[shind]
+
+      axe[0, 0].imshow(image)
+      axe[0, 1].imshow(obj3[shind, ..., :3].numpy())
+      axe[0, 2].imshow(obj4[shind, ..., :3].numpy())
+      axe[0, 3].imshow(obj5[shind, ..., :3].numpy())
+      axe[1, 0].imshow(obj6[shind, ..., :3].numpy())
+      axe[1, 2].imshow(obj7[shind, ..., :3].numpy())
+      axe[1, 1].imshow(obj6[shind, ..., 3].numpy())
+      axe[1, 3].imshow(obj7[shind, ..., 3].numpy())
+
+      fig.set_size_inches(18.5, 6.5, forward=True)
+      plt.tight_layout()
+      plt.show()
+
+class YoloDetectionInputTest(tf.test.TestCase, parameterized.TestCase):
+
+  @parameterized.named_parameters(('scaled', True), ('darknet', False))
+  def test_yolo_input(self, scaled_pipeline):
+    # Builds a pipeline from the config and tests the data pipeline shapes.
+    # dataset, _, params = test_yolo_input_task(
+    #     scaled_pipeline=scaled_pipeline,
+    #     batch_size=1)
+    _, dataset, params = test_yolo_input_task(
+        scaled_pipeline=scaled_pipeline,
+        batch_size=1)
+
+    dataset = dataset.take(100)
+
+    for image, label in dataset:
+      self.assertAllEqual(image.shape, ([1] + params.model.input_size))
+      self.assertTrue(
+          tf.reduce_all(tf.math.logical_and(image >= 0, image <= 1)))
+
+
+if __name__ == '__main__':
+  # tf.test.main()
+  test_yolo_pipeline_visually(is_training=True, num=20)
diff --git a/official/vision/beta/projects/yolo/losses/__init__.py b/official/vision/beta/projects/yolo/losses/__init__.py
index e419af524b5..e04127d3fc8 100644
--- a/official/vision/beta/projects/yolo/losses/__init__.py
+++ b/official/vision/beta/projects/yolo/losses/__init__.py
@@ -11,4 +11,3 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
- diff --git a/official/vision/beta/projects/yolo/losses/yolo_loss.py b/official/vision/beta/projects/yolo/losses/yolo_loss.py index aac117bdf58..d16db20fed0 100755 --- a/official/vision/beta/projects/yolo/losses/yolo_loss.py +++ b/official/vision/beta/projects/yolo/losses/yolo_loss.py @@ -19,9 +19,8 @@ import tensorflow as tf -from official.vision.beta.projects.yolo.ops import box_ops -from official.vision.beta.projects.yolo.ops import loss_utils -from official.vision.beta.projects.yolo.ops import math_ops +from official.vision.beta.projects.yolo.ops import (box_ops, loss_utils, + math_ops) class YoloLossBase(object, metaclass=abc.ABCMeta): diff --git a/official/vision/beta/projects/yolo/losses/yolo_loss_test.py b/official/vision/beta/projects/yolo/losses/yolo_loss_test.py index b9490181269..49af31e94b0 100755 --- a/official/vision/beta/projects/yolo/losses/yolo_loss_test.py +++ b/official/vision/beta/projects/yolo/losses/yolo_loss_test.py @@ -14,8 +14,8 @@ """Tests for yolo heads.""" -from absl.testing import parameterized import tensorflow as tf +from absl.testing import parameterized from official.vision.beta.projects.yolo.losses import yolo_loss diff --git a/official/vision/beta/projects/yolo/modeling/backbones/darknet.py b/official/vision/beta/projects/yolo/modeling/backbones/darknet.py index 7c3086bd66d..d1fe0002af9 100644 --- a/official/vision/beta/projects/yolo/modeling/backbones/darknet.py +++ b/official/vision/beta/projects/yolo/modeling/backbones/darknet.py @@ -36,6 +36,7 @@ """ import collections + import tensorflow as tf from official.modeling import hyperparams @@ -225,7 +226,7 @@ def __call__(self, config, kwargs): False ], [ - 'DarkRes', 'csp', 1, True, 64, None, None, None, None, 'mish', -1, + 'DarkRes', 'csp', 1, False, 64, None, None, None, None, 'mish', -1, 1, 1, False ], [ diff --git a/official/vision/beta/projects/yolo/modeling/backbones/darknet_test.py b/official/vision/beta/projects/yolo/modeling/backbones/darknet_test.py index 9441b06a311..a045768b9c6 100644 --- a/official/vision/beta/projects/yolo/modeling/backbones/darknet_test.py +++ b/official/vision/beta/projects/yolo/modeling/backbones/darknet_test.py @@ -15,12 +15,11 @@ # Lint as: python3 """Tests for yolo.""" -from absl.testing import parameterized import numpy as np import tensorflow as tf +from absl.testing import parameterized +from tensorflow.python.distribute import combinations, strategy_combinations -from tensorflow.python.distribute import combinations -from tensorflow.python.distribute import strategy_combinations from official.vision.beta.projects.yolo.modeling.backbones import darknet diff --git a/official/vision/beta/projects/yolo/modeling/decoders/__init__.py b/official/vision/beta/projects/yolo/modeling/decoders/__init__.py index e419af524b5..e04127d3fc8 100644 --- a/official/vision/beta/projects/yolo/modeling/decoders/__init__.py +++ b/official/vision/beta/projects/yolo/modeling/decoders/__init__.py @@ -11,4 +11,3 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - diff --git a/official/vision/beta/projects/yolo/modeling/decoders/yolo_decoder.py b/official/vision/beta/projects/yolo/modeling/decoders/yolo_decoder.py index 51c39098861..ac53f2e23f3 100644 --- a/official/vision/beta/projects/yolo/modeling/decoders/yolo_decoder.py +++ b/official/vision/beta/projects/yolo/modeling/decoders/yolo_decoder.py @@ -13,7 +13,7 @@ # limitations under the License. 
"""Feature Pyramid Network and Path Aggregation variants used in YOLO.""" -from typing import Mapping, Union, Optional +from typing import Mapping, Optional, Union import tensorflow as tf @@ -50,8 +50,18 @@ max_level_process_len=None, csp_stack=7, fpn_depth=7, + max_fpn_depth=5, + max_csp_stack=5, path_process_len=8, - fpn_filter_scale=2), + fpn_filter_scale=1), + csp_xlarge=dict( + embed_spp=False, + use_fpn=True, + max_level_process_len=None, + csp_stack=7, + fpn_depth=7, + path_process_len=8, + fpn_filter_scale=1), ), 'v3': dict( @@ -68,9 +78,22 @@ spp=dict( embed_spp=True, use_fpn=False, - max_level_process_len=2, - path_process_len=1), + max_level_process_len=None, + path_process_len=6), ), + 'vx': + dict( + regular=dict( + embed_spp=True, + use_fpn=False, + max_level_process_len=None, + path_process_len=5), + fpn=dict( + embed_spp=True, + use_fpn=True, + max_level_process_len=None, + path_process_len=5), + ), } @@ -87,6 +110,8 @@ class YoloFPN(tf.keras.layers.Layer): def __init__(self, fpn_depth=4, + max_fpn_depth=None, + max_csp_stack=None, use_spatial_attention=False, csp_stack=False, activation='leaky', @@ -104,8 +129,12 @@ def __init__(self, Args: fpn_depth: `int`, number of layers to use in each FPN path if you choose to use an FPN. + max_fpn_depth: `int`, number of layers to use in each FPN path + if you choose to use an FPN along the largest FPN level. use_spatial_attention: `bool`, use the spatial attention module. csp_stack: `bool`, CSPize the FPN. + max_csp_stack: `int`, number of layers to use for CSP on the largest_path + only. activation: `str`, the activation function to use typically leaky or mish. fpn_filter_scale: `int`, scaling factor for the FPN filters. use_sync_bn: if True, use synchronized batch normalization. @@ -121,6 +150,7 @@ def __init__(self, super().__init__(**kwargs) self._fpn_depth = fpn_depth + self._max_fpn_depth = max_fpn_depth or self._fpn_depth self._activation = activation self._use_sync_bn = use_sync_bn @@ -133,6 +163,7 @@ def __init__(self, self._use_spatial_attention = use_spatial_attention self._filter_scale = fpn_filter_scale self._csp_stack = csp_stack + self._max_csp_stack = max_csp_stack or min(self._max_fpn_depth, csp_stack) self._base_config = dict( activation=self._activation, @@ -184,6 +215,7 @@ def build(self, inputs): for level, depth in zip( reversed(range(self._min_level, self._max_level + 1)), self._depths): + if level == self._min_level: self.resamples[str(level)] = nn_blocks.PathAggregationBlock( filters=depth // 2, @@ -211,10 +243,10 @@ def build(self, inputs): else: self.preprocessors[str(level)] = nn_blocks.DarkRouteProcess( filters=depth, - repetitions=self._fpn_depth + 1 * int(self._csp_stack == 0), + repetitions=self._max_fpn_depth + 1 * int(self._csp_stack == 0), insert_spp=True, block_invert=False, - csp_stack=self._csp_stack, + csp_stack=min(self._csp_stack, self._max_fpn_depth), **self._base_config) def call(self, inputs): @@ -349,13 +381,16 @@ def build(self, inputs): downsample = False upsample = True - if self._csp_stack == 0: - proc_filters = lambda x: x - resample_filters = lambda x: x // 2 - else: - proc_filters = lambda x: x * 2 - resample_filters = lambda x: x for level, depth in zip(self._iterator, self._depths): + if level > 5: + proc_filters = lambda x: x * 2 + resample_filters = lambda x: x + elif self._csp_stack == 0: + proc_filters = lambda x: x + resample_filters = lambda x: x // 2 + else: + proc_filters = lambda x: x * 2 + resample_filters = lambda x: x if level == self._input: 
self.preprocessors[str(level)] = nn_blocks.DarkRouteProcess( filters=proc_filters(depth), @@ -396,7 +431,7 @@ def get_raw_depths(self, minimum_depth, inputs): depths = [] if len(inputs.keys()) > 3 or self._fpn_filter_scale > 1: for i in range(self._min_level, self._max_level + 1): - depths.append(inputs[str(i)][-1] * 2) + depths.append(inputs[str(i)][-1]) else: for _ in range(self._min_level, self._max_level + 1): depths.append(minimum_depth) @@ -429,6 +464,8 @@ def __init__(self, use_spatial_attention=False, csp_stack=False, fpn_depth=4, + max_fpn_depth=None, + max_csp_stack=None, fpn_filter_scale=1, path_process_len=6, max_level_process_len=None, @@ -475,6 +512,8 @@ def __init__(self, self._input_specs = input_specs self._use_fpn = use_fpn self._fpn_depth = fpn_depth + self._max_fpn_depth = max_fpn_depth + self._max_csp_stack = max_csp_stack self._path_process_len = path_process_len self._max_level_process_len = max_level_process_len self._embed_spp = embed_spp @@ -514,8 +553,10 @@ def __init__(self, } if self._use_fpn: inter_outs = YoloFPN( - fpn_depth=self._fpn_depth, **self._base_config)( - inputs) + fpn_depth=self._fpn_depth, + max_fpn_depth=self._max_fpn_depth, + max_csp_stack=self._max_csp_stack, + **self._base_config)(inputs) outputs = YoloPAN(**self._decoder_config)(inter_outs) else: inter_outs = None diff --git a/official/vision/beta/projects/yolo/modeling/decoders/yolo_decoder_test.py b/official/vision/beta/projects/yolo/modeling/decoders/yolo_decoder_test.py index 611c4585945..0e2f764ced7 100644 --- a/official/vision/beta/projects/yolo/modeling/decoders/yolo_decoder_test.py +++ b/official/vision/beta/projects/yolo/modeling/decoders/yolo_decoder_test.py @@ -15,13 +15,13 @@ # Lint as: python3 """Tests for YOLO.""" +import tensorflow as tf # Import libraries from absl.testing import parameterized -import tensorflow as tf +from tensorflow.python.distribute import combinations, strategy_combinations -from tensorflow.python.distribute import combinations -from tensorflow.python.distribute import strategy_combinations -from official.vision.beta.projects.yolo.modeling.decoders import yolo_decoder as decoders +from official.vision.beta.projects.yolo.modeling.decoders import \ + yolo_decoder as decoders class YoloDecoderTest(parameterized.TestCase, tf.test.TestCase): diff --git a/official/vision/beta/projects/yolo/modeling/factory.py b/official/vision/beta/projects/yolo/modeling/factory.py index a841131062a..e6e81719dd1 100644 --- a/official/vision/beta/projects/yolo/modeling/factory.py +++ b/official/vision/beta/projects/yolo/modeling/factory.py @@ -15,13 +15,15 @@ """Contains common factory functions yolo neural networks.""" from absl import logging + from official.vision.beta.modeling.backbones import factory as backbone_factory from official.vision.beta.modeling.decoders import factory as decoder_factory - from official.vision.beta.projects.yolo.configs import yolo from official.vision.beta.projects.yolo.modeling import yolo_model -from official.vision.beta.projects.yolo.modeling.heads import yolo_head -from official.vision.beta.projects.yolo.modeling.layers import detection_generator +from official.vision.beta.projects.yolo.modeling.heads import (yolo_head, + yolox_head) +from official.vision.beta.projects.yolo.modeling.layers import \ + detection_generator def build_yolo_detection_generator(model_config: yolo.Yolo, anchor_boxes): @@ -55,7 +57,9 @@ def build_yolo_head(input_specs, model_config: yolo.Yolo, l2_regularization): """Builds yolo head.""" min_level = min(map(int, 
input_specs.keys())) max_level = max(map(int, input_specs.keys())) - head = yolo_head.YoloHead( + + if isinstance(model_config, yolo.Yolox): + head = yolox_head.YoloxHead( min_level=min_level, max_level=max_level, classes=model_config.num_classes, @@ -64,6 +68,16 @@ def build_yolo_head(input_specs, model_config: yolo.Yolo, l2_regularization): norm_epsilon=model_config.norm_activation.norm_epsilon, kernel_regularizer=l2_regularization, smart_bias=model_config.head.smart_bias) + else: + head = yolo_head.YoloHead( + min_level=min_level, + max_level=max_level, + classes=model_config.num_classes, + boxes_per_level=model_config.anchor_boxes.anchors_per_scale, + norm_momentum=model_config.norm_activation.norm_momentum, + norm_epsilon=model_config.norm_activation.norm_epsilon, + kernel_regularizer=l2_regularization, + smart_bias=model_config.head.smart_bias) return head diff --git a/official/vision/beta/projects/yolo/modeling/heads/__init__.py b/official/vision/beta/projects/yolo/modeling/heads/__init__.py index e419af524b5..e04127d3fc8 100644 --- a/official/vision/beta/projects/yolo/modeling/heads/__init__.py +++ b/official/vision/beta/projects/yolo/modeling/heads/__init__.py @@ -11,4 +11,3 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - diff --git a/official/vision/beta/projects/yolo/modeling/heads/yolo_head.py b/official/vision/beta/projects/yolo/modeling/heads/yolo_head.py index 23d41a045e8..7ead787434d 100644 --- a/official/vision/beta/projects/yolo/modeling/heads/yolo_head.py +++ b/official/vision/beta/projects/yolo/modeling/heads/yolo_head.py @@ -15,6 +15,7 @@ """Yolo heads.""" import tensorflow as tf + from official.vision.beta.projects.yolo.modeling.layers import nn_blocks diff --git a/official/vision/beta/projects/yolo/modeling/heads/yolo_head_test.py b/official/vision/beta/projects/yolo/modeling/heads/yolo_head_test.py index 8c5414e5d84..4d5e4af3efb 100644 --- a/official/vision/beta/projects/yolo/modeling/heads/yolo_head_test.py +++ b/official/vision/beta/projects/yolo/modeling/heads/yolo_head_test.py @@ -15,11 +15,12 @@ # Lint as: python3 """Tests for yolo heads.""" +import tensorflow as tf # Import libraries from absl.testing import parameterized -import tensorflow as tf -from official.vision.beta.projects.yolo.modeling.heads import yolo_head as heads +from official.vision.beta.projects.yolo.modeling.heads import \ + yolo_head as heads class YoloDecoderTest(parameterized.TestCase, tf.test.TestCase): diff --git a/official/vision/beta/projects/yolo/modeling/heads/yolox_head.py b/official/vision/beta/projects/yolo/modeling/heads/yolox_head.py new file mode 100644 index 00000000000..c580372f352 --- /dev/null +++ b/official/vision/beta/projects/yolo/modeling/heads/yolox_head.py @@ -0,0 +1,232 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# Lint as: python3 +"""Yolox heads.""" +import tensorflow as tf +from tensorflow.keras.models import Sequential + +from official.vision.beta.projects.yolo.modeling.layers import nn_blocks +from official.vision.beta.projects.yolo.ops import box_ops + + +class YoloxHead(tf.keras.layers.Layer): + """YOLOX Prediction Head.""" + + def __init__( + self, + min_level, + max_level, + classes=80, + boxes_per_level=1, + output_extras=0, + norm_momentum=0.99, + norm_epsilon=0.001, + kernel_initializer='VarianceScaling', + kernel_regularizer=None, + bias_regularizer=None, + activation='silu', + smart_bias=False, + use_separable_conv=False, + width_scaling=1.0, + prior_prob=1e-2, + **kwargs): + """YoloX Prediction Head initialization function. + + Args: + min_level: `int`, the minimum backbone output level. + max_level: `int`, the maximum backbone output level. + classes: `int`, number of classes per category. + boxes_per_level: `int`, number of boxes to predict per level. + output_extras: `int`, number of additional output channels that the head + should predict for non-object detection and non-image classification + tasks. + norm_momentum: `float`, normalization momentum for the moving average. + norm_epsilon: `float`, small float added to variance to avoid dividing by + zero. + kernel_initializer: kernel_initializer for convolutional layers. + kernel_regularizer: tf.keras.regularizers.Regularizer object for Conv2D. + bias_regularizer: tf.keras.regularizers.Regularizer object for Conv2D. + activation: `str`, the activation function to use. Default value: "silu". + smart_bias: `bool`, whether to use smart bias. + use_separable_conv: `bool`, whether to use separable convolutions. + width_scaling: `float`, factor by which the filters should be scaled. + prior_prob: `float`, prior probability between 0.0 and 1.0 used to + initialize the class and objectness prediction biases. Defaults to 1e-2. + **kwargs: keyword arguments to be passed.
+ """ + + super().__init__(**kwargs) + self._min_level = min_level + self._max_level = max_level + + self._key_list = [ + str(key) for key in range(self._min_level, self._max_level + 1) + ] + + self._classes = classes + self._boxes_per_level = boxes_per_level + self._output_extras = output_extras + self._width_scaling = width_scaling + self._smart_bias = smart_bias + self._use_separable_conv = use_separable_conv + self._prior_prob = prior_prob + + self._stems = dict() + + self._bias = -tf.math.log((1 - self._prior_prob) / self._prior_prob) + + self._base_config = dict( + activation=activation, + norm_momentum=norm_momentum, + norm_epsilon=norm_epsilon, + kernel_initializer=kernel_initializer, + kernel_regularizer=kernel_regularizer, + bias_regularizer=bias_regularizer) + + + + def build(self, input_shape): + + self._cls_convs = dict() + self._reg_convs = dict() + + self._cls_preds = dict() + self._reg_preds = dict() + self._obj_preds = dict() + + self._cls_head = dict() + self._obj_head = dict() + self._reg_head = dict() + + for k in self._key_list: + self._stems[k] = nn_blocks.ConvBN( + filters=int(256 * self._width_scaling), + kernel_size=(1, 1), + strides=(1, 1), + use_bn=True, + use_separable_conv=self._use_separable_conv, + **self._base_config, + ) + + self._cls_convs[k] = Sequential( + [ + nn_blocks.ConvBN( + filters=int(256 * self._width_scaling), + kernel_size=(3, 3), + strides=(1, 1), + use_bn=True, + use_separable_conv=self._use_separable_conv, + **self._base_config, + ), + nn_blocks.ConvBN( + filters=int(256 * self._width_scaling), + kernel_size=(3, 3), + strides=(1, 1), + use_bn=True, + use_separable_conv=self._use_separable_conv, + **self._base_config, + ), + ] + ) + + self._reg_convs[k] = Sequential( + [ + nn_blocks.ConvBN( + filters=int(256 * self._width_scaling), + kernel_size=(3, 3), + strides=(1, 1), + use_bn=True, + use_separable_conv=self._use_separable_conv, + **self._base_config, + ), + nn_blocks.ConvBN( + filters=int(256 * self._width_scaling), + kernel_size=(3, 3), + strides=(1, 1), + use_bn=True, + use_separable_conv=self._use_separable_conv, + **self._base_config, + ), + ] + ) + + self._cls_preds[k] = tf.keras.layers.Conv2D( + filters=self._boxes_per_level * self._classes, + kernel_size=(1, 1), + strides=(1, 1), + padding='same', + bias_initializer=tf.keras.initializers.constant(self._bias)) + + self._reg_preds[k] = tf.keras.layers.Conv2D( + filters=4, + kernel_size=(1, 1), + strides=(1, 1), + padding='same') + + self._obj_preds[k] = tf.keras.layers.Conv2D( + filters=1 * self._boxes_per_level, + kernel_size=(1, 1), + strides=(1, 1), + padding='same', + bias_initializer=tf.keras.initializers.constant(self._bias)) + + for key in self._key_list: + self._cls_head[key] = Sequential() + self._cls_head[key].add(self._stems[key]) + self._cls_head[key].add(self._cls_convs[key]) + self._cls_head[key].add(self._cls_preds[key]) + + self._obj_head[key] = Sequential() + self._obj_head[key].add(self._stems[key]) + self._obj_head[key].add(self._reg_convs[key]) + self._obj_head[key].add(self._obj_preds[key]) + + self._reg_head[key] = Sequential() + self._reg_head[key].add(self._stems[key]) + self._reg_head[key].add(self._reg_convs[key]) + self._reg_head[key].add(self._reg_preds[key]) + + def call(self, inputs, *args, **kwargs): + outputs = dict() + + for k in self._key_list: + ordered_preds = [] + cls_output = self._cls_head[k](inputs[k]) + reg_output = self._reg_head[k](inputs[k]) + obj_output = self._obj_head[k](inputs[k]) + + for b in range(self._boxes_per_level): + 
ordered_preds.append(reg_output[:, :, :, 4 * b:4 * (b + 1)]) + ordered_preds.append(obj_output[:, :, :, b:b + 1]) + ordered_preds.append( + cls_output[:, :, :, self._classes * b:self._classes * (b + 1)]) + + output = tf.concat(ordered_preds, axis=-1) + outputs[k] = output + # Outputs are not flattened here. + return outputs + + def get_config(self): + config = dict( + min_level=self._min_level, + max_level=self._max_level, + classes=self._classes, + boxes_per_level=self._boxes_per_level, + output_extras=self._output_extras) + return config + + @classmethod + def from_config(cls, config, custom_objects=None): + return cls(**config) diff --git a/official/vision/beta/projects/yolo/modeling/heads/yolox_head_test.py b/official/vision/beta/projects/yolo/modeling/heads/yolox_head_test.py new file mode 100644 index 00000000000..6f2068e7141 --- /dev/null +++ b/official/vision/beta/projects/yolo/modeling/heads/yolox_head_test.py @@ -0,0 +1,75 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as: python3 +"""Tests for yolox heads.""" + +import tensorflow as tf +# Import libraries +from absl.testing import parameterized + +from official.vision.beta.projects.yolo.modeling.heads import \ + yolox_head as heads + + +class YoloxHeadTest(parameterized.TestCase, tf.test.TestCase): + + def test_network_creation(self): + """Test creation of YOLOX heads.""" + tf.keras.backend.set_image_data_format('channels_last') + input_shape = { + '3': [1, 52, 52, 256], + '4': [1, 26, 26, 512], + '5': [1, 13, 13, 1024] + } + classes = 100 + bps = 3 + head = heads.YoloxHead(3, 5, classes=classes, boxes_per_level=bps) + + inputs = {} + for key in input_shape: + inputs[key] = tf.ones(input_shape[key], dtype=tf.float32) + + endpoints = head(inputs) + + for key in endpoints.keys(): + expected_input_shape = input_shape[key] + expected_input_shape[-1] = (classes + 5) * bps + self.assertAllEqual(endpoints[key].shape.as_list(), expected_input_shape) + + def test_serialize_deserialize(self): + # Create a network object that sets all of its config options.
+ tf.keras.backend.set_image_data_format('channels_last') + input_shape = { + '3': [1, 52, 52, 256], + '4': [1, 26, 26, 512], + '5': [1, 13, 13, 1024] + } + classes = 100 + bps = 3 + head = heads.YoloxHead(3, 5, classes=classes, boxes_per_level=bps) + + inputs = {} + for key in input_shape: + inputs[key] = tf.ones(input_shape[key], dtype=tf.float32) + + _ = head(inputs) + configs = head.get_config() + head_from_config = heads.YoloxHead.from_config(configs) + self.assertAllEqual(head.get_config(), head_from_config.get_config()) + + +if __name__ == '__main__': + tf.test.main() diff --git a/official/vision/beta/projects/yolo/modeling/layers/detection_generator.py b/official/vision/beta/projects/yolo/modeling/layers/detection_generator.py index 68d70bdb978..ed0e26a6172 100644 --- a/official/vision/beta/projects/yolo/modeling/layers/detection_generator.py +++ b/official/vision/beta/projects/yolo/modeling/layers/detection_generator.py @@ -17,8 +17,7 @@ from official.vision.beta.modeling.layers import detection_generator from official.vision.beta.projects.yolo.losses import yolo_loss -from official.vision.beta.projects.yolo.ops import box_ops -from official.vision.beta.projects.yolo.ops import loss_utils +from official.vision.beta.projects.yolo.ops import box_ops, loss_utils @tf.keras.utils.register_keras_serializable(package='yolo') diff --git a/official/vision/beta/projects/yolo/modeling/layers/detection_generator_test.py b/official/vision/beta/projects/yolo/modeling/layers/detection_generator_test.py index ebe70060427..5d0d2b03a21 100644 --- a/official/vision/beta/projects/yolo/modeling/layers/detection_generator_test.py +++ b/official/vision/beta/projects/yolo/modeling/layers/detection_generator_test.py @@ -13,10 +13,11 @@ # limitations under the License. """Tests for yolo detection generator.""" -from absl.testing import parameterized import tensorflow as tf +from absl.testing import parameterized -from official.vision.beta.projects.yolo.modeling.layers import detection_generator as dg +from official.vision.beta.projects.yolo.modeling.layers import \ + detection_generator as dg class YoloDecoderTest(parameterized.TestCase, tf.test.TestCase): diff --git a/official/vision/beta/projects/yolo/modeling/layers/nn_blocks.py b/official/vision/beta/projects/yolo/modeling/layers/nn_blocks.py index 5fc98ea2f63..d5d0851114b 100644 --- a/official/vision/beta/projects/yolo/modeling/layers/nn_blocks.py +++ b/official/vision/beta/projects/yolo/modeling/layers/nn_blocks.py @@ -11,11 +11,15 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
- """Contains common building blocks for yolo neural networks.""" from typing import Callable, List, Tuple import tensorflow as tf +import tensorflow.keras.backend as K +from tensorflow.keras import Sequential +from tensorflow.keras.layers import (Activation, BatchNormalization, Conv2D, + Layer, LeakyReLU, MaxPool2D, ReLU, + UpSampling2D, concatenate) from official.modeling import tf_utils from official.vision.beta.ops import spatial_transform_ops @@ -1725,3 +1729,28 @@ def call(self, x, training=None): x[..., 1::2, 1::2, :] ], axis=-1) + +class SiLU(Layer): + def __init__(self, *args, **kwargs): + super(SiLU, self).__init__(*args, **kwargs) + + def call(self, x, **kwargs): + return x * K.sigmoid(x) + + def get_config(self): + config = super(SiLU, self).get_config() + return config + + def compute_output_shape(self, input_shape): + return input_shape + +def get_activation(name="silu"): + if name == "silu": + module = SiLU() + elif name == "relu": + module = ReLU() + elif name == "lrelu": + module = LeakyReLU(0.1) + else: + raise AttributeError("Unsupported act type: {}".format(name)) + return module diff --git a/official/vision/beta/projects/yolo/modeling/layers/nn_blocks_test.py b/official/vision/beta/projects/yolo/modeling/layers/nn_blocks_test.py index b43beefba60..71452792eb9 100644 --- a/official/vision/beta/projects/yolo/modeling/layers/nn_blocks_test.py +++ b/official/vision/beta/projects/yolo/modeling/layers/nn_blocks_test.py @@ -12,10 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -# Lint as: python3 -from absl.testing import parameterized import numpy as np import tensorflow as tf +# Lint as: python3 +from absl.testing import parameterized from official.vision.beta.projects.yolo.modeling.layers import nn_blocks diff --git a/official/vision/beta/projects/yolo/modeling/yolo_model.py b/official/vision/beta/projects/yolo/modeling/yolo_model.py index 06f79750ea8..44129a2df55 100644 --- a/official/vision/beta/projects/yolo/modeling/yolo_model.py +++ b/official/vision/beta/projects/yolo/modeling/yolo_model.py @@ -15,7 +15,9 @@ """Yolo models.""" from typing import Mapping, Union + import tensorflow as tf + from official.vision.beta.projects.yolo.modeling.layers import nn_blocks diff --git a/official/vision/beta/projects/yolo/ops/__init__.py b/official/vision/beta/projects/yolo/ops/__init__.py index a25710c222e..e04127d3fc8 100644 --- a/official/vision/beta/projects/yolo/ops/__init__.py +++ b/official/vision/beta/projects/yolo/ops/__init__.py @@ -11,5 +11,3 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
- - diff --git a/official/vision/beta/projects/yolo/ops/anchor.py b/official/vision/beta/projects/yolo/ops/anchor.py index dfe675984a7..3aa6b09c738 100644 --- a/official/vision/beta/projects/yolo/ops/anchor.py +++ b/official/vision/beta/projects/yolo/ops/anchor.py @@ -16,13 +16,10 @@ import numpy as np import tensorflow as tf -from official.vision.beta.projects.yolo.ops import box_ops -from official.vision.beta.projects.yolo.ops import loss_utils -from official.vision.beta.projects.yolo.ops import preprocessing_ops +from official.vision.beta.projects.yolo.ops import (box_ops, loss_utils, + preprocessing_ops) INF = 10000000 - def get_best_anchor(y_true, anchors, stride, @@ -32,12 +29,11 @@ def get_best_anchor(y_true, best_match_only=False, use_tie_breaker=True): """Get the correct anchor that is associated with each box using IOU. - + Args: y_true: tf.Tensor[] for the list of bounding boxes in the yolo format. - anchors: list or tensor for the anchor boxes to be used in prediction found - via Kmeans. - stride: `int` stride for the anchors. + anchors: list or tensor for the anchor boxes to be used in prediction + found via Kmeans. + stride: `int` stride for the anchors. width: int for the image width. height: int for the image height. iou_thresh: `float` the minimum iou threshold to use for selecting boxes for each level. best_match_only: `bool` if the box only has a single match and it is less than the iou threshold, when set to True, this match will be dropped as no anchors can be linked to it. use_tie_breaker: `bool` if there is many anchors for a given box, then - attempt to use all of them, if False, only the first matching box will be - used. - Returns: - tf.Tensor: y_true with the anchor associated with each ground truth box - known + attempt to use all of them, if False, only the first matching box will + be used. + Returns: + tf.Tensor: y_true with the anchor associated with each ground truth + box known """ with tf.name_scope('get_best_anchor'): width = tf.cast(width, dtype=tf.float32) @@ -61,7 +57,7 @@ def get_best_anchor(y_true, true_wh = tf.cast(y_true[..., 2:4], dtype=tf.float32) * scaler # scale down from large anchor to small anchor type anchors = tf.cast(anchors, dtype=tf.float32) / stride k = tf.shape(anchors)[0] @@ -93,7 +89,9 @@ def get_best_anchor(y_true, iou_type=3, ) values, indexes = tf.math.top_k( - iou_raw, k=tf.cast(k, dtype=tf.int32), sorted=True) + iou_raw, + k=tf.cast(k, dtype=tf.int32), + sorted=True) ind_mask = tf.cast(values >= iou_thresh, dtype=indexes.dtype) # pad the indexes such that all values less than the thresh are -1 @@ -104,21 +102,16 @@ elif use_tie_breaker: iou_index = tf.concat([ tf.expand_dims(indexes[..., 0], axis=-1), - ((indexes[..., 1:] + 1) * ind_mask[..., 1:]) - 1 - ], - axis=-1) + ((indexes[..., 1:] + 1) * ind_mask[..., 1:]) - 1], axis=-1) else: iou_index = tf.concat([ tf.expand_dims(indexes[..., 0], axis=-1), - tf.zeros_like(indexes[..., 1:]) - 1 - ], - axis=-1) + tf.zeros_like(indexes[..., 1:]) - 1], axis=-1) return tf.cast(iou_index, dtype=tf.float32), tf.cast(values, dtype=tf.float32) - class YoloAnchorLabeler: """Anchor labeler for the Yolo Models.""" def __init__(self, anchors=None, @@ -138,8 +131,8 @@ def __init__(self, anchor_free_level_limits: `List` the box sizes that will be allowed at each FPN level as is done in the FCOS and YOLOX paper for anchor free box assignment. - level_strides: `Dict[int]` for how much the model scales down the images - at the each level.
+ level_strides: `Dict[int]` for how much the model scales down the + images at each level. center_radius: `Dict[float]` for radius around each box center to search for extra centers in each level. max_num_instances: `int` for the number of boxes to compute loss on. @@ -159,8 +152,9 @@ def __init__(self, """ self.anchors = anchors self.masks = self._get_mask() + self.use_tie_breaker = use_tie_breaker self.anchor_free_level_limits = self._get_level_limits( - anchor_free_level_limits) + anchor_free_level_limits) if darknet and self.anchor_free_level_limits is None: center_radius = None @@ -172,7 +166,7 @@ self.num_instances = {key: maxim for key in self.keys} elif not darknet: self.num_instances = { - key: (6 - i) * max_num_instances for i, key in enumerate(self.keys) + key: (6 - i) * max_num_instances for i, key in enumerate(self.keys) } else: self.num_instances = {key: max_num_instances for key in self.keys} @@ -181,7 +175,6 @@ self.level_strides = level_strides self.match_threshold = match_threshold self.best_matches_only = best_matches_only - self.use_tie_breaker = use_tie_breaker self.dtype = dtype def _get_mask(self): @@ -203,8 +196,20 @@ def _get_level_limits(self, level_limits): level_limits_dict = {} level_limits = [0.0] + level_limits + [np.inf] + k = 0 - for i, key in enumerate(self.anchors.keys()): + for key in self.anchors.keys(): - level_limits_dict[key] = level_limits[i:i + 2] + level_limits_dict[key] = [] + + base = k + for _ in self.anchors[key]: + if self.use_tie_breaker: + base = k + level_limits_dict[key].append([level_limits[base], + level_limits[k + 1]]) + k += 1 + level_limits_dict[key] = tf.convert_to_tensor(level_limits_dict[key]) else: level_limits_dict = None return level_limits_dict @@ -225,29 +230,19 @@ def _tie_breaking_search(self, anchors, mask, boxes, classes): anchor_id = tf.cast(anchor_id, boxes.dtype) return boxes, classes, anchor_id - def _get_anchor_id(self, - key, - boxes, - classes, - width, - height, - stride, + def _get_anchor_id(self, key, boxes, classes, width, height, stride, iou_index=None): """Find the object anchor assignments in an anchor based paradigm.""" # find the best anchor anchors = self.anchors[key] num_anchors = len(anchors) if self.best_matches_only: # get the best anchor for each box - iou_index, _ = get_best_anchor( - boxes, - anchors, - stride, - width=width, - height=height, - best_match_only=True, - iou_thresh=self.match_threshold) + iou_index, _ = get_best_anchor(boxes, anchors, stride, + width=width, height=height, + best_match_only=True, + iou_thresh=self.match_threshold) mask = range(num_anchors) else: # search is done across FPN levels, get the mask of anchor indexes mask = self.masks[key] # search for the correct box to use - (boxes, classes, - anchors) = self._tie_breaking_search(iou_index, mask, boxes, classes) + (boxes, classes, anchors) = self._tie_breaking_search(iou_index, mask, + boxes, classes) return boxes, classes, anchors, num_anchors def _get_centers(self, boxes, classes, anchors, width, height, scale_xy): """Find the object center assignments in an anchor based paradigm."""
""" offset = tf.cast(0.5 * (scale_xy - 1), boxes.dtype) grid_xy, _ = tf.split(boxes, 2, axis=-1) @@ -270,18 +265,20 @@ def _get_centers(self, boxes, classes, anchors, width, height, scale_xy): centers = tf.math.floor(grid_xy) if offset != 0.0: - clamp = lambda x, ma: tf.maximum( # pylint:disable=g-long-lambda + clamp = lambda x, ma: tf.maximum( tf.minimum(x, tf.cast(ma, x.dtype)), tf.zeros_like(x)) grid_xy_index = grid_xy - centers positive_shift = ((grid_xy_index < offset) & (grid_xy > 1.)) - negative_shift = ((grid_xy_index > (1 - offset)) & (grid_xy < - (wh_scale - 1.))) + negative_shift = ( + (grid_xy_index > (1 - offset)) & (grid_xy < (wh_scale - 1.))) zero, _ = tf.split(tf.ones_like(positive_shift), 2, axis=-1) - shift_mask = tf.concat([zero, positive_shift, negative_shift], axis=-1) - offset = tf.cast([[0, 0], [1, 0], [0, 1], [-1, 0], [0, -1]], - offset.dtype) * offset + shift_mask = tf.concat( + [zero, positive_shift, negative_shift], axis=-1) + offset = tf.cast([[0, 0], [1, 0], + [0, 1], [-1, 0], + [0, -1]], offset.dtype) * offset num_shifts = tf.shape(shift_mask) num_shifts = num_shifts[-1] @@ -294,22 +291,24 @@ def _get_centers(self, boxes, classes, anchors, width, height, scale_xy): shift_ind = shift_ind - (1 - shift_mask) shift_ind = tf.expand_dims(shift_ind, axis=-1) - boxes_and_centers = tf.concat([boxes, classes, anchors, shift_ind], - axis=-1) + boxes_and_centers = tf.concat( + [boxes, classes, anchors, shift_ind], axis=-1) boxes_and_centers = tf.reshape(boxes_and_centers, [-1, 7]) _, center_ids = tf.split(boxes_and_centers, [6, 1], axis=-1) + #center_ids = tf.squeeze(center_ids, axis = -1) select = tf.where(center_ids >= 0) select, _ = tf.split(select, 2, axis=-1) boxes_and_centers = tf.gather_nd(boxes_and_centers, select) + # center_ids = tf.cast(center_ids, tf.int32) center_ids = tf.gather_nd(center_ids, select) center_ids = tf.cast(center_ids, tf.int32) shifts = tf.gather_nd(offset, center_ids) - boxes, classes, anchors, _ = tf.split( - boxes_and_centers, [4, 1, 1, 1], axis=-1) + boxes, classes, anchors, _ = tf.split(boxes_and_centers, + [4, 1, 1, 1], axis=-1) grid_xy, _ = tf.split(boxes, 2, axis=-1) centers = tf.math.floor(grid_xy * wh_scale - shifts) centers = clamp(centers, wh_scale - 1) @@ -318,7 +317,13 @@ def _get_centers(self, boxes, classes, anchors, width, height, scale_xy): centers = tf.cast(tf.concat([y, x, anchors], axis=-1), tf.int32) return boxes, classes, centers - def _get_anchor_free(self, key, boxes, classes, height, width, stride, + def _get_anchor_free(self, + key, + boxes, + classes, + height, + width, + stride, center_radius): """Find the box assignements in an anchor free paradigm.""" level_limits = self.anchor_free_level_limits[key] @@ -327,6 +332,7 @@ def _get_anchor_free(self, key, boxes, classes, height, width, stride, grid_points = tf.squeeze(grid_points, axis=0) box_list = boxes class_list = classes + num_anchors = 1 grid_points = (grid_points + 0.5) * stride x_centers, y_centers = grid_points[..., 0], grid_points[..., 1] @@ -345,13 +351,18 @@ def _get_anchor_free(self, key, boxes, classes, height, width, stride, b_b = tlbr_boxes[..., 2] - y_centers b_r = tlbr_boxes[..., 3] - x_centers box_delta = tf.stack([b_t, b_l, b_b, b_r], axis=-1) + is_in_boxes = tf.reduce_min(box_delta, axis=-1) > 0.0 if level_limits is not None: max_reg_targets_per_im = tf.reduce_max(box_delta, axis=-1) - gt_min = max_reg_targets_per_im >= level_limits[0] - gt_max = max_reg_targets_per_im <= level_limits[1] - is_in_boxes = tf.logical_and(gt_min, gt_max) - else: - 
is_in_boxes = tf.reduce_min(box_delta, axis=-1) > 0.0 + level_limits = tf.cast(level_limits, max_reg_targets_per_im.dtype) + num_anchors = tf.shape(level_limits)[0] + + max_reg_targets_per_im = tf.expand_dims(max_reg_targets_per_im, axis=-1) + + gt_min = max_reg_targets_per_im >= level_limits[..., 0] + gt_max = max_reg_targets_per_im <= level_limits[..., 1] + is_in_level = tf.logical_and(gt_min, gt_max) + is_in_boxes_all = tf.reduce_any(is_in_boxes, axis=(0, 1), keepdims=True) # check if the center is in the receptive field of this fpn level @@ -368,17 +379,22 @@ is_in_boxes_and_center = tf.logical_and(is_in_boxes, is_in_centers) is_in_boxes_and_center = tf.logical_and(is_in_index, is_in_boxes_and_center) - if self.use_tie_breaker: - boxes_all = tf.cast(is_in_boxes_and_center, area.dtype) - boxes_all = ((boxes_all * area) + ((1 - boxes_all) * INF)) - boxes_min = tf.reduce_min(boxes_all, axis=-1, keepdims=True) - boxes_min = tf.where(boxes_min == INF, -1.0, boxes_min) - is_in_boxes_and_center = boxes_all == boxes_min + if level_limits is not None: + is_in_boxes_and_center = tf.expand_dims(is_in_boxes_and_center, axis=-1) + is_in_boxes_and_center = tf.logical_and(is_in_level, + is_in_boxes_and_center) # construct the index update grid - reps = tf.reduce_sum(tf.cast(is_in_boxes_and_center, tf.int16), axis=-1) + reps = tf.reduce_sum(tf.cast(is_in_boxes_and_center, tf.int16), axis=-2) indexes = tf.cast(tf.where(is_in_boxes_and_center), tf.int32) - y, x, t = tf.split(indexes, 3, axis=-1) + y, x, t, a = tf.split(indexes, 4, axis=-1) boxes = tf.gather_nd(box_list, t) classes = tf.cast(tf.gather_nd(class_list, t), boxes.dtype) @@ -389,8 +405,8 @@ samples = tf.concat([boxes, conf, classes], axis=-1) - indexes = tf.concat([y, x, tf.zeros_like(t)], axis=-1) - return indexes, samples + indexes = tf.concat([y, x, a], axis=-1) + return indexes, samples, num_anchors def build_label_per_path(self, key, @@ -403,20 +419,23 @@ stride = self.level_strides[key] scale_xy = self.center_radius[key] if self.center_radius is not None else 1 width = tf.cast(width // stride, boxes.dtype) height = tf.cast(height // stride, boxes.dtype) if self.anchor_free_level_limits is None: - (boxes, classes, anchors, num_anchors) = self._get_anchor_id( - key, boxes, classes, width, height, stride, iou_index=iou_index) + (boxes, classes, + anchors, num_anchors) = self._get_anchor_id(key, boxes, classes, + width, height, stride, + iou_index=iou_index) boxes, classes, centers = self._get_centers(boxes, classes, anchors, width, height, scale_xy) ind_mask = tf.ones_like(classes) updates = tf.concat([boxes, ind_mask, classes], axis=-1) else: - num_anchors = 1 - (centers, updates) = self._get_anchor_free(key, boxes, classes, height, - width, stride, scale_xy) + (centers, updates, num_anchors) = self._get_anchor_free(key, boxes, + classes, height, + width, stride, + scale_xy) boxes, ind_mask, classes =
tf.split(updates, [4, 1, 1], axis=-1) width = tf.cast(width, tf.int32) @@ -438,17 +457,17 @@ def __call__(self, boxes, classes, width, height): """Builds the labels for a single image, not functional in batch mode. Args: - boxes: `Tensor` of shape [None, 4] indicating the object locations in an - image. + boxes: `Tensor` of shape [None, 4] indicating the object locations in + an image. classes: `Tensor` of shape [None] indicating the classes of each object. width: `int` for the images width. height: `int` for the images height. - Returns: centers: `Tensor` of shape [None, 3] of indexes in the final grid where boxes are located. updates: `Tensor` of shape [None, 8] the value to place in the final grid. - full: `Tensor` of [width/stride, height/stride, num_anchors, 1] holding + full: `Tensor` of [width/stride, height/stride, num_anchors, 2] holding a mask of where boxes are located for confidence losses. """ indexes = {} @@ -465,15 +484,11 @@ def __call__(self, boxes, classes, width, height): stride = tf.cast([width, height], boxes.dtype) # get the best anchor for each box - iou_index, _ = get_best_anchor( - boxes, - anchorsvec, - stride, - width=1.0, - height=1.0, - best_match_only=False, - use_tie_breaker=self.use_tie_breaker, - iou_thresh=self.match_threshold) + iou_index, _ = get_best_anchor(boxes, anchorsvec, stride, + width=1.0, height=1.0, + best_match_only=False, + use_tie_breaker=self.use_tie_breaker, + iou_thresh=self.match_threshold) for key in self.keys: indexes[key], updates[key], true_grids[key] = self.build_label_per_path( diff --git a/official/vision/beta/projects/yolo/ops/box_ops.py b/official/vision/beta/projects/yolo/ops/box_ops.py index 6d15f5d3157..968cb448522 100644 --- a/official/vision/beta/projects/yolo/ops/box_ops.py +++ b/official/vision/beta/projects/yolo/ops/box_ops.py @@ -14,7 +14,9 @@ """Yolo box ops.""" import math + import tensorflow as tf + from official.vision.beta.projects.yolo.ops import math_ops diff --git a/official/vision/beta/projects/yolo/ops/box_ops_test.py b/official/vision/beta/projects/yolo/ops/box_ops_test.py index afba1ee53c1..f0333209759 100644 --- a/official/vision/beta/projects/yolo/ops/box_ops_test.py +++ b/official/vision/beta/projects/yolo/ops/box_ops_test.py @@ -13,9 +13,9 @@ # limitations under the License.
"""box_ops tests.""" -from absl.testing import parameterized import numpy as np import tensorflow as tf +from absl.testing import parameterized from official.vision.beta.projects.yolo.ops import box_ops diff --git a/official/vision/beta/projects/yolo/ops/loss_utils.py b/official/vision/beta/projects/yolo/ops/loss_utils.py index 5536290199b..83946cd6a19 100755 --- a/official/vision/beta/projects/yolo/ops/loss_utils.py +++ b/official/vision/beta/projects/yolo/ops/loss_utils.py @@ -17,8 +17,7 @@ import numpy as np import tensorflow as tf -from official.vision.beta.projects.yolo.ops import box_ops -from official.vision.beta.projects.yolo.ops import math_ops +from official.vision.beta.projects.yolo.ops import box_ops, math_ops @tf.custom_gradient diff --git a/official/vision/beta/projects/yolo/ops/mosaic.py b/official/vision/beta/projects/yolo/ops/mosaic.py index 0ab68c2a6c9..c5be1d2ac45 100755 --- a/official/vision/beta/projects/yolo/ops/mosaic.py +++ b/official/vision/beta/projects/yolo/ops/mosaic.py @@ -14,11 +14,11 @@ """Mosaic op.""" import random + import tensorflow as tf import tensorflow_addons as tfa -from official.vision.beta.ops import box_ops -from official.vision.beta.ops import preprocess_ops +from official.vision.beta.ops import box_ops, preprocess_ops from official.vision.beta.projects.yolo.ops import preprocessing_ops @@ -179,7 +179,7 @@ def _augment_image(self, infos, area_thresh=self._area_thresh, shuffle_boxes=False, - augment=True, + filter_and_clip_boxes=True, seed=self._seed) classes, is_crowd, area = self._select_ind(inds, classes, is_crowd, area) # pylint:disable=unbalanced-tuple-unpacking return image, boxes, classes, is_crowd, area, crop_points diff --git a/official/vision/beta/projects/yolo/ops/preprocessing_ops.py b/official/vision/beta/projects/yolo/ops/preprocessing_ops.py index fc642d20c2b..2d870dea1d8 100755 --- a/official/vision/beta/projects/yolo/ops/preprocessing_ops.py +++ b/official/vision/beta/projects/yolo/ops/preprocessing_ops.py @@ -482,11 +482,15 @@ def cast(values, dtype): image_ = tf.pad( cropped_image, [[pad[0], pad[2]], [pad[1], pad[3]], [0, 0]], constant_values=PAD_VALUE) + + # Pad and scale info + isize = tf.cast(tf.shape(image_)[:2], dtype=tf.float32) + osize = tf.cast((desired_size[0], desired_size[1]), dtype=tf.float32) pad_info = tf.stack([ tf.cast(tf.shape(cropped_image)[:2], tf.float32), - tf.cast(tf.shape(image_)[:2], dtype=tf.float32), - tf.ones_like(original_dims, dtype=tf.float32), - (-tf.cast(pad[:2], tf.float32)) + osize, + osize/isize, + (-tf.cast(pad[:2], tf.float32)*osize/isize) ]) infos.append(pad_info) @@ -761,7 +765,9 @@ def boxes_candidates(clipped_boxes, Returns: indices[:, 0]: A `Tensor` representing valid boxes after filtering. """ - + if area_thr == 0.0: + wh_thr = 0 + ar_thr = np.inf area_thr = tf.math.abs(area_thr) # Get the scaled and shifted heights of the original @@ -778,8 +784,8 @@ def boxes_candidates(clipped_boxes, clipped_height / (clipped_width + 1e-16)) # Ensure the clipped width adn height are larger than a preset threshold. - conda = clipped_width > wh_thr - condb = clipped_height > wh_thr + conda = clipped_width >= wh_thr + condb = clipped_height >= wh_thr # Ensure the area of the clipped box is larger than the area threshold. area = (clipped_height * clipped_width) / (og_width * og_height + 1e-16) @@ -837,7 +843,7 @@ def transform_and_clip_boxes(boxes, shuffle_boxes=False, area_thresh=0.1, seed=None, - augment=True): + filter_and_clip_boxes=True): """Clips and cleans the boxes. 
Args: @@ -868,8 +874,8 @@ def get_valid_boxes(boxes): # Make sure all boxes are valid to start, clip to [0, 1] and get only the # valid boxes. - output_size = tf.cast([640, 640], tf.float32) - if augment: + output_size = None + if filter_and_clip_boxes: boxes = tf.math.maximum(tf.math.minimum(boxes, 1.0), 0.0) cond = get_valid_boxes(boxes) @@ -918,16 +924,17 @@ def get_valid_boxes(boxes): boxes *= tf.cast(tf.expand_dims(cond, axis=-1), boxes.dtype) # Threshold the existing boxes. - if augment: - boxes_ = bbox_ops.denormalize_boxes(boxes, output_size) - box_history_ = bbox_ops.denormalize_boxes(box_history, output_size) - inds = boxes_candidates(boxes_, box_history_, area_thr=area_thresh) + if filter_and_clip_boxes: + if output_size is not None: + boxes_ = bbox_ops.denormalize_boxes(boxes, output_size) + box_history_ = bbox_ops.denormalize_boxes(box_history, output_size) + inds = boxes_candidates(boxes_, box_history_, area_thr=area_thresh) + else: + inds = boxes_candidates(boxes, box_history, wh_thr = 0.0, area_thr=area_thresh) # Select and gather the good boxes. if shuffle_boxes: inds = tf.random.shuffle(inds, seed=seed) else: - boxes = box_history - boxes_ = bbox_ops.denormalize_boxes(boxes, output_size) - inds = bbox_ops.get_non_empty_box_indices(boxes_) + inds = bbox_ops.get_non_empty_box_indices(boxes) boxes = tf.gather(boxes, inds) return boxes, inds diff --git a/official/vision/beta/projects/yolo/ops/preprocessing_ops_test.py b/official/vision/beta/projects/yolo/ops/preprocessing_ops_test.py index 43cca574b7f..a5dba12fc4e 100755 --- a/official/vision/beta/projects/yolo/ops/preprocessing_ops_test.py +++ b/official/vision/beta/projects/yolo/ops/preprocessing_ops_test.py @@ -13,9 +13,9 @@ # limitations under the License. """Tests for preprocessing_ops.py.""" -from absl.testing import parameterized import numpy as np import tensorflow as tf +from absl.testing import parameterized from official.vision.beta.ops import box_ops as bbox_ops from official.vision.beta.projects.yolo.ops import preprocessing_ops diff --git a/official/vision/beta/projects/yolo/optimization/__init__.py b/official/vision/beta/projects/yolo/optimization/__init__.py index 6ff51c80648..46d5d5003b9 100755 --- a/official/vision/beta/projects/yolo/optimization/__init__.py +++ b/official/vision/beta/projects/yolo/optimization/__init__.py @@ -14,9 +14,10 @@ """Optimization package definition.""" -# pylint: disable=wildcard-import from official.modeling.optimization.configs.learning_rate_config import * -from official.modeling.optimization.ema_optimizer import ExponentialMovingAverage +from official.modeling.optimization.ema_optimizer import \ + ExponentialMovingAverage from official.vision.beta.projects.yolo.optimization.configs.optimization_config import * from official.vision.beta.projects.yolo.optimization.configs.optimizer_config import * -from official.vision.beta.projects.yolo.optimization.optimizer_factory import OptimizerFactory as YoloOptimizerFactory +from official.vision.beta.projects.yolo.optimization.optimizer_factory import \ + OptimizerFactory as YoloOptimizerFactory diff --git a/official/vision/beta/projects/yolo/optimization/configs/__init__.py b/official/vision/beta/projects/yolo/optimization/configs/__init__.py index e419af524b5..e04127d3fc8 100755 --- a/official/vision/beta/projects/yolo/optimization/configs/__init__.py +++ b/official/vision/beta/projects/yolo/optimization/configs/__init__.py @@ -11,4 +11,3 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. - diff --git a/official/vision/beta/projects/yolo/optimization/configs/optimization_config.py b/official/vision/beta/projects/yolo/optimization/configs/optimization_config.py index 92b8d1a79b1..64ba9e985c6 100755 --- a/official/vision/beta/projects/yolo/optimization/configs/optimization_config.py +++ b/official/vision/beta/projects/yolo/optimization/configs/optimization_config.py @@ -21,8 +21,10 @@ import dataclasses from typing import Optional -from official.modeling.optimization.configs import optimization_config as optimization_cfg -from official.vision.beta.projects.yolo.optimization.configs import optimizer_config as opt_cfg +from official.modeling.optimization.configs import \ + optimization_config as optimization_cfg +from official.vision.beta.projects.yolo.optimization.configs import \ + optimizer_config as opt_cfg @dataclasses.dataclass diff --git a/official/vision/beta/projects/yolo/optimization/optimizer_factory.py b/official/vision/beta/projects/yolo/optimization/optimizer_factory.py index b2126d16bc2..bd3414215bb 100755 --- a/official/vision/beta/projects/yolo/optimization/optimizer_factory.py +++ b/official/vision/beta/projects/yolo/optimization/optimizer_factory.py @@ -16,8 +16,7 @@ import gin -from official.modeling.optimization import ema_optimizer -from official.modeling.optimization import optimizer_factory +from official.modeling.optimization import ema_optimizer, optimizer_factory from official.vision.beta.projects.yolo.optimization import sgd_torch optimizer_factory.OPTIMIZERS_CLS.update({ diff --git a/official/vision/beta/projects/yolo/optimization/sgd_torch.py b/official/vision/beta/projects/yolo/optimization/sgd_torch.py index a79e5671aef..b5ea7e471fc 100644 --- a/official/vision/beta/projects/yolo/optimization/sgd_torch.py +++ b/official/vision/beta/projects/yolo/optimization/sgd_torch.py @@ -15,8 +15,8 @@ """SGD PyTorch optimizer.""" import re -from absl import logging import tensorflow as tf +from absl import logging LearningRateSchedule = tf.keras.optimizers.schedules.LearningRateSchedule @@ -302,6 +302,7 @@ def get_config(self): "decay": self._initial_decay, "momentum": self._serialize_hyperparameter("momentum"), "momentum_start": self._serialize_hyperparameter("momentum_start"), + "weight_decay": self._serialize_hyperparameter("weight_decay"), "warmup_steps": self._serialize_hyperparameter("warmup_steps"), "nesterov": self.nesterov, }) diff --git a/official/vision/beta/projects/yolo/tasks/image_classification.py b/official/vision/beta/projects/yolo/tasks/image_classification.py index 4edef631fce..1094afb6586 100644 --- a/official/vision/beta/projects/yolo/tasks/image_classification.py +++ b/official/vision/beta/projects/yolo/tasks/image_classification.py @@ -15,10 +15,11 @@ """Image classification task definition.""" from official.common import dataset_fn from official.core import task_factory -from official.vision.beta.dataloaders import classification_input as classification_input_base -from official.vision.beta.dataloaders import input_reader_factory -from official.vision.beta.dataloaders import tfds_factory -from official.vision.beta.projects.yolo.configs import darknet_classification as exp_cfg +from official.vision.beta.dataloaders import \ + classification_input as classification_input_base +from official.vision.beta.dataloaders import input_reader_factory, tfds_factory +from official.vision.beta.projects.yolo.configs import \ + darknet_classification 
as exp_cfg from official.vision.beta.projects.yolo.dataloaders import classification_input from official.vision.beta.tasks import image_classification diff --git a/official/vision/beta/projects/yolo/tasks/yolo.py b/official/vision/beta/projects/yolo/tasks/yolo.py index 3683952a304..cd8641cc83f 100755 --- a/official/vision/beta/projects/yolo/tasks/yolo.py +++ b/official/vision/beta/projects/yolo/tasks/yolo.py @@ -17,25 +17,22 @@ import collections from typing import Optional -from absl import logging import tensorflow as tf +from absl import logging -from official.core import base_task -from official.core import config_definitions -from official.core import input_reader -from official.core import task_factory +from official.core import (base_task, config_definitions, input_reader, + task_factory) from official.modeling import performance -from official.vision.beta.dataloaders import tfds_factory -from official.vision.beta.dataloaders import tf_example_label_map_decoder +from official.vision.beta.dataloaders import (tf_example_label_map_decoder, + tfds_factory) from official.vision.beta.evaluation import coco_evaluator from official.vision.beta.ops import box_ops from official.vision.beta.projects.yolo import optimization from official.vision.beta.projects.yolo.configs import yolo as exp_cfg -from official.vision.beta.projects.yolo.dataloaders import tf_example_decoder -from official.vision.beta.projects.yolo.dataloaders import yolo_input +from official.vision.beta.projects.yolo.dataloaders import (tf_example_decoder, + yolo_input) from official.vision.beta.projects.yolo.modeling import factory -from official.vision.beta.projects.yolo.ops import mosaic -from official.vision.beta.projects.yolo.ops import preprocessing_ops +from official.vision.beta.projects.yolo.ops import mosaic, preprocessing_ops from official.vision.beta.projects.yolo.tasks import task_utils OptimizationConfig = optimization.OptimizationConfig @@ -255,16 +252,22 @@ def train_step(self, inputs, model, optimizer, metrics=None): logs.update({m.name: m.result()}) return logs - def _reorg_boxes(self, boxes, num_detections, image): + def _reorg_boxes(self, boxes, info, num_detections): """Scale and Clean boxes prior to Evaluation.""" - - # Build a prediciton mask to take only the number of detections mask = tf.sequence_mask(num_detections, maxlen=tf.shape(boxes)[1]) mask = tf.cast(tf.expand_dims(mask, axis=-1), boxes.dtype) # Denormalize the boxes by the shape of the image - inshape = tf.cast(preprocessing_ops.get_image_shape(image), boxes.dtype) + inshape = tf.expand_dims(info[:, 1, :], axis=1) + ogshape = tf.expand_dims(info[:, 0, :], axis=1) + scale = tf.expand_dims(info[:, 2, :], axis=1) + offset = tf.expand_dims(info[:, 3, :], axis=1) + boxes = box_ops.denormalize_boxes(boxes, inshape) + boxes = box_ops.clip_boxes(boxes, inshape) + boxes += tf.tile(offset, [1, 1, 2]) + boxes /= tf.tile(scale, [1, 1, 2]) + boxes = box_ops.clip_boxes(boxes, ogshape) # Mask the boxes for usage boxes *= mask @@ -292,10 +295,8 @@ def validation_step(self, inputs, model, metrics=None): logs = {self.loss: metric_loss} # Reorganize and rescale the boxes - boxes = self._reorg_boxes(y_pred['bbox'], y_pred['num_detections'], image) - label['groundtruths']['boxes'] = self._reorg_boxes( - label['groundtruths']['boxes'], label['groundtruths']['num_detections'], - image) + info = label['groundtruths']['image_info'] + boxes = self._reorg_boxes(y_pred['bbox'], info,
y_pred["num_detections"]) # Build the input for the coc evaluation metric coco_model_outputs = { diff --git a/official/vision/beta/projects/yolo/train.py b/official/vision/beta/projects/yolo/train.py index 78ee1ac32ae..09e97ab74ba 100644 --- a/official/vision/beta/projects/yolo/train.py +++ b/official/vision/beta/projects/yolo/train.py @@ -14,12 +14,12 @@ """TensorFlow Model Garden Vision training driver.""" -from absl import app -from absl import flags +from absl import app, flags from official.common import flags as tfm_flags from official.vision.beta import train -from official.vision.beta.projects.yolo.common import registry_imports # pylint: disable=unused-import +from official.vision.beta.projects.yolo.common import \ + registry_imports # pylint: disable=unused-import FLAGS = flags.FLAGS