PurdueDualityLab · patel996 · Oct 29, 2021 · Oct 29, 2021 · Nov 1, 2021 · Nov 4, 2021
diff --git a/official/vision/beta/projects/yolo/__init__.py b/official/vision/beta/projects/yolo/__init__.py
diff --git a/official/vision/beta/projects/yolo/common/registry_imports.py b/official/vision/beta/projects/yolo/common/registry_imports.py
@@ -17,20 +17,16 @@
 # pylint: disable=unused-import
 # pylint: disable=g-bad-import-order
 from official.common import registry_imports
-
 # import configs
 from official.vision.beta.projects.yolo.configs import darknet_classification
 from official.vision.beta.projects.yolo.configs import yolo as yolo_config
-
 # import modeling components
 from official.vision.beta.projects.yolo.modeling.backbones import darknet
 from official.vision.beta.projects.yolo.modeling.decoders import yolo_decoder
-
+# import optimization packages
+from official.vision.beta.projects.yolo.optimization import optimizer_factory
+from official.vision.beta.projects.yolo.optimization.configs import (
+    optimization_config, optimizer_config)
 # import tasks
 from official.vision.beta.projects.yolo.tasks import image_classification
 from official.vision.beta.projects.yolo.tasks import yolo as yolo_task
-
-# import optimization packages
-from official.vision.beta.projects.yolo.optimization import optimizer_factory
-from official.vision.beta.projects.yolo.optimization.configs import optimizer_config
-from official.vision.beta.projects.yolo.optimization.configs import optimization_config
diff --git a/official/vision/beta/projects/yolo/configs/backbones.py b/official/vision/beta/projects/yolo/configs/backbones.py
@@ -14,6 +14,7 @@
 
 """Backbones configurations."""
 import dataclasses
+
 from official.modeling import hyperparams
 from official.vision.beta.configs import backbones
 

diff --git a/official/vision/beta/projects/yolo/configs/decoders.py b/official/vision/beta/projects/yolo/configs/decoders.py
@@ -15,6 +15,7 @@
 """Decoders configurations."""
 import dataclasses
 from typing import Optional
+
 from official.modeling import hyperparams
 from official.vision.beta.configs import decoders
 
@@ -33,6 +34,8 @@ class YoloDecoder(hyperparams.Config):
   use_separable_conv: bool = False
   csp_stack: Optional[bool] = None
   fpn_depth: Optional[int] = None
+  max_fpn_depth: Optional[int] = None
+  max_csp_stack: Optional[int] = None
   fpn_filter_scale: Optional[int] = None
   path_process_len: Optional[int] = None
   max_level_process_len: Optional[int] = None

diff --git a/...ta/projects/yolo/configs/experiments/scaled-yolo/detection-finetune/yolo_csp_640_tpu.yaml b/...ta/projects/yolo/configs/experiments/scaled-yolo/detection-finetune/yolo_csp_640_tpu.yaml
@@ -0,0 +1,80 @@
+# --experiment_type=scaled_yolo
+# mAP 47.6%
+runtime:
+  distribution_strategy: 'tpu'
+  mixed_precision_dtype: 'float32'
+  tpu_enable_xla_dynamic_padder: false
+task:
+  model:
+    input_size: [640, 640, 3]
+    backbone:
+      type: 'darknet'
+      darknet:
+        model_id: 'altered_cspdarknet53'
+        max_level: 5  # levels 3..5 match the keys under loss.object_normalizer
+        min_level: 3
+    decoder:
+      type: yolo_decoder
+      yolo_decoder:
+        version: v4
+        type: csp
+    head:
+      smart_bias: true
+    detection_generator:
+      box_type:
+        'all': scaled
+      scale_xy:
+        'all': 2.0
+      max_boxes: 300  # same value as train_data.parser.max_num_instances
+      nms_type: iou
+      iou_thresh: 0.001
+      nms_thresh: 0.65
+    loss:
+      use_scaled_loss: true
+      update_on_repeat: true
+      box_loss_type:
+        'all': ciou
+      ignore_thresh:
+        'all': 0.0
+      iou_normalizer:
+        'all': 0.05
+      cls_normalizer:
+        'all': 0.3
+      object_normalizer:  # one entry per key '3'..'5' (min_level..max_level)
+        '5': 0.28
+        '4': 0.70
+        '3': 2.80
+      objectness_smooth:
+        'all': 1.0
+    norm_activation:
+      use_sync_bn: true
+    num_classes: 80
+    anchor_boxes:
+      anchors_per_scale: 3  # 3 levels (3..5) x 3 = the 9 boxes listed below
+      boxes: [box: [12, 16], box: [19, 36], box: [40, 28],
+              box: [36, 75], box: [76, 55], box: [72, 146],
+              box: [142, 110], box: [192, 243], box: [459, 401]]
+  train_data:
+    input_path: 'gs://cam2-datasets/coco/train*'
+    shuffle_buffer_size: 10000
+    parser:
+      mosaic:
+        mosaic_frequency: 1.0
+        mixup_frequency: 0.2
+        mosaic_crop_mode: 'scale'
+        mosaic_center: 0.25
+        aug_scale_min: 0.1
+        aug_scale_max: 1.9
+      max_num_instances: 300
+      letter_box: true
+      random_flip: true
+      aug_rand_translate: 0.1
+      area_thresh: 0.1
+  validation_data:
+    input_path: 'gs://cam2-datasets/coco/val*'
+trainer:
+  train_steps: 831600 # epoch 300 to 450
+  optimizer_config:
+    learning_rate:
+      cosine:
+        decay_steps: 831600 # epoch 300 to 450; same value as train_steps
diff --git a/...a/projects/yolo/configs/experiments/scaled-yolo/detection-finetune/yolo_l_p5_896_tpu.yaml b/...a/projects/yolo/configs/experiments/scaled-yolo/detection-finetune/yolo_l_p5_896_tpu.yaml
@@ -0,0 +1,82 @@
+# --experiment_type=large_yolo_finetune
+# mAP 51.1%
+runtime:
+  distribution_strategy: 'tpu'
+  mixed_precision_dtype: 'float32'
+  tpu_enable_xla_dynamic_padder: false
+task:
+  model:
+    input_size: [896, 896, 3]
+    backbone:
+      type: 'darknet'
+      darknet:
+        model_id: 'csp-large'
+        max_level: 5  # levels 3..5 match the keys under loss.object_normalizer
+        min_level: 3
+        width_scale: 1.00
+        depth_scale: 1.00
+    decoder:
+      type: yolo_decoder
+      yolo_decoder:
+        version: v4
+        type: csp_large
+    head:
+      smart_bias: true
+    detection_generator:
+      box_type:
+        'all': scaled
+      scale_xy:
+        'all': 2.0
+      max_boxes: 300  # same value as train_data.parser.max_num_instances
+      nms_type: iou
+      iou_thresh: 0.001
+      nms_thresh: 0.65
+    loss:
+      use_scaled_loss: true
+      update_on_repeat: true
+      box_loss_type:  
+        'all': ciou
+      ignore_thresh:
+        'all': 0.0
+      iou_normalizer: 
+        'all': 0.05
+      cls_normalizer: 
+        'all': 0.5
+      object_normalizer: # one entry per key '3'..'5' (min_level..max_level)
+        '5': 0.4
+        '4': 1.0
+        '3': 4.0
+      objectness_smooth: 
+        'all': 1.0
+    norm_activation:
+      use_sync_bn: true
+    num_classes: 80
+    anchor_boxes:
+      anchors_per_scale: 4  # 3 levels (3..5) x 4 = the 12 boxes listed below
+      boxes: [box: [13,17], box: [31,25], box: [24,51], box: [61,45], 
+              box: [48,102], box: [119,96], box: [97,189], box: [217,184], 
+              box: [171,384], box: [324,451],  box: [616,618], box: [800,800]]
+  train_data:
+    input_path: 'gs://cam2-datasets/coco/train*'
+    shuffle_buffer_size: 10000
+    parser:
+      mosaic:
+        mosaic_frequency: 1.0
+        mixup_frequency: 0.2
+        mosaic_crop_mode: 'scale'
+        mosaic_center: 0.0
+        aug_scale_min: 0.2
+        aug_scale_max: 1.8
+      max_num_instances: 300
+      letter_box: true
+      random_flip: true
+      aug_rand_translate: 0.5
+      area_thresh: 0.1
+  validation_data:
+    input_path: 'gs://cam2-datasets/coco/val*'
+trainer:
+  train_steps: 831600 # epoch 300 to 450
+  optimizer_config:
+    learning_rate:
+      cosine:
+        decay_steps: 831600 # epoch 300 to 450; same value as train_steps
diff --git a/.../projects/yolo/configs/experiments/scaled-yolo/detection-finetune/yolo_l_p6_1280_tpu.yaml b/.../projects/yolo/configs/experiments/scaled-yolo/detection-finetune/yolo_l_p6_1280_tpu.yaml
@@ -0,0 +1,84 @@
+# --experiment_type=large_yolo_finetune
+# mAP 54.4%
+runtime:
+  distribution_strategy: 'tpu'
+  mixed_precision_dtype: 'float32'
+  tpu_enable_xla_dynamic_padder: false
+task:
+  model:
+    input_size: [1280, 1280, 3]
+    backbone:
+      type: 'darknet'
+      darknet:
+        model_id: 'csp-large'
+        max_level: 6  # levels 3..6 match the keys under loss.object_normalizer
+        min_level: 3
+        width_scale: 1.00
+        depth_scale: 1.00
+    decoder:
+      type: yolo_decoder
+      yolo_decoder:
+        version: v4
+        type: csp_large
+    head:
+      smart_bias: true
+    detection_generator:
+      box_type:
+        'all': scaled
+      scale_xy:
+        'all': 2.0
+      max_boxes: 300  # same value as train_data.parser.max_num_instances
+      nms_type: iou
+      iou_thresh: 0.001
+      nms_thresh: 0.65
+    loss:
+      use_scaled_loss: true
+      update_on_repeat: true
+      box_loss_type:
+        'all': ciou
+      ignore_thresh:
+        'all': 0.0
+      iou_normalizer:
+        'all': 0.05
+      cls_normalizer:
+        'all': 0.5
+      object_normalizer:  # one entry per key '3'..'6' (min_level..max_level)
+        '6': 0.1
+        '5': 0.4
+        '4': 1.0
+        '3': 4.0
+      objectness_smooth:
+        'all': 1.0
+    norm_activation:
+      use_sync_bn: true
+    num_classes: 80
+    anchor_boxes:
+      anchors_per_scale: 4  # 4 levels (3..6) x 4 = the 16 boxes listed below
+      boxes:  [box: [13,17], box: [31,25], box: [24,51], box: [61,45],
+              box: [61,45], box: [48,102], box: [119,96], box: [97,189],
+              box: [97,189], box: [217,184], box: [171,384], box: [324,451],
+              box: [324,451], box: [545,357], box: [616,618], box: [1024,1024]]
+  train_data:
+    input_path: '/readahead/200M/placer/prod/home/tensorflow-performance-data/datasets/coco/train*'
+    shuffle_buffer_size: 10000
+    parser:
+      mosaic:
+        mosaic_frequency: 1.0
+        mixup_frequency: 0.2
+        mosaic_crop_mode: 'scale'
+        mosaic_center: 0.0
+        aug_scale_min: 0.2
+        aug_scale_max: 1.8
+      max_num_instances: 300
+      letter_box: true
+      random_flip: true
+      aug_rand_translate: 0.5
+      area_thresh: 0.1
+  validation_data:
+    input_path: '/readahead/200M/placer/prod/home/tensorflow-performance-data/datasets/coco/val*'
+trainer:  # top-level section (sibling of `task`), not a child of `task`
+  train_steps: 831600 # epoch 300 to 450
+  optimizer_config:  # learning_rate is configured under optimizer_config
+    learning_rate:
+      cosine:
+        decay_steps: 831600 # epoch 300 to 450; same value as train_steps
diff --git a/.../projects/yolo/configs/experiments/scaled-yolo/detection-finetune/yolo_l_p7_1536_tpu.yaml b/.../projects/yolo/configs/experiments/scaled-yolo/detection-finetune/yolo_l_p7_1536_tpu.yaml
@@ -0,0 +1,86 @@
+# --experiment_type=large_yolo
+# mAP 55.3%
+runtime:
+  distribution_strategy: 'tpu'
+  mixed_precision_dtype: 'float32'
+  tpu_enable_xla_dynamic_padder: false
+task:
+  model:
+    input_size: [1536, 1536, 3]
+    backbone:
+      type: 'darknet'
+      darknet:
+        model_id: 'csp-large'
+        max_level: 7  # levels 3..7 match the keys under loss.object_normalizer
+        min_level: 3
+        width_scale: 1.25
+        depth_scale: 1.00
+    decoder:
+      type: yolo_decoder
+      yolo_decoder:
+        version: v4
+        type: csp_large
+    head:
+      smart_bias: true
+    detection_generator:
+      box_type:
+        'all': scaled
+      scale_xy:
+        'all': 2.0
+      max_boxes: 300  # same value as train_data.parser.max_num_instances
+      nms_type: iou
+      iou_thresh: 0.001
+      nms_thresh: 0.65
+    loss:
+      use_scaled_loss: true
+      update_on_repeat: true
+      box_loss_type:
+        'all': ciou
+      ignore_thresh:
+        'all': 0.0
+      iou_normalizer:
+        'all': 0.05
+      cls_normalizer:
+        'all': 0.5
+      object_normalizer:  # one entry per key '3'..'7' (min_level..max_level)
+        '7': 0.1
+        '6': 0.4
+        '5': 0.5
+        '4': 1.0
+        '3': 4.0
+      objectness_smooth:
+        'all': 1.0
+    norm_activation:
+      use_sync_bn: true
+    num_classes: 80
+    anchor_boxes:
+      anchors_per_scale: 4  # 5 levels (3..7) x 4 = the 20 boxes listed below
+      boxes:  [box: [13,17], box: [22,25], box: [55,41], box: [27,66],
+              box: [57,88], box: [112,69], box: [69,177], box: [136,138],
+              box: [136,138], box: [287,114], box: [134,275], box: [268,248],
+              box: [268,248], box: [232,504], box: [445,416], box: [640,640],
+              box: [812,393], box: [477,808], box: [1070,908], box: [1408,1408]]
+  train_data:
+    input_path: '/readahead/200M/placer/prod/home/tensorflow-performance-data/datasets/coco/train*'
+    shuffle_buffer_size: 10000
+    parser:
+      mosaic:
+        mosaic_frequency: 1.0
+        mixup_frequency: 0.2
+        mosaic_crop_mode: 'scale'
+        mosaic_center: 0.0
+        aug_scale_min: 0.2
+        aug_scale_max: 1.8
+      max_num_instances: 300
+      letter_box: true
+      random_flip: true
+      aug_rand_translate: 0.5
+      area_thresh: 0.1
+  validation_data:
+    input_path: '/readahead/200M/placer/prod/home/tensorflow-performance-data/datasets/coco/val*'
+trainer:  # top-level section (sibling of `task`), not a child of `task`
+  train_steps: 831600 # epoch 300 to 450
+  optimizer_config:  # learning_rate is configured under optimizer_config
+    learning_rate:
+      cosine:
+        decay_steps: 831600 # epoch 300 to 450; same value as train_steps