diff --git a/projects/maskdino/configs/data/cityscapes_semantic_seg.py b/projects/maskdino/configs/data/cityscapes_semantic_seg.py
new file mode 100644
index 00000000..e12e5ce6
--- /dev/null
+++ b/projects/maskdino/configs/data/cityscapes_semantic_seg.py
@@ -0,0 +1,63 @@
+from omegaconf import OmegaConf
+
+import detectron2.data.transforms as T
+from detectron2.config import LazyCall as L
+from detectron2.data import (
+    MetadataCatalog,
+    build_detection_test_loader,
+    build_detection_train_loader,
+    get_detection_dataset_dicts,
+)
+from detectron2.data.dataset_mapper import DatasetMapper
+from detectron2.evaluation import CityscapesSemSegEvaluator
+
+from detrex.data.dataset_mappers.mask_former_semantic_dataset_mapper import (
+    MaskFormerSemanticDatasetMapper,
+    build_transform_gen,
+)
+
+dataloader = OmegaConf.create()
+
+# training loader: multi-scale "choice" resize (0.5x-2.0x of the 1024px base size),
+# fixed 512x1024 crops, and SSD-style color augmentation
+dataloader.train = L(build_detection_train_loader)(
+    dataset=L(get_detection_dataset_dicts)(names="cityscapes_fine_sem_seg_train"),
+    mapper=L(MaskFormerSemanticDatasetMapper)(
+        augmentation=L(build_transform_gen)(
+            min_size_train=[int(x * 0.1 * 1024) for x in range(5, 21)],
+            max_size_train=4096,
+            min_size_train_sampling="choice",
+            enabled_crop=True,
+            crop_params=dict(crop_type="absolute", crop_size=(512, 1024), single_category_max_area=1.0),
+            color_aug_ssd=True,
+            img_format="RGB",
+        ),
+        meta=MetadataCatalog.get("cityscapes_fine_sem_seg_train"),
+        size_divisibility=-1,
+        is_train=True,
+        image_format="RGB",
+    ),
+    total_batch_size=16,
+    num_workers=4,
+)
+
+# test loader: shorter edge fixed at 1024, i.e. full-resolution Cityscapes evaluation
+dataloader.test = L(build_detection_test_loader)(
+    dataset=L(get_detection_dataset_dicts)(names="cityscapes_fine_sem_seg_val", filter_empty=False),
+    mapper=L(DatasetMapper)(
+        augmentation=[
+            L(T.ResizeShortestEdge)(
+                short_edge_length=1024,
+                max_size=4096,
+            ),
+        ],
+        is_train=False,
+        image_format="RGB",
+    ),
+    num_workers=4,
+)
+
+# evaluate mIoU with the official cityscapesscripts toolkit
+dataloader.evaluator = L(CityscapesSemSegEvaluator)(
+    dataset_name="${..test.dataset.names}",
+)
diff --git a/projects/maskdino/configs/maskdino_r50_cityscapes_semantic_seg_50ep.py b/projects/maskdino/configs/maskdino_r50_cityscapes_semantic_seg_50ep.py
new file mode 100644
index 00000000..780e11c7
--- /dev/null
+++ b/projects/maskdino/configs/maskdino_r50_cityscapes_semantic_seg_50ep.py
@@ -0,0 +1,65 @@
+from detrex.config import get_config
+from .models.maskdino_r50 import model
+from .data.cityscapes_semantic_seg import dataloader
+
+from fvcore.common.param_scheduler import MultiStepParamScheduler
+from detectron2.config import LazyCall as L
+from detectron2.solver import WarmupParamScheduler
+
+# enable only the semantic segmentation head
+model.semantic_on = True
+model.instance_on = False
+model.panoptic_on = False
+
+train = get_config("common/train.py").train
+# max training iterations
+train.max_iter = 368750
+# warmup lr scheduler: decay the lr multiplier from 1.0 to 0.1 late in training
+lr_multiplier = L(WarmupParamScheduler)(
+    scheduler=L(MultiStepParamScheduler)(
+        values=[1.0, 0.1],
+        milestones=[327778, 355092],
+    ),
+    warmup_length=10 / train.max_iter,
+    warmup_factor=1.0,
+)
+
+optimizer = get_config("common/optim.py").AdamW
+# lr_multiplier = get_config("common/coco_schedule.py").lr_multiplier_50ep
+
+# initialize checkpoint to be loaded
+train.init_checkpoint = "detectron2://ImageNetPretrained/torchvision/R-50.pkl"
+train.output_dir = "./output/maskdino_r50_cityscapes_semantic_seg_50ep"
+
+# run evaluation every 5000 iters
+train.eval_period = 5000
+
+# log training information every 20 iters
+train.log_period = 20
+
+# save checkpoint every 5000 iters
+train.checkpointer.period = 5000
+
+# gradient clipping for training
+train.clip_grad.enabled = True
+train.clip_grad.params.max_norm = 0.01
+train.clip_grad.params.norm_type = 2
+
+# set training devices
+train.device = "cuda"
+
+# modify optimizer config: 10x lower lr for the backbone
+optimizer.lr = 1e-4
+optimizer.betas = (0.9, 0.999)
+optimizer.weight_decay = 1e-4
+optimizer.params.lr_factor_func = lambda module_name: 0.1 if "backbone" in module_name else 1
+
+# modify dataloader config
+dataloader.train.num_workers = 16
+
+# note that this is the total batch size across all GPUs:
+# e.g. when training with 4 GPUs, each GPU processes 16 / 4 = 4 images
+dataloader.train.total_batch_size = 16
+
+# dump the testing results into output_dir for visualization
+dataloader.evaluator.output_dir = train.output_dir
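A quick way to review the new dataloader config is to instantiate it outside of a training run. The sketch below is not part of the patch; it assumes a detrex checkout with this diff applied, detectron2 installed, and the Cityscapes dataset prepared under `DETECTRON2_DATASETS` (the config path and dataset names are the ones added above):

```python
# Hypothetical smoke test for the dataloader config added in this diff.
from detectron2.config import LazyConfig, instantiate

cfg = LazyConfig.load("projects/maskdino/configs/data/cityscapes_semantic_seg.py")
loader = instantiate(cfg.dataloader.train)
batch = next(iter(loader))

# On a single process the batch should hold total_batch_size samples, each a
# 512x1024 crop (size_divisibility=-1 means no extra padding is applied).
print(len(batch))                 # expected: 16
print(batch[0]["image"].shape)    # expected: torch.Size([3, 512, 1024])
print(batch[0]["sem_seg"].shape)  # expected: torch.Size([512, 1024])
```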
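The learning-rate schedule also deserves a note: `MultiStepParamScheduler` receives two milestones but no `num_updates`, and if I read fvcore correctly it then treats the last milestone (355092) as the scheduler horizon, while detectron2's `LRMultiplier` evaluates the multiplier at `iter / train.max_iter` with `max_iter = 368750`. The single 10x decay would therefore land near iteration 340400 rather than exactly at 327778. A minimal check, under the same assumptions as above:

```python
# Sketch: evaluate the lr multiplier defined by this config at a few iterations.
from detectron2.config import LazyConfig, instantiate

cfg = LazyConfig.load(
    "projects/maskdino/configs/maskdino_r50_cityscapes_semantic_seg_50ep.py"
)
sched = instantiate(cfg.lr_multiplier)  # WarmupParamScheduler wrapping MultiStep

# ParamSchedulers take a relative position `where` in [0, 1]; detectron2
# drives them with where = iter / max_iter during training.
for it in (0, 100000, 327778, 340390, 368749):
    print(f"iter {it:>6d}: multiplier = {sched(it / cfg.train.max_iter):.2f}")
```

If a decay at exactly iteration 327778 is intended, one option is to make the horizon coincide with `max_iter`, e.g. `milestones=[327778, train.max_iter]`; either way, these numbers appear to be inherited from MaskDINO's COCO 50-epoch schedule and may simply warrant a comment.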