diff --git a/configs/qdtrack-hdf5.py b/configs/qdtrack-hdf5.py new file mode 100644 index 0000000..2e12911 --- /dev/null +++ b/configs/qdtrack-hdf5.py @@ -0,0 +1,260 @@ +# model settings +model = dict( + type='QuasiDenseFasterRCNN', + pretrained='torchvision://resnet50', + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch'), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + num_outs=5), + rpn_head=dict( + type='RPNHead', + in_channels=256, + feat_channels=256, + anchor_generator=dict( + type='AnchorGenerator', + scales=[8], + ratios=[0.5, 1.0, 2.0], + strides=[4, 8, 16, 32, 64]), + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[.0, .0, .0, .0], + target_stds=[1.0, 1.0, 1.0, 1.0]), + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), + roi_head=dict( + type='QuasiDenseRoIHead', + bbox_roi_extractor=dict( + type='SingleRoIExtractor', + roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), + out_channels=256, + featmap_strides=[4, 8, 16, 32]), + bbox_head=dict( + type='Shared2FCBBoxHead', + in_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=8, + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[0., 0., 0., 0.], + target_stds=[0.1, 0.1, 0.2, 0.2]), + reg_class_agnostic=False, + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), + track_roi_extractor=dict( + type='SingleRoIExtractor', + roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), + out_channels=256, + featmap_strides=[4, 8, 16, 32]), + track_head=dict( + type='QuasiDenseEmbedHead', + num_convs=4, + num_fcs=1, + embed_channels=256, + norm_cfg=dict(type='GN', num_groups=32), 
+ loss_track=dict(type='MultiPosCrossEntropyLoss', loss_weight=0.25), + loss_track_aux=dict( + type='L2Loss', + neg_pos_ub=3, + pos_margin=0, + neg_margin=0.3, + hard_mining=True, + loss_weight=1.0))), + tracker=dict( + type='QuasiDenseEmbedTracker', + init_score_thr=0.7, + obj_score_thr=0.3, + match_score_thr=0.5, + memo_tracklet_frames=10, + memo_backdrop_frames=1, + memo_momentum=0.8, + nms_conf_thr=0.5, + nms_backdrop_iou_thr=0.3, + nms_class_iou_thr=0.7, + with_cats=True, + match_metric='bisoftmax'), + + # model training and testing settings + train_cfg = dict( + rpn=dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.7, + neg_iou_thr=0.3, + min_pos_iou=0.3, + ignore_iof_thr=-1), + sampler=dict( + type='RandomSampler', + num=256, + pos_fraction=0.5, + neg_pos_ub=-1, + add_gt_as_proposals=False), + allowed_border=-1, + pos_weight=-1, + debug=False), + rpn_proposal=dict( + nms_across_levels=False, + nms_pre=2000, + nms_post=1000, + max_num=1000, + nms_thr=0.7, + min_bbox_size=0), + rcnn=dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.5, + neg_iou_thr=0.5, + min_pos_iou=0.5, + match_low_quality=False, + ignore_iof_thr=-1), + sampler=dict( + type='RandomSampler', + num=512, + pos_fraction=0.25, + neg_pos_ub=-1, + add_gt_as_proposals=True), + pos_weight=-1, + debug=False), + embed=dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.7, + neg_iou_thr=0.3, + min_pos_iou=0.5, + match_low_quality=False, + ignore_iof_thr=-1), + sampler=dict( + type='CombinedSampler', + num=256, + pos_fraction=0.5, + neg_pos_ub=3, + add_gt_as_proposals=True, + pos_sampler=dict(type='InstanceBalancedPosSampler'), + neg_sampler=dict( + type='IoUBalancedNegSampler', + floor_thr=-1, + floor_fraction=0, + num_bins=3)))), + + test_cfg = dict( + rpn=dict( + nms_across_levels=False, + nms_pre=1000, + nms_post=1000, + max_num=1000, + nms_thr=0.7, + min_bbox_size=0), + rcnn=dict( + score_thr=0.05, + nms=dict(type='nms', iou_threshold=0.5), + 
max_per_img=100)), +) + +# dataset settings +dataset_type = 'BDDVideoDataset' +data_root = '/cluster/work/cvl/xiali/bdd100k/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict( + type='LoadMultiImagesFromFile', + to_float32=True, + file_client_args=dict( + vid_db_path=data_root + 'hdf5s/train.hdf5', + img_db_path=data_root + 'hdf5s/train_det.hdf5', + backend='hdf5')), + dict(type='SeqLoadAnnotations', with_bbox=True, with_ins_id=True), + dict(type='SeqResize', img_scale=(1296, 720), keep_ratio=True), + dict(type='SeqRandomFlip', share_params=True, flip_ratio=0.5), + dict(type='SeqNormalize', **img_norm_cfg), + dict(type='SeqPad', size_divisor=32), + dict(type='SeqDefaultFormatBundle'), + dict( + type='SeqCollect', + keys=['img', 'gt_bboxes', 'gt_labels', 'gt_match_indices'], + ref_prefix='ref'), +] +test_pipeline = [ + dict( + type='LoadImageFromFile', + to_float32=True, + file_client_args=dict( + vid_db_path=data_root + 'hdf5s/val.hdf5', + backend='hdf5')), + dict( + type='MultiScaleFlipAug', + img_scale=(1296, 720), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='VideoCollect', keys=['img']) + ]) +] +data = dict( + samples_per_gpu=2, + workers_per_gpu=2, + train=[ + dict( + type=dataset_type, + ann_file=data_root + 'jsons/box_track_train_cocofmt.json', + key_img_sampler=dict(interval=1), + ref_img_sampler=dict(num_ref_imgs=1, scope=3, method='uniform'), + pipeline=train_pipeline), + dict( + type=dataset_type, + load_as_video=False, + ann_file=data_root + 'jsons/det_train_cocofmt.json', + pipeline=train_pipeline) + ], + val=dict( + type=dataset_type, + ann_file=data_root + 'jsons/box_track_val_cocofmt.json', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + ann_file=data_root + 
'jsons/box_track_val_cocofmt.json', + pipeline=test_pipeline)) +# optimizer +optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=1000, + warmup_ratio=1.0 / 1000, + step=[8, 11]) +# checkpoint saving +checkpoint_config = dict(interval=1) +# yapf:disable +log_config = dict( + interval=50, + hooks=[ + dict(type='TextLoggerHook'), + # dict(type='TensorboardLoggerHook') + ]) +# yapf:enable +# runtime settings +total_epochs = 12 +dist_params = dict(backend='nccl') +log_level = 'INFO' +load_from = None +resume_from = None +workflow = [('train', 1)] +evaluation = dict(metric=['bbox', 'track'], interval=2) diff --git a/qdtrack/datasets/pipelines/__init__.py b/qdtrack/datasets/pipelines/__init__.py index 2e4b751..0d4333f 100644 --- a/qdtrack/datasets/pipelines/__init__.py +++ b/qdtrack/datasets/pipelines/__init__.py @@ -1,4 +1,5 @@ from .formatting import VideoCollect, SeqCollect, SeqDefaultFormatBundle +from .hdf5backend import HDF5Backend from .loading import LoadMultiImagesFromFile, SeqLoadAnnotations from .transforms import SeqNormalize, SeqPad, SeqRandomFlip, SeqResize @@ -6,4 +7,5 @@ 'LoadMultiImagesFromFile', 'SeqLoadAnnotations', 'SeqResize', 'SeqNormalize', 'SeqRandomFlip', 'SeqPad', 'SeqDefaultFormatBundle', - 'SeqCollect', 'VideoCollect' + 'SeqCollect', 'VideoCollect', 'HDF5Backend' ] + diff --git a/qdtrack/datasets/pipelines/hdf5backend.py b/qdtrack/datasets/pipelines/hdf5backend.py new file mode 100644 index 0000000..9b6a274 --- /dev/null +++ b/qdtrack/datasets/pipelines/hdf5backend.py @@ -0,0 +1,37 @@ +import h5py +import mmcv +import numpy as np +import os +from mmcv import BaseStorageBackend, FileClient + + +@FileClient.register_backend('hdf5') +class HDF5Backend(BaseStorageBackend): + + def __init__(self, vid_db_path, img_db_path="", **kwargs): + self.vid_db_path = str(vid_db_path) + self.img_db_path = 
str(img_db_path) + self.vid_client = None + self.img_client = None + + def get(self, filepath): + """Get values according to the filepath. + Args: + filepath (str | obj:`Path`): Here, filepath is the HDF5 key. + """ + filepath = str(filepath) + filefolder, filename = os.path.split(filepath) + if filefolder == "": + if self.img_client is None: + self.img_client = h5py.File(self.img_db_path, 'r') + value_buf = np.array(self.img_client[filename]) + else: + if self.vid_client is None: + self.vid_client = h5py.File(self.vid_db_path, 'r') + group = self.vid_client[filefolder] + value_buf = np.array(group[filename]) + return value_buf + + def get_text(self, filepath): + raise NotImplementedError + diff --git a/requirements.txt b/requirements.txt index 9c9c35b..f41d3ef 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ +h5py mmcv>=0.3.0 mmdet motmetrics @@ -5,4 +6,4 @@ numpy torch>=1.1 torchvision -git+git://github.com/bdd100k/bdd100k.git \ No newline at end of file +git+git://github.com/bdd100k/bdd100k.git diff --git a/tools/bsub_test.sh b/tools/bsub_test.sh new file mode 100755 index 0000000..7a1fb9f --- /dev/null +++ b/tools/bsub_test.sh @@ -0,0 +1,10 @@ +#!/bin/bash +CONFIG=$1 +GPUS=$2 +NAME=$3 +EPOCH=$4 + +bsub -n 36 -W 4:00 -J $NAME \ + -R "rusage[mem=5000,ngpus_excl_p=${GPUS}]" \ + -R "select[gpu_model0==GeForceRTX2080Ti]" \ + "./tools/test.sh ${CONFIG} ${GPUS} ${EPOCH}" diff --git a/tools/bsub_train.sh b/tools/bsub_train.sh new file mode 100755 index 0000000..0c934cd --- /dev/null +++ b/tools/bsub_train.sh @@ -0,0 +1,9 @@ +#!/bin/bash +CONFIG=$1 +GPUS=$2 +NAME=$3 + +bsub -n 36 -W 120:00 -J $NAME \ + -R "rusage[mem=5000,ngpus_excl_p=${GPUS}]" \ + -R "select[gpu_model0==GeForceRTX2080Ti]" \ + "./tools/train.sh ${CONFIG} ${GPUS}" diff --git a/tools/dist_test.sh b/tools/dist_test.sh old mode 100644 new mode 100755 diff --git a/tools/dist_train.sh b/tools/dist_train.sh old mode 100644 new mode 100755 diff --git a/tools/slurm_test.sh 
b/tools/slurm_test.sh old mode 100644 new mode 100755 diff --git a/tools/slurm_train.sh b/tools/slurm_train.sh old mode 100644 new mode 100755 diff --git a/tools/test.sh b/tools/test.sh new file mode 100755 index 0000000..2d8cb08 --- /dev/null +++ b/tools/test.sh @@ -0,0 +1,18 @@ +#!/bin/bash + +CONFIG=$1 +GPUS=$2 +EPOCH=$3 +PORT=${PORT:-29500} + +CFG_FILE="./configs/${CONFIG}.py" +PTH_FILE="./work_dirs/${CONFIG}/epoch_${EPOCH}.pth" +OUT_FILE="./${CONFIG}.pkl" + +module load eth_proxy +source ../venv/bin/activate + +PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ +python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ + $(dirname "$0")/test.py $CFG_FILE $PTH_FILE --eval track --out $OUT_FILE \ + --launcher pytorch ${@:4} diff --git a/tools/train.sh b/tools/train.sh new file mode 100755 index 0000000..eca21da --- /dev/null +++ b/tools/train.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +function rand() { + min=$1 + max=$(($2-$min+1)) + num=$(($RANDOM+1000000000000)) + echo $(($num%$max+$min)) +} + +CONFIG=$1 +GPUS=$2 +PORT=$(rand 6000 12000) + +CFG_FILE="./configs/${CONFIG}.py" +PTH_FILE="./work_dirs/${CONFIG}/latest.pth" + +module load eth_proxy +source ../venv/bin/activate + +PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ +python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ + $(dirname "$0")/train.py $CFG_FILE --launcher pytorch ${@:3} + +PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ +python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ + $(dirname "$0")/test.py $CFG_FILE $PTH_FILE --eval track accuracy \ + --launcher pytorch ${@:3}