diff --git a/configs/qdtrack-hdf5.py b/configs/qdtrack-hdf5.py new file mode 100644 index 0000000..2e12911 --- /dev/null +++ b/configs/qdtrack-hdf5.py @@ -0,0 +1,260 @@ +# model settings +model = dict( + type='QuasiDenseFasterRCNN', + pretrained='torchvision://resnet50', + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch'), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + num_outs=5), + rpn_head=dict( + type='RPNHead', + in_channels=256, + feat_channels=256, + anchor_generator=dict( + type='AnchorGenerator', + scales=[8], + ratios=[0.5, 1.0, 2.0], + strides=[4, 8, 16, 32, 64]), + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[.0, .0, .0, .0], + target_stds=[1.0, 1.0, 1.0, 1.0]), + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), + roi_head=dict( + type='QuasiDenseRoIHead', + bbox_roi_extractor=dict( + type='SingleRoIExtractor', + roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), + out_channels=256, + featmap_strides=[4, 8, 16, 32]), + bbox_head=dict( + type='Shared2FCBBoxHead', + in_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=8, + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[0., 0., 0., 0.], + target_stds=[0.1, 0.1, 0.2, 0.2]), + reg_class_agnostic=False, + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), + track_roi_extractor=dict( + type='SingleRoIExtractor', + roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), + out_channels=256, + featmap_strides=[4, 8, 16, 32]), + track_head=dict( + type='QuasiDenseEmbedHead', + num_convs=4, + num_fcs=1, + embed_channels=256, + norm_cfg=dict(type='GN', num_groups=32), 
+ loss_track=dict(type='MultiPosCrossEntropyLoss', loss_weight=0.25), + loss_track_aux=dict( + type='L2Loss', + neg_pos_ub=3, + pos_margin=0, + neg_margin=0.3, + hard_mining=True, + loss_weight=1.0))), + tracker=dict( + type='QuasiDenseEmbedTracker', + init_score_thr=0.7, + obj_score_thr=0.3, + match_score_thr=0.5, + memo_tracklet_frames=10, + memo_backdrop_frames=1, + memo_momentum=0.8, + nms_conf_thr=0.5, + nms_backdrop_iou_thr=0.3, + nms_class_iou_thr=0.7, + with_cats=True, + match_metric='bisoftmax'), + + # model training and testing settings + train_cfg = dict( + rpn=dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.7, + neg_iou_thr=0.3, + min_pos_iou=0.3, + ignore_iof_thr=-1), + sampler=dict( + type='RandomSampler', + num=256, + pos_fraction=0.5, + neg_pos_ub=-1, + add_gt_as_proposals=False), + allowed_border=-1, + pos_weight=-1, + debug=False), + rpn_proposal=dict( + nms_across_levels=False, + nms_pre=2000, + nms_post=1000, + max_num=1000, + nms_thr=0.7, + min_bbox_size=0), + rcnn=dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.5, + neg_iou_thr=0.5, + min_pos_iou=0.5, + match_low_quality=False, + ignore_iof_thr=-1), + sampler=dict( + type='RandomSampler', + num=512, + pos_fraction=0.25, + neg_pos_ub=-1, + add_gt_as_proposals=True), + pos_weight=-1, + debug=False), + embed=dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.7, + neg_iou_thr=0.3, + min_pos_iou=0.5, + match_low_quality=False, + ignore_iof_thr=-1), + sampler=dict( + type='CombinedSampler', + num=256, + pos_fraction=0.5, + neg_pos_ub=3, + add_gt_as_proposals=True, + pos_sampler=dict(type='InstanceBalancedPosSampler'), + neg_sampler=dict( + type='IoUBalancedNegSampler', + floor_thr=-1, + floor_fraction=0, + num_bins=3)))), + + test_cfg = dict( + rpn=dict( + nms_across_levels=False, + nms_pre=1000, + nms_post=1000, + max_num=1000, + nms_thr=0.7, + min_bbox_size=0), + rcnn=dict( + score_thr=0.05, + nms=dict(type='nms', iou_threshold=0.5), + 
max_per_img=100)), +) + +# dataset settings +dataset_type = 'BDDVideoDataset' +data_root = '/cluster/work/cvl/xiali/bdd100k/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict( + type='LoadMultiImagesFromFile', + to_float32=True, + file_client_args=dict( + vid_db_path=data_root + 'hdf5s/train.hdf5', + img_db_path=data_root + 'hdf5s/train_det.hdf5', + backend='hdf5')), + dict(type='SeqLoadAnnotations', with_bbox=True, with_ins_id=True), + dict(type='SeqResize', img_scale=(1296, 720), keep_ratio=True), + dict(type='SeqRandomFlip', share_params=True, flip_ratio=0.5), + dict(type='SeqNormalize', **img_norm_cfg), + dict(type='SeqPad', size_divisor=32), + dict(type='SeqDefaultFormatBundle'), + dict( + type='SeqCollect', + keys=['img', 'gt_bboxes', 'gt_labels', 'gt_match_indices'], + ref_prefix='ref'), +] +test_pipeline = [ + dict( + type='LoadImageFromFile', + to_float32=True, + file_client_args=dict( + vid_db_path=data_root + 'hdf5s/val.hdf5', + backend='hdf5')), + dict( + type='MultiScaleFlipAug', + img_scale=(1296, 720), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='VideoCollect', keys=['img']) + ]) +] +data = dict( + samples_per_gpu=2, + workers_per_gpu=2, + train=[ + dict( + type=dataset_type, + ann_file=data_root + 'jsons/box_track_train_cocofmt.json', + key_img_sampler=dict(interval=1), + ref_img_sampler=dict(num_ref_imgs=1, scope=3, method='uniform'), + pipeline=train_pipeline), + dict( + type=dataset_type, + load_as_video=False, + ann_file=data_root + 'jsons/det_train_cocofmt.json', + pipeline=train_pipeline) + ], + val=dict( + type=dataset_type, + ann_file=data_root + 'jsons/box_track_val_cocofmt.json', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + ann_file=data_root + 
'jsons/box_track_val_cocofmt.json', + pipeline=test_pipeline)) +# optimizer +optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=1000, + warmup_ratio=1.0 / 1000, + step=[8, 11]) +# checkpoint saving +checkpoint_config = dict(interval=1) +# yapf:disable +log_config = dict( + interval=50, + hooks=[ + dict(type='TextLoggerHook'), + # dict(type='TensorboardLoggerHook') + ]) +# yapf:enable +# runtime settings +total_epochs = 12 +dist_params = dict(backend='nccl') +log_level = 'INFO' +load_from = None +resume_from = None +workflow = [('train', 1)] +evaluation = dict(metric=['bbox', 'track'], interval=2) diff --git a/qdtrack/datasets/pipelines/__init__.py b/qdtrack/datasets/pipelines/__init__.py index 2e4b751..0d4333f 100644 --- a/qdtrack/datasets/pipelines/__init__.py +++ b/qdtrack/datasets/pipelines/__init__.py @@ -1,4 +1,5 @@ from .formatting import VideoCollect, SeqCollect, SeqDefaultFormatBundle +from .hdf5backend import HDF5Backend from .loading import LoadMultiImagesFromFile, SeqLoadAnnotations from .transforms import SeqNormalize, SeqPad, SeqRandomFlip, SeqResize @@ -6,4 +7,5 @@ 'LoadMultiImagesFromFile', 'SeqLoadAnnotations', 'SeqResize', 'SeqNormalize', 'SeqRandomFlip', 'SeqPad', 'SeqDefaultFormatBundle', - 'SeqCollect', 'VideoCollect' + 'SeqCollect', 'VideoCollect', 'HDF5Backend' ] + diff --git a/qdtrack/datasets/pipelines/hdf5backend.py b/qdtrack/datasets/pipelines/hdf5backend.py new file mode 100644 index 0000000..9b6a274 --- /dev/null +++ b/qdtrack/datasets/pipelines/hdf5backend.py @@ -0,0 +1,37 @@ +import h5py +import mmcv +import numpy as np +import os +from mmcv import BaseStorageBackend, FileClient + + +@FileClient.register_backend('hdf5') +class HDF5Backend(BaseStorageBackend): + + def __init__(self, vid_db_path, img_db_path="", **kwargs): + self.vid_db_path = str(vid_db_path) + self.img_db_path = 
str(img_db_path) + self.vid_client = None + self.img_client = None + + def get(self, filepath): + """Get values according to the filepath. + Args: + filepath (str | obj:`Path`): Here, filepath is the HDF5 key. + """ + filepath = str(filepath) + filefolder, filename = os.path.split(filepath) + if filefolder == "": + if self.img_client is None: + self.img_client = h5py.File(self.img_db_path, 'r') + value_buf = np.array(self.img_client[filename]) + else: + if self.vid_client is None: + self.vid_client = h5py.File(self.vid_db_path, 'r') + group = self.vid_client[filefolder] + value_buf = np.array(group[filename]) + return value_buf + + def get_text(self, filepath): + raise NotImplementedError + diff --git a/requirements.txt b/requirements.txt index 9c9c35b..f41d3ef 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ +h5py mmcv>=0.3.0 mmdet motmetrics @@ -5,4 +6,4 @@ numpy torch>=1.1 torchvision -git+git://github.com/bdd100k/bdd100k.git \ No newline at end of file +git+git://github.com/bdd100k/bdd100k.git diff --git a/tools/bsub_test.sh b/tools/bsub_test.sh new file mode 100755 index 0000000..7a1fb9f --- /dev/null +++ b/tools/bsub_test.sh @@ -0,0 +1,10 @@ +#!/bin/bash +CONFIG=$1 +GPUS=$2 +NAME=$3 +EPOCH=$4 + +bsub -n 36 -W 4:00 -J $NAME \ + -R "rusage[mem=5000,ngpus_excl_p=${GPUS}]" \ + -R "select[gpu_model0==GeForceRTX2080Ti]" \ + "./tools/test.sh ${CONFIG} ${GPUS} ${EPOCH}" diff --git a/tools/bsub_train.sh b/tools/bsub_train.sh new file mode 100755 index 0000000..0c934cd --- /dev/null +++ b/tools/bsub_train.sh @@ -0,0 +1,9 @@ +#!/bin/bash +CONFIG=$1 +GPUS=$2 +NAME=$3 + +bsub -n 36 -W 120:00 -J $NAME \ + -R "rusage[mem=5000,ngpus_excl_p=${GPUS}]" \ + -R "select[gpu_model0==GeForceRTX2080Ti]" \ + "./tools/train.sh ${CONFIG} ${GPUS}" diff --git a/tools/dist_test.sh b/tools/dist_test.sh old mode 100644 new mode 100755 diff --git a/tools/dist_train.sh b/tools/dist_train.sh old mode 100644 new mode 100755 diff --git a/tools/slurm_test.sh 
b/tools/slurm_test.sh old mode 100644 new mode 100755 diff --git a/tools/slurm_train.sh b/tools/slurm_train.sh old mode 100644 new mode 100755 diff --git a/tools/test.sh b/tools/test.sh new file mode 100755 index 0000000..2d8cb08 --- /dev/null +++ b/tools/test.sh @@ -0,0 +1,18 @@ +#!/bin/bash + +CONFIG=$1 +GPUS=$2 +EPOCH=$3 +PORT=${PORT:-29500} + +CFG_FILE="./configs/${CONFIG}.py" +PTH_FILE="./work_dirs/${CONFIG}/epoch_${EPOCH}.pth" +OUT_FILE="./${CONFIG}.pkl" + +module load eth_proxy +source ../venv/bin/activate + +PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ +python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ + $(dirname "$0")/test.py $CFG_FILE $PTH_FILE --eval track --out $OUT_FILE \ + --launcher pytorch ${@:4} diff --git a/tools/train.sh b/tools/train.sh new file mode 100755 index 0000000..eca21da --- /dev/null +++ b/tools/train.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +function rand() { + min=$1 + max=$(($2-$min+1)) + num=$(($RANDOM+1000000000000)) + echo $(($num%$max+$min)) +} + +CONFIG=$1 +GPUS=$2 +PORT=$(rand 6000 12000) + +CFG_FILE="./configs/${CONFIG}.py" +PTH_FILE="./work_dirs/${CONFIG}/latest.pth" + +module load eth_proxy +source ../venv/bin/activate + +PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ +python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ + $(dirname "$0")/train.py $CFG_FILE --launcher pytorch ${@:3} + +PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ +python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ + $(dirname "$0")/test.py $CFG_FILE $PTH_FILE --eval track accuracy \ + --launcher pytorch ${@:3}