Skip to content

Commit

Permalink
minor change for EFA running
Browse files (browse the repository at this point in the history)
  • Loading branch information
Wu committed Jun 6, 2019
1 parent 9a77b35 commit ccbbdab
Showing 1 changed file with 4 additions and 4 deletions.
8 changes: 4 additions & 4 deletions infra/ami/train_efa.sh
Original file line number | Diff line number | Diff line change
Expand Up @@ -27,26 +27,26 @@ mpirun -np ${NUM_GPU} \
-x LD_LIBRARY_PATH \
-x PATH \
-x NCCL_SOCKET_IFNAME=^docker0,lo \
-x NCCL_MIN_NRINGS=8 \
-x NCCL_MIN_NRINGS=13 \
-x NCCL_DEBUG=INFO \
-x TENSORPACK_FP16=1 \
-x HOROVOD_CYCLE_TIME=0.5 \
-x HOROVOD_FUSION_THRESHOLD=67108864 \
python3 tensorpack-mask-rcnn/MaskRCNN/train.py \
python3 /home/ec2-user/tensorpack-mask-rcnn/MaskRCNN/train.py \
--fp16 \
--throughput_log_freq ${THROUGHPUT_LOG_FREQ} \
--config \
MODE_MASK=True \
MODE_FPN=True \
DATA.BASEDIR=/data \
DATA.BASEDIR=/home/ec2-user/data \
DATA.TRAIN='["train2017"]' \
DATA.VAL='("val2017",)' \
TRAIN.BATCH_SIZE_PER_GPU=${BATCH_SIZE_PER_GPU} \
TRAIN.LR_EPOCH_SCHEDULE='[(8, 0.1), (10, 0.01), (12, None)]' \
TRAIN.EVAL_PERIOD=12 \
RPN.TOPK_PER_IMAGE=True \
PREPROC.PREDEFINED_PADDING=True \
BACKBONE.WEIGHTS=/data/pretrained-models/ImageNet-R50-AlignPadding.npz \
BACKBONE.WEIGHTS=/home/ec2-user/data/pretrained-models/ImageNet-R50-AlignPadding.npz \
BACKBONE.NORM=FreezeBN \
TRAINER=horovod
#For 32x4
Expand Down

0 comments on commit ccbbdab

Please sign in to comment.