|
| 1 | +from easydict import EasyDict |
| 2 | + |
# Experiment configuration for PPO on Atari Pong with multi-GPU training enabled
# (``policy.multi_gpu=True``).
pong_ppo_config = dict(
    exp_name='data_pong/pong_ppo_ddp_seed0',
    env=dict(
        collector_env_num=8,
        evaluator_env_num=8,
        n_evaluator_episode=8,
        stop_value=20,
        env_id='PongNoFrameskip-v4',
        # 'ALE/Pong-v5' is also available, but it needs special settings after gym.make.
        frame_stack=4,
    ),
    policy=dict(
        multi_gpu=True,
        cuda=True,
        recompute_adv=True,
        action_space='discrete',
        model=dict(
            # The leading dimension matches env.frame_stack above.
            obs_shape=[4, 84, 84],
            action_shape=6,
            action_space='discrete',
            encoder_hidden_size_list=[64, 64, 128],
            actor_head_hidden_size=128,
            critic_head_hidden_size=128,
        ),
        learn=dict(
            epoch_per_collect=10,
            update_per_collect=1,
            batch_size=320,
            learning_rate=3e-4,
            value_weight=0.5,
            entropy_weight=0.001,
            clip_ratio=0.2,
            adv_norm=True,
            value_norm=True,
            # With recompute_adv=True, PPO needs the `done` key in the data to split
            # trajectories, so ignore_done must stay False here. If a `traj_flag` key is
            # added to the data as a backup for `done`, ignore_done=True becomes an option.
            ignore_done=False,
            grad_clip_type='clip_norm',
            grad_clip_value=0.5,
        ),
        collect=dict(
            n_sample=3200,
            unroll_len=1,
            discount_factor=0.99,
            gae_lambda=0.95,
        ),
        eval=dict(evaluator=dict(eval_freq=1000, )),
    ),
)
# EasyDict wrapper around the experiment config; this is the object passed to the
# pipeline entry point in the ``__main__`` section.
main_config = EasyDict(pong_ppo_config)
| 55 | + |
# Creation config: names the env, env manager and policy types to use, plus the
# module that must be imported for the 'atari' env type.
pong_ppo_create_config = dict(
    env=dict(
        type='atari',
        import_names=['dizoo.atari.envs.atari_env'],
    ),
    env_manager=dict(type='subprocess'),
    policy=dict(type='ppo'),
)
# EasyDict wrapper around the creation config; passed to the pipeline entry point
# together with ``main_config``.
create_config = EasyDict(pong_ppo_create_config)
| 65 | + |
if __name__ == "__main__":
    # Overview:
    #     This script should be executed with <nproc_per_node> GPUs.
    #     Run the following command to launch the script:
    #     python -m torch.distributed.launch --nproc_per_node=2 ./dizoo/atari/config/serial/pong/pong_ppo_ddp_config.py
    # NOTE(review): recent PyTorch releases deprecate ``torch.distributed.launch`` in
    # favour of ``torchrun``; confirm which launcher the target environment expects.
    from ding.utils import DDPContext
    from ding.entry import serial_pipeline_onpolicy

    # Run the on-policy serial pipeline inside the DDP context (presumably this sets up
    # and tears down the distributed process group; verify against ding.utils).
    # Training uses seed 0 and is capped at 3e6 environment steps.
    with DDPContext():
        serial_pipeline_onpolicy((main_config, create_config), seed=0, max_env_step=int(3e6))
0 commit comments