diff --git a/examples/model_100.pt b/examples/model_100.pt
index 47fe5e9a..c30b8744 100644
Binary files a/examples/model_100.pt and b/examples/model_100.pt differ
diff --git a/sim/genesis/README.md b/sim/genesis/README.md
index 866ad5c5..6400c065 100644
--- a/sim/genesis/README.md
+++ b/sim/genesis/README.md
@@ -4,7 +4,7 @@
 # 安装rsl_rl。
 ```
 git clone https://github.com/leggedrobotics/rsl_rl
-cd rsl_rl && git checkout v1.0.2 && pip install -e .
+cd rsl_rl && git checkout v2.0.1 && pip install -e .
 ```
 
 # 安装tensorboard。
diff --git a/sim/genesis/README_EN.md b/sim/genesis/README_EN.md
index 51f998ff..d517fdf5 100644
--- a/sim/genesis/README_EN.md
+++ b/sim/genesis/README_EN.md
@@ -10,7 +10,7 @@ cd sim/genesis
 ### Install rsl_rl
 ```bash
 git clone https://github.com/leggedrobotics/rsl_rl
-cd rsl_rl && git checkout v1.0.2 && pip install -e .
+cd rsl_rl && git checkout v2.0.1 && pip install -e .
 ```
 
 ### Install TensorBoard
diff --git a/sim/genesis/zeroth_env.py b/sim/genesis/zeroth_env.py
index de759fdf..a7ec89d0 100644
--- a/sim/genesis/zeroth_env.py
+++ b/sim/genesis/zeroth_env.py
@@ -181,10 +181,15 @@ def step(self, actions):
         self.last_actions[:] = self.actions[:]
         self.last_dof_vel[:] = self.dof_vel[:]
 
-        return self.obs_buf, None, self.rew_buf, self.reset_buf, self.extras
+        return self.obs_buf, self.rew_buf, self.reset_buf, {
+            "observations": {
+                "critic": self.obs_buf
+            },
+            **self.extras
+        }
 
     def get_observations(self):
-        return self.obs_buf
+        return self.obs_buf, {"observations": {"critic": self.obs_buf}}
 
     def get_privileged_observations(self):
         return None
diff --git a/sim/genesis/zeroth_eval.py b/sim/genesis/zeroth_eval.py
index f5e62ba0..aeb4b8d5 100644
--- a/sim/genesis/zeroth_eval.py
+++ b/sim/genesis/zeroth_eval.py
@@ -11,7 +11,7 @@ def run_sim(env, policy, obs):
     while True:
         actions = policy(obs)
-        obs, _, rews, dones, infos = env.step(actions)
+        obs, _, _, _ = env.step(actions)
 
 def main():
     parser = argparse.ArgumentParser()
@@ -23,6 +23,10 @@ def main():
     log_dir = f"logs/{args.exp_name}"
     env_cfg, obs_cfg, reward_cfg, command_cfg, train_cfg = pickle.load(open(f"logs/{args.exp_name}/cfgs.pkl", "rb"))
 
+    # Add missing class_name fields
+    train_cfg["algorithm"]["class_name"] = "PPO"
+    train_cfg["policy"]["class_name"] = "ActorCritic"
+
     print("train_cfg:", train_cfg) # Add debug print
     reward_cfg["reward_scales"] = {}
     env = ZerothEnv(
diff --git a/sim/genesis/zeroth_train.py b/sim/genesis/zeroth_train.py
index fcba43bb..e1872b78 100644
--- a/sim/genesis/zeroth_train.py
+++ b/sim/genesis/zeroth_train.py
@@ -11,6 +11,9 @@ def get_train_cfg(exp_name, max_iterations):
 
     train_cfg_dict = {
+        "num_steps_per_env": 48,
+        "save_interval": 10,
+        "empirical_normalization": True, #这个功能可以帮助稳定训练过程,特别是在观测值范围变化较大的情况下
         "algorithm": {
             "clip_param": 0.2,
             "desired_kl": 0.01,
@@ -24,42 +27,27 @@
             "schedule": "adaptive",
             "use_clipped_value_loss": True,
             "value_loss_coef": 1.0,
+            "class_name": "PPO",
         },
-        "init_member_classes": {},
         "policy": {
             "activation": "elu",
             "actor_hidden_dims": [512, 256, 128],
             "critic_hidden_dims": [512, 256, 128],
             "init_noise_std": 1.0,
+            "class_name": "ActorCritic",
         },
         "runner": {
             "algorithm_class_name": "PPO",
-            "checkpoint": -1,
             "experiment_name": exp_name,
-            "load_run": -1,
-            "log_interval": 1,
-            "max_iterations": max_iterations,
-            "num_steps_per_env": 48,
-            "policy_class_name": "ActorCritic",
-            "record_interval": -1,
-            "resume": False,
-            "resume_path": None,
-            "run_name": "",
-            "runner_class_name": "OnPolicyRunner",
-            "save_interval": 10,
-        },
-        "runner_class_name": "OnPolicyRunner",
-        "seed": 1,
+            "run_name": "zeroth-walking",
+        }
     }
 
     return train_cfg_dict
 
 
 def get_cfgs():
-    env_cfg = {
-        "num_actions": 12,
-        # joint/link names
-        "default_joint_angles": { # [rad]
+    default_joint_angles={ # [rad]
            "left_elbow_yaw": 3.14,
            "right_elbow_yaw": 3.14,
            "right_hip_pitch": 0.0,
@@ -72,21 +60,12 @@
            "left_knee_pitch": 0.0,
            "right_ankle_pitch": 0.0,
            "left_ankle_pitch": 0.0,
-        },
-        "dof_names": [
-            "left_elbow_yaw",
-            "right_elbow_yaw",
-            "right_hip_pitch",
-            "left_hip_pitch",
-            "right_hip_yaw",
-            "left_hip_yaw",
-            "right_hip_roll",
-            "left_hip_roll",
-            "right_knee_pitch",
-            "left_knee_pitch",
-            "right_ankle_pitch",
-            "left_ankle_pitch",
-        ],
+    }
+    env_cfg = {
+        "num_actions": 12,
+        # joint/link names
+        "default_joint_angles": default_joint_angles,
+        "dof_names": list(default_joint_angles.keys()),
         # PD
         "kp": 20.0,
         "kd": 0.5,
@@ -128,8 +107,8 @@
         "num_commands": 3,
         # "lin_vel_y_range": [-0.5, -0.5], # move forward slowly
         "lin_vel_y_range": [-0.6, -0.6], # move faster than above!
-        "lin_vel_x_range": [0, 0],
-        "ang_vel_range": [0, 0],
+        "lin_vel_x_range": [-0.01, 0.01],
+        "ang_vel_range": [-0.01, 0.01],
     }
 
     return env_cfg, obs_cfg, reward_cfg, command_cfg
@@ -139,7 +118,7 @@ def main():
     parser = argparse.ArgumentParser()
     parser.add_argument("-e", "--exp_name", type=str, default="zeroth-walking")
    parser.add_argument("-B", "--num_envs", type=int, default=4096)
-    parser.add_argument("--max_iterations", type=int, default=100)
+    parser.add_argument("--max_iterations", type=int, default=101)
     args = parser.parse_args()
 
     gs.init(logging_level="warning")