diff --git a/sim/deploy/config.py b/sim/deploy/config.py
old mode 100644
new mode 100755
index 496e7922..07ee4d87
--- a/sim/deploy/config.py
+++ b/sim/deploy/config.py
@@ -57,7 +57,8 @@ class RobotConfig:
kds: np.ndarray
tau_limit: np.ndarray
robot_model_path: str
- dt: float = 0.001
+ # is2ac
+ dt: float = 0.00002 # 0.001
phase_duration: float = 0.64
duration: float = 10.0
decimation: int = 10
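Note: the effective control period in this config is the product of the simulator timestep and the decimation factor, since the policy is only queried once every decimation sim steps. A minimal standalone sketch of that relationship (illustration only, not part of the patch):

    def control_rate_hz(dt: float, decimation: int) -> float:
        # Policy update frequency implied by the sim timestep and decimation.
        return 1.0 / (dt * decimation)

    control_rate_hz(0.001, 10)    # 100 Hz with the original dt
    control_rate_hz(0.00002, 10)  # 5 kHz with the overridden dt above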
diff --git a/sim/deploy/policy.py b/sim/deploy/policy.py
old mode 100644
new mode 100755
index 41c1bcf6..04dc9207
--- a/sim/deploy/policy.py
+++ b/sim/deploy/policy.py
@@ -35,7 +35,12 @@ def __init__(
self.hist_obs = deque()
for _ in range(frame_stack):
self.hist_obs.append(np.zeros([1, obs_size], dtype=np.double))
- self.model = torch.jit.load(model_path)
+ # Load the model.
+ # Changed to load onto same device as other tensors
+ # TODO: bring back jit when ready
+ # self.model = torch.jit.load(model_path)
+ self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+ self.model = torch.load(model_path, map_location=self.device)
self.action = np.zeros((num_actions), dtype=np.double)
@abstractmethod
@@ -167,7 +172,7 @@ def next_action(
for i in range(self.cfg.frame_stack):
policy_input[0, i * self.cfg.num_single_obs : (i + 1) * self.cfg.num_single_obs] = self.hist_obs[i][0, :]
- self.action[:] = self.model(torch.tensor(policy_input))[0].detach().numpy()
+ self.action[:] = self.model(torch.tensor(policy_input).to(self.device))[0].detach().cpu().numpy()
self.action = np.clip(self.action, -self.cfg.normalization.clip_actions, self.cfg.normalization.clip_actions)
return self.action
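Note: next_action flattens the last frame_stack single-frame observations into one input vector before calling the model, and the change above keeps that tensor on the same device the model was loaded onto. A minimal standalone sketch of the same pattern (sizes and the commented model call are illustrative assumptions, not the repository's actual values):

    from collections import deque

    import numpy as np
    import torch

    frame_stack, obs_size = 15, 47  # example sizes only
    hist_obs = deque(np.zeros((1, obs_size), dtype=np.double) for _ in range(frame_stack))

    policy_input = np.zeros((1, frame_stack * obs_size), dtype=np.float32)
    for i, frame in enumerate(hist_obs):
        policy_input[0, i * obs_size : (i + 1) * obs_size] = frame[0, :]

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # model = torch.load("walking_policy.pt", map_location=device)
    # action = model(torch.tensor(policy_input).to(device))[0].detach().cpu().numpy()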
diff --git a/sim/deploy/run.py b/sim/deploy/run.py
index 9f5e02fe..f8d2aef7 100755
--- a/sim/deploy/run.py
+++ b/sim/deploy/run.py
@@ -1,4 +1,5 @@
"""Basic sim2sim and sim2real deployment script.
+
Run example:
mjpython sim/deploy/run.py --load_model sim/deploy/tests/walking_policy.pt --world MUJOCO
@@ -21,7 +22,7 @@
from sim.deploy.config import RobotConfig
from sim.env import stompy_mjcf_path
-from sim.stompy.joints import StompyFixed
+from sim.new_test.joints import Stompy as StompyFixed
class Worlds(Enum):
@@ -62,11 +63,19 @@ class MujocoWorld(World):
def __init__(self, cfg: RobotConfig):
self.cfg = cfg
self.model = mujoco.MjModel.from_xml_path(str(self.cfg.robot_model_path))
+
self.model.opt.timestep = self.cfg.dt
+
# First step
self.data = mujoco.MjData(self.model)
+ self.data.qpos = self.model.keyframe("default").qpos
+
mujoco.mj_step(self.model, self.data)
+    # Alternative: set the initial state from the joint defaults instead of the keyframe.
+    # self.data.qpos = StompyFixed.default_standing()
+    # TODO: this should eventually produce the same result as the keyframe above.
+
def step(
self,
tau: np.ndarray = None,
@@ -109,19 +118,25 @@ def get_observation(self) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarra
return dof_pos, dof_vel, orientation, ang_vel
def simulate(self, policy: SimPolicy) -> None:
+
with mujoco.viewer.launch_passive(self.model, self.data) as viewer:
for step in tqdm(range(int(cfg.duration / cfg.dt)), desc="Simulating..."):
# Get the world state
dof_pos, dof_vel, orientation, ang_vel = self.get_observation()
+            # Hold the current pose (policy action temporarily disabled below)
+ target_dof_pos = dof_pos
+
# We update the policy at a lower frequency
# The simulation runs at 1000hz, but the policy is updated at 100hz
- if step % cfg.decimation == 0:
- action = policy.next_action(dof_pos, dof_vel, orientation, ang_vel, step)
- target_dof_pos = action * cfg.action_scale
+ # if step % cfg.decimation == 0:
+ # action = policy.next_action(dof_pos, dof_vel, orientation, ang_vel, step)
+ # target_dof_pos = action * cfg.action_scale
tau = policy.pd_control(target_dof_pos, dof_pos, cfg.kps, dof_vel, cfg.kds)
-
+ # breakpoint()
+ # set tau to zero for now
+ # tau = np.zeros_like(tau)
self.step(tau=tau)
viewer.sync()
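Note: pd_control turns the position targets into joint torques. The kps/kds arguments suggest the standard PD law; the sketch below states that law explicitly, but the actual implementation lives in sim/deploy/policy.py and this is only an assumption:

    def pd_control(target_q, q, kp, dq, kd):
        # Assumed PD law: drive toward the target position, damp with joint velocity.
        # The target joint velocity is taken to be zero.
        return kp * (target_q - q) - kd * dq

    # e.g. tau = pd_control(target_dof_pos, dof_pos, cfg.kps, dof_vel, cfg.kds)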
@@ -141,7 +156,7 @@ def __init__(self, cfg: RobotConfig):
delattr(args, "load_model")
# adapt
- args.task = "legs_ppo"
+ args.task = "only_legs_ppo"
args.device = "cpu"
args.physics_engine = gymapi.SIM_PHYSX
args.num_envs = 1
diff --git a/sim/deploy/tests/model_400_model_only.pt b/sim/deploy/tests/model_400_model_only.pt
new file mode 100644
index 00000000..2b58de6f
Binary files /dev/null and b/sim/deploy/tests/model_400_model_only.pt differ
diff --git a/sim/humanoid_gym/envs/__init__.py b/sim/humanoid_gym/envs/__init__.py
index 52ac3531..fc675d62 100755
--- a/sim/humanoid_gym/envs/__init__.py
+++ b/sim/humanoid_gym/envs/__init__.py
@@ -13,8 +13,12 @@
# fmt: on
from .getup_config import GetupCfg, GetupCfgPPO
from .getup_env import GetupFreeEnv
+from .hexmove_config import HexmoveCfg, HexmoveCfgPPO
+from .hexmove_env import HexmoveFreeEnv
from .legs_config import LegsCfg, LegsCfgPPO
from .legs_env import LegsFreeEnv
+from .only_legs_config import OnlyLegsCfg, OnlyLegsCfgPPO
+from .only_legs_env import OnlyLegsFreeEnv
from .stompy_config import StompyCfg, StompyCfgPPO
from .stompy_env import StompyFreeEnv
@@ -30,6 +34,7 @@ def register_tasks() -> None:
task_registry.register("stompy_ppo", StompyFreeEnv, StompyCfg(), StompyCfgPPO())
task_registry.register("getup_ppo", GetupFreeEnv, GetupCfg(), GetupCfgPPO())
task_registry.register("legs_ppo", LegsFreeEnv, LegsCfg(), LegsCfgPPO())
+ task_registry.register("only_legs_ppo", OnlyLegsFreeEnv, OnlyLegsCfg(), OnlyLegsCfgPPO())
register_tasks()
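Note: once registered, the new task is reachable by name through the humanoid-gym task registry, which is how run.py's args.task = "only_legs_ppo" resolves it. A hedged usage sketch (the import path and make_env call follow the usual legged-gym pattern and are assumptions here):

    from humanoid.utils import get_args, task_registry  # assumed import path

    args = get_args()
    args.task = "only_legs_ppo"
    env, env_cfg = task_registry.make_env(name=args.task, args=args)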
diff --git a/sim/humanoid_gym/envs/getup_env.py b/sim/humanoid_gym/envs/getup_env.py
index 41434cd4..f5ae50c4 100755
--- a/sim/humanoid_gym/envs/getup_env.py
+++ b/sim/humanoid_gym/envs/getup_env.py
@@ -263,7 +263,7 @@ def check_termination(self):
t = len(self.reset_buf)
self.height_history[:, t] = self.root_states[:, 2]
self.window_size = 10
- breakpoint()
+
        above_threshold = self.episode_length_buf > self.window_size
self.reset_buf = torch.any(
diff --git a/sim/humanoid_gym/envs/only_legs_config.py b/sim/humanoid_gym/envs/only_legs_config.py
new file mode 100755
index 00000000..3d72bf00
--- /dev/null
+++ b/sim/humanoid_gym/envs/only_legs_config.py
@@ -0,0 +1,269 @@
+"""Defines the environment configuration for the Getting up task"""
+
+from humanoid.envs.base.legged_robot_config import ( # type: ignore
+ LeggedRobotCfg,
+ LeggedRobotCfgPPO,
+)
+
+from sim.env import stompy_urdf_path
+from sim.new_test.joints import Stompy
+
+NUM_JOINTS = len(Stompy.all_joints()) # 33
+
+
+class OnlyLegsCfg(LeggedRobotCfg):
+ """
+    Configuration class for the legs-only Stompy humanoid robot.
+ """
+
+ class env(LeggedRobotCfg.env):
+ # change the observation dim
+
+ frame_stack = 15
+ c_frame_stack = 3
+ num_single_obs = 11 + NUM_JOINTS * 3
+ num_observations = int(frame_stack * num_single_obs)
+ single_num_privileged_obs = 25 + NUM_JOINTS * 4
+ num_privileged_obs = int(c_frame_stack * single_num_privileged_obs)
+ num_actions = NUM_JOINTS
+ num_envs = 4096
+ episode_length_s = 24 # episode length in seconds
+ use_ref_actions = False
+
+ class safety:
+ # safety factors
+ pos_limit = 0.9
+ vel_limit = 0.9
+ torque_limit = 0.9
+
+ class asset(LeggedRobotCfg.asset):
+ file = str(stompy_urdf_path(legs_only=True))
+
+ name = "stompy"
+
+ foot_name = "_foot_1_rmd_x4_24_mock_1_inner_rmd_x4_24_1"
+ knee_name = "_rmd_x8_90_mock_3_inner_rmd_x8_90_1"
+
+ termination_height = 0.23
+ default_feet_height = 0.0
+ # terminate_after_contacts_on = ["link_leg_assembly_left_1_leg_part_1_2", "link_leg_assembly_right_1_leg_part_1_2"]
+
+ penalize_contacts_on = []
+ self_collisions = 0 # 1 to disable, 0 to enable...bitwise filter
+
+ collapse_fixed_joints = True
+
+ flip_visual_attachments = False
+ replace_cylinder_with_capsule = False
+ fix_base_link = False
+
+ class terrain(LeggedRobotCfg.terrain):
+ mesh_type = "plane"
+ # mesh_type = 'trimesh'
+ curriculum = False
+ # rough terrain only:
+ measure_heights = False
+ static_friction = 0.6
+ dynamic_friction = 0.6
+ terrain_length = 8.0
+ terrain_width = 8.0
+ num_rows = 10 # number of terrain rows (levels)
+ num_cols = 10 # number of terrain cols (types)
+ max_init_terrain_level = 10 # starting curriculum state
+ # plane; obstacles; uniform; slope_up; slope_down, stair_up, stair_down
+ terrain_proportions = [0.2, 0.2, 0.4, 0.1, 0.1, 0, 0]
+ restitution = 0.0
+
+ class noise:
+ add_noise = False
+ noise_level = 0.6 # scales other values
+
+ class noise_scales:
+ dof_pos = 0.05
+ dof_vel = 0.5
+ ang_vel = 0.1
+ lin_vel = 0.05
+ quat = 0.03
+ height_measurements = 0.1
+
+ class init_state(LeggedRobotCfg.init_state):
+ pos = [0.0, 0.0, 0.72]
+
+ default_joint_angles = {k: 0.0 for k in Stompy.all_joints()}
+
+ default_positions = Stompy.default_standing()
+ for joint in default_positions:
+ default_joint_angles[joint] = default_positions[joint]
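+        # Example with hypothetical values: if Stompy.default_standing() returns
+        # {"left hip pitch": 1.61, ...}, that entry overrides the 0.0 default above,
+        # while every joint not listed in default_standing() stays at 0.0.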
+
+ class control(LeggedRobotCfg.control):
+ # PD Drive parameters:
+ stiffness = {
+ "shoulder": 200,
+ "elbow": 200,
+ "wrist": 200,
+ "hand": 200,
+ "torso": 200,
+ "hip": 250,
+ "ankle": 200,
+ "knee": 350,
+ }
+ damping = {
+ "shoulder": 10,
+ "elbow": 10,
+ "wrist": 10,
+ "hand": 10,
+ "torso": 10,
+ "hip": 10,
+ "ankle": 10,
+ "knee": 10,
+ }
+ for k in stiffness:
+ stiffness[k] *= 0.00001
+ damping[k] *= 0.1
+ action_scale = 2500
+ # decimation: Number of control action updates @ sim DT per policy DT
+ decimation = 10 # 100hz
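+        # With sim.dt = 0.001 s (below) and decimation = 10, the policy is stepped
+        # once every 0.01 s, i.e. the 100 Hz noted above.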
+
+ class sim(LeggedRobotCfg.sim):
+ dt = 0.001 # 1000 Hz
+ substeps = 1 # 2
+ up_axis = 1 # 0 is y, 1 is z
+
+ class physx(LeggedRobotCfg.sim.physx):
+ num_threads = 12
+ solver_type = 0 # 0: pgs, 1: tgs
+ num_position_iterations = 4
+ num_velocity_iterations = 1
+ contact_offset = 0.01 # [m]
+ rest_offset = 0.0 # -0.02 # [m]
+ bounce_threshold_velocity = 0.1 # [m/s]
+ max_depenetration_velocity = 1.0
+ max_gpu_contact_pairs = 2**23 # 2**24 -> needed for 8000 envs and more
+ default_buffer_size_multiplier = 5
+ # 0: never, 1: last sub-step, 2: all sub-steps (default=2)
+ contact_collection = 2
+
+ class domain_rand:
+ randomize_friction = True
+ friction_range = [0.1, 2.0]
+
+ randomize_base_mass = True
+ # added_mass_range = [-1.0, 1.0]
+ added_mass_range = [-0.2, 0.2]
+ push_robots = True
+ push_interval_s = 4
+ max_push_vel_xy = 0.2
+ max_push_ang_vel = 0.4
+ dynamic_randomization = 0.02
+
+ class commands(LeggedRobotCfg.commands):
+        # Command channels: lin_vel_x, lin_vel_y, ang_vel_yaw, heading (in heading mode ang_vel_yaw is recomputed from heading error)
+ num_commands = 4
+        resampling_time = 8.0  # time before commands are changed [s]
+ heading_command = True # if true: compute ang vel command from heading error
+
+ class ranges:
+ lin_vel_x = [-0.3, 0.6] # min max [m/s]
+ lin_vel_y = [-0.3, 0.3] # min max [m/s]
+ ang_vel_yaw = [-0.3, 0.3] # min max [rad/s]
+ heading = [-3.14, 3.14]
+
+ class rewards:
+ # quite important to keep it right
+ base_height_target = 0.72
+ min_dist = 0.2
+ max_dist = 0.5
+ # put some settings here for LLM parameter tuning
+ target_joint_pos_scale = 0.17 # rad
+ target_feet_height = 0.06 # m
+ cycle_time = 0.64 # sec
+ # if true negative total rewards are clipped at zero (avoids early termination problems)
+ only_positive_rewards = True
+        # tracking reward = exp(-error * sigma)
+ tracking_sigma = 5
+ max_contact_force = 400 # forces above this value are penalized
+
+ class scales:
+ # # reference motion tracking
+ # joint_pos = 1.6
+ # feet_clearance = 1.0
+ # feet_contact_number = 1.2
+ # # gait
+ # feet_air_time = 1.0
+ # foot_slip = -0.05
+ # feet_distance = 0.2
+ # knee_distance = 0.2
+ # # contact
+ # feet_contact_forces = -0.01
+ # # vel tracking
+ # tracking_lin_vel = 1.2
+ # tracking_ang_vel = 1.1
+ # vel_mismatch_exp = 0.5 # lin_z; ang x,y
+ # low_speed = 0.2
+ # track_vel_hard = 0.5
+
+            # The reward scales above are disabled for the standing policy.
+ # base pos
+ default_joint_pos = 0.5
+ orientation = 1
+ base_height = 0.2
+
+ # energy
+ action_smoothness = -0.002
+ torques = -1e-5
+ dof_vel = -5e-4
+ dof_acc = -1e-7
+ base_acc = 0.2
+ collision = -1.0
+
+ class normalization:
+ class obs_scales:
+ # is2ac
+ lin_vel = 2.0
+ ang_vel = 1.0
+ dof_pos = 1.0
+ dof_vel = 0.05
+ quat = 1.0
+ height_measurements = 5.0
+
+ clip_observations = 18.0
+ clip_actions = 18.0
+
+ class viewer:
+ ref_env = 0
+ pos = [4, -4, 2]
+ lookat = [0, -2, 0]
+
+
+class OnlyLegsCfgPPO(LeggedRobotCfgPPO):
+ seed = 5
+ runner_class_name = "OnPolicyRunner" # DWLOnPolicyRunner
+
+ class policy:
+ init_noise_std = 1.0
+ actor_hidden_dims = [512, 256, 128]
+ critic_hidden_dims = [768, 256, 128]
+
+ class algorithm(LeggedRobotCfgPPO.algorithm):
+ entropy_coef = 0.001
+ learning_rate = 1e-5
+ num_learning_epochs = 2
+ gamma = 0.994
+ lam = 0.9
+ num_mini_batches = 4
+
+ class runner:
+ policy_class_name = "ActorCritic"
+ algorithm_class_name = "PPO"
+ num_steps_per_env = 60 # per iteration
+ max_iterations = 10001 # number of policy updates
+
+ # logging
+ save_interval = 100 # check for potential saves every this many iterations
+ experiment_name = "Legs"
+ run_name = ""
+ # load and resume
+ resume = False
+ load_run = -1 # -1 = last run
+ checkpoint = -1 # -1 = last saved model
+ resume_path = None # updated from load_run and chkpt
diff --git a/sim/humanoid_gym/envs/only_legs_env.py b/sim/humanoid_gym/envs/only_legs_env.py
new file mode 100755
index 00000000..39648ba6
--- /dev/null
+++ b/sim/humanoid_gym/envs/only_legs_env.py
@@ -0,0 +1,549 @@
+# mypy: disable-error-code="valid-newtype"
+"""Defines the environment for training the humanoid."""
+
+import torch # type: ignore[import]
+from humanoid.envs import LeggedRobot
+from humanoid.envs.base.legged_robot_config import LeggedRobotCfg
+from humanoid.utils.terrain import HumanoidTerrain
+from isaacgym import gymtorch
+from isaacgym.torch_utils import *
+
+from sim.stompy_legs.joints import Stompy
+
+
+class OnlyLegsFreeEnv(LeggedRobot):
+ """StompyFreeEnv is a class that represents a custom environment for a legged robot.
+
+ Args:
+ cfg (LeggedRobotCfg): Configuration object for the legged robot.
+ sim_params: Parameters for the simulation.
+ physics_engine: Physics engine used in the simulation.
+ sim_device: Device used for the simulation.
+ headless: Flag indicating whether the simulation should be run in headless mode.
+
+ Attributes:
+ last_feet_z (float): The z-coordinate of the last feet position.
+ feet_height (torch.Tensor): Tensor representing the height of the feet.
+ sim (gymtorch.GymSim): The simulation object.
+ terrain (HumanoidTerrain): The terrain object.
+ up_axis_idx (int): The index representing the up axis.
+ command_input (torch.Tensor): Tensor representing the command input.
+ privileged_obs_buf (torch.Tensor): Tensor representing the privileged observations buffer.
+ obs_buf (torch.Tensor): Tensor representing the observations buffer.
+ obs_history (collections.deque): Deque containing the history of observations.
+ critic_history (collections.deque): Deque containing the history of critic observations.
+
+ Methods:
+ _push_robots(): Randomly pushes the robots by setting a randomized base velocity.
+ _get_phase(): Calculates the phase of the gait cycle.
+ _get_gait_phase(): Calculates the gait phase.
+ compute_ref_state(): Computes the reference state.
+ create_sim(): Creates the simulation, terrain, and environments.
+ _get_noise_scale_vec(cfg): Sets a vector used to scale the noise added to the observations.
+ step(actions): Performs a simulation step with the given actions.
+ compute_observations(): Computes the observations.
+ reset_idx(env_ids): Resets the environment for the specified environment IDs.
+ """
+
+ def __init__(self, cfg: LeggedRobotCfg, sim_params, physics_engine, sim_device, headless):
+ super().__init__(cfg, sim_params, physics_engine, sim_device, headless)
+ self.last_feet_z = self.cfg.asset.default_feet_height
+ self.feet_height = torch.zeros((self.num_envs, 2), device=self.device)
+ self.reset_idx(torch.tensor(range(self.num_envs), device=self.device))
+
+ env_handle = self.envs[0]
+ actor_handle = self.actor_handles[0]
+
+ self.legs_joints = {}
+ for name, joint in Stompy.legs.left.joints_motors():
+ joint_handle = self.gym.find_actor_dof_handle(env_handle, actor_handle, joint)
+ self.legs_joints["left_" + name] = joint_handle
+
+ for name, joint in Stompy.legs.right.joints_motors():
+ joint_handle = self.gym.find_actor_dof_handle(env_handle, actor_handle, joint)
+ self.legs_joints["right_" + name] = joint_handle
+ self.compute_observations()
+
+ def _push_robots(self):
+ """Random pushes the robots. Emulates an impulse by setting a randomized base velocity."""
+ max_vel = self.cfg.domain_rand.max_push_vel_xy
+ max_push_angular = self.cfg.domain_rand.max_push_ang_vel
+ self.rand_push_force[:, :2] = torch_rand_float(
+ -max_vel, max_vel, (self.num_envs, 2), device=self.device
+ ) # lin vel x/y
+ self.root_states[:, 7:9] = self.rand_push_force[:, :2]
+
+ self.rand_push_torque = torch_rand_float(
+ -max_push_angular, max_push_angular, (self.num_envs, 3), device=self.device
+ )
+ self.root_states[:, 10:13] = self.rand_push_torque
+
+ self.gym.set_actor_root_state_tensor(self.sim, gymtorch.unwrap_tensor(self.root_states))
+
+ def _get_phase(self):
+ cycle_time = self.cfg.rewards.cycle_time
+ phase = self.episode_length_buf * self.dt / cycle_time
+ return phase
+
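+    # The phase above drives the stance/swing split below: sin(2*pi*phase) >= 0
+    # marks left-foot stance, < 0 marks right-foot stance, and |sin| < 0.1 is
+    # treated as double support (both feet in stance).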
+ def _get_gait_phase(self):
+ # return float mask 1 is stance, 0 is swing
+ phase = self._get_phase()
+ sin_pos = torch.sin(2 * torch.pi * phase)
+ # Add double support phase
+ stance_mask = torch.zeros((self.num_envs, 2), device=self.device)
+ # left foot stance
+ stance_mask[:, 0] = sin_pos >= 0
+ # right foot stance
+ stance_mask[:, 1] = sin_pos < 0
+ # Double support phase
+ stance_mask[torch.abs(sin_pos) < 0.1] = 1
+
+ return stance_mask
+
+ def check_termination(self):
+ """Check if environments need to be reset"""
+ self.reset_buf = torch.any(
+ torch.norm(self.contact_forces[:, self.termination_contact_indices, :], dim=-1) > 1.0,
+ dim=1,
+ )
+ self.reset_buf |= self.root_states[:, 2] < self.cfg.asset.termination_height
+ self.time_out_buf = self.episode_length_buf > self.max_episode_length # no terminal reward for time-outs
+ self.reset_buf |= self.time_out_buf
+
+ def compute_ref_state(self):
+ phase = self._get_phase()
+ sin_pos = torch.sin(2 * torch.pi * phase)
+ sin_pos_l = sin_pos.clone()
+ sin_pos_r = sin_pos.clone()
+
+ self.ref_dof_pos = torch.zeros_like(self.dof_pos)
+ scale_1 = self.cfg.rewards.target_joint_pos_scale
+ scale_2 = 2 * scale_1
+ # left foot stance phase set to default joint pos
+ sin_pos_l[sin_pos_l > 0] = 0
+ self.ref_dof_pos[:, self.legs_joints["left_hip_pitch"]] = sin_pos_l * scale_1
+ self.ref_dof_pos[:, self.legs_joints["left_knee_pitch"]] = sin_pos_l * scale_2
+ self.ref_dof_pos[:, self.legs_joints["left_ankle_roll"]] = sin_pos_l * scale_1
+
+ # right foot stance phase set to default joint pos
+ sin_pos_r[sin_pos_r < 0] = 0
+ self.ref_dof_pos[:, self.legs_joints["right_hip_pitch"]] = sin_pos_r * scale_1
+ self.ref_dof_pos[:, self.legs_joints["right_knee_pitch"]] = sin_pos_r * scale_2
+ self.ref_dof_pos[:, self.legs_joints["right_ankle_roll"]] = sin_pos_r * scale_1
+
+ # Double support phase
+ self.ref_dof_pos[torch.abs(sin_pos) < 0.1] = 0
+
+ self.ref_action = 2 * self.ref_dof_pos
+
+ def create_sim(self):
+ """Creates simulation, terrain and evironments"""
+ self.up_axis_idx = 2 # 2 for z, 1 for y -> adapt gravity accordingly
+ self.sim = self.gym.create_sim(
+ self.sim_device_id,
+ self.graphics_device_id,
+ self.physics_engine,
+ self.sim_params,
+ )
+ mesh_type = self.cfg.terrain.mesh_type
+ if mesh_type in ["heightfield", "trimesh"]:
+ self.terrain = HumanoidTerrain(self.cfg.terrain, self.num_envs)
+ if mesh_type == "plane":
+ self._create_ground_plane()
+ elif mesh_type == "heightfield":
+ self._create_heightfield()
+ elif mesh_type == "trimesh":
+ self._create_trimesh()
+ elif mesh_type is not None:
+ raise ValueError("Terrain mesh type not recognised. Allowed types are [None, plane, heightfield, trimesh]")
+ self._create_envs()
+
+ def _get_noise_scale_vec(self, cfg):
+ """Sets a vector used to scale the noise added to the observations.
+ [NOTE]: Must be adapted when changing the observations structure
+
+ Args:
+ cfg (Dict): Environment config file
+
+ Returns:
+ [torch.Tensor]: Vector of scales used to multiply a uniform distribution in [-1, 1]
+ """
+ num_actions = self.num_actions
+ noise_vec = torch.zeros(self.cfg.env.num_single_obs, device=self.device)
+ self.add_noise = self.cfg.noise.add_noise
+ noise_scales = self.cfg.noise.noise_scales
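+        # Single-frame observation layout (and hence the layout of this noise vector),
+        # with n = num_actions:
+        #   [0:5]            command input (sin, cos, velocity commands) -> no noise
+        #   [5:5+n]          joint positions
+        #   [5+n:5+2n]       joint velocities
+        #   [5+2n:5+3n]      previous actions                            -> no noise
+        #   [5+3n:5+3n+3]    base angular velocity
+        #   [5+3n+3:5+3n+6]  base euler angles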
+ noise_vec[0:5] = 0.0 # commands
+ noise_vec[5 : (num_actions + 5)] = noise_scales.dof_pos * self.obs_scales.dof_pos
+ noise_vec[(num_actions + 5) : (2 * num_actions + 5)] = noise_scales.dof_vel * self.obs_scales.dof_vel
+ noise_vec[(2 * num_actions + 5) : (3 * num_actions + 5)] = 0.0 # previous actions
+ noise_vec[(3 * num_actions + 5) : (3 * num_actions + 5) + 3] = (
+ noise_scales.ang_vel * self.obs_scales.ang_vel
+ ) # ang vel
+        noise_vec[(3 * num_actions + 5) + 3 : (3 * num_actions + 5) + 6] = (
+            noise_scales.quat * self.obs_scales.quat
+        )  # euler x,y,z
+ return noise_vec
+
+ def step(self, actions):
+ if self.cfg.env.use_ref_actions:
+ actions += self.ref_action
+ # dynamic randomization
+ delay = torch.rand((self.num_envs, 1), device=self.device)
+ actions = (1 - delay) * actions + delay * self.actions
+ actions += self.cfg.domain_rand.dynamic_randomization * torch.randn_like(actions) * actions
+ return super().step(actions)
+
+ def compute_observations(self):
+ phase = self._get_phase()
+ self.compute_ref_state()
+
+ sin_pos = torch.sin(2 * torch.pi * phase).unsqueeze(1)
+ cos_pos = torch.cos(2 * torch.pi * phase).unsqueeze(1)
+
+ stance_mask = self._get_gait_phase()
+ contact_mask = self.contact_forces[:, self.feet_indices, 2] > 5.0
+
+ self.command_input = torch.cat((sin_pos, cos_pos, self.commands[:, :3] * self.commands_scale), dim=1)
+ q = (self.dof_pos - self.default_dof_pos) * self.obs_scales.dof_pos
+ dq = self.dof_vel * self.obs_scales.dof_vel
+
+ diff = self.dof_pos - self.ref_dof_pos
+ self.privileged_obs_buf = torch.cat(
+ (
+ self.command_input, # 2 + 3
+ (self.dof_pos - self.default_joint_pd_target) * self.obs_scales.dof_pos, # 12
+ self.dof_vel * self.obs_scales.dof_vel, # 12
+ self.actions, # 12
+ diff, # 12
+ self.base_lin_vel * self.obs_scales.lin_vel, # 3
+ self.base_ang_vel * self.obs_scales.ang_vel, # 3
+ self.base_euler_xyz * self.obs_scales.quat, # 3
+ self.rand_push_force[:, :2], # 3
+ self.rand_push_torque, # 3
+ self.env_frictions, # 1
+ self.body_mass / 30.0, # 1
+ stance_mask, # 2
+ contact_mask, # 2
+ ),
+ dim=-1,
+ )
+
+ obs_buf = torch.cat(
+ (
+ self.command_input, # 5 = 2D(sin cos) + 3D(vel_x, vel_y, aug_vel_yaw)
+ q, # 12D
+ dq, # 12D
+ self.actions, # 12D
+ self.base_ang_vel * self.obs_scales.ang_vel, # 3
+ self.base_euler_xyz * self.obs_scales.quat, # 3
+ ),
+ dim=-1,
+ )
+
+ if self.cfg.terrain.measure_heights:
+ heights = (
+ torch.clip(
+ self.root_states[:, 2].unsqueeze(1) - 0.5 - self.measured_heights,
+ -1,
+ 1.0,
+ )
+ * self.obs_scales.height_measurements
+ )
+ self.privileged_obs_buf = torch.cat((self.obs_buf, heights), dim=-1)
+
+ if self.add_noise:
+ obs_now = obs_buf.clone() + torch.randn_like(obs_buf) * self.noise_scale_vec * self.cfg.noise.noise_level
+ else:
+ obs_now = obs_buf.clone()
+ self.obs_history.append(obs_now)
+ self.critic_history.append(self.privileged_obs_buf)
+
+ obs_buf_all = torch.stack([self.obs_history[i] for i in range(self.obs_history.maxlen)], dim=1) # N,T,K
+
+ self.obs_buf = obs_buf_all.reshape(self.num_envs, -1) # N, T*K
+ self.privileged_obs_buf = torch.cat([self.critic_history[i] for i in range(self.cfg.env.c_frame_stack)], dim=1)
+
+ def reset_idx(self, env_ids):
+ super().reset_idx(env_ids)
+ for i in range(self.obs_history.maxlen):
+ self.obs_history[i][env_ids] *= 0
+ for i in range(self.critic_history.maxlen):
+ self.critic_history[i][env_ids] *= 0
+
+ # ================================================ Rewards ================================================== #
+ def _reward_joint_pos(self):
+ """
+ Calculates the reward based on the difference between the current joint positions and the target joint positions.
+ """
+ joint_pos = self.dof_pos.clone()
+ pos_target = self.ref_dof_pos.clone()
+ diff = joint_pos - pos_target
+ r = torch.exp(-2 * torch.norm(diff, dim=1)) - 0.2 * torch.norm(diff, dim=1).clamp(0, 0.5)
+
+ return r
+
+ def _reward_feet_distance(self):
+ """
+        Calculates the reward based on the distance between the feet. Penalizes feet that are too close to each other or too far apart.
+ """
+ foot_pos = self.rigid_state[:, self.feet_indices, :2]
+ foot_dist = torch.norm(foot_pos[:, 0, :] - foot_pos[:, 1, :], dim=1)
+ fd = self.cfg.rewards.min_dist
+ max_df = self.cfg.rewards.max_dist
+ d_min = torch.clamp(foot_dist - fd, -0.5, 0.0)
+ d_max = torch.clamp(foot_dist - max_df, 0, 0.5)
+ return (torch.exp(-torch.abs(d_min) * 100) + torch.exp(-torch.abs(d_max) * 100)) / 2
+
+ def _reward_knee_distance(self):
+ """
+        Calculates the reward based on the distance between the knees of the humanoid.
+ """
+ foot_pos = self.rigid_state[:, self.knee_indices, :2]
+ foot_dist = torch.norm(foot_pos[:, 0, :] - foot_pos[:, 1, :], dim=1)
+ fd = self.cfg.rewards.min_dist
+ max_df = self.cfg.rewards.max_dist / 2
+ d_min = torch.clamp(foot_dist - fd, -0.5, 0.0)
+ d_max = torch.clamp(foot_dist - max_df, 0, 0.5)
+ return (torch.exp(-torch.abs(d_min) * 100) + torch.exp(-torch.abs(d_max) * 100)) / 2
+
+ def _reward_foot_slip(self):
+ """
+ Calculates the reward for minimizing foot slip. The reward is based on the contact forces
+ and the speed of the feet. A contact threshold is used to determine if the foot is in contact
+ with the ground. The speed of the foot is calculated and scaled by the contact condition.
+ """
+ contact = self.contact_forces[:, self.feet_indices, 2] > 5.0
+ foot_speed_norm = torch.norm(self.rigid_state[:, self.feet_indices, 10:12], dim=2)
+ rew = torch.sqrt(foot_speed_norm)
+ rew *= contact
+ return torch.sum(rew, dim=1)
+
+ def _reward_feet_air_time(self):
+ """
+ Calculates the reward for feet air time, promoting longer steps. This is achieved by
+ checking the first contact with the ground after being in the air. The air time is
+ limited to a maximum value for reward calculation.
+ """
+ contact = self.contact_forces[:, self.feet_indices, 2] > 5.0
+ stance_mask = self._get_gait_phase()
+ self.contact_filt = torch.logical_or(torch.logical_or(contact, stance_mask), self.last_contacts)
+ self.last_contacts = contact
+ first_contact = (self.feet_air_time > 0.0) * self.contact_filt
+ self.feet_air_time += self.dt
+ air_time = self.feet_air_time.clamp(0, 0.5) * first_contact
+ self.feet_air_time *= ~self.contact_filt
+ return air_time.sum(dim=1)
+
+ def _reward_feet_contact_number(self):
+ """
+ Calculates a reward based on the number of feet contacts aligning with the gait phase.
+ Rewards or penalizes depending on whether the foot contact matches the expected gait phase.
+ """
+ contact = self.contact_forces[:, self.feet_indices, 2] > 5.0
+ stance_mask = self._get_gait_phase()
+ reward = torch.where(contact == stance_mask, 1, -0.3)
+ return torch.mean(reward, dim=1)
+
+ def _reward_orientation(self):
+ """
+ Calculates the reward for maintaining a flat base orientation. It penalizes deviation
+ from the desired base orientation using the base euler angles and the projected gravity vector.
+ """
+ quat_mismatch = torch.exp(-torch.sum(torch.abs(self.base_euler_xyz[:, :2]), dim=1) * 10)
+ orientation = torch.exp(-torch.norm(self.projected_gravity[:, :2], dim=1) * 20)
+
+ return (quat_mismatch + orientation) / 2.0
+
+ def _reward_feet_contact_forces(self):
+ """
+ Calculates the reward for keeping contact forces within a specified range. Penalizes
+ high contact forces on the feet.
+ """
+ return torch.sum(
+ (
+ torch.norm(self.contact_forces[:, self.feet_indices, :], dim=-1) - self.cfg.rewards.max_contact_force
+ ).clip(0, 400),
+ dim=1,
+ )
+
+ def _reward_default_joint_pos(self):
+ """
+ Calculates the reward for keeping joint positions close to default positions, with a focus
+ on penalizing deviation in yaw and roll directions. Excludes yaw and roll from the main penalty.
+ """
+ joint_diff = self.dof_pos - self.default_joint_pd_target
+ left_yaw_roll = joint_diff[:, :2]
+ right_yaw_roll = joint_diff[:, 6:8]
+ yaw_roll = torch.norm(left_yaw_roll, dim=1) + torch.norm(right_yaw_roll, dim=1)
+ yaw_roll = torch.clamp(yaw_roll - 0.1, 0, 50)
+ return torch.exp(-yaw_roll * 100) - 0.01 * torch.norm(joint_diff, dim=1)
+
+ def _reward_base_height(self):
+ """
+ Calculates the reward based on the robot's base height. Penalizes deviation from a target base height.
+ The reward is computed based on the height difference between the robot's base and the average height
+ of its feet when they are in contact with the ground.
+ """
+ stance_mask = self._get_gait_phase()
+ measured_heights = torch.sum(self.rigid_state[:, self.feet_indices, 2] * stance_mask, dim=1) / torch.sum(
+ stance_mask, dim=1
+ )
+ base_height = self.root_states[:, 2] - (measured_heights - self.cfg.asset.default_feet_height)
+ reward = torch.exp(-torch.abs(base_height - self.cfg.rewards.base_height_target) * 100)
+ return reward
+
+ def _reward_base_acc(self):
+ """
+ Computes the reward based on the base's acceleration. Penalizes high accelerations of the robot's base,
+ encouraging smoother motion.
+ """
+ root_acc = self.last_root_vel - self.root_states[:, 7:13]
+ rew = torch.exp(-torch.norm(root_acc, dim=1) * 3)
+ return rew
+
+ def _reward_vel_mismatch_exp(self):
+ """
+ Computes a reward based on the mismatch in the robot's linear and angular velocities.
+ Encourages the robot to maintain a stable velocity by penalizing large deviations.
+ """
+ lin_mismatch = torch.exp(-torch.square(self.base_lin_vel[:, 2]) * 10)
+ ang_mismatch = torch.exp(-torch.norm(self.base_ang_vel[:, :2], dim=1) * 5.0)
+
+ c_update = (lin_mismatch + ang_mismatch) / 2.0
+
+ return c_update
+
+ def _reward_track_vel_hard(self):
+ """
+ Calculates a reward for accurately tracking both linear and angular velocity commands.
+ Penalizes deviations from specified linear and angular velocity targets.
+ """
+ # Tracking of linear velocity commands (xy axes)
+ lin_vel_error = torch.norm(self.commands[:, :2] - self.base_lin_vel[:, :2], dim=1)
+ lin_vel_error_exp = torch.exp(-lin_vel_error * 10)
+
+ # Tracking of angular velocity commands (yaw)
+ ang_vel_error = torch.abs(self.commands[:, 2] - self.base_ang_vel[:, 2])
+ ang_vel_error_exp = torch.exp(-ang_vel_error * 10)
+
+ linear_error = 0.2 * (lin_vel_error + ang_vel_error)
+
+ return (lin_vel_error_exp + ang_vel_error_exp) / 2.0 - linear_error
+
+ def _reward_tracking_lin_vel(self):
+ """
+ Tracks linear velocity commands along the xy axes.
+ Calculates a reward based on how closely the robot's linear velocity matches the commanded values.
+ """
+ lin_vel_error = torch.sum(torch.square(self.commands[:, :2] - self.base_lin_vel[:, :2]), dim=1)
+ return torch.exp(-lin_vel_error * self.cfg.rewards.tracking_sigma)
+
+ def _reward_tracking_ang_vel(self):
+ """
+ Tracks angular velocity commands for yaw rotation.
+ Computes a reward based on how closely the robot's angular velocity matches the commanded yaw values.
+ """
+
+ ang_vel_error = torch.square(self.commands[:, 2] - self.base_ang_vel[:, 2])
+ return torch.exp(-ang_vel_error * self.cfg.rewards.tracking_sigma)
+
+ def _reward_feet_clearance(self):
+ """
+ Calculates reward based on the clearance of the swing leg from the ground during movement.
+ Encourages appropriate lift of the feet during the swing phase of the gait.
+ """
+
+ # Compute feet contact mask
+ contact = self.contact_forces[:, self.feet_indices, 2] > 5.0
+
+ # Get the z-position of the feet and compute the change in z-position
+ feet_z = self.rigid_state[:, self.feet_indices, 2] - self.cfg.asset.default_feet_height
+ delta_z = feet_z - self.last_feet_z
+ self.feet_height += delta_z
+ self.last_feet_z = feet_z
+
+ # Compute swing mask
+ swing_mask = 1 - self._get_gait_phase()
+
+        # feet height should be close to the target feet height at the peak
+ rew_pos = torch.abs(self.feet_height - self.cfg.rewards.target_feet_height) < 0.02
+ rew_pos = torch.sum(rew_pos * swing_mask, dim=1)
+ self.feet_height *= ~contact
+
+ return rew_pos
+
+ def _reward_low_speed(self):
+ """
+ Rewards or penalizes the robot based on its speed relative to the commanded speed.
+ This function checks if the robot is moving too slow, too fast, or at the desired speed,
+ and if the movement direction matches the command.
+ """
+ # Calculate the absolute value of speed and command for comparison
+ absolute_speed = torch.abs(self.base_lin_vel[:, 0])
+ absolute_command = torch.abs(self.commands[:, 0])
+
+ # Define speed criteria for desired range
+ speed_too_low = absolute_speed < 0.5 * absolute_command
+ speed_too_high = absolute_speed > 1.2 * absolute_command
+ speed_desired = ~(speed_too_low | speed_too_high)
+
+ # Check if the speed and command directions are mismatched
+ sign_mismatch = torch.sign(self.base_lin_vel[:, 0]) != torch.sign(self.commands[:, 0])
+
+ # Initialize reward tensor
+ reward = torch.zeros_like(self.base_lin_vel[:, 0])
+
+ # Assign rewards based on conditions
+ # Speed too low
+ reward[speed_too_low] = -1.0
+ # Speed too high
+ reward[speed_too_high] = 0.0
+ # Speed within desired range
+ reward[speed_desired] = 1.2
+ # Sign mismatch has the highest priority
+ reward[sign_mismatch] = -2.0
+ return reward * (self.commands[:, 0].abs() > 0.1)
+
+ def _reward_torques(self):
+ """
+ Penalizes the use of high torques in the robot's joints. Encourages efficient movement by minimizing
+ the necessary force exerted by the motors.
+ """
+ return torch.sum(torch.square(self.torques), dim=1)
+
+ def _reward_dof_vel(self):
+ """
+ Penalizes high velocities at the degrees of freedom (DOF) of the robot. This encourages smoother and
+ more controlled movements.
+ """
+ return torch.sum(torch.square(self.dof_vel), dim=1)
+
+ def _reward_dof_acc(self):
+ """
+ Penalizes high accelerations at the robot's degrees of freedom (DOF). This is important for ensuring
+ smooth and stable motion, reducing wear on the robot's mechanical parts.
+ """
+ return torch.sum(torch.square((self.last_dof_vel - self.dof_vel) / self.dt), dim=1)
+
+ def _reward_collision(self):
+ """
+ Penalizes collisions of the robot with the environment, specifically focusing on selected body parts.
+ This encourages the robot to avoid undesired contact with objects or surfaces.
+ """
+ return torch.sum(
+ 1.0 * (torch.norm(self.contact_forces[:, self.penalised_contact_indices, :], dim=-1) > 0.1),
+ dim=1,
+ )
+
+ def _reward_action_smoothness(self):
+ """
+ Encourages smoothness in the robot's actions by penalizing large differences between consecutive actions.
+ This is important for achieving fluid motion and reducing mechanical stress.
+ """
+ term_1 = torch.sum(torch.square(self.last_actions - self.actions), dim=1)
+ term_2 = torch.sum(
+ torch.square(self.actions + self.last_last_actions - 2 * self.last_actions),
+ dim=1,
+ )
+ term_3 = 0.05 * torch.sum(torch.abs(self.actions), dim=1)
+ return term_1 + term_2 + term_3
diff --git a/sim/humanoid_gym/envs/stompy_config.py b/sim/humanoid_gym/envs/stompy_config.py
index ee6216bb..01164782 100755
--- a/sim/humanoid_gym/envs/stompy_config.py
+++ b/sim/humanoid_gym/envs/stompy_config.py
@@ -178,23 +178,25 @@ class rewards:
max_contact_force = 400 # forces above this value are penalized
class scales:
- # reference motion tracking
- joint_pos = 1.6
- feet_clearance = 1.0
- feet_contact_number = 1.2
- # gait
- feet_air_time = 1.0
- foot_slip = -0.05
- feet_distance = 0.2
- knee_distance = 0.2
- # contact
- feet_contact_forces = -0.01
- # vel tracking
- tracking_lin_vel = 1.2
- tracking_ang_vel = 1.1
- vel_mismatch_exp = 0.5 # lin_z; ang x,y
- low_speed = 0.2
- track_vel_hard = 0.5
+ # # reference motion tracking
+ # joint_pos = 1.6
+ # feet_clearance = 1.0
+ # feet_contact_number = 1.2
+ # # gait
+ # feet_air_time = 1.0
+ # foot_slip = -0.05
+ # feet_distance = 0.2
+ # knee_distance = 0.2
+ # # contact
+ # feet_contact_forces = -0.01
+ # # vel tracking
+ # tracking_lin_vel = 1.2
+ # tracking_ang_vel = 1.1
+ # vel_mismatch_exp = 0.5 # lin_z; ang x,y
+ # low_speed = 0.2
+ # track_vel_hard = 0.5
+
+        # The reward scales above were removed; only the base-pose and energy terms below remain.
# base pos
default_joint_pos = 0.5
orientation = 1
diff --git a/sim/scripts/create_fixed_torso.py b/sim/scripts/create_fixed_torso.py
index 861fbdf0..264554b1 100755
--- a/sim/scripts/create_fixed_torso.py
+++ b/sim/scripts/create_fixed_torso.py
@@ -3,19 +3,18 @@
import xml.etree.ElementTree as ET
-from sim.stompy.joints import StompyFixed
+from sim.stompy_legs.joints import Stompy
-STOMPY_URDF = "sim/stompy/robot.urdf"
+STOMPY_URDF = "sim/stompy_legs/robot.urdf"
def update_urdf() -> None:
tree = ET.parse(STOMPY_URDF)
root = tree.getroot()
- stompy = StompyFixed()
-
+ stompy = Stompy()
+ print(stompy.default_standing())
revolute_joints = set(stompy.default_standing().keys())
joint_limits = stompy.default_limits()
-
for joint in root.findall("joint"):
joint_name = joint.get("name")
if joint_name not in revolute_joints:
@@ -28,9 +27,8 @@ def update_urdf() -> None:
upper = str(limits.get("upper", 0.0))
limit.set("lower", lower)
limit.set("upper", upper)
-
# Save the modified URDF to a new file
- tree.write("sim/stompy/robot_fixed.urdf")
+ tree.write("sim/stompy_legs/robot_fixed.urdf")
if __name__ == "__main__":
diff --git a/sim/scripts/simulate_urdf.py b/sim/scripts/simulate_urdf.py
index fb6e5a93..cac1aaa5 100755
--- a/sim/scripts/simulate_urdf.py
+++ b/sim/scripts/simulate_urdf.py
@@ -15,7 +15,7 @@
from sim.env import stompy_urdf_path
from sim.logging import configure_logging
-from sim.stompy.joints import Stompy
+from sim.stompy_legs.joints import Stompy
logger = logging.getLogger(__name__)
@@ -117,14 +117,14 @@ def load_gym() -> GymParams:
asset_options.default_dof_drive_mode = DRIVE_MODE
asset_options.collapse_fixed_joints = True
asset_options.disable_gravity = False
- asset_options.fix_base_link = False
- asset_path = stompy_urdf_path()
+ asset_options.fix_base_link = True
+ asset_path = stompy_urdf_path(legs_only=True)
robot_asset = gym.load_urdf(sim, str(asset_path.parent), str(asset_path.name), asset_options)
# Adds the robot to the environment.
initial_pose = gymapi.Transform()
- initial_pose.p = gymapi.Vec3(0.0, 2.0, 0.0)
- initial_pose.r = gymapi.Quat(0.0, 0.0, 0.0, 1.0)
+ initial_pose.p = gymapi.Vec3(0.0, 1.0, 0.0)
+ initial_pose.r = gymapi.Quat(-1.0, 0.0, 0.0, 1.0)
robot = gym.create_actor(env, robot_asset, initial_pose, "robot")
# Configure DOF properties.
@@ -136,7 +136,7 @@ def load_gym() -> GymParams:
gym.set_actor_dof_properties(env, robot, props)
# Look at the first environment.
- cam_pos = gymapi.Vec3(8, 4, 1.5)
+ cam_pos = gymapi.Vec3(1, 2, 1.5)
cam_target = gymapi.Vec3(0, 2, 1.5)
gym.viewer_camera_look_at(viewer, None, cam_pos, cam_target)
@@ -240,5 +240,5 @@ def main() -> None:
if __name__ == "__main__":
- # python -m sim.scripts.drop_urdf
+ # python -m sim.scripts.simulate_urdf
main()
diff --git a/sim/stompy_legs/__init__.py b/sim/stompy_legs/__init__.py
new file mode 100755
index 00000000..e69de29b
diff --git a/sim/stompy_legs/joints.py b/sim/stompy_legs/joints.py
new file mode 100644
index 00000000..e66f217c
--- /dev/null
+++ b/sim/stompy_legs/joints.py
@@ -0,0 +1,165 @@
+"""Defines a more Pythonic interface for specifying the joint names.
+
+The best way to re-generate this snippet for a new robot is to use the
+`sim/scripts/print_joints.py` script. This script will print out a hierarchical
+tree of the various joint names in the robot.
+"""
+
+import textwrap
+from abc import ABC
+from typing import Dict, List, Tuple, Union
+
+
+class Node(ABC):
+ @classmethod
+ def children(cls) -> List["Union[Node, str]"]:
+ return [
+ attr
+ for attr in (getattr(cls, attr) for attr in dir(cls) if not attr.startswith("__"))
+ if isinstance(attr, (Node, str))
+ ]
+
+ @classmethod
+ def joints(cls) -> List[str]:
+ return [
+ attr
+ for attr in (getattr(cls, attr) for attr in dir(cls) if not attr.startswith("__"))
+ if isinstance(attr, str)
+ ]
+
+ @classmethod
+ def joints_motors(cls) -> List[Tuple[str, str]]:
+ joint_names: List[Tuple[str, str]] = []
+ for attr in dir(cls):
+ if not attr.startswith("__"):
+ attr2 = getattr(cls, attr)
+ if isinstance(attr2, str):
+ joint_names.append((attr, attr2))
+
+ return joint_names
+
+ @classmethod
+ def all_joints(cls) -> List[str]:
+ leaves = cls.joints()
+ for child in cls.children():
+ if isinstance(child, Node):
+ leaves.extend(child.all_joints())
+ return leaves
+
+ def __str__(self) -> str:
+ parts = [str(child) for child in self.children()]
+ parts_str = textwrap.indent("\n" + "\n".join(parts), " ")
+ return f"[{self.__class__.__name__}]{parts_str}"
+
+
+class LeftLeg(Node):
+ hip_roll = "left hip roll"
+ hip_yaw = "left hip yaw"
+ hip_pitch = "left hip pitch"
+ knee_pitch = "left knee pitch"
+ ankle_pitch = "left ankle pitch"
+ ankle_roll = "left ankle roll"
+
+
+class RightLeg(Node):
+ hip_roll = "right hip roll"
+ hip_yaw = "right hip yaw"
+ hip_pitch = "right hip pitch"
+ knee_pitch = "right knee pitch"
+ ankle_pitch = "right ankle pitch"
+ ankle_roll = "right ankle roll"
+
+
+class Legs(Node):
+ left = LeftLeg()
+ right = RightLeg()
+
+
+class Stompy(Node):
+ legs = Legs()
+
+ @classmethod
+ def default_positions(cls) -> Dict[str, float]:
+ return {}
+
+ @classmethod
+ def default_standing(cls) -> Dict[str, float]:
+ return {
+            # legs
+ Stompy.legs.left.hip_pitch: 1.61,
+ Stompy.legs.left.hip_roll: 0,
+ Stompy.legs.left.hip_yaw: 1,
+ Stompy.legs.left.knee_pitch: 2.05,
+ Stompy.legs.left.ankle_pitch: 0.33,
+ Stompy.legs.left.ankle_roll: 1.73,
+ Stompy.legs.right.hip_pitch: 0,
+ Stompy.legs.right.hip_roll: -1.6,
+ Stompy.legs.right.hip_yaw: -2.15,
+ Stompy.legs.right.knee_pitch: 2.16,
+ Stompy.legs.right.ankle_pitch: 0.5,
+ Stompy.legs.right.ankle_roll: 1.72,
+ }
+
+ @classmethod
+ def default_limits(cls) -> Dict[str, Dict[str, float]]:
+ return {
+ Stompy.legs.left.hip_pitch: {
+ "lower": 0.5,
+ "upper": 2.69,
+ },
+ Stompy.legs.left.hip_roll: {
+ "lower": -0.5,
+ "upper": 0.5,
+ },
+ Stompy.legs.left.hip_yaw: {
+ "lower": 0.5,
+ "upper": 1.19,
+ },
+ Stompy.legs.left.knee_pitch: {
+ "lower": 1,
+ "upper": 3.1,
+ },
+ Stompy.legs.left.ankle_pitch: {
+ "lower": -0.3,
+ "upper": 1.13,
+ },
+ Stompy.legs.left.ankle_roll: {
+ "lower": 1.3,
+ "upper": 2,
+ },
+ Stompy.legs.right.hip_pitch: {
+ "lower": -1,
+ "upper": 1,
+ },
+ Stompy.legs.right.hip_roll: {
+ "lower": -2.39,
+ "upper": -1,
+ },
+ Stompy.legs.right.hip_yaw: {
+ "lower": -2.2,
+ "upper": -1,
+ },
+ Stompy.legs.right.knee_pitch: {
+ "lower": 1.54,
+ "upper": 3,
+ },
+ Stompy.legs.right.ankle_pitch: {
+ "lower": -0.5,
+ "upper": 0.94,
+ },
+ Stompy.legs.right.ankle_roll: {
+ "lower": 1,
+ "upper": 2.3,
+ },
+ }
+
+
+def print_joints() -> None:
+ joints = Stompy.all_joints()
+ assert len(joints) == len(set(joints)), "Duplicate joint names found!"
+ print(Stompy())
+
+
+if __name__ == "__main__":
+    # python -m sim.stompy_legs.joints
+ print_joints()
diff --git a/sim/stompy_legs/robot_fixed.urdf b/sim/stompy_legs/robot_fixed.urdf
new file mode 100644
index 00000000..aa14d37c
--- /dev/null
+++ b/sim/stompy_legs/robot_fixed.urdf
@@ -0,0 +1,1263 @@
+<!-- 1263 lines of URDF XML omitted (content not recoverable from this excerpt) -->
diff --git a/sim/stompy_legs/robot_fixed.xml b/sim/stompy_legs/robot_fixed.xml
new file mode 100644
index 00000000..827e661d
--- /dev/null
+++ b/sim/stompy_legs/robot_fixed.xml
@@ -0,0 +1,286 @@
+<!-- 286 lines of MJCF XML omitted (content not recoverable from this excerpt) -->
diff --git a/third_party/humanoid-gym b/third_party/humanoid-gym
index 9315d813..7945633d 160000
--- a/third_party/humanoid-gym
+++ b/third_party/humanoid-gym
@@ -1 +1 @@
-Subproject commit 9315d8135f24fa4a798249267991e797f895b43a
+Subproject commit 7945633dec64d291e7c394cd94f4ad3532b73717