
Commit 9d8db06

41 obs, 7 rewards, wave terrain && NO noise
1 parent e4aa7f1 commit 9d8db06

File tree

2 files changed, +6 -303 lines changed

2 files changed

+6
-303
lines changed

sim/genesis/zeroth_env.py

Lines changed: 1 addition & 266 deletions
@@ -27,33 +27,20 @@ def __init__(self, num_envs, env_cfg, obs_cfg, reward_cfg, command_cfg, show_vie
         self.max_steps = 40_000_000
         self.num_envs = num_envs
         self.num_single_obs = obs_cfg["num_single_obs"]
-        self.single_num_privileged_obs = obs_cfg["single_num_privileged_obs"]
         self.num_actions = env_cfg["num_actions"]
         self.num_commands = command_cfg["num_commands"]

-        self.add_noise = obs_cfg.get("add_noise", False)
-
         # observation history
         self.frame_stack = obs_cfg.get("frame_stack", 1)
         self.num_obs = self.num_single_obs * self.frame_stack
         self.c_frame_stack = obs_cfg.get("c_frame_stack", 1)
         self.obs_history = collections.deque(maxlen=self.frame_stack)
         self.critic_history = collections.deque(maxlen=self.c_frame_stack)

-        # privileged observation config
-        self.num_privileged_obs = self.single_num_privileged_obs * self.c_frame_stack
-        print(f"Number of privileged observations: {self.num_privileged_obs}")
-
         for _ in range(self.frame_stack):
             self.obs_history.append(
                 torch.zeros(self.num_envs, self.num_single_obs, dtype=torch.float, device=self.device)
             )
-        for _ in range(self.c_frame_stack):
-            self.critic_history.append(
-                torch.zeros(
-                    self.num_envs, self.single_num_privileged_obs, dtype=torch.float, device=self.device
-                )
-            )

         self.simulate_action_latency = True  # there is a 1-step latency on the real robot
         self.dt = 0.02  # control frequency on the real robot is 50 Hz
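For context, the actor-side history machinery kept above works as follows; a minimal standalone sketch (sizes are illustrative, not this commit's config):

import collections
import torch

num_envs, num_single_obs, frame_stack = 4, 41, 3  # illustrative sizes

# the deque holds the last frame_stack frames; maxlen evicts the oldest
obs_history = collections.deque(maxlen=frame_stack)
for _ in range(frame_stack):
    obs_history.append(torch.zeros(num_envs, num_single_obs))

obs_history.append(torch.randn(num_envs, num_single_obs))  # newest frame

# flatten (num_envs, frame_stack, num_single_obs) -> (num_envs, num_obs)
stacked = torch.stack(list(obs_history), dim=1).reshape(num_envs, -1)
assert stacked.shape == (num_envs, frame_stack * num_single_obs)

With frame_stack = 1 (the default via obs_cfg.get above), num_obs == num_single_obs and the stack is a no-op.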
@@ -103,11 +90,6 @@ def __init__(self, num_envs, env_cfg, obs_cfg, reward_cfg, command_cfg, show_vie
             collision=True
         ))

-        # terrain measurement
-        self.measure_heights = False
-        self.measured_heights = torch.zeros((self.num_envs, 1), device=self.device)  # initialize as an all-zero tensor
-        self.height_samples = 64
-
         # add robot
         self.base_init_pos = torch.tensor(self.env_cfg["base_init_pos"], device=self.device)
         self.base_init_quat = torch.tensor(self.env_cfg["base_init_quat"], device=self.device)
@@ -121,13 +103,10 @@ def __init__(self, num_envs, env_cfg, obs_cfg, reward_cfg, command_cfg, show_vie
         )

         # build
-        print(f'num_envs: {num_envs}')
         self.scene.build(n_envs=num_envs, env_spacing=(1.0,1.0))

-        print(f'{self.env_cfg["dof_names"]}')
         # names to indices
         self.motor_dofs = [self.robot.get_joint(name).dof_idx_local for name in self.env_cfg["dof_names"]]
-        print(f"Motor dofs: {self.motor_dofs}")  # Motor dofs: [6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
         # Initialize legs_joints mapping with bounds checking
         self.legs_joints = {}
         joint_names = ["left_hip_pitch", "left_knee_pitch", "left_ankle_pitch",
@@ -139,34 +118,12 @@ def __init__(self, num_envs, env_cfg, obs_cfg, reward_cfg, command_cfg, show_vie
             else:
                 print(f"Warning: Joint {name} not found in motor_dofs")

-        print(f"Legs joints: {self.legs_joints}")
-        #TODO: Initialize legs_joints mapping correctly
-
-        # Initialize feet indices
-        self.feet_indices = [
-            self.robot.get_link("foot_left").idx - 6,
-            self.robot.get_link("foot_right").idx - 6,
-        ]
-        print(f"Feet indices: {self.feet_indices}")
         # Get number of bodies in the robot
         self.num_bodies = self.robot.n_links
-        print(f"Number of bodies in the robot: {self.num_bodies}")
-        print(f"Robot links: {self.robot.links}")
-        for i,e in enumerate(self.robot.links):
-            # print(f"Body {i} name: {e}")
-            print(f"Body {i} name: {e.name}")
-        # Initialize observation related buffers
-        self.contact_forces = torch.zeros((self.num_envs, len(self.feet_indices), 3), device=self.device)
-
-        # print(f"Contact forces length: {len(self.contact_forces)}")
-        # print(f"First element type: {type(self.contact_forces[0]) if len(self.contact_forces) > 0 else None}")
-        # print(f"First element length: {len(self.contact_forces[0]) if len(self.contact_forces) > 0 else None}")
-
-        self.net_contact_forces = torch.zeros((self.num_envs, self.num_bodies, 3), device=self.device)
+
         self.rand_push_force = torch.zeros((self.num_envs, 3), device=self.device)
         self.rand_push_torque = torch.zeros((self.num_envs, 3), device=self.device)
         self.env_frictions = torch.ones((self.num_envs,), device=self.device)
-        self.body_mass = torch.ones((self.num_envs,), device=self.device) * 30.0

         # PD control parameters
         self.robot.set_dofs_kp([self.env_cfg["kp"]] * self.num_actions, self.motor_dofs)
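The kp handed to set_dofs_kp parameterizes a standard PD position controller. A minimal sketch of the underlying torque law (generic PD, not Genesis's internal solver; gains and states are illustrative):

import torch

kp, kd = 20.0, 0.5                          # illustrative gains, not env_cfg values
q_target = torch.tensor([0.30, -0.60, 0.30])  # desired joint positions (rad)
q = torch.tensor([0.25, -0.50, 0.20])         # measured joint positions
dq = torch.tensor([0.10, -0.20, 0.05])        # measured joint velocities

# proportional term pulls toward the target; derivative term damps motion
tau = kp * (q_target - q) - kd * dq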
@@ -188,8 +145,6 @@ def __init__(self, num_envs, env_cfg, obs_cfg, reward_cfg, command_cfg, show_vie
         )
         # observation buffers
         self.obs_buf = torch.zeros((self.num_envs, self.num_obs), device=self.device, dtype=gs.tc_float)
-        if self.num_privileged_obs is not None:
-            self.privileged_obs_buf = torch.zeros((self.num_envs, self.num_privileged_obs), device=self.device, dtype=gs.tc_float)

         self.rew_buf = torch.zeros((self.num_envs,), device=self.device, dtype=gs.tc_float)
         self.reset_buf = torch.ones((self.num_envs,), device=self.device, dtype=gs.tc_int)
@@ -218,20 +173,15 @@ def __init__(self, num_envs, env_cfg, obs_cfg, reward_cfg, command_cfg, show_vie
             "observations": {},
         }  # extra information for logging

-        self.noise_scale_vec = self._get_noise_scale_vec(obs_cfg)
-
         # Initialize missing variables
         self.default_joint_pd_target = self.default_dof_pos.clone()
         self.base_euler = torch.zeros((self.num_envs, 3), device=self.device)
-        self.filtered_base_height = torch.zeros((self.num_envs, 1), device=self.device)  # initialize as a 2-D tensor
         self.filtered_base_height = torch.zeros(self.num_envs, device=self.device)
         # Initialize terrain difficulty
         self.terrain_difficulty = torch.zeros(self.num_envs, device=self.device)
         self.difficulty_factors = {
             "random_uniform_terrain": 0.3,
         }
-        self.root_states = torch.zeros((self.num_envs, 13), device=self.device)
-        self.rigid_state = torch.zeros((self.num_envs, self.num_bodies, 13), device=self.device)

     def _resample_commands(self, envs_idx):
         self.commands[envs_idx, 0] = gs_rand_float(*self.command_cfg["lin_vel_x_range"], (len(envs_idx),), self.device)
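_resample_commands draws each command uniformly from its configured range. A minimal sketch of the sampling pattern (rand_float stands in for the project's gs_rand_float; the range is illustrative):

import torch

def rand_float(lower, upper, shape, device):
    # uniform sample in [lower, upper)
    return torch.rand(shape, device=device) * (upper - lower) + lower

lin_vel_x_range = (-0.4, 0.8)            # illustrative, not this commit's config
commands = torch.zeros(8, 3)
envs_idx = torch.tensor([0, 3, 7])       # envs due for resampling
commands[envs_idx, 0] = rand_float(*lin_vel_x_range, (len(envs_idx),), "cpu")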
@@ -261,13 +211,6 @@ def step(self, actions):
         self.dof_pos[:] = self.robot.get_dofs_position(self.motor_dofs)
         self.dof_vel[:] = self.robot.get_dofs_velocity(self.motor_dofs)

-        # Get net contact forces for all bodies from rigid solver
-        # contact_forces = self.robot.get_links_net_contact_force()
-        # if contact_forces is not None:
-        #     for env_idx in range(self.num_envs):
-        #         if env_idx < contact_forces.shape[0]:
-        #             self.net_contact_forces[env_idx] = contact_forces[env_idx][:self.num_bodies].clone()
-
         # resample commands
         envs_idx = (
             (self.episode_length_buf % int(self.env_cfg["resampling_time_s"] / self.dt) == 0)
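The modulo condition makes resampling periodic in control steps. A quick check of the cadence, with dt = 0.02 from __init__ and an assumed resampling_time_s of 4.0 (for illustration only):

dt = 0.02                  # control step, from __init__ above
resampling_time_s = 4.0    # assumed config value
steps_between_resamples = int(resampling_time_s / dt)
assert steps_between_resamples == 200
# an env whose episode_length_buf is any multiple of 200 re-rolls its commands
assert 400 % steps_between_resamples == 0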
@@ -305,42 +248,15 @@ def step(self, actions):

         # obs, rewards, dones, infos
         return self.obs_buf, self.rew_buf, self.reset_buf, self.extras
-        # {
-        #     "observations": {
-        #         "critic": self.privileged_obs_buf
-        #     },
-        #     **self.extras
-        # }
-

     def get_observations(self):
         return self.obs_buf, self.extras
-        # , {
-        #     "observations": {
-        #         "critic": self.privileged_obs_buf
-        #     },
-        #     **self.extras
-        # }

     def _get_phase(self):
         cycle_time = self.env_cfg.get("cycle_time", 1.0)
         phase = self.episode_length_buf * self.dt / cycle_time
         return phase

-    def _get_gait_phase(self):
-        # return float mask 1 is stance, 0 is swing
-        phase = self._get_phase()
-        sin_pos = torch.sin(2 * torch.pi * phase)
-        # Add double support phase
-        stance_mask = torch.zeros((self.num_envs, 2), device=self.device)
-        # left foot stance
-        stance_mask[:, 0] = sin_pos >= 0
-        # right foot stance
-        stance_mask[:, 1] = sin_pos < 0
-        # Double support phase
-        stance_mask[torch.abs(sin_pos) < 0.1] = 1
-        return stance_mask
-
     def compute_ref_state(self):
         phase = self._get_phase()
         sin_pos = torch.sin(2 * torch.pi * phase)
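For reference, the deleted _get_gait_phase mapped the sinusoidal gait clock to per-foot stance flags: sin >= 0 means left-foot stance, sin < 0 right-foot stance, and |sin| < 0.1 marks double support. A standalone sketch of that logic (phases are illustrative):

import torch

phase = torch.tensor([0.0, 0.25, 0.5, 0.75])  # illustrative phases, in cycles
sin_pos = torch.sin(2 * torch.pi * phase)

stance_mask = torch.zeros((len(phase), 2))
stance_mask[:, 0] = (sin_pos >= 0).float()    # left foot stance
stance_mask[:, 1] = (sin_pos < 0).float()     # right foot stance
stance_mask[torch.abs(sin_pos) < 0.1] = 1     # double support: both feet down

Phases 0.0 and 0.5 land in double support (sin is near 0), 0.25 in left stance, 0.75 in right stance.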
@@ -377,70 +293,17 @@ def safe_update(joint_name, scale):
             # Double support phase
             self.ref_dof_pos[torch.abs(sin_pos) < 0.1] = 0

-        self.ref_action = 2 * self.ref_dof_pos
-
-    def _get_noise_scale_vec(self, cfg):
-        """Sets a vector used to scale the noise added to the observations.
-        [NOTE]: Must be adapted when changing the observations structure
-
-        Args:
-            cfg (Dict): Environment config file
-
-        Returns:
-            [torch.Tensor]: Vector of scales used to multiply a uniform distribution in [-1, 1]
-        """
-        num_actions = self.num_actions
-        noise_vec = torch.zeros(self.num_single_obs, device=self.device)
-        self.add_noise = cfg.get("add_noise", False)
-        noise_scales = cfg["noise_scales"]
-        noise_vec[0:5] = 0.0  # commands
-        noise_vec[5 : (num_actions + 5)] = noise_scales["dof_pos"] * self.obs_scales["dof_pos"]
-        noise_vec[(num_actions + 5) : (2 * num_actions + 5)] = noise_scales["dof_vel"] * self.obs_scales["dof_vel"]
-        noise_vec[(2 * num_actions + 5) : (3 * num_actions + 5)] = 0.0  # previous actions
-        noise_vec[(3 * num_actions + 5) : (3 * num_actions + 5) + 3] = (
-            noise_scales["ang_vel"] * self.obs_scales["ang_vel"]
-        )  # ang vel
-        noise_vec[(3 * num_actions + 5) + 3 : (3 * num_actions + 5)] = (
-            noise_scales["quat"] * self.obs_scales["quat"]
-        )  # euler x,y
-        return noise_vec
-
     def compute_observations(self):
         phase = self._get_phase()
         self.compute_ref_state()

         sin_pos = torch.sin(2 * torch.pi * phase).unsqueeze(1)
         cos_pos = torch.cos(2 * torch.pi * phase).unsqueeze(1)

-        # stance_mask = self._get_gait_phase()
-        # contact_mask = self.contact_forces[:, self.feet_indices, 2] > 5.0
-
         self.command_input = torch.cat((sin_pos, cos_pos, self.commands[:, :3] * self.commands_scale), dim=1)
         q = (self.dof_pos - self.default_dof_pos) * self.obs_scales["dof_pos"]
         dq = self.dof_vel * self.obs_scales["dof_vel"]

-        # diff = self.dof_pos - self.ref_dof_pos
-        # single-frame privileged observations (without history)
-        # self.privileged_obs_buf = torch.cat(
-        #     (
-        #         self.command_input,
-        #         (self.dof_pos - self.default_joint_pd_target) * self.obs_scales['dof_pos'],
-        #         self.dof_vel * self.obs_scales['dof_vel'],
-        #         self.actions,
-        #         diff,
-        #         self.base_lin_vel * self.obs_scales['lin_vel'],
-        #         self.base_ang_vel * self.obs_scales['ang_vel'],
-        #         self.base_euler * self.obs_scales['quat'],
-        #         self.rand_push_force[:, :2],
-        #         self.rand_push_torque,
-        #         self.env_frictions.unsqueeze(1),
-        #         (self.body_mass / 30.0).unsqueeze(1),
-        #         stance_mask,
-        #         contact_mask,
-        #     ),
-        #     dim=-1
-        # )
-
         obs_buf = torch.cat(  # total 41 dim
             (
                 self.command_input,  # 5 = 2D (sin, cos) + 3D (vel_x, vel_y, ang_vel_yaw)
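Worth flagging in the deleted _get_noise_scale_vec: the final slice, noise_vec[(3 * num_actions + 5) + 3 : (3 * num_actions + 5)], has its stop below its start, so it is empty and the euler noise entries were never written. A corrected sketch under the 41-dim layout used here (scales are illustrative, not this project's config):

import torch

num_actions, num_single_obs = 10, 41
obs_scales = {"dof_pos": 1.0, "dof_vel": 0.05, "ang_vel": 0.25, "quat": 1.0}    # illustrative
noise_scales = {"dof_pos": 0.01, "dof_vel": 1.5, "ang_vel": 0.2, "quat": 0.05}  # illustrative

noise_vec = torch.zeros(num_single_obs)
base = 3 * num_actions + 5  # index 35: start of the IMU block
noise_vec[5 : num_actions + 5] = noise_scales["dof_pos"] * obs_scales["dof_pos"]
noise_vec[num_actions + 5 : 2 * num_actions + 5] = noise_scales["dof_vel"] * obs_scales["dof_vel"]
noise_vec[base : base + 3] = noise_scales["ang_vel"] * obs_scales["ang_vel"]  # ang vel
noise_vec[base + 3 : base + 6] = noise_scales["quat"] * obs_scales["quat"]    # euler; stop now above start

Moot after this commit, since the whole helper is gone, but relevant if observation noise is ever reinstated.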
@@ -449,48 +312,12 @@ def compute_observations(self):
                 self.actions,  # 10D
                 self.base_ang_vel * self.obs_scales["ang_vel"],  # 3
                 self.base_euler * self.obs_scales["quat"],  # 3
-                # (self.measured_heights if self.measured_heights is not None else torch.zeros((self.num_envs, 1), device=self.device))\
-                #     * self.obs_scales["height_measurements"],  # 1 .mean(dim=1, keepdim=True)
-                # self.terrain_difficulty.unsqueeze(1),  # 1
             ),
             dim=-1,
         )

-        # if self.measure_heights:
-        #     heights = (
-        #         torch.clip(
-        #             self.root_states[:, 2].unsqueeze(1) - 0.5 - self.measured_heights,
-        #             -1,
-        #             1.0,
-        #         )
-        #         * self.obs_scales["height_measurements"]
-        #     )
-        #     self.privileged_obs_buf = torch.cat((self.obs_buf, heights), dim=-1)
-
-        # if self.add_noise:
-        #     noise_level = self.obs_cfg.get("noise_level", 0.1)
-        #     obs_now = obs_buf.clone() + torch.randn_like(obs_buf) * self.noise_scale_vec * noise_level
-        # else:
-        #     obs_now = obs_buf.clone()
-        # self.obs_history.append(obs_now)
-        # self.critic_history.append(self.privileged_obs_buf)
-
-        # obs_buf_all = torch.stack([self.obs_history[i] for i in range(self.obs_history.maxlen)], dim=1)  # N,T,K
-        # Ensure deque length is sufficient
-        # if len(self.obs_history) < self.obs_history.maxlen:
-        #     obs_buf_all = torch.stack([self.obs_history[i] for i in range(len(self.obs_history))], dim=1)  # N,T,K
-        # else:
-        #     obs_buf_all = torch.stack([self.obs_history[i] for i in range(self.obs_history.maxlen)], dim=1)  # N,T,K
-
-        # self.obs_buf = obs_buf_all.reshape(self.num_envs, -1)  # N, T*K
         self.obs_buf = obs_buf

-        # directly use the current frame's privileged observations
-        # if len(self.critic_history) < self.c_frame_stack:
-        #     self.privileged_obs_buf = torch.cat([self.critic_history[i] for i in range(len(self.critic_history))], dim=1)
-        # else:
-        #     self.privileged_obs_buf = torch.cat([self.critic_history[i] for i in range(self.c_frame_stack)], dim=1)

     def reset_idx(self, envs_idx):
         if len(envs_idx) == 0:
             return
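The commit message's "41 obs" can be audited directly from the inline dimension comments on obs_buf (the q and dq entries, 10D each, sit unchanged between the two compute_observations hunks and are therefore hidden in this view):

num_actions = 10  # per the removed "Motor dofs" print earlier in this diff

obs_layout = {
    "command_input (sin, cos, vel_x, vel_y, ang_vel_yaw)": 5,
    "q (scaled dof positions)": num_actions,
    "dq (scaled dof velocities)": num_actions,
    "actions": num_actions,
    "base_ang_vel": 3,
    "base_euler": 3,
}
assert sum(obs_layout.values()) == 41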
@@ -646,95 +473,3 @@ def _reward_gait_symmetry(self):
         knee_symmetry = torch.abs(left_knee - right_knee)

         return torch.exp(-(hip_symmetry + knee_symmetry))
-
-    # def _reward_energy_efficiency(self):
-    #     # Reward energy efficiency by penalizing high joint velocities
-    #     return self.reward_cfg["reward_scales"]["dof_vel"] * torch.sum(torch.square(self.dof_vel), dim=1)
-
-    # def _reward_orientation(self):
-    #     # Penalize base orientation away from upright
-    #     return torch.exp(-torch.abs(self.base_euler[:, 0]) - torch.abs(self.base_euler[:, 1])) * self.reward_cfg["reward_scales"]["orientation"]
-
-    # def _reward_terrain_adaptation(self):
-    #     """Reward for adapting to different terrain types"""
-    #     # Calculate foot clearance
-    #     foot_clearance = torch.zeros((self.num_envs, 2), device=self.device)
-    #     for i, foot_idx in enumerate(self.feet_indices):
-    #         foot_pos = self.robot.get_link(id=foot_idx).get_pos()
-    #         # add terrain coordinate bounds checking
-    #         x_coords = torch.clamp(foot_pos[:, 0].long().cpu(), 0, self.terrain.terrain_hf.shape[0]-1)
-    #         y_coords = torch.clamp(foot_pos[:, 1].long().cpu(), 0, self.terrain.terrain_hf.shape[1]-1)
-    #         terrain_height = torch.tensor(self.terrain.terrain_hf[x_coords, y_coords]).to(self.device)
-    #         foot_clearance[:, i] = foot_pos[:, 2] - terrain_height
-
-    #     # Reward for maintaining appropriate foot clearance
-    #     target_clearance = 0.05  # 5cm
-    #     clearance_error = torch.abs(foot_clearance - target_clearance)
-    #     return torch.exp(-torch.mean(clearance_error, dim=1) / 0.02)
-
-    # def _reward_terrain_stability(self):
-    #     """Reward for maintaining stability on uneven terrain"""
-    #     # Penalize large base orientation changes
-    #     base_euler = quat_to_xyz(self.base_quat)
-    #     return torch.exp(-torch.abs(base_euler[:, 0]) - torch.abs(base_euler[:, 1]))
-
-    # def _reward_terrain_progress(self):
-    #     """Reward for making progress across different terrain types"""
-    #     # Calculate forward progress relative to terrain difficulty
-    #     forward_vel = self.base_lin_vel[:, 0]
-
-    #     # Difficulty factors based on terrain type
-    #     difficulty_factors = {
-    #         "flat_terrain": 0.1,
-    #         "random_uniform_terrain": 0.3,
-    #         "stepping_stones_terrain": 0.5,
-    #         "pyramid_sloped_terrain": 0.7,
-    #         "discrete_obstacles_terrain": 0.8,
-    #         "wave_terrain": 0.6,
-    #         "pyramid_stairs_terrain": 0.9,
-    #         "sloped_terrain": 0.4
-    #     }
-
-    #     # Get current terrain type
-    #     terrain_type = self.terrain.get_type(self.base_pos[:, :2])
-    #     difficulty = difficulty_factors.get(terrain_type, 0.5)
-
-    #     return forward_vel / (difficulty + 0.1)
-
-    # def _reward_joint_pos(self):
-    #     """Calculates the reward based on the difference between the current joint positions and the target joint positions."""
-    #     joint_pos = self.dof_pos.clone()
-    #     pos_target = getattr(self, 'ref_dof_pos', self.default_dof_pos.repeat(self.num_envs, 1).to(self.device)).clone()
-    #     diff = joint_pos - pos_target
-    #     r = torch.exp(-2 * torch.norm(diff, dim=1)) - 0.2 * torch.norm(diff, dim=1).clamp(0, 0.5)
-
-    #     return r
-
-    # def _reward_contact_stability(self):
-    #     """Reward for maintaining stable contact force distribution"""
-    #     # Get contact forces for feet
-    #     left_foot_force = self.net_contact_forces[:, self.feet_indices[0]]
-    #     right_foot_force = self.net_contact_forces[:, self.feet_indices[1]]
-
-    #     # Calculate force magnitude difference
-    #     force_diff = torch.norm(left_foot_force, dim=1) - torch.norm(right_foot_force, dim=1)
-
-    #     # Reward for balanced contact forces
-    #     return torch.exp(-torch.abs(force_diff) / 50.0)
-
-    # def _reward_foot_slip(self):
-    #     """Calculates the reward for minimizing foot slip. The reward is based on the contact forces
-    #     and the speed of the feet. A contact threshold is used to determine if the foot is in contact
-    #     with the ground. The speed of the foot is calculated and scaled by the contact condition.
-    #     """
-    #     contact = self.contact_forces[:, self.feet_indices, 2] > 5.0
-    #     foot_speed_norm = torch.norm(self.rigid_state[:, self.feet_indices, 7:9], dim=2)
-    #     rew = torch.sqrt(foot_speed_norm)
-    #     rew *= contact
-    #     return torch.sum(rew, dim=1)
-
-    # def _reward_dof_vel(self):
-    #     """Penalizes high velocities at the degrees of freedom (DOF) of the robot. This encourages smoother and
-    #     more controlled movements.
-    #     """
-    #     return torch.sum(torch.square(self.dof_vel), dim=1)
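_reward_gait_symmetry, whose tail appears as context above, is one of the rewards this commit keeps. It uses the common exp(-error) kernel: 1.0 at perfect left/right symmetry, decaying smoothly as hip and knee angles diverge. A worked sketch with illustrative angles:

import torch

# illustrative left/right joint angles (rad); in the env these come from self.dof_pos
left_hip, right_hip = torch.tensor([0.30]), torch.tensor([0.25])
left_knee, right_knee = torch.tensor([-0.60]), torch.tensor([-0.55])

hip_symmetry = torch.abs(left_hip - right_hip)       # 0.05
knee_symmetry = torch.abs(left_knee - right_knee)    # 0.05
reward = torch.exp(-(hip_symmetry + knee_symmetry))  # exp(-0.10), about 0.905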
