updating latest version #31

Closed · wants to merge 6 commits
2 changes: 1 addition & 1 deletion sim/deploy/config.py

@@ -58,7 +58,7 @@ class RobotConfig:
     tau_limit: np.ndarray
     robot_model_path: str
     # is2ac
-    dt: float = 0.00002 # 0.001
+    dt: float = 0.0002 # 0.001
     phase_duration: float = 0.64
     duration: float = 10.0
     decimation: int = 10
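Assuming dt is the physics timestep in seconds, the new value puts the physics loop at 5 kHz and, with decimation = 10 from the same config, the policy at 500 Hz. A quick arithmetic check (a sketch using the values in this diff):

sim_dt = 0.0002      # seconds per physics step (new value)
decimation = 10      # physics steps per policy step
policy_dt = sim_dt * decimation  # 0.002 s

print(1 / sim_dt)     # 5000.0 -> physics at 5 kHz
print(1 / policy_dt)  # 500.0  -> policy at 500 Hz

Note that the comment in sim/deploy/run.py below still says 1000 Hz / 100 Hz, which matches dt = 0.001 rather than either the old or the new value here.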
5 changes: 4 additions & 1 deletion sim/deploy/policy.py

@@ -109,7 +109,10 @@ def pd_control(
             Calculated torques.
         """
         target_dof_vel = np.zeros(self.cfg.num_actions, dtype=np.double)
-        return (target_dof_pos - dof_pos) * kp + (target_dof_vel - dof_vel) * kd
+        # breakpoint()
+        res = (target_dof_pos - dof_pos) * kp + (target_dof_vel - dof_vel) * kd
+        # breakpoint()
+        return res * 0.0025

     def parse_action(
         self, dof_pos: np.ndarray, dof_vel: np.ndarray, eu_ang: np.ndarray, ang_vel: np.ndarray, count_lowlevel: int
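For context, pd_control is a plain PD law on position and velocity error; the only functional change is the global 0.0025 factor on the returned torque. A standalone sketch of the computation (NumPy, names as in the diff; the scale argument stands in for the new hard-coded factor):

import numpy as np

def pd_control(target_dof_pos, dof_pos, kp, dof_vel, kd, scale=0.0025):
    """Torque = kp * position error + kd * velocity error, with zero target velocity."""
    target_dof_vel = np.zeros_like(dof_vel)
    tau = (target_dof_pos - dof_pos) * kp + (target_dof_vel - dof_vel) * kd
    return tau * scale  # global scaling introduced in this PR

Given the gains set in run.py below (kps = 200, kds = 10), scaling the output by 0.0025 is equivalent to running with kp = 0.5 and kd = 0.025, so it may be clearer to fold this factor into the gains themselves.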
19 changes: 13 additions & 6 deletions sim/deploy/run.py

@@ -22,7 +22,7 @@

 from sim.deploy.config import RobotConfig
 from sim.env import stompy_mjcf_path
-from sim.new_test.joints import Stompy as StompyFixed
+from sim.stompy_legs.joints import Stompy as StompyFixed


 class Worlds(Enum):

@@ -125,16 +125,19 @@ def simulate(self, policy: SimPolicy) -> None:
             dof_pos, dof_vel, orientation, ang_vel = self.get_observation()

             # Zero action
-            target_dof_pos = dof_pos
+            # target_dof_pos = dof_pos

             # We update the policy at a lower frequency
             # The simulation runs at 1000hz, but the policy is updated at 100hz
-            # if step % cfg.decimation == 0:
-            #     action = policy.next_action(dof_pos, dof_vel, orientation, ang_vel, step)
-            #     target_dof_pos = action * cfg.action_scale
+            if step % cfg.decimation == 0:
+                action = policy.next_action(dof_pos, dof_vel, orientation, ang_vel, step)
+                target_dof_pos = action
+                # set target_dof_pos to 0s
+                # target_dof_pos = np.zeros_like(target_dof_pos)
             tau = policy.pd_control(target_dof_pos, dof_pos, cfg.kps, dof_vel, cfg.kds)
-            # breakpoint()
+            if step % 1000 == 0:
+                breakpoint()
             # set tau to zero for now
             # tau = np.zeros_like(tau)
             self.step(tau=tau)
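The uncommented block is the standard decimation pattern: a fresh position target every cfg.decimation physics steps, PD torques every step. A stripped-down sketch of the loop (hypothetical sim and policy objects standing in for the classes in this file):

def simulate(sim, policy, cfg, num_steps: int) -> None:
    """Decimated control loop: policy at low rate, PD control at the physics rate."""
    for step in range(num_steps):
        dof_pos, dof_vel, orientation, ang_vel = sim.get_observation()
        if step % cfg.decimation == 0:
            # step 0 hits this branch, so target_dof_pos is always defined
            target_dof_pos = policy.next_action(dof_pos, dof_vel, orientation, ang_vel, step)
        tau = policy.pd_control(target_dof_pos, dof_pos, cfg.kps, dof_vel, cfg.kds)
        sim.step(tau=tau)

With target_dof_pos = dof_pos commented out, the code relies on 0 % cfg.decimation == 0 to define the target on the first iteration. The breakpoint() every 1000 steps will also halt any non-interactive run, so it presumably needs to come out before merge.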
@@ -271,6 +274,10 @@ def main(args: argparse.Namespace, cfg: RobotConfig) -> None:
     robot_path = stompy_mjcf_path(legs_only=True)
     num_single_obs = dof * 3 + 11

+    # is2ac, lets scale kps and kds to be the same as our stiffness and damping
+    # kps = np.ones((dof), dtype=np.double)
+    # kds = np.ones((dof), dtype=np.double) * 1
+    # tau_limit = np.ones((dof), dtype=np.double)
     kps = np.ones((dof), dtype=np.double) * 200
     kds = np.ones((dof), dtype=np.double) * 10
     tau_limit = np.ones((dof), dtype=np.double) * 200
Binary file added sim/deploy/tests/model_1500_model_only.pt
Binary file not shown.
4 changes: 4 additions & 0 deletions sim/env.py

@@ -3,6 +3,10 @@
 import os
 from pathlib import Path

+from dotenv import load_dotenv
+
+load_dotenv()
+

 def model_dir() -> Path:
     return Path(os.environ.get("MODEL_DIR", "models"))
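Calling load_dotenv() at import time lets MODEL_DIR (read by model_dir() above) come from a .env file at the repo root instead of the shell environment. A sketch of the intended usage (the .env contents are illustrative):

# .env at the repo root (illustrative):
#   MODEL_DIR=models/stompy
from sim.env import model_dir

print(model_dir())  # Path("models/stompy") if .env sets MODEL_DIR, else Path("models")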
2 changes: 0 additions & 2 deletions sim/humanoid_gym/envs/__init__.py

@@ -13,8 +13,6 @@
 # fmt: on
 from .getup_config import GetupCfg, GetupCfgPPO
 from .getup_env import GetupFreeEnv
-from .hexmove_config import HexmoveCfg, HexmoveCfgPPO
-from .hexmove_env import HexmoveFreeEnv
 from .legs_config import LegsCfg, LegsCfgPPO
 from .legs_env import LegsFreeEnv
 from .only_legs_config import OnlyLegsCfg, OnlyLegsCfgPPO
2 changes: 1 addition & 1 deletion sim/humanoid_gym/envs/getup_config.py

@@ -8,7 +8,7 @@
 from humanoid.envs.base.legged_robot_config import LeggedRobotCfg, LeggedRobotCfgPPO

 from sim.env import stompy_urdf_path
-from sim.stompy.joints import Stompy
+from sim.stompy2.joints import Stompy

 NUM_JOINTS = len(Stompy.all_joints()) # 37
2 changes: 1 addition & 1 deletion sim/humanoid_gym/envs/getup_env.py

@@ -8,7 +8,7 @@
 from isaacgym import gymtorch
 from isaacgym.torch_utils import *

-from sim.stompy.joints import Stompy
+from sim.stompy2.joints import Stompy

 default_feet_height = 0.0
2 changes: 1 addition & 1 deletion sim/humanoid_gym/envs/legs_config.py

@@ -4,7 +4,7 @@

 from sim.env import stompy_urdf_path
 from sim.humanoid_gym.envs.stompy_config import StompyCfg
-from sim.stompy.joints import StompyFixed
+from sim.stompy2.joints import StompyFixed

 NUM_JOINTS = len(StompyFixed.default_standing()) # 17
3 changes: 2 additions & 1 deletion sim/humanoid_gym/envs/legs_env.py

@@ -8,7 +8,7 @@
 from isaacgym import gymtorch
 from isaacgym.torch_utils import *

-from sim.stompy.joints import StompyFixed
+from sim.stompy2.joints import StompyFixed


 class LegsFreeEnv(LeggedRobot):

@@ -302,6 +302,7 @@ def _reward_knee_distance(self):
         """
         Calculates the reward based on the distance between the knees of the humanoid.
         """
+        # breakpoint()
         foot_pos = self.rigid_state[:, self.knee_indices, :2]
         foot_dist = torch.norm(foot_pos[:, 0, :] - foot_pos[:, 1, :], dim=1)
         fd = self.cfg.rewards.min_dist
58 changes: 31 additions & 27 deletions sim/humanoid_gym/envs/only_legs_config.py

@@ -6,7 +6,7 @@
 )

 from sim.env import stompy_urdf_path
-from sim.new_test.joints import Stompy
+from sim.stompy_legs.joints import Stompy

 NUM_JOINTS = len(Stompy.all_joints()) # 33

@@ -75,7 +75,7 @@ class terrain(LeggedRobotCfg.terrain):
         restitution = 0.0

     class noise:
-        add_noise = False
+        add_noise = True
         noise_level = 0.6 # scales other values

         class noise_scales:

@@ -117,10 +117,10 @@ class control(LeggedRobotCfg.control):
             "ankle": 10,
             "knee": 10,
         }
-        for k in stiffness:
-            stiffness[k] *= 0.00001
-            damping[k] *= 0.1
-        action_scale = 2500
+        # for k in stiffness:
+        #     stiffness[k] *= 0.00001
+        #     damping[k] *= 0.1
+        action_scale = 0.25
         # decimation: Number of control action updates @ sim DT per policy DT
         decimation = 10 # 100hz

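With the stiffness/damping rescaling commented out, action_scale drops from 2500 back to the conventional 0.25: raw policy outputs become small joint-angle offsets rather than enormous ones. A sketch of how this scale is typically applied in legged-gym-style environments (not verbatim from this repo):

import torch

def actions_to_targets(actions: torch.Tensor, default_dof_pos: torch.Tensor,
                       action_scale: float = 0.25) -> torch.Tensor:
    """Map raw policy outputs (roughly in [-1, 1]) to PD position targets."""
    return action_scale * actions + default_dof_pos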
@@ -135,7 +135,7 @@ class physx(LeggedRobotCfg.sim.physx):
         num_position_iterations = 4
         num_velocity_iterations = 1
         contact_offset = 0.01 # [m]
-        rest_offset = 0.0 # -0.02 # [m]
+        rest_offset = 0 # -0.02 # [m]
         bounce_threshold_velocity = 0.1 # [m/s]
         max_depenetration_velocity = 1.0
         max_gpu_contact_pairs = 2**23 # 2**24 -> needed for 8000 envs and more

@@ -149,12 +149,12 @@ class domain_rand:

         randomize_base_mass = True
         # added_mass_range = [-1.0, 1.0]
-        added_mass_range = [-0.2, 0.2]
+        added_mass_range = [-0.5, 0.5]
         push_robots = True
         push_interval_s = 4
         max_push_vel_xy = 0.2
         max_push_ang_vel = 0.4
-        dynamic_randomization = 0.02
+        dynamic_randomization = 0.05

     class commands(LeggedRobotCfg.commands):
         # Vers: lin_vel_x, lin_vel_y, ang_vel_yaw, heading (in heading mode ang_vel_yaw is recomputed from heading error)

@@ -171,8 +171,13 @@ class ranges:
     class rewards:
         # quite important to keep it right
         base_height_target = 0.72
+
+        # distance between the knees and feet is2ac
         min_dist = 0.2
         max_dist = 0.5
+        # min_dist = 0.4
+        # max_dist = 0.7
+
         # put some settings here for LLM parameter tuning
         target_joint_pos_scale = 0.17 # rad
         target_feet_height = 0.06 # m

@@ -184,23 +189,23 @@ class rewards:
         max_contact_force = 400 # forces above this value are penalized

         class scales:
-            # # reference motion tracking
-            # joint_pos = 1.6
-            # feet_clearance = 1.0
-            # feet_contact_number = 1.2
-            # # gait
-            # feet_air_time = 1.0
-            # foot_slip = -0.05
-            # feet_distance = 0.2
-            # knee_distance = 0.2
-            # # contact
-            # feet_contact_forces = -0.01
-            # # vel tracking
-            # tracking_lin_vel = 1.2
-            # tracking_ang_vel = 1.1
-            # vel_mismatch_exp = 0.5 # lin_z; ang x,y
-            # low_speed = 0.2
-            # track_vel_hard = 0.5
+            # reference motion tracking
+            joint_pos = 1.6
+            feet_clearance = 1.0
+            feet_contact_number = 1.2
+            # gait
+            feet_air_time = 1.0
+            foot_slip = -0.05
+            feet_distance = 0.2
+            knee_distance = 0.2
+            # contact
+            feet_contact_forces = -0.01
+            # vel tracking
+            tracking_lin_vel = 1.2
+            tracking_ang_vel = 1.1
+            vel_mismatch_exp = 0.5 # lin_z; ang x,y
+            low_speed = 0.2
+            track_vel_hard = 0.5

             # above this was removed for standing policy
             # base pos

@@ -218,7 +223,6 @@ class scales:

     class normalization:
         class obs_scales:
-            # is2ac
             lin_vel = 2.0
             ang_vel = 1.0
             dof_pos = 1.0
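Re-enabling the scales block restores the full walking reward in place of the stripped-down standing reward. Each attribute of scales weights one reward term, and the framework sums the weighted terms each step, with negative weights acting as penalties. Schematically (a sketch of the aggregation, not code from this diff):

def total_reward(terms: dict, scales: dict) -> float:
    """Weighted sum of per-step reward terms; negative scales penalize."""
    return sum(scales.get(name, 0.0) * value for name, value in terms.items())

# Illustrative numbers only:
print(total_reward({"joint_pos": 0.9, "foot_slip": 0.4},
                   {"joint_pos": 1.6, "foot_slip": -0.05}))  # 1.42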
28 changes: 17 additions & 11 deletions sim/humanoid_gym/envs/only_legs_env.py

@@ -115,21 +115,27 @@ def compute_ref_state(self):
         sin_pos = torch.sin(2 * torch.pi * phase)
         sin_pos_l = sin_pos.clone()
         sin_pos_r = sin_pos.clone()
+        # breakpoint()
+        # self.ref_dof_pos = torch.zeros_like(self.dof_pos)
+        default_clone = self.default_dof_pos.clone()
+        self.ref_dof_pos = self.default_dof_pos.repeat(self.num_envs, 1)
+
-        self.ref_dof_pos = torch.zeros_like(self.dof_pos)
         scale_1 = self.cfg.rewards.target_joint_pos_scale
         scale_2 = 2 * scale_1
         # left foot stance phase set to default joint pos
         sin_pos_l[sin_pos_l > 0] = 0
-        self.ref_dof_pos[:, self.legs_joints["left_hip_pitch"]] = sin_pos_l * scale_1
-        self.ref_dof_pos[:, self.legs_joints["left_knee_pitch"]] = sin_pos_l * scale_2
-        self.ref_dof_pos[:, self.legs_joints["left_ankle_roll"]] = sin_pos_l * scale_1
+        self.ref_dof_pos[:, self.legs_joints["left_hip_pitch"]] += sin_pos_l * scale_1
+        self.ref_dof_pos[:, self.legs_joints["left_knee_pitch"]] += sin_pos_l * scale_2
+        self.ref_dof_pos[:, self.legs_joints["left_ankle_pitch"]] += sin_pos_l * scale_1

         # right foot stance phase set to default joint pos
         sin_pos_r[sin_pos_r < 0] = 0
-        self.ref_dof_pos[:, self.legs_joints["right_hip_pitch"]] = sin_pos_r * scale_1
-        self.ref_dof_pos[:, self.legs_joints["right_knee_pitch"]] = sin_pos_r * scale_2
-        self.ref_dof_pos[:, self.legs_joints["right_ankle_roll"]] = sin_pos_r * scale_1
+        self.ref_dof_pos[:, self.legs_joints["right_hip_pitch"]] += sin_pos_r * scale_1
+        self.ref_dof_pos[:, self.legs_joints["right_knee_pitch"]] += sin_pos_r * scale_2
+        self.ref_dof_pos[:, self.legs_joints["right_ankle_pitch"]] += sin_pos_r * scale_1
+
+        # breakpoint()

         # Double support phase
         self.ref_dof_pos[torch.abs(sin_pos) < 0.1] = 0
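Three changes stand out here: the reference pose is now seeded from the default standing pose (repeat) instead of zeros, the sinusoidal swing offsets are added on top of it (+= rather than =), and the ankle target moves from ankle_roll to ankle_pitch. A self-contained sketch of the pattern (toy shapes and joint indices; scale_1 plays the role of target_joint_pos_scale from the config):

import torch

num_envs, num_dofs = 4, 10
default_dof_pos = torch.full((1, num_dofs), 0.1)  # toy standing pose
phase = torch.rand(num_envs, 1)                   # normalized gait phase per env
scale_1 = 0.17                                    # target_joint_pos_scale (rad)
scale_2 = 2 * scale_1                             # knee swings twice as far

sin_pos = torch.sin(2 * torch.pi * phase)
sin_pos_l, sin_pos_r = sin_pos.clone(), sin_pos.clone()
sin_pos_l[sin_pos_l > 0] = 0  # left leg swings only on its half of the cycle
sin_pos_r[sin_pos_r < 0] = 0  # right leg on the other half

ref_dof_pos = default_dof_pos.repeat(num_envs, 1)  # start from the standing pose
ref_dof_pos[:, 0:1] += sin_pos_l * scale_1         # e.g. left hip pitch
ref_dof_pos[:, 1:2] += sin_pos_l * scale_2         # e.g. left knee pitch
ref_dof_pos[:, 2:3] += sin_pos_l * scale_1         # e.g. left ankle pitch

Two things may be worth double-checking: default_clone is assigned but never used, and the unchanged double-support line (self.ref_dof_pos[torch.abs(sin_pos) < 0.1] = 0) zeroes whole rows, which now wipes out the seeded default pose rather than leaving those envs at standing.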
@@ -298,12 +304,12 @@ def _reward_knee_distance(self):
         """
         Calculates the reward based on the distance between the knees of the humanoid.
         """
-        foot_pos = self.rigid_state[:, self.knee_indices, :2]
-        foot_dist = torch.norm(foot_pos[:, 0, :] - foot_pos[:, 1, :], dim=1)
+        knee_pos = self.rigid_state[:, self.knee_indices, :2]
+        knee_dist = torch.norm(knee_pos[:, 0, :] - knee_pos[:, 1, :], dim=1)
         fd = self.cfg.rewards.min_dist
         max_df = self.cfg.rewards.max_dist / 2
-        d_min = torch.clamp(foot_dist - fd, -0.5, 0.0)
-        d_max = torch.clamp(foot_dist - max_df, 0, 0.5)
+        d_min = torch.clamp(knee_dist - fd, -0.5, 0.0)
+        d_max = torch.clamp(knee_dist - max_df, 0, 0.5)
         return (torch.exp(-torch.abs(d_min) * 100) + torch.exp(-torch.abs(d_max) * 100)) / 2

     def _reward_foot_slip(self):
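The rename from foot_* to knee_* is cosmetic but makes the reward readable: it peaks when knee separation lies between min_dist and max_dist / 2 and decays exponentially outside that band. A quick numeric check (config values from this PR; the distances are illustrative):

import torch

min_dist, max_dist = 0.2, 0.5  # from only_legs_config.py; max_dist / 2 = 0.25
knee_dist = torch.tensor([0.15, 0.20, 0.25, 0.35])  # illustrative separations (m)

d_min = torch.clamp(knee_dist - min_dist, -0.5, 0.0)     # negative if too close
d_max = torch.clamp(knee_dist - max_dist / 2, 0.0, 0.5)  # positive if too far apart
reward = (torch.exp(-d_min.abs() * 100) + torch.exp(-d_max.abs() * 100)) / 2
print(reward)  # ~[0.50, 1.00, 1.00, 0.50]: flat top between 0.20 and 0.25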
2 changes: 1 addition & 1 deletion sim/humanoid_gym/envs/stompy_config.py

@@ -6,7 +6,7 @@
 )

 from sim.env import stompy_urdf_path
-from sim.stompy.joints import Stompy
+from sim.stompy2.joints import Stompy

 NUM_JOINTS = len(Stompy.all_joints()) # 33
4 changes: 3 additions & 1 deletion sim/humanoid_gym/envs/stompy_env.py

@@ -8,7 +8,7 @@
 from isaacgym import gymtorch
 from isaacgym.torch_utils import *

-from sim.stompy.joints import Stompy
+from sim.stompy2.joints import Stompy


 class StompyFreeEnv(LeggedRobot):

@@ -56,6 +56,7 @@ def __init__(self, cfg: LeggedRobotCfg, sim_params, physics_engine, sim_device,

         self.legs_joints = {}
         for name, joint in Stompy.legs.left.joints_motors():
+            print(name)
             joint_handle = self.gym.find_actor_dof_handle(env_handle, actor_handle, joint)
             self.legs_joints["left_" + name] = joint_handle

@@ -385,6 +386,7 @@ def _reward_base_height(self):
         The reward is computed based on the height difference between the robot's base and the average height
         of its feet when they are in contact with the ground.
         """
+        # breakpoint()
         stance_mask = self._get_gait_phase()
         measured_heights = torch.sum(self.rigid_state[:, self.feet_indices, 2] * stance_mask, dim=1) / torch.sum(
             stance_mask, dim=1
1 change: 1 addition & 0 deletions sim/humanoid_gym/play.py

@@ -36,6 +36,7 @@ def play(args: argparse.Namespace) -> None:
     env_cfg.domain_rand.joint_angle_noise = 0.0
     env_cfg.noise.curriculum = False
     env_cfg.noise.noise_level = 0.5
+    env_cfg.sim.physx.solver_type = 0

     train_cfg.seed = 123145
     logger.info("train_cfg.runner_class_name: %s", train_cfg.runner_class_name)
3 changes: 2 additions & 1 deletion sim/scripts/simulate_urdf.py

@@ -12,7 +12,6 @@
 from typing import Any, Dict, Literal, NewType

 from isaacgym import gymapi, gymtorch, gymutil
-
 from sim.env import stompy_urdf_path
 from sim.logging import configure_logging
 from sim.stompy_legs.joints import Stompy

@@ -152,6 +151,7 @@ def load_gym() -> GymParams:
     # dof_vel[:] = 0.0
     starting_positions = Stompy.default_standing()
     dof_ids: Dict[str, int] = gym.get_actor_dof_dict(env, robot)
+    print(starting_positions)
     for joint_name, joint_position in starting_positions.items():
         dof_pos[0, dof_ids[joint_name]] = joint_position
     env_ids_int32 = torch.zeros(1, dtype=torch.int32)

@@ -161,6 +161,7 @@
         gymtorch.unwrap_tensor(env_ids_int32),
         1,
     )
+    print(dof_pos)

     return GymParams(
         gym=gym,