
Commit 3843e4c

Merge pull request #24 from zeroth-robotics/cmd-arrow
Cmd arrow
2 parents 4827e5a + f50edf4

6 files changed (+265, -45 lines changed)


sim/envs/humanoids/stompymicro_config.py

Lines changed: 3 additions & 1 deletion

@@ -1,5 +1,7 @@
 """Defines the environment configuration for the Getting up task"""
 
+import numpy as np
+
 from sim.env import robot_urdf_path
 from sim.envs.base.legged_robot_config import (  # type: ignore
     LeggedRobotCfg,
@@ -148,7 +150,7 @@ class ranges:
             lin_vel_x = [-0.05, 0.23]  # min max [m/s]
             lin_vel_y = [-0.04, 0.04]  # min max [m/s]
             ang_vel_yaw = [-0.1, 0.1]  # min max [rad/s]
-            heading = [-3.14, 3.14]
+            heading = [-np.pi, np.pi]  # min max [rad]
 
     class rewards:
         base_height_target = Robot.height
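
The heading change is not purely cosmetic: with the literal 3.14, headings in the sliver (3.14, pi] rad could never be commanded. A quick standalone check, not part of the commit, of what the exact endpoints cover:

# Hypothetical sketch (not from this commit): np.pi endpoints vs. the old literal.
import numpy as np

print(np.pi - 3.14)  # ~0.0016 rad of heading the old range could not express
heading = np.random.uniform(-np.pi, np.pi)  # how this range is sampled downstream
assert -np.pi <= heading <= np.pi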

sim/play.py

Lines changed: 30 additions & 41 deletions

@@ -8,49 +8,27 @@
 # mypy: ignore-errors
 
 import argparse
-import copy
 import logging
 import os
 from datetime import datetime
-from typing import Any, Union
 
 import cv2
 import h5py
 import numpy as np
 from isaacgym import gymapi
 from tqdm import tqdm
 
-logger = logging.getLogger(__name__)
-
 from sim.env import run_dir  # noqa: E402
 from sim.envs import task_registry  # noqa: E402
-from sim.utils.helpers import get_args  # noqa: E402
+from sim.utils.cmd_manager import CommandManager  # noqa: E402
+from sim.utils.helpers import (  # noqa: E402
+    export_policy_as_jit,
+    export_policy_as_onnx,
+    get_args,
+)
 from sim.utils.logger import Logger  # noqa: E402
 
-import torch  # isort: skip
-
-
-def export_policy_as_jit(actor_critic: Any, path: Union[str, os.PathLike]) -> None:
-    os.makedirs(path, exist_ok=True)
-    path = os.path.join(path, "policy_1.pt")
-    model = copy.deepcopy(actor_critic.actor).to("cpu")
-    traced_script_module = torch.jit.script(model)
-    traced_script_module.save(path)
-
-
-def export_policy_as_onnx(actor_critic, path):
-    os.makedirs(path, exist_ok=True)
-    path = os.path.join(path, "policy_1.onnx")
-    model = copy.deepcopy(actor_critic.actor).to("cpu")
-
-    # Get the input dimension from the first layer of the model
-    first_layer = next(model.parameters())
-    input_dim = first_layer.shape[1]
-
-    # Create a dummy input tensor with the correct dimensions
-    dummy_input = torch.randn(1, input_dim)
-
-    torch.onnx.export(model, dummy_input, path)
+logger = logging.getLogger(__name__)
 
 
 def play(args: argparse.Namespace) -> None:
@@ -86,7 +64,6 @@ def play(args: argparse.Namespace) -> None:
     policy = ppo_runner.get_inference_policy(device=env.device)
 
     # Export policy if needed
-    EXPORT_POLICY = True
     if EXPORT_POLICY:
         path = os.path.join(".")
         export_policy_as_jit(ppo_runner.alg.actor_critic, path)
@@ -104,6 +81,7 @@ def play(args: argparse.Namespace) -> None:
     joint_index = 1
     stop_state_log = 1000
     now = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
+
     if args.log_h5:
         h5_file = h5py.File(f"data{now}.h5", "w")
@@ -136,7 +114,7 @@ def play(args: argparse.Namespace) -> None:
             "observations/euler", (max_timesteps, buf_len, 3), dtype=np.float32
         )  # root orientation
 
-    if RENDER:
+    if not args.headless:
         camera_properties = gymapi.CameraProperties()
         camera_properties.width = 1920
         camera_properties.height = 1080
@@ -165,26 +143,36 @@ def play(args: argparse.Namespace) -> None:
            os.mkdir(experiment_dir)
        video = cv2.VideoWriter(dir, fourcc, 50.0, (1920, 1080))
 
+    cmd_manager = CommandManager(
+        num_envs=env_cfg.env.num_envs,
+        mode=CMD_MODE,
+        default_cmd=DEFAULT_COMMAND,
+        device=env.device,
+        env_cfg=env_cfg
+    )
+
     for t in tqdm(range(stop_state_log)):
         actions = policy(obs.detach())
         if args.log_h5:
             dset_actions[t] = actions.detach().numpy()
 
-        if FIX_COMMAND:
-            env.commands[:, 0] = 0.2
-            env.commands[:, 1] = 0.0
-            env.commands[:, 2] = 0.0
-            env.commands[:, 3] = 0.0
+        env.commands[:] = cmd_manager.update(env.dt)
+
         obs, critic_obs, rews, dones, infos = env.step(actions.detach())
         print(f"IMU: {obs[0, (3 * env.num_actions + 5) + 3 : (3 * env.num_actions + 5) + 2 * 3]}")
 
-        if RENDER:
+        if not args.headless:
            env.gym.fetch_results(env.sim, True)
            env.gym.step_graphics(env.sim)
            env.gym.render_all_camera_sensors(env.sim)
            img = env.gym.get_camera_image(env.sim, env.envs[0], h1, gymapi.IMAGE_COLOR)
            img = np.reshape(img, (1080, 1920, 4))
            img = cv2.cvtColor(img, cv2.COLOR_RGBA2BGR)
+           robot_positions = env.root_states[:, 0:3].cpu().numpy()
+           actual_vels = np.stack([env.base_lin_vel[:, 0].cpu().numpy(), env.base_lin_vel[:, 1].cpu().numpy()], axis=1)
+
+           if args.command_arrow:
+               cmd_manager.draw(env.gym, env.viewer, env.envs, robot_positions, actual_vels)
 
            video.write(img[..., :3])
 
@@ -237,7 +225,7 @@ def play(args: argparse.Namespace) -> None:
        env_logger.print_rewards()
        env_logger.plot_states()
 
-    if RENDER:
+    if not args.headless:
        video.release()
 
    if args.log_h5:
@@ -246,11 +234,12 @@ def play(args: argparse.Namespace) -> None:
 
 
 if __name__ == "__main__":
-    RENDER = True
-    FIX_COMMAND = True
-
+    EXPORT_POLICY = True
     EXPORT_ONNX = True
 
+    DEFAULT_COMMAND = [0.3, 0.0, 0.0, 0.0]
+    CMD_MODE = "random"  # options: "fixed", "oscillating", "random", "keyboard"
+
     base_args = get_args()
     parser = argparse.ArgumentParser(description="Extend base arguments with log_h5")
     parser.add_argument("--log_h5", action="store_true", help="Enable HDF5 logging")
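
With the FIX_COMMAND block gone, play() delegates command generation to the new CommandManager (full source in sim/utils/cmd_manager.py below). A minimal standalone sketch of the new flow, not part of the commit, assuming the sim package imports cleanly; "fixed" mode needs no env_cfg, so no Isaac Gym environment is required:

# Hypothetical usage sketch: driving CommandManager without a simulator.
from sim.utils.cmd_manager import CommandManager

cmd_manager = CommandManager(num_envs=2, mode="fixed", default_cmd=[0.3, 0.0, 0.0, 0.0])
for _ in range(5):
    commands = cmd_manager.update(dt=0.001)  # returns a (num_envs, 4) tensor
print(commands)  # every row stays [x_vel, y_vel, yaw_vel, heading] = [0.3, 0.0, 0.0, 0.0]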

sim/resources/stompymicro/joints.py

Lines changed: 1 addition & 1 deletion

@@ -101,7 +101,7 @@ def default_standing(cls) -> Dict[str, float]:
             cls.legs.left.knee_pitch: -0.741,
             cls.legs.left.hip_yaw: 0,
             cls.legs.left.hip_roll: 0,
-            cls.legs.left.ankle_pitch: -0.5,
+            cls.legs.left.ankle_pitch: -0.5,
             cls.legs.right.hip_pitch: -0.23,
             cls.legs.right.knee_pitch: 0.741,
             cls.legs.right.ankle_pitch: 0.5,

sim/sim2sim.py

Lines changed: 1 addition & 1 deletion

@@ -316,7 +316,7 @@ def get_policy_output(policy, input_data):
         dt=0.001,
         decimation=10,
         cycle_time=0.4,
-        tau_factor=4.,
+        tau_factor=4.0,
     )
 
     run_mujoco(policy, cfg, args.keyboard_use)

sim/utils/cmd_manager.py

Lines changed: 146 additions & 0 deletions

@@ -0,0 +1,146 @@
+from enum import Enum
+from typing import List
+
+import numpy as np
+import torch
+
+from sim.utils.helpers import draw_vector
+
+
+class CommandMode(Enum):
+    FIXED = "fixed"
+    OSCILLATING = "oscillating"
+    KEYBOARD = "keyboard"
+    RANDOM = "random"
+
+
+class CommandManager:
+    """Manages robot commands"""
+
+    def __init__(
+        self,
+        num_envs: int = 1,
+        mode: str = "fixed",
+        default_cmd: List[float] = [0.3, 0.0, 0.0, 0.0],
+        device="cpu",
+        env_cfg=None,
+    ):
+        self.num_envs = num_envs
+        self.mode = CommandMode(mode)
+        self.device = device
+        self.default_cmd = torch.tensor(default_cmd, device=self.device)
+        self.commands = self.default_cmd.repeat(num_envs, 1)
+        self.time = 0
+        self.env_cfg = env_cfg
+
+        # Mode-specific parameters
+        if self.mode == CommandMode.OSCILLATING:
+            self.osc_period = 5.0  # secs
+            self.min_x_vel = env_cfg.commands.ranges.lin_vel_x[0] if env_cfg else 0.0
+            self.max_x_vel = env_cfg.commands.ranges.lin_vel_x[1] if env_cfg else 0.3
+            self.osc_amplitude = (self.max_x_vel - self.min_x_vel) / 2
+            self.osc_offset = (self.max_x_vel + self.min_x_vel) / 2
+        elif self.mode == CommandMode.RANDOM:
+            self.cmd_ranges = {
+                'lin_vel_x': env_cfg.commands.ranges.lin_vel_x,
+                'lin_vel_y': env_cfg.commands.ranges.lin_vel_y,
+                'ang_vel_yaw': env_cfg.commands.ranges.ang_vel_yaw,
+                'heading': env_cfg.commands.ranges.heading
+            } if env_cfg else {
+                'lin_vel_x': [-0.05, 0.23],
+                'lin_vel_y': [-0.05, 0.05],
+                'ang_vel_yaw': [-0.5, 0.5],
+                'heading': [-np.pi, np.pi]
+            }
+            self.resampling_time = env_cfg.commands.resampling_time if env_cfg else 8.0
+            self.last_sample_time = 0.0
+        elif self.mode == CommandMode.KEYBOARD:
+            try:
+                import pygame
+                pygame.init()
+                pygame.display.set_mode((100, 100))
+                self.x_vel_cmd = 0.0
+                self.y_vel_cmd = 0.0
+                self.yaw_vel_cmd = 0.0
+            except ImportError:
+                print("WARNING: pygame not found, falling back to fixed commands")
+                self.mode = CommandMode.FIXED
+
+    def update(self, dt: float) -> torch.Tensor:
+        """Updates and returns commands based on current mode."""
+        self.time += dt
+
+        if self.mode == CommandMode.FIXED:
+            return self.commands
+        elif self.mode == CommandMode.OSCILLATING:
+            # Oscillate x velocity between min and max
+            x_vel = self.osc_offset + self.osc_amplitude * torch.sin(
+                torch.tensor(2 * np.pi * self.time / self.osc_period)
+            )
+            self.commands[:, 0] = x_vel.to(self.device)
+        elif self.mode == CommandMode.RANDOM:
+            if self.time - self.last_sample_time >= self.resampling_time:
+                self.last_sample_time = self.time
+                # Generate random commands within training ranges
+                new_commands = torch.tensor([
+                    np.random.uniform(*self.cmd_ranges['lin_vel_x']),
+                    np.random.uniform(*self.cmd_ranges['lin_vel_y']),
+                    0.0,
+                    np.random.uniform(*self.cmd_ranges['heading'])
+                ], device=self.device) if self.env_cfg and self.env_cfg.commands.heading_command else torch.tensor([
+                    np.random.uniform(*self.cmd_ranges['lin_vel_x']),
+                    np.random.uniform(*self.cmd_ranges['lin_vel_y']),
+                    np.random.uniform(*self.cmd_ranges['ang_vel_yaw']),
+                    0.0
+                ], device=self.device)
+                self.commands = new_commands.repeat(self.num_envs, 1)
+        elif self.mode == CommandMode.KEYBOARD:
+            self._handle_keyboard_input()
+            self.commands[:, 0] = torch.tensor(self.x_vel_cmd, device=self.device)
+            self.commands[:, 1] = torch.tensor(self.y_vel_cmd, device=self.device)
+            self.commands[:, 2] = torch.tensor(self.yaw_vel_cmd, device=self.device)
+
+        return self.commands
+
+    def draw(self, gym, viewer, env_handles, robot_positions, actual_vels) -> None:
+        """Draws command and actual velocity arrows for all robots."""
+        if viewer is None:
+            return
+
+        gym.clear_lines(viewer)
+        cmd_vels = self.commands[:, :2].cpu().numpy()
+        for env_handle, robot_pos, cmd_vel, actual_vel in zip(env_handles, robot_positions, cmd_vels, actual_vels):
+            draw_vector(gym, viewer, env_handle, robot_pos, cmd_vel, (0.0, 1.0, 0.0))  # cmd vector (green)
+            draw_vector(gym, viewer, env_handle, robot_pos, actual_vel, (1.0, 0.0, 0.0))  # vel vector (red)
+
+    def _handle_keyboard_input(self):
+        """Handles keyboard input for command updates."""
+        import pygame
+
+        for event in pygame.event.get():
+            if event.type == pygame.QUIT:
+                pygame.quit()
+
+        keys = pygame.key.get_pressed()
+
+        # Update movement commands based on arrow keys
+        if keys[pygame.K_UP]:
+            self.x_vel_cmd = min(self.x_vel_cmd + 0.0005, 0.5)
+        if keys[pygame.K_DOWN]:
+            self.x_vel_cmd = max(self.x_vel_cmd - 0.0005, -0.5)
+        if keys[pygame.K_LEFT]:
+            self.y_vel_cmd = min(self.y_vel_cmd + 0.0005, 0.5)
+        if keys[pygame.K_RIGHT]:
+            self.y_vel_cmd = max(self.y_vel_cmd - 0.0005, -0.5)
+
+        # Yaw control
+        if keys[pygame.K_a]:
+            self.yaw_vel_cmd = min(self.yaw_vel_cmd + 0.001, 0.5)
+        if keys[pygame.K_z]:
+            self.yaw_vel_cmd = max(self.yaw_vel_cmd - 0.001, -0.5)
+
+        # Reset commands
+        if keys[pygame.K_SPACE]:
+            self.x_vel_cmd = 0.0
+            self.y_vel_cmd = 0.0
+            self.yaw_vel_cmd = 0.0
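
In oscillating mode the x velocity follows x(t) = offset + amplitude * sin(2*pi*t / period), so it sweeps exactly between the configured minimum and maximum. A standalone sanity check, not part of the commit, using the no-env_cfg fallback range of 0.0 to 0.3 m/s and the 5 s period:

# Hypothetical sketch: verifying the oscillating sweep stays inside its range.
from sim.utils.cmd_manager import CommandManager

mgr = CommandManager(num_envs=1, mode="oscillating")  # no env_cfg: falls back to [0.0, 0.3] m/s
xs = []
dt = 0.01
for _ in range(1000):  # 10 s of simulated time, two full 5 s periods
    xs.append(float(mgr.update(dt)[0, 0]))
assert all(-1e-6 <= x <= 0.3 + 1e-6 for x in xs)
assert max(xs) > 0.29 and min(xs) < 0.01  # both extremes are visited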
