Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Integration of HumanoidBench tasks #339

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ set(MUJOCO_BUILD_TESTS OFF)
set(MUJOCO_TEST_PYTHON_UTIL OFF)

set(MUJOCO_MPC_MUJOCO_GIT_TAG
088079eff0450e32b98ee743141780ed68307506
3.1.4
CACHE STRING "Git revision for MuJoCo."
)

Expand Down
6 changes: 0 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,12 +58,6 @@ Quadruped task:

[![Quadruped](http://img.youtube.com/vi/esLuwaWz4oE/hqdefault.jpg)](https://www.youtube.com/watch?v=esLuwaWz4oE)


Bimanual manipulation:

[![Bimanual](http://img.youtube.com/vi/aCNCKVThKIE/hqdefault.jpg)](https://www.youtube.com/watch?v=aCNCKVThKIE)


Rubik's cube 10-move unscramble:

[![Unscramble](http://img.youtube.com/vi/ZRRvVWV-Muk/hqdefault.jpg)](https://www.youtube.com/watch?v=ZRRvVWV-Muk)
Expand Down
4 changes: 0 additions & 4 deletions cmake/MpcOptions.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -103,10 +103,6 @@ if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang
endif()
endif()

if(NOT CMAKE_INTERPROCEDURAL_OPTIMIZATION AND (CMAKE_BUILD_TYPE AND NOT CMAKE_BUILD_TYPE STREQUAL "Debug"))
set(CMAKE_INTERPROCEDURAL_OPTIMIZATION ON)
endif()

include(MujocoHarden)
set(EXTRA_COMPILE_OPTIONS ${EXTRA_COMPILE_OPTIONS} ${MUJOCO_HARDEN_COMPILE_OPTIONS})
set(EXTRA_LINK_OPTIONS ${EXTRA_LINK_OPTIONS} ${MUJOCO_HARDEN_LINK_OPTIONS})
Expand Down
9 changes: 9 additions & 0 deletions mjpc/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,15 @@ add_library(
tasks/swimmer/swimmer.h
tasks/walker/walker.cc
tasks/walker/walker.h
tasks/humanoid_bench/utility/dm_control_utils_rewards.cpp
tasks/humanoid_bench/walk/walk.cc
tasks/humanoid_bench/walk/walk.h
tasks/humanoid_bench/walk_reward.cc
tasks/humanoid_bench/walk_reward.h
tasks/humanoid_bench/stand/stand.cc
tasks/humanoid_bench/stand/stand.h
tasks/humanoid_bench/push/push.cc
tasks/humanoid_bench/push/push.h
planners/planner.cc
planners/planner.h
planners/policy.h
Expand Down
18 changes: 11 additions & 7 deletions mjpc/agent.cc
Original file line number Diff line number Diff line change
Expand Up @@ -451,8 +451,8 @@ int Agent::SetModeByName(std::string_view name) {
void Agent::ModifyScene(mjvScene* scn) {
// if acting is off make all geom colors grayscale
if (!action_enabled) {
int cube = mj_name2id(model_, mjOBJ_MATERIAL, "cube");
int graycube = mj_name2id(model_, mjOBJ_MATERIAL, "graycube");
int cube = mj_name2id(model_, mjOBJ_TEXTURE, "cube");
int graycube = mj_name2id(model_, mjOBJ_TEXTURE, "graycube");
for (int i = 0; i < scn->ngeom; i++) {
mjvGeom* g = scn->geoms + i;
// skip static and decor geoms
Expand All @@ -463,8 +463,8 @@ void Agent::ModifyScene(mjvScene* scn) {
double rgb_average = (g->rgba[0] + g->rgba[1] + g->rgba[2]) / 3;
g->rgba[0] = g->rgba[1] = g->rgba[2] = rgb_average;
// specifically for the hand task, make grayscale cube.
if (cube > -1 && graycube > -1 && g->matid == cube) {
g->matid = graycube;
if (cube > -1 && graycube > -1 && g->texid == cube) {
g->texid = graycube;
}
}
}
Expand Down Expand Up @@ -508,10 +508,14 @@ void Agent::ModifyScene(mjvScene* scn) {
color);

// make geometry
mjv_connector(
mjv_makeConnector(
&scn->geoms[scn->ngeom], mjGEOM_CAPSULE, width,
winner->trace.data() + 3 * num_trace * i + 3 * j,
winner->trace.data() + 3 * num_trace * (i + 1) + 3 * j);
winner->trace[3 * num_trace * i + 3 * j],
winner->trace[3 * num_trace * i + 1 + 3 * j],
winner->trace[3 * num_trace * i + 2 + 3 * j],
winner->trace[3 * num_trace * (i + 1) + 3 * j],
winner->trace[3 * num_trace * (i + 1) + 1 + 3 * j],
winner->trace[3 * num_trace * (i + 1) + 2 + 3 * j]);
// increment number of geometries
scn->ngeom += 1;
}
Expand Down
30 changes: 16 additions & 14 deletions mjpc/grpc/agent_service.cc
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,12 @@ using ::agent::GetAllModesRequest;
using ::agent::GetAllModesResponse;
using ::agent::GetBestTrajectoryRequest;
using ::agent::GetBestTrajectoryResponse;
using ::agent::GetResidualsRequest;
using ::agent::GetResidualsResponse;
using ::agent::GetCostValuesAndWeightsRequest;
using ::agent::GetCostValuesAndWeightsResponse;
using ::agent::GetModeRequest;
using ::agent::GetModeResponse;
using ::agent::GetResidualsRequest;
using ::agent::GetResidualsResponse;
using ::agent::GetStateRequest;
using ::agent::GetStateResponse;
using ::agent::GetTaskParametersRequest;
Expand Down Expand Up @@ -115,7 +115,8 @@ grpc::Status AgentService::Init(grpc::ServerContext* context,
<< "Multiple instances of AgentService detected.";
agent_model = agent_.GetModel();
// copy the model before agent model's timestep and integrator are updated
CHECK_EQ(model, nullptr) << "Multiple instances of AgentService detected.";
CHECK_EQ(model, nullptr)
<< "Multiple instances of AgentService detected.";
model = mj_copyModel(nullptr, agent_model);
data_ = mj_makeData(model);
rollout_data_.reset(mj_makeData(model));
Expand Down Expand Up @@ -189,14 +190,14 @@ grpc::Status AgentService::GetAction(grpc::ServerContext* context,
return out;
}

grpc::Status AgentService::GetResiduals(grpc::ServerContext* context,
const GetResidualsRequest* request,
GetResidualsResponse* response) {
grpc::Status AgentService::GetResiduals(
grpc::ServerContext* context, const GetResidualsRequest* request,
GetResidualsResponse* response) {
if (!Initialized()) {
return {grpc::StatusCode::FAILED_PRECONDITION, "Init not called."};
}
return grpc_agent_util::GetResiduals(request, &agent_, model, data_,
response);
return grpc_agent_util::GetResiduals(request, &agent_, model,
data_, response);
}

grpc::Status AgentService::GetCostValuesAndWeights(
Expand Down Expand Up @@ -274,9 +275,9 @@ grpc::Status AgentService::GetTaskParameters(
return grpc_agent_util::GetTaskParameters(request, &agent_, response);
}

grpc::Status AgentService::SetCostWeights(grpc::ServerContext* context,
const SetCostWeightsRequest* request,
SetCostWeightsResponse* response) {
grpc::Status AgentService::SetCostWeights(
grpc::ServerContext* context, const SetCostWeightsRequest* request,
SetCostWeightsResponse* response) {
if (!Initialized()) {
return {grpc::StatusCode::FAILED_PRECONDITION, "Init not called."};
}
Expand Down Expand Up @@ -349,9 +350,10 @@ grpc::Status AgentService::GetBestTrajectory(
return grpc::Status::OK;
}

grpc::Status AgentService::SetAnything(grpc::ServerContext* context,
const SetAnythingRequest* request,
SetAnythingResponse* response) {

grpc::Status AgentService::SetAnything(
grpc::ServerContext* context, const SetAnythingRequest* request,
SetAnythingResponse* response) {
if (!Initialized()) {
return {grpc::StatusCode::FAILED_PRECONDITION, "Init not called."};
}
Expand Down
8 changes: 6 additions & 2 deletions mjpc/grpc/direct_service.cc
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,9 @@ grpc::Status DirectService::Init(grpc::ServerContext* context,
// mjVFS structs need to be allocated on the heap, because it's ~2MB
auto vfs = std::make_unique<mjVFS>();
mj_defaultVFS(vfs.get());
mj_addBufferVFS(vfs.get(), file, mjb.data(), mjb.size());
mj_makeEmptyFileVFS(vfs.get(), file, mjb.size());
int file_idx = mj_findFileVFS(vfs.get(), file);
memcpy(vfs->filedata[file_idx], mjb.data(), mjb.size());
tmp_model = {mj_loadModel(file, vfs.get()), mj_deleteModel};
mj_deleteFileVFS(vfs.get(), file);
} else if (request->has_model() && request->model().has_xml()) {
Expand All @@ -86,7 +88,9 @@ grpc::Status DirectService::Init(grpc::ServerContext* context,
// mjVFS structs need to be allocated on the heap, because it's ~2MB
auto vfs = std::make_unique<mjVFS>();
mj_defaultVFS(vfs.get());
mj_addBufferVFS(vfs.get(), file, model_xml.data(), model_xml.size());
mj_makeEmptyFileVFS(vfs.get(), file, model_xml.size());
int file_idx = mj_findFileVFS(vfs.get(), file);
memcpy(vfs->filedata[file_idx], model_xml.data(), model_xml.size());
tmp_model = {mj_loadXML(file, vfs.get(), load_error, sizeof(load_error)),
mj_deleteModel};
mj_deleteFileVFS(vfs.get(), file);
Expand Down
9 changes: 7 additions & 2 deletions mjpc/grpc/filter_service.cc
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

#include "mjpc/grpc/filter_service.h"

#include <cstring>
#include <memory>
#include <sstream>
#include <string_view>
Expand Down Expand Up @@ -69,7 +70,9 @@ grpc::Status FilterService::Init(grpc::ServerContext* context,
// mjVFS structs need to be allocated on the heap, because it's ~2MB
auto vfs = std::make_unique<mjVFS>();
mj_defaultVFS(vfs.get());
mj_addBufferVFS(vfs.get(), file, mjb.data(), mjb.size());
mj_makeEmptyFileVFS(vfs.get(), file, mjb.size());
int file_idx = mj_findFileVFS(vfs.get(), file);
memcpy(vfs->filedata[file_idx], mjb.data(), mjb.size());
tmp_model = {mj_loadModel(file, vfs.get()), mj_deleteModel};
mj_deleteFileVFS(vfs.get(), file);
} else if (request->has_model() && request->model().has_xml()) {
Expand All @@ -81,7 +84,9 @@ grpc::Status FilterService::Init(grpc::ServerContext* context,
// mjVFS structs need to be allocated on the heap, because it's ~2MB
auto vfs = std::make_unique<mjVFS>();
mj_defaultVFS(vfs.get());
mj_addBufferVFS(vfs.get(), file, model_xml.data(), model_xml.size());
mj_makeEmptyFileVFS(vfs.get(), file, model_xml.size());
int file_idx = mj_findFileVFS(vfs.get(), file);
memcpy(vfs->filedata[file_idx], model_xml.data(), model_xml.size());
tmp_model = {mj_loadXML(file, vfs.get(), load_error, sizeof(load_error)),
mj_deleteModel};
mj_deleteFileVFS(vfs.get(), file);
Expand Down
9 changes: 7 additions & 2 deletions mjpc/grpc/grpc_agent_util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

#include "mjpc/grpc/grpc_agent_util.h"

#include <cstring>
#include <memory>
#include <sstream>
#include <string>
Expand Down Expand Up @@ -520,7 +521,9 @@ mjpc::UniqueMjModel LoadModelFromString(std::string_view xml, char* error,
// mjVFS structs need to be allocated on the heap, because it's ~2MB
auto vfs = std::make_unique<mjVFS>();
mj_defaultVFS(vfs.get());
mj_addBufferVFS(vfs.get(), file, xml.data(), xml.size());
mj_makeEmptyFileVFS(vfs.get(), file, xml.size());
int file_idx = mj_findFileVFS(vfs.get(), file);
std::memcpy(vfs->filedata[file_idx], xml.data(), xml.size());
mjpc::UniqueMjModel m = {mj_loadXML(file, vfs.get(), error, error_size),
mj_deleteModel};
mj_deleteFileVFS(vfs.get(), file);
Expand All @@ -532,7 +535,9 @@ mjpc::UniqueMjModel LoadModelFromBytes(std::string_view mjb) {
// mjVFS structs need to be allocated on the heap, because it's ~2MB
auto vfs = std::make_unique<mjVFS>();
mj_defaultVFS(vfs.get());
mj_addBufferVFS(vfs.get(), file, mjb.data(), mjb.size());
mj_makeEmptyFileVFS(vfs.get(), file, mjb.size());
int file_idx = mj_findFileVFS(vfs.get(), file);
memcpy(vfs->filedata[file_idx], mjb.data(), mjb.size());
mjpc::UniqueMjModel m = {mj_loadModel(file, vfs.get()), mj_deleteModel};
mj_deleteFileVFS(vfs.get(), file);
return m;
Expand Down
10 changes: 7 additions & 3 deletions mjpc/planners/cross_entropy/planner.cc
Original file line number Diff line number Diff line change
Expand Up @@ -465,10 +465,14 @@ void CrossEntropyPlanner::Traces(mjvScene* scn) {
// elite index
int idx = trajectory_order[k];
// make geometry
mjv_connector(
mjv_makeConnector(
&scn->geoms[scn->ngeom], mjGEOM_LINE, width,
trajectory[idx].trace.data() + 3*task->num_trace * i + 3 * j,
trajectory[idx].trace.data() + 3*task->num_trace * (i + 1) + 3 * j);
trajectory[idx].trace[3 * task->num_trace * i + 3 * j],
trajectory[idx].trace[3 * task->num_trace * i + 1 + 3 * j],
trajectory[idx].trace[3 * task->num_trace * i + 2 + 3 * j],
trajectory[idx].trace[3 * task->num_trace * (i + 1) + 3 * j],
trajectory[idx].trace[3 * task->num_trace * (i + 1) + 1 + 3 * j],
trajectory[idx].trace[3 * task->num_trace * (i + 1) + 2 + 3 * j]);

// increment number of geometries
scn->ngeom += 1;
Expand Down
10 changes: 7 additions & 3 deletions mjpc/planners/gradient/planner.cc
Original file line number Diff line number Diff line change
Expand Up @@ -448,10 +448,14 @@ void GradientPlanner::Traces(mjvScene* scn) {
color);

// make geometry
mjv_connector(
mjv_makeConnector(
&scn->geoms[scn->ngeom], mjGEOM_LINE, width,
trajectory[k].trace.data() + 3 * task->num_trace * i + 3 * j,
trajectory[k].trace.data() + 3 * task->num_trace * (i + 1) + 3 * j);
trajectory[k].trace[3 * task->num_trace * i + 3 * j],
trajectory[k].trace[3 * task->num_trace * i + 1 + 3 * j],
trajectory[k].trace[3 * task->num_trace * i + 2 + 3 * j],
trajectory[k].trace[3 * task->num_trace * (i + 1) + 3 * j],
trajectory[k].trace[3 * task->num_trace * (i + 1) + 1 + 3 * j],
trajectory[k].trace[3 * task->num_trace * (i + 1) + 2 + 3 * j]);

// increment number of geometries
scn->ngeom += 1;
Expand Down
10 changes: 7 additions & 3 deletions mjpc/planners/sample_gradient/planner.cc
Original file line number Diff line number Diff line change
Expand Up @@ -542,10 +542,14 @@ void SampleGradientPlanner::Traces(mjvScene* scn) {
idx < num_noisy ? white : orange);

// make geometry
mjv_connector(
mjv_makeConnector(
&scn->geoms[scn->ngeom], mjGEOM_LINE, width,
trajectory[k].trace.data() + 3 * task->num_trace * i + 3 * j,
trajectory[k].trace.data() + 3 * task->num_trace * (i + 1) + 3 * j);
trajectory[idx].trace[3 * task->num_trace * i + 3 * j],
trajectory[idx].trace[3 * task->num_trace * i + 1 + 3 * j],
trajectory[idx].trace[3 * task->num_trace * i + 2 + 3 * j],
trajectory[idx].trace[3 * task->num_trace * (i + 1) + 3 * j],
trajectory[idx].trace[3 * task->num_trace * (i + 1) + 1 + 3 * j],
trajectory[idx].trace[3 * task->num_trace * (i + 1) + 2 + 3 * j]);

// increment number of geometries
scn->ngeom += 1;
Expand Down
41 changes: 16 additions & 25 deletions mjpc/planners/sampling/planner.cc
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@

#include <algorithm>
#include <chrono>
#include <mutex>
#include <shared_mutex>

#include <absl/random/random.h>
Expand Down Expand Up @@ -113,11 +112,8 @@ void SamplingPlanner::Reset(int horizon,
time = 0.0;

// policy parameters
{
const std::unique_lock<std::shared_mutex> lock(mtx_);
policy.Reset(horizon, initial_repeated_action);
previous_policy.Reset(horizon, initial_repeated_action);
}
policy.Reset(horizon, initial_repeated_action);
previous_policy.Reset(horizon, initial_repeated_action);

// scratch
plan_scratch.Clear();
Expand Down Expand Up @@ -154,9 +150,6 @@ void SamplingPlanner::SetState(const State& state) {

int SamplingPlanner::OptimizePolicyCandidates(int ncandidates, int horizon,
ThreadPool& pool) {
// resample nominal policy to current time
this->UpdateNominalPolicy(horizon);

// if num_trajectory_ has changed, use it in this new iteration.
// num_trajectory_ might change while this function runs. Keep it constant
// for the duration of this function.
Expand Down Expand Up @@ -195,6 +188,9 @@ int SamplingPlanner::OptimizePolicyCandidates(int ncandidates, int horizon,

// optimize nominal policy using random sampling
void SamplingPlanner::OptimizePolicy(int horizon, ThreadPool& pool) {
// resample nominal policy to current time
this->UpdateNominalPolicy(horizon);

OptimizePolicyCandidates(1, horizon, pool);

// ----- update policy ----- //
Expand Down Expand Up @@ -270,19 +266,10 @@ void SamplingPlanner::UpdateNominalPolicy(int horizon) {
time_shift = time_horizon;
}

const std::unique_lock<std::shared_mutex> lock(mtx_);

// special case for when simulation time is reset (which doesn't cause
// Planner::Reset)
if (policy.plan.Size() && policy.plan.begin()->time() > nominal_time) {
// time went backwards. keep the nominal plan, but start at the new time
policy.plan.ShiftTime(nominal_time);
previous_policy.plan.ShiftTime(nominal_time);
}

const std::shared_lock<std::shared_mutex> lock(mtx_);
policy.plan.DiscardBefore(nominal_time);
if (policy.plan.Size() == 0) {
policy.plan.AddNode(nominal_time);
policy.plan.AddNode(time);
}
while (policy.plan.Size() < num_spline_points) {
// duplicate the last node, with a time further in the future.
Expand Down Expand Up @@ -316,7 +303,7 @@ void SamplingPlanner::UpdateNominalPolicy(int horizon) {

// copy scratch into plan
{
const std::unique_lock<std::shared_mutex> lock(mtx_);
const std::shared_lock<std::shared_mutex> lock(mtx_);
policy.plan = plan_scratch;
}
}
Expand Down Expand Up @@ -430,10 +417,14 @@ void SamplingPlanner::Traces(mjvScene* scn) {
color);

// make geometry
mjv_connector(
mjv_makeConnector(
&scn->geoms[scn->ngeom], mjGEOM_LINE, width,
trajectory[k].trace.data() + 3 * task->num_trace * i + 3 * j,
trajectory[k].trace.data() + 3 * task->num_trace * (i + 1) + 3 * j);
trajectory[k].trace[3 * task->num_trace * i + 3 * j],
trajectory[k].trace[3 * task->num_trace * i + 1 + 3 * j],
trajectory[k].trace[3 * task->num_trace * i + 2 + 3 * j],
trajectory[k].trace[3 * task->num_trace * (i + 1) + 3 * j],
trajectory[k].trace[3 * task->num_trace * (i + 1) + 1 + 3 * j],
trajectory[k].trace[3 * task->num_trace * (i + 1) + 2 + 3 * j]);

// increment number of geometries
scn->ngeom += 1;
Expand Down Expand Up @@ -536,7 +527,7 @@ void SamplingPlanner::CopyCandidateToPolicy(int candidate) {
winner = trajectory_order[candidate];

{
const std::unique_lock<std::shared_mutex> lock(mtx_);
const std::shared_lock<std::shared_mutex> lock(mtx_);
previous_policy = policy;
policy = candidate_policy[winner];
}
Expand Down
Loading