From 34f84b3a65c0dab36fc632d2f41ce21dcef65355 Mon Sep 17 00:00:00 2001
From: Benjamin Beyret
Date: Wed, 29 May 2019 15:46:51 +0100
Subject: [PATCH 01/23] add animalai_packages for upload to PyPI
---
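Note: this patch splits the code base into two pip-installable packages, animalai (the environment API, under animalai_packages/animalai_envs) and animalai_train (the training code, under animalai_packages/animalai_train), while keeping the previous flat layout as animalai_bkp. As a rough, illustrative sketch only — the class names UnityEnvironment and ArenaConfig are assumed from envs/environment.py and envs/arena_config.py in the file listing below, and the exact constructor/reset keyword arguments may differ — the packaged environment API would be used roughly like this:

    # Hypothetical usage sketch: names assumed from the new package layout,
    # not taken verbatim from this patch.
    from animalai.envs import UnityEnvironment            # assumed re-export from envs/__init__.py
    from animalai.envs.arena_config import ArenaConfig

    env = UnityEnvironment(file_name='env/AnimalAI', worker_id=0)  # path to the Unity build (assumed)
    arena_config = ArenaConfig('configs/1-Food.yaml')               # arena definition file (assumed)
    info = env.reset(arenas_configurations=arena_config)            # keyword name assumed
    env.close()

Each package directory ships its own setup.py, so the PyPI upload would typically follow the standard setuptools/twine flow (python setup.py sdist bdist_wheel, then twine upload dist/*) run from that directory.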
.gitignore | 9 +-
{animalai => animalai_bkp}/__init__.py | 0
.../communicator_objects/__init__.py | 0
.../agent_action_proto_pb2.py | 0
.../agent_info_proto_pb2.py | 0
.../arena_parameters_proto_pb2.py | 0
.../brain_parameters_proto_pb2.py | 0
.../communicator_objects/command_proto_pb2.py | 0
.../demonstration_meta_proto_pb2.py | 0
.../engine_configuration_proto_pb2.py | 0
.../communicator_objects/header_pb2.py | 0
.../resolution_proto_pb2.py | 0
.../space_type_proto_pb2.py | 0
.../communicator_objects/unity_input_pb2.py | 0
.../communicator_objects/unity_message_pb2.py | 0
.../communicator_objects/unity_output_pb2.py | 0
.../unity_rl_initialization_input_pb2.py | 0
.../unity_rl_initialization_output_pb2.py | 0
.../unity_rl_input_pb2.py | 0
.../unity_rl_output_pb2.py | 0
.../unity_rl_reset_input_pb2.py | 0
.../unity_rl_reset_output_pb2.py | 0
.../unity_to_external_pb2.py | 0
.../unity_to_external_pb2_grpc.py | 0
{animalai => animalai_bkp}/envs/__init__.py | 0
.../envs/arena_config.py | 0
{animalai => animalai_bkp}/envs/brain.py | 0
.../envs/communicator.py | 0
.../envs/environment.py | 0
{animalai => animalai_bkp}/envs/exception.py | 0
.../envs/rpc_communicator.py | 0
.../envs/socket_communicator.py | 0
.../trainers/__init__.py | 0
.../trainers/barracuda.py | 0
.../trainers/bc/__init__.py | 0
.../trainers/bc/models.py | 0
.../trainers/bc/offline_trainer.py | 0
.../trainers/bc/online_trainer.py | 0
.../trainers/bc/policy.py | 0
.../trainers/bc/trainer.py | 0
{animalai => animalai_bkp}/trainers/buffer.py | 0
.../trainers/curriculum.py | 0
.../trainers/demo_loader.py | 0
.../trainers/exception.py | 0
{animalai => animalai_bkp}/trainers/learn.py | 0
.../trainers/meta_curriculum.py | 0
{animalai => animalai_bkp}/trainers/models.py | 0
{animalai => animalai_bkp}/trainers/policy.py | 0
.../trainers/ppo/__init__.py | 0
.../trainers/ppo/models.py | 0
.../trainers/ppo/policy.py | 0
.../trainers/ppo/trainer.py | 0
.../trainers/tensorflow_to_barracuda.py | 0
.../trainers/trainer.py | 0
.../trainers/trainer_controller.py | 0
animalai_packages/animalai_envs/LICENSE | 201 ++++
animalai_packages/animalai_envs/README.md | 5 +
.../animalai_envs/animalai/__init__.py | 1 +
.../animalai/communicator_objects/__init__.py | 22 +
.../agent_action_proto_pb2.py | 92 ++
.../agent_info_proto_pb2.py | 141 +++
.../arena_parameters_proto_pb2.py | 209 ++++
.../brain_parameters_proto_pb2.py | 125 ++
.../communicator_objects/command_proto_pb2.py | 64 +
.../demonstration_meta_proto_pb2.py | 99 ++
.../engine_configuration_proto_pb2.py | 106 ++
.../communicator_objects/header_pb2.py | 78 ++
.../resolution_proto_pb2.py | 85 ++
.../space_type_proto_pb2.py | 61 +
.../communicator_objects/unity_input_pb2.py | 92 ++
.../communicator_objects/unity_message_pb2.py | 92 ++
.../communicator_objects/unity_output_pb2.py | 83 ++
.../unity_rl_initialization_input_pb2.py | 71 ++
.../unity_rl_initialization_output_pb2.py | 95 ++
.../unity_rl_input_pb2.py | 178 +++
.../unity_rl_output_pb2.py | 169 +++
.../unity_rl_reset_input_pb2.py | 122 ++
.../unity_rl_reset_output_pb2.py | 71 ++
.../unity_to_external_pb2.py | 57 +
.../unity_to_external_pb2_grpc.py | 46 +
.../animalai_envs/animalai/envs/__init__.py | 3 +
.../animalai/envs/arena_config.py | 115 ++
.../animalai_envs/animalai/envs/brain.py | 149 +++
.../animalai/envs/communicator.py | 35 +
.../animalai/envs/environment.py | 491 ++++++++
.../animalai_envs/animalai/envs/exception.py | 63 +
.../animalai/envs/rpc_communicator.py | 115 ++
.../animalai/envs/socket_communicator.py | 98 ++
animalai_packages/animalai_envs/setup.py | 29 +
animalai_packages/animalai_train/LICENSE | 201 ++++
animalai_packages/animalai_train/README.md | 5 +
.../animalai_train/animalai_train/__init__.py | 1 +
.../animalai_train/trainers/__init__.py | 15 +
.../animalai_train/trainers/barracuda.py | 491 ++++++++
.../animalai_train/trainers/bc/__init__.py | 4 +
.../animalai_train/trainers/bc/models.py | 55 +
.../trainers/bc/offline_trainer.py | 56 +
.../trainers/bc/online_trainer.py | 116 ++
.../animalai_train/trainers/bc/policy.py | 93 ++
.../animalai_train/trainers/bc/trainer.py | 190 +++
.../animalai_train/trainers/buffer.py | 255 ++++
.../animalai_train/trainers/curriculum.py | 112 ++
.../animalai_train/trainers/demo_loader.py | 94 ++
.../animalai_train/trainers/exception.py | 20 +
.../animalai_train/trainers/learn.py | 249 ++++
.../trainers/meta_curriculum.py | 147 +++
.../animalai_train/trainers/models.py | 380 ++++++
.../animalai_train/trainers/policy.py | 212 ++++
.../animalai_train/trainers/ppo/__init__.py | 3 +
.../animalai_train/trainers/ppo/models.py | 195 ++++
.../animalai_train/trainers/ppo/policy.py | 214 ++++
.../animalai_train/trainers/ppo/trainer.py | 386 ++++++
.../trainers/tensorflow_to_barracuda.py | 1034 +++++++++++++++++
.../animalai_train/trainers/trainer.py | 198 ++++
.../trainers/trainer_controller.py | 301 +++++
animalai_packages/animalai_train/setup.py | 33 +
train.py | 2 +-
117 files changed, 8531 insertions(+), 3 deletions(-)
rename {animalai => animalai_bkp}/__init__.py (100%)
rename {animalai => animalai_bkp}/communicator_objects/__init__.py (100%)
rename {animalai => animalai_bkp}/communicator_objects/agent_action_proto_pb2.py (100%)
rename {animalai => animalai_bkp}/communicator_objects/agent_info_proto_pb2.py (100%)
rename {animalai => animalai_bkp}/communicator_objects/arena_parameters_proto_pb2.py (100%)
rename {animalai => animalai_bkp}/communicator_objects/brain_parameters_proto_pb2.py (100%)
rename {animalai => animalai_bkp}/communicator_objects/command_proto_pb2.py (100%)
rename {animalai => animalai_bkp}/communicator_objects/demonstration_meta_proto_pb2.py (100%)
rename {animalai => animalai_bkp}/communicator_objects/engine_configuration_proto_pb2.py (100%)
rename {animalai => animalai_bkp}/communicator_objects/header_pb2.py (100%)
rename {animalai => animalai_bkp}/communicator_objects/resolution_proto_pb2.py (100%)
rename {animalai => animalai_bkp}/communicator_objects/space_type_proto_pb2.py (100%)
rename {animalai => animalai_bkp}/communicator_objects/unity_input_pb2.py (100%)
rename {animalai => animalai_bkp}/communicator_objects/unity_message_pb2.py (100%)
rename {animalai => animalai_bkp}/communicator_objects/unity_output_pb2.py (100%)
rename {animalai => animalai_bkp}/communicator_objects/unity_rl_initialization_input_pb2.py (100%)
rename {animalai => animalai_bkp}/communicator_objects/unity_rl_initialization_output_pb2.py (100%)
rename {animalai => animalai_bkp}/communicator_objects/unity_rl_input_pb2.py (100%)
rename {animalai => animalai_bkp}/communicator_objects/unity_rl_output_pb2.py (100%)
rename {animalai => animalai_bkp}/communicator_objects/unity_rl_reset_input_pb2.py (100%)
rename {animalai => animalai_bkp}/communicator_objects/unity_rl_reset_output_pb2.py (100%)
rename {animalai => animalai_bkp}/communicator_objects/unity_to_external_pb2.py (100%)
rename {animalai => animalai_bkp}/communicator_objects/unity_to_external_pb2_grpc.py (100%)
rename {animalai => animalai_bkp}/envs/__init__.py (100%)
rename {animalai => animalai_bkp}/envs/arena_config.py (100%)
rename {animalai => animalai_bkp}/envs/brain.py (100%)
rename {animalai => animalai_bkp}/envs/communicator.py (100%)
rename {animalai => animalai_bkp}/envs/environment.py (100%)
rename {animalai => animalai_bkp}/envs/exception.py (100%)
rename {animalai => animalai_bkp}/envs/rpc_communicator.py (100%)
rename {animalai => animalai_bkp}/envs/socket_communicator.py (100%)
rename {animalai => animalai_bkp}/trainers/__init__.py (100%)
rename {animalai => animalai_bkp}/trainers/barracuda.py (100%)
rename {animalai => animalai_bkp}/trainers/bc/__init__.py (100%)
rename {animalai => animalai_bkp}/trainers/bc/models.py (100%)
rename {animalai => animalai_bkp}/trainers/bc/offline_trainer.py (100%)
rename {animalai => animalai_bkp}/trainers/bc/online_trainer.py (100%)
rename {animalai => animalai_bkp}/trainers/bc/policy.py (100%)
rename {animalai => animalai_bkp}/trainers/bc/trainer.py (100%)
rename {animalai => animalai_bkp}/trainers/buffer.py (100%)
rename {animalai => animalai_bkp}/trainers/curriculum.py (100%)
rename {animalai => animalai_bkp}/trainers/demo_loader.py (100%)
rename {animalai => animalai_bkp}/trainers/exception.py (100%)
rename {animalai => animalai_bkp}/trainers/learn.py (100%)
rename {animalai => animalai_bkp}/trainers/meta_curriculum.py (100%)
rename {animalai => animalai_bkp}/trainers/models.py (100%)
rename {animalai => animalai_bkp}/trainers/policy.py (100%)
rename {animalai => animalai_bkp}/trainers/ppo/__init__.py (100%)
rename {animalai => animalai_bkp}/trainers/ppo/models.py (100%)
rename {animalai => animalai_bkp}/trainers/ppo/policy.py (100%)
rename {animalai => animalai_bkp}/trainers/ppo/trainer.py (100%)
rename {animalai => animalai_bkp}/trainers/tensorflow_to_barracuda.py (100%)
rename {animalai => animalai_bkp}/trainers/trainer.py (100%)
rename {animalai => animalai_bkp}/trainers/trainer_controller.py (100%)
create mode 100644 animalai_packages/animalai_envs/LICENSE
create mode 100644 animalai_packages/animalai_envs/README.md
create mode 100644 animalai_packages/animalai_envs/animalai/__init__.py
create mode 100644 animalai_packages/animalai_envs/animalai/communicator_objects/__init__.py
create mode 100644 animalai_packages/animalai_envs/animalai/communicator_objects/agent_action_proto_pb2.py
create mode 100644 animalai_packages/animalai_envs/animalai/communicator_objects/agent_info_proto_pb2.py
create mode 100644 animalai_packages/animalai_envs/animalai/communicator_objects/arena_parameters_proto_pb2.py
create mode 100644 animalai_packages/animalai_envs/animalai/communicator_objects/brain_parameters_proto_pb2.py
create mode 100644 animalai_packages/animalai_envs/animalai/communicator_objects/command_proto_pb2.py
create mode 100644 animalai_packages/animalai_envs/animalai/communicator_objects/demonstration_meta_proto_pb2.py
create mode 100644 animalai_packages/animalai_envs/animalai/communicator_objects/engine_configuration_proto_pb2.py
create mode 100644 animalai_packages/animalai_envs/animalai/communicator_objects/header_pb2.py
create mode 100644 animalai_packages/animalai_envs/animalai/communicator_objects/resolution_proto_pb2.py
create mode 100644 animalai_packages/animalai_envs/animalai/communicator_objects/space_type_proto_pb2.py
create mode 100644 animalai_packages/animalai_envs/animalai/communicator_objects/unity_input_pb2.py
create mode 100644 animalai_packages/animalai_envs/animalai/communicator_objects/unity_message_pb2.py
create mode 100644 animalai_packages/animalai_envs/animalai/communicator_objects/unity_output_pb2.py
create mode 100644 animalai_packages/animalai_envs/animalai/communicator_objects/unity_rl_initialization_input_pb2.py
create mode 100644 animalai_packages/animalai_envs/animalai/communicator_objects/unity_rl_initialization_output_pb2.py
create mode 100644 animalai_packages/animalai_envs/animalai/communicator_objects/unity_rl_input_pb2.py
create mode 100644 animalai_packages/animalai_envs/animalai/communicator_objects/unity_rl_output_pb2.py
create mode 100644 animalai_packages/animalai_envs/animalai/communicator_objects/unity_rl_reset_input_pb2.py
create mode 100644 animalai_packages/animalai_envs/animalai/communicator_objects/unity_rl_reset_output_pb2.py
create mode 100644 animalai_packages/animalai_envs/animalai/communicator_objects/unity_to_external_pb2.py
create mode 100644 animalai_packages/animalai_envs/animalai/communicator_objects/unity_to_external_pb2_grpc.py
create mode 100644 animalai_packages/animalai_envs/animalai/envs/__init__.py
create mode 100644 animalai_packages/animalai_envs/animalai/envs/arena_config.py
create mode 100644 animalai_packages/animalai_envs/animalai/envs/brain.py
create mode 100644 animalai_packages/animalai_envs/animalai/envs/communicator.py
create mode 100644 animalai_packages/animalai_envs/animalai/envs/environment.py
create mode 100644 animalai_packages/animalai_envs/animalai/envs/exception.py
create mode 100644 animalai_packages/animalai_envs/animalai/envs/rpc_communicator.py
create mode 100644 animalai_packages/animalai_envs/animalai/envs/socket_communicator.py
create mode 100644 animalai_packages/animalai_envs/setup.py
create mode 100644 animalai_packages/animalai_train/LICENSE
create mode 100644 animalai_packages/animalai_train/README.md
create mode 100644 animalai_packages/animalai_train/animalai_train/__init__.py
create mode 100644 animalai_packages/animalai_train/animalai_train/trainers/__init__.py
create mode 100644 animalai_packages/animalai_train/animalai_train/trainers/barracuda.py
create mode 100644 animalai_packages/animalai_train/animalai_train/trainers/bc/__init__.py
create mode 100644 animalai_packages/animalai_train/animalai_train/trainers/bc/models.py
create mode 100644 animalai_packages/animalai_train/animalai_train/trainers/bc/offline_trainer.py
create mode 100644 animalai_packages/animalai_train/animalai_train/trainers/bc/online_trainer.py
create mode 100644 animalai_packages/animalai_train/animalai_train/trainers/bc/policy.py
create mode 100644 animalai_packages/animalai_train/animalai_train/trainers/bc/trainer.py
create mode 100644 animalai_packages/animalai_train/animalai_train/trainers/buffer.py
create mode 100644 animalai_packages/animalai_train/animalai_train/trainers/curriculum.py
create mode 100644 animalai_packages/animalai_train/animalai_train/trainers/demo_loader.py
create mode 100644 animalai_packages/animalai_train/animalai_train/trainers/exception.py
create mode 100644 animalai_packages/animalai_train/animalai_train/trainers/learn.py
create mode 100644 animalai_packages/animalai_train/animalai_train/trainers/meta_curriculum.py
create mode 100644 animalai_packages/animalai_train/animalai_train/trainers/models.py
create mode 100644 animalai_packages/animalai_train/animalai_train/trainers/policy.py
create mode 100644 animalai_packages/animalai_train/animalai_train/trainers/ppo/__init__.py
create mode 100644 animalai_packages/animalai_train/animalai_train/trainers/ppo/models.py
create mode 100644 animalai_packages/animalai_train/animalai_train/trainers/ppo/policy.py
create mode 100644 animalai_packages/animalai_train/animalai_train/trainers/ppo/trainer.py
create mode 100644 animalai_packages/animalai_train/animalai_train/trainers/tensorflow_to_barracuda.py
create mode 100644 animalai_packages/animalai_train/animalai_train/trainers/trainer.py
create mode 100644 animalai_packages/animalai_train/animalai_train/trainers/trainer_controller.py
create mode 100644 animalai_packages/animalai_train/setup.py
diff --git a/.gitignore b/.gitignore
index dfd1107a..82fde7b4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,5 +5,10 @@ env/*
/.idea
__pycache__/
UnitySDK.log
-/venv
-/dev
\ No newline at end of file
+venv/
+*/venv
+/dev
+build/
+dist/
+*.egg-info*
+*.eggs*
\ No newline at end of file
diff --git a/animalai/__init__.py b/animalai_bkp/__init__.py
similarity index 100%
rename from animalai/__init__.py
rename to animalai_bkp/__init__.py
diff --git a/animalai/communicator_objects/__init__.py b/animalai_bkp/communicator_objects/__init__.py
similarity index 100%
rename from animalai/communicator_objects/__init__.py
rename to animalai_bkp/communicator_objects/__init__.py
diff --git a/animalai/communicator_objects/agent_action_proto_pb2.py b/animalai_bkp/communicator_objects/agent_action_proto_pb2.py
similarity index 100%
rename from animalai/communicator_objects/agent_action_proto_pb2.py
rename to animalai_bkp/communicator_objects/agent_action_proto_pb2.py
diff --git a/animalai/communicator_objects/agent_info_proto_pb2.py b/animalai_bkp/communicator_objects/agent_info_proto_pb2.py
similarity index 100%
rename from animalai/communicator_objects/agent_info_proto_pb2.py
rename to animalai_bkp/communicator_objects/agent_info_proto_pb2.py
diff --git a/animalai/communicator_objects/arena_parameters_proto_pb2.py b/animalai_bkp/communicator_objects/arena_parameters_proto_pb2.py
similarity index 100%
rename from animalai/communicator_objects/arena_parameters_proto_pb2.py
rename to animalai_bkp/communicator_objects/arena_parameters_proto_pb2.py
diff --git a/animalai/communicator_objects/brain_parameters_proto_pb2.py b/animalai_bkp/communicator_objects/brain_parameters_proto_pb2.py
similarity index 100%
rename from animalai/communicator_objects/brain_parameters_proto_pb2.py
rename to animalai_bkp/communicator_objects/brain_parameters_proto_pb2.py
diff --git a/animalai/communicator_objects/command_proto_pb2.py b/animalai_bkp/communicator_objects/command_proto_pb2.py
similarity index 100%
rename from animalai/communicator_objects/command_proto_pb2.py
rename to animalai_bkp/communicator_objects/command_proto_pb2.py
diff --git a/animalai/communicator_objects/demonstration_meta_proto_pb2.py b/animalai_bkp/communicator_objects/demonstration_meta_proto_pb2.py
similarity index 100%
rename from animalai/communicator_objects/demonstration_meta_proto_pb2.py
rename to animalai_bkp/communicator_objects/demonstration_meta_proto_pb2.py
diff --git a/animalai/communicator_objects/engine_configuration_proto_pb2.py b/animalai_bkp/communicator_objects/engine_configuration_proto_pb2.py
similarity index 100%
rename from animalai/communicator_objects/engine_configuration_proto_pb2.py
rename to animalai_bkp/communicator_objects/engine_configuration_proto_pb2.py
diff --git a/animalai/communicator_objects/header_pb2.py b/animalai_bkp/communicator_objects/header_pb2.py
similarity index 100%
rename from animalai/communicator_objects/header_pb2.py
rename to animalai_bkp/communicator_objects/header_pb2.py
diff --git a/animalai/communicator_objects/resolution_proto_pb2.py b/animalai_bkp/communicator_objects/resolution_proto_pb2.py
similarity index 100%
rename from animalai/communicator_objects/resolution_proto_pb2.py
rename to animalai_bkp/communicator_objects/resolution_proto_pb2.py
diff --git a/animalai/communicator_objects/space_type_proto_pb2.py b/animalai_bkp/communicator_objects/space_type_proto_pb2.py
similarity index 100%
rename from animalai/communicator_objects/space_type_proto_pb2.py
rename to animalai_bkp/communicator_objects/space_type_proto_pb2.py
diff --git a/animalai/communicator_objects/unity_input_pb2.py b/animalai_bkp/communicator_objects/unity_input_pb2.py
similarity index 100%
rename from animalai/communicator_objects/unity_input_pb2.py
rename to animalai_bkp/communicator_objects/unity_input_pb2.py
diff --git a/animalai/communicator_objects/unity_message_pb2.py b/animalai_bkp/communicator_objects/unity_message_pb2.py
similarity index 100%
rename from animalai/communicator_objects/unity_message_pb2.py
rename to animalai_bkp/communicator_objects/unity_message_pb2.py
diff --git a/animalai/communicator_objects/unity_output_pb2.py b/animalai_bkp/communicator_objects/unity_output_pb2.py
similarity index 100%
rename from animalai/communicator_objects/unity_output_pb2.py
rename to animalai_bkp/communicator_objects/unity_output_pb2.py
diff --git a/animalai/communicator_objects/unity_rl_initialization_input_pb2.py b/animalai_bkp/communicator_objects/unity_rl_initialization_input_pb2.py
similarity index 100%
rename from animalai/communicator_objects/unity_rl_initialization_input_pb2.py
rename to animalai_bkp/communicator_objects/unity_rl_initialization_input_pb2.py
diff --git a/animalai/communicator_objects/unity_rl_initialization_output_pb2.py b/animalai_bkp/communicator_objects/unity_rl_initialization_output_pb2.py
similarity index 100%
rename from animalai/communicator_objects/unity_rl_initialization_output_pb2.py
rename to animalai_bkp/communicator_objects/unity_rl_initialization_output_pb2.py
diff --git a/animalai/communicator_objects/unity_rl_input_pb2.py b/animalai_bkp/communicator_objects/unity_rl_input_pb2.py
similarity index 100%
rename from animalai/communicator_objects/unity_rl_input_pb2.py
rename to animalai_bkp/communicator_objects/unity_rl_input_pb2.py
diff --git a/animalai/communicator_objects/unity_rl_output_pb2.py b/animalai_bkp/communicator_objects/unity_rl_output_pb2.py
similarity index 100%
rename from animalai/communicator_objects/unity_rl_output_pb2.py
rename to animalai_bkp/communicator_objects/unity_rl_output_pb2.py
diff --git a/animalai/communicator_objects/unity_rl_reset_input_pb2.py b/animalai_bkp/communicator_objects/unity_rl_reset_input_pb2.py
similarity index 100%
rename from animalai/communicator_objects/unity_rl_reset_input_pb2.py
rename to animalai_bkp/communicator_objects/unity_rl_reset_input_pb2.py
diff --git a/animalai/communicator_objects/unity_rl_reset_output_pb2.py b/animalai_bkp/communicator_objects/unity_rl_reset_output_pb2.py
similarity index 100%
rename from animalai/communicator_objects/unity_rl_reset_output_pb2.py
rename to animalai_bkp/communicator_objects/unity_rl_reset_output_pb2.py
diff --git a/animalai/communicator_objects/unity_to_external_pb2.py b/animalai_bkp/communicator_objects/unity_to_external_pb2.py
similarity index 100%
rename from animalai/communicator_objects/unity_to_external_pb2.py
rename to animalai_bkp/communicator_objects/unity_to_external_pb2.py
diff --git a/animalai/communicator_objects/unity_to_external_pb2_grpc.py b/animalai_bkp/communicator_objects/unity_to_external_pb2_grpc.py
similarity index 100%
rename from animalai/communicator_objects/unity_to_external_pb2_grpc.py
rename to animalai_bkp/communicator_objects/unity_to_external_pb2_grpc.py
diff --git a/animalai/envs/__init__.py b/animalai_bkp/envs/__init__.py
similarity index 100%
rename from animalai/envs/__init__.py
rename to animalai_bkp/envs/__init__.py
diff --git a/animalai/envs/arena_config.py b/animalai_bkp/envs/arena_config.py
similarity index 100%
rename from animalai/envs/arena_config.py
rename to animalai_bkp/envs/arena_config.py
diff --git a/animalai/envs/brain.py b/animalai_bkp/envs/brain.py
similarity index 100%
rename from animalai/envs/brain.py
rename to animalai_bkp/envs/brain.py
diff --git a/animalai/envs/communicator.py b/animalai_bkp/envs/communicator.py
similarity index 100%
rename from animalai/envs/communicator.py
rename to animalai_bkp/envs/communicator.py
diff --git a/animalai/envs/environment.py b/animalai_bkp/envs/environment.py
similarity index 100%
rename from animalai/envs/environment.py
rename to animalai_bkp/envs/environment.py
diff --git a/animalai/envs/exception.py b/animalai_bkp/envs/exception.py
similarity index 100%
rename from animalai/envs/exception.py
rename to animalai_bkp/envs/exception.py
diff --git a/animalai/envs/rpc_communicator.py b/animalai_bkp/envs/rpc_communicator.py
similarity index 100%
rename from animalai/envs/rpc_communicator.py
rename to animalai_bkp/envs/rpc_communicator.py
diff --git a/animalai/envs/socket_communicator.py b/animalai_bkp/envs/socket_communicator.py
similarity index 100%
rename from animalai/envs/socket_communicator.py
rename to animalai_bkp/envs/socket_communicator.py
diff --git a/animalai/trainers/__init__.py b/animalai_bkp/trainers/__init__.py
similarity index 100%
rename from animalai/trainers/__init__.py
rename to animalai_bkp/trainers/__init__.py
diff --git a/animalai/trainers/barracuda.py b/animalai_bkp/trainers/barracuda.py
similarity index 100%
rename from animalai/trainers/barracuda.py
rename to animalai_bkp/trainers/barracuda.py
diff --git a/animalai/trainers/bc/__init__.py b/animalai_bkp/trainers/bc/__init__.py
similarity index 100%
rename from animalai/trainers/bc/__init__.py
rename to animalai_bkp/trainers/bc/__init__.py
diff --git a/animalai/trainers/bc/models.py b/animalai_bkp/trainers/bc/models.py
similarity index 100%
rename from animalai/trainers/bc/models.py
rename to animalai_bkp/trainers/bc/models.py
diff --git a/animalai/trainers/bc/offline_trainer.py b/animalai_bkp/trainers/bc/offline_trainer.py
similarity index 100%
rename from animalai/trainers/bc/offline_trainer.py
rename to animalai_bkp/trainers/bc/offline_trainer.py
diff --git a/animalai/trainers/bc/online_trainer.py b/animalai_bkp/trainers/bc/online_trainer.py
similarity index 100%
rename from animalai/trainers/bc/online_trainer.py
rename to animalai_bkp/trainers/bc/online_trainer.py
diff --git a/animalai/trainers/bc/policy.py b/animalai_bkp/trainers/bc/policy.py
similarity index 100%
rename from animalai/trainers/bc/policy.py
rename to animalai_bkp/trainers/bc/policy.py
diff --git a/animalai/trainers/bc/trainer.py b/animalai_bkp/trainers/bc/trainer.py
similarity index 100%
rename from animalai/trainers/bc/trainer.py
rename to animalai_bkp/trainers/bc/trainer.py
diff --git a/animalai/trainers/buffer.py b/animalai_bkp/trainers/buffer.py
similarity index 100%
rename from animalai/trainers/buffer.py
rename to animalai_bkp/trainers/buffer.py
diff --git a/animalai/trainers/curriculum.py b/animalai_bkp/trainers/curriculum.py
similarity index 100%
rename from animalai/trainers/curriculum.py
rename to animalai_bkp/trainers/curriculum.py
diff --git a/animalai/trainers/demo_loader.py b/animalai_bkp/trainers/demo_loader.py
similarity index 100%
rename from animalai/trainers/demo_loader.py
rename to animalai_bkp/trainers/demo_loader.py
diff --git a/animalai/trainers/exception.py b/animalai_bkp/trainers/exception.py
similarity index 100%
rename from animalai/trainers/exception.py
rename to animalai_bkp/trainers/exception.py
diff --git a/animalai/trainers/learn.py b/animalai_bkp/trainers/learn.py
similarity index 100%
rename from animalai/trainers/learn.py
rename to animalai_bkp/trainers/learn.py
diff --git a/animalai/trainers/meta_curriculum.py b/animalai_bkp/trainers/meta_curriculum.py
similarity index 100%
rename from animalai/trainers/meta_curriculum.py
rename to animalai_bkp/trainers/meta_curriculum.py
diff --git a/animalai/trainers/models.py b/animalai_bkp/trainers/models.py
similarity index 100%
rename from animalai/trainers/models.py
rename to animalai_bkp/trainers/models.py
diff --git a/animalai/trainers/policy.py b/animalai_bkp/trainers/policy.py
similarity index 100%
rename from animalai/trainers/policy.py
rename to animalai_bkp/trainers/policy.py
diff --git a/animalai/trainers/ppo/__init__.py b/animalai_bkp/trainers/ppo/__init__.py
similarity index 100%
rename from animalai/trainers/ppo/__init__.py
rename to animalai_bkp/trainers/ppo/__init__.py
diff --git a/animalai/trainers/ppo/models.py b/animalai_bkp/trainers/ppo/models.py
similarity index 100%
rename from animalai/trainers/ppo/models.py
rename to animalai_bkp/trainers/ppo/models.py
diff --git a/animalai/trainers/ppo/policy.py b/animalai_bkp/trainers/ppo/policy.py
similarity index 100%
rename from animalai/trainers/ppo/policy.py
rename to animalai_bkp/trainers/ppo/policy.py
diff --git a/animalai/trainers/ppo/trainer.py b/animalai_bkp/trainers/ppo/trainer.py
similarity index 100%
rename from animalai/trainers/ppo/trainer.py
rename to animalai_bkp/trainers/ppo/trainer.py
diff --git a/animalai/trainers/tensorflow_to_barracuda.py b/animalai_bkp/trainers/tensorflow_to_barracuda.py
similarity index 100%
rename from animalai/trainers/tensorflow_to_barracuda.py
rename to animalai_bkp/trainers/tensorflow_to_barracuda.py
diff --git a/animalai/trainers/trainer.py b/animalai_bkp/trainers/trainer.py
similarity index 100%
rename from animalai/trainers/trainer.py
rename to animalai_bkp/trainers/trainer.py
diff --git a/animalai/trainers/trainer_controller.py b/animalai_bkp/trainers/trainer_controller.py
similarity index 100%
rename from animalai/trainers/trainer_controller.py
rename to animalai_bkp/trainers/trainer_controller.py
diff --git a/animalai_packages/animalai_envs/LICENSE b/animalai_packages/animalai_envs/LICENSE
new file mode 100644
index 00000000..7ff5035e
--- /dev/null
+++ b/animalai_packages/animalai_envs/LICENSE
@@ -0,0 +1,201 @@
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "{}"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright 2017 Unity Technologies
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
\ No newline at end of file
diff --git a/animalai_packages/animalai_envs/README.md b/animalai_packages/animalai_envs/README.md
new file mode 100644
index 00000000..1dd411bf
--- /dev/null
+++ b/animalai_packages/animalai_envs/README.md
@@ -0,0 +1,5 @@
+# AnimalAI Python API
+
+This package provides the Python API used for training agents for the Animal AI Olympics competition.
+
+This library is mostly the same as [Unity's MLAgents](https://github.com/Unity-Technologies/ml-agents).
\ No newline at end of file
diff --git a/animalai_packages/animalai_envs/animalai/__init__.py b/animalai_packages/animalai_envs/animalai/__init__.py
new file mode 100644
index 00000000..c088da9f
--- /dev/null
+++ b/animalai_packages/animalai_envs/animalai/__init__.py
@@ -0,0 +1 @@
+name= "animalai"
diff --git a/animalai_packages/animalai_envs/animalai/communicator_objects/__init__.py b/animalai_packages/animalai_envs/animalai/communicator_objects/__init__.py
new file mode 100644
index 00000000..571ec13b
--- /dev/null
+++ b/animalai_packages/animalai_envs/animalai/communicator_objects/__init__.py
@@ -0,0 +1,22 @@
+from .agent_action_proto_pb2 import *
+from .agent_info_proto_pb2 import *
+from .arena_parameters_proto_pb2 import *
+from .brain_parameters_proto_pb2 import *
+from .command_proto_pb2 import *
+from .demonstration_meta_proto_pb2 import *
+from .engine_configuration_proto_pb2 import *
+from .header_pb2 import *
+from .__init__ import *
+from .resolution_proto_pb2 import *
+from .space_type_proto_pb2 import *
+from .unity_input_pb2 import *
+from .unity_message_pb2 import *
+from .unity_output_pb2 import *
+from .unity_rl_initialization_input_pb2 import *
+from .unity_rl_initialization_output_pb2 import *
+from .unity_rl_input_pb2 import *
+from .unity_rl_output_pb2 import *
+from .unity_rl_reset_input_pb2 import *
+from .unity_rl_reset_output_pb2 import *
+from .unity_to_external_pb2_grpc import *
+from .unity_to_external_pb2 import *
diff --git a/animalai_packages/animalai_envs/animalai/communicator_objects/agent_action_proto_pb2.py b/animalai_packages/animalai_envs/animalai/communicator_objects/agent_action_proto_pb2.py
new file mode 100644
index 00000000..8d19593e
--- /dev/null
+++ b/animalai_packages/animalai_envs/animalai/communicator_objects/agent_action_proto_pb2.py
@@ -0,0 +1,92 @@
+# -*- coding: utf-8 -*-
+# Generated by the protocol buffer compiler. DO NOT EDIT!
+# source: animalai/communicator_objects/agent_action_proto.proto
+
+import sys
+_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
+from google.protobuf import descriptor as _descriptor
+from google.protobuf import message as _message
+from google.protobuf import reflection as _reflection
+from google.protobuf import symbol_database as _symbol_database
+# @@protoc_insertion_point(imports)
+
+_sym_db = _symbol_database.Default()
+
+
+
+
+DESCRIPTOR = _descriptor.FileDescriptor(
+ name='animalai/communicator_objects/agent_action_proto.proto',
+ package='communicator_objects',
+ syntax='proto3',
+ serialized_options=_b('\252\002\034MLAgents.CommunicatorObjects'),
+ serialized_pb=_b('\n6animalai/communicator_objects/agent_action_proto.proto\x12\x14\x63ommunicator_objects\"a\n\x10\x41gentActionProto\x12\x16\n\x0evector_actions\x18\x01 \x03(\x02\x12\x14\n\x0ctext_actions\x18\x02 \x01(\t\x12\x10\n\x08memories\x18\x03 \x03(\x02\x12\r\n\x05value\x18\x04 \x01(\x02\x42\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3')
+)
+
+
+
+
+_AGENTACTIONPROTO = _descriptor.Descriptor(
+ name='AgentActionProto',
+ full_name='communicator_objects.AgentActionProto',
+ filename=None,
+ file=DESCRIPTOR,
+ containing_type=None,
+ fields=[
+ _descriptor.FieldDescriptor(
+ name='vector_actions', full_name='communicator_objects.AgentActionProto.vector_actions', index=0,
+ number=1, type=2, cpp_type=6, label=3,
+ has_default_value=False, default_value=[],
+ message_type=None, enum_type=None, containing_type=None,
+ is_extension=False, extension_scope=None,
+ serialized_options=None, file=DESCRIPTOR),
+ _descriptor.FieldDescriptor(
+ name='text_actions', full_name='communicator_objects.AgentActionProto.text_actions', index=1,
+ number=2, type=9, cpp_type=9, label=1,
+ has_default_value=False, default_value=_b("").decode('utf-8'),
+ message_type=None, enum_type=None, containing_type=None,
+ is_extension=False, extension_scope=None,
+ serialized_options=None, file=DESCRIPTOR),
+ _descriptor.FieldDescriptor(
+ name='memories', full_name='communicator_objects.AgentActionProto.memories', index=2,
+ number=3, type=2, cpp_type=6, label=3,
+ has_default_value=False, default_value=[],
+ message_type=None, enum_type=None, containing_type=None,
+ is_extension=False, extension_scope=None,
+ serialized_options=None, file=DESCRIPTOR),
+ _descriptor.FieldDescriptor(
+ name='value', full_name='communicator_objects.AgentActionProto.value', index=3,
+ number=4, type=2, cpp_type=6, label=1,
+ has_default_value=False, default_value=float(0),
+ message_type=None, enum_type=None, containing_type=None,
+ is_extension=False, extension_scope=None,
+ serialized_options=None, file=DESCRIPTOR),
+ ],
+ extensions=[
+ ],
+ nested_types=[],
+ enum_types=[
+ ],
+ serialized_options=None,
+ is_extendable=False,
+ syntax='proto3',
+ extension_ranges=[],
+ oneofs=[
+ ],
+ serialized_start=80,
+ serialized_end=177,
+)
+
+DESCRIPTOR.message_types_by_name['AgentActionProto'] = _AGENTACTIONPROTO
+_sym_db.RegisterFileDescriptor(DESCRIPTOR)
+
+AgentActionProto = _reflection.GeneratedProtocolMessageType('AgentActionProto', (_message.Message,), {
+ 'DESCRIPTOR' : _AGENTACTIONPROTO,
+ '__module__' : 'animalai.communicator_objects.agent_action_proto_pb2'
+ # @@protoc_insertion_point(class_scope:communicator_objects.AgentActionProto)
+ })
+_sym_db.RegisterMessage(AgentActionProto)
+
+
+DESCRIPTOR._options = None
+# @@protoc_insertion_point(module_scope)
diff --git a/animalai_packages/animalai_envs/animalai/communicator_objects/agent_info_proto_pb2.py b/animalai_packages/animalai_envs/animalai/communicator_objects/agent_info_proto_pb2.py
new file mode 100644
index 00000000..0dad7e85
--- /dev/null
+++ b/animalai_packages/animalai_envs/animalai/communicator_objects/agent_info_proto_pb2.py
@@ -0,0 +1,141 @@
+# -*- coding: utf-8 -*-
+# Generated by the protocol buffer compiler. DO NOT EDIT!
+# source: animalai/communicator_objects/agent_info_proto.proto
+
+import sys
+_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
+from google.protobuf import descriptor as _descriptor
+from google.protobuf import message as _message
+from google.protobuf import reflection as _reflection
+from google.protobuf import symbol_database as _symbol_database
+# @@protoc_insertion_point(imports)
+
+_sym_db = _symbol_database.Default()
+
+
+
+
+DESCRIPTOR = _descriptor.FileDescriptor(
+ name='animalai/communicator_objects/agent_info_proto.proto',
+ package='communicator_objects',
+ syntax='proto3',
+ serialized_options=_b('\252\002\034MLAgents.CommunicatorObjects'),
+ serialized_pb=_b('\n4animalai/communicator_objects/agent_info_proto.proto\x12\x14\x63ommunicator_objects\"\x92\x02\n\x0e\x41gentInfoProto\x12\"\n\x1astacked_vector_observation\x18\x01 \x03(\x02\x12\x1b\n\x13visual_observations\x18\x02 \x03(\x0c\x12\x18\n\x10text_observation\x18\x03 \x01(\t\x12\x1d\n\x15stored_vector_actions\x18\x04 \x03(\x02\x12\x1b\n\x13stored_text_actions\x18\x05 \x01(\t\x12\x10\n\x08memories\x18\x06 \x03(\x02\x12\x0e\n\x06reward\x18\x07 \x01(\x02\x12\x0c\n\x04\x64one\x18\x08 \x01(\x08\x12\x18\n\x10max_step_reached\x18\t \x01(\x08\x12\n\n\x02id\x18\n \x01(\x05\x12\x13\n\x0b\x61\x63tion_mask\x18\x0b \x03(\x08\x42\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3')
+)
+
+
+
+
+_AGENTINFOPROTO = _descriptor.Descriptor(
+ name='AgentInfoProto',
+ full_name='communicator_objects.AgentInfoProto',
+ filename=None,
+ file=DESCRIPTOR,
+ containing_type=None,
+ fields=[
+ _descriptor.FieldDescriptor(
+ name='stacked_vector_observation', full_name='communicator_objects.AgentInfoProto.stacked_vector_observation', index=0,
+ number=1, type=2, cpp_type=6, label=3,
+ has_default_value=False, default_value=[],
+ message_type=None, enum_type=None, containing_type=None,
+ is_extension=False, extension_scope=None,
+ serialized_options=None, file=DESCRIPTOR),
+ _descriptor.FieldDescriptor(
+ name='visual_observations', full_name='communicator_objects.AgentInfoProto.visual_observations', index=1,
+ number=2, type=12, cpp_type=9, label=3,
+ has_default_value=False, default_value=[],
+ message_type=None, enum_type=None, containing_type=None,
+ is_extension=False, extension_scope=None,
+ serialized_options=None, file=DESCRIPTOR),
+ _descriptor.FieldDescriptor(
+ name='text_observation', full_name='communicator_objects.AgentInfoProto.text_observation', index=2,
+ number=3, type=9, cpp_type=9, label=1,
+ has_default_value=False, default_value=_b("").decode('utf-8'),
+ message_type=None, enum_type=None, containing_type=None,
+ is_extension=False, extension_scope=None,
+ serialized_options=None, file=DESCRIPTOR),
+ _descriptor.FieldDescriptor(
+ name='stored_vector_actions', full_name='communicator_objects.AgentInfoProto.stored_vector_actions', index=3,
+ number=4, type=2, cpp_type=6, label=3,
+ has_default_value=False, default_value=[],
+ message_type=None, enum_type=None, containing_type=None,
+ is_extension=False, extension_scope=None,
+ serialized_options=None, file=DESCRIPTOR),
+ _descriptor.FieldDescriptor(
+ name='stored_text_actions', full_name='communicator_objects.AgentInfoProto.stored_text_actions', index=4,
+ number=5, type=9, cpp_type=9, label=1,
+ has_default_value=False, default_value=_b("").decode('utf-8'),
+ message_type=None, enum_type=None, containing_type=None,
+ is_extension=False, extension_scope=None,
+ serialized_options=None, file=DESCRIPTOR),
+ _descriptor.FieldDescriptor(
+ name='memories', full_name='communicator_objects.AgentInfoProto.memories', index=5,
+ number=6, type=2, cpp_type=6, label=3,
+ has_default_value=False, default_value=[],
+ message_type=None, enum_type=None, containing_type=None,
+ is_extension=False, extension_scope=None,
+ serialized_options=None, file=DESCRIPTOR),
+ _descriptor.FieldDescriptor(
+ name='reward', full_name='communicator_objects.AgentInfoProto.reward', index=6,
+ number=7, type=2, cpp_type=6, label=1,
+ has_default_value=False, default_value=float(0),
+ message_type=None, enum_type=None, containing_type=None,
+ is_extension=False, extension_scope=None,
+ serialized_options=None, file=DESCRIPTOR),
+ _descriptor.FieldDescriptor(
+ name='done', full_name='communicator_objects.AgentInfoProto.done', index=7,
+ number=8, type=8, cpp_type=7, label=1,
+ has_default_value=False, default_value=False,
+ message_type=None, enum_type=None, containing_type=None,
+ is_extension=False, extension_scope=None,
+ serialized_options=None, file=DESCRIPTOR),
+ _descriptor.FieldDescriptor(
+ name='max_step_reached', full_name='communicator_objects.AgentInfoProto.max_step_reached', index=8,
+ number=9, type=8, cpp_type=7, label=1,
+ has_default_value=False, default_value=False,
+ message_type=None, enum_type=None, containing_type=None,
+ is_extension=False, extension_scope=None,
+ serialized_options=None, file=DESCRIPTOR),
+ _descriptor.FieldDescriptor(
+ name='id', full_name='communicator_objects.AgentInfoProto.id', index=9,
+ number=10, type=5, cpp_type=1, label=1,
+ has_default_value=False, default_value=0,
+ message_type=None, enum_type=None, containing_type=None,
+ is_extension=False, extension_scope=None,
+ serialized_options=None, file=DESCRIPTOR),
+ _descriptor.FieldDescriptor(
+ name='action_mask', full_name='communicator_objects.AgentInfoProto.action_mask', index=10,
+ number=11, type=8, cpp_type=7, label=3,
+ has_default_value=False, default_value=[],
+ message_type=None, enum_type=None, containing_type=None,
+ is_extension=False, extension_scope=None,
+ serialized_options=None, file=DESCRIPTOR),
+ ],
+ extensions=[
+ ],
+ nested_types=[],
+ enum_types=[
+ ],
+ serialized_options=None,
+ is_extendable=False,
+ syntax='proto3',
+ extension_ranges=[],
+ oneofs=[
+ ],
+ serialized_start=79,
+ serialized_end=353,
+)
+
+DESCRIPTOR.message_types_by_name['AgentInfoProto'] = _AGENTINFOPROTO
+_sym_db.RegisterFileDescriptor(DESCRIPTOR)
+
+AgentInfoProto = _reflection.GeneratedProtocolMessageType('AgentInfoProto', (_message.Message,), {
+ 'DESCRIPTOR' : _AGENTINFOPROTO,
+ '__module__' : 'animalai.communicator_objects.agent_info_proto_pb2'
+ # @@protoc_insertion_point(class_scope:communicator_objects.AgentInfoProto)
+ })
+_sym_db.RegisterMessage(AgentInfoProto)
+
+
+DESCRIPTOR._options = None
+# @@protoc_insertion_point(module_scope)
diff --git a/animalai_packages/animalai_envs/animalai/communicator_objects/arena_parameters_proto_pb2.py b/animalai_packages/animalai_envs/animalai/communicator_objects/arena_parameters_proto_pb2.py
new file mode 100644
index 00000000..930b300c
--- /dev/null
+++ b/animalai_packages/animalai_envs/animalai/communicator_objects/arena_parameters_proto_pb2.py
@@ -0,0 +1,209 @@
+# -*- coding: utf-8 -*-
+# Generated by the protocol buffer compiler. DO NOT EDIT!
+# source: animalai/communicator_objects/arena_parameters_proto.proto
+
+import sys
+_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
+from google.protobuf import descriptor as _descriptor
+from google.protobuf import message as _message
+from google.protobuf import reflection as _reflection
+from google.protobuf import symbol_database as _symbol_database
+# @@protoc_insertion_point(imports)
+
+_sym_db = _symbol_database.Default()
+
+
+
+
+DESCRIPTOR = _descriptor.FileDescriptor(
+ name='animalai/communicator_objects/arena_parameters_proto.proto',
+ package='communicator_objects',
+ syntax='proto3',
+ serialized_options=_b('\252\002\034MLAgents.CommunicatorObjects'),
+ serialized_pb=_b('\n:animalai/communicator_objects/arena_parameters_proto.proto\x12\x14\x63ommunicator_objects\"\xcf\x03\n\x14\x41renaParametersProto\x12\t\n\x01t\x18\x01 \x01(\x05\x12\x46\n\x05items\x18\x02 \x03(\x0b\x32\x37.communicator_objects.ArenaParametersProto.ItemsToSpawn\x12\x11\n\tblackouts\x18\x03 \x03(\x05\x1a\xd0\x02\n\x0cItemsToSpawn\x12\x0c\n\x04name\x18\x01 \x01(\t\x12R\n\tpositions\x18\x03 \x03(\x0b\x32?.communicator_objects.ArenaParametersProto.ItemsToSpawn.Vector3\x12\x11\n\trotations\x18\x04 \x03(\x02\x12N\n\x05sizes\x18\x05 \x03(\x0b\x32?.communicator_objects.ArenaParametersProto.ItemsToSpawn.Vector3\x12O\n\x06\x63olors\x18\x06 \x03(\x0b\x32?.communicator_objects.ArenaParametersProto.ItemsToSpawn.Vector3\x1a*\n\x07Vector3\x12\t\n\x01x\x18\x01 \x01(\x02\x12\t\n\x01y\x18\x02 \x01(\x02\x12\t\n\x01z\x18\x03 \x01(\x02\x42\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3')
+)
+
+
+
+
+_ARENAPARAMETERSPROTO_ITEMSTOSPAWN_VECTOR3 = _descriptor.Descriptor(
+ name='Vector3',
+ full_name='communicator_objects.ArenaParametersProto.ItemsToSpawn.Vector3',
+ filename=None,
+ file=DESCRIPTOR,
+ containing_type=None,
+ fields=[
+ _descriptor.FieldDescriptor(
+ name='x', full_name='communicator_objects.ArenaParametersProto.ItemsToSpawn.Vector3.x', index=0,
+ number=1, type=2, cpp_type=6, label=1,
+ has_default_value=False, default_value=float(0),
+ message_type=None, enum_type=None, containing_type=None,
+ is_extension=False, extension_scope=None,
+ serialized_options=None, file=DESCRIPTOR),
+ _descriptor.FieldDescriptor(
+ name='y', full_name='communicator_objects.ArenaParametersProto.ItemsToSpawn.Vector3.y', index=1,
+ number=2, type=2, cpp_type=6, label=1,
+ has_default_value=False, default_value=float(0),
+ message_type=None, enum_type=None, containing_type=None,
+ is_extension=False, extension_scope=None,
+ serialized_options=None, file=DESCRIPTOR),
+ _descriptor.FieldDescriptor(
+ name='z', full_name='communicator_objects.ArenaParametersProto.ItemsToSpawn.Vector3.z', index=2,
+ number=3, type=2, cpp_type=6, label=1,
+ has_default_value=False, default_value=float(0),
+ message_type=None, enum_type=None, containing_type=None,
+ is_extension=False, extension_scope=None,
+ serialized_options=None, file=DESCRIPTOR),
+ ],
+ extensions=[
+ ],
+ nested_types=[],
+ enum_types=[
+ ],
+ serialized_options=None,
+ is_extendable=False,
+ syntax='proto3',
+ extension_ranges=[],
+ oneofs=[
+ ],
+ serialized_start=506,
+ serialized_end=548,
+)
+
+_ARENAPARAMETERSPROTO_ITEMSTOSPAWN = _descriptor.Descriptor(
+ name='ItemsToSpawn',
+ full_name='communicator_objects.ArenaParametersProto.ItemsToSpawn',
+ filename=None,
+ file=DESCRIPTOR,
+ containing_type=None,
+ fields=[
+ _descriptor.FieldDescriptor(
+ name='name', full_name='communicator_objects.ArenaParametersProto.ItemsToSpawn.name', index=0,
+ number=1, type=9, cpp_type=9, label=1,
+ has_default_value=False, default_value=_b("").decode('utf-8'),
+ message_type=None, enum_type=None, containing_type=None,
+ is_extension=False, extension_scope=None,
+ serialized_options=None, file=DESCRIPTOR),
+ _descriptor.FieldDescriptor(
+ name='positions', full_name='communicator_objects.ArenaParametersProto.ItemsToSpawn.positions', index=1,
+ number=3, type=11, cpp_type=10, label=3,
+ has_default_value=False, default_value=[],
+ message_type=None, enum_type=None, containing_type=None,
+ is_extension=False, extension_scope=None,
+ serialized_options=None, file=DESCRIPTOR),
+ _descriptor.FieldDescriptor(
+ name='rotations', full_name='communicator_objects.ArenaParametersProto.ItemsToSpawn.rotations', index=2,
+ number=4, type=2, cpp_type=6, label=3,
+ has_default_value=False, default_value=[],
+ message_type=None, enum_type=None, containing_type=None,
+ is_extension=False, extension_scope=None,
+ serialized_options=None, file=DESCRIPTOR),
+ _descriptor.FieldDescriptor(
+ name='sizes', full_name='communicator_objects.ArenaParametersProto.ItemsToSpawn.sizes', index=3,
+ number=5, type=11, cpp_type=10, label=3,
+ has_default_value=False, default_value=[],
+ message_type=None, enum_type=None, containing_type=None,
+ is_extension=False, extension_scope=None,
+ serialized_options=None, file=DESCRIPTOR),
+ _descriptor.FieldDescriptor(
+ name='colors', full_name='communicator_objects.ArenaParametersProto.ItemsToSpawn.colors', index=4,
+ number=6, type=11, cpp_type=10, label=3,
+ has_default_value=False, default_value=[],
+ message_type=None, enum_type=None, containing_type=None,
+ is_extension=False, extension_scope=None,
+ serialized_options=None, file=DESCRIPTOR),
+ ],
+ extensions=[
+ ],
+ nested_types=[_ARENAPARAMETERSPROTO_ITEMSTOSPAWN_VECTOR3, ],
+ enum_types=[
+ ],
+ serialized_options=None,
+ is_extendable=False,
+ syntax='proto3',
+ extension_ranges=[],
+ oneofs=[
+ ],
+ serialized_start=212,
+ serialized_end=548,
+)
+
+_ARENAPARAMETERSPROTO = _descriptor.Descriptor(
+ name='ArenaParametersProto',
+ full_name='communicator_objects.ArenaParametersProto',
+ filename=None,
+ file=DESCRIPTOR,
+ containing_type=None,
+ fields=[
+ _descriptor.FieldDescriptor(
+ name='t', full_name='communicator_objects.ArenaParametersProto.t', index=0,
+ number=1, type=5, cpp_type=1, label=1,
+ has_default_value=False, default_value=0,
+ message_type=None, enum_type=None, containing_type=None,
+ is_extension=False, extension_scope=None,
+ serialized_options=None, file=DESCRIPTOR),
+ _descriptor.FieldDescriptor(
+ name='items', full_name='communicator_objects.ArenaParametersProto.items', index=1,
+ number=2, type=11, cpp_type=10, label=3,
+ has_default_value=False, default_value=[],
+ message_type=None, enum_type=None, containing_type=None,
+ is_extension=False, extension_scope=None,
+ serialized_options=None, file=DESCRIPTOR),
+ _descriptor.FieldDescriptor(
+ name='blackouts', full_name='communicator_objects.ArenaParametersProto.blackouts', index=2,
+ number=3, type=5, cpp_type=1, label=3,
+ has_default_value=False, default_value=[],
+ message_type=None, enum_type=None, containing_type=None,
+ is_extension=False, extension_scope=None,
+ serialized_options=None, file=DESCRIPTOR),
+ ],
+ extensions=[
+ ],
+ nested_types=[_ARENAPARAMETERSPROTO_ITEMSTOSPAWN, ],
+ enum_types=[
+ ],
+ serialized_options=None,
+ is_extendable=False,
+ syntax='proto3',
+ extension_ranges=[],
+ oneofs=[
+ ],
+ serialized_start=85,
+ serialized_end=548,
+)
+
+_ARENAPARAMETERSPROTO_ITEMSTOSPAWN_VECTOR3.containing_type = _ARENAPARAMETERSPROTO_ITEMSTOSPAWN
+_ARENAPARAMETERSPROTO_ITEMSTOSPAWN.fields_by_name['positions'].message_type = _ARENAPARAMETERSPROTO_ITEMSTOSPAWN_VECTOR3
+_ARENAPARAMETERSPROTO_ITEMSTOSPAWN.fields_by_name['sizes'].message_type = _ARENAPARAMETERSPROTO_ITEMSTOSPAWN_VECTOR3
+_ARENAPARAMETERSPROTO_ITEMSTOSPAWN.fields_by_name['colors'].message_type = _ARENAPARAMETERSPROTO_ITEMSTOSPAWN_VECTOR3
+_ARENAPARAMETERSPROTO_ITEMSTOSPAWN.containing_type = _ARENAPARAMETERSPROTO
+_ARENAPARAMETERSPROTO.fields_by_name['items'].message_type = _ARENAPARAMETERSPROTO_ITEMSTOSPAWN
+DESCRIPTOR.message_types_by_name['ArenaParametersProto'] = _ARENAPARAMETERSPROTO
+_sym_db.RegisterFileDescriptor(DESCRIPTOR)
+
+ArenaParametersProto = _reflection.GeneratedProtocolMessageType('ArenaParametersProto', (_message.Message,), {
+
+ 'ItemsToSpawn' : _reflection.GeneratedProtocolMessageType('ItemsToSpawn', (_message.Message,), {
+
+ 'Vector3' : _reflection.GeneratedProtocolMessageType('Vector3', (_message.Message,), {
+ 'DESCRIPTOR' : _ARENAPARAMETERSPROTO_ITEMSTOSPAWN_VECTOR3,
+ '__module__' : 'animalai.communicator_objects.arena_parameters_proto_pb2'
+ # @@protoc_insertion_point(class_scope:communicator_objects.ArenaParametersProto.ItemsToSpawn.Vector3)
+ })
+ ,
+ 'DESCRIPTOR' : _ARENAPARAMETERSPROTO_ITEMSTOSPAWN,
+ '__module__' : 'animalai.communicator_objects.arena_parameters_proto_pb2'
+ # @@protoc_insertion_point(class_scope:communicator_objects.ArenaParametersProto.ItemsToSpawn)
+ })
+ ,
+ 'DESCRIPTOR' : _ARENAPARAMETERSPROTO,
+ '__module__' : 'animalai.communicator_objects.arena_parameters_proto_pb2'
+ # @@protoc_insertion_point(class_scope:communicator_objects.ArenaParametersProto)
+ })
+_sym_db.RegisterMessage(ArenaParametersProto)
+_sym_db.RegisterMessage(ArenaParametersProto.ItemsToSpawn)
+_sym_db.RegisterMessage(ArenaParametersProto.ItemsToSpawn.Vector3)
+
+
+DESCRIPTOR._options = None
+# @@protoc_insertion_point(module_scope)
diff --git a/animalai_packages/animalai_envs/animalai/communicator_objects/brain_parameters_proto_pb2.py b/animalai_packages/animalai_envs/animalai/communicator_objects/brain_parameters_proto_pb2.py
new file mode 100644
index 00000000..07091b4f
--- /dev/null
+++ b/animalai_packages/animalai_envs/animalai/communicator_objects/brain_parameters_proto_pb2.py
@@ -0,0 +1,125 @@
+# -*- coding: utf-8 -*-
+# Generated by the protocol buffer compiler. DO NOT EDIT!
+# source: animalai/communicator_objects/brain_parameters_proto.proto
+
+import sys
+_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
+from google.protobuf import descriptor as _descriptor
+from google.protobuf import message as _message
+from google.protobuf import reflection as _reflection
+from google.protobuf import symbol_database as _symbol_database
+# @@protoc_insertion_point(imports)
+
+_sym_db = _symbol_database.Default()
+
+
+from animalai.communicator_objects import resolution_proto_pb2 as animalai_dot_communicator__objects_dot_resolution__proto__pb2
+from animalai.communicator_objects import space_type_proto_pb2 as animalai_dot_communicator__objects_dot_space__type__proto__pb2
+
+
+DESCRIPTOR = _descriptor.FileDescriptor(
+ name='animalai/communicator_objects/brain_parameters_proto.proto',
+ package='communicator_objects',
+ syntax='proto3',
+ serialized_options=_b('\252\002\034MLAgents.CommunicatorObjects'),
+ serialized_pb=_b('\n:animalai/communicator_objects/brain_parameters_proto.proto\x12\x14\x63ommunicator_objects\x1a\x34\x61nimalai/communicator_objects/resolution_proto.proto\x1a\x34\x61nimalai/communicator_objects/space_type_proto.proto\"\xd4\x02\n\x14\x42rainParametersProto\x12\x1f\n\x17vector_observation_size\x18\x01 \x01(\x05\x12\'\n\x1fnum_stacked_vector_observations\x18\x02 \x01(\x05\x12\x1a\n\x12vector_action_size\x18\x03 \x03(\x05\x12\x41\n\x12\x63\x61mera_resolutions\x18\x04 \x03(\x0b\x32%.communicator_objects.ResolutionProto\x12\"\n\x1avector_action_descriptions\x18\x05 \x03(\t\x12\x46\n\x18vector_action_space_type\x18\x06 \x01(\x0e\x32$.communicator_objects.SpaceTypeProto\x12\x12\n\nbrain_name\x18\x07 \x01(\t\x12\x13\n\x0bis_training\x18\x08 \x01(\x08\x42\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3')
+ ,
+ dependencies=[animalai_dot_communicator__objects_dot_resolution__proto__pb2.DESCRIPTOR,animalai_dot_communicator__objects_dot_space__type__proto__pb2.DESCRIPTOR,])
+
+
+
+
+_BRAINPARAMETERSPROTO = _descriptor.Descriptor(
+ name='BrainParametersProto',
+ full_name='communicator_objects.BrainParametersProto',
+ filename=None,
+ file=DESCRIPTOR,
+ containing_type=None,
+ fields=[
+ _descriptor.FieldDescriptor(
+ name='vector_observation_size', full_name='communicator_objects.BrainParametersProto.vector_observation_size', index=0,
+ number=1, type=5, cpp_type=1, label=1,
+ has_default_value=False, default_value=0,
+ message_type=None, enum_type=None, containing_type=None,
+ is_extension=False, extension_scope=None,
+ serialized_options=None, file=DESCRIPTOR),
+ _descriptor.FieldDescriptor(
+ name='num_stacked_vector_observations', full_name='communicator_objects.BrainParametersProto.num_stacked_vector_observations', index=1,
+ number=2, type=5, cpp_type=1, label=1,
+ has_default_value=False, default_value=0,
+ message_type=None, enum_type=None, containing_type=None,
+ is_extension=False, extension_scope=None,
+ serialized_options=None, file=DESCRIPTOR),
+ _descriptor.FieldDescriptor(
+ name='vector_action_size', full_name='communicator_objects.BrainParametersProto.vector_action_size', index=2,
+ number=3, type=5, cpp_type=1, label=3,
+ has_default_value=False, default_value=[],
+ message_type=None, enum_type=None, containing_type=None,
+ is_extension=False, extension_scope=None,
+ serialized_options=None, file=DESCRIPTOR),
+ _descriptor.FieldDescriptor(
+ name='camera_resolutions', full_name='communicator_objects.BrainParametersProto.camera_resolutions', index=3,
+ number=4, type=11, cpp_type=10, label=3,
+ has_default_value=False, default_value=[],
+ message_type=None, enum_type=None, containing_type=None,
+ is_extension=False, extension_scope=None,
+ serialized_options=None, file=DESCRIPTOR),
+ _descriptor.FieldDescriptor(
+ name='vector_action_descriptions', full_name='communicator_objects.BrainParametersProto.vector_action_descriptions', index=4,
+ number=5, type=9, cpp_type=9, label=3,
+ has_default_value=False, default_value=[],
+ message_type=None, enum_type=None, containing_type=None,
+ is_extension=False, extension_scope=None,
+ serialized_options=None, file=DESCRIPTOR),
+ _descriptor.FieldDescriptor(
+ name='vector_action_space_type', full_name='communicator_objects.BrainParametersProto.vector_action_space_type', index=5,
+ number=6, type=14, cpp_type=8, label=1,
+ has_default_value=False, default_value=0,
+ message_type=None, enum_type=None, containing_type=None,
+ is_extension=False, extension_scope=None,
+ serialized_options=None, file=DESCRIPTOR),
+ _descriptor.FieldDescriptor(
+ name='brain_name', full_name='communicator_objects.BrainParametersProto.brain_name', index=6,
+ number=7, type=9, cpp_type=9, label=1,
+ has_default_value=False, default_value=_b("").decode('utf-8'),
+ message_type=None, enum_type=None, containing_type=None,
+ is_extension=False, extension_scope=None,
+ serialized_options=None, file=DESCRIPTOR),
+ _descriptor.FieldDescriptor(
+ name='is_training', full_name='communicator_objects.BrainParametersProto.is_training', index=7,
+ number=8, type=8, cpp_type=7, label=1,
+ has_default_value=False, default_value=False,
+ message_type=None, enum_type=None, containing_type=None,
+ is_extension=False, extension_scope=None,
+ serialized_options=None, file=DESCRIPTOR),
+ ],
+ extensions=[
+ ],
+ nested_types=[],
+ enum_types=[
+ ],
+ serialized_options=None,
+ is_extendable=False,
+ syntax='proto3',
+ extension_ranges=[],
+ oneofs=[
+ ],
+ serialized_start=193,
+ serialized_end=533,
+)
+
+_BRAINPARAMETERSPROTO.fields_by_name['camera_resolutions'].message_type = animalai_dot_communicator__objects_dot_resolution__proto__pb2._RESOLUTIONPROTO
+_BRAINPARAMETERSPROTO.fields_by_name['vector_action_space_type'].enum_type = animalai_dot_communicator__objects_dot_space__type__proto__pb2._SPACETYPEPROTO
+DESCRIPTOR.message_types_by_name['BrainParametersProto'] = _BRAINPARAMETERSPROTO
+_sym_db.RegisterFileDescriptor(DESCRIPTOR)
+
+BrainParametersProto = _reflection.GeneratedProtocolMessageType('BrainParametersProto', (_message.Message,), {
+ 'DESCRIPTOR' : _BRAINPARAMETERSPROTO,
+ '__module__' : 'animalai.communicator_objects.brain_parameters_proto_pb2'
+ # @@protoc_insertion_point(class_scope:communicator_objects.BrainParametersProto)
+ })
+_sym_db.RegisterMessage(BrainParametersProto)
+
+
+DESCRIPTOR._options = None
+# @@protoc_insertion_point(module_scope)
diff --git a/animalai_packages/animalai_envs/animalai/communicator_objects/command_proto_pb2.py b/animalai_packages/animalai_envs/animalai/communicator_objects/command_proto_pb2.py
new file mode 100644
index 00000000..4912301f
--- /dev/null
+++ b/animalai_packages/animalai_envs/animalai/communicator_objects/command_proto_pb2.py
@@ -0,0 +1,64 @@
+# -*- coding: utf-8 -*-
+# Generated by the protocol buffer compiler. DO NOT EDIT!
+# source: animalai/communicator_objects/command_proto.proto
+
+import sys
+_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
+from google.protobuf.internal import enum_type_wrapper
+from google.protobuf import descriptor as _descriptor
+from google.protobuf import message as _message
+from google.protobuf import reflection as _reflection
+from google.protobuf import symbol_database as _symbol_database
+# @@protoc_insertion_point(imports)
+
+_sym_db = _symbol_database.Default()
+
+
+
+
+DESCRIPTOR = _descriptor.FileDescriptor(
+ name='animalai/communicator_objects/command_proto.proto',
+ package='communicator_objects',
+ syntax='proto3',
+ serialized_options=_b('\252\002\034MLAgents.CommunicatorObjects'),
+ serialized_pb=_b('\n1animalai/communicator_objects/command_proto.proto\x12\x14\x63ommunicator_objects*-\n\x0c\x43ommandProto\x12\x08\n\x04STEP\x10\x00\x12\t\n\x05RESET\x10\x01\x12\x08\n\x04QUIT\x10\x02\x42\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3')
+)
+
+_COMMANDPROTO = _descriptor.EnumDescriptor(
+ name='CommandProto',
+ full_name='communicator_objects.CommandProto',
+ filename=None,
+ file=DESCRIPTOR,
+ values=[
+ _descriptor.EnumValueDescriptor(
+ name='STEP', index=0, number=0,
+ serialized_options=None,
+ type=None),
+ _descriptor.EnumValueDescriptor(
+ name='RESET', index=1, number=1,
+ serialized_options=None,
+ type=None),
+ _descriptor.EnumValueDescriptor(
+ name='QUIT', index=2, number=2,
+ serialized_options=None,
+ type=None),
+ ],
+ containing_type=None,
+ serialized_options=None,
+ serialized_start=75,
+ serialized_end=120,
+)
+_sym_db.RegisterEnumDescriptor(_COMMANDPROTO)
+
+CommandProto = enum_type_wrapper.EnumTypeWrapper(_COMMANDPROTO)
+STEP = 0
+RESET = 1
+QUIT = 2
+
+
+DESCRIPTOR.enum_types_by_name['CommandProto'] = _COMMANDPROTO
+_sym_db.RegisterFileDescriptor(DESCRIPTOR)
+
+
+DESCRIPTOR._options = None
+# @@protoc_insertion_point(module_scope)
diff --git a/animalai_packages/animalai_envs/animalai/communicator_objects/engine_configuration_proto_pb2.py b/animalai_packages/animalai_envs/animalai/communicator_objects/engine_configuration_proto_pb2.py
new file mode 100644
index 00000000..107b1335
--- /dev/null
+++ b/animalai_packages/animalai_envs/animalai/communicator_objects/engine_configuration_proto_pb2.py
@@ -0,0 +1,106 @@
+# -*- coding: utf-8 -*-
+# Generated by the protocol buffer compiler. DO NOT EDIT!
+# source: animalai/communicator_objects/engine_configuration_proto.proto
+
+import sys
+_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
+from google.protobuf import descriptor as _descriptor
+from google.protobuf import message as _message
+from google.protobuf import reflection as _reflection
+from google.protobuf import symbol_database as _symbol_database
+# @@protoc_insertion_point(imports)
+
+_sym_db = _symbol_database.Default()
+
+
+
+
+DESCRIPTOR = _descriptor.FileDescriptor(
+ name='animalai/communicator_objects/engine_configuration_proto.proto',
+ package='communicator_objects',
+ syntax='proto3',
+ serialized_options=_b('\252\002\034MLAgents.CommunicatorObjects'),
+ serialized_pb=_b('\n>animalai/communicator_objects/engine_configuration_proto.proto\x12\x14\x63ommunicator_objects\"\x95\x01\n\x18\x45ngineConfigurationProto\x12\r\n\x05width\x18\x01 \x01(\x05\x12\x0e\n\x06height\x18\x02 \x01(\x05\x12\x15\n\rquality_level\x18\x03 \x01(\x05\x12\x12\n\ntime_scale\x18\x04 \x01(\x02\x12\x19\n\x11target_frame_rate\x18\x05 \x01(\x05\x12\x14\n\x0cshow_monitor\x18\x06 \x01(\x08\x42\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3')
+)
+
+
+
+
+_ENGINECONFIGURATIONPROTO = _descriptor.Descriptor(
+ name='EngineConfigurationProto',
+ full_name='communicator_objects.EngineConfigurationProto',
+ filename=None,
+ file=DESCRIPTOR,
+ containing_type=None,
+ fields=[
+ _descriptor.FieldDescriptor(
+ name='width', full_name='communicator_objects.EngineConfigurationProto.width', index=0,
+ number=1, type=5, cpp_type=1, label=1,
+ has_default_value=False, default_value=0,
+ message_type=None, enum_type=None, containing_type=None,
+ is_extension=False, extension_scope=None,
+ serialized_options=None, file=DESCRIPTOR),
+ _descriptor.FieldDescriptor(
+ name='height', full_name='communicator_objects.EngineConfigurationProto.height', index=1,
+ number=2, type=5, cpp_type=1, label=1,
+ has_default_value=False, default_value=0,
+ message_type=None, enum_type=None, containing_type=None,
+ is_extension=False, extension_scope=None,
+ serialized_options=None, file=DESCRIPTOR),
+ _descriptor.FieldDescriptor(
+ name='quality_level', full_name='communicator_objects.EngineConfigurationProto.quality_level', index=2,
+ number=3, type=5, cpp_type=1, label=1,
+ has_default_value=False, default_value=0,
+ message_type=None, enum_type=None, containing_type=None,
+ is_extension=False, extension_scope=None,
+ serialized_options=None, file=DESCRIPTOR),
+ _descriptor.FieldDescriptor(
+ name='time_scale', full_name='communicator_objects.EngineConfigurationProto.time_scale', index=3,
+ number=4, type=2, cpp_type=6, label=1,
+ has_default_value=False, default_value=float(0),
+ message_type=None, enum_type=None, containing_type=None,
+ is_extension=False, extension_scope=None,
+ serialized_options=None, file=DESCRIPTOR),
+ _descriptor.FieldDescriptor(
+ name='target_frame_rate', full_name='communicator_objects.EngineConfigurationProto.target_frame_rate', index=4,
+ number=5, type=5, cpp_type=1, label=1,
+ has_default_value=False, default_value=0,
+ message_type=None, enum_type=None, containing_type=None,
+ is_extension=False, extension_scope=None,
+ serialized_options=None, file=DESCRIPTOR),
+ _descriptor.FieldDescriptor(
+ name='show_monitor', full_name='communicator_objects.EngineConfigurationProto.show_monitor', index=5,
+ number=6, type=8, cpp_type=7, label=1,
+ has_default_value=False, default_value=False,
+ message_type=None, enum_type=None, containing_type=None,
+ is_extension=False, extension_scope=None,
+ serialized_options=None, file=DESCRIPTOR),
+ ],
+ extensions=[
+ ],
+ nested_types=[],
+ enum_types=[
+ ],
+ serialized_options=None,
+ is_extendable=False,
+ syntax='proto3',
+ extension_ranges=[],
+ oneofs=[
+ ],
+ serialized_start=89,
+ serialized_end=238,
+)
+
+DESCRIPTOR.message_types_by_name['EngineConfigurationProto'] = _ENGINECONFIGURATIONPROTO
+_sym_db.RegisterFileDescriptor(DESCRIPTOR)
+
+EngineConfigurationProto = _reflection.GeneratedProtocolMessageType('EngineConfigurationProto', (_message.Message,), {
+ 'DESCRIPTOR' : _ENGINECONFIGURATIONPROTO,
+ '__module__' : 'animalai.communicator_objects.engine_configuration_proto_pb2'
+ # @@protoc_insertion_point(class_scope:communicator_objects.EngineConfigurationProto)
+ })
+_sym_db.RegisterMessage(EngineConfigurationProto)
+
+
+DESCRIPTOR._options = None
+# @@protoc_insertion_point(module_scope)
diff --git a/animalai_packages/animalai_envs/animalai/communicator_objects/header_pb2.py b/animalai_packages/animalai_envs/animalai/communicator_objects/header_pb2.py
new file mode 100644
index 00000000..6c40f6cc
--- /dev/null
+++ b/animalai_packages/animalai_envs/animalai/communicator_objects/header_pb2.py
@@ -0,0 +1,78 @@
+# -*- coding: utf-8 -*-
+# Generated by the protocol buffer compiler. DO NOT EDIT!
+# source: animalai/communicator_objects/header.proto
+
+import sys
+_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
+from google.protobuf import descriptor as _descriptor
+from google.protobuf import message as _message
+from google.protobuf import reflection as _reflection
+from google.protobuf import symbol_database as _symbol_database
+# @@protoc_insertion_point(imports)
+
+_sym_db = _symbol_database.Default()
+
+
+
+
+DESCRIPTOR = _descriptor.FileDescriptor(
+ name='animalai/communicator_objects/header.proto',
+ package='communicator_objects',
+ syntax='proto3',
+ serialized_options=_b('\252\002\034MLAgents.CommunicatorObjects'),
+ serialized_pb=_b('\n*animalai/communicator_objects/header.proto\x12\x14\x63ommunicator_objects\")\n\x06Header\x12\x0e\n\x06status\x18\x01 \x01(\x05\x12\x0f\n\x07message\x18\x02 \x01(\tB\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3')
+)
+
+
+
+
+_HEADER = _descriptor.Descriptor(
+ name='Header',
+ full_name='communicator_objects.Header',
+ filename=None,
+ file=DESCRIPTOR,
+ containing_type=None,
+ fields=[
+ _descriptor.FieldDescriptor(
+ name='status', full_name='communicator_objects.Header.status', index=0,
+ number=1, type=5, cpp_type=1, label=1,
+ has_default_value=False, default_value=0,
+ message_type=None, enum_type=None, containing_type=None,
+ is_extension=False, extension_scope=None,
+ serialized_options=None, file=DESCRIPTOR),
+ _descriptor.FieldDescriptor(
+ name='message', full_name='communicator_objects.Header.message', index=1,
+ number=2, type=9, cpp_type=9, label=1,
+ has_default_value=False, default_value=_b("").decode('utf-8'),
+ message_type=None, enum_type=None, containing_type=None,
+ is_extension=False, extension_scope=None,
+ serialized_options=None, file=DESCRIPTOR),
+ ],
+ extensions=[
+ ],
+ nested_types=[],
+ enum_types=[
+ ],
+ serialized_options=None,
+ is_extendable=False,
+ syntax='proto3',
+ extension_ranges=[],
+ oneofs=[
+ ],
+ serialized_start=68,
+ serialized_end=109,
+)
+
+DESCRIPTOR.message_types_by_name['Header'] = _HEADER
+_sym_db.RegisterFileDescriptor(DESCRIPTOR)
+
+Header = _reflection.GeneratedProtocolMessageType('Header', (_message.Message,), {
+ 'DESCRIPTOR' : _HEADER,
+ '__module__' : 'animalai.communicator_objects.header_pb2'
+ # @@protoc_insertion_point(class_scope:communicator_objects.Header)
+ })
+_sym_db.RegisterMessage(Header)
+
+
+DESCRIPTOR._options = None
+# @@protoc_insertion_point(module_scope)
diff --git a/animalai_packages/animalai_envs/animalai/communicator_objects/resolution_proto_pb2.py b/animalai_packages/animalai_envs/animalai/communicator_objects/resolution_proto_pb2.py
new file mode 100644
index 00000000..cade7d49
--- /dev/null
+++ b/animalai_packages/animalai_envs/animalai/communicator_objects/resolution_proto_pb2.py
@@ -0,0 +1,85 @@
+# -*- coding: utf-8 -*-
+# Generated by the protocol buffer compiler. DO NOT EDIT!
+# source: animalai/communicator_objects/resolution_proto.proto
+
+import sys
+_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
+from google.protobuf import descriptor as _descriptor
+from google.protobuf import message as _message
+from google.protobuf import reflection as _reflection
+from google.protobuf import symbol_database as _symbol_database
+# @@protoc_insertion_point(imports)
+
+_sym_db = _symbol_database.Default()
+
+
+
+
+DESCRIPTOR = _descriptor.FileDescriptor(
+ name='animalai/communicator_objects/resolution_proto.proto',
+ package='communicator_objects',
+ syntax='proto3',
+ serialized_options=_b('\252\002\034MLAgents.CommunicatorObjects'),
+ serialized_pb=_b('\n4animalai/communicator_objects/resolution_proto.proto\x12\x14\x63ommunicator_objects\"D\n\x0fResolutionProto\x12\r\n\x05width\x18\x01 \x01(\x05\x12\x0e\n\x06height\x18\x02 \x01(\x05\x12\x12\n\ngray_scale\x18\x03 \x01(\x08\x42\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3')
+)
+
+
+
+
+_RESOLUTIONPROTO = _descriptor.Descriptor(
+ name='ResolutionProto',
+ full_name='communicator_objects.ResolutionProto',
+ filename=None,
+ file=DESCRIPTOR,
+ containing_type=None,
+ fields=[
+ _descriptor.FieldDescriptor(
+ name='width', full_name='communicator_objects.ResolutionProto.width', index=0,
+ number=1, type=5, cpp_type=1, label=1,
+ has_default_value=False, default_value=0,
+ message_type=None, enum_type=None, containing_type=None,
+ is_extension=False, extension_scope=None,
+ serialized_options=None, file=DESCRIPTOR),
+ _descriptor.FieldDescriptor(
+ name='height', full_name='communicator_objects.ResolutionProto.height', index=1,
+ number=2, type=5, cpp_type=1, label=1,
+ has_default_value=False, default_value=0,
+ message_type=None, enum_type=None, containing_type=None,
+ is_extension=False, extension_scope=None,
+ serialized_options=None, file=DESCRIPTOR),
+ _descriptor.FieldDescriptor(
+ name='gray_scale', full_name='communicator_objects.ResolutionProto.gray_scale', index=2,
+ number=3, type=8, cpp_type=7, label=1,
+ has_default_value=False, default_value=False,
+ message_type=None, enum_type=None, containing_type=None,
+ is_extension=False, extension_scope=None,
+ serialized_options=None, file=DESCRIPTOR),
+ ],
+ extensions=[
+ ],
+ nested_types=[],
+ enum_types=[
+ ],
+ serialized_options=None,
+ is_extendable=False,
+ syntax='proto3',
+ extension_ranges=[],
+ oneofs=[
+ ],
+ serialized_start=78,
+ serialized_end=146,
+)
+
+DESCRIPTOR.message_types_by_name['ResolutionProto'] = _RESOLUTIONPROTO
+_sym_db.RegisterFileDescriptor(DESCRIPTOR)
+
+ResolutionProto = _reflection.GeneratedProtocolMessageType('ResolutionProto', (_message.Message,), {
+ 'DESCRIPTOR' : _RESOLUTIONPROTO,
+ '__module__' : 'animalai.communicator_objects.resolution_proto_pb2'
+ # @@protoc_insertion_point(class_scope:communicator_objects.ResolutionProto)
+ })
+_sym_db.RegisterMessage(ResolutionProto)
+
+
+DESCRIPTOR._options = None
+# @@protoc_insertion_point(module_scope)
diff --git a/animalai_packages/animalai_envs/animalai/communicator_objects/space_type_proto_pb2.py b/animalai_packages/animalai_envs/animalai/communicator_objects/space_type_proto_pb2.py
new file mode 100644
index 00000000..02a9b16e
--- /dev/null
+++ b/animalai_packages/animalai_envs/animalai/communicator_objects/space_type_proto_pb2.py
@@ -0,0 +1,61 @@
+# -*- coding: utf-8 -*-
+# Generated by the protocol buffer compiler. DO NOT EDIT!
+# source: animalai/communicator_objects/space_type_proto.proto
+
+import sys
+_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
+from google.protobuf.internal import enum_type_wrapper
+from google.protobuf import descriptor as _descriptor
+from google.protobuf import message as _message
+from google.protobuf import reflection as _reflection
+from google.protobuf import symbol_database as _symbol_database
+# @@protoc_insertion_point(imports)
+
+_sym_db = _symbol_database.Default()
+
+
+from animalai.communicator_objects import resolution_proto_pb2 as animalai_dot_communicator__objects_dot_resolution__proto__pb2
+
+
+DESCRIPTOR = _descriptor.FileDescriptor(
+ name='animalai/communicator_objects/space_type_proto.proto',
+ package='communicator_objects',
+ syntax='proto3',
+ serialized_options=_b('\252\002\034MLAgents.CommunicatorObjects'),
+ serialized_pb=_b('\n4animalai/communicator_objects/space_type_proto.proto\x12\x14\x63ommunicator_objects\x1a\x34\x61nimalai/communicator_objects/resolution_proto.proto*.\n\x0eSpaceTypeProto\x12\x0c\n\x08\x64iscrete\x10\x00\x12\x0e\n\ncontinuous\x10\x01\x42\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3')
+ ,
+ dependencies=[animalai_dot_communicator__objects_dot_resolution__proto__pb2.DESCRIPTOR,])
+
+_SPACETYPEPROTO = _descriptor.EnumDescriptor(
+ name='SpaceTypeProto',
+ full_name='communicator_objects.SpaceTypeProto',
+ filename=None,
+ file=DESCRIPTOR,
+ values=[
+ _descriptor.EnumValueDescriptor(
+ name='discrete', index=0, number=0,
+ serialized_options=None,
+ type=None),
+ _descriptor.EnumValueDescriptor(
+ name='continuous', index=1, number=1,
+ serialized_options=None,
+ type=None),
+ ],
+ containing_type=None,
+ serialized_options=None,
+ serialized_start=132,
+ serialized_end=178,
+)
+_sym_db.RegisterEnumDescriptor(_SPACETYPEPROTO)
+
+SpaceTypeProto = enum_type_wrapper.EnumTypeWrapper(_SPACETYPEPROTO)
+discrete = 0
+continuous = 1
+
+
+DESCRIPTOR.enum_types_by_name['SpaceTypeProto'] = _SPACETYPEPROTO
+_sym_db.RegisterFileDescriptor(DESCRIPTOR)
+
+
+DESCRIPTOR._options = None
+# @@protoc_insertion_point(module_scope)
diff --git a/animalai_packages/animalai_envs/animalai/communicator_objects/unity_input_pb2.py b/animalai_packages/animalai_envs/animalai/communicator_objects/unity_input_pb2.py
new file mode 100644
index 00000000..a3f9d1c1
--- /dev/null
+++ b/animalai_packages/animalai_envs/animalai/communicator_objects/unity_input_pb2.py
@@ -0,0 +1,92 @@
+# -*- coding: utf-8 -*-
+# Generated by the protocol buffer compiler. DO NOT EDIT!
+# source: animalai/communicator_objects/unity_input.proto
+
+import sys
+_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
+from google.protobuf import descriptor as _descriptor
+from google.protobuf import message as _message
+from google.protobuf import reflection as _reflection
+from google.protobuf import symbol_database as _symbol_database
+# @@protoc_insertion_point(imports)
+
+_sym_db = _symbol_database.Default()
+
+
+from animalai.communicator_objects import unity_rl_input_pb2 as animalai_dot_communicator__objects_dot_unity__rl__input__pb2
+from animalai.communicator_objects import unity_rl_initialization_input_pb2 as animalai_dot_communicator__objects_dot_unity__rl__initialization__input__pb2
+from animalai.communicator_objects import unity_rl_reset_input_pb2 as animalai_dot_communicator__objects_dot_unity__rl__reset__input__pb2
+
+
+DESCRIPTOR = _descriptor.FileDescriptor(
+ name='animalai/communicator_objects/unity_input.proto',
+ package='communicator_objects',
+ syntax='proto3',
+ serialized_options=_b('\252\002\034MLAgents.CommunicatorObjects'),
+ serialized_pb=_b('\n/animalai/communicator_objects/unity_input.proto\x12\x14\x63ommunicator_objects\x1a\x32\x61nimalai/communicator_objects/unity_rl_input.proto\x1a\x41\x61nimalai/communicator_objects/unity_rl_initialization_input.proto\x1a\x38\x61nimalai/communicator_objects/unity_rl_reset_input.proto\"\xd6\x01\n\nUnityInput\x12\x34\n\x08rl_input\x18\x01 \x01(\x0b\x32\".communicator_objects.UnityRLInput\x12Q\n\x17rl_initialization_input\x18\x02 \x01(\x0b\x32\x30.communicator_objects.UnityRLInitializationInput\x12?\n\x0erl_reset_input\x18\x03 \x01(\x0b\x32\'.communicator_objects.UnityRLResetInputB\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3')
+ ,
+ dependencies=[animalai_dot_communicator__objects_dot_unity__rl__input__pb2.DESCRIPTOR,animalai_dot_communicator__objects_dot_unity__rl__initialization__input__pb2.DESCRIPTOR,animalai_dot_communicator__objects_dot_unity__rl__reset__input__pb2.DESCRIPTOR,])
+
+
+
+
+_UNITYINPUT = _descriptor.Descriptor(
+ name='UnityInput',
+ full_name='communicator_objects.UnityInput',
+ filename=None,
+ file=DESCRIPTOR,
+ containing_type=None,
+ fields=[
+ _descriptor.FieldDescriptor(
+ name='rl_input', full_name='communicator_objects.UnityInput.rl_input', index=0,
+ number=1, type=11, cpp_type=10, label=1,
+ has_default_value=False, default_value=None,
+ message_type=None, enum_type=None, containing_type=None,
+ is_extension=False, extension_scope=None,
+ serialized_options=None, file=DESCRIPTOR),
+ _descriptor.FieldDescriptor(
+ name='rl_initialization_input', full_name='communicator_objects.UnityInput.rl_initialization_input', index=1,
+ number=2, type=11, cpp_type=10, label=1,
+ has_default_value=False, default_value=None,
+ message_type=None, enum_type=None, containing_type=None,
+ is_extension=False, extension_scope=None,
+ serialized_options=None, file=DESCRIPTOR),
+ _descriptor.FieldDescriptor(
+ name='rl_reset_input', full_name='communicator_objects.UnityInput.rl_reset_input', index=2,
+ number=3, type=11, cpp_type=10, label=1,
+ has_default_value=False, default_value=None,
+ message_type=None, enum_type=None, containing_type=None,
+ is_extension=False, extension_scope=None,
+ serialized_options=None, file=DESCRIPTOR),
+ ],
+ extensions=[
+ ],
+ nested_types=[],
+ enum_types=[
+ ],
+ serialized_options=None,
+ is_extendable=False,
+ syntax='proto3',
+ extension_ranges=[],
+ oneofs=[
+ ],
+ serialized_start=251,
+ serialized_end=465,
+)
+
+_UNITYINPUT.fields_by_name['rl_input'].message_type = animalai_dot_communicator__objects_dot_unity__rl__input__pb2._UNITYRLINPUT
+_UNITYINPUT.fields_by_name['rl_initialization_input'].message_type = animalai_dot_communicator__objects_dot_unity__rl__initialization__input__pb2._UNITYRLINITIALIZATIONINPUT
+_UNITYINPUT.fields_by_name['rl_reset_input'].message_type = animalai_dot_communicator__objects_dot_unity__rl__reset__input__pb2._UNITYRLRESETINPUT
+DESCRIPTOR.message_types_by_name['UnityInput'] = _UNITYINPUT
+_sym_db.RegisterFileDescriptor(DESCRIPTOR)
+
+UnityInput = _reflection.GeneratedProtocolMessageType('UnityInput', (_message.Message,), {
+ 'DESCRIPTOR' : _UNITYINPUT,
+ '__module__' : 'animalai.communicator_objects.unity_input_pb2'
+ # @@protoc_insertion_point(class_scope:communicator_objects.UnityInput)
+ })
+_sym_db.RegisterMessage(UnityInput)
+
+
+DESCRIPTOR._options = None
+# @@protoc_insertion_point(module_scope)
diff --git a/animalai_packages/animalai_envs/animalai/communicator_objects/unity_message_pb2.py b/animalai_packages/animalai_envs/animalai/communicator_objects/unity_message_pb2.py
new file mode 100644
index 00000000..47deefdc
--- /dev/null
+++ b/animalai_packages/animalai_envs/animalai/communicator_objects/unity_message_pb2.py
@@ -0,0 +1,92 @@
+# -*- coding: utf-8 -*-
+# Generated by the protocol buffer compiler. DO NOT EDIT!
+# source: animalai/communicator_objects/unity_message.proto
+
+import sys
+_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
+from google.protobuf import descriptor as _descriptor
+from google.protobuf import message as _message
+from google.protobuf import reflection as _reflection
+from google.protobuf import symbol_database as _symbol_database
+# @@protoc_insertion_point(imports)
+
+_sym_db = _symbol_database.Default()
+
+
+from animalai.communicator_objects import unity_output_pb2 as animalai_dot_communicator__objects_dot_unity__output__pb2
+from animalai.communicator_objects import unity_input_pb2 as animalai_dot_communicator__objects_dot_unity__input__pb2
+from animalai.communicator_objects import header_pb2 as animalai_dot_communicator__objects_dot_header__pb2
+
+
+DESCRIPTOR = _descriptor.FileDescriptor(
+ name='animalai/communicator_objects/unity_message.proto',
+ package='communicator_objects',
+ syntax='proto3',
+ serialized_options=_b('\252\002\034MLAgents.CommunicatorObjects'),
+ serialized_pb=_b('\n1animalai/communicator_objects/unity_message.proto\x12\x14\x63ommunicator_objects\x1a\x30\x61nimalai/communicator_objects/unity_output.proto\x1a/animalai/communicator_objects/unity_input.proto\x1a*animalai/communicator_objects/header.proto\"\xac\x01\n\x0cUnityMessage\x12,\n\x06header\x18\x01 \x01(\x0b\x32\x1c.communicator_objects.Header\x12\x37\n\x0cunity_output\x18\x02 \x01(\x0b\x32!.communicator_objects.UnityOutput\x12\x35\n\x0bunity_input\x18\x03 \x01(\x0b\x32 .communicator_objects.UnityInputB\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3')
+ ,
+ dependencies=[animalai_dot_communicator__objects_dot_unity__output__pb2.DESCRIPTOR,animalai_dot_communicator__objects_dot_unity__input__pb2.DESCRIPTOR,animalai_dot_communicator__objects_dot_header__pb2.DESCRIPTOR,])
+
+
+
+
+_UNITYMESSAGE = _descriptor.Descriptor(
+ name='UnityMessage',
+ full_name='communicator_objects.UnityMessage',
+ filename=None,
+ file=DESCRIPTOR,
+ containing_type=None,
+ fields=[
+ _descriptor.FieldDescriptor(
+ name='header', full_name='communicator_objects.UnityMessage.header', index=0,
+ number=1, type=11, cpp_type=10, label=1,
+ has_default_value=False, default_value=None,
+ message_type=None, enum_type=None, containing_type=None,
+ is_extension=False, extension_scope=None,
+ serialized_options=None, file=DESCRIPTOR),
+ _descriptor.FieldDescriptor(
+ name='unity_output', full_name='communicator_objects.UnityMessage.unity_output', index=1,
+ number=2, type=11, cpp_type=10, label=1,
+ has_default_value=False, default_value=None,
+ message_type=None, enum_type=None, containing_type=None,
+ is_extension=False, extension_scope=None,
+ serialized_options=None, file=DESCRIPTOR),
+ _descriptor.FieldDescriptor(
+ name='unity_input', full_name='communicator_objects.UnityMessage.unity_input', index=2,
+ number=3, type=11, cpp_type=10, label=1,
+ has_default_value=False, default_value=None,
+ message_type=None, enum_type=None, containing_type=None,
+ is_extension=False, extension_scope=None,
+ serialized_options=None, file=DESCRIPTOR),
+ ],
+ extensions=[
+ ],
+ nested_types=[],
+ enum_types=[
+ ],
+ serialized_options=None,
+ is_extendable=False,
+ syntax='proto3',
+ extension_ranges=[],
+ oneofs=[
+ ],
+ serialized_start=219,
+ serialized_end=391,
+)
+
+_UNITYMESSAGE.fields_by_name['header'].message_type = animalai_dot_communicator__objects_dot_header__pb2._HEADER
+_UNITYMESSAGE.fields_by_name['unity_output'].message_type = animalai_dot_communicator__objects_dot_unity__output__pb2._UNITYOUTPUT
+_UNITYMESSAGE.fields_by_name['unity_input'].message_type = animalai_dot_communicator__objects_dot_unity__input__pb2._UNITYINPUT
+DESCRIPTOR.message_types_by_name['UnityMessage'] = _UNITYMESSAGE
+_sym_db.RegisterFileDescriptor(DESCRIPTOR)
+
+UnityMessage = _reflection.GeneratedProtocolMessageType('UnityMessage', (_message.Message,), {
+ 'DESCRIPTOR' : _UNITYMESSAGE,
+ '__module__' : 'animalai.communicator_objects.unity_message_pb2'
+ # @@protoc_insertion_point(class_scope:communicator_objects.UnityMessage)
+ })
+_sym_db.RegisterMessage(UnityMessage)
+
+
+DESCRIPTOR._options = None
+# @@protoc_insertion_point(module_scope)
diff --git a/animalai_packages/animalai_envs/animalai/communicator_objects/unity_output_pb2.py b/animalai_packages/animalai_envs/animalai/communicator_objects/unity_output_pb2.py
new file mode 100644
index 00000000..5beed76e
--- /dev/null
+++ b/animalai_packages/animalai_envs/animalai/communicator_objects/unity_output_pb2.py
@@ -0,0 +1,83 @@
+# -*- coding: utf-8 -*-
+# Generated by the protocol buffer compiler. DO NOT EDIT!
+# source: animalai/communicator_objects/unity_output.proto
+
+import sys
+_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
+from google.protobuf import descriptor as _descriptor
+from google.protobuf import message as _message
+from google.protobuf import reflection as _reflection
+from google.protobuf import symbol_database as _symbol_database
+# @@protoc_insertion_point(imports)
+
+_sym_db = _symbol_database.Default()
+
+
+from animalai.communicator_objects import unity_rl_output_pb2 as animalai_dot_communicator__objects_dot_unity__rl__output__pb2
+from animalai.communicator_objects import unity_rl_initialization_output_pb2 as animalai_dot_communicator__objects_dot_unity__rl__initialization__output__pb2
+
+
+DESCRIPTOR = _descriptor.FileDescriptor(
+ name='animalai/communicator_objects/unity_output.proto',
+ package='communicator_objects',
+ syntax='proto3',
+ serialized_options=_b('\252\002\034MLAgents.CommunicatorObjects'),
+ serialized_pb=_b('\n0animalai/communicator_objects/unity_output.proto\x12\x14\x63ommunicator_objects\x1a\x33\x61nimalai/communicator_objects/unity_rl_output.proto\x1a\x42\x61nimalai/communicator_objects/unity_rl_initialization_output.proto\"\x9a\x01\n\x0bUnityOutput\x12\x36\n\trl_output\x18\x01 \x01(\x0b\x32#.communicator_objects.UnityRLOutput\x12S\n\x18rl_initialization_output\x18\x02 \x01(\x0b\x32\x31.communicator_objects.UnityRLInitializationOutputB\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3')
+ ,
+ dependencies=[animalai_dot_communicator__objects_dot_unity__rl__output__pb2.DESCRIPTOR,animalai_dot_communicator__objects_dot_unity__rl__initialization__output__pb2.DESCRIPTOR,])
+
+
+
+
+_UNITYOUTPUT = _descriptor.Descriptor(
+ name='UnityOutput',
+ full_name='communicator_objects.UnityOutput',
+ filename=None,
+ file=DESCRIPTOR,
+ containing_type=None,
+ fields=[
+ _descriptor.FieldDescriptor(
+ name='rl_output', full_name='communicator_objects.UnityOutput.rl_output', index=0,
+ number=1, type=11, cpp_type=10, label=1,
+ has_default_value=False, default_value=None,
+ message_type=None, enum_type=None, containing_type=None,
+ is_extension=False, extension_scope=None,
+ serialized_options=None, file=DESCRIPTOR),
+ _descriptor.FieldDescriptor(
+ name='rl_initialization_output', full_name='communicator_objects.UnityOutput.rl_initialization_output', index=1,
+ number=2, type=11, cpp_type=10, label=1,
+ has_default_value=False, default_value=None,
+ message_type=None, enum_type=None, containing_type=None,
+ is_extension=False, extension_scope=None,
+ serialized_options=None, file=DESCRIPTOR),
+ ],
+ extensions=[
+ ],
+ nested_types=[],
+ enum_types=[
+ ],
+ serialized_options=None,
+ is_extendable=False,
+ syntax='proto3',
+ extension_ranges=[],
+ oneofs=[
+ ],
+ serialized_start=196,
+ serialized_end=350,
+)
+
+_UNITYOUTPUT.fields_by_name['rl_output'].message_type = animalai_dot_communicator__objects_dot_unity__rl__output__pb2._UNITYRLOUTPUT
+_UNITYOUTPUT.fields_by_name['rl_initialization_output'].message_type = animalai_dot_communicator__objects_dot_unity__rl__initialization__output__pb2._UNITYRLINITIALIZATIONOUTPUT
+DESCRIPTOR.message_types_by_name['UnityOutput'] = _UNITYOUTPUT
+_sym_db.RegisterFileDescriptor(DESCRIPTOR)
+
+UnityOutput = _reflection.GeneratedProtocolMessageType('UnityOutput', (_message.Message,), {
+ 'DESCRIPTOR' : _UNITYOUTPUT,
+ '__module__' : 'animalai.communicator_objects.unity_output_pb2'
+ # @@protoc_insertion_point(class_scope:communicator_objects.UnityOutput)
+ })
+_sym_db.RegisterMessage(UnityOutput)
+
+
+DESCRIPTOR._options = None
+# @@protoc_insertion_point(module_scope)
diff --git a/animalai_packages/animalai_envs/animalai/communicator_objects/unity_rl_initialization_input_pb2.py b/animalai_packages/animalai_envs/animalai/communicator_objects/unity_rl_initialization_input_pb2.py
new file mode 100644
index 00000000..9b16381b
--- /dev/null
+++ b/animalai_packages/animalai_envs/animalai/communicator_objects/unity_rl_initialization_input_pb2.py
@@ -0,0 +1,71 @@
+# -*- coding: utf-8 -*-
+# Generated by the protocol buffer compiler. DO NOT EDIT!
+# source: animalai/communicator_objects/unity_rl_initialization_input.proto
+
+import sys
+_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
+from google.protobuf import descriptor as _descriptor
+from google.protobuf import message as _message
+from google.protobuf import reflection as _reflection
+from google.protobuf import symbol_database as _symbol_database
+# @@protoc_insertion_point(imports)
+
+_sym_db = _symbol_database.Default()
+
+
+
+
+DESCRIPTOR = _descriptor.FileDescriptor(
+ name='animalai/communicator_objects/unity_rl_initialization_input.proto',
+ package='communicator_objects',
+ syntax='proto3',
+ serialized_options=_b('\252\002\034MLAgents.CommunicatorObjects'),
+ serialized_pb=_b('\nAanimalai/communicator_objects/unity_rl_initialization_input.proto\x12\x14\x63ommunicator_objects\"*\n\x1aUnityRLInitializationInput\x12\x0c\n\x04seed\x18\x01 \x01(\x05\x42\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3')
+)
+
+
+
+
+_UNITYRLINITIALIZATIONINPUT = _descriptor.Descriptor(
+ name='UnityRLInitializationInput',
+ full_name='communicator_objects.UnityRLInitializationInput',
+ filename=None,
+ file=DESCRIPTOR,
+ containing_type=None,
+ fields=[
+ _descriptor.FieldDescriptor(
+ name='seed', full_name='communicator_objects.UnityRLInitializationInput.seed', index=0,
+ number=1, type=5, cpp_type=1, label=1,
+ has_default_value=False, default_value=0,
+ message_type=None, enum_type=None, containing_type=None,
+ is_extension=False, extension_scope=None,
+ serialized_options=None, file=DESCRIPTOR),
+ ],
+ extensions=[
+ ],
+ nested_types=[],
+ enum_types=[
+ ],
+ serialized_options=None,
+ is_extendable=False,
+ syntax='proto3',
+ extension_ranges=[],
+ oneofs=[
+ ],
+ serialized_start=91,
+ serialized_end=133,
+)
+
+DESCRIPTOR.message_types_by_name['UnityRLInitializationInput'] = _UNITYRLINITIALIZATIONINPUT
+_sym_db.RegisterFileDescriptor(DESCRIPTOR)
+
+UnityRLInitializationInput = _reflection.GeneratedProtocolMessageType('UnityRLInitializationInput', (_message.Message,), {
+ 'DESCRIPTOR' : _UNITYRLINITIALIZATIONINPUT,
+ '__module__' : 'animalai.communicator_objects.unity_rl_initialization_input_pb2'
+ # @@protoc_insertion_point(class_scope:communicator_objects.UnityRLInitializationInput)
+ })
+_sym_db.RegisterMessage(UnityRLInitializationInput)
+
+
+DESCRIPTOR._options = None
+# @@protoc_insertion_point(module_scope)
diff --git a/animalai_packages/animalai_envs/animalai/communicator_objects/unity_rl_initialization_output_pb2.py b/animalai_packages/animalai_envs/animalai/communicator_objects/unity_rl_initialization_output_pb2.py
new file mode 100644
index 00000000..1042578f
--- /dev/null
+++ b/animalai_packages/animalai_envs/animalai/communicator_objects/unity_rl_initialization_output_pb2.py
@@ -0,0 +1,95 @@
+# -*- coding: utf-8 -*-
+# Generated by the protocol buffer compiler. DO NOT EDIT!
+# source: animalai/communicator_objects/unity_rl_initialization_output.proto
+
+import sys
+_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
+from google.protobuf import descriptor as _descriptor
+from google.protobuf import message as _message
+from google.protobuf import reflection as _reflection
+from google.protobuf import symbol_database as _symbol_database
+# @@protoc_insertion_point(imports)
+
+_sym_db = _symbol_database.Default()
+
+
+from animalai.communicator_objects import brain_parameters_proto_pb2 as animalai_dot_communicator__objects_dot_brain__parameters__proto__pb2
+
+
+DESCRIPTOR = _descriptor.FileDescriptor(
+ name='animalai/communicator_objects/unity_rl_initialization_output.proto',
+ package='communicator_objects',
+ syntax='proto3',
+ serialized_options=_b('\252\002\034MLAgents.CommunicatorObjects'),
+ serialized_pb=_b('\nBanimalai/communicator_objects/unity_rl_initialization_output.proto\x12\x14\x63ommunicator_objects\x1a:animalai/communicator_objects/brain_parameters_proto.proto\"\x94\x01\n\x1bUnityRLInitializationOutput\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0f\n\x07version\x18\x02 \x01(\t\x12\x10\n\x08log_path\x18\x03 \x01(\t\x12\x44\n\x10\x62rain_parameters\x18\x05 \x03(\x0b\x32*.communicator_objects.BrainParametersProtoB\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3')
+ ,
+ dependencies=[animalai_dot_communicator__objects_dot_brain__parameters__proto__pb2.DESCRIPTOR,])
+
+
+
+
+_UNITYRLINITIALIZATIONOUTPUT = _descriptor.Descriptor(
+ name='UnityRLInitializationOutput',
+ full_name='communicator_objects.UnityRLInitializationOutput',
+ filename=None,
+ file=DESCRIPTOR,
+ containing_type=None,
+ fields=[
+ _descriptor.FieldDescriptor(
+ name='name', full_name='communicator_objects.UnityRLInitializationOutput.name', index=0,
+ number=1, type=9, cpp_type=9, label=1,
+ has_default_value=False, default_value=_b("").decode('utf-8'),
+ message_type=None, enum_type=None, containing_type=None,
+ is_extension=False, extension_scope=None,
+ serialized_options=None, file=DESCRIPTOR),
+ _descriptor.FieldDescriptor(
+ name='version', full_name='communicator_objects.UnityRLInitializationOutput.version', index=1,
+ number=2, type=9, cpp_type=9, label=1,
+ has_default_value=False, default_value=_b("").decode('utf-8'),
+ message_type=None, enum_type=None, containing_type=None,
+ is_extension=False, extension_scope=None,
+ serialized_options=None, file=DESCRIPTOR),
+ _descriptor.FieldDescriptor(
+ name='log_path', full_name='communicator_objects.UnityRLInitializationOutput.log_path', index=2,
+ number=3, type=9, cpp_type=9, label=1,
+ has_default_value=False, default_value=_b("").decode('utf-8'),
+ message_type=None, enum_type=None, containing_type=None,
+ is_extension=False, extension_scope=None,
+ serialized_options=None, file=DESCRIPTOR),
+ _descriptor.FieldDescriptor(
+ name='brain_parameters', full_name='communicator_objects.UnityRLInitializationOutput.brain_parameters', index=3,
+ number=5, type=11, cpp_type=10, label=3,
+ has_default_value=False, default_value=[],
+ message_type=None, enum_type=None, containing_type=None,
+ is_extension=False, extension_scope=None,
+ serialized_options=None, file=DESCRIPTOR),
+ ],
+ extensions=[
+ ],
+ nested_types=[],
+ enum_types=[
+ ],
+ serialized_options=None,
+ is_extendable=False,
+ syntax='proto3',
+ extension_ranges=[],
+ oneofs=[
+ ],
+ serialized_start=153,
+ serialized_end=301,
+)
+
+_UNITYRLINITIALIZATIONOUTPUT.fields_by_name['brain_parameters'].message_type = animalai_dot_communicator__objects_dot_brain__parameters__proto__pb2._BRAINPARAMETERSPROTO
+DESCRIPTOR.message_types_by_name['UnityRLInitializationOutput'] = _UNITYRLINITIALIZATIONOUTPUT
+_sym_db.RegisterFileDescriptor(DESCRIPTOR)
+
+UnityRLInitializationOutput = _reflection.GeneratedProtocolMessageType('UnityRLInitializationOutput', (_message.Message,), {
+ 'DESCRIPTOR' : _UNITYRLINITIALIZATIONOUTPUT,
+ '__module__' : 'animalai.communicator_objects.unity_rl_initialization_output_pb2'
+ # @@protoc_insertion_point(class_scope:communicator_objects.UnityRLInitializationOutput)
+ })
+_sym_db.RegisterMessage(UnityRLInitializationOutput)
+
+
+DESCRIPTOR._options = None
+# @@protoc_insertion_point(module_scope)
diff --git a/animalai_packages/animalai_envs/animalai/communicator_objects/unity_rl_input_pb2.py b/animalai_packages/animalai_envs/animalai/communicator_objects/unity_rl_input_pb2.py
new file mode 100644
index 00000000..29225764
--- /dev/null
+++ b/animalai_packages/animalai_envs/animalai/communicator_objects/unity_rl_input_pb2.py
@@ -0,0 +1,178 @@
+# -*- coding: utf-8 -*-
+# Generated by the protocol buffer compiler. DO NOT EDIT!
+# source: animalai/communicator_objects/unity_rl_input.proto
+
+import sys
+_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
+from google.protobuf import descriptor as _descriptor
+from google.protobuf import message as _message
+from google.protobuf import reflection as _reflection
+from google.protobuf import symbol_database as _symbol_database
+# @@protoc_insertion_point(imports)
+
+_sym_db = _symbol_database.Default()
+
+
+from animalai.communicator_objects import agent_action_proto_pb2 as animalai_dot_communicator__objects_dot_agent__action__proto__pb2
+from animalai.communicator_objects import command_proto_pb2 as animalai_dot_communicator__objects_dot_command__proto__pb2
+
+
+DESCRIPTOR = _descriptor.FileDescriptor(
+ name='animalai/communicator_objects/unity_rl_input.proto',
+ package='communicator_objects',
+ syntax='proto3',
+ serialized_options=_b('\252\002\034MLAgents.CommunicatorObjects'),
+ serialized_pb=_b('\n2animalai/communicator_objects/unity_rl_input.proto\x12\x14\x63ommunicator_objects\x1a\x36\x61nimalai/communicator_objects/agent_action_proto.proto\x1a\x31\x61nimalai/communicator_objects/command_proto.proto\"\xe2\x02\n\x0cUnityRLInput\x12K\n\ragent_actions\x18\x01 \x03(\x0b\x32\x34.communicator_objects.UnityRLInput.AgentActionsEntry\x12\x13\n\x0bis_training\x18\x02 \x01(\x08\x12\x33\n\x07\x63ommand\x18\x03 \x01(\x0e\x32\".communicator_objects.CommandProto\x1aM\n\x14ListAgentActionProto\x12\x35\n\x05value\x18\x01 \x03(\x0b\x32&.communicator_objects.AgentActionProto\x1al\n\x11\x41gentActionsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x46\n\x05value\x18\x02 \x01(\x0b\x32\x37.communicator_objects.UnityRLInput.ListAgentActionProto:\x02\x38\x01\x42\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3')
+ ,
+ dependencies=[animalai_dot_communicator__objects_dot_agent__action__proto__pb2.DESCRIPTOR,animalai_dot_communicator__objects_dot_command__proto__pb2.DESCRIPTOR,])
+
+
+
+
+_UNITYRLINPUT_LISTAGENTACTIONPROTO = _descriptor.Descriptor(
+ name='ListAgentActionProto',
+ full_name='communicator_objects.UnityRLInput.ListAgentActionProto',
+ filename=None,
+ file=DESCRIPTOR,
+ containing_type=None,
+ fields=[
+ _descriptor.FieldDescriptor(
+ name='value', full_name='communicator_objects.UnityRLInput.ListAgentActionProto.value', index=0,
+ number=1, type=11, cpp_type=10, label=3,
+ has_default_value=False, default_value=[],
+ message_type=None, enum_type=None, containing_type=None,
+ is_extension=False, extension_scope=None,
+ serialized_options=None, file=DESCRIPTOR),
+ ],
+ extensions=[
+ ],
+ nested_types=[],
+ enum_types=[
+ ],
+ serialized_options=None,
+ is_extendable=False,
+ syntax='proto3',
+ extension_ranges=[],
+ oneofs=[
+ ],
+ serialized_start=351,
+ serialized_end=428,
+)
+
+_UNITYRLINPUT_AGENTACTIONSENTRY = _descriptor.Descriptor(
+ name='AgentActionsEntry',
+ full_name='communicator_objects.UnityRLInput.AgentActionsEntry',
+ filename=None,
+ file=DESCRIPTOR,
+ containing_type=None,
+ fields=[
+ _descriptor.FieldDescriptor(
+ name='key', full_name='communicator_objects.UnityRLInput.AgentActionsEntry.key', index=0,
+ number=1, type=9, cpp_type=9, label=1,
+ has_default_value=False, default_value=_b("").decode('utf-8'),
+ message_type=None, enum_type=None, containing_type=None,
+ is_extension=False, extension_scope=None,
+ serialized_options=None, file=DESCRIPTOR),
+ _descriptor.FieldDescriptor(
+ name='value', full_name='communicator_objects.UnityRLInput.AgentActionsEntry.value', index=1,
+ number=2, type=11, cpp_type=10, label=1,
+ has_default_value=False, default_value=None,
+ message_type=None, enum_type=None, containing_type=None,
+ is_extension=False, extension_scope=None,
+ serialized_options=None, file=DESCRIPTOR),
+ ],
+ extensions=[
+ ],
+ nested_types=[],
+ enum_types=[
+ ],
+ serialized_options=_b('8\001'),
+ is_extendable=False,
+ syntax='proto3',
+ extension_ranges=[],
+ oneofs=[
+ ],
+ serialized_start=430,
+ serialized_end=538,
+)
+
+_UNITYRLINPUT = _descriptor.Descriptor(
+ name='UnityRLInput',
+ full_name='communicator_objects.UnityRLInput',
+ filename=None,
+ file=DESCRIPTOR,
+ containing_type=None,
+ fields=[
+ _descriptor.FieldDescriptor(
+ name='agent_actions', full_name='communicator_objects.UnityRLInput.agent_actions', index=0,
+ number=1, type=11, cpp_type=10, label=3,
+ has_default_value=False, default_value=[],
+ message_type=None, enum_type=None, containing_type=None,
+ is_extension=False, extension_scope=None,
+ serialized_options=None, file=DESCRIPTOR),
+ _descriptor.FieldDescriptor(
+ name='is_training', full_name='communicator_objects.UnityRLInput.is_training', index=1,
+ number=2, type=8, cpp_type=7, label=1,
+ has_default_value=False, default_value=False,
+ message_type=None, enum_type=None, containing_type=None,
+ is_extension=False, extension_scope=None,
+ serialized_options=None, file=DESCRIPTOR),
+ _descriptor.FieldDescriptor(
+ name='command', full_name='communicator_objects.UnityRLInput.command', index=2,
+ number=3, type=14, cpp_type=8, label=1,
+ has_default_value=False, default_value=0,
+ message_type=None, enum_type=None, containing_type=None,
+ is_extension=False, extension_scope=None,
+ serialized_options=None, file=DESCRIPTOR),
+ ],
+ extensions=[
+ ],
+ nested_types=[_UNITYRLINPUT_LISTAGENTACTIONPROTO, _UNITYRLINPUT_AGENTACTIONSENTRY, ],
+ enum_types=[
+ ],
+ serialized_options=None,
+ is_extendable=False,
+ syntax='proto3',
+ extension_ranges=[],
+ oneofs=[
+ ],
+ serialized_start=184,
+ serialized_end=538,
+)
+
+_UNITYRLINPUT_LISTAGENTACTIONPROTO.fields_by_name['value'].message_type = animalai_dot_communicator__objects_dot_agent__action__proto__pb2._AGENTACTIONPROTO
+_UNITYRLINPUT_LISTAGENTACTIONPROTO.containing_type = _UNITYRLINPUT
+_UNITYRLINPUT_AGENTACTIONSENTRY.fields_by_name['value'].message_type = _UNITYRLINPUT_LISTAGENTACTIONPROTO
+_UNITYRLINPUT_AGENTACTIONSENTRY.containing_type = _UNITYRLINPUT
+_UNITYRLINPUT.fields_by_name['agent_actions'].message_type = _UNITYRLINPUT_AGENTACTIONSENTRY
+_UNITYRLINPUT.fields_by_name['command'].enum_type = animalai_dot_communicator__objects_dot_command__proto__pb2._COMMANDPROTO
+DESCRIPTOR.message_types_by_name['UnityRLInput'] = _UNITYRLINPUT
+_sym_db.RegisterFileDescriptor(DESCRIPTOR)
+
+UnityRLInput = _reflection.GeneratedProtocolMessageType('UnityRLInput', (_message.Message,), {
+
+ 'ListAgentActionProto' : _reflection.GeneratedProtocolMessageType('ListAgentActionProto', (_message.Message,), {
+ 'DESCRIPTOR' : _UNITYRLINPUT_LISTAGENTACTIONPROTO,
+ '__module__' : 'animalai.communicator_objects.unity_rl_input_pb2'
+ # @@protoc_insertion_point(class_scope:communicator_objects.UnityRLInput.ListAgentActionProto)
+ })
+ ,
+
+ 'AgentActionsEntry' : _reflection.GeneratedProtocolMessageType('AgentActionsEntry', (_message.Message,), {
+ 'DESCRIPTOR' : _UNITYRLINPUT_AGENTACTIONSENTRY,
+ '__module__' : 'animalai.communicator_objects.unity_rl_input_pb2'
+ # @@protoc_insertion_point(class_scope:communicator_objects.UnityRLInput.AgentActionsEntry)
+ })
+ ,
+ 'DESCRIPTOR' : _UNITYRLINPUT,
+ '__module__' : 'animalai.communicator_objects.unity_rl_input_pb2'
+ # @@protoc_insertion_point(class_scope:communicator_objects.UnityRLInput)
+ })
+_sym_db.RegisterMessage(UnityRLInput)
+_sym_db.RegisterMessage(UnityRLInput.ListAgentActionProto)
+_sym_db.RegisterMessage(UnityRLInput.AgentActionsEntry)
+
+
+DESCRIPTOR._options = None
+_UNITYRLINPUT_AGENTACTIONSENTRY._options = None
+# @@protoc_insertion_point(module_scope)
diff --git a/animalai_packages/animalai_envs/animalai/communicator_objects/unity_rl_output_pb2.py b/animalai_packages/animalai_envs/animalai/communicator_objects/unity_rl_output_pb2.py
new file mode 100644
index 00000000..a35cdd20
--- /dev/null
+++ b/animalai_packages/animalai_envs/animalai/communicator_objects/unity_rl_output_pb2.py
@@ -0,0 +1,169 @@
+# -*- coding: utf-8 -*-
+# Generated by the protocol buffer compiler. DO NOT EDIT!
+# source: animalai/communicator_objects/unity_rl_output.proto
+
+import sys
+_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
+from google.protobuf import descriptor as _descriptor
+from google.protobuf import message as _message
+from google.protobuf import reflection as _reflection
+from google.protobuf import symbol_database as _symbol_database
+# @@protoc_insertion_point(imports)
+
+_sym_db = _symbol_database.Default()
+
+
+from animalai.communicator_objects import agent_info_proto_pb2 as animalai_dot_communicator__objects_dot_agent__info__proto__pb2
+
+
+DESCRIPTOR = _descriptor.FileDescriptor(
+ name='animalai/communicator_objects/unity_rl_output.proto',
+ package='communicator_objects',
+ syntax='proto3',
+ serialized_options=_b('\252\002\034MLAgents.CommunicatorObjects'),
+ serialized_pb=_b('\n3animalai/communicator_objects/unity_rl_output.proto\x12\x14\x63ommunicator_objects\x1a\x34\x61nimalai/communicator_objects/agent_info_proto.proto\"\xa3\x02\n\rUnityRLOutput\x12\x13\n\x0bglobal_done\x18\x01 \x01(\x08\x12G\n\nagentInfos\x18\x02 \x03(\x0b\x32\x33.communicator_objects.UnityRLOutput.AgentInfosEntry\x1aI\n\x12ListAgentInfoProto\x12\x33\n\x05value\x18\x01 \x03(\x0b\x32$.communicator_objects.AgentInfoProto\x1ai\n\x0f\x41gentInfosEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x45\n\x05value\x18\x02 \x01(\x0b\x32\x36.communicator_objects.UnityRLOutput.ListAgentInfoProto:\x02\x38\x01\x42\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3')
+ ,
+ dependencies=[animalai_dot_communicator__objects_dot_agent__info__proto__pb2.DESCRIPTOR,])
+
+
+
+
+_UNITYRLOUTPUT_LISTAGENTINFOPROTO = _descriptor.Descriptor(
+ name='ListAgentInfoProto',
+ full_name='communicator_objects.UnityRLOutput.ListAgentInfoProto',
+ filename=None,
+ file=DESCRIPTOR,
+ containing_type=None,
+ fields=[
+ _descriptor.FieldDescriptor(
+ name='value', full_name='communicator_objects.UnityRLOutput.ListAgentInfoProto.value', index=0,
+ number=1, type=11, cpp_type=10, label=3,
+ has_default_value=False, default_value=[],
+ message_type=None, enum_type=None, containing_type=None,
+ is_extension=False, extension_scope=None,
+ serialized_options=None, file=DESCRIPTOR),
+ ],
+ extensions=[
+ ],
+ nested_types=[],
+ enum_types=[
+ ],
+ serialized_options=None,
+ is_extendable=False,
+ syntax='proto3',
+ extension_ranges=[],
+ oneofs=[
+ ],
+ serialized_start=243,
+ serialized_end=316,
+)
+
+_UNITYRLOUTPUT_AGENTINFOSENTRY = _descriptor.Descriptor(
+ name='AgentInfosEntry',
+ full_name='communicator_objects.UnityRLOutput.AgentInfosEntry',
+ filename=None,
+ file=DESCRIPTOR,
+ containing_type=None,
+ fields=[
+ _descriptor.FieldDescriptor(
+ name='key', full_name='communicator_objects.UnityRLOutput.AgentInfosEntry.key', index=0,
+ number=1, type=9, cpp_type=9, label=1,
+ has_default_value=False, default_value=_b("").decode('utf-8'),
+ message_type=None, enum_type=None, containing_type=None,
+ is_extension=False, extension_scope=None,
+ serialized_options=None, file=DESCRIPTOR),
+ _descriptor.FieldDescriptor(
+ name='value', full_name='communicator_objects.UnityRLOutput.AgentInfosEntry.value', index=1,
+ number=2, type=11, cpp_type=10, label=1,
+ has_default_value=False, default_value=None,
+ message_type=None, enum_type=None, containing_type=None,
+ is_extension=False, extension_scope=None,
+ serialized_options=None, file=DESCRIPTOR),
+ ],
+ extensions=[
+ ],
+ nested_types=[],
+ enum_types=[
+ ],
+ serialized_options=_b('8\001'),
+ is_extendable=False,
+ syntax='proto3',
+ extension_ranges=[],
+ oneofs=[
+ ],
+ serialized_start=318,
+ serialized_end=423,
+)
+
+_UNITYRLOUTPUT = _descriptor.Descriptor(
+ name='UnityRLOutput',
+ full_name='communicator_objects.UnityRLOutput',
+ filename=None,
+ file=DESCRIPTOR,
+ containing_type=None,
+ fields=[
+ _descriptor.FieldDescriptor(
+ name='global_done', full_name='communicator_objects.UnityRLOutput.global_done', index=0,
+ number=1, type=8, cpp_type=7, label=1,
+ has_default_value=False, default_value=False,
+ message_type=None, enum_type=None, containing_type=None,
+ is_extension=False, extension_scope=None,
+ serialized_options=None, file=DESCRIPTOR),
+ _descriptor.FieldDescriptor(
+ name='agentInfos', full_name='communicator_objects.UnityRLOutput.agentInfos', index=1,
+ number=2, type=11, cpp_type=10, label=3,
+ has_default_value=False, default_value=[],
+ message_type=None, enum_type=None, containing_type=None,
+ is_extension=False, extension_scope=None,
+ serialized_options=None, file=DESCRIPTOR),
+ ],
+ extensions=[
+ ],
+ nested_types=[_UNITYRLOUTPUT_LISTAGENTINFOPROTO, _UNITYRLOUTPUT_AGENTINFOSENTRY, ],
+ enum_types=[
+ ],
+ serialized_options=None,
+ is_extendable=False,
+ syntax='proto3',
+ extension_ranges=[],
+ oneofs=[
+ ],
+ serialized_start=132,
+ serialized_end=423,
+)
+
+_UNITYRLOUTPUT_LISTAGENTINFOPROTO.fields_by_name['value'].message_type = animalai_dot_communicator__objects_dot_agent__info__proto__pb2._AGENTINFOPROTO
+_UNITYRLOUTPUT_LISTAGENTINFOPROTO.containing_type = _UNITYRLOUTPUT
+_UNITYRLOUTPUT_AGENTINFOSENTRY.fields_by_name['value'].message_type = _UNITYRLOUTPUT_LISTAGENTINFOPROTO
+_UNITYRLOUTPUT_AGENTINFOSENTRY.containing_type = _UNITYRLOUTPUT
+_UNITYRLOUTPUT.fields_by_name['agentInfos'].message_type = _UNITYRLOUTPUT_AGENTINFOSENTRY
+DESCRIPTOR.message_types_by_name['UnityRLOutput'] = _UNITYRLOUTPUT
+_sym_db.RegisterFileDescriptor(DESCRIPTOR)
+
+UnityRLOutput = _reflection.GeneratedProtocolMessageType('UnityRLOutput', (_message.Message,), {
+
+ 'ListAgentInfoProto' : _reflection.GeneratedProtocolMessageType('ListAgentInfoProto', (_message.Message,), {
+ 'DESCRIPTOR' : _UNITYRLOUTPUT_LISTAGENTINFOPROTO,
+ '__module__' : 'animalai.communicator_objects.unity_rl_output_pb2'
+ # @@protoc_insertion_point(class_scope:communicator_objects.UnityRLOutput.ListAgentInfoProto)
+ })
+ ,
+
+ 'AgentInfosEntry' : _reflection.GeneratedProtocolMessageType('AgentInfosEntry', (_message.Message,), {
+ 'DESCRIPTOR' : _UNITYRLOUTPUT_AGENTINFOSENTRY,
+ '__module__' : 'animalai.communicator_objects.unity_rl_output_pb2'
+ # @@protoc_insertion_point(class_scope:communicator_objects.UnityRLOutput.AgentInfosEntry)
+ })
+ ,
+ 'DESCRIPTOR' : _UNITYRLOUTPUT,
+ '__module__' : 'animalai.communicator_objects.unity_rl_output_pb2'
+ # @@protoc_insertion_point(class_scope:communicator_objects.UnityRLOutput)
+ })
+_sym_db.RegisterMessage(UnityRLOutput)
+_sym_db.RegisterMessage(UnityRLOutput.ListAgentInfoProto)
+_sym_db.RegisterMessage(UnityRLOutput.AgentInfosEntry)
+
+
+DESCRIPTOR._options = None
+_UNITYRLOUTPUT_AGENTINFOSENTRY._options = None
+# @@protoc_insertion_point(module_scope)
diff --git a/animalai_packages/animalai_envs/animalai/communicator_objects/unity_rl_reset_input_pb2.py b/animalai_packages/animalai_envs/animalai/communicator_objects/unity_rl_reset_input_pb2.py
new file mode 100644
index 00000000..e07a7286
--- /dev/null
+++ b/animalai_packages/animalai_envs/animalai/communicator_objects/unity_rl_reset_input_pb2.py
@@ -0,0 +1,122 @@
+# -*- coding: utf-8 -*-
+# Generated by the protocol buffer compiler. DO NOT EDIT!
+# source: animalai/communicator_objects/unity_rl_reset_input.proto
+
+import sys
+_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
+from google.protobuf import descriptor as _descriptor
+from google.protobuf import message as _message
+from google.protobuf import reflection as _reflection
+from google.protobuf import symbol_database as _symbol_database
+# @@protoc_insertion_point(imports)
+
+_sym_db = _symbol_database.Default()
+
+
+from animalai.communicator_objects import arena_parameters_proto_pb2 as animalai_dot_communicator__objects_dot_arena__parameters__proto__pb2
+
+
+DESCRIPTOR = _descriptor.FileDescriptor(
+ name='animalai/communicator_objects/unity_rl_reset_input.proto',
+ package='communicator_objects',
+ syntax='proto3',
+ serialized_options=_b('\252\002\034MLAgents.CommunicatorObjects'),
+ serialized_pb=_b('\n8animalai/communicator_objects/unity_rl_reset_input.proto\x12\x14\x63ommunicator_objects\x1a:animalai/communicator_objects/arena_parameters_proto.proto\"\xb3\x01\n\x11UnityRLResetInput\x12\x43\n\x06\x61renas\x18\x01 \x03(\x0b\x32\x33.communicator_objects.UnityRLResetInput.ArenasEntry\x1aY\n\x0b\x41renasEntry\x12\x0b\n\x03key\x18\x01 \x01(\x05\x12\x39\n\x05value\x18\x02 \x01(\x0b\x32*.communicator_objects.ArenaParametersProto:\x02\x38\x01\x42\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3')
+ ,
+ dependencies=[animalai_dot_communicator__objects_dot_arena__parameters__proto__pb2.DESCRIPTOR,])
+
+
+
+
+_UNITYRLRESETINPUT_ARENASENTRY = _descriptor.Descriptor(
+ name='ArenasEntry',
+ full_name='communicator_objects.UnityRLResetInput.ArenasEntry',
+ filename=None,
+ file=DESCRIPTOR,
+ containing_type=None,
+ fields=[
+ _descriptor.FieldDescriptor(
+ name='key', full_name='communicator_objects.UnityRLResetInput.ArenasEntry.key', index=0,
+ number=1, type=5, cpp_type=1, label=1,
+ has_default_value=False, default_value=0,
+ message_type=None, enum_type=None, containing_type=None,
+ is_extension=False, extension_scope=None,
+ serialized_options=None, file=DESCRIPTOR),
+ _descriptor.FieldDescriptor(
+ name='value', full_name='communicator_objects.UnityRLResetInput.ArenasEntry.value', index=1,
+ number=2, type=11, cpp_type=10, label=1,
+ has_default_value=False, default_value=None,
+ message_type=None, enum_type=None, containing_type=None,
+ is_extension=False, extension_scope=None,
+ serialized_options=None, file=DESCRIPTOR),
+ ],
+ extensions=[
+ ],
+ nested_types=[],
+ enum_types=[
+ ],
+ serialized_options=_b('8\001'),
+ is_extendable=False,
+ syntax='proto3',
+ extension_ranges=[],
+ oneofs=[
+ ],
+ serialized_start=233,
+ serialized_end=322,
+)
+
+_UNITYRLRESETINPUT = _descriptor.Descriptor(
+ name='UnityRLResetInput',
+ full_name='communicator_objects.UnityRLResetInput',
+ filename=None,
+ file=DESCRIPTOR,
+ containing_type=None,
+ fields=[
+ _descriptor.FieldDescriptor(
+ name='arenas', full_name='communicator_objects.UnityRLResetInput.arenas', index=0,
+ number=1, type=11, cpp_type=10, label=3,
+ has_default_value=False, default_value=[],
+ message_type=None, enum_type=None, containing_type=None,
+ is_extension=False, extension_scope=None,
+ serialized_options=None, file=DESCRIPTOR),
+ ],
+ extensions=[
+ ],
+ nested_types=[_UNITYRLRESETINPUT_ARENASENTRY, ],
+ enum_types=[
+ ],
+ serialized_options=None,
+ is_extendable=False,
+ syntax='proto3',
+ extension_ranges=[],
+ oneofs=[
+ ],
+ serialized_start=143,
+ serialized_end=322,
+)
+
+_UNITYRLRESETINPUT_ARENASENTRY.fields_by_name['value'].message_type = animalai_dot_communicator__objects_dot_arena__parameters__proto__pb2._ARENAPARAMETERSPROTO
+_UNITYRLRESETINPUT_ARENASENTRY.containing_type = _UNITYRLRESETINPUT
+_UNITYRLRESETINPUT.fields_by_name['arenas'].message_type = _UNITYRLRESETINPUT_ARENASENTRY
+DESCRIPTOR.message_types_by_name['UnityRLResetInput'] = _UNITYRLRESETINPUT
+_sym_db.RegisterFileDescriptor(DESCRIPTOR)
+
+UnityRLResetInput = _reflection.GeneratedProtocolMessageType('UnityRLResetInput', (_message.Message,), {
+
+ 'ArenasEntry' : _reflection.GeneratedProtocolMessageType('ArenasEntry', (_message.Message,), {
+ 'DESCRIPTOR' : _UNITYRLRESETINPUT_ARENASENTRY,
+ '__module__' : 'animalai.communicator_objects.unity_rl_reset_input_pb2'
+ # @@protoc_insertion_point(class_scope:communicator_objects.UnityRLResetInput.ArenasEntry)
+ })
+ ,
+ 'DESCRIPTOR' : _UNITYRLRESETINPUT,
+ '__module__' : 'animalai.communicator_objects.unity_rl_reset_input_pb2'
+ # @@protoc_insertion_point(class_scope:communicator_objects.UnityRLResetInput)
+ })
+_sym_db.RegisterMessage(UnityRLResetInput)
+_sym_db.RegisterMessage(UnityRLResetInput.ArenasEntry)
+
+
+DESCRIPTOR._options = None
+_UNITYRLRESETINPUT_ARENASENTRY._options = None
+# @@protoc_insertion_point(module_scope)
diff --git a/animalai_packages/animalai_envs/animalai/communicator_objects/unity_rl_reset_output_pb2.py b/animalai_packages/animalai_envs/animalai/communicator_objects/unity_rl_reset_output_pb2.py
new file mode 100644
index 00000000..4b5a24c2
--- /dev/null
+++ b/animalai_packages/animalai_envs/animalai/communicator_objects/unity_rl_reset_output_pb2.py
@@ -0,0 +1,71 @@
+# -*- coding: utf-8 -*-
+# Generated by the protocol buffer compiler. DO NOT EDIT!
+# source: animalai/communicator_objects/unity_rl_reset_output.proto
+
+import sys
+_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
+from google.protobuf import descriptor as _descriptor
+from google.protobuf import message as _message
+from google.protobuf import reflection as _reflection
+from google.protobuf import symbol_database as _symbol_database
+# @@protoc_insertion_point(imports)
+
+_sym_db = _symbol_database.Default()
+
+
+
+
+DESCRIPTOR = _descriptor.FileDescriptor(
+ name='animalai/communicator_objects/unity_rl_reset_output.proto',
+ package='communicator_objects',
+ syntax='proto3',
+ serialized_options=_b('\252\002\034MLAgents.CommunicatorObjects'),
+ serialized_pb=_b('\n9animalai/communicator_objects/unity_rl_reset_output.proto\x12\x14\x63ommunicator_objects\"1\n\x12UnityRLResetOutput\x12\x1b\n\x13\x61renas_instanciated\x18\x01 \x03(\x08\x42\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3')
+)
+
+
+
+
+_UNITYRLRESETOUTPUT = _descriptor.Descriptor(
+ name='UnityRLResetOutput',
+ full_name='communicator_objects.UnityRLResetOutput',
+ filename=None,
+ file=DESCRIPTOR,
+ containing_type=None,
+ fields=[
+ _descriptor.FieldDescriptor(
+ name='arenas_instanciated', full_name='communicator_objects.UnityRLResetOutput.arenas_instanciated', index=0,
+ number=1, type=8, cpp_type=7, label=3,
+ has_default_value=False, default_value=[],
+ message_type=None, enum_type=None, containing_type=None,
+ is_extension=False, extension_scope=None,
+ serialized_options=None, file=DESCRIPTOR),
+ ],
+ extensions=[
+ ],
+ nested_types=[],
+ enum_types=[
+ ],
+ serialized_options=None,
+ is_extendable=False,
+ syntax='proto3',
+ extension_ranges=[],
+ oneofs=[
+ ],
+ serialized_start=83,
+ serialized_end=132,
+)
+
+DESCRIPTOR.message_types_by_name['UnityRLResetOutput'] = _UNITYRLRESETOUTPUT
+_sym_db.RegisterFileDescriptor(DESCRIPTOR)
+
+UnityRLResetOutput = _reflection.GeneratedProtocolMessageType('UnityRLResetOutput', (_message.Message,), {
+ 'DESCRIPTOR' : _UNITYRLRESETOUTPUT,
+ '__module__' : 'animalai.communicator_objects.unity_rl_reset_output_pb2'
+ # @@protoc_insertion_point(class_scope:communicator_objects.UnityRLResetOutput)
+ })
+_sym_db.RegisterMessage(UnityRLResetOutput)
+
+
+DESCRIPTOR._options = None
+# @@protoc_insertion_point(module_scope)
diff --git a/animalai_packages/animalai_envs/animalai/communicator_objects/unity_to_external_pb2.py b/animalai_packages/animalai_envs/animalai/communicator_objects/unity_to_external_pb2.py
new file mode 100644
index 00000000..852b3813
--- /dev/null
+++ b/animalai_packages/animalai_envs/animalai/communicator_objects/unity_to_external_pb2.py
@@ -0,0 +1,57 @@
+# Generated by the protocol buffer compiler. DO NOT EDIT!
+# source: animalai/communicator_objects/unity_to_external.proto
+
+import sys
+_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
+from google.protobuf import descriptor as _descriptor
+from google.protobuf import message as _message
+from google.protobuf import reflection as _reflection
+from google.protobuf import symbol_database as _symbol_database
+# @@protoc_insertion_point(imports)
+
+_sym_db = _symbol_database.Default()
+
+
+from animalai.communicator_objects import unity_message_pb2 as animalai_dot_communicator__objects_dot_unity__message__pb2
+
+
+DESCRIPTOR = _descriptor.FileDescriptor(
+ name='animalai/communicator_objects/unity_to_external.proto',
+ package='communicator_objects',
+ syntax='proto3',
+ serialized_options=_b('\252\002\034MLAgents.CommunicatorObjects'),
+ serialized_pb=_b('\n5animalai/communicator_objects/unity_to_external.proto\x12\x14\x63ommunicator_objects\x1a\x31\x61nimalai/communicator_objects/unity_message.proto2g\n\x0fUnityToExternal\x12T\n\x08\x45xchange\x12\".communicator_objects.UnityMessage\x1a\".communicator_objects.UnityMessage\"\x00\x42\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3')
+ ,
+ dependencies=[animalai_dot_communicator__objects_dot_unity__message__pb2.DESCRIPTOR,])
+
+
+
+_sym_db.RegisterFileDescriptor(DESCRIPTOR)
+
+
+DESCRIPTOR._options = None
+
+_UNITYTOEXTERNAL = _descriptor.ServiceDescriptor(
+ name='UnityToExternal',
+ full_name='communicator_objects.UnityToExternal',
+ file=DESCRIPTOR,
+ index=0,
+ serialized_options=None,
+ serialized_start=130,
+ serialized_end=233,
+ methods=[
+ _descriptor.MethodDescriptor(
+ name='Exchange',
+ full_name='communicator_objects.UnityToExternal.Exchange',
+ index=0,
+ containing_service=None,
+ input_type=animalai_dot_communicator__objects_dot_unity__message__pb2._UNITYMESSAGE,
+ output_type=animalai_dot_communicator__objects_dot_unity__message__pb2._UNITYMESSAGE,
+ serialized_options=None,
+ ),
+])
+_sym_db.RegisterServiceDescriptor(_UNITYTOEXTERNAL)
+
+DESCRIPTOR.services_by_name['UnityToExternal'] = _UNITYTOEXTERNAL
+
+# @@protoc_insertion_point(module_scope)
diff --git a/animalai_packages/animalai_envs/animalai/communicator_objects/unity_to_external_pb2_grpc.py b/animalai_packages/animalai_envs/animalai/communicator_objects/unity_to_external_pb2_grpc.py
new file mode 100644
index 00000000..f1406fa1
--- /dev/null
+++ b/animalai_packages/animalai_envs/animalai/communicator_objects/unity_to_external_pb2_grpc.py
@@ -0,0 +1,46 @@
+# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!
+import grpc
+
+from animalai.communicator_objects import unity_message_pb2 as animalai_dot_communicator__objects_dot_unity__message__pb2
+
+
+class UnityToExternalStub(object):
+ # missing associated documentation comment in .proto file
+ pass
+
+ def __init__(self, channel):
+ """Constructor.
+
+ Args:
+ channel: A grpc.Channel.
+ """
+ self.Exchange = channel.unary_unary(
+ '/communicator_objects.UnityToExternal/Exchange',
+ request_serializer=animalai_dot_communicator__objects_dot_unity__message__pb2.UnityMessage.SerializeToString,
+ response_deserializer=animalai_dot_communicator__objects_dot_unity__message__pb2.UnityMessage.FromString,
+ )
+
+
+class UnityToExternalServicer(object):
+ # missing associated documentation comment in .proto file
+ pass
+
+ def Exchange(self, request, context):
+ """Sends the academy parameters
+ """
+ context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+ context.set_details('Method not implemented!')
+ raise NotImplementedError('Method not implemented!')
+
+
+def add_UnityToExternalServicer_to_server(servicer, server):
+ rpc_method_handlers = {
+ 'Exchange': grpc.unary_unary_rpc_method_handler(
+ servicer.Exchange,
+ request_deserializer=animalai_dot_communicator__objects_dot_unity__message__pb2.UnityMessage.FromString,
+ response_serializer=animalai_dot_communicator__objects_dot_unity__message__pb2.UnityMessage.SerializeToString,
+ ),
+ }
+ generic_handler = grpc.method_handlers_generic_handler(
+ 'communicator_objects.UnityToExternal', rpc_method_handlers)
+ server.add_generic_rpc_handlers((generic_handler,))
diff --git a/animalai_packages/animalai_envs/animalai/envs/__init__.py b/animalai_packages/animalai_envs/animalai/envs/__init__.py
new file mode 100644
index 00000000..93960d2f
--- /dev/null
+++ b/animalai_packages/animalai_envs/animalai/envs/__init__.py
@@ -0,0 +1,3 @@
+from .environment import *
+from .brain import *
+from .exception import *
diff --git a/animalai_packages/animalai_envs/animalai/envs/arena_config.py b/animalai_packages/animalai_envs/animalai/envs/arena_config.py
new file mode 100644
index 00000000..8e48b1a0
--- /dev/null
+++ b/animalai_packages/animalai_envs/animalai/envs/arena_config.py
@@ -0,0 +1,115 @@
+import json
+import jsonpickle
+import yaml
+import copy
+
+from animalai.communicator_objects import UnityRLResetInput, ArenaParametersProto
+
+yaml.Dumper.ignore_aliases = lambda *args: True
+
+
+class Vector3(yaml.YAMLObject):
+ yaml_tag = u'!Vector3'
+
+ def __init__(self, x=0, y=0, z=0):
+ self.x = x
+ self.y = y
+ self.z = z
+
+ def to_proto(self):
+ res = ArenaParametersProto.ItemsToSpawn.Vector3()
+ res.x = self.x
+ res.y = self.y
+ res.z = self.z
+
+ return res
+
+
+class RGB(yaml.YAMLObject):
+ yaml_tag = u'!RGB'
+
+ def __init__(self, r=0, g=0, b=0):
+ self.r = r
+ self.g = g
+ self.b = b
+
+ def to_proto(self):
+ res = ArenaParametersProto.ItemsToSpawn.Vector3()
+ res.x = self.r
+ res.y = self.g
+ res.z = self.b
+
+ return res
+
+
+class Item(yaml.YAMLObject):
+ yaml_tag = u'!Item'
+
+ def __init__(self, name='', positions=None, rotations=None, sizes=None, colors=None):
+ self.name = name
+ self.positions = positions if positions is not None else []
+ self.rotations = rotations if rotations is not None else []
+ self.sizes = sizes if sizes is not None else []
+ self.colors = colors if colors is not None else []
+
+
+class Arena(yaml.YAMLObject):
+ yaml_tag = u'!Arena'
+
+ def __init__(self, t=1000, items=None, blackouts=None):
+ self.t = t
+ self.items = items if items is not None else {}
+ self.blackouts = blackouts if blackouts is not None else []
+
+
+class ArenaConfig(yaml.YAMLObject):
+ yaml_tag = u'!ArenaConfig'
+
+ def __init__(self, yaml_path=None):
+
+ if yaml_path is not None:
+ self.arenas = yaml.load(open(yaml_path, 'r'), Loader=yaml.Loader).arenas
+ else:
+ self.arenas = {}
+
+ def save_config(self, json_path):
+ out = jsonpickle.encode(self.arenas)
+ out = json.loads(out)
+ json.dump(out, open(json_path, 'w'), indent=4)
+
+ def dict_to_arena_config(self) -> UnityRLResetInput:
+ config_out = UnityRLResetInput()
+
+ for k in self.arenas:
+ config_out.arenas[k].CopyFrom(ArenaParametersProto())
+ config_out.arenas[k].t = self.arenas[k].t
+ config_out.arenas[k].blackouts.extend(self.arenas[k].blackouts)
+ for item in self.arenas[k].items:
+ to_spawn = config_out.arenas[k].items.add()
+ to_spawn.name = item.name
+ to_spawn.positions.extend([v.to_proto() for v in item.positions])
+ to_spawn.rotations.extend(item.rotations)
+ to_spawn.sizes.extend([v.to_proto() for v in item.sizes])
+ to_spawn.colors.extend([v.to_proto() for v in item.colors])
+
+ return config_out
+
+ def update(self, arenas_configurations_input):
+
+ if arenas_configurations_input is not None:
+ for arena_i in arenas_configurations_input.arenas:
+ self.arenas[arena_i] = copy.copy(arenas_configurations_input.arenas[arena_i])
+
+
+def constructor_arena(loader, node):
+ fields = loader.construct_mapping(node)
+ return Arena(**fields)
+
+
+def constructor_item(loader, node):
+ fields = loader.construct_mapping(node)
+ return Item(**fields)
+
+
+yaml.add_constructor(u'!Arena', constructor_arena)
+yaml.add_constructor(u'!Item', constructor_item)
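For orientation, a minimal sketch of how the configuration class above is typically driven; the YAML path is a hypothetical example, not part of this patch:

    from animalai.envs.arena_config import ArenaConfig

    # Parse arena definitions written with the !ArenaConfig/!Arena/!Item tags registered above.
    config = ArenaConfig('configs/1-Food.yaml')  # hypothetical file path
    # Convert the parsed arenas into the UnityRLResetInput protobuf that reset() sends to Unity.
    reset_proto = config.dict_to_arena_config()
    print(len(reset_proto.arenas), 'arena(s) configured')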
diff --git a/animalai_packages/animalai_envs/animalai/envs/brain.py b/animalai_packages/animalai_envs/animalai/envs/brain.py
new file mode 100644
index 00000000..06940fd2
--- /dev/null
+++ b/animalai_packages/animalai_envs/animalai/envs/brain.py
@@ -0,0 +1,149 @@
+import logging
+import numpy as np
+import io
+
+from typing import Dict
+from PIL import Image
+
+logger = logging.getLogger("mlagents.envs")
+
+
+class BrainInfo:
+ def __init__(self, visual_observation, vector_observation, text_observations, memory=None,
+ reward=None, agents=None, local_done=None,
+ vector_action=None, text_action=None, max_reached=None, action_mask=None):
+ """
+ Describes experience at current step of all agents linked to a brain.
+ """
+ self.visual_observations = visual_observation
+ self.vector_observations = vector_observation
+ self.text_observations = text_observations
+ self.memories = memory
+ self.rewards = reward
+ self.local_done = local_done
+ self.max_reached = max_reached
+ self.agents = agents
+ self.previous_vector_actions = vector_action
+ self.previous_text_actions = text_action
+ self.action_masks = action_mask
+
+ @staticmethod
+ def process_pixels(image_bytes, gray_scale):
+ """
+ Converts a byte array observation image into a numpy array, scales pixel values to [0, 1],
+ and optionally converts it to grayscale.
+ :param gray_scale: Whether to convert the image to grayscale.
+ :param image_bytes: input byte array corresponding to image
+ :return: processed numpy array of observation from environment
+ """
+ s = bytearray(image_bytes)
+ image = Image.open(io.BytesIO(s))
+ s = np.array(image) / 255.0
+ if gray_scale:
+ s = np.mean(s, axis=2)
+ s = np.reshape(s, [s.shape[0], s.shape[1], 1])
+ return s
+
+ @staticmethod
+ def from_agent_proto(agent_info_list, brain_params):
+ """
+ Converts list of agent infos to BrainInfo.
+ """
+ vis_obs = []
+ for i in range(brain_params.number_visual_observations):
+ obs = [BrainInfo.process_pixels(x.visual_observations[i],
+ brain_params.camera_resolutions[i]['blackAndWhite'])
+ for x in agent_info_list]
+ vis_obs += [np.array(obs)]
+ if len(agent_info_list) == 0:
+ memory_size = 0
+ else:
+ memory_size = max([len(x.memories) for x in agent_info_list])
+ if memory_size == 0:
+ memory = np.zeros((0, 0))
+ else:
+ [x.memories.extend([0] * (memory_size - len(x.memories))) for x in agent_info_list]
+ memory = np.array([x.memories for x in agent_info_list])
+ total_num_actions = sum(brain_params.vector_action_space_size)
+ mask_actions = np.ones((len(agent_info_list), total_num_actions))
+ for agent_index, agent_info in enumerate(agent_info_list):
+ if agent_info.action_mask is not None:
+ if len(agent_info.action_mask) == total_num_actions:
+ mask_actions[agent_index, :] = [
+ 0 if agent_info.action_mask[k] else 1 for k in range(total_num_actions)]
+ if any([np.isnan(x.reward) for x in agent_info_list]):
+ logger.warning("An agent had a NaN reward for brain " + brain_params.brain_name)
+ if any([np.isnan(x.stacked_vector_observation).any() for x in agent_info_list]):
+ logger.warning("An agent had a NaN observation for brain " + brain_params.brain_name)
+ brain_info = BrainInfo(
+ visual_observation=vis_obs,
+ vector_observation=np.nan_to_num(
+ np.array([x.stacked_vector_observation for x in agent_info_list])),
+ text_observations=[x.text_observation for x in agent_info_list],
+ memory=memory,
+ reward=[x.reward if not np.isnan(x.reward) else 0 for x in agent_info_list],
+ agents=[x.id for x in agent_info_list],
+ local_done=[x.done for x in agent_info_list],
+ vector_action=np.array([x.stored_vector_actions for x in agent_info_list]),
+ text_action=[x.stored_text_actions for x in agent_info_list],
+ max_reached=[x.max_step_reached for x in agent_info_list],
+ action_mask=mask_actions
+ )
+ return brain_info
+
+
+# Renaming of dictionary of brain name to BrainInfo for clarity
+AllBrainInfo = Dict[str, BrainInfo]
+
+
+class BrainParameters:
+ def __init__(self, brain_name, vector_observation_space_size, num_stacked_vector_observations,
+ camera_resolutions, vector_action_space_size,
+ vector_action_descriptions, vector_action_space_type):
+ """
+ Contains all brain-specific parameters.
+ """
+ self.brain_name = brain_name
+ self.vector_observation_space_size = vector_observation_space_size
+ self.num_stacked_vector_observations = num_stacked_vector_observations
+ self.number_visual_observations = len(camera_resolutions)
+ self.camera_resolutions = camera_resolutions
+ self.vector_action_space_size = vector_action_space_size
+ self.vector_action_descriptions = vector_action_descriptions
+ self.vector_action_space_type = ["discrete", "continuous"][vector_action_space_type]
+
+ def __str__(self):
+ return '''Unity brain name: {}
+ Number of Visual Observations (per agent): {}
+ Vector Observation space size (per agent): {}
+ Number of stacked Vector Observation: {}
+ Vector Action space type: {}
+ Vector Action space size (per agent): {}
+ Vector Action descriptions: {}'''.format(self.brain_name,
+ str(self.number_visual_observations),
+ str(self.vector_observation_space_size),
+ str(self.num_stacked_vector_observations),
+ self.vector_action_space_type,
+ str(self.vector_action_space_size),
+ ', '.join(self.vector_action_descriptions))
+
+ @staticmethod
+ def from_proto(brain_param_proto):
+ """
+ Converts brain parameter proto to BrainParameter object.
+ :param brain_param_proto: protobuf object.
+ :return: BrainParameter object.
+ """
+ resolution = [{
+ "height": x.height,
+ "width": x.width,
+ "blackAndWhite": x.gray_scale
+ } for x in brain_param_proto.camera_resolutions]
+ brain_params = BrainParameters(brain_param_proto.brain_name,
+ brain_param_proto.vector_observation_size,
+ brain_param_proto.num_stacked_vector_observations,
+ resolution,
+ brain_param_proto.vector_action_size,
+ brain_param_proto.vector_action_descriptions,
+ brain_param_proto.vector_action_space_type)
+ return brain_params
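As a quick illustration of the pixel pipeline in BrainInfo.process_pixels, a self-contained sketch using synthetic PNG bytes rather than a real Unity observation:

    import io
    import numpy as np
    from PIL import Image
    from animalai.envs.brain import BrainInfo

    # Encode a dummy 4x4 RGB image to PNG bytes, mimicking the raw visual observation sent by Unity.
    buf = io.BytesIO()
    Image.fromarray(np.zeros((4, 4, 3), dtype=np.uint8)).save(buf, format='PNG')

    obs = BrainInfo.process_pixels(buf.getvalue(), gray_scale=True)
    print(obs.shape)  # (4, 4, 1): pixel values scaled to [0, 1], single grayscale channel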
diff --git a/animalai_packages/animalai_envs/animalai/envs/communicator.py b/animalai_packages/animalai_envs/animalai/envs/communicator.py
new file mode 100644
index 00000000..85b56db1
--- /dev/null
+++ b/animalai_packages/animalai_envs/animalai/envs/communicator.py
@@ -0,0 +1,35 @@
+import logging
+
+from animalai.communicator_objects import UnityOutput, UnityInput
+
+logger = logging.getLogger("mlagents.envs")
+
+
+class Communicator(object):
+ def __init__(self, worker_id=0, base_port=5005):
+ """
+ Python side of the communication. Must be used in tandem with the matching Unity-side Communicator.
+
+ :int base_port: Baseline port number to connect to Unity environment over. worker_id increments over this.
+ :int worker_id: Number to add to communication port (5005) [0]. Used for asynchronous agent scenarios.
+ """
+
+ def initialize(self, inputs: UnityInput) -> UnityOutput:
+ """
+ Used to exchange initialization parameters between Python and the Environment
+ :param inputs: The initialization input that will be sent to the environment.
+ :return: UnityOutput: The initialization output sent by Unity
+ """
+
+ def exchange(self, inputs: UnityInput) -> UnityOutput:
+ """
+ Used to send an input and receive an output from the Environment
+ :param inputs: The UnityInput that needs to be sent the Environment
+ :return: The UnityOutputs generated by the Environment
+ """
+
+ def close(self):
+ """
+ Sends a shutdown signal to the unity environment, and closes the connection.
+ """
+
diff --git a/animalai_packages/animalai_envs/animalai/envs/environment.py b/animalai_packages/animalai_envs/animalai/envs/environment.py
new file mode 100644
index 00000000..6c7fc0ae
--- /dev/null
+++ b/animalai_packages/animalai_envs/animalai/envs/environment.py
@@ -0,0 +1,491 @@
+import atexit
+import glob
+import logging
+import numpy as np
+import os
+import subprocess
+
+from .brain import AllBrainInfo, BrainInfo, BrainParameters
+from .exception import UnityEnvironmentException, UnityActionException, UnityTimeOutException
+
+from animalai.communicator_objects import UnityRLInput, UnityRLOutput, AgentActionProto, \
+ UnityRLInitializationInput, UnityRLInitializationOutput, \
+ UnityRLResetInput, UnityInput, UnityOutput
+
+from .rpc_communicator import RpcCommunicator
+from sys import platform
+from .arena_config import ArenaConfig
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger("mlagents.envs")
+
+
+class UnityEnvironment(object):
+ SCALAR_ACTION_TYPES = (int, np.int32, np.int64, float, np.float32, np.float64)
+ SINGLE_BRAIN_ACTION_TYPES = SCALAR_ACTION_TYPES + (list, np.ndarray)
+ SINGLE_BRAIN_TEXT_TYPES = (str, list, np.ndarray)
+
+ def __init__(self, file_name=None,
+ worker_id=0,
+ base_port=5005,
+ seed=0,
+ docker_training=False,
+ n_arenas=1,
+ play=False,
+ arenas_configurations=None):
+ """
+ Starts a new unity environment and establishes a connection with the environment.
+ Notice: Currently communication between Unity and Python takes place over an open socket without authentication.
+ Ensure that the network where training takes place is secure.
+
+ :string file_name: Name of Unity environment binary.
+ :int base_port: Baseline port number to connect to Unity environment over. worker_id increments over this.
+ :int worker_id: Number to add to communication port (5005) [0]. Used for asynchronous agent scenarios.
+ :param docker_training: Informs this class whether the process is being run within a container.
+ """
+
+ atexit.register(self._close)
+ self.n_arenas = n_arenas
+ self.play = play
+ self.port = base_port + worker_id
+ self._buffer_size = 12000
+ self._version_ = "API-7"
+ self._loaded = False # If true, this means the environment was successfully loaded
+ self.proc1 = None # The process that is started. If None, no process was started
+ self.communicator = self.get_communicator(worker_id, base_port)
+ self.arenas_configurations = arenas_configurations if arenas_configurations is not None else ArenaConfig()
+
+ if file_name is not None:
+ self.executable_launcher(file_name, docker_training)
+ else:
+ logger.info("Start training by pressing the Play button in the Unity Editor.")
+ self._loaded = True
+
+ rl_init_parameters_in = UnityRLInitializationInput(
+ seed=seed
+ )
+ try:
+ aca_params = self.send_academy_parameters(rl_init_parameters_in)
+ except UnityTimeOutException:
+ self._close()
+ raise
+ # TODO : think of a better way to expose the academyParameters
+ self._unity_version = aca_params.version
+ if self._unity_version != self._version_:
+ raise UnityEnvironmentException(
+ "The API number is not compatible between Unity and python. Python API : {0}, Unity API : "
+ "{1}.\nPlease go to https://github.com/Unity-Technologies/ml-agents to download the latest version "
+ "of ML-Agents.".format(self._version_, self._unity_version))
+ self._n_agents = {}
+ self._global_done = None
+ self._academy_name = aca_params.name
+ self._log_path = aca_params.log_path
+ self._brains = {}
+ self._brain_names = []
+ self._external_brain_names = []
+ for brain_param in aca_params.brain_parameters:
+ self._brain_names += [brain_param.brain_name]
+ self._brains[brain_param.brain_name] = BrainParameters.from_proto(brain_param)
+ if brain_param.is_training:
+ self._external_brain_names += [brain_param.brain_name]
+ self._num_brains = len(self._brain_names)
+ self._num_external_brains = len(self._external_brain_names)
+ logger.info("\n'{0}' started successfully!\n{1}".format(self._academy_name, str(self)))
+ if self._num_external_brains == 0:
+ logger.warning(" No Learning Brains set to train found in the Unity Environment. "
+ "You will not be able to pass actions to your agent(s).")
+
+ @property
+ def logfile_path(self):
+ return self._log_path
+
+ @property
+ def brains(self):
+ return self._brains
+
+ @property
+ def global_done(self):
+ return self._global_done
+
+ @property
+ def academy_name(self):
+ return self._academy_name
+
+ @property
+ def number_brains(self):
+ return self._num_brains
+
+ @property
+ def number_external_brains(self):
+ return self._num_external_brains
+
+ @property
+ def brain_names(self):
+ return self._brain_names
+
+ @property
+ def external_brain_names(self):
+ return self._external_brain_names
+
+ def executable_launcher(self, file_name, docker_training):
+ cwd = os.getcwd()
+ file_name = (file_name.strip()
+ .replace('.app', '').replace('.exe', '').replace('.x86_64', '').replace('.x86',
+ ''))
+ true_filename = os.path.basename(os.path.normpath(file_name))
+ logger.debug('The true file name is {}'.format(true_filename))
+ launch_string = None
+ if platform == "linux" or platform == "linux2":
+ candidates = glob.glob(os.path.join(cwd, file_name) + '.x86_64')
+ if len(candidates) == 0:
+ candidates = glob.glob(os.path.join(cwd, file_name) + '.x86')
+ if len(candidates) == 0:
+ candidates = glob.glob(file_name + '.x86_64')
+ if len(candidates) == 0:
+ candidates = glob.glob(file_name + '.x86')
+ if len(candidates) > 0:
+ launch_string = candidates[0]
+
+ elif platform == 'darwin':
+ candidates = glob.glob(
+ os.path.join(cwd, file_name + '.app', 'Contents', 'MacOS', true_filename))
+ if len(candidates) == 0:
+ candidates = glob.glob(
+ os.path.join(file_name + '.app', 'Contents', 'MacOS', true_filename))
+ if len(candidates) == 0:
+ candidates = glob.glob(
+ os.path.join(cwd, file_name + '.app', 'Contents', 'MacOS', '*'))
+ if len(candidates) == 0:
+ candidates = glob.glob(os.path.join(file_name + '.app', 'Contents', 'MacOS', '*'))
+ if len(candidates) > 0:
+ launch_string = candidates[0]
+ elif platform == 'win32':
+ candidates = glob.glob(os.path.join(cwd, file_name + '.exe'))
+ if len(candidates) == 0:
+ candidates = glob.glob(file_name + '.exe')
+ if len(candidates) > 0:
+ launch_string = candidates[0]
+ if launch_string is None:
+ self._close()
+ raise UnityEnvironmentException("Couldn't launch the {0} environment. "
+ "Provided filename does not match any environments."
+ .format(true_filename))
+ else:
+ logger.debug("This is the launch string {}".format(launch_string))
+ # Launch Unity environment
+ if not docker_training:
+ if not self.play:
+ self.proc1 = subprocess.Popen(
+ [launch_string, '--port', str(self.port), '--nArenas', str(self.n_arenas)])
+ else:
+ self.proc1 = subprocess.Popen(
+ [launch_string, '--port', str(self.port)])
+
+ else:
+ """
+ Comments for future maintenance:
+ xvfb-run is a wrapper around Xvfb, a virtual xserver where all
+ rendering is done to virtual memory. It automatically creates a
+ new virtual server, picking a server number via `auto-servernum`.
+ The server is passed the arguments using `server-args`, we are telling
+ Xvfb to create Screen number 0 with width 640, height 480 and depth 24 bits.
+ Note that 640 X 480 are the default width and height. The main reason for
+ us to add this is because we'd like to change the depth from the default
+ of 8 bits to 24.
+ Unfortunately, this means that we will need to pass the arguments through
+ a shell which is why we set `shell=True`. Now, this adds its own
+ complications. E.g. SIGINT can bounce off the shell and not get propagated
+ to the child processes. This is why we add `exec`, so that the shell gets
+ launched, the arguments are passed to `xvfb-run`. `exec` replaces the shell
+ we created with `xvfb`.
+ """
+ docker_ls = ("exec xvfb-run --auto-servernum"
+ " --server-args='-screen 0 640x480x24'"
+ " {0} --port {1} --nArenas {2}").format(launch_string, str(self.port), str(self.n_arenas))
+ self.proc1 = subprocess.Popen(docker_ls,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE,
+ shell=True)
+
+ def get_communicator(self, worker_id, base_port):
+ return RpcCommunicator(worker_id, base_port)
+ # return SocketCommunicator(worker_id, base_port)
+
+ def __str__(self):
+ return '''Unity Academy name: {0}
+ Number of Brains: {1}
+ Number of Training Brains : {2}'''.format(self._academy_name, str(self._num_brains),
+ str(self._num_external_brains))
+
+ def reset(self, arenas_configurations_input=None, train_mode=True) -> AllBrainInfo:
+ """
+ Sends a signal to reset the unity environment.
+ :return: AllBrainInfo : A data structure corresponding to the initial reset state of the environment.
+ """
+ if self._loaded:
+ self.arenas_configurations.update(arenas_configurations_input)
+
+ outputs = self.communicator.exchange(
+ self._generate_reset_input(train_mode, arenas_configurations_input)
+ )
+ if outputs is None:
+ raise KeyboardInterrupt
+ rl_output = outputs.rl_output
+ s = self._get_state(rl_output)
+ self._global_done = s[1]
+ for _b in self._external_brain_names:
+ self._n_agents[_b] = len(s[0][_b].agents)
+ return s[0]
+ else:
+ raise UnityEnvironmentException("No Unity environment is loaded.")
+
+ def step(self, vector_action=None, memory=None, text_action=None, value=None, step_number=0) -> AllBrainInfo:
+ """
+ Provides the environment with an action, moves the environment dynamics forward accordingly,
+ and returns observation, state, and reward information to the agent.
+ :param value: Value estimates provided by agents.
+ :param vector_action: Agent's vector action. Can be a scalar or vector of int/floats.
+ :param memory: Vector corresponding to memory used for recurrent policies.
+ :param text_action: Text action to send to the environment.
+ :return: AllBrainInfo : A Data structure corresponding to the new state of the environment.
+ """
+ vector_action = {} if vector_action is None else vector_action
+ memory = {} if memory is None else memory
+ text_action = {} if text_action is None else text_action
+ value = {} if value is None else value
+
+ # Check that environment is loaded, and episode is currently running.
+ if self._loaded and not self._global_done and self._global_done is not None:
+ if isinstance(vector_action, self.SINGLE_BRAIN_ACTION_TYPES):
+ if self._num_external_brains == 1:
+ vector_action = {self._external_brain_names[0]: vector_action}
+ elif self._num_external_brains > 1:
+ raise UnityActionException(
+ "You have {0} brains, you need to feed a dictionary of brain names a keys, "
+ "and vector_actions as values".format(self._num_brains))
+ else:
+ raise UnityActionException(
+ "There are no external brains in the environment, "
+ "step cannot take a vector_action input")
+
+ if isinstance(memory, self.SINGLE_BRAIN_ACTION_TYPES):
+ if self._num_external_brains == 1:
+ memory = {self._external_brain_names[0]: memory}
+ elif self._num_external_brains > 1:
+ raise UnityActionException(
+ "You have {0} brains, you need to feed a dictionary of brain names as keys "
+ "and memories as values".format(self._num_brains))
+ else:
+ raise UnityActionException(
+ "There are no external brains in the environment, "
+ "step cannot take a memory input")
+
+ if isinstance(text_action, self.SINGLE_BRAIN_TEXT_TYPES):
+ if self._num_external_brains == 1:
+ text_action = {self._external_brain_names[0]: text_action}
+ elif self._num_external_brains > 1:
+ raise UnityActionException(
+ "You have {0} brains, you need to feed a dictionary of brain names as keys "
+ "and text_actions as values".format(self._num_brains))
+ else:
+ raise UnityActionException(
+ "There are no external brains in the environment, "
+ "step cannot take a value input")
+
+ if isinstance(value, self.SINGLE_BRAIN_ACTION_TYPES):
+ if self._num_external_brains == 1:
+ value = {self._external_brain_names[0]: value}
+ elif self._num_external_brains > 1:
+ raise UnityActionException(
+ "You have {0} brains, you need to feed a dictionary of brain names as keys "
+ "and state/action value estimates as values".format(self._num_brains))
+ else:
+ raise UnityActionException(
+ "There are no external brains in the environment, "
+ "step cannot take a value input")
+
+ for brain_name in list(vector_action.keys()) + list(memory.keys()) + list(
+ text_action.keys()):
+ if brain_name not in self._external_brain_names:
+ raise UnityActionException(
+ "The name {0} does not correspond to an external brain "
+ "in the environment".format(brain_name))
+
+ for brain_name in self._external_brain_names:
+ n_agent = self._n_agents[brain_name]
+ if brain_name not in vector_action:
+ if self._brains[brain_name].vector_action_space_type == "discrete":
+ vector_action[brain_name] = [0.0] * n_agent * len(
+ self._brains[brain_name].vector_action_space_size)
+ else:
+ vector_action[brain_name] = [0.0] * n_agent * \
+ self._brains[
+ brain_name].vector_action_space_size[0]
+ else:
+ vector_action[brain_name] = self._flatten(vector_action[brain_name])
+ if brain_name not in memory:
+ memory[brain_name] = []
+ else:
+ if memory[brain_name] is None:
+ memory[brain_name] = []
+ else:
+ memory[brain_name] = self._flatten(memory[brain_name])
+ if brain_name not in text_action:
+ text_action[brain_name] = [""] * n_agent
+ else:
+ if text_action[brain_name] is None:
+ text_action[brain_name] = [""] * n_agent
+ if isinstance(text_action[brain_name], str):
+ text_action[brain_name] = [text_action[brain_name]] * n_agent
+
+ number_text_actions = len(text_action[brain_name])
+ if not ((number_text_actions == n_agent) or number_text_actions == 0):
+ raise UnityActionException(
+ "There was a mismatch between the provided text_action and "
+ "the environment's expectation: "
+ "The brain {0} expected {1} text_action but was given {2}".format(
+ brain_name, n_agent, number_text_actions))
+
+ discrete_check = self._brains[brain_name].vector_action_space_type == "discrete"
+
+ expected_discrete_size = n_agent * len(
+ self._brains[brain_name].vector_action_space_size)
+
+ continuous_check = self._brains[brain_name].vector_action_space_type == "continuous"
+
+ expected_continuous_size = self._brains[brain_name].vector_action_space_size[
+ 0] * n_agent
+
+ if not ((discrete_check and len(
+ vector_action[brain_name]) == expected_discrete_size) or
+ (continuous_check and len(
+ vector_action[brain_name]) == expected_continuous_size)):
+ raise UnityActionException(
+ "There was a mismatch between the provided action and "
+ "the environment's expectation: "
+ "The brain {0} expected {1} {2} action(s), but was provided: {3}"
+ .format(brain_name, str(expected_discrete_size)
+ if discrete_check
+ else str(expected_continuous_size),
+ self._brains[brain_name].vector_action_space_type,
+ str(vector_action[brain_name])))
+
+ outputs = self.communicator.exchange(
+ self._generate_step_input(vector_action, memory, text_action, value))
+ if outputs is None:
+ raise KeyboardInterrupt
+ rl_output = outputs.rl_output
+ state = self._get_state(rl_output)
+ self._global_done = state[1]
+ for _b in self._external_brain_names:
+ self._n_agents[_b] = len(state[0][_b].agents)
+ return state[0]
+ elif not self._loaded:
+ raise UnityEnvironmentException("No Unity environment is loaded.")
+ elif self._global_done:
+ raise UnityActionException(
+ "The episode is completed. Reset the environment with 'reset()'")
+ elif self.global_done is None:
+ raise UnityActionException(
+ "You cannot conduct step without first calling reset. "
+ "Reset the environment with 'reset()'")
+
+ def close(self):
+ """
+ Sends a shutdown signal to the unity environment, and closes the socket connection.
+ """
+ if self._loaded:
+ self._close()
+ else:
+ raise UnityEnvironmentException("No Unity environment is loaded.")
+
+ def _close(self):
+ self._loaded = False
+ self.communicator.close()
+ if self.proc1 is not None:
+ self.proc1.kill()
+
+ @classmethod
+ def _flatten(cls, arr):
+ """
+ Converts arrays to list.
+ :param arr: numpy vector.
+ :return: flattened list.
+ """
+ if isinstance(arr, cls.SCALAR_ACTION_TYPES):
+ arr = [float(arr)]
+ if isinstance(arr, np.ndarray):
+ arr = arr.tolist()
+ if len(arr) == 0:
+ return arr
+ if isinstance(arr[0], np.ndarray):
+ arr = [item for sublist in arr for item in sublist.tolist()]
+ if isinstance(arr[0], list):
+ arr = [item for sublist in arr for item in sublist]
+ arr = [float(x) for x in arr]
+ return arr
+
+ def _get_state(self, output: UnityRLOutput) -> (AllBrainInfo, bool):
+ """
+ Collects experience information from all external brains in environment at current step.
+ :return: a dictionary of BrainInfo objects.
+ """
+ _data = {}
+ global_done = output.global_done
+ for brain_name in output.agentInfos:
+ agent_info_list = output.agentInfos[brain_name].value
+ _data[brain_name] = BrainInfo.from_agent_proto(agent_info_list,
+ self.brains[brain_name])
+ return _data, global_done
+
+ def _generate_step_input(self, vector_action, memory, text_action, value) -> UnityRLInput:
+ rl_in = UnityRLInput()
+ for b in vector_action:
+ n_agents = self._n_agents[b]
+ if n_agents == 0:
+ continue
+ _a_s = len(vector_action[b]) // n_agents
+ _m_s = len(memory[b]) // n_agents
+ for i in range(n_agents):
+ action = AgentActionProto(
+ vector_actions=vector_action[b][i * _a_s: (i + 1) * _a_s],
+ memories=memory[b][i * _m_s: (i + 1) * _m_s],
+ text_actions=text_action[b][i],
+ )
+ if b in value:
+ if value[b] is not None:
+ action.value = float(value[b][i])
+ rl_in.agent_actions[b].value.extend([action])
+ rl_in.command = 0
+ return self.wrap_unity_input(rl_in)
+
+ def _generate_reset_input(self, training, config: ArenaConfig) -> UnityRLInput:
+ rl_in = UnityRLInput()
+ rl_in.is_training = training
+ rl_in.command = 1
+ rl_reset = UnityRLResetInput()
+ if (config is not None):
+ rl_reset.CopyFrom(config.dict_to_arena_config())
+ result = UnityInput()
+ result.rl_input.CopyFrom(rl_in)
+ result.rl_reset_input.CopyFrom(rl_reset)
+ return result
+
+ # return self.wrap_unity_input(rl_in)
+
+ def send_academy_parameters(self,
+ init_parameters: UnityRLInitializationInput) -> UnityRLInitializationOutput:
+ inputs = UnityInput()
+ inputs.rl_initialization_input.CopyFrom(init_parameters)
+ return self.communicator.initialize(inputs).rl_initialization_output
+
+ def wrap_unity_input(self, rl_input: UnityRLInput) -> UnityOutput:
+ result = UnityInput()
+ result.rl_input.CopyFrom(rl_input)
+ return result
+
+ # def send_update_arena_parameters(self, arena_parameters : ArenaConfigInput) -> None:
+ #
+ # # TODO: add return status ==> create new proto for ArenaParametersOutput
+ #
+ # self.communicator.exchange_arena_update(arena_parameters)
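Putting the pieces together, the intended call pattern for UnityEnvironment looks roughly like the following; the binary path, YAML file, and action layout are placeholders and assumptions rather than part of this patch:

    from animalai.envs.environment import UnityEnvironment
    from animalai.envs.arena_config import ArenaConfig

    env = UnityEnvironment(file_name='env/AnimalAI',   # placeholder path to the Unity binary
                           worker_id=0,
                           n_arenas=1)
    arena_config = ArenaConfig('configs/1-Food.yaml')  # placeholder arena configuration
    info = env.reset(arenas_configurations_input=arena_config, train_mode=True)
    brain_name = env.external_brain_names[0]
    for _ in range(10):
        # Two discrete action values per agent (assumed action layout for a single arena).
        info = env.step(vector_action={brain_name: [0, 0]})
    env.close()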
diff --git a/animalai_packages/animalai_envs/animalai/envs/exception.py b/animalai_packages/animalai_envs/animalai/envs/exception.py
new file mode 100644
index 00000000..edf16ff4
--- /dev/null
+++ b/animalai_packages/animalai_envs/animalai/envs/exception.py
@@ -0,0 +1,63 @@
+import logging
+logger = logging.getLogger("mlagents.envs")
+
+class UnityException(Exception):
+ """
+ Any error related to ml-agents environment.
+ """
+ pass
+
+class UnityEnvironmentException(UnityException):
+ """
+ Related to errors starting and closing environment.
+ """
+ pass
+
+
+class UnityActionException(UnityException):
+ """
+ Related to errors with sending actions.
+ """
+ pass
+
+class UnityTimeOutException(UnityException):
+ """
+ Related to errors with communication timeouts.
+ """
+ def __init__(self, message, log_file_path = None):
+ if log_file_path is not None:
+ try:
+ with open(log_file_path, "r") as f:
+ printing = False
+ unity_error = '\n'
+ for l in f:
+ l=l.strip()
+ if (l == 'Exception') or (l=='Error'):
+ printing = True
+ unity_error += '----------------------\n'
+ if (l == ''):
+ printing = False
+ if printing:
+ unity_error += l + '\n'
+ logger.info(unity_error)
+ logger.error("An error might have occured in the environment. "
+ "You can check the logfile for more information at {}".format(log_file_path))
+ except:
+ logger.error("An error might have occured in the environment. "
+ "No UnitySDK.log file could be found.")
+ super(UnityTimeOutException, self).__init__(message)
+
+
+class UnityWorkerInUseException(UnityException):
+ """
+ This error occurs when the port for a certain worker ID is already reserved.
+ """
+
+ MESSAGE_TEMPLATE = (
+ "Couldn't start socket communication because worker number {} is still in use. "
+ "You may need to manually close a previously opened environment "
+ "or use a different worker number.")
+
+ def __init__(self, worker_id):
+ message = self.MESSAGE_TEMPLATE.format(str(worker_id))
+ super(UnityWorkerInUseException, self).__init__(message)
diff --git a/animalai_packages/animalai_envs/animalai/envs/rpc_communicator.py b/animalai_packages/animalai_envs/animalai/envs/rpc_communicator.py
new file mode 100644
index 00000000..aa082305
--- /dev/null
+++ b/animalai_packages/animalai_envs/animalai/envs/rpc_communicator.py
@@ -0,0 +1,115 @@
+import logging
+import grpc
+
+import socket
+from multiprocessing import Pipe
+from concurrent.futures import ThreadPoolExecutor
+
+from .communicator import Communicator
+from animalai.communicator_objects import UnityToExternalServicer, add_UnityToExternalServicer_to_server
+from animalai.communicator_objects import UnityMessage, UnityInput, UnityOutput #, ArenaConfigInput
+from .exception import UnityTimeOutException, UnityWorkerInUseException
+
+logger = logging.getLogger("mlagents.envs")
+
+
+class UnityToExternalServicerImplementation(UnityToExternalServicer):
+ def __init__(self):
+ self.parent_conn, self.child_conn = Pipe()
+
+ def Initialize(self, request, context):
+ self.child_conn.send(request)
+ return self.child_conn.recv()
+
+ def Exchange(self, request, context):
+ self.child_conn.send(request)
+ return self.child_conn.recv()
+
+
+class RpcCommunicator(Communicator):
+ def __init__(self, worker_id=0, base_port=5005):
+ """
+ Python side of the gRPC communication. Python is the server and Unity the client.
+
+
+ :int base_port: Baseline port number to connect to Unity environment over. worker_id increments over this.
+ :int worker_id: Number to add to communication port (5005) [0]. Used for asynchronous agent scenarios.
+ """
+ self.port = base_port + worker_id
+ self.worker_id = worker_id
+ self.server = None
+ self.unity_to_external = None
+ self.is_open = False
+ self.create_server()
+
+ def create_server(self):
+ """
+ Creates the GRPC server.
+ """
+ self.check_port(self.port)
+
+ try:
+ # Establish communication grpc
+ self.server = grpc.server(ThreadPoolExecutor(max_workers=10))
+ self.unity_to_external = UnityToExternalServicerImplementation()
+ add_UnityToExternalServicer_to_server(self.unity_to_external, self.server)
+ # Using unspecified address, which means that grpc is communicating on all IPs
+ # This is so that the docker container can connect.
+ self.server.add_insecure_port('[::]:' + str(self.port))
+ self.server.start()
+ self.is_open = True
+ except:
+ raise UnityWorkerInUseException(self.worker_id)
+
+ def check_port(self, port):
+ """
+ Attempts to bind to the requested communicator port, checking if it is already in use.
+ """
+ s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+ try:
+ s.bind(("localhost", port))
+ except socket.error:
+ raise UnityWorkerInUseException(self.worker_id)
+ finally:
+ s.close()
+
+ def initialize(self, inputs: UnityInput) -> UnityOutput:
+ if not self.unity_to_external.parent_conn.poll(3000):
+ raise UnityTimeOutException(
+ "The Unity environment took too long to respond. Make sure that :\n"
+ "\t The environment does not need user interaction to launch\n"
+ "\t The Academy's Broadcast Hub is configured correctly\n"
+ "\t The Agents are linked to the appropriate Brains\n"
+ "\t The environment and the Python interface have compatible versions.")
+ aca_param = self.unity_to_external.parent_conn.recv().unity_output
+ message = UnityMessage()
+ message.header.status = 200
+ message.unity_input.CopyFrom(inputs)
+ self.unity_to_external.parent_conn.send(message)
+ self.unity_to_external.parent_conn.recv()
+ return aca_param
+
+ def exchange(self, inputs: UnityInput) -> UnityOutput:
+ message = UnityMessage()
+ message.header.status = 200
+ message.unity_input.CopyFrom(inputs)
+ self.unity_to_external.parent_conn.send(message)
+ output = self.unity_to_external.parent_conn.recv()
+ if output.header.status != 200:
+ return None
+ return output.unity_output
+
+ def close(self):
+ """
+ Sends a shutdown signal to the unity environment, and closes the grpc connection.
+ """
+ if self.is_open:
+ message_input = UnityMessage()
+ message_input.header.status = 400
+ self.unity_to_external.parent_conn.send(message_input)
+ self.unity_to_external.parent_conn.close()
+ self.server.stop(False)
+ self.is_open = False
+
+ # def exchange_arena_update(self, inputs: ArenaConfigInput) -> None:
+ # self.unity_to_external.parent_conn.send(inputs)
diff --git a/animalai_packages/animalai_envs/animalai/envs/socket_communicator.py b/animalai_packages/animalai_envs/animalai/envs/socket_communicator.py
new file mode 100644
index 00000000..c600e938
--- /dev/null
+++ b/animalai_packages/animalai_envs/animalai/envs/socket_communicator.py
@@ -0,0 +1,98 @@
+import logging
+import socket
+import struct
+
+from .communicator import Communicator
+from animalai.communicator_objects import UnityMessage, UnityOutput, UnityInput
+from .exception import UnityTimeOutException
+
+
+logger = logging.getLogger("mlagents.envs")
+
+
+class SocketCommunicator(Communicator):
+ def __init__(self, worker_id=0,
+ base_port=5005):
+ """
+ Python side of the socket communication
+
+ :int base_port: Baseline port number to connect to Unity environment over. worker_id increments over this.
+ :int worker_id: Number to add to communication port (5005) [0]. Used for asynchronous agent scenarios.
+ """
+
+ self.port = base_port + worker_id
+ self._buffer_size = 12000
+ self.worker_id = worker_id
+ self._socket = None
+ self._conn = None
+
+ def initialize(self, inputs: UnityInput) -> UnityOutput:
+ try:
+ # Establish communication socket
+ self._socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+ self._socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
+ self._socket.bind(("localhost", self.port))
+ except:
+ raise UnityTimeOutException("Couldn't start socket communication because worker number {} is still in use. "
+ "You may need to manually close a previously opened environment "
+ "or use a different worker number.".format(str(self.worker_id)))
+ try:
+ self._socket.settimeout(30)
+ self._socket.listen(1)
+ self._conn, _ = self._socket.accept()
+ self._conn.settimeout(30)
+ except:
+ raise UnityTimeOutException(
+ "The Unity environment took too long to respond. Make sure that :\n"
+ "\t The environment does not need user interaction to launch\n"
+ "\t The Academy's Broadcast Hub is configured correctly\n"
+ "\t The Agents are linked to the appropriate Brains\n"
+ "\t The environment and the Python interface have compatible versions.")
+ message = UnityMessage()
+ message.header.status = 200
+ message.unity_input.CopyFrom(inputs)
+ self._communicator_send(message.SerializeToString())
+ initialization_output = UnityMessage()
+ initialization_output.ParseFromString(self._communicator_receive())
+ return initialization_output.unity_output
+
+ def _communicator_receive(self):
+ try:
+ s = self._conn.recv(self._buffer_size)
+ message_length = struct.unpack("I", bytearray(s[:4]))[0]
+ s = s[4:]
+ while len(s) != message_length:
+ s += self._conn.recv(self._buffer_size)
+ except socket.timeout as e:
+ raise UnityTimeOutException("The environment took too long to respond.")
+ return s
+
+ def _communicator_send(self, message):
+ self._conn.send(struct.pack("I", len(message)) + message)
+
+ def exchange(self, inputs: UnityInput) -> UnityOutput:
+ message = UnityMessage()
+ message.header.status = 200
+ message.unity_input.CopyFrom(inputs)
+ self._communicator_send(message.SerializeToString())
+ outputs = UnityMessage()
+ outputs.ParseFromString(self._communicator_receive())
+ if outputs.header.status != 200:
+ return None
+ return outputs.unity_output
+
+ def close(self):
+ """
+ Sends a shutdown signal to the unity environment, and closes the socket connection.
+ """
+ if self._socket is not None and self._conn is not None:
+ message_input = UnityMessage()
+ message_input.header.status = 400
+ self._communicator_send(message_input.SerializeToString())
+ if self._socket is not None:
+ self._socket.close()
+ self._socket = None
+        if self._conn is not None:
+ self._conn.close()
+ self._conn = None
+
diff --git a/animalai_packages/animalai_envs/setup.py b/animalai_packages/animalai_envs/setup.py
new file mode 100644
index 00000000..9c5b270c
--- /dev/null
+++ b/animalai_packages/animalai_envs/setup.py
@@ -0,0 +1,29 @@
+from setuptools import setup
+
+setup(
+ name='animalai',
+ version='0.4.0',
+ description='Animal AI competition interface',
+ url='https://github.com/beyretb/AnimalAI-Olympics',
+ author='Benjamin Beyret',
+ author_email='bb1010@ic.ac.uk',
+
+ classifiers=[
+ 'Intended Audience :: Developers',
+ 'Topic :: Scientific/Engineering :: Artificial Intelligence',
+ 'License :: OSI Approved :: Apache Software License',
+ 'Programming Language :: Python :: 3.6'
+ ],
+
+ packages=['animalai.envs', 'animalai.communicator_objects'], # Required
+ zip_safe=False,
+
+ install_requires=[
+ 'Pillow>=4.2.1,<=5.4.1',
+ 'numpy>=1.13.3,<=1.14.5',
+ 'protobuf>=3.6,<3.7',
+ 'grpcio>=1.11.0,<1.12.0',
+ 'pyyaml>=5.1',
+ 'jsonpickle>=1.2'],
+ python_requires=">=3.5,<3.8",
+)
\ No newline at end of file
diff --git a/animalai_packages/animalai_train/LICENSE b/animalai_packages/animalai_train/LICENSE
new file mode 100644
index 00000000..7ff5035e
--- /dev/null
+++ b/animalai_packages/animalai_train/LICENSE
@@ -0,0 +1,201 @@
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "{}"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright 2017 Unity Technologies
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
\ No newline at end of file
diff --git a/animalai_packages/animalai_train/README.md b/animalai_packages/animalai_train/README.md
new file mode 100644
index 00000000..1dd411bf
--- /dev/null
+++ b/animalai_packages/animalai_train/README.md
@@ -0,0 +1,5 @@
+# AnimalAI Python API
+
+This package provides the Python API used for training agents for the Animal AI Olympics competition.
+
+This library is mostly the same as [Unity's MLAgents](https://github.com/Unity-Technologies/ml-agents).
\ No newline at end of file
diff --git a/animalai_packages/animalai_train/animalai_train/__init__.py b/animalai_packages/animalai_train/animalai_train/__init__.py
new file mode 100644
index 00000000..fed01500
--- /dev/null
+++ b/animalai_packages/animalai_train/animalai_train/__init__.py
@@ -0,0 +1 @@
+name = "animalai_train"
\ No newline at end of file
diff --git a/animalai_packages/animalai_train/animalai_train/trainers/__init__.py b/animalai_packages/animalai_train/animalai_train/trainers/__init__.py
new file mode 100644
index 00000000..4859f558
--- /dev/null
+++ b/animalai_packages/animalai_train/animalai_train/trainers/__init__.py
@@ -0,0 +1,15 @@
+from .buffer import *
+from .curriculum import *
+from .meta_curriculum import *
+from .models import *
+from .trainer_controller import *
+from .bc.models import *
+from .bc.offline_trainer import *
+from .bc.online_trainer import *
+from .bc.policy import *
+from .ppo.models import *
+from .ppo.trainer import *
+from .ppo.policy import *
+from .exception import *
+from .policy import *
+from .demo_loader import *
diff --git a/animalai_packages/animalai_train/animalai_train/trainers/barracuda.py b/animalai_packages/animalai_train/animalai_train/trainers/barracuda.py
new file mode 100644
index 00000000..813d54db
--- /dev/null
+++ b/animalai_packages/animalai_train/animalai_train/trainers/barracuda.py
@@ -0,0 +1,491 @@
+from __future__ import print_function
+from collections import defaultdict
+import numpy as np
+import json
+import struct # convert from Python values and C structs
+import re
+import argparse
+import os.path
+
+BARRACUDA_VERSION = 16
+
+# Definition of Barracuda model
+class Model:
+ def __init__(self):
+ self.layers = []
+ self.tensors = {}
+ self.inputs = {}
+ self.outputs = []
+ self.globals = []
+ self.memories = []
+
+class Struct:
+ "A structure that can have any fields defined."
+ def __init__(self, **entries): self.__dict__.update(entries)
+
+# Parse command line arguments
+def parse_args(description, source_extension, help):
+ parser = argparse.ArgumentParser(description=description)
+ parser.add_argument('source_file', help=help)
+ parser.add_argument('target_file', help='output Barracuda binary file')
+ parser.add_argument('-trim', '--trim-unused-by-output')
+ parser.add_argument('--print-layers', action='store_true')
+ parser.add_argument('--print-source-json', action='store_true')
+ parser.add_argument('-json', '--print-barracuda-json', action='store_true')
+ parser.add_argument('--print-layer-links', action='store_true')
+ parser.add_argument('--print-patterns', action='store_true')
+ parser.add_argument('--print-tensors', action='store_true')
+ parser.add_argument('--verbose', action='store_true')
+ args = parser.parse_args()
+ args.compress_f16 = False # TEMP: disabled, until properly implemented parser.add_argument('-f16', '--compress-f16', action='store_true')
+
+ output_extension = '.bc' if not args.compress_f16 else '.f16.bc'
+
+ if not os.path.exists(args.source_file):
+ args.source_file = args.source_file + source_extension
+
+ if not os.path.exists(args.source_file):
+ print('File', args.source_file, 'does not exist.')
+ exit(-1)
+
+    def replaceFilenameExtension(filename, newExtension):
+        return os.path.splitext(os.path.basename(filename))[0] + newExtension
+
+ if os.path.isdir(args.target_file):
+ args.target_file = os.path.join(args.target_file, replaceFilenameExtension(args.source_file, output_extension))
+
+ if args.verbose:
+ print(args)
+
+ return args
+
+# Fuse training time BatchNorm tensors into Scale & Bias
+def fuse_batchnorm_weights(gamma, beta, mean, var, epsilon):
+ # https://github.com/Tencent/ncnn/blob/master/src/layer/batchnorm.cpp
+ """ float sqrt_var = sqrt(var_data[i]);
+ a_data[i] = bias_data[i] - slope_data[i] * mean_data[i] / sqrt_var;
+ b_data[i] = slope_data[i] / sqrt_var;
+ ...
+ ptr[i] = b * ptr[i] + a;
+ """
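+    # The fused affine map y = scale * x + bias is algebraically identical to
+    # gamma * (x - mean) / sqrt(var + epsilon) + beta.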
+ scale = gamma / np.sqrt(var + epsilon)
+ bias = beta - gamma * mean / np.sqrt(var + epsilon)
+ return [scale, bias]
+
+# Resort layers so that all inputs are satisfied for every layer beforehand
+def sort(model, inputs, memories, verbose):
+ if hasattr(model, 'layers'):
+ model = model.layers
+ inputs_and_memories = set(list(inputs) + list(memories[1::3]))
+
+ def find_missing_inputs(model, inputs):
+ missing = set()
+ ready = set(inputs)
+ for l in model:
+ for i in l.inputs:
+ if i not in ready:
+ missing.add(i)
+ ready.add(l.name)
+ return missing
+
+ # Class to represent a graph
+ # Taken from: https://www.geeksforgeeks.org/python-program-for-topological-sorting/
+ class Graph:
+ def __init__(self,vertices):
+ self.graph = defaultdict(list) #dictionary containing adjacency List
+ self.V = vertices #No. of vertices
+
+ # function to add an edge to graph
+ def addEdge(self,u,v):
+ self.graph[u].append(v)
+
+ # A recursive function used by topologicalSort
+ def topologicalSortUtil(self,v,visited,stack):
+
+ # Mark the current node as visited.
+ visited[v] = True
+
+ # Recur for all the vertices adjacent to this vertex
+ for i in self.graph[v]:
+ if visited[i] == False:
+ self.topologicalSortUtil(i,visited,stack)
+
+ # Push current vertex to stack which stores result
+ stack.insert(0,v)
+
+ # The function to do Topological Sort. It uses recursive
+ # topologicalSortUtil()
+ def topologicalSort(self):
+ # Mark all the vertices as not visited
+ visited = [False]*self.V
+ stack =[]
+
+ # Call the recursive helper function to store Topological
+ # Sort starting from all vertices one by one
+ for i in range(self.V):
+ if visited[i] == False:
+ self.topologicalSortUtil(i,visited,stack)
+
+ #print(stack)
+ return stack
+
+ if (len(find_missing_inputs(model, inputs_and_memories)) == 0):
+ return model
+
+ g = Graph(len(model))
+
+ layers = {}
+ id = 0
+ for l in model:
+ layers[l.name] = id;
+ id += 1
+
+ for layer in model:
+ for i in layer.inputs:
+ if i not in inputs_and_memories:
+ g.addEdge(layers[i], layers[layer.name])
+
+ sorted_layer_indices = g.topologicalSort()
+ print("SORTED:", sorted_layer_indices)
+ new_model = [model[idx] for idx in sorted_layer_indices]
+
+ assert(len(find_missing_inputs(new_model, inputs_and_memories)) == 0)
+ return new_model
+
+
+
+# Trim
+def trim(model, criteria_regexp_string, verbose):
+ if hasattr(model, 'layers'):
+ model = model.layers
+
+ def flatten(items,enter=lambda x:isinstance(x, list)):
+ # http://stackoverflow.com/a/40857703
+ # https://github.com/ctmakro/canton/blob/master/canton/misc.py
+ """Yield items from any nested iterable; see REF."""
+ for x in items:
+ if enter(x):
+ yield from flatten(x)
+ else:
+ yield x
+
+ def trim_model(model, outputs):
+ layers = {l.name:l for l in model}
+ connected = {o for o in outputs}
+ while len(outputs) > 0:
+ outputs = set(flatten([layers[o].inputs for o in outputs if o in layers]))
+ if verbose and len(outputs) > 0:
+ print(outputs)
+ for o in outputs:
+ connected.add(o)
+
+ trimmed = [l.name for l in model if l.name not in connected]
+ def array_without_brackets(arr):
+ return str(arr)[1:-1] # array to string without brackets
+ print("TRIMMED:", array_without_brackets(trimmed))
+
+ return [l for l in model if l.name in connected]
+
+ layer_names = {l.name for l in model}
+ criteria = re.compile(criteria_regexp_string)
+ preserve_outputs = list(filter(criteria.match, layer_names))
+ if preserve_outputs:
+ print("Trimming model given outputs to preserve:", preserve_outputs)
+ model = trim_model(model, preserve_outputs)
+ else:
+ print("WARNING: Trim couldn't find any layers to match:", criteria_regexp_string)
+ return model
+
+def compress(model):
+ compress_classes = {
+ 'Dense'
+ }
+ for l in model.layers:
+ if (l.class_name in compress_classes):
+ print("Compressing %s layer '%s' weights to float16" % (l.class_name, l.name))
+ for x in l.tensors:
+ x.data = np.float16(x.data)
+ return model
+
+# Verbose
+def to_json(model):
+ class StructEncoder(json.JSONEncoder):
+ def default(self, o):
+ if isinstance(o, np.ndarray): # skip binary data packed inside ndarray
+ return ""
+ if getattr(o, '__dict__', None):
+ return o.__dict__
+ return str(o)
+
+ s = json.dumps(model.layers, cls=StructEncoder, separators=(', ',':'))
+ # custom formatting
+ s = s.replace(']}, {', ']},\n{')
+ s = s.replace(':[{', ':[\n\t{')
+ s = s.replace('}, {', '},\n\t{')
+ s = s.replace('"', "'")
+ return s
+
+def summary(model, print_layer_links, print_barracuda_json, print_tensors):
+ def array_without_brackets(arr):
+ return str(arr)[1:-1] # array to string without brackets
+
+ if print_layer_links:
+ for l in model.layers:
+ print(l.name, " <= ", l.inputs)
+
+ if print_barracuda_json:
+ print(to_json(model))
+
+ if model.globals:
+ if isinstance(model.globals, dict):
+ model.globals = {x.name:x.shape for x in model.globals}
+ print("GLOBALS:", array_without_brackets(model.globals))
+
+ for l in model.layers:
+ if isinstance(model.inputs, dict):
+ ins = {i:model.inputs[i] for i in l.inputs if i in model.inputs}
+ else:
+ ins = [i for i in l.inputs if i in model.inputs]
+ if ins:
+ print("IN: %s => '%s'" % (array_without_brackets(ins), l.name))
+ for mem_in, mem_out in zip(model.memories[1::3], model.memories[2::3]):
+ print("MEM: '%s' => '%s'" % (mem_in, mem_out))
+ print("OUT:", array_without_brackets(model.outputs))
+
+ if (print_tensors):
+ for l in model.layers:
+ for x in l.tensors:
+ print(x.name, x.shape, x.data.dtype, x.data)
+
+class Build:
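+    # Small helper for assembling Barracuda layer graphs: each builder method appends a
+    # Struct describing one op and returns the (possibly auto-generated) layer name, so
+    # calls can be nested, e.g. nn.tanh(nn.mad(x, kernel, bias), out='h').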
+ def __init__(self, scope=''):
+ self.scope = scope
+ self.layers = []
+ self.names_taken = set()
+
+ def __getattr__(self, attr):
+ if attr == '_':
+            return self.layers[-1].name if len(self.layers) > 0 else self.scope
+ raise AttributeError(attr)
+
+ def _patch_last_layer_name_and_return(self):
+ if self.layers[-1].name:
+ return self.layers[-1].name
+
+ # generate unique name based on op and increasing id
+ name = self.layers[-1].op
+
+ i = 1
+ while name in self.names_taken:
+ name = self.layers[-1].op + '_' + str(i)
+ i += 1
+ self.names_taken.add(name)
+
+ self.layers[-1].name = self.scope + ('/' if self.scope else '') + name
+ return self.layers[-1].name
+
+ def concat(self, a, b, out=''):
+ self.layers += [Struct(name=out, op='Concat', input=[a, b])]
+ return self._patch_last_layer_name_and_return()
+ def mad(self, x, kernel, bias, out=''):
+ self.layers += [Struct(name=out, op='Dense', input=[x, kernel, bias])]
+ return self._patch_last_layer_name_and_return()
+ def mul(self, a, b, out=''):
+ self.layers += [Struct(name=out, op='Mul', input=[a, b])]
+ return self._patch_last_layer_name_and_return()
+ def add(self, a, b, out=''):
+ self.layers += [Struct(name=out, op='Add', input=[a, b])]
+ return self._patch_last_layer_name_and_return()
+ def sub(self, a, b, out=''):
+ self.layers += [Struct(name=out, op='Sub', input=[a, b])]
+ return self._patch_last_layer_name_and_return()
+ def sigmoid(self, x, out=''):
+ self.layers += [Struct(name=out, op='Sigmoid', input=[x])]
+ return self._patch_last_layer_name_and_return()
+ def tanh(self, x, out=''):
+ self.layers += [Struct(name=out, op='Tanh', input=[x])]
+ return self._patch_last_layer_name_and_return()
+
+def rnn(name, input, state, kernel, bias, new_state, number_of_gates = 2):
+ ''' - Ht = f(Xt*Wi + Ht_1*Ri + Wbi + Rbi)
+ '''
+
+ nn = Build(name)
+ nn.tanh(
+ nn.mad(kernel=kernel, bias=bias,
+ x=nn.concat(input, state)),
+ out=new_state);
+ return nn.layers;
+
+def gru(name, input, state, kernel_r, kernel_u, kernel_c, bias_r, bias_u, bias_c, new_state, number_of_gates = 2):
+ ''' - zt = f(Xt*Wz + Ht_1*Rz + Wbz + Rbz)
+ - rt = f(Xt*Wr + Ht_1*Rr + Wbr + Rbr)
+ - ht = g(Xt*Wh + (rt . Ht_1)*Rh + Rbh + Wbh)
+ - Ht = (1-zt).ht + zt.Ht_1
+ '''
+ nn = Build(name)
+ inputs = nn.concat(input, state)
+
+ u = nn.sigmoid(nn.mad(inputs, kernel_u, bias_u))
+ r = nn.sigmoid(nn.mad(inputs, kernel_r, bias_r))
+ r_state = nn.mul(r, state)
+
+ c = nn.tanh(nn.mad(kernel=kernel_c, bias=bias_c,
+ x=nn.concat(input, r_state)))
+
+ # new_h = u' * state + (1 - u') * c'
+ # = u' * state + c' - u' * c'
+
+ # u' * state + c'
+ nn.add(nn.mul(u, state), c)
+ # - u' * c'
+ nn.sub(nn._, nn.mul(u, c),
+ out=new_state)
+
+ return nn.layers;
+
+def lstm(name, input, state_c, state_h, kernel_i, kernel_j, kernel_f, kernel_o, bias_i, bias_j, bias_f, bias_o, new_state_c, new_state_h):
+ ''' Full:
+ - it = f(Xt*Wi + Ht_1*Ri + Pi . Ct_1 + Wbi + Rbi)
+ - ft = f(Xt*Wf + Ht_1*Rf + Pf . Ct_1 + Wbf + Rbf)
+ - ct = g(Xt*Wc + Ht_1*Rc + Wbc + Rbc)
+ - Ct = ft . Ct_1 + it . ct
+ - ot = f(Xt*Wo + Ht_1*Ro + Po . Ct + Wbo + Rbo)
+ - Ht = ot . h(Ct)
+ '''
+
+ ''' No peephole:
+ - it = f(Xt*Wi + Ht_1*Ri + Wbi + Rbi)
+ - ft = f(Xt*Wf + Ht_1*Rf + Wbf + Rbf)
+ - ct = g(Xt*Wc + Ht_1*Rc + Wbc + Rbc)
+ - Ct = ft . Ct_ + it . ct
+ - ot = f(Xt*Wo + Ht_1*Ro + Wbo + Rbo)
+ - Ht = ot . h(Ct)
+ '''
+
+ nn = Build(name)
+ inputs = nn.concat(input, state_h)
+
+ i = nn.sigmoid(nn.mad(x=inputs, kernel=kernel_i, bias=bias_i))
+ j = nn.tanh(nn.mad(inputs, kernel_j, bias_j))
+ f = nn.sigmoid(nn.mad(inputs, kernel_f, bias_f))
+ o = nn.sigmoid(nn.mad(inputs, kernel_o, bias_o))
+
+ # new_c = state_c * f' + i' * j'
+ nn.add(
+ nn.mul(state_c, f), nn.mul(i, j),
+ out=new_state_c)
+
+ # new_h =
+ nn.mul(o, nn.tanh(new_state_c),
+ out=new_state_h)
+
+ return nn.layers
+
+# Serialize
+class BarracudaWriter:
+ f = None
+
+ def __init__(self, filename):
+ self.f = open(filename, 'wb+')
+
+ def __enter__(self):
+ return self
+
+ def __exit__(self, type, value, tb):
+ self.f.close()
+
+ def write_array(self, arr):
+ arr.tofile(self.f)
+
+    def write_str_array(self, array_of_strings):
+        self.write_int32(len(array_of_strings))
+        for s in array_of_strings:
+ self.write_str(s)
+
+ def write_str(self, s):
+ self.write_int32(len(s))
+ self.f.write(s.encode('ascii'))
+
+    def write_float(self, d):
+        self.f.write(struct.pack('<f', d))
+
+ w.write_str(x.name)
+ w.write_shape(x.shape)
+ w.write_int64(offset)
+ w.write_int32(x.data.itemsize)
+ w.write_int32(length)
+
+ offset += length
+ all_tensors.append(x)
+
+ for x in all_tensors:
+ w.write_array(x.data)
+
+
+
diff --git a/animalai_packages/animalai_train/animalai_train/trainers/bc/__init__.py b/animalai_packages/animalai_train/animalai_train/trainers/bc/__init__.py
new file mode 100644
index 00000000..80cd0aa0
--- /dev/null
+++ b/animalai_packages/animalai_train/animalai_train/trainers/bc/__init__.py
@@ -0,0 +1,4 @@
+from .models import *
+from .online_trainer import *
+from .offline_trainer import *
+from .policy import *
diff --git a/animalai_packages/animalai_train/animalai_train/trainers/bc/models.py b/animalai_packages/animalai_train/animalai_train/trainers/bc/models.py
new file mode 100644
index 00000000..e1ef94d5
--- /dev/null
+++ b/animalai_packages/animalai_train/animalai_train/trainers/bc/models.py
@@ -0,0 +1,55 @@
+import tensorflow as tf
+import tensorflow.contrib.layers as c_layers
+from animalai_train.trainers.models import LearningModel
+
+
+class BehavioralCloningModel(LearningModel):
+ def __init__(self, brain, h_size=128, lr=1e-4, n_layers=2, m_size=128,
+ normalize=False, use_recurrent=False, seed=0):
+ LearningModel.__init__(self, m_size, normalize, use_recurrent, brain, seed)
+ num_streams = 1
+ hidden_streams = self.create_observation_streams(num_streams, h_size, n_layers)
+ hidden = hidden_streams[0]
+ self.dropout_rate = tf.placeholder(dtype=tf.float32, shape=[], name="dropout_rate")
+ hidden_reg = tf.layers.dropout(hidden, self.dropout_rate)
+ if self.use_recurrent:
+ tf.Variable(self.m_size, name="memory_size", trainable=False, dtype=tf.int32)
+ self.memory_in = tf.placeholder(shape=[None, self.m_size], dtype=tf.float32, name='recurrent_in')
+ hidden_reg, self.memory_out = self.create_recurrent_encoder(hidden_reg, self.memory_in,
+ self.sequence_length)
+ self.memory_out = tf.identity(self.memory_out, name='recurrent_out')
+
+ if brain.vector_action_space_type == "discrete":
+ policy_branches = []
+ for size in self.act_size:
+ policy_branches.append(
+ tf.layers.dense(
+ hidden,
+ size,
+ activation=None,
+ use_bias=False,
+ kernel_initializer=c_layers.variance_scaling_initializer(factor=0.01)))
+ self.action_probs = tf.concat(
+ [tf.nn.softmax(branch) for branch in policy_branches], axis=1, name="action_probs")
+ self.action_masks = tf.placeholder(shape=[None, sum(self.act_size)], dtype=tf.float32, name="action_masks")
+ self.sample_action_float, normalized_logits = self.create_discrete_action_masking_layer(
+ tf.concat(policy_branches, axis=1), self.action_masks, self.act_size)
+ tf.identity(normalized_logits, name='action')
+ self.sample_action = tf.cast(self.sample_action_float, tf.int32)
+ self.true_action = tf.placeholder(shape=[None, len(policy_branches)], dtype=tf.int32, name="teacher_action")
+ self.action_oh = tf.concat([
+ tf.one_hot(self.true_action[:, i], self.act_size[i]) for i in range(len(self.act_size))], axis=1)
+ self.loss = tf.reduce_sum(-tf.log(self.action_probs + 1e-10) * self.action_oh)
+ self.action_percent = tf.reduce_mean(tf.cast(
+ tf.equal(tf.cast(tf.argmax(self.action_probs, axis=1), tf.int32), self.sample_action), tf.float32))
+ else:
+ self.policy = tf.layers.dense(hidden_reg, self.act_size[0], activation=None, use_bias=False, name='pre_action',
+ kernel_initializer=c_layers.variance_scaling_initializer(factor=0.01))
+ self.clipped_sample_action = tf.clip_by_value(self.policy, -1, 1)
+ self.sample_action = tf.identity(self.clipped_sample_action, name="action")
+ self.true_action = tf.placeholder(shape=[None, self.act_size[0]], dtype=tf.float32, name="teacher_action")
+ self.clipped_true_action = tf.clip_by_value(self.true_action, -1, 1)
+ self.loss = tf.reduce_sum(tf.squared_difference(self.clipped_true_action, self.sample_action))
+
+ optimizer = tf.train.AdamOptimizer(learning_rate=lr)
+ self.update = optimizer.minimize(self.loss)
diff --git a/animalai_packages/animalai_train/animalai_train/trainers/bc/offline_trainer.py b/animalai_packages/animalai_train/animalai_train/trainers/bc/offline_trainer.py
new file mode 100644
index 00000000..ebdbc443
--- /dev/null
+++ b/animalai_packages/animalai_train/animalai_train/trainers/bc/offline_trainer.py
@@ -0,0 +1,56 @@
+# # Unity ML-Agents Toolkit
+# ## ML-Agent Learning (Behavioral Cloning)
+# Contains an implementation of Behavioral Cloning Algorithm
+
+import logging
+import copy
+
+from animalai_train.trainers.bc.trainer import BCTrainer
+from animalai_train.trainers.demo_loader import demo_to_buffer
+from animalai_train.trainers.trainer import UnityTrainerException
+
+logger = logging.getLogger("mlagents.trainers")
+
+
+class OfflineBCTrainer(BCTrainer):
+ """The OfflineBCTrainer is an implementation of Offline Behavioral Cloning."""
+
+ def __init__(self, brain, trainer_parameters, training, load, seed, run_id):
+ """
+        Responsible for collecting experiences and training a behavioral cloning model.
+ :param trainer_parameters: The parameters for the trainer (dictionary).
+ :param training: Whether the trainer is set for training.
+ :param load: Whether the model should be loaded.
+ :param seed: The seed the model will be initialized with
+        :param run_id: The identifier of the current run
+ """
+ super(OfflineBCTrainer, self).__init__(
+ brain, trainer_parameters, training, load, seed, run_id)
+
+ self.param_keys = ['batch_size', 'summary_freq', 'max_steps',
+ 'batches_per_epoch', 'use_recurrent',
+ 'hidden_units', 'learning_rate', 'num_layers',
+ 'sequence_length', 'memory_size', 'model_path',
+ 'demo_path']
+
+ self.check_param_keys()
+ self.batches_per_epoch = trainer_parameters['batches_per_epoch']
+ self.n_sequences = max(int(trainer_parameters['batch_size'] / self.policy.sequence_length),
+ 1)
+
+ brain_params, self.demonstration_buffer = demo_to_buffer(
+ trainer_parameters['demo_path'],
+ self.policy.sequence_length)
+
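+        # Sanity check: the demonstration must have been recorded with a brain compatible
+        # with the one being trained (everything except the brain name must match).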
+ policy_brain = copy.deepcopy(brain.__dict__)
+ expert_brain = copy.deepcopy(brain_params.__dict__)
+ policy_brain.pop('brain_name')
+ expert_brain.pop('brain_name')
+ if expert_brain != policy_brain:
+ raise UnityTrainerException("The provided demonstration is not compatible with the "
+ "brain being used for performance evaluation.")
+
+ def __str__(self):
+ return '''Hyperparameters for the Imitation Trainer of brain {0}: \n{1}'''.format(
+ self.brain_name, '\n'.join(
+ ['\t{0}:\t{1}'.format(x, self.trainer_parameters[x]) for x in self.param_keys]))
diff --git a/animalai_packages/animalai_train/animalai_train/trainers/bc/online_trainer.py b/animalai_packages/animalai_train/animalai_train/trainers/bc/online_trainer.py
new file mode 100644
index 00000000..91ba340c
--- /dev/null
+++ b/animalai_packages/animalai_train/animalai_train/trainers/bc/online_trainer.py
@@ -0,0 +1,116 @@
+# # Unity ML-Agents Toolkit
+# ## ML-Agent Learning (Behavioral Cloning)
+# Contains an implementation of Behavioral Cloning Algorithm
+
+import logging
+import numpy as np
+
+from animalai.envs import AllBrainInfo
+from animalai_train.trainers.bc.trainer import BCTrainer
+
+logger = logging.getLogger("mlagents.trainers")
+
+
+class OnlineBCTrainer(BCTrainer):
+ """The OnlineBCTrainer is an implementation of Online Behavioral Cloning."""
+
+ def __init__(self, brain, trainer_parameters, training, load, seed, run_id):
+ """
+        Responsible for collecting experiences and training a behavioral cloning model.
+ :param trainer_parameters: The parameters for the trainer (dictionary).
+ :param training: Whether the trainer is set for training.
+ :param load: Whether the model should be loaded.
+ :param seed: The seed the model will be initialized with
+        :param run_id: The identifier of the current run
+ """
+ super(OnlineBCTrainer, self).__init__(brain, trainer_parameters, training, load, seed,
+ run_id)
+
+ self.param_keys = ['brain_to_imitate', 'batch_size', 'time_horizon',
+ 'summary_freq', 'max_steps',
+ 'batches_per_epoch', 'use_recurrent',
+ 'hidden_units', 'learning_rate', 'num_layers',
+ 'sequence_length', 'memory_size', 'model_path']
+
+ self.check_param_keys()
+ self.brain_to_imitate = trainer_parameters['brain_to_imitate']
+ self.batches_per_epoch = trainer_parameters['batches_per_epoch']
+ self.n_sequences = max(int(trainer_parameters['batch_size'] / self.policy.sequence_length),
+ 1)
+
+ def __str__(self):
+ return '''Hyperparameters for the Imitation Trainer of brain {0}: \n{1}'''.format(
+ self.brain_name, '\n'.join(
+ ['\t{0}:\t{1}'.format(x, self.trainer_parameters[x]) for x in self.param_keys]))
+
+ def add_experiences(self, curr_info: AllBrainInfo, next_info: AllBrainInfo,
+ take_action_outputs):
+ """
+ Adds experiences to each agent's experience history.
+ :param curr_info: Current AllBrainInfo (Dictionary of all current brains and corresponding BrainInfo).
+ :param next_info: Next AllBrainInfo (Dictionary of all current brains and corresponding BrainInfo).
+ :param take_action_outputs: The outputs of the take action method.
+ """
+
+ # Used to collect teacher experience into training buffer
+ info_teacher = curr_info[self.brain_to_imitate]
+ next_info_teacher = next_info[self.brain_to_imitate]
+ for agent_id in info_teacher.agents:
+ self.demonstration_buffer[agent_id].last_brain_info = info_teacher
+
+ for agent_id in next_info_teacher.agents:
+ stored_info_teacher = self.demonstration_buffer[agent_id].last_brain_info
+ if stored_info_teacher is None:
+ continue
+ else:
+ idx = stored_info_teacher.agents.index(agent_id)
+ next_idx = next_info_teacher.agents.index(agent_id)
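+                # Teacher text observations encode "<record>,<reset>" flags (e.g. "true,false")
+                # that control whether this step is stored and whether the buffer is reset.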
+ if stored_info_teacher.text_observations[idx] != "":
+ info_teacher_record, info_teacher_reset = \
+ stored_info_teacher.text_observations[idx].lower().split(",")
+ next_info_teacher_record, next_info_teacher_reset = \
+ next_info_teacher.text_observations[idx]. \
+ lower().split(",")
+ if next_info_teacher_reset == "true":
+ self.demonstration_buffer.reset_update_buffer()
+ else:
+ info_teacher_record, next_info_teacher_record = "true", "true"
+ if info_teacher_record == "true" and next_info_teacher_record == "true":
+ if not stored_info_teacher.local_done[idx]:
+ for i in range(self.policy.vis_obs_size):
+ self.demonstration_buffer[agent_id]['visual_obs%d' % i] \
+ .append(stored_info_teacher.visual_observations[i][idx])
+ if self.policy.use_vec_obs:
+ self.demonstration_buffer[agent_id]['vector_obs'] \
+ .append(stored_info_teacher.vector_observations[idx])
+ if self.policy.use_recurrent:
+ if stored_info_teacher.memories.shape[1] == 0:
+ stored_info_teacher.memories = np.zeros(
+ (len(stored_info_teacher.agents),
+ self.policy.m_size))
+ self.demonstration_buffer[agent_id]['memory'].append(
+ stored_info_teacher.memories[idx])
+ self.demonstration_buffer[agent_id]['actions'].append(
+ next_info_teacher.previous_vector_actions[next_idx])
+
+ super(OnlineBCTrainer, self).add_experiences(curr_info, next_info, take_action_outputs)
+
+ def process_experiences(self, current_info: AllBrainInfo, next_info: AllBrainInfo):
+ """
+ Checks agent histories for processing condition, and processes them as necessary.
+ Processing involves calculating value and advantage targets for model updating step.
+ :param current_info: Current AllBrainInfo
+ :param next_info: Next AllBrainInfo
+ """
+ info_teacher = next_info[self.brain_to_imitate]
+ for l in range(len(info_teacher.agents)):
+ teacher_action_list = len(self.demonstration_buffer[info_teacher.agents[l]]['actions'])
+ horizon_reached = teacher_action_list > self.trainer_parameters['time_horizon']
+ teacher_filled = len(self.demonstration_buffer[info_teacher.agents[l]]['actions']) > 0
+ if (info_teacher.local_done[l] or horizon_reached) and teacher_filled:
+ agent_id = info_teacher.agents[l]
+ self.demonstration_buffer.append_update_buffer(
+ agent_id, batch_size=None, training_length=self.policy.sequence_length)
+ self.demonstration_buffer[agent_id].reset_agent()
+
+ super(OnlineBCTrainer, self).process_experiences(current_info, next_info)
diff --git a/animalai_packages/animalai_train/animalai_train/trainers/bc/policy.py b/animalai_packages/animalai_train/animalai_train/trainers/bc/policy.py
new file mode 100644
index 00000000..f2d990b3
--- /dev/null
+++ b/animalai_packages/animalai_train/animalai_train/trainers/bc/policy.py
@@ -0,0 +1,93 @@
+import logging
+
+import numpy as np
+from animalai_train.trainers.bc.models import BehavioralCloningModel
+from animalai_train.trainers.policy import Policy
+
+logger = logging.getLogger("mlagents.trainers")
+
+
+class BCPolicy(Policy):
+ def __init__(self, seed, brain, trainer_parameters, load):
+ """
+ :param seed: Random seed.
+ :param brain: Assigned Brain object.
+ :param trainer_parameters: Defined training parameters.
+ :param load: Whether a pre-trained model will be loaded or a new one created.
+ """
+ super(BCPolicy, self).__init__(seed, brain, trainer_parameters)
+
+        with self.graph.as_default():
+ self.model = BehavioralCloningModel(
+ h_size=int(trainer_parameters['hidden_units']),
+ lr=float(trainer_parameters['learning_rate']),
+ n_layers=int(trainer_parameters['num_layers']),
+ m_size=self.m_size,
+ normalize=False,
+ use_recurrent=trainer_parameters['use_recurrent'],
+ brain=brain,
+ seed=seed)
+
+ if load:
+ self._load_graph()
+ else:
+ self._initialize_graph()
+
+ self.inference_dict = {'action': self.model.sample_action}
+ self.update_dict = {'policy_loss': self.model.loss,
+ 'update_batch': self.model.update}
+ if self.use_recurrent:
+ self.inference_dict['memory_out'] = self.model.memory_out
+
+ self.evaluate_rate = 1.0
+ self.update_rate = 0.5
+
+ def evaluate(self, brain_info):
+ """
+ Evaluates policy for the agent experiences provided.
+ :param brain_info: BrainInfo input to network.
+ :return: Results of evaluation.
+ """
+ feed_dict = {self.model.dropout_rate: self.evaluate_rate,
+ self.model.sequence_length: 1}
+
+ feed_dict = self._fill_eval_dict(feed_dict, brain_info)
+ if self.use_recurrent:
+ if brain_info.memories.shape[1] == 0:
+ brain_info.memories = self.make_empty_memory(len(brain_info.agents))
+ feed_dict[self.model.memory_in] = brain_info.memories
+ run_out = self._execute_model(feed_dict, self.inference_dict)
+ return run_out
+
+ def update(self, mini_batch, num_sequences):
+ """
+ Performs update on model.
+ :param mini_batch: Batch of experiences.
+ :param num_sequences: Number of sequences to process.
+ :return: Results of update.
+ """
+
+ feed_dict = {self.model.dropout_rate: self.update_rate,
+ self.model.batch_size: num_sequences,
+ self.model.sequence_length: self.sequence_length}
+ if self.use_continuous_act:
+ feed_dict[self.model.true_action] = mini_batch['actions']. \
+ reshape([-1, self.brain.vector_action_space_size[0]])
+ else:
+ feed_dict[self.model.true_action] = mini_batch['actions'].reshape(
+ [-1, len(self.brain.vector_action_space_size)])
+ feed_dict[self.model.action_masks] = np.ones(
+ (num_sequences, sum(self.brain.vector_action_space_size)))
+ if self.use_vec_obs:
+ apparent_obs_size = self.brain.vector_observation_space_size * \
+ self.brain.num_stacked_vector_observations
+ feed_dict[self.model.vector_in] = mini_batch['vector_obs'] \
+ .reshape([-1,apparent_obs_size])
+ for i, _ in enumerate(self.model.visual_in):
+ visual_obs = mini_batch['visual_obs%d' % i]
+ feed_dict[self.model.visual_in[i]] = visual_obs
+ if self.use_recurrent:
+ feed_dict[self.model.memory_in] = np.zeros([num_sequences, self.m_size])
+ run_out = self._execute_model(feed_dict, self.update_dict)
+ return run_out
diff --git a/animalai_packages/animalai_train/animalai_train/trainers/bc/trainer.py b/animalai_packages/animalai_train/animalai_train/trainers/bc/trainer.py
new file mode 100644
index 00000000..dbd4d9c4
--- /dev/null
+++ b/animalai_packages/animalai_train/animalai_train/trainers/bc/trainer.py
@@ -0,0 +1,190 @@
+# # Unity ML-Agents Toolkit
+# ## ML-Agent Learning (Behavioral Cloning)
+# Contains an implementation of Behavioral Cloning Algorithm
+
+import logging
+import os
+
+import numpy as np
+import tensorflow as tf
+
+from animalai.envs import AllBrainInfo
+from animalai_train.trainers.bc.policy import BCPolicy
+from animalai_train.trainers.buffer import Buffer
+from animalai_train.trainers.trainer import Trainer
+
+logger = logging.getLogger("mlagents.trainers")
+
+
+class BCTrainer(Trainer):
+ """The BCTrainer is an implementation of Behavioral Cloning."""
+
+ def __init__(self, brain, trainer_parameters, training, load, seed, run_id):
+ """
+        Responsible for collecting experiences and training a behavioral cloning model.
+ :param trainer_parameters: The parameters for the trainer (dictionary).
+ :param training: Whether the trainer is set for training.
+ :param load: Whether the model should be loaded.
+ :param seed: The seed the model will be initialized with
+        :param run_id: The identifier of the current run
+ """
+ super(BCTrainer, self).__init__(brain, trainer_parameters, training, run_id)
+ self.policy = BCPolicy(seed, brain, trainer_parameters, load)
+ self.n_sequences = 1
+ self.cumulative_rewards = {}
+ self.episode_steps = {}
+ self.stats = {'Losses/Cloning Loss': [], 'Environment/Episode Length': [],
+ 'Environment/Cumulative Reward': []}
+
+ self.summary_path = trainer_parameters['summary_path']
+ self.batches_per_epoch = trainer_parameters['batches_per_epoch']
+ if not os.path.exists(self.summary_path):
+ os.makedirs(self.summary_path)
+
+ self.demonstration_buffer = Buffer()
+ self.evaluation_buffer = Buffer()
+ self.summary_writer = tf.summary.FileWriter(self.summary_path)
+
+ @property
+ def parameters(self):
+ """
+ Returns the trainer parameters of the trainer.
+ """
+ return self.trainer_parameters
+
+ @property
+ def get_max_steps(self):
+ """
+ Returns the maximum number of steps. Is used to know when the trainer should be stopped.
+ :return: The maximum number of steps of the trainer
+ """
+ return float(self.trainer_parameters['max_steps'])
+
+ @property
+ def get_step(self):
+ """
+ Returns the number of steps the trainer has performed
+ :return: the step count of the trainer
+ """
+ return self.policy.get_current_step()
+
+ @property
+ def get_last_reward(self):
+ """
+ Returns the last reward the trainer has had
+ :return: the new last reward
+ """
+ if len(self.stats['Environment/Cumulative Reward']) > 0:
+ return np.mean(self.stats['Environment/Cumulative Reward'])
+ else:
+ return 0
+
+ def increment_step_and_update_last_reward(self):
+ """
+        Increments the step count of the trainer and updates the last reward.
+ """
+ self.policy.increment_step()
+ return
+
+ def take_action(self, all_brain_info: AllBrainInfo):
+ """
+ Decides actions using policy given current brain info.
+ :param all_brain_info: AllBrainInfo from environment.
+ :return: a tuple containing action, memories, values and an object
+ to be passed to add experiences
+ """
+ if len(all_brain_info[self.brain_name].agents) == 0:
+ return [], [], [], None, None
+
+ agent_brain = all_brain_info[self.brain_name]
+ run_out = self.policy.evaluate(agent_brain)
+ if self.policy.use_recurrent:
+ return run_out['action'], run_out['memory_out'], None, None, None
+ else:
+ return run_out['action'], None, None, None, None
+
+ def add_experiences(self, curr_info: AllBrainInfo, next_info: AllBrainInfo,
+ take_action_outputs):
+ """
+ Adds experiences to each agent's experience history.
+ :param curr_info: Current AllBrainInfo (Dictionary of all current brains and corresponding BrainInfo).
+ :param next_info: Next AllBrainInfo (Dictionary of all current brains and corresponding BrainInfo).
+ :param take_action_outputs: The outputs of the take action method.
+ """
+
+ # Used to collect information about student performance.
+ info_student = curr_info[self.brain_name]
+ next_info_student = next_info[self.brain_name]
+ for agent_id in info_student.agents:
+ self.evaluation_buffer[agent_id].last_brain_info = info_student
+
+ for agent_id in next_info_student.agents:
+ stored_info_student = self.evaluation_buffer[agent_id].last_brain_info
+ if stored_info_student is None:
+ continue
+ else:
+ next_idx = next_info_student.agents.index(agent_id)
+ if agent_id not in self.cumulative_rewards:
+ self.cumulative_rewards[agent_id] = 0
+ self.cumulative_rewards[agent_id] += next_info_student.rewards[next_idx]
+ if not next_info_student.local_done[next_idx]:
+ if agent_id not in self.episode_steps:
+ self.episode_steps[agent_id] = 0
+ self.episode_steps[agent_id] += 1
+
+ def process_experiences(self, current_info: AllBrainInfo, next_info: AllBrainInfo):
+ """
+ Checks agent histories for processing condition, and processes them as necessary.
+ Processing involves calculating value and advantage targets for model updating step.
+ :param current_info: Current AllBrainInfo
+ :param next_info: Next AllBrainInfo
+ """
+ info_student = next_info[self.brain_name]
+ for l in range(len(info_student.agents)):
+ if info_student.local_done[l]:
+ agent_id = info_student.agents[l]
+ self.stats['Environment/Cumulative Reward'].append(
+ self.cumulative_rewards.get(agent_id, 0))
+ self.stats['Environment/Episode Length'].append(
+ self.episode_steps.get(agent_id, 0))
+ self.cumulative_rewards[agent_id] = 0
+ self.episode_steps[agent_id] = 0
+
+ def end_episode(self):
+ """
+ A signal that the Episode has ended. The buffer must be reset.
+        Gets called only when the academy resets.
+ """
+ self.evaluation_buffer.reset_local_buffers()
+ for agent_id in self.cumulative_rewards:
+ self.cumulative_rewards[agent_id] = 0
+ for agent_id in self.episode_steps:
+ self.episode_steps[agent_id] = 0
+
+ def is_ready_update(self):
+ """
+ Returns whether or not the trainer has enough elements to run update model
+ :return: A boolean corresponding to whether or not update_model() can be run
+ """
+ return len(self.demonstration_buffer.update_buffer['actions']) > self.n_sequences
+
+ def update_policy(self):
+ """
+ Updates the policy.
+ """
+ self.demonstration_buffer.update_buffer.shuffle()
+ batch_losses = []
+ num_batches = min(len(self.demonstration_buffer.update_buffer['actions']) //
+ self.n_sequences, self.batches_per_epoch)
+ for i in range(num_batches):
+ update_buffer = self.demonstration_buffer.update_buffer
+ start = i * self.n_sequences
+ end = (i + 1) * self.n_sequences
+ mini_batch = update_buffer.make_mini_batch(start, end)
+ run_out = self.policy.update(mini_batch, self.n_sequences)
+ loss = run_out['policy_loss']
+ batch_losses.append(loss)
+ if len(batch_losses) > 0:
+ self.stats['Losses/Cloning Loss'].append(np.mean(batch_losses))
+ else:
+ self.stats['Losses/Cloning Loss'].append(0)
diff --git a/animalai_packages/animalai_train/animalai_train/trainers/buffer.py b/animalai_packages/animalai_train/animalai_train/trainers/buffer.py
new file mode 100644
index 00000000..ff2f0b88
--- /dev/null
+++ b/animalai_packages/animalai_train/animalai_train/trainers/buffer.py
@@ -0,0 +1,255 @@
+import numpy as np
+
+from animalai.envs.exception import UnityException
+
+
+class BufferException(UnityException):
+ """
+ Related to errors with the Buffer.
+ """
+ pass
+
+
+class Buffer(dict):
+ """
+ Buffer contains a dictionary of AgentBuffer. The AgentBuffers are indexed by agent_id.
+ Buffer also contains an update_buffer that corresponds to the buffer used when updating the model.
+ """
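+    # Typical use (agent id and field names are illustrative):
+    #   buffer["agent_0"]["actions"].append(action)              # per-agent local buffer
+    #   buffer.append_update_buffer("agent_0", training_length=1)
+    #   mini_batch = buffer.update_buffer.make_mini_batch(0, 32)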
+
+ class AgentBuffer(dict):
+ """
+        AgentBuffer contains a dictionary of AgentBufferFields. Each agent has its own AgentBuffer.
+ The keys correspond to the name of the field. Example: state, action
+ """
+
+ class AgentBufferField(list):
+ """
+            AgentBufferField is a list of numpy arrays. When an agent collects a field, you can add it to its
+            AgentBufferField with the append method.
+ """
+
+ def __init__(self):
+ self.padding_value = 0
+ super(Buffer.AgentBuffer.AgentBufferField, self).__init__()
+
+ def __str__(self):
+ return str(np.array(self).shape)
+
+ def append(self, element, padding_value=0):
+ """
+ Adds an element to this list. Also lets you change the padding
+ type, so that it can be set on append (e.g. action_masks should
+ be padded with 1.)
+ :param element: The element to append to the list.
+ :param padding_value: The value used to pad when get_batch is called.
+ """
+ super(Buffer.AgentBuffer.AgentBufferField, self).append(element)
+ self.padding_value = padding_value
+
+ def extend(self, data):
+ """
+ Adds a list of np.arrays to the end of the list of np.arrays.
+ :param data: The np.array list to append.
+ """
+ self += list(np.array(data))
+
+ def set(self, data):
+ """
+ Sets the list of np.array to the input data
+ :param data: The np.array list to be set.
+ """
+ self[:] = []
+ self[:] = list(np.array(data))
+
+ def get_batch(self, batch_size=None, training_length=1, sequential=True):
+ """
+ Retrieve the last batch_size elements of length training_length
+ from the list of np.array
+ :param batch_size: The number of elements to retrieve. If None:
+ All elements will be retrieved.
+ :param training_length: The length of the sequence to be retrieved. If
+ None: only takes one element.
+ :param sequential: If true and training_length is not None: the elements
+ will not repeat in the sequence. [a,b,c,d,e] with training_length = 2 and
+ sequential=True gives [[0,a],[b,c],[d,e]]. If sequential=False gives
+ [[a,b],[b,c],[c,d],[d,e]]
+ """
+ if training_length == 1:
+ # When the training length is 1, the method returns a list of elements,
+ # not a list of sequences of elements.
+ if batch_size is None:
+ # If batch_size is None : All the elements of the AgentBufferField are returned.
+ return np.array(self)
+ else:
+ # return the batch_size last elements
+ if batch_size > len(self):
+ raise BufferException("Batch size requested is too large")
+ return np.array(self[-batch_size:])
+ else:
+ # The training_length is not None, the method returns a list of SEQUENCES of elements
+ if not sequential:
+ # The sequences will have overlapping elements
+ if batch_size is None:
+ # retrieve the maximum number of elements
+ batch_size = len(self) - training_length + 1
+ # The number of sequences of length training_length taken from a list of len(self) elements
+ # with overlapping is equal to batch_size
+ if (len(self) - training_length + 1) < batch_size:
+                        raise BufferException("The batch size and training length requested for get_batch were"
+                                              " too large given the current number of data points.")
+ tmp_list = []
+ for end in range(len(self) - batch_size + 1, len(self) + 1):
+ tmp_list += [np.array(self[end - training_length:end])]
+ return np.array(tmp_list)
+ if sequential:
+ # The sequences will not have overlapping elements (this involves padding)
+ leftover = len(self) % training_length
+ # leftover is the number of elements in the first sequence (this sequence might need 0 padding)
+ if batch_size is None:
+ # retrieve the maximum number of elements
+ batch_size = len(self) // training_length + 1 * (leftover != 0)
+ # The maximum number of sequences taken from a list of length len(self) without overlapping
+ # with padding is equal to batch_size
+ if batch_size > (len(self) // training_length + 1 * (leftover != 0)):
+                        raise BufferException("The batch size and training length requested for get_batch were"
+                                              " too large given the current number of data points.")
+ tmp_list = []
+ padding = np.array(self[-1]) * self.padding_value
+ # The padding is made with zeros and its shape is given by the shape of the last element
+ for end in range(len(self), len(self) % training_length, -training_length)[:batch_size]:
+ tmp_list += [np.array(self[end - training_length:end])]
+ if (leftover != 0) and (len(tmp_list) < batch_size):
+ tmp_list += [np.array([padding] * (training_length - leftover) + self[:leftover])]
+ tmp_list.reverse()
+ return np.array(tmp_list)
+
+ def reset_field(self):
+ """
+ Resets the AgentBufferField
+ """
+ self[:] = []
+
+ def __init__(self):
+ self.last_brain_info = None
+ self.last_take_action_outputs = None
+ super(Buffer.AgentBuffer, self).__init__()
+
+ def __str__(self):
+ return ", ".join(["'{0}' : {1}".format(k, str(self[k])) for k in self.keys()])
+
+ def reset_agent(self):
+ """
+ Resets the AgentBuffer
+ """
+ for k in self.keys():
+ self[k].reset_field()
+ self.last_brain_info = None
+ self.last_take_action_outputs = None
+
+ def __getitem__(self, key):
+ if key not in self.keys():
+ self[key] = self.AgentBufferField()
+ return super(Buffer.AgentBuffer, self).__getitem__(key)
+
+ def check_length(self, key_list):
+ """
+ Some methods will require that some fields have the same length.
+ check_length will return true if the fields in key_list
+ have the same length.
+            :param key_list: The fields whose lengths will be compared
+ """
+ if len(key_list) < 2:
+ return True
+ l = None
+ for key in key_list:
+ if key not in self.keys():
+ return False
+ if (l is not None) and (l != len(self[key])):
+ return False
+ l = len(self[key])
+ return True
+
+ def shuffle(self, key_list=None):
+ """
+            Shuffles the fields in key_list in a consistent way: The reordering will
+ be the same across fields.
+ :param key_list: The fields that must be shuffled.
+ """
+ if key_list is None:
+ key_list = list(self.keys())
+ if not self.check_length(key_list):
+ raise BufferException("Unable to shuffle if the fields are not of same length")
+ s = np.arange(len(self[key_list[0]]))
+ np.random.shuffle(s)
+ for key in key_list:
+ self[key][:] = [self[key][i] for i in s]
+
+ def make_mini_batch(self, start, end):
+ """
+ Creates a mini-batch from buffer.
+ :param start: Starting index of buffer.
+ :param end: Ending index of buffer.
+ :return: Dict of mini batch.
+ """
+ mini_batch = {}
+ for key in self:
+ mini_batch[key] = np.array(self[key][start:end])
+ return mini_batch
+
+ def __init__(self):
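+        # A Buffer maps agent ids to local AgentBuffers; update_buffer aggregates their processed samples for training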
+ self.update_buffer = self.AgentBuffer()
+ super(Buffer, self).__init__()
+
+ def __str__(self):
+ return "update buffer :\n\t{0}\nlocal_buffers :\n{1}".format(str(self.update_buffer),
+ '\n'.join(
+ ['\tagent {0} :{1}'.format(k, str(self[k])) for
+ k in self.keys()]))
+
+ def __getitem__(self, key):
+ if key not in self.keys():
+ self[key] = self.AgentBuffer()
+ return super(Buffer, self).__getitem__(key)
+
+ def reset_update_buffer(self):
+ """
+ Resets the update buffer
+ """
+ self.update_buffer.reset_agent()
+
+ def reset_local_buffers(self):
+ """
+        Resets all the local buffers
+ """
+ agent_ids = list(self.keys())
+ for k in agent_ids:
+ self[k].reset_agent()
+
+ def append_update_buffer(self, agent_id, key_list=None, batch_size=None, training_length=None):
+ """
+ Appends the buffer of an agent to the update buffer.
+        :param agent_id: The id of the agent whose data will be appended
+ :param key_list: The fields that must be added. If None: all fields will be appended.
+ :param batch_size: The number of elements that must be appended. If None: All of them will be.
+ :param training_length: The length of the samples that must be appended. If None: only takes one element.
+ """
+ if key_list is None:
+ key_list = self[agent_id].keys()
+ if not self[agent_id].check_length(key_list):
+            raise BufferException("The fields {0} for agent {1} are not of the same length"
+ .format(key_list, agent_id))
+ for field_key in key_list:
+ self.update_buffer[field_key].extend(
+ self[agent_id][field_key].get_batch(batch_size=batch_size, training_length=training_length)
+ )
+
+ def append_all_agent_batch_to_update_buffer(self, key_list=None, batch_size=None, training_length=None):
+ """
+ Appends the buffer of all agents to the update buffer.
+ :param key_list: The fields that must be added. If None: all fields will be appended.
+ :param batch_size: The number of elements that must be appended. If None: All of them will be.
+ :param training_length: The length of the samples that must be appended. If None: only takes one element.
+ """
+ for agent_id in self.keys():
+ self.append_update_buffer(agent_id, key_list, batch_size, training_length)
diff --git a/animalai_packages/animalai_train/animalai_train/trainers/curriculum.py b/animalai_packages/animalai_train/animalai_train/trainers/curriculum.py
new file mode 100644
index 00000000..3ec0859f
--- /dev/null
+++ b/animalai_packages/animalai_train/animalai_train/trainers/curriculum.py
@@ -0,0 +1,112 @@
+import os
+import json
+import math
+
+from .exception import CurriculumError
+
+import logging
+
+logger = logging.getLogger('mlagents.trainers')
+
+
+class Curriculum(object):
+ def __init__(self, location, default_reset_parameters):
+ """
+ Initializes a Curriculum object.
+ :param location: Path to JSON defining curriculum.
+ :param default_reset_parameters: Set of reset parameters for
+ environment.
+ """
+ self.max_lesson_num = 0
+ self.measure = None
+ self._lesson_num = 0
+ # The name of the brain should be the basename of the file without the
+ # extension.
+ self._brain_name = os.path.basename(location).split('.')[0]
+
+ try:
+ with open(location) as data_file:
+ self.data = json.load(data_file)
+ except IOError:
+ raise CurriculumError(
+ 'The file {0} could not be found.'.format(location))
+ except UnicodeDecodeError:
+ raise CurriculumError('There was an error decoding {}'
+ .format(location))
+ self.smoothing_value = 0
+ for key in ['parameters', 'measure', 'thresholds',
+ 'min_lesson_length', 'signal_smoothing']:
+ if key not in self.data:
+ raise CurriculumError("{0} does not contain a "
+ "{1} field."
+ .format(location, key))
+ self.smoothing_value = 0
+ self.measure = self.data['measure']
+ self.min_lesson_length = self.data['min_lesson_length']
+ self.max_lesson_num = len(self.data['thresholds'])
+
+ parameters = self.data['parameters']
+ for key in parameters:
+ if key not in default_reset_parameters:
+ raise CurriculumError(
+ 'The parameter {0} in Curriculum {1} is not present in '
+ 'the Environment'.format(key, location))
+ if len(parameters[key]) != self.max_lesson_num + 1:
+ raise CurriculumError(
+ 'The parameter {0} in Curriculum {1} must have {2} values '
+ 'but {3} were found'.format(key, location,
+ self.max_lesson_num + 1,
+ len(parameters[key])))
+
+ @property
+ def lesson_num(self):
+ return self._lesson_num
+
+ @lesson_num.setter
+ def lesson_num(self, lesson_num):
+ self._lesson_num = max(0, min(lesson_num, self.max_lesson_num))
+
+ def increment_lesson(self, measure_val):
+ """
+ Increments the lesson number depending on the progress given.
+ :param measure_val: Measure of progress (either reward or percentage
+ steps completed).
+ :return Whether the lesson was incremented.
+ """
+ if not self.data or not measure_val or math.isnan(measure_val):
+ return False
+ if self.data['signal_smoothing']:
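+            # Exponentially smooth the measure: 25% previous smoothed value, 75% new measurement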
+ measure_val = self.smoothing_value * 0.25 + 0.75 * measure_val
+ self.smoothing_value = measure_val
+ if self.lesson_num < self.max_lesson_num:
+ if measure_val > self.data['thresholds'][self.lesson_num]:
+ self.lesson_num += 1
+ config = {}
+ parameters = self.data['parameters']
+ for key in parameters:
+ config[key] = parameters[key][self.lesson_num]
+ logger.info('{0} lesson changed. Now in lesson {1}: {2}'
+ .format(self._brain_name,
+ self.lesson_num,
+ ', '.join([str(x) + ' -> ' + str(config[x])
+ for x in config])))
+ return True
+ return False
+
+ def get_config(self, lesson=None):
+ """
+ Returns reset parameters which correspond to the lesson.
+ :param lesson: The lesson you want to get the config of. If None, the
+ current lesson is returned.
+ :return: The configuration of the reset parameters.
+ """
+ if not self.data:
+ return {}
+ if lesson is None:
+ lesson = self.lesson_num
+ lesson = max(0, min(lesson, self.max_lesson_num))
+ config = {}
+ parameters = self.data['parameters']
+ for key in parameters:
+ config[key] = parameters[key][lesson]
+ return config
diff --git a/animalai_packages/animalai_train/animalai_train/trainers/demo_loader.py b/animalai_packages/animalai_train/animalai_train/trainers/demo_loader.py
new file mode 100644
index 00000000..173615e4
--- /dev/null
+++ b/animalai_packages/animalai_train/animalai_train/trainers/demo_loader.py
@@ -0,0 +1,94 @@
+import pathlib
+import logging
+import os
+from animalai_train.trainers.buffer import Buffer
+from animalai.envs.brain import BrainParameters, BrainInfo
+from animalai.communicator_objects import *
+from google.protobuf.internal.decoder import _DecodeVarint32
+
+logger = logging.getLogger("mlagents.trainers")
+
+
+def make_demo_buffer(brain_infos, brain_params, sequence_length):
+ # Create and populate buffer using experiences
+ demo_buffer = Buffer()
+ for idx, experience in enumerate(brain_infos):
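+        # Stop one step before the end: each transition pairs the current BrainInfo with the next one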
+ if idx > len(brain_infos) - 2:
+ break
+ current_brain_info = brain_infos[idx]
+ next_brain_info = brain_infos[idx + 1]
+ demo_buffer[0].last_brain_info = current_brain_info
+ demo_buffer[0]['done'].append(next_brain_info.local_done[0])
+ demo_buffer[0]['rewards'].append(next_brain_info.rewards[0])
+ for i in range(brain_params.number_visual_observations):
+ demo_buffer[0]['visual_obs%d' % i] \
+ .append(current_brain_info.visual_observations[i][0])
+ if brain_params.vector_observation_space_size > 0:
+ demo_buffer[0]['vector_obs'] \
+ .append(current_brain_info.vector_observations[0])
+ demo_buffer[0]['actions'].append(next_brain_info.previous_vector_actions[0])
+ if next_brain_info.local_done[0]:
+ demo_buffer.append_update_buffer(0, batch_size=None,
+ training_length=sequence_length)
+ demo_buffer.reset_local_buffers()
+ demo_buffer.append_update_buffer(0, batch_size=None,
+ training_length=sequence_length)
+ return demo_buffer
+
+
+def demo_to_buffer(file_path, sequence_length):
+ """
+ Loads demonstration file and uses it to fill training buffer.
+ :param file_path: Location of demonstration file (.demo).
+ :param sequence_length: Length of trajectories to fill buffer.
+ :return:
+ """
+ brain_params, brain_infos, _ = load_demonstration(file_path)
+ demo_buffer = make_demo_buffer(brain_infos, brain_params, sequence_length)
+ return brain_params, demo_buffer
+
+
+def load_demonstration(file_path):
+ """
+ Loads and parses a demonstration file.
+ :param file_path: Location of demonstration file (.demo).
+ :return: BrainParameter and list of BrainInfos containing demonstration data.
+ """
+
+ # First 32 bytes of file dedicated to meta-data.
+ INITIAL_POS = 33
+
+ if not os.path.isfile(file_path):
+ raise FileNotFoundError("The demonstration file {} does not exist.".format(file_path))
+ file_extension = pathlib.Path(file_path).suffix
+ if file_extension != '.demo':
+ raise ValueError("The file is not a '.demo' file. Please provide a file with the "
+ "correct extension.")
+
+ brain_params = None
+ brain_infos = []
+ data = open(file_path, "rb").read()
+ next_pos, pos, obs_decoded = 0, 0, 0
+ total_expected = 0
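+    # The .demo file is a stream of varint-length-prefixed protobuf messages: demonstration meta-data first,
+    # then brain parameters, then one AgentInfoProto per recorded step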
+ while pos < len(data):
+ next_pos, pos = _DecodeVarint32(data, pos)
+ if obs_decoded == 0:
+ meta_data_proto = DemonstrationMetaProto()
+ meta_data_proto.ParseFromString(data[pos:pos + next_pos])
+ total_expected = meta_data_proto.number_steps
+ pos = INITIAL_POS
+ if obs_decoded == 1:
+ brain_param_proto = BrainParametersProto()
+ brain_param_proto.ParseFromString(data[pos:pos + next_pos])
+ brain_params = BrainParameters.from_proto(brain_param_proto)
+ pos += next_pos
+ if obs_decoded > 1:
+ agent_info = AgentInfoProto()
+ agent_info.ParseFromString(data[pos:pos + next_pos])
+ brain_info = BrainInfo.from_agent_proto([agent_info], brain_params)
+ brain_infos.append(brain_info)
+ if len(brain_infos) == total_expected:
+ break
+ pos += next_pos
+ obs_decoded += 1
+ return brain_params, brain_infos, total_expected
diff --git a/animalai_packages/animalai_train/animalai_train/trainers/exception.py b/animalai_packages/animalai_train/animalai_train/trainers/exception.py
new file mode 100644
index 00000000..b2f0a0db
--- /dev/null
+++ b/animalai_packages/animalai_train/animalai_train/trainers/exception.py
@@ -0,0 +1,20 @@
+"""
+Contains exceptions for the trainers package.
+"""
+
+class TrainerError(Exception):
+ """
+ Any error related to the trainers in the ML-Agents Toolkit.
+ """
+ pass
+
+class CurriculumError(TrainerError):
+ """
+ Any error related to training with a curriculum.
+ """
+ pass
+
+class MetaCurriculumError(TrainerError):
+ """
+ Any error related to the configuration of a metacurriculum.
+ """
diff --git a/animalai_packages/animalai_train/animalai_train/trainers/learn.py b/animalai_packages/animalai_train/animalai_train/trainers/learn.py
new file mode 100644
index 00000000..66050d9e
--- /dev/null
+++ b/animalai_packages/animalai_train/animalai_train/trainers/learn.py
@@ -0,0 +1,249 @@
+# # Unity ML-Agents Toolkit
+
+import logging
+
+from multiprocessing import Process, Queue
+import os
+import glob
+import shutil
+import numpy as np
+import yaml
+from docopt import docopt
+from typing import Optional
+
+
+from animalai_train.trainers.trainer_controller import TrainerController
+from animalai_train.trainers.exception import TrainerError
+from animalai_train.trainers import MetaCurriculumError, MetaCurriculum
+from animalai.envs import UnityEnvironment
+from animalai.envs.exception import UnityEnvironmentException
+
+
+def run_training(sub_id: int, run_seed: int, run_options, process_queue):
+ """
+ Launches training session.
+ :param process_queue: Queue used to send signal back to main.
+ :param sub_id: Unique id for training session.
+ :param run_seed: Random seed used for training.
+ :param run_options: Command line arguments for training.
+ """
+ # Docker Parameters
+ docker_target_name = (run_options['--docker-target-name']
+ if run_options['--docker-target-name'] != 'None' else None)
+
+ # General parameters
+ env_path = (run_options['--env']
+ if run_options['--env'] != 'None' else None)
+ run_id = run_options['--run-id']
+ load_model = run_options['--load']
+ train_model = run_options['--train']
+ save_freq = int(run_options['--save-freq'])
+ keep_checkpoints = int(run_options['--keep-checkpoints'])
+ worker_id = int(run_options['--worker-id'])
+ curriculum_folder = (run_options['--curriculum']
+ if run_options['--curriculum'] != 'None' else None)
+ lesson = int(run_options['--lesson'])
+ fast_simulation = not bool(run_options['--slow'])
+ no_graphics = run_options['--no-graphics']
+    trainer_config_path = run_options['<trainer-config-path>']
+
+ # Recognize and use docker volume if one is passed as an argument
+ if not docker_target_name:
+ model_path = './models/{run_id}'.format(run_id=run_id)
+ summaries_dir = './summaries'
+ else:
+ trainer_config_path = \
+ '/{docker_target_name}/{trainer_config_path}'.format(
+ docker_target_name=docker_target_name,
+ trainer_config_path=trainer_config_path)
+ if curriculum_folder is not None:
+ curriculum_folder = \
+ '/{docker_target_name}/{curriculum_folder}'.format(
+ docker_target_name=docker_target_name,
+ curriculum_folder=curriculum_folder)
+ model_path = '/{docker_target_name}/models/{run_id}'.format(
+ docker_target_name=docker_target_name,
+ run_id=run_id)
+ summaries_dir = '/{docker_target_name}/summaries'.format(
+ docker_target_name=docker_target_name)
+
+ trainer_config = load_config(trainer_config_path)
+ env = init_environment(env_path, docker_target_name, no_graphics, worker_id + sub_id, fast_simulation, run_seed)
+ maybe_meta_curriculum = try_create_meta_curriculum(curriculum_folder, env)
+
+ external_brains = {}
+ for brain_name in env.external_brain_names:
+ external_brains[brain_name] = env.brains[brain_name]
+
+ # Create controller and begin training.
+ tc = TrainerController(model_path, summaries_dir, run_id + '-' + str(sub_id),
+ save_freq, maybe_meta_curriculum,
+ load_model, train_model,
+ keep_checkpoints, lesson, external_brains, run_seed)
+
+ # Signal that environment has been launched.
+ process_queue.put(True)
+
+ # Begin training
+ tc.start_learning(env, trainer_config)
+
+
+def try_create_meta_curriculum(curriculum_folder: Optional[str], env: UnityEnvironment) -> Optional[MetaCurriculum]:
+ if curriculum_folder is None:
+ return None
+ else:
+ meta_curriculum = MetaCurriculum(curriculum_folder, env._resetParameters)
+ if meta_curriculum:
+ for brain_name in meta_curriculum.brains_to_curriculums.keys():
+ if brain_name not in env.external_brain_names:
+ raise MetaCurriculumError('One of the curricula '
+ 'defined in ' +
+ curriculum_folder + ' '
+ 'does not have a corresponding '
+ 'Brain. Check that the '
+ 'curriculum file has the same '
+ 'name as the Brain '
+ 'whose curriculum it defines.')
+ return meta_curriculum
+
+
+def prepare_for_docker_run(docker_target_name, env_path):
+ for f in glob.glob('/{docker_target_name}/*'.format(
+ docker_target_name=docker_target_name)):
+ if env_path in f:
+ try:
+ b = os.path.basename(f)
+ if os.path.isdir(f):
+ shutil.copytree(f,
+ '/ml-agents/{b}'.format(b=b))
+ else:
+ src_f = '/{docker_target_name}/{b}'.format(
+ docker_target_name=docker_target_name, b=b)
+ dst_f = '/ml-agents/{b}'.format(b=b)
+ shutil.copyfile(src_f, dst_f)
+ os.chmod(dst_f, 0o775) # Make executable
+ except Exception as e:
+ logging.getLogger('mlagents.trainers').info(e)
+ env_path = '/ml-agents/{env_path}'.format(env_path=env_path)
+ return env_path
+
+
+def load_config(trainer_config_path):
+ try:
+ with open(trainer_config_path) as data_file:
+ trainer_config = yaml.load(data_file)
+ return trainer_config
+ except IOError:
+ raise UnityEnvironmentException('Parameter file could not be found '
+ 'at {}.'
+ .format(trainer_config_path))
+ except UnicodeDecodeError:
+ raise UnityEnvironmentException('There was an error decoding '
+ 'Trainer Config from this path : {}'
+ .format(trainer_config_path))
+
+
+def init_environment(env_path, docker_target_name, no_graphics, worker_id, fast_simulation, seed):
+ if env_path is not None:
+ # Strip out executable extensions if passed
+ env_path = (env_path.strip()
+ .replace('.app', '')
+ .replace('.exe', '')
+ .replace('.x86_64', '')
+ .replace('.x86', ''))
+ docker_training = docker_target_name is not None
+ if docker_training and env_path is not None:
+ """
+ Comments for future maintenance:
+ Some OS/VM instances (e.g. COS GCP Image) mount filesystems
+ with COS flag which prevents execution of the Unity scene,
+ to get around this, we will copy the executable into the
+ container.
+ """
+ # Navigate in docker path and find env_path and copy it.
+ env_path = prepare_for_docker_run(docker_target_name,
+ env_path)
+ return UnityEnvironment(
+ file_name=env_path,
+ worker_id=worker_id,
+ seed=seed,
+ docker_training=docker_training,
+ no_graphics=no_graphics
+ )
+
+
+def main():
+ try:
+ print('''
+
+ ▄▄▄▓▓▓▓
+ ╓▓▓▓▓▓▓█▓▓▓▓▓
+ ,▄▄▄m▀▀▀' ,▓▓▓▀▓▓▄ ▓▓▓ ▓▓▌
+ ▄▓▓▓▀' ▄▓▓▀ ▓▓▓ ▄▄ ▄▄ ,▄▄ ▄▄▄▄ ,▄▄ ▄▓▓▌▄ ▄▄▄ ,▄▄
+ ▄▓▓▓▀ ▄▓▓▀ ▐▓▓▌ ▓▓▌ ▐▓▓ ▐▓▓▓▀▀▀▓▓▌ ▓▓▓ ▀▓▓▌▀ ^▓▓▌ ╒▓▓▌
+ ▄▓▓▓▓▓▄▄▄▄▄▄▄▄▓▓▓ ▓▀ ▓▓▌ ▐▓▓ ▐▓▓ ▓▓▓ ▓▓▓ ▓▓▌ ▐▓▓▄ ▓▓▌
+ ▀▓▓▓▓▀▀▀▀▀▀▀▀▀▀▓▓▄ ▓▓ ▓▓▌ ▐▓▓ ▐▓▓ ▓▓▓ ▓▓▓ ▓▓▌ ▐▓▓▐▓▓
+ ^█▓▓▓ ▀▓▓▄ ▐▓▓▌ ▓▓▓▓▄▓▓▓▓ ▐▓▓ ▓▓▓ ▓▓▓ ▓▓▓▄ ▓▓▓▓`
+ '▀▓▓▓▄ ^▓▓▓ ▓▓▓ └▀▀▀▀ ▀▀ ^▀▀ `▀▀ `▀▀ '▀▀ ▐▓▓▌
+ ▀▀▀▀▓▄▄▄ ▓▓▓▓▓▓, ▓▓▓▓▀
+ `▀█▓▓▓▓▓▓▓▓▓▌
+ ¬`▀▀▀█▓
+
+ ''')
+ except:
+ print('\n\n\tUnity Technologies\n')
+
+ logger = logging.getLogger('mlagents.trainers')
+ _USAGE = '''
+ Usage:
+      mlagents-learn <trainer-config-path> [options]
+ mlagents-learn --help
+
+ Options:
+      --env=<file>                   Name of the Unity executable [default: None].
+      --curriculum=<directory>       Curriculum json directory for environment [default: None].
+      --keep-checkpoints=<n>         How many model checkpoints to keep [default: 5].
+      --lesson=<n>                   Start learning from this lesson [default: 0].
+      --load                         Whether to load the model or randomly initialize [default: False].
+      --run-id=<path>                The directory name for model and summary statistics [default: ppo].
+      --num-runs=<n>                 Number of concurrent training sessions [default: 1].
+      --save-freq=<n>                Frequency at which to save model [default: 50000].
+      --seed=<n>                     Random seed used for training [default: -1].
+      --slow                         Whether to run the game at training speed [default: False].
+      --train                        Whether to train model, or only run inference [default: False].
+      --worker-id=<n>                Number to add to communication port (5005) [default: 0].
+      --docker-target-name=<dt>      Docker volume to store training-specific files [default: None].
+      --no-graphics                  Whether to run the environment in no-graphics mode [default: False].
+ '''
+
+ options = docopt(_USAGE)
+ logger.info(options)
+ num_runs = int(options['--num-runs'])
+ seed = int(options['--seed'])
+
+ if options['--env'] == 'None' and num_runs > 1:
+ raise TrainerError('It is not possible to launch more than one concurrent training session '
+ 'when training from the editor.')
+
+ jobs = []
+ run_seed = seed
+
+ if num_runs == 1:
+ if seed == -1:
+ run_seed = np.random.randint(0, 10000)
+ run_training(0, run_seed, options, Queue())
+ else:
+ for i in range(num_runs):
+ if seed == -1:
+ run_seed = np.random.randint(0, 10000)
+ process_queue = Queue()
+ p = Process(target=run_training, args=(i, run_seed, options, process_queue))
+ jobs.append(p)
+ p.start()
+ # Wait for signal that environment has successfully launched
+ while process_queue.get() is not True:
+ continue
+
+# For python debugger to directly run this script
+if __name__ == "__main__":
+ main()
diff --git a/animalai_packages/animalai_train/animalai_train/trainers/meta_curriculum.py b/animalai_packages/animalai_train/animalai_train/trainers/meta_curriculum.py
new file mode 100644
index 00000000..9809a887
--- /dev/null
+++ b/animalai_packages/animalai_train/animalai_train/trainers/meta_curriculum.py
@@ -0,0 +1,147 @@
+"""Contains the MetaCurriculum class."""
+
+import os
+from animalai_train.trainers.curriculum import Curriculum
+from animalai_train.trainers.exception import MetaCurriculumError
+
+import logging
+
+logger = logging.getLogger('mlagents.trainers')
+
+
+class MetaCurriculum(object):
+    """A MetaCurriculum holds curriculums. Each curriculum is associated with a
+ particular brain in the environment.
+ """
+
+ def __init__(self, curriculum_folder, default_reset_parameters):
+ """Initializes a MetaCurriculum object.
+
+ Args:
+ curriculum_folder (str): The relative or absolute path of the
+ folder which holds the curriculums for this environment.
+ The folder should contain JSON files whose names are the
+ brains that the curriculums belong to.
+ default_reset_parameters (dict): The default reset parameters
+ of the environment.
+ """
+ used_reset_parameters = set()
+ self._brains_to_curriculums = {}
+
+ try:
+ for curriculum_filename in os.listdir(curriculum_folder):
+ brain_name = curriculum_filename.split('.')[0]
+ curriculum_filepath = \
+ os.path.join(curriculum_folder, curriculum_filename)
+ curriculum = Curriculum(curriculum_filepath,
+ default_reset_parameters)
+
+ # Check if any two curriculums use the same reset params.
+ if any([(parameter in curriculum.get_config().keys())
+ for parameter in used_reset_parameters]):
+ logger.warning('Two or more curriculums will '
+ 'attempt to change the same reset '
+ 'parameter. The result will be '
+ 'non-deterministic.')
+
+ used_reset_parameters.update(curriculum.get_config().keys())
+ self._brains_to_curriculums[brain_name] = curriculum
+ except NotADirectoryError:
+ raise MetaCurriculumError(curriculum_folder + ' is not a '
+ 'directory. Refer to the ML-Agents '
+ 'curriculum learning docs.')
+
+
+ @property
+ def brains_to_curriculums(self):
+ """A dict from brain_name to the brain's curriculum."""
+ return self._brains_to_curriculums
+
+ @property
+ def lesson_nums(self):
+ """A dict from brain name to the brain's curriculum's lesson number."""
+ lesson_nums = {}
+ for brain_name, curriculum in self.brains_to_curriculums.items():
+ lesson_nums[brain_name] = curriculum.lesson_num
+
+ return lesson_nums
+
+ @lesson_nums.setter
+ def lesson_nums(self, lesson_nums):
+ for brain_name, lesson in lesson_nums.items():
+ self.brains_to_curriculums[brain_name].lesson_num = lesson
+
+ def _lesson_ready_to_increment(self, brain_name, reward_buff_size):
+ """Determines whether the curriculum of a specified brain is ready
+ to attempt an increment.
+
+ Args:
+ brain_name (str): The name of the brain whose curriculum will be
+ checked for readiness.
+ reward_buff_size (int): The size of the reward buffer of the trainer
+ that corresponds to the specified brain.
+
+ Returns:
+ Whether the curriculum of the specified brain should attempt to
+ increment its lesson.
+ """
+ return reward_buff_size >= (self.brains_to_curriculums[brain_name]
+ .min_lesson_length)
+
+ def increment_lessons(self, measure_vals, reward_buff_sizes=None):
+        """Attempts to increment the lessons of all the curriculums in this
+ MetaCurriculum. Note that calling this method does not guarantee the
+ lesson of a curriculum will increment. The lesson of a curriculum will
+ only increment if the specified measure threshold defined in the
+ curriculum has been reached and the minimum number of episodes in the
+ lesson have been completed.
+
+ Args:
+ measure_vals (dict): A dict of brain name to measure value.
+ reward_buff_sizes (dict): A dict of brain names to the size of their
+ corresponding reward buffers.
+
+ Returns:
+ A dict from brain name to whether that brain's lesson number was
+ incremented.
+ """
+ ret = {}
+ if reward_buff_sizes:
+ for brain_name, buff_size in reward_buff_sizes.items():
+ if self._lesson_ready_to_increment(brain_name, buff_size):
+ measure_val = measure_vals[brain_name]
+ ret[brain_name] = (self.brains_to_curriculums[brain_name]
+ .increment_lesson(measure_val))
+ else:
+ for brain_name, measure_val in measure_vals.items():
+ ret[brain_name] = (self.brains_to_curriculums[brain_name]
+ .increment_lesson(measure_val))
+ return ret
+
+
+ def set_all_curriculums_to_lesson_num(self, lesson_num):
+ """Sets all the curriculums in this meta curriculum to a specified
+ lesson number.
+
+ Args:
+ lesson_num (int): The lesson number which all the curriculums will
+ be set to.
+ """
+ for _, curriculum in self.brains_to_curriculums.items():
+ curriculum.lesson_num = lesson_num
+
+
+ def get_config(self):
+ """Get the combined configuration of all curriculums in this
+ MetaCurriculum.
+
+ Returns:
+ A dict from parameter to value.
+ """
+ config = {}
+
+ for _, curriculum in self.brains_to_curriculums.items():
+ curr_config = curriculum.get_config()
+ config.update(curr_config)
+
+ return config
diff --git a/animalai_packages/animalai_train/animalai_train/trainers/models.py b/animalai_packages/animalai_train/animalai_train/trainers/models.py
new file mode 100644
index 00000000..66c6cd71
--- /dev/null
+++ b/animalai_packages/animalai_train/animalai_train/trainers/models.py
@@ -0,0 +1,380 @@
+import logging
+
+import numpy as np
+import tensorflow as tf
+import tensorflow.contrib.layers as c_layers
+
+logger = logging.getLogger("mlagents.envs")
+
+
+class LearningModel(object):
+ _version_number_ = 2
+
+ def __init__(self, m_size, normalize, use_recurrent, brain, seed):
+ tf.set_random_seed(seed)
+ self.brain = brain
+ self.vector_in = None
+ self.global_step, self.increment_step = self.create_global_steps()
+ self.visual_in = []
+ self.batch_size = tf.placeholder(shape=None, dtype=tf.int32, name='batch_size')
+ self.sequence_length = tf.placeholder(shape=None, dtype=tf.int32, name='sequence_length')
+ self.mask_input = tf.placeholder(shape=[None], dtype=tf.float32, name='masks')
+ self.mask = tf.cast(self.mask_input, tf.int32)
+ self.use_recurrent = use_recurrent
+ if self.use_recurrent:
+ self.m_size = m_size
+ else:
+ self.m_size = 0
+ self.normalize = normalize
+ self.act_size = brain.vector_action_space_size
+ self.vec_obs_size = brain.vector_observation_space_size * \
+ brain.num_stacked_vector_observations
+ self.vis_obs_size = brain.number_visual_observations
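+        # Non-trainable variables that embed model metadata (control type, version, memory size, action output shape) into the exported graph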
+ tf.Variable(int(brain.vector_action_space_type == 'continuous'),
+ name='is_continuous_control', trainable=False, dtype=tf.int32)
+ tf.Variable(self._version_number_, name='version_number', trainable=False, dtype=tf.int32)
+ tf.Variable(self.m_size, name="memory_size", trainable=False, dtype=tf.int32)
+ if brain.vector_action_space_type == 'continuous':
+ tf.Variable(self.act_size[0], name="action_output_shape", trainable=False, dtype=tf.int32)
+ else:
+ tf.Variable(sum(self.act_size), name="action_output_shape", trainable=False, dtype=tf.int32)
+
+ @staticmethod
+ def create_global_steps():
+ """Creates TF ops to track and increment global training step."""
+ global_step = tf.Variable(0, name="global_step", trainable=False, dtype=tf.int32)
+ increment_step = tf.assign(global_step, tf.add(global_step, 1))
+ return global_step, increment_step
+
+ @staticmethod
+ def swish(input_activation):
+ """Swish activation function. For more info: https://arxiv.org/abs/1710.05941"""
+ return tf.multiply(input_activation, tf.nn.sigmoid(input_activation))
+
+ @staticmethod
+ def create_visual_input(camera_parameters, name):
+ """
+ Creates image input op.
+ :param camera_parameters: Parameters for visual observation from BrainInfo.
+ :param name: Desired name of input op.
+ :return: input op.
+ """
+ o_size_h = camera_parameters['height']
+ o_size_w = camera_parameters['width']
+ bw = camera_parameters['blackAndWhite']
+
+ if bw:
+ c_channels = 1
+ else:
+ c_channels = 3
+
+ visual_in = tf.placeholder(shape=[None, o_size_h, o_size_w, c_channels], dtype=tf.float32,
+ name=name)
+ return visual_in
+
+ def create_vector_input(self, name='vector_observation'):
+ """
+ Creates ops for vector observation input.
+ :param name: Name of the placeholder op.
+ :param vec_obs_size: Size of stacked vector observation.
+ :return:
+ """
+ self.vector_in = tf.placeholder(shape=[None, self.vec_obs_size], dtype=tf.float32,
+ name=name)
+ if self.normalize:
+ self.running_mean = tf.get_variable("running_mean", [self.vec_obs_size],
+ trainable=False, dtype=tf.float32,
+ initializer=tf.zeros_initializer())
+ self.running_variance = tf.get_variable("running_variance", [self.vec_obs_size],
+ trainable=False,
+ dtype=tf.float32,
+ initializer=tf.ones_initializer())
+ self.update_mean, self.update_variance = self.create_normalizer_update(self.vector_in)
+
+ self.normalized_state = tf.clip_by_value((self.vector_in - self.running_mean) / tf.sqrt(
+ self.running_variance / (tf.cast(self.global_step, tf.float32) + 1)), -5, 5,
+ name="normalized_state")
+ return self.normalized_state
+ else:
+ return self.vector_in
+
+ def create_normalizer_update(self, vector_input):
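+        # Online (Welford-style) update of the running mean and variance, using the global step as the sample count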
+ mean_current_observation = tf.reduce_mean(vector_input, axis=0)
+ new_mean = self.running_mean + (mean_current_observation - self.running_mean) / \
+ tf.cast(tf.add(self.global_step, 1), tf.float32)
+ new_variance = self.running_variance + (mean_current_observation - new_mean) * \
+ (mean_current_observation - self.running_mean)
+ update_mean = tf.assign(self.running_mean, new_mean)
+ update_variance = tf.assign(self.running_variance, new_variance)
+ return update_mean, update_variance
+
+ @staticmethod
+ def create_vector_observation_encoder(observation_input, h_size, activation, num_layers, scope,
+ reuse):
+ """
+ Builds a set of hidden state encoders.
+ :param reuse: Whether to re-use the weights within the same scope.
+ :param scope: Graph scope for the encoder ops.
+ :param observation_input: Input vector.
+ :param h_size: Hidden layer size.
+ :param activation: What type of activation function to use for layers.
+ :param num_layers: number of hidden layers to create.
+ :return: List of hidden layer tensors.
+ """
+ with tf.variable_scope(scope):
+ hidden = observation_input
+ for i in range(num_layers):
+ hidden = tf.layers.dense(hidden, h_size, activation=activation, reuse=reuse,
+ name="hidden_{}".format(i),
+ kernel_initializer=c_layers.variance_scaling_initializer(
+ 1.0))
+ return hidden
+
+ def create_visual_observation_encoder(self, image_input, h_size, activation, num_layers, scope,
+ reuse):
+ """
+ Builds a set of visual (CNN) encoders.
+ :param reuse: Whether to re-use the weights within the same scope.
+ :param scope: The scope of the graph within which to create the ops.
+ :param image_input: The placeholder for the image input to use.
+ :param h_size: Hidden layer size.
+ :param activation: What type of activation function to use for layers.
+ :param num_layers: number of hidden layers to create.
+ :return: List of hidden layer tensors.
+ """
+ with tf.variable_scope(scope):
+ conv1 = tf.layers.conv2d(image_input, 16, kernel_size=[8, 8], strides=[4, 4],
+ activation=tf.nn.elu, reuse=reuse, name="conv_1")
+ conv2 = tf.layers.conv2d(conv1, 32, kernel_size=[4, 4], strides=[2, 2],
+ activation=tf.nn.elu, reuse=reuse, name="conv_2")
+ hidden = c_layers.flatten(conv2)
+
+ with tf.variable_scope(scope + '/' + 'flat_encoding'):
+ hidden_flat = self.create_vector_observation_encoder(hidden, h_size, activation,
+ num_layers, scope, reuse)
+ return hidden_flat
+
+ @staticmethod
+ def create_discrete_action_masking_layer(all_logits, action_masks, action_size):
+ """
+ Creates a masking layer for the discrete actions
+ :param all_logits: The concatenated unnormalized action probabilities for all branches
+ :param action_masks: The mask for the logits. Must be of dimension [None x total_number_of_action]
+ :param action_size: A list containing the number of possible actions for each branch
+ :return: The action output dimension [batch_size, num_branches] and the concatenated normalized logits
+ """
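+        # Slice the concatenated logits and masks into per-branch segments using cumulative action sizes, then renormalize the masked probabilities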
+ action_idx = [0] + list(np.cumsum(action_size))
+ branches_logits = [all_logits[:, action_idx[i]:action_idx[i + 1]] for i in range(len(action_size))]
+ branch_masks = [action_masks[:, action_idx[i]:action_idx[i + 1]] for i in range(len(action_size))]
+ raw_probs = [tf.multiply(tf.nn.softmax(branches_logits[k]) + 1.0e-10, branch_masks[k])
+ for k in range(len(action_size))]
+ normalized_probs = [
+ tf.divide(raw_probs[k], tf.reduce_sum(raw_probs[k], axis=1, keepdims=True))
+ for k in range(len(action_size))]
+ output = tf.concat([tf.multinomial(tf.log(normalized_probs[k]), 1) for k in range(len(action_size))], axis=1)
+ return output, tf.concat([tf.log(normalized_probs[k] + 1.0e-10) for k in range(len(action_size))], axis=1)
+
+ def create_observation_streams(self, num_streams, h_size, num_layers):
+ """
+ Creates encoding stream for observations.
+ :param num_streams: Number of streams to create.
+ :param h_size: Size of hidden linear layers in stream.
+ :param num_layers: Number of hidden linear layers in stream.
+ :return: List of encoded streams.
+ """
+ brain = self.brain
+ activation_fn = self.swish
+
+ self.visual_in = []
+ for i in range(brain.number_visual_observations):
+ visual_input = self.create_visual_input(brain.camera_resolutions[i],
+ name="visual_observation_" + str(i))
+ self.visual_in.append(visual_input)
+ vector_observation_input = self.create_vector_input()
+
+ final_hiddens = []
+ for i in range(num_streams):
+ visual_encoders = []
+ hidden_state, hidden_visual = None, None
+ if self.vis_obs_size > 0:
+ for j in range(brain.number_visual_observations):
+ encoded_visual = self.create_visual_observation_encoder(self.visual_in[j],
+ h_size,
+ activation_fn,
+ num_layers,
+ "main_graph_{}_encoder{}"
+ .format(i, j), False)
+ visual_encoders.append(encoded_visual)
+ hidden_visual = tf.concat(visual_encoders, axis=1)
+ if brain.vector_observation_space_size > 0:
+ hidden_state = self.create_vector_observation_encoder(vector_observation_input,
+ h_size, activation_fn,
+ num_layers,
+ "main_graph_{}".format(i),
+ False)
+ if hidden_state is not None and hidden_visual is not None:
+ final_hidden = tf.concat([hidden_visual, hidden_state], axis=1)
+ elif hidden_state is None and hidden_visual is not None:
+ final_hidden = hidden_visual
+ elif hidden_state is not None and hidden_visual is None:
+ final_hidden = hidden_state
+ else:
+ raise Exception("No valid network configuration possible. "
+ "There are no states or observations in this brain")
+ final_hiddens.append(final_hidden)
+ return final_hiddens
+
+ @staticmethod
+ def create_recurrent_encoder(input_state, memory_in, sequence_length, name='lstm'):
+ """
+ Builds a recurrent encoder for either state or observations (LSTM).
+ :param sequence_length: Length of sequence to unroll.
+ :param input_state: The input tensor to the LSTM cell.
+ :param memory_in: The input memory to the LSTM cell.
+ :param name: The scope of the LSTM cell.
+ """
+ s_size = input_state.get_shape().as_list()[1]
+ m_size = memory_in.get_shape().as_list()[1]
+ lstm_input_state = tf.reshape(input_state, shape=[-1, sequence_length, s_size])
+ memory_in = tf.reshape(memory_in[:, :], [-1, m_size])
+ _half_point = int(m_size / 2)
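+        # The memory vector packs the LSTM cell state in its first half and the hidden state in its second half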
+ with tf.variable_scope(name):
+ rnn_cell = tf.contrib.rnn.BasicLSTMCell(_half_point)
+ lstm_vector_in = tf.contrib.rnn.LSTMStateTuple(memory_in[:, :_half_point],
+ memory_in[:, _half_point:])
+ recurrent_output, lstm_state_out = tf.nn.dynamic_rnn(rnn_cell, lstm_input_state,
+ initial_state=lstm_vector_in)
+
+ recurrent_output = tf.reshape(recurrent_output, shape=[-1, _half_point])
+ return recurrent_output, tf.concat([lstm_state_out.c, lstm_state_out.h], axis=1)
+
+ def create_cc_actor_critic(self, h_size, num_layers):
+ """
+ Creates Continuous control actor-critic model.
+ :param h_size: Size of hidden linear layers.
+ :param num_layers: Number of hidden linear layers.
+ """
+ hidden_streams = self.create_observation_streams(2, h_size, num_layers)
+
+ if self.use_recurrent:
+ self.memory_in = tf.placeholder(shape=[None, self.m_size], dtype=tf.float32,
+ name='recurrent_in')
+ _half_point = int(self.m_size / 2)
+ hidden_policy, memory_policy_out = self.create_recurrent_encoder(
+ hidden_streams[0], self.memory_in[:, :_half_point], self.sequence_length,
+ name='lstm_policy')
+
+ hidden_value, memory_value_out = self.create_recurrent_encoder(
+ hidden_streams[1], self.memory_in[:, _half_point:], self.sequence_length,
+ name='lstm_value')
+ self.memory_out = tf.concat([memory_policy_out, memory_value_out], axis=1,
+ name='recurrent_out')
+ else:
+ hidden_policy = hidden_streams[0]
+ hidden_value = hidden_streams[1]
+
+ mu = tf.layers.dense(hidden_policy, self.act_size[0], activation=None,
+ kernel_initializer=c_layers.variance_scaling_initializer(factor=0.01))
+
+ log_sigma_sq = tf.get_variable("log_sigma_squared", [self.act_size[0]], dtype=tf.float32,
+ initializer=tf.zeros_initializer())
+
+ sigma_sq = tf.exp(log_sigma_sq)
+
+ self.epsilon = tf.placeholder(shape=[None, self.act_size[0]], dtype=tf.float32, name='epsilon')
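+        # Sample actions with the reparameterization trick: mean plus externally supplied Gaussian noise scaled by the standard deviation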
+ # Clip and scale output to ensure actions are always within [-1, 1] range.
+ self.output_pre = mu + tf.sqrt(sigma_sq) * self.epsilon
+ output_post = tf.clip_by_value(self.output_pre, -3, 3) / 3
+ self.output = tf.identity(output_post, name='action')
+ self.selected_actions = tf.stop_gradient(output_post)
+
+ # Compute probability of model output.
+ all_probs = - 0.5 * tf.square(tf.stop_gradient(self.output_pre) - mu) / sigma_sq \
+ - 0.5 * tf.log(2.0 * np.pi) - 0.5 * log_sigma_sq
+
+ self.all_log_probs = tf.identity(all_probs, name='action_probs')
+
+ self.entropy = 0.5 * tf.reduce_mean(tf.log(2 * np.pi * np.e) + log_sigma_sq)
+
+ value = tf.layers.dense(hidden_value, 1, activation=None)
+ self.value = tf.identity(value, name="value_estimate")
+
+ self.all_old_log_probs = tf.placeholder(shape=[None, self.act_size[0]], dtype=tf.float32,
+ name='old_probabilities')
+
+ # We keep these tensors the same name, but use new nodes to keep code parallelism with discrete control.
+ self.log_probs = tf.reduce_sum((tf.identity(self.all_log_probs)), axis=1, keepdims=True)
+ self.old_log_probs = tf.reduce_sum((tf.identity(self.all_old_log_probs)), axis=1,
+ keepdims=True)
+
+ def create_dc_actor_critic(self, h_size, num_layers):
+ """
+ Creates Discrete control actor-critic model.
+ :param h_size: Size of hidden linear layers.
+ :param num_layers: Number of hidden linear layers.
+ """
+ hidden_streams = self.create_observation_streams(1, h_size, num_layers)
+ hidden = hidden_streams[0]
+
+ if self.use_recurrent:
+ self.prev_action = tf.placeholder(shape=[None, len(self.act_size)], dtype=tf.int32,
+ name='prev_action')
+ prev_action_oh = tf.concat([
+ tf.one_hot(self.prev_action[:, i], self.act_size[i]) for i in
+ range(len(self.act_size))], axis=1)
+ hidden = tf.concat([hidden, prev_action_oh], axis=1)
+
+ self.memory_in = tf.placeholder(shape=[None, self.m_size], dtype=tf.float32,
+ name='recurrent_in')
+ hidden, memory_out = self.create_recurrent_encoder(hidden, self.memory_in,
+ self.sequence_length)
+ self.memory_out = tf.identity(memory_out, name='recurrent_out')
+
+ policy_branches = []
+ for size in self.act_size:
+ policy_branches.append(tf.layers.dense(hidden, size, activation=None, use_bias=False,
+ kernel_initializer=c_layers.variance_scaling_initializer(factor=0.01)))
+
+ self.all_log_probs = tf.concat([branch for branch in policy_branches], axis=1, name="action_probs")
+
+ self.action_masks = tf.placeholder(shape=[None, sum(self.act_size)], dtype=tf.float32, name="action_masks")
+ output, normalized_logits = self.create_discrete_action_masking_layer(
+ self.all_log_probs, self.action_masks, self.act_size)
+
+ self.output = tf.identity(output)
+ self.normalized_logits = tf.identity(normalized_logits, name='action')
+
+ value = tf.layers.dense(hidden, 1, activation=None)
+ self.value = tf.identity(value, name="value_estimate")
+
+ self.action_holder = tf.placeholder(
+ shape=[None, len(policy_branches)], dtype=tf.int32, name="action_holder")
+ self.action_oh = tf.concat([
+ tf.one_hot(self.action_holder[:, i], self.act_size[i]) for i in range(len(self.act_size))], axis=1)
+ self.selected_actions = tf.stop_gradient(self.action_oh)
+
+ self.all_old_log_probs = tf.placeholder(
+ shape=[None, sum(self.act_size)], dtype=tf.float32, name='old_probabilities')
+ _, old_normalized_logits = self.create_discrete_action_masking_layer(
+ self.all_old_log_probs, self.action_masks, self.act_size)
+
+ action_idx = [0] + list(np.cumsum(self.act_size))
+
+ self.entropy = tf.reduce_sum((tf.stack([
+ tf.nn.softmax_cross_entropy_with_logits_v2(
+ labels=tf.nn.softmax(self.all_log_probs[:, action_idx[i]:action_idx[i + 1]]),
+ logits=self.all_log_probs[:, action_idx[i]:action_idx[i + 1]])
+ for i in range(len(self.act_size))], axis=1)), axis=1)
+
+ self.log_probs = tf.reduce_sum((tf.stack([
+ -tf.nn.softmax_cross_entropy_with_logits_v2(
+ labels=self.action_oh[:, action_idx[i]:action_idx[i + 1]],
+ logits=normalized_logits[:, action_idx[i]:action_idx[i + 1]]
+ )
+ for i in range(len(self.act_size))], axis=1)), axis=1, keepdims=True)
+ self.old_log_probs = tf.reduce_sum((tf.stack([
+ -tf.nn.softmax_cross_entropy_with_logits_v2(
+ labels=self.action_oh[:, action_idx[i]:action_idx[i + 1]],
+ logits=old_normalized_logits[:, action_idx[i]:action_idx[i + 1]]
+ )
+ for i in range(len(self.act_size))], axis=1)), axis=1, keepdims=True)
diff --git a/animalai_packages/animalai_train/animalai_train/trainers/policy.py b/animalai_packages/animalai_train/animalai_train/trainers/policy.py
new file mode 100644
index 00000000..dd23940a
--- /dev/null
+++ b/animalai_packages/animalai_train/animalai_train/trainers/policy.py
@@ -0,0 +1,212 @@
+import logging
+import numpy as np
+import tensorflow as tf
+
+from animalai_train.trainers import UnityException
+from tensorflow.python.tools import freeze_graph
+from animalai_train.trainers import tensorflow_to_barracuda as tf2bc
+
+logger = logging.getLogger("mlagents.trainers")
+
+
+class UnityPolicyException(UnityException):
+ """
+ Related to errors with the Trainer.
+ """
+ pass
+
+
+class Policy(object):
+ """
+ Contains a learning model, and the necessary
+    functions to interact with it to perform evaluation and updating.
+ """
+ possible_output_nodes = ['action', 'value_estimate',
+ 'action_probs', 'recurrent_out', 'memory_size',
+ 'version_number', 'is_continuous_control',
+ 'action_output_shape']
+
+ def __init__(self, seed, brain, trainer_parameters):
+ """
+        Initializes the policy.
+ :param seed: Random seed to use for TensorFlow.
+ :param brain: The corresponding Brain for this policy.
+ :param trainer_parameters: The trainer parameters.
+ """
+ self.m_size = None
+ self.model = None
+ self.inference_dict = {}
+ self.update_dict = {}
+ self.sequence_length = 1
+ self.seed = seed
+ self.brain = brain
+ self.use_recurrent = trainer_parameters["use_recurrent"]
+ self.use_continuous_act = (brain.vector_action_space_type == "continuous")
+ self.model_path = trainer_parameters["model_path"]
+ self.keep_checkpoints = trainer_parameters.get("keep_checkpoints", 5)
+ self.graph = tf.Graph()
+ config = tf.ConfigProto()
+ config.gpu_options.allow_growth = True
+ self.sess = tf.Session(config=config, graph=self.graph)
+ self.saver = None
+ if self.use_recurrent:
+ self.m_size = trainer_parameters["memory_size"]
+ self.sequence_length = trainer_parameters["sequence_length"]
+ if self.m_size == 0:
+ raise UnityPolicyException("The memory size for brain {0} is 0 even "
+ "though the trainer uses recurrent."
+ .format(brain.brain_name))
+ elif self.m_size % 4 != 0:
+ raise UnityPolicyException("The memory size for brain {0} is {1} "
+ "but it must be divisible by 4."
+ .format(brain.brain_name, self.m_size))
+
+ def _initialize_graph(self):
+ with self.graph.as_default():
+ self.saver = tf.train.Saver(max_to_keep=self.keep_checkpoints)
+ init = tf.global_variables_initializer()
+ self.sess.run(init)
+
+ def _load_graph(self):
+ with self.graph.as_default():
+ self.saver = tf.train.Saver(max_to_keep=self.keep_checkpoints)
+ logger.info('Loading Model for brain {}'.format(self.brain.brain_name))
+ ckpt = tf.train.get_checkpoint_state(self.model_path)
+ if ckpt is None:
+ logger.info('The model {0} could not be found. Make '
+ 'sure you specified the right '
+ '--run-id'
+ .format(self.model_path))
+ self.saver.restore(self.sess, ckpt.model_checkpoint_path)
+
+ def evaluate(self, brain_info):
+ """
+ Evaluates policy for the agent experiences provided.
+ :param brain_info: BrainInfo input to network.
+ :return: Output from policy based on self.inference_dict.
+ """
+ raise UnityPolicyException("The evaluate function was not implemented.")
+
+ def update(self, mini_batch, num_sequences):
+ """
+ Performs update of the policy.
+ :param num_sequences: Number of experience trajectories in batch.
+ :param mini_batch: Batch of experiences.
+ :return: Results of update.
+ """
+ raise UnityPolicyException("The update function was not implemented.")
+
+ def _execute_model(self, feed_dict, out_dict):
+ """
+ Executes model.
+ :param feed_dict: Input dictionary mapping nodes to input data.
+ :param out_dict: Output dictionary mapping names to nodes.
+ :return: Dictionary mapping names to input data.
+ """
+ network_out = self.sess.run(list(out_dict.values()), feed_dict=feed_dict)
+ run_out = dict(zip(list(out_dict.keys()), network_out))
+ return run_out
+
+ def _fill_eval_dict(self, feed_dict, brain_info):
+ for i, _ in enumerate(brain_info.visual_observations):
+ feed_dict[self.model.visual_in[i]] = brain_info.visual_observations[i]
+ if self.use_vec_obs:
+ feed_dict[self.model.vector_in] = brain_info.vector_observations
+ if not self.use_continuous_act:
+ feed_dict[self.model.action_masks] = brain_info.action_masks
+ return feed_dict
+
+ def make_empty_memory(self, num_agents):
+ """
+ Creates empty memory for use with RNNs
+ :param num_agents: Number of agents.
+ :return: Numpy array of zeros.
+ """
+ return np.zeros((num_agents, self.m_size))
+
+ def get_current_step(self):
+ """
+ Gets current model step.
+ :return: current model step.
+ """
+ step = self.sess.run(self.model.global_step)
+ return step
+
+ def increment_step(self):
+ """
+ Increments model step.
+ """
+ self.sess.run(self.model.increment_step)
+
+ def get_inference_vars(self):
+ """
+        :return: list of inference var names
+ """
+ return list(self.inference_dict.keys())
+
+ def get_update_vars(self):
+ """
+        :return: list of update var names
+ """
+ return list(self.update_dict.keys())
+
+ def save_model(self, steps):
+ """
+ Saves the model
+ :param steps: The number of steps the model was trained for
+ :return:
+ """
+ with self.graph.as_default():
+ last_checkpoint = self.model_path + '/model-' + str(steps) + '.cptk'
+ self.saver.save(self.sess, last_checkpoint)
+ tf.train.write_graph(self.graph, self.model_path,
+ 'raw_graph_def.pb', as_text=False)
+
+ def export_model(self):
+ """
+ Exports latest saved model to .nn format for Unity embedding.
+ """
+
+ with self.graph.as_default():
+ target_nodes = ','.join(self._process_graph())
+ ckpt = tf.train.get_checkpoint_state(self.model_path)
+ freeze_graph.freeze_graph(
+ input_graph=self.model_path + '/raw_graph_def.pb',
+ input_binary=True,
+ input_checkpoint=ckpt.model_checkpoint_path,
+ output_node_names=target_nodes,
+ output_graph=(self.model_path + '/frozen_graph_def.pb'),
+ clear_devices=True, initializer_nodes='', input_saver='',
+ restore_op_name='save/restore_all',
+ filename_tensor_name='save/Const:0')
+
+ tf2bc.convert(self.model_path + '/frozen_graph_def.pb', self.model_path + '.nn')
+ logger.info('Exported ' + self.model_path + '.nn file')
+
+ def _process_graph(self):
+ """
+ Gets the list of the output nodes present in the graph for inference
+ :return: list of node names
+ """
+ all_nodes = [x.name for x in self.graph.as_graph_def().node]
+ nodes = [x for x in all_nodes if x in self.possible_output_nodes]
+ logger.info('List of nodes to export for brain :' + self.brain.brain_name)
+ for n in nodes:
+ logger.info('\t' + n)
+ return nodes
+
+ @property
+ def vis_obs_size(self):
+ return self.model.vis_obs_size
+
+ @property
+ def vec_obs_size(self):
+ return self.model.vec_obs_size
+
+ @property
+ def use_vis_obs(self):
+ return self.model.vis_obs_size > 0
+
+ @property
+ def use_vec_obs(self):
+ return self.model.vec_obs_size > 0
diff --git a/animalai_packages/animalai_train/animalai_train/trainers/ppo/__init__.py b/animalai_packages/animalai_train/animalai_train/trainers/ppo/__init__.py
new file mode 100644
index 00000000..d48a8ccf
--- /dev/null
+++ b/animalai_packages/animalai_train/animalai_train/trainers/ppo/__init__.py
@@ -0,0 +1,3 @@
+from .models import *
+from .trainer import *
+from .policy import *
diff --git a/animalai_packages/animalai_train/animalai_train/trainers/ppo/models.py b/animalai_packages/animalai_train/animalai_train/trainers/ppo/models.py
new file mode 100644
index 00000000..519dfb50
--- /dev/null
+++ b/animalai_packages/animalai_train/animalai_train/trainers/ppo/models.py
@@ -0,0 +1,195 @@
+import logging
+import numpy as np
+
+import tensorflow as tf
+from animalai_train.trainers.models import LearningModel
+
+logger = logging.getLogger("mlagents.envs")
+
+
+class PPOModel(LearningModel):
+ def __init__(self, brain, lr=1e-4, h_size=128, epsilon=0.2, beta=1e-3, max_step=5e6,
+ normalize=False, use_recurrent=False, num_layers=2, m_size=None, use_curiosity=False,
+ curiosity_strength=0.01, curiosity_enc_size=128, seed=0):
+ """
+ Takes a Unity environment and model-specific hyper-parameters and returns the
+ appropriate PPO agent model for the environment.
+ :param brain: BrainInfo used to generate specific network graph.
+ :param lr: Learning rate.
+ :param h_size: Size of hidden layers
+ :param epsilon: Value for policy-divergence threshold.
+ :param beta: Strength of entropy regularization.
+ :return: a sub-class of PPOAgent tailored to the environment.
+ :param max_step: Total number of training steps.
+ :param normalize: Whether to normalize vector observation input.
+ :param use_recurrent: Whether to use an LSTM layer in the network.
+        :param num_layers: Number of hidden layers between encoded input and policy & value layers.
+ :param m_size: Size of brain memory.
+ """
+ LearningModel.__init__(self, m_size, normalize, use_recurrent, brain, seed)
+ self.use_curiosity = use_curiosity
+ if num_layers < 1:
+ num_layers = 1
+ self.last_reward, self.new_reward, self.update_reward = self.create_reward_encoder()
+ if brain.vector_action_space_type == "continuous":
+ self.create_cc_actor_critic(h_size, num_layers)
+ self.entropy = tf.ones_like(tf.reshape(self.value, [-1])) * self.entropy
+ else:
+ self.create_dc_actor_critic(h_size, num_layers)
+ if self.use_curiosity:
+ self.curiosity_enc_size = curiosity_enc_size
+ self.curiosity_strength = curiosity_strength
+ encoded_state, encoded_next_state = self.create_curiosity_encoders()
+ self.create_inverse_model(encoded_state, encoded_next_state)
+ self.create_forward_model(encoded_state, encoded_next_state)
+ self.create_ppo_optimizer(self.log_probs, self.old_log_probs, self.value,
+ self.entropy, beta, epsilon, lr, max_step)
+
+ @staticmethod
+ def create_reward_encoder():
+ """Creates TF ops to track and increment recent average cumulative reward."""
+ last_reward = tf.Variable(0, name="last_reward", trainable=False, dtype=tf.float32)
+ new_reward = tf.placeholder(shape=[], dtype=tf.float32, name='new_reward')
+ update_reward = tf.assign(last_reward, new_reward)
+ return last_reward, new_reward, update_reward
+
+ def create_curiosity_encoders(self):
+ """
+ Creates state encoders for current and future observations.
+ Used for implementation of Curiosity-driven Exploration by Self-supervised Prediction
+ See https://arxiv.org/abs/1705.05363 for more details.
+ :return: current and future state encoder tensors.
+ """
+ encoded_state_list = []
+ encoded_next_state_list = []
+
+ if self.vis_obs_size > 0:
+ self.next_visual_in = []
+ visual_encoders = []
+ next_visual_encoders = []
+ for i in range(self.vis_obs_size):
+ # Create input ops for next (t+1) visual observations.
+ next_visual_input = self.create_visual_input(self.brain.camera_resolutions[i],
+ name="next_visual_observation_" + str(i))
+ self.next_visual_in.append(next_visual_input)
+
+                # Create the encoder ops for current and next visual input. Note that these encoders are siamese.
+ encoded_visual = self.create_visual_observation_encoder(self.visual_in[i], self.curiosity_enc_size,
+ self.swish, 1, "stream_{}_visual_obs_encoder"
+ .format(i), False)
+
+ encoded_next_visual = self.create_visual_observation_encoder(self.next_visual_in[i],
+ self.curiosity_enc_size,
+ self.swish, 1,
+ "stream_{}_visual_obs_encoder".format(i),
+ True)
+ visual_encoders.append(encoded_visual)
+ next_visual_encoders.append(encoded_next_visual)
+
+ hidden_visual = tf.concat(visual_encoders, axis=1)
+ hidden_next_visual = tf.concat(next_visual_encoders, axis=1)
+ encoded_state_list.append(hidden_visual)
+ encoded_next_state_list.append(hidden_next_visual)
+
+ if self.vec_obs_size > 0:
+            # Create the encoder ops for current and next vector input. Note that these encoders are siamese.
+ # Create input op for next (t+1) vector observation.
+ self.next_vector_in = tf.placeholder(shape=[None, self.vec_obs_size], dtype=tf.float32,
+ name='next_vector_observation')
+
+ encoded_vector_obs = self.create_vector_observation_encoder(self.vector_in,
+ self.curiosity_enc_size,
+ self.swish, 2, "vector_obs_encoder",
+ False)
+ encoded_next_vector_obs = self.create_vector_observation_encoder(self.next_vector_in,
+ self.curiosity_enc_size,
+ self.swish, 2,
+ "vector_obs_encoder",
+ True)
+ encoded_state_list.append(encoded_vector_obs)
+ encoded_next_state_list.append(encoded_next_vector_obs)
+
+ encoded_state = tf.concat(encoded_state_list, axis=1)
+ encoded_next_state = tf.concat(encoded_next_state_list, axis=1)
+ return encoded_state, encoded_next_state
+
+ def create_inverse_model(self, encoded_state, encoded_next_state):
+ """
+ Creates inverse model TensorFlow ops for Curiosity module.
+ Predicts action taken given current and future encoded states.
+ :param encoded_state: Tensor corresponding to encoded current state.
+ :param encoded_next_state: Tensor corresponding to encoded next state.
+ """
+ combined_input = tf.concat([encoded_state, encoded_next_state], axis=1)
+ hidden = tf.layers.dense(combined_input, 256, activation=self.swish)
+ if self.brain.vector_action_space_type == "continuous":
+ pred_action = tf.layers.dense(hidden, self.act_size[0], activation=None)
+ squared_difference = tf.reduce_sum(tf.squared_difference(pred_action, self.selected_actions), axis=1)
+ self.inverse_loss = tf.reduce_mean(tf.dynamic_partition(squared_difference, self.mask, 2)[1])
+ else:
+ pred_action = tf.concat(
+ [tf.layers.dense(hidden, self.act_size[i], activation=tf.nn.softmax)
+ for i in range(len(self.act_size))], axis=1)
+ cross_entropy = tf.reduce_sum(-tf.log(pred_action + 1e-10) * self.selected_actions, axis=1)
+ self.inverse_loss = tf.reduce_mean(tf.dynamic_partition(cross_entropy, self.mask, 2)[1])
+
+ def create_forward_model(self, encoded_state, encoded_next_state):
+ """
+ Creates forward model TensorFlow ops for Curiosity module.
+ Predicts encoded future state based on encoded current state and given action.
+ :param encoded_state: Tensor corresponding to encoded current state.
+ :param encoded_next_state: Tensor corresponding to encoded next state.
+ """
+ combined_input = tf.concat([encoded_state, self.selected_actions], axis=1)
+ hidden = tf.layers.dense(combined_input, 256, activation=self.swish)
+ # We compare against the concatenation of all observation streams, hence `self.vis_obs_size + int(self.vec_obs_size > 0)`.
+ pred_next_state = tf.layers.dense(hidden, self.curiosity_enc_size * (self.vis_obs_size + int(self.vec_obs_size > 0)),
+ activation=None)
+
+ squared_difference = 0.5 * tf.reduce_sum(tf.squared_difference(pred_next_state, encoded_next_state), axis=1)
+ self.intrinsic_reward = tf.clip_by_value(self.curiosity_strength * squared_difference, 0, 1)
+ self.forward_loss = tf.reduce_mean(tf.dynamic_partition(squared_difference, self.mask, 2)[1])
+
+ def create_ppo_optimizer(self, probs, old_probs, value, entropy, beta, epsilon, lr, max_step):
+ """
+ Creates training-specific Tensorflow ops for PPO models.
+ :param probs: Current policy probabilities
+ :param old_probs: Past policy probabilities
+ :param value: Current value estimate
+ :param beta: Entropy regularization strength
+ :param entropy: Current policy entropy
+ :param epsilon: Value for policy-divergence threshold
+ :param lr: Learning rate
+ :param max_step: Total number of training steps.
+ """
+ self.returns_holder = tf.placeholder(shape=[None], dtype=tf.float32, name='discounted_rewards')
+ self.advantage = tf.placeholder(shape=[None, 1], dtype=tf.float32, name='advantages')
+ self.learning_rate = tf.train.polynomial_decay(lr, self.global_step, max_step, 1e-10, power=1.0)
+
+ self.old_value = tf.placeholder(shape=[None], dtype=tf.float32, name='old_value_estimates')
+
+ decay_epsilon = tf.train.polynomial_decay(epsilon, self.global_step, max_step, 0.1, power=1.0)
+ decay_beta = tf.train.polynomial_decay(beta, self.global_step, max_step, 1e-5, power=1.0)
+ optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
+
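+        # Clipped value loss (mirrors the policy-ratio clipping below): the new value estimate is kept
+        # within decay_epsilon of the old estimate, and the larger of the clipped / unclipped squared
+        # errors is minimised.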
+ clipped_value_estimate = self.old_value + tf.clip_by_value(tf.reduce_sum(value, axis=1) - self.old_value,
+ - decay_epsilon, decay_epsilon)
+
+ v_opt_a = tf.squared_difference(self.returns_holder, tf.reduce_sum(value, axis=1))
+ v_opt_b = tf.squared_difference(self.returns_holder, clipped_value_estimate)
+ self.value_loss = tf.reduce_mean(tf.dynamic_partition(tf.maximum(v_opt_a, v_opt_b), self.mask, 2)[1])
+
+ # Here we calculate PPO policy loss. In continuous control this is done independently for each action gaussian
+ # and then averaged together. This provides significantly better performance than treating the probability
+ # as an average of probabilities, or as a joint probability.
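+        # In short: L_clip = E[ min(r_t * A_t, clip(r_t, 1 - eps, 1 + eps) * A_t) ],
+        # with r_t = exp(log pi(a_t|s_t) - log pi_old(a_t|s_t)), computed below as r_theta.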
+ r_theta = tf.exp(probs - old_probs)
+ p_opt_a = r_theta * self.advantage
+ p_opt_b = tf.clip_by_value(r_theta, 1.0 - decay_epsilon, 1.0 + decay_epsilon) * self.advantage
+ self.policy_loss = -tf.reduce_mean(tf.dynamic_partition(tf.minimum(p_opt_a, p_opt_b), self.mask, 2)[1])
+
+ self.loss = self.policy_loss + 0.5 * self.value_loss - decay_beta * tf.reduce_mean(
+ tf.dynamic_partition(entropy, self.mask, 2)[1])
+
+ if self.use_curiosity:
+ self.loss += 10 * (0.2 * self.forward_loss + 0.8 * self.inverse_loss)
+ self.update_batch = optimizer.minimize(self.loss)
diff --git a/animalai_packages/animalai_train/animalai_train/trainers/ppo/policy.py b/animalai_packages/animalai_train/animalai_train/trainers/ppo/policy.py
new file mode 100644
index 00000000..33bbba62
--- /dev/null
+++ b/animalai_packages/animalai_train/animalai_train/trainers/ppo/policy.py
@@ -0,0 +1,214 @@
+import logging
+import numpy as np
+
+from animalai_train.trainers.ppo.models import PPOModel
+from animalai_train.trainers.policy import Policy
+
+logger = logging.getLogger("mlagents.trainers")
+
+
+class PPOPolicy(Policy):
+ def __init__(self, seed, brain, trainer_params, is_training, load):
+ """
+ Policy for Proximal Policy Optimization Networks.
+ :param seed: Random seed.
+ :param brain: Assigned Brain object.
+ :param trainer_params: Defined training parameters.
+ :param is_training: Whether the model should be trained.
+ :param load: Whether a pre-trained model will be loaded or a new one created.
+ """
+ super().__init__(seed, brain, trainer_params)
+ self.has_updated = False
+ self.use_curiosity = bool(trainer_params['use_curiosity'])
+
+ with self.graph.as_default():
+ self.model = PPOModel(brain,
+ lr=float(trainer_params['learning_rate']),
+ h_size=int(trainer_params['hidden_units']),
+ epsilon=float(trainer_params['epsilon']),
+ beta=float(trainer_params['beta']),
+ max_step=float(trainer_params['max_steps']),
+ normalize=trainer_params['normalize'],
+ use_recurrent=trainer_params['use_recurrent'],
+ num_layers=int(trainer_params['num_layers']),
+ m_size=self.m_size,
+ use_curiosity=bool(trainer_params['use_curiosity']),
+ curiosity_strength=float(trainer_params['curiosity_strength']),
+ curiosity_enc_size=float(trainer_params['curiosity_enc_size']),
+ seed=seed)
+
+ if load:
+ self._load_graph()
+ else:
+ self._initialize_graph()
+
+ self.inference_dict = {'action': self.model.output, 'log_probs': self.model.all_log_probs,
+ 'value': self.model.value, 'entropy': self.model.entropy,
+ 'learning_rate': self.model.learning_rate}
+ if self.use_continuous_act:
+ self.inference_dict['pre_action'] = self.model.output_pre
+ if self.use_recurrent:
+ self.inference_dict['memory_out'] = self.model.memory_out
+ if is_training and self.use_vec_obs and trainer_params['normalize']:
+ self.inference_dict['update_mean'] = self.model.update_mean
+ self.inference_dict['update_variance'] = self.model.update_variance
+
+ self.update_dict = {'value_loss': self.model.value_loss,
+ 'policy_loss': self.model.policy_loss,
+ 'update_batch': self.model.update_batch}
+ if self.use_curiosity:
+ self.update_dict['forward_loss'] = self.model.forward_loss
+ self.update_dict['inverse_loss'] = self.model.inverse_loss
+
+ def evaluate(self, brain_info):
+ """
+ Evaluates policy for the agent experiences provided.
+ :param brain_info: BrainInfo object containing inputs.
+ :return: Outputs from network as defined by self.inference_dict.
+ """
+ feed_dict = {self.model.batch_size: len(brain_info.vector_observations),
+ self.model.sequence_length: 1}
+ epsilon = None
+ if self.use_recurrent:
+ if not self.use_continuous_act:
+ feed_dict[self.model.prev_action] = brain_info.previous_vector_actions.reshape(
+ [-1, len(self.model.act_size)])
+ if brain_info.memories.shape[1] == 0:
+ brain_info.memories = self.make_empty_memory(len(brain_info.agents))
+ feed_dict[self.model.memory_in] = brain_info.memories
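+        # For continuous actions, Gaussian noise is sampled outside the graph and fed in as `epsilon`;
+        # the model reparameterizes its action output with it, and the same noise is returned with the
+        # results so the update step can reuse it.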
+ if self.use_continuous_act:
+ epsilon = np.random.normal(
+ size=(len(brain_info.vector_observations), self.model.act_size[0]))
+ feed_dict[self.model.epsilon] = epsilon
+ feed_dict = self._fill_eval_dict(feed_dict, brain_info)
+ run_out = self._execute_model(feed_dict, self.inference_dict)
+ if self.use_continuous_act:
+ run_out['random_normal_epsilon'] = epsilon
+ return run_out
+
+ def update(self, mini_batch, num_sequences):
+ """
+ Updates model using buffer.
+ :param num_sequences: Number of trajectories in batch.
+ :param mini_batch: Experience batch.
+ :return: Output from update process.
+ """
+ feed_dict = {self.model.batch_size: num_sequences,
+ self.model.sequence_length: self.sequence_length,
+ self.model.mask_input: mini_batch['masks'].flatten(),
+ self.model.returns_holder: mini_batch['discounted_returns'].flatten(),
+ self.model.old_value: mini_batch['value_estimates'].flatten(),
+ self.model.advantage: mini_batch['advantages'].reshape([-1, 1]),
+ self.model.all_old_log_probs: mini_batch['action_probs'].reshape(
+ [-1, sum(self.model.act_size)])}
+ if self.use_continuous_act:
+ feed_dict[self.model.output_pre] = mini_batch['actions_pre'].reshape(
+ [-1, self.model.act_size[0]])
+ feed_dict[self.model.epsilon] = mini_batch['random_normal_epsilon'].reshape(
+ [-1, self.model.act_size[0]])
+ else:
+ feed_dict[self.model.action_holder] = mini_batch['actions'].reshape(
+ [-1, len(self.model.act_size)])
+ if self.use_recurrent:
+ feed_dict[self.model.prev_action] = mini_batch['prev_action'].reshape(
+ [-1, len(self.model.act_size)])
+ feed_dict[self.model.action_masks] = mini_batch['action_mask'].reshape(
+ [-1, sum(self.brain.vector_action_space_size)])
+ if self.use_vec_obs:
+ feed_dict[self.model.vector_in] = mini_batch['vector_obs'].reshape(
+ [-1, self.vec_obs_size])
+ if self.use_curiosity:
+ feed_dict[self.model.next_vector_in] = mini_batch['next_vector_in'].reshape(
+ [-1, self.vec_obs_size])
+ if self.model.vis_obs_size > 0:
+ for i, _ in enumerate(self.model.visual_in):
+ _obs = mini_batch['visual_obs%d' % i]
+ if self.sequence_length > 1 and self.use_recurrent:
+ (_batch, _seq, _w, _h, _c) = _obs.shape
+ feed_dict[self.model.visual_in[i]] = _obs.reshape([-1, _w, _h, _c])
+ else:
+ feed_dict[self.model.visual_in[i]] = _obs
+ if self.use_curiosity:
+ for i, _ in enumerate(self.model.visual_in):
+ _obs = mini_batch['next_visual_obs%d' % i]
+ if self.sequence_length > 1 and self.use_recurrent:
+ (_batch, _seq, _w, _h, _c) = _obs.shape
+ feed_dict[self.model.next_visual_in[i]] = _obs.reshape([-1, _w, _h, _c])
+ else:
+ feed_dict[self.model.next_visual_in[i]] = _obs
+ if self.use_recurrent:
+ mem_in = mini_batch['memory'][:, 0, :]
+ feed_dict[self.model.memory_in] = mem_in
+ self.has_updated = True
+ run_out = self._execute_model(feed_dict, self.update_dict)
+ return run_out
+
+ def get_intrinsic_rewards(self, curr_info, next_info):
+ """
+ Generates intrinsic reward used for Curiosity-based training.
+ :BrainInfo curr_info: Current BrainInfo.
+ :BrainInfo next_info: Next BrainInfo.
+ :return: Intrinsic rewards for all agents.
+ """
+ if self.use_curiosity:
+ if len(curr_info.agents) == 0:
+ return []
+
+ feed_dict = {self.model.batch_size: len(next_info.vector_observations),
+ self.model.sequence_length: 1}
+ if self.use_continuous_act:
+ feed_dict[self.model.selected_actions] = next_info.previous_vector_actions
+ else:
+ feed_dict[self.model.action_holder] = next_info.previous_vector_actions
+ for i in range(self.model.vis_obs_size):
+ feed_dict[self.model.visual_in[i]] = curr_info.visual_observations[i]
+ feed_dict[self.model.next_visual_in[i]] = next_info.visual_observations[i]
+ if self.use_vec_obs:
+ feed_dict[self.model.vector_in] = curr_info.vector_observations
+ feed_dict[self.model.next_vector_in] = next_info.vector_observations
+ if self.use_recurrent:
+ if curr_info.memories.shape[1] == 0:
+ curr_info.memories = self.make_empty_memory(len(curr_info.agents))
+ feed_dict[self.model.memory_in] = curr_info.memories
+ intrinsic_rewards = self.sess.run(self.model.intrinsic_reward,
+ feed_dict=feed_dict) * float(self.has_updated)
+ return intrinsic_rewards
+ else:
+ return None
+
+ def get_value_estimate(self, brain_info, idx):
+ """
+ Generates value estimates for bootstrapping.
+ :param brain_info: BrainInfo to be used for bootstrapping.
+ :param idx: Index in BrainInfo of agent.
+ :return: Value estimate.
+ """
+ feed_dict = {self.model.batch_size: 1, self.model.sequence_length: 1}
+ for i in range(len(brain_info.visual_observations)):
+ feed_dict[self.model.visual_in[i]] = [brain_info.visual_observations[i][idx]]
+ if self.use_vec_obs:
+ feed_dict[self.model.vector_in] = [brain_info.vector_observations[idx]]
+ if self.use_recurrent:
+ if brain_info.memories.shape[1] == 0:
+ brain_info.memories = self.make_empty_memory(len(brain_info.agents))
+ feed_dict[self.model.memory_in] = [brain_info.memories[idx]]
+ if not self.use_continuous_act and self.use_recurrent:
+ feed_dict[self.model.prev_action] = brain_info.previous_vector_actions[idx].reshape(
+ [-1, len(self.model.act_size)])
+ value_estimate = self.sess.run(self.model.value, feed_dict)
+ return value_estimate
+
+ def get_last_reward(self):
+ """
+        Returns the last reward the trainer has recorded.
+        :return: the last recorded reward
+ """
+ return self.sess.run(self.model.last_reward)
+
+ def update_reward(self, new_reward):
+ """
+ Updates reward value for policy.
+ :param new_reward: New reward to save.
+ """
+ self.sess.run(self.model.update_reward,
+ feed_dict={self.model.new_reward: new_reward})
diff --git a/animalai_packages/animalai_train/animalai_train/trainers/ppo/trainer.py b/animalai_packages/animalai_train/animalai_train/trainers/ppo/trainer.py
new file mode 100644
index 00000000..817cd669
--- /dev/null
+++ b/animalai_packages/animalai_train/animalai_train/trainers/ppo/trainer.py
@@ -0,0 +1,386 @@
+# # Unity ML-Agents Toolkit
+# ## ML-Agent Learning (PPO)
+# Contains an implementation of PPO as described (https://arxiv.org/abs/1707.06347).
+
+import logging
+import os
+from collections import deque
+
+import numpy as np
+import tensorflow as tf
+
+from animalai.envs import AllBrainInfo, BrainInfo
+from animalai_train.trainers.buffer import Buffer
+from animalai_train.trainers.ppo.policy import PPOPolicy
+from animalai_train.trainers.trainer import Trainer
+
+logger = logging.getLogger("mlagents.trainers")
+
+
+class PPOTrainer(Trainer):
+ """The PPOTrainer is an implementation of the PPO algorithm."""
+
+ def __init__(self, brain, reward_buff_cap, trainer_parameters, training, load, seed, run_id):
+ """
+ Responsible for collecting experiences and training PPO model.
+ :param trainer_parameters: The parameters for the trainer (dictionary).
+ :param training: Whether the trainer is set for training.
+ :param load: Whether the model should be loaded.
+ :param seed: The seed the model will be initialized with
+        :param run_id: The identifier of the current run
+ """
+ super(PPOTrainer, self).__init__(brain, trainer_parameters, training, run_id)
+ self.param_keys = ['batch_size', 'beta', 'buffer_size', 'epsilon', 'gamma', 'hidden_units', 'lambd',
+ 'learning_rate', 'max_steps', 'normalize', 'num_epoch', 'num_layers',
+ 'time_horizon', 'sequence_length', 'summary_freq', 'use_recurrent',
+ 'summary_path', 'memory_size', 'use_curiosity', 'curiosity_strength',
+ 'curiosity_enc_size', 'model_path']
+
+ self.check_param_keys()
+ self.use_curiosity = bool(trainer_parameters['use_curiosity'])
+ self.step = 0
+ self.policy = PPOPolicy(seed, brain, trainer_parameters,
+ self.is_training, load)
+
+ stats = {'Environment/Cumulative Reward': [], 'Environment/Episode Length': [],
+ 'Policy/Value Estimate': [], 'Policy/Entropy': [], 'Losses/Value Loss': [],
+ 'Losses/Policy Loss': [], 'Policy/Learning Rate': []}
+ if self.use_curiosity:
+ stats['Losses/Forward Loss'] = []
+ stats['Losses/Inverse Loss'] = []
+ stats['Policy/Curiosity Reward'] = []
+ self.intrinsic_rewards = {}
+ self.stats = stats
+
+ self.training_buffer = Buffer()
+ self.cumulative_rewards = {}
+ self._reward_buffer = deque(maxlen=reward_buff_cap)
+ self.episode_steps = {}
+ self.summary_path = trainer_parameters['summary_path']
+ if not os.path.exists(self.summary_path):
+ os.makedirs(self.summary_path)
+
+ self.summary_writer = tf.summary.FileWriter(self.summary_path)
+
+ def __str__(self):
+ return '''Hyperparameters for the PPO Trainer of brain {0}: \n{1}'''.format(
+ self.brain_name, '\n'.join(['\t{0}:\t{1}'.format(x, self.trainer_parameters[x]) for x in self.param_keys]))
+
+ @property
+ def parameters(self):
+ """
+ Returns the trainer parameters of the trainer.
+ """
+ return self.trainer_parameters
+
+ @property
+ def get_max_steps(self):
+ """
+ Returns the maximum number of steps. Is used to know when the trainer should be stopped.
+ :return: The maximum number of steps of the trainer
+ """
+ return float(self.trainer_parameters['max_steps'])
+
+ @property
+ def get_step(self):
+ """
+ Returns the number of steps the trainer has performed
+ :return: the step count of the trainer
+ """
+ return self.step
+
+ @property
+ def reward_buffer(self):
+ """
+ Returns the reward buffer. The reward buffer contains the cumulative
+ rewards of the most recent episodes completed by agents using this
+ trainer.
+ :return: the reward buffer.
+ """
+ return self._reward_buffer
+
+ def increment_step_and_update_last_reward(self):
+ """
+        Increments the step count of the trainer and updates the last reward.
+ """
+ if len(self.stats['Environment/Cumulative Reward']) > 0:
+ mean_reward = np.mean(self.stats['Environment/Cumulative Reward'])
+ self.policy.update_reward(mean_reward)
+ self.policy.increment_step()
+ self.step = self.policy.get_current_step()
+
+ def take_action(self, all_brain_info: AllBrainInfo):
+ """
+        Decides actions given observation information, and takes them in the environment.
+ :param all_brain_info: A dictionary of brain names and BrainInfo from environment.
+ :return: a tuple containing action, memories, values and an object
+ to be passed to add experiences
+ """
+ curr_brain_info = all_brain_info[self.brain_name]
+ if len(curr_brain_info.agents) == 0:
+ return [], [], [], None, None
+
+ run_out = self.policy.evaluate(curr_brain_info)
+ self.stats['Policy/Value Estimate'].append(run_out['value'].mean())
+ self.stats['Policy/Entropy'].append(run_out['entropy'].mean())
+ self.stats['Policy/Learning Rate'].append(run_out['learning_rate'])
+ if self.policy.use_recurrent:
+ return run_out['action'], run_out['memory_out'], None, \
+ run_out['value'], run_out
+ else:
+ return run_out['action'], None, None, run_out['value'], run_out
+
+ def construct_curr_info(self, next_info: BrainInfo) -> BrainInfo:
+ """
+        Constructs a BrainInfo which contains the most recent previous experiences for all agents
+        which correspond to the agents in a provided next_info.
+ :BrainInfo next_info: A t+1 BrainInfo.
+ :return: curr_info: Reconstructed BrainInfo to match agents of next_info.
+ """
+ visual_observations = [[]]
+ vector_observations = []
+ text_observations = []
+ memories = []
+ rewards = []
+ local_dones = []
+ max_reacheds = []
+ agents = []
+ prev_vector_actions = []
+ prev_text_actions = []
+ for agent_id in next_info.agents:
+ agent_brain_info = self.training_buffer[agent_id].last_brain_info
+ if agent_brain_info is None:
+ agent_brain_info = next_info
+ agent_index = agent_brain_info.agents.index(agent_id)
+ for i in range(len(next_info.visual_observations)):
+ visual_observations[i].append(agent_brain_info.visual_observations[i][agent_index])
+ vector_observations.append(agent_brain_info.vector_observations[agent_index])
+ text_observations.append(agent_brain_info.text_observations[agent_index])
+ if self.policy.use_recurrent:
+                if len(agent_brain_info.memories) > 0:
+ memories.append(agent_brain_info.memories[agent_index])
+ else:
+ memories.append(self.policy.make_empty_memory(1))
+ rewards.append(agent_brain_info.rewards[agent_index])
+ local_dones.append(agent_brain_info.local_done[agent_index])
+ max_reacheds.append(agent_brain_info.max_reached[agent_index])
+ agents.append(agent_brain_info.agents[agent_index])
+ prev_vector_actions.append(agent_brain_info.previous_vector_actions[agent_index])
+ prev_text_actions.append(agent_brain_info.previous_text_actions[agent_index])
+ if self.policy.use_recurrent:
+ memories = np.vstack(memories)
+ curr_info = BrainInfo(visual_observations, vector_observations, text_observations,
+ memories, rewards, agents, local_dones, prev_vector_actions,
+ prev_text_actions, max_reacheds)
+ return curr_info
+
+ def add_experiences(self, curr_all_info: AllBrainInfo, next_all_info: AllBrainInfo, take_action_outputs):
+ """
+ Adds experiences to each agent's experience history.
+ :param curr_all_info: Dictionary of all current brains and corresponding BrainInfo.
+        :param next_all_info: Dictionary of all next brains and corresponding BrainInfo.
+ :param take_action_outputs: The outputs of the take action method.
+ """
+ curr_info = curr_all_info[self.brain_name]
+ next_info = next_all_info[self.brain_name]
+
+ for agent_id in curr_info.agents:
+ self.training_buffer[agent_id].last_brain_info = curr_info
+ self.training_buffer[agent_id].last_take_action_outputs = take_action_outputs
+
+ if curr_info.agents != next_info.agents:
+ curr_to_use = self.construct_curr_info(next_info)
+ else:
+ curr_to_use = curr_info
+
+ intrinsic_rewards = self.policy.get_intrinsic_rewards(curr_to_use, next_info)
+
+ for agent_id in next_info.agents:
+ stored_info = self.training_buffer[agent_id].last_brain_info
+ stored_take_action_outputs = self.training_buffer[agent_id].last_take_action_outputs
+ if stored_info is not None:
+ idx = stored_info.agents.index(agent_id)
+ next_idx = next_info.agents.index(agent_id)
+ if not stored_info.local_done[idx]:
+ for i, _ in enumerate(stored_info.visual_observations):
+ self.training_buffer[agent_id]['visual_obs%d' % i].append(
+ stored_info.visual_observations[i][idx])
+ self.training_buffer[agent_id]['next_visual_obs%d' % i].append(
+ next_info.visual_observations[i][next_idx])
+ if self.policy.use_vec_obs:
+ self.training_buffer[agent_id]['vector_obs'].append(stored_info.vector_observations[idx])
+ self.training_buffer[agent_id]['next_vector_in'].append(
+ next_info.vector_observations[next_idx])
+ if self.policy.use_recurrent:
+ if stored_info.memories.shape[1] == 0:
+ stored_info.memories = np.zeros((len(stored_info.agents), self.policy.m_size))
+ self.training_buffer[agent_id]['memory'].append(stored_info.memories[idx])
+ actions = stored_take_action_outputs['action']
+ if self.policy.use_continuous_act:
+ actions_pre = stored_take_action_outputs['pre_action']
+ self.training_buffer[agent_id]['actions_pre'].append(actions_pre[idx])
+ epsilons = stored_take_action_outputs['random_normal_epsilon']
+ self.training_buffer[agent_id]['random_normal_epsilon'].append(
+ epsilons[idx])
+ else:
+ self.training_buffer[agent_id]['action_mask'].append(
+ stored_info.action_masks[idx], padding_value=1)
+ a_dist = stored_take_action_outputs['log_probs']
+ value = stored_take_action_outputs['value']
+ self.training_buffer[agent_id]['actions'].append(actions[idx])
+ self.training_buffer[agent_id]['prev_action'].append(stored_info.previous_vector_actions[idx])
+ self.training_buffer[agent_id]['masks'].append(1.0)
+ if self.use_curiosity:
+ self.training_buffer[agent_id]['rewards'].append(next_info.rewards[next_idx] +
+ intrinsic_rewards[next_idx])
+ else:
+ self.training_buffer[agent_id]['rewards'].append(next_info.rewards[next_idx])
+ self.training_buffer[agent_id]['action_probs'].append(a_dist[idx])
+ self.training_buffer[agent_id]['value_estimates'].append(value[idx][0])
+ if agent_id not in self.cumulative_rewards:
+ self.cumulative_rewards[agent_id] = 0
+ self.cumulative_rewards[agent_id] += next_info.rewards[next_idx]
+ if self.use_curiosity:
+ if agent_id not in self.intrinsic_rewards:
+ self.intrinsic_rewards[agent_id] = 0
+ self.intrinsic_rewards[agent_id] += intrinsic_rewards[next_idx]
+ if not next_info.local_done[next_idx]:
+ if agent_id not in self.episode_steps:
+ self.episode_steps[agent_id] = 0
+ self.episode_steps[agent_id] += 1
+
+ def process_experiences(self, current_info: AllBrainInfo, new_info: AllBrainInfo):
+ """
+ Checks agent histories for processing condition, and processes them as necessary.
+ Processing involves calculating value and advantage targets for model updating step.
+ :param current_info: Dictionary of all current brains and corresponding BrainInfo.
+ :param new_info: Dictionary of all next brains and corresponding BrainInfo.
+ """
+
+ info = new_info[self.brain_name]
+ for l in range(len(info.agents)):
+ agent_actions = self.training_buffer[info.agents[l]]['actions']
+ if ((info.local_done[l] or len(agent_actions) > self.trainer_parameters['time_horizon'])
+ and len(agent_actions) > 0):
+ agent_id = info.agents[l]
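+                # Bootstrap target: 0 when the episode terminated naturally, otherwise the value
+                # estimate of the last observed state (taken from the stored BrainInfo when the
+                # agent was cut off by max_reached).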
+ if info.local_done[l] and not info.max_reached[l]:
+ value_next = 0.0
+ else:
+ if info.max_reached[l]:
+ bootstrapping_info = self.training_buffer[agent_id].last_brain_info
+ idx = bootstrapping_info.agents.index(agent_id)
+ else:
+ bootstrapping_info = info
+ idx = l
+ value_next = self.policy.get_value_estimate(bootstrapping_info, idx)
+
+ self.training_buffer[agent_id]['advantages'].set(
+ get_gae(
+ rewards=self.training_buffer[agent_id]['rewards'].get_batch(),
+ value_estimates=self.training_buffer[agent_id]['value_estimates'].get_batch(),
+ value_next=value_next,
+ gamma=self.trainer_parameters['gamma'],
+ lambd=self.trainer_parameters['lambd']))
+ self.training_buffer[agent_id]['discounted_returns'].set(
+ self.training_buffer[agent_id]['advantages'].get_batch()
+ + self.training_buffer[agent_id]['value_estimates'].get_batch())
+
+ self.training_buffer.append_update_buffer(agent_id, batch_size=None,
+ training_length=self.policy.sequence_length)
+
+ self.training_buffer[agent_id].reset_agent()
+ if info.local_done[l]:
+ self.stats['Environment/Cumulative Reward'].append(
+ self.cumulative_rewards.get(agent_id, 0))
+ self.reward_buffer.appendleft(self.cumulative_rewards.get(agent_id, 0))
+ self.stats['Environment/Episode Length'].append(
+ self.episode_steps.get(agent_id, 0))
+ self.cumulative_rewards[agent_id] = 0
+ self.episode_steps[agent_id] = 0
+ if self.use_curiosity:
+ self.stats['Policy/Curiosity Reward'].append(
+ self.intrinsic_rewards.get(agent_id, 0))
+ self.intrinsic_rewards[agent_id] = 0
+
+ def end_episode(self):
+ """
+        A signal that the episode has ended. The buffer must be reset.
+        Only called when the academy resets.
+ """
+ self.training_buffer.reset_local_buffers()
+ for agent_id in self.cumulative_rewards:
+ self.cumulative_rewards[agent_id] = 0
+ for agent_id in self.episode_steps:
+ self.episode_steps[agent_id] = 0
+ if self.use_curiosity:
+ for agent_id in self.intrinsic_rewards:
+ self.intrinsic_rewards[agent_id] = 0
+
+ def is_ready_update(self):
+ """
+        Returns whether or not the trainer has enough elements to run a model update
+ :return: A boolean corresponding to whether or not update_model() can be run
+ """
+ size_of_buffer = len(self.training_buffer.update_buffer['actions'])
+ return size_of_buffer > max(int(self.trainer_parameters['buffer_size'] / self.policy.sequence_length), 1)
+
+ def update_policy(self):
+ """
+        Uses the training buffer to update the policy.
+ """
+ n_sequences = max(int(self.trainer_parameters['batch_size'] / self.policy.sequence_length), 1)
+ value_total, policy_total, forward_total, inverse_total = [], [], [], []
+ advantages = self.training_buffer.update_buffer['advantages'].get_batch()
+ self.training_buffer.update_buffer['advantages'].set(
+ (advantages - advantages.mean()) / (advantages.std() + 1e-10))
+ num_epoch = self.trainer_parameters['num_epoch']
+ for k in range(num_epoch):
+ self.training_buffer.update_buffer.shuffle()
+ buffer = self.training_buffer.update_buffer
+ for l in range(len(self.training_buffer.update_buffer['actions']) // n_sequences):
+ start = l * n_sequences
+ end = (l + 1) * n_sequences
+ run_out = self.policy.update(buffer.make_mini_batch(start, end), n_sequences)
+ value_total.append(run_out['value_loss'])
+ policy_total.append(np.abs(run_out['policy_loss']))
+ if self.use_curiosity:
+ inverse_total.append(run_out['inverse_loss'])
+ forward_total.append(run_out['forward_loss'])
+ self.stats['Losses/Value Loss'].append(np.mean(value_total))
+ self.stats['Losses/Policy Loss'].append(np.mean(policy_total))
+ if self.use_curiosity:
+ self.stats['Losses/Forward Loss'].append(np.mean(forward_total))
+ self.stats['Losses/Inverse Loss'].append(np.mean(inverse_total))
+ self.training_buffer.reset_update_buffer()
+
+
+def discount_rewards(r, gamma=0.99, value_next=0.0):
+ """
+ Computes discounted sum of future rewards for use in updating value estimate.
+ :param r: List of rewards.
+ :param gamma: Discount factor.
+ :param value_next: T+1 value estimate for returns calculation.
+ :return: discounted sum of future rewards as list.
+ """
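+    # Worked example (illustrative): discount_rewards(np.array([1., 1., 1.]), gamma=0.9) -> [2.71, 1.9, 1.0]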
+ discounted_r = np.zeros_like(r)
+ running_add = value_next
+ for t in reversed(range(0, r.size)):
+ running_add = running_add * gamma + r[t]
+ discounted_r[t] = running_add
+ return discounted_r
+
+
+def get_gae(rewards, value_estimates, value_next=0.0, gamma=0.99, lambd=0.95):
+ """
+ Computes generalized advantage estimate for use in updating policy.
+ :param rewards: list of rewards for time-steps t to T.
+ :param value_next: Value estimate for time-step T+1.
+ :param value_estimates: list of value estimates for time-steps t to T.
+ :param gamma: Discount factor.
+ :param lambd: GAE weighing factor.
+ :return: list of advantage estimates for time-steps t to T.
+ """
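+    # GAE in brief: delta_t = r_t + gamma * V(s_{t+1}) - V(s_t) and
+    # A_t = sum_l (gamma * lambd)^l * delta_{t+l}, i.e. the TD residuals discounted by gamma * lambd.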
+ value_estimates = np.asarray(value_estimates.tolist() + [value_next])
+ delta_t = rewards + gamma * value_estimates[1:] - value_estimates[:-1]
+ advantage = discount_rewards(r=delta_t, gamma=gamma * lambd)
+ return advantage
diff --git a/animalai_packages/animalai_train/animalai_train/trainers/tensorflow_to_barracuda.py b/animalai_packages/animalai_train/animalai_train/trainers/tensorflow_to_barracuda.py
new file mode 100644
index 00000000..a7942909
--- /dev/null
+++ b/animalai_packages/animalai_train/animalai_train/trainers/tensorflow_to_barracuda.py
@@ -0,0 +1,1034 @@
+from __future__ import print_function
+import numpy as np
+import struct  # convert between Python values and C structs
+import tensorflow as tf
+import re
+#import barracuda
+#from barracuda import Struct
+from animalai_train.trainers import barracuda
+from animalai_train.trainers.barracuda import Struct
+from google.protobuf import descriptor
+from google.protobuf.json_format import MessageToJson
+
+
+if __name__ == '__main__':
+    # Handle command line arguments
+ args = barracuda.parse_args(
+ description = 'Convert Tensorflow model to Barracuda binary',
+ source_extension = '.pb',
+ help = 'input Tensorflow serialized .pb file')
+    # The following code can be used as an example of the API when called from another module
+    # convert() is the main entry point for the converter
+ import tensorflow_to_barracuda as tf2bc
+ tf2bc.convert(args.source_file, args.target_file, args.trim_unused_by_output, args)
+
+
+# TODO: support more than 1 LSTM layer per model - prepend scope to names and inputs
+# TODO: support different activation functions in LSTM
+# TODO: strip output Identity node, instead patch upstream layer names
+# TODO: use ScaleBias and Pow with alpha when input is constant Tensor
+# TODO: support all data format types (currently only NHWC)
+# TODO: support all data types (currently only FLOAT, INT32, BOOL)
+# TODO: implement FusedResizeAndPadConv2D
+
+# Important ProtoBuf definitions:
+# https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/types.proto
+# https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/tensor.proto
+# https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/node_def.proto
+#
+# Node descriptions:
+# https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/ops/nn_ops.cc
+# https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/ops/math_ops.cc
+# https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/ops/random_ops.cc
+#
+# Class doc:
+# https://www.tensorflow.org/api_docs/cc/
+#
+known_classes = {
+ 'Dense': Struct(
+ id = 1,
+ out_shapes = lambda shapes: [
+ [shapes[0][0], 1, 1, shapes[0][1]], # W
+ [1, 1, 1, shapes[-1][-1]] # B
+ ],
+ patch_data = lambda data: [
+ data[0],
+ data[1]
+ ]),
+ 'MatMul': Struct(
+ id = 1,
+ out_shapes = lambda shapes: [
+ [shapes[0][0], 1, 1, shapes[0][1]], # W
+ [1, 1, 1, shapes[0][1]] # B
+ ],
+ patch_data = lambda data: [
+ data[0],
+ np.zeros(np.shape(data[1]))
+ ]),
+ 'BiasAdd': Struct(
+ id = 51, # implemented as ScaleBias
+ out_shapes = lambda shapes: [
+ [1, 1, 1, shapes[0][0]], # ONE
+ [1, 1, 1, shapes[0][0]], # B
+ ],
+ patch_data = lambda data: [
+ np.ones(np.shape(data[0])),
+ data[0]
+ ]),
+
+ # TODO: NCHW
+ 'Conv2D': Struct(
+ id = 20,
+ out_shapes = lambda shapes: [
+ shapes[0], # K
+ [1, 1, 1, shapes[-1][-1]] # B
+ ],
+ patch_data = lambda data: [
+ data[0],
+ data[1]
+ ]),
+ 'DepthwiseConv2dNative': Struct( # DepthwiseConv2D
+ id = 21,
+ out_shapes = lambda s: [
+ [s[0][0], s[0][1], s[0][3], s[0][2]], # K TF:[H, W, in_channels, channel_multiplier] => [H, W, 1, in_channels]
+ [1, 1, 1, s[-1][-1]] if len(s) > 1 else
+ [1, 1, 1, s[0][2]] # B
+ ],
+ patch_data = lambda data: [
+ np.transpose(data[0], (0,1,3,2)),
+ data[1]
+ ]),
+ 'Conv2DBackpropInput': Struct( # Conv2DTranspose
+ id = 22,
+ out_shapes = lambda shapes: [
+ shapes[0], # K
+ [1, 1, 1, shapes[-1][-1]] # B
+ ],
+ patch_data = lambda data: [
+ data[0],
+ data[1]
+ ]),
+
+ # TODO: 3D
+
+ 'ResizeNearestNeighbor':
+ 23, # implemented as Upsample2D
+ 'ResizeBilinear': 23, # implemented as Upsample2D
+ 'ResizeBicubic': 23, # implemented as Upsample2D
+ 'MaxPool': 25,
+ 'AvgPool': 26,
+
+ 'GlobalAveragePool':28,
+
+ 'Activation': 50,
+
+ 'BatchNormalization': Struct(
+ id = 51, # after fusion implemented as ScaleBias
+ out_shapes = lambda shapes: [
+ [1, 1, 1, shapes[0][0]], # S
+ [1, 1, 1, shapes[0][0]], # B
+ ],
+ patch_data = lambda data:
+ # fuse [gamma, beta, mean, var, epsilon] => [scale, bias]
+ # TODO: double-check if epsilon is the last data argument and not the 1st?
+ barracuda.fuse_batchnorm_weights(data[0], data[1], data[2], data[3], data[4]) if len(data) == 5 else
+ # fuse [ONE, beta, mean, var, epsilon] => [scale, bias]
+ # TODO: double-check if epsilon is the last data argument and not the 1st?
+ barracuda.fuse_batchnorm_weights(np.ones(np.shape(data[0])), data[0], data[1], data[2], data[3])
+ ),
+ 'FusedBatchNorm': Struct(
+ id = 51, # after fusion implemented as ScaleBias
+ out_shapes = lambda shapes: [
+ [1, 1, 1, shapes[0][0]], # S
+ [1, 1, 1, shapes[0][0]], # B
+ ],
+ patch_data = lambda data, layer:
+ # fuse [gamma, beta, mean, var, epsilon] => [scale, bias]
+ barracuda.fuse_batchnorm_weights(data[0], data[1], data[2], data[3], get_epsilon(layer))
+ ),
+ 'LRN': 53,
+
+ 'RandomStandardNormal':
+ 64,
+ 'RandomUniform': 65,
+ 'Multinomial': 66,
+ 'OneHot': 67,
+
+ # Broadcast ops
+ 'Add': 100,
+ 'Sub': 101,
+ 'Mul': 102,
+ 'RealDiv': 103,
+ 'Pow': 104,
+ 'Minimum': 110,
+ 'Maximum': 111,
+
+ # Reduce ops
+ 'Max': 124,
+ 'Mean': 125,
+ 'Min': 126,
+ 'Prod': 127,
+ 'Sum': 128,
+
+ 'Flatten': 200,
+ 'Reshape': 201,
+ #'Squeeze': 203,
+ #'Unsqueeze': 204,
+ 'Concat': 210,
+ 'StridedSlice': 211,
+}
+
+requires_runtime_flag = {
+ 'Dropout' : 'DropoutRuntime',
+ 'BatchNormalization' : 'BatchNormalizationRuntime',
+}
+
+known_activations = {
+ 'Linear' : 0,
+ 'Relu' : 1,
+ 'Softmax' : 2,
+ 'Tanh' : 3,
+ 'Sigmoid' : 4,
+ 'Elu' : 5,
+ 'Relu6' : 6,
+ 'LeakyRelu' : 7,
+ 'Selu' : 8,
+ 'Swish' : 9,
+
+ 'LogSoftmax' : 10,
+ 'Softplus' : 11,
+ 'Softsign' : 12,
+
+ 'Abs' : 100,
+ 'Neg' : 101,
+ 'Ceil' : 102,
+ 'Floor' : 104,
+
+ 'Sqrt' : 111,
+ 'Exp' : 113,
+ 'Log' : 114,
+
+ 'Acos' : 200,
+ 'Acosh' : 201,
+ 'Asin' : 202,
+ 'Asinh' : 203,
+ 'Atan' : 204,
+ 'Atanh' : 205,
+ 'Cos' : 206,
+ 'Cosh' : 207,
+ 'Sin' : 208,
+ 'Sinh' : 209,
+ 'Tan' : 210
+}
+
+known_paddings = {
+ 'VALID' : [0,0,0,0],
+ 'SAME' : [-1] # SameUpper
+}
+
+supported_data_formats = {
+ 'NHWC'
+}
+
+known_patterns = {
+ # TODO: Flatten pattern using namespace regexp
+ repr(['Shape', 'StridedSlice', 'Pack', 'Reshape']) : "Flatten",
+ repr(['Shape', 'StridedSlice', 'Prod', 'Pack', 'Reshape']) : "Flatten",
+ repr(['Shape', 'Slice', 'Slice', 'Prod',
+ 'ExpandDims', 'ConcatV2', 'Reshape']) : "Flatten",
+ repr(['Const', 'Reshape']) : 'Reshape',
+
+ repr(['Add', 'Rsqrt', 'Mul', 'Mul', 'Sub', 'Add']) : 'BatchNormalization',
+ repr(['Add', 'Rsqrt', 'Mul', 'Mul', 'Mul', 'Sub', 'Add']) : 'BatchNormalization',
+
+ repr(['ConcatV2']) : 'ConcatV2',
+ repr(['Mean']) : 'Mean',
+ repr(['Multinomial']) : 'Multinomial',
+ repr(['OneHot']) : 'OneHot',
+ repr(['Square']) : 'Square',
+
+ repr(['MatMul', 'BiasAdd']) : 'Dense',
+ repr(['Conv2D', 'BiasAdd']) : 'Conv2D',
+ repr(['DepthwiseConv2dNative', 'BiasAdd']) : 'DepthwiseConv2dNative',
+ repr(['Conv2DBackpropInput', 'BiasAdd']) : 'Conv2DBackpropInput',
+
+
+ repr(['Pack', 'Reshape']) : 'Flatten$', # for now we assume that this combination is trivial Flatten
+                                                              # for example it is used in ML-agents LSTM nets with sequence_length==1
+
+ repr(['StridedSlice', 'Reshape',
+ re.compile('^lstm/'),
+ 'Reshape', 'ConcatV2', 'Identity']) : 'BasicLSTM',
+
+ repr([re.compile('^lstm/'),
+ 'Reshape', 'ConcatV2', 'Identity']) : 'BasicLSTM',
+
+ repr(['Sigmoid', 'Mul']) : "Swish",
+
+ # TODO: FusedResizeAndPadConv2D
+}
+
+def by_name(args, name):
+ for a in args:
+ if a.name.endswith(name):
+ return a
+
+def by_op(args, op):
+ for a in args:
+ if a.op == op:
+ return a
+
+def order_by(args, names):
+ ordered = []
+ arg_count = len(args)
+ for name in names:
+ ordered += [a for a in args if a.endswith(name)]
+ args = [a for a in args if not a.endswith(name)]
+ ordered += args # append what is left
+ assert(len(ordered) == arg_count)
+ return ordered
+
+transform_patterns = {
+ 'Flatten' : lambda nodes, inputs, tensors, _:
+ Struct(
+ op = 'Flatten',
+ input = inputs
+ ),
+ 'Flatten$' : lambda nodes, inputs, tensors, _:
+ Struct(
+ op = 'Flatten',
+ input = [inputs[-1]] # take only the last input, assume all other arguments are trivial (like sequence_length==1 always in ML-agents LSTM nets)
+ ),
+ 'Reshape' : lambda nodes, inputs, tensors, _:
+ Struct(
+ op = 'Reshape',
+ input = inputs,
+ shape = [tensors[0].data[0], tensors[0].data[1], tensors[0].data[2], tensors[0].data[3]] if len(tensors[0].data) == 4 else
+ [tensors[0].data[0], 1, tensors[0].data[1], tensors[0].data[2]] if len(tensors[0].data) == 3 else
+ [tensors[0].data[0], 1, 1, tensors[0].data[1]]
+ # tensor.name = 'shape'
+ ),
+ 'Multinomial' : lambda nodes, inputs, tensors, _:
+ Struct(
+ op = 'Multinomial',
+ input = inputs,
+ shape = [int(by_name(tensors, '/num_samples').data[0])],
+ #seed = get_attr(nodes[0], 'seed'),
+ ),
+ 'OneHot' : lambda nodes, inputs, tensors, _:
+ Struct(
+ op = 'OneHot',
+ input = inputs,
+ shape = [int(by_name(tensors, '/depth').data[0])],
+ alpha = by_name(tensors, '/on_value').data[0],
+ beta = by_name(tensors, '/off_value').data[0],
+ ),
+ 'Square' : lambda nodes, inputs, tensors, _:
+ Struct(
+ op = 'Mul',
+ input = [i for i in inputs] + [i for i in inputs], # input * input
+ ),
+ 'ConcatV2' : lambda nodes, inputs, tensors, _:
+ Struct(
+ op = 'Concat',
+ input = inputs,
+
+            # TEMPORARY: until we implement rank detection and axis remapping (hopefully in the exporter)
+ # HACK: assume Concat is always for last channel
+ axis = int(-1)
+ #axis = int(by_name(tensors, '/axis').data[0])
+ ),
+ 'BatchNormalization' : lambda nodes, inputs, tensors, _:
+ Struct(
+ op = 'BatchNormalization',
+ input = [i for i in inputs] +
+ order_by([t.name for t in tensors], ['gamma', 'beta', 'mean', 'variance']),
+ ),
+ 'Mean' : lambda nodes, inputs, tensors, _:
+ Struct(
+ # TODO: use data_frmt of the input instead of hardcoded [1,2] for HW
+ op = 'GlobalAveragePool' if np.array_equal(tensors[0].data, [1,2]) else 'MeanWithUnsupportedReductionTensor',
+ input = [i for i in inputs],
+ ),
+ 'Dense' : lambda nodes, inputs, tensors, _:
+ Struct(
+ op = 'Dense',
+ input = [i for i in inputs] + [t.name for t in tensors],
+ data_frmt = get_attr(by_op(nodes, 'Dense') or by_op(nodes, 'MatMul'), 'data_format'),
+ ),
+ 'Conv2D' : lambda nodes, inputs, tensors, _:
+ Struct(
+ op = 'Conv2D',
+ input = [i for i in inputs] + [t.name for t in tensors],
+ padding = get_attr(by_op(nodes, 'Conv2D'), 'padding'),
+ strides = get_attr(by_op(nodes, 'Conv2D'), 'strides'),
+ dilations = get_attr(by_op(nodes, 'Conv2D'), 'dilations'),
+ data_frmt = get_attr(by_op(nodes, 'Conv2D'), 'data_format'),
+ ),
+ 'DepthwiseConv2dNative' : lambda nodes, inputs, tensors, _:
+ Struct(
+ op = 'DepthwiseConv2dNative',
+ input = [i for i in inputs] + [t.name for t in tensors],
+ padding = get_attr(by_op(nodes, 'DepthwiseConv2dNative'), 'padding'),
+ strides = get_attr(by_op(nodes, 'DepthwiseConv2dNative'), 'strides'),
+ dilations = get_attr(by_op(nodes, 'DepthwiseConv2dNative'), 'dilations'),
+ data_frmt = get_attr(by_op(nodes, 'DepthwiseConv2dNative'), 'data_format'),
+ ),
+ 'Conv2DBackpropInput' : lambda nodes, inputs, tensors, _:
+ Struct(
+ op = 'Conv2DBackpropInput',
+ input = [i for i in inputs] + [t.name for t in tensors],
+ padding = get_attr(by_op(nodes, 'Conv2DBackpropInput'), 'padding'),
+ strides = get_attr(by_op(nodes, 'Conv2DBackpropInput'), 'strides'),
+ dilations = get_attr(by_op(nodes, 'Conv2DBackpropInput'), 'dilations'),
+ data_frmt = get_attr(by_op(nodes, 'Conv2DBackpropInput'), 'data_format'),
+ ),
+ 'BasicLSTM' : lambda nodes, inputs, tensors, context:
+ basic_lstm(nodes, inputs, tensors, context),
+
+ 'Swish' : lambda nodes, inputs, tensors, _:
+ Struct(
+ op = 'Swish',
+ input = inputs
+ ),
+
+ # TODO:'Round'
+ # TODO:'Rsqrt'
+}
+
+# Parse
+def get_attr(node, attr_name, default=None):
+ if type(node) == Struct:
+ if hasattr(node, attr_name):
+ return getattr(node, attr_name)
+ else:
+ return default
+
+ # See: https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/attr_value.proto
+ val = node.attr[attr_name]
+
+ if val.HasField("list"):
+ return val.list.i
+ # NOTE: can't find way to identify type of list BUT it is almost always list(int)
+ # except list(float) in FractionalAvg/MaxPool
+ if val.HasField("b"):
+ return val.b
+ if val.HasField("i"):
+ return val.i
+ if val.HasField("f"):
+ return val.f
+ if val.HasField("s"):
+ return val.s.decode("utf-8")
+ if val.HasField("shape"):
+ return val.shape
+ if val.HasField("tensor"):
+ return val.tensor
+ return default
+
+def get_epsilon(layer):
+ return get_attr(layer, 'epsilon', default=0.001) # default epsilon taken from tf.layers.batch_normalization
+
+def get_layer_shape(layer):
+ shape = get_attr(layer, 'shape')
+ if not shape:
+ return [-1, -1, -1, -1]
+ shape = [dim.size for dim in shape.dim]
+ if len(shape) == 1:
+ return [1, 1, 1, shape[0]]
+ if len(shape) == 2:
+ return [shape[0], 1, 1, shape[1]]
+ return shape
+
+def get_tensor_dims(tensor):
+ if isinstance(tensor, np.ndarray):
+ return np.shape(tensor)
+
+ dims = []
+ if tensor.tensor_shape:
+ dims = [v.size for v in tensor.tensor_shape.dim]
+ if tensor.float_val:
+ dims = np.shape(tensor.float_val)
+ if tensor.int_val:
+ dims = np.shape(tensor.int_val)
+ if tensor.bool_val:
+ dims = np.shape(tensor.bool_val)
+ return dims
+
+def get_tensor_dtype(tensor):
+ if isinstance(tensor, np.ndarray):
+ return tensor.dtype
+
+ dataType = ''
+ fields = tensor.ListFields()
+
+ for field, value in fields:
+ if field.name == 'dtype' and field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_ENUM:
+ dataType = field.enum_type.values_by_number.get(value, None).name
+
+ return dataType
+
+def get_tensor_data(tensor):
+ if isinstance(tensor, np.ndarray):
+ return tensor.astype(float)
+
+ dims = get_tensor_dims(tensor)
+ elems = np.product(dims)
+
+ if tensor.tensor_content:
+ # TODO: support other types
+ dataType = get_tensor_dtype(tensor)
+ if dataType == "DT_FLOAT":
+ data = struct.unpack('<'+str(elems)+'f', tensor.tensor_content)
+ elif dataType == "DT_INT32":
+ data = struct.unpack('<'+str(elems)+'i', tensor.tensor_content)
+ elif dataType == "DT_BOOL":
+ data = struct.unpack('<'+str(elems)+'?', tensor.tensor_content)
+ else:
+ print('UNSUPPORTED: data type', dataType)
+ if tensor.float_val:
+ data = tensor.float_val
+ if tensor.int_val:
+ data = np.array(tensor.int_val, dtype=float)
+ if tensor.bool_val:
+ data = np.array(tensor.bool_val, dtype=float)
+ return np.array(data).reshape(dims)
+
+def flatten(items, enter=lambda x: isinstance(x, list)):
+ # http://stackoverflow.com/a/40857703
+ # https://github.com/ctmakro/canton/blob/master/canton/misc.py
+ """Yield items from any nested iterable; see REF."""
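+    # e.g. list(flatten([1, [2, [3, 4]], 5])) -> [1, 2, 3, 4, 5]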
+ for x in items:
+ if enter(x):
+ yield from flatten(x)
+ else:
+ yield x
+
+def replace_strings_in_list(array_of_strings, replace_with_strings):
+    "A value in replace_with_strings can be either a single string or a list of strings"
+    potentially_nested_list = [replace_with_strings.get(s) or s for s in array_of_strings]
+ return list(flatten(potentially_nested_list))
+
+def remove_duplicates_from_list(array):
+ "Preserves the order of elements in the list"
+ output = []
+ unique = set()
+ for a in array:
+ if a not in unique:
+ unique.add(a)
+ output.append(a)
+ return output
+
+#########################################################
+
+def pool_to_HW(shape, data_frmt):
+ """ Convert from NHWC|NCHW => HW
+ """
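+    # e.g. NHWC [1, 84, 84, 3] -> [84, 84]; NCHW [1, 3, 84, 84] -> [84, 84]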
+ if len(shape) != 4:
+ return shape # Not NHWC|NCHW, return as is
+ if data_frmt == 'NCHW':
+ return [shape[2], shape[3]]
+ return [shape[1], shape[2]]
+
+def strides_to_HW(shape, format):
+ return pool_to_HW(shape, format)
+
+#########################################################
+
+def gru(nodes, inputs, tensors, context):
+ assert(len(inputs) == 2)
+
+ def find_tensor_by_name(name, default=None):
+ nonlocal tensors
+ candidates = [t for t in tensors if t.name.endswith(name)]
+ return candidates[0].data if candidates else default
+
+ input = inputs[-1]
+ state = inputs[0]
+ gates_kernel = find_tensor_by_name('/gates/kernel')
+ gates_bias = find_tensor_by_name('/gates/bias', default=np.zeros(np.shape(gates_kernel)[-1]))
+ candidate_kernel = find_tensor_by_name('/candidate/kernel')
+ candidate_bias = find_tensor_by_name('/candidate/bias', default=np.zeros(np.shape(candidate_kernel)[-1]))
+ new_state = nodes[-1].name + '_h'
+
+ assert(np.shape(gates_kernel)[-1] == np.shape(gates_bias)[-1])
+ assert(np.shape(candidate_kernel)[-1] == np.shape(candidate_bias)[-1])
+
+ num_gates = 2
+ seq_length = 1
+ hidden_size = np.shape(gates_kernel)[-1] // num_gates
+
+ gate_kernels = np.split(gates_kernel, num_gates, axis=-1)
+ gate_biases = np.split(gates_bias, num_gates, axis=-1)
+
+ context.model_tensors['kernel_r'] = gate_kernels[0]
+ context.model_tensors['kernel_u'] = gate_kernels[1]
+ context.model_tensors['kernel_c'] = candidate_kernel
+ context.model_tensors['bias_r'] = gate_biases[0]
+ context.model_tensors['bias_u'] = gate_biases[1]
+ context.model_tensors['bias_c'] = candidate_bias
+
+ new_layers = barracuda.gru('gru', input, state,
+ 'kernel_r', 'kernel_u', 'kernel_c',
+ 'bias_r', 'bias_u', 'bias_c',
+ new_state)
+
+ state_shape = [1, 1, seq_length, hidden_size]
+ context.model_memories += [state_shape, state, new_state]
+
+    # map expected output of the replaced pattern to output from our GRU cell
+ actual_output_node = nodes[-4]
+ assert(actual_output_node.op == 'Reshape')
+ context.map_ignored_layer_to_its_input[actual_output_node.name] = new_state
+
+ return new_layers
+
+def basic_lstm(nodes, inputs, tensors, context):
+ assert(len(inputs) == 2)
+
+ def find_tensor_by_name(name, default=None):
+ nonlocal tensors
+ candidates = [t for t in tensors if t.name.endswith(name)]
+ return candidates[0].data if candidates else default
+
+ def find_forget_bias():
+ nonlocal nodes
+ nonlocal tensors
+ # TODO: make it more fault-tolerant
+ # search for scalar float constant that is input to Add node
+ # and hope it is not a constant for some complex activation function
+ for t in tensors:
+ if np.prod(t.shape) == 1 and get_tensor_dtype(t.obj) == "DT_FLOAT":
+ for n in nodes:
+ if n.op == 'Add' and t.name in n.input:
+ return t.data
+ return np.zeros(1)
+
+ input = inputs[-1]
+ state_c = inputs[0] + '_c'
+ state_h = inputs[0] + '_h'
+ kernel = find_tensor_by_name('/kernel')
+ bias = find_tensor_by_name('/bias', default=np.zeros(np.shape(kernel)[-1]))
+ forget_bias = find_forget_bias()
+ new_state_c = nodes[-1].name + '_c'
+ new_state_h = nodes[-1].name + '_h'
+
+ assert(np.shape(kernel)[-1] == np.shape(bias)[-1])
+
+ num_gates = 4
+ seq_length = 1
+ hidden_size = np.shape(kernel)[-1] // num_gates
+
+ kernels = np.split(kernel, num_gates, axis=-1)
+ biases = np.split(bias, num_gates, axis=-1)
+
+ context.model_tensors['kernel_i'] = kernels[0]
+ context.model_tensors['kernel_j'] = kernels[1]
+ context.model_tensors['kernel_f'] = kernels[2]
+ context.model_tensors['kernel_o'] = kernels[3]
+ context.model_tensors['bias_i'] = biases[0]
+ context.model_tensors['bias_j'] = biases[1]
+ context.model_tensors['bias_f'] = biases[2] + forget_bias
+ context.model_tensors['bias_o'] = biases[3]
+
+ new_layers = barracuda.lstm('lstm', input, state_c, state_h,
+ 'kernel_i', 'kernel_j', 'kernel_f', 'kernel_o',
+ 'bias_i', 'bias_j', 'bias_f', 'bias_o',
+ new_state_c, new_state_h)
+
+ state_shape = [1, 1, seq_length, hidden_size]
+ context.model_memories += [state_shape, state_c, new_state_c]
+ context.model_memories += [state_shape, state_h, new_state_h]
+
+ # map expected output of the replaced pattern to output from our LSTM cell
+ actual_output_node = nodes[-4]
+ assert(actual_output_node.op == 'Reshape')
+ context.map_ignored_layer_to_its_input[actual_output_node.name] = new_state_h
+
+ return new_layers
+
+#########################################################
+
+def process_layer(layer, context, args):
+ model_tensors = context.model_tensors
+ input_shapes = context.input_shapes
+ map_ignored_layer_to_its_input = context.map_ignored_layer_to_its_input
+
+ name = layer.name
+ class_name = layer.op
+ inputs = layer.input # Tensorflow inputs are always explicit, but in case of Keras we had 'inputs = layer.input or [prev_layer_name]'
+ inputs = replace_strings_in_list(inputs, map_ignored_layer_to_its_input)
+
+ if class_name == 'Const':
+ model_tensors[name] = layer.attr["value"].tensor
+ return
+
+ if class_name == 'Placeholder':
+ assert(inputs == [])
+ map_ignored_layer_to_its_input[name] = inputs
+ input_shapes[name] = get_layer_shape(layer)
+ return
+
+ if class_name == 'Identity':
+ connected_to_const = len(inputs) == 1 and inputs[0] in model_tensors
+ if connected_to_const:
+ map_ignored_layer_to_its_input[name] = inputs
+ return
+ else:
+ # treat Identity layer that are connected to processing nodes
+ # as output from the network
+ class_name = 'Linear'
+
+    # TEMPORARY: until we implement rank detection and StridedSlice at runtime
+ # HACK: skips trivial StridedSlices for rank=2 tensors
+ if class_name == 'StridedSlice' and get_attr(layer, 'begin_mask') == 1 and get_attr(layer, 'end_mask') == 1:
+ map_ignored_layer_to_its_input[name] = inputs[0]
+ return
+
+ if args.print_layers or args.verbose:
+ var_tensors = [i for i in inputs if i not in model_tensors]
+ const_tensors = [i for i in inputs if i in model_tensors]
+ print("'%s' %s Vars:%s Const:%s" % (name, class_name, var_tensors, const_tensors))
+
+ if class_name in known_activations:
+ activation = class_name
+ class_name = 'Activation'
+ else:
+ activation = 'Linear'
+
+ if not class_name in known_classes:
+ if class_name in requires_runtime_flag:
+ print('SKIP:', class_name, 'layer is used only for training')
+ else:
+ print('IGNORED:', class_name, 'unknown layer')
+ map_ignored_layer_to_its_input[name] = inputs
+ return
+
+ klass = known_classes[class_name]
+ if type(klass) == int:
+ klass = Struct(id = klass)
+
+ o_l = Struct()
+ o_l.type = klass.id
+ o_l.class_name = class_name
+ o_l.name = name
+
+ padding = get_attr(layer, 'padding') # layer.attr['padding'].s.decode("utf-8")
+ strides = get_attr(layer, 'strides') # layer.attr['strides'].list.i
+ dilations = get_attr(layer, 'dilations') # layer.attr['dilations'].list.i
+ pool_size = get_attr(layer, 'ksize') # layer.attr['ksize'].list.i
+ shape = get_attr(layer, 'shape', default=[])
+ data_frmt = get_attr(layer, 'data_format') # layer.attr['data_format'].s.decode("utf-8")
+ axis = get_attr(layer, 'axis')
+ alpha = get_attr(layer, 'alpha')
+ beta = get_attr(layer, 'beta')
+
+ if activation and not activation in known_activations:
+ print('IGNORED: unknown activation', activation)
+ if padding and not padding in known_paddings:
+ print('IGNORED: unknown padding', padding)
+ if data_frmt and not data_frmt in supported_data_formats:
+ print('UNSUPPORTED: data format', data_frmt)
+
+ o_l.activation = known_activations.get(activation) or 0
+ o_l.pads = known_paddings.get(padding) or [0,0,0,0]
+ o_l.strides = strides_to_HW(strides, data_frmt) if strides else []
+ o_l.pool_size = pool_to_HW(pool_size, data_frmt) if pool_size else shape
+ o_l.axis = axis or -1
+ o_l.alpha = alpha or 1
+ o_l.beta = beta or 0
+
+ tensor_names = [i for i in inputs if i in model_tensors]
+ o_l.tensors = [Struct(name = x, shape = get_tensor_dims(model_tensors[x]), data = get_tensor_data(model_tensors[x]))
+ for x in tensor_names]
+ # Patch shapes & data
+ layer_has_model_tensors = len(o_l.tensors) > 0
+ if hasattr(klass, 'out_shapes') and layer_has_model_tensors:
+ shapes = klass.out_shapes([x.shape for x in o_l.tensors])
+
+ # if we have more shapes than actual tensors,
+ # then create & fill missing tensors with zeros
+ in_tensor_num = len(o_l.tensors)
+ for index, new_shape in enumerate(shapes):
+ if index >= in_tensor_num:
+ new_tensor = Struct(name = ('%s/patch:%i') % (name, index-in_tensor_num),
+ shape = new_shape,
+ data = np.zeros(new_shape))
+ o_l.tensors.append(new_tensor)
+ assert(len(shapes) <= len(o_l.tensors))
+
+ if hasattr(klass, 'patch_data'):
+ data = [x.data for x in o_l.tensors]
+
+ patch_data_fn = klass.patch_data
+ patch_data_expected_arg_count = patch_data_fn.__code__.co_argcount
+ patch_data_args = (data, layer) if patch_data_expected_arg_count > 1 else (data,)
+ tensor_data = patch_data_fn(*patch_data_args)
+ o_l.tensors = o_l.tensors[:len(tensor_data)] # resize tensor array to match patched data - patching might reduce number of tensors
+ for x, data in zip(o_l.tensors, tensor_data):
+ x.data = data
+
+ # after this point we should have equal amount of shapes and tensors
+ assert(len(o_l.tensors) == len(shapes))
+
+ for x, shape in zip(o_l.tensors, shapes):
+ x.shape = shape
+
+ o_l.inputs = [i for i in inputs if i not in model_tensors]
+
+ else:
+ # no 'patch_data' lambda was specified, op does not require tensor args
+ o_l.tensors = []
+ o_l.inputs = inputs
+
+ # Force all tensors to float32
+ for x in o_l.tensors:
+ x.data = x.data.astype(np.float32)
+
+ # Layer is ready
+ context.layers.append(o_l)
+
+class ModelBuilderContext:
+ def __init__(self):
+ self.layers = []
+ self.input_shapes = {}
+ self.model_tensors = {}
+ self.model_memories = []
+ self.map_ignored_layer_to_its_input = {}
+
+def process_model(model, args):
+ o_context = ModelBuilderContext()
+
+ # Find node patterns
+ nodes_as_array = [node for node in model.node]
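+    # Scan the graph sequentially: when a run of ops matches an entry in known_patterns it is fused
+    # into a single Barracuda layer via transform_patterns; otherwise each node is handled on its own
+    # by process_layer.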
+
+ node_index = 0
+ while node_index < len(nodes_as_array):
+ node = nodes_as_array[node_index]
+ match = False
+ for pattern_repr, pattern_name in known_patterns.items():
+ pattern = eval(pattern_repr)
+ if node_index + len(pattern) > len(nodes_as_array):
+ continue # pattern too long, skip
+
+ require_exact_match = (pattern[0] == 'Const' or pattern[0] == 'Identity')
+ pattern_end = node_index
+
+ def match_node(node, pattern):
+ return node.op == pattern or (hasattr(pattern, 'match') and pattern.match(node.name))
+
+ for p in pattern:
+ if not require_exact_match:
+ while pattern_end < len(nodes_as_array) and nodes_as_array[pattern_end].op != p and (
+ nodes_as_array[pattern_end].op == 'Const' or
+ nodes_as_array[pattern_end].op == 'Identity'):
+ pattern_end += 1
+ if pattern_end >= len(nodes_as_array):
+ break
+
+ match = False
+ if (hasattr(p, 'match')): # regexp
+ while pattern_end < len(nodes_as_array) and p.match(nodes_as_array[pattern_end].name):
+ match = True
+ pattern_end += 1
+ else: # exact string
+ match = nodes_as_array[pattern_end].op == p
+ pattern_end += 1
+
+ if not match:
+ break
+
+ def get_tensors(pattern_nodes):
+ nonlocal o_context
+ map_ignored_layer_to_its_input = o_context.map_ignored_layer_to_its_input
+
+ # tensors <= all Const nodes within this pattern
+ tensor_nodes = [n for n in pattern_nodes if n.op == 'Const']
+ tensors = [Struct(name = n.name, obj = n.attr["value"].tensor, shape = get_tensor_dims(n.attr["value"].tensor), data = get_tensor_data(n.attr["value"].tensor))
+ for n in tensor_nodes]
+
+ # TODO: unify / reuse code from process_layer
+ identity_nodes = [n for n in pattern_nodes if n.op == 'Identity']
+ for i in identity_nodes:
+ inputs = replace_strings_in_list(i.input, map_ignored_layer_to_its_input)
+ map_ignored_layer_to_its_input[i.name] = inputs
+
+ # gather inputs from Op nodes (not Const, not Identity)
+ op_nodes = [n for n in pattern_nodes if n not in tensor_nodes and n not in identity_nodes]
+ inputs_to_op_nodes = list(flatten([list(flatten(n.input)) for n in op_nodes]))
+ inputs_to_op_nodes = replace_strings_in_list(inputs_to_op_nodes, map_ignored_layer_to_its_input)
+ inputs_to_op_nodes = [i.split(':')[0] for i in inputs_to_op_nodes]
+
+ # filter only inputs that are coming from nodes that are outside this pattern
+ # preserve the order
+ pattern_nodes = [n.name for n in pattern_nodes]
+ #inputs_from_outside_pattern = remove_duplicates_from_list([i for i in inputs_to_op_nodes if nodes_by_name[i] not in pattern_nodes])
+ inputs_from_outside_pattern = remove_duplicates_from_list([i for i in inputs_to_op_nodes if i not in pattern_nodes])
+
+ return inputs_from_outside_pattern, tensors
+
+ if match:
+ nodes = nodes_as_array[node_index:pattern_end]
+ name = nodes[-1].name
+ var_tensors, const_tensors = get_tensors(nodes)
+ if args.print_patterns or args.verbose:
+ print('PATTERN:', name, '~~', pattern_name, pattern, '<-', var_tensors, '+', [t.name for t in const_tensors])
+ for n in nodes:
+ if n.op == 'Const' or n.op == 'Identity':
+ process_layer(n, o_context, args)
+
+ new_layers = transform_patterns[pattern_name](nodes, var_tensors, const_tensors, o_context)
+ if not isinstance(new_layers, list):
+ if not hasattr(new_layers, name): new_layers.name = name
+ new_layers = [new_layers]
+
+ for l in new_layers:
+ # TODO: prefix new layer names with scope, patch inputs
+ #l.name = name + '/' + l.name
+ process_layer(l, o_context, args)
+
+ node_index = pattern_end
+ break # pattern found & processed
+
+ if not match:
+ # TODO: gather tensors in the same way as patterns do
+ process_layer(node, o_context, args)
+ node_index += 1
+
+ return o_context.layers, o_context.input_shapes, o_context.model_tensors, o_context.model_memories
+
+#########################################################
+
+def convert(source_file, target_file, trim_unused_by_output="", verbose=False, compress_f16=False):
+ """
+ Converts a TensorFlow model into a Barracuda model.
+ :param source_file: The TensorFlow Model
+ :param target_file: The name of the file the converted model will be saved to
+    :param trim_unused_by_output: The regexp to match output nodes that remain in the model. All other unconnected nodes will be removed.
+ :param verbose: If True, will display debug messages
+ :param compress_f16: If true, the float values will be converted to f16
+ :return:
+ """
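+    # Example usage (illustrative; file names are placeholders):
+    #   convert('frozen_graph_def.pb', 'model.nn', trim_unused_by_output='action')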
+ if (type(verbose)==bool):
+ args = Struct()
+ args.verbose = verbose
+ args.print_layers = verbose
+ args.print_source_json = verbose
+ args.print_barracuda_json = verbose
+ args.print_layer_links = verbose
+ args.print_patterns = verbose
+ args.print_tensors = verbose
+ else:
+ args = verbose
+
+ # Load Tensorflow model
+ print("Converting %s to %s" % (source_file, target_file))
+    with open(source_file, 'rb') as f:
+        i_model = tf.GraphDef()
+        i_model.ParseFromString(f.read())
+
+ if args.verbose:
+ print('OP_TYPES:', {layer.op for layer in i_model.node})
+
+ if args.print_source_json or args.verbose:
+ for layer in i_model.node:
+ if not layer.op == 'Const':
+ print('MODEL:', MessageToJson(layer) + ",")
+
+ # Convert
+ o_model = barracuda.Model()
+ o_model.layers, o_input_shapes, o_model.tensors, o_model.memories = \
+ process_model(i_model, args)
+
+ # Cleanup unconnected Identities (they might linger after processing complex node patterns like LSTM)
+ def cleanup_layers(layers):
+ all_layers = {l.name for l in layers}
+ all_inputs = {i for l in layers for i in l.inputs}
+
+ def is_unconnected_identity(layer):
+ if layer.class_name == 'Activation' and layer.activation == 0: # Identity
+ assert(len(layer.inputs) == 1)
+ if layer.inputs[0] not in all_layers and layer.name not in all_inputs:
+                    return True
+            return False
+
+ return [l for l in layers if not is_unconnected_identity(l)]
+ o_model.layers = cleanup_layers(o_model.layers)
+
+ all_inputs = {i for l in o_model.layers for i in l.inputs}
+ embedded_tensors = {t.name for l in o_model.layers for t in l.tensors}
+
+ # Find global tensors
+ def dims_to_barracuda_shape(dims):
+ shape = list(dims)
+ while len(shape) < 4:
+ shape = [1] + shape
+ return shape
+ o_model.globals = [t for t in o_model.tensors if t not in all_inputs and t not in embedded_tensors]
+ #for x in global_tensors:
+ # shape = dims_to_barracuda_shape(get_tensor_dims(o_model.tensors[x]))
+ # o_globals += [Struct(
+ # name = x,
+ # shape = shape,
+ # data = np.reshape(get_tensor_data(o_model.tensors[x]), shape).astype(np.float32))]
+
+ # Trim
+ if trim_unused_by_output:
+ o_model.layers = barracuda.trim(o_model.layers, trim_unused_by_output, args.verbose)
+
+ # Create load layers for constants
+ const_tensors = [i for i in all_inputs if i in o_model.tensors]
+ const_tensors += o_model.globals
+ for x in const_tensors:
+ shape = dims_to_barracuda_shape(get_tensor_dims(o_model.tensors[x]))
+
+ o_l = Struct(
+ type = 255, # Load
+ class_name = "Const",
+ name = x,
+ pads = [0,0,0,0],
+ strides = [],
+ pool_size = [],
+ axis = -1,
+ alpha = 1,
+ beta = 0,
+ activation = 0,
+ inputs = [],
+ tensors = [Struct(
+ name = x,
+ shape = shape,
+ data = np.reshape(get_tensor_data(o_model.tensors[x]), shape).astype(np.float32))]
+ )
+ o_model.layers.insert(0, o_l)
+
+ # Find model inputs & outputs
+ all_layers = {l.name for l in o_model.layers}
+    # global inputs  => inputs that are NOT produced by any layer in the network
+    # global outputs => outputs that are NOT feeding any layer in the network OR that come from Identity layers
+ o_model.inputs = {i:o_input_shapes[i] for l in o_model.layers for i in l.inputs if i not in all_layers and i not in o_model.memories}
+
+ def is_output_layer(layer):
+ if layer.class_name == 'Const': # Constants never count as global output even when unconnected
+            return False
+        if layer.name not in all_inputs: # this layer does not feed any other layer
+ return True
+ if layer.class_name == 'Activation' and layer.activation == 0: # Identity marks global output
+ return True
+ return False
+ o_model.outputs = [l.name for l in o_model.layers if is_output_layer(l)]
+
+ # Compress
+ if compress_f16:
+ o_model = barracuda.compress(o_model)
+
+ # Sort model so that layer inputs are always ready upfront
+ o_model.layers = barracuda.sort(o_model.layers, o_model.inputs, o_model.memories, args.verbose)
+
+ # Summary
+ barracuda.summary(o_model,
+ print_layer_links = args.print_layer_links or args.verbose,
+ print_barracuda_json = args.print_barracuda_json or args.verbose,
+ print_tensors = args.print_tensors or args.verbose)
+
+ # Write to file
+ barracuda.write(o_model, target_file)
+ print('DONE: wrote', target_file, 'file.')
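A typical invocation of convert() looks like the following sketch; the file paths and the output-node regexp are placeholders, not part of this patch:

    from animalai_train.trainers import tensorflow_to_barracuda as tf2bc

    # Convert a frozen TensorFlow graph to a Barracuda .nn file (paths are hypothetical).
    tf2bc.convert('models/run_0/frozen_graph_def.pb',
                  'models/run_0/AnimalAI.nn',
                  trim_unused_by_output='action',   # keep only nodes feeding the matched outputs
                  verbose=True)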
diff --git a/animalai_packages/animalai_train/animalai_train/trainers/trainer.py b/animalai_packages/animalai_train/animalai_train/trainers/trainer.py
new file mode 100644
index 00000000..6abd8ca8
--- /dev/null
+++ b/animalai_packages/animalai_train/animalai_train/trainers/trainer.py
@@ -0,0 +1,198 @@
+# # Unity ML-Agents Toolkit
+import logging
+
+import tensorflow as tf
+import numpy as np
+
+from animalai.envs import UnityException, AllBrainInfo
+
+logger = logging.getLogger("mlagents.trainers")
+
+
+class UnityTrainerException(UnityException):
+ """
+ Related to errors with the Trainer.
+ """
+ pass
+
+
+class Trainer(object):
+ """This class is the base class for the mlagents.trainers"""
+
+ def __init__(self, brain, trainer_parameters, training, run_id):
+ """
+ Responsible for collecting experiences and training a neural network model.
+ :BrainParameters brain: Brain to be trained.
+ :dict trainer_parameters: The parameters for the trainer (dictionary).
+ :bool training: Whether the trainer is set for training.
+ :int run_id: The identifier of the current run
+ """
+ self.param_keys = []
+ self.brain_name = brain.brain_name
+ self.run_id = run_id
+ self.trainer_parameters = trainer_parameters
+ self.is_training = training
+ self.stats = {}
+ self.summary_writer = None
+ self.policy = None
+
+ def __str__(self):
+ return '''{} Trainer'''.format(self.__class__)
+
+ def check_param_keys(self):
+ for k in self.param_keys:
+ if k not in self.trainer_parameters:
+ raise UnityTrainerException(
+ "The hyper-parameter {0} could not be found for the {1} trainer of "
+ "brain {2}.".format(k, self.__class__, self.brain_name))
+
+ @property
+ def parameters(self):
+ """
+ Returns the trainer parameters of the trainer.
+ """
+ raise UnityTrainerException("The parameters property was not implemented.")
+
+ @property
+ def graph_scope(self):
+ """
+ Returns the graph scope of the trainer.
+ """
+ raise UnityTrainerException("The graph_scope property was not implemented.")
+
+ @property
+ def get_max_steps(self):
+ """
+        Returns the maximum number of steps. Used to determine when the trainer should be stopped.
+ :return: The maximum number of steps of the trainer
+ """
+ raise UnityTrainerException("The get_max_steps property was not implemented.")
+
+ @property
+ def get_step(self):
+ """
+ Returns the number of training steps the trainer has performed
+ :return: the step count of the trainer
+ """
+ raise UnityTrainerException("The get_step property was not implemented.")
+
+ @property
+ def get_last_reward(self):
+ """
+        Returns the last reward the trainer has received.
+        :return: the last reward
+ """
+ raise UnityTrainerException("The get_last_reward property was not implemented.")
+
+ def increment_step_and_update_last_reward(self):
+ """
+        Increments the step count of the trainer and updates the last reward.
+ """
+ raise UnityTrainerException(
+ "The increment_step_and_update_last_reward method was not implemented.")
+
+ def take_action(self, all_brain_info: AllBrainInfo):
+ """
+        Decides actions given state/observation information, and takes them in the environment.
+ :param all_brain_info: A dictionary of brain names and BrainInfo from environment.
+ :return: a tuple containing action, memories, values and an object
+ to be passed to add experiences
+ """
+ raise UnityTrainerException("The take_action method was not implemented.")
+
+ def add_experiences(self, curr_info: AllBrainInfo, next_info: AllBrainInfo,
+ take_action_outputs):
+ """
+ Adds experiences to each agent's experience history.
+ :param curr_info: Current AllBrainInfo.
+ :param next_info: Next AllBrainInfo.
+ :param take_action_outputs: The outputs of the take action method.
+ """
+ raise UnityTrainerException("The add_experiences method was not implemented.")
+
+ def process_experiences(self, current_info: AllBrainInfo, next_info: AllBrainInfo):
+ """
+ Checks agent histories for processing condition, and processes them as necessary.
+ Processing involves calculating value and advantage targets for model updating step.
+ :param current_info: Dictionary of all current-step brains and corresponding BrainInfo.
+ :param next_info: Dictionary of all next-step brains and corresponding BrainInfo.
+ """
+ raise UnityTrainerException("The process_experiences method was not implemented.")
+
+ def end_episode(self):
+ """
+        A signal that the episode has ended. The buffer must be reset.
+        Only gets called when the academy resets.
+ """
+ raise UnityTrainerException("The end_episode method was not implemented.")
+
+ def is_ready_update(self):
+ """
+        Returns whether or not the trainer has enough elements to update its model
+        :return: A boolean corresponding to whether or not update_policy() can be run
+ """
+ raise UnityTrainerException("The is_ready_update method was not implemented.")
+
+ def update_policy(self):
+ """
+ Uses demonstration_buffer to update model.
+ """
+ raise UnityTrainerException("The update_model method was not implemented.")
+
+ def save_model(self):
+ """
+ Saves the model
+ """
+ self.policy.save_model(self.get_step)
+
+ def export_model(self):
+ """
+ Exports the model
+ """
+ self.policy.export_model()
+
+ def write_summary(self, global_step, lesson_num=0):
+ """
+ Saves training statistics to Tensorboard.
+ :param lesson_num: Current lesson number in curriculum.
+ :param global_step: The number of steps the simulation has been going for
+ """
+ if global_step % self.trainer_parameters['summary_freq'] == 0 and global_step != 0:
+ is_training = "Training." if self.is_training and self.get_step <= self.get_max_steps else "Not Training."
+ if len(self.stats['Environment/Cumulative Reward']) > 0:
+ mean_reward = np.mean(self.stats['Environment/Cumulative Reward'])
+ logger.info(" {}: {}: Step: {}. Mean Reward: {:0.3f}. Std of Reward: {:0.3f}. {}"
+ .format(self.run_id, self.brain_name,
+ min(self.get_step, self.get_max_steps),
+ mean_reward, np.std(self.stats['Environment/Cumulative Reward']),
+ is_training))
+ else:
+ logger.info(" {}: {}: Step: {}. No episode was completed since last summary. {}"
+ .format(self.run_id, self.brain_name, self.get_step, is_training))
+ summary = tf.Summary()
+ for key in self.stats:
+ if len(self.stats[key]) > 0:
+ stat_mean = float(np.mean(self.stats[key]))
+ summary.value.add(tag='{}'.format(key), simple_value=stat_mean)
+ self.stats[key] = []
+ summary.value.add(tag='Environment/Lesson', simple_value=lesson_num)
+ self.summary_writer.add_summary(summary, self.get_step)
+ self.summary_writer.flush()
+
+ def write_tensorboard_text(self, key, input_dict):
+ """
+ Saves text to Tensorboard.
+ Note: Only works on tensorflow r1.2 or above.
+ :param key: The name of the text.
+ :param input_dict: A dictionary that will be displayed in a table on Tensorboard.
+ """
+ try:
+ with tf.Session() as sess:
+ s_op = tf.summary.text(key, tf.convert_to_tensor(
+ ([[str(x), str(input_dict[x])] for x in input_dict])))
+ s = sess.run(s_op)
+ self.summary_writer.add_summary(s, self.get_step)
+        except Exception:
+ logger.info(
+ "Cannot write text summary for Tensorboard. Tensorflow version must be r1.2 or above.")
+ pass
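Concrete trainers (such as the PPO and BC trainers added elsewhere in this patch) override these properties and methods. A minimal hypothetical subclass illustrating part of the contract might look like this; the class name, keys and defaults are illustrative only:

    from animalai_train.trainers.trainer import Trainer

    class RandomTrainer(Trainer):
        """Toy trainer sketch: shows which members a subclass is expected to provide."""
        def __init__(self, brain, trainer_parameters, training, run_id):
            super().__init__(brain, trainer_parameters, training, run_id)
            self.param_keys = ['summary_freq', 'max_steps']  # validated by check_param_keys()
            self.step = 0
            self.stats = {'Environment/Cumulative Reward': []}  # read by write_summary()

        @property
        def parameters(self):
            return self.trainer_parameters

        @property
        def get_max_steps(self):
            return float(self.trainer_parameters['max_steps'])

        @property
        def get_step(self):
            return self.step

        def increment_step_and_update_last_reward(self):
            self.step += 1

        # A real trainer would also override take_action, add_experiences,
        # process_experiences, is_ready_update and update_policy.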
diff --git a/animalai_packages/animalai_train/animalai_train/trainers/trainer_controller.py b/animalai_packages/animalai_train/animalai_train/trainers/trainer_controller.py
new file mode 100644
index 00000000..0eb6c53c
--- /dev/null
+++ b/animalai_packages/animalai_train/animalai_train/trainers/trainer_controller.py
@@ -0,0 +1,301 @@
+# # Unity ML-Agents Toolkit
+# ## ML-Agent Learning
+"""Launches trainers for each External Brains in a Unity Environment."""
+
+import os
+import logging
+import shutil
+import sys
+if sys.platform.startswith('win'):
+ import win32api
+ import win32con
+from typing import *
+
+import numpy as np
+import tensorflow as tf
+
+from animalai.envs import BrainInfo
+from animalai.envs.exception import UnityEnvironmentException
+from animalai_train.trainers.ppo.trainer import PPOTrainer
+from animalai_train.trainers.bc.offline_trainer import OfflineBCTrainer
+from animalai_train.trainers.bc.online_trainer import OnlineBCTrainer
+from animalai_train.trainers.meta_curriculum import MetaCurriculum
+
+
+class TrainerController(object):
+ def __init__(self, model_path: str, summaries_dir: str,
+ run_id: str, save_freq: int, meta_curriculum: Optional[MetaCurriculum],
+ load: bool, train: bool, keep_checkpoints: int, lesson: Optional[int],
+ external_brains: Dict[str, BrainInfo], training_seed: int, config=None):
+ """
+ :param model_path: Path to save the model.
+ :param summaries_dir: Folder to save training summaries.
+ :param run_id: The sub-directory name for model and summary statistics
+ :param save_freq: Frequency at which to save model
+ :param meta_curriculum: MetaCurriculum object which stores information about all curricula.
+ :param load: Whether to load the model or randomly initialize.
+ :param train: Whether to train model, or only run inference.
+ :param keep_checkpoints: How many model checkpoints to keep.
+ :param lesson: Start learning from this lesson.
+ :param external_brains: dictionary of external brain names to BrainInfo objects.
+ :param training_seed: Seed to use for Numpy and Tensorflow random number generation.
+ """
+
+ self.model_path = model_path
+ self.summaries_dir = summaries_dir
+ self.external_brains = external_brains
+ self.external_brain_names = external_brains.keys()
+ self.logger = logging.getLogger('mlagents.envs')
+ self.run_id = run_id
+ self.save_freq = save_freq
+ self.lesson = lesson
+ self.load_model = load
+ self.train_model = train
+ self.keep_checkpoints = keep_checkpoints
+ self.trainers = {}
+ self.global_step = 0
+ self.meta_curriculum = meta_curriculum
+ self.seed = training_seed
+ self.config = config
+ self.update_config = True
+ np.random.seed(self.seed)
+ tf.set_random_seed(self.seed)
+
+ def _get_measure_vals(self):
+ if self.meta_curriculum:
+ brain_names_to_measure_vals = {}
+ for brain_name, curriculum \
+ in self.meta_curriculum.brains_to_curriculums.items():
+ if curriculum.measure == 'progress':
+ measure_val = (self.trainers[brain_name].get_step /
+ self.trainers[brain_name].get_max_steps)
+ brain_names_to_measure_vals[brain_name] = measure_val
+ elif curriculum.measure == 'reward':
+ measure_val = np.mean(self.trainers[brain_name]
+ .reward_buffer)
+ brain_names_to_measure_vals[brain_name] = measure_val
+ return brain_names_to_measure_vals
+ else:
+ return None
+
+ def _save_model(self, steps=0):
+ """
+ Saves current model to checkpoint folder.
+        :param steps: Current number of steps in training process.
+ """
+ for brain_name in self.trainers.keys():
+ self.trainers[brain_name].save_model()
+ self.logger.info('Saved Model')
+
+ def _save_model_when_interrupted(self, steps=0):
+ self.logger.info('Learning was interrupted. Please wait '
+ 'while the graph is generated.')
+ self._save_model(steps)
+
+ def _win_handler(self, event):
+ """
+        This function gets triggered when Ctrl-C or Ctrl-Break is pressed
+        on the Windows platform.
+ """
+ if event in (win32con.CTRL_C_EVENT, win32con.CTRL_BREAK_EVENT):
+ self._save_model_when_interrupted(self.global_step)
+ self._export_graph()
+ sys.exit()
+ return True
+ return False
+
+ def _export_graph(self):
+ """
+ Exports latest saved models to .nn format for Unity embedding.
+ """
+ for brain_name in self.trainers.keys():
+ self.trainers[brain_name].export_model()
+
+ def initialize_trainers(self, trainer_config):
+ """
+ Initialization of the trainers
+ :param trainer_config: The configurations of the trainers
+ """
+ trainer_parameters_dict = {}
+
+ for brain_name in self.external_brains:
+ trainer_parameters = trainer_config['default'].copy()
+ trainer_parameters['summary_path'] = '{basedir}/{name}'.format(
+ basedir=self.summaries_dir,
+ name=str(self.run_id) + '_' + brain_name)
+ trainer_parameters['model_path'] = '{basedir}/{name}'.format(
+ basedir=self.model_path,
+ name=brain_name)
+ trainer_parameters['keep_checkpoints'] = self.keep_checkpoints
+ if brain_name in trainer_config:
+ _brain_key = brain_name
+ while not isinstance(trainer_config[_brain_key], dict):
+ _brain_key = trainer_config[_brain_key]
+ for k in trainer_config[_brain_key]:
+ trainer_parameters[k] = trainer_config[_brain_key][k]
+ trainer_parameters_dict[brain_name] = trainer_parameters.copy()
+ for brain_name in self.external_brains:
+ if trainer_parameters_dict[brain_name]['trainer'] == 'offline_bc':
+ self.trainers[brain_name] = OfflineBCTrainer(
+ self.external_brains[brain_name],
+ trainer_parameters_dict[brain_name], self.train_model,
+ self.load_model, self.seed, self.run_id)
+ elif trainer_parameters_dict[brain_name]['trainer'] == 'online_bc':
+ self.trainers[brain_name] = OnlineBCTrainer(
+ self.external_brains[brain_name],
+ trainer_parameters_dict[brain_name], self.train_model,
+ self.load_model, self.seed, self.run_id)
+ elif trainer_parameters_dict[brain_name]['trainer'] == 'ppo':
+ self.trainers[brain_name] = PPOTrainer(
+ self.external_brains[brain_name],
+ self.meta_curriculum
+ .brains_to_curriculums[brain_name]
+ .min_lesson_length if self.meta_curriculum else 0,
+ trainer_parameters_dict[brain_name],
+ self.train_model, self.load_model, self.seed, self.run_id)
+ else:
+ raise UnityEnvironmentException('The trainer config contains '
+ 'an unknown trainer type for '
+ 'brain {}'
+ .format(brain_name))
+
+ @staticmethod
+ def _create_model_path(model_path):
+ try:
+ if not os.path.exists(model_path):
+ os.makedirs(model_path)
+ except Exception:
+ raise UnityEnvironmentException('The folder {} containing the '
+ 'generated model could not be '
+ 'accessed. Please make sure the '
+ 'permissions are set correctly.'
+ .format(model_path))
+
+ def _reset_env(self, env):
+ """Resets the environment.
+
+ Returns:
+ A Data structure corresponding to the initial reset state of the
+ environment.
+ """
+ if self.meta_curriculum is not None:
+ return env.reset(config=self.meta_curriculum.get_config())
+ else:
+            if self.update_config:
+                # Apply the arena configuration only on the first reset.
+                self.update_config = False
+                return env.reset(arenas_configurations_input=self.config)
+            else:
+                return env.reset()
+
+ def start_learning(self, env, trainer_config):
+ # TODO: Should be able to start learning at different lesson numbers
+ # for each curriculum.
+ if self.meta_curriculum is not None:
+ self.meta_curriculum.set_all_curriculums_to_lesson_num(self.lesson)
+ self._create_model_path(self.model_path)
+
+ tf.reset_default_graph()
+
+        # Instantiate a trainer for each external brain.
+ self.initialize_trainers(trainer_config)
+ for _, t in self.trainers.items():
+ self.logger.info(t)
+
+ curr_info = self._reset_env(env)
+ if self.train_model:
+ for brain_name, trainer in self.trainers.items():
+ trainer.write_tensorboard_text('Hyperparameters',
+ trainer.parameters)
+ if sys.platform.startswith('win'):
+ # Add the _win_handler function to the windows console's handler function list
+ win32api.SetConsoleCtrlHandler(self._win_handler, True)
+ try:
+ while any([t.get_step <= t.get_max_steps \
+ for k, t in self.trainers.items()]) \
+ or not self.train_model:
+ new_info = self.take_step(env, curr_info)
+ self.global_step += 1
+ if self.global_step % self.save_freq == 0 and self.global_step != 0 \
+ and self.train_model:
+ # Save Tensorflow model
+ self._save_model(steps=self.global_step)
+ curr_info = new_info
+ # Final save Tensorflow model
+ if self.global_step != 0 and self.train_model:
+ self._save_model(steps=self.global_step)
+ except KeyboardInterrupt:
+ if self.train_model:
+ self._save_model_when_interrupted(steps=self.global_step)
+ pass
+ env.close()
+
+ if self.train_model:
+ self._export_graph()
+
+ def take_step(self, env, curr_info):
+ if self.meta_curriculum:
+ # Get the sizes of the reward buffers.
+ reward_buff_sizes = {k: len(t.reward_buffer) \
+ for (k, t) in self.trainers.items()}
+ # Attempt to increment the lessons of the brains who
+ # were ready.
+ lessons_incremented = \
+ self.meta_curriculum.increment_lessons(
+ self._get_measure_vals(),
+ reward_buff_sizes=reward_buff_sizes)
+
+ # If any lessons were incremented or the environment is
+ # ready to be reset
+ if (self.meta_curriculum
+ and any(lessons_incremented.values())):
+ curr_info = self._reset_env(env)
+ for brain_name, trainer in self.trainers.items():
+ trainer.end_episode()
+ for brain_name, changed in lessons_incremented.items():
+ if changed:
+ self.trainers[brain_name].reward_buffer.clear()
+ elif env.global_done:
+ curr_info = self._reset_env(env)
+ for brain_name, trainer in self.trainers.items():
+ trainer.end_episode()
+
+ # Decide and take an action
+ take_action_vector, \
+ take_action_memories, \
+ take_action_text, \
+ take_action_value, \
+ take_action_outputs \
+ = {}, {}, {}, {}, {}
+ for brain_name, trainer in self.trainers.items():
+ (take_action_vector[brain_name],
+ take_action_memories[brain_name],
+ take_action_text[brain_name],
+ take_action_value[brain_name],
+ take_action_outputs[brain_name]) = \
+ trainer.take_action(curr_info)
+ new_info = env.step(vector_action=take_action_vector,
+ memory=take_action_memories,
+ text_action=take_action_text,
+ value=take_action_value)
+ for brain_name, trainer in self.trainers.items():
+ trainer.add_experiences(curr_info, new_info,
+ take_action_outputs[brain_name])
+ trainer.process_experiences(curr_info, new_info)
+ if trainer.is_ready_update() and self.train_model \
+ and trainer.get_step <= trainer.get_max_steps:
+ # Perform gradient descent with experience buffer
+ trainer.update_policy()
+ # Write training statistics to Tensorboard.
+ if self.meta_curriculum is not None:
+ trainer.write_summary(
+ self.global_step,
+ lesson_num=self.meta_curriculum
+ .brains_to_curriculums[brain_name]
+ .lesson_num)
+ else:
+ trainer.write_summary(self.global_step)
+ if self.train_model \
+ and trainer.get_step <= trainer.get_max_steps:
+ trainer.increment_step_and_update_last_reward()
+ return new_info
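The controller is wired up by the training script; a hedged sketch of that wiring is shown below (the paths, run id and YAML file names are assumptions modelled on examples/train.py rather than quoted from it):

    import yaml
    from animalai.envs import UnityEnvironment
    from animalai.envs.arena_config import ArenaConfig
    from animalai_train.trainers.trainer_controller import TrainerController

    env = UnityEnvironment(file_name='../env/AnimalAI', worker_id=1, n_arenas=4)  # hypothetical launch
    arena_config = ArenaConfig('configs/exampleTraining.yaml')                    # arenas to train on
    with open('configs/trainer_config.yaml') as f:
        trainer_config = yaml.safe_load(f)                                        # per-brain hyper-parameters

    external_brains = {name: env.brains[name] for name in env.external_brain_names}
    tc = TrainerController(model_path='./models/run_0', summaries_dir='./summaries/run_0',
                           run_id='run_0', save_freq=5000, meta_curriculum=None,
                           load=False, train=True, keep_checkpoints=5, lesson=0,
                           external_brains=external_brains, training_seed=1,
                           config=arena_config)
    tc.start_learning(env, trainer_config)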
diff --git a/animalai_packages/animalai_train/setup.py b/animalai_packages/animalai_train/setup.py
new file mode 100644
index 00000000..6c31c5b6
--- /dev/null
+++ b/animalai_packages/animalai_train/setup.py
@@ -0,0 +1,33 @@
+from setuptools import setup
+
+setup(
+ name='animalai_train',
+ version='0.4.0',
+ description='Animal AI competition training library',
+ url='https://github.com/beyretb/AnimalAI-Olympics',
+ author='Benjamin Beyret',
+ author_email='bb1010@ic.ac.uk',
+
+ classifiers=[
+ 'Intended Audience :: Developers',
+ 'Topic :: Scientific/Engineering :: Artificial Intelligence',
+ 'License :: OSI Approved :: Apache Software License',
+ 'Programming Language :: Python :: 3.6'
+ ],
+
+ packages=['animalai_train.trainers', 'animalai_train.trainers.bc', 'animalai_train.trainers.ppo'], # Required
+ zip_safe=False,
+
+ install_requires=[
+ 'animalai>=0.4.0',
+ 'tensorflow>=1.7,<1.8',
+ 'matplotlib',
+ 'Pillow>=4.2.1,<=5.4.1',
+ 'numpy>=1.13.3,<=1.14.5',
+ 'protobuf>=3.6,<3.7',
+ 'grpcio>=1.11.0,<1.12.0',
+ 'pyyaml>=5.1',
+ 'jsonpickle>=1.2',
+ 'pypiwin32==223;platform_system=="Windows"'],
+ python_requires=">=3.5,<3.8",
+)
\ No newline at end of file
diff --git a/train.py b/train.py
index b291b582..877f08ee 100644
--- a/train.py
+++ b/train.py
@@ -1,4 +1,4 @@
-from animalai.trainers.trainer_controller import TrainerController
+from animalai_train.trainers.trainer_controller import TrainerController
from animalai.envs import UnityEnvironment
from animalai.envs.exception import UnityEnvironmentException
from animalai.envs.arena_config import ArenaConfig
From 7d2bd1a3097bf4675f6dfa3129767b1343da8fa4 Mon Sep 17 00:00:00 2001
From: Benjamin Beyret
Date: Wed, 29 May 2019 16:14:10 +0100
Subject: [PATCH 02/23] add github link to env warning + remove requirements +
create example folder
---
.../animalai_envs/animalai/envs/environment.py | 4 +++-
train.py => examples/train.py | 0
visualizeArena.py => examples/visualizeArena.py | 0
.../visualizeLightsOff.py | 0
requirementsOthers.txt | 12 ------------
requirementsWindows.txt | 13 -------------
6 files changed, 3 insertions(+), 26 deletions(-)
rename train.py => examples/train.py (100%)
rename visualizeArena.py => examples/visualizeArena.py (100%)
rename visualizeLightsOff.py => examples/visualizeLightsOff.py (100%)
delete mode 100644 requirementsOthers.txt
delete mode 100644 requirementsWindows.txt
diff --git a/animalai_packages/animalai_envs/animalai/envs/environment.py b/animalai_packages/animalai_envs/animalai/envs/environment.py
index 6c7fc0ae..7f3e28d1 100644
--- a/animalai_packages/animalai_envs/animalai/envs/environment.py
+++ b/animalai_packages/animalai_envs/animalai/envs/environment.py
@@ -168,7 +168,9 @@ def executable_launcher(self, file_name, docker_training):
if launch_string is None:
self._close()
raise UnityEnvironmentException("Couldn't launch the {0} environment. "
- "Provided filename does not match any environments."
+ "Provided filename does not match any environments.\n"
+ "If you haven't done so already, follow the instructions at: "
+ "https://github.com/beyretb/AnimalAI-Olympics "
.format(true_filename))
else:
logger.debug("This is the launch string {}".format(launch_string))
diff --git a/train.py b/examples/train.py
similarity index 100%
rename from train.py
rename to examples/train.py
diff --git a/visualizeArena.py b/examples/visualizeArena.py
similarity index 100%
rename from visualizeArena.py
rename to examples/visualizeArena.py
diff --git a/visualizeLightsOff.py b/examples/visualizeLightsOff.py
similarity index 100%
rename from visualizeLightsOff.py
rename to examples/visualizeLightsOff.py
diff --git a/requirementsOthers.txt b/requirementsOthers.txt
deleted file mode 100644
index 5aedba07..00000000
--- a/requirementsOthers.txt
+++ /dev/null
@@ -1,12 +0,0 @@
-tensorflow>=1.7,<1.8
-Pillow>=4.2.1
-matplotlib
-numpy>=1.13.3,<=1.14.5
-jupyter
-pytest>=3.2.2,<4.0.0
-docopt
-pyyaml
-jsonpickle
-matplotlib
-protobuf>=3.6,<3.7
-grpcio>=1.11.0,<1.12.0
diff --git a/requirementsWindows.txt b/requirementsWindows.txt
deleted file mode 100644
index c48ba5d3..00000000
--- a/requirementsWindows.txt
+++ /dev/null
@@ -1,13 +0,0 @@
-tensorflow>=1.7,<1.8
-Pillow>=4.2.1
-matplotlib
-numpy>=1.13.3,<=1.14.5
-jupyter
-pytest>=3.2.2,<4.0.0
-docopt
-pyyaml
-jsonpickle
-matplotlib
-protobuf>=3.6,<3.7
-grpcio>=1.11.0,<1.12.0
-pypiwin32==223
From 5144e5cee886c884892f9136a4da0fa54a6e6056 Mon Sep 17 00:00:00 2001
From: Benjamin Beyret
Date: Wed, 29 May 2019 16:15:10 +0100
Subject: [PATCH 03/23] move configs to examples
---
{configs => examples/configs}/allObjectsRandom.yaml | 0
{configs => examples/configs}/avoidance.yaml | 0
{configs => examples/configs}/exampleConfig.yaml | 0
{configs => examples/configs}/exampleTraining.yaml | 0
{configs => examples/configs}/justFood.yaml | 0
{configs => examples/configs}/lightsOff.yaml | 0
{configs => examples/configs}/movingFood.yaml | 0
{configs => examples/configs}/objectManipulation.yaml | 0
{configs => examples/configs}/obstacles.yaml | 0
{configs => examples/configs}/preferences.yaml | 0
{configs => examples/configs}/trainer_config.yaml | 0
11 files changed, 0 insertions(+), 0 deletions(-)
rename {configs => examples/configs}/allObjectsRandom.yaml (100%)
rename {configs => examples/configs}/avoidance.yaml (100%)
rename {configs => examples/configs}/exampleConfig.yaml (100%)
rename {configs => examples/configs}/exampleTraining.yaml (100%)
rename {configs => examples/configs}/justFood.yaml (100%)
rename {configs => examples/configs}/lightsOff.yaml (100%)
rename {configs => examples/configs}/movingFood.yaml (100%)
rename {configs => examples/configs}/objectManipulation.yaml (100%)
rename {configs => examples/configs}/obstacles.yaml (100%)
rename {configs => examples/configs}/preferences.yaml (100%)
rename {configs => examples/configs}/trainer_config.yaml (100%)
diff --git a/configs/allObjectsRandom.yaml b/examples/configs/allObjectsRandom.yaml
similarity index 100%
rename from configs/allObjectsRandom.yaml
rename to examples/configs/allObjectsRandom.yaml
diff --git a/configs/avoidance.yaml b/examples/configs/avoidance.yaml
similarity index 100%
rename from configs/avoidance.yaml
rename to examples/configs/avoidance.yaml
diff --git a/configs/exampleConfig.yaml b/examples/configs/exampleConfig.yaml
similarity index 100%
rename from configs/exampleConfig.yaml
rename to examples/configs/exampleConfig.yaml
diff --git a/configs/exampleTraining.yaml b/examples/configs/exampleTraining.yaml
similarity index 100%
rename from configs/exampleTraining.yaml
rename to examples/configs/exampleTraining.yaml
diff --git a/configs/justFood.yaml b/examples/configs/justFood.yaml
similarity index 100%
rename from configs/justFood.yaml
rename to examples/configs/justFood.yaml
diff --git a/configs/lightsOff.yaml b/examples/configs/lightsOff.yaml
similarity index 100%
rename from configs/lightsOff.yaml
rename to examples/configs/lightsOff.yaml
diff --git a/configs/movingFood.yaml b/examples/configs/movingFood.yaml
similarity index 100%
rename from configs/movingFood.yaml
rename to examples/configs/movingFood.yaml
diff --git a/configs/objectManipulation.yaml b/examples/configs/objectManipulation.yaml
similarity index 100%
rename from configs/objectManipulation.yaml
rename to examples/configs/objectManipulation.yaml
diff --git a/configs/obstacles.yaml b/examples/configs/obstacles.yaml
similarity index 100%
rename from configs/obstacles.yaml
rename to examples/configs/obstacles.yaml
diff --git a/configs/preferences.yaml b/examples/configs/preferences.yaml
similarity index 100%
rename from configs/preferences.yaml
rename to examples/configs/preferences.yaml
diff --git a/configs/trainer_config.yaml b/examples/configs/trainer_config.yaml
similarity index 100%
rename from configs/trainer_config.yaml
rename to examples/configs/trainer_config.yaml
From 16e8636fa3b77ae4eb0ccca75798d837cf601984 Mon Sep 17 00:00:00 2001
From: Benjamin Beyret
Date: Wed, 29 May 2019 16:33:19 +0100
Subject: [PATCH 04/23] fix links in examples
---
examples/train.py | 2 +-
examples/visualizeArena.py | 2 +-
examples/visualizeLightsOff.py | 2 +-
3 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/examples/train.py b/examples/train.py
index 877f08ee..38057b15 100644
--- a/examples/train.py
+++ b/examples/train.py
@@ -8,7 +8,7 @@
# ML-agents parameters for training
-env_path = 'env/AnimalAI'
+env_path = '../env/AnimalAI'
worker_id = random.randint(1, 100)
seed = 10
base_port = 5005
diff --git a/examples/visualizeArena.py b/examples/visualizeArena.py
index ddc7a5bd..d87f56ed 100644
--- a/examples/visualizeArena.py
+++ b/examples/visualizeArena.py
@@ -3,7 +3,7 @@
import sys
import random
-env_path = 'env/AnimalAI'
+env_path = '../env/AnimalAI'
worker_id = random.randint(0, 200)
run_seed = 1
docker_target_name = None
diff --git a/examples/visualizeLightsOff.py b/examples/visualizeLightsOff.py
index 0d941c39..855a0d40 100644
--- a/examples/visualizeLightsOff.py
+++ b/examples/visualizeLightsOff.py
@@ -5,7 +5,7 @@
from matplotlib import pyplot as plt
from matplotlib import animation
-env_path = 'env/AnimalAI'
+env_path = '../env/AnimalAI'
worker_id = random.randint(1, 100)
seed = 10
From f899425cfbd62ff0bddd952bcc13092db2390013 Mon Sep 17 00:00:00 2001
From: Benjamin Beyret
Date: Fri, 31 May 2019 18:41:35 +0100
Subject: [PATCH 05/23] add gym wrapper + add dopamine + move train package to
examples
---
.gitignore | 4 +-
animalai_bkp/envs/arena_config.py | 8 +-
animalai_bkp/envs/environment.py | 6 +-
animalai_bkp/trainers/trainer_controller.py | 2 +-
.../animalai}/LICENSE | 0
.../animalai}/README.md | 0
.../animalai}/animalai/__init__.py | 0
.../animalai/communicator_objects/__init__.py | 0
.../agent_action_proto_pb2.py | 0
.../agent_info_proto_pb2.py | 0
.../arena_parameters_proto_pb2.py | 0
.../brain_parameters_proto_pb2.py | 0
.../communicator_objects/command_proto_pb2.py | 0
.../demonstration_meta_proto_pb2.py | 0
.../engine_configuration_proto_pb2.py | 0
.../communicator_objects/header_pb2.py | 0
.../resolution_proto_pb2.py | 0
.../space_type_proto_pb2.py | 0
.../communicator_objects/unity_input_pb2.py | 0
.../communicator_objects/unity_message_pb2.py | 0
.../communicator_objects/unity_output_pb2.py | 0
.../unity_rl_initialization_input_pb2.py | 0
.../unity_rl_initialization_output_pb2.py | 0
.../unity_rl_input_pb2.py | 0
.../unity_rl_output_pb2.py | 0
.../unity_rl_reset_input_pb2.py | 0
.../unity_rl_reset_output_pb2.py | 0
.../unity_to_external_pb2.py | 0
.../unity_to_external_pb2_grpc.py | 0
.../animalai}/animalai/envs/__init__.py | 0
.../animalai}/animalai/envs/arena_config.py | 8 +-
.../animalai}/animalai/envs/brain.py | 0
.../animalai}/animalai/envs/communicator.py | 0
.../animalai}/animalai/envs/environment.py | 11 +-
.../animalai}/animalai/envs/exception.py | 0
.../animalai/animalai/envs/gym/environment.py | 354 ++++++++++++++++++
.../animalai/envs/rpc_communicator.py | 2 +-
.../animalai/envs/socket_communicator.py | 0
.../animalai}/setup.py | 3 +-
documentation/quickstart.md | 2 +-
documentation/training.md | 2 +-
.../animalai_train/LICENSE | 0
.../animalai_train/README.md | 0
.../animalai_train/animalai_train/__init__.py | 0
.../animalai_train/dopamine/animalai_lib.py | 270 +++++++++++++
.../animalai_train/trainers/__init__.py | 0
.../animalai_train/trainers/barracuda.py | 0
.../animalai_train/trainers/bc/__init__.py | 0
.../animalai_train/trainers/bc/models.py | 0
.../trainers/bc/offline_trainer.py | 0
.../trainers/bc/online_trainer.py | 0
.../animalai_train/trainers/bc/policy.py | 0
.../animalai_train/trainers/bc/trainer.py | 0
.../animalai_train/trainers/buffer.py | 0
.../animalai_train/trainers/curriculum.py | 0
.../animalai_train/trainers/demo_loader.py | 0
.../animalai_train/trainers/exception.py | 0
.../animalai_train/trainers/learn.py | 0
.../trainers/meta_curriculum.py | 0
.../animalai_train/trainers/models.py | 0
.../animalai_train/trainers/policy.py | 0
.../animalai_train/trainers/ppo/__init__.py | 0
.../animalai_train/trainers/ppo/models.py | 0
.../animalai_train/trainers/ppo/policy.py | 0
.../animalai_train/trainers/ppo/trainer.py | 0
.../trainers/tensorflow_to_barracuda.py | 0
.../animalai_train/trainers/trainer.py | 0
.../trainers/trainer_controller.py | 2 +-
.../animalai_train/setup.py | 0
examples/configs/rainbow.gin | 34 ++
examples/trainBaselines.py | 33 ++
examples/visualizeArena.py | 2 +-
examples/visualizeLightsOff.py | 6 +-
73 files changed, 721 insertions(+), 28 deletions(-)
rename {animalai_packages/animalai_envs => animalai_package/animalai}/LICENSE (100%)
rename {animalai_packages/animalai_envs => animalai_package/animalai}/README.md (100%)
rename {animalai_packages/animalai_envs => animalai_package/animalai}/animalai/__init__.py (100%)
rename {animalai_packages/animalai_envs => animalai_package/animalai}/animalai/communicator_objects/__init__.py (100%)
rename {animalai_packages/animalai_envs => animalai_package/animalai}/animalai/communicator_objects/agent_action_proto_pb2.py (100%)
rename {animalai_packages/animalai_envs => animalai_package/animalai}/animalai/communicator_objects/agent_info_proto_pb2.py (100%)
rename {animalai_packages/animalai_envs => animalai_package/animalai}/animalai/communicator_objects/arena_parameters_proto_pb2.py (100%)
rename {animalai_packages/animalai_envs => animalai_package/animalai}/animalai/communicator_objects/brain_parameters_proto_pb2.py (100%)
rename {animalai_packages/animalai_envs => animalai_package/animalai}/animalai/communicator_objects/command_proto_pb2.py (100%)
rename {animalai_packages/animalai_envs => animalai_package/animalai}/animalai/communicator_objects/demonstration_meta_proto_pb2.py (100%)
rename {animalai_packages/animalai_envs => animalai_package/animalai}/animalai/communicator_objects/engine_configuration_proto_pb2.py (100%)
rename {animalai_packages/animalai_envs => animalai_package/animalai}/animalai/communicator_objects/header_pb2.py (100%)
rename {animalai_packages/animalai_envs => animalai_package/animalai}/animalai/communicator_objects/resolution_proto_pb2.py (100%)
rename {animalai_packages/animalai_envs => animalai_package/animalai}/animalai/communicator_objects/space_type_proto_pb2.py (100%)
rename {animalai_packages/animalai_envs => animalai_package/animalai}/animalai/communicator_objects/unity_input_pb2.py (100%)
rename {animalai_packages/animalai_envs => animalai_package/animalai}/animalai/communicator_objects/unity_message_pb2.py (100%)
rename {animalai_packages/animalai_envs => animalai_package/animalai}/animalai/communicator_objects/unity_output_pb2.py (100%)
rename {animalai_packages/animalai_envs => animalai_package/animalai}/animalai/communicator_objects/unity_rl_initialization_input_pb2.py (100%)
rename {animalai_packages/animalai_envs => animalai_package/animalai}/animalai/communicator_objects/unity_rl_initialization_output_pb2.py (100%)
rename {animalai_packages/animalai_envs => animalai_package/animalai}/animalai/communicator_objects/unity_rl_input_pb2.py (100%)
rename {animalai_packages/animalai_envs => animalai_package/animalai}/animalai/communicator_objects/unity_rl_output_pb2.py (100%)
rename {animalai_packages/animalai_envs => animalai_package/animalai}/animalai/communicator_objects/unity_rl_reset_input_pb2.py (100%)
rename {animalai_packages/animalai_envs => animalai_package/animalai}/animalai/communicator_objects/unity_rl_reset_output_pb2.py (100%)
rename {animalai_packages/animalai_envs => animalai_package/animalai}/animalai/communicator_objects/unity_to_external_pb2.py (100%)
rename {animalai_packages/animalai_envs => animalai_package/animalai}/animalai/communicator_objects/unity_to_external_pb2_grpc.py (100%)
rename {animalai_packages/animalai_envs => animalai_package/animalai}/animalai/envs/__init__.py (100%)
rename {animalai_packages/animalai_envs => animalai_package/animalai}/animalai/envs/arena_config.py (94%)
rename {animalai_packages/animalai_envs => animalai_package/animalai}/animalai/envs/brain.py (100%)
rename {animalai_packages/animalai_envs => animalai_package/animalai}/animalai/envs/communicator.py (100%)
rename {animalai_packages/animalai_envs => animalai_package/animalai}/animalai/envs/environment.py (98%)
rename {animalai_packages/animalai_envs => animalai_package/animalai}/animalai/envs/exception.py (100%)
create mode 100644 animalai_package/animalai/animalai/envs/gym/environment.py
rename {animalai_packages/animalai_envs => animalai_package/animalai}/animalai/envs/rpc_communicator.py (98%)
rename {animalai_packages/animalai_envs => animalai_package/animalai}/animalai/envs/socket_communicator.py (100%)
rename {animalai_packages/animalai_envs => animalai_package/animalai}/setup.py (94%)
rename {animalai_packages => examples}/animalai_train/LICENSE (100%)
rename {animalai_packages => examples}/animalai_train/README.md (100%)
rename {animalai_packages => examples}/animalai_train/animalai_train/__init__.py (100%)
create mode 100644 examples/animalai_train/animalai_train/dopamine/animalai_lib.py
rename {animalai_packages => examples}/animalai_train/animalai_train/trainers/__init__.py (100%)
rename {animalai_packages => examples}/animalai_train/animalai_train/trainers/barracuda.py (100%)
rename {animalai_packages => examples}/animalai_train/animalai_train/trainers/bc/__init__.py (100%)
rename {animalai_packages => examples}/animalai_train/animalai_train/trainers/bc/models.py (100%)
rename {animalai_packages => examples}/animalai_train/animalai_train/trainers/bc/offline_trainer.py (100%)
rename {animalai_packages => examples}/animalai_train/animalai_train/trainers/bc/online_trainer.py (100%)
rename {animalai_packages => examples}/animalai_train/animalai_train/trainers/bc/policy.py (100%)
rename {animalai_packages => examples}/animalai_train/animalai_train/trainers/bc/trainer.py (100%)
rename {animalai_packages => examples}/animalai_train/animalai_train/trainers/buffer.py (100%)
rename {animalai_packages => examples}/animalai_train/animalai_train/trainers/curriculum.py (100%)
rename {animalai_packages => examples}/animalai_train/animalai_train/trainers/demo_loader.py (100%)
rename {animalai_packages => examples}/animalai_train/animalai_train/trainers/exception.py (100%)
rename {animalai_packages => examples}/animalai_train/animalai_train/trainers/learn.py (100%)
rename {animalai_packages => examples}/animalai_train/animalai_train/trainers/meta_curriculum.py (100%)
rename {animalai_packages => examples}/animalai_train/animalai_train/trainers/models.py (100%)
rename {animalai_packages => examples}/animalai_train/animalai_train/trainers/policy.py (100%)
rename {animalai_packages => examples}/animalai_train/animalai_train/trainers/ppo/__init__.py (100%)
rename {animalai_packages => examples}/animalai_train/animalai_train/trainers/ppo/models.py (100%)
rename {animalai_packages => examples}/animalai_train/animalai_train/trainers/ppo/policy.py (100%)
rename {animalai_packages => examples}/animalai_train/animalai_train/trainers/ppo/trainer.py (100%)
rename {animalai_packages => examples}/animalai_train/animalai_train/trainers/tensorflow_to_barracuda.py (100%)
rename {animalai_packages => examples}/animalai_train/animalai_train/trainers/trainer.py (100%)
rename {animalai_packages => examples}/animalai_train/animalai_train/trainers/trainer_controller.py (99%)
rename {animalai_packages => examples}/animalai_train/setup.py (100%)
create mode 100644 examples/configs/rainbow.gin
create mode 100644 examples/trainBaselines.py
diff --git a/.gitignore b/.gitignore
index 82fde7b4..8ccb7e08 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,7 +1,7 @@
env/*
!env/README.md
-/models
-/summaries
+models/
+summaries/
/.idea
__pycache__/
UnitySDK.log
diff --git a/animalai_bkp/envs/arena_config.py b/animalai_bkp/envs/arena_config.py
index 8e48b1a0..a756c493 100644
--- a/animalai_bkp/envs/arena_config.py
+++ b/animalai_bkp/envs/arena_config.py
@@ -94,11 +94,11 @@ def dict_to_arena_config(self) -> UnityRLResetInput:
return config_out
- def update(self, arenas_configurations_input):
+ def update(self, arenas_configurations):
- if arenas_configurations_input is not None:
- for arena_i in arenas_configurations_input.arenas:
- self.arenas[arena_i] = copy.copy(arenas_configurations_input.arenas[arena_i])
+ if arenas_configurations is not None:
+ for arena_i in arenas_configurations.arenas:
+ self.arenas[arena_i] = copy.copy(arenas_configurations.arenas[arena_i])
def constructor_arena(loader, node):
diff --git a/animalai_bkp/envs/environment.py b/animalai_bkp/envs/environment.py
index 6c7fc0ae..7397b9c8 100644
--- a/animalai_bkp/envs/environment.py
+++ b/animalai_bkp/envs/environment.py
@@ -217,16 +217,16 @@ def __str__(self):
Number of Training Brains : {2}'''.format(self._academy_name, str(self._num_brains),
str(self._num_external_brains))
- def reset(self, arenas_configurations_input=None, train_mode=True) -> AllBrainInfo:
+ def reset(self, arenas_configurations=None, train_mode=True) -> AllBrainInfo:
"""
Sends a signal to reset the unity environment.
:return: AllBrainInfo : A data structure corresponding to the initial reset state of the environment.
"""
if self._loaded:
- self.arenas_configurations.update(arenas_configurations_input)
+ self.arenas_configurations.update(arenas_configurations)
outputs = self.communicator.exchange(
- self._generate_reset_input(train_mode, arenas_configurations_input)
+ self._generate_reset_input(train_mode, arenas_configurations)
)
if outputs is None:
raise KeyboardInterrupt
diff --git a/animalai_bkp/trainers/trainer_controller.py b/animalai_bkp/trainers/trainer_controller.py
index 813a0a1e..0618815d 100644
--- a/animalai_bkp/trainers/trainer_controller.py
+++ b/animalai_bkp/trainers/trainer_controller.py
@@ -183,7 +183,7 @@ def _reset_env(self, env):
return env.reset(config=self.meta_curriculum.get_config())
else:
if self.update_config:
- return env.reset(arenas_configurations_input=self.config)
+ return env.reset(arenas_configurations=self.config)
self.update_config = False
else:
return env.reset()
diff --git a/animalai_packages/animalai_envs/LICENSE b/animalai_package/animalai/LICENSE
similarity index 100%
rename from animalai_packages/animalai_envs/LICENSE
rename to animalai_package/animalai/LICENSE
diff --git a/animalai_packages/animalai_envs/README.md b/animalai_package/animalai/README.md
similarity index 100%
rename from animalai_packages/animalai_envs/README.md
rename to animalai_package/animalai/README.md
diff --git a/animalai_packages/animalai_envs/animalai/__init__.py b/animalai_package/animalai/animalai/__init__.py
similarity index 100%
rename from animalai_packages/animalai_envs/animalai/__init__.py
rename to animalai_package/animalai/animalai/__init__.py
diff --git a/animalai_packages/animalai_envs/animalai/communicator_objects/__init__.py b/animalai_package/animalai/animalai/communicator_objects/__init__.py
similarity index 100%
rename from animalai_packages/animalai_envs/animalai/communicator_objects/__init__.py
rename to animalai_package/animalai/animalai/communicator_objects/__init__.py
diff --git a/animalai_packages/animalai_envs/animalai/communicator_objects/agent_action_proto_pb2.py b/animalai_package/animalai/animalai/communicator_objects/agent_action_proto_pb2.py
similarity index 100%
rename from animalai_packages/animalai_envs/animalai/communicator_objects/agent_action_proto_pb2.py
rename to animalai_package/animalai/animalai/communicator_objects/agent_action_proto_pb2.py
diff --git a/animalai_packages/animalai_envs/animalai/communicator_objects/agent_info_proto_pb2.py b/animalai_package/animalai/animalai/communicator_objects/agent_info_proto_pb2.py
similarity index 100%
rename from animalai_packages/animalai_envs/animalai/communicator_objects/agent_info_proto_pb2.py
rename to animalai_package/animalai/animalai/communicator_objects/agent_info_proto_pb2.py
diff --git a/animalai_packages/animalai_envs/animalai/communicator_objects/arena_parameters_proto_pb2.py b/animalai_package/animalai/animalai/communicator_objects/arena_parameters_proto_pb2.py
similarity index 100%
rename from animalai_packages/animalai_envs/animalai/communicator_objects/arena_parameters_proto_pb2.py
rename to animalai_package/animalai/animalai/communicator_objects/arena_parameters_proto_pb2.py
diff --git a/animalai_packages/animalai_envs/animalai/communicator_objects/brain_parameters_proto_pb2.py b/animalai_package/animalai/animalai/communicator_objects/brain_parameters_proto_pb2.py
similarity index 100%
rename from animalai_packages/animalai_envs/animalai/communicator_objects/brain_parameters_proto_pb2.py
rename to animalai_package/animalai/animalai/communicator_objects/brain_parameters_proto_pb2.py
diff --git a/animalai_packages/animalai_envs/animalai/communicator_objects/command_proto_pb2.py b/animalai_package/animalai/animalai/communicator_objects/command_proto_pb2.py
similarity index 100%
rename from animalai_packages/animalai_envs/animalai/communicator_objects/command_proto_pb2.py
rename to animalai_package/animalai/animalai/communicator_objects/command_proto_pb2.py
diff --git a/animalai_packages/animalai_envs/animalai/communicator_objects/demonstration_meta_proto_pb2.py b/animalai_package/animalai/animalai/communicator_objects/demonstration_meta_proto_pb2.py
similarity index 100%
rename from animalai_packages/animalai_envs/animalai/communicator_objects/demonstration_meta_proto_pb2.py
rename to animalai_package/animalai/animalai/communicator_objects/demonstration_meta_proto_pb2.py
diff --git a/animalai_packages/animalai_envs/animalai/communicator_objects/engine_configuration_proto_pb2.py b/animalai_package/animalai/animalai/communicator_objects/engine_configuration_proto_pb2.py
similarity index 100%
rename from animalai_packages/animalai_envs/animalai/communicator_objects/engine_configuration_proto_pb2.py
rename to animalai_package/animalai/animalai/communicator_objects/engine_configuration_proto_pb2.py
diff --git a/animalai_packages/animalai_envs/animalai/communicator_objects/header_pb2.py b/animalai_package/animalai/animalai/communicator_objects/header_pb2.py
similarity index 100%
rename from animalai_packages/animalai_envs/animalai/communicator_objects/header_pb2.py
rename to animalai_package/animalai/animalai/communicator_objects/header_pb2.py
diff --git a/animalai_packages/animalai_envs/animalai/communicator_objects/resolution_proto_pb2.py b/animalai_package/animalai/animalai/communicator_objects/resolution_proto_pb2.py
similarity index 100%
rename from animalai_packages/animalai_envs/animalai/communicator_objects/resolution_proto_pb2.py
rename to animalai_package/animalai/animalai/communicator_objects/resolution_proto_pb2.py
diff --git a/animalai_packages/animalai_envs/animalai/communicator_objects/space_type_proto_pb2.py b/animalai_package/animalai/animalai/communicator_objects/space_type_proto_pb2.py
similarity index 100%
rename from animalai_packages/animalai_envs/animalai/communicator_objects/space_type_proto_pb2.py
rename to animalai_package/animalai/animalai/communicator_objects/space_type_proto_pb2.py
diff --git a/animalai_packages/animalai_envs/animalai/communicator_objects/unity_input_pb2.py b/animalai_package/animalai/animalai/communicator_objects/unity_input_pb2.py
similarity index 100%
rename from animalai_packages/animalai_envs/animalai/communicator_objects/unity_input_pb2.py
rename to animalai_package/animalai/animalai/communicator_objects/unity_input_pb2.py
diff --git a/animalai_packages/animalai_envs/animalai/communicator_objects/unity_message_pb2.py b/animalai_package/animalai/animalai/communicator_objects/unity_message_pb2.py
similarity index 100%
rename from animalai_packages/animalai_envs/animalai/communicator_objects/unity_message_pb2.py
rename to animalai_package/animalai/animalai/communicator_objects/unity_message_pb2.py
diff --git a/animalai_packages/animalai_envs/animalai/communicator_objects/unity_output_pb2.py b/animalai_package/animalai/animalai/communicator_objects/unity_output_pb2.py
similarity index 100%
rename from animalai_packages/animalai_envs/animalai/communicator_objects/unity_output_pb2.py
rename to animalai_package/animalai/animalai/communicator_objects/unity_output_pb2.py
diff --git a/animalai_packages/animalai_envs/animalai/communicator_objects/unity_rl_initialization_input_pb2.py b/animalai_package/animalai/animalai/communicator_objects/unity_rl_initialization_input_pb2.py
similarity index 100%
rename from animalai_packages/animalai_envs/animalai/communicator_objects/unity_rl_initialization_input_pb2.py
rename to animalai_package/animalai/animalai/communicator_objects/unity_rl_initialization_input_pb2.py
diff --git a/animalai_packages/animalai_envs/animalai/communicator_objects/unity_rl_initialization_output_pb2.py b/animalai_package/animalai/animalai/communicator_objects/unity_rl_initialization_output_pb2.py
similarity index 100%
rename from animalai_packages/animalai_envs/animalai/communicator_objects/unity_rl_initialization_output_pb2.py
rename to animalai_package/animalai/animalai/communicator_objects/unity_rl_initialization_output_pb2.py
diff --git a/animalai_packages/animalai_envs/animalai/communicator_objects/unity_rl_input_pb2.py b/animalai_package/animalai/animalai/communicator_objects/unity_rl_input_pb2.py
similarity index 100%
rename from animalai_packages/animalai_envs/animalai/communicator_objects/unity_rl_input_pb2.py
rename to animalai_package/animalai/animalai/communicator_objects/unity_rl_input_pb2.py
diff --git a/animalai_packages/animalai_envs/animalai/communicator_objects/unity_rl_output_pb2.py b/animalai_package/animalai/animalai/communicator_objects/unity_rl_output_pb2.py
similarity index 100%
rename from animalai_packages/animalai_envs/animalai/communicator_objects/unity_rl_output_pb2.py
rename to animalai_package/animalai/animalai/communicator_objects/unity_rl_output_pb2.py
diff --git a/animalai_packages/animalai_envs/animalai/communicator_objects/unity_rl_reset_input_pb2.py b/animalai_package/animalai/animalai/communicator_objects/unity_rl_reset_input_pb2.py
similarity index 100%
rename from animalai_packages/animalai_envs/animalai/communicator_objects/unity_rl_reset_input_pb2.py
rename to animalai_package/animalai/animalai/communicator_objects/unity_rl_reset_input_pb2.py
diff --git a/animalai_packages/animalai_envs/animalai/communicator_objects/unity_rl_reset_output_pb2.py b/animalai_package/animalai/animalai/communicator_objects/unity_rl_reset_output_pb2.py
similarity index 100%
rename from animalai_packages/animalai_envs/animalai/communicator_objects/unity_rl_reset_output_pb2.py
rename to animalai_package/animalai/animalai/communicator_objects/unity_rl_reset_output_pb2.py
diff --git a/animalai_packages/animalai_envs/animalai/communicator_objects/unity_to_external_pb2.py b/animalai_package/animalai/animalai/communicator_objects/unity_to_external_pb2.py
similarity index 100%
rename from animalai_packages/animalai_envs/animalai/communicator_objects/unity_to_external_pb2.py
rename to animalai_package/animalai/animalai/communicator_objects/unity_to_external_pb2.py
diff --git a/animalai_packages/animalai_envs/animalai/communicator_objects/unity_to_external_pb2_grpc.py b/animalai_package/animalai/animalai/communicator_objects/unity_to_external_pb2_grpc.py
similarity index 100%
rename from animalai_packages/animalai_envs/animalai/communicator_objects/unity_to_external_pb2_grpc.py
rename to animalai_package/animalai/animalai/communicator_objects/unity_to_external_pb2_grpc.py
diff --git a/animalai_packages/animalai_envs/animalai/envs/__init__.py b/animalai_package/animalai/animalai/envs/__init__.py
similarity index 100%
rename from animalai_packages/animalai_envs/animalai/envs/__init__.py
rename to animalai_package/animalai/animalai/envs/__init__.py
diff --git a/animalai_packages/animalai_envs/animalai/envs/arena_config.py b/animalai_package/animalai/animalai/envs/arena_config.py
similarity index 94%
rename from animalai_packages/animalai_envs/animalai/envs/arena_config.py
rename to animalai_package/animalai/animalai/envs/arena_config.py
index 8e48b1a0..a756c493 100644
--- a/animalai_packages/animalai_envs/animalai/envs/arena_config.py
+++ b/animalai_package/animalai/animalai/envs/arena_config.py
@@ -94,11 +94,11 @@ def dict_to_arena_config(self) -> UnityRLResetInput:
return config_out
- def update(self, arenas_configurations_input):
+ def update(self, arenas_configurations):
- if arenas_configurations_input is not None:
- for arena_i in arenas_configurations_input.arenas:
- self.arenas[arena_i] = copy.copy(arenas_configurations_input.arenas[arena_i])
+ if arenas_configurations is not None:
+ for arena_i in arenas_configurations.arenas:
+ self.arenas[arena_i] = copy.copy(arenas_configurations.arenas[arena_i])
def constructor_arena(loader, node):
diff --git a/animalai_packages/animalai_envs/animalai/envs/brain.py b/animalai_package/animalai/animalai/envs/brain.py
similarity index 100%
rename from animalai_packages/animalai_envs/animalai/envs/brain.py
rename to animalai_package/animalai/animalai/envs/brain.py
diff --git a/animalai_packages/animalai_envs/animalai/envs/communicator.py b/animalai_package/animalai/animalai/envs/communicator.py
similarity index 100%
rename from animalai_packages/animalai_envs/animalai/envs/communicator.py
rename to animalai_package/animalai/animalai/envs/communicator.py
diff --git a/animalai_packages/animalai_envs/animalai/envs/environment.py b/animalai_package/animalai/animalai/envs/environment.py
similarity index 98%
rename from animalai_packages/animalai_envs/animalai/envs/environment.py
rename to animalai_package/animalai/animalai/envs/environment.py
index 7f3e28d1..5e76c159 100644
--- a/animalai_packages/animalai_envs/animalai/envs/environment.py
+++ b/animalai_package/animalai/animalai/envs/environment.py
@@ -53,7 +53,8 @@ def __init__(self, file_name=None,
self._loaded = False # If true, this means the environment was successfully loaded
self.proc1 = None # The process that is started. If None, no process was started
self.communicator = self.get_communicator(worker_id, base_port)
- self.arenas_configurations = arenas_configurations if arenas_configurations is not None else ArenaConfig()
+ self.arenas_configurations = arenas_configurations if arenas_configurations is not None \
+ else ArenaConfig()
if file_name is not None:
self.executable_launcher(file_name, docker_training)
@@ -217,18 +218,18 @@ def __str__(self):
return '''Unity Academy name: {0}
Number of Brains: {1}
Number of Training Brains : {2}'''.format(self._academy_name, str(self._num_brains),
- str(self._num_external_brains))
+ str(self._num_external_brains))
- def reset(self, arenas_configurations_input=None, train_mode=True) -> AllBrainInfo:
+ def reset(self, arenas_configurations=None, train_mode=True) -> AllBrainInfo:
"""
Sends a signal to reset the unity environment.
:return: AllBrainInfo : A data structure corresponding to the initial reset state of the environment.
"""
if self._loaded:
- self.arenas_configurations.update(arenas_configurations_input)
+ self.arenas_configurations.update(arenas_configurations)
outputs = self.communicator.exchange(
- self._generate_reset_input(train_mode, arenas_configurations_input)
+ self._generate_reset_input(train_mode, arenas_configurations)
)
if outputs is None:
raise KeyboardInterrupt
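In user code the rename only affects the keyword passed to `reset()`. A minimal sketch, assuming an AnimalAI executable at an illustrative path and the `n_arenas` keyword used by the gym wrapper introduced later in this patch:

```python
# Sketch only: the `arenas_configurations_input` keyword becomes `arenas_configurations`.
from animalai.envs import UnityEnvironment
from animalai.envs.arena_config import ArenaConfig

env = UnityEnvironment(file_name='env/AnimalAI',   # illustrative path to the executable
                       worker_id=0,
                       n_arenas=1)
arena_config = ArenaConfig('configs/justFood.yaml')  # illustrative path

info = env.reset(arenas_configurations=arena_config, train_mode=True)
env.close()
```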
diff --git a/animalai_packages/animalai_envs/animalai/envs/exception.py b/animalai_package/animalai/animalai/envs/exception.py
similarity index 100%
rename from animalai_packages/animalai_envs/animalai/envs/exception.py
rename to animalai_package/animalai/animalai/envs/exception.py
diff --git a/animalai_package/animalai/animalai/envs/gym/environment.py b/animalai_package/animalai/animalai/envs/gym/environment.py
new file mode 100644
index 00000000..1846952f
--- /dev/null
+++ b/animalai_package/animalai/animalai/envs/gym/environment.py
@@ -0,0 +1,354 @@
+import logging
+from PIL import Image
+import itertools
+import gym
+import numpy as np
+from animalai.envs import UnityEnvironment
+from gym import error, spaces
+
+
+class UnityGymException(error.Error):
+ """
+    Any error related to the AnimalAI gym wrapper.
+ """
+ pass
+
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger("gym_unity")
+
+
+class AnimalAIEnv(gym.Env):
+ """
+ Provides Gym wrapper for Unity Learning Environments.
+ Multi-agent environments use lists for object types, as done here:
+ https://github.com/openai/multiagent-particle-envs
+ """
+
+ def __init__(self,
+ environment_filename: str,
+ worker_id=0,
+ docker_training=False,
+ n_arenas=1,
+ arenas_configurations=None,
+ greyscale=False,
+ retro=True):
+ """
+ Environment initialization
+ :param environment_filename: The UnityEnvironment path or file to be wrapped in the gym.
+ :param worker_id: Worker number for environment.
+ :param docker_training: Whether this is running within a docker environment and should use a virtual
+ frame buffer (xvfb).
+ :param n_arenas: number of arenas to create in the environment (one agent per arena)
+        :param arenas_configurations: an ArenaConfig to configure the items present in each arena; random
+            objects are spawned if not provided
+ :param greyscale: whether the visual observations should be grayscaled or not
+        :param retro: resize visual observations to 84x84 (uint8) and flatten the action space
+ """
+ self._env = UnityEnvironment(file_name=environment_filename,
+ worker_id=worker_id,
+ docker_training=docker_training,
+ n_arenas=n_arenas,
+ arenas_configurations=arenas_configurations)
+ # self.name = self._env.academy_name
+ self.vector_obs = None
+ self._current_state = None
+ self._n_agents = None
+ self._flattener = None
+ self._greyscale = greyscale or retro
+ # self._seed = None
+ self.retro = retro
+ self.game_over = False # Hidden flag used by Atari environments to determine if the game is over
+ self.arenas_configurations = arenas_configurations
+
+ self.flatten_branched = self.retro
+ self.uint8_visual = self.retro
+
+ # Check brain configuration
+ if len(self._env.brains) != 1:
+ raise UnityGymException(
+ "There can only be one brain in a UnityEnvironment "
+ "if it is wrapped in a gym.")
+ self.brain_name = self._env.external_brain_names[0]
+ brain = self._env.brains[self.brain_name]
+
+ if brain.number_visual_observations == 0:
+ raise UnityGymException("Environment provides no visual observations.")
+
+ if brain.num_stacked_vector_observations != 1:
+            raise UnityGymException("Environment must provide exactly one stacked vector observation.")
+
+ # Check for number of agents in scene.
+ initial_info = self._env.reset(arenas_configurations=arenas_configurations)[self.brain_name]
+ self._check_agents(len(initial_info.agents))
+
+ if self.retro and self._n_agents > 1:
+            raise UnityGymException("Only one agent is allowed in retro mode; set n_arenas to 1.")
+
+ # Set observation and action spaces
+ if len(brain.vector_action_space_size) == 1:
+ self._action_space = spaces.Discrete(brain.vector_action_space_size[0])
+ else:
+ if self.flatten_branched:
+ self._flattener = ActionFlattener(brain.vector_action_space_size)
+ self._action_space = self._flattener.action_space
+ else:
+ self._action_space = spaces.MultiDiscrete(brain.vector_action_space_size)
+
+ # high = np.array([np.inf] * brain.vector_observation_space_size)
+ self.action_meanings = brain.vector_action_descriptions
+
+ # if self.visual_obs:
+ if self._greyscale:
+ depth = 1
+ else:
+ depth = 3
+
+ if self.retro:
+ image_space_max = 255
+ image_space_dtype = np.uint8
+ camera_height = 84
+ camera_width = 84
+
+ image_space = spaces.Box(
+ 0, image_space_max,
+ dtype=image_space_dtype,
+ shape=(camera_height, camera_width, depth)
+ )
+
+ self._observation_space = image_space
+ else:
+ image_space_max = 1.0
+ image_space_dtype = np.float32
+ camera_height = brain.camera_resolutions[0]["height"]
+ camera_width = brain.camera_resolutions[0]["width"]
+ max_float = np.finfo(np.float32).max
+
+ image_space = spaces.Box(
+ 0, image_space_max,
+ dtype=image_space_dtype,
+ shape=(self._n_agents, camera_height, camera_width, depth)
+ )
+ vector_space = spaces.Box(-max_float, max_float,
+ shape=(self._n_agents, brain.vector_observation_space_size))
+ self._observation_space = spaces.Tuple((image_space, vector_space))
+
+ def reset(self, arenas_configurations=None):
+ """Resets the state of the environment and returns an initial observation.
+ In the case of multi-agent environments, this is a list.
+ Returns: observation (object/list): the initial observation of the
+ space.
+ """
+ info = self._env.reset(arenas_configurations=arenas_configurations)[self.brain_name]
+ n_agents = len(info.agents)
+ self._check_agents(n_agents)
+ self.game_over = False
+
+ if self._n_agents == 1:
+ obs, reward, done, info = self._single_step(info)
+ else:
+ obs, reward, done, info = self._multi_step(info)
+ return obs
+
+ def step(self, action):
+ """Run one timestep of the environment's dynamics. When end of
+ episode is reached, you are responsible for calling `reset()`
+ to reset this environment's state.
+ Accepts an action and returns a tuple (observation, reward, done, info).
+ In the case of multi-agent environments, these are lists.
+ Args:
+            action (object/list): an action provided by the agent
+ Returns:
+ observation (object/list): agent's observation of the current environment
+ reward (float/list) : amount of reward returned after previous action
+ done (boolean/list): whether the episode has ended.
+ info (dict): contains auxiliary diagnostic information, including BrainInfo.
+ """
+
+ # Use random actions for all other agents in environment.
+ if self._n_agents > 1:
+ if not isinstance(action, list):
+ raise UnityGymException("The environment was expecting `action` to be a list.")
+ if len(action) != self._n_agents:
+ raise UnityGymException(
+ "The environment was expecting a list of {} actions.".format(self._n_agents))
+ else:
+ if self._flattener is not None:
+ # Action space is discrete and flattened - we expect a list of scalars
+ action = [self._flattener.lookup_action(_act) for _act in action]
+ action = np.array(action)
+ else:
+ if self._flattener is not None:
+ # Translate action into list
+ action = self._flattener.lookup_action(action)
+
+ info = self._env.step(action)[self.brain_name]
+ n_agents = len(info.agents)
+ self._check_agents(n_agents)
+ self._current_state = info
+
+ if self._n_agents == 1:
+ obs, reward, done, info = self._single_step(info)
+ self.game_over = done
+ else:
+ obs, reward, done, info = self._multi_step(info)
+ self.game_over = all(done)
+ return obs, reward, done, info
+
+ def _single_step(self, info):
+
+ self.visual_obs = self._preprocess_single(info.visual_observations[0][0, :, :, :])
+ self.vector_obs = info.vector_observations[0]
+
+ if self._greyscale:
+ self.visual_obs = self._greyscale_obs_single(self.visual_obs)
+
+ if self.retro:
+ self.visual_obs = self._resize_observation(self.visual_obs)
+ default_observation = self.visual_obs
+ else:
+ default_observation = self.visual_obs, self.vector_obs
+
+ return default_observation, info.rewards[0], info.local_done[0], {
+ "text_observation": info.text_observations[0],
+ "brain_info": info}
+
+ def _preprocess_single(self, single_visual_obs):
+ if self.uint8_visual:
+ return (255.0 * single_visual_obs).astype(np.uint8)
+ else:
+ return single_visual_obs
+
+ def _multi_step(self, info):
+
+ self.visual_obs = self._preprocess_multi(info.visual_observations)
+ self.vector_obs = info.vector_observations
+
+ if self._greyscale:
+ self.visual_obs = self._greyscale_obs_multi(self.visual_obs)
+
+ default_observation = self.visual_obs
+
+ return list(default_observation), info.rewards, info.local_done, {
+ "text_observation": info.text_observations,
+ "brain_info": info}
+
+ def _preprocess_multi(self, multiple_visual_obs):
+ if self.uint8_visual:
+ return [(255.0 * _visual_obs).astype(np.uint8) for _visual_obs in multiple_visual_obs]
+ else:
+ return multiple_visual_obs
+
+ def render(self, mode='rgb_array'):
+ return self.visual_obs
+
+ def close(self):
+        """Closes the wrapped Unity environment, performing any necessary cleanup.
+ Environments will automatically close() themselves when
+ garbage collected or when the program exits.
+ """
+ self._env.close()
+
+ def get_action_meanings(self):
+ return self.action_meanings
+
+ def seed(self, seed=None):
+ """Sets the seed for this env's random number generator(s).
+ Currently not implemented.
+ """
+        logger.warning("Could not seed environment %s", self.brain_name)
+ return
+
+ @staticmethod
+ def _resize_observation(observation):
+ """
+ Re-sizes visual observation to 84x84
+ """
+ obs_image = Image.fromarray(observation)
+ obs_image = obs_image.resize((84, 84), Image.NEAREST)
+ return np.array(obs_image)
+
+ def _greyscale_obs_single(self, obs):
+ new_obs = np.floor(np.expand_dims(np.mean(obs, axis=2), axis=2)).squeeze().astype(np.uint8)
+ return new_obs
+
+ def _greyscale_obs_multi(self, obs):
+ new_obs = [np.floor(np.expand_dims(np.mean(o, axis=2), axis=2)).squeeze().astype(np.uint8) for o in obs]
+ return new_obs
+
+ def _check_agents(self, n_agents):
+ # if n_agents > 1:
+ # raise UnityGymException(
+ # "The environment was launched as a single-agent environment, however"
+ # "there is more than one agent in the scene.")
+ # elif self._multiagent and n_agents <= 1:
+ # raise UnityGymException(
+        #         "The environment was launched as a multi-agent environment, however"
+ # "there is only one agent in the scene.")
+ if self._n_agents is None:
+ self._n_agents = n_agents
+ logger.info("{} agents within environment.".format(n_agents))
+ elif self._n_agents != n_agents:
+ raise UnityGymException("The number of agents in the environment has changed since "
+ "initialization. This is not supported.")
+
+ @property
+ def metadata(self):
+ return {'render.modes': ['rgb_array']}
+
+ @property
+ def reward_range(self):
+ return -float('inf'), float('inf')
+
+ @property
+ def spec(self):
+ return None
+
+ @property
+ def action_space(self):
+ return self._action_space
+
+ @property
+ def observation_space(self):
+ return self._observation_space
+
+ @property
+ def number_agents(self):
+ return self._n_agents
+
+
+class ActionFlattener:
+ """
+ Flattens branched discrete action spaces into single-branch discrete action spaces.
+ """
+
+ def __init__(self, branched_action_space):
+ """
+ Initialize the flattener.
+ :param branched_action_space: A List containing the sizes of each branch of the action
+ space, e.g. [2,3,3] for three branches with size 2, 3, and 3 respectively.
+ """
+ self._action_shape = branched_action_space
+ self.action_lookup = self._create_lookup(self._action_shape)
+ self.action_space = spaces.Discrete(len(self.action_lookup))
+
+ @classmethod
+    def _create_lookup(cls, branched_action_space):
+ """
+ Creates a Dict that maps discrete actions (scalars) to branched actions (lists).
+ Each key in the Dict maps to one unique set of branched actions, and each value
+ contains the List of branched actions.
+ """
+ possible_vals = [range(_num) for _num in branched_action_space]
+ all_actions = [list(_action) for _action in itertools.product(*possible_vals)]
+ # Dict should be faster than List for large action spaces
+ action_lookup = {_scalar: _action for (_scalar, _action) in enumerate(all_actions)}
+ return action_lookup
+
+ def lookup_action(self, action):
+ """
+ Convert a scalar discrete action into a unique set of branched actions.
+ :param: action: A scalar value representing one of the discrete actions.
+ :return: The List containing the branched actions.
+ """
+ return self.action_lookup[action]
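Two quick sketches of the pieces added above: `ActionFlattener` on its own, and the wrapper in retro mode. The environment path, branched sizes `[3, 3]` and configuration file are illustrative assumptions.

```python
# Sketch only: ActionFlattener turns a branched space such as [3, 3] into Discrete(9);
# scalar action 4 maps back to the branched action [1, 1].
import random
from animalai.envs.gym.environment import AnimalAIEnv, ActionFlattener
from animalai.envs.arena_config import ArenaConfig

flattener = ActionFlattener([3, 3])
assert flattener.action_space.n == 9
assert flattener.lookup_action(4) == [1, 1]

# Retro mode: single arena, 84x84 greyscale uint8 observations, flattened Discrete actions.
env = AnimalAIEnv(environment_filename='env/AnimalAI',                         # illustrative path
                  worker_id=random.randint(1, 100),
                  n_arenas=1,
                  arenas_configurations=ArenaConfig('configs/justFood.yaml'),  # illustrative path
                  retro=True)
obs = env.reset()
for _ in range(10):
    obs, reward, done, info = env.step(env.action_space.sample())
    if done:
        obs = env.reset()
env.close()
```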
diff --git a/animalai_packages/animalai_envs/animalai/envs/rpc_communicator.py b/animalai_package/animalai/animalai/envs/rpc_communicator.py
similarity index 98%
rename from animalai_packages/animalai_envs/animalai/envs/rpc_communicator.py
rename to animalai_package/animalai/animalai/envs/rpc_communicator.py
index aa082305..ddc48ffd 100644
--- a/animalai_packages/animalai_envs/animalai/envs/rpc_communicator.py
+++ b/animalai_package/animalai/animalai/envs/rpc_communicator.py
@@ -74,7 +74,7 @@ def check_port(self, port):
s.close()
def initialize(self, inputs: UnityInput) -> UnityOutput:
- if not self.unity_to_external.parent_conn.poll(3000):
+ if not self.unity_to_external.parent_conn.poll(90):
raise UnityTimeOutException(
"The Unity environment took too long to respond. Make sure that :\n"
"\t The environment does not need user interaction to launch\n"
diff --git a/animalai_packages/animalai_envs/animalai/envs/socket_communicator.py b/animalai_package/animalai/animalai/envs/socket_communicator.py
similarity index 100%
rename from animalai_packages/animalai_envs/animalai/envs/socket_communicator.py
rename to animalai_package/animalai/animalai/envs/socket_communicator.py
diff --git a/animalai_packages/animalai_envs/setup.py b/animalai_package/animalai/setup.py
similarity index 94%
rename from animalai_packages/animalai_envs/setup.py
rename to animalai_package/animalai/setup.py
index 9c5b270c..0be8087e 100644
--- a/animalai_packages/animalai_envs/setup.py
+++ b/animalai_package/animalai/setup.py
@@ -24,6 +24,7 @@
'protobuf>=3.6,<3.7',
'grpcio>=1.11.0,<1.12.0',
'pyyaml>=5.1',
- 'jsonpickle>=1.2'],
+ 'jsonpickle>=1.2',
+ 'gym'],
python_requires=">=3.5,<3.8",
)
\ No newline at end of file
diff --git a/documentation/quickstart.md b/documentation/quickstart.md
index f4fe1bd9..f20012f9 100644
--- a/documentation/quickstart.md
+++ b/documentation/quickstart.md
@@ -1,7 +1,7 @@
# Quick Start Guide
You can run the Animal AI environment in three different ways:
-- running the the standalone `AnimalAI` executable
+- running the standalone `AnimalAI` executable
- running a configuration file via `visualizeArena.py`
- start training using `train.py`
diff --git a/documentation/training.md b/documentation/training.md
index 0d895c51..7ff0ae86 100644
--- a/documentation/training.md
+++ b/documentation/training.md
@@ -50,7 +50,7 @@ as an argument to reset the environment. The environment will use the new config
following ones until a new configuration is passed. The syntax is:
```
-env.reset(arenas_configurations_input=arena_config, # A new ArenaConfig to use for reset, leave empty to use the last one provided
+env.reset(arenas_configurations=arena_config, # A new ArenaConfig to use for reset, leave empty to use the last one provided
train_mode=True # True for training
)
```
diff --git a/animalai_packages/animalai_train/LICENSE b/examples/animalai_train/LICENSE
similarity index 100%
rename from animalai_packages/animalai_train/LICENSE
rename to examples/animalai_train/LICENSE
diff --git a/animalai_packages/animalai_train/README.md b/examples/animalai_train/README.md
similarity index 100%
rename from animalai_packages/animalai_train/README.md
rename to examples/animalai_train/README.md
diff --git a/animalai_packages/animalai_train/animalai_train/__init__.py b/examples/animalai_train/animalai_train/__init__.py
similarity index 100%
rename from animalai_packages/animalai_train/animalai_train/__init__.py
rename to examples/animalai_train/animalai_train/__init__.py
diff --git a/examples/animalai_train/animalai_train/dopamine/animalai_lib.py b/examples/animalai_train/animalai_train/dopamine/animalai_lib.py
new file mode 100644
index 00000000..532ea075
--- /dev/null
+++ b/examples/animalai_train/animalai_train/dopamine/animalai_lib.py
@@ -0,0 +1,270 @@
+# coding=utf-8
+# Copyright 2018 The Dopamine Authors.
+# Modifications copyright 2019 Unity Technologies.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Animal AI-specific utilities, including Atari-style network architectures.
+
+This includes a class implementing minimal preprocessing, which
+is in charge of:
+ . Converting observations to greyscale.
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import math
+
+from animalai.envs.gym.environment import AnimalAIEnv
+
+import numpy as np
+import tensorflow as tf
+
+import gin.tf
+import cv2
+
+slim = tf.contrib.slim
+
+NATURE_DQN_OBSERVATION_SHAPE = (84, 84) # Size of downscaled Atari 2600 frame.
+NATURE_DQN_DTYPE = tf.uint8 # DType of Atari 2600 observations.
+NATURE_DQN_STACK_SIZE = 4 # Number of frames in the state stack.
+
+
+@gin.configurable
+def create_animalai_environment(environment_path=None):
+ """Wraps the Animal AI environment with some basic preprocessing.
+
+ Returns:
+ An Animal AI environment with some standard preprocessing.
+ """
+ assert environment_path is not None
+ env = AnimalAIEnv(environment_path, 0, n_arenas=1, retro=True)
+    # env = OTCPreprocessing(env)  # disabled: the preprocessing class below is commented out; retro mode already yields 84x84 greyscale uint8 frames
+ return env
+
+@gin.configurable
+def nature_dqn_network(num_actions, network_type, state):
+ """The convolutional network used to compute the agent's Q-values.
+
+ Args:
+ num_actions: int, number of actions.
+ network_type: namedtuple, collection of expected values to return.
+ state: `tf.Tensor`, contains the agent's current state.
+
+ Returns:
+ net: _network_type object containing the tensors output by the network.
+ """
+ net = tf.cast(state, tf.float32)
+ net = tf.div(net, 255.)
+ net = slim.conv2d(net, 32, [8, 8], stride=4)
+ net = slim.conv2d(net, 64, [4, 4], stride=2)
+ net = slim.conv2d(net, 64, [3, 3], stride=1)
+ net = slim.flatten(net)
+ net = slim.fully_connected(net, 512)
+ q_values = slim.fully_connected(net, num_actions, activation_fn=None)
+ return network_type(q_values)
+
+@gin.configurable
+def rainbow_network(num_actions, num_atoms, support, network_type, state):
+ """The convolutional network used to compute agent's Q-value distributions.
+
+ Args:
+ num_actions: int, number of actions.
+ num_atoms: int, the number of buckets of the value function distribution.
+ support: tf.linspace, the support of the Q-value distribution.
+ network_type: namedtuple, collection of expected values to return.
+ state: `tf.Tensor`, contains the agent's current state.
+
+ Returns:
+ net: _network_type object containing the tensors output by the network.
+ """
+ weights_initializer = slim.variance_scaling_initializer(
+ factor=1.0 / np.sqrt(3.0), mode='FAN_IN', uniform=True)
+
+ net = tf.cast(state, tf.float32)
+ net = tf.div(net, 255.)
+ net = slim.conv2d(
+ net, 32, [8, 8], stride=4, weights_initializer=weights_initializer)
+ net = slim.conv2d(
+ net, 64, [4, 4], stride=2, weights_initializer=weights_initializer)
+ net = slim.conv2d(
+ net, 64, [3, 3], stride=1, weights_initializer=weights_initializer)
+ net = slim.flatten(net)
+ net = slim.fully_connected(
+ net, 512, weights_initializer=weights_initializer)
+ net = slim.fully_connected(
+ net,
+ num_actions * num_atoms,
+ activation_fn=None,
+ weights_initializer=weights_initializer)
+
+ logits = tf.reshape(net, [-1, num_actions, num_atoms])
+ probabilities = tf.contrib.layers.softmax(logits)
+ q_values = tf.reduce_sum(support * probabilities, axis=2)
+ return network_type(q_values, logits, probabilities)
+
+@gin.configurable
+def implicit_quantile_network(num_actions, quantile_embedding_dim,
+ network_type, state, num_quantiles):
+ """The Implicit Quantile ConvNet.
+
+ Args:
+ num_actions: int, number of actions.
+ quantile_embedding_dim: int, embedding dimension for the quantile input.
+ network_type: namedtuple, collection of expected values to return.
+ state: `tf.Tensor`, contains the agent's current state.
+ num_quantiles: int, number of quantile inputs.
+
+ Returns:
+ net: _network_type object containing the tensors output by the network.
+ """
+ weights_initializer = slim.variance_scaling_initializer(
+ factor=1.0 / np.sqrt(3.0), mode='FAN_IN', uniform=True)
+
+ state_net = tf.cast(state, tf.float32)
+ state_net = tf.div(state_net, 255.)
+ state_net = slim.conv2d(
+ state_net, 32, [8, 8], stride=4,
+ weights_initializer=weights_initializer)
+ state_net = slim.conv2d(
+ state_net, 64, [4, 4], stride=2,
+ weights_initializer=weights_initializer)
+ state_net = slim.conv2d(
+ state_net, 64, [3, 3], stride=1,
+ weights_initializer=weights_initializer)
+ state_net = slim.flatten(state_net)
+ state_net_size = state_net.get_shape().as_list()[-1]
+ state_net_tiled = tf.tile(state_net, [num_quantiles, 1])
+
+ batch_size = state_net.get_shape().as_list()[0]
+ quantiles_shape = [num_quantiles * batch_size, 1]
+ quantiles = tf.random_uniform(
+ quantiles_shape, minval=0, maxval=1, dtype=tf.float32)
+
+ quantile_net = tf.tile(quantiles, [1, quantile_embedding_dim])
+ pi = tf.constant(math.pi)
+ quantile_net = tf.cast(tf.range(
+ 1, quantile_embedding_dim + 1, 1), tf.float32) * pi * quantile_net
+ quantile_net = tf.cos(quantile_net)
+ quantile_net = slim.fully_connected(quantile_net, state_net_size,
+ weights_initializer=weights_initializer)
+ # Hadamard product.
+ net = tf.multiply(state_net_tiled, quantile_net)
+
+ net = slim.fully_connected(
+ net, 512, weights_initializer=weights_initializer)
+ quantile_values = slim.fully_connected(
+ net,
+ num_actions,
+ activation_fn=None,
+ weights_initializer=weights_initializer)
+
+ return network_type(quantile_values=quantile_values, quantiles=quantiles)
+
+#
+# @gin.configurable
+# class AAIPreprocessing(object):
+# """A class implementing image preprocessing for OTC agents.
+#
+# Specifically, this converts observations to greyscale. It doesn't
+# do anything else to the environment.
+# """
+#
+# def __init__(self, environment):
+# """Constructor for an Obstacle Tower preprocessor.
+#
+# Args:
+# environment: Gym environment whose observations are preprocessed.
+#
+# """
+# self.environment = environment
+#
+# self.game_over = False
+# self.lives = 0 # Will need to be set by reset().
+#
+# @property
+# def observation_space(self):
+# return self.environment.observation_space
+#
+# @property
+# def action_space(self):
+# return self.environment.action_space
+#
+# @property
+# def reward_range(self):
+# return self.environment.reward_range
+#
+# @property
+# def metadata(self):
+# return self.environment.metadata
+#
+# def reset(self):
+# """Resets the environment. Converts the observation to greyscale,
+# if it is not.
+#
+# Returns:
+# observation: numpy array, the initial observation emitted by the
+# environment.
+# """
+# observation = self.environment.reset()
+# if (len(observation.shape) > 2):
+# observation = cv2.cvtColor(observation, cv2.COLOR_RGB2GRAY)
+#
+# return observation
+#
+# def render(self, mode):
+# """Renders the current screen, before preprocessing.
+#
+# This calls the Gym API's render() method.
+#
+# Args:
+# mode: Mode argument for the environment's render() method.
+# Valid values (str) are:
+# 'rgb_array': returns the raw ALE image.
+# 'human': renders to display via the Gym renderer.
+#
+# Returns:
+# if mode='rgb_array': numpy array, the most recent screen.
+# if mode='human': bool, whether the rendering was successful.
+# """
+# return self.environment.render(mode)
+#
+# def step(self, action):
+# """Applies the given action in the environment. Converts the observation to
+# greyscale, if it is not.
+#
+# Remarks:
+#
+# * If a terminal state (from life loss or episode end) is reached, this may
+# execute fewer than self.frame_skip steps in the environment.
+# * Furthermore, in this case the returned observation may not contain valid
+# image data and should be ignored.
+#
+# Args:
+# action: The action to be executed.
+#
+# Returns:
+# observation: numpy array, the observation following the action.
+# reward: float, the reward following the action.
+# is_terminal: bool, whether the environment has reached a terminal state.
+# This is true when a life is lost and terminal_on_life_loss, or when the
+# episode is over.
+# info: Gym API's info data structure.
+# """
+#
+# observation, reward, game_over, info = self.environment.step(action)
+# self.game_over = game_over
+# if (len(observation.shape) > 2):
+# observation = cv2.cvtColor(observation, cv2.COLOR_RGB2GRAY)
+# return observation, reward, game_over, info
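The networks above are plain TF1/slim graph builders; Dopamine hands them a `network_type` namedtuple and a state tensor. A minimal sketch of calling `nature_dqn_network` directly, where the namedtuple and the action count of 9 are assumptions made for illustration:

```python
# Sketch only: build the Q-value graph from nature_dqn_network with a TF1 placeholder.
import collections
import tensorflow as tf
from animalai_train.dopamine.animalai_lib import nature_dqn_network

DQNNetworkType = collections.namedtuple('dqn_network', ['q_values'])  # illustrative namedtuple

# 84x84 greyscale frames, stacked 4 deep, as in the NATURE_DQN_* constants above.
state_ph = tf.placeholder(tf.uint8, shape=(None, 84, 84, 4))
net = nature_dqn_network(num_actions=9, network_type=DQNNetworkType, state=state_ph)
print(net.q_values)  # Tensor of shape (?, 9)
```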
diff --git a/animalai_packages/animalai_train/animalai_train/trainers/__init__.py b/examples/animalai_train/animalai_train/trainers/__init__.py
similarity index 100%
rename from animalai_packages/animalai_train/animalai_train/trainers/__init__.py
rename to examples/animalai_train/animalai_train/trainers/__init__.py
diff --git a/animalai_packages/animalai_train/animalai_train/trainers/barracuda.py b/examples/animalai_train/animalai_train/trainers/barracuda.py
similarity index 100%
rename from animalai_packages/animalai_train/animalai_train/trainers/barracuda.py
rename to examples/animalai_train/animalai_train/trainers/barracuda.py
diff --git a/animalai_packages/animalai_train/animalai_train/trainers/bc/__init__.py b/examples/animalai_train/animalai_train/trainers/bc/__init__.py
similarity index 100%
rename from animalai_packages/animalai_train/animalai_train/trainers/bc/__init__.py
rename to examples/animalai_train/animalai_train/trainers/bc/__init__.py
diff --git a/animalai_packages/animalai_train/animalai_train/trainers/bc/models.py b/examples/animalai_train/animalai_train/trainers/bc/models.py
similarity index 100%
rename from animalai_packages/animalai_train/animalai_train/trainers/bc/models.py
rename to examples/animalai_train/animalai_train/trainers/bc/models.py
diff --git a/animalai_packages/animalai_train/animalai_train/trainers/bc/offline_trainer.py b/examples/animalai_train/animalai_train/trainers/bc/offline_trainer.py
similarity index 100%
rename from animalai_packages/animalai_train/animalai_train/trainers/bc/offline_trainer.py
rename to examples/animalai_train/animalai_train/trainers/bc/offline_trainer.py
diff --git a/animalai_packages/animalai_train/animalai_train/trainers/bc/online_trainer.py b/examples/animalai_train/animalai_train/trainers/bc/online_trainer.py
similarity index 100%
rename from animalai_packages/animalai_train/animalai_train/trainers/bc/online_trainer.py
rename to examples/animalai_train/animalai_train/trainers/bc/online_trainer.py
diff --git a/animalai_packages/animalai_train/animalai_train/trainers/bc/policy.py b/examples/animalai_train/animalai_train/trainers/bc/policy.py
similarity index 100%
rename from animalai_packages/animalai_train/animalai_train/trainers/bc/policy.py
rename to examples/animalai_train/animalai_train/trainers/bc/policy.py
diff --git a/animalai_packages/animalai_train/animalai_train/trainers/bc/trainer.py b/examples/animalai_train/animalai_train/trainers/bc/trainer.py
similarity index 100%
rename from animalai_packages/animalai_train/animalai_train/trainers/bc/trainer.py
rename to examples/animalai_train/animalai_train/trainers/bc/trainer.py
diff --git a/animalai_packages/animalai_train/animalai_train/trainers/buffer.py b/examples/animalai_train/animalai_train/trainers/buffer.py
similarity index 100%
rename from animalai_packages/animalai_train/animalai_train/trainers/buffer.py
rename to examples/animalai_train/animalai_train/trainers/buffer.py
diff --git a/animalai_packages/animalai_train/animalai_train/trainers/curriculum.py b/examples/animalai_train/animalai_train/trainers/curriculum.py
similarity index 100%
rename from animalai_packages/animalai_train/animalai_train/trainers/curriculum.py
rename to examples/animalai_train/animalai_train/trainers/curriculum.py
diff --git a/animalai_packages/animalai_train/animalai_train/trainers/demo_loader.py b/examples/animalai_train/animalai_train/trainers/demo_loader.py
similarity index 100%
rename from animalai_packages/animalai_train/animalai_train/trainers/demo_loader.py
rename to examples/animalai_train/animalai_train/trainers/demo_loader.py
diff --git a/animalai_packages/animalai_train/animalai_train/trainers/exception.py b/examples/animalai_train/animalai_train/trainers/exception.py
similarity index 100%
rename from animalai_packages/animalai_train/animalai_train/trainers/exception.py
rename to examples/animalai_train/animalai_train/trainers/exception.py
diff --git a/animalai_packages/animalai_train/animalai_train/trainers/learn.py b/examples/animalai_train/animalai_train/trainers/learn.py
similarity index 100%
rename from animalai_packages/animalai_train/animalai_train/trainers/learn.py
rename to examples/animalai_train/animalai_train/trainers/learn.py
diff --git a/animalai_packages/animalai_train/animalai_train/trainers/meta_curriculum.py b/examples/animalai_train/animalai_train/trainers/meta_curriculum.py
similarity index 100%
rename from animalai_packages/animalai_train/animalai_train/trainers/meta_curriculum.py
rename to examples/animalai_train/animalai_train/trainers/meta_curriculum.py
diff --git a/animalai_packages/animalai_train/animalai_train/trainers/models.py b/examples/animalai_train/animalai_train/trainers/models.py
similarity index 100%
rename from animalai_packages/animalai_train/animalai_train/trainers/models.py
rename to examples/animalai_train/animalai_train/trainers/models.py
diff --git a/animalai_packages/animalai_train/animalai_train/trainers/policy.py b/examples/animalai_train/animalai_train/trainers/policy.py
similarity index 100%
rename from animalai_packages/animalai_train/animalai_train/trainers/policy.py
rename to examples/animalai_train/animalai_train/trainers/policy.py
diff --git a/animalai_packages/animalai_train/animalai_train/trainers/ppo/__init__.py b/examples/animalai_train/animalai_train/trainers/ppo/__init__.py
similarity index 100%
rename from animalai_packages/animalai_train/animalai_train/trainers/ppo/__init__.py
rename to examples/animalai_train/animalai_train/trainers/ppo/__init__.py
diff --git a/animalai_packages/animalai_train/animalai_train/trainers/ppo/models.py b/examples/animalai_train/animalai_train/trainers/ppo/models.py
similarity index 100%
rename from animalai_packages/animalai_train/animalai_train/trainers/ppo/models.py
rename to examples/animalai_train/animalai_train/trainers/ppo/models.py
diff --git a/animalai_packages/animalai_train/animalai_train/trainers/ppo/policy.py b/examples/animalai_train/animalai_train/trainers/ppo/policy.py
similarity index 100%
rename from animalai_packages/animalai_train/animalai_train/trainers/ppo/policy.py
rename to examples/animalai_train/animalai_train/trainers/ppo/policy.py
diff --git a/animalai_packages/animalai_train/animalai_train/trainers/ppo/trainer.py b/examples/animalai_train/animalai_train/trainers/ppo/trainer.py
similarity index 100%
rename from animalai_packages/animalai_train/animalai_train/trainers/ppo/trainer.py
rename to examples/animalai_train/animalai_train/trainers/ppo/trainer.py
diff --git a/animalai_packages/animalai_train/animalai_train/trainers/tensorflow_to_barracuda.py b/examples/animalai_train/animalai_train/trainers/tensorflow_to_barracuda.py
similarity index 100%
rename from animalai_packages/animalai_train/animalai_train/trainers/tensorflow_to_barracuda.py
rename to examples/animalai_train/animalai_train/trainers/tensorflow_to_barracuda.py
diff --git a/animalai_packages/animalai_train/animalai_train/trainers/trainer.py b/examples/animalai_train/animalai_train/trainers/trainer.py
similarity index 100%
rename from animalai_packages/animalai_train/animalai_train/trainers/trainer.py
rename to examples/animalai_train/animalai_train/trainers/trainer.py
diff --git a/animalai_packages/animalai_train/animalai_train/trainers/trainer_controller.py b/examples/animalai_train/animalai_train/trainers/trainer_controller.py
similarity index 99%
rename from animalai_packages/animalai_train/animalai_train/trainers/trainer_controller.py
rename to examples/animalai_train/animalai_train/trainers/trainer_controller.py
index 0eb6c53c..e71a3624 100644
--- a/animalai_packages/animalai_train/animalai_train/trainers/trainer_controller.py
+++ b/examples/animalai_train/animalai_train/trainers/trainer_controller.py
@@ -183,7 +183,7 @@ def _reset_env(self, env):
return env.reset(config=self.meta_curriculum.get_config())
else:
if self.update_config:
- return env.reset(arenas_configurations_input=self.config)
+ return env.reset(arenas_configurations=self.config)
self.update_config = False
else:
return env.reset()
diff --git a/animalai_packages/animalai_train/setup.py b/examples/animalai_train/setup.py
similarity index 100%
rename from animalai_packages/animalai_train/setup.py
rename to examples/animalai_train/setup.py
diff --git a/examples/configs/rainbow.gin b/examples/configs/rainbow.gin
new file mode 100644
index 00000000..1cc5e979
--- /dev/null
+++ b/examples/configs/rainbow.gin
@@ -0,0 +1,34 @@
+# Hyperparameters follow Hessel et al. (2018).
+import dopamine.agents.rainbow.rainbow_agent
+import animalai_train.dopamine.animalai_lib
+import dopamine.discrete_domains.run_experiment
+import dopamine.replay_memory.prioritized_replay_buffer
+import gin.tf.external_configurables
+
+RainbowAgent.num_atoms = 51
+RainbowAgent.vmax = 10.
+RainbowAgent.gamma = 0.99
+RainbowAgent.update_horizon = 3
+RainbowAgent.min_replay_history = 20000 # agent steps
+RainbowAgent.update_period = 4
+RainbowAgent.target_update_period = 8000 # agent steps
+RainbowAgent.epsilon_train = 0.01
+RainbowAgent.epsilon_eval = 0.001
+RainbowAgent.epsilon_decay_period = 250000 # agent steps
+RainbowAgent.replay_scheme = 'prioritized'
+RainbowAgent.tf_device = '/gpu:0' # use '/cpu:*' for non-GPU version
+RainbowAgent.optimizer = @tf.train.AdamOptimizer()
+RainbowAgent.network = @animalai_lib.rainbow_network
+
+# Note these parameters are different from C51's.
+tf.train.AdamOptimizer.learning_rate = 0.0000625
+tf.train.AdamOptimizer.epsilon = 0.00015
+
+create_agent.agent_name = 'rainbow'
+Runner.num_iterations = 200
+Runner.training_steps = 250000 # agent steps
+Runner.evaluation_steps = 125000 # agent steps
+Runner.max_steps_per_episode = 27000 # agent steps
+
+WrappedPrioritizedReplayBuffer.replay_capacity = 1000000
+WrappedPrioritizedReplayBuffer.batch_size = 32
diff --git a/examples/trainBaselines.py b/examples/trainBaselines.py
new file mode 100644
index 00000000..bf97a7c5
--- /dev/null
+++ b/examples/trainBaselines.py
@@ -0,0 +1,33 @@
+from animalai.envs.gym.environment import AnimalAIEnv
+from animalai.envs.arena_config import ArenaConfig
+from dopamine.agents.rainbow import rainbow_agent
+from dopamine.discrete_domains import run_experiment
+
+
+import random
+
+env_path = '../env/AnimalAI'
+worker_id = random.randint(1, 100)
+arena_config_in = ArenaConfig('configs/justFood.yaml')
+base_dir = 'models/dopamine'
+gin_files = ['configs/rainbow.gin']
+
+
+def create_env_fn():
+ env = AnimalAIEnv(environment_filename=env_path,
+ worker_id=worker_id,
+ n_arenas=1,
+ arenas_configurations=arena_config_in,
+ retro=True)
+ return env
+
+
+def create_agent_fn(sess, env, summary_writer):
+ return rainbow_agent.RainbowAgent(sess=sess, num_actions=env.action_space.n, summary_writer=summary_writer)
+
+
+run_experiment.load_gin_configs(gin_files, None)
+runner = run_experiment.Runner(base_dir=base_dir,
+ create_agent_fn=create_agent_fn,
+ create_environment_fn=create_env_fn)
+runner.run_experiment()
diff --git a/examples/visualizeArena.py b/examples/visualizeArena.py
index d87f56ed..09711631 100644
--- a/examples/visualizeArena.py
+++ b/examples/visualizeArena.py
@@ -41,7 +41,7 @@ def init_environment(env_path, docker_target_name, no_graphics, worker_id, seed)
# We can pass a different configuration at each env.reset() call. You can therefore load different YAML files between
# episodes or directly amend the arena_config_in which contains a dictionary of configurations for all arenas.
# See animalai/envs/arena_config.py for the syntax
-env.reset(arenas_configurations_input =arena_config_in)
+env.reset(arenas_configurations =arena_config_in)
try:
while True:
diff --git a/examples/visualizeLightsOff.py b/examples/visualizeLightsOff.py
index 855a0d40..3c14995a 100644
--- a/examples/visualizeLightsOff.py
+++ b/examples/visualizeLightsOff.py
@@ -5,7 +5,7 @@
from matplotlib import pyplot as plt
from matplotlib import animation
-env_path = '../env/AnimalAI'
+env_path = 'env/AnimalAI'
worker_id = random.randint(1, 100)
seed = 10
@@ -34,8 +34,8 @@
play=False
)
-arena_config_in = ArenaConfig('configs/lightsOff.yaml')
-env.reset(arenas_configurations_input=arena_config_in)
+arena_config_in = ArenaConfig('examples/configs/lightsOff.yaml')
+env.reset(arenas_configurations=arena_config_in)
fig, axes = plt.subplots(2, 2)
imshows = []
for i in range(2):
From 39f7d70008e544cc9b0e1cdbf413b73e9f790860 Mon Sep 17 00:00:00 2001
From: Benjamin Beyret
Date: Fri, 31 May 2019 18:45:20 +0100
Subject: [PATCH 06/23] delete animalai_bkp + rename animalai_package
---
animalai_bkp/__init__.py | 0
animalai_bkp/communicator_objects/__init__.py | 22 -
.../agent_action_proto_pb2.py | 92 --
.../agent_info_proto_pb2.py | 141 ---
.../arena_parameters_proto_pb2.py | 209 ----
.../brain_parameters_proto_pb2.py | 125 --
.../communicator_objects/command_proto_pb2.py | 64 -
.../demonstration_meta_proto_pb2.py | 99 --
.../engine_configuration_proto_pb2.py | 106 --
.../communicator_objects/header_pb2.py | 78 --
.../resolution_proto_pb2.py | 85 --
.../space_type_proto_pb2.py | 61 -
.../communicator_objects/unity_input_pb2.py | 92 --
.../communicator_objects/unity_message_pb2.py | 92 --
.../communicator_objects/unity_output_pb2.py | 83 --
.../unity_rl_initialization_input_pb2.py | 71 --
.../unity_rl_initialization_output_pb2.py | 95 --
.../unity_rl_input_pb2.py | 178 ---
.../unity_rl_output_pb2.py | 169 ---
.../unity_rl_reset_input_pb2.py | 122 --
.../unity_rl_reset_output_pb2.py | 71 --
.../unity_to_external_pb2.py | 57 -
.../unity_to_external_pb2_grpc.py | 46 -
animalai_bkp/envs/__init__.py | 3 -
animalai_bkp/envs/arena_config.py | 115 --
animalai_bkp/envs/brain.py | 149 ---
animalai_bkp/envs/communicator.py | 35 -
animalai_bkp/envs/environment.py | 491 --------
animalai_bkp/envs/exception.py | 63 -
animalai_bkp/envs/rpc_communicator.py | 115 --
animalai_bkp/envs/socket_communicator.py | 98 --
animalai_bkp/trainers/__init__.py | 15 -
animalai_bkp/trainers/barracuda.py | 491 --------
animalai_bkp/trainers/bc/__init__.py | 4 -
animalai_bkp/trainers/bc/models.py | 55 -
animalai_bkp/trainers/bc/offline_trainer.py | 56 -
animalai_bkp/trainers/bc/online_trainer.py | 116 --
animalai_bkp/trainers/bc/policy.py | 93 --
animalai_bkp/trainers/bc/trainer.py | 190 ---
animalai_bkp/trainers/buffer.py | 255 ----
animalai_bkp/trainers/curriculum.py | 112 --
animalai_bkp/trainers/demo_loader.py | 94 --
animalai_bkp/trainers/exception.py | 20 -
animalai_bkp/trainers/learn.py | 249 ----
animalai_bkp/trainers/meta_curriculum.py | 147 ---
animalai_bkp/trainers/models.py | 380 ------
animalai_bkp/trainers/policy.py | 212 ----
animalai_bkp/trainers/ppo/__init__.py | 3 -
animalai_bkp/trainers/ppo/models.py | 195 ----
animalai_bkp/trainers/ppo/policy.py | 214 ----
animalai_bkp/trainers/ppo/trainer.py | 386 ------
.../trainers/tensorflow_to_barracuda.py | 1034 -----------------
animalai_bkp/trainers/trainer.py | 198 ----
animalai_bkp/trainers/trainer_controller.py | 301 -----
54 files changed, 8047 deletions(-)
delete mode 100644 animalai_bkp/__init__.py
delete mode 100644 animalai_bkp/communicator_objects/__init__.py
delete mode 100644 animalai_bkp/communicator_objects/agent_action_proto_pb2.py
delete mode 100644 animalai_bkp/communicator_objects/agent_info_proto_pb2.py
delete mode 100644 animalai_bkp/communicator_objects/arena_parameters_proto_pb2.py
delete mode 100644 animalai_bkp/communicator_objects/brain_parameters_proto_pb2.py
delete mode 100644 animalai_bkp/communicator_objects/command_proto_pb2.py
delete mode 100644 animalai_bkp/communicator_objects/demonstration_meta_proto_pb2.py
delete mode 100644 animalai_bkp/communicator_objects/engine_configuration_proto_pb2.py
delete mode 100644 animalai_bkp/communicator_objects/header_pb2.py
delete mode 100644 animalai_bkp/communicator_objects/resolution_proto_pb2.py
delete mode 100644 animalai_bkp/communicator_objects/space_type_proto_pb2.py
delete mode 100644 animalai_bkp/communicator_objects/unity_input_pb2.py
delete mode 100644 animalai_bkp/communicator_objects/unity_message_pb2.py
delete mode 100644 animalai_bkp/communicator_objects/unity_output_pb2.py
delete mode 100644 animalai_bkp/communicator_objects/unity_rl_initialization_input_pb2.py
delete mode 100644 animalai_bkp/communicator_objects/unity_rl_initialization_output_pb2.py
delete mode 100644 animalai_bkp/communicator_objects/unity_rl_input_pb2.py
delete mode 100644 animalai_bkp/communicator_objects/unity_rl_output_pb2.py
delete mode 100644 animalai_bkp/communicator_objects/unity_rl_reset_input_pb2.py
delete mode 100644 animalai_bkp/communicator_objects/unity_rl_reset_output_pb2.py
delete mode 100644 animalai_bkp/communicator_objects/unity_to_external_pb2.py
delete mode 100644 animalai_bkp/communicator_objects/unity_to_external_pb2_grpc.py
delete mode 100644 animalai_bkp/envs/__init__.py
delete mode 100644 animalai_bkp/envs/arena_config.py
delete mode 100644 animalai_bkp/envs/brain.py
delete mode 100644 animalai_bkp/envs/communicator.py
delete mode 100644 animalai_bkp/envs/environment.py
delete mode 100644 animalai_bkp/envs/exception.py
delete mode 100644 animalai_bkp/envs/rpc_communicator.py
delete mode 100644 animalai_bkp/envs/socket_communicator.py
delete mode 100644 animalai_bkp/trainers/__init__.py
delete mode 100644 animalai_bkp/trainers/barracuda.py
delete mode 100644 animalai_bkp/trainers/bc/__init__.py
delete mode 100644 animalai_bkp/trainers/bc/models.py
delete mode 100644 animalai_bkp/trainers/bc/offline_trainer.py
delete mode 100644 animalai_bkp/trainers/bc/online_trainer.py
delete mode 100644 animalai_bkp/trainers/bc/policy.py
delete mode 100644 animalai_bkp/trainers/bc/trainer.py
delete mode 100644 animalai_bkp/trainers/buffer.py
delete mode 100644 animalai_bkp/trainers/curriculum.py
delete mode 100644 animalai_bkp/trainers/demo_loader.py
delete mode 100644 animalai_bkp/trainers/exception.py
delete mode 100644 animalai_bkp/trainers/learn.py
delete mode 100644 animalai_bkp/trainers/meta_curriculum.py
delete mode 100644 animalai_bkp/trainers/models.py
delete mode 100644 animalai_bkp/trainers/policy.py
delete mode 100644 animalai_bkp/trainers/ppo/__init__.py
delete mode 100644 animalai_bkp/trainers/ppo/models.py
delete mode 100644 animalai_bkp/trainers/ppo/policy.py
delete mode 100644 animalai_bkp/trainers/ppo/trainer.py
delete mode 100644 animalai_bkp/trainers/tensorflow_to_barracuda.py
delete mode 100644 animalai_bkp/trainers/trainer.py
delete mode 100644 animalai_bkp/trainers/trainer_controller.py
diff --git a/animalai_bkp/__init__.py b/animalai_bkp/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/animalai_bkp/communicator_objects/__init__.py b/animalai_bkp/communicator_objects/__init__.py
deleted file mode 100644
index 571ec13b..00000000
--- a/animalai_bkp/communicator_objects/__init__.py
+++ /dev/null
@@ -1,22 +0,0 @@
-from .agent_action_proto_pb2 import *
-from .agent_info_proto_pb2 import *
-from .arena_parameters_proto_pb2 import *
-from .brain_parameters_proto_pb2 import *
-from .command_proto_pb2 import *
-from .demonstration_meta_proto_pb2 import *
-from .engine_configuration_proto_pb2 import *
-from .header_pb2 import *
-from .__init__ import *
-from .resolution_proto_pb2 import *
-from .space_type_proto_pb2 import *
-from .unity_input_pb2 import *
-from .unity_message_pb2 import *
-from .unity_output_pb2 import *
-from .unity_rl_initialization_input_pb2 import *
-from .unity_rl_initialization_output_pb2 import *
-from .unity_rl_input_pb2 import *
-from .unity_rl_output_pb2 import *
-from .unity_rl_reset_input_pb2 import *
-from .unity_rl_reset_output_pb2 import *
-from .unity_to_external_pb2_grpc import *
-from .unity_to_external_pb2 import *
diff --git a/animalai_bkp/communicator_objects/agent_action_proto_pb2.py b/animalai_bkp/communicator_objects/agent_action_proto_pb2.py
deleted file mode 100644
index 8d19593e..00000000
--- a/animalai_bkp/communicator_objects/agent_action_proto_pb2.py
+++ /dev/null
@@ -1,92 +0,0 @@
-# -*- coding: utf-8 -*-
-# Generated by the protocol buffer compiler. DO NOT EDIT!
-# source: animalai/communicator_objects/agent_action_proto.proto
-
-import sys
-_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
-from google.protobuf import descriptor as _descriptor
-from google.protobuf import message as _message
-from google.protobuf import reflection as _reflection
-from google.protobuf import symbol_database as _symbol_database
-# @@protoc_insertion_point(imports)
-
-_sym_db = _symbol_database.Default()
-
-
-
-
-DESCRIPTOR = _descriptor.FileDescriptor(
- name='animalai/communicator_objects/agent_action_proto.proto',
- package='communicator_objects',
- syntax='proto3',
- serialized_options=_b('\252\002\034MLAgents.CommunicatorObjects'),
- serialized_pb=_b('\n6animalai/communicator_objects/agent_action_proto.proto\x12\x14\x63ommunicator_objects\"a\n\x10\x41gentActionProto\x12\x16\n\x0evector_actions\x18\x01 \x03(\x02\x12\x14\n\x0ctext_actions\x18\x02 \x01(\t\x12\x10\n\x08memories\x18\x03 \x03(\x02\x12\r\n\x05value\x18\x04 \x01(\x02\x42\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3')
-)
-
-
-
-
-_AGENTACTIONPROTO = _descriptor.Descriptor(
- name='AgentActionProto',
- full_name='communicator_objects.AgentActionProto',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='vector_actions', full_name='communicator_objects.AgentActionProto.vector_actions', index=0,
- number=1, type=2, cpp_type=6, label=3,
- has_default_value=False, default_value=[],
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- serialized_options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='text_actions', full_name='communicator_objects.AgentActionProto.text_actions', index=1,
- number=2, type=9, cpp_type=9, label=1,
- has_default_value=False, default_value=_b("").decode('utf-8'),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- serialized_options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='memories', full_name='communicator_objects.AgentActionProto.memories', index=2,
- number=3, type=2, cpp_type=6, label=3,
- has_default_value=False, default_value=[],
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- serialized_options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='value', full_name='communicator_objects.AgentActionProto.value', index=3,
- number=4, type=2, cpp_type=6, label=1,
- has_default_value=False, default_value=float(0),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- serialized_options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- serialized_options=None,
- is_extendable=False,
- syntax='proto3',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=80,
- serialized_end=177,
-)
-
-DESCRIPTOR.message_types_by_name['AgentActionProto'] = _AGENTACTIONPROTO
-_sym_db.RegisterFileDescriptor(DESCRIPTOR)
-
-AgentActionProto = _reflection.GeneratedProtocolMessageType('AgentActionProto', (_message.Message,), {
- 'DESCRIPTOR' : _AGENTACTIONPROTO,
- '__module__' : 'animalai.communicator_objects.agent_action_proto_pb2'
- # @@protoc_insertion_point(class_scope:communicator_objects.AgentActionProto)
- })
-_sym_db.RegisterMessage(AgentActionProto)
-
-
-DESCRIPTOR._options = None
-# @@protoc_insertion_point(module_scope)
diff --git a/animalai_bkp/communicator_objects/agent_info_proto_pb2.py b/animalai_bkp/communicator_objects/agent_info_proto_pb2.py
deleted file mode 100644
index 0dad7e85..00000000
--- a/animalai_bkp/communicator_objects/agent_info_proto_pb2.py
+++ /dev/null
@@ -1,141 +0,0 @@
-# -*- coding: utf-8 -*-
-# Generated by the protocol buffer compiler. DO NOT EDIT!
-# source: animalai/communicator_objects/agent_info_proto.proto
-
-import sys
-_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
-from google.protobuf import descriptor as _descriptor
-from google.protobuf import message as _message
-from google.protobuf import reflection as _reflection
-from google.protobuf import symbol_database as _symbol_database
-# @@protoc_insertion_point(imports)
-
-_sym_db = _symbol_database.Default()
-
-
-
-
-DESCRIPTOR = _descriptor.FileDescriptor(
- name='animalai/communicator_objects/agent_info_proto.proto',
- package='communicator_objects',
- syntax='proto3',
- serialized_options=_b('\252\002\034MLAgents.CommunicatorObjects'),
- serialized_pb=_b('\n4animalai/communicator_objects/agent_info_proto.proto\x12\x14\x63ommunicator_objects\"\x92\x02\n\x0e\x41gentInfoProto\x12\"\n\x1astacked_vector_observation\x18\x01 \x03(\x02\x12\x1b\n\x13visual_observations\x18\x02 \x03(\x0c\x12\x18\n\x10text_observation\x18\x03 \x01(\t\x12\x1d\n\x15stored_vector_actions\x18\x04 \x03(\x02\x12\x1b\n\x13stored_text_actions\x18\x05 \x01(\t\x12\x10\n\x08memories\x18\x06 \x03(\x02\x12\x0e\n\x06reward\x18\x07 \x01(\x02\x12\x0c\n\x04\x64one\x18\x08 \x01(\x08\x12\x18\n\x10max_step_reached\x18\t \x01(\x08\x12\n\n\x02id\x18\n \x01(\x05\x12\x13\n\x0b\x61\x63tion_mask\x18\x0b \x03(\x08\x42\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3')
-)
-
-
-
-
-_AGENTINFOPROTO = _descriptor.Descriptor(
- name='AgentInfoProto',
- full_name='communicator_objects.AgentInfoProto',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='stacked_vector_observation', full_name='communicator_objects.AgentInfoProto.stacked_vector_observation', index=0,
- number=1, type=2, cpp_type=6, label=3,
- has_default_value=False, default_value=[],
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- serialized_options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='visual_observations', full_name='communicator_objects.AgentInfoProto.visual_observations', index=1,
- number=2, type=12, cpp_type=9, label=3,
- has_default_value=False, default_value=[],
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- serialized_options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='text_observation', full_name='communicator_objects.AgentInfoProto.text_observation', index=2,
- number=3, type=9, cpp_type=9, label=1,
- has_default_value=False, default_value=_b("").decode('utf-8'),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- serialized_options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='stored_vector_actions', full_name='communicator_objects.AgentInfoProto.stored_vector_actions', index=3,
- number=4, type=2, cpp_type=6, label=3,
- has_default_value=False, default_value=[],
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- serialized_options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='stored_text_actions', full_name='communicator_objects.AgentInfoProto.stored_text_actions', index=4,
- number=5, type=9, cpp_type=9, label=1,
- has_default_value=False, default_value=_b("").decode('utf-8'),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- serialized_options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='memories', full_name='communicator_objects.AgentInfoProto.memories', index=5,
- number=6, type=2, cpp_type=6, label=3,
- has_default_value=False, default_value=[],
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- serialized_options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='reward', full_name='communicator_objects.AgentInfoProto.reward', index=6,
- number=7, type=2, cpp_type=6, label=1,
- has_default_value=False, default_value=float(0),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- serialized_options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='done', full_name='communicator_objects.AgentInfoProto.done', index=7,
- number=8, type=8, cpp_type=7, label=1,
- has_default_value=False, default_value=False,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- serialized_options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='max_step_reached', full_name='communicator_objects.AgentInfoProto.max_step_reached', index=8,
- number=9, type=8, cpp_type=7, label=1,
- has_default_value=False, default_value=False,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- serialized_options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='id', full_name='communicator_objects.AgentInfoProto.id', index=9,
- number=10, type=5, cpp_type=1, label=1,
- has_default_value=False, default_value=0,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- serialized_options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='action_mask', full_name='communicator_objects.AgentInfoProto.action_mask', index=10,
- number=11, type=8, cpp_type=7, label=3,
- has_default_value=False, default_value=[],
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- serialized_options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- serialized_options=None,
- is_extendable=False,
- syntax='proto3',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=79,
- serialized_end=353,
-)
-
-DESCRIPTOR.message_types_by_name['AgentInfoProto'] = _AGENTINFOPROTO
-_sym_db.RegisterFileDescriptor(DESCRIPTOR)
-
-AgentInfoProto = _reflection.GeneratedProtocolMessageType('AgentInfoProto', (_message.Message,), {
- 'DESCRIPTOR' : _AGENTINFOPROTO,
- '__module__' : 'animalai.communicator_objects.agent_info_proto_pb2'
- # @@protoc_insertion_point(class_scope:communicator_objects.AgentInfoProto)
- })
-_sym_db.RegisterMessage(AgentInfoProto)
-
-
-DESCRIPTOR._options = None
-# @@protoc_insertion_point(module_scope)
diff --git a/animalai_bkp/communicator_objects/arena_parameters_proto_pb2.py b/animalai_bkp/communicator_objects/arena_parameters_proto_pb2.py
deleted file mode 100644
index 930b300c..00000000
--- a/animalai_bkp/communicator_objects/arena_parameters_proto_pb2.py
+++ /dev/null
@@ -1,209 +0,0 @@
-# -*- coding: utf-8 -*-
-# Generated by the protocol buffer compiler. DO NOT EDIT!
-# source: animalai/communicator_objects/arena_parameters_proto.proto
-
-import sys
-_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
-from google.protobuf import descriptor as _descriptor
-from google.protobuf import message as _message
-from google.protobuf import reflection as _reflection
-from google.protobuf import symbol_database as _symbol_database
-# @@protoc_insertion_point(imports)
-
-_sym_db = _symbol_database.Default()
-
-
-
-
-DESCRIPTOR = _descriptor.FileDescriptor(
- name='animalai/communicator_objects/arena_parameters_proto.proto',
- package='communicator_objects',
- syntax='proto3',
- serialized_options=_b('\252\002\034MLAgents.CommunicatorObjects'),
- serialized_pb=_b('\n:animalai/communicator_objects/arena_parameters_proto.proto\x12\x14\x63ommunicator_objects\"\xcf\x03\n\x14\x41renaParametersProto\x12\t\n\x01t\x18\x01 \x01(\x05\x12\x46\n\x05items\x18\x02 \x03(\x0b\x32\x37.communicator_objects.ArenaParametersProto.ItemsToSpawn\x12\x11\n\tblackouts\x18\x03 \x03(\x05\x1a\xd0\x02\n\x0cItemsToSpawn\x12\x0c\n\x04name\x18\x01 \x01(\t\x12R\n\tpositions\x18\x03 \x03(\x0b\x32?.communicator_objects.ArenaParametersProto.ItemsToSpawn.Vector3\x12\x11\n\trotations\x18\x04 \x03(\x02\x12N\n\x05sizes\x18\x05 \x03(\x0b\x32?.communicator_objects.ArenaParametersProto.ItemsToSpawn.Vector3\x12O\n\x06\x63olors\x18\x06 \x03(\x0b\x32?.communicator_objects.ArenaParametersProto.ItemsToSpawn.Vector3\x1a*\n\x07Vector3\x12\t\n\x01x\x18\x01 \x01(\x02\x12\t\n\x01y\x18\x02 \x01(\x02\x12\t\n\x01z\x18\x03 \x01(\x02\x42\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3')
-)
-
-
-
-
-_ARENAPARAMETERSPROTO_ITEMSTOSPAWN_VECTOR3 = _descriptor.Descriptor(
- name='Vector3',
- full_name='communicator_objects.ArenaParametersProto.ItemsToSpawn.Vector3',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='x', full_name='communicator_objects.ArenaParametersProto.ItemsToSpawn.Vector3.x', index=0,
- number=1, type=2, cpp_type=6, label=1,
- has_default_value=False, default_value=float(0),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- serialized_options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='y', full_name='communicator_objects.ArenaParametersProto.ItemsToSpawn.Vector3.y', index=1,
- number=2, type=2, cpp_type=6, label=1,
- has_default_value=False, default_value=float(0),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- serialized_options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='z', full_name='communicator_objects.ArenaParametersProto.ItemsToSpawn.Vector3.z', index=2,
- number=3, type=2, cpp_type=6, label=1,
- has_default_value=False, default_value=float(0),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- serialized_options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- serialized_options=None,
- is_extendable=False,
- syntax='proto3',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=506,
- serialized_end=548,
-)
-
-_ARENAPARAMETERSPROTO_ITEMSTOSPAWN = _descriptor.Descriptor(
- name='ItemsToSpawn',
- full_name='communicator_objects.ArenaParametersProto.ItemsToSpawn',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='name', full_name='communicator_objects.ArenaParametersProto.ItemsToSpawn.name', index=0,
- number=1, type=9, cpp_type=9, label=1,
- has_default_value=False, default_value=_b("").decode('utf-8'),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- serialized_options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='positions', full_name='communicator_objects.ArenaParametersProto.ItemsToSpawn.positions', index=1,
- number=3, type=11, cpp_type=10, label=3,
- has_default_value=False, default_value=[],
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- serialized_options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='rotations', full_name='communicator_objects.ArenaParametersProto.ItemsToSpawn.rotations', index=2,
- number=4, type=2, cpp_type=6, label=3,
- has_default_value=False, default_value=[],
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- serialized_options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='sizes', full_name='communicator_objects.ArenaParametersProto.ItemsToSpawn.sizes', index=3,
- number=5, type=11, cpp_type=10, label=3,
- has_default_value=False, default_value=[],
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- serialized_options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='colors', full_name='communicator_objects.ArenaParametersProto.ItemsToSpawn.colors', index=4,
- number=6, type=11, cpp_type=10, label=3,
- has_default_value=False, default_value=[],
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- serialized_options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[_ARENAPARAMETERSPROTO_ITEMSTOSPAWN_VECTOR3, ],
- enum_types=[
- ],
- serialized_options=None,
- is_extendable=False,
- syntax='proto3',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=212,
- serialized_end=548,
-)
-
-_ARENAPARAMETERSPROTO = _descriptor.Descriptor(
- name='ArenaParametersProto',
- full_name='communicator_objects.ArenaParametersProto',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='t', full_name='communicator_objects.ArenaParametersProto.t', index=0,
- number=1, type=5, cpp_type=1, label=1,
- has_default_value=False, default_value=0,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- serialized_options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='items', full_name='communicator_objects.ArenaParametersProto.items', index=1,
- number=2, type=11, cpp_type=10, label=3,
- has_default_value=False, default_value=[],
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- serialized_options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='blackouts', full_name='communicator_objects.ArenaParametersProto.blackouts', index=2,
- number=3, type=5, cpp_type=1, label=3,
- has_default_value=False, default_value=[],
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- serialized_options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[_ARENAPARAMETERSPROTO_ITEMSTOSPAWN, ],
- enum_types=[
- ],
- serialized_options=None,
- is_extendable=False,
- syntax='proto3',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=85,
- serialized_end=548,
-)
-
-_ARENAPARAMETERSPROTO_ITEMSTOSPAWN_VECTOR3.containing_type = _ARENAPARAMETERSPROTO_ITEMSTOSPAWN
-_ARENAPARAMETERSPROTO_ITEMSTOSPAWN.fields_by_name['positions'].message_type = _ARENAPARAMETERSPROTO_ITEMSTOSPAWN_VECTOR3
-_ARENAPARAMETERSPROTO_ITEMSTOSPAWN.fields_by_name['sizes'].message_type = _ARENAPARAMETERSPROTO_ITEMSTOSPAWN_VECTOR3
-_ARENAPARAMETERSPROTO_ITEMSTOSPAWN.fields_by_name['colors'].message_type = _ARENAPARAMETERSPROTO_ITEMSTOSPAWN_VECTOR3
-_ARENAPARAMETERSPROTO_ITEMSTOSPAWN.containing_type = _ARENAPARAMETERSPROTO
-_ARENAPARAMETERSPROTO.fields_by_name['items'].message_type = _ARENAPARAMETERSPROTO_ITEMSTOSPAWN
-DESCRIPTOR.message_types_by_name['ArenaParametersProto'] = _ARENAPARAMETERSPROTO
-_sym_db.RegisterFileDescriptor(DESCRIPTOR)
-
-ArenaParametersProto = _reflection.GeneratedProtocolMessageType('ArenaParametersProto', (_message.Message,), {
-
- 'ItemsToSpawn' : _reflection.GeneratedProtocolMessageType('ItemsToSpawn', (_message.Message,), {
-
- 'Vector3' : _reflection.GeneratedProtocolMessageType('Vector3', (_message.Message,), {
- 'DESCRIPTOR' : _ARENAPARAMETERSPROTO_ITEMSTOSPAWN_VECTOR3,
- '__module__' : 'animalai.communicator_objects.arena_parameters_proto_pb2'
- # @@protoc_insertion_point(class_scope:communicator_objects.ArenaParametersProto.ItemsToSpawn.Vector3)
- })
- ,
- 'DESCRIPTOR' : _ARENAPARAMETERSPROTO_ITEMSTOSPAWN,
- '__module__' : 'animalai.communicator_objects.arena_parameters_proto_pb2'
- # @@protoc_insertion_point(class_scope:communicator_objects.ArenaParametersProto.ItemsToSpawn)
- })
- ,
- 'DESCRIPTOR' : _ARENAPARAMETERSPROTO,
- '__module__' : 'animalai.communicator_objects.arena_parameters_proto_pb2'
- # @@protoc_insertion_point(class_scope:communicator_objects.ArenaParametersProto)
- })
-_sym_db.RegisterMessage(ArenaParametersProto)
-_sym_db.RegisterMessage(ArenaParametersProto.ItemsToSpawn)
-_sym_db.RegisterMessage(ArenaParametersProto.ItemsToSpawn.Vector3)
-
-
-DESCRIPTOR._options = None
-# @@protoc_insertion_point(module_scope)
diff --git a/animalai_bkp/communicator_objects/brain_parameters_proto_pb2.py b/animalai_bkp/communicator_objects/brain_parameters_proto_pb2.py
deleted file mode 100644
index 07091b4f..00000000
--- a/animalai_bkp/communicator_objects/brain_parameters_proto_pb2.py
+++ /dev/null
@@ -1,125 +0,0 @@
-# -*- coding: utf-8 -*-
-# Generated by the protocol buffer compiler. DO NOT EDIT!
-# source: animalai/communicator_objects/brain_parameters_proto.proto
-
-import sys
-_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
-from google.protobuf import descriptor as _descriptor
-from google.protobuf import message as _message
-from google.protobuf import reflection as _reflection
-from google.protobuf import symbol_database as _symbol_database
-# @@protoc_insertion_point(imports)
-
-_sym_db = _symbol_database.Default()
-
-
-from animalai.communicator_objects import resolution_proto_pb2 as animalai_dot_communicator__objects_dot_resolution__proto__pb2
-from animalai.communicator_objects import space_type_proto_pb2 as animalai_dot_communicator__objects_dot_space__type__proto__pb2
-
-
-DESCRIPTOR = _descriptor.FileDescriptor(
- name='animalai/communicator_objects/brain_parameters_proto.proto',
- package='communicator_objects',
- syntax='proto3',
- serialized_options=_b('\252\002\034MLAgents.CommunicatorObjects'),
- serialized_pb=_b('\n:animalai/communicator_objects/brain_parameters_proto.proto\x12\x14\x63ommunicator_objects\x1a\x34\x61nimalai/communicator_objects/resolution_proto.proto\x1a\x34\x61nimalai/communicator_objects/space_type_proto.proto\"\xd4\x02\n\x14\x42rainParametersProto\x12\x1f\n\x17vector_observation_size\x18\x01 \x01(\x05\x12\'\n\x1fnum_stacked_vector_observations\x18\x02 \x01(\x05\x12\x1a\n\x12vector_action_size\x18\x03 \x03(\x05\x12\x41\n\x12\x63\x61mera_resolutions\x18\x04 \x03(\x0b\x32%.communicator_objects.ResolutionProto\x12\"\n\x1avector_action_descriptions\x18\x05 \x03(\t\x12\x46\n\x18vector_action_space_type\x18\x06 \x01(\x0e\x32$.communicator_objects.SpaceTypeProto\x12\x12\n\nbrain_name\x18\x07 \x01(\t\x12\x13\n\x0bis_training\x18\x08 \x01(\x08\x42\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3')
- ,
- dependencies=[animalai_dot_communicator__objects_dot_resolution__proto__pb2.DESCRIPTOR,animalai_dot_communicator__objects_dot_space__type__proto__pb2.DESCRIPTOR,])
-
-
-
-
-_BRAINPARAMETERSPROTO = _descriptor.Descriptor(
- name='BrainParametersProto',
- full_name='communicator_objects.BrainParametersProto',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='vector_observation_size', full_name='communicator_objects.BrainParametersProto.vector_observation_size', index=0,
- number=1, type=5, cpp_type=1, label=1,
- has_default_value=False, default_value=0,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- serialized_options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='num_stacked_vector_observations', full_name='communicator_objects.BrainParametersProto.num_stacked_vector_observations', index=1,
- number=2, type=5, cpp_type=1, label=1,
- has_default_value=False, default_value=0,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- serialized_options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='vector_action_size', full_name='communicator_objects.BrainParametersProto.vector_action_size', index=2,
- number=3, type=5, cpp_type=1, label=3,
- has_default_value=False, default_value=[],
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- serialized_options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='camera_resolutions', full_name='communicator_objects.BrainParametersProto.camera_resolutions', index=3,
- number=4, type=11, cpp_type=10, label=3,
- has_default_value=False, default_value=[],
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- serialized_options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='vector_action_descriptions', full_name='communicator_objects.BrainParametersProto.vector_action_descriptions', index=4,
- number=5, type=9, cpp_type=9, label=3,
- has_default_value=False, default_value=[],
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- serialized_options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='vector_action_space_type', full_name='communicator_objects.BrainParametersProto.vector_action_space_type', index=5,
- number=6, type=14, cpp_type=8, label=1,
- has_default_value=False, default_value=0,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- serialized_options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='brain_name', full_name='communicator_objects.BrainParametersProto.brain_name', index=6,
- number=7, type=9, cpp_type=9, label=1,
- has_default_value=False, default_value=_b("").decode('utf-8'),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- serialized_options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='is_training', full_name='communicator_objects.BrainParametersProto.is_training', index=7,
- number=8, type=8, cpp_type=7, label=1,
- has_default_value=False, default_value=False,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- serialized_options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- serialized_options=None,
- is_extendable=False,
- syntax='proto3',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=193,
- serialized_end=533,
-)
-
-_BRAINPARAMETERSPROTO.fields_by_name['camera_resolutions'].message_type = animalai_dot_communicator__objects_dot_resolution__proto__pb2._RESOLUTIONPROTO
-_BRAINPARAMETERSPROTO.fields_by_name['vector_action_space_type'].enum_type = animalai_dot_communicator__objects_dot_space__type__proto__pb2._SPACETYPEPROTO
-DESCRIPTOR.message_types_by_name['BrainParametersProto'] = _BRAINPARAMETERSPROTO
-_sym_db.RegisterFileDescriptor(DESCRIPTOR)
-
-BrainParametersProto = _reflection.GeneratedProtocolMessageType('BrainParametersProto', (_message.Message,), {
- 'DESCRIPTOR' : _BRAINPARAMETERSPROTO,
- '__module__' : 'animalai.communicator_objects.brain_parameters_proto_pb2'
- # @@protoc_insertion_point(class_scope:communicator_objects.BrainParametersProto)
- })
-_sym_db.RegisterMessage(BrainParametersProto)
-
-
-DESCRIPTOR._options = None
-# @@protoc_insertion_point(module_scope)
diff --git a/animalai_bkp/communicator_objects/command_proto_pb2.py b/animalai_bkp/communicator_objects/command_proto_pb2.py
deleted file mode 100644
index 4912301f..00000000
--- a/animalai_bkp/communicator_objects/command_proto_pb2.py
+++ /dev/null
@@ -1,64 +0,0 @@
-# -*- coding: utf-8 -*-
-# Generated by the protocol buffer compiler. DO NOT EDIT!
-# source: animalai/communicator_objects/command_proto.proto
-
-import sys
-_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
-from google.protobuf.internal import enum_type_wrapper
-from google.protobuf import descriptor as _descriptor
-from google.protobuf import message as _message
-from google.protobuf import reflection as _reflection
-from google.protobuf import symbol_database as _symbol_database
-# @@protoc_insertion_point(imports)
-
-_sym_db = _symbol_database.Default()
-
-
-
-
-DESCRIPTOR = _descriptor.FileDescriptor(
- name='animalai/communicator_objects/command_proto.proto',
- package='communicator_objects',
- syntax='proto3',
- serialized_options=_b('\252\002\034MLAgents.CommunicatorObjects'),
- serialized_pb=_b('\n1animalai/communicator_objects/command_proto.proto\x12\x14\x63ommunicator_objects*-\n\x0c\x43ommandProto\x12\x08\n\x04STEP\x10\x00\x12\t\n\x05RESET\x10\x01\x12\x08\n\x04QUIT\x10\x02\x42\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3')
-)
-
-_COMMANDPROTO = _descriptor.EnumDescriptor(
- name='CommandProto',
- full_name='communicator_objects.CommandProto',
- filename=None,
- file=DESCRIPTOR,
- values=[
- _descriptor.EnumValueDescriptor(
- name='STEP', index=0, number=0,
- serialized_options=None,
- type=None),
- _descriptor.EnumValueDescriptor(
- name='RESET', index=1, number=1,
- serialized_options=None,
- type=None),
- _descriptor.EnumValueDescriptor(
- name='QUIT', index=2, number=2,
- serialized_options=None,
- type=None),
- ],
- containing_type=None,
- serialized_options=None,
- serialized_start=75,
- serialized_end=120,
-)
-_sym_db.RegisterEnumDescriptor(_COMMANDPROTO)
-
-CommandProto = enum_type_wrapper.EnumTypeWrapper(_COMMANDPROTO)
-STEP = 0
-RESET = 1
-QUIT = 2
-
-
-DESCRIPTOR.enum_types_by_name['CommandProto'] = _COMMANDPROTO
-_sym_db.RegisterFileDescriptor(DESCRIPTOR)
-
-
-DESCRIPTOR._options = None
-# @@protoc_insertion_point(module_scope)
diff --git a/animalai_bkp/communicator_objects/demonstration_meta_proto_pb2.py b/animalai_bkp/communicator_objects/demonstration_meta_proto_pb2.py
deleted file mode 100644
index 107b1335..00000000
--- a/animalai_bkp/communicator_objects/demonstration_meta_proto_pb2.py
+++ /dev/null
@@ -1,99 +0,0 @@
-# -*- coding: utf-8 -*-
-# Generated by the protocol buffer compiler. DO NOT EDIT!
-# source: animalai/communicator_objects/demonstration_meta_proto.proto
-
-import sys
-_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
-from google.protobuf import descriptor as _descriptor
-from google.protobuf import message as _message
-from google.protobuf import reflection as _reflection
-from google.protobuf import symbol_database as _symbol_database
-# @@protoc_insertion_point(imports)
-
-_sym_db = _symbol_database.Default()
-
-
-
-
-DESCRIPTOR = _descriptor.FileDescriptor(
- name='animalai/communicator_objects/demonstration_meta_proto.proto',
- package='communicator_objects',
- syntax='proto3',
- serialized_options=_b('\252\002\034MLAgents.CommunicatorObjects'),
- serialized_pb=_b('\n>animalai/communicator_objects/engine_configuration_proto.proto\x12\x14\x63ommunicator_objects\"\x95\x01\n\x18\x45ngineConfigurationProto\x12\r\n\x05width\x18\x01 \x01(\x05\x12\x0e\n\x06height\x18\x02 \x01(\x05\x12\x15\n\rquality_level\x18\x03 \x01(\x05\x12\x12\n\ntime_scale\x18\x04 \x01(\x02\x12\x19\n\x11target_frame_rate\x18\x05 \x01(\x05\x12\x14\n\x0cshow_monitor\x18\x06 \x01(\x08\x42\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3')
-)
-
-
-
-
-_ENGINECONFIGURATIONPROTO = _descriptor.Descriptor(
- name='EngineConfigurationProto',
- full_name='communicator_objects.EngineConfigurationProto',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='width', full_name='communicator_objects.EngineConfigurationProto.width', index=0,
- number=1, type=5, cpp_type=1, label=1,
- has_default_value=False, default_value=0,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- serialized_options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='height', full_name='communicator_objects.EngineConfigurationProto.height', index=1,
- number=2, type=5, cpp_type=1, label=1,
- has_default_value=False, default_value=0,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- serialized_options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='quality_level', full_name='communicator_objects.EngineConfigurationProto.quality_level', index=2,
- number=3, type=5, cpp_type=1, label=1,
- has_default_value=False, default_value=0,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- serialized_options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='time_scale', full_name='communicator_objects.EngineConfigurationProto.time_scale', index=3,
- number=4, type=2, cpp_type=6, label=1,
- has_default_value=False, default_value=float(0),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- serialized_options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='target_frame_rate', full_name='communicator_objects.EngineConfigurationProto.target_frame_rate', index=4,
- number=5, type=5, cpp_type=1, label=1,
- has_default_value=False, default_value=0,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- serialized_options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='show_monitor', full_name='communicator_objects.EngineConfigurationProto.show_monitor', index=5,
- number=6, type=8, cpp_type=7, label=1,
- has_default_value=False, default_value=False,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- serialized_options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- serialized_options=None,
- is_extendable=False,
- syntax='proto3',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=89,
- serialized_end=238,
-)
-
-DESCRIPTOR.message_types_by_name['EngineConfigurationProto'] = _ENGINECONFIGURATIONPROTO
-_sym_db.RegisterFileDescriptor(DESCRIPTOR)
-
-EngineConfigurationProto = _reflection.GeneratedProtocolMessageType('EngineConfigurationProto', (_message.Message,), {
- 'DESCRIPTOR' : _ENGINECONFIGURATIONPROTO,
- '__module__' : 'animalai.communicator_objects.engine_configuration_proto_pb2'
- # @@protoc_insertion_point(class_scope:communicator_objects.EngineConfigurationProto)
- })
-_sym_db.RegisterMessage(EngineConfigurationProto)
-
-
-DESCRIPTOR._options = None
-# @@protoc_insertion_point(module_scope)
diff --git a/animalai_bkp/communicator_objects/header_pb2.py b/animalai_bkp/communicator_objects/header_pb2.py
deleted file mode 100644
index 6c40f6cc..00000000
--- a/animalai_bkp/communicator_objects/header_pb2.py
+++ /dev/null
@@ -1,78 +0,0 @@
-# -*- coding: utf-8 -*-
-# Generated by the protocol buffer compiler. DO NOT EDIT!
-# source: animalai/communicator_objects/header.proto
-
-import sys
-_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
-from google.protobuf import descriptor as _descriptor
-from google.protobuf import message as _message
-from google.protobuf import reflection as _reflection
-from google.protobuf import symbol_database as _symbol_database
-# @@protoc_insertion_point(imports)
-
-_sym_db = _symbol_database.Default()
-
-
-
-
-DESCRIPTOR = _descriptor.FileDescriptor(
- name='animalai/communicator_objects/header.proto',
- package='communicator_objects',
- syntax='proto3',
- serialized_options=_b('\252\002\034MLAgents.CommunicatorObjects'),
- serialized_pb=_b('\n*animalai/communicator_objects/header.proto\x12\x14\x63ommunicator_objects\")\n\x06Header\x12\x0e\n\x06status\x18\x01 \x01(\x05\x12\x0f\n\x07message\x18\x02 \x01(\tB\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3')
-)
-
-
-
-
-_HEADER = _descriptor.Descriptor(
- name='Header',
- full_name='communicator_objects.Header',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='status', full_name='communicator_objects.Header.status', index=0,
- number=1, type=5, cpp_type=1, label=1,
- has_default_value=False, default_value=0,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- serialized_options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='message', full_name='communicator_objects.Header.message', index=1,
- number=2, type=9, cpp_type=9, label=1,
- has_default_value=False, default_value=_b("").decode('utf-8'),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- serialized_options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- serialized_options=None,
- is_extendable=False,
- syntax='proto3',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=68,
- serialized_end=109,
-)
-
-DESCRIPTOR.message_types_by_name['Header'] = _HEADER
-_sym_db.RegisterFileDescriptor(DESCRIPTOR)
-
-Header = _reflection.GeneratedProtocolMessageType('Header', (_message.Message,), {
- 'DESCRIPTOR' : _HEADER,
- '__module__' : 'animalai.communicator_objects.header_pb2'
- # @@protoc_insertion_point(class_scope:communicator_objects.Header)
- })
-_sym_db.RegisterMessage(Header)
-
-
-DESCRIPTOR._options = None
-# @@protoc_insertion_point(module_scope)
diff --git a/animalai_bkp/communicator_objects/resolution_proto_pb2.py b/animalai_bkp/communicator_objects/resolution_proto_pb2.py
deleted file mode 100644
index cade7d49..00000000
--- a/animalai_bkp/communicator_objects/resolution_proto_pb2.py
+++ /dev/null
@@ -1,85 +0,0 @@
-# -*- coding: utf-8 -*-
-# Generated by the protocol buffer compiler. DO NOT EDIT!
-# source: animalai/communicator_objects/resolution_proto.proto
-
-import sys
-_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
-from google.protobuf import descriptor as _descriptor
-from google.protobuf import message as _message
-from google.protobuf import reflection as _reflection
-from google.protobuf import symbol_database as _symbol_database
-# @@protoc_insertion_point(imports)
-
-_sym_db = _symbol_database.Default()
-
-
-
-
-DESCRIPTOR = _descriptor.FileDescriptor(
- name='animalai/communicator_objects/resolution_proto.proto',
- package='communicator_objects',
- syntax='proto3',
- serialized_options=_b('\252\002\034MLAgents.CommunicatorObjects'),
- serialized_pb=_b('\n4animalai/communicator_objects/resolution_proto.proto\x12\x14\x63ommunicator_objects\"D\n\x0fResolutionProto\x12\r\n\x05width\x18\x01 \x01(\x05\x12\x0e\n\x06height\x18\x02 \x01(\x05\x12\x12\n\ngray_scale\x18\x03 \x01(\x08\x42\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3')
-)
-
-
-
-
-_RESOLUTIONPROTO = _descriptor.Descriptor(
- name='ResolutionProto',
- full_name='communicator_objects.ResolutionProto',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='width', full_name='communicator_objects.ResolutionProto.width', index=0,
- number=1, type=5, cpp_type=1, label=1,
- has_default_value=False, default_value=0,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- serialized_options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='height', full_name='communicator_objects.ResolutionProto.height', index=1,
- number=2, type=5, cpp_type=1, label=1,
- has_default_value=False, default_value=0,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- serialized_options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='gray_scale', full_name='communicator_objects.ResolutionProto.gray_scale', index=2,
- number=3, type=8, cpp_type=7, label=1,
- has_default_value=False, default_value=False,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- serialized_options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- serialized_options=None,
- is_extendable=False,
- syntax='proto3',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=78,
- serialized_end=146,
-)
-
-DESCRIPTOR.message_types_by_name['ResolutionProto'] = _RESOLUTIONPROTO
-_sym_db.RegisterFileDescriptor(DESCRIPTOR)
-
-ResolutionProto = _reflection.GeneratedProtocolMessageType('ResolutionProto', (_message.Message,), {
- 'DESCRIPTOR' : _RESOLUTIONPROTO,
- '__module__' : 'animalai.communicator_objects.resolution_proto_pb2'
- # @@protoc_insertion_point(class_scope:communicator_objects.ResolutionProto)
- })
-_sym_db.RegisterMessage(ResolutionProto)
-
-
-DESCRIPTOR._options = None
-# @@protoc_insertion_point(module_scope)
diff --git a/animalai_bkp/communicator_objects/space_type_proto_pb2.py b/animalai_bkp/communicator_objects/space_type_proto_pb2.py
deleted file mode 100644
index 02a9b16e..00000000
--- a/animalai_bkp/communicator_objects/space_type_proto_pb2.py
+++ /dev/null
@@ -1,61 +0,0 @@
-# -*- coding: utf-8 -*-
-# Generated by the protocol buffer compiler. DO NOT EDIT!
-# source: animalai/communicator_objects/space_type_proto.proto
-
-import sys
-_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
-from google.protobuf.internal import enum_type_wrapper
-from google.protobuf import descriptor as _descriptor
-from google.protobuf import message as _message
-from google.protobuf import reflection as _reflection
-from google.protobuf import symbol_database as _symbol_database
-# @@protoc_insertion_point(imports)
-
-_sym_db = _symbol_database.Default()
-
-
-from animalai.communicator_objects import resolution_proto_pb2 as animalai_dot_communicator__objects_dot_resolution__proto__pb2
-
-
-DESCRIPTOR = _descriptor.FileDescriptor(
- name='animalai/communicator_objects/space_type_proto.proto',
- package='communicator_objects',
- syntax='proto3',
- serialized_options=_b('\252\002\034MLAgents.CommunicatorObjects'),
- serialized_pb=_b('\n4animalai/communicator_objects/space_type_proto.proto\x12\x14\x63ommunicator_objects\x1a\x34\x61nimalai/communicator_objects/resolution_proto.proto*.\n\x0eSpaceTypeProto\x12\x0c\n\x08\x64iscrete\x10\x00\x12\x0e\n\ncontinuous\x10\x01\x42\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3')
- ,
- dependencies=[animalai_dot_communicator__objects_dot_resolution__proto__pb2.DESCRIPTOR,])
-
-_SPACETYPEPROTO = _descriptor.EnumDescriptor(
- name='SpaceTypeProto',
- full_name='communicator_objects.SpaceTypeProto',
- filename=None,
- file=DESCRIPTOR,
- values=[
- _descriptor.EnumValueDescriptor(
- name='discrete', index=0, number=0,
- serialized_options=None,
- type=None),
- _descriptor.EnumValueDescriptor(
- name='continuous', index=1, number=1,
- serialized_options=None,
- type=None),
- ],
- containing_type=None,
- serialized_options=None,
- serialized_start=132,
- serialized_end=178,
-)
-_sym_db.RegisterEnumDescriptor(_SPACETYPEPROTO)
-
-SpaceTypeProto = enum_type_wrapper.EnumTypeWrapper(_SPACETYPEPROTO)
-discrete = 0
-continuous = 1
-
-
-DESCRIPTOR.enum_types_by_name['SpaceTypeProto'] = _SPACETYPEPROTO
-_sym_db.RegisterFileDescriptor(DESCRIPTOR)
-
-
-DESCRIPTOR._options = None
-# @@protoc_insertion_point(module_scope)
diff --git a/animalai_bkp/communicator_objects/unity_input_pb2.py b/animalai_bkp/communicator_objects/unity_input_pb2.py
deleted file mode 100644
index a3f9d1c1..00000000
--- a/animalai_bkp/communicator_objects/unity_input_pb2.py
+++ /dev/null
@@ -1,92 +0,0 @@
-# -*- coding: utf-8 -*-
-# Generated by the protocol buffer compiler. DO NOT EDIT!
-# source: animalai/communicator_objects/unity_input.proto
-
-import sys
-_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
-from google.protobuf import descriptor as _descriptor
-from google.protobuf import message as _message
-from google.protobuf import reflection as _reflection
-from google.protobuf import symbol_database as _symbol_database
-# @@protoc_insertion_point(imports)
-
-_sym_db = _symbol_database.Default()
-
-
-from animalai.communicator_objects import unity_rl_input_pb2 as animalai_dot_communicator__objects_dot_unity__rl__input__pb2
-from animalai.communicator_objects import unity_rl_initialization_input_pb2 as animalai_dot_communicator__objects_dot_unity__rl__initialization__input__pb2
-from animalai.communicator_objects import unity_rl_reset_input_pb2 as animalai_dot_communicator__objects_dot_unity__rl__reset__input__pb2
-
-
-DESCRIPTOR = _descriptor.FileDescriptor(
- name='animalai/communicator_objects/unity_input.proto',
- package='communicator_objects',
- syntax='proto3',
- serialized_options=_b('\252\002\034MLAgents.CommunicatorObjects'),
- serialized_pb=_b('\n/animalai/communicator_objects/unity_input.proto\x12\x14\x63ommunicator_objects\x1a\x32\x61nimalai/communicator_objects/unity_rl_input.proto\x1a\x41\x61nimalai/communicator_objects/unity_rl_initialization_input.proto\x1a\x38\x61nimalai/communicator_objects/unity_rl_reset_input.proto\"\xd6\x01\n\nUnityInput\x12\x34\n\x08rl_input\x18\x01 \x01(\x0b\x32\".communicator_objects.UnityRLInput\x12Q\n\x17rl_initialization_input\x18\x02 \x01(\x0b\x32\x30.communicator_objects.UnityRLInitializationInput\x12?\n\x0erl_reset_input\x18\x03 \x01(\x0b\x32\'.communicator_objects.UnityRLResetInputB\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3')
- ,
- dependencies=[animalai_dot_communicator__objects_dot_unity__rl__input__pb2.DESCRIPTOR,animalai_dot_communicator__objects_dot_unity__rl__initialization__input__pb2.DESCRIPTOR,animalai_dot_communicator__objects_dot_unity__rl__reset__input__pb2.DESCRIPTOR,])
-
-
-
-
-_UNITYINPUT = _descriptor.Descriptor(
- name='UnityInput',
- full_name='communicator_objects.UnityInput',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='rl_input', full_name='communicator_objects.UnityInput.rl_input', index=0,
- number=1, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- serialized_options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='rl_initialization_input', full_name='communicator_objects.UnityInput.rl_initialization_input', index=1,
- number=2, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- serialized_options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='rl_reset_input', full_name='communicator_objects.UnityInput.rl_reset_input', index=2,
- number=3, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- serialized_options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- serialized_options=None,
- is_extendable=False,
- syntax='proto3',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=251,
- serialized_end=465,
-)
-
-_UNITYINPUT.fields_by_name['rl_input'].message_type = animalai_dot_communicator__objects_dot_unity__rl__input__pb2._UNITYRLINPUT
-_UNITYINPUT.fields_by_name['rl_initialization_input'].message_type = animalai_dot_communicator__objects_dot_unity__rl__initialization__input__pb2._UNITYRLINITIALIZATIONINPUT
-_UNITYINPUT.fields_by_name['rl_reset_input'].message_type = animalai_dot_communicator__objects_dot_unity__rl__reset__input__pb2._UNITYRLRESETINPUT
-DESCRIPTOR.message_types_by_name['UnityInput'] = _UNITYINPUT
-_sym_db.RegisterFileDescriptor(DESCRIPTOR)
-
-UnityInput = _reflection.GeneratedProtocolMessageType('UnityInput', (_message.Message,), {
- 'DESCRIPTOR' : _UNITYINPUT,
- '__module__' : 'animalai.communicator_objects.unity_input_pb2'
- # @@protoc_insertion_point(class_scope:communicator_objects.UnityInput)
- })
-_sym_db.RegisterMessage(UnityInput)
-
-
-DESCRIPTOR._options = None
-# @@protoc_insertion_point(module_scope)
diff --git a/animalai_bkp/communicator_objects/unity_message_pb2.py b/animalai_bkp/communicator_objects/unity_message_pb2.py
deleted file mode 100644
index 47deefdc..00000000
--- a/animalai_bkp/communicator_objects/unity_message_pb2.py
+++ /dev/null
@@ -1,92 +0,0 @@
-# -*- coding: utf-8 -*-
-# Generated by the protocol buffer compiler. DO NOT EDIT!
-# source: animalai/communicator_objects/unity_message.proto
-
-import sys
-_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
-from google.protobuf import descriptor as _descriptor
-from google.protobuf import message as _message
-from google.protobuf import reflection as _reflection
-from google.protobuf import symbol_database as _symbol_database
-# @@protoc_insertion_point(imports)
-
-_sym_db = _symbol_database.Default()
-
-
-from animalai.communicator_objects import unity_output_pb2 as animalai_dot_communicator__objects_dot_unity__output__pb2
-from animalai.communicator_objects import unity_input_pb2 as animalai_dot_communicator__objects_dot_unity__input__pb2
-from animalai.communicator_objects import header_pb2 as animalai_dot_communicator__objects_dot_header__pb2
-
-
-DESCRIPTOR = _descriptor.FileDescriptor(
- name='animalai/communicator_objects/unity_message.proto',
- package='communicator_objects',
- syntax='proto3',
- serialized_options=_b('\252\002\034MLAgents.CommunicatorObjects'),
- serialized_pb=_b('\n1animalai/communicator_objects/unity_message.proto\x12\x14\x63ommunicator_objects\x1a\x30\x61nimalai/communicator_objects/unity_output.proto\x1a/animalai/communicator_objects/unity_input.proto\x1a*animalai/communicator_objects/header.proto\"\xac\x01\n\x0cUnityMessage\x12,\n\x06header\x18\x01 \x01(\x0b\x32\x1c.communicator_objects.Header\x12\x37\n\x0cunity_output\x18\x02 \x01(\x0b\x32!.communicator_objects.UnityOutput\x12\x35\n\x0bunity_input\x18\x03 \x01(\x0b\x32 .communicator_objects.UnityInputB\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3')
- ,
- dependencies=[animalai_dot_communicator__objects_dot_unity__output__pb2.DESCRIPTOR,animalai_dot_communicator__objects_dot_unity__input__pb2.DESCRIPTOR,animalai_dot_communicator__objects_dot_header__pb2.DESCRIPTOR,])
-
-
-
-
-_UNITYMESSAGE = _descriptor.Descriptor(
- name='UnityMessage',
- full_name='communicator_objects.UnityMessage',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='header', full_name='communicator_objects.UnityMessage.header', index=0,
- number=1, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- serialized_options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='unity_output', full_name='communicator_objects.UnityMessage.unity_output', index=1,
- number=2, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- serialized_options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='unity_input', full_name='communicator_objects.UnityMessage.unity_input', index=2,
- number=3, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- serialized_options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- serialized_options=None,
- is_extendable=False,
- syntax='proto3',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=219,
- serialized_end=391,
-)
-
-_UNITYMESSAGE.fields_by_name['header'].message_type = animalai_dot_communicator__objects_dot_header__pb2._HEADER
-_UNITYMESSAGE.fields_by_name['unity_output'].message_type = animalai_dot_communicator__objects_dot_unity__output__pb2._UNITYOUTPUT
-_UNITYMESSAGE.fields_by_name['unity_input'].message_type = animalai_dot_communicator__objects_dot_unity__input__pb2._UNITYINPUT
-DESCRIPTOR.message_types_by_name['UnityMessage'] = _UNITYMESSAGE
-_sym_db.RegisterFileDescriptor(DESCRIPTOR)
-
-UnityMessage = _reflection.GeneratedProtocolMessageType('UnityMessage', (_message.Message,), {
- 'DESCRIPTOR' : _UNITYMESSAGE,
- '__module__' : 'animalai.communicator_objects.unity_message_pb2'
- # @@protoc_insertion_point(class_scope:communicator_objects.UnityMessage)
- })
-_sym_db.RegisterMessage(UnityMessage)
-
-
-DESCRIPTOR._options = None
-# @@protoc_insertion_point(module_scope)
diff --git a/animalai_bkp/communicator_objects/unity_output_pb2.py b/animalai_bkp/communicator_objects/unity_output_pb2.py
deleted file mode 100644
index 5beed76e..00000000
--- a/animalai_bkp/communicator_objects/unity_output_pb2.py
+++ /dev/null
@@ -1,83 +0,0 @@
-# -*- coding: utf-8 -*-
-# Generated by the protocol buffer compiler. DO NOT EDIT!
-# source: animalai/communicator_objects/unity_output.proto
-
-import sys
-_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
-from google.protobuf import descriptor as _descriptor
-from google.protobuf import message as _message
-from google.protobuf import reflection as _reflection
-from google.protobuf import symbol_database as _symbol_database
-# @@protoc_insertion_point(imports)
-
-_sym_db = _symbol_database.Default()
-
-
-from animalai.communicator_objects import unity_rl_output_pb2 as animalai_dot_communicator__objects_dot_unity__rl__output__pb2
-from animalai.communicator_objects import unity_rl_initialization_output_pb2 as animalai_dot_communicator__objects_dot_unity__rl__initialization__output__pb2
-
-
-DESCRIPTOR = _descriptor.FileDescriptor(
- name='animalai/communicator_objects/unity_output.proto',
- package='communicator_objects',
- syntax='proto3',
- serialized_options=_b('\252\002\034MLAgents.CommunicatorObjects'),
- serialized_pb=_b('\n0animalai/communicator_objects/unity_output.proto\x12\x14\x63ommunicator_objects\x1a\x33\x61nimalai/communicator_objects/unity_rl_output.proto\x1a\x42\x61nimalai/communicator_objects/unity_rl_initialization_output.proto\"\x9a\x01\n\x0bUnityOutput\x12\x36\n\trl_output\x18\x01 \x01(\x0b\x32#.communicator_objects.UnityRLOutput\x12S\n\x18rl_initialization_output\x18\x02 \x01(\x0b\x32\x31.communicator_objects.UnityRLInitializationOutputB\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3')
- ,
- dependencies=[animalai_dot_communicator__objects_dot_unity__rl__output__pb2.DESCRIPTOR,animalai_dot_communicator__objects_dot_unity__rl__initialization__output__pb2.DESCRIPTOR,])
-
-
-
-
-_UNITYOUTPUT = _descriptor.Descriptor(
- name='UnityOutput',
- full_name='communicator_objects.UnityOutput',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='rl_output', full_name='communicator_objects.UnityOutput.rl_output', index=0,
- number=1, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- serialized_options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='rl_initialization_output', full_name='communicator_objects.UnityOutput.rl_initialization_output', index=1,
- number=2, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- serialized_options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- serialized_options=None,
- is_extendable=False,
- syntax='proto3',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=196,
- serialized_end=350,
-)
-
-_UNITYOUTPUT.fields_by_name['rl_output'].message_type = animalai_dot_communicator__objects_dot_unity__rl__output__pb2._UNITYRLOUTPUT
-_UNITYOUTPUT.fields_by_name['rl_initialization_output'].message_type = animalai_dot_communicator__objects_dot_unity__rl__initialization__output__pb2._UNITYRLINITIALIZATIONOUTPUT
-DESCRIPTOR.message_types_by_name['UnityOutput'] = _UNITYOUTPUT
-_sym_db.RegisterFileDescriptor(DESCRIPTOR)
-
-UnityOutput = _reflection.GeneratedProtocolMessageType('UnityOutput', (_message.Message,), {
- 'DESCRIPTOR' : _UNITYOUTPUT,
- '__module__' : 'animalai.communicator_objects.unity_output_pb2'
- # @@protoc_insertion_point(class_scope:communicator_objects.UnityOutput)
- })
-_sym_db.RegisterMessage(UnityOutput)
-
-
-DESCRIPTOR._options = None
-# @@protoc_insertion_point(module_scope)
diff --git a/animalai_bkp/communicator_objects/unity_rl_initialization_input_pb2.py b/animalai_bkp/communicator_objects/unity_rl_initialization_input_pb2.py
deleted file mode 100644
index 9b16381b..00000000
--- a/animalai_bkp/communicator_objects/unity_rl_initialization_input_pb2.py
+++ /dev/null
@@ -1,71 +0,0 @@
-# -*- coding: utf-8 -*-
-# Generated by the protocol buffer compiler. DO NOT EDIT!
-# source: animalai/communicator_objects/unity_rl_initialization_input.proto
-
-import sys
-_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
-from google.protobuf import descriptor as _descriptor
-from google.protobuf import message as _message
-from google.protobuf import reflection as _reflection
-from google.protobuf import symbol_database as _symbol_database
-# @@protoc_insertion_point(imports)
-
-_sym_db = _symbol_database.Default()
-
-
-
-
-DESCRIPTOR = _descriptor.FileDescriptor(
- name='animalai/communicator_objects/unity_rl_initialization_input.proto',
- package='communicator_objects',
- syntax='proto3',
- serialized_options=_b('\252\002\034MLAgents.CommunicatorObjects'),
- serialized_pb=_b('\nAanimalai/communicator_objects/unity_rl_initialization_input.proto\x12\x14\x63ommunicator_objects\"*\n\x1aUnityRLInitializationInput\x12\x0c\n\x04seed\x18\x01 \x01(\x05\x42\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3')
-)
-
-
-
-
-_UNITYRLINITIALIZATIONINPUT = _descriptor.Descriptor(
- name='UnityRLInitializationInput',
- full_name='communicator_objects.UnityRLInitializationInput',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='seed', full_name='communicator_objects.UnityRLInitializationInput.seed', index=0,
- number=1, type=5, cpp_type=1, label=1,
- has_default_value=False, default_value=0,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- serialized_options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- serialized_options=None,
- is_extendable=False,
- syntax='proto3',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=91,
- serialized_end=133,
-)
-
-DESCRIPTOR.message_types_by_name['UnityRLInitializationInput'] = _UNITYRLINITIALIZATIONINPUT
-_sym_db.RegisterFileDescriptor(DESCRIPTOR)
-
-UnityRLInitializationInput = _reflection.GeneratedProtocolMessageType('UnityRLInitializationInput', (_message.Message,), {
- 'DESCRIPTOR' : _UNITYRLINITIALIZATIONINPUT,
- '__module__' : 'animalai.communicator_objects.unity_rl_initialization_input_pb2'
- # @@protoc_insertion_point(class_scope:communicator_objects.UnityRLInitializationInput)
- })
-_sym_db.RegisterMessage(UnityRLInitializationInput)
-
-
-DESCRIPTOR._options = None
-# @@protoc_insertion_point(module_scope)
diff --git a/animalai_bkp/communicator_objects/unity_rl_initialization_output_pb2.py b/animalai_bkp/communicator_objects/unity_rl_initialization_output_pb2.py
deleted file mode 100644
index 1042578f..00000000
--- a/animalai_bkp/communicator_objects/unity_rl_initialization_output_pb2.py
+++ /dev/null
@@ -1,95 +0,0 @@
-# -*- coding: utf-8 -*-
-# Generated by the protocol buffer compiler. DO NOT EDIT!
-# source: animalai/communicator_objects/unity_rl_initialization_output.proto
-
-import sys
-_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
-from google.protobuf import descriptor as _descriptor
-from google.protobuf import message as _message
-from google.protobuf import reflection as _reflection
-from google.protobuf import symbol_database as _symbol_database
-# @@protoc_insertion_point(imports)
-
-_sym_db = _symbol_database.Default()
-
-
-from animalai.communicator_objects import brain_parameters_proto_pb2 as animalai_dot_communicator__objects_dot_brain__parameters__proto__pb2
-
-
-DESCRIPTOR = _descriptor.FileDescriptor(
- name='animalai/communicator_objects/unity_rl_initialization_output.proto',
- package='communicator_objects',
- syntax='proto3',
- serialized_options=_b('\252\002\034MLAgents.CommunicatorObjects'),
- serialized_pb=_b('\nBanimalai/communicator_objects/unity_rl_initialization_output.proto\x12\x14\x63ommunicator_objects\x1a:animalai/communicator_objects/brain_parameters_proto.proto\"\x94\x01\n\x1bUnityRLInitializationOutput\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0f\n\x07version\x18\x02 \x01(\t\x12\x10\n\x08log_path\x18\x03 \x01(\t\x12\x44\n\x10\x62rain_parameters\x18\x05 \x03(\x0b\x32*.communicator_objects.BrainParametersProtoB\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3')
- ,
- dependencies=[animalai_dot_communicator__objects_dot_brain__parameters__proto__pb2.DESCRIPTOR,])
-
-
-
-
-_UNITYRLINITIALIZATIONOUTPUT = _descriptor.Descriptor(
- name='UnityRLInitializationOutput',
- full_name='communicator_objects.UnityRLInitializationOutput',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='name', full_name='communicator_objects.UnityRLInitializationOutput.name', index=0,
- number=1, type=9, cpp_type=9, label=1,
- has_default_value=False, default_value=_b("").decode('utf-8'),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- serialized_options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='version', full_name='communicator_objects.UnityRLInitializationOutput.version', index=1,
- number=2, type=9, cpp_type=9, label=1,
- has_default_value=False, default_value=_b("").decode('utf-8'),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- serialized_options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='log_path', full_name='communicator_objects.UnityRLInitializationOutput.log_path', index=2,
- number=3, type=9, cpp_type=9, label=1,
- has_default_value=False, default_value=_b("").decode('utf-8'),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- serialized_options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='brain_parameters', full_name='communicator_objects.UnityRLInitializationOutput.brain_parameters', index=3,
- number=5, type=11, cpp_type=10, label=3,
- has_default_value=False, default_value=[],
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- serialized_options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- serialized_options=None,
- is_extendable=False,
- syntax='proto3',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=153,
- serialized_end=301,
-)
-
-_UNITYRLINITIALIZATIONOUTPUT.fields_by_name['brain_parameters'].message_type = animalai_dot_communicator__objects_dot_brain__parameters__proto__pb2._BRAINPARAMETERSPROTO
-DESCRIPTOR.message_types_by_name['UnityRLInitializationOutput'] = _UNITYRLINITIALIZATIONOUTPUT
-_sym_db.RegisterFileDescriptor(DESCRIPTOR)
-
-UnityRLInitializationOutput = _reflection.GeneratedProtocolMessageType('UnityRLInitializationOutput', (_message.Message,), {
- 'DESCRIPTOR' : _UNITYRLINITIALIZATIONOUTPUT,
- '__module__' : 'animalai.communicator_objects.unity_rl_initialization_output_pb2'
- # @@protoc_insertion_point(class_scope:communicator_objects.UnityRLInitializationOutput)
- })
-_sym_db.RegisterMessage(UnityRLInitializationOutput)
-
-
-DESCRIPTOR._options = None
-# @@protoc_insertion_point(module_scope)
diff --git a/animalai_bkp/communicator_objects/unity_rl_input_pb2.py b/animalai_bkp/communicator_objects/unity_rl_input_pb2.py
deleted file mode 100644
index 29225764..00000000
--- a/animalai_bkp/communicator_objects/unity_rl_input_pb2.py
+++ /dev/null
@@ -1,178 +0,0 @@
-# -*- coding: utf-8 -*-
-# Generated by the protocol buffer compiler. DO NOT EDIT!
-# source: animalai/communicator_objects/unity_rl_input.proto
-
-import sys
-_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
-from google.protobuf import descriptor as _descriptor
-from google.protobuf import message as _message
-from google.protobuf import reflection as _reflection
-from google.protobuf import symbol_database as _symbol_database
-# @@protoc_insertion_point(imports)
-
-_sym_db = _symbol_database.Default()
-
-
-from animalai.communicator_objects import agent_action_proto_pb2 as animalai_dot_communicator__objects_dot_agent__action__proto__pb2
-from animalai.communicator_objects import command_proto_pb2 as animalai_dot_communicator__objects_dot_command__proto__pb2
-
-
-DESCRIPTOR = _descriptor.FileDescriptor(
- name='animalai/communicator_objects/unity_rl_input.proto',
- package='communicator_objects',
- syntax='proto3',
- serialized_options=_b('\252\002\034MLAgents.CommunicatorObjects'),
- serialized_pb=_b('\n2animalai/communicator_objects/unity_rl_input.proto\x12\x14\x63ommunicator_objects\x1a\x36\x61nimalai/communicator_objects/agent_action_proto.proto\x1a\x31\x61nimalai/communicator_objects/command_proto.proto\"\xe2\x02\n\x0cUnityRLInput\x12K\n\ragent_actions\x18\x01 \x03(\x0b\x32\x34.communicator_objects.UnityRLInput.AgentActionsEntry\x12\x13\n\x0bis_training\x18\x02 \x01(\x08\x12\x33\n\x07\x63ommand\x18\x03 \x01(\x0e\x32\".communicator_objects.CommandProto\x1aM\n\x14ListAgentActionProto\x12\x35\n\x05value\x18\x01 \x03(\x0b\x32&.communicator_objects.AgentActionProto\x1al\n\x11\x41gentActionsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x46\n\x05value\x18\x02 \x01(\x0b\x32\x37.communicator_objects.UnityRLInput.ListAgentActionProto:\x02\x38\x01\x42\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3')
- ,
- dependencies=[animalai_dot_communicator__objects_dot_agent__action__proto__pb2.DESCRIPTOR,animalai_dot_communicator__objects_dot_command__proto__pb2.DESCRIPTOR,])
-
-
-
-
-_UNITYRLINPUT_LISTAGENTACTIONPROTO = _descriptor.Descriptor(
- name='ListAgentActionProto',
- full_name='communicator_objects.UnityRLInput.ListAgentActionProto',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='value', full_name='communicator_objects.UnityRLInput.ListAgentActionProto.value', index=0,
- number=1, type=11, cpp_type=10, label=3,
- has_default_value=False, default_value=[],
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- serialized_options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- serialized_options=None,
- is_extendable=False,
- syntax='proto3',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=351,
- serialized_end=428,
-)
-
-_UNITYRLINPUT_AGENTACTIONSENTRY = _descriptor.Descriptor(
- name='AgentActionsEntry',
- full_name='communicator_objects.UnityRLInput.AgentActionsEntry',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='key', full_name='communicator_objects.UnityRLInput.AgentActionsEntry.key', index=0,
- number=1, type=9, cpp_type=9, label=1,
- has_default_value=False, default_value=_b("").decode('utf-8'),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- serialized_options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='value', full_name='communicator_objects.UnityRLInput.AgentActionsEntry.value', index=1,
- number=2, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- serialized_options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- serialized_options=_b('8\001'),
- is_extendable=False,
- syntax='proto3',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=430,
- serialized_end=538,
-)
-
-_UNITYRLINPUT = _descriptor.Descriptor(
- name='UnityRLInput',
- full_name='communicator_objects.UnityRLInput',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='agent_actions', full_name='communicator_objects.UnityRLInput.agent_actions', index=0,
- number=1, type=11, cpp_type=10, label=3,
- has_default_value=False, default_value=[],
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- serialized_options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='is_training', full_name='communicator_objects.UnityRLInput.is_training', index=1,
- number=2, type=8, cpp_type=7, label=1,
- has_default_value=False, default_value=False,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- serialized_options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='command', full_name='communicator_objects.UnityRLInput.command', index=2,
- number=3, type=14, cpp_type=8, label=1,
- has_default_value=False, default_value=0,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- serialized_options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[_UNITYRLINPUT_LISTAGENTACTIONPROTO, _UNITYRLINPUT_AGENTACTIONSENTRY, ],
- enum_types=[
- ],
- serialized_options=None,
- is_extendable=False,
- syntax='proto3',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=184,
- serialized_end=538,
-)
-
-_UNITYRLINPUT_LISTAGENTACTIONPROTO.fields_by_name['value'].message_type = animalai_dot_communicator__objects_dot_agent__action__proto__pb2._AGENTACTIONPROTO
-_UNITYRLINPUT_LISTAGENTACTIONPROTO.containing_type = _UNITYRLINPUT
-_UNITYRLINPUT_AGENTACTIONSENTRY.fields_by_name['value'].message_type = _UNITYRLINPUT_LISTAGENTACTIONPROTO
-_UNITYRLINPUT_AGENTACTIONSENTRY.containing_type = _UNITYRLINPUT
-_UNITYRLINPUT.fields_by_name['agent_actions'].message_type = _UNITYRLINPUT_AGENTACTIONSENTRY
-_UNITYRLINPUT.fields_by_name['command'].enum_type = animalai_dot_communicator__objects_dot_command__proto__pb2._COMMANDPROTO
-DESCRIPTOR.message_types_by_name['UnityRLInput'] = _UNITYRLINPUT
-_sym_db.RegisterFileDescriptor(DESCRIPTOR)
-
-UnityRLInput = _reflection.GeneratedProtocolMessageType('UnityRLInput', (_message.Message,), {
-
- 'ListAgentActionProto' : _reflection.GeneratedProtocolMessageType('ListAgentActionProto', (_message.Message,), {
- 'DESCRIPTOR' : _UNITYRLINPUT_LISTAGENTACTIONPROTO,
- '__module__' : 'animalai.communicator_objects.unity_rl_input_pb2'
- # @@protoc_insertion_point(class_scope:communicator_objects.UnityRLInput.ListAgentActionProto)
- })
- ,
-
- 'AgentActionsEntry' : _reflection.GeneratedProtocolMessageType('AgentActionsEntry', (_message.Message,), {
- 'DESCRIPTOR' : _UNITYRLINPUT_AGENTACTIONSENTRY,
- '__module__' : 'animalai.communicator_objects.unity_rl_input_pb2'
- # @@protoc_insertion_point(class_scope:communicator_objects.UnityRLInput.AgentActionsEntry)
- })
- ,
- 'DESCRIPTOR' : _UNITYRLINPUT,
- '__module__' : 'animalai.communicator_objects.unity_rl_input_pb2'
- # @@protoc_insertion_point(class_scope:communicator_objects.UnityRLInput)
- })
-_sym_db.RegisterMessage(UnityRLInput)
-_sym_db.RegisterMessage(UnityRLInput.ListAgentActionProto)
-_sym_db.RegisterMessage(UnityRLInput.AgentActionsEntry)
-
-
-DESCRIPTOR._options = None
-_UNITYRLINPUT_AGENTACTIONSENTRY._options = None
-# @@protoc_insertion_point(module_scope)
diff --git a/animalai_bkp/communicator_objects/unity_rl_output_pb2.py b/animalai_bkp/communicator_objects/unity_rl_output_pb2.py
deleted file mode 100644
index a35cdd20..00000000
--- a/animalai_bkp/communicator_objects/unity_rl_output_pb2.py
+++ /dev/null
@@ -1,169 +0,0 @@
-# -*- coding: utf-8 -*-
-# Generated by the protocol buffer compiler. DO NOT EDIT!
-# source: animalai/communicator_objects/unity_rl_output.proto
-
-import sys
-_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
-from google.protobuf import descriptor as _descriptor
-from google.protobuf import message as _message
-from google.protobuf import reflection as _reflection
-from google.protobuf import symbol_database as _symbol_database
-# @@protoc_insertion_point(imports)
-
-_sym_db = _symbol_database.Default()
-
-
-from animalai.communicator_objects import agent_info_proto_pb2 as animalai_dot_communicator__objects_dot_agent__info__proto__pb2
-
-
-DESCRIPTOR = _descriptor.FileDescriptor(
- name='animalai/communicator_objects/unity_rl_output.proto',
- package='communicator_objects',
- syntax='proto3',
- serialized_options=_b('\252\002\034MLAgents.CommunicatorObjects'),
- serialized_pb=_b('\n3animalai/communicator_objects/unity_rl_output.proto\x12\x14\x63ommunicator_objects\x1a\x34\x61nimalai/communicator_objects/agent_info_proto.proto\"\xa3\x02\n\rUnityRLOutput\x12\x13\n\x0bglobal_done\x18\x01 \x01(\x08\x12G\n\nagentInfos\x18\x02 \x03(\x0b\x32\x33.communicator_objects.UnityRLOutput.AgentInfosEntry\x1aI\n\x12ListAgentInfoProto\x12\x33\n\x05value\x18\x01 \x03(\x0b\x32$.communicator_objects.AgentInfoProto\x1ai\n\x0f\x41gentInfosEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x45\n\x05value\x18\x02 \x01(\x0b\x32\x36.communicator_objects.UnityRLOutput.ListAgentInfoProto:\x02\x38\x01\x42\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3')
- ,
- dependencies=[animalai_dot_communicator__objects_dot_agent__info__proto__pb2.DESCRIPTOR,])
-
-
-
-
-_UNITYRLOUTPUT_LISTAGENTINFOPROTO = _descriptor.Descriptor(
- name='ListAgentInfoProto',
- full_name='communicator_objects.UnityRLOutput.ListAgentInfoProto',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='value', full_name='communicator_objects.UnityRLOutput.ListAgentInfoProto.value', index=0,
- number=1, type=11, cpp_type=10, label=3,
- has_default_value=False, default_value=[],
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- serialized_options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- serialized_options=None,
- is_extendable=False,
- syntax='proto3',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=243,
- serialized_end=316,
-)
-
-_UNITYRLOUTPUT_AGENTINFOSENTRY = _descriptor.Descriptor(
- name='AgentInfosEntry',
- full_name='communicator_objects.UnityRLOutput.AgentInfosEntry',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='key', full_name='communicator_objects.UnityRLOutput.AgentInfosEntry.key', index=0,
- number=1, type=9, cpp_type=9, label=1,
- has_default_value=False, default_value=_b("").decode('utf-8'),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- serialized_options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='value', full_name='communicator_objects.UnityRLOutput.AgentInfosEntry.value', index=1,
- number=2, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- serialized_options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- serialized_options=_b('8\001'),
- is_extendable=False,
- syntax='proto3',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=318,
- serialized_end=423,
-)
-
-_UNITYRLOUTPUT = _descriptor.Descriptor(
- name='UnityRLOutput',
- full_name='communicator_objects.UnityRLOutput',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='global_done', full_name='communicator_objects.UnityRLOutput.global_done', index=0,
- number=1, type=8, cpp_type=7, label=1,
- has_default_value=False, default_value=False,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- serialized_options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='agentInfos', full_name='communicator_objects.UnityRLOutput.agentInfos', index=1,
- number=2, type=11, cpp_type=10, label=3,
- has_default_value=False, default_value=[],
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- serialized_options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[_UNITYRLOUTPUT_LISTAGENTINFOPROTO, _UNITYRLOUTPUT_AGENTINFOSENTRY, ],
- enum_types=[
- ],
- serialized_options=None,
- is_extendable=False,
- syntax='proto3',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=132,
- serialized_end=423,
-)
-
-_UNITYRLOUTPUT_LISTAGENTINFOPROTO.fields_by_name['value'].message_type = animalai_dot_communicator__objects_dot_agent__info__proto__pb2._AGENTINFOPROTO
-_UNITYRLOUTPUT_LISTAGENTINFOPROTO.containing_type = _UNITYRLOUTPUT
-_UNITYRLOUTPUT_AGENTINFOSENTRY.fields_by_name['value'].message_type = _UNITYRLOUTPUT_LISTAGENTINFOPROTO
-_UNITYRLOUTPUT_AGENTINFOSENTRY.containing_type = _UNITYRLOUTPUT
-_UNITYRLOUTPUT.fields_by_name['agentInfos'].message_type = _UNITYRLOUTPUT_AGENTINFOSENTRY
-DESCRIPTOR.message_types_by_name['UnityRLOutput'] = _UNITYRLOUTPUT
-_sym_db.RegisterFileDescriptor(DESCRIPTOR)
-
-UnityRLOutput = _reflection.GeneratedProtocolMessageType('UnityRLOutput', (_message.Message,), {
-
- 'ListAgentInfoProto' : _reflection.GeneratedProtocolMessageType('ListAgentInfoProto', (_message.Message,), {
- 'DESCRIPTOR' : _UNITYRLOUTPUT_LISTAGENTINFOPROTO,
- '__module__' : 'animalai.communicator_objects.unity_rl_output_pb2'
- # @@protoc_insertion_point(class_scope:communicator_objects.UnityRLOutput.ListAgentInfoProto)
- })
- ,
-
- 'AgentInfosEntry' : _reflection.GeneratedProtocolMessageType('AgentInfosEntry', (_message.Message,), {
- 'DESCRIPTOR' : _UNITYRLOUTPUT_AGENTINFOSENTRY,
- '__module__' : 'animalai.communicator_objects.unity_rl_output_pb2'
- # @@protoc_insertion_point(class_scope:communicator_objects.UnityRLOutput.AgentInfosEntry)
- })
- ,
- 'DESCRIPTOR' : _UNITYRLOUTPUT,
- '__module__' : 'animalai.communicator_objects.unity_rl_output_pb2'
- # @@protoc_insertion_point(class_scope:communicator_objects.UnityRLOutput)
- })
-_sym_db.RegisterMessage(UnityRLOutput)
-_sym_db.RegisterMessage(UnityRLOutput.ListAgentInfoProto)
-_sym_db.RegisterMessage(UnityRLOutput.AgentInfosEntry)
-
-
-DESCRIPTOR._options = None
-_UNITYRLOUTPUT_AGENTINFOSENTRY._options = None
-# @@protoc_insertion_point(module_scope)
diff --git a/animalai_bkp/communicator_objects/unity_rl_reset_input_pb2.py b/animalai_bkp/communicator_objects/unity_rl_reset_input_pb2.py
deleted file mode 100644
index e07a7286..00000000
--- a/animalai_bkp/communicator_objects/unity_rl_reset_input_pb2.py
+++ /dev/null
@@ -1,122 +0,0 @@
-# -*- coding: utf-8 -*-
-# Generated by the protocol buffer compiler. DO NOT EDIT!
-# source: animalai/communicator_objects/unity_rl_reset_input.proto
-
-import sys
-_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
-from google.protobuf import descriptor as _descriptor
-from google.protobuf import message as _message
-from google.protobuf import reflection as _reflection
-from google.protobuf import symbol_database as _symbol_database
-# @@protoc_insertion_point(imports)
-
-_sym_db = _symbol_database.Default()
-
-
-from animalai.communicator_objects import arena_parameters_proto_pb2 as animalai_dot_communicator__objects_dot_arena__parameters__proto__pb2
-
-
-DESCRIPTOR = _descriptor.FileDescriptor(
- name='animalai/communicator_objects/unity_rl_reset_input.proto',
- package='communicator_objects',
- syntax='proto3',
- serialized_options=_b('\252\002\034MLAgents.CommunicatorObjects'),
- serialized_pb=_b('\n8animalai/communicator_objects/unity_rl_reset_input.proto\x12\x14\x63ommunicator_objects\x1a:animalai/communicator_objects/arena_parameters_proto.proto\"\xb3\x01\n\x11UnityRLResetInput\x12\x43\n\x06\x61renas\x18\x01 \x03(\x0b\x32\x33.communicator_objects.UnityRLResetInput.ArenasEntry\x1aY\n\x0b\x41renasEntry\x12\x0b\n\x03key\x18\x01 \x01(\x05\x12\x39\n\x05value\x18\x02 \x01(\x0b\x32*.communicator_objects.ArenaParametersProto:\x02\x38\x01\x42\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3')
- ,
- dependencies=[animalai_dot_communicator__objects_dot_arena__parameters__proto__pb2.DESCRIPTOR,])
-
-
-
-
-_UNITYRLRESETINPUT_ARENASENTRY = _descriptor.Descriptor(
- name='ArenasEntry',
- full_name='communicator_objects.UnityRLResetInput.ArenasEntry',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='key', full_name='communicator_objects.UnityRLResetInput.ArenasEntry.key', index=0,
- number=1, type=5, cpp_type=1, label=1,
- has_default_value=False, default_value=0,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- serialized_options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='value', full_name='communicator_objects.UnityRLResetInput.ArenasEntry.value', index=1,
- number=2, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- serialized_options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- serialized_options=_b('8\001'),
- is_extendable=False,
- syntax='proto3',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=233,
- serialized_end=322,
-)
-
-_UNITYRLRESETINPUT = _descriptor.Descriptor(
- name='UnityRLResetInput',
- full_name='communicator_objects.UnityRLResetInput',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='arenas', full_name='communicator_objects.UnityRLResetInput.arenas', index=0,
- number=1, type=11, cpp_type=10, label=3,
- has_default_value=False, default_value=[],
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- serialized_options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[_UNITYRLRESETINPUT_ARENASENTRY, ],
- enum_types=[
- ],
- serialized_options=None,
- is_extendable=False,
- syntax='proto3',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=143,
- serialized_end=322,
-)
-
-_UNITYRLRESETINPUT_ARENASENTRY.fields_by_name['value'].message_type = animalai_dot_communicator__objects_dot_arena__parameters__proto__pb2._ARENAPARAMETERSPROTO
-_UNITYRLRESETINPUT_ARENASENTRY.containing_type = _UNITYRLRESETINPUT
-_UNITYRLRESETINPUT.fields_by_name['arenas'].message_type = _UNITYRLRESETINPUT_ARENASENTRY
-DESCRIPTOR.message_types_by_name['UnityRLResetInput'] = _UNITYRLRESETINPUT
-_sym_db.RegisterFileDescriptor(DESCRIPTOR)
-
-UnityRLResetInput = _reflection.GeneratedProtocolMessageType('UnityRLResetInput', (_message.Message,), {
-
- 'ArenasEntry' : _reflection.GeneratedProtocolMessageType('ArenasEntry', (_message.Message,), {
- 'DESCRIPTOR' : _UNITYRLRESETINPUT_ARENASENTRY,
- '__module__' : 'animalai.communicator_objects.unity_rl_reset_input_pb2'
- # @@protoc_insertion_point(class_scope:communicator_objects.UnityRLResetInput.ArenasEntry)
- })
- ,
- 'DESCRIPTOR' : _UNITYRLRESETINPUT,
- '__module__' : 'animalai.communicator_objects.unity_rl_reset_input_pb2'
- # @@protoc_insertion_point(class_scope:communicator_objects.UnityRLResetInput)
- })
-_sym_db.RegisterMessage(UnityRLResetInput)
-_sym_db.RegisterMessage(UnityRLResetInput.ArenasEntry)
-
-
-DESCRIPTOR._options = None
-_UNITYRLRESETINPUT_ARENASENTRY._options = None
-# @@protoc_insertion_point(module_scope)
diff --git a/animalai_bkp/communicator_objects/unity_rl_reset_output_pb2.py b/animalai_bkp/communicator_objects/unity_rl_reset_output_pb2.py
deleted file mode 100644
index 4b5a24c2..00000000
--- a/animalai_bkp/communicator_objects/unity_rl_reset_output_pb2.py
+++ /dev/null
@@ -1,71 +0,0 @@
-# -*- coding: utf-8 -*-
-# Generated by the protocol buffer compiler. DO NOT EDIT!
-# source: animalai/communicator_objects/unity_rl_reset_output.proto
-
-import sys
-_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
-from google.protobuf import descriptor as _descriptor
-from google.protobuf import message as _message
-from google.protobuf import reflection as _reflection
-from google.protobuf import symbol_database as _symbol_database
-# @@protoc_insertion_point(imports)
-
-_sym_db = _symbol_database.Default()
-
-
-
-
-DESCRIPTOR = _descriptor.FileDescriptor(
- name='animalai/communicator_objects/unity_rl_reset_output.proto',
- package='communicator_objects',
- syntax='proto3',
- serialized_options=_b('\252\002\034MLAgents.CommunicatorObjects'),
- serialized_pb=_b('\n9animalai/communicator_objects/unity_rl_reset_output.proto\x12\x14\x63ommunicator_objects\"1\n\x12UnityRLResetOutput\x12\x1b\n\x13\x61renas_instanciated\x18\x01 \x03(\x08\x42\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3')
-)
-
-
-
-
-_UNITYRLRESETOUTPUT = _descriptor.Descriptor(
- name='UnityRLResetOutput',
- full_name='communicator_objects.UnityRLResetOutput',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='arenas_instanciated', full_name='communicator_objects.UnityRLResetOutput.arenas_instanciated', index=0,
- number=1, type=8, cpp_type=7, label=3,
- has_default_value=False, default_value=[],
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- serialized_options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- serialized_options=None,
- is_extendable=False,
- syntax='proto3',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=83,
- serialized_end=132,
-)
-
-DESCRIPTOR.message_types_by_name['UnityRLResetOutput'] = _UNITYRLRESETOUTPUT
-_sym_db.RegisterFileDescriptor(DESCRIPTOR)
-
-UnityRLResetOutput = _reflection.GeneratedProtocolMessageType('UnityRLResetOutput', (_message.Message,), {
- 'DESCRIPTOR' : _UNITYRLRESETOUTPUT,
- '__module__' : 'animalai.communicator_objects.unity_rl_reset_output_pb2'
- # @@protoc_insertion_point(class_scope:communicator_objects.UnityRLResetOutput)
- })
-_sym_db.RegisterMessage(UnityRLResetOutput)
-
-
-DESCRIPTOR._options = None
-# @@protoc_insertion_point(module_scope)
diff --git a/animalai_bkp/communicator_objects/unity_to_external_pb2.py b/animalai_bkp/communicator_objects/unity_to_external_pb2.py
deleted file mode 100644
index 852b3813..00000000
--- a/animalai_bkp/communicator_objects/unity_to_external_pb2.py
+++ /dev/null
@@ -1,57 +0,0 @@
-# Generated by the protocol buffer compiler. DO NOT EDIT!
-# source: animalai/communicator_objects/unity_to_external.proto
-
-import sys
-_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
-from google.protobuf import descriptor as _descriptor
-from google.protobuf import message as _message
-from google.protobuf import reflection as _reflection
-from google.protobuf import symbol_database as _symbol_database
-# @@protoc_insertion_point(imports)
-
-_sym_db = _symbol_database.Default()
-
-
-from animalai.communicator_objects import unity_message_pb2 as animalai_dot_communicator__objects_dot_unity__message__pb2
-
-
-DESCRIPTOR = _descriptor.FileDescriptor(
- name='animalai/communicator_objects/unity_to_external.proto',
- package='communicator_objects',
- syntax='proto3',
- serialized_options=_b('\252\002\034MLAgents.CommunicatorObjects'),
- serialized_pb=_b('\n5animalai/communicator_objects/unity_to_external.proto\x12\x14\x63ommunicator_objects\x1a\x31\x61nimalai/communicator_objects/unity_message.proto2g\n\x0fUnityToExternal\x12T\n\x08\x45xchange\x12\".communicator_objects.UnityMessage\x1a\".communicator_objects.UnityMessage\"\x00\x42\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3')
- ,
- dependencies=[animalai_dot_communicator__objects_dot_unity__message__pb2.DESCRIPTOR,])
-
-
-
-_sym_db.RegisterFileDescriptor(DESCRIPTOR)
-
-
-DESCRIPTOR._options = None
-
-_UNITYTOEXTERNAL = _descriptor.ServiceDescriptor(
- name='UnityToExternal',
- full_name='communicator_objects.UnityToExternal',
- file=DESCRIPTOR,
- index=0,
- serialized_options=None,
- serialized_start=130,
- serialized_end=233,
- methods=[
- _descriptor.MethodDescriptor(
- name='Exchange',
- full_name='communicator_objects.UnityToExternal.Exchange',
- index=0,
- containing_service=None,
- input_type=animalai_dot_communicator__objects_dot_unity__message__pb2._UNITYMESSAGE,
- output_type=animalai_dot_communicator__objects_dot_unity__message__pb2._UNITYMESSAGE,
- serialized_options=None,
- ),
-])
-_sym_db.RegisterServiceDescriptor(_UNITYTOEXTERNAL)
-
-DESCRIPTOR.services_by_name['UnityToExternal'] = _UNITYTOEXTERNAL
-
-# @@protoc_insertion_point(module_scope)
diff --git a/animalai_bkp/communicator_objects/unity_to_external_pb2_grpc.py b/animalai_bkp/communicator_objects/unity_to_external_pb2_grpc.py
deleted file mode 100644
index f1406fa1..00000000
--- a/animalai_bkp/communicator_objects/unity_to_external_pb2_grpc.py
+++ /dev/null
@@ -1,46 +0,0 @@
-# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!
-import grpc
-
-from animalai.communicator_objects import unity_message_pb2 as animalai_dot_communicator__objects_dot_unity__message__pb2
-
-
-class UnityToExternalStub(object):
- # missing associated documentation comment in .proto file
- pass
-
- def __init__(self, channel):
- """Constructor.
-
- Args:
- channel: A grpc.Channel.
- """
- self.Exchange = channel.unary_unary(
- '/communicator_objects.UnityToExternal/Exchange',
- request_serializer=animalai_dot_communicator__objects_dot_unity__message__pb2.UnityMessage.SerializeToString,
- response_deserializer=animalai_dot_communicator__objects_dot_unity__message__pb2.UnityMessage.FromString,
- )
-
-
-class UnityToExternalServicer(object):
- # missing associated documentation comment in .proto file
- pass
-
- def Exchange(self, request, context):
- """Sends the academy parameters
- """
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
-
-def add_UnityToExternalServicer_to_server(servicer, server):
- rpc_method_handlers = {
- 'Exchange': grpc.unary_unary_rpc_method_handler(
- servicer.Exchange,
- request_deserializer=animalai_dot_communicator__objects_dot_unity__message__pb2.UnityMessage.FromString,
- response_serializer=animalai_dot_communicator__objects_dot_unity__message__pb2.UnityMessage.SerializeToString,
- ),
- }
- generic_handler = grpc.method_handlers_generic_handler(
- 'communicator_objects.UnityToExternal', rpc_method_handlers)
- server.add_generic_rpc_handlers((generic_handler,))
diff --git a/animalai_bkp/envs/__init__.py b/animalai_bkp/envs/__init__.py
deleted file mode 100644
index 93960d2f..00000000
--- a/animalai_bkp/envs/__init__.py
+++ /dev/null
@@ -1,3 +0,0 @@
-from .environment import *
-from .brain import *
-from .exception import *
diff --git a/animalai_bkp/envs/arena_config.py b/animalai_bkp/envs/arena_config.py
deleted file mode 100644
index a756c493..00000000
--- a/animalai_bkp/envs/arena_config.py
+++ /dev/null
@@ -1,115 +0,0 @@
-import json
-import jsonpickle
-import yaml
-import copy
-
-from animalai.communicator_objects import UnityRLResetInput, ArenaParametersProto
-
-yaml.Dumper.ignore_aliases = lambda *args: True
-
-
-class Vector3(yaml.YAMLObject):
- yaml_tag = u'!Vector3'
-
- def __init__(self, x=0, y=0, z=0):
- self.x = x
- self.y = y
- self.z = z
-
- def to_proto(self):
- res = ArenaParametersProto.ItemsToSpawn.Vector3()
- res.x = self.x
- res.y = self.y
- res.z = self.z
-
- return res
-
-
-class RGB(yaml.YAMLObject):
- yaml_tag = u'!RGB'
-
- def __init__(self, r=0, g=0, b=0):
- self.r = r
- self.g = g
- self.b = b
-
- def to_proto(self):
- res = ArenaParametersProto.ItemsToSpawn.Vector3()
- res.x = self.r
- res.y = self.g
- res.z = self.b
-
- return res
-
-
-class Item(yaml.YAMLObject):
- yaml_tag = u'!Item'
-
- def __init__(self, name='', positions=None, rotations=None, sizes=None, colors=None):
- self.name = name
- self.positions = positions if positions is not None else []
- self.rotations = rotations if rotations is not None else []
- self.sizes = sizes if sizes is not None else []
- self.colors = colors if colors is not None else []
-
-
-class Arena(yaml.YAMLObject):
- yaml_tag = u'!Arena'
-
- def __init__(self, t=1000, items=None, blackouts=None):
- self.t = t
- self.items = items if items is not None else {}
- self.blackouts = blackouts if blackouts is not None else []
-
-
-class ArenaConfig(yaml.YAMLObject):
- yaml_tag = u'!ArenaConfig'
-
- def __init__(self, yaml_path=None):
-
- if yaml_path is not None:
- self.arenas = yaml.load(open(yaml_path, 'r'), Loader=yaml.Loader).arenas
- else:
- self.arenas = {}
-
- def save_config(self, json_path):
- out = jsonpickle.encode(self.arenas)
- out = json.loads(out)
- json.dump(out, open(json_path, 'w'), indent=4)
-
- def dict_to_arena_config(self) -> UnityRLResetInput:
- config_out = UnityRLResetInput()
-
- for k in self.arenas:
- config_out.arenas[k].CopyFrom(ArenaParametersProto())
- config_out.arenas[k].t = self.arenas[k].t
- config_out.arenas[k].blackouts.extend(self.arenas[k].blackouts)
- for item in self.arenas[k].items:
- to_spawn = config_out.arenas[k].items.add()
- to_spawn.name = item.name
- to_spawn.positions.extend([v.to_proto() for v in item.positions])
- to_spawn.rotations.extend(item.rotations)
- to_spawn.sizes.extend([v.to_proto() for v in item.sizes])
- to_spawn.colors.extend([v.to_proto() for v in item.colors])
-
- return config_out
-
- def update(self, arenas_configurations):
-
- if arenas_configurations is not None:
- for arena_i in arenas_configurations.arenas:
- self.arenas[arena_i] = copy.copy(arenas_configurations.arenas[arena_i])
-
-
-def constructor_arena(loader, node):
- fields = loader.construct_mapping(node)
- return Arena(**fields)
-
-
-def constructor_item(loader, node):
- fields = loader.construct_mapping(node)
- return Item(**fields)
-
-
-yaml.add_constructor(u'!Arena', constructor_arena)
-yaml.add_constructor(u'!Item', constructor_item)
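
A minimal usage sketch of the ArenaConfig API above; the YAML path is a placeholder for illustration, not a file touched by this patch:

    # Hypothetical example path; any file using the !ArenaConfig / !Arena / !Item tags works.
    from animalai.envs.arena_config import ArenaConfig

    config = ArenaConfig('configs/1-Food.yaml')
    reset_input = config.dict_to_arena_config()   # UnityRLResetInput proto sent to Unity on reset()
    print(len(config.arenas), 'arena(s) loaded')
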
diff --git a/animalai_bkp/envs/brain.py b/animalai_bkp/envs/brain.py
deleted file mode 100644
index 06940fd2..00000000
--- a/animalai_bkp/envs/brain.py
+++ /dev/null
@@ -1,149 +0,0 @@
-import logging
-import numpy as np
-import io
-
-from typing import Dict
-from PIL import Image
-
-logger = logging.getLogger("mlagents.envs")
-
-
-class BrainInfo:
- def __init__(self, visual_observation, vector_observation, text_observations, memory=None,
- reward=None, agents=None, local_done=None,
- vector_action=None, text_action=None, max_reached=None, action_mask=None):
- """
- Describes experience at current step of all agents linked to a brain.
- """
- self.visual_observations = visual_observation
- self.vector_observations = vector_observation
- self.text_observations = text_observations
- self.memories = memory
- self.rewards = reward
- self.local_done = local_done
- self.max_reached = max_reached
- self.agents = agents
- self.previous_vector_actions = vector_action
- self.previous_text_actions = text_action
- self.action_masks = action_mask
-
- @staticmethod
- def process_pixels(image_bytes, gray_scale):
- """
-        Converts a byte array observation image into a numpy array scaled to [0, 1],
-        and optionally converts it to grayscale
- :param gray_scale: Whether to convert the image to grayscale.
- :param image_bytes: input byte array corresponding to image
- :return: processed numpy array of observation from environment
- """
- s = bytearray(image_bytes)
- image = Image.open(io.BytesIO(s))
- s = np.array(image) / 255.0
- if gray_scale:
- s = np.mean(s, axis=2)
- s = np.reshape(s, [s.shape[0], s.shape[1], 1])
- return s
-
- @staticmethod
- def from_agent_proto(agent_info_list, brain_params):
- """
- Converts list of agent infos to BrainInfo.
- """
- vis_obs = []
- for i in range(brain_params.number_visual_observations):
- obs = [BrainInfo.process_pixels(x.visual_observations[i],
- brain_params.camera_resolutions[i]['blackAndWhite'])
- for x in agent_info_list]
- vis_obs += [np.array(obs)]
- if len(agent_info_list) == 0:
- memory_size = 0
- else:
- memory_size = max([len(x.memories) for x in agent_info_list])
- if memory_size == 0:
- memory = np.zeros((0, 0))
- else:
- [x.memories.extend([0] * (memory_size - len(x.memories))) for x in agent_info_list]
- memory = np.array([x.memories for x in agent_info_list])
- total_num_actions = sum(brain_params.vector_action_space_size)
- mask_actions = np.ones((len(agent_info_list), total_num_actions))
- for agent_index, agent_info in enumerate(agent_info_list):
- if agent_info.action_mask is not None:
- if len(agent_info.action_mask) == total_num_actions:
- mask_actions[agent_index, :] = [
- 0 if agent_info.action_mask[k] else 1 for k in range(total_num_actions)]
- if any([np.isnan(x.reward) for x in agent_info_list]):
- logger.warning("An agent had a NaN reward for brain " + brain_params.brain_name)
- if any([np.isnan(x.stacked_vector_observation).any() for x in agent_info_list]):
- logger.warning("An agent had a NaN observation for brain " + brain_params.brain_name)
- brain_info = BrainInfo(
- visual_observation=vis_obs,
- vector_observation=np.nan_to_num(
- np.array([x.stacked_vector_observation for x in agent_info_list])),
- text_observations=[x.text_observation for x in agent_info_list],
- memory=memory,
- reward=[x.reward if not np.isnan(x.reward) else 0 for x in agent_info_list],
- agents=[x.id for x in agent_info_list],
- local_done=[x.done for x in agent_info_list],
- vector_action=np.array([x.stored_vector_actions for x in agent_info_list]),
- text_action=[x.stored_text_actions for x in agent_info_list],
- max_reached=[x.max_step_reached for x in agent_info_list],
- action_mask=mask_actions
- )
- return brain_info
-
-
-# Renaming of dictionary of brain name to BrainInfo for clarity
-AllBrainInfo = Dict[str, BrainInfo]
-
-
-class BrainParameters:
- def __init__(self, brain_name, vector_observation_space_size, num_stacked_vector_observations,
- camera_resolutions, vector_action_space_size,
- vector_action_descriptions, vector_action_space_type):
- """
- Contains all brain-specific parameters.
- """
- self.brain_name = brain_name
- self.vector_observation_space_size = vector_observation_space_size
- self.num_stacked_vector_observations = num_stacked_vector_observations
- self.number_visual_observations = len(camera_resolutions)
- self.camera_resolutions = camera_resolutions
- self.vector_action_space_size = vector_action_space_size
- self.vector_action_descriptions = vector_action_descriptions
- self.vector_action_space_type = ["discrete", "continuous"][vector_action_space_type]
-
- def __str__(self):
- return '''Unity brain name: {}
- Number of Visual Observations (per agent): {}
- Vector Observation space size (per agent): {}
- Number of stacked Vector Observation: {}
- Vector Action space type: {}
- Vector Action space size (per agent): {}
- Vector Action descriptions: {}'''.format(self.brain_name,
- str(self.number_visual_observations),
- str(self.vector_observation_space_size),
- str(self.num_stacked_vector_observations),
- self.vector_action_space_type,
- str(self.vector_action_space_size),
- ', '.join(self.vector_action_descriptions))
-
- @staticmethod
- def from_proto(brain_param_proto):
- """
- Converts brain parameter proto to BrainParameter object.
- :param brain_param_proto: protobuf object.
- :return: BrainParameter object.
- """
- resolution = [{
- "height": x.height,
- "width": x.width,
- "blackAndWhite": x.gray_scale
- } for x in brain_param_proto.camera_resolutions]
- brain_params = BrainParameters(brain_param_proto.brain_name,
- brain_param_proto.vector_observation_size,
- brain_param_proto.num_stacked_vector_observations,
- resolution,
- brain_param_proto.vector_action_size,
- brain_param_proto.vector_action_descriptions,
- brain_param_proto.vector_action_space_type)
- return brain_params
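
A self-contained sketch of the process_pixels helper above, using a dummy in-memory PNG as a stand-in for the bytes carried in AgentInfoProto.visual_observations:

    import io
    import numpy as np
    from PIL import Image
    from animalai.envs.brain import BrainInfo

    # Encode a dummy 84x84 frame to PNG bytes (stand-in for an agent's visual observation).
    buf = io.BytesIO()
    Image.fromarray(np.zeros((84, 84, 3), dtype=np.uint8)).save(buf, format='PNG')

    color = BrainInfo.process_pixels(buf.getvalue(), gray_scale=False)  # (84, 84, 3) floats in [0, 1]
    gray = BrainInfo.process_pixels(buf.getvalue(), gray_scale=True)    # (84, 84, 1) after channel mean
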
diff --git a/animalai_bkp/envs/communicator.py b/animalai_bkp/envs/communicator.py
deleted file mode 100644
index 85b56db1..00000000
--- a/animalai_bkp/envs/communicator.py
+++ /dev/null
@@ -1,35 +0,0 @@
-import logging
-
-from animalai.communicator_objects import UnityOutput, UnityInput
-
-logger = logging.getLogger("mlagents.envs")
-
-
-class Communicator(object):
- def __init__(self, worker_id=0, base_port=5005):
- """
-        Python side of the communication. Must be used in tandem with the corresponding Unity-side Communicator.
-
- :int base_port: Baseline port number to connect to Unity environment over. worker_id increments over this.
- :int worker_id: Number to add to communication port (5005) [0]. Used for asynchronous agent scenarios.
- """
-
- def initialize(self, inputs: UnityInput) -> UnityOutput:
- """
- Used to exchange initialization parameters between Python and the Environment
- :param inputs: The initialization input that will be sent to the environment.
- :return: UnityOutput: The initialization output sent by Unity
- """
-
- def exchange(self, inputs: UnityInput) -> UnityOutput:
- """
- Used to send an input and receive an output from the Environment
-        :param inputs: The UnityInput that needs to be sent to the Environment
-        :return: The UnityOutput generated by the Environment
- """
-
- def close(self):
- """
- Sends a shutdown signal to the unity environment, and closes the connection.
- """
-
diff --git a/animalai_bkp/envs/environment.py b/animalai_bkp/envs/environment.py
deleted file mode 100644
index 7397b9c8..00000000
--- a/animalai_bkp/envs/environment.py
+++ /dev/null
@@ -1,491 +0,0 @@
-import atexit
-import glob
-import logging
-import numpy as np
-import os
-import subprocess
-
-from .brain import AllBrainInfo, BrainInfo, BrainParameters
-from .exception import UnityEnvironmentException, UnityActionException, UnityTimeOutException
-
-from animalai.communicator_objects import UnityRLInput, UnityRLOutput, AgentActionProto, \
- UnityRLInitializationInput, UnityRLInitializationOutput, \
- UnityRLResetInput, UnityInput, UnityOutput
-
-from .rpc_communicator import RpcCommunicator
-from sys import platform
-from .arena_config import ArenaConfig
-
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger("mlagents.envs")
-
-
-class UnityEnvironment(object):
- SCALAR_ACTION_TYPES = (int, np.int32, np.int64, float, np.float32, np.float64)
- SINGLE_BRAIN_ACTION_TYPES = SCALAR_ACTION_TYPES + (list, np.ndarray)
- SINGLE_BRAIN_TEXT_TYPES = (str, list, np.ndarray)
-
- def __init__(self, file_name=None,
- worker_id=0,
- base_port=5005,
- seed=0,
- docker_training=False,
- n_arenas=1,
- play=False,
- arenas_configurations=None):
- """
- Starts a new unity environment and establishes a connection with the environment.
- Notice: Currently communication between Unity and Python takes place over an open socket without authentication.
- Ensure that the network where training takes place is secure.
-
- :string file_name: Name of Unity environment binary.
- :int base_port: Baseline port number to connect to Unity environment over. worker_id increments over this.
- :int worker_id: Number to add to communication port (5005) [0]. Used for asynchronous agent scenarios.
- :param docker_training: Informs this class whether the process is being run within a container.
- """
-
- atexit.register(self._close)
- self.n_arenas = n_arenas
- self.play = play
- self.port = base_port + worker_id
- self._buffer_size = 12000
- self._version_ = "API-7"
- self._loaded = False # If true, this means the environment was successfully loaded
- self.proc1 = None # The process that is started. If None, no process was started
- self.communicator = self.get_communicator(worker_id, base_port)
- self.arenas_configurations = arenas_configurations if arenas_configurations is not None else ArenaConfig()
-
- if file_name is not None:
- self.executable_launcher(file_name, docker_training)
- else:
- logger.info("Start training by pressing the Play button in the Unity Editor.")
- self._loaded = True
-
- rl_init_parameters_in = UnityRLInitializationInput(
- seed=seed
- )
- try:
- aca_params = self.send_academy_parameters(rl_init_parameters_in)
- except UnityTimeOutException:
- self._close()
- raise
- # TODO : think of a better way to expose the academyParameters
- self._unity_version = aca_params.version
- if self._unity_version != self._version_:
- raise UnityEnvironmentException(
- "The API number is not compatible between Unity and python. Python API : {0}, Unity API : "
- "{1}.\nPlease go to https://github.com/Unity-Technologies/ml-agents to download the latest version "
- "of ML-Agents.".format(self._version_, self._unity_version))
- self._n_agents = {}
- self._global_done = None
- self._academy_name = aca_params.name
- self._log_path = aca_params.log_path
- self._brains = {}
- self._brain_names = []
- self._external_brain_names = []
- for brain_param in aca_params.brain_parameters:
- self._brain_names += [brain_param.brain_name]
- self._brains[brain_param.brain_name] = BrainParameters.from_proto(brain_param)
- if brain_param.is_training:
- self._external_brain_names += [brain_param.brain_name]
- self._num_brains = len(self._brain_names)
- self._num_external_brains = len(self._external_brain_names)
- logger.info("\n'{0}' started successfully!\n{1}".format(self._academy_name, str(self)))
- if self._num_external_brains == 0:
- logger.warning(" No Learning Brains set to train found in the Unity Environment. "
- "You will not be able to pass actions to your agent(s).")
-
- @property
- def logfile_path(self):
- return self._log_path
-
- @property
- def brains(self):
- return self._brains
-
- @property
- def global_done(self):
- return self._global_done
-
- @property
- def academy_name(self):
- return self._academy_name
-
- @property
- def number_brains(self):
- return self._num_brains
-
- @property
- def number_external_brains(self):
- return self._num_external_brains
-
- @property
- def brain_names(self):
- return self._brain_names
-
- @property
- def external_brain_names(self):
- return self._external_brain_names
-
- def executable_launcher(self, file_name, docker_training):
- cwd = os.getcwd()
- file_name = (file_name.strip()
- .replace('.app', '').replace('.exe', '').replace('.x86_64', '').replace('.x86',
- ''))
- true_filename = os.path.basename(os.path.normpath(file_name))
- logger.debug('The true file name is {}'.format(true_filename))
- launch_string = None
- if platform == "linux" or platform == "linux2":
- candidates = glob.glob(os.path.join(cwd, file_name) + '.x86_64')
- if len(candidates) == 0:
- candidates = glob.glob(os.path.join(cwd, file_name) + '.x86')
- if len(candidates) == 0:
- candidates = glob.glob(file_name + '.x86_64')
- if len(candidates) == 0:
- candidates = glob.glob(file_name + '.x86')
- if len(candidates) > 0:
- launch_string = candidates[0]
-
- elif platform == 'darwin':
- candidates = glob.glob(
- os.path.join(cwd, file_name + '.app', 'Contents', 'MacOS', true_filename))
- if len(candidates) == 0:
- candidates = glob.glob(
- os.path.join(file_name + '.app', 'Contents', 'MacOS', true_filename))
- if len(candidates) == 0:
- candidates = glob.glob(
- os.path.join(cwd, file_name + '.app', 'Contents', 'MacOS', '*'))
- if len(candidates) == 0:
- candidates = glob.glob(os.path.join(file_name + '.app', 'Contents', 'MacOS', '*'))
- if len(candidates) > 0:
- launch_string = candidates[0]
- elif platform == 'win32':
- candidates = glob.glob(os.path.join(cwd, file_name + '.exe'))
- if len(candidates) == 0:
- candidates = glob.glob(file_name + '.exe')
- if len(candidates) > 0:
- launch_string = candidates[0]
- if launch_string is None:
- self._close()
- raise UnityEnvironmentException("Couldn't launch the {0} environment. "
- "Provided filename does not match any environments."
- .format(true_filename))
- else:
- logger.debug("This is the launch string {}".format(launch_string))
- # Launch Unity environment
- if not docker_training:
- if not self.play:
- self.proc1 = subprocess.Popen(
- [launch_string, '--port', str(self.port), '--nArenas', str(self.n_arenas)])
- else:
- self.proc1 = subprocess.Popen(
- [launch_string, '--port', str(self.port)])
-
- else:
- """
- Comments for future maintenance:
- xvfb-run is a wrapper around Xvfb, a virtual xserver where all
- rendering is done to virtual memory. It automatically creates a
-            new virtual server, picking a server number via `auto-servernum`.
-            The server is passed the arguments using `server-args`; we are telling
- Xvfb to create Screen number 0 with width 640, height 480 and depth 24 bits.
- Note that 640 X 480 are the default width and height. The main reason for
- us to add this is because we'd like to change the depth from the default
- of 8 bits to 24.
- Unfortunately, this means that we will need to pass the arguments through
- a shell which is why we set `shell=True`. Now, this adds its own
-            complications. E.g. SIGINT can bounce off the shell and not get propagated
- to the child processes. This is why we add `exec`, so that the shell gets
- launched, the arguments are passed to `xvfb-run`. `exec` replaces the shell
- we created with `xvfb`.
- """
- docker_ls = ("exec xvfb-run --auto-servernum"
- " --server-args='-screen 0 640x480x24'"
- " {0} --port {1} --nArenas {2}").format(launch_string, str(self.port), str(self.n_arenas))
- self.proc1 = subprocess.Popen(docker_ls,
- stdout=subprocess.PIPE,
- stderr=subprocess.PIPE,
- shell=True)
-
- def get_communicator(self, worker_id, base_port):
- return RpcCommunicator(worker_id, base_port)
- # return SocketCommunicator(worker_id, base_port)
-
- def __str__(self):
- return '''Unity Academy name: {0}
- Number of Brains: {1}
- Number of Training Brains : {2}'''.format(self._academy_name, str(self._num_brains),
- str(self._num_external_brains))
-
- def reset(self, arenas_configurations=None, train_mode=True) -> AllBrainInfo:
- """
- Sends a signal to reset the unity environment.
- :return: AllBrainInfo : A data structure corresponding to the initial reset state of the environment.
- """
- if self._loaded:
- self.arenas_configurations.update(arenas_configurations)
-
- outputs = self.communicator.exchange(
- self._generate_reset_input(train_mode, arenas_configurations)
- )
- if outputs is None:
- raise KeyboardInterrupt
- rl_output = outputs.rl_output
- s = self._get_state(rl_output)
- self._global_done = s[1]
- for _b in self._external_brain_names:
- self._n_agents[_b] = len(s[0][_b].agents)
- return s[0]
- else:
- raise UnityEnvironmentException("No Unity environment is loaded.")
-
- def step(self, vector_action=None, memory=None, text_action=None, value=None, step_number=0) -> AllBrainInfo:
- """
- Provides the environment with an action, moves the environment dynamics forward accordingly,
- and returns observation, state, and reward information to the agent.
- :param value: Value estimates provided by agents.
- :param vector_action: Agent's vector action. Can be a scalar or vector of int/floats.
- :param memory: Vector corresponding to memory used for recurrent policies.
-        :param text_action: Text action to send to the environment.
- :return: AllBrainInfo : A Data structure corresponding to the new state of the environment.
- """
- vector_action = {} if vector_action is None else vector_action
- memory = {} if memory is None else memory
- text_action = {} if text_action is None else text_action
- value = {} if value is None else value
-
- # Check that environment is loaded, and episode is currently running.
- if self._loaded and not self._global_done and self._global_done is not None:
- if isinstance(vector_action, self.SINGLE_BRAIN_ACTION_TYPES):
- if self._num_external_brains == 1:
- vector_action = {self._external_brain_names[0]: vector_action}
- elif self._num_external_brains > 1:
- raise UnityActionException(
-                        "You have {0} brains, you need to feed a dictionary of brain names as keys, "
- "and vector_actions as values".format(self._num_brains))
- else:
- raise UnityActionException(
- "There are no external brains in the environment, "
- "step cannot take a vector_action input")
-
- if isinstance(memory, self.SINGLE_BRAIN_ACTION_TYPES):
- if self._num_external_brains == 1:
- memory = {self._external_brain_names[0]: memory}
- elif self._num_external_brains > 1:
- raise UnityActionException(
- "You have {0} brains, you need to feed a dictionary of brain names as keys "
- "and memories as values".format(self._num_brains))
- else:
- raise UnityActionException(
- "There are no external brains in the environment, "
- "step cannot take a memory input")
-
- if isinstance(text_action, self.SINGLE_BRAIN_TEXT_TYPES):
- if self._num_external_brains == 1:
- text_action = {self._external_brain_names[0]: text_action}
- elif self._num_external_brains > 1:
- raise UnityActionException(
- "You have {0} brains, you need to feed a dictionary of brain names as keys "
- "and text_actions as values".format(self._num_brains))
- else:
- raise UnityActionException(
- "There are no external brains in the environment, "
-                        "step cannot take a text_action input")
-
- if isinstance(value, self.SINGLE_BRAIN_ACTION_TYPES):
- if self._num_external_brains == 1:
- value = {self._external_brain_names[0]: value}
- elif self._num_external_brains > 1:
- raise UnityActionException(
- "You have {0} brains, you need to feed a dictionary of brain names as keys "
- "and state/action value estimates as values".format(self._num_brains))
- else:
- raise UnityActionException(
- "There are no external brains in the environment, "
- "step cannot take a value input")
-
- for brain_name in list(vector_action.keys()) + list(memory.keys()) + list(
- text_action.keys()):
- if brain_name not in self._external_brain_names:
- raise UnityActionException(
- "The name {0} does not correspond to an external brain "
- "in the environment".format(brain_name))
-
- for brain_name in self._external_brain_names:
- n_agent = self._n_agents[brain_name]
- if brain_name not in vector_action:
- if self._brains[brain_name].vector_action_space_type == "discrete":
- vector_action[brain_name] = [0.0] * n_agent * len(
- self._brains[brain_name].vector_action_space_size)
- else:
- vector_action[brain_name] = [0.0] * n_agent * \
- self._brains[
- brain_name].vector_action_space_size[0]
- else:
- vector_action[brain_name] = self._flatten(vector_action[brain_name])
- if brain_name not in memory:
- memory[brain_name] = []
- else:
- if memory[brain_name] is None:
- memory[brain_name] = []
- else:
- memory[brain_name] = self._flatten(memory[brain_name])
- if brain_name not in text_action:
- text_action[brain_name] = [""] * n_agent
- else:
- if text_action[brain_name] is None:
- text_action[brain_name] = [""] * n_agent
- if isinstance(text_action[brain_name], str):
- text_action[brain_name] = [text_action[brain_name]] * n_agent
-
- number_text_actions = len(text_action[brain_name])
- if not ((number_text_actions == n_agent) or number_text_actions == 0):
- raise UnityActionException(
- "There was a mismatch between the provided text_action and "
- "the environment's expectation: "
- "The brain {0} expected {1} text_action but was given {2}".format(
- brain_name, n_agent, number_text_actions))
-
- discrete_check = self._brains[brain_name].vector_action_space_type == "discrete"
-
- expected_discrete_size = n_agent * len(
- self._brains[brain_name].vector_action_space_size)
-
- continuous_check = self._brains[brain_name].vector_action_space_type == "continuous"
-
- expected_continuous_size = self._brains[brain_name].vector_action_space_size[
- 0] * n_agent
-
- if not ((discrete_check and len(
- vector_action[brain_name]) == expected_discrete_size) or
- (continuous_check and len(
- vector_action[brain_name]) == expected_continuous_size)):
- raise UnityActionException(
- "There was a mismatch between the provided action and "
- "the environment's expectation: "
- "The brain {0} expected {1} {2} action(s), but was provided: {3}"
- .format(brain_name, str(expected_discrete_size)
- if discrete_check
- else str(expected_continuous_size),
- self._brains[brain_name].vector_action_space_type,
- str(vector_action[brain_name])))
-
- outputs = self.communicator.exchange(
- self._generate_step_input(vector_action, memory, text_action, value))
- if outputs is None:
- raise KeyboardInterrupt
- rl_output = outputs.rl_output
- state = self._get_state(rl_output)
- self._global_done = state[1]
- for _b in self._external_brain_names:
- self._n_agents[_b] = len(state[0][_b].agents)
- return state[0]
- elif not self._loaded:
- raise UnityEnvironmentException("No Unity environment is loaded.")
- elif self._global_done:
- raise UnityActionException(
- "The episode is completed. Reset the environment with 'reset()'")
- elif self.global_done is None:
- raise UnityActionException(
- "You cannot conduct step without first calling reset. "
- "Reset the environment with 'reset()'")
-
- def close(self):
- """
- Sends a shutdown signal to the unity environment, and closes the socket connection.
- """
- if self._loaded:
- self._close()
- else:
- raise UnityEnvironmentException("No Unity environment is loaded.")
-
- def _close(self):
- self._loaded = False
- self.communicator.close()
- if self.proc1 is not None:
- self.proc1.kill()
-
- @classmethod
- def _flatten(cls, arr):
- """
- Converts arrays to list.
- :param arr: numpy vector.
- :return: flattened list.
- """
- if isinstance(arr, cls.SCALAR_ACTION_TYPES):
- arr = [float(arr)]
- if isinstance(arr, np.ndarray):
- arr = arr.tolist()
- if len(arr) == 0:
- return arr
- if isinstance(arr[0], np.ndarray):
- arr = [item for sublist in arr for item in sublist.tolist()]
- if isinstance(arr[0], list):
- arr = [item for sublist in arr for item in sublist]
- arr = [float(x) for x in arr]
- return arr
-
- def _get_state(self, output: UnityRLOutput) -> (AllBrainInfo, bool):
- """
- Collects experience information from all external brains in environment at current step.
- :return: a dictionary of BrainInfo objects.
- """
- _data = {}
- global_done = output.global_done
- for brain_name in output.agentInfos:
- agent_info_list = output.agentInfos[brain_name].value
- _data[brain_name] = BrainInfo.from_agent_proto(agent_info_list,
- self.brains[brain_name])
- return _data, global_done
-
- def _generate_step_input(self, vector_action, memory, text_action, value) -> UnityRLInput:
- rl_in = UnityRLInput()
- for b in vector_action:
- n_agents = self._n_agents[b]
- if n_agents == 0:
- continue
- _a_s = len(vector_action[b]) // n_agents
- _m_s = len(memory[b]) // n_agents
- for i in range(n_agents):
- action = AgentActionProto(
- vector_actions=vector_action[b][i * _a_s: (i + 1) * _a_s],
- memories=memory[b][i * _m_s: (i + 1) * _m_s],
- text_actions=text_action[b][i],
- )
- if b in value:
- if value[b] is not None:
- action.value = float(value[b][i])
- rl_in.agent_actions[b].value.extend([action])
- rl_in.command = 0
- return self.wrap_unity_input(rl_in)
-
- def _generate_reset_input(self, training, config: ArenaConfig) -> UnityRLInput:
- rl_in = UnityRLInput()
- rl_in.is_training = training
- rl_in.command = 1
- rl_reset = UnityRLResetInput()
- if (config is not None):
- rl_reset.CopyFrom(config.dict_to_arena_config())
- result = UnityInput()
- result.rl_input.CopyFrom(rl_in)
- result.rl_reset_input.CopyFrom(rl_reset)
- return result
-
- # return self.wrap_unity_input(rl_in)
-
- def send_academy_parameters(self,
- init_parameters: UnityRLInitializationInput) -> UnityRLInitializationOutput:
- inputs = UnityInput()
- inputs.rl_initialization_input.CopyFrom(init_parameters)
- return self.communicator.initialize(inputs).rl_initialization_output
-
- def wrap_unity_input(self, rl_input: UnityRLInput) -> UnityOutput:
- result = UnityInput()
- result.rl_input.CopyFrom(rl_input)
- return result
-
- # def send_update_arena_parameters(self, arena_parameters : ArenaConfigInput) -> None:
- #
- # # TODO: add return status ==> create new proto for ArenaParametersOutput
- #
- # self.communicator.exchange_arena_update(arena_parameters)
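
A hedged sketch of the reset/step cycle exposed by the UnityEnvironment class above; the binary path, YAML path, and two-branch discrete action are placeholder assumptions rather than values defined in this patch:

    import numpy as np
    from animalai.envs import UnityEnvironment
    from animalai.envs.arena_config import ArenaConfig

    env = UnityEnvironment(file_name='env/AnimalAI', n_arenas=1)          # placeholder binary path
    info = env.reset(arenas_configurations=ArenaConfig('configs/1-Food.yaml'))
    brain = env.external_brain_names[0]
    for _ in range(10):
        action = np.random.randint(0, 3, size=2)                          # assumed 2 discrete branches
        info = env.step(vector_action={brain: action})
        print(info[brain].rewards, info[brain].local_done)
    env.close()
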
diff --git a/animalai_bkp/envs/exception.py b/animalai_bkp/envs/exception.py
deleted file mode 100644
index edf16ff4..00000000
--- a/animalai_bkp/envs/exception.py
+++ /dev/null
@@ -1,63 +0,0 @@
-import logging
-logger = logging.getLogger("mlagents.envs")
-
-class UnityException(Exception):
- """
- Any error related to ml-agents environment.
- """
- pass
-
-class UnityEnvironmentException(UnityException):
- """
- Related to errors starting and closing environment.
- """
- pass
-
-
-class UnityActionException(UnityException):
- """
- Related to errors with sending actions.
- """
- pass
-
-class UnityTimeOutException(UnityException):
- """
- Related to errors with communication timeouts.
- """
- def __init__(self, message, log_file_path = None):
- if log_file_path is not None:
- try:
- with open(log_file_path, "r") as f:
- printing = False
- unity_error = '\n'
- for l in f:
- l=l.strip()
- if (l == 'Exception') or (l=='Error'):
- printing = True
- unity_error += '----------------------\n'
- if (l == ''):
- printing = False
- if printing:
- unity_error += l + '\n'
- logger.info(unity_error)
-                logger.error("An error might have occurred in the environment. "
- "You can check the logfile for more information at {}".format(log_file_path))
- except:
-                logger.error("An error might have occurred in the environment. "
- "No UnitySDK.log file could be found.")
- super(UnityTimeOutException, self).__init__(message)
-
-
-class UnityWorkerInUseException(UnityException):
- """
- This error occurs when the port for a certain worker ID is already reserved.
- """
-
- MESSAGE_TEMPLATE = (
- "Couldn't start socket communication because worker number {} is still in use. "
- "You may need to manually close a previously opened environment "
- "or use a different worker number.")
-
- def __init__(self, worker_id):
- message = self.MESSAGE_TEMPLATE.format(str(worker_id))
- super(UnityWorkerInUseException, self).__init__(message)
diff --git a/animalai_bkp/envs/rpc_communicator.py b/animalai_bkp/envs/rpc_communicator.py
deleted file mode 100644
index aa082305..00000000
--- a/animalai_bkp/envs/rpc_communicator.py
+++ /dev/null
@@ -1,115 +0,0 @@
-import logging
-import grpc
-
-import socket
-from multiprocessing import Pipe
-from concurrent.futures import ThreadPoolExecutor
-
-from .communicator import Communicator
-from animalai.communicator_objects import UnityToExternalServicer, add_UnityToExternalServicer_to_server
-from animalai.communicator_objects import UnityMessage, UnityInput, UnityOutput #, ArenaConfigInput
-from .exception import UnityTimeOutException, UnityWorkerInUseException
-
-logger = logging.getLogger("mlagents.envs")
-
-
-class UnityToExternalServicerImplementation(UnityToExternalServicer):
- def __init__(self):
- self.parent_conn, self.child_conn = Pipe()
-
- def Initialize(self, request, context):
- self.child_conn.send(request)
- return self.child_conn.recv()
-
- def Exchange(self, request, context):
- self.child_conn.send(request)
- return self.child_conn.recv()
-
-
-class RpcCommunicator(Communicator):
- def __init__(self, worker_id=0, base_port=5005):
- """
- Python side of the grpc communication. Python is the server and Unity the client
-
-
- :int base_port: Baseline port number to connect to Unity environment over. worker_id increments over this.
- :int worker_id: Number to add to communication port (5005) [0]. Used for asynchronous agent scenarios.
- """
- self.port = base_port + worker_id
- self.worker_id = worker_id
- self.server = None
- self.unity_to_external = None
- self.is_open = False
- self.create_server()
-
- def create_server(self):
- """
- Creates the GRPC server.
- """
- self.check_port(self.port)
-
- try:
- # Establish communication grpc
- self.server = grpc.server(ThreadPoolExecutor(max_workers=10))
- self.unity_to_external = UnityToExternalServicerImplementation()
- add_UnityToExternalServicer_to_server(self.unity_to_external, self.server)
- # Using unspecified address, which means that grpc is communicating on all IPs
- # This is so that the docker container can connect.
- self.server.add_insecure_port('[::]:' + str(self.port))
- self.server.start()
- self.is_open = True
- except:
- raise UnityWorkerInUseException(self.worker_id)
-
- def check_port(self, port):
- """
- Attempts to bind to the requested communicator port, checking if it is already in use.
- """
- s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
- try:
- s.bind(("localhost", port))
- except socket.error:
- raise UnityWorkerInUseException(self.worker_id)
- finally:
- s.close()
-
- def initialize(self, inputs: UnityInput) -> UnityOutput:
- if not self.unity_to_external.parent_conn.poll(3000):
- raise UnityTimeOutException(
- "The Unity environment took too long to respond. Make sure that :\n"
- "\t The environment does not need user interaction to launch\n"
- "\t The Academy's Broadcast Hub is configured correctly\n"
- "\t The Agents are linked to the appropriate Brains\n"
- "\t The environment and the Python interface have compatible versions.")
- aca_param = self.unity_to_external.parent_conn.recv().unity_output
- message = UnityMessage()
- message.header.status = 200
- message.unity_input.CopyFrom(inputs)
- self.unity_to_external.parent_conn.send(message)
- self.unity_to_external.parent_conn.recv()
- return aca_param
-
- def exchange(self, inputs: UnityInput) -> UnityOutput:
- message = UnityMessage()
- message.header.status = 200
- message.unity_input.CopyFrom(inputs)
- self.unity_to_external.parent_conn.send(message)
- output = self.unity_to_external.parent_conn.recv()
- if output.header.status != 200:
- return None
- return output.unity_output
-
- def close(self):
- """
- Sends a shutdown signal to the unity environment, and closes the grpc connection.
- """
- if self.is_open:
- message_input = UnityMessage()
- message_input.header.status = 400
- self.unity_to_external.parent_conn.send(message_input)
- self.unity_to_external.parent_conn.close()
- self.server.stop(False)
- self.is_open = False
-
- # def exchange_arena_update(self, inputs: ArenaConfigInput) -> None:
- # self.unity_to_external.parent_conn.send(inputs)
diff --git a/animalai_bkp/envs/socket_communicator.py b/animalai_bkp/envs/socket_communicator.py
deleted file mode 100644
index c600e938..00000000
--- a/animalai_bkp/envs/socket_communicator.py
+++ /dev/null
@@ -1,98 +0,0 @@
-import logging
-import socket
-import struct
-
-from .communicator import Communicator
-from animalai.communicator_objects import UnityMessage, UnityOutput, UnityInput
-from .exception import UnityTimeOutException
-
-
-logger = logging.getLogger("mlagents.envs")
-
-
-class SocketCommunicator(Communicator):
- def __init__(self, worker_id=0,
- base_port=5005):
- """
- Python side of the socket communication
-
- :int base_port: Baseline port number to connect to Unity environment over. worker_id increments over this.
- :int worker_id: Number to add to communication port (5005) [0]. Used for asynchronous agent scenarios.
- """
-
- self.port = base_port + worker_id
- self._buffer_size = 12000
- self.worker_id = worker_id
- self._socket = None
- self._conn = None
-
- def initialize(self, inputs: UnityInput) -> UnityOutput:
- try:
- # Establish communication socket
- self._socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
- self._socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
- self._socket.bind(("localhost", self.port))
- except:
- raise UnityTimeOutException("Couldn't start socket communication because worker number {} is still in use. "
- "You may need to manually close a previously opened environment "
- "or use a different worker number.".format(str(self.worker_id)))
- try:
- self._socket.settimeout(30)
- self._socket.listen(1)
- self._conn, _ = self._socket.accept()
- self._conn.settimeout(30)
-        except:
- raise UnityTimeOutException(
- "The Unity environment took too long to respond. Make sure that :\n"
- "\t The environment does not need user interaction to launch\n"
- "\t The Academy's Broadcast Hub is configured correctly\n"
- "\t The Agents are linked to the appropriate Brains\n"
- "\t The environment and the Python interface have compatible versions.")
- message = UnityMessage()
- message.header.status = 200
- message.unity_input.CopyFrom(inputs)
- self._communicator_send(message.SerializeToString())
- initialization_output = UnityMessage()
- initialization_output.ParseFromString(self._communicator_receive())
- return initialization_output.unity_output
-
- def _communicator_receive(self):
- try:
- s = self._conn.recv(self._buffer_size)
- message_length = struct.unpack("I", bytearray(s[:4]))[0]
- s = s[4:]
- while len(s) != message_length:
- s += self._conn.recv(self._buffer_size)
- except socket.timeout as e:
- raise UnityTimeOutException("The environment took too long to respond.")
- return s
-
- def _communicator_send(self, message):
- self._conn.send(struct.pack("I", len(message)) + message)
-
- def exchange(self, inputs: UnityInput) -> UnityOutput:
- message = UnityMessage()
- message.header.status = 200
- message.unity_input.CopyFrom(inputs)
- self._communicator_send(message.SerializeToString())
- outputs = UnityMessage()
- outputs.ParseFromString(self._communicator_receive())
- if outputs.header.status != 200:
- return None
- return outputs.unity_output
-
- def close(self):
- """
- Sends a shutdown signal to the unity environment, and closes the socket connection.
- """
- if self._socket is not None and self._conn is not None:
- message_input = UnityMessage()
- message_input.header.status = 400
- self._communicator_send(message_input.SerializeToString())
- if self._socket is not None:
- self._socket.close()
- self._socket = None
-        if self._conn is not None:
- self._conn.close()
- self._conn = None
-
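
The socket transport above frames every serialized protobuf with a 4-byte struct-packed length prefix. A self-contained sketch of that framing, mirroring _communicator_send and _communicator_receive (the function names here are illustrative):

    import socket
    import struct

    def send_framed(conn: socket.socket, payload: bytes) -> None:
        # Length prefix ("I", native byte order, as in the code above)
        # followed by the serialized message.
        conn.send(struct.pack("I", len(payload)) + payload)

    def recv_framed(conn: socket.socket, buffer_size: int = 12000) -> bytes:
        # Read until the announced message length has been received.
        chunk = conn.recv(buffer_size)
        message_length = struct.unpack("I", chunk[:4])[0]
        data = chunk[4:]
        while len(data) < message_length:
            data += conn.recv(buffer_size)
        return data
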
diff --git a/animalai_bkp/trainers/__init__.py b/animalai_bkp/trainers/__init__.py
deleted file mode 100644
index 4859f558..00000000
--- a/animalai_bkp/trainers/__init__.py
+++ /dev/null
@@ -1,15 +0,0 @@
-from .buffer import *
-from .curriculum import *
-from .meta_curriculum import *
-from .models import *
-from .trainer_controller import *
-from .bc.models import *
-from .bc.offline_trainer import *
-from .bc.online_trainer import *
-from .bc.policy import *
-from .ppo.models import *
-from .ppo.trainer import *
-from .ppo.policy import *
-from .exception import *
-from .policy import *
-from .demo_loader import *
diff --git a/animalai_bkp/trainers/barracuda.py b/animalai_bkp/trainers/barracuda.py
deleted file mode 100644
index 813d54db..00000000
--- a/animalai_bkp/trainers/barracuda.py
+++ /dev/null
@@ -1,491 +0,0 @@
-from __future__ import print_function
-from collections import defaultdict
-import numpy as np
-import json
-import struct # convert from Python values and C structs
-import re
-import argparse
-import os.path
-
-BARRACUDA_VERSION = 16
-
-# Definition of Barracuda model
-class Model:
- def __init__(self):
- self.layers = []
- self.tensors = {}
- self.inputs = {}
- self.outputs = []
- self.globals = []
- self.memories = []
-
-class Struct:
- "A structure that can have any fields defined."
- def __init__(self, **entries): self.__dict__.update(entries)
-
-# Parse command line arguments
-def parse_args(description, source_extension, help):
- parser = argparse.ArgumentParser(description=description)
- parser.add_argument('source_file', help=help)
- parser.add_argument('target_file', help='output Barracuda binary file')
- parser.add_argument('-trim', '--trim-unused-by-output')
- parser.add_argument('--print-layers', action='store_true')
- parser.add_argument('--print-source-json', action='store_true')
- parser.add_argument('-json', '--print-barracuda-json', action='store_true')
- parser.add_argument('--print-layer-links', action='store_true')
- parser.add_argument('--print-patterns', action='store_true')
- parser.add_argument('--print-tensors', action='store_true')
- parser.add_argument('--verbose', action='store_true')
- args = parser.parse_args()
- args.compress_f16 = False # TEMP: disabled, until properly implemented parser.add_argument('-f16', '--compress-f16', action='store_true')
-
- output_extension = '.bc' if not args.compress_f16 else '.f16.bc'
-
- if not os.path.exists(args.source_file):
- args.source_file = args.source_file + source_extension
-
- if not os.path.exists(args.source_file):
- print('File', args.source_file, 'does not exist.')
- exit(-1)
-
-    def replaceFilenameExtension(filename, newExtension):
-        return os.path.splitext(os.path.basename(filename))[0] + newExtension
-
- if os.path.isdir(args.target_file):
- args.target_file = os.path.join(args.target_file, replaceFilenameExtension(args.source_file, output_extension))
-
- if args.verbose:
- print(args)
-
- return args
-
-# Fuse training time BatchNorm tensors into Scale & Bias
-def fuse_batchnorm_weights(gamma, beta, mean, var, epsilon):
- # https://github.com/Tencent/ncnn/blob/master/src/layer/batchnorm.cpp
- """ float sqrt_var = sqrt(var_data[i]);
- a_data[i] = bias_data[i] - slope_data[i] * mean_data[i] / sqrt_var;
- b_data[i] = slope_data[i] / sqrt_var;
- ...
- ptr[i] = b * ptr[i] + a;
- """
- scale = gamma / np.sqrt(var + epsilon)
- bias = beta - gamma * mean / np.sqrt(var + epsilon)
- return [scale, bias]
-
-# Resort layers so that all inputs are satisfied for every layer beforehand
-def sort(model, inputs, memories, verbose):
- if hasattr(model, 'layers'):
- model = model.layers
- inputs_and_memories = set(list(inputs) + list(memories[1::3]))
-
- def find_missing_inputs(model, inputs):
- missing = set()
- ready = set(inputs)
- for l in model:
- for i in l.inputs:
- if i not in ready:
- missing.add(i)
- ready.add(l.name)
- return missing
-
- # Class to represent a graph
- # Taken from: https://www.geeksforgeeks.org/python-program-for-topological-sorting/
- class Graph:
- def __init__(self,vertices):
- self.graph = defaultdict(list) #dictionary containing adjacency List
- self.V = vertices #No. of vertices
-
- # function to add an edge to graph
- def addEdge(self,u,v):
- self.graph[u].append(v)
-
- # A recursive function used by topologicalSort
- def topologicalSortUtil(self,v,visited,stack):
-
- # Mark the current node as visited.
- visited[v] = True
-
- # Recur for all the vertices adjacent to this vertex
- for i in self.graph[v]:
- if visited[i] == False:
- self.topologicalSortUtil(i,visited,stack)
-
- # Push current vertex to stack which stores result
- stack.insert(0,v)
-
- # The function to do Topological Sort. It uses recursive
- # topologicalSortUtil()
- def topologicalSort(self):
- # Mark all the vertices as not visited
- visited = [False]*self.V
- stack =[]
-
- # Call the recursive helper function to store Topological
- # Sort starting from all vertices one by one
- for i in range(self.V):
- if visited[i] == False:
- self.topologicalSortUtil(i,visited,stack)
-
- #print(stack)
- return stack
-
- if (len(find_missing_inputs(model, inputs_and_memories)) == 0):
- return model
-
- g = Graph(len(model))
-
- layers = {}
- id = 0
- for l in model:
- layers[l.name] = id;
- id += 1
-
- for layer in model:
- for i in layer.inputs:
- if i not in inputs_and_memories:
- g.addEdge(layers[i], layers[layer.name])
-
- sorted_layer_indices = g.topologicalSort()
- print("SORTED:", sorted_layer_indices)
- new_model = [model[idx] for idx in sorted_layer_indices]
-
- assert(len(find_missing_inputs(new_model, inputs_and_memories)) == 0)
- return new_model
-
-
-
-# Trim
-def trim(model, criteria_regexp_string, verbose):
- if hasattr(model, 'layers'):
- model = model.layers
-
- def flatten(items,enter=lambda x:isinstance(x, list)):
- # http://stackoverflow.com/a/40857703
- # https://github.com/ctmakro/canton/blob/master/canton/misc.py
- """Yield items from any nested iterable; see REF."""
- for x in items:
- if enter(x):
- yield from flatten(x)
- else:
- yield x
-
- def trim_model(model, outputs):
- layers = {l.name:l for l in model}
- connected = {o for o in outputs}
- while len(outputs) > 0:
- outputs = set(flatten([layers[o].inputs for o in outputs if o in layers]))
- if verbose and len(outputs) > 0:
- print(outputs)
- for o in outputs:
- connected.add(o)
-
- trimmed = [l.name for l in model if l.name not in connected]
- def array_without_brackets(arr):
- return str(arr)[1:-1] # array to string without brackets
- print("TRIMMED:", array_without_brackets(trimmed))
-
- return [l for l in model if l.name in connected]
-
- layer_names = {l.name for l in model}
- criteria = re.compile(criteria_regexp_string)
- preserve_outputs = list(filter(criteria.match, layer_names))
- if preserve_outputs:
- print("Trimming model given outputs to preserve:", preserve_outputs)
- model = trim_model(model, preserve_outputs)
- else:
- print("WARNING: Trim couldn't find any layers to match:", criteria_regexp_string)
- return model
-
-def compress(model):
- compress_classes = {
- 'Dense'
- }
- for l in model.layers:
- if (l.class_name in compress_classes):
- print("Compressing %s layer '%s' weights to float16" % (l.class_name, l.name))
- for x in l.tensors:
- x.data = np.float16(x.data)
- return model
-
-# Verbose
-def to_json(model):
- class StructEncoder(json.JSONEncoder):
- def default(self, o):
- if isinstance(o, np.ndarray): # skip binary data packed inside ndarray
- return ""
- if getattr(o, '__dict__', None):
- return o.__dict__
- return str(o)
-
- s = json.dumps(model.layers, cls=StructEncoder, separators=(', ',':'))
- # custom formatting
- s = s.replace(']}, {', ']},\n{')
- s = s.replace(':[{', ':[\n\t{')
- s = s.replace('}, {', '},\n\t{')
- s = s.replace('"', "'")
- return s
-
-def summary(model, print_layer_links, print_barracuda_json, print_tensors):
- def array_without_brackets(arr):
- return str(arr)[1:-1] # array to string without brackets
-
- if print_layer_links:
- for l in model.layers:
- print(l.name, " <= ", l.inputs)
-
- if print_barracuda_json:
- print(to_json(model))
-
- if model.globals:
- if isinstance(model.globals, dict):
- model.globals = {x.name:x.shape for x in model.globals}
- print("GLOBALS:", array_without_brackets(model.globals))
-
- for l in model.layers:
- if isinstance(model.inputs, dict):
- ins = {i:model.inputs[i] for i in l.inputs if i in model.inputs}
- else:
- ins = [i for i in l.inputs if i in model.inputs]
- if ins:
- print("IN: %s => '%s'" % (array_without_brackets(ins), l.name))
- for mem_in, mem_out in zip(model.memories[1::3], model.memories[2::3]):
- print("MEM: '%s' => '%s'" % (mem_in, mem_out))
- print("OUT:", array_without_brackets(model.outputs))
-
- if (print_tensors):
- for l in model.layers:
- for x in l.tensors:
- print(x.name, x.shape, x.data.dtype, x.data)
-
-class Build:
- def __init__(self, scope=''):
- self.scope = scope
- self.layers = []
- self.names_taken = set()
-
- def __getattr__(self, attr):
- if attr == '_':
-            return self.layers[-1].name if len(self.layers) > 0 else self.scope
- raise AttributeError(attr)
-
- def _patch_last_layer_name_and_return(self):
- if self.layers[-1].name:
- return self.layers[-1].name
-
- # generate unique name based on op and increasing id
- name = self.layers[-1].op
-
- i = 1
- while name in self.names_taken:
- name = self.layers[-1].op + '_' + str(i)
- i += 1
- self.names_taken.add(name)
-
- self.layers[-1].name = self.scope + ('/' if self.scope else '') + name
- return self.layers[-1].name
-
- def concat(self, a, b, out=''):
- self.layers += [Struct(name=out, op='Concat', input=[a, b])]
- return self._patch_last_layer_name_and_return()
- def mad(self, x, kernel, bias, out=''):
- self.layers += [Struct(name=out, op='Dense', input=[x, kernel, bias])]
- return self._patch_last_layer_name_and_return()
- def mul(self, a, b, out=''):
- self.layers += [Struct(name=out, op='Mul', input=[a, b])]
- return self._patch_last_layer_name_and_return()
- def add(self, a, b, out=''):
- self.layers += [Struct(name=out, op='Add', input=[a, b])]
- return self._patch_last_layer_name_and_return()
- def sub(self, a, b, out=''):
- self.layers += [Struct(name=out, op='Sub', input=[a, b])]
- return self._patch_last_layer_name_and_return()
- def sigmoid(self, x, out=''):
- self.layers += [Struct(name=out, op='Sigmoid', input=[x])]
- return self._patch_last_layer_name_and_return()
- def tanh(self, x, out=''):
- self.layers += [Struct(name=out, op='Tanh', input=[x])]
- return self._patch_last_layer_name_and_return()
-
-def rnn(name, input, state, kernel, bias, new_state, number_of_gates = 2):
- ''' - Ht = f(Xt*Wi + Ht_1*Ri + Wbi + Rbi)
- '''
-
- nn = Build(name)
- nn.tanh(
- nn.mad(kernel=kernel, bias=bias,
- x=nn.concat(input, state)),
- out=new_state);
- return nn.layers;
-
-def gru(name, input, state, kernel_r, kernel_u, kernel_c, bias_r, bias_u, bias_c, new_state, number_of_gates = 2):
- ''' - zt = f(Xt*Wz + Ht_1*Rz + Wbz + Rbz)
- - rt = f(Xt*Wr + Ht_1*Rr + Wbr + Rbr)
- - ht = g(Xt*Wh + (rt . Ht_1)*Rh + Rbh + Wbh)
- - Ht = (1-zt).ht + zt.Ht_1
- '''
- nn = Build(name)
- inputs = nn.concat(input, state)
-
- u = nn.sigmoid(nn.mad(inputs, kernel_u, bias_u))
- r = nn.sigmoid(nn.mad(inputs, kernel_r, bias_r))
- r_state = nn.mul(r, state)
-
- c = nn.tanh(nn.mad(kernel=kernel_c, bias=bias_c,
- x=nn.concat(input, r_state)))
-
- # new_h = u' * state + (1 - u') * c'
- # = u' * state + c' - u' * c'
-
- # u' * state + c'
- nn.add(nn.mul(u, state), c)
- # - u' * c'
- nn.sub(nn._, nn.mul(u, c),
- out=new_state)
-
- return nn.layers;
-
-def lstm(name, input, state_c, state_h, kernel_i, kernel_j, kernel_f, kernel_o, bias_i, bias_j, bias_f, bias_o, new_state_c, new_state_h):
- ''' Full:
- - it = f(Xt*Wi + Ht_1*Ri + Pi . Ct_1 + Wbi + Rbi)
- - ft = f(Xt*Wf + Ht_1*Rf + Pf . Ct_1 + Wbf + Rbf)
- - ct = g(Xt*Wc + Ht_1*Rc + Wbc + Rbc)
- - Ct = ft . Ct_1 + it . ct
- - ot = f(Xt*Wo + Ht_1*Ro + Po . Ct + Wbo + Rbo)
- - Ht = ot . h(Ct)
- '''
-
- ''' No peephole:
- - it = f(Xt*Wi + Ht_1*Ri + Wbi + Rbi)
- - ft = f(Xt*Wf + Ht_1*Rf + Wbf + Rbf)
- - ct = g(Xt*Wc + Ht_1*Rc + Wbc + Rbc)
- - Ct = ft . Ct_ + it . ct
- - ot = f(Xt*Wo + Ht_1*Ro + Wbo + Rbo)
- - Ht = ot . h(Ct)
- '''
-
- nn = Build(name)
- inputs = nn.concat(input, state_h)
-
- i = nn.sigmoid(nn.mad(x=inputs, kernel=kernel_i, bias=bias_i))
- j = nn.tanh(nn.mad(inputs, kernel_j, bias_j))
- f = nn.sigmoid(nn.mad(inputs, kernel_f, bias_f))
- o = nn.sigmoid(nn.mad(inputs, kernel_o, bias_o))
-
- # new_c = state_c * f' + i' * j'
- nn.add(
- nn.mul(state_c, f), nn.mul(i, j),
- out=new_state_c)
-
- # new_h =
- nn.mul(o, nn.tanh(new_state_c),
- out=new_state_h)
-
- return nn.layers
-
-# Serialize
-class BarracudaWriter:
- f = None
-
- def __init__(self, filename):
- self.f = open(filename, 'wb+')
-
- def __enter__(self):
- return self
-
- def __exit__(self, type, value, tb):
- self.f.close()
-
- def write_array(self, arr):
- arr.tofile(self.f)
-
-    def write_str_array(self, array_of_strings):
-        self.write_int32(len(array_of_strings))
-        for s in array_of_strings:
- self.write_str(s)
-
- def write_str(self, s):
- self.write_int32(len(s))
- self.f.write(s.encode('ascii'))
-
- def write_float(self, d):
- self.f.write(struct.pack('> 2 # length is measured in float32s (at least for now)
-
- w.write_str(x.name)
- w.write_shape(x.shape)
- w.write_int64(offset)
- w.write_int32(x.data.itemsize)
- w.write_int32(length)
-
- offset += length
- all_tensors.append(x)
-
- for x in all_tensors:
- w.write_array(x.data)
-
-
-
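
As a quick sanity check of fuse_batchnorm_weights defined in this file, folding BatchNorm statistics into a scale and bias should reproduce the standard inference formula; the numbers below are arbitrary test values.

    import numpy as np

    gamma, beta = np.array([1.5]), np.array([0.1])
    mean, var, eps = np.array([0.2]), np.array([4.0]), 1e-5
    x = np.array([0.7])

    scale = gamma / np.sqrt(var + eps)
    bias = beta - gamma * mean / np.sqrt(var + eps)

    # scale * x + bias must equal gamma * (x - mean) / sqrt(var + eps) + beta
    assert np.allclose(scale * x + bias,
                       gamma * (x - mean) / np.sqrt(var + eps) + beta)
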
diff --git a/animalai_bkp/trainers/bc/__init__.py b/animalai_bkp/trainers/bc/__init__.py
deleted file mode 100644
index 80cd0aa0..00000000
--- a/animalai_bkp/trainers/bc/__init__.py
+++ /dev/null
@@ -1,4 +0,0 @@
-from .models import *
-from .online_trainer import *
-from .offline_trainer import *
-from .policy import *
diff --git a/animalai_bkp/trainers/bc/models.py b/animalai_bkp/trainers/bc/models.py
deleted file mode 100644
index 06cdab6d..00000000
--- a/animalai_bkp/trainers/bc/models.py
+++ /dev/null
@@ -1,55 +0,0 @@
-import tensorflow as tf
-import tensorflow.contrib.layers as c_layers
-from animalai.trainers.models import LearningModel
-
-
-class BehavioralCloningModel(LearningModel):
- def __init__(self, brain, h_size=128, lr=1e-4, n_layers=2, m_size=128,
- normalize=False, use_recurrent=False, seed=0):
- LearningModel.__init__(self, m_size, normalize, use_recurrent, brain, seed)
- num_streams = 1
- hidden_streams = self.create_observation_streams(num_streams, h_size, n_layers)
- hidden = hidden_streams[0]
- self.dropout_rate = tf.placeholder(dtype=tf.float32, shape=[], name="dropout_rate")
- hidden_reg = tf.layers.dropout(hidden, self.dropout_rate)
- if self.use_recurrent:
- tf.Variable(self.m_size, name="memory_size", trainable=False, dtype=tf.int32)
- self.memory_in = tf.placeholder(shape=[None, self.m_size], dtype=tf.float32, name='recurrent_in')
- hidden_reg, self.memory_out = self.create_recurrent_encoder(hidden_reg, self.memory_in,
- self.sequence_length)
- self.memory_out = tf.identity(self.memory_out, name='recurrent_out')
-
- if brain.vector_action_space_type == "discrete":
- policy_branches = []
- for size in self.act_size:
- policy_branches.append(
- tf.layers.dense(
- hidden,
- size,
- activation=None,
- use_bias=False,
- kernel_initializer=c_layers.variance_scaling_initializer(factor=0.01)))
- self.action_probs = tf.concat(
- [tf.nn.softmax(branch) for branch in policy_branches], axis=1, name="action_probs")
- self.action_masks = tf.placeholder(shape=[None, sum(self.act_size)], dtype=tf.float32, name="action_masks")
- self.sample_action_float, normalized_logits = self.create_discrete_action_masking_layer(
- tf.concat(policy_branches, axis=1), self.action_masks, self.act_size)
- tf.identity(normalized_logits, name='action')
- self.sample_action = tf.cast(self.sample_action_float, tf.int32)
- self.true_action = tf.placeholder(shape=[None, len(policy_branches)], dtype=tf.int32, name="teacher_action")
- self.action_oh = tf.concat([
- tf.one_hot(self.true_action[:, i], self.act_size[i]) for i in range(len(self.act_size))], axis=1)
- self.loss = tf.reduce_sum(-tf.log(self.action_probs + 1e-10) * self.action_oh)
- self.action_percent = tf.reduce_mean(tf.cast(
- tf.equal(tf.cast(tf.argmax(self.action_probs, axis=1), tf.int32), self.sample_action), tf.float32))
- else:
- self.policy = tf.layers.dense(hidden_reg, self.act_size[0], activation=None, use_bias=False, name='pre_action',
- kernel_initializer=c_layers.variance_scaling_initializer(factor=0.01))
- self.clipped_sample_action = tf.clip_by_value(self.policy, -1, 1)
- self.sample_action = tf.identity(self.clipped_sample_action, name="action")
- self.true_action = tf.placeholder(shape=[None, self.act_size[0]], dtype=tf.float32, name="teacher_action")
- self.clipped_true_action = tf.clip_by_value(self.true_action, -1, 1)
- self.loss = tf.reduce_sum(tf.squared_difference(self.clipped_true_action, self.sample_action))
-
- optimizer = tf.train.AdamOptimizer(learning_rate=lr)
- self.update = optimizer.minimize(self.loss)
diff --git a/animalai_bkp/trainers/bc/offline_trainer.py b/animalai_bkp/trainers/bc/offline_trainer.py
deleted file mode 100644
index 36e209f2..00000000
--- a/animalai_bkp/trainers/bc/offline_trainer.py
+++ /dev/null
@@ -1,56 +0,0 @@
-# # Unity ML-Agents Toolkit
-# ## ML-Agent Learning (Behavioral Cloning)
-# Contains an implementation of Behavioral Cloning Algorithm
-
-import logging
-import copy
-
-from animalai.trainers.bc.trainer import BCTrainer
-from animalai.trainers.demo_loader import demo_to_buffer
-from animalai.trainers.trainer import UnityTrainerException
-
-logger = logging.getLogger("mlagents.trainers")
-
-
-class OfflineBCTrainer(BCTrainer):
- """The OfflineBCTrainer is an implementation of Offline Behavioral Cloning."""
-
- def __init__(self, brain, trainer_parameters, training, load, seed, run_id):
- """
-        Responsible for collecting experiences and training the behavioral cloning model.
-        :param trainer_parameters: The parameters for the trainer (dictionary).
-        :param training: Whether the trainer is set for training.
-        :param load: Whether the model should be loaded.
-        :param seed: The seed the model will be initialized with.
-        :param run_id: The identifier of the current run.
- """
- super(OfflineBCTrainer, self).__init__(
- brain, trainer_parameters, training, load, seed, run_id)
-
- self.param_keys = ['batch_size', 'summary_freq', 'max_steps',
- 'batches_per_epoch', 'use_recurrent',
- 'hidden_units', 'learning_rate', 'num_layers',
- 'sequence_length', 'memory_size', 'model_path',
- 'demo_path']
-
- self.check_param_keys()
- self.batches_per_epoch = trainer_parameters['batches_per_epoch']
- self.n_sequences = max(int(trainer_parameters['batch_size'] / self.policy.sequence_length),
- 1)
-
- brain_params, self.demonstration_buffer = demo_to_buffer(
- trainer_parameters['demo_path'],
- self.policy.sequence_length)
-
- policy_brain = copy.deepcopy(brain.__dict__)
- expert_brain = copy.deepcopy(brain_params.__dict__)
- policy_brain.pop('brain_name')
- expert_brain.pop('brain_name')
- if expert_brain != policy_brain:
- raise UnityTrainerException("The provided demonstration is not compatible with the "
- "brain being used for performance evaluation.")
-
- def __str__(self):
- return '''Hyperparameters for the Imitation Trainer of brain {0}: \n{1}'''.format(
- self.brain_name, '\n'.join(
- ['\t{0}:\t{1}'.format(x, self.trainer_parameters[x]) for x in self.param_keys]))
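
The constructor above validates that trainer_parameters provides every key listed in param_keys (the BCTrainer base class additionally reads summary_path). A hedged example of such a dictionary follows; every value and path below is a placeholder, not a default shipped with this package.

    offline_bc_parameters = {
        'batch_size': 64,
        'summary_freq': 1000,
        'max_steps': 5.0e4,
        'batches_per_epoch': 10,
        'use_recurrent': False,
        'hidden_units': 128,
        'learning_rate': 3.0e-4,
        'num_layers': 2,
        'sequence_length': 32,
        'memory_size': 256,
        'model_path': './models/offline_bc',
        'demo_path': './demos/example.demo',
        'summary_path': './summaries/offline_bc',   # read by BCTrainer.__init__
    }
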
diff --git a/animalai_bkp/trainers/bc/online_trainer.py b/animalai_bkp/trainers/bc/online_trainer.py
deleted file mode 100644
index d06ac321..00000000
--- a/animalai_bkp/trainers/bc/online_trainer.py
+++ /dev/null
@@ -1,116 +0,0 @@
-# # Unity ML-Agents Toolkit
-# ## ML-Agent Learning (Behavioral Cloning)
-# Contains an implementation of Behavioral Cloning Algorithm
-
-import logging
-import numpy as np
-
-from animalai.envs import AllBrainInfo
-from animalai.trainers.bc.trainer import BCTrainer
-
-logger = logging.getLogger("mlagents.trainers")
-
-
-class OnlineBCTrainer(BCTrainer):
- """The OnlineBCTrainer is an implementation of Online Behavioral Cloning."""
-
- def __init__(self, brain, trainer_parameters, training, load, seed, run_id):
- """
-        Responsible for collecting experiences and training the behavioral cloning model.
-        :param trainer_parameters: The parameters for the trainer (dictionary).
-        :param training: Whether the trainer is set for training.
-        :param load: Whether the model should be loaded.
-        :param seed: The seed the model will be initialized with.
-        :param run_id: The identifier of the current run.
- """
- super(OnlineBCTrainer, self).__init__(brain, trainer_parameters, training, load, seed,
- run_id)
-
- self.param_keys = ['brain_to_imitate', 'batch_size', 'time_horizon',
- 'summary_freq', 'max_steps',
- 'batches_per_epoch', 'use_recurrent',
- 'hidden_units', 'learning_rate', 'num_layers',
- 'sequence_length', 'memory_size', 'model_path']
-
- self.check_param_keys()
- self.brain_to_imitate = trainer_parameters['brain_to_imitate']
- self.batches_per_epoch = trainer_parameters['batches_per_epoch']
- self.n_sequences = max(int(trainer_parameters['batch_size'] / self.policy.sequence_length),
- 1)
-
- def __str__(self):
- return '''Hyperparameters for the Imitation Trainer of brain {0}: \n{1}'''.format(
- self.brain_name, '\n'.join(
- ['\t{0}:\t{1}'.format(x, self.trainer_parameters[x]) for x in self.param_keys]))
-
- def add_experiences(self, curr_info: AllBrainInfo, next_info: AllBrainInfo,
- take_action_outputs):
- """
- Adds experiences to each agent's experience history.
- :param curr_info: Current AllBrainInfo (Dictionary of all current brains and corresponding BrainInfo).
- :param next_info: Next AllBrainInfo (Dictionary of all current brains and corresponding BrainInfo).
- :param take_action_outputs: The outputs of the take action method.
- """
-
- # Used to collect teacher experience into training buffer
- info_teacher = curr_info[self.brain_to_imitate]
- next_info_teacher = next_info[self.brain_to_imitate]
- for agent_id in info_teacher.agents:
- self.demonstration_buffer[agent_id].last_brain_info = info_teacher
-
- for agent_id in next_info_teacher.agents:
- stored_info_teacher = self.demonstration_buffer[agent_id].last_brain_info
- if stored_info_teacher is None:
- continue
- else:
- idx = stored_info_teacher.agents.index(agent_id)
- next_idx = next_info_teacher.agents.index(agent_id)
- if stored_info_teacher.text_observations[idx] != "":
- info_teacher_record, info_teacher_reset = \
- stored_info_teacher.text_observations[idx].lower().split(",")
- next_info_teacher_record, next_info_teacher_reset = \
- next_info_teacher.text_observations[idx]. \
- lower().split(",")
- if next_info_teacher_reset == "true":
- self.demonstration_buffer.reset_update_buffer()
- else:
- info_teacher_record, next_info_teacher_record = "true", "true"
- if info_teacher_record == "true" and next_info_teacher_record == "true":
- if not stored_info_teacher.local_done[idx]:
- for i in range(self.policy.vis_obs_size):
- self.demonstration_buffer[agent_id]['visual_obs%d' % i] \
- .append(stored_info_teacher.visual_observations[i][idx])
- if self.policy.use_vec_obs:
- self.demonstration_buffer[agent_id]['vector_obs'] \
- .append(stored_info_teacher.vector_observations[idx])
- if self.policy.use_recurrent:
- if stored_info_teacher.memories.shape[1] == 0:
- stored_info_teacher.memories = np.zeros(
- (len(stored_info_teacher.agents),
- self.policy.m_size))
- self.demonstration_buffer[agent_id]['memory'].append(
- stored_info_teacher.memories[idx])
- self.demonstration_buffer[agent_id]['actions'].append(
- next_info_teacher.previous_vector_actions[next_idx])
-
- super(OnlineBCTrainer, self).add_experiences(curr_info, next_info, take_action_outputs)
-
- def process_experiences(self, current_info: AllBrainInfo, next_info: AllBrainInfo):
- """
- Checks agent histories for processing condition, and processes them as necessary.
- Processing involves calculating value and advantage targets for model updating step.
- :param current_info: Current AllBrainInfo
- :param next_info: Next AllBrainInfo
- """
- info_teacher = next_info[self.brain_to_imitate]
- for l in range(len(info_teacher.agents)):
- teacher_action_list = len(self.demonstration_buffer[info_teacher.agents[l]]['actions'])
- horizon_reached = teacher_action_list > self.trainer_parameters['time_horizon']
- teacher_filled = len(self.demonstration_buffer[info_teacher.agents[l]]['actions']) > 0
- if (info_teacher.local_done[l] or horizon_reached) and teacher_filled:
- agent_id = info_teacher.agents[l]
- self.demonstration_buffer.append_update_buffer(
- agent_id, batch_size=None, training_length=self.policy.sequence_length)
- self.demonstration_buffer[agent_id].reset_agent()
-
- super(OnlineBCTrainer, self).process_experiences(current_info, next_info)
diff --git a/animalai_bkp/trainers/bc/policy.py b/animalai_bkp/trainers/bc/policy.py
deleted file mode 100644
index b9fd3bdb..00000000
--- a/animalai_bkp/trainers/bc/policy.py
+++ /dev/null
@@ -1,93 +0,0 @@
-import logging
-
-import numpy as np
-from animalai.trainers.bc.models import BehavioralCloningModel
-from animalai.trainers.policy import Policy
-
-logger = logging.getLogger("mlagents.trainers")
-
-
-class BCPolicy(Policy):
- def __init__(self, seed, brain, trainer_parameters, load):
- """
- :param seed: Random seed.
- :param brain: Assigned Brain object.
- :param trainer_parameters: Defined training parameters.
- :param load: Whether a pre-trained model will be loaded or a new one created.
- """
- super(BCPolicy, self).__init__(seed, brain, trainer_parameters)
-
- with self.graph.as_default():
- with self.graph.as_default():
- self.model = BehavioralCloningModel(
- h_size=int(trainer_parameters['hidden_units']),
- lr=float(trainer_parameters['learning_rate']),
- n_layers=int(trainer_parameters['num_layers']),
- m_size=self.m_size,
- normalize=False,
- use_recurrent=trainer_parameters['use_recurrent'],
- brain=brain,
- seed=seed)
-
- if load:
- self._load_graph()
- else:
- self._initialize_graph()
-
- self.inference_dict = {'action': self.model.sample_action}
- self.update_dict = {'policy_loss': self.model.loss,
- 'update_batch': self.model.update}
- if self.use_recurrent:
- self.inference_dict['memory_out'] = self.model.memory_out
-
- self.evaluate_rate = 1.0
- self.update_rate = 0.5
-
- def evaluate(self, brain_info):
- """
- Evaluates policy for the agent experiences provided.
- :param brain_info: BrainInfo input to network.
- :return: Results of evaluation.
- """
- feed_dict = {self.model.dropout_rate: self.evaluate_rate,
- self.model.sequence_length: 1}
-
- feed_dict = self._fill_eval_dict(feed_dict, brain_info)
- if self.use_recurrent:
- if brain_info.memories.shape[1] == 0:
- brain_info.memories = self.make_empty_memory(len(brain_info.agents))
- feed_dict[self.model.memory_in] = brain_info.memories
- run_out = self._execute_model(feed_dict, self.inference_dict)
- return run_out
-
- def update(self, mini_batch, num_sequences):
- """
- Performs update on model.
- :param mini_batch: Batch of experiences.
- :param num_sequences: Number of sequences to process.
- :return: Results of update.
- """
-
- feed_dict = {self.model.dropout_rate: self.update_rate,
- self.model.batch_size: num_sequences,
- self.model.sequence_length: self.sequence_length}
- if self.use_continuous_act:
- feed_dict[self.model.true_action] = mini_batch['actions']. \
- reshape([-1, self.brain.vector_action_space_size[0]])
- else:
- feed_dict[self.model.true_action] = mini_batch['actions'].reshape(
- [-1, len(self.brain.vector_action_space_size)])
- feed_dict[self.model.action_masks] = np.ones(
- (num_sequences, sum(self.brain.vector_action_space_size)))
- if self.use_vec_obs:
- apparent_obs_size = self.brain.vector_observation_space_size * \
- self.brain.num_stacked_vector_observations
- feed_dict[self.model.vector_in] = mini_batch['vector_obs'] \
- .reshape([-1,apparent_obs_size])
- for i, _ in enumerate(self.model.visual_in):
- visual_obs = mini_batch['visual_obs%d' % i]
- feed_dict[self.model.visual_in[i]] = visual_obs
- if self.use_recurrent:
- feed_dict[self.model.memory_in] = np.zeros([num_sequences, self.m_size])
- run_out = self._execute_model(feed_dict, self.update_dict)
- return run_out
diff --git a/animalai_bkp/trainers/bc/trainer.py b/animalai_bkp/trainers/bc/trainer.py
deleted file mode 100644
index bdc2010f..00000000
--- a/animalai_bkp/trainers/bc/trainer.py
+++ /dev/null
@@ -1,190 +0,0 @@
-# # Unity ML-Agents Toolkit
-# ## ML-Agent Learning (Behavioral Cloning)
-# Contains an implementation of Behavioral Cloning Algorithm
-
-import logging
-import os
-
-import numpy as np
-import tensorflow as tf
-
-from animalai.envs import AllBrainInfo
-from animalai.trainers.bc.policy import BCPolicy
-from animalai.trainers.buffer import Buffer
-from animalai.trainers.trainer import Trainer
-
-logger = logging.getLogger("mlagents.trainers")
-
-
-class BCTrainer(Trainer):
- """The BCTrainer is an implementation of Behavioral Cloning."""
-
- def __init__(self, brain, trainer_parameters, training, load, seed, run_id):
- """
-        Responsible for collecting experiences and training the behavioral cloning model.
-        :param trainer_parameters: The parameters for the trainer (dictionary).
-        :param training: Whether the trainer is set for training.
-        :param load: Whether the model should be loaded.
-        :param seed: The seed the model will be initialized with.
-        :param run_id: The identifier of the current run.
- """
- super(BCTrainer, self).__init__(brain, trainer_parameters, training, run_id)
- self.policy = BCPolicy(seed, brain, trainer_parameters, load)
- self.n_sequences = 1
- self.cumulative_rewards = {}
- self.episode_steps = {}
- self.stats = {'Losses/Cloning Loss': [], 'Environment/Episode Length': [],
- 'Environment/Cumulative Reward': []}
-
- self.summary_path = trainer_parameters['summary_path']
- self.batches_per_epoch = trainer_parameters['batches_per_epoch']
- if not os.path.exists(self.summary_path):
- os.makedirs(self.summary_path)
-
- self.demonstration_buffer = Buffer()
- self.evaluation_buffer = Buffer()
- self.summary_writer = tf.summary.FileWriter(self.summary_path)
-
- @property
- def parameters(self):
- """
- Returns the trainer parameters of the trainer.
- """
- return self.trainer_parameters
-
- @property
- def get_max_steps(self):
- """
-        Returns the maximum number of steps. Used to determine when the trainer should be stopped.
- :return: The maximum number of steps of the trainer
- """
- return float(self.trainer_parameters['max_steps'])
-
- @property
- def get_step(self):
- """
- Returns the number of steps the trainer has performed
- :return: the step count of the trainer
- """
- return self.policy.get_current_step()
-
- @property
- def get_last_reward(self):
- """
-        Returns the mean cumulative episode reward recorded so far.
-        :return: the mean cumulative reward, or 0 if none has been recorded
- """
- if len(self.stats['Environment/Cumulative Reward']) > 0:
- return np.mean(self.stats['Environment/Cumulative Reward'])
- else:
- return 0
-
- def increment_step_and_update_last_reward(self):
- """
-        Increments the step count of the trainer and updates the last reward.
- """
- self.policy.increment_step()
- return
-
- def take_action(self, all_brain_info: AllBrainInfo):
- """
- Decides actions using policy given current brain info.
- :param all_brain_info: AllBrainInfo from environment.
- :return: a tuple containing action, memories, values and an object
- to be passed to add experiences
- """
- if len(all_brain_info[self.brain_name].agents) == 0:
- return [], [], [], None, None
-
- agent_brain = all_brain_info[self.brain_name]
- run_out = self.policy.evaluate(agent_brain)
- if self.policy.use_recurrent:
- return run_out['action'], run_out['memory_out'], None, None, None
- else:
- return run_out['action'], None, None, None, None
-
- def add_experiences(self, curr_info: AllBrainInfo, next_info: AllBrainInfo,
- take_action_outputs):
- """
- Adds experiences to each agent's experience history.
- :param curr_info: Current AllBrainInfo (Dictionary of all current brains and corresponding BrainInfo).
- :param next_info: Next AllBrainInfo (Dictionary of all current brains and corresponding BrainInfo).
- :param take_action_outputs: The outputs of the take action method.
- """
-
- # Used to collect information about student performance.
- info_student = curr_info[self.brain_name]
- next_info_student = next_info[self.brain_name]
- for agent_id in info_student.agents:
- self.evaluation_buffer[agent_id].last_brain_info = info_student
-
- for agent_id in next_info_student.agents:
- stored_info_student = self.evaluation_buffer[agent_id].last_brain_info
- if stored_info_student is None:
- continue
- else:
- next_idx = next_info_student.agents.index(agent_id)
- if agent_id not in self.cumulative_rewards:
- self.cumulative_rewards[agent_id] = 0
- self.cumulative_rewards[agent_id] += next_info_student.rewards[next_idx]
- if not next_info_student.local_done[next_idx]:
- if agent_id not in self.episode_steps:
- self.episode_steps[agent_id] = 0
- self.episode_steps[agent_id] += 1
-
- def process_experiences(self, current_info: AllBrainInfo, next_info: AllBrainInfo):
- """
- Checks agent histories for processing condition, and processes them as necessary.
- Processing involves calculating value and advantage targets for model updating step.
- :param current_info: Current AllBrainInfo
- :param next_info: Next AllBrainInfo
- """
- info_student = next_info[self.brain_name]
- for l in range(len(info_student.agents)):
- if info_student.local_done[l]:
- agent_id = info_student.agents[l]
- self.stats['Environment/Cumulative Reward'].append(
- self.cumulative_rewards.get(agent_id, 0))
- self.stats['Environment/Episode Length'].append(
- self.episode_steps.get(agent_id, 0))
- self.cumulative_rewards[agent_id] = 0
- self.episode_steps[agent_id] = 0
-
- def end_episode(self):
- """
- A signal that the Episode has ended. The buffer must be reset.
-        Gets called only when the academy resets.
- """
- self.evaluation_buffer.reset_local_buffers()
- for agent_id in self.cumulative_rewards:
- self.cumulative_rewards[agent_id] = 0
- for agent_id in self.episode_steps:
- self.episode_steps[agent_id] = 0
-
- def is_ready_update(self):
- """
-        Returns whether or not the trainer has enough elements to update the model.
- :return: A boolean corresponding to whether or not update_model() can be run
- """
- return len(self.demonstration_buffer.update_buffer['actions']) > self.n_sequences
-
- def update_policy(self):
- """
- Updates the policy.
- """
- self.demonstration_buffer.update_buffer.shuffle()
- batch_losses = []
- num_batches = min(len(self.demonstration_buffer.update_buffer['actions']) //
- self.n_sequences, self.batches_per_epoch)
- for i in range(num_batches):
- update_buffer = self.demonstration_buffer.update_buffer
- start = i * self.n_sequences
- end = (i + 1) * self.n_sequences
- mini_batch = update_buffer.make_mini_batch(start, end)
- run_out = self.policy.update(mini_batch, self.n_sequences)
- loss = run_out['policy_loss']
- batch_losses.append(loss)
- if len(batch_losses) > 0:
- self.stats['Losses/Cloning Loss'].append(np.mean(batch_losses))
- else:
- self.stats['Losses/Cloning Loss'].append(0)
diff --git a/animalai_bkp/trainers/buffer.py b/animalai_bkp/trainers/buffer.py
deleted file mode 100644
index ff2f0b88..00000000
--- a/animalai_bkp/trainers/buffer.py
+++ /dev/null
@@ -1,255 +0,0 @@
-import numpy as np
-
-from animalai.envs.exception import UnityException
-
-
-class BufferException(UnityException):
- """
- Related to errors with the Buffer.
- """
- pass
-
-
-class Buffer(dict):
- """
- Buffer contains a dictionary of AgentBuffer. The AgentBuffers are indexed by agent_id.
- Buffer also contains an update_buffer that corresponds to the buffer used when updating the model.
- """
-
- class AgentBuffer(dict):
- """
-        AgentBuffer contains a dictionary of AgentBufferFields. Each agent has its own AgentBuffer.
- The keys correspond to the name of the field. Example: state, action
- """
-
- class AgentBufferField(list):
- """
-            AgentBufferField is a list of numpy arrays. When an agent collects a field, you can add it to its
- AgentBufferField with the append method.
- """
-
- def __init__(self):
- self.padding_value = 0
- super(Buffer.AgentBuffer.AgentBufferField, self).__init__()
-
- def __str__(self):
- return str(np.array(self).shape)
-
- def append(self, element, padding_value=0):
- """
- Adds an element to this list. Also lets you change the padding
- type, so that it can be set on append (e.g. action_masks should
- be padded with 1.)
- :param element: The element to append to the list.
- :param padding_value: The value used to pad when get_batch is called.
- """
- super(Buffer.AgentBuffer.AgentBufferField, self).append(element)
- self.padding_value = padding_value
-
- def extend(self, data):
- """
- Adds a list of np.arrays to the end of the list of np.arrays.
- :param data: The np.array list to append.
- """
- self += list(np.array(data))
-
- def set(self, data):
- """
- Sets the list of np.array to the input data
- :param data: The np.array list to be set.
- """
- self[:] = []
- self[:] = list(np.array(data))
-
- def get_batch(self, batch_size=None, training_length=1, sequential=True):
- """
- Retrieve the last batch_size elements of length training_length
- from the list of np.array
- :param batch_size: The number of elements to retrieve. If None:
- All elements will be retrieved.
- :param training_length: The length of the sequence to be retrieved. If
- None: only takes one element.
- :param sequential: If true and training_length is not None: the elements
- will not repeat in the sequence. [a,b,c,d,e] with training_length = 2 and
- sequential=True gives [[0,a],[b,c],[d,e]]. If sequential=False gives
- [[a,b],[b,c],[c,d],[d,e]]
- """
- if training_length == 1:
- # When the training length is 1, the method returns a list of elements,
- # not a list of sequences of elements.
- if batch_size is None:
- # If batch_size is None : All the elements of the AgentBufferField are returned.
- return np.array(self)
- else:
- # return the batch_size last elements
- if batch_size > len(self):
- raise BufferException("Batch size requested is too large")
- return np.array(self[-batch_size:])
- else:
- # The training_length is not None, the method returns a list of SEQUENCES of elements
- if not sequential:
- # The sequences will have overlapping elements
- if batch_size is None:
- # retrieve the maximum number of elements
- batch_size = len(self) - training_length + 1
- # The number of sequences of length training_length taken from a list of len(self) elements
- # with overlapping is equal to batch_size
- if (len(self) - training_length + 1) < batch_size:
-                            raise BufferException("The batch size and training length requested for get_batch were"
- " too large given the current number of data points.")
- tmp_list = []
- for end in range(len(self) - batch_size + 1, len(self) + 1):
- tmp_list += [np.array(self[end - training_length:end])]
- return np.array(tmp_list)
- if sequential:
- # The sequences will not have overlapping elements (this involves padding)
- leftover = len(self) % training_length
- # leftover is the number of elements in the first sequence (this sequence might need 0 padding)
- if batch_size is None:
- # retrieve the maximum number of elements
- batch_size = len(self) // training_length + 1 * (leftover != 0)
- # The maximum number of sequences taken from a list of length len(self) without overlapping
- # with padding is equal to batch_size
- if batch_size > (len(self) // training_length + 1 * (leftover != 0)):
-                            raise BufferException("The batch size and training length requested for get_batch were"
- " too large given the current number of data points.")
- tmp_list = []
- padding = np.array(self[-1]) * self.padding_value
- # The padding is made with zeros and its shape is given by the shape of the last element
- for end in range(len(self), len(self) % training_length, -training_length)[:batch_size]:
- tmp_list += [np.array(self[end - training_length:end])]
- if (leftover != 0) and (len(tmp_list) < batch_size):
- tmp_list += [np.array([padding] * (training_length - leftover) + self[:leftover])]
- tmp_list.reverse()
- return np.array(tmp_list)
-
- def reset_field(self):
- """
- Resets the AgentBufferField
- """
- self[:] = []
-
- def __init__(self):
- self.last_brain_info = None
- self.last_take_action_outputs = None
- super(Buffer.AgentBuffer, self).__init__()
-
- def __str__(self):
- return ", ".join(["'{0}' : {1}".format(k, str(self[k])) for k in self.keys()])
-
- def reset_agent(self):
- """
- Resets the AgentBuffer
- """
- for k in self.keys():
- self[k].reset_field()
- self.last_brain_info = None
- self.last_take_action_outputs = None
-
- def __getitem__(self, key):
- if key not in self.keys():
- self[key] = self.AgentBufferField()
- return super(Buffer.AgentBuffer, self).__getitem__(key)
-
- def check_length(self, key_list):
- """
- Some methods will require that some fields have the same length.
- check_length will return true if the fields in key_list
- have the same length.
- :param key_list: The fields which length will be compared
- """
- if len(key_list) < 2:
- return True
- l = None
- for key in key_list:
- if key not in self.keys():
- return False
- if (l is not None) and (l != len(self[key])):
- return False
- l = len(self[key])
- return True
-
- def shuffle(self, key_list=None):
- """
-            Shuffles the fields in key_list in a consistent way: The reordering will
-            be the same across fields.
- :param key_list: The fields that must be shuffled.
- """
- if key_list is None:
- key_list = list(self.keys())
- if not self.check_length(key_list):
- raise BufferException("Unable to shuffle if the fields are not of same length")
- s = np.arange(len(self[key_list[0]]))
- np.random.shuffle(s)
- for key in key_list:
- self[key][:] = [self[key][i] for i in s]
-
- def make_mini_batch(self, start, end):
- """
- Creates a mini-batch from buffer.
- :param start: Starting index of buffer.
- :param end: Ending index of buffer.
- :return: Dict of mini batch.
- """
- mini_batch = {}
- for key in self:
- mini_batch[key] = np.array(self[key][start:end])
- return mini_batch
-
- def __init__(self):
- self.update_buffer = self.AgentBuffer()
- super(Buffer, self).__init__()
-
- def __str__(self):
- return "update buffer :\n\t{0}\nlocal_buffers :\n{1}".format(str(self.update_buffer),
- '\n'.join(
- ['\tagent {0} :{1}'.format(k, str(self[k])) for
- k in self.keys()]))
-
- def __getitem__(self, key):
- if key not in self.keys():
- self[key] = self.AgentBuffer()
- return super(Buffer, self).__getitem__(key)
-
- def reset_update_buffer(self):
- """
- Resets the update buffer
- """
- self.update_buffer.reset_agent()
-
- def reset_local_buffers(self):
- """
-        Resets all the local buffers.
- """
- agent_ids = list(self.keys())
- for k in agent_ids:
- self[k].reset_agent()
-
- def append_update_buffer(self, agent_id, key_list=None, batch_size=None, training_length=None):
- """
- Appends the buffer of an agent to the update buffer.
-        :param agent_id: The id of the agent whose data will be appended
- :param key_list: The fields that must be added. If None: all fields will be appended.
- :param batch_size: The number of elements that must be appended. If None: All of them will be.
- :param training_length: The length of the samples that must be appended. If None: only takes one element.
- """
- if key_list is None:
- key_list = self[agent_id].keys()
- if not self[agent_id].check_length(key_list):
-            raise BufferException("The fields {0} for agent {1} were not of the same length"
- .format(key_list, agent_id))
- for field_key in key_list:
- self.update_buffer[field_key].extend(
- self[agent_id][field_key].get_batch(batch_size=batch_size, training_length=training_length)
- )
-
- def append_all_agent_batch_to_update_buffer(self, key_list=None, batch_size=None, training_length=None):
- """
- Appends the buffer of all agents to the update buffer.
- :param key_list: The fields that must be added. If None: all fields will be appended.
- :param batch_size: The number of elements that must be appended. If None: All of them will be.
- :param training_length: The length of the samples that must be appended. If None: only takes one element.
- """
- for agent_id in self.keys():
- self.append_update_buffer(agent_id, key_list, batch_size, training_length)
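
To make the get_batch semantics above concrete, here is a small sketch (import path assumed) showing the sequential and overlapping modes on five one-element observations:

    import numpy as np
    from animalai.trainers.buffer import Buffer

    buf = Buffer()
    field = buf['agent-0']['actions']          # AgentBufferField, created on access
    for v in (1.0, 2.0, 3.0, 4.0, 5.0):
        field.append(np.array([v]))

    # Non-overlapping sequences of length 2; the first one is zero-padded,
    # giving roughly [[0, 1], [2, 3], [4, 5]] (each element keeps its shape).
    print(field.get_batch(training_length=2, sequential=True))

    # Overlapping sequences: [[1, 2], [2, 3], [3, 4], [4, 5]].
    print(field.get_batch(training_length=2, sequential=False))
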
diff --git a/animalai_bkp/trainers/curriculum.py b/animalai_bkp/trainers/curriculum.py
deleted file mode 100644
index 3ec0859f..00000000
--- a/animalai_bkp/trainers/curriculum.py
+++ /dev/null
@@ -1,112 +0,0 @@
-import os
-import json
-import math
-
-from .exception import CurriculumError
-
-import logging
-
-logger = logging.getLogger('mlagents.trainers')
-
-
-class Curriculum(object):
- def __init__(self, location, default_reset_parameters):
- """
- Initializes a Curriculum object.
- :param location: Path to JSON defining curriculum.
- :param default_reset_parameters: Set of reset parameters for
- environment.
- """
- self.max_lesson_num = 0
- self.measure = None
- self._lesson_num = 0
- # The name of the brain should be the basename of the file without the
- # extension.
- self._brain_name = os.path.basename(location).split('.')[0]
-
- try:
- with open(location) as data_file:
- self.data = json.load(data_file)
- except IOError:
- raise CurriculumError(
- 'The file {0} could not be found.'.format(location))
- except UnicodeDecodeError:
- raise CurriculumError('There was an error decoding {}'
- .format(location))
- self.smoothing_value = 0
- for key in ['parameters', 'measure', 'thresholds',
- 'min_lesson_length', 'signal_smoothing']:
- if key not in self.data:
- raise CurriculumError("{0} does not contain a "
- "{1} field."
- .format(location, key))
- self.smoothing_value = 0
- self.measure = self.data['measure']
- self.min_lesson_length = self.data['min_lesson_length']
- self.max_lesson_num = len(self.data['thresholds'])
-
- parameters = self.data['parameters']
- for key in parameters:
- if key not in default_reset_parameters:
- raise CurriculumError(
- 'The parameter {0} in Curriculum {1} is not present in '
- 'the Environment'.format(key, location))
- if len(parameters[key]) != self.max_lesson_num + 1:
- raise CurriculumError(
- 'The parameter {0} in Curriculum {1} must have {2} values '
- 'but {3} were found'.format(key, location,
- self.max_lesson_num + 1,
- len(parameters[key])))
-
- @property
- def lesson_num(self):
- return self._lesson_num
-
- @lesson_num.setter
- def lesson_num(self, lesson_num):
- self._lesson_num = max(0, min(lesson_num, self.max_lesson_num))
-
- def increment_lesson(self, measure_val):
- """
- Increments the lesson number depending on the progress given.
- :param measure_val: Measure of progress (either reward or percentage
- steps completed).
- :return Whether the lesson was incremented.
- """
- if not self.data or not measure_val or math.isnan(measure_val):
- return False
- if self.data['signal_smoothing']:
- measure_val = self.smoothing_value * 0.25 + 0.75 * measure_val
- self.smoothing_value = measure_val
- if self.lesson_num < self.max_lesson_num:
- if measure_val > self.data['thresholds'][self.lesson_num]:
- self.lesson_num += 1
- config = {}
- parameters = self.data['parameters']
- for key in parameters:
- config[key] = parameters[key][self.lesson_num]
- logger.info('{0} lesson changed. Now in lesson {1}: {2}'
- .format(self._brain_name,
- self.lesson_num,
- ', '.join([str(x) + ' -> ' + str(config[x])
- for x in config])))
- return True
- return False
-
- def get_config(self, lesson=None):
- """
- Returns reset parameters which correspond to the lesson.
- :param lesson: The lesson you want to get the config of. If None, the
- current lesson is returned.
- :return: The configuration of the reset parameters.
- """
- if not self.data:
- return {}
- if lesson is None:
- lesson = self.lesson_num
- lesson = max(0, min(lesson, self.max_lesson_num))
- config = {}
- parameters = self.data['parameters']
- for key in parameters:
- config[key] = parameters[key][lesson]
- return config
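
A minimal end-to-end sketch of the Curriculum class above; the JSON keys are the ones it validates ('measure', 'thresholds', 'min_lesson_length', 'signal_smoothing', 'parameters'), while the file name, parameter name, and values are illustrative assumptions.

    import json
    from animalai.trainers.curriculum import Curriculum

    config = {
        'measure': 'reward',
        'thresholds': [1.0, 2.0],
        'min_lesson_length': 100,
        'signal_smoothing': True,
        # Each parameter needs len(thresholds) + 1 values, one per lesson.
        'parameters': {'wall_height': [1.0, 2.0, 3.0]},
    }
    with open('ExampleBrain.json', 'w') as f:
        json.dump(config, f)

    curriculum = Curriculum('ExampleBrain.json',
                            default_reset_parameters={'wall_height': 1.0})
    curriculum.increment_lesson(measure_val=1.5)   # smoothed value crosses 1.0
    print(curriculum.get_config())                 # {'wall_height': 2.0}
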
diff --git a/animalai_bkp/trainers/demo_loader.py b/animalai_bkp/trainers/demo_loader.py
deleted file mode 100644
index 02c8f68b..00000000
--- a/animalai_bkp/trainers/demo_loader.py
+++ /dev/null
@@ -1,94 +0,0 @@
-import pathlib
-import logging
-import os
-from animalai.trainers.buffer import Buffer
-from animalai.envs.brain import BrainParameters, BrainInfo
-from animalai.communicator_objects import *
-from google.protobuf.internal.decoder import _DecodeVarint32
-
-logger = logging.getLogger("mlagents.trainers")
-
-
-def make_demo_buffer(brain_infos, brain_params, sequence_length):
- # Create and populate buffer using experiences
- demo_buffer = Buffer()
- for idx, experience in enumerate(brain_infos):
- if idx > len(brain_infos) - 2:
- break
- current_brain_info = brain_infos[idx]
- next_brain_info = brain_infos[idx + 1]
- demo_buffer[0].last_brain_info = current_brain_info
- demo_buffer[0]['done'].append(next_brain_info.local_done[0])
- demo_buffer[0]['rewards'].append(next_brain_info.rewards[0])
- for i in range(brain_params.number_visual_observations):
- demo_buffer[0]['visual_obs%d' % i] \
- .append(current_brain_info.visual_observations[i][0])
- if brain_params.vector_observation_space_size > 0:
- demo_buffer[0]['vector_obs'] \
- .append(current_brain_info.vector_observations[0])
- demo_buffer[0]['actions'].append(next_brain_info.previous_vector_actions[0])
- if next_brain_info.local_done[0]:
- demo_buffer.append_update_buffer(0, batch_size=None,
- training_length=sequence_length)
- demo_buffer.reset_local_buffers()
- demo_buffer.append_update_buffer(0, batch_size=None,
- training_length=sequence_length)
- return demo_buffer
-
-
-def demo_to_buffer(file_path, sequence_length):
- """
- Loads demonstration file and uses it to fill training buffer.
- :param file_path: Location of demonstration file (.demo).
- :param sequence_length: Length of trajectories to fill buffer.
- :return:
- """
- brain_params, brain_infos, _ = load_demonstration(file_path)
- demo_buffer = make_demo_buffer(brain_infos, brain_params, sequence_length)
- return brain_params, demo_buffer
-
-
-def load_demonstration(file_path):
- """
- Loads and parses a demonstration file.
- :param file_path: Location of demonstration file (.demo).
- :return: BrainParameter and list of BrainInfos containing demonstration data.
- """
-
- # First 32 bytes of file dedicated to meta-data.
- INITIAL_POS = 33
-
- if not os.path.isfile(file_path):
- raise FileNotFoundError("The demonstration file {} does not exist.".format(file_path))
- file_extension = pathlib.Path(file_path).suffix
- if file_extension != '.demo':
- raise ValueError("The file is not a '.demo' file. Please provide a file with the "
- "correct extension.")
-
- brain_params = None
- brain_infos = []
- data = open(file_path, "rb").read()
- next_pos, pos, obs_decoded = 0, 0, 0
- total_expected = 0
- while pos < len(data):
- next_pos, pos = _DecodeVarint32(data, pos)
- if obs_decoded == 0:
- meta_data_proto = DemonstrationMetaProto()
- meta_data_proto.ParseFromString(data[pos:pos + next_pos])
- total_expected = meta_data_proto.number_steps
- pos = INITIAL_POS
- if obs_decoded == 1:
- brain_param_proto = BrainParametersProto()
- brain_param_proto.ParseFromString(data[pos:pos + next_pos])
- brain_params = BrainParameters.from_proto(brain_param_proto)
- pos += next_pos
- if obs_decoded > 1:
- agent_info = AgentInfoProto()
- agent_info.ParseFromString(data[pos:pos + next_pos])
- brain_info = BrainInfo.from_agent_proto([agent_info], brain_params)
- brain_infos.append(brain_info)
- if len(brain_infos) == total_expected:
- break
- pos += next_pos
- obs_decoded += 1
- return brain_params, brain_infos, total_expected
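
Typical use of the loaders above is a single call that returns the recorded brain parameters plus a pre-filled Buffer; the .demo path below is a placeholder.

    from animalai.trainers.demo_loader import demo_to_buffer, load_demonstration

    # Build a training buffer directly from a recorded demonstration.
    brain_params, demo_buffer = demo_to_buffer('demos/example.demo', sequence_length=1)
    print(len(demo_buffer.update_buffer['actions']))   # number of recorded transitions

    # Or inspect the raw per-step BrainInfo objects and the expected step count.
    brain_params, brain_infos, total_expected = load_demonstration('demos/example.demo')
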
diff --git a/animalai_bkp/trainers/exception.py b/animalai_bkp/trainers/exception.py
deleted file mode 100644
index b2f0a0db..00000000
--- a/animalai_bkp/trainers/exception.py
+++ /dev/null
@@ -1,20 +0,0 @@
-"""
-Contains exceptions for the trainers package.
-"""
-
-class TrainerError(Exception):
- """
- Any error related to the trainers in the ML-Agents Toolkit.
- """
- pass
-
-class CurriculumError(TrainerError):
- """
- Any error related to training with a curriculum.
- """
- pass
-
-class MetaCurriculumError(TrainerError):
- """
- Any error related to the configuration of a metacurriculum.
- """
diff --git a/animalai_bkp/trainers/learn.py b/animalai_bkp/trainers/learn.py
deleted file mode 100644
index 309e4f66..00000000
--- a/animalai_bkp/trainers/learn.py
+++ /dev/null
@@ -1,249 +0,0 @@
-# # Unity ML-Agents Toolkit
-
-import logging
-
-from multiprocessing import Process, Queue
-import os
-import glob
-import shutil
-import numpy as np
-import yaml
-from docopt import docopt
-from typing import Optional
-
-
-from animalai.trainers.trainer_controller import TrainerController
-from animalai.trainers.exception import TrainerError
-from animalai.trainers import MetaCurriculumError, MetaCurriculum
-from animalai.envs import UnityEnvironment
-from animalai.envs.exception import UnityEnvironmentException
-
-
-def run_training(sub_id: int, run_seed: int, run_options, process_queue):
- """
- Launches training session.
- :param process_queue: Queue used to send signal back to main.
- :param sub_id: Unique id for training session.
- :param run_seed: Random seed used for training.
- :param run_options: Command line arguments for training.
- """
- # Docker Parameters
- docker_target_name = (run_options['--docker-target-name']
- if run_options['--docker-target-name'] != 'None' else None)
-
- # General parameters
- env_path = (run_options['--env']
- if run_options['--env'] != 'None' else None)
- run_id = run_options['--run-id']
- load_model = run_options['--load']
- train_model = run_options['--train']
- save_freq = int(run_options['--save-freq'])
- keep_checkpoints = int(run_options['--keep-checkpoints'])
- worker_id = int(run_options['--worker-id'])
- curriculum_folder = (run_options['--curriculum']
- if run_options['--curriculum'] != 'None' else None)
- lesson = int(run_options['--lesson'])
- fast_simulation = not bool(run_options['--slow'])
- no_graphics = run_options['--no-graphics']
- trainer_config_path = run_options['<trainer-config-path>']
-
- # Recognize and use docker volume if one is passed as an argument
- if not docker_target_name:
- model_path = './models/{run_id}'.format(run_id=run_id)
- summaries_dir = './summaries'
- else:
- trainer_config_path = \
- '/{docker_target_name}/{trainer_config_path}'.format(
- docker_target_name=docker_target_name,
- trainer_config_path=trainer_config_path)
- if curriculum_folder is not None:
- curriculum_folder = \
- '/{docker_target_name}/{curriculum_folder}'.format(
- docker_target_name=docker_target_name,
- curriculum_folder=curriculum_folder)
- model_path = '/{docker_target_name}/models/{run_id}'.format(
- docker_target_name=docker_target_name,
- run_id=run_id)
- summaries_dir = '/{docker_target_name}/summaries'.format(
- docker_target_name=docker_target_name)
-
- trainer_config = load_config(trainer_config_path)
- env = init_environment(env_path, docker_target_name, no_graphics, worker_id + sub_id, fast_simulation, run_seed)
- maybe_meta_curriculum = try_create_meta_curriculum(curriculum_folder, env)
-
- external_brains = {}
- for brain_name in env.external_brain_names:
- external_brains[brain_name] = env.brains[brain_name]
-
- # Create controller and begin training.
- tc = TrainerController(model_path, summaries_dir, run_id + '-' + str(sub_id),
- save_freq, maybe_meta_curriculum,
- load_model, train_model,
- keep_checkpoints, lesson, external_brains, run_seed)
-
- # Signal that environment has been launched.
- process_queue.put(True)
-
- # Begin training
- tc.start_learning(env, trainer_config)
-
-
-def try_create_meta_curriculum(curriculum_folder: Optional[str], env: UnityEnvironment) -> Optional[MetaCurriculum]:
- if curriculum_folder is None:
- return None
- else:
- meta_curriculum = MetaCurriculum(curriculum_folder, env._resetParameters)
- if meta_curriculum:
- for brain_name in meta_curriculum.brains_to_curriculums.keys():
- if brain_name not in env.external_brain_names:
- raise MetaCurriculumError('One of the curricula '
- 'defined in ' +
- curriculum_folder + ' '
- 'does not have a corresponding '
- 'Brain. Check that the '
- 'curriculum file has the same '
- 'name as the Brain '
- 'whose curriculum it defines.')
- return meta_curriculum
-
-
-def prepare_for_docker_run(docker_target_name, env_path):
- for f in glob.glob('/{docker_target_name}/*'.format(
- docker_target_name=docker_target_name)):
- if env_path in f:
- try:
- b = os.path.basename(f)
- if os.path.isdir(f):
- shutil.copytree(f,
- '/ml-agents/{b}'.format(b=b))
- else:
- src_f = '/{docker_target_name}/{b}'.format(
- docker_target_name=docker_target_name, b=b)
- dst_f = '/ml-agents/{b}'.format(b=b)
- shutil.copyfile(src_f, dst_f)
- os.chmod(dst_f, 0o775) # Make executable
- except Exception as e:
- logging.getLogger('mlagents.trainers').info(e)
- env_path = '/ml-agents/{env_path}'.format(env_path=env_path)
- return env_path
-
-
-def load_config(trainer_config_path):
- try:
- with open(trainer_config_path) as data_file:
- trainer_config = yaml.load(data_file)
- return trainer_config
- except IOError:
- raise UnityEnvironmentException('Parameter file could not be found '
- 'at {}.'
- .format(trainer_config_path))
- except UnicodeDecodeError:
- raise UnityEnvironmentException('There was an error decoding '
- 'Trainer Config from this path : {}'
- .format(trainer_config_path))
-
-
-def init_environment(env_path, docker_target_name, no_graphics, worker_id, fast_simulation, seed):
- if env_path is not None:
- # Strip out executable extensions if passed
- env_path = (env_path.strip()
- .replace('.app', '')
- .replace('.exe', '')
- .replace('.x86_64', '')
- .replace('.x86', ''))
- docker_training = docker_target_name is not None
- if docker_training and env_path is not None:
- """
- Comments for future maintenance:
- Some OS/VM instances (e.g. COS GCP Image) mount filesystems
- with COS flag which prevents execution of the Unity scene,
- to get around this, we will copy the executable into the
- container.
- """
- # Navigate in docker path and find env_path and copy it.
- env_path = prepare_for_docker_run(docker_target_name,
- env_path)
- return UnityEnvironment(
- file_name=env_path,
- worker_id=worker_id,
- seed=seed,
- docker_training=docker_training,
- no_graphics=no_graphics
- )
-
-
-def main():
- try:
- print('''
-
- ▄▄▄▓▓▓▓
- ╓▓▓▓▓▓▓█▓▓▓▓▓
- ,▄▄▄m▀▀▀' ,▓▓▓▀▓▓▄ ▓▓▓ ▓▓▌
- ▄▓▓▓▀' ▄▓▓▀ ▓▓▓ ▄▄ ▄▄ ,▄▄ ▄▄▄▄ ,▄▄ ▄▓▓▌▄ ▄▄▄ ,▄▄
- ▄▓▓▓▀ ▄▓▓▀ ▐▓▓▌ ▓▓▌ ▐▓▓ ▐▓▓▓▀▀▀▓▓▌ ▓▓▓ ▀▓▓▌▀ ^▓▓▌ ╒▓▓▌
- ▄▓▓▓▓▓▄▄▄▄▄▄▄▄▓▓▓ ▓▀ ▓▓▌ ▐▓▓ ▐▓▓ ▓▓▓ ▓▓▓ ▓▓▌ ▐▓▓▄ ▓▓▌
- ▀▓▓▓▓▀▀▀▀▀▀▀▀▀▀▓▓▄ ▓▓ ▓▓▌ ▐▓▓ ▐▓▓ ▓▓▓ ▓▓▓ ▓▓▌ ▐▓▓▐▓▓
- ^█▓▓▓ ▀▓▓▄ ▐▓▓▌ ▓▓▓▓▄▓▓▓▓ ▐▓▓ ▓▓▓ ▓▓▓ ▓▓▓▄ ▓▓▓▓`
- '▀▓▓▓▄ ^▓▓▓ ▓▓▓ └▀▀▀▀ ▀▀ ^▀▀ `▀▀ `▀▀ '▀▀ ▐▓▓▌
- ▀▀▀▀▓▄▄▄ ▓▓▓▓▓▓, ▓▓▓▓▀
- `▀█▓▓▓▓▓▓▓▓▓▌
- ¬`▀▀▀█▓
-
- ''')
- except:
- print('\n\n\tUnity Technologies\n')
-
- logger = logging.getLogger('mlagents.trainers')
- _USAGE = '''
- Usage:
- mlagents-learn <trainer-config-path> [options]
- mlagents-learn --help
-
- Options:
- --env=<file>                  Name of the Unity executable [default: None].
- --curriculum=<directory>      Curriculum json directory for environment [default: None].
- --keep-checkpoints=<n>        How many model checkpoints to keep [default: 5].
- --lesson=<n>                  Start learning from this lesson [default: 0].
- --load                        Whether to load the model or randomly initialize [default: False].
- --run-id=<path>               The directory name for model and summary statistics [default: ppo].
- --num-runs=<n>                Number of concurrent training sessions [default: 1].
- --save-freq=<n>               Frequency at which to save model [default: 50000].
- --seed=<n>                    Random seed used for training [default: -1].
- --slow                        Whether to run the game at training speed [default: False].
- --train                       Whether to train model, or only run inference [default: False].
- --worker-id=<n>               Number to add to communication port (5005) [default: 0].
- --docker-target-name=<dt>     Docker volume to store training-specific files [default: None].
- --no-graphics                 Whether to run the environment in no-graphics mode [default: False].
- '''
-
- options = docopt(_USAGE)
- logger.info(options)
- num_runs = int(options['--num-runs'])
- seed = int(options['--seed'])
-
- if options['--env'] == 'None' and num_runs > 1:
- raise TrainerError('It is not possible to launch more than one concurrent training session '
- 'when training from the editor.')
-
- jobs = []
- run_seed = seed
-
- if num_runs == 1:
- if seed == -1:
- run_seed = np.random.randint(0, 10000)
- run_training(0, run_seed, options, Queue())
- else:
- for i in range(num_runs):
- if seed == -1:
- run_seed = np.random.randint(0, 10000)
- process_queue = Queue()
- p = Process(target=run_training, args=(i, run_seed, options, process_queue))
- jobs.append(p)
- p.start()
- # Wait for signal that environment has successfully launched
- while process_queue.get() is not True:
- continue
-
-# For python debugger to directly run this script
-if __name__ == "__main__":
- main()
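A note on the launch flow above: main() starts one process per --num-runs, offsets each environment's port via worker_id + sub_id, and blocks on a Queue until the child reports that its environment is up before starting the next run. A stripped-down sketch of that pattern (the work body is illustrative, not the trainer code):

    from multiprocessing import Process, Queue

    def work(sub_id, queue):
        # ... build a Unity environment on base_port + worker_id + sub_id ...
        queue.put(True)   # signal the parent that the environment has launched
        # ... run the training loop ...

    if __name__ == '__main__':
        for i in range(3):               # e.g. --num-runs=3
            q = Queue()
            Process(target=work, args=(i, q)).start()
            while q.get() is not True:   # wait before launching the next run
                continue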
diff --git a/animalai_bkp/trainers/meta_curriculum.py b/animalai_bkp/trainers/meta_curriculum.py
deleted file mode 100644
index f71e91e3..00000000
--- a/animalai_bkp/trainers/meta_curriculum.py
+++ /dev/null
@@ -1,147 +0,0 @@
-"""Contains the MetaCurriculum class."""
-
-import os
-from animalai.trainers.curriculum import Curriculum
-from animalai.trainers.exception import MetaCurriculumError
-
-import logging
-
-logger = logging.getLogger('mlagents.trainers')
-
-
-class MetaCurriculum(object):
- """A MetaCurriculum holds curriculums. Each curriculum is associated to a
- particular brain in the environment.
- """
-
- def __init__(self, curriculum_folder, default_reset_parameters):
- """Initializes a MetaCurriculum object.
-
- Args:
- curriculum_folder (str): The relative or absolute path of the
- folder which holds the curriculums for this environment.
- The folder should contain JSON files whose names are the
- brains that the curriculums belong to.
- default_reset_parameters (dict): The default reset parameters
- of the environment.
- """
- used_reset_parameters = set()
- self._brains_to_curriculums = {}
-
- try:
- for curriculum_filename in os.listdir(curriculum_folder):
- brain_name = curriculum_filename.split('.')[0]
- curriculum_filepath = \
- os.path.join(curriculum_folder, curriculum_filename)
- curriculum = Curriculum(curriculum_filepath,
- default_reset_parameters)
-
- # Check if any two curriculums use the same reset params.
- if any([(parameter in curriculum.get_config().keys())
- for parameter in used_reset_parameters]):
- logger.warning('Two or more curriculums will '
- 'attempt to change the same reset '
- 'parameter. The result will be '
- 'non-deterministic.')
-
- used_reset_parameters.update(curriculum.get_config().keys())
- self._brains_to_curriculums[brain_name] = curriculum
- except NotADirectoryError:
- raise MetaCurriculumError(curriculum_folder + ' is not a '
- 'directory. Refer to the ML-Agents '
- 'curriculum learning docs.')
-
-
- @property
- def brains_to_curriculums(self):
- """A dict from brain_name to the brain's curriculum."""
- return self._brains_to_curriculums
-
- @property
- def lesson_nums(self):
- """A dict from brain name to the brain's curriculum's lesson number."""
- lesson_nums = {}
- for brain_name, curriculum in self.brains_to_curriculums.items():
- lesson_nums[brain_name] = curriculum.lesson_num
-
- return lesson_nums
-
- @lesson_nums.setter
- def lesson_nums(self, lesson_nums):
- for brain_name, lesson in lesson_nums.items():
- self.brains_to_curriculums[brain_name].lesson_num = lesson
-
- def _lesson_ready_to_increment(self, brain_name, reward_buff_size):
- """Determines whether the curriculum of a specified brain is ready
- to attempt an increment.
-
- Args:
- brain_name (str): The name of the brain whose curriculum will be
- checked for readiness.
- reward_buff_size (int): The size of the reward buffer of the trainer
- that corresponds to the specified brain.
-
- Returns:
- Whether the curriculum of the specified brain should attempt to
- increment its lesson.
- """
- return reward_buff_size >= (self.brains_to_curriculums[brain_name]
- .min_lesson_length)
-
- def increment_lessons(self, measure_vals, reward_buff_sizes=None):
- """Attempts to increment all the lessons of all the curriculums in this
- MetaCurriculum. Note that calling this method does not guarantee the
- lesson of a curriculum will increment. The lesson of a curriculum will
- only increment if the specified measure threshold defined in the
- curriculum has been reached and the minimum number of episodes in the
- lesson have been completed.
-
- Args:
- measure_vals (dict): A dict of brain name to measure value.
- reward_buff_sizes (dict): A dict of brain names to the size of their
- corresponding reward buffers.
-
- Returns:
- A dict from brain name to whether that brain's lesson number was
- incremented.
- """
- ret = {}
- if reward_buff_sizes:
- for brain_name, buff_size in reward_buff_sizes.items():
- if self._lesson_ready_to_increment(brain_name, buff_size):
- measure_val = measure_vals[brain_name]
- ret[brain_name] = (self.brains_to_curriculums[brain_name]
- .increment_lesson(measure_val))
- else:
- for brain_name, measure_val in measure_vals.items():
- ret[brain_name] = (self.brains_to_curriculums[brain_name]
- .increment_lesson(measure_val))
- return ret
-
-
- def set_all_curriculums_to_lesson_num(self, lesson_num):
- """Sets all the curriculums in this meta curriculum to a specified
- lesson number.
-
- Args:
- lesson_num (int): The lesson number which all the curriculums will
- be set to.
- """
- for _, curriculum in self.brains_to_curriculums.items():
- curriculum.lesson_num = lesson_num
-
-
- def get_config(self):
- """Get the combined configuration of all curriculums in this
- MetaCurriculum.
-
- Returns:
- A dict from parameter to value.
- """
- config = {}
-
- for _, curriculum in self.brains_to_curriculums.items():
- curr_config = curriculum.get_config()
- config.update(curr_config)
-
- return config
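A usage sketch for the class above, assuming a hypothetical ./curricula folder containing Learner.json for a brain named "Learner" (neither name exists in this repository):

    meta = MetaCurriculum('./curricula', default_reset_parameters={'difficulty': 0})
    meta.set_all_curriculums_to_lesson_num(0)
    changed = meta.increment_lessons(
        measure_vals={'Learner': 0.9},         # e.g. mean cumulative reward for that brain
        reward_buff_sizes={'Learner': 200})    # must reach the curriculum's min_lesson_length
    env_reset_config = meta.get_config()       # merged reset parameters for the next env.reset()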
diff --git a/animalai_bkp/trainers/models.py b/animalai_bkp/trainers/models.py
deleted file mode 100644
index 66c6cd71..00000000
--- a/animalai_bkp/trainers/models.py
+++ /dev/null
@@ -1,380 +0,0 @@
-import logging
-
-import numpy as np
-import tensorflow as tf
-import tensorflow.contrib.layers as c_layers
-
-logger = logging.getLogger("mlagents.envs")
-
-
-class LearningModel(object):
- _version_number_ = 2
-
- def __init__(self, m_size, normalize, use_recurrent, brain, seed):
- tf.set_random_seed(seed)
- self.brain = brain
- self.vector_in = None
- self.global_step, self.increment_step = self.create_global_steps()
- self.visual_in = []
- self.batch_size = tf.placeholder(shape=None, dtype=tf.int32, name='batch_size')
- self.sequence_length = tf.placeholder(shape=None, dtype=tf.int32, name='sequence_length')
- self.mask_input = tf.placeholder(shape=[None], dtype=tf.float32, name='masks')
- self.mask = tf.cast(self.mask_input, tf.int32)
- self.use_recurrent = use_recurrent
- if self.use_recurrent:
- self.m_size = m_size
- else:
- self.m_size = 0
- self.normalize = normalize
- self.act_size = brain.vector_action_space_size
- self.vec_obs_size = brain.vector_observation_space_size * \
- brain.num_stacked_vector_observations
- self.vis_obs_size = brain.number_visual_observations
- tf.Variable(int(brain.vector_action_space_type == 'continuous'),
- name='is_continuous_control', trainable=False, dtype=tf.int32)
- tf.Variable(self._version_number_, name='version_number', trainable=False, dtype=tf.int32)
- tf.Variable(self.m_size, name="memory_size", trainable=False, dtype=tf.int32)
- if brain.vector_action_space_type == 'continuous':
- tf.Variable(self.act_size[0], name="action_output_shape", trainable=False, dtype=tf.int32)
- else:
- tf.Variable(sum(self.act_size), name="action_output_shape", trainable=False, dtype=tf.int32)
-
- @staticmethod
- def create_global_steps():
- """Creates TF ops to track and increment global training step."""
- global_step = tf.Variable(0, name="global_step", trainable=False, dtype=tf.int32)
- increment_step = tf.assign(global_step, tf.add(global_step, 1))
- return global_step, increment_step
-
- @staticmethod
- def swish(input_activation):
- """Swish activation function. For more info: https://arxiv.org/abs/1710.05941"""
- return tf.multiply(input_activation, tf.nn.sigmoid(input_activation))
-
- @staticmethod
- def create_visual_input(camera_parameters, name):
- """
- Creates image input op.
- :param camera_parameters: Parameters for visual observation from BrainInfo.
- :param name: Desired name of input op.
- :return: input op.
- """
- o_size_h = camera_parameters['height']
- o_size_w = camera_parameters['width']
- bw = camera_parameters['blackAndWhite']
-
- if bw:
- c_channels = 1
- else:
- c_channels = 3
-
- visual_in = tf.placeholder(shape=[None, o_size_h, o_size_w, c_channels], dtype=tf.float32,
- name=name)
- return visual_in
-
- def create_vector_input(self, name='vector_observation'):
- """
- Creates ops for vector observation input.
- :param name: Name of the placeholder op.
- :param vec_obs_size: Size of stacked vector observation.
- :return:
- """
- self.vector_in = tf.placeholder(shape=[None, self.vec_obs_size], dtype=tf.float32,
- name=name)
- if self.normalize:
- self.running_mean = tf.get_variable("running_mean", [self.vec_obs_size],
- trainable=False, dtype=tf.float32,
- initializer=tf.zeros_initializer())
- self.running_variance = tf.get_variable("running_variance", [self.vec_obs_size],
- trainable=False,
- dtype=tf.float32,
- initializer=tf.ones_initializer())
- self.update_mean, self.update_variance = self.create_normalizer_update(self.vector_in)
-
- self.normalized_state = tf.clip_by_value((self.vector_in - self.running_mean) / tf.sqrt(
- self.running_variance / (tf.cast(self.global_step, tf.float32) + 1)), -5, 5,
- name="normalized_state")
- return self.normalized_state
- else:
- return self.vector_in
-
- def create_normalizer_update(self, vector_input):
- mean_current_observation = tf.reduce_mean(vector_input, axis=0)
- new_mean = self.running_mean + (mean_current_observation - self.running_mean) / \
- tf.cast(tf.add(self.global_step, 1), tf.float32)
- new_variance = self.running_variance + (mean_current_observation - new_mean) * \
- (mean_current_observation - self.running_mean)
- update_mean = tf.assign(self.running_mean, new_mean)
- update_variance = tf.assign(self.running_variance, new_variance)
- return update_mean, update_variance
-
- @staticmethod
- def create_vector_observation_encoder(observation_input, h_size, activation, num_layers, scope,
- reuse):
- """
- Builds a set of hidden state encoders.
- :param reuse: Whether to re-use the weights within the same scope.
- :param scope: Graph scope for the encoder ops.
- :param observation_input: Input vector.
- :param h_size: Hidden layer size.
- :param activation: What type of activation function to use for layers.
- :param num_layers: number of hidden layers to create.
- :return: List of hidden layer tensors.
- """
- with tf.variable_scope(scope):
- hidden = observation_input
- for i in range(num_layers):
- hidden = tf.layers.dense(hidden, h_size, activation=activation, reuse=reuse,
- name="hidden_{}".format(i),
- kernel_initializer=c_layers.variance_scaling_initializer(
- 1.0))
- return hidden
-
- def create_visual_observation_encoder(self, image_input, h_size, activation, num_layers, scope,
- reuse):
- """
- Builds a set of visual (CNN) encoders.
- :param reuse: Whether to re-use the weights within the same scope.
- :param scope: The scope of the graph within which to create the ops.
- :param image_input: The placeholder for the image input to use.
- :param h_size: Hidden layer size.
- :param activation: What type of activation function to use for layers.
- :param num_layers: number of hidden layers to create.
- :return: List of hidden layer tensors.
- """
- with tf.variable_scope(scope):
- conv1 = tf.layers.conv2d(image_input, 16, kernel_size=[8, 8], strides=[4, 4],
- activation=tf.nn.elu, reuse=reuse, name="conv_1")
- conv2 = tf.layers.conv2d(conv1, 32, kernel_size=[4, 4], strides=[2, 2],
- activation=tf.nn.elu, reuse=reuse, name="conv_2")
- hidden = c_layers.flatten(conv2)
-
- with tf.variable_scope(scope + '/' + 'flat_encoding'):
- hidden_flat = self.create_vector_observation_encoder(hidden, h_size, activation,
- num_layers, scope, reuse)
- return hidden_flat
-
- @staticmethod
- def create_discrete_action_masking_layer(all_logits, action_masks, action_size):
- """
- Creates a masking layer for the discrete actions
- :param all_logits: The concatenated unnormalized action probabilities for all branches
- :param action_masks: The mask for the logits. Must be of dimension [None x total_number_of_action]
- :param action_size: A list containing the number of possible actions for each branch
- :return: The action output dimension [batch_size, num_branches] and the concatenated normalized logits
- """
- action_idx = [0] + list(np.cumsum(action_size))
- branches_logits = [all_logits[:, action_idx[i]:action_idx[i + 1]] for i in range(len(action_size))]
- branch_masks = [action_masks[:, action_idx[i]:action_idx[i + 1]] for i in range(len(action_size))]
- raw_probs = [tf.multiply(tf.nn.softmax(branches_logits[k]) + 1.0e-10, branch_masks[k])
- for k in range(len(action_size))]
- normalized_probs = [
- tf.divide(raw_probs[k], tf.reduce_sum(raw_probs[k], axis=1, keepdims=True))
- for k in range(len(action_size))]
- output = tf.concat([tf.multinomial(tf.log(normalized_probs[k]), 1) for k in range(len(action_size))], axis=1)
- return output, tf.concat([tf.log(normalized_probs[k] + 1.0e-10) for k in range(len(action_size))], axis=1)
-
- def create_observation_streams(self, num_streams, h_size, num_layers):
- """
- Creates encoding stream for observations.
- :param num_streams: Number of streams to create.
- :param h_size: Size of hidden linear layers in stream.
- :param num_layers: Number of hidden linear layers in stream.
- :return: List of encoded streams.
- """
- brain = self.brain
- activation_fn = self.swish
-
- self.visual_in = []
- for i in range(brain.number_visual_observations):
- visual_input = self.create_visual_input(brain.camera_resolutions[i],
- name="visual_observation_" + str(i))
- self.visual_in.append(visual_input)
- vector_observation_input = self.create_vector_input()
-
- final_hiddens = []
- for i in range(num_streams):
- visual_encoders = []
- hidden_state, hidden_visual = None, None
- if self.vis_obs_size > 0:
- for j in range(brain.number_visual_observations):
- encoded_visual = self.create_visual_observation_encoder(self.visual_in[j],
- h_size,
- activation_fn,
- num_layers,
- "main_graph_{}_encoder{}"
- .format(i, j), False)
- visual_encoders.append(encoded_visual)
- hidden_visual = tf.concat(visual_encoders, axis=1)
- if brain.vector_observation_space_size > 0:
- hidden_state = self.create_vector_observation_encoder(vector_observation_input,
- h_size, activation_fn,
- num_layers,
- "main_graph_{}".format(i),
- False)
- if hidden_state is not None and hidden_visual is not None:
- final_hidden = tf.concat([hidden_visual, hidden_state], axis=1)
- elif hidden_state is None and hidden_visual is not None:
- final_hidden = hidden_visual
- elif hidden_state is not None and hidden_visual is None:
- final_hidden = hidden_state
- else:
- raise Exception("No valid network configuration possible. "
- "There are no states or observations in this brain")
- final_hiddens.append(final_hidden)
- return final_hiddens
-
- @staticmethod
- def create_recurrent_encoder(input_state, memory_in, sequence_length, name='lstm'):
- """
- Builds a recurrent encoder for either state or observations (LSTM).
- :param sequence_length: Length of sequence to unroll.
- :param input_state: The input tensor to the LSTM cell.
- :param memory_in: The input memory to the LSTM cell.
- :param name: The scope of the LSTM cell.
- """
- s_size = input_state.get_shape().as_list()[1]
- m_size = memory_in.get_shape().as_list()[1]
- lstm_input_state = tf.reshape(input_state, shape=[-1, sequence_length, s_size])
- memory_in = tf.reshape(memory_in[:, :], [-1, m_size])
- _half_point = int(m_size / 2)
- with tf.variable_scope(name):
- rnn_cell = tf.contrib.rnn.BasicLSTMCell(_half_point)
- lstm_vector_in = tf.contrib.rnn.LSTMStateTuple(memory_in[:, :_half_point],
- memory_in[:, _half_point:])
- recurrent_output, lstm_state_out = tf.nn.dynamic_rnn(rnn_cell, lstm_input_state,
- initial_state=lstm_vector_in)
-
- recurrent_output = tf.reshape(recurrent_output, shape=[-1, _half_point])
- return recurrent_output, tf.concat([lstm_state_out.c, lstm_state_out.h], axis=1)
-
- def create_cc_actor_critic(self, h_size, num_layers):
- """
- Creates Continuous control actor-critic model.
- :param h_size: Size of hidden linear layers.
- :param num_layers: Number of hidden linear layers.
- """
- hidden_streams = self.create_observation_streams(2, h_size, num_layers)
-
- if self.use_recurrent:
- self.memory_in = tf.placeholder(shape=[None, self.m_size], dtype=tf.float32,
- name='recurrent_in')
- _half_point = int(self.m_size / 2)
- hidden_policy, memory_policy_out = self.create_recurrent_encoder(
- hidden_streams[0], self.memory_in[:, :_half_point], self.sequence_length,
- name='lstm_policy')
-
- hidden_value, memory_value_out = self.create_recurrent_encoder(
- hidden_streams[1], self.memory_in[:, _half_point:], self.sequence_length,
- name='lstm_value')
- self.memory_out = tf.concat([memory_policy_out, memory_value_out], axis=1,
- name='recurrent_out')
- else:
- hidden_policy = hidden_streams[0]
- hidden_value = hidden_streams[1]
-
- mu = tf.layers.dense(hidden_policy, self.act_size[0], activation=None,
- kernel_initializer=c_layers.variance_scaling_initializer(factor=0.01))
-
- log_sigma_sq = tf.get_variable("log_sigma_squared", [self.act_size[0]], dtype=tf.float32,
- initializer=tf.zeros_initializer())
-
- sigma_sq = tf.exp(log_sigma_sq)
-
- self.epsilon = tf.placeholder(shape=[None, self.act_size[0]], dtype=tf.float32, name='epsilon')
- # Clip and scale output to ensure actions are always within [-1, 1] range.
- self.output_pre = mu + tf.sqrt(sigma_sq) * self.epsilon
- output_post = tf.clip_by_value(self.output_pre, -3, 3) / 3
- self.output = tf.identity(output_post, name='action')
- self.selected_actions = tf.stop_gradient(output_post)
-
- # Compute probability of model output.
- all_probs = - 0.5 * tf.square(tf.stop_gradient(self.output_pre) - mu) / sigma_sq \
- - 0.5 * tf.log(2.0 * np.pi) - 0.5 * log_sigma_sq
-
- self.all_log_probs = tf.identity(all_probs, name='action_probs')
-
- self.entropy = 0.5 * tf.reduce_mean(tf.log(2 * np.pi * np.e) + log_sigma_sq)
-
- value = tf.layers.dense(hidden_value, 1, activation=None)
- self.value = tf.identity(value, name="value_estimate")
-
- self.all_old_log_probs = tf.placeholder(shape=[None, self.act_size[0]], dtype=tf.float32,
- name='old_probabilities')
-
- # We keep these tensors the same name, but use new nodes to keep code parallelism with discrete control.
- self.log_probs = tf.reduce_sum((tf.identity(self.all_log_probs)), axis=1, keepdims=True)
- self.old_log_probs = tf.reduce_sum((tf.identity(self.all_old_log_probs)), axis=1,
- keepdims=True)
-
- def create_dc_actor_critic(self, h_size, num_layers):
- """
- Creates Discrete control actor-critic model.
- :param h_size: Size of hidden linear layers.
- :param num_layers: Number of hidden linear layers.
- """
- hidden_streams = self.create_observation_streams(1, h_size, num_layers)
- hidden = hidden_streams[0]
-
- if self.use_recurrent:
- self.prev_action = tf.placeholder(shape=[None, len(self.act_size)], dtype=tf.int32,
- name='prev_action')
- prev_action_oh = tf.concat([
- tf.one_hot(self.prev_action[:, i], self.act_size[i]) for i in
- range(len(self.act_size))], axis=1)
- hidden = tf.concat([hidden, prev_action_oh], axis=1)
-
- self.memory_in = tf.placeholder(shape=[None, self.m_size], dtype=tf.float32,
- name='recurrent_in')
- hidden, memory_out = self.create_recurrent_encoder(hidden, self.memory_in,
- self.sequence_length)
- self.memory_out = tf.identity(memory_out, name='recurrent_out')
-
- policy_branches = []
- for size in self.act_size:
- policy_branches.append(tf.layers.dense(hidden, size, activation=None, use_bias=False,
- kernel_initializer=c_layers.variance_scaling_initializer(factor=0.01)))
-
- self.all_log_probs = tf.concat([branch for branch in policy_branches], axis=1, name="action_probs")
-
- self.action_masks = tf.placeholder(shape=[None, sum(self.act_size)], dtype=tf.float32, name="action_masks")
- output, normalized_logits = self.create_discrete_action_masking_layer(
- self.all_log_probs, self.action_masks, self.act_size)
-
- self.output = tf.identity(output)
- self.normalized_logits = tf.identity(normalized_logits, name='action')
-
- value = tf.layers.dense(hidden, 1, activation=None)
- self.value = tf.identity(value, name="value_estimate")
-
- self.action_holder = tf.placeholder(
- shape=[None, len(policy_branches)], dtype=tf.int32, name="action_holder")
- self.action_oh = tf.concat([
- tf.one_hot(self.action_holder[:, i], self.act_size[i]) for i in range(len(self.act_size))], axis=1)
- self.selected_actions = tf.stop_gradient(self.action_oh)
-
- self.all_old_log_probs = tf.placeholder(
- shape=[None, sum(self.act_size)], dtype=tf.float32, name='old_probabilities')
- _, old_normalized_logits = self.create_discrete_action_masking_layer(
- self.all_old_log_probs, self.action_masks, self.act_size)
-
- action_idx = [0] + list(np.cumsum(self.act_size))
-
- self.entropy = tf.reduce_sum((tf.stack([
- tf.nn.softmax_cross_entropy_with_logits_v2(
- labels=tf.nn.softmax(self.all_log_probs[:, action_idx[i]:action_idx[i + 1]]),
- logits=self.all_log_probs[:, action_idx[i]:action_idx[i + 1]])
- for i in range(len(self.act_size))], axis=1)), axis=1)
-
- self.log_probs = tf.reduce_sum((tf.stack([
- -tf.nn.softmax_cross_entropy_with_logits_v2(
- labels=self.action_oh[:, action_idx[i]:action_idx[i + 1]],
- logits=normalized_logits[:, action_idx[i]:action_idx[i + 1]]
- )
- for i in range(len(self.act_size))], axis=1)), axis=1, keepdims=True)
- self.old_log_probs = tf.reduce_sum((tf.stack([
- -tf.nn.softmax_cross_entropy_with_logits_v2(
- labels=self.action_oh[:, action_idx[i]:action_idx[i + 1]],
- logits=old_normalized_logits[:, action_idx[i]:action_idx[i + 1]]
- )
- for i in range(len(self.act_size))], axis=1)), axis=1, keepdims=True)
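The masking layer above renormalizes a softmax over only the currently legal actions of each branch; a small NumPy illustration of that step for a single branch (values are made up):

    import numpy as np

    logits = np.array([[1.0, 2.0, 0.5]])
    mask = np.array([[1.0, 0.0, 1.0]])                 # action 1 is currently illegal
    probs = np.exp(logits) / np.exp(logits).sum(axis=1, keepdims=True)
    masked = (probs + 1e-10) * mask                    # zero out illegal actions
    normalized = masked / masked.sum(axis=1, keepdims=True)
    # normalized is approximately [[0.62, 0.00, 0.38]]; sampling then draws from log(normalized)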
diff --git a/animalai_bkp/trainers/policy.py b/animalai_bkp/trainers/policy.py
deleted file mode 100644
index ad18c501..00000000
--- a/animalai_bkp/trainers/policy.py
+++ /dev/null
@@ -1,212 +0,0 @@
-import logging
-import numpy as np
-import tensorflow as tf
-
-from animalai.trainers import UnityException
-from tensorflow.python.tools import freeze_graph
-from animalai.trainers import tensorflow_to_barracuda as tf2bc
-
-logger = logging.getLogger("mlagents.trainers")
-
-
-class UnityPolicyException(UnityException):
- """
- Related to errors with the Trainer.
- """
- pass
-
-
-class Policy(object):
- """
- Contains a learning model, and the necessary
- functions to interact with it to perform evaluate and updating.
- """
- possible_output_nodes = ['action', 'value_estimate',
- 'action_probs', 'recurrent_out', 'memory_size',
- 'version_number', 'is_continuous_control',
- 'action_output_shape']
-
- def __init__(self, seed, brain, trainer_parameters):
- """
- Initializes the policy.
- :param seed: Random seed to use for TensorFlow.
- :param brain: The corresponding Brain for this policy.
- :param trainer_parameters: The trainer parameters.
- """
- self.m_size = None
- self.model = None
- self.inference_dict = {}
- self.update_dict = {}
- self.sequence_length = 1
- self.seed = seed
- self.brain = brain
- self.use_recurrent = trainer_parameters["use_recurrent"]
- self.use_continuous_act = (brain.vector_action_space_type == "continuous")
- self.model_path = trainer_parameters["model_path"]
- self.keep_checkpoints = trainer_parameters.get("keep_checkpoints", 5)
- self.graph = tf.Graph()
- config = tf.ConfigProto()
- config.gpu_options.allow_growth = True
- self.sess = tf.Session(config=config, graph=self.graph)
- self.saver = None
- if self.use_recurrent:
- self.m_size = trainer_parameters["memory_size"]
- self.sequence_length = trainer_parameters["sequence_length"]
- if self.m_size == 0:
- raise UnityPolicyException("The memory size for brain {0} is 0 even "
- "though the trainer uses recurrent."
- .format(brain.brain_name))
- elif self.m_size % 4 != 0:
- raise UnityPolicyException("The memory size for brain {0} is {1} "
- "but it must be divisible by 4."
- .format(brain.brain_name, self.m_size))
-
- def _initialize_graph(self):
- with self.graph.as_default():
- self.saver = tf.train.Saver(max_to_keep=self.keep_checkpoints)
- init = tf.global_variables_initializer()
- self.sess.run(init)
-
- def _load_graph(self):
- with self.graph.as_default():
- self.saver = tf.train.Saver(max_to_keep=self.keep_checkpoints)
- logger.info('Loading Model for brain {}'.format(self.brain.brain_name))
- ckpt = tf.train.get_checkpoint_state(self.model_path)
- if ckpt is None:
- logger.info('The model {0} could not be found. Make '
- 'sure you specified the right '
- '--run-id'
- .format(self.model_path))
- self.saver.restore(self.sess, ckpt.model_checkpoint_path)
-
- def evaluate(self, brain_info):
- """
- Evaluates policy for the agent experiences provided.
- :param brain_info: BrainInfo input to network.
- :return: Output from policy based on self.inference_dict.
- """
- raise UnityPolicyException("The evaluate function was not implemented.")
-
- def update(self, mini_batch, num_sequences):
- """
- Performs update of the policy.
- :param num_sequences: Number of experience trajectories in batch.
- :param mini_batch: Batch of experiences.
- :return: Results of update.
- """
- raise UnityPolicyException("The update function was not implemented.")
-
- def _execute_model(self, feed_dict, out_dict):
- """
- Executes model.
- :param feed_dict: Input dictionary mapping nodes to input data.
- :param out_dict: Output dictionary mapping names to nodes.
- :return: Dictionary mapping names to input data.
- """
- network_out = self.sess.run(list(out_dict.values()), feed_dict=feed_dict)
- run_out = dict(zip(list(out_dict.keys()), network_out))
- return run_out
-
- def _fill_eval_dict(self, feed_dict, brain_info):
- for i, _ in enumerate(brain_info.visual_observations):
- feed_dict[self.model.visual_in[i]] = brain_info.visual_observations[i]
- if self.use_vec_obs:
- feed_dict[self.model.vector_in] = brain_info.vector_observations
- if not self.use_continuous_act:
- feed_dict[self.model.action_masks] = brain_info.action_masks
- return feed_dict
-
- def make_empty_memory(self, num_agents):
- """
- Creates empty memory for use with RNNs
- :param num_agents: Number of agents.
- :return: Numpy array of zeros.
- """
- return np.zeros((num_agents, self.m_size))
-
- def get_current_step(self):
- """
- Gets current model step.
- :return: current model step.
- """
- step = self.sess.run(self.model.global_step)
- return step
-
- def increment_step(self):
- """
- Increments model step.
- """
- self.sess.run(self.model.increment_step)
-
- def get_inference_vars(self):
- """
- :return: list of inference variable names
- """
- return list(self.inference_dict.keys())
-
- def get_update_vars(self):
- """
- :return: list of update variable names
- """
- return list(self.update_dict.keys())
-
- def save_model(self, steps):
- """
- Saves the model
- :param steps: The number of steps the model was trained for
- :return:
- """
- with self.graph.as_default():
- last_checkpoint = self.model_path + '/model-' + str(steps) + '.cptk'
- self.saver.save(self.sess, last_checkpoint)
- tf.train.write_graph(self.graph, self.model_path,
- 'raw_graph_def.pb', as_text=False)
-
- def export_model(self):
- """
- Exports latest saved model to .nn format for Unity embedding.
- """
-
- with self.graph.as_default():
- target_nodes = ','.join(self._process_graph())
- ckpt = tf.train.get_checkpoint_state(self.model_path)
- freeze_graph.freeze_graph(
- input_graph=self.model_path + '/raw_graph_def.pb',
- input_binary=True,
- input_checkpoint=ckpt.model_checkpoint_path,
- output_node_names=target_nodes,
- output_graph=(self.model_path + '/frozen_graph_def.pb'),
- clear_devices=True, initializer_nodes='', input_saver='',
- restore_op_name='save/restore_all',
- filename_tensor_name='save/Const:0')
-
- tf2bc.convert(self.model_path + '/frozen_graph_def.pb', self.model_path + '.nn')
- logger.info('Exported ' + self.model_path + '.nn file')
-
- def _process_graph(self):
- """
- Gets the list of the output nodes present in the graph for inference
- :return: list of node names
- """
- all_nodes = [x.name for x in self.graph.as_graph_def().node]
- nodes = [x for x in all_nodes if x in self.possible_output_nodes]
- logger.info('List of nodes to export for brain :' + self.brain.brain_name)
- for n in nodes:
- logger.info('\t' + n)
- return nodes
-
- @property
- def vis_obs_size(self):
- return self.model.vis_obs_size
-
- @property
- def vec_obs_size(self):
- return self.model.vec_obs_size
-
- @property
- def use_vis_obs(self):
- return self.model.vis_obs_size > 0
-
- @property
- def use_vec_obs(self):
- return self.model.vec_obs_size > 0
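On the memory-size check in __init__ above: for the continuous-control model the recurrent memory is split in half between the policy and value LSTMs, and each LSTM splits its share again into cell and hidden state, which is why m_size must be divisible by 4. A small NumPy illustration (sizes are arbitrary):

    import numpy as np

    m_size = 256                        # trainer_parameters['memory_size']
    memories = np.zeros((8, m_size))    # make_empty_memory(num_agents=8)
    half = m_size // 2
    policy_mem, value_mem = memories[:, :half], memories[:, half:]       # policy/value split
    cell, hidden = policy_mem[:, :half // 2], policy_mem[:, half // 2:]  # LSTM c/h split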
diff --git a/animalai_bkp/trainers/ppo/__init__.py b/animalai_bkp/trainers/ppo/__init__.py
deleted file mode 100644
index d48a8ccf..00000000
--- a/animalai_bkp/trainers/ppo/__init__.py
+++ /dev/null
@@ -1,3 +0,0 @@
-from .models import *
-from .trainer import *
-from .policy import *
diff --git a/animalai_bkp/trainers/ppo/models.py b/animalai_bkp/trainers/ppo/models.py
deleted file mode 100644
index cb1546cd..00000000
--- a/animalai_bkp/trainers/ppo/models.py
+++ /dev/null
@@ -1,195 +0,0 @@
-import logging
-import numpy as np
-
-import tensorflow as tf
-from animalai.trainers.models import LearningModel
-
-logger = logging.getLogger("mlagents.envs")
-
-
-class PPOModel(LearningModel):
- def __init__(self, brain, lr=1e-4, h_size=128, epsilon=0.2, beta=1e-3, max_step=5e6,
- normalize=False, use_recurrent=False, num_layers=2, m_size=None, use_curiosity=False,
- curiosity_strength=0.01, curiosity_enc_size=128, seed=0):
- """
- Takes a Unity environment and model-specific hyper-parameters and returns the
- appropriate PPO agent model for the environment.
- :param brain: BrainInfo used to generate specific network graph.
- :param lr: Learning rate.
- :param h_size: Size of hidden layers
- :param epsilon: Value for policy-divergence threshold.
- :param beta: Strength of entropy regularization.
- :return: a sub-class of PPOAgent tailored to the environment.
- :param max_step: Total number of training steps.
- :param normalize: Whether to normalize vector observation input.
- :param use_recurrent: Whether to use an LSTM layer in the network.
- :param num_layers: Number of hidden layers between encoded input and policy & value layers
- :param m_size: Size of brain memory.
- """
- LearningModel.__init__(self, m_size, normalize, use_recurrent, brain, seed)
- self.use_curiosity = use_curiosity
- if num_layers < 1:
- num_layers = 1
- self.last_reward, self.new_reward, self.update_reward = self.create_reward_encoder()
- if brain.vector_action_space_type == "continuous":
- self.create_cc_actor_critic(h_size, num_layers)
- self.entropy = tf.ones_like(tf.reshape(self.value, [-1])) * self.entropy
- else:
- self.create_dc_actor_critic(h_size, num_layers)
- if self.use_curiosity:
- self.curiosity_enc_size = curiosity_enc_size
- self.curiosity_strength = curiosity_strength
- encoded_state, encoded_next_state = self.create_curiosity_encoders()
- self.create_inverse_model(encoded_state, encoded_next_state)
- self.create_forward_model(encoded_state, encoded_next_state)
- self.create_ppo_optimizer(self.log_probs, self.old_log_probs, self.value,
- self.entropy, beta, epsilon, lr, max_step)
-
- @staticmethod
- def create_reward_encoder():
- """Creates TF ops to track and increment recent average cumulative reward."""
- last_reward = tf.Variable(0, name="last_reward", trainable=False, dtype=tf.float32)
- new_reward = tf.placeholder(shape=[], dtype=tf.float32, name='new_reward')
- update_reward = tf.assign(last_reward, new_reward)
- return last_reward, new_reward, update_reward
-
- def create_curiosity_encoders(self):
- """
- Creates state encoders for current and future observations.
- Used for implementation of Curiosity-driven Exploration by Self-supervised Prediction
- See https://arxiv.org/abs/1705.05363 for more details.
- :return: current and future state encoder tensors.
- """
- encoded_state_list = []
- encoded_next_state_list = []
-
- if self.vis_obs_size > 0:
- self.next_visual_in = []
- visual_encoders = []
- next_visual_encoders = []
- for i in range(self.vis_obs_size):
- # Create input ops for next (t+1) visual observations.
- next_visual_input = self.create_visual_input(self.brain.camera_resolutions[i],
- name="next_visual_observation_" + str(i))
- self.next_visual_in.append(next_visual_input)
-
- # Create the encoder ops for current and next visual input. Note that these encoders are siamese.
- encoded_visual = self.create_visual_observation_encoder(self.visual_in[i], self.curiosity_enc_size,
- self.swish, 1, "stream_{}_visual_obs_encoder"
- .format(i), False)
-
- encoded_next_visual = self.create_visual_observation_encoder(self.next_visual_in[i],
- self.curiosity_enc_size,
- self.swish, 1,
- "stream_{}_visual_obs_encoder".format(i),
- True)
- visual_encoders.append(encoded_visual)
- next_visual_encoders.append(encoded_next_visual)
-
- hidden_visual = tf.concat(visual_encoders, axis=1)
- hidden_next_visual = tf.concat(next_visual_encoders, axis=1)
- encoded_state_list.append(hidden_visual)
- encoded_next_state_list.append(hidden_next_visual)
-
- if self.vec_obs_size > 0:
- # Create the encoder ops for current and next vector input. Note that these encoders are siamese.
- # Create input op for next (t+1) vector observation.
- self.next_vector_in = tf.placeholder(shape=[None, self.vec_obs_size], dtype=tf.float32,
- name='next_vector_observation')
-
- encoded_vector_obs = self.create_vector_observation_encoder(self.vector_in,
- self.curiosity_enc_size,
- self.swish, 2, "vector_obs_encoder",
- False)
- encoded_next_vector_obs = self.create_vector_observation_encoder(self.next_vector_in,
- self.curiosity_enc_size,
- self.swish, 2,
- "vector_obs_encoder",
- True)
- encoded_state_list.append(encoded_vector_obs)
- encoded_next_state_list.append(encoded_next_vector_obs)
-
- encoded_state = tf.concat(encoded_state_list, axis=1)
- encoded_next_state = tf.concat(encoded_next_state_list, axis=1)
- return encoded_state, encoded_next_state
-
- def create_inverse_model(self, encoded_state, encoded_next_state):
- """
- Creates inverse model TensorFlow ops for Curiosity module.
- Predicts action taken given current and future encoded states.
- :param encoded_state: Tensor corresponding to encoded current state.
- :param encoded_next_state: Tensor corresponding to encoded next state.
- """
- combined_input = tf.concat([encoded_state, encoded_next_state], axis=1)
- hidden = tf.layers.dense(combined_input, 256, activation=self.swish)
- if self.brain.vector_action_space_type == "continuous":
- pred_action = tf.layers.dense(hidden, self.act_size[0], activation=None)
- squared_difference = tf.reduce_sum(tf.squared_difference(pred_action, self.selected_actions), axis=1)
- self.inverse_loss = tf.reduce_mean(tf.dynamic_partition(squared_difference, self.mask, 2)[1])
- else:
- pred_action = tf.concat(
- [tf.layers.dense(hidden, self.act_size[i], activation=tf.nn.softmax)
- for i in range(len(self.act_size))], axis=1)
- cross_entropy = tf.reduce_sum(-tf.log(pred_action + 1e-10) * self.selected_actions, axis=1)
- self.inverse_loss = tf.reduce_mean(tf.dynamic_partition(cross_entropy, self.mask, 2)[1])
-
- def create_forward_model(self, encoded_state, encoded_next_state):
- """
- Creates forward model TensorFlow ops for Curiosity module.
- Predicts encoded future state based on encoded current state and given action.
- :param encoded_state: Tensor corresponding to encoded current state.
- :param encoded_next_state: Tensor corresponding to encoded next state.
- """
- combined_input = tf.concat([encoded_state, self.selected_actions], axis=1)
- hidden = tf.layers.dense(combined_input, 256, activation=self.swish)
- # We compare against the concatenation of all observation streams, hence `self.vis_obs_size + int(self.vec_obs_size > 0)`.
- pred_next_state = tf.layers.dense(hidden, self.curiosity_enc_size * (self.vis_obs_size + int(self.vec_obs_size > 0)),
- activation=None)
-
- squared_difference = 0.5 * tf.reduce_sum(tf.squared_difference(pred_next_state, encoded_next_state), axis=1)
- self.intrinsic_reward = tf.clip_by_value(self.curiosity_strength * squared_difference, 0, 1)
- self.forward_loss = tf.reduce_mean(tf.dynamic_partition(squared_difference, self.mask, 2)[1])
-
- def create_ppo_optimizer(self, probs, old_probs, value, entropy, beta, epsilon, lr, max_step):
- """
- Creates training-specific Tensorflow ops for PPO models.
- :param probs: Current policy probabilities
- :param old_probs: Past policy probabilities
- :param value: Current value estimate
- :param beta: Entropy regularization strength
- :param entropy: Current policy entropy
- :param epsilon: Value for policy-divergence threshold
- :param lr: Learning rate
- :param max_step: Total number of training steps.
- """
- self.returns_holder = tf.placeholder(shape=[None], dtype=tf.float32, name='discounted_rewards')
- self.advantage = tf.placeholder(shape=[None, 1], dtype=tf.float32, name='advantages')
- self.learning_rate = tf.train.polynomial_decay(lr, self.global_step, max_step, 1e-10, power=1.0)
-
- self.old_value = tf.placeholder(shape=[None], dtype=tf.float32, name='old_value_estimates')
-
- decay_epsilon = tf.train.polynomial_decay(epsilon, self.global_step, max_step, 0.1, power=1.0)
- decay_beta = tf.train.polynomial_decay(beta, self.global_step, max_step, 1e-5, power=1.0)
- optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
-
- clipped_value_estimate = self.old_value + tf.clip_by_value(tf.reduce_sum(value, axis=1) - self.old_value,
- - decay_epsilon, decay_epsilon)
-
- v_opt_a = tf.squared_difference(self.returns_holder, tf.reduce_sum(value, axis=1))
- v_opt_b = tf.squared_difference(self.returns_holder, clipped_value_estimate)
- self.value_loss = tf.reduce_mean(tf.dynamic_partition(tf.maximum(v_opt_a, v_opt_b), self.mask, 2)[1])
-
- # Here we calculate PPO policy loss. In continuous control this is done independently for each action gaussian
- # and then averaged together. This provides significantly better performance than treating the probability
- # as an average of probabilities, or as a joint probability.
- r_theta = tf.exp(probs - old_probs)
- p_opt_a = r_theta * self.advantage
- p_opt_b = tf.clip_by_value(r_theta, 1.0 - decay_epsilon, 1.0 + decay_epsilon) * self.advantage
- self.policy_loss = -tf.reduce_mean(tf.dynamic_partition(tf.minimum(p_opt_a, p_opt_b), self.mask, 2)[1])
-
- self.loss = self.policy_loss + 0.5 * self.value_loss - decay_beta * tf.reduce_mean(
- tf.dynamic_partition(entropy, self.mask, 2)[1])
-
- if self.use_curiosity:
- self.loss += 10 * (0.2 * self.forward_loss + 0.8 * self.inverse_loss)
- self.update_batch = optimizer.minimize(self.loss)
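The optimizer above builds the standard PPO clipped surrogate; a NumPy sketch of the per-action term, with made-up numbers, may help when reading create_ppo_optimizer:

    import numpy as np

    logp_new, logp_old, advantage, eps = -0.1, -0.5, 2.0, 0.2
    r = np.exp(logp_new - logp_old)                       # probability ratio, about 1.49
    clipped = np.clip(r, 1 - eps, 1 + eps)                # 1.2
    surrogate = min(r * advantage, clipped * advantage)   # pessimistic branch: 2.4
    policy_loss = -surrogate                              # minimized, so the surrogate is maximized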
diff --git a/animalai_bkp/trainers/ppo/policy.py b/animalai_bkp/trainers/ppo/policy.py
deleted file mode 100644
index 925043e4..00000000
--- a/animalai_bkp/trainers/ppo/policy.py
+++ /dev/null
@@ -1,214 +0,0 @@
-import logging
-import numpy as np
-
-from animalai.trainers.ppo.models import PPOModel
-from animalai.trainers.policy import Policy
-
-logger = logging.getLogger("mlagents.trainers")
-
-
-class PPOPolicy(Policy):
- def __init__(self, seed, brain, trainer_params, is_training, load):
- """
- Policy for Proximal Policy Optimization Networks.
- :param seed: Random seed.
- :param brain: Assigned Brain object.
- :param trainer_params: Defined training parameters.
- :param is_training: Whether the model should be trained.
- :param load: Whether a pre-trained model will be loaded or a new one created.
- """
- super().__init__(seed, brain, trainer_params)
- self.has_updated = False
- self.use_curiosity = bool(trainer_params['use_curiosity'])
-
- with self.graph.as_default():
- self.model = PPOModel(brain,
- lr=float(trainer_params['learning_rate']),
- h_size=int(trainer_params['hidden_units']),
- epsilon=float(trainer_params['epsilon']),
- beta=float(trainer_params['beta']),
- max_step=float(trainer_params['max_steps']),
- normalize=trainer_params['normalize'],
- use_recurrent=trainer_params['use_recurrent'],
- num_layers=int(trainer_params['num_layers']),
- m_size=self.m_size,
- use_curiosity=bool(trainer_params['use_curiosity']),
- curiosity_strength=float(trainer_params['curiosity_strength']),
- curiosity_enc_size=float(trainer_params['curiosity_enc_size']),
- seed=seed)
-
- if load:
- self._load_graph()
- else:
- self._initialize_graph()
-
- self.inference_dict = {'action': self.model.output, 'log_probs': self.model.all_log_probs,
- 'value': self.model.value, 'entropy': self.model.entropy,
- 'learning_rate': self.model.learning_rate}
- if self.use_continuous_act:
- self.inference_dict['pre_action'] = self.model.output_pre
- if self.use_recurrent:
- self.inference_dict['memory_out'] = self.model.memory_out
- if is_training and self.use_vec_obs and trainer_params['normalize']:
- self.inference_dict['update_mean'] = self.model.update_mean
- self.inference_dict['update_variance'] = self.model.update_variance
-
- self.update_dict = {'value_loss': self.model.value_loss,
- 'policy_loss': self.model.policy_loss,
- 'update_batch': self.model.update_batch}
- if self.use_curiosity:
- self.update_dict['forward_loss'] = self.model.forward_loss
- self.update_dict['inverse_loss'] = self.model.inverse_loss
-
- def evaluate(self, brain_info):
- """
- Evaluates policy for the agent experiences provided.
- :param brain_info: BrainInfo object containing inputs.
- :return: Outputs from network as defined by self.inference_dict.
- """
- feed_dict = {self.model.batch_size: len(brain_info.vector_observations),
- self.model.sequence_length: 1}
- epsilon = None
- if self.use_recurrent:
- if not self.use_continuous_act:
- feed_dict[self.model.prev_action] = brain_info.previous_vector_actions.reshape(
- [-1, len(self.model.act_size)])
- if brain_info.memories.shape[1] == 0:
- brain_info.memories = self.make_empty_memory(len(brain_info.agents))
- feed_dict[self.model.memory_in] = brain_info.memories
- if self.use_continuous_act:
- epsilon = np.random.normal(
- size=(len(brain_info.vector_observations), self.model.act_size[0]))
- feed_dict[self.model.epsilon] = epsilon
- feed_dict = self._fill_eval_dict(feed_dict, brain_info)
- run_out = self._execute_model(feed_dict, self.inference_dict)
- if self.use_continuous_act:
- run_out['random_normal_epsilon'] = epsilon
- return run_out
-
- def update(self, mini_batch, num_sequences):
- """
- Updates model using buffer.
- :param num_sequences: Number of trajectories in batch.
- :param mini_batch: Experience batch.
- :return: Output from update process.
- """
- feed_dict = {self.model.batch_size: num_sequences,
- self.model.sequence_length: self.sequence_length,
- self.model.mask_input: mini_batch['masks'].flatten(),
- self.model.returns_holder: mini_batch['discounted_returns'].flatten(),
- self.model.old_value: mini_batch['value_estimates'].flatten(),
- self.model.advantage: mini_batch['advantages'].reshape([-1, 1]),
- self.model.all_old_log_probs: mini_batch['action_probs'].reshape(
- [-1, sum(self.model.act_size)])}
- if self.use_continuous_act:
- feed_dict[self.model.output_pre] = mini_batch['actions_pre'].reshape(
- [-1, self.model.act_size[0]])
- feed_dict[self.model.epsilon] = mini_batch['random_normal_epsilon'].reshape(
- [-1, self.model.act_size[0]])
- else:
- feed_dict[self.model.action_holder] = mini_batch['actions'].reshape(
- [-1, len(self.model.act_size)])
- if self.use_recurrent:
- feed_dict[self.model.prev_action] = mini_batch['prev_action'].reshape(
- [-1, len(self.model.act_size)])
- feed_dict[self.model.action_masks] = mini_batch['action_mask'].reshape(
- [-1, sum(self.brain.vector_action_space_size)])
- if self.use_vec_obs:
- feed_dict[self.model.vector_in] = mini_batch['vector_obs'].reshape(
- [-1, self.vec_obs_size])
- if self.use_curiosity:
- feed_dict[self.model.next_vector_in] = mini_batch['next_vector_in'].reshape(
- [-1, self.vec_obs_size])
- if self.model.vis_obs_size > 0:
- for i, _ in enumerate(self.model.visual_in):
- _obs = mini_batch['visual_obs%d' % i]
- if self.sequence_length > 1 and self.use_recurrent:
- (_batch, _seq, _w, _h, _c) = _obs.shape
- feed_dict[self.model.visual_in[i]] = _obs.reshape([-1, _w, _h, _c])
- else:
- feed_dict[self.model.visual_in[i]] = _obs
- if self.use_curiosity:
- for i, _ in enumerate(self.model.visual_in):
- _obs = mini_batch['next_visual_obs%d' % i]
- if self.sequence_length > 1 and self.use_recurrent:
- (_batch, _seq, _w, _h, _c) = _obs.shape
- feed_dict[self.model.next_visual_in[i]] = _obs.reshape([-1, _w, _h, _c])
- else:
- feed_dict[self.model.next_visual_in[i]] = _obs
- if self.use_recurrent:
- mem_in = mini_batch['memory'][:, 0, :]
- feed_dict[self.model.memory_in] = mem_in
- self.has_updated = True
- run_out = self._execute_model(feed_dict, self.update_dict)
- return run_out
-
- def get_intrinsic_rewards(self, curr_info, next_info):
- """
- Generates intrinsic reward used for Curiosity-based training.
- :BrainInfo curr_info: Current BrainInfo.
- :BrainInfo next_info: Next BrainInfo.
- :return: Intrinsic rewards for all agents.
- """
- if self.use_curiosity:
- if len(curr_info.agents) == 0:
- return []
-
- feed_dict = {self.model.batch_size: len(next_info.vector_observations),
- self.model.sequence_length: 1}
- if self.use_continuous_act:
- feed_dict[self.model.selected_actions] = next_info.previous_vector_actions
- else:
- feed_dict[self.model.action_holder] = next_info.previous_vector_actions
- for i in range(self.model.vis_obs_size):
- feed_dict[self.model.visual_in[i]] = curr_info.visual_observations[i]
- feed_dict[self.model.next_visual_in[i]] = next_info.visual_observations[i]
- if self.use_vec_obs:
- feed_dict[self.model.vector_in] = curr_info.vector_observations
- feed_dict[self.model.next_vector_in] = next_info.vector_observations
- if self.use_recurrent:
- if curr_info.memories.shape[1] == 0:
- curr_info.memories = self.make_empty_memory(len(curr_info.agents))
- feed_dict[self.model.memory_in] = curr_info.memories
- intrinsic_rewards = self.sess.run(self.model.intrinsic_reward,
- feed_dict=feed_dict) * float(self.has_updated)
- return intrinsic_rewards
- else:
- return None
-
- def get_value_estimate(self, brain_info, idx):
- """
- Generates value estimates for bootstrapping.
- :param brain_info: BrainInfo to be used for bootstrapping.
- :param idx: Index in BrainInfo of agent.
- :return: Value estimate.
- """
- feed_dict = {self.model.batch_size: 1, self.model.sequence_length: 1}
- for i in range(len(brain_info.visual_observations)):
- feed_dict[self.model.visual_in[i]] = [brain_info.visual_observations[i][idx]]
- if self.use_vec_obs:
- feed_dict[self.model.vector_in] = [brain_info.vector_observations[idx]]
- if self.use_recurrent:
- if brain_info.memories.shape[1] == 0:
- brain_info.memories = self.make_empty_memory(len(brain_info.agents))
- feed_dict[self.model.memory_in] = [brain_info.memories[idx]]
- if not self.use_continuous_act and self.use_recurrent:
- feed_dict[self.model.prev_action] = brain_info.previous_vector_actions[idx].reshape(
- [-1, len(self.model.act_size)])
- value_estimate = self.sess.run(self.model.value, feed_dict)
- return value_estimate
-
- def get_last_reward(self):
- """
- Returns the last reward the trainer has received.
- :return: the last reward
- """
- return self.sess.run(self.model.last_reward)
-
- def update_reward(self, new_reward):
- """
- Updates reward value for policy.
- :param new_reward: New reward to save.
- """
- self.sess.run(self.model.update_reward,
- feed_dict={self.model.new_reward: new_reward})
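One detail worth flagging in the policy above: for continuous actions the Gaussian noise is drawn outside the graph in evaluate(), fed through the epsilon placeholder, and stored with the experience ('random_normal_epsilon') so that update() can rebuild exactly the same pre-clip actions. A NumPy sketch of that reparameterized sampling (shapes are illustrative):

    import numpy as np

    n_agents, act_size = 4, 2
    mu, sigma = np.zeros((n_agents, act_size)), np.ones((n_agents, act_size))
    epsilon = np.random.normal(size=(n_agents, act_size))   # saved into the buffer
    pre_action = mu + sigma * epsilon                        # model.output_pre
    action = np.clip(pre_action, -3, 3) / 3                  # squashed into [-1, 1]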
diff --git a/animalai_bkp/trainers/ppo/trainer.py b/animalai_bkp/trainers/ppo/trainer.py
deleted file mode 100644
index f5b4b422..00000000
--- a/animalai_bkp/trainers/ppo/trainer.py
+++ /dev/null
@@ -1,386 +0,0 @@
-# # Unity ML-Agents Toolkit
-# ## ML-Agent Learning (PPO)
-# Contains an implementation of PPO as described (https://arxiv.org/abs/1707.06347).
-
-import logging
-import os
-from collections import deque
-
-import numpy as np
-import tensorflow as tf
-
-from animalai.envs import AllBrainInfo, BrainInfo
-from animalai.trainers.buffer import Buffer
-from animalai.trainers.ppo.policy import PPOPolicy
-from animalai.trainers.trainer import Trainer
-
-logger = logging.getLogger("mlagents.trainers")
-
-
-class PPOTrainer(Trainer):
- """The PPOTrainer is an implementation of the PPO algorithm."""
-
- def __init__(self, brain, reward_buff_cap, trainer_parameters, training, load, seed, run_id):
- """
- Responsible for collecting experiences and training PPO model.
- :param trainer_parameters: The parameters for the trainer (dictionary).
- :param training: Whether the trainer is set for training.
- :param load: Whether the model should be loaded.
- :param seed: The seed the model will be initialized with
- :param run_id: The identifier of the current run
- """
- super(PPOTrainer, self).__init__(brain, trainer_parameters, training, run_id)
- self.param_keys = ['batch_size', 'beta', 'buffer_size', 'epsilon', 'gamma', 'hidden_units', 'lambd',
- 'learning_rate', 'max_steps', 'normalize', 'num_epoch', 'num_layers',
- 'time_horizon', 'sequence_length', 'summary_freq', 'use_recurrent',
- 'summary_path', 'memory_size', 'use_curiosity', 'curiosity_strength',
- 'curiosity_enc_size', 'model_path']
-
- self.check_param_keys()
- self.use_curiosity = bool(trainer_parameters['use_curiosity'])
- self.step = 0
- self.policy = PPOPolicy(seed, brain, trainer_parameters,
- self.is_training, load)
-
- stats = {'Environment/Cumulative Reward': [], 'Environment/Episode Length': [],
- 'Policy/Value Estimate': [], 'Policy/Entropy': [], 'Losses/Value Loss': [],
- 'Losses/Policy Loss': [], 'Policy/Learning Rate': []}
- if self.use_curiosity:
- stats['Losses/Forward Loss'] = []
- stats['Losses/Inverse Loss'] = []
- stats['Policy/Curiosity Reward'] = []
- self.intrinsic_rewards = {}
- self.stats = stats
-
- self.training_buffer = Buffer()
- self.cumulative_rewards = {}
- self._reward_buffer = deque(maxlen=reward_buff_cap)
- self.episode_steps = {}
- self.summary_path = trainer_parameters['summary_path']
- if not os.path.exists(self.summary_path):
- os.makedirs(self.summary_path)
-
- self.summary_writer = tf.summary.FileWriter(self.summary_path)
-
- def __str__(self):
- return '''Hyperparameters for the PPO Trainer of brain {0}: \n{1}'''.format(
- self.brain_name, '\n'.join(['\t{0}:\t{1}'.format(x, self.trainer_parameters[x]) for x in self.param_keys]))
-
- @property
- def parameters(self):
- """
- Returns the trainer parameters of the trainer.
- """
- return self.trainer_parameters
-
- @property
- def get_max_steps(self):
- """
- Returns the maximum number of steps. Is used to know when the trainer should be stopped.
- :return: The maximum number of steps of the trainer
- """
- return float(self.trainer_parameters['max_steps'])
-
- @property
- def get_step(self):
- """
- Returns the number of steps the trainer has performed
- :return: the step count of the trainer
- """
- return self.step
-
- @property
- def reward_buffer(self):
- """
- Returns the reward buffer. The reward buffer contains the cumulative
- rewards of the most recent episodes completed by agents using this
- trainer.
- :return: the reward buffer.
- """
- return self._reward_buffer
-
- def increment_step_and_update_last_reward(self):
- """
-        Increments the step count of the trainer and updates the last reward
- """
- if len(self.stats['Environment/Cumulative Reward']) > 0:
- mean_reward = np.mean(self.stats['Environment/Cumulative Reward'])
- self.policy.update_reward(mean_reward)
- self.policy.increment_step()
- self.step = self.policy.get_current_step()
-
- def take_action(self, all_brain_info: AllBrainInfo):
- """
- Decides actions given observations information, and takes them in environment.
- :param all_brain_info: A dictionary of brain names and BrainInfo from environment.
- :return: a tuple containing action, memories, values and an object
- to be passed to add experiences
- """
- curr_brain_info = all_brain_info[self.brain_name]
- if len(curr_brain_info.agents) == 0:
- return [], [], [], None, None
-
- run_out = self.policy.evaluate(curr_brain_info)
- self.stats['Policy/Value Estimate'].append(run_out['value'].mean())
- self.stats['Policy/Entropy'].append(run_out['entropy'].mean())
- self.stats['Policy/Learning Rate'].append(run_out['learning_rate'])
- if self.policy.use_recurrent:
- return run_out['action'], run_out['memory_out'], None, \
- run_out['value'], run_out
- else:
- return run_out['action'], None, None, run_out['value'], run_out
-
- def construct_curr_info(self, next_info: BrainInfo) -> BrainInfo:
- """
- Constructs a BrainInfo which contains the most recent previous experiences for all agents info
- which correspond to the agents in a provided next_info.
- :BrainInfo next_info: A t+1 BrainInfo.
- :return: curr_info: Reconstructed BrainInfo to match agents of next_info.
- """
- visual_observations = [[]]
- vector_observations = []
- text_observations = []
- memories = []
- rewards = []
- local_dones = []
- max_reacheds = []
- agents = []
- prev_vector_actions = []
- prev_text_actions = []
- for agent_id in next_info.agents:
- agent_brain_info = self.training_buffer[agent_id].last_brain_info
- if agent_brain_info is None:
- agent_brain_info = next_info
- agent_index = agent_brain_info.agents.index(agent_id)
- for i in range(len(next_info.visual_observations)):
- visual_observations[i].append(agent_brain_info.visual_observations[i][agent_index])
- vector_observations.append(agent_brain_info.vector_observations[agent_index])
- text_observations.append(agent_brain_info.text_observations[agent_index])
- if self.policy.use_recurrent:
-                if len(agent_brain_info.memories) > 0:
- memories.append(agent_brain_info.memories[agent_index])
- else:
- memories.append(self.policy.make_empty_memory(1))
- rewards.append(agent_brain_info.rewards[agent_index])
- local_dones.append(agent_brain_info.local_done[agent_index])
- max_reacheds.append(agent_brain_info.max_reached[agent_index])
- agents.append(agent_brain_info.agents[agent_index])
- prev_vector_actions.append(agent_brain_info.previous_vector_actions[agent_index])
- prev_text_actions.append(agent_brain_info.previous_text_actions[agent_index])
- if self.policy.use_recurrent:
- memories = np.vstack(memories)
- curr_info = BrainInfo(visual_observations, vector_observations, text_observations,
- memories, rewards, agents, local_dones, prev_vector_actions,
- prev_text_actions, max_reacheds)
- return curr_info
-
- def add_experiences(self, curr_all_info: AllBrainInfo, next_all_info: AllBrainInfo, take_action_outputs):
- """
- Adds experiences to each agent's experience history.
- :param curr_all_info: Dictionary of all current brains and corresponding BrainInfo.
- :param next_all_info: Dictionary of all current brains and corresponding BrainInfo.
- :param take_action_outputs: The outputs of the take action method.
- """
- curr_info = curr_all_info[self.brain_name]
- next_info = next_all_info[self.brain_name]
-
- for agent_id in curr_info.agents:
- self.training_buffer[agent_id].last_brain_info = curr_info
- self.training_buffer[agent_id].last_take_action_outputs = take_action_outputs
-
- if curr_info.agents != next_info.agents:
- curr_to_use = self.construct_curr_info(next_info)
- else:
- curr_to_use = curr_info
-
- intrinsic_rewards = self.policy.get_intrinsic_rewards(curr_to_use, next_info)
-
- for agent_id in next_info.agents:
- stored_info = self.training_buffer[agent_id].last_brain_info
- stored_take_action_outputs = self.training_buffer[agent_id].last_take_action_outputs
- if stored_info is not None:
- idx = stored_info.agents.index(agent_id)
- next_idx = next_info.agents.index(agent_id)
- if not stored_info.local_done[idx]:
- for i, _ in enumerate(stored_info.visual_observations):
- self.training_buffer[agent_id]['visual_obs%d' % i].append(
- stored_info.visual_observations[i][idx])
- self.training_buffer[agent_id]['next_visual_obs%d' % i].append(
- next_info.visual_observations[i][next_idx])
- if self.policy.use_vec_obs:
- self.training_buffer[agent_id]['vector_obs'].append(stored_info.vector_observations[idx])
- self.training_buffer[agent_id]['next_vector_in'].append(
- next_info.vector_observations[next_idx])
- if self.policy.use_recurrent:
- if stored_info.memories.shape[1] == 0:
- stored_info.memories = np.zeros((len(stored_info.agents), self.policy.m_size))
- self.training_buffer[agent_id]['memory'].append(stored_info.memories[idx])
- actions = stored_take_action_outputs['action']
- if self.policy.use_continuous_act:
- actions_pre = stored_take_action_outputs['pre_action']
- self.training_buffer[agent_id]['actions_pre'].append(actions_pre[idx])
- epsilons = stored_take_action_outputs['random_normal_epsilon']
- self.training_buffer[agent_id]['random_normal_epsilon'].append(
- epsilons[idx])
- else:
- self.training_buffer[agent_id]['action_mask'].append(
- stored_info.action_masks[idx], padding_value=1)
- a_dist = stored_take_action_outputs['log_probs']
- value = stored_take_action_outputs['value']
- self.training_buffer[agent_id]['actions'].append(actions[idx])
- self.training_buffer[agent_id]['prev_action'].append(stored_info.previous_vector_actions[idx])
- self.training_buffer[agent_id]['masks'].append(1.0)
- if self.use_curiosity:
- self.training_buffer[agent_id]['rewards'].append(next_info.rewards[next_idx] +
- intrinsic_rewards[next_idx])
- else:
- self.training_buffer[agent_id]['rewards'].append(next_info.rewards[next_idx])
- self.training_buffer[agent_id]['action_probs'].append(a_dist[idx])
- self.training_buffer[agent_id]['value_estimates'].append(value[idx][0])
- if agent_id not in self.cumulative_rewards:
- self.cumulative_rewards[agent_id] = 0
- self.cumulative_rewards[agent_id] += next_info.rewards[next_idx]
- if self.use_curiosity:
- if agent_id not in self.intrinsic_rewards:
- self.intrinsic_rewards[agent_id] = 0
- self.intrinsic_rewards[agent_id] += intrinsic_rewards[next_idx]
- if not next_info.local_done[next_idx]:
- if agent_id not in self.episode_steps:
- self.episode_steps[agent_id] = 0
- self.episode_steps[agent_id] += 1
-
- def process_experiences(self, current_info: AllBrainInfo, new_info: AllBrainInfo):
- """
- Checks agent histories for processing condition, and processes them as necessary.
- Processing involves calculating value and advantage targets for model updating step.
- :param current_info: Dictionary of all current brains and corresponding BrainInfo.
- :param new_info: Dictionary of all next brains and corresponding BrainInfo.
- """
-
- info = new_info[self.brain_name]
- for l in range(len(info.agents)):
- agent_actions = self.training_buffer[info.agents[l]]['actions']
- if ((info.local_done[l] or len(agent_actions) > self.trainer_parameters['time_horizon'])
- and len(agent_actions) > 0):
- agent_id = info.agents[l]
- if info.local_done[l] and not info.max_reached[l]:
- value_next = 0.0
- else:
- if info.max_reached[l]:
- bootstrapping_info = self.training_buffer[agent_id].last_brain_info
- idx = bootstrapping_info.agents.index(agent_id)
- else:
- bootstrapping_info = info
- idx = l
- value_next = self.policy.get_value_estimate(bootstrapping_info, idx)
-
- self.training_buffer[agent_id]['advantages'].set(
- get_gae(
- rewards=self.training_buffer[agent_id]['rewards'].get_batch(),
- value_estimates=self.training_buffer[agent_id]['value_estimates'].get_batch(),
- value_next=value_next,
- gamma=self.trainer_parameters['gamma'],
- lambd=self.trainer_parameters['lambd']))
- self.training_buffer[agent_id]['discounted_returns'].set(
- self.training_buffer[agent_id]['advantages'].get_batch()
- + self.training_buffer[agent_id]['value_estimates'].get_batch())
-
- self.training_buffer.append_update_buffer(agent_id, batch_size=None,
- training_length=self.policy.sequence_length)
-
- self.training_buffer[agent_id].reset_agent()
- if info.local_done[l]:
- self.stats['Environment/Cumulative Reward'].append(
- self.cumulative_rewards.get(agent_id, 0))
- self.reward_buffer.appendleft(self.cumulative_rewards.get(agent_id, 0))
- self.stats['Environment/Episode Length'].append(
- self.episode_steps.get(agent_id, 0))
- self.cumulative_rewards[agent_id] = 0
- self.episode_steps[agent_id] = 0
- if self.use_curiosity:
- self.stats['Policy/Curiosity Reward'].append(
- self.intrinsic_rewards.get(agent_id, 0))
- self.intrinsic_rewards[agent_id] = 0
-
- def end_episode(self):
- """
- A signal that the Episode has ended. The buffer must be reset.
-        Gets called only when the academy resets.
- """
- self.training_buffer.reset_local_buffers()
- for agent_id in self.cumulative_rewards:
- self.cumulative_rewards[agent_id] = 0
- for agent_id in self.episode_steps:
- self.episode_steps[agent_id] = 0
- if self.use_curiosity:
- for agent_id in self.intrinsic_rewards:
- self.intrinsic_rewards[agent_id] = 0
-
- def is_ready_update(self):
- """
- Returns whether or not the trainer has enough elements to run update model
- :return: A boolean corresponding to whether or not update_model() can be run
- """
- size_of_buffer = len(self.training_buffer.update_buffer['actions'])
- return size_of_buffer > max(int(self.trainer_parameters['buffer_size'] / self.policy.sequence_length), 1)
-
- def update_policy(self):
- """
- Uses demonstration_buffer to update the policy.
- """
- n_sequences = max(int(self.trainer_parameters['batch_size'] / self.policy.sequence_length), 1)
- value_total, policy_total, forward_total, inverse_total = [], [], [], []
- advantages = self.training_buffer.update_buffer['advantages'].get_batch()
- self.training_buffer.update_buffer['advantages'].set(
- (advantages - advantages.mean()) / (advantages.std() + 1e-10))
- num_epoch = self.trainer_parameters['num_epoch']
- for k in range(num_epoch):
- self.training_buffer.update_buffer.shuffle()
- buffer = self.training_buffer.update_buffer
- for l in range(len(self.training_buffer.update_buffer['actions']) // n_sequences):
- start = l * n_sequences
- end = (l + 1) * n_sequences
- run_out = self.policy.update(buffer.make_mini_batch(start, end), n_sequences)
- value_total.append(run_out['value_loss'])
- policy_total.append(np.abs(run_out['policy_loss']))
- if self.use_curiosity:
- inverse_total.append(run_out['inverse_loss'])
- forward_total.append(run_out['forward_loss'])
- self.stats['Losses/Value Loss'].append(np.mean(value_total))
- self.stats['Losses/Policy Loss'].append(np.mean(policy_total))
- if self.use_curiosity:
- self.stats['Losses/Forward Loss'].append(np.mean(forward_total))
- self.stats['Losses/Inverse Loss'].append(np.mean(inverse_total))
- self.training_buffer.reset_update_buffer()
-
-
-def discount_rewards(r, gamma=0.99, value_next=0.0):
- """
- Computes discounted sum of future rewards for use in updating value estimate.
- :param r: List of rewards.
- :param gamma: Discount factor.
- :param value_next: T+1 value estimate for returns calculation.
- :return: discounted sum of future rewards as list.
- """
- discounted_r = np.zeros_like(r)
- running_add = value_next
- for t in reversed(range(0, r.size)):
- running_add = running_add * gamma + r[t]
- discounted_r[t] = running_add
- return discounted_r
-
-
-def get_gae(rewards, value_estimates, value_next=0.0, gamma=0.99, lambd=0.95):
- """
- Computes generalized advantage estimate for use in updating policy.
- :param rewards: list of rewards for time-steps t to T.
- :param value_next: Value estimate for time-step T+1.
- :param value_estimates: list of value estimates for time-steps t to T.
- :param gamma: Discount factor.
- :param lambd: GAE weighing factor.
- :return: list of advantage estimates for time-steps t to T.
- """
- value_estimates = np.asarray(value_estimates.tolist() + [value_next])
- delta_t = rewards + gamma * value_estimates[1:] - value_estimates[:-1]
- advantage = discount_rewards(r=delta_t, gamma=gamma * lambd)
- return advantage
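
The advantage targets written in process_experiences come from the two helpers that close this file: the TD residuals delta_t = r_t + gamma * V(s_{t+1}) - V(s_t) are discounted with gamma * lambd to give the GAE values, and the return targets are advantages + value_estimates. A self-contained sketch of that computation on a toy three-step trajectory follows; the numbers are illustrative only.

import numpy as np

def discount(deltas, gamma):
    # Backwards-accumulated discounted sum, as in discount_rewards() above.
    out, running = np.zeros_like(deltas), 0.0
    for t in reversed(range(len(deltas))):
        running = running * gamma + deltas[t]
        out[t] = running
    return out

rewards = np.array([0.0, 0.0, 1.0])          # toy episode rewards
values = np.array([0.5, 0.6, 0.7])           # V(s_t) estimates from the policy
value_next, gamma, lambd = 0.0, 0.99, 0.95   # bootstrap value and discount factors

v = np.append(values, value_next)
deltas = rewards + gamma * v[1:] - v[:-1]     # TD residuals delta_t
advantages = discount(deltas, gamma * lambd)  # GAE(gamma, lambda), as in get_gae()
returns = advantages + values                 # discounted-return targets
print(advantages, returns)
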
diff --git a/animalai_bkp/trainers/tensorflow_to_barracuda.py b/animalai_bkp/trainers/tensorflow_to_barracuda.py
deleted file mode 100644
index f33f3402..00000000
--- a/animalai_bkp/trainers/tensorflow_to_barracuda.py
+++ /dev/null
@@ -1,1034 +0,0 @@
-from __future__ import print_function
-import numpy as np
-import struct # convert between Python values and C structs
-import tensorflow as tf
-import re
-#import barracuda
-#from barracuda import Struct
-from animalai.trainers import barracuda
-from animalai.trainers.barracuda import Struct
-from google.protobuf import descriptor
-from google.protobuf.json_format import MessageToJson
-
-
-if __name__ == '__main__':
-    # Handle command line arguments
- args = barracuda.parse_args(
- description = 'Convert Tensorflow model to Barracuda binary',
- source_extension = '.pb',
- help = 'input Tensorflow serialized .pb file')
-    # The following code can be used as an example of the API used from another module
- # convert() is the main entry point for converter
- import tensorflow_to_barracuda as tf2bc
- tf2bc.convert(args.source_file, args.target_file, args.trim_unused_by_output, args)
-
-
-# TODO: support more than 1 LSTM layer per model - prepend scope to names and inputs
-# TODO: support different activation functions in LSTM
-# TODO: strip output Identity node, instead patch upstream layer names
-# TODO: use ScaleBias and Pow with alpha when input is constant Tensor
-# TODO: support all data format types (currently only NHWC)
-# TODO: support all data types (currently only FLOAT, INT32, BOOL)
-# TODO: implement FusedResizeAndPadConv2D
-
-# Important ProtoBuf definitions:
-# https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/types.proto
-# https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/tensor.proto
-# https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/node_def.proto
-#
-# Node descriptions:
-# https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/ops/nn_ops.cc
-# https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/ops/math_ops.cc
-# https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/ops/random_ops.cc
-#
-# Class doc:
-# https://www.tensorflow.org/api_docs/cc/
-#
-known_classes = {
- 'Dense': Struct(
- id = 1,
- out_shapes = lambda shapes: [
- [shapes[0][0], 1, 1, shapes[0][1]], # W
- [1, 1, 1, shapes[-1][-1]] # B
- ],
- patch_data = lambda data: [
- data[0],
- data[1]
- ]),
- 'MatMul': Struct(
- id = 1,
- out_shapes = lambda shapes: [
- [shapes[0][0], 1, 1, shapes[0][1]], # W
- [1, 1, 1, shapes[0][1]] # B
- ],
- patch_data = lambda data: [
- data[0],
- np.zeros(np.shape(data[1]))
- ]),
- 'BiasAdd': Struct(
- id = 51, # implemented as ScaleBias
- out_shapes = lambda shapes: [
- [1, 1, 1, shapes[0][0]], # ONE
- [1, 1, 1, shapes[0][0]], # B
- ],
- patch_data = lambda data: [
- np.ones(np.shape(data[0])),
- data[0]
- ]),
-
- # TODO: NCHW
- 'Conv2D': Struct(
- id = 20,
- out_shapes = lambda shapes: [
- shapes[0], # K
- [1, 1, 1, shapes[-1][-1]] # B
- ],
- patch_data = lambda data: [
- data[0],
- data[1]
- ]),
- 'DepthwiseConv2dNative': Struct( # DepthwiseConv2D
- id = 21,
- out_shapes = lambda s: [
- [s[0][0], s[0][1], s[0][3], s[0][2]], # K TF:[H, W, in_channels, channel_multiplier] => [H, W, 1, in_channels]
- [1, 1, 1, s[-1][-1]] if len(s) > 1 else
- [1, 1, 1, s[0][2]] # B
- ],
- patch_data = lambda data: [
- np.transpose(data[0], (0,1,3,2)),
- data[1]
- ]),
- 'Conv2DBackpropInput': Struct( # Conv2DTranspose
- id = 22,
- out_shapes = lambda shapes: [
- shapes[0], # K
- [1, 1, 1, shapes[-1][-1]] # B
- ],
- patch_data = lambda data: [
- data[0],
- data[1]
- ]),
-
- # TODO: 3D
-
- 'ResizeNearestNeighbor':
- 23, # implemented as Upsample2D
- 'ResizeBilinear': 23, # implemented as Upsample2D
- 'ResizeBicubic': 23, # implemented as Upsample2D
- 'MaxPool': 25,
- 'AvgPool': 26,
-
- 'GlobalAveragePool':28,
-
- 'Activation': 50,
-
- 'BatchNormalization': Struct(
- id = 51, # after fusion implemented as ScaleBias
- out_shapes = lambda shapes: [
- [1, 1, 1, shapes[0][0]], # S
- [1, 1, 1, shapes[0][0]], # B
- ],
- patch_data = lambda data:
- # fuse [gamma, beta, mean, var, epsilon] => [scale, bias]
- # TODO: double-check if epsilon is the last data argument and not the 1st?
- barracuda.fuse_batchnorm_weights(data[0], data[1], data[2], data[3], data[4]) if len(data) == 5 else
- # fuse [ONE, beta, mean, var, epsilon] => [scale, bias]
- # TODO: double-check if epsilon is the last data argument and not the 1st?
- barracuda.fuse_batchnorm_weights(np.ones(np.shape(data[0])), data[0], data[1], data[2], data[3])
- ),
- 'FusedBatchNorm': Struct(
- id = 51, # after fusion implemented as ScaleBias
- out_shapes = lambda shapes: [
- [1, 1, 1, shapes[0][0]], # S
- [1, 1, 1, shapes[0][0]], # B
- ],
- patch_data = lambda data, layer:
- # fuse [gamma, beta, mean, var, epsilon] => [scale, bias]
- barracuda.fuse_batchnorm_weights(data[0], data[1], data[2], data[3], get_epsilon(layer))
- ),
- 'LRN': 53,
-
- 'RandomStandardNormal':
- 64,
- 'RandomUniform': 65,
- 'Multinomial': 66,
- 'OneHot': 67,
-
- # Broadcast ops
- 'Add': 100,
- 'Sub': 101,
- 'Mul': 102,
- 'RealDiv': 103,
- 'Pow': 104,
- 'Minimum': 110,
- 'Maximum': 111,
-
- # Reduce ops
- 'Max': 124,
- 'Mean': 125,
- 'Min': 126,
- 'Prod': 127,
- 'Sum': 128,
-
- 'Flatten': 200,
- 'Reshape': 201,
- #'Squeeze': 203,
- #'Unsqueeze': 204,
- 'Concat': 210,
- 'StridedSlice': 211,
-}
-
-requires_runtime_flag = {
- 'Dropout' : 'DropoutRuntime',
- 'BatchNormalization' : 'BatchNormalizationRuntime',
-}
-
-known_activations = {
- 'Linear' : 0,
- 'Relu' : 1,
- 'Softmax' : 2,
- 'Tanh' : 3,
- 'Sigmoid' : 4,
- 'Elu' : 5,
- 'Relu6' : 6,
- 'LeakyRelu' : 7,
- 'Selu' : 8,
- 'Swish' : 9,
-
- 'LogSoftmax' : 10,
- 'Softplus' : 11,
- 'Softsign' : 12,
-
- 'Abs' : 100,
- 'Neg' : 101,
- 'Ceil' : 102,
- 'Floor' : 104,
-
- 'Sqrt' : 111,
- 'Exp' : 113,
- 'Log' : 114,
-
- 'Acos' : 200,
- 'Acosh' : 201,
- 'Asin' : 202,
- 'Asinh' : 203,
- 'Atan' : 204,
- 'Atanh' : 205,
- 'Cos' : 206,
- 'Cosh' : 207,
- 'Sin' : 208,
- 'Sinh' : 209,
- 'Tan' : 210
-}
-
-known_paddings = {
- 'VALID' : [0,0,0,0],
- 'SAME' : [-1] # SameUpper
-}
-
-supported_data_formats = {
- 'NHWC'
-}
-
-known_patterns = {
- # TODO: Flatten pattern using namespace regexp
- repr(['Shape', 'StridedSlice', 'Pack', 'Reshape']) : "Flatten",
- repr(['Shape', 'StridedSlice', 'Prod', 'Pack', 'Reshape']) : "Flatten",
- repr(['Shape', 'Slice', 'Slice', 'Prod',
- 'ExpandDims', 'ConcatV2', 'Reshape']) : "Flatten",
- repr(['Const', 'Reshape']) : 'Reshape',
-
- repr(['Add', 'Rsqrt', 'Mul', 'Mul', 'Sub', 'Add']) : 'BatchNormalization',
- repr(['Add', 'Rsqrt', 'Mul', 'Mul', 'Mul', 'Sub', 'Add']) : 'BatchNormalization',
-
- repr(['ConcatV2']) : 'ConcatV2',
- repr(['Mean']) : 'Mean',
- repr(['Multinomial']) : 'Multinomial',
- repr(['OneHot']) : 'OneHot',
- repr(['Square']) : 'Square',
-
- repr(['MatMul', 'BiasAdd']) : 'Dense',
- repr(['Conv2D', 'BiasAdd']) : 'Conv2D',
- repr(['DepthwiseConv2dNative', 'BiasAdd']) : 'DepthwiseConv2dNative',
- repr(['Conv2DBackpropInput', 'BiasAdd']) : 'Conv2DBackpropInput',
-
-
- repr(['Pack', 'Reshape']) : 'Flatten$', # for now we assume that this combination is trivial Flatten
-                                                            # for example it is used in ML-agents LSTM nets with sequence_length==1
-
- repr(['StridedSlice', 'Reshape',
- re.compile('^lstm/'),
- 'Reshape', 'ConcatV2', 'Identity']) : 'BasicLSTM',
-
- repr([re.compile('^lstm/'),
- 'Reshape', 'ConcatV2', 'Identity']) : 'BasicLSTM',
-
- repr(['Sigmoid', 'Mul']) : "Swish",
-
- # TODO: FusedResizeAndPadConv2D
-}
-
-def by_name(args, name):
- for a in args:
- if a.name.endswith(name):
- return a
-
-def by_op(args, op):
- for a in args:
- if a.op == op:
- return a
-
-def order_by(args, names):
- ordered = []
- arg_count = len(args)
- for name in names:
- ordered += [a for a in args if a.endswith(name)]
- args = [a for a in args if not a.endswith(name)]
- ordered += args # append what is left
- assert(len(ordered) == arg_count)
- return ordered
-
-transform_patterns = {
- 'Flatten' : lambda nodes, inputs, tensors, _:
- Struct(
- op = 'Flatten',
- input = inputs
- ),
- 'Flatten$' : lambda nodes, inputs, tensors, _:
- Struct(
- op = 'Flatten',
- input = [inputs[-1]] # take only the last input, assume all other arguments are trivial (like sequence_length==1 always in ML-agents LSTM nets)
- ),
- 'Reshape' : lambda nodes, inputs, tensors, _:
- Struct(
- op = 'Reshape',
- input = inputs,
- shape = [tensors[0].data[0], tensors[0].data[1], tensors[0].data[2], tensors[0].data[3]] if len(tensors[0].data) == 4 else
- [tensors[0].data[0], 1, tensors[0].data[1], tensors[0].data[2]] if len(tensors[0].data) == 3 else
- [tensors[0].data[0], 1, 1, tensors[0].data[1]]
- # tensor.name = 'shape'
- ),
- 'Multinomial' : lambda nodes, inputs, tensors, _:
- Struct(
- op = 'Multinomial',
- input = inputs,
- shape = [int(by_name(tensors, '/num_samples').data[0])],
- #seed = get_attr(nodes[0], 'seed'),
- ),
- 'OneHot' : lambda nodes, inputs, tensors, _:
- Struct(
- op = 'OneHot',
- input = inputs,
- shape = [int(by_name(tensors, '/depth').data[0])],
- alpha = by_name(tensors, '/on_value').data[0],
- beta = by_name(tensors, '/off_value').data[0],
- ),
- 'Square' : lambda nodes, inputs, tensors, _:
- Struct(
- op = 'Mul',
- input = [i for i in inputs] + [i for i in inputs], # input * input
- ),
- 'ConcatV2' : lambda nodes, inputs, tensors, _:
- Struct(
- op = 'Concat',
- input = inputs,
-
-            # TEMPORARY: until we implement rank detection and axis remapping (hopefully in exporter)
- # HACK: assume Concat is always for last channel
- axis = int(-1)
- #axis = int(by_name(tensors, '/axis').data[0])
- ),
- 'BatchNormalization' : lambda nodes, inputs, tensors, _:
- Struct(
- op = 'BatchNormalization',
- input = [i for i in inputs] +
- order_by([t.name for t in tensors], ['gamma', 'beta', 'mean', 'variance']),
- ),
- 'Mean' : lambda nodes, inputs, tensors, _:
- Struct(
- # TODO: use data_frmt of the input instead of hardcoded [1,2] for HW
- op = 'GlobalAveragePool' if np.array_equal(tensors[0].data, [1,2]) else 'MeanWithUnsupportedReductionTensor',
- input = [i for i in inputs],
- ),
- 'Dense' : lambda nodes, inputs, tensors, _:
- Struct(
- op = 'Dense',
- input = [i for i in inputs] + [t.name for t in tensors],
- data_frmt = get_attr(by_op(nodes, 'Dense') or by_op(nodes, 'MatMul'), 'data_format'),
- ),
- 'Conv2D' : lambda nodes, inputs, tensors, _:
- Struct(
- op = 'Conv2D',
- input = [i for i in inputs] + [t.name for t in tensors],
- padding = get_attr(by_op(nodes, 'Conv2D'), 'padding'),
- strides = get_attr(by_op(nodes, 'Conv2D'), 'strides'),
- dilations = get_attr(by_op(nodes, 'Conv2D'), 'dilations'),
- data_frmt = get_attr(by_op(nodes, 'Conv2D'), 'data_format'),
- ),
- 'DepthwiseConv2dNative' : lambda nodes, inputs, tensors, _:
- Struct(
- op = 'DepthwiseConv2dNative',
- input = [i for i in inputs] + [t.name for t in tensors],
- padding = get_attr(by_op(nodes, 'DepthwiseConv2dNative'), 'padding'),
- strides = get_attr(by_op(nodes, 'DepthwiseConv2dNative'), 'strides'),
- dilations = get_attr(by_op(nodes, 'DepthwiseConv2dNative'), 'dilations'),
- data_frmt = get_attr(by_op(nodes, 'DepthwiseConv2dNative'), 'data_format'),
- ),
- 'Conv2DBackpropInput' : lambda nodes, inputs, tensors, _:
- Struct(
- op = 'Conv2DBackpropInput',
- input = [i for i in inputs] + [t.name for t in tensors],
- padding = get_attr(by_op(nodes, 'Conv2DBackpropInput'), 'padding'),
- strides = get_attr(by_op(nodes, 'Conv2DBackpropInput'), 'strides'),
- dilations = get_attr(by_op(nodes, 'Conv2DBackpropInput'), 'dilations'),
- data_frmt = get_attr(by_op(nodes, 'Conv2DBackpropInput'), 'data_format'),
- ),
- 'BasicLSTM' : lambda nodes, inputs, tensors, context:
- basic_lstm(nodes, inputs, tensors, context),
-
- 'Swish' : lambda nodes, inputs, tensors, _:
- Struct(
- op = 'Swish',
- input = inputs
- ),
-
- # TODO:'Round'
- # TODO:'Rsqrt'
-}
-
-# Parse
-def get_attr(node, attr_name, default=None):
- if type(node) == Struct:
- if hasattr(node, attr_name):
- return getattr(node, attr_name)
- else:
- return default
-
- # See: https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/attr_value.proto
- val = node.attr[attr_name]
-
- if val.HasField("list"):
- return val.list.i
- # NOTE: can't find way to identify type of list BUT it is almost always list(int)
- # except list(float) in FractionalAvg/MaxPool
- if val.HasField("b"):
- return val.b
- if val.HasField("i"):
- return val.i
- if val.HasField("f"):
- return val.f
- if val.HasField("s"):
- return val.s.decode("utf-8")
- if val.HasField("shape"):
- return val.shape
- if val.HasField("tensor"):
- return val.tensor
- return default
-
-def get_epsilon(layer):
- return get_attr(layer, 'epsilon', default=0.001) # default epsilon taken from tf.layers.batch_normalization
-
-def get_layer_shape(layer):
- shape = get_attr(layer, 'shape')
- if not shape:
- return [-1, -1, -1, -1]
- shape = [dim.size for dim in shape.dim]
- if len(shape) == 1:
- return [1, 1, 1, shape[0]]
- if len(shape) == 2:
- return [shape[0], 1, 1, shape[1]]
- return shape
-
-def get_tensor_dims(tensor):
- if isinstance(tensor, np.ndarray):
- return np.shape(tensor)
-
- dims = []
- if tensor.tensor_shape:
- dims = [v.size for v in tensor.tensor_shape.dim]
- if tensor.float_val:
- dims = np.shape(tensor.float_val)
- if tensor.int_val:
- dims = np.shape(tensor.int_val)
- if tensor.bool_val:
- dims = np.shape(tensor.bool_val)
- return dims
-
-def get_tensor_dtype(tensor):
- if isinstance(tensor, np.ndarray):
- return tensor.dtype
-
- dataType = ''
- fields = tensor.ListFields()
-
- for field, value in fields:
- if field.name == 'dtype' and field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_ENUM:
- dataType = field.enum_type.values_by_number.get(value, None).name
-
- return dataType
-
-def get_tensor_data(tensor):
- if isinstance(tensor, np.ndarray):
- return tensor.astype(float)
-
- dims = get_tensor_dims(tensor)
- elems = np.product(dims)
-
- if tensor.tensor_content:
- # TODO: support other types
- dataType = get_tensor_dtype(tensor)
- if dataType == "DT_FLOAT":
- data = struct.unpack('<'+str(elems)+'f', tensor.tensor_content)
- elif dataType == "DT_INT32":
- data = struct.unpack('<'+str(elems)+'i', tensor.tensor_content)
- elif dataType == "DT_BOOL":
- data = struct.unpack('<'+str(elems)+'?', tensor.tensor_content)
- else:
- print('UNSUPPORTED: data type', dataType)
- if tensor.float_val:
- data = tensor.float_val
- if tensor.int_val:
- data = np.array(tensor.int_val, dtype=float)
- if tensor.bool_val:
- data = np.array(tensor.bool_val, dtype=float)
- return np.array(data).reshape(dims)
-
-def flatten(items,enter=lambda x:isinstance(x, list)):
- # http://stackoverflow.com/a/40857703
- # https://github.com/ctmakro/canton/blob/master/canton/misc.py
- """Yield items from any nested iterable; see REF."""
- for x in items:
- if enter(x):
- yield from flatten(x)
- else:
- yield x
-
-def replace_strings_in_list(array_of_strings, replace_with_strings):
-    "A value in replace_with_strings can be either a single string or a list of strings"
-    potentially_nested_list = [replace_with_strings.get(s) or s for s in array_of_strings]
- return list(flatten(potentially_nested_list))
-
-def remove_duplicates_from_list(array):
- "Preserves the order of elements in the list"
- output = []
- unique = set()
- for a in array:
- if a not in unique:
- unique.add(a)
- output.append(a)
- return output
-
-#########################################################
-
-def pool_to_HW(shape, data_frmt):
- """ Convert from NHWC|NCHW => HW
- """
- if len(shape) != 4:
- return shape # Not NHWC|NCHW, return as is
- if data_frmt == 'NCHW':
- return [shape[2], shape[3]]
- return [shape[1], shape[2]]
-
-def strides_to_HW(shape, format):
- return pool_to_HW(shape, format)
-
-#########################################################
-
-def gru(nodes, inputs, tensors, context):
- assert(len(inputs) == 2)
-
- def find_tensor_by_name(name, default=None):
- nonlocal tensors
- candidates = [t for t in tensors if t.name.endswith(name)]
- return candidates[0].data if candidates else default
-
- input = inputs[-1]
- state = inputs[0]
- gates_kernel = find_tensor_by_name('/gates/kernel')
- gates_bias = find_tensor_by_name('/gates/bias', default=np.zeros(np.shape(gates_kernel)[-1]))
- candidate_kernel = find_tensor_by_name('/candidate/kernel')
- candidate_bias = find_tensor_by_name('/candidate/bias', default=np.zeros(np.shape(candidate_kernel)[-1]))
- new_state = nodes[-1].name + '_h'
-
- assert(np.shape(gates_kernel)[-1] == np.shape(gates_bias)[-1])
- assert(np.shape(candidate_kernel)[-1] == np.shape(candidate_bias)[-1])
-
- num_gates = 2
- seq_length = 1
- hidden_size = np.shape(gates_kernel)[-1] // num_gates
-
- gate_kernels = np.split(gates_kernel, num_gates, axis=-1)
- gate_biases = np.split(gates_bias, num_gates, axis=-1)
-
- context.model_tensors['kernel_r'] = gate_kernels[0]
- context.model_tensors['kernel_u'] = gate_kernels[1]
- context.model_tensors['kernel_c'] = candidate_kernel
- context.model_tensors['bias_r'] = gate_biases[0]
- context.model_tensors['bias_u'] = gate_biases[1]
- context.model_tensors['bias_c'] = candidate_bias
-
- new_layers = barracuda.gru('gru', input, state,
- 'kernel_r', 'kernel_u', 'kernel_c',
- 'bias_r', 'bias_u', 'bias_c',
- new_state)
-
- state_shape = [1, 1, seq_length, hidden_size]
- context.model_memories += [state_shape, state, new_state]
-
-    # map expected output of the replaced pattern to output from our GRU cell
- actual_output_node = nodes[-4]
- assert(actual_output_node.op == 'Reshape')
- context.map_ignored_layer_to_its_input[actual_output_node.name] = new_state
-
- return new_layers
-
-def basic_lstm(nodes, inputs, tensors, context):
- assert(len(inputs) == 2)
-
- def find_tensor_by_name(name, default=None):
- nonlocal tensors
- candidates = [t for t in tensors if t.name.endswith(name)]
- return candidates[0].data if candidates else default
-
- def find_forget_bias():
- nonlocal nodes
- nonlocal tensors
- # TODO: make it more fault-tolerant
- # search for scalar float constant that is input to Add node
- # and hope it is not a constant for some complex activation function
- for t in tensors:
- if np.prod(t.shape) == 1 and get_tensor_dtype(t.obj) == "DT_FLOAT":
- for n in nodes:
- if n.op == 'Add' and t.name in n.input:
- return t.data
- return np.zeros(1)
-
- input = inputs[-1]
- state_c = inputs[0] + '_c'
- state_h = inputs[0] + '_h'
- kernel = find_tensor_by_name('/kernel')
- bias = find_tensor_by_name('/bias', default=np.zeros(np.shape(kernel)[-1]))
- forget_bias = find_forget_bias()
- new_state_c = nodes[-1].name + '_c'
- new_state_h = nodes[-1].name + '_h'
-
- assert(np.shape(kernel)[-1] == np.shape(bias)[-1])
-
- num_gates = 4
- seq_length = 1
- hidden_size = np.shape(kernel)[-1] // num_gates
-
- kernels = np.split(kernel, num_gates, axis=-1)
- biases = np.split(bias, num_gates, axis=-1)
-
- context.model_tensors['kernel_i'] = kernels[0]
- context.model_tensors['kernel_j'] = kernels[1]
- context.model_tensors['kernel_f'] = kernels[2]
- context.model_tensors['kernel_o'] = kernels[3]
- context.model_tensors['bias_i'] = biases[0]
- context.model_tensors['bias_j'] = biases[1]
- context.model_tensors['bias_f'] = biases[2] + forget_bias
- context.model_tensors['bias_o'] = biases[3]
-
- new_layers = barracuda.lstm('lstm', input, state_c, state_h,
- 'kernel_i', 'kernel_j', 'kernel_f', 'kernel_o',
- 'bias_i', 'bias_j', 'bias_f', 'bias_o',
- new_state_c, new_state_h)
-
- state_shape = [1, 1, seq_length, hidden_size]
- context.model_memories += [state_shape, state_c, new_state_c]
- context.model_memories += [state_shape, state_h, new_state_h]
-
- # map expected output of the replaced pattern to output from our LSTM cell
- actual_output_node = nodes[-4]
- assert(actual_output_node.op == 'Reshape')
- context.map_ignored_layer_to_its_input[actual_output_node.name] = new_state_h
-
- return new_layers
-
-#########################################################
-
-def process_layer(layer, context, args):
- model_tensors = context.model_tensors
- input_shapes = context.input_shapes
- map_ignored_layer_to_its_input = context.map_ignored_layer_to_its_input
-
- name = layer.name
- class_name = layer.op
- inputs = layer.input # Tensorflow inputs are always explicit, but in case of Keras we had 'inputs = layer.input or [prev_layer_name]'
- inputs = replace_strings_in_list(inputs, map_ignored_layer_to_its_input)
-
- if class_name == 'Const':
- model_tensors[name] = layer.attr["value"].tensor
- return
-
- if class_name == 'Placeholder':
- assert(inputs == [])
- map_ignored_layer_to_its_input[name] = inputs
- input_shapes[name] = get_layer_shape(layer)
- return
-
- if class_name == 'Identity':
- connected_to_const = len(inputs) == 1 and inputs[0] in model_tensors
- if connected_to_const:
- map_ignored_layer_to_its_input[name] = inputs
- return
- else:
- # treat Identity layer that are connected to processing nodes
- # as output from the network
- class_name = 'Linear'
-
-    # TEMPORARY: until we implement rank detection and StridedSlice at runtime
- # HACK: skips trivial StridedSlices for rank=2 tensors
- if class_name == 'StridedSlice' and get_attr(layer, 'begin_mask') == 1 and get_attr(layer, 'end_mask') == 1:
- map_ignored_layer_to_its_input[name] = inputs[0]
- return
-
- if args.print_layers or args.verbose:
- var_tensors = [i for i in inputs if i not in model_tensors]
- const_tensors = [i for i in inputs if i in model_tensors]
- print("'%s' %s Vars:%s Const:%s" % (name, class_name, var_tensors, const_tensors))
-
- if class_name in known_activations:
- activation = class_name
- class_name = 'Activation'
- else:
- activation = 'Linear'
-
- if not class_name in known_classes:
- if class_name in requires_runtime_flag:
- print('SKIP:', class_name, 'layer is used only for training')
- else:
- print('IGNORED:', class_name, 'unknown layer')
- map_ignored_layer_to_its_input[name] = inputs
- return
-
- klass = known_classes[class_name]
- if type(klass) == int:
- klass = Struct(id = klass)
-
- o_l = Struct()
- o_l.type = klass.id
- o_l.class_name = class_name
- o_l.name = name
-
- padding = get_attr(layer, 'padding') # layer.attr['padding'].s.decode("utf-8")
- strides = get_attr(layer, 'strides') # layer.attr['strides'].list.i
- dilations = get_attr(layer, 'dilations') # layer.attr['dilations'].list.i
- pool_size = get_attr(layer, 'ksize') # layer.attr['ksize'].list.i
- shape = get_attr(layer, 'shape', default=[])
- data_frmt = get_attr(layer, 'data_format') # layer.attr['data_format'].s.decode("utf-8")
- axis = get_attr(layer, 'axis')
- alpha = get_attr(layer, 'alpha')
- beta = get_attr(layer, 'beta')
-
- if activation and not activation in known_activations:
- print('IGNORED: unknown activation', activation)
- if padding and not padding in known_paddings:
- print('IGNORED: unknown padding', padding)
- if data_frmt and not data_frmt in supported_data_formats:
- print('UNSUPPORTED: data format', data_frmt)
-
- o_l.activation = known_activations.get(activation) or 0
- o_l.pads = known_paddings.get(padding) or [0,0,0,0]
- o_l.strides = strides_to_HW(strides, data_frmt) if strides else []
- o_l.pool_size = pool_to_HW(pool_size, data_frmt) if pool_size else shape
- o_l.axis = axis or -1
- o_l.alpha = alpha or 1
- o_l.beta = beta or 0
-
- tensor_names = [i for i in inputs if i in model_tensors]
- o_l.tensors = [Struct(name = x, shape = get_tensor_dims(model_tensors[x]), data = get_tensor_data(model_tensors[x]))
- for x in tensor_names]
- # Patch shapes & data
- layer_has_model_tensors = len(o_l.tensors) > 0
- if hasattr(klass, 'out_shapes') and layer_has_model_tensors:
- shapes = klass.out_shapes([x.shape for x in o_l.tensors])
-
- # if we have more shapes than actual tensors,
- # then create & fill missing tensors with zeros
- in_tensor_num = len(o_l.tensors)
- for index, new_shape in enumerate(shapes):
- if index >= in_tensor_num:
- new_tensor = Struct(name = ('%s/patch:%i') % (name, index-in_tensor_num),
- shape = new_shape,
- data = np.zeros(new_shape))
- o_l.tensors.append(new_tensor)
- assert(len(shapes) <= len(o_l.tensors))
-
- if hasattr(klass, 'patch_data'):
- data = [x.data for x in o_l.tensors]
-
- patch_data_fn = klass.patch_data
- patch_data_expected_arg_count = patch_data_fn.__code__.co_argcount
- patch_data_args = (data, layer) if patch_data_expected_arg_count > 1 else (data,)
- tensor_data = patch_data_fn(*patch_data_args)
- o_l.tensors = o_l.tensors[:len(tensor_data)] # resize tensor array to match patched data - patching might reduce number of tensors
- for x, data in zip(o_l.tensors, tensor_data):
- x.data = data
-
- # after this point we should have equal amount of shapes and tensors
- assert(len(o_l.tensors) == len(shapes))
-
- for x, shape in zip(o_l.tensors, shapes):
- x.shape = shape
-
- o_l.inputs = [i for i in inputs if i not in model_tensors]
-
- else:
- # no 'patch_data' lambda was specified, op does not require tensor args
- o_l.tensors = []
- o_l.inputs = inputs
-
- # Force all tensors to float32
- for x in o_l.tensors:
- x.data = x.data.astype(np.float32)
-
- # Layer is ready
- context.layers.append(o_l)
-
-class ModelBuilderContext:
- def __init__(self):
- self.layers = []
- self.input_shapes = {}
- self.model_tensors = {}
- self.model_memories = []
- self.map_ignored_layer_to_its_input = {}
-
-def process_model(model, args):
- o_context = ModelBuilderContext()
-
- # Find node patterns
- nodes_as_array = [node for node in model.node]
-
- node_index = 0
- while node_index < len(nodes_as_array):
- node = nodes_as_array[node_index]
- match = False
- for pattern_repr, pattern_name in known_patterns.items():
- pattern = eval(pattern_repr)
- if node_index + len(pattern) > len(nodes_as_array):
- continue # pattern too long, skip
-
- require_exact_match = (pattern[0] == 'Const' or pattern[0] == 'Identity')
- pattern_end = node_index
-
- def match_node(node, pattern):
- return node.op == pattern or (hasattr(pattern, 'match') and pattern.match(node.name))
-
- for p in pattern:
- if not require_exact_match:
- while pattern_end < len(nodes_as_array) and nodes_as_array[pattern_end].op != p and (
- nodes_as_array[pattern_end].op == 'Const' or
- nodes_as_array[pattern_end].op == 'Identity'):
- pattern_end += 1
- if pattern_end >= len(nodes_as_array):
- break
-
- match = False
- if (hasattr(p, 'match')): # regexp
- while pattern_end < len(nodes_as_array) and p.match(nodes_as_array[pattern_end].name):
- match = True
- pattern_end += 1
- else: # exact string
- match = nodes_as_array[pattern_end].op == p
- pattern_end += 1
-
- if not match:
- break
-
- def get_tensors(pattern_nodes):
- nonlocal o_context
- map_ignored_layer_to_its_input = o_context.map_ignored_layer_to_its_input
-
- # tensors <= all Const nodes within this pattern
- tensor_nodes = [n for n in pattern_nodes if n.op == 'Const']
- tensors = [Struct(name = n.name, obj = n.attr["value"].tensor, shape = get_tensor_dims(n.attr["value"].tensor), data = get_tensor_data(n.attr["value"].tensor))
- for n in tensor_nodes]
-
- # TODO: unify / reuse code from process_layer
- identity_nodes = [n for n in pattern_nodes if n.op == 'Identity']
- for i in identity_nodes:
- inputs = replace_strings_in_list(i.input, map_ignored_layer_to_its_input)
- map_ignored_layer_to_its_input[i.name] = inputs
-
- # gather inputs from Op nodes (not Const, not Identity)
- op_nodes = [n for n in pattern_nodes if n not in tensor_nodes and n not in identity_nodes]
- inputs_to_op_nodes = list(flatten([list(flatten(n.input)) for n in op_nodes]))
- inputs_to_op_nodes = replace_strings_in_list(inputs_to_op_nodes, map_ignored_layer_to_its_input)
- inputs_to_op_nodes = [i.split(':')[0] for i in inputs_to_op_nodes]
-
- # filter only inputs that are coming from nodes that are outside this pattern
- # preserve the order
- pattern_nodes = [n.name for n in pattern_nodes]
- #inputs_from_outside_pattern = remove_duplicates_from_list([i for i in inputs_to_op_nodes if nodes_by_name[i] not in pattern_nodes])
- inputs_from_outside_pattern = remove_duplicates_from_list([i for i in inputs_to_op_nodes if i not in pattern_nodes])
-
- return inputs_from_outside_pattern, tensors
-
- if match:
- nodes = nodes_as_array[node_index:pattern_end]
- name = nodes[-1].name
- var_tensors, const_tensors = get_tensors(nodes)
- if args.print_patterns or args.verbose:
- print('PATTERN:', name, '~~', pattern_name, pattern, '<-', var_tensors, '+', [t.name for t in const_tensors])
- for n in nodes:
- if n.op == 'Const' or n.op == 'Identity':
- process_layer(n, o_context, args)
-
- new_layers = transform_patterns[pattern_name](nodes, var_tensors, const_tensors, o_context)
- if not isinstance(new_layers, list):
-                if not hasattr(new_layers, 'name'): new_layers.name = name
- new_layers = [new_layers]
-
- for l in new_layers:
- # TODO: prefix new layer names with scope, patch inputs
- #l.name = name + '/' + l.name
- process_layer(l, o_context, args)
-
- node_index = pattern_end
- break # pattern found & processed
-
- if not match:
- # TODO: gather tensors in the same way as patterns do
- process_layer(node, o_context, args)
- node_index += 1
-
- return o_context.layers, o_context.input_shapes, o_context.model_tensors, o_context.model_memories
-
-#########################################################
-
-def convert(source_file, target_file, trim_unused_by_output="", verbose=False, compress_f16=False):
- """
- Converts a TensorFlow model into a Barracuda model.
- :param source_file: The TensorFlow Model
- :param target_file: The name of the file the converted model will be saved to
-    :param trim_unused_by_output: The regexp to match output nodes to remain in the model. All other unconnected nodes will be removed.
- :param verbose: If True, will display debug messages
- :param compress_f16: If true, the float values will be converted to f16
- :return:
- """
- if (type(verbose)==bool):
- args = Struct()
- args.verbose = verbose
- args.print_layers = verbose
- args.print_source_json = verbose
- args.print_barracuda_json = verbose
- args.print_layer_links = verbose
- args.print_patterns = verbose
- args.print_tensors = verbose
- else:
- args = verbose
-
- # Load Tensorflow model
- print("Converting %s to %s" % (source_file, target_file))
- f = open(source_file, 'rb')
- i_model = tf.GraphDef()
- i_model.ParseFromString(f.read())
-
- if args.verbose:
- print('OP_TYPES:', {layer.op for layer in i_model.node})
-
- if args.print_source_json or args.verbose:
- for layer in i_model.node:
- if not layer.op == 'Const':
- print('MODEL:', MessageToJson(layer) + ",")
-
- # Convert
- o_model = barracuda.Model()
- o_model.layers, o_input_shapes, o_model.tensors, o_model.memories = \
- process_model(i_model, args)
-
- # Cleanup unconnected Identities (they might linger after processing complex node patterns like LSTM)
- def cleanup_layers(layers):
- all_layers = {l.name for l in layers}
- all_inputs = {i for l in layers for i in l.inputs}
-
- def is_unconnected_identity(layer):
- if layer.class_name == 'Activation' and layer.activation == 0: # Identity
- assert(len(layer.inputs) == 1)
- if layer.inputs[0] not in all_layers and layer.name not in all_inputs:
-                return True
-            return False
-
- return [l for l in layers if not is_unconnected_identity(l)]
- o_model.layers = cleanup_layers(o_model.layers)
-
- all_inputs = {i for l in o_model.layers for i in l.inputs}
- embedded_tensors = {t.name for l in o_model.layers for t in l.tensors}
-
- # Find global tensors
- def dims_to_barracuda_shape(dims):
- shape = list(dims)
- while len(shape) < 4:
- shape = [1] + shape
- return shape
- o_model.globals = [t for t in o_model.tensors if t not in all_inputs and t not in embedded_tensors]
- #for x in global_tensors:
- # shape = dims_to_barracuda_shape(get_tensor_dims(o_model.tensors[x]))
- # o_globals += [Struct(
- # name = x,
- # shape = shape,
- # data = np.reshape(get_tensor_data(o_model.tensors[x]), shape).astype(np.float32))]
-
- # Trim
- if trim_unused_by_output:
- o_model.layers = barracuda.trim(o_model.layers, trim_unused_by_output, args.verbose)
-
- # Create load layers for constants
- const_tensors = [i for i in all_inputs if i in o_model.tensors]
- const_tensors += o_model.globals
- for x in const_tensors:
- shape = dims_to_barracuda_shape(get_tensor_dims(o_model.tensors[x]))
-
- o_l = Struct(
- type = 255, # Load
- class_name = "Const",
- name = x,
- pads = [0,0,0,0],
- strides = [],
- pool_size = [],
- axis = -1,
- alpha = 1,
- beta = 0,
- activation = 0,
- inputs = [],
- tensors = [Struct(
- name = x,
- shape = shape,
- data = np.reshape(get_tensor_data(o_model.tensors[x]), shape).astype(np.float32))]
- )
- o_model.layers.insert(0, o_l)
-
- # Find model inputs & outputs
- all_layers = {l.name for l in o_model.layers}
- # global inputs => are inputs that are NOT connected to any layer in the network
- # global outputs => are outputs that are NOT feeding any layer in the network OR are coming from Identity layers
- o_model.inputs = {i:o_input_shapes[i] for l in o_model.layers for i in l.inputs if i not in all_layers and i not in o_model.memories}
-
- def is_output_layer(layer):
- if layer.class_name == 'Const': # Constants never count as global output even when unconnected
-            return False
- if layer.name not in all_inputs: # this layer is not inputing to any other layer
- return True
- if layer.class_name == 'Activation' and layer.activation == 0: # Identity marks global output
- return True
- return False
- o_model.outputs = [l.name for l in o_model.layers if is_output_layer(l)]
-
- # Compress
- if compress_f16:
- o_model = barracuda.compress(o_model)
-
- # Sort model so that layer inputs are always ready upfront
- o_model.layers = barracuda.sort(o_model.layers, o_model.inputs, o_model.memories, args.verbose)
-
- # Summary
- barracuda.summary(o_model,
- print_layer_links = args.print_layer_links or args.verbose,
- print_barracuda_json = args.print_barracuda_json or args.verbose,
- print_tensors = args.print_tensors or args.verbose)
-
- # Write to file
- barracuda.write(o_model, target_file)
- print('DONE: wrote', target_file, 'file.')
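
For context, the converter deleted here is normally driven through its convert() entry point (the __main__ block at the top of the file does the same from the command line): it reads a frozen TensorFlow .pb graph and writes a Barracuda binary for Unity. A hedged usage sketch follows; the file paths and the output-node name are placeholders, and the import path reflects the package layout prior to this change.

from animalai.trainers import tensorflow_to_barracuda as tf2bc

tf2bc.convert(
    "models/run_id/frozen_graph_def.pb",   # frozen TF graph (placeholder path)
    "models/run_id/model.nn",              # Barracuda output file (placeholder path)
    trim_unused_by_output="action",        # keep only nodes feeding this output (assumed node name)
    verbose=False,
)
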
diff --git a/animalai_bkp/trainers/trainer.py b/animalai_bkp/trainers/trainer.py
deleted file mode 100644
index 6abd8ca8..00000000
--- a/animalai_bkp/trainers/trainer.py
+++ /dev/null
@@ -1,198 +0,0 @@
-# # Unity ML-Agents Toolkit
-import logging
-
-import tensorflow as tf
-import numpy as np
-
-from animalai.envs import UnityException, AllBrainInfo
-
-logger = logging.getLogger("mlagents.trainers")
-
-
-class UnityTrainerException(UnityException):
- """
- Related to errors with the Trainer.
- """
- pass
-
-
-class Trainer(object):
- """This class is the base class for the mlagents.trainers"""
-
- def __init__(self, brain, trainer_parameters, training, run_id):
- """
- Responsible for collecting experiences and training a neural network model.
- :BrainParameters brain: Brain to be trained.
- :dict trainer_parameters: The parameters for the trainer (dictionary).
- :bool training: Whether the trainer is set for training.
- :int run_id: The identifier of the current run
- """
- self.param_keys = []
- self.brain_name = brain.brain_name
- self.run_id = run_id
- self.trainer_parameters = trainer_parameters
- self.is_training = training
- self.stats = {}
- self.summary_writer = None
- self.policy = None
-
- def __str__(self):
- return '''{} Trainer'''.format(self.__class__)
-
- def check_param_keys(self):
- for k in self.param_keys:
- if k not in self.trainer_parameters:
- raise UnityTrainerException(
- "The hyper-parameter {0} could not be found for the {1} trainer of "
- "brain {2}.".format(k, self.__class__, self.brain_name))
-
- @property
- def parameters(self):
- """
- Returns the trainer parameters of the trainer.
- """
- raise UnityTrainerException("The parameters property was not implemented.")
-
- @property
- def graph_scope(self):
- """
- Returns the graph scope of the trainer.
- """
- raise UnityTrainerException("The graph_scope property was not implemented.")
-
- @property
- def get_max_steps(self):
- """
- Returns the maximum number of steps. Is used to know when the trainer should be stopped.
- :return: The maximum number of steps of the trainer
- """
- raise UnityTrainerException("The get_max_steps property was not implemented.")
-
- @property
- def get_step(self):
- """
- Returns the number of training steps the trainer has performed
- :return: the step count of the trainer
- """
- raise UnityTrainerException("The get_step property was not implemented.")
-
- @property
- def get_last_reward(self):
- """
- Returns the last reward the trainer has had
- :return: the new last reward
- """
- raise UnityTrainerException("The get_last_reward property was not implemented.")
-
- def increment_step_and_update_last_reward(self):
- """
-        Increments the step count of the trainer and updates the last reward
- """
- raise UnityTrainerException(
- "The increment_step_and_update_last_reward method was not implemented.")
-
- def take_action(self, all_brain_info: AllBrainInfo):
- """
- Decides actions given state/observation information, and takes them in environment.
- :param all_brain_info: A dictionary of brain names and BrainInfo from environment.
- :return: a tuple containing action, memories, values and an object
- to be passed to add experiences
- """
- raise UnityTrainerException("The take_action method was not implemented.")
-
- def add_experiences(self, curr_info: AllBrainInfo, next_info: AllBrainInfo,
- take_action_outputs):
- """
- Adds experiences to each agent's experience history.
- :param curr_info: Current AllBrainInfo.
- :param next_info: Next AllBrainInfo.
- :param take_action_outputs: The outputs of the take action method.
- """
- raise UnityTrainerException("The add_experiences method was not implemented.")
-
- def process_experiences(self, current_info: AllBrainInfo, next_info: AllBrainInfo):
- """
- Checks agent histories for processing condition, and processes them as necessary.
- Processing involves calculating value and advantage targets for model updating step.
- :param current_info: Dictionary of all current-step brains and corresponding BrainInfo.
- :param next_info: Dictionary of all next-step brains and corresponding BrainInfo.
- """
- raise UnityTrainerException("The process_experiences method was not implemented.")
-
- def end_episode(self):
- """
- A signal that the Episode has ended. The buffer must be reset.
-        Gets called only when the academy resets.
- """
- raise UnityTrainerException("The end_episode method was not implemented.")
-
- def is_ready_update(self):
- """
- Returns whether or not the trainer has enough elements to run update model
-        :return: A boolean corresponding to whether or not update_model() can be run
- """
- raise UnityTrainerException("The is_ready_update method was not implemented.")
-
- def update_policy(self):
- """
- Uses demonstration_buffer to update model.
- """
- raise UnityTrainerException("The update_model method was not implemented.")
-
- def save_model(self):
- """
- Saves the model
- """
- self.policy.save_model(self.get_step)
-
- def export_model(self):
- """
- Exports the model
- """
- self.policy.export_model()
-
- def write_summary(self, global_step, lesson_num=0):
- """
- Saves training statistics to Tensorboard.
- :param lesson_num: Current lesson number in curriculum.
- :param global_step: The number of steps the simulation has been going for
- """
- if global_step % self.trainer_parameters['summary_freq'] == 0 and global_step != 0:
- is_training = "Training." if self.is_training and self.get_step <= self.get_max_steps else "Not Training."
- if len(self.stats['Environment/Cumulative Reward']) > 0:
- mean_reward = np.mean(self.stats['Environment/Cumulative Reward'])
- logger.info(" {}: {}: Step: {}. Mean Reward: {:0.3f}. Std of Reward: {:0.3f}. {}"
- .format(self.run_id, self.brain_name,
- min(self.get_step, self.get_max_steps),
- mean_reward, np.std(self.stats['Environment/Cumulative Reward']),
- is_training))
- else:
- logger.info(" {}: {}: Step: {}. No episode was completed since last summary. {}"
- .format(self.run_id, self.brain_name, self.get_step, is_training))
- summary = tf.Summary()
- for key in self.stats:
- if len(self.stats[key]) > 0:
- stat_mean = float(np.mean(self.stats[key]))
- summary.value.add(tag='{}'.format(key), simple_value=stat_mean)
- self.stats[key] = []
- summary.value.add(tag='Environment/Lesson', simple_value=lesson_num)
- self.summary_writer.add_summary(summary, self.get_step)
- self.summary_writer.flush()
-
- def write_tensorboard_text(self, key, input_dict):
- """
- Saves text to Tensorboard.
- Note: Only works on tensorflow r1.2 or above.
- :param key: The name of the text.
- :param input_dict: A dictionary that will be displayed in a table on Tensorboard.
- """
- try:
- with tf.Session() as sess:
- s_op = tf.summary.text(key, tf.convert_to_tensor(
- ([[str(x), str(input_dict[x])] for x in input_dict])))
- s = sess.run(s_op)
- self.summary_writer.add_summary(s, self.get_step)
- except:
- logger.info(
- "Cannot write text summary for Tensorboard. Tensorflow version must be r1.2 or above.")
- pass
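
The Trainer base class deleted above defines the interface that concrete trainers such as PPOTrainer and the BC trainers fill in; any member left unimplemented raises UnityTrainerException at runtime, and write_summary() additionally expects self.stats and self.summary_writer to be populated. A minimal sketch of a compliant subclass follows, with illustrative bodies only; the import path assumes the pre-rename package layout.

from animalai.trainers.trainer import Trainer

class ConstantActionTrainer(Trainer):
    def __init__(self, brain, trainer_parameters, training, run_id):
        super(ConstantActionTrainer, self).__init__(brain, trainer_parameters, training, run_id)
        self.step = 0
        # write_summary() reads these keys; a tf.summary.FileWriter would also be needed.
        self.stats = {'Environment/Cumulative Reward': [], 'Environment/Episode Length': []}

    @property
    def parameters(self):
        return self.trainer_parameters

    @property
    def get_max_steps(self):
        return float(self.trainer_parameters['max_steps'])

    @property
    def get_step(self):
        return self.step

    def increment_step_and_update_last_reward(self):
        self.step += 1

    def take_action(self, all_brain_info):
        # 5-tuple of (actions, memories, text actions, values, run outputs), mirroring PPOTrainer.
        return [], None, None, None, None

    def add_experiences(self, curr_info, next_info, take_action_outputs):
        pass

    def process_experiences(self, current_info, next_info):
        pass

    def end_episode(self):
        pass

    def is_ready_update(self):
        return False

    def update_policy(self):
        pass
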
diff --git a/animalai_bkp/trainers/trainer_controller.py b/animalai_bkp/trainers/trainer_controller.py
deleted file mode 100644
index 0618815d..00000000
--- a/animalai_bkp/trainers/trainer_controller.py
+++ /dev/null
@@ -1,301 +0,0 @@
-# # Unity ML-Agents Toolkit
-# ## ML-Agent Learning
-"""Launches trainers for each External Brains in a Unity Environment."""
-
-import os
-import logging
-import shutil
-import sys
-if sys.platform.startswith('win'):
- import win32api
- import win32con
-from typing import *
-
-import numpy as np
-import tensorflow as tf
-
-from animalai.envs import BrainInfo
-from animalai.envs.exception import UnityEnvironmentException
-from animalai.trainers.ppo.trainer import PPOTrainer
-from animalai.trainers.bc.offline_trainer import OfflineBCTrainer
-from animalai.trainers.bc.online_trainer import OnlineBCTrainer
-from animalai.trainers.meta_curriculum import MetaCurriculum
-
-
-class TrainerController(object):
- def __init__(self, model_path: str, summaries_dir: str,
- run_id: str, save_freq: int, meta_curriculum: Optional[MetaCurriculum],
- load: bool, train: bool, keep_checkpoints: int, lesson: Optional[int],
- external_brains: Dict[str, BrainInfo], training_seed: int, config=None):
- """
- :param model_path: Path to save the model.
- :param summaries_dir: Folder to save training summaries.
- :param run_id: The sub-directory name for model and summary statistics
- :param save_freq: Frequency at which to save model
- :param meta_curriculum: MetaCurriculum object which stores information about all curricula.
- :param load: Whether to load the model or randomly initialize.
- :param train: Whether to train model, or only run inference.
- :param keep_checkpoints: How many model checkpoints to keep.
- :param lesson: Start learning from this lesson.
- :param external_brains: dictionary of external brain names to BrainInfo objects.
- :param training_seed: Seed to use for Numpy and Tensorflow random number generation.
- """
-
- self.model_path = model_path
- self.summaries_dir = summaries_dir
- self.external_brains = external_brains
- self.external_brain_names = external_brains.keys()
- self.logger = logging.getLogger('mlagents.envs')
- self.run_id = run_id
- self.save_freq = save_freq
- self.lesson = lesson
- self.load_model = load
- self.train_model = train
- self.keep_checkpoints = keep_checkpoints
- self.trainers = {}
- self.global_step = 0
- self.meta_curriculum = meta_curriculum
- self.seed = training_seed
- self.config = config
- self.update_config = True
- np.random.seed(self.seed)
- tf.set_random_seed(self.seed)
-
- def _get_measure_vals(self):
- if self.meta_curriculum:
- brain_names_to_measure_vals = {}
- for brain_name, curriculum \
- in self.meta_curriculum.brains_to_curriculums.items():
- if curriculum.measure == 'progress':
- measure_val = (self.trainers[brain_name].get_step /
- self.trainers[brain_name].get_max_steps)
- brain_names_to_measure_vals[brain_name] = measure_val
- elif curriculum.measure == 'reward':
- measure_val = np.mean(self.trainers[brain_name]
- .reward_buffer)
- brain_names_to_measure_vals[brain_name] = measure_val
- return brain_names_to_measure_vals
- else:
- return None
-
- def _save_model(self, steps=0):
- """
- Saves current model to checkpoint folder.
- :param steps: Current number of steps in training process.
- :param saver: Tensorflow saver for session.
- """
- for brain_name in self.trainers.keys():
- self.trainers[brain_name].save_model()
- self.logger.info('Saved Model')
-
- def _save_model_when_interrupted(self, steps=0):
- self.logger.info('Learning was interrupted. Please wait '
- 'while the graph is generated.')
- self._save_model(steps)
-
- def _win_handler(self, event):
- """
- This function gets triggered after ctrl-c or ctrl-break is pressed
- under Windows platform.
- """
- if event in (win32con.CTRL_C_EVENT, win32con.CTRL_BREAK_EVENT):
- self._save_model_when_interrupted(self.global_step)
- self._export_graph()
- sys.exit()
- return True
- return False
-
- def _export_graph(self):
- """
- Exports latest saved models to .nn format for Unity embedding.
- """
- for brain_name in self.trainers.keys():
- self.trainers[brain_name].export_model()
-
- def initialize_trainers(self, trainer_config):
- """
- Initialization of the trainers
- :param trainer_config: The configurations of the trainers
- """
- trainer_parameters_dict = {}
-
- for brain_name in self.external_brains:
- trainer_parameters = trainer_config['default'].copy()
- trainer_parameters['summary_path'] = '{basedir}/{name}'.format(
- basedir=self.summaries_dir,
- name=str(self.run_id) + '_' + brain_name)
- trainer_parameters['model_path'] = '{basedir}/{name}'.format(
- basedir=self.model_path,
- name=brain_name)
- trainer_parameters['keep_checkpoints'] = self.keep_checkpoints
- if brain_name in trainer_config:
- _brain_key = brain_name
- while not isinstance(trainer_config[_brain_key], dict):
- _brain_key = trainer_config[_brain_key]
- for k in trainer_config[_brain_key]:
- trainer_parameters[k] = trainer_config[_brain_key][k]
- trainer_parameters_dict[brain_name] = trainer_parameters.copy()
- for brain_name in self.external_brains:
- if trainer_parameters_dict[brain_name]['trainer'] == 'offline_bc':
- self.trainers[brain_name] = OfflineBCTrainer(
- self.external_brains[brain_name],
- trainer_parameters_dict[brain_name], self.train_model,
- self.load_model, self.seed, self.run_id)
- elif trainer_parameters_dict[brain_name]['trainer'] == 'online_bc':
- self.trainers[brain_name] = OnlineBCTrainer(
- self.external_brains[brain_name],
- trainer_parameters_dict[brain_name], self.train_model,
- self.load_model, self.seed, self.run_id)
- elif trainer_parameters_dict[brain_name]['trainer'] == 'ppo':
- self.trainers[brain_name] = PPOTrainer(
- self.external_brains[brain_name],
- self.meta_curriculum
- .brains_to_curriculums[brain_name]
- .min_lesson_length if self.meta_curriculum else 0,
- trainer_parameters_dict[brain_name],
- self.train_model, self.load_model, self.seed, self.run_id)
- else:
- raise UnityEnvironmentException('The trainer config contains '
- 'an unknown trainer type for '
- 'brain {}'
- .format(brain_name))
-
- @staticmethod
- def _create_model_path(model_path):
- try:
- if not os.path.exists(model_path):
- os.makedirs(model_path)
- except Exception:
- raise UnityEnvironmentException('The folder {} containing the '
- 'generated model could not be '
- 'accessed. Please make sure the '
- 'permissions are set correctly.'
- .format(model_path))
-
- def _reset_env(self, env):
- """Resets the environment.
-
- Returns:
- A Data structure corresponding to the initial reset state of the
- environment.
- """
- if self.meta_curriculum is not None:
- return env.reset(config=self.meta_curriculum.get_config())
- else:
-            if self.update_config:
-                self.update_config = False
-                return env.reset(arenas_configurations=self.config)
-            else:
-                return env.reset()
-
- def start_learning(self, env, trainer_config):
- # TODO: Should be able to start learning at different lesson numbers
- # for each curriculum.
- if self.meta_curriculum is not None:
- self.meta_curriculum.set_all_curriculums_to_lesson_num(self.lesson)
- self._create_model_path(self.model_path)
-
- tf.reset_default_graph()
-
- # Prevent a single session from taking all GPU memory.
- self.initialize_trainers(trainer_config)
- for _, t in self.trainers.items():
- self.logger.info(t)
-
- curr_info = self._reset_env(env)
- if self.train_model:
- for brain_name, trainer in self.trainers.items():
- trainer.write_tensorboard_text('Hyperparameters',
- trainer.parameters)
- if sys.platform.startswith('win'):
- # Add the _win_handler function to the windows console's handler function list
- win32api.SetConsoleCtrlHandler(self._win_handler, True)
- try:
- while any([t.get_step <= t.get_max_steps \
- for k, t in self.trainers.items()]) \
- or not self.train_model:
- new_info = self.take_step(env, curr_info)
- self.global_step += 1
- if self.global_step % self.save_freq == 0 and self.global_step != 0 \
- and self.train_model:
- # Save Tensorflow model
- self._save_model(steps=self.global_step)
- curr_info = new_info
- # Final save Tensorflow model
- if self.global_step != 0 and self.train_model:
- self._save_model(steps=self.global_step)
- except KeyboardInterrupt:
- if self.train_model:
- self._save_model_when_interrupted(steps=self.global_step)
- pass
- env.close()
-
- if self.train_model:
- self._export_graph()
-
- def take_step(self, env, curr_info):
- if self.meta_curriculum:
- # Get the sizes of the reward buffers.
- reward_buff_sizes = {k: len(t.reward_buffer) \
- for (k, t) in self.trainers.items()}
- # Attempt to increment the lessons of the brains who
- # were ready.
- lessons_incremented = \
- self.meta_curriculum.increment_lessons(
- self._get_measure_vals(),
- reward_buff_sizes=reward_buff_sizes)
-
- # If any lessons were incremented or the environment is
- # ready to be reset
- if (self.meta_curriculum
- and any(lessons_incremented.values())):
- curr_info = self._reset_env(env)
- for brain_name, trainer in self.trainers.items():
- trainer.end_episode()
- for brain_name, changed in lessons_incremented.items():
- if changed:
- self.trainers[brain_name].reward_buffer.clear()
- elif env.global_done:
- curr_info = self._reset_env(env)
- for brain_name, trainer in self.trainers.items():
- trainer.end_episode()
-
- # Decide and take an action
- take_action_vector, \
- take_action_memories, \
- take_action_text, \
- take_action_value, \
- take_action_outputs \
- = {}, {}, {}, {}, {}
- for brain_name, trainer in self.trainers.items():
- (take_action_vector[brain_name],
- take_action_memories[brain_name],
- take_action_text[brain_name],
- take_action_value[brain_name],
- take_action_outputs[brain_name]) = \
- trainer.take_action(curr_info)
- new_info = env.step(vector_action=take_action_vector,
- memory=take_action_memories,
- text_action=take_action_text,
- value=take_action_value)
- for brain_name, trainer in self.trainers.items():
- trainer.add_experiences(curr_info, new_info,
- take_action_outputs[brain_name])
- trainer.process_experiences(curr_info, new_info)
- if trainer.is_ready_update() and self.train_model \
- and trainer.get_step <= trainer.get_max_steps:
- # Perform gradient descent with experience buffer
- trainer.update_policy()
- # Write training statistics to Tensorboard.
- if self.meta_curriculum is not None:
- trainer.write_summary(
- self.global_step,
- lesson_num=self.meta_curriculum
- .brains_to_curriculums[brain_name]
- .lesson_num)
- else:
- trainer.write_summary(self.global_step)
- if self.train_model \
- and trainer.get_step <= trainer.get_max_steps:
- trainer.increment_step_and_update_last_reward()
- return new_info
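For context on the deleted `TrainerController.initialize_trainers` above: it expects a `trainer_config` mapping with a `default` entry plus optional per-brain overrides, where a brain entry may also be a string alias that is followed until a dict is reached. A minimal sketch of that shape follows; the brain names and hyperparameter values are illustrative assumptions, not taken from these patches.
```
# Minimal sketch (illustrative, not from the patches) of the trainer_config
# dict consumed by TrainerController.initialize_trainers above.
trainer_config = {
    'default': {
        'trainer': 'ppo',      # must be one of 'ppo', 'offline_bc', 'online_bc'
        'batch_size': 64,
        'buffer_size': 2048,
        'max_steps': 5.0e5,
    },
    # Per-brain overrides are merged on top of 'default'.
    'Learner': {
        'max_steps': 1.0e6,
    },
    # A string value is treated as an alias and followed until a dict is found.
    'LearnerClone': 'Learner',
}
```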
From f0ccd596e55ab6e592cb1b651adba88d872aa964 Mon Sep 17 00:00:00 2001
From: Benjamin Beyret
Date: Fri, 31 May 2019 18:47:16 +0100
Subject: [PATCH 07/23] rename animalai_package to animalai
---
.gitignore | 3 ++-
{animalai_package => animalai}/animalai/LICENSE | 0
{animalai_package => animalai}/animalai/README.md | 0
{animalai_package => animalai}/animalai/animalai/__init__.py | 0
.../animalai/animalai/communicator_objects/__init__.py | 0
.../animalai/communicator_objects/agent_action_proto_pb2.py | 0
.../animalai/communicator_objects/agent_info_proto_pb2.py | 0
.../communicator_objects/arena_parameters_proto_pb2.py | 0
.../communicator_objects/brain_parameters_proto_pb2.py | 0
.../animalai/communicator_objects/command_proto_pb2.py | 0
.../communicator_objects/demonstration_meta_proto_pb2.py | 0
.../communicator_objects/engine_configuration_proto_pb2.py | 0
.../animalai/animalai/communicator_objects/header_pb2.py | 0
.../animalai/communicator_objects/resolution_proto_pb2.py | 0
.../animalai/communicator_objects/space_type_proto_pb2.py | 0
.../animalai/animalai/communicator_objects/unity_input_pb2.py | 0
.../animalai/communicator_objects/unity_message_pb2.py | 0
.../animalai/animalai/communicator_objects/unity_output_pb2.py | 0
.../communicator_objects/unity_rl_initialization_input_pb2.py | 0
.../communicator_objects/unity_rl_initialization_output_pb2.py | 0
.../animalai/communicator_objects/unity_rl_input_pb2.py | 0
.../animalai/communicator_objects/unity_rl_output_pb2.py | 0
.../animalai/communicator_objects/unity_rl_reset_input_pb2.py | 0
.../animalai/communicator_objects/unity_rl_reset_output_pb2.py | 0
.../animalai/communicator_objects/unity_to_external_pb2.py | 0
.../communicator_objects/unity_to_external_pb2_grpc.py | 0
.../animalai/animalai/envs/__init__.py | 0
.../animalai/animalai/envs/arena_config.py | 0
{animalai_package => animalai}/animalai/animalai/envs/brain.py | 0
.../animalai/animalai/envs/communicator.py | 0
.../animalai/animalai/envs/environment.py | 0
.../animalai/animalai/envs/exception.py | 0
.../animalai/animalai/envs/gym/environment.py | 0
.../animalai/animalai/envs/rpc_communicator.py | 0
.../animalai/animalai/envs/socket_communicator.py | 0
{animalai_package => animalai}/animalai/setup.py | 0
36 files changed, 2 insertions(+), 1 deletion(-)
rename {animalai_package => animalai}/animalai/LICENSE (100%)
rename {animalai_package => animalai}/animalai/README.md (100%)
rename {animalai_package => animalai}/animalai/animalai/__init__.py (100%)
rename {animalai_package => animalai}/animalai/animalai/communicator_objects/__init__.py (100%)
rename {animalai_package => animalai}/animalai/animalai/communicator_objects/agent_action_proto_pb2.py (100%)
rename {animalai_package => animalai}/animalai/animalai/communicator_objects/agent_info_proto_pb2.py (100%)
rename {animalai_package => animalai}/animalai/animalai/communicator_objects/arena_parameters_proto_pb2.py (100%)
rename {animalai_package => animalai}/animalai/animalai/communicator_objects/brain_parameters_proto_pb2.py (100%)
rename {animalai_package => animalai}/animalai/animalai/communicator_objects/command_proto_pb2.py (100%)
rename {animalai_package => animalai}/animalai/animalai/communicator_objects/demonstration_meta_proto_pb2.py (100%)
rename {animalai_package => animalai}/animalai/animalai/communicator_objects/engine_configuration_proto_pb2.py (100%)
rename {animalai_package => animalai}/animalai/animalai/communicator_objects/header_pb2.py (100%)
rename {animalai_package => animalai}/animalai/animalai/communicator_objects/resolution_proto_pb2.py (100%)
rename {animalai_package => animalai}/animalai/animalai/communicator_objects/space_type_proto_pb2.py (100%)
rename {animalai_package => animalai}/animalai/animalai/communicator_objects/unity_input_pb2.py (100%)
rename {animalai_package => animalai}/animalai/animalai/communicator_objects/unity_message_pb2.py (100%)
rename {animalai_package => animalai}/animalai/animalai/communicator_objects/unity_output_pb2.py (100%)
rename {animalai_package => animalai}/animalai/animalai/communicator_objects/unity_rl_initialization_input_pb2.py (100%)
rename {animalai_package => animalai}/animalai/animalai/communicator_objects/unity_rl_initialization_output_pb2.py (100%)
rename {animalai_package => animalai}/animalai/animalai/communicator_objects/unity_rl_input_pb2.py (100%)
rename {animalai_package => animalai}/animalai/animalai/communicator_objects/unity_rl_output_pb2.py (100%)
rename {animalai_package => animalai}/animalai/animalai/communicator_objects/unity_rl_reset_input_pb2.py (100%)
rename {animalai_package => animalai}/animalai/animalai/communicator_objects/unity_rl_reset_output_pb2.py (100%)
rename {animalai_package => animalai}/animalai/animalai/communicator_objects/unity_to_external_pb2.py (100%)
rename {animalai_package => animalai}/animalai/animalai/communicator_objects/unity_to_external_pb2_grpc.py (100%)
rename {animalai_package => animalai}/animalai/animalai/envs/__init__.py (100%)
rename {animalai_package => animalai}/animalai/animalai/envs/arena_config.py (100%)
rename {animalai_package => animalai}/animalai/animalai/envs/brain.py (100%)
rename {animalai_package => animalai}/animalai/animalai/envs/communicator.py (100%)
rename {animalai_package => animalai}/animalai/animalai/envs/environment.py (100%)
rename {animalai_package => animalai}/animalai/animalai/envs/exception.py (100%)
rename {animalai_package => animalai}/animalai/animalai/envs/gym/environment.py (100%)
rename {animalai_package => animalai}/animalai/animalai/envs/rpc_communicator.py (100%)
rename {animalai_package => animalai}/animalai/animalai/envs/socket_communicator.py (100%)
rename {animalai_package => animalai}/animalai/setup.py (100%)
diff --git a/.gitignore b/.gitignore
index 8ccb7e08..a1fc126d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -11,4 +11,5 @@ venv/
build/
dist/
*.egg-info*
-*.eggs*
\ No newline at end of file
+*.eggs*
+animalai_bkp/
\ No newline at end of file
diff --git a/animalai_package/animalai/LICENSE b/animalai/animalai/LICENSE
similarity index 100%
rename from animalai_package/animalai/LICENSE
rename to animalai/animalai/LICENSE
diff --git a/animalai_package/animalai/README.md b/animalai/animalai/README.md
similarity index 100%
rename from animalai_package/animalai/README.md
rename to animalai/animalai/README.md
diff --git a/animalai_package/animalai/animalai/__init__.py b/animalai/animalai/animalai/__init__.py
similarity index 100%
rename from animalai_package/animalai/animalai/__init__.py
rename to animalai/animalai/animalai/__init__.py
diff --git a/animalai_package/animalai/animalai/communicator_objects/__init__.py b/animalai/animalai/animalai/communicator_objects/__init__.py
similarity index 100%
rename from animalai_package/animalai/animalai/communicator_objects/__init__.py
rename to animalai/animalai/animalai/communicator_objects/__init__.py
diff --git a/animalai_package/animalai/animalai/communicator_objects/agent_action_proto_pb2.py b/animalai/animalai/animalai/communicator_objects/agent_action_proto_pb2.py
similarity index 100%
rename from animalai_package/animalai/animalai/communicator_objects/agent_action_proto_pb2.py
rename to animalai/animalai/animalai/communicator_objects/agent_action_proto_pb2.py
diff --git a/animalai_package/animalai/animalai/communicator_objects/agent_info_proto_pb2.py b/animalai/animalai/animalai/communicator_objects/agent_info_proto_pb2.py
similarity index 100%
rename from animalai_package/animalai/animalai/communicator_objects/agent_info_proto_pb2.py
rename to animalai/animalai/animalai/communicator_objects/agent_info_proto_pb2.py
diff --git a/animalai_package/animalai/animalai/communicator_objects/arena_parameters_proto_pb2.py b/animalai/animalai/animalai/communicator_objects/arena_parameters_proto_pb2.py
similarity index 100%
rename from animalai_package/animalai/animalai/communicator_objects/arena_parameters_proto_pb2.py
rename to animalai/animalai/animalai/communicator_objects/arena_parameters_proto_pb2.py
diff --git a/animalai_package/animalai/animalai/communicator_objects/brain_parameters_proto_pb2.py b/animalai/animalai/animalai/communicator_objects/brain_parameters_proto_pb2.py
similarity index 100%
rename from animalai_package/animalai/animalai/communicator_objects/brain_parameters_proto_pb2.py
rename to animalai/animalai/animalai/communicator_objects/brain_parameters_proto_pb2.py
diff --git a/animalai_package/animalai/animalai/communicator_objects/command_proto_pb2.py b/animalai/animalai/animalai/communicator_objects/command_proto_pb2.py
similarity index 100%
rename from animalai_package/animalai/animalai/communicator_objects/command_proto_pb2.py
rename to animalai/animalai/animalai/communicator_objects/command_proto_pb2.py
diff --git a/animalai_package/animalai/animalai/communicator_objects/demonstration_meta_proto_pb2.py b/animalai/animalai/animalai/communicator_objects/demonstration_meta_proto_pb2.py
similarity index 100%
rename from animalai_package/animalai/animalai/communicator_objects/demonstration_meta_proto_pb2.py
rename to animalai/animalai/animalai/communicator_objects/demonstration_meta_proto_pb2.py
diff --git a/animalai_package/animalai/animalai/communicator_objects/engine_configuration_proto_pb2.py b/animalai/animalai/animalai/communicator_objects/engine_configuration_proto_pb2.py
similarity index 100%
rename from animalai_package/animalai/animalai/communicator_objects/engine_configuration_proto_pb2.py
rename to animalai/animalai/animalai/communicator_objects/engine_configuration_proto_pb2.py
diff --git a/animalai_package/animalai/animalai/communicator_objects/header_pb2.py b/animalai/animalai/animalai/communicator_objects/header_pb2.py
similarity index 100%
rename from animalai_package/animalai/animalai/communicator_objects/header_pb2.py
rename to animalai/animalai/animalai/communicator_objects/header_pb2.py
diff --git a/animalai_package/animalai/animalai/communicator_objects/resolution_proto_pb2.py b/animalai/animalai/animalai/communicator_objects/resolution_proto_pb2.py
similarity index 100%
rename from animalai_package/animalai/animalai/communicator_objects/resolution_proto_pb2.py
rename to animalai/animalai/animalai/communicator_objects/resolution_proto_pb2.py
diff --git a/animalai_package/animalai/animalai/communicator_objects/space_type_proto_pb2.py b/animalai/animalai/animalai/communicator_objects/space_type_proto_pb2.py
similarity index 100%
rename from animalai_package/animalai/animalai/communicator_objects/space_type_proto_pb2.py
rename to animalai/animalai/animalai/communicator_objects/space_type_proto_pb2.py
diff --git a/animalai_package/animalai/animalai/communicator_objects/unity_input_pb2.py b/animalai/animalai/animalai/communicator_objects/unity_input_pb2.py
similarity index 100%
rename from animalai_package/animalai/animalai/communicator_objects/unity_input_pb2.py
rename to animalai/animalai/animalai/communicator_objects/unity_input_pb2.py
diff --git a/animalai_package/animalai/animalai/communicator_objects/unity_message_pb2.py b/animalai/animalai/animalai/communicator_objects/unity_message_pb2.py
similarity index 100%
rename from animalai_package/animalai/animalai/communicator_objects/unity_message_pb2.py
rename to animalai/animalai/animalai/communicator_objects/unity_message_pb2.py
diff --git a/animalai_package/animalai/animalai/communicator_objects/unity_output_pb2.py b/animalai/animalai/animalai/communicator_objects/unity_output_pb2.py
similarity index 100%
rename from animalai_package/animalai/animalai/communicator_objects/unity_output_pb2.py
rename to animalai/animalai/animalai/communicator_objects/unity_output_pb2.py
diff --git a/animalai_package/animalai/animalai/communicator_objects/unity_rl_initialization_input_pb2.py b/animalai/animalai/animalai/communicator_objects/unity_rl_initialization_input_pb2.py
similarity index 100%
rename from animalai_package/animalai/animalai/communicator_objects/unity_rl_initialization_input_pb2.py
rename to animalai/animalai/animalai/communicator_objects/unity_rl_initialization_input_pb2.py
diff --git a/animalai_package/animalai/animalai/communicator_objects/unity_rl_initialization_output_pb2.py b/animalai/animalai/animalai/communicator_objects/unity_rl_initialization_output_pb2.py
similarity index 100%
rename from animalai_package/animalai/animalai/communicator_objects/unity_rl_initialization_output_pb2.py
rename to animalai/animalai/animalai/communicator_objects/unity_rl_initialization_output_pb2.py
diff --git a/animalai_package/animalai/animalai/communicator_objects/unity_rl_input_pb2.py b/animalai/animalai/animalai/communicator_objects/unity_rl_input_pb2.py
similarity index 100%
rename from animalai_package/animalai/animalai/communicator_objects/unity_rl_input_pb2.py
rename to animalai/animalai/animalai/communicator_objects/unity_rl_input_pb2.py
diff --git a/animalai_package/animalai/animalai/communicator_objects/unity_rl_output_pb2.py b/animalai/animalai/animalai/communicator_objects/unity_rl_output_pb2.py
similarity index 100%
rename from animalai_package/animalai/animalai/communicator_objects/unity_rl_output_pb2.py
rename to animalai/animalai/animalai/communicator_objects/unity_rl_output_pb2.py
diff --git a/animalai_package/animalai/animalai/communicator_objects/unity_rl_reset_input_pb2.py b/animalai/animalai/animalai/communicator_objects/unity_rl_reset_input_pb2.py
similarity index 100%
rename from animalai_package/animalai/animalai/communicator_objects/unity_rl_reset_input_pb2.py
rename to animalai/animalai/animalai/communicator_objects/unity_rl_reset_input_pb2.py
diff --git a/animalai_package/animalai/animalai/communicator_objects/unity_rl_reset_output_pb2.py b/animalai/animalai/animalai/communicator_objects/unity_rl_reset_output_pb2.py
similarity index 100%
rename from animalai_package/animalai/animalai/communicator_objects/unity_rl_reset_output_pb2.py
rename to animalai/animalai/animalai/communicator_objects/unity_rl_reset_output_pb2.py
diff --git a/animalai_package/animalai/animalai/communicator_objects/unity_to_external_pb2.py b/animalai/animalai/animalai/communicator_objects/unity_to_external_pb2.py
similarity index 100%
rename from animalai_package/animalai/animalai/communicator_objects/unity_to_external_pb2.py
rename to animalai/animalai/animalai/communicator_objects/unity_to_external_pb2.py
diff --git a/animalai_package/animalai/animalai/communicator_objects/unity_to_external_pb2_grpc.py b/animalai/animalai/animalai/communicator_objects/unity_to_external_pb2_grpc.py
similarity index 100%
rename from animalai_package/animalai/animalai/communicator_objects/unity_to_external_pb2_grpc.py
rename to animalai/animalai/animalai/communicator_objects/unity_to_external_pb2_grpc.py
diff --git a/animalai_package/animalai/animalai/envs/__init__.py b/animalai/animalai/animalai/envs/__init__.py
similarity index 100%
rename from animalai_package/animalai/animalai/envs/__init__.py
rename to animalai/animalai/animalai/envs/__init__.py
diff --git a/animalai_package/animalai/animalai/envs/arena_config.py b/animalai/animalai/animalai/envs/arena_config.py
similarity index 100%
rename from animalai_package/animalai/animalai/envs/arena_config.py
rename to animalai/animalai/animalai/envs/arena_config.py
diff --git a/animalai_package/animalai/animalai/envs/brain.py b/animalai/animalai/animalai/envs/brain.py
similarity index 100%
rename from animalai_package/animalai/animalai/envs/brain.py
rename to animalai/animalai/animalai/envs/brain.py
diff --git a/animalai_package/animalai/animalai/envs/communicator.py b/animalai/animalai/animalai/envs/communicator.py
similarity index 100%
rename from animalai_package/animalai/animalai/envs/communicator.py
rename to animalai/animalai/animalai/envs/communicator.py
diff --git a/animalai_package/animalai/animalai/envs/environment.py b/animalai/animalai/animalai/envs/environment.py
similarity index 100%
rename from animalai_package/animalai/animalai/envs/environment.py
rename to animalai/animalai/animalai/envs/environment.py
diff --git a/animalai_package/animalai/animalai/envs/exception.py b/animalai/animalai/animalai/envs/exception.py
similarity index 100%
rename from animalai_package/animalai/animalai/envs/exception.py
rename to animalai/animalai/animalai/envs/exception.py
diff --git a/animalai_package/animalai/animalai/envs/gym/environment.py b/animalai/animalai/animalai/envs/gym/environment.py
similarity index 100%
rename from animalai_package/animalai/animalai/envs/gym/environment.py
rename to animalai/animalai/animalai/envs/gym/environment.py
diff --git a/animalai_package/animalai/animalai/envs/rpc_communicator.py b/animalai/animalai/animalai/envs/rpc_communicator.py
similarity index 100%
rename from animalai_package/animalai/animalai/envs/rpc_communicator.py
rename to animalai/animalai/animalai/envs/rpc_communicator.py
diff --git a/animalai_package/animalai/animalai/envs/socket_communicator.py b/animalai/animalai/animalai/envs/socket_communicator.py
similarity index 100%
rename from animalai_package/animalai/animalai/envs/socket_communicator.py
rename to animalai/animalai/animalai/envs/socket_communicator.py
diff --git a/animalai_package/animalai/setup.py b/animalai/animalai/setup.py
similarity index 100%
rename from animalai_package/animalai/setup.py
rename to animalai/animalai/setup.py
From 4277083be24237d176902dc305c9ec6982a3be9f Mon Sep 17 00:00:00 2001
From: Benjamin Beyret
Date: Fri, 31 May 2019 18:48:50 +0100
Subject: [PATCH 08/23] delete empty animalai folder
---
animalai/{animalai => }/LICENSE | 0
animalai/{animalai => }/README.md | 0
animalai/animalai/{animalai => }/__init__.py | 0
animalai/animalai/{animalai => }/communicator_objects/__init__.py | 0
.../{animalai => }/communicator_objects/agent_action_proto_pb2.py | 0
.../{animalai => }/communicator_objects/agent_info_proto_pb2.py | 0
.../communicator_objects/arena_parameters_proto_pb2.py | 0
.../communicator_objects/brain_parameters_proto_pb2.py | 0
.../{animalai => }/communicator_objects/command_proto_pb2.py | 0
.../communicator_objects/demonstration_meta_proto_pb2.py | 0
.../communicator_objects/engine_configuration_proto_pb2.py | 0
.../animalai/{animalai => }/communicator_objects/header_pb2.py | 0
.../{animalai => }/communicator_objects/resolution_proto_pb2.py | 0
.../{animalai => }/communicator_objects/space_type_proto_pb2.py | 0
.../{animalai => }/communicator_objects/unity_input_pb2.py | 0
.../{animalai => }/communicator_objects/unity_message_pb2.py | 0
.../{animalai => }/communicator_objects/unity_output_pb2.py | 0
.../communicator_objects/unity_rl_initialization_input_pb2.py | 0
.../communicator_objects/unity_rl_initialization_output_pb2.py | 0
.../{animalai => }/communicator_objects/unity_rl_input_pb2.py | 0
.../{animalai => }/communicator_objects/unity_rl_output_pb2.py | 0
.../communicator_objects/unity_rl_reset_input_pb2.py | 0
.../communicator_objects/unity_rl_reset_output_pb2.py | 0
.../{animalai => }/communicator_objects/unity_to_external_pb2.py | 0
.../communicator_objects/unity_to_external_pb2_grpc.py | 0
animalai/animalai/{animalai => }/envs/__init__.py | 0
animalai/animalai/{animalai => }/envs/arena_config.py | 0
animalai/animalai/{animalai => }/envs/brain.py | 0
animalai/animalai/{animalai => }/envs/communicator.py | 0
animalai/animalai/{animalai => }/envs/environment.py | 0
animalai/animalai/{animalai => }/envs/exception.py | 0
animalai/animalai/{animalai => }/envs/gym/environment.py | 0
animalai/animalai/{animalai => }/envs/rpc_communicator.py | 0
animalai/animalai/{animalai => }/envs/socket_communicator.py | 0
animalai/{animalai => }/setup.py | 0
35 files changed, 0 insertions(+), 0 deletions(-)
rename animalai/{animalai => }/LICENSE (100%)
rename animalai/{animalai => }/README.md (100%)
rename animalai/animalai/{animalai => }/__init__.py (100%)
rename animalai/animalai/{animalai => }/communicator_objects/__init__.py (100%)
rename animalai/animalai/{animalai => }/communicator_objects/agent_action_proto_pb2.py (100%)
rename animalai/animalai/{animalai => }/communicator_objects/agent_info_proto_pb2.py (100%)
rename animalai/animalai/{animalai => }/communicator_objects/arena_parameters_proto_pb2.py (100%)
rename animalai/animalai/{animalai => }/communicator_objects/brain_parameters_proto_pb2.py (100%)
rename animalai/animalai/{animalai => }/communicator_objects/command_proto_pb2.py (100%)
rename animalai/animalai/{animalai => }/communicator_objects/demonstration_meta_proto_pb2.py (100%)
rename animalai/animalai/{animalai => }/communicator_objects/engine_configuration_proto_pb2.py (100%)
rename animalai/animalai/{animalai => }/communicator_objects/header_pb2.py (100%)
rename animalai/animalai/{animalai => }/communicator_objects/resolution_proto_pb2.py (100%)
rename animalai/animalai/{animalai => }/communicator_objects/space_type_proto_pb2.py (100%)
rename animalai/animalai/{animalai => }/communicator_objects/unity_input_pb2.py (100%)
rename animalai/animalai/{animalai => }/communicator_objects/unity_message_pb2.py (100%)
rename animalai/animalai/{animalai => }/communicator_objects/unity_output_pb2.py (100%)
rename animalai/animalai/{animalai => }/communicator_objects/unity_rl_initialization_input_pb2.py (100%)
rename animalai/animalai/{animalai => }/communicator_objects/unity_rl_initialization_output_pb2.py (100%)
rename animalai/animalai/{animalai => }/communicator_objects/unity_rl_input_pb2.py (100%)
rename animalai/animalai/{animalai => }/communicator_objects/unity_rl_output_pb2.py (100%)
rename animalai/animalai/{animalai => }/communicator_objects/unity_rl_reset_input_pb2.py (100%)
rename animalai/animalai/{animalai => }/communicator_objects/unity_rl_reset_output_pb2.py (100%)
rename animalai/animalai/{animalai => }/communicator_objects/unity_to_external_pb2.py (100%)
rename animalai/animalai/{animalai => }/communicator_objects/unity_to_external_pb2_grpc.py (100%)
rename animalai/animalai/{animalai => }/envs/__init__.py (100%)
rename animalai/animalai/{animalai => }/envs/arena_config.py (100%)
rename animalai/animalai/{animalai => }/envs/brain.py (100%)
rename animalai/animalai/{animalai => }/envs/communicator.py (100%)
rename animalai/animalai/{animalai => }/envs/environment.py (100%)
rename animalai/animalai/{animalai => }/envs/exception.py (100%)
rename animalai/animalai/{animalai => }/envs/gym/environment.py (100%)
rename animalai/animalai/{animalai => }/envs/rpc_communicator.py (100%)
rename animalai/animalai/{animalai => }/envs/socket_communicator.py (100%)
rename animalai/{animalai => }/setup.py (100%)
diff --git a/animalai/animalai/LICENSE b/animalai/LICENSE
similarity index 100%
rename from animalai/animalai/LICENSE
rename to animalai/LICENSE
diff --git a/animalai/animalai/README.md b/animalai/README.md
similarity index 100%
rename from animalai/animalai/README.md
rename to animalai/README.md
diff --git a/animalai/animalai/animalai/__init__.py b/animalai/animalai/__init__.py
similarity index 100%
rename from animalai/animalai/animalai/__init__.py
rename to animalai/animalai/__init__.py
diff --git a/animalai/animalai/animalai/communicator_objects/__init__.py b/animalai/animalai/communicator_objects/__init__.py
similarity index 100%
rename from animalai/animalai/animalai/communicator_objects/__init__.py
rename to animalai/animalai/communicator_objects/__init__.py
diff --git a/animalai/animalai/animalai/communicator_objects/agent_action_proto_pb2.py b/animalai/animalai/communicator_objects/agent_action_proto_pb2.py
similarity index 100%
rename from animalai/animalai/animalai/communicator_objects/agent_action_proto_pb2.py
rename to animalai/animalai/communicator_objects/agent_action_proto_pb2.py
diff --git a/animalai/animalai/animalai/communicator_objects/agent_info_proto_pb2.py b/animalai/animalai/communicator_objects/agent_info_proto_pb2.py
similarity index 100%
rename from animalai/animalai/animalai/communicator_objects/agent_info_proto_pb2.py
rename to animalai/animalai/communicator_objects/agent_info_proto_pb2.py
diff --git a/animalai/animalai/animalai/communicator_objects/arena_parameters_proto_pb2.py b/animalai/animalai/communicator_objects/arena_parameters_proto_pb2.py
similarity index 100%
rename from animalai/animalai/animalai/communicator_objects/arena_parameters_proto_pb2.py
rename to animalai/animalai/communicator_objects/arena_parameters_proto_pb2.py
diff --git a/animalai/animalai/animalai/communicator_objects/brain_parameters_proto_pb2.py b/animalai/animalai/communicator_objects/brain_parameters_proto_pb2.py
similarity index 100%
rename from animalai/animalai/animalai/communicator_objects/brain_parameters_proto_pb2.py
rename to animalai/animalai/communicator_objects/brain_parameters_proto_pb2.py
diff --git a/animalai/animalai/animalai/communicator_objects/command_proto_pb2.py b/animalai/animalai/communicator_objects/command_proto_pb2.py
similarity index 100%
rename from animalai/animalai/animalai/communicator_objects/command_proto_pb2.py
rename to animalai/animalai/communicator_objects/command_proto_pb2.py
diff --git a/animalai/animalai/animalai/communicator_objects/demonstration_meta_proto_pb2.py b/animalai/animalai/communicator_objects/demonstration_meta_proto_pb2.py
similarity index 100%
rename from animalai/animalai/animalai/communicator_objects/demonstration_meta_proto_pb2.py
rename to animalai/animalai/communicator_objects/demonstration_meta_proto_pb2.py
diff --git a/animalai/animalai/animalai/communicator_objects/engine_configuration_proto_pb2.py b/animalai/animalai/communicator_objects/engine_configuration_proto_pb2.py
similarity index 100%
rename from animalai/animalai/animalai/communicator_objects/engine_configuration_proto_pb2.py
rename to animalai/animalai/communicator_objects/engine_configuration_proto_pb2.py
diff --git a/animalai/animalai/animalai/communicator_objects/header_pb2.py b/animalai/animalai/communicator_objects/header_pb2.py
similarity index 100%
rename from animalai/animalai/animalai/communicator_objects/header_pb2.py
rename to animalai/animalai/communicator_objects/header_pb2.py
diff --git a/animalai/animalai/animalai/communicator_objects/resolution_proto_pb2.py b/animalai/animalai/communicator_objects/resolution_proto_pb2.py
similarity index 100%
rename from animalai/animalai/animalai/communicator_objects/resolution_proto_pb2.py
rename to animalai/animalai/communicator_objects/resolution_proto_pb2.py
diff --git a/animalai/animalai/animalai/communicator_objects/space_type_proto_pb2.py b/animalai/animalai/communicator_objects/space_type_proto_pb2.py
similarity index 100%
rename from animalai/animalai/animalai/communicator_objects/space_type_proto_pb2.py
rename to animalai/animalai/communicator_objects/space_type_proto_pb2.py
diff --git a/animalai/animalai/animalai/communicator_objects/unity_input_pb2.py b/animalai/animalai/communicator_objects/unity_input_pb2.py
similarity index 100%
rename from animalai/animalai/animalai/communicator_objects/unity_input_pb2.py
rename to animalai/animalai/communicator_objects/unity_input_pb2.py
diff --git a/animalai/animalai/animalai/communicator_objects/unity_message_pb2.py b/animalai/animalai/communicator_objects/unity_message_pb2.py
similarity index 100%
rename from animalai/animalai/animalai/communicator_objects/unity_message_pb2.py
rename to animalai/animalai/communicator_objects/unity_message_pb2.py
diff --git a/animalai/animalai/animalai/communicator_objects/unity_output_pb2.py b/animalai/animalai/communicator_objects/unity_output_pb2.py
similarity index 100%
rename from animalai/animalai/animalai/communicator_objects/unity_output_pb2.py
rename to animalai/animalai/communicator_objects/unity_output_pb2.py
diff --git a/animalai/animalai/animalai/communicator_objects/unity_rl_initialization_input_pb2.py b/animalai/animalai/communicator_objects/unity_rl_initialization_input_pb2.py
similarity index 100%
rename from animalai/animalai/animalai/communicator_objects/unity_rl_initialization_input_pb2.py
rename to animalai/animalai/communicator_objects/unity_rl_initialization_input_pb2.py
diff --git a/animalai/animalai/animalai/communicator_objects/unity_rl_initialization_output_pb2.py b/animalai/animalai/communicator_objects/unity_rl_initialization_output_pb2.py
similarity index 100%
rename from animalai/animalai/animalai/communicator_objects/unity_rl_initialization_output_pb2.py
rename to animalai/animalai/communicator_objects/unity_rl_initialization_output_pb2.py
diff --git a/animalai/animalai/animalai/communicator_objects/unity_rl_input_pb2.py b/animalai/animalai/communicator_objects/unity_rl_input_pb2.py
similarity index 100%
rename from animalai/animalai/animalai/communicator_objects/unity_rl_input_pb2.py
rename to animalai/animalai/communicator_objects/unity_rl_input_pb2.py
diff --git a/animalai/animalai/animalai/communicator_objects/unity_rl_output_pb2.py b/animalai/animalai/communicator_objects/unity_rl_output_pb2.py
similarity index 100%
rename from animalai/animalai/animalai/communicator_objects/unity_rl_output_pb2.py
rename to animalai/animalai/communicator_objects/unity_rl_output_pb2.py
diff --git a/animalai/animalai/animalai/communicator_objects/unity_rl_reset_input_pb2.py b/animalai/animalai/communicator_objects/unity_rl_reset_input_pb2.py
similarity index 100%
rename from animalai/animalai/animalai/communicator_objects/unity_rl_reset_input_pb2.py
rename to animalai/animalai/communicator_objects/unity_rl_reset_input_pb2.py
diff --git a/animalai/animalai/animalai/communicator_objects/unity_rl_reset_output_pb2.py b/animalai/animalai/communicator_objects/unity_rl_reset_output_pb2.py
similarity index 100%
rename from animalai/animalai/animalai/communicator_objects/unity_rl_reset_output_pb2.py
rename to animalai/animalai/communicator_objects/unity_rl_reset_output_pb2.py
diff --git a/animalai/animalai/animalai/communicator_objects/unity_to_external_pb2.py b/animalai/animalai/communicator_objects/unity_to_external_pb2.py
similarity index 100%
rename from animalai/animalai/animalai/communicator_objects/unity_to_external_pb2.py
rename to animalai/animalai/communicator_objects/unity_to_external_pb2.py
diff --git a/animalai/animalai/animalai/communicator_objects/unity_to_external_pb2_grpc.py b/animalai/animalai/communicator_objects/unity_to_external_pb2_grpc.py
similarity index 100%
rename from animalai/animalai/animalai/communicator_objects/unity_to_external_pb2_grpc.py
rename to animalai/animalai/communicator_objects/unity_to_external_pb2_grpc.py
diff --git a/animalai/animalai/animalai/envs/__init__.py b/animalai/animalai/envs/__init__.py
similarity index 100%
rename from animalai/animalai/animalai/envs/__init__.py
rename to animalai/animalai/envs/__init__.py
diff --git a/animalai/animalai/animalai/envs/arena_config.py b/animalai/animalai/envs/arena_config.py
similarity index 100%
rename from animalai/animalai/animalai/envs/arena_config.py
rename to animalai/animalai/envs/arena_config.py
diff --git a/animalai/animalai/animalai/envs/brain.py b/animalai/animalai/envs/brain.py
similarity index 100%
rename from animalai/animalai/animalai/envs/brain.py
rename to animalai/animalai/envs/brain.py
diff --git a/animalai/animalai/animalai/envs/communicator.py b/animalai/animalai/envs/communicator.py
similarity index 100%
rename from animalai/animalai/animalai/envs/communicator.py
rename to animalai/animalai/envs/communicator.py
diff --git a/animalai/animalai/animalai/envs/environment.py b/animalai/animalai/envs/environment.py
similarity index 100%
rename from animalai/animalai/animalai/envs/environment.py
rename to animalai/animalai/envs/environment.py
diff --git a/animalai/animalai/animalai/envs/exception.py b/animalai/animalai/envs/exception.py
similarity index 100%
rename from animalai/animalai/animalai/envs/exception.py
rename to animalai/animalai/envs/exception.py
diff --git a/animalai/animalai/animalai/envs/gym/environment.py b/animalai/animalai/envs/gym/environment.py
similarity index 100%
rename from animalai/animalai/animalai/envs/gym/environment.py
rename to animalai/animalai/envs/gym/environment.py
diff --git a/animalai/animalai/animalai/envs/rpc_communicator.py b/animalai/animalai/envs/rpc_communicator.py
similarity index 100%
rename from animalai/animalai/animalai/envs/rpc_communicator.py
rename to animalai/animalai/envs/rpc_communicator.py
diff --git a/animalai/animalai/animalai/envs/socket_communicator.py b/animalai/animalai/envs/socket_communicator.py
similarity index 100%
rename from animalai/animalai/animalai/envs/socket_communicator.py
rename to animalai/animalai/envs/socket_communicator.py
diff --git a/animalai/animalai/setup.py b/animalai/setup.py
similarity index 100%
rename from animalai/animalai/setup.py
rename to animalai/setup.py
From c84b67722a3078989e8c8e5677602e9f9ba5f2a2 Mon Sep 17 00:00:00 2001
From: Benjamin Beyret
Date: Fri, 31 May 2019 19:41:58 +0100
Subject: [PATCH 09/23] fix typos visualize + package for PyPI
---
animalai/setup.py | 4 ++--
examples/animalai_train/setup.py | 13 ++++++++-----
examples/visualizeLightsOff.py | 6 +++---
3 files changed, 13 insertions(+), 10 deletions(-)
diff --git a/animalai/setup.py b/animalai/setup.py
index 0be8087e..0fec0db5 100644
--- a/animalai/setup.py
+++ b/animalai/setup.py
@@ -2,7 +2,7 @@
setup(
name='animalai',
- version='0.4.0',
+ version='0.4.2',
description='Animal AI competition interface',
url='https://github.com/beyretb/AnimalAI-Olympics',
author='Benjamin Beyret',
@@ -15,7 +15,7 @@
'Programming Language :: Python :: 3.6'
],
- packages=['animalai.envs', 'animalai.communicator_objects'], # Required
+ packages=['animalai.envs', 'animalai.envs.gym', 'animalai.communicator_objects'], # Required
zip_safe=False,
install_requires=[
diff --git a/examples/animalai_train/setup.py b/examples/animalai_train/setup.py
index 6c31c5b6..501e4cd0 100644
--- a/examples/animalai_train/setup.py
+++ b/examples/animalai_train/setup.py
@@ -2,7 +2,7 @@
setup(
name='animalai_train',
- version='0.4.0',
+ version='0.4.2',
description='Animal AI competition training library',
url='https://github.com/beyretb/AnimalAI-Olympics',
author='Benjamin Beyret',
@@ -15,19 +15,22 @@
'Programming Language :: Python :: 3.6'
],
- packages=['animalai_train.trainers', 'animalai_train.trainers.bc', 'animalai_train.trainers.ppo'], # Required
+ packages=['animalai_train.trainers', 'animalai_train.trainers.bc', 'animalai_train.trainers.ppo',
+ 'animalai_train.dopamine'], # Required
zip_safe=False,
install_requires=[
- 'animalai>=0.4.0',
- 'tensorflow>=1.7,<1.8',
+ 'animalai>=0.4.2',
+ 'dopamine-rl',
+ 'tensorflow==1.12',
'matplotlib',
'Pillow>=4.2.1,<=5.4.1',
'numpy>=1.13.3,<=1.14.5',
'protobuf>=3.6,<3.7',
'grpcio>=1.11.0,<1.12.0',
'pyyaml>=5.1',
+ 'atari-py',
'jsonpickle>=1.2',
'pypiwin32==223;platform_system=="Windows"'],
python_requires=">=3.5,<3.8",
-)
\ No newline at end of file
+)
diff --git a/examples/visualizeLightsOff.py b/examples/visualizeLightsOff.py
index 3c14995a..b1a55d49 100644
--- a/examples/visualizeLightsOff.py
+++ b/examples/visualizeLightsOff.py
@@ -1,11 +1,11 @@
-from animalai.envs import UnityEnvironment
+from animalai.envs.environment import UnityEnvironment
from animalai.envs.arena_config import ArenaConfig
import random
import numpy as np
from matplotlib import pyplot as plt
from matplotlib import animation
-env_path = 'env/AnimalAI'
+env_path = '../env/AnimalAI'
worker_id = random.randint(1, 100)
seed = 10
@@ -34,7 +34,7 @@
     play=False
 )
-arena_config_in = ArenaConfig('examples/configs/lightsOff.yaml')
+arena_config_in = ArenaConfig('configs/lightsOff.yaml')
 env.reset(arenas_configurations=arena_config_in)
 fig, axes = plt.subplots(2, 2)
 imshows = []
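To make the change above concrete, here is a hedged sketch of the usage that the patched `visualizeLightsOff.py` reflects: `UnityEnvironment` is imported from `animalai.envs.environment`, and paths are resolved relative to `examples/`. The `file_name` keyword is an assumption; the other values appear in the hunks above.
```
# Hedged sketch mirroring the patched visualizeLightsOff.py; the file_name
# keyword is an assumption, the other values come from the hunks above.
import random

from animalai.envs.environment import UnityEnvironment
from animalai.envs.arena_config import ArenaConfig

env = UnityEnvironment(
    file_name='../env/AnimalAI',       # environment binary, relative to examples/
    worker_id=random.randint(1, 100),  # avoid port clashes between runs
    seed=10,
    play=False,                        # scripted control rather than play mode
)
arena_config_in = ArenaConfig('configs/lightsOff.yaml')
env.reset(arenas_configurations=arena_config_in)
env.close()
```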
From fbb6a704ddf49923b27798df28cae711f00425eb Mon Sep 17 00:00:00 2001
From: Benjamin
Date: Sat, 1 Jun 2019 15:56:50 +0100
Subject: [PATCH 10/23] update README with new setup + train.py ->
trainMLAgents.py
---
README.md | 91 ++++++++++++++++++-------
examples/{train.py => trainMLAgents.py} | 0
2 files changed, 68 insertions(+), 23 deletions(-)
rename examples/{train.py => trainMLAgents.py} (100%)
diff --git a/README.md b/README.md
index 7c4110d0..33eccc52 100644
--- a/README.md
+++ b/README.md
@@ -3,12 +3,24 @@
## Overview
Welcome to the repository for the Animal-AI Olympics competition where you will find all the code needed to compete in
-this new challenge. Note that for the moment this repo contains **only the training environment** (v0.1) that will be used for the competition and **does not contain any competition tests or information for entering**. If everything goes well the competition will be live on June 30th. Until then we will be continually updating with bug fixes and small changes to environment. However, the general structure will stay the same so it's not too early to start working with the environment. For more information on the competition itself and to stay updated with any developments, head to the [Competition Website](http://www.animalaiolympics.com/) and follow [@MacroPhilosophy](https://twitter.com/MacroPhilosophy) and [@BenBeyret](https://twitter.com/BenBeyret) on twitter.
-
-The environment contains an agent enclosed in a fixed sized arena. Objects can spawn in this arena, including positive and negative rewards (green, yellow and red spheres). All of the hidden tests that will appear in the competition are made using the objects in the training environment. We have provided some sample environment configurations that should be useful for training, but part of the challenge will be experimenting and designing new configurations.
-
-The goal of this first release is to **seek feedback from the community** as well as to provide the environment for research prior to the launch of the competition itself. The competition version of the environment will be similar to this one, however we are open to suggestion (for minor changes) and especially bug reports! Head over to the [issues page](https://github.com/beyretb/AnimalAI-Olympics/issues) and open a ticket using the `suggestion` or `bug` labels
-respectively.
+this new challenge. Note that for the moment this repo contains **only the training environment** (v0.5) that will be
+used for the competition and **does not contain any competition tests or information for entering**. If everything goes
+well the competition will be live on June 30th. Until then we will be continually updating with bug fixes and small
+changes to the environment. However, the general structure will stay the same so it's not too early to start working with
+the environment. For more information on the competition itself and to stay updated with any developments, head to the
+[Competition Website](http://www.animalaiolympics.com/) and follow [@MacroPhilosophy](https://twitter.com/MacroPhilosophy)
+and [@BenBeyret](https://twitter.com/BenBeyret) on twitter.
+
+The environment contains an agent enclosed in a fixed sized arena. Objects can spawn in this arena, including positive
+and negative rewards (green, yellow and red spheres). All of the hidden tests that will appear in the competition are
+made using the objects in the training environment. We have provided some sample environment configurations that should
+be useful for training, but part of the challenge will be experimenting and designing new configurations.
+
+The goal of this first release is to **seek feedback from the community** as well as to provide the environment for
+research prior to the launch of the competition itself. The competition version of the environment will be similar to
+this one, however we are open to suggestion (for minor changes) and especially bug reports! Head over to the
+[issues page](https://github.com/beyretb/AnimalAI-Olympics/issues) and open a ticket using the `suggestion` or `bug`
+labels respectively.
To get started install the requirements below, and then follow the [Quick Start Guide](documentation/quickstart.md).
A more in depth documentation can be found on the
@@ -16,7 +28,8 @@ A more in depth documentation c
## Development Blog
-You can read the development blog [here](https://mdcrosby.com/blog). It covers further details about the competition as well as part of the development process.
+You can read the development blog [here](https://mdcrosby.com/blog). It covers further details about the competition as
+well as part of the development process.
1. [Why Animal-AI?](https://mdcrosby.com/blog/animalai1.html)
@@ -24,21 +37,32 @@ You can read the development blog [here](https://mdcrosby.com/blog). It covers f
## Requirements
-The Animal-AI package works on most platforms.
+The Animal-AI package works on most platforms.
+
-First of all your will need `python3.6` installed. You will find a list of requirements in the `requirements*.txt` files.
-Using `pip` you can run:
+First of all you will need `python3.6` installed; we recommend using virtual environments. We provide two packages for
+this competition:
-on Linux and mac:
+- The main one is an API for interfacing with the Unity environment. It contains both a
+[gym environment](https://github.com/openai/gym) as well as an extension of Unity's
+[ml-agents environments](https://github.com/Unity-Technologies/ml-agents/tree/master/ml-agents-envs). You can install it
+ via pip:
```
-pip install -r requirementsOthers.txt
+pip install animalai
```
-
-on windows:
+Or you can install it from the source, head to `animalai/` folder and run `pip install -e .`.
+
+- We also provide a package that can be used as a starting point for training, and which is required to run most of the
+example scripts found in the `examples/` folder. It contains an extension of
+[ml-agents' training environment](https://github.com/Unity-Technologies/ml-agents/tree/master/ml-agents) that relies on
+[OpenAI's PPO](https://openai.com/blog/openai-baselines-ppo/), as well as
+[Google's dopamine](https://github.com/google/dopamine) which implements
+[Rainbow](https://www.aaai.org/ocs/index.php/AAAI/AAAI18/paper/download/17204/16680) (among others). You can also install
+this package using pip:
```
-pip install -r requirementsWindows.txt
+pip install animalai-train
```
-**Note:** `python3.6` is required to install `tensorflow>=1.7,<1.8` which is only used for the training script we provide as an example. Should you wish to use another version of python you can remove the first line from the requirement files. You will still be able to use the `visualizeArena.py` script, but not the `train.py` one.
+Or you can install it from source, head to `examples/animalai_train` and run `pip install -e .`.
Finally download the environment for your system:
@@ -71,20 +95,29 @@ mode for better performance.
## Competition Tests
-We will be releasing further details about the tests in the competition over the coming weeks. The tests will be split into multiple categories from the very simple (e.g. **food retrieval**, **preferences**, and **basic obstacles**) to the more complex (e.g. **working memory**, **spatial memory**, **object permanence**, and **object manipulation**). For now we have included multiple example config files that each relate to a different category. As we release further details we will also specify the rules for the type of tests that can appear in each category. Note that the example config files are just simple examples to be used as a guide. An agent that solves even all of these perfectly may still not be able to solve all the tests in the categories but it would be off to a very good start.
+We will be releasing further details about the tests in the competition over the coming weeks. The tests will be split
+into multiple categories from the very simple (e.g. **food retrieval**, **preferences**, and **basic obstacles**) to
+the more complex (e.g. **working memory**, **spatial memory**, **object permanence**, and **object manipulation**). For
+now we have included multiple example config files that each relate to a different category. As we release further
+details we will also specify the rules for the type of tests that can appear in each category. Note that the example
+config files are just simple examples to be used as a guide. An agent that solves even all of these perfectly may still
+not be able to solve all the tests in the categories but it would be off to a very good start.
## Citing
For now please cite the [Nature: Machine Intelligence piece](https://rdcu.be/bBCQt):
-Crosby, M., Beyret, B., Halina M. [The Animal-AI Olympics](https://www.nature.com/articles/s42256-019-0050-3) Nature Machine Intelligence 1 (5) p257 2019.
+Crosby, M., Beyret, B., Halina M. [The Animal-AI Olympics](https://www.nature.com/articles/s42256-019-0050-3) Nature
+Machine Intelligence 1 (5) p257 2019.
## Unity ML-Agents
The Animal-AI Olympics was built using [Unity's ML-Agents Toolkit.](https://github.com/Unity-Technologies/ml-agents)
The Python library located in [animalai](animalai) is almost identical to
-[ml-agents v0.7](https://github.com/Unity-Technologies/ml-agents/tree/master/ml-agents-envs). We only added the possibility to change the configuration of arenas between episodes. The documentation for ML-Agents can be found [here](https://github.com/Unity-Technologies/ml-agents/blob/master/docs/Python-API.md).
+[ml-agents v0.7](https://github.com/Unity-Technologies/ml-agents/tree/master/ml-agents-envs). We only added the
+possibility to change the configuration of arenas between episodes. The documentation for ML-Agents can be found
+[here](https://github.com/Unity-Technologies/ml-agents/blob/master/docs/Python-API.md).
Juliani, A., Berges, V., Vckay, E., Gao, Y., Henry, H., Mattar, M., Lange, D. (2018). [Unity: A General Platform for
Intelligent Agents.](https://arxiv.org/abs/1809.02627) *arXiv preprint arXiv:1809.02627*
@@ -97,8 +130,8 @@ Occasional slow frame rates in play mode. Temporary fix: reduce screen size.
## TODO
-- [ ] Offer a gym wrapper for training
- [ ] Add protobuf for arena spawning feedback
+- [x] Offer a gym wrapper for training
- [x] Improve the way the agent spawns
- [x] Add lights out configurations.
- [x] Improve environment framerates
@@ -106,17 +139,29 @@ Occasional slow frame rates in play mode. Temporary fix: reduce screen size.
## Version History
+- v0.5
+ - Separate environment API and training API in Python
+ - Release both as `animalai` and `animalai-train` PyPI packages (for `pip` installs)
+ - Agent speed in play-mode constant across various platforms
+ - Provide Gym environment
+  - Add `trainBaselines.py` to train using `dopamine` and the Gym wrapper
+ - Create the `agent.py` interface for agents submission
+ - Add the `HotZone` object (equivalent to the red zone but without death)
+
- v0.4 - Lights off moved to Unity, colors configurations, proportional goals, bugs fixes
- The light is now directly switched on/off within Unity, configuration files stay the same
- Blackouts now work with infinite episodes (`t=0`)
- - The `rand_colors` configurations have been removed and the user can now pass `RGB` values, see [here](documentation/configFile.md#objects)
- - Rewards for goals are now proportional to their size (except for the `DeathZone`), see [here](documentation/definitionsOfObjects.md#rewards)
+ - The `rand_colors` configurations have been removed and the user can now pass `RGB` values, see
+ [here](documentation/configFile.md#objects)
+ - Rewards for goals are now proportional to their size (except for the `DeathZone`), see
+ [here](documentation/definitionsOfObjects.md#rewards)
- The agent is now a ball rather than a cube
- Increased safety for spawning the agent to avoid infinite loops
- Bugs fixes
- v0.3 - Lights off, remove Beams and add cylinder
- - We added the possibility to switch the lights off at given intervals, see [here](documentation/configFile.md#blackouts)
+ - We added the possibility to switch the lights off at given intervals, see
+ [here](documentation/configFile.md#blackouts)
- visualizeLightsOff.py displays an example of lights off, from the agent's point of view
- Beams objects have been removed
- A `Cylinder` object has been added (similar behaviour to the `Woodlog`)
diff --git a/examples/train.py b/examples/trainMLAgents.py
similarity index 100%
rename from examples/train.py
rename to examples/trainMLAgents.py
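Since the README above now points at the gym environment bundled in the `animalai` package, a rough sketch of how that wrapper might be driven follows. Only the module path `animalai.envs.gym.environment` is grounded in the renames in these patches; the class name `AnimalAIEnv` and its keyword arguments are assumptions.
```
# Rough sketch only: the class name and keyword arguments are assumptions.
from animalai.envs.arena_config import ArenaConfig
from animalai.envs.gym.environment import AnimalAIEnv

env = AnimalAIEnv(
    environment_filename='../env/AnimalAI',                       # assumed name
    worker_id=1,
    arenas_configurations=ArenaConfig('configs/lightsOff.yaml'),  # assumed name
)
obs = env.reset()
obs, reward, done, info = env.step(env.action_space.sample())     # standard gym loop
env.close()
```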
From 3725cb7a521b137befd521a0b722ae6c46abf325 Mon Sep 17 00:00:00 2001
From: Benjamin
Date: Sat, 1 Jun 2019 16:52:03 +0100
Subject: [PATCH 11/23] update quickstart.md with new structure
---
README.md | 18 ++---
documentation/quickstart.md | 66 ++++++++++++-------
.../{trainBaselines.py => trainDopamine.py} | 0
3 files changed, 53 insertions(+), 31 deletions(-)
rename examples/{trainBaselines.py => trainDopamine.py} (100%)
diff --git a/README.md b/README.md
index 33eccc52..a35eb75e 100644
--- a/README.md
+++ b/README.md
@@ -37,7 +37,7 @@ well as part of the development process.
## Requirements
-The Animal-AI package works on most platforms.
+The Animal-AI package works on Linux, Mac and Windows, as well as most Cloud providers.
First of all you will need `python3.6` installed; we recommend using virtual environments. We provide two packages for
@@ -47,10 +47,10 @@ this competition:
[gym environment](https://github.com/openai/gym) as well as an extension of Unity's
[ml-agents environments](https://github.com/Unity-Technologies/ml-agents/tree/master/ml-agents-envs). You can install it
via pip:
-```
-pip install animalai
-```
-Or you can install it from the source, head to `animalai/` folder and run `pip install -e .`.
+ ```
+ pip install animalai
+ ```
+ Or you can install it from source: head to the `animalai/` folder and run `pip install -e .`
- We also provide a package that can be used as a starting point for training, and which is required to run most of the
example scripts found in the `examples/` folder. It contains an extension of
@@ -59,10 +59,10 @@ example scripts found in the `examples/` folder. It contains an extension of
[Google's dopamine](https://github.com/google/dopamine) which implements
[Rainbow](https://www.aaai.org/ocs/index.php/AAAI/AAAI18/paper/download/17204/16680) (among others). You can also install
this package using pip:
-```
-pip install animalai-train
-```
-Or you can install it from source, head to `examples/animalai_train` and run `pip install -e .`.
+ ```
+ pip install animalai-train
+ ```
+ Or you can install it from source: head to `examples/animalai_train` and run `pip install -e .` (a quick import check covering both packages is sketched below)
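+
+As a quick sanity check of both installs, you can try importing the packages from Python. This is only an illustrative
+sketch; the module name `animalai_train` for the `animalai-train` package is an assumption based on the folder name above,
+so adapt it if your layout differs:
+
+```
+# Illustrative check only; `animalai_train` is an assumed import name for the animalai-train package
+import animalai
+import animalai_train
+
+print(animalai.__file__)
+print(animalai_train.__file__)
+```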
Finally download the environment for your system:
diff --git a/documentation/quickstart.md b/documentation/quickstart.md
index f20012f9..3b51ad1b 100644
--- a/documentation/quickstart.md
+++ b/documentation/quickstart.md
@@ -1,38 +1,60 @@
# Quick Start Guide
-You can run the Animal AI environment in three different ways:
-- running the standalone `AnimalAI` executable
-- running a configuration file via `visualizeArena.py`
-- start training using `train.py`
+The format of this competition is rather different to what you might be used to. We do not provide a single training set
+that you can train on out of the box; instead, you are invited to include the design of a training environment as part of
+the whole training process. To make this new step as smooth as possible, we created tools you can use to easily set up
+your training environment and visualize what these configurations look like.
## Running the standalone arena
-Running the executable `AnimalAI` that you should have separately downloaded and added to the `envs` folder starts a
-playable environment with default configurations in a single arena. You can toggle the camera between First Person and
-Bird's eye view using the `C` key on your keyboard. The agent can then be controlled using `W,A,S,D` on your keyboard.
-The objects present in the configuration are randomly sampled from the list of objects that can be spawned, their
-location is random too. Hitting `R` or collecting rewards will reset the arena.
+The basic environment is made of a single agent in an enclosed arena that resembles the environments used for
+experimenting with animals. In this environment you can add objects the agent can interact with, as well as goals or
+rewards the agent must collect or avoid. To see what this looks like, run the executable environment you downloaded; you
+will spawn in an arena containing a random assortment of objects.
+
+You can toggle the camera between First Person and Bird's eye view using the `C` key on your keyboard. The agent can
+then be controlled using `W,A,S,D` on your keyboard. Hitting `R` or collecting rewards will reset the arena.
**Note**: on some platforms, running the standalone arena in full screen makes the environment slow; keep the
environment in window mode for better performance.
## Running a specific configuration file
-The `visualizeArena.py` script found in the main folder allows you to visualize an arena configuration file. We provide
-sample configuration files for you to experiment with. To make your own environment configuration file we advise to read
-thoroughly the [configuration file documentation page](configFile.md). You will find a detailed list of all the objects on the [definitions of objects page](definitionsOfObjects.md). Running this script only allows for a single arena to be visualized at once, as there can only be a single agent you control.
-
-For example, to run an environment that contains the agent, a goal, and some randomly placed walls use:
+Once you are familiar with the environment and its physics, you can start building and visualizing your own arenas.
+Assuming you followed the [installation instructions](../README.md#requirements), go to the `examples/` folder and run
+`python visualizeArena.py configs/exampleConfig.yaml`. This loads the `configs/exampleConfig.yaml` configuration for the
+arena and lets you play as the agent.
-```
-python visualizeArena.py configs/obstacles.yaml
-```
+Have a look at the [configuration file](configs/exampleConfig.yaml) for a first look behind the scenes. You can select
+objects, their size, location, rotation and color, randomizing any of these parameters as you like; an illustrative
+sketch of the general format is shown after the list below. We provide two documentation pages that we recommend you
+read thoroughly:
+ - The [configuration file documentation page](configFile.md) which explains how to write these configuration files.
+ - The [definitions of objects page](definitionsOfObjects.md) which contains a detailed list of all the objects and their
+ characteristics.
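+
+For orientation, here is an illustrative sketch of the general shape such a configuration file takes. The exact tags,
+fields and object names are described in the two pages above; the items and values below are only an example:
+
+```
+!ArenaConfig
+arenas:
+  0: !Arena
+    t: 250
+    items:
+    - !Item
+      name: Wall
+      positions:
+      - !Vector3 {x: 10, y: 0, z: 10}
+      rotations: [0]
+      sizes:
+      - !Vector3 {x: 1, y: 3, z: 5}
+    - !Item
+      name: GoodGoal
+```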
## Start training your agent
-Once you're happy with your arena configuration you can start training your agent. This can be done in a way very similar
-to a regular [gym](https://github.com/openai/gym) environment. We provide a template training file `train.py` you can run
-out of the box, it uses the [ML agents' PPO](https://github.com/Unity-Technologies/ml-agents/blob/master/docs/Training-PPO.md)
-for training. We added the ability for participants to **change the environment configuration between episodes**. You can
-find more details about that in the [training documentation](training.md).
+Once you're happy with your arena configurations you can start training your agent. The `animalai` package presents
+several features that we think will improve training speed and performance:
+
+- Participants can **change the environment configuration between episodes** (allowing for techniques such as curriculum
+ learning)
+- You can choose the length of each episode as part of the configuration files, even having infinite episodes
+- You can have several arenas in a single environment instance, each with an agent you control independently from the others,
+ and each with its own configuration allowing for collecting observations faster
+
+We provide examples of training using the `animalai-train` package, but you can of course start from scratch and submit agents
+that do not rely on this library. To show how to train in an `animalai` environment, we provide scripts in the
+`examples/` folder:
+
+- `trainDopamine.py` uses the `dopamine` implementation of Rainbow to train a single agent using the gym interface. This
+is a good starting point if you want to try another training algorithm that works plug-and-play with Gym. **Note that as
+such it only allows for training on environments with a single agent.** It is possible to train with several agents in a
+gym environment, but this will require modifying your code to accept more than one observation at a time.
+- `trainMLAgents.py` uses the `ml-agents` implementation of PPO to train one or more agents at a time, using the
+`UnityEnvironment`. This is a great starting point if you don't mind reading some code, as it directly allows you to use
+the functionalities described above out of the box; a minimal sketch of this workflow is given at the end of this guide.
+
+
+You can find more details about this in the [training documentation](training.md).
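+
+To make this second workflow more concrete, here is a minimal, illustrative sketch of driving the environment directly from
+Python. It is not a full training script, and names such as the environment path, the `n_arenas` argument and the
+`arenas_configurations` reset parameter are assumptions; check `animalai/envs/environment.py` and `examples/trainMLAgents.py`
+for the exact signatures:
+
+```
+from animalai.envs import UnityEnvironment
+from animalai.envs.arena_config import ArenaConfig
+
+# Point file_name at the environment binary you downloaded (adapt the path to your system)
+env = UnityEnvironment(file_name='env/AnimalAI', worker_id=0, n_arenas=1)
+
+# Load an arena configuration and reset the environment with it; a different
+# configuration can be passed at any reset, e.g. for curriculum learning
+arena_config = ArenaConfig('configs/exampleConfig.yaml')
+info = env.reset(arenas_configurations=arena_config, train_mode=True)
+
+brain_name = env.brain_names[0]
+for _ in range(100):
+    # One action per agent: two discrete branches (move forward/back, turn left/right)
+    info = env.step(vector_action=[1, 0])
+    print(info[brain_name].rewards, info[brain_name].local_done)
+
+env.close()
+```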
diff --git a/examples/trainBaselines.py b/examples/trainDopamine.py
similarity index 100%
rename from examples/trainBaselines.py
rename to examples/trainDopamine.py
From 17a422b85872b4d921653074a99a4e1f63f8c15e Mon Sep 17 00:00:00 2001
From: Benjamin
Date: Sat, 1 Jun 2019 17:42:18 +0100
Subject: [PATCH 12/23] update documentation
---
README.md | 2 +-
documentation/configFile.md | 10 ++++++----
documentation/quickstart.md | 4 ++--
3 files changed, 9 insertions(+), 7 deletions(-)
diff --git a/README.md b/README.md
index a35eb75e..0dcd6567 100644
--- a/README.md
+++ b/README.md
@@ -139,7 +139,7 @@ Occasional slow frame rates in play mode. Temporary fix: reduce screen size.
## Version History
-- v0.5
+- v0.5 - Package `animalai`, gym compatible, dopamine example, bug fixes
- Separate environment API and training API in Python
- Release both as `animalai` and `animalai-train` PyPI packages (for `pip` installs)
- Agent speed in play-mode constant across various platforms
diff --git a/documentation/configFile.md b/documentation/configFile.md
index 33a3f979..1854d35c 100644
--- a/documentation/configFile.md
+++ b/documentation/configFile.md
@@ -14,7 +14,8 @@ To configure training arenas you can use a simple **YAML file** and/or the **Are
- on the fly changes of configuration of one or more arenas between episodes, allowing for easy curriculum learning for example
- share configurations between participants
-We provide a few custom configurations, but we expect designing good environments will be an important component of doing well in the competition.
+We provide a few custom configurations, but we expect designing good environments will be an important component of doing
+ well in the competition.
We describe below the structure of the configuration file for an instance of the training environment, as well as all the
parameters and the values they can take. For how to change the configuration during training see `animalai/envs/ArenaConfig.py`.
@@ -25,15 +26,16 @@ parameters and the values they can take. For how to change the configuration dur
-A single arena is as shown above, it comes with a single agent (blue cube, black dot showing the front), a floor and four walls. It is a square of size 40x40, the
-origin of the arena is `(0,0)`, therefore you can provide coordinates for objects in the range `[0,40]x[0,40]` as floats.
+A single arena is as shown above; it comes with a single agent (blue sphere, black dot showing the front), a floor and
+four walls. It is a square of size 40x40 whose origin is `(0,0)`, so you can provide coordinates for
+objects in the range `[0,40]x[0,40]` as floats.
For visualization you can only configure a single arena, however during training you can configure as many as you want,
each will have its local set of coordinates as described above.
For a single arena you can provide the following parameters:
- `t` an `int`, the length of an episode which can change from one episode to the other. A value of `0` means that the episode will
-not terminate unlti a reward has been collected (setting `t=0` and having no reward will lead to an infinite episode)
+not terminate until a reward has been collected (setting `t=0` and having no reward will lead to an infinite episode)
- `blackouts` [see below](#blackouts)
diff --git a/documentation/quickstart.md b/documentation/quickstart.md
index 3b51ad1b..c4233019 100644
--- a/documentation/quickstart.md
+++ b/documentation/quickstart.md
@@ -40,8 +40,8 @@ that we think will improve training speed and performance:
- Participants can **change the environment configuration between episodes** (allowing for techniques such as curriculum
learning)
-- You can choose the length of each episode as part of the configuration files, even having infinite episodes
-- You can have several arenas in a single environment instance, each with an agent you control independently from the others,
+- You can **choose the length of each episode** as part of the configuration files, even having infinite episodes
+- You can **have several arenas in a single environment instance**, each with an agent you control independently from the others,
and each with its own configuration allowing for collecting observations faster
We provide examples of training using the `animalai-train` package, but you can of course start from scratch and submit agents
From ce11fdc2ea9235ca1e12ba166abb67c8e278acb7 Mon Sep 17 00:00:00 2001
From: Benjamin Beyret
Date: Mon, 3 Jun 2019 11:25:34 +0100
Subject: [PATCH 13/23] training doc + add HotZone doc
---
.../PrefabsPictures/Rewards/HotZone.png | Bin 0 -> 71977 bytes
documentation/definitionsOfObjects.md | 13 ++++-
documentation/training.md | 55 +++++++++++++-----
3 files changed, 51 insertions(+), 17 deletions(-)
create mode 100644 documentation/PrefabsPictures/Rewards/HotZone.png
diff --git a/documentation/PrefabsPictures/Rewards/HotZone.png b/documentation/PrefabsPictures/Rewards/HotZone.png
new file mode 100644
index 0000000000000000000000000000000000000000..cdebd2dfbc6b5b744b6b9f6b3f06a5df6433f8dc
GIT binary patch
literal 71977
[... base85-encoded image data for HotZone.png omitted ...]