diff --git a/.gitignore b/.gitignore index 8ccb7e08..9b8cdbb6 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,7 @@ env/* !env/README.md +examples/submission/test_submission/env/* +!examples/submission/test_submission/env/README.md models/ summaries/ /.idea @@ -11,4 +13,4 @@ venv/ build/ dist/ *.egg-info* -*.eggs* \ No newline at end of file +*.eggs* diff --git a/README.md b/README.md index 07f8ab25..b558bc59 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@

-**July 1st - November 1st:** Entries will be available on EvalAI from July 8th (please be patient while we make sure everything is running smoothly). +**July 1st - November 1st** The Animal-AI Olympics is an AI competition with tests inspired by animal cognition. Participants are given a small environment with just seven different classes of objects that can be placed inside. In each test, the agent needs to retrieve the food in the environment, but to do so there are obstacles to overcome, ramps to climb, boxes to push, and areas that must be avoided. The real challenge is that we don't provide the tests in advance. It's up to you to explore the possibilities with the environment and build interesting configurations that can help create an agent that understands how the environment's physics work and the affordances that it has. The final submission should be an agent capable of robust food retrieval behaviour similar to that of many kinds of animals. We know the animals can pass these tests, it's time to see if AI can too. @@ -20,22 +20,29 @@ The Animal-AI Olympics is an AI competition with tests inspired by animal cognit See [competition launch page](https://mdcrosby.com/blog/animalailaunch.html) and official rules for further details. -**Important** Please check the competition rules [here](http://animalaiolympics.com/rules.html). Entry to the competition (via EvalAI) constitutes agreement with all competition rules. +**Important** Please check the competition rules [here](http://animalaiolympics.com/rules.html). **To submit to the competition and be considered for prizes you must also fill in [this form](https://forms.gle/PKCgp2JAWvjf4c9i6)**. Entry to the competition (via EvalAI) constitutes agreement with all competition rules. ## Overview -Here you will find all the code needed to compete in this new challenge. This repo contains **the training environment** (v1.0) that will be used for the competition. Please check back during the competition for minor bug-fixes and updates, but as of v1.0 the major features and contents are set in place. **Information for entering** will be added by July 8th when the submission will be available via the EvalAI website for the compeition. +Here you will find all the code needed to compete in this new challenge. This repo contains **the training environment** (v1.0) that will be used for the competition. Information for entering can be found in the [submission documentation](documentation/submission.md). Please check back during the competition for minor bug-fixes and updates, but as of v1.0 the major features and contents are set in place. For more information on the competition itself and to stay updated with any developments, head to the -[Competition Website](http://www.animalaiolympics.com/) and follow [@MacroPhilosophy](https://twitter.com/MacroPhilosophy) -and [@BenBeyret](https://twitter.com/BenBeyret) on twitter. +[Competition Website](http://www.animalaiolympics.com/) and follow [@MacroPhilosophy](https://twitter.com/MacroPhilosophy) and [@BenBeyret](https://twitter.com/BenBeyret) on twitter. The environment contains an agent enclosed in a fixed sized arena. Objects can spawn in this arena, including positive -and negative rewards (green, yellow and red spheres) that the agent must obtain (or avoid). All of the hidden tests that will appear in the competition are made using the objects in the training environment. 
We have provided some sample environment configurations that should be useful for training, but part of the challenge will be experimenting and designing new configurations. +and negative rewards (green, yellow and red spheres) that the agent must obtain (or avoid). All of the hidden tests that will appear in the competition are made using the objects in the training environment. We have provided some sample environment configurations that should be useful for training (see examples/configs), but part of the challenge is to experiment and design new configurations. To get started install the requirements below, and then follow the [Quick Start Guide](documentation/quickstart.md). More in depth documentation can be found on the [Documentation Page](documentation/README.md). +## Evaluation + +The competition has 300 tests, split over ten categories. The categories range from the very simple (e.g. **food retrieval**, **preferences**, and **basic obstacles**) to the more complex (e.g. **spatial reasoning**, **internal models**, **object permanence**, and **causal reasoning**). We have included example config files for the first seven categories. Note that the example config files are just simple examples to be used as a guide. An agent that solves all of these perfectly may still not be able to solve all the tests in the category, but it would be off to a good start. + +The submission website allows you to submit an agent that will be run on all 300 tests and returns the overall score (number of tests passed) and the score per category. We cannot offer infinite compute, so instances will be timed out after ~90 minutes and only the tests completed up to that point will be counted (all others will be considered failed). See the [submission documentation](documentation/submission.md) for more information. + +For the mid-way and final evaluation we will (resources permitting) run more extensive testing with 3 variations per test (so 900 tests total). The variations will include minor perturbations to the configurations. The agent will have to pass all 3 variations to pass each individual test, giving a total score out of 300. This means that **your final test score might be lower than the score achieved during the competition** and that **the competition leaderboard on EvalAI may not exactly match the final results**. + ## Development Blog You can read the launch post - with information about prizes and the categories in the competition here: @@ -53,12 +60,14 @@ well as part of the development process. ## Requirements -The Animal-AI package works on Linux, Mac and Windows, as well as most Cloud providers. +The Animal-AI package works on Linux, Mac and Windows, as well as most Cloud providers. Note that for submission to the competition we only support Linux-based Docker containers. -First of all your will need `python3.6` installed (we currently only support **python3.6**). We recommend using a virtual environment specifically for the competition. Clone this repository to run the examples we provide you with. We offer two packages for this competition: +We recommend using a virtual environment specifically for the competition. You will need `python3.6` installed (we currently only support **python3.6**). Clone this repository to run the examples we provide. + +We offer two packages for this competition: -- The main one is an API for interfacing with the Unity environment. It contains both a +- The main package is an API for interfacing with the Unity environment.
It contains both a [gym environment](https://github.com/openai/gym) as well as an extension of Unity's [ml-agents environments](https://github.com/Unity-Technologies/ml-agents/tree/master/ml-agents-envs). You can install it via pip: @@ -105,19 +114,9 @@ mode. Here you can control the agent with the following: | C | switch camera | | R | reset environment | -## Competition Tests - -We will be releasing further details about the tests in the competition over the coming weeks. The tests will be split -into multiple categories from the very simple (e.g. **food retrieval**, **preferences**, and **basic obstacles**) to -the more complex (e.g. **working memory**, **spatial memory**, **object permanence**, and **object manipulation**). For -now we have included multiple example config files that each relate to a different category. As we release further -details we will also specify the rules for the type of tests that can appear in each category. Note that the example -config files are just simple examples to be used as a guide. An agent that solves even all of these perfectly may still -not be able to solve all the tests in the categories but it would be off to a very good start. - ## Citing -For now please cite the [Nature: Machine Intelligence piece](https://rdcu.be/bBCQt) for any work involving the competition environment. Official Animal-AI Papers to follow: +**Official Animal-AI Papers Coming Soon**. In the meantime please cite the [Nature: Machine Intelligence piece](https://rdcu.be/bBCQt) for any work involving the competition environment. Crosby, M., Beyret, B., Halina M. [The Animal-AI Olympics](https://www.nature.com/articles/s42256-019-0050-3) Nature Machine Intelligence 1 (5) p257 2019. @@ -134,6 +133,12 @@ possibility to change the configuration of arenas between episodes. The document Juliani, A., Berges, V., Vckay, E., Gao, Y., Henry, H., Mattar, M., Lange, D. (2018). [Unity: A General Platform for Intelligent Agents.](https://arxiv.org/abs/1809.02627) *arXiv preprint arXiv:1809.02627* +## EvalAI + +The competition is kindly hosted on [EvalAI](https://github.com/Cloud-CV/EvalAI), an open source web application for AI competitions. Special thanks to Rishabh Jain for his help in setting this up. + +Deshraj Yadav, Rishabh Jain, Harsh Agrawal, Prithvijit Chattopadhyay, Taranjeet Singh, Akash Jain, Shiv Baran Singh, Stefan Lee and Dhruv Batra (2019) [EvalAI: Towards Better Evaluation Systems for AI Agents](https://arxiv.org/abs/1902.03570) + ## Known Issues In play mode pressing `R` or `C` does nothing sometimes.
This is due to the fact that we have synchronized these @@ -154,9 +159,16 @@ v0.6.1) ## Version History +- v1.0.3 + - Adds inference mode to Gym environment + - Adds seed to Gym environment + - Submission example folder containing a trained agent + - Provide submission details for the competition + - Documentation for training on AWS + - v1.0.2 - Adds custom resolution for docker training as well - - fix version checker + - Fix version checker - v1.0.0 - Adds custom resolution to both Unity and Gym environments diff --git a/agent.py b/agent.py index f3bb53cf..9becd370 100644 --- a/agent.py +++ b/agent.py @@ -1,11 +1,9 @@ -from animalai.envs.brain import BrainInfo - - class Agent(object): def __init__(self): """ Load your agent here and initialize anything needed + WARNING: any paths to files you wish to access inside the Docker container should be ABSOLUTE PATHS """ pass @@ -16,16 +14,21 @@ def reset(self, t=250): """ :param t the number of timesteps in the episode """ - def step(self, brain_info: BrainInfo) -> list[float]: + def step(self, obs, reward, done, info): """ - A single step the agent should take based on the current - :param brain_info: a single BrainInfo containing the observations and reward for a single step for one agent - :return: a list of actions to execute (of size 2) + A single step the agent should take based on the current state of the environment + We will run the Gym environment (AnimalAIEnv) and pass the arguments returned by env.step() to + the agent. + + Note that if you prefer using the BrainInfo object that is usually returned by the Unity + environment, it can be accessed from info['brain_info']. + + :param obs: agent's observation of the current environment + :param reward: amount of reward returned after previous action + :param done: whether the episode has ended. + :param info: contains auxiliary diagnostic information, including BrainInfo.
+ :return: the action to take, a list of size 2 """ + action = [0, 0] - self.action = [0, 0] - - return self.action - - def destroy(self): - pass + return action diff --git a/animalai/animalai/envs/gym/environment.py b/animalai/animalai/envs/gym/environment.py index 3c9e7879..bdde3224 100644 --- a/animalai/animalai/envs/gym/environment.py +++ b/animalai/animalai/envs/gym/environment.py @@ -30,9 +30,11 @@ def __init__(self, worker_id=0, docker_training=False, n_arenas=1, + seed=0, arenas_configurations=None, greyscale=False, retro=True, + inference=False, resolution=None): """ Environment initialization @@ -48,12 +50,15 @@ """ self._env = UnityEnvironment(file_name=environment_filename, worker_id=worker_id, + seed=seed, docker_training=docker_training, n_arenas=n_arenas, arenas_configurations=arenas_configurations, + inference=inference, resolution=resolution) # self.name = self._env.academy_name self.vector_obs = None + self.inference = inference self.resolution = resolution self._current_state = None self._n_agents = None diff --git a/animalai/setup.py b/animalai/setup.py index 6ab757a8..ec32eb03 100644 --- a/animalai/setup.py +++ b/animalai/setup.py @@ -2,7 +2,7 @@ setup( name='animalai', - version='1.0.2', + version='1.0.3', description='Animal AI competition interface', url='https://github.com/beyretb/AnimalAI-Olympics', author='Benjamin Beyret', diff --git a/documentation/AWS/EC2.png b/documentation/AWS/EC2.png new file mode 100644 index 00000000..c022caa3 Binary files /dev/null and b/documentation/AWS/EC2.png differ diff --git a/documentation/AWS/launch.png b/documentation/AWS/launch.png new file mode 100644 index 00000000..8ad73b45 Binary files /dev/null and b/documentation/AWS/launch.png differ diff --git a/documentation/AWS/limits.png b/documentation/AWS/limits.png new file mode 100644 index 00000000..83b19a9a Binary files /dev/null and b/documentation/AWS/limits.png differ diff --git a/documentation/AWS/marketplace.png b/documentation/AWS/marketplace.png new file mode 100644 index 00000000..de74aa22 Binary files /dev/null and b/documentation/AWS/marketplace.png differ diff --git a/documentation/AWS/p2.png b/documentation/AWS/p2.png new file mode 100644 index 00000000..f481ce19 Binary files /dev/null and b/documentation/AWS/p2.png differ diff --git a/documentation/README.md b/documentation/README.md index c2f8bb40..aa66d03c 100644 --- a/documentation/README.md +++ b/documentation/README.md @@ -6,6 +6,8 @@ You can find here the following documentation: - [How to design configuration files](configFile.md) - [How training works](training.md) - [All the objects you can include in the arenas as well as their specifications](definitionsOfObjects.md) +- [How to submit your agent](submission.md) +- [A guide to training on AWS](cloudTraining.md) More will come before the competition launches. diff --git a/documentation/cloudTraining.md b/documentation/cloudTraining.md new file mode 100644 index 00000000..794cba99 --- /dev/null +++ b/documentation/cloudTraining.md @@ -0,0 +1,98 @@ +# Training on AWS + +Training an agent requires rendering the environment on a screen, which means that you may have to follow a few steps (detailed below) before you can use standard cloud compute instances. We detail two possibilities. Both methods were tested on [AWS p2.xlarge](https://aws.amazon.com/ec2/instance-types/p2/) using a standard [Deep Learning Base AMI](https://aws.amazon.com/marketplace/pp/B077GCZ4GR).
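+Whichever method you choose, the training code itself does not need to change; the main environment-side change is creating the environment with `docker_training=True` so it renders to the virtual screen described below. As a rough sketch using the gym wrapper from this repository (the path to the environment binary is illustrative and depends on where you unzip it):
+
+```
+from animalai.envs.gym.environment import AnimalAIEnv
+
+# illustrative path: point this at the Linux build of the environment
+env = AnimalAIEnv(environment_filename='env/AnimalAI',
+                  worker_id=0,
+                  n_arenas=1,
+                  docker_training=True,  # required when rendering through xvfb / inside docker
+                  retro=True)
+```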
+ +We leave it to participants to adapt the information found here to other cloud providers, instance types, or their specific use case. We do not have the resources to fully support this capability. We are providing the following purely in the hope that it serves as a useful guide for some. + +**WARNING: using cloud services will incur costs; carefully read your provider's terms of service** + +## Pre-requisite: set up an AWS p2.xlarge instance + +Start by creating an account on [AWS](https://aws.amazon.com/), and then open the [console](https://console.aws.amazon.com/console/home?). +Compute engines on AWS are called `EC2` and offer a vast range of configurations in terms of number and type of CPUs, GPUs, +memory and storage. You can find more details about the different types and prices [here](https://aws.amazon.com/ec2/pricing/on-demand/). +In our case we will use a `p2.xlarge` instance. In the console, select `EC2`: + +![EC2](AWS/EC2.png) + +By default you will have a limit restriction on the number of instances you can create. Check your limits by selecting `Limits` on the top +left menu: + +![EC2](AWS/limits.png) + +Request an increase for `p2.xlarge` if needed. Once you have a limit of at least 1, go back to the EC2 console and select launch instance: + +![EC2](AWS/launch.png) + +You can then select various images; type in `Deep learning` to see what is on offer. For now we recommend selecting `AWS Marketplace` on the left panel: + +![EC2](AWS/marketplace.png) + +and select `Deep Learning Base AMI (Ubuntu)` if you want a basic Ubuntu install with CUDA capabilities. On the next page select `p2.xlarge` (this will not be selected by default): + +![EC2](AWS/p2.png) + +Click `Next` twice (first Next: Configure Instance Details, then Next: Add Storage) and add at least 15 GB of storage to the current size (so at least 65 GB total, with a default of 50 GB). Click `Review and Launch`, and then `Launch`. You will then be asked to create or select an existing key pair, which will be used to SSH into your instance. + +Once your instance has started, it will appear on the EC2 console. To SSH into your instance, right-click its line, select connect and follow the instructions. +We can now configure our instance for training. **Don't forget to shut down your instance once you're done using it, as you are charged for as long as it runs**. + +## Simulating a screen + +As cloud instances do not have screens attached, rendering the environment window is impossible. We use a virtual screen instead, in the form of [xvfb](https://en.wikipedia.org/wiki/Xvfb). +You can follow either of the following methods to use it. In both, **remember** to select `docker_training=True` in your environment configuration. + + +## Method 1: train using docker + +Basic Deep Learning Ubuntu images provide [NVIDIA docker](https://devblogs.nvidia.com/nvidia-docker-gpu-server-application-deployment-made-easy/) +pre-installed, which allows the use of CUDA within a container. SSH into your AWS instance, clone this repo and follow the instructions below. + +In the [submission guide](submission.md) we describe how to build a docker container for submission. The same process +can be used to create a docker container for training an agent. The [dockerfile provided](../examples/submission/Dockerfile) can +be adapted to include all the libraries and code needed for training.
+ +For example, should you wish to train a standard Dopamine agent provided in `animalai-train` out of the box, using GPU compute, add the following +lines to your Dockerfile in the `YOUR COMMANDS GO HERE` section, below the line installing `animalai-train`: + +``` +RUN git clone https://github.com/beyretb/AnimalAI-Olympics.git +RUN pip uninstall --yes tensorflow +RUN pip install tensorflow-gpu==1.12.2 +RUN apt-get install unzip wget +RUN wget https://www.doc.ic.ac.uk/~bb1010/animalAI/env_linux_v1.0.0.zip +RUN mv env_linux_v1.0.0.zip AnimalAI-Olympics/env/ +RUN unzip AnimalAI-Olympics/env/env_linux_v1.0.0.zip -d AnimalAI-Olympics/env/ +WORKDIR /aaio/AnimalAI-Olympics/examples +RUN sed -i 's/docker_training=False/docker_training=True/g' trainDopamine.py +``` + +Build your Docker image; from the `examples/submission` folder run: + +``` +docker build --tag=test-training . +``` + +Once built, you can start training straight away by running: + +``` +docker run --runtime=nvidia test-training python trainDopamine.py +``` + +Notice the use of `--runtime=nvidia`, which activates CUDA capabilities. You should see the following TensorFlow lines in the output, +which confirm you are training using the GPU: + +``` +I tensorflow/core/common_runtime/gpu/gpu_device.cc:1432] Found device 0 with properties: +name: Tesla K80 major: 3 minor: 7 memoryClockRate(GHz): 0.823 +``` + +You're now ready to start training on AWS using Docker! + +## Method 2: install xvfb on the instance + +An alternative to Docker is to install `xvfb` directly on your AWS instance and use it in the same way you would when training on your home computer. For this you will want to install an Ubuntu image with some deep learning libraries installed. From the AWS Marketplace page you can, for example, install `Deep Learning AMI (Ubuntu)`, which contains TensorFlow and PyTorch. + +To do so, you can follow the original ML Agents description for `p2.xlarge` found [here](https://github.com/Unity-Technologies/ml-agents/blob/master/docs/Training-on-Amazon-Web-Service.md#setting-up-x-server-optional). From our +experience, these steps do not work as well on other types of instances. + diff --git a/documentation/submission.md b/documentation/submission.md new file mode 100644 index 00000000..ab35295a --- /dev/null +++ b/documentation/submission.md @@ -0,0 +1,93 @@ +# Submission + +In order to participate in the competition you will need to upload a [docker container](https://docs.docker.com/get-started/) +containing your trained agent that interfaces with the `animalai` library. We detail the steps for participating +below. + +## Python agent and associated data + +Submissions need to implement the [agent script provided](https://github.com/beyretb/AnimalAI-Olympics/blob/master/agent.py). +This script must implement the methods present in the base script and keep the same class name. The methods are: +- `__init__`: this will only be called once, when the agent is first loaded. It can load the model and any other +related parameters. +- `reset(t)`: will be called each time the arena resets. At test time the length of episodes will vary across the 300 +experiments we will run; we therefore provide the agent with the length of the episode to come. Test lengths are either 250, 500, or 1000. +- `step(obs, reward, done, info)`: the method that is called each time the agent has to take a step. The arguments +are the ones returned by the Gym environment `AnimalAIEnv` from `animalai.envs.environment`.
If you wish to directly +work on the ML Agents `BrainInfo` you can access it via `info['brain_info']`. + +Make sure any data loaded in the docker is referred to using **absolute paths** in the container, of the form `/aaio/data/...` (see below). An example that you can modify is provided [here](https://github.com/beyretb/AnimalAI-Olympics/blob/master/examples/submission/agent.py) + +## Create an EvalAI account and add submission details + +The competition is kindly hosted by EvalAI. Head over to [their website](https://evalai.cloudcv.org/), create an account, and enroll your team in the AnimalAI challenge. To be able to submit and be eligible for prizes you will also need to register your personal details using [this form](https://docs.google.com/forms/d/e/1FAIpQLScqcIDaCwp1Wezj-klNfahcph1V8UQ-AZqmQui7vmcqVenPKw/viewform?usp=sf_link). + +**Any question related solely to the submission process on EvalAI should be posted to the** [EvalAI forum](https://evalai-forum.cloudcv.org/c/animal-ai-olympics-2019) + +## Docker + +Docker offers a containerized platform for running applications in a closed environment. You can install all the libraries your agent will require, and we will use this to run the tests as they would run on your local machine. The hardware we're using to run the tests is an AWS [p2.xlarge instance](https://aws.amazon.com/ec2/instance-types/p2/). + +Take the time to read the [Docker documentation](https://docs.docker.com/get-started/) and follow the install process. + +### Adding CUDA capabilities to Docker (optional) + +As part of the evaluation we offer GPU compute on an AWS +[p2.xlarge instance](https://aws.amazon.com/ec2/instance-types/p2/). These compute instances will run an Amazon +[Deep Learning Base AMI](https://aws.amazon.com/marketplace/pp/B077GCZ4GR) with several CUDA libraries installed. + +The native docker engine does not provide a pass-through to these libraries, rendering any use of GPU-capable libraries (such as `tensorflow-gpu`) impossible. To overcome this issue, NVIDIA provides a specific version of docker. We can recommend [this tutorial](https://marmelab.com/blog/2018/03/21/using-nvidia-gpu-within-docker-container.html#installing-nvidia-docker) for installing this version. Note we cannot provide help with installing these. + +## Creating the docker for submission + +Once you have docker up and running, you can start building your submission. Head over to `examples/submission` and have a look at the `Dockerfile`. This file installs all the requirements for the environment; we do not recommend editing anything outside of the commented block saying `YOUR COMMANDS GO HERE`. + +If your submission only requires the `animalai-train` library to run, you can use the `Dockerfile` without any modification. While in `examples/submission` run: + +``` +docker build --tag=submission . +``` + +You can give your docker image any name you want; it does not have to be `submission`. Note that the Dockerfile creates two +folders `/aaio` and `/aaio/data` at the root of the container, and copies the `agent.py` file and `data` folder from your local machine into the container. Your submission must keep this structure. References to these folders in +your code **should use absolute paths** (see the example agent provided in `examples/submission`). + +## Test your docker + +As uploading and evaluating containers takes a while, and you are only allowed a maximum of one submission per day, it is recommended to ensure your docker runs properly before submitting.
If there is a failure during testing **you will only have access to abridged outputs** which may not be enough to debug on your own. If you cannot find a solution using the provided submission testing volume you will need to raise a question on the forum and we will investigate for you (which might take time). + +Bottom line: be sure to test your submission prior to uploading! + +First, copy the AnimalAI Linux environment (and AnimalAI_Data folder) to `examples/submission/test_submission/env`. + +Next, you need to run the container by mounting the `test_submission` folder and its content as a volume, and executing the `testDocker.py` script. To do so, from the `submission` folder, run: + +``` +docker run -v "$PWD"/test_submission:/aaio/test submission python /aaio/test/testDocker.py +``` + +If your container and agent are set up properly, you should not get any errors, and the script should output the rewards for 5 simple tests and conclude with `SUCCESS` + +## Submit your docker + +You can now submit your container to EvalAI for evaluation as explained on the [EvalAI submission page](https://evalai.cloudcv.org/web/challenges/challenge-page/396/submission). + + +## Docker evaluation and results + +On the EvalAI page you will see that the number of valid submissions is limited to one a day. A submission is valid if it fulfils the following requirements: + +- it does not crash at any point before the first two experiments are complete (this includes loading the agent, resetting it, and completing the two experiments) +- loading the agent takes less than 5 minutes +- running the first two experiments takes less than 10 minutes + +If your submission meets these requirements it will be flagged as valid and you will not be able to submit again until the following day. + +Completing the experiments cannot take longer than 80 minutes in total. If your submission goes over the time limit it will stop and you will be scored on any experiments that were completed. + +Example scenarios: + +- FAIL: agent loads in 2 minutes, crashes during test number 2 +- FAIL: agent loads in 1 minute, takes more than 10 minutes to complete tests 1 and 2 +- SUCCESS: your agent loads in 3 minutes, takes 30 seconds for test 1 and 1 minute for test 2, and therefore has 78.5 minutes to complete the remaining 298 experiments +- SUCCESS: agent loads in 4 minutes, completes tests 1 and 2 in 1 minute, uses all of the remaining 79 minutes to complete only 100 more tests; you will get results based on the 102 experiments that ran diff --git a/documentation/training.md b/documentation/training.md index 3065982f..2c3b35b6 100644 --- a/documentation/training.md +++ b/documentation/training.md @@ -15,8 +15,8 @@ If you are not familiar with these algorithms, have a look at Before looking at the environment itself, we define here the actions the agent can take and the observations it collects: - **Actions**: the agent can move forward/backward and rotate left/right, just like in play mode. The -actions are discrete and of dimension `2`, each component can take 3 values (`(nothing, forward, backward)` and `(nothing, -left,right)`). +actions are discrete and of dimension `2`, each component can take values `0`, `1` or `2` (`(0: nothing, 1: forward, 2: +backward)` and `(0: nothing, 1: right, 2: left)`). - **Observations** are made of two components: visual observations which are pixel based and of dimension `84x84x3`, as well as the speed of the agent which is continuous of dimension `3` (speed along axes `(x,y,z)` in this order).
Of course, you may want to process and/or scale down the input before use with your approach. - **Rewards**: in case of an episode of finite length `T`, each step carries a small negative reward `-1/T`. In case of diff --git a/examples/animalai_train/animalai_train/dopamine/animalai_lib.py b/examples/animalai_train/animalai_train/dopamine/animalai_lib.py index 532ea075..fe501c65 100644 --- a/examples/animalai_train/animalai_train/dopamine/animalai_lib.py +++ b/examples/animalai_train/animalai_train/dopamine/animalai_lib.py @@ -13,11 +13,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""Obstacle Tower-specific utilities including Atari-specific network architectures. - -This includes a class implementing minimal preprocessing, which -is in charge of: - . Converting observations to greyscale. +""" +Code adapted from the Obstacle Tower competition """ from __future__ import absolute_import @@ -26,13 +23,10 @@ import math -from animalai.envs.gym.environment import AnimalAIEnv - import numpy as np import tensorflow as tf import gin.tf -import cv2 slim = tf.contrib.slim @@ -41,18 +35,6 @@ NATURE_DQN_STACK_SIZE = 4 # Number of frames in the state stack. -@gin.configurable -def create_animalai_environment(environment_path=None): - """Wraps the Animal AI environment with some basic preprocessing. - - Returns: - An Animal AI environment with some standard preprocessing. - """ - assert environment_path is not None - env = AnimalAIEnv(environment_path, 0, n_arenas=1, retro=True) - env = OTCPreprocessing(env) - return env - @gin.configurable def nature_dqn_network(num_actions, network_type, state): """The convolutional network used to compute the agent's Q-values. @@ -75,6 +57,7 @@ def nature_dqn_network(num_actions, network_type, state): q_values = slim.fully_connected(net, num_actions, activation_fn=None) return network_type(q_values) + @gin.configurable def rainbow_network(num_actions, num_atoms, support, network_type, state): """The convolutional network used to compute agent's Q-value distributions. @@ -114,6 +97,7 @@ def rainbow_network(num_actions, num_atoms, support, network_type, state): q_values = tf.reduce_sum(support * probabilities, axis=2) return network_type(q_values, logits, probabilities) + @gin.configurable def implicit_quantile_network(num_actions, quantile_embedding_dim, network_type, state, num_quantiles): @@ -171,100 +155,3 @@ def implicit_quantile_network(num_actions, quantile_embedding_dim, weights_initializer=weights_initializer) return network_type(quantile_values=quantile_values, quantiles=quantiles) - -# -# @gin.configurable -# class AAIPreprocessing(object): -# """A class implementing image preprocessing for OTC agents. -# -# Specifically, this converts observations to greyscale. It doesn't -# do anything else to the environment. -# """ -# -# def __init__(self, environment): -# """Constructor for an Obstacle Tower preprocessor. -# -# Args: -# environment: Gym environment whose observations are preprocessed. -# -# """ -# self.environment = environment -# -# self.game_over = False -# self.lives = 0 # Will need to be set by reset(). 
-# -# @property -# def observation_space(self): -# return self.environment.observation_space -# -# @property -# def action_space(self): -# return self.environment.action_space -# -# @property -# def reward_range(self): -# return self.environment.reward_range -# -# @property -# def metadata(self): -# return self.environment.metadata -# -# def reset(self): -# """Resets the environment. Converts the observation to greyscale, -# if it is not. -# -# Returns: -# observation: numpy array, the initial observation emitted by the -# environment. -# """ -# observation = self.environment.reset() -# if (len(observation.shape) > 2): -# observation = cv2.cvtColor(observation, cv2.COLOR_RGB2GRAY) -# -# return observation -# -# def render(self, mode): -# """Renders the current screen, before preprocessing. -# -# This calls the Gym API's render() method. -# -# Args: -# mode: Mode argument for the environment's render() method. -# Valid values (str) are: -# 'rgb_array': returns the raw ALE image. -# 'human': renders to display via the Gym renderer. -# -# Returns: -# if mode='rgb_array': numpy array, the most recent screen. -# if mode='human': bool, whether the rendering was successful. -# """ -# return self.environment.render(mode) -# -# def step(self, action): -# """Applies the given action in the environment. Converts the observation to -# greyscale, if it is not. -# -# Remarks: -# -# * If a terminal state (from life loss or episode end) is reached, this may -# execute fewer than self.frame_skip steps in the environment. -# * Furthermore, in this case the returned observation may not contain valid -# image data and should be ignored. -# -# Args: -# action: The action to be executed. -# -# Returns: -# observation: numpy array, the observation following the action. -# reward: float, the reward following the action. -# is_terminal: bool, whether the environment has reached a terminal state. -# This is true when a life is lost and terminal_on_life_loss, or when the -# episode is over. -# info: Gym API's info data structure. 
-# """ -# -# observation, reward, game_over, info = self.environment.step(action) -# self.game_over = game_over -# if (len(observation.shape) > 2): -# observation = cv2.cvtColor(observation, cv2.COLOR_RGB2GRAY) -# return observation, reward, game_over, info diff --git a/examples/animalai_train/setup.py b/examples/animalai_train/setup.py index 33406a3f..f173eb88 100644 --- a/examples/animalai_train/setup.py +++ b/examples/animalai_train/setup.py @@ -2,7 +2,7 @@ setup( name='animalai_train', - version='1.0.2', + version='1.0.3', description='Animal AI competition training library', url='https://github.com/beyretb/AnimalAI-Olympics', author='Benjamin Beyret', @@ -20,7 +20,7 @@ zip_safe=False, install_requires=[ - 'animalai>=1.0.2', + 'animalai>=1.0.3', 'dopamine-rl', 'tensorflow==1.12.2', 'matplotlib', diff --git a/examples/configs/trainer_config.yaml b/examples/configs/trainer_config.yaml index e25c3320..70db7406 100644 --- a/examples/configs/trainer_config.yaml +++ b/examples/configs/trainer_config.yaml @@ -1,27 +1,14 @@ -default: +Learner: trainer: ppo - batch_size: 1024 - beta: 5.0e-3 - buffer_size: 10240 epsilon: 0.2 gamma: 0.99 - hidden_units: 128 lambd: 0.95 learning_rate: 3.0e-4 - max_steps: 5.0e4 memory_size: 256 normalize: false - num_epoch: 3 - num_layers: 2 - time_horizon: 64 sequence_length: 64 summary_freq: 1000 use_recurrent: false - use_curiosity: false - curiosity_strength: 0.01 - curiosity_enc_size: 128 - -Learner: use_curiosity: true curiosity_strength: 0.01 curiosity_enc_size: 256 diff --git a/examples/submission/Dockerfile b/examples/submission/Dockerfile new file mode 100644 index 00000000..06e0f4b9 --- /dev/null +++ b/examples/submission/Dockerfile @@ -0,0 +1,46 @@ +FROM nvidia/cuda:9.0-cudnn7-runtime-ubuntu16.04 + +RUN apt-get clean && apt-get update && apt-get install -y locales +RUN echo "en_US.UTF-8 UTF-8" > /etc/locale.gen && \ + locale-gen +ENV LC_ALL en_US.UTF-8 +ENV LANG en_US.UTF-8 +ENV LANGUAGE en_US.UTF-8 +ENV SHELL /bin/bash + +RUN apt-get update && \ + apt-get install -y curl bzip2 xvfb ffmpeg git libxrender1 + +WORKDIR /aaio + +RUN curl -o ~/miniconda.sh -O https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh && \ + chmod +x ~/miniconda.sh && \ + ~/miniconda.sh -b -p /opt/conda && \ + rm ~/miniconda.sh && \ + /opt/conda/bin/conda clean -ya && \ + /opt/conda/bin/conda create -n python36 python=3.6 numpy + +ENV PATH /opt/conda/envs/python36/bin:/opt/conda/envs/bin:$PATH + +RUN pip install animalai + +COPY agent.py /aaio/agent.py +COPY data /aaio/data + +ENV HTTP_PROXY "" +ENV HTTPS_PROXY "" +ENV http_proxy "" +ENV https_proxy "" + +######################################################################################################################## +# YOUR COMMANDS GO HERE + +# For example, if your agent requires the animalai-train library +# you can add the following (remove if not needed): +RUN pip install animalai-train + + +######################################################################################################################## + +CMD ["/bin/bash"] + diff --git a/examples/submission/README.md b/examples/submission/README.md new file mode 100644 index 00000000..7c03bcb1 --- /dev/null +++ b/examples/submission/README.md @@ -0,0 +1,6 @@ +# Submission example + +We provide here the code for creating a submission container, as well as a very basic agent trained on `1-Food` using `trainMLAgents.py`. The `agent.py` implements the +script you will need to submit, it loads a trained model located in `data`. 
+ +Read the [submission documentation](../../documentation/submission.md) \ No newline at end of file diff --git a/examples/submission/agent.py b/examples/submission/agent.py new file mode 100644 index 00000000..0d1a6991 --- /dev/null +++ b/examples/submission/agent.py @@ -0,0 +1,53 @@ +import yaml +from animalai_train.trainers.ppo.policy import PPOPolicy +from animalai.envs.brain import BrainParameters + + +class Agent(object): + + def __init__(self): + """ + Load your agent here and initialize anything needed + """ + + # Load the configuration and model using ABSOLUTE PATHS + self.configuration_file = '/aaio/data/trainer_config.yaml' + self.model_path = '/aaio/data/1-Food/Learner' + + self.brain = BrainParameters(brain_name='Learner', + camera_resolutions=[{'height': 84, 'width': 84, 'blackAndWhite': False}], + num_stacked_vector_observations=1, + vector_action_descriptions=['', ''], + vector_action_space_size=[3, 3], + vector_action_space_type=0, # corresponds to discrete + vector_observation_space_size=3 + ) + self.trainer_params = yaml.load(open(self.configuration_file))['Learner'] + self.trainer_params['keep_checkpoints'] = 0 + self.trainer_params['model_path'] = self.model_path + self.trainer_params['use_recurrent'] = False + + self.policy = PPOPolicy(brain=self.brain, + seed=0, + trainer_params=self.trainer_params, + is_training=False, + load=True) + + def reset(self, t=250): + """ + Reset is called before each episode begins + Leave blank if nothing needs to happen there + :param t the number of timesteps in the episode + """ + + def step(self, obs, reward, done, info): + """ + A single step the agent should take based on the current + :param brain_info: a single BrainInfo containing the observations and reward for a single step for one agent + :return: a list of actions to execute (of size 2) + """ + + brain_info = info['brain_info'] + action = self.policy.evaluate(brain_info=brain_info)['action'] + + return action diff --git a/examples/submission/data/1-Food/Learner.nn b/examples/submission/data/1-Food/Learner.nn new file mode 100644 index 00000000..725953f9 Binary files /dev/null and b/examples/submission/data/1-Food/Learner.nn differ diff --git a/examples/submission/data/1-Food/Learner/checkpoint b/examples/submission/data/1-Food/Learner/checkpoint new file mode 100644 index 00000000..ae7dcf8b --- /dev/null +++ b/examples/submission/data/1-Food/Learner/checkpoint @@ -0,0 +1,13 @@ +model_checkpoint_path: "model-58856.cptk" +all_model_checkpoint_paths: "model-5000.cptk" +all_model_checkpoint_paths: "model-10000.cptk" +all_model_checkpoint_paths: "model-15000.cptk" +all_model_checkpoint_paths: "model-20000.cptk" +all_model_checkpoint_paths: "model-25000.cptk" +all_model_checkpoint_paths: "model-30000.cptk" +all_model_checkpoint_paths: "model-35000.cptk" +all_model_checkpoint_paths: "model-40000.cptk" +all_model_checkpoint_paths: "model-45000.cptk" +all_model_checkpoint_paths: "model-50000.cptk" +all_model_checkpoint_paths: "model-55000.cptk" +all_model_checkpoint_paths: "model-58856.cptk" diff --git a/examples/submission/data/1-Food/Learner/frozen_graph_def.pb b/examples/submission/data/1-Food/Learner/frozen_graph_def.pb new file mode 100644 index 00000000..3b2e7ad2 Binary files /dev/null and b/examples/submission/data/1-Food/Learner/frozen_graph_def.pb differ diff --git a/examples/submission/data/1-Food/Learner/model-58856.cptk.data-00000-of-00001 b/examples/submission/data/1-Food/Learner/model-58856.cptk.data-00000-of-00001 new file mode 100644 index 00000000..c5755ceb Binary 
files /dev/null and b/examples/submission/data/1-Food/Learner/model-58856.cptk.data-00000-of-00001 differ diff --git a/examples/submission/data/1-Food/Learner/model-58856.cptk.index b/examples/submission/data/1-Food/Learner/model-58856.cptk.index new file mode 100644 index 00000000..44a3df2e Binary files /dev/null and b/examples/submission/data/1-Food/Learner/model-58856.cptk.index differ diff --git a/examples/submission/data/1-Food/Learner/model-58856.cptk.meta b/examples/submission/data/1-Food/Learner/model-58856.cptk.meta new file mode 100644 index 00000000..4571e820 Binary files /dev/null and b/examples/submission/data/1-Food/Learner/model-58856.cptk.meta differ diff --git a/examples/submission/data/1-Food/Learner/raw_graph_def.pb b/examples/submission/data/1-Food/Learner/raw_graph_def.pb new file mode 100644 index 00000000..664c068e Binary files /dev/null and b/examples/submission/data/1-Food/Learner/raw_graph_def.pb differ diff --git a/examples/submission/data/trainer_config.yaml b/examples/submission/data/trainer_config.yaml new file mode 100644 index 00000000..70db7406 --- /dev/null +++ b/examples/submission/data/trainer_config.yaml @@ -0,0 +1,23 @@ +Learner: + trainer: ppo + epsilon: 0.2 + gamma: 0.99 + lambd: 0.95 + learning_rate: 3.0e-4 + memory_size: 256 + normalize: false + sequence_length: 64 + summary_freq: 1000 + use_recurrent: false + use_curiosity: true + curiosity_strength: 0.01 + curiosity_enc_size: 256 + time_horizon: 128 + batch_size: 64 + buffer_size: 2024 + hidden_units: 256 + num_layers: 1 + beta: 1.0e-2 + max_steps: 5.0e6 + num_epoch: 3 + diff --git a/examples/submission/test_submission/1-Food.yaml b/examples/submission/test_submission/1-Food.yaml new file mode 100644 index 00000000..2025a62a --- /dev/null +++ b/examples/submission/test_submission/1-Food.yaml @@ -0,0 +1,7 @@ +!ArenaConfig +arenas: + 0: !Arena + t: 250 + items: + - !Item + name: GoodGoal diff --git a/examples/submission/test_submission/env/README.md b/examples/submission/test_submission/env/README.md new file mode 100644 index 00000000..78b91a67 --- /dev/null +++ b/examples/submission/test_submission/env/README.md @@ -0,0 +1 @@ +Place the **Linux** environment here to test your docker \ No newline at end of file diff --git a/examples/submission/test_submission/testDocker.py b/examples/submission/test_submission/testDocker.py new file mode 100644 index 00000000..79b6f296 --- /dev/null +++ b/examples/submission/test_submission/testDocker.py @@ -0,0 +1,63 @@ +import importlib.util + +from animalai.envs.gym.environment import AnimalAIEnv +from animalai.envs.arena_config import ArenaConfig + + +def main(): + # Load the agent from the submission + print('Loading your agent') + try: + spec = importlib.util.spec_from_file_location('agent_module', '/aaio/agent.py') + agent_module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(agent_module) + submitted_agent = agent_module.Agent() + except Exception as e: + print('Your agent could not be loaded, make sure all the paths are absolute, error thrown:') + raise e + print('Agent successfully loaded') + + arena_config_in = ArenaConfig('/aaio/test/1-Food.yaml') + + print('Resetting your agent') + try: + submitted_agent.reset(t=arena_config_in.arenas[0].t) + except Exception as e: + print('Your agent could not be reset:') + raise e + + env = AnimalAIEnv( + environment_filename='/aaio/test/env/AnimalAI', + # seed=0, + retro=False, + n_arenas=1, + worker_id=1, + docker_training=True, + ) + + env.reset(arenas_configurations=arena_config_in) + obs, 
reward, done, info = env.step([0, 0]) + + print('Running 5 episodes') + + for k in range(5): + cumulated_reward = 0 + print('Episode {} starting'.format(k)) + try: + for i in range(arena_config_in.arenas[0].t): + + action = submitted_agent.step(obs, reward, done, info) + obs, reward, done, info = env.step(action) + cumulated_reward += reward + if done: + break + except Exception as e: + print('Episode {} failed'.format(k)) + raise e + print('Episode {0} completed, reward {1}'.format(k, cumulated_reward)) + + print('SUCCESS') + + +if __name__ == '__main__': + main() diff --git a/examples/trainDopamine.py b/examples/trainDopamine.py index 99d4b92c..837c43a3 100644 --- a/examples/trainDopamine.py +++ b/examples/trainDopamine.py @@ -18,6 +18,7 @@ def create_env_fn(): worker_id=worker_id, n_arenas=1, arenas_configurations=arena_config_in, + docker_training=False, retro=True) return env