diff --git a/.gitignore b/.gitignore index 345781e3..dfd1107a 100644 --- a/.gitignore +++ b/.gitignore @@ -6,6 +6,4 @@ env/* __pycache__/ UnitySDK.log /venv -testDevs.py -testDevs.yaml -.DS_Store \ No newline at end of file +/dev \ No newline at end of file diff --git a/README.md b/README.md index f070baab..7c4110d0 100644 --- a/README.md +++ b/README.md @@ -44,9 +44,9 @@ Finally download the environment for your system: | OS | Environment link | | --- | --- | -| Linux | [download v0.3](https://www.doc.ic.ac.uk/~bb1010/animalAI/env_linux_v0.3.zip) | -| MacOS | [download v0.3](https://www.doc.ic.ac.uk/~bb1010/animalAI/env_mac_v0.3.zip) | -| Windows | [download v0.3](https://www.doc.ic.ac.uk/~bb1010/animalAI/env_windows_v0.3.zip) | +| Linux | [download v0.4](https://www.doc.ic.ac.uk/~bb1010/animalAI/env_linux_v0.4.zip) | +| MacOS | [download v0.4](https://www.doc.ic.ac.uk/~bb1010/animalAI/env_mac_v0.4.zip) | +| Windows | [download v0.4](https://www.doc.ic.ac.uk/~bb1010/animalAI/env_windows_v0.4.zip) | You can now unzip the content of the archive to the `env` folder and you're ready to go! Make sure the executable `AnimalAI.*` is in `env/`. On linux you may have to make the file executable by running `chmod +x env/AnimalAI.x86_64`. @@ -99,19 +99,29 @@ Occasional slow frame rates in play mode. Temporary fix: reduce screen size. - [ ] Offer a gym wrapper for training - [ ] Add protobuf for arena spawning feedback -- [ ] Improve the way the agent spawns +- [x] Improve the way the agent spawns - [x] Add lights out configurations. 
- [x] Improve environment framerates - [x] Add moving food ## Version History +- v0.4 - Lights off moved to Unity, color configurations, proportional goals, bug fixes + - The light is now directly switched on/off within Unity; configuration files stay the same + - Blackouts now work with infinite episodes (`t=0`) + - The `rand_color` and `rand_all_colors` configurations have been removed and the user can now pass `RGB` values, see [here](documentation/configFile.md#objects) + - Rewards for goals are now proportional to their size (except for the `DeathZone`), see [here](documentation/definitionsOfObjects.md#rewards) + - The agent is now a ball rather than a cube + - Increased safety for spawning the agent to avoid infinite loops + - Bug fixes + - v0.3 - Lights off, remove Beams and add cylinder - We added the possibility to switch the lights off at given intervals, see [here](documentation/configFile.md#blackouts) - visualizeLightsOff.py displays an example of lights off, from the agent's point of view - Beams objects have been removed - A `Cylinder` object has been added (similar behaviour to the `Woodlog`) - The immovable `Cylinder` tunnel has been renamed `CylinderTunnel` + - `UnityEnvironment.reset()` parameter `config` renamed to `arenas_configurations_input` - v0.2 - New moving food rewards, improved Unity performance and bug fixes - Moving rewards have been added, two for each type of reward, see diff --git a/agent.py b/agent.py new file mode 100644 index 00000000..678b2376 --- /dev/null +++ b/agent.py @@ -0,0 +1,27 @@ +from animalai.envs.brain import BrainInfo + +class Agent(object): + + def __init__(self, configuration_to_load: str): + + """ + Load your agent here and initialize anything needed + :param configuration_to_load: path to your model to load + """ + pass + + def step(self, brain_info: BrainInfo) -> list[float]: + + """ + A single step the agent should take based on the current observations + :param brain_info: a single BrainInfo containing the observations and reward for a 
single step for one agent + :return: a list of actions to execute (of size 2) + """ + + self.action = [] + + return self.action + + def destroy(self): + + pass diff --git a/animalai/communicator_objects/arena_parameters_proto_pb2.py b/animalai/communicator_objects/arena_parameters_proto_pb2.py index 255ede37..930b300c 100644 --- a/animalai/communicator_objects/arena_parameters_proto_pb2.py +++ b/animalai/communicator_objects/arena_parameters_proto_pb2.py @@ -20,7 +20,7 @@ package='communicator_objects', syntax='proto3', serialized_options=_b('\252\002\034MLAgents.CommunicatorObjects'), - serialized_pb=_b('\n:animalai/communicator_objects/arena_parameters_proto.proto\x12\x14\x63ommunicator_objects\"\x98\x03\n\x14\x41renaParametersProto\x12\t\n\x01t\x18\x01 \x01(\x05\x12\x46\n\x05items\x18\x02 \x03(\x0b\x32\x37.communicator_objects.ArenaParametersProto.ItemsToSpawn\x12\x17\n\x0frand_all_colors\x18\x03 \x01(\x08\x1a\x93\x02\n\x0cItemsToSpawn\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x12\n\nrand_color\x18\x02 \x01(\x08\x12R\n\tpositions\x18\x03 \x03(\x0b\x32?.communicator_objects.ArenaParametersProto.ItemsToSpawn.Vector3\x12\x11\n\trotations\x18\x04 \x03(\x02\x12N\n\x05sizes\x18\x05 \x03(\x0b\x32?.communicator_objects.ArenaParametersProto.ItemsToSpawn.Vector3\x1a*\n\x07Vector3\x12\t\n\x01x\x18\x01 \x01(\x02\x12\t\n\x01y\x18\x02 \x01(\x02\x12\t\n\x01z\x18\x03 \x01(\x02\x42\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3') + serialized_pb=_b('\n:animalai/communicator_objects/arena_parameters_proto.proto\x12\x14\x63ommunicator_objects\"\xcf\x03\n\x14\x41renaParametersProto\x12\t\n\x01t\x18\x01 \x01(\x05\x12\x46\n\x05items\x18\x02 \x03(\x0b\x32\x37.communicator_objects.ArenaParametersProto.ItemsToSpawn\x12\x11\n\tblackouts\x18\x03 \x03(\x05\x1a\xd0\x02\n\x0cItemsToSpawn\x12\x0c\n\x04name\x18\x01 \x01(\t\x12R\n\tpositions\x18\x03 \x03(\x0b\x32?.communicator_objects.ArenaParametersProto.ItemsToSpawn.Vector3\x12\x11\n\trotations\x18\x04 \x03(\x02\x12N\n\x05sizes\x18\x05 
\x03(\x0b\x32?.communicator_objects.ArenaParametersProto.ItemsToSpawn.Vector3\x12O\n\x06\x63olors\x18\x06 \x03(\x0b\x32?.communicator_objects.ArenaParametersProto.ItemsToSpawn.Vector3\x1a*\n\x07Vector3\x12\t\n\x01x\x18\x01 \x01(\x02\x12\t\n\x01y\x18\x02 \x01(\x02\x12\t\n\x01z\x18\x03 \x01(\x02\x42\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3') ) @@ -66,8 +66,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=451, - serialized_end=493, + serialized_start=506, + serialized_end=548, ) _ARENAPARAMETERSPROTO_ITEMSTOSPAWN = _descriptor.Descriptor( @@ -85,29 +85,29 @@ is_extension=False, extension_scope=None, serialized_options=None, file=DESCRIPTOR), _descriptor.FieldDescriptor( - name='rand_color', full_name='communicator_objects.ArenaParametersProto.ItemsToSpawn.rand_color', index=1, - number=2, type=8, cpp_type=7, label=1, - has_default_value=False, default_value=False, + name='positions', full_name='communicator_objects.ArenaParametersProto.ItemsToSpawn.positions', index=1, + number=3, type=11, cpp_type=10, label=3, + has_default_value=False, default_value=[], message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, serialized_options=None, file=DESCRIPTOR), _descriptor.FieldDescriptor( - name='positions', full_name='communicator_objects.ArenaParametersProto.ItemsToSpawn.positions', index=2, - number=3, type=11, cpp_type=10, label=3, + name='rotations', full_name='communicator_objects.ArenaParametersProto.ItemsToSpawn.rotations', index=2, + number=4, type=2, cpp_type=6, label=3, has_default_value=False, default_value=[], message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, serialized_options=None, file=DESCRIPTOR), _descriptor.FieldDescriptor( - name='rotations', full_name='communicator_objects.ArenaParametersProto.ItemsToSpawn.rotations', index=3, - number=4, type=2, cpp_type=6, label=3, + name='sizes', 
full_name='communicator_objects.ArenaParametersProto.ItemsToSpawn.sizes', index=3, + number=5, type=11, cpp_type=10, label=3, has_default_value=False, default_value=[], message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, serialized_options=None, file=DESCRIPTOR), _descriptor.FieldDescriptor( - name='sizes', full_name='communicator_objects.ArenaParametersProto.ItemsToSpawn.sizes', index=4, - number=5, type=11, cpp_type=10, label=3, + name='colors', full_name='communicator_objects.ArenaParametersProto.ItemsToSpawn.colors', index=4, + number=6, type=11, cpp_type=10, label=3, has_default_value=False, default_value=[], message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, @@ -124,8 +124,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=218, - serialized_end=493, + serialized_start=212, + serialized_end=548, ) _ARENAPARAMETERSPROTO = _descriptor.Descriptor( @@ -150,9 +150,9 @@ is_extension=False, extension_scope=None, serialized_options=None, file=DESCRIPTOR), _descriptor.FieldDescriptor( - name='rand_all_colors', full_name='communicator_objects.ArenaParametersProto.rand_all_colors', index=2, - number=3, type=8, cpp_type=7, label=1, - has_default_value=False, default_value=False, + name='blackouts', full_name='communicator_objects.ArenaParametersProto.blackouts', index=2, + number=3, type=5, cpp_type=1, label=3, + has_default_value=False, default_value=[], message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, serialized_options=None, file=DESCRIPTOR), @@ -169,12 +169,13 @@ oneofs=[ ], serialized_start=85, - serialized_end=493, + serialized_end=548, ) _ARENAPARAMETERSPROTO_ITEMSTOSPAWN_VECTOR3.containing_type = _ARENAPARAMETERSPROTO_ITEMSTOSPAWN _ARENAPARAMETERSPROTO_ITEMSTOSPAWN.fields_by_name['positions'].message_type = _ARENAPARAMETERSPROTO_ITEMSTOSPAWN_VECTOR3 
_ARENAPARAMETERSPROTO_ITEMSTOSPAWN.fields_by_name['sizes'].message_type = _ARENAPARAMETERSPROTO_ITEMSTOSPAWN_VECTOR3 +_ARENAPARAMETERSPROTO_ITEMSTOSPAWN.fields_by_name['colors'].message_type = _ARENAPARAMETERSPROTO_ITEMSTOSPAWN_VECTOR3 _ARENAPARAMETERSPROTO_ITEMSTOSPAWN.containing_type = _ARENAPARAMETERSPROTO _ARENAPARAMETERSPROTO.fields_by_name['items'].message_type = _ARENAPARAMETERSPROTO_ITEMSTOSPAWN DESCRIPTOR.message_types_by_name['ArenaParametersProto'] = _ARENAPARAMETERSPROTO diff --git a/animalai/envs/arena_config.py b/animalai/envs/arena_config.py index 407ee5b4..8e48b1a0 100644 --- a/animalai/envs/arena_config.py +++ b/animalai/envs/arena_config.py @@ -2,7 +2,6 @@ import jsonpickle import yaml import copy -import numpy as np from animalai.communicator_objects import UnityRLResetInput, ArenaParametersProto @@ -26,44 +25,41 @@ def to_proto(self): return res +class RGB(yaml.YAMLObject): + yaml_tag = u'!RGB' + + def __init__(self, r=0, g=0, b=0): + self.r = r + self.g = g + self.b = b + + def to_proto(self): + res = ArenaParametersProto.ItemsToSpawn.Vector3() + res.x = self.r + res.y = self.g + res.z = self.b + + return res + + class Item(yaml.YAMLObject): yaml_tag = u'!Item' - def __init__(self, name='', rand_color=False, positions=None, rotations=None, sizes=None): + def __init__(self, name='', positions=None, rotations=None, sizes=None, colors=None): self.name = name - self.rand_color = rand_color self.positions = positions if positions is not None else [] self.rotations = rotations if rotations is not None else [] self.sizes = sizes if sizes is not None else [] + self.colors = colors if colors is not None else [] class Arena(yaml.YAMLObject): yaml_tag = u'!Arena' - def __init__(self, t=1000, rand_all_colors=False, items=None, blackouts=None): + def __init__(self, t=1000, items=None, blackouts=None): self.t = t - self.rand_all_colors = rand_all_colors self.items = items if items is not None else {} self.blackouts = blackouts if blackouts is not None else 
[] - self.generate_blackout_steps() - - def generate_blackout_steps(self): - # Transform a list of steps at which we turn on/off the light into a list of 1/0 of size t for each step - - if self.blackouts is not None and len(self.blackouts) > 0 and self.t>0: - if self.blackouts[0] > 0: - self.blackouts_steps = np.ones(self.t) - light = True - for i in range(len(self.blackouts) - 1): - self.blackouts_steps[self.blackouts[i]:self.blackouts[i + 1]] = not light - light = not light - self.blackouts_steps[self.blackouts[-1]:] = not light - else: - flip_every = -self.blackouts[0] - self.blackouts_steps = np.array( - ([1] * flip_every + [0] * flip_every) * (self.t // (2 * flip_every) + 1))[:self.t] - else: - self.blackouts_steps = np.ones(max(self.t, 1)) class ArenaConfig(yaml.YAMLObject): @@ -73,8 +69,6 @@ def __init__(self, yaml_path=None): if yaml_path is not None: self.arenas = yaml.load(open(yaml_path, 'r'), Loader=yaml.Loader).arenas - for arena in self.arenas.values(): - arena.generate_blackout_steps() else: self.arenas = {} @@ -89,14 +83,14 @@ def dict_to_arena_config(self) -> UnityRLResetInput: for k in self.arenas: config_out.arenas[k].CopyFrom(ArenaParametersProto()) config_out.arenas[k].t = self.arenas[k].t - config_out.arenas[k].rand_all_colors = self.arenas[k].rand_all_colors + config_out.arenas[k].blackouts.extend(self.arenas[k].blackouts) for item in self.arenas[k].items: to_spawn = config_out.arenas[k].items.add() to_spawn.name = item.name - to_spawn.rand_color = item.rand_color to_spawn.positions.extend([v.to_proto() for v in item.positions]) to_spawn.rotations.extend(item.rotations) to_spawn.sizes.extend([v.to_proto() for v in item.sizes]) + to_spawn.colors.extend([v.to_proto() for v in item.colors]) return config_out @@ -105,7 +99,6 @@ def update(self, arenas_configurations_input): if arenas_configurations_input is not None: for arena_i in arenas_configurations_input.arenas: self.arenas[arena_i] = copy.copy(arenas_configurations_input.arenas[arena_i]) - 
self.arenas[arena_i].generate_blackout_steps() def constructor_arena(loader, node): diff --git a/animalai/envs/environment.py b/animalai/envs/environment.py index df4ca537..6c7fc0ae 100644 --- a/animalai/envs/environment.py +++ b/animalai/envs/environment.py @@ -30,7 +30,6 @@ def __init__(self, file_name=None, base_port=5005, seed=0, docker_training=False, - no_graphics=False, n_arenas=1, play=False, arenas_configurations=None): @@ -43,7 +42,6 @@ def __init__(self, file_name=None, :int base_port: Baseline port number to connect to Unity environment over. worker_id increments over this. :int worker_id: Number to add to communication port (5005) [0]. Used for asynchronous agent scenarios. :param docker_training: Informs this class whether the process is being run within a container. - :param no_graphics: Whether to run the Unity simulator in no-graphics mode """ atexit.register(self._close) @@ -56,10 +54,9 @@ def __init__(self, file_name=None, self.proc1 = None # The process that is started. 
If None, no process was started self.communicator = self.get_communicator(worker_id, base_port) self.arenas_configurations = arenas_configurations if arenas_configurations is not None else ArenaConfig() - self.check_lights = True if file_name is not None: - self.executable_launcher(file_name, docker_training, no_graphics) + self.executable_launcher(file_name, docker_training) else: logger.info("Start training by pressing the Play button in the Unity Editor.") self._loaded = True @@ -130,7 +127,7 @@ def brain_names(self): def external_brain_names(self): return self._external_brain_names - def executable_launcher(self, file_name, docker_training, no_graphics): + def executable_launcher(self, file_name, docker_training): cwd = os.getcwd() file_name = (file_name.strip() .replace('.app', '').replace('.exe', '').replace('.x86_64', '').replace('.x86', @@ -177,17 +174,12 @@ def executable_launcher(self, file_name, docker_training, no_graphics): logger.debug("This is the launch string {}".format(launch_string)) # Launch Unity environment if not docker_training: - if no_graphics: + if not self.play: self.proc1 = subprocess.Popen( - [launch_string, '-nographics', '-batchmode', - '--port', str(self.port)]) + [launch_string, '--port', str(self.port), '--nArenas', str(self.n_arenas)]) else: - if not self.play: - self.proc1 = subprocess.Popen( - [launch_string, '--port', str(self.port), '--nArenas', str(self.n_arenas)]) - else: - self.proc1 = subprocess.Popen( - [launch_string, '--port', str(self.port)]) + self.proc1 = subprocess.Popen( + [launch_string, '--port', str(self.port)]) else: """ @@ -209,7 +201,7 @@ def executable_launcher(self, file_name, docker_training, no_graphics): """ docker_ls = ("exec xvfb-run --auto-servernum" " --server-args='-screen 0 640x480x24'" - " {0} --port {1}").format(launch_string, str(self.port)) + " {0} --port {1} --nArenas {2}").format(launch_string, str(self.port), str(self.n_arenas)) self.proc1 = subprocess.Popen(docker_ls, 
stdout=subprocess.PIPE, stderr=subprocess.PIPE, @@ -232,7 +224,6 @@ def reset(self, arenas_configurations_input=None, train_mode=True) -> AllBrainIn """ if self._loaded: self.arenas_configurations.update(arenas_configurations_input) - self.check_lights = not np.all([e.blackouts for e in self.arenas_configurations.arenas.values()]) outputs = self.communicator.exchange( self._generate_reset_input(train_mode, arenas_configurations_input) @@ -388,8 +379,6 @@ def step(self, vector_action=None, memory=None, text_action=None, value=None, st self._global_done = state[1] for _b in self._external_brain_names: self._n_agents[_b] = len(state[0][_b].agents) - if self.check_lights: - state = self._apply_lights(state, step_number) return state[0] elif not self._loaded: raise UnityEnvironmentException("No Unity environment is loaded.") @@ -436,17 +425,6 @@ def _flatten(cls, arr): arr = [float(x) for x in arr] return arr - def _apply_lights(self, state, step_number): - """ - Sets visual observations to zero for Arenas where the light should be off. - :return: the modified state - """ - if 'Learner' in state[0].keys(): - mask = np.array([e.blackouts_steps[step_number % len(e.blackouts_steps)] \ - for e in self.arenas_configurations.arenas.values()]) - state[0]['Learner'].visual_observations[0] = (state[0]['Learner'].visual_observations[0].T * mask).T - return state - def _get_state(self, output: UnityRLOutput) -> (AllBrainInfo, bool): """ Collects experience information from all external brains in environment at current step. 
diff --git a/configs/allObjectsRandom.yaml b/configs/allObjectsRandom.yaml index bba69899..fe7b3488 100644 --- a/configs/allObjectsRandom.yaml +++ b/configs/allObjectsRandom.yaml @@ -2,504 +2,176 @@ arenas: 0: !Arena t: 0 - rand_all_colors: true items: - !Item name: CubeTunnel - positions: [] - rand_color: false - rotations: [] - sizes: [] - !Item name: CylinderTunnel - positions: [] - rand_color: false - rotations: [] - sizes: [] - !Item name: Cylinder - positions: [] - rand_color: false - rotations: [] - sizes: [] - !Item name: Ramp - positions: [] - rand_color: false - rotations: [] - sizes: [] - !Item name: Wall - positions: [] - rand_color: false - rotations: [] - sizes: [] - !Item name: WallTransparent - positions: [] - rand_color: false - rotations: [] - sizes: [] - !Item name: Cube - positions: [] - rand_color: false - rotations: [] - sizes: [] - !Item name: CubeTransparent - positions: [] - rand_color: false - rotations: [] - sizes: [] - !Item name: Cardbox1 - positions: [] - rand_color: false - rotations: [] - sizes: [] - !Item name: Cardbox2 - positions: [] - rand_color: false - rotations: [] - sizes: [] - !Item name: WoodLog - positions: [] - rand_color: false - rotations: [] - sizes: [] - !Item name: UObject - positions: [] - rand_color: false - rotations: [] - sizes: [] - !Item name: LObject - positions: [] - rand_color: false - rotations: [] - sizes: [] - !Item name: GoodGoal - positions: [] - rand_color: false - rotations: [] - sizes: [] - !Item name: BadGoal - positions: [] - rand_color: false - rotations: [] - sizes: [] - !Item name: DeathZone - positions: [] - rand_color: false - rotations: [] - sizes: [] - !Item name: GoodGoalMulti - positions: [] - rand_color: false - rotations: [] - sizes: [] - !Item name: MazeGenerator - positions: [] - rand_color: false - rotations: [] - sizes: [] - !Item name: GoodGoalMove - positions: [] - rand_color: false - rotations: [] - sizes: [] - !Item name: GoodGoalBounce - positions: [] - rand_color: false - 
rotations: [] - sizes: [] - !Item name: BadGoalMove - positions: [] - rand_color: false - rotations: [] - sizes: [] - !Item name: BadGoalBounce - positions: [] - rand_color: false - rotations: [] - sizes: [] - !Item name: GoodGoalMultiMove - positions: [] - rand_color: false - rotations: [] - sizes: [] - !Item name: GoodGoalMultiBounce - positions: [] - rand_color: false - rotations: [] - sizes: [] 1: !Arena t: 0 - rand_all_colors: false items: - !Item name: CubeTunnel - positions: [] - rand_color: false - rotations: [] - sizes: [] - !Item name: Cylinder - positions: [] - rand_color: false - rotations: [] - sizes: [] - !Item name: CylinderTunnel - positions: [] - rand_color: false - rotations: [] - sizes: [] - !Item name: Ramp - positions: [] - rand_color: false - rotations: [] - sizes: [] - !Item name: Wall - positions: [] - rand_color: false - rotations: [] - sizes: [] - !Item name: WallTransparent - positions: [] - rand_color: false - rotations: [] - sizes: [] - !Item name: Cube - positions: [] - rand_color: false - rotations: [] - sizes: [] - !Item name: CubeTransparent - positions: [] - rand_color: false - rotations: [] - sizes: [] - !Item name: Cardbox1 - positions: [] - rand_color: false - rotations: [] - sizes: [] - !Item name: Cardbox2 - positions: [] - rand_color: false - rotations: [] - sizes: [] - !Item name: WoodLog - positions: [] - rand_color: false - rotations: [] - sizes: [] - !Item name: UObject - positions: [] - rand_color: false - rotations: [] - sizes: [] - !Item name: LObject - positions: [] - rand_color: false - rotations: [] - sizes: [] - !Item name: GoodGoal - positions: [] - rand_color: false - rotations: [] - sizes: [] - !Item name: BadGoal - positions: [] - rand_color: false - rotations: [] - sizes: [] - !Item name: DeathZone - positions: [] - rand_color: false - rotations: [] - sizes: [] - !Item name: GoodGoalMulti - positions: [] - rand_color: false - rotations: [] - sizes: [] - !Item name: GoodGoalMulti - positions: [] - rand_color: 
false - rotations: [] - sizes: [] - !Item name: MazeGenerator - positions: [] - rand_color: false - rotations: [] - sizes: [] 2: !Arena t: 0 - rand_all_colors: false items: - !Item name: CubeTunnel - positions: [] - rand_color: false - rotations: [] - sizes: [] - !Item name: Cylinder - positions: [] - rand_color: false - rotations: [] - sizes: [] - !Item name: CylinderTunnel - positions: [] - rand_color: false - rotations: [] - sizes: [] - !Item name: Ramp - positions: [] - rand_color: false - rotations: [] - sizes: [] - !Item name: Wall - positions: [] - rand_color: false - rotations: [] - sizes: [] - !Item name: WallTransparent - positions: [] - rand_color: false - rotations: [] - sizes: [] - !Item name: Cube - positions: [] - rand_color: false - rotations: [] - sizes: [] - !Item name: CubeTransparent - positions: [] - rand_color: false - rotations: [] - sizes: [] - !Item name: Cardbox1 - positions: [] - rand_color: false - rotations: [] - sizes: [] - !Item name: Cardbox2 - positions: [] - rand_color: false - rotations: [] - sizes: [] - !Item name: WoodLog - positions: [] - rand_color: false - rotations: [] - sizes: [] - !Item name: UObject - positions: [] - rand_color: false - rotations: [] - sizes: [] - !Item name: LObject - positions: [] - rand_color: false - rotations: [] - sizes: [] - !Item name: GoodGoal - positions: [] - rand_color: false - rotations: [] - sizes: [] - !Item name: BadGoal - positions: [] - rand_color: false - rotations: [] - sizes: [] - !Item name: DeathZone - positions: [] - rand_color: false - rotations: [] - sizes: [] - !Item name: GoodGoalMulti - positions: [] - rand_color: false - rotations: [] - sizes: [] - !Item name: GoodGoalMulti - positions: [] - rand_color: false - rotations: [] - sizes: [] - !Item name: MazeGenerator - positions: [] - rand_color: false - rotations: [] - sizes: [] 3: !Arena t: 0 - rand_all_colors: false items: - !Item name: CubeTunnel - positions: [] - rand_color: false - rotations: [] - sizes: [] - !Item name: 
Cylinder - positions: [] - rand_color: false - rotations: [] - sizes: [] - !Item name: CylinderTunnel - positions: [] - rand_color: false - rotations: [] - sizes: [] - !Item name: Ramp - positions: [] - rand_color: false - rotations: [] - sizes: [] - !Item name: Wall - positions: [] - rand_color: false - rotations: [] - sizes: [] - !Item name: WallTransparent - positions: [] - rand_color: false - rotations: [] - sizes: [] - !Item name: Cube - positions: [] - rand_color: false - rotations: [] - sizes: [] - !Item name: CubeTransparent - positions: [] - rand_color: false - rotations: [] - sizes: [] - !Item name: Cardbox1 - positions: [] - rand_color: false - rotations: [] - sizes: [] - !Item name: Cardbox2 - positions: [] - rand_color: false - rotations: [] - sizes: [] - !Item name: WoodLog - positions: [] - rand_color: false - rotations: [] - sizes: [] - !Item name: UObject - positions: [] - rand_color: false - rotations: [] - sizes: [] - !Item name: LObject - positions: [] - rand_color: false - rotations: [] - sizes: [] - !Item name: GoodGoal - positions: [] - rand_color: false - rotations: [] - sizes: [] - !Item name: BadGoal - positions: [] - rand_color: false - rotations: [] - sizes: [] - !Item name: DeathZone - positions: [] - rand_color: false - rotations: [] - sizes: [] - !Item name: GoodGoalMulti - positions: [] - rand_color: false - rotations: [] - sizes: [] - !Item name: GoodGoalMulti - positions: [] - rand_color: false - rotations: [] - sizes: [] - !Item name: MazeGenerator - positions: [] - rand_color: false - rotations: [] - sizes: [] diff --git a/configs/avoidance.yaml b/configs/avoidance.yaml index bef20ae9..59d1b5fd 100644 --- a/configs/avoidance.yaml +++ b/configs/avoidance.yaml @@ -2,18 +2,10 @@ arenas: 0: !Arena t: 0 - rand_all_colors: true items: - !Item name: GoodGoal - positions: [] - rand_color: false - rotations: [] sizes: - !Vector3 {x: 1, y: 1, z: 1} - !Item - name: DeathZone - positions: [] - rand_color: false - rotations: [] - sizes: [] \ 
No newline at end of file + name: DeathZone \ No newline at end of file diff --git a/configs/exampleConfig.yaml b/configs/exampleConfig.yaml index 8f0f4526..ce7e39a3 100644 --- a/configs/exampleConfig.yaml +++ b/configs/exampleConfig.yaml @@ -2,26 +2,22 @@ arenas: 0: !Arena t: 0 - rand_all_colors: false items: - !Item name: Cube - positions: + positions: - !Vector3 {x: 10, y: 0, z: 10} - !Vector3 {x: -1, y: 0, z: 30} - rand_color: false + colors: + - !RGB {r: 204, g: 0, b: 204 } rotations: [45] - sizes: + sizes: - !Vector3 {x: -1, y: 5, z: -1} - !Item - name: CylinderTunnel - positions: [] - rand_color: true - rotations: [] - sizes: [] + name: Cylinder + colors: + - !RGB {r: 204, g: 0, b: 204 } + - !RGB {r: 204, g: 0, b: 204 } + - !RGB {r: 204, g: 0, b: 204 } - !Item - name: GoodGoal - positions: [] - rand_color: true - rotations: [] - sizes: [] \ No newline at end of file + name: GoodGoal \ No newline at end of file diff --git a/configs/exampleTraining.yaml b/configs/exampleTraining.yaml index a6840f96..e4a952ad 100644 --- a/configs/exampleTraining.yaml +++ b/configs/exampleTraining.yaml @@ -2,77 +2,33 @@ arenas: 0: !Arena t: 1000 - rand_all_colors: true items: - !Item name: Wall - positions: [] - rand_color: false - rotations: [] - sizes: [] - !Item name: CylinderTunnel - positions: [] - rand_color: false - rotations: [] - sizes: [] - !Item name: GoodGoal - positions: [] - rand_color: false - rotations: [] - sizes: [] 1: !Arena t: 1000 - rand_all_colors: false items: - !Item name: TransparentWall - positions: [] - rand_color: false - rotations: [] - sizes: [] - !Item name: GoodGoal - positions: [] - rand_color: false - rotations: [] - sizes: [] 2: !Arena t: 1000 - rand_all_colors: false items: - !Item name: Cardbox1 - positions: [] - rand_color: false - rotations: [] - sizes: [] - !Item name: BadGoal - positions: [] - rand_color: false - rotations: [] - sizes: [] - !Item name: GoodGoal - positions: [] - rand_color: false - rotations: [] - sizes: [] 3: !Arena t: 
1000 - rand_all_colors: false items: - !Item name: DeathZone - positions: [] - rand_color: false - rotations: [] - sizes: [] - !Item - name: GoodGoal - positions: [] - rand_color: false - rotations: [] - sizes: [] \ No newline at end of file + name: GoodGoal \ No newline at end of file diff --git a/configs/justFood.yaml b/configs/justFood.yaml index b8999ea7..3bb9836f 100644 --- a/configs/justFood.yaml +++ b/configs/justFood.yaml @@ -2,41 +2,21 @@ arenas: 0: !Arena t: 1000 - rand_all_colors: true items: - !Item name: GoodGoal - positions: [] - rand_color: false - rotations: [] - sizes: [] 1: !Arena t: 1000 - rand_all_colors: true items: - !Item name: GoodGoal - positions: [] - rand_color: false - rotations: [] - sizes: [] 2: !Arena t: 1000 - rand_all_colors: true items: - !Item name: GoodGoal - positions: [] - rand_color: false - rotations: [] - sizes: [] 3: !Arena t: 1000 - rand_all_colors: true items: - !Item - name: GoodGoal - positions: [] - rand_color: false - rotations: [] - sizes: [] \ No newline at end of file + name: GoodGoal \ No newline at end of file diff --git a/configs/lightsOff.yaml b/configs/lightsOff.yaml index 97097c15..a1a9baff 100644 --- a/configs/lightsOff.yaml +++ b/configs/lightsOff.yaml @@ -1,45 +1,25 @@ !ArenaConfig arenas: 0: !Arena - t: 100 - rand_all_colors: true - blackouts: [10,20,30,40,50] + t: 0 + blackouts: [-10] items: - !Item name: GoodGoal - positions: [] - rand_color: false - rotations: [] - sizes: [] 1: !Arena t: 100 - rand_all_colors: true blackouts: [-5] items: - !Item name: GoodGoal - positions: [] - rand_color: false - rotations: [] - sizes: [] 2: !Arena t: 100 - rand_all_colors: true items: - !Item name: GoodGoal - positions: [] - rand_color: false - rotations: [] - sizes: [] 3: !Arena t: 100 - rand_all_colors: true blackouts: [2,4,6,8,10,12,14,16,18,20,30,40,50,60,70,80] items: - !Item - name: GoodGoal - positions: [] - rand_color: false - rotations: [] - sizes: [] \ No newline at end of file + name: GoodGoal \ No newline 
at end of file diff --git a/configs/movingFood.yaml b/configs/movingFood.yaml index 2ac6ba4a..6ebccd81 100644 --- a/configs/movingFood.yaml +++ b/configs/movingFood.yaml @@ -2,18 +2,8 @@ arenas: 0: !Arena t: 0 - rand_all_colors: true - rand_all_sizes: true items: - !Item name: GoodGoalBounce - positions: [] - rand_color: false - rotations: [] - sizes: [] - !Item - name: BadGoalBounce - positions: [] - rand_color: false - rotations: [] - sizes: [] \ No newline at end of file + name: BadGoalBounce \ No newline at end of file diff --git a/configs/objectManipulation.yaml b/configs/objectManipulation.yaml index c751fecc..bf006899 100644 --- a/configs/objectManipulation.yaml +++ b/configs/objectManipulation.yaml @@ -2,54 +2,25 @@ arenas: 0: !Arena t: 0 - rand_all_colors: true items: - !Item name: GoodGoal positions: [] - rand_color: false + colors: [] rotations: [] sizes: - !Vector3 {x: 1, y: 1, z: 1} - !Item name: Cube - positions: [] - rand_color: false - rotations: [] - sizes: [] - !Item name: CubeTransparent - positions: [] - rand_color: false - rotations: [] - sizes: [] - !Item name: Cardbox1 - positions: [] - rand_color: false - rotations: [] - sizes: [] - !Item name: Cardbox2 - positions: [] - rand_color: false - rotations: [] - sizes: [] - !Item name: WoodLog - positions: [] - rand_color: false - rotations: [] - sizes: [] - !Item name: UObject - positions: [] - rand_color: false - rotations: [] - sizes: [] - !Item - name: LObject - positions: [] - rand_color: false - rotations: [] - sizes: [] \ No newline at end of file + name: LObject \ No newline at end of file diff --git a/configs/obstacles.yaml b/configs/obstacles.yaml index 75cab2f0..28ae1306 100644 --- a/configs/obstacles.yaml +++ b/configs/obstacles.yaml @@ -2,24 +2,12 @@ arenas: 0: !Arena t: 0 - rand_all_colors: true items: - !Item name: GoodGoal - positions: [] - rand_color: false - rotations: [] sizes: - !Vector3 {x: 1, y: 1, z: 1} - !Item name: Wall - positions: [] - rand_color: false - rotations: [] - 
sizes: [] - !Item - name: Wall - positions: [] - rand_color: false - rotations: [] - sizes: [] \ No newline at end of file + name: Wall \ No newline at end of file diff --git a/configs/preferences.yaml b/configs/preferences.yaml index 7649f348..0d75aa37 100644 --- a/configs/preferences.yaml +++ b/configs/preferences.yaml @@ -2,23 +2,10 @@ arenas: 0: !Arena t: 0 - rand_all_colors: true items: - !Item name: GoodGoal - positions: [] - rand_color: false - rotations: [] - sizes: [] - !Item name: GoodGoal - positions: [] - rand_color: false - rotations: [] - sizes: [] - !Item - name: BadGoal - positions: [] - rand_color: false - rotations: [] - sizes: [] \ No newline at end of file + name: BadGoal \ No newline at end of file diff --git a/documentation/configFile.md b/documentation/configFile.md index c9eaad0e..33a3f979 100644 --- a/documentation/configFile.md +++ b/documentation/configFile.md @@ -34,7 +34,6 @@ each will have its local set of coordinates as described above. For a single arena you can provide the following parameters: - `t` an `int`, the length of an episode which can change from one episode to the other. 
A value of `0` means that the episode will not terminate unlti a reward has been collected (setting `t=0` and having no reward will lead to an infinite episode) -- `rand_all_colors` a `bool`, whether all objects should have a random color or not - `blackouts` [see below](#blackouts) @@ -49,8 +48,9 @@ same manner, using a set of parameters for each item: is empty the position will be sampled randomly in the arena - `sizes`: a list of `Vector3` sizes, if the list is empty the size will be sampled randomly - `rotations`: a list of `float` in the range `[0,360]`, if the list is empty the rotation is sampled randomly -- `rand_color` a `bool` setting whether or not the color(s) of the objects should be randomized (some objects will not -accept random colors) +- `colors`: a list of `RGB` values (integers in the range `[0,255]`), if the list is empty the color is sampled randomly + +Any of these fields can be omitted in the configuration files, in which case the omitted fields are automatically randomized. **All values for the above fields can be found in [the definitions](definitionsOfObjects.md)**. @@ -64,9 +64,8 @@ pass two types of arguments for this parameter: then back on from 15 to 19 included etc... - passing a single negative argument `[-20]` will automatically switch lights on and off every 20 frames. -**Note**: at the moment this feature cannot be combined with an infinite episode (`T=0`) - +**Note**: for infinite episodes (where `t=0`), the first point above would leave the light off after frame `25` while the +second point would keep switching the lights every `20` frames indefinitely.
## Rules and Notes @@ -103,23 +102,25 @@ Let's take a look at an example: arenas: 0: !Arena t: 0 - rand_all_colors: false items: - !Item name: Cube - positions: + positions: - !Vector3 {x: 10, y: 0, z: 10} - !Vector3 {x: -1, y: 0, z: 30} - rand_color: false + colors: + - !RGB {r: 204, g: 0, b: 204 } rotations: [45] - sizes: + sizes: - !Vector3 {x: -1, y: 5, z: -1} - !Item - name: CylinderTunnel - positions: [] - rand_color: true - rotations: [] - sizes: [] + name: Cylinder + colors: + - !RGB {r: 204, g: 0, b: 204 } + - !RGB {r: 204, g: 0, b: 204 } + - !RGB {r: 204, g: 0, b: 204 } + - !Item + name: GoodGoal ``` First of all, we can see that the number of parameters for `positions`, `rotations` and `sizes` do not need to match. The @@ -127,9 +128,10 @@ environment will spawn `max( len(positions), len(rotations), len(sizes) )` objec Any parameter missing will be sampled randomly. In this case this will lead to: -- a `Cube` spawned in `[10,10]` on the groundm with rotation `45` and a size randomized on both `x` and `z` and of `y=5` -- a `Cube` spawnd on the ground, with a random `x` and `z=30`, both its rotation and size will be random -- a `CylinderTunnel` completely randomized, including its color +- a pink `Cube` spawned in `[10,10]` on the ground with rotation `45` and a size randomized on both `x` and `z` and of `y=5` +- a `Cube` spawned on the ground, with a random `x` and `z=30`, its rotation, size and color will be random +- three pink `Cylinder` objects, otherwise completely randomized +- a `GoodGoal` randomized - the agent which position and rotation are randomized too The arena will spawn these objects in this order.
\ No newline at end of file diff --git a/documentation/definitionsOfObjects.md b/documentation/definitionsOfObjects.md index eee36eda..c0610a8f 100644 --- a/documentation/definitionsOfObjects.md +++ b/documentation/definitionsOfObjects.md @@ -6,7 +6,8 @@ The objects you can spawn in an arena are split among three categories: - rewards Below is a list of objects you can spawn. For each we describe the name you should use to refer to in your configuration files -or in Python directly, as well as their default characteristics and range of values you can assign to them. +or in Python directly, as well as their default characteristics and range of values you can assign to them. **All objects can +be rotated `360` degrees.** Each object has an orientation, we provide the three axes for all of those that are not symmetrical. The color code of the axes is as depicted below: @@ -20,36 +21,30 @@ the ones used in Unity). These are objects that are fixed and will not be impacted by the agent or other objects: -- a rectangular tunnel +- a rectangular tunnel - name: `CubeTunnel` - - can rotate 360 degrees - size range `(3,3,3)-(10,10,10)` - can randomize color -- a cylinder tunnel +- a cylinder tunnel - name: `CylinderTunnel` - - can rotate 360 degrees - - size range `(0.5,2.5,2.5)-(5,5,5)` + - size range `(2.5,2.5,2.5)-(5,5,5)` - can randomize color -- a ramp the agent can climb on +- a ramp the agent can climb on - name: `Ramp` - - can rotate 360 degrees - - size range `(0.5,0.5,0.5)-(10,5,10)` + - size range `(0.5,0.5,3)-(10,3,10)` - can randomize color - **can only spawn on the ground** -- a wall +- a wall - name: `Wall` - - can rotate 360 degrees - - size range `(1,1,1)-(40,10,40)` + - size range `(0.1,1,1)-(40,10,40)` - can randomize color -- a transparent wall +- a transparent wall - name: `WallTransparent` - - can rotate 360 degrees - - size range `(1,1,1)-(40,10,40)` + - size range `(0.1,1,1)-(40,10,40)` - cannot randomize color -- a randomly generated maze of size +- a 
randomly generated maze of size `16x16` with two entrances. Note this takes quite some room and will be hard to generate last on an arena. - name: `MazeGenerator` - - can rotate 360 degrees - size range constant - can randomize color @@ -57,120 +52,96 @@ These are objects that are fixed and will not be impacted by the agent or other These are objects the agent can move and which will be affected by each other, fixed objects and rewards if they collide -- a cube that can be pushed +- a cube that can be pushed - name: `Cube` - - can rotate 360 degrees - size range `(1,1,1)-(10,10,10)` - can randomize color -- a transparent cube that can be pushed +- a transparent cube that can be pushed - name: `CubeTransparent` - - can rotate 360 degrees - size range `(1,1,1)-(10,10,10)` - cannot randomize color -- a cardbox that can be pushed +- a cardbox that can be pushed - name: `Cardbox1` - - can rotate 360 degrees - size range `(1,1,1)-(10,10,10)` - cannot randomize color -- a cardbox that can be pushed +- a cardbox that can be pushed - name: `Cardbox2` - - can rotate 360 degrees - size range `(1,1,1)-(10,10,10)` - cannot randomize color -- a cylinder +- a cylinder - name: `Cylinder` - - can rotate 360 degrees - size range `(1,1,1)-(10,10,10)` - can randomize color -- a wood log +- a wood log - name: `WoodLog` - - can rotate 360 degrees - size range `(1,1,1)-(10,10,10)` - cannot randomize color -- a U-shaped object +- a U-shaped object - name: `UObject` - - can rotate 360 degrees - size range `(1,0.3,3)-(5,2,20)` - cannot randomize color -- a L-shaped object +- a L-shaped object - name: `LObject` - - can rotate 360 degrees - size range `(1,0.3,3)-(5,2,20)` - cannot randomize color #### Rewards -Objects that may terminate the event if the agents collides with one: +Objects that may terminate the episode if the agent collides with one.
**Important note:** for sphere goals the `y` and `z` +components of the provided sizes are ignored and only the `x` one counts -- Good goals: green spheres with a reward of 1 - - Fixed good reward + +- Good goals: green spheres with a positive reward equal to their size, terminate an episode + - Fixed good reward - name: `GoodGoal` - - can rotate 360 degrees - - size range `(0.5,0.5,0.5)-(10,10,10)` - - cannot randomize color - - terminates episode + - size range `0.5-5` +
- A good reward moving in a straight line, which stops moving as soon as it hits another object. Will start moving in the direction provided by the rotation parameter - name: `GoodGoalMove` - - can rotate 360 degrees - - size range fixed as `(1,1,1)` - - cannot randomize color - - terminates episode - - A good reward bouncing on objects. Will + - size range `1-3` +
+ - A good reward bouncing on objects. Will start moving in the direction provided by the rotation parameter - name: `GoodGoalBounce` - - can rotate 360 degrees - - size range fixed as `(1,1,1)` - - cannot randomize color - - terminates episode -- Bad goals: red spheres with a reward of -1 - - Fixed bad reward + - size range `1-3` +
+- Bad goals: red spheres with a negative reward equal to their size, terminate an episode + - Fixed bad reward - name: `BadGoal` - - can rotate 360 degrees - - size range `(0.5,0.5,0.5)-(10,10,10)` - - cannot randomize color - - terminates episode - - A bad reward moving in a straight line, + - size range `0.5-5` +
+ - A bad reward moving in a straight line, which stops moving as soon as it hits another object. Will start moving in the direction provided by the rotation parameter - name: `BadGoalMove` - - can rotate 360 degrees - - size range fixed as `(1,1,1)` - - cannot randomize color - - terminates episode - - A bad reward bouncing on objects. Will + - size range `1-3` +
+ - A bad reward bouncing on objects. Will start moving in the direction provided by the rotation parameter - name: `BadGoalBounce` - - can rotate 360 degrees - - size range fixed as `(1,1,1)` - - cannot randomize color - - terminates episode -- Good goals multi: golden spheres with a reward of 1 that will only terminate the episode once all of them are -retrieved (and a GoodGoal if present): - - Fixed good reward multi + - size range `1-3` +
+- Good goals multi: golden spheres with a positive reward equal to their size, that will only terminate the episode once + all of them are retrieved (and a GoodGoal if present): + - Fixed good reward multi - name: `GoodGoalMulti` - - can rotate 360 degrees - - size range `(0.5,0.5,0.5)-(10,10,10)` - - cannot randomize color - - A good reward moving in a straight + - size range `0.5-10` +
+ - A good reward moving in a straight line, which stops moving as soon as it hits another object. Will start moving in the direction provided by the rotation parameter - name: `GoodGoalMultiMove` - - can rotate 360 degrees - - size range fixed as `(1,1,1)` - - cannot randomize color - - A bad reward bouncing on objects. + - size range `1-3` + - A good reward bouncing on objects. Will start moving in the direction provided by the rotation parameter - name: `GoodGoalMultiBounce` - - can rotate 360 degrees - - size range fixed as `(1,1,1)` - - cannot randomize color -- a deathzone with reward -1 - - name: `DeathZone` - - can rotate 360 degrees - - size range `(1,0,1)-(40,0,40)` - - cannot randomize color - - **the deathzone is always flat and located on the ground** - - terminates episode + - size range `1-3` +- Deathzone: + - a deathzone with reward -1 + - name: `DeathZone` + - size range `(1,0,1)-(40,0,40)` + - **the deathzone is always flat and located on the ground** + - terminates episode diff --git a/documentation/training.md b/documentation/training.md index 9c699367..0d895c51 100644 --- a/documentation/training.md +++ b/documentation/training.md @@ -50,8 +50,8 @@ as an argument to reset the environment. The environment will use the new config following ones until a new configuration is passed.
The syntax is: ``` -env.reset(config=arena_config # A new AernaConfig to use for reset, leave empty to use the last one provided - train_mode=True # True for training +env.reset(arenas_configurations_input=arena_config, # A new ArenaConfig to use for reset, leave empty to use the last one provided + train_mode=True # True for training ) ``` diff --git a/train.py b/train.py index cc3c0ed1..b291b582 100644 --- a/train.py +++ b/train.py @@ -60,7 +60,6 @@ def init_environment(env_path, docker_target_name, no_graphics, worker_id, seed) worker_id=worker_id, seed=seed, docker_training=docker_training, - no_graphics=no_graphics, play=False ) diff --git a/visualizeArena.py b/visualizeArena.py index 4d06ac91..ddc7a5bd 100644 --- a/visualizeArena.py +++ b/visualizeArena.py @@ -26,7 +26,6 @@ def init_environment(env_path, docker_target_name, no_graphics, worker_id, seed) worker_id=worker_id, seed=seed, docker_training=docker_training, - no_graphics=no_graphics, play=True ) diff --git a/visualizeLightsOff.py b/visualizeLightsOff.py index 46e6ae2e..0d941c39 100644 --- a/visualizeLightsOff.py +++ b/visualizeLightsOff.py @@ -31,7 +31,6 @@ worker_id=worker_id, seed=seed, docker_training=docker_training, - no_graphics=no_graphics, play=False ) @@ -60,7 +59,7 @@ def run_step_imshow(step): try: - anim = animation.FuncAnimation(fig, run_step_imshow, init_func=initialize_animation, frames=100, interval=500) + anim = animation.FuncAnimation(fig, run_step_imshow, init_func=initialize_animation, frames=100, interval=50) plt.show() except KeyboardInterrupt: env.close()