
Commit

adding the trained checkpoint
abaybektursun committed Jun 2, 2018
1 parent 5274983 commit ca82ad3
Showing 8 changed files with 49 additions and 172 deletions.
9 changes: 4 additions & 5 deletions Dockerfile
@@ -12,7 +12,7 @@ EXPOSE 10080
RUN apt-get update

# Install software
RUN apt-get install -y git
RUN apt-get install -y git

# Clone the conf files into the docker container
RUN git clone https://github.com/MultiAgentLearning/playground
@@ -23,14 +23,13 @@ RUN pip install ./playground/.

RUN pip install --trusted-host pypi.python.org -r ./playground/requirements.txt
RUN pip install --trusted-host pypi.python.org -r ./playground/requirements_extra.txt
RUN pip install --trusted-host pypi.python.org tensorflow-gpu
RUN pip install --trusted-host pypi.python.org tensorflow
RUN pip install --trusted-host pypi.python.org matplotlib
RUN apt-get install -y python3-matplotlib

COPY . /pomm
# Run app.py when the container launches
WORKDIR /RNN_agent

RUN echo `ls`
WORKDIR /pomm/RNN_agent

ENTRYPOINT ["python"]
CMD ["run.py"]
Binary file added RNN_agent/model/-30332.data-00000-of-00001
Binary file added RNN_agent/model/-30332.index
Binary file added RNN_agent/model/-30332.meta
6 changes: 6 additions & 0 deletions RNN_agent/model/checkpoint
@@ -0,0 +1,6 @@
model_checkpoint_path: "-30332"
all_model_checkpoint_paths: "-29932"
all_model_checkpoint_paths: "-30032"
all_model_checkpoint_paths: "-30132"
all_model_checkpoint_paths: "-30232"
all_model_checkpoint_paths: "-30332"
16 changes: 9 additions & 7 deletions RNN_agent/rnn_agent.py
@@ -120,7 +120,7 @@ def policy_network(self, states):

def reinforce(self,
init_exp=0.5, # initial exploration prob
anneal_steps=10000, # N steps for annealing exploration
anneal_steps=30000, # N steps for annealing exploration
final_exp=0.0, # final exploration prob
summary_writer=None,
summary_every=100
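
The only change here stretches the exploration schedule from 10,000 to 30,000 steps. Assuming the usual linear interpolation between init_exp and final_exp (the helper below is an illustration, not the method in rnn_agent.py), the exploration probability at a given step would be:

def annealed_exploration(step, init_exp=0.5, final_exp=0.0, anneal_steps=30000):
    # Decay linearly from init_exp to final_exp over anneal_steps, then hold final_exp
    frac = min(float(step) / anneal_steps, 1.0)
    return init_exp + frac * (final_exp - init_exp)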
@@ -178,8 +178,8 @@ def resetModel(self):
def create_variables(self):
with tf.name_scope("C_inputs"):
# raw state representation
#self.states = tf.placeholder(tf.float32, (None, self.state_dim), name="states")
self.states = tf.placeholder_with_default( tf.zeros((1, self.state_dim)), (None, self.state_dim))
if self.model_training == 'C': self.states = tf.placeholder(tf.float32, (None, self.state_dim), name="states")
else: self.states = tf.placeholder_with_default( tf.zeros((1, self.state_dim)), (None, self.state_dim))

# rollout action based on current policy
with tf.name_scope("C_predict_actions"):
@@ -199,10 +199,12 @@ def create_variables(self):
# compute loss and gradients
with tf.name_scope("compute_pg_gradients"):
# gradients for selecting action from policy network
#self.taken_actions = tf.placeholder(tf.int32, (None,), name="taken_actions")
#self.discounted_rewards = tf.placeholder(tf.float32, (None,), name="discounted_rewards")
self.taken_actions = tf.placeholder_with_default( tf.zeros((1, ), dtype=tf.int32), (None, ) )
self.discounted_rewards = tf.placeholder_with_default( tf.zeros((1, )), (None,) )
if self.model_training == 'C':
self.taken_actions = tf.placeholder(tf.int32, (None,), name="taken_actions")
self.discounted_rewards = tf.placeholder(tf.float32, (None,), name="discounted_rewards")
else:
self.taken_actions = tf.placeholder_with_default( tf.zeros((1, ), dtype=tf.int32), (None, ) )
self.discounted_rewards = tf.placeholder_with_default( tf.zeros((1, )), (None,) )

with tf.variable_scope("policy_network", reuse=True):
self.logprobs = self.policy_network(self.states)
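
The switch from tf.placeholder to tf.placeholder_with_default (outside 'C' training) matters because the default form lets the graph run without a feed_dict, falling back to zeros, while a strict placeholder would error during inference from the shipped checkpoint. A standalone illustration with toy shapes (not the agent's real state_dim):

import tensorflow as tf

strict  = tf.placeholder(tf.float32, (None, 4), name="strict")       # must be fed
lenient = tf.placeholder_with_default(tf.zeros((1, 4)), (None, 4))   # feed is optional

with tf.Session() as sess:
    print(sess.run(lenient))   # OK: evaluates to the zero default
    # sess.run(strict) would fail without feed_dict={strict: ...}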
4 changes: 2 additions & 2 deletions RNN_agent/run.py
@@ -28,9 +28,9 @@ def act(self, observation, action_space):


if __name__ == '__main__':
model = 'model'
model = 'model/'

agent = MyAgent('models/'+model+'/' )
agent = MyAgent()
agent.run()


186 changes: 28 additions & 158 deletions RNN_agent/train.py
@@ -3,8 +3,19 @@

import gc
import os

from tqdm import tqdm
import numpy as np
import matplotlib.pyplot as plt
from random import shuffle
from datetime import datetime
from collections import deque

import tensorflow as tf

import easy_tf_log
from easy_tf_log import tflog
easy_tf_log.set_dir('tboard/')

from pommerman import agents
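
The new easy_tf_log import gives train.py one-line scalar logging; set_dir('tboard/') only chooses where the event files are written. A minimal usage sketch (the metric name is arbitrary):

import easy_tf_log
from easy_tf_log import tflog

easy_tf_log.set_dir('tboard/')          # event files land under tboard/
for reward in [0.1, -1.0, 0.5]:
    tflog('episode_reward', reward)     # one scalar point per call, viewable in TensorBoard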

@@ -134,7 +134,7 @@ def train_M(epochs, save_file_nm, chk_point_folder, load_model=None):

# Train the controller --------------------------------------------------------------------------------------
#------------------------------------------------------------------------------------------------------------
def train_C_generate_data(EPISODES, save_file_nm, chk_point_folder, sess_save_step=100, load_model=None, shuffle_agents=False, record=False, plot_reward=False, add_agents=[agents.SimpleAgent(), agents.RandomAgent(), agents.SimpleAgent()], encourage_win=False):
def train_C_generate_data(EPISODES, save_file_nm, chk_point_folder, sess_save_step=100, load_model=None, shuffle_agents=False, record=False, plot_reward=False, add_agents=[agents.SimpleAgent(), agents.RandomAgent(), agents.SimpleAgent()], encourage_win=False, learn=True):
if plot_reward:
plt.xlabel('Episode #')
plt.ylabel('Average reward for last 100 episodes')
@@ -170,9 +170,9 @@ def train_C_generate_data(EPISODES, save_file_nm, chk_point_folder, sess_save_st

agent_list = [rnn_agent] + add_agents

if shuffle_agents: shuffle(agent_list)
rnn_agent_index = agent_list.index(rnn_agent)

if shuffle_agents: shuffle(agent_list)
env = pommerman.make('PommeFFACompetition-v0', agent_list)

mean_rewards_list = []
@@ -204,9 +204,9 @@ def train_C_generate_data(EPISODES, save_file_nm, chk_point_folder, sess_save_st
if not encourage_win:
reward[rnn_agent_index] = reward[rnn_agent_index] if not rnn_agent.is_alive else 0.1
else:
reward[rnn_agent_index] = reward[rnn_agent_index] if not rnn_agent.is_alive else 0.03
reward[rnn_agent_index] = reward[rnn_agent_index] if not rnn_agent.is_alive else 0.09
if encourage_win and done and 'winners' in info:
reward[rnn_agent_index] = 10 if info['winners'][0] == rnn_agent_index else -10
reward[rnn_agent_index] = 5 if info['winners'][0] == rnn_agent_index else -5
#print("t: {} \t reward: {}\t Agent alive: {}".format(t, reward[rnn_agent_index], rnn_agent.is_alive) )

total_rewards += reward[rnn_agent_index]
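
Under the new constants, staying alive is worth 0.09 per step and a win or loss adds ±5 at episode end, so roughly 100 surviving steps (~+9) now outweigh the terminal bonus, whereas the old 0.03/±10 pairing was dominated by the win/loss signal. A back-of-the-envelope check (illustrative numbers, not code from train.py):

steps_alive = 100
alive_total = 0.09 * steps_alive      # ~9.0 of shaping reward under encourage_win
win_total   = alive_total + 5         # ~14.0 for a 100-step win
loss_total  = alive_total - 5         # ~4.0 for a 100-step loss
print(win_total, loss_total)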
@@ -221,10 +221,11 @@ def train_C_generate_data(EPISODES, save_file_nm, chk_point_folder, sess_save_st
other_wins.append(1 if info['winners'][0] != rnn_agent_index else 0)
wins_ratio = np.mean(other_wins)/np.mean(rnn_wins)
tflog('Other wins/agent wins ratio (100 wins)', wins_ratio)
#print('Other wins/agent wins ratio (100 wins)', wins_ratio)

ties.append(1 if 'Tie' in info else 0)
tie_ratio = np.mean(ties)/np.mean(rnn_wins)
tflog('ties/agent wins ratio (100 steps)', tie_ratio)
#tflog('ties/agent wins ratio (100 steps)', tie_ratio)



@@ -242,12 +242,12 @@ def train_C_generate_data(EPISODES, save_file_nm, chk_point_folder, sess_save_st
print("Reward for this episode: {}".format(total_rewards))
print("Average reward for last 100 episodes: {:.2f}".format(mean_rewards))
mean_rewards_list.append(mean_rewards)
tflog('Iteration Number', rnn_agent.train_iteration)
#tflog('Iteration Number', rnn_agent.train_iteration)
tflog('Average reward for last 100 episodes', mean_rewards)

# Save the model
if i_episode % sess_save_step == 0:
saver.save(rnn_agent.sess, chk_point_folder, global_step=rnn_agent.C_step)
if learn: saver.save(rnn_agent.sess, chk_point_folder, global_step=rnn_agent.C_step)
if record: dset.save()

# Plot rewards
@@ -279,136 +279,14 @@ def train_C_generate_data(EPISODES, save_file_nm, chk_point_folder, sess_save_st
lvl_ = "dataset_lvl{}.pickle"
lvl = ''
lvl_prev = ''
lvl1 = "dataset_lvl1.pickle"
# Random Actions
# Agent positions are shuffled
lvl2 = "dataset_lvl2.pickle"
# Data is generated while training controller
# Agents at same positions
lvl3 = "dataset_lvl3.pickle"
#
lvl4 = "dataset_lvl4.pickle"
#
lvl5 = "dataset_lvl5.pickle"
#
lvl6 = "dataset_lvl6.pickle"
#
lvl7 = "dataset_lvl7.pickle"
lvl8 = "dataset_lvl8.pickle"
lvl9 = "dataset_lvl9.pickle"
lvl10 = "dataset_lvl10.pickle"
lvl11 = "dataset_lvl11.pickle"
lvl12 = "dataset_lvl12.pickle"
lvl13 = "dataset_lvl13.pickle"
lvl14 = "dataset_lvl14.pickle"
lvl15 = "dataset_lvl15.pickle"
lvl16 = "dataset_lvl16.pickle"
lvl17 = "dataset_lvl17.pickle"
lvl18 = "dataset_lvl18.pickle"
lvl19 = "dataset_lvl19.pickle"

# Level 1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#print('-'*150); print('*'*90); print("Generating dataset ", lvl1); print('*'*90);
#generate_data(400, lvl1)

#print('-'*150); print('*'*90); print("Training M (RNN) on dataset ", lvl1); print('*'*90);
#train_M(10, lvl1, models + lvl1 + '/')

# Level 2 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
'''pbar = tqdm(total=1*10)
for _ in range(1):
print('-'*150); print('*'*90); print("Generating dataset ", lvl2); print('*'*90);
generate_data(10, lvl2, shuffle_agents=True)
pbar.update(10)
gc.collect()
pbar.close()
'''

#print('-'*150); print('*'*90); print("Training M (RNN) on dataset ", lvl2); print('*'*90);
#train_M(10, lvl2, models + lvl2 + '/', load_model=lvl1)

#print('-'*150); print('*'*90); print("Training M (RNN) on dataset ", lvl2); print('*'*90);
#train_M(7, lvl2, models+lvl2+'/', load_model=models+lvl2+'/')

# Level 3 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#train_C_generate_data(3000, lvl3, models + lvl3 + '/', plot_reward=False, add_agents=[agents.RandomAgent(), agents.SimpleAgent()])

# Level 4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#train_C_generate_data(1000, lvl4, models + lvl4 + '/', load_model=models+lvl3+'/', record=True, add_agents=[agents.RandomAgent(), agents.SimpleAgent()])

# Level 5 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#train_M(5, lvl4, models+lvl5+'/', load_model=models+lvl4+'/')

# Level 6 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ hory shit really good
#train_C_generate_data(2000, lvl6, models + lvl6 + '/', load_model=models+lvl5+'/', shuffle_agents=True, add_agents=[agents.RandomAgent(), agents.SimpleAgent()])

# Level 7 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pretty good!
#train_C_generate_data(1000, lvl7, models + lvl7 + '/', load_model=models+lvl6+'/', shuffle_agents=True, record=True, add_agents=[agents.RandomAgent(), agents.SimpleAgent()])

# Level 8 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#train_M(10, lvl4, models+lvl8+'/', load_model=models+lvl7+'/')

# Level 9 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ oblitirated simple agent
#train_C_generate_data(600, lvl9, models + lvl9 + '/', load_model=models+lvl8+'/', shuffle_agents=True, record=True, add_agents=[agents.SimpleAgent()])

# Level 10 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#train_M(20, lvl9, models+lvl10+'/', load_model=models+lvl9+'/')

# Level 11 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#train_C_generate_data(1000, lvl11, models + lvl11 + '/', load_model=models+lvl10+'/', shuffle_agents=True, record=True, add_agents=[agents.SimpleAgent(), agents.SimpleAgent()],encourage_win = True )
# Level 12 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#train_M(20, lvl11, models+lvl12+'/', load_model=models+lvl11+'/')
# Level 13 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#train_C_generate_data(1000, lvl13, models + lvl13 + '/', load_model=models+lvl12+'/', shuffle_agents=True, record=True, add_agents=[agents.RandomAgent(), agents.SimpleAgent(), agents.SimpleAgent()],encourage_win = True )
# Level 14 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#train_M(20, lvl13, models+lvl14+'/', load_model=models+lvl13+'/')
# Level 15 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#train_C_generate_data(1000, lvl15, models + lvl15 + '/', load_model=models+lvl14+'/', shuffle_agents=True, record=True, add_agents=[agents.SimpleAgent(), agents.SimpleAgent(), agents.SimpleAgent()],encourage_win = True )
# Level 16 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#train_M(20, lvl15, models+lvl16+'/', load_model=models+lvl15+'/')



#curr_lev = 16;
##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#curr_lev += 1
#lvl = lvl_.format(curr_lev)
#lvl_prev = lvl_.format(curr_lev-1)
#print('Level: ', curr_lev, '~~'*70)

#train_C_generate_data(1500, lvl, models + lvl + '/', load_model=models + lvl_prev +'/', shuffle_agents=True, record=True, add_agents=[agents.SimpleAgent(), agents.SimpleAgent(), agents.SimpleAgent()],encourage_win = True )

#curr_lev += 1
#lvl = lvl_.format(curr_lev)
#lvl_prev = lvl_.format(curr_lev-1)
#print('Level: ', curr_lev, '~~'*70)

#train_M(30, lvl_prev, models+lvl+'/', load_model=models+lvl_prev+'/')
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~


#curr_lev = 18;
##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#curr_lev += 1
#lvl = lvl_.format(curr_lev)
#lvl_prev = lvl_.format(curr_lev-1)
#print('Level: ', curr_lev, '~~'*70)

#train_C_generate_data(1500, lvl, models + lvl + '/', load_model=models + lvl_prev +'/', shuffle_agents=True, record=True, add_agents=[agents.SimpleAgent(), agents.SimpleAgent(), agents.SimpleAgent()],encourage_win = True )
#
#curr_lev += 1
#lvl = lvl_.format(curr_lev)
#lvl_prev = lvl_.format(curr_lev-1)
#print('Level: ', curr_lev, '~~'*70)
#
#train_M(30, lvl_prev, models+lvl+'/', load_model=models+lvl_prev+'/')
#train_M(30, lvl_prev, models+lvl+'/', load_model=models+lvl_prev+'/')
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
def levelup():
global curr_lev; global lvl; global lvl_prev; global lvl_
curr_lev += 1
lvl = lvl_.format(curr_lev)
lvl_prev = lvl_.format(curr_lev-1)
print('Level: ', curr_lev, '~~'*70)
print('Level: ', lvl, '~~'*70)


def create_enemy(load_folder):
@@ -434,34 +434,14 @@ def create_enemy(load_folder):
return enemy


## ! SELF PLAY !##############################################################################################################################################
##############################################################################################################################################################
curr_lev = 20
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
levelup()
enemy = create_enemy(models+lvl_prev+'/')
train_C_generate_data(1000, lvl, models+lvl+'/', load_model=models+lvl_prev+'/', shuffle_agents=True, record=True, add_agents=[enemy],encourage_win = True )
enemy.sess.close()
levelup()
train_M(30, lvl_prev, models+lvl+'/', load_model=models+lvl_prev+'/')
##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
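
Each of these self-play rounds repeats the same pair of calls: train the controller C against a frozen copy of the previous checkpoint, then retrain M on the recorded episodes. The unrolled blocks are equivalent to a loop along these lines (NUM_ROUNDS is a placeholder; the calls themselves are copied from the rounds in this file):

NUM_ROUNDS = 3  # hypothetical; the script unrolls the rounds by hand
for _ in range(NUM_ROUNDS):
    levelup()
    enemy = create_enemy(models + lvl_prev + '/')
    train_C_generate_data(1000, lvl, models + lvl + '/', load_model=models + lvl_prev + '/',
                          shuffle_agents=True, record=True, add_agents=[enemy], encourage_win=True)
    enemy.sess.close()
    levelup()
    train_M(30, lvl_prev, models + lvl + '/', load_model=models + lvl_prev + '/')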


curr_lev = 38
##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
levelup()
enemy = create_enemy(models+lvl_prev+'/')
train_C_generate_data(1000, lvl, models+lvl+'/', load_model=models+lvl_prev+'/', shuffle_agents=True, record=True, add_agents=[enemy],encourage_win = True )
enemy.sess.close()
levelup()
train_M(30, lvl_prev, models+lvl+'/', load_model=models+lvl_prev+'/')
#levelup()
#train_C_generate_data(1000, lvl, models+lvl+'/', load_model=models+lvl_prev+'/', shuffle_agents=True, record=False, add_agents=[agents.SimpleAgent(), agents.RandomAgent()],encourage_win =True)
##levelup()
#train_M(10, lvl_prev, models+lvl+'/', load_model=models+lvl_prev+'/')
##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
levelup()
enemy = create_enemy(models+lvl_prev+'/')
train_C_generate_data(1000, lvl, models+lvl+'/', load_model=models+lvl_prev+'/', shuffle_agents=True, record=True, add_agents=[enemy],encourage_win = True )
enemy.sess.close()
levelup()
train_M(30, lvl_prev, models+lvl+'/', load_model=models+lvl_prev+'/')
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

levelup()
train_C_generate_data(200, '', models+'blank', load_model='model/', shuffle_agents=True, learn=False, record=False, add_agents=[agents.SimpleAgent(), agents.RandomAgent()])
