
Commit

AAAI-2019
jaromiru committed Nov 6, 2018
1 parent 250bba7 commit 26de7ef
Showing 52 changed files with 1,666 additions and 1,505 deletions.
14 changes: 7 additions & 7 deletions README.md
@@ -1,15 +1,15 @@
-This is the source code for the paper *Classification with Costly Features using Deep Reinforcement Learning*, written by *Jaromír Janisch*, *Tomáš Pevný* and *Viliam Lisý*, available at https://arxiv.org/abs/1711.07364.
+This is the source code for the AAAI 2019 paper *Classification with Costly Features using Deep Reinforcement Learning*, written by *Jaromír Janisch*, *Tomáš Pevný* and *Viliam Lisý*, available at https://arxiv.org/abs/1711.07364.

**Prerequisites:**
- cuda capable hardware
- ubuntu 16.04
- cuda 8/9
-- python 3.6 (numpy, pandas, pytorch)
+- python 3.6 (numpy, pandas, pytorch 0.4)

**Usage:**
-- use tools `tools/conv_*.py` to prepare datasets; read the headers of those files
-- select a dataset to use and copy corresponding file from `consts-template` to `const.py`
-- run `python3.6 main.py`
-- the run will create multiple log files
+- use tools `tools/conv_*.py` to prepare datasets; read the headers of those files; data is expected to be in `../data`
+- pretrained HPC models are in `trained_hpc`, or you can use `tools/hpc_svm.py` to recreate them; they are needed in `../data`
+- run `python3.6 main.py --dataset [dataset] --flambda [lambda] --use_hpc [0|1] --pretrain [0|1]`, choose `dataset` from `config_datasets/`
+- the run will create multiple log files `run*.dat`
- you can use octave or matlab to analyze them with `tools/debug.m`
-- you can also evaluate the agent on the test set with `tools/eval_*.py`
+- you can also evaluate the agent on the test set with `eval.py --dataset [dataset] --flambda [lambda]`
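
For convenience, a small driver script along the following lines can sweep several cost trade-offs for one dataset. It is only an illustration and not part of the repository: the flag names come from the usage line above, while the dataset name and the lambda values are placeholders that must match a config present in `config_datasets/`.

```python
# Hypothetical sweep script, not part of the repository: runs main.py and then
# eval.py for several lambda values on one dataset, using the flags listed above.
import subprocess

DATASET = "miniboone"          # placeholder; must match a config in config_datasets/
LAMBDAS = [0.001, 0.01, 0.1]   # illustrative trade-off values

for lam in LAMBDAS:
    subprocess.run(["python3.6", "main.py",
                    "--dataset", DATASET, "--flambda", str(lam),
                    "--use_hpc", "1", "--pretrain", "1"], check=True)
    subprocess.run(["python3.6", "eval.py",
                    "--dataset", DATASET, "--flambda", str(lam)], check=True)
```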
145 changes: 108 additions & 37 deletions agent.py
@@ -1,52 +1,123 @@
import numpy as np
from env import Environment
-from consts import *
+from config import config

+all_agents = np.arange(config.AGENTS)
+
class Agent():
-    def __init__(self, env, pool, brain):
-        self.env = env
-        self.pool = pool
-        self.brain = brain

-        self.epsilon = EPSILON_START
-        self.s = self.env.reset()

-    def store(self, x):
-        self.pool.put(x)

-    def act(self, s):
-        m = np.zeros((AGENTS, ACTION_DIM))  # create max_mask
-        m[:, CLASSES:] = s[:, FEATURE_DIM:]

-        if self.epsilon < 1.0:
-            p = self.brain.predict_np(s) - MAX_MASK_CONST * m  # select an action not considering those already performed
-            a = np.argmax(p, axis=1)
-        else:
-            a = np.zeros(AGENTS, dtype=np.int32)

-        # override with random action
-        rand_agents = np.where( np.random.rand(AGENTS) < self.epsilon )[0]
-        rand_number = np.random.rand(len(rand_agents))

-        for i in range(len(rand_agents)):
-            agent = rand_agents[i]

-            possible_actions = np.where( m[agent] == 0. )[0]  # select a random action, don't repeat an action
-            w = int(rand_number[i] * len(possible_actions))
-            a[agent] = possible_actions[w]

-        return a

-    def step(self):
-        a = self.act(self.s)
-        s_, r = self.env.step(a)

-        self.store( (self.s, a, r, s_) )

-        self.s = s_

-    def update_epsilon(self, epoch):
-        if epoch >= EPSILON_EPOCHS:
-            self.epsilon = EPSILON_END
-        else:
-            self.epsilon = EPSILON_START + epoch * (EPSILON_END - EPSILON_START) / EPSILON_EPOCHS
+    def __init__(self, env, pool, brain):
+        self.env = env
+        self.pool = pool
+        self.brain = brain

+        self.epsilon = config.EPSILON_START

+        self.idx = np.zeros(config.AGENTS, dtype=np.int32)
+        self.S  = np.zeros((config.AGENTS, config.FEATURE_DIM+1, 2, config.FEATURE_DIM), dtype=np.float32)
+        self.A  = np.zeros((config.AGENTS, config.FEATURE_DIM+1), dtype=np.int64)
+        self.R  = np.zeros((config.AGENTS, config.FEATURE_DIM+1), dtype=np.float32)
+        self.U  = np.zeros((config.AGENTS, config.FEATURE_DIM+1), dtype=np.float32)
+        self.NA = np.zeros((config.AGENTS, config.FEATURE_DIM+1, config.ACTION_DIM), dtype=np.bool)

+        s, na = self.env.reset()
+        self.S[all_agents, self.idx] = s
+        self.NA[all_agents, self.idx] = na

+    def act(self, s, na):
+        q = self.brain.predict_np(s)
+        p = q - config.MAX_MASK_CONST * na  # select an action not considering those already performed
+        a = np.argmax(p, axis=1)

+        rand_agents = np.random.rand(config.AGENTS) < self.epsilon
+        rand_number = np.random.rand(config.AGENTS)  # rand() call is expensive, better to do it at once

+        possible_actions_count = config.ACTION_DIM - np.sum(na, axis=1)
+        u = (1 - self.epsilon) + (self.epsilon / possible_actions_count)

+        for i in range(config.AGENTS):
+            if rand_agents[i]:  # random action
+                possible_actions = np.where( na[i] == False )[0]  # select a random action, don't repeat an action

+                w = int(rand_number[i] * possible_actions_count[i])
+                a_ = possible_actions[w]

+                if a[i] == a_:
+                    u[i] = (1 - self.epsilon) + (self.epsilon / possible_actions_count[i])  # randomly selected the maximizing action

+                else:
+                    a[i] = a_
+                    u[i] = self.epsilon / possible_actions_count[i]  # probability of taking a random action

+        return a, u

+    def step(self):
+        s = self.S[all_agents, self.idx]
+        na = self.NA[all_agents, self.idx]

+        a, u = self.act(s, na)
+        s_, r, na_, done, info = self.env.step(a)

+        self.A[all_agents, self.idx] = a
+        self.R[all_agents, self.idx] = r
+        self.U[all_agents, self.idx] = u

+        for i in np.where(done)[0]:  # truncate & store the finished episode i
+            idx = self.idx[i]+1

+            _s = self.S[i, :idx].copy()
+            _a = self.A[i, :idx].copy()
+            _r = self.R[i, :idx].copy()
+            _u = self.U[i, :idx].copy()
+            _na = self.NA[i, :idx].copy()

+            # extract the true state
+            _x = np.broadcast_to(self.env.x[i].copy(), (idx, config.FEATURE_DIM))
+            _y = np.repeat(self.env.y[i], idx)

+            self.pool.put( (_s, _a, _r, _u, _na, _x, _y) )

+        self.idx = (done == 0) * (self.idx + 1)  # advance idx by 1 and reset to 0 for finished episodes

+        self.NA[all_agents, self.idx] = na_  # unavailable actions
+        self.S[all_agents, self.idx] = s_

+        return s, a, r, s_, done, info

+    def update_epsilon(self, epoch):
+        if epoch >= config.EPSILON_EPOCHS:
+            self.epsilon = config.EPSILON_END
+        else:
+            self.epsilon = config.EPSILON_START + epoch * (config.EPSILON_END - config.EPSILON_START) / config.EPSILON_EPOCHS

+class PerfAgent(Agent):
+    def __init__(self, env, brain):
+        self.env = env
+        self.brain = brain

+        self.idx = np.zeros(config.AGENTS, dtype=np.int32)
+        self.S  = np.zeros((config.AGENTS, config.FEATURE_DIM+1, 2, config.FEATURE_DIM), dtype=np.float32)
+        self.NA = np.zeros((config.AGENTS, config.FEATURE_DIM+1, config.ACTION_DIM), dtype=np.bool)

+        s, na = self.env.reset()
+        self.S[all_agents, self.idx] = s
+        self.NA[all_agents, self.idx] = na

+    def act(self, s, na):
+        q = self.brain.predict_np(s)
+        p = q - config.MAX_MASK_CONST * na  # select an action not considering those already performed
+        a = np.argmax(p, axis=1)

+        return a, 1.0

+    def step(self):
+        s = self.S[all_agents, self.idx]
+        na = self.NA[all_agents, self.idx]

+        a, u = self.act(s, na)
+        s_, r, na_, done, info = self.env.step(a)

+        self.idx = (done == 0) * (self.idx + 1)  # advance idx by 1 and reset to 0 for finished episodes

+        self.NA[all_agents, self.idx] = na_  # unavailable actions
+        self.S[all_agents, self.idx] = s_

+        return s, a, r, s_, done, info
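
The core of the new `Agent.act` above is a masked epsilon-greedy rule that also records the probability `u` with which the behaviour policy picked the chosen action, stored alongside each transition. The standalone sketch below reproduces that rule outside the repository's classes, written per-sample for clarity; the Q-values, mask and epsilon value are made up, so treat it purely as an illustration of the logic.

```python
import numpy as np

def masked_epsilon_greedy(q, na, epsilon, rng=np.random):
    """Pick one action per row of q, never choosing actions masked out in na
    (na[i, j] == True means action j is unavailable for sample i), and return
    the probability u with which this behaviour policy picked that action."""
    n, _ = q.shape
    a = np.empty(n, dtype=np.int64)
    u = np.empty(n, dtype=np.float64)

    for i in range(n):
        allowed = np.where(~na[i])[0]                # actions still available
        greedy = allowed[np.argmax(q[i, allowed])]   # best allowed action

        if rng.rand() < epsilon:                     # exploration branch
            choice = rng.choice(allowed)             # uniform over allowed actions
        else:
            choice = greedy

        a[i] = choice
        if choice == greedy:                         # the greedy action can also be drawn at random
            u[i] = (1 - epsilon) + epsilon / len(allowed)
        else:
            u[i] = epsilon / len(allowed)

    return a, u

# tiny usage example with made-up numbers
q = np.array([[0.2, 0.9, 0.1],
              [0.5, 0.4, 0.8]])
na = np.array([[False, True, False],
               [False, False, False]])
a, u = masked_epsilon_greedy(q, na, epsilon=0.3)
print(a, u)
```

Recording `u` this way lets downstream code know how likely each logged action was under the epsilon-greedy policy that generated it.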