
Commit

AAAI-2019
jaromiru committed Nov 6, 2018
1 parent 250bba7 commit 26de7ef
Showing 52 changed files with 1,666 additions and 1,505 deletions.
14 changes: 7 additions & 7 deletions README.md
@@ -1,15 +1,15 @@
-This is the source code for the paper *Classification with Costly Features using Deep Reinforcement Learning*, written by *Jaromír Janisch*, *Tomáš Pevný* and *Viliam Lisý*, available at https://arxiv.org/abs/1711.07364.
+This is the source code for the AAAI 2019 paper *Classification with Costly Features using Deep Reinforcement Learning*, written by *Jaromír Janisch*, *Tomáš Pevný* and *Viliam Lisý*, available at https://arxiv.org/abs/1711.07364.

**Prerequisites:**
- cuda capable hardware
- ubuntu 16.04
- cuda 8/9
-- python 3.6 (numpy, pandas, pytorch)
+- python 3.6 (numpy, pandas, pytorch 0.4)

**Usage:**
-- use tools `tools/conv_*.py` to prepare datasets; read the headers of those files
-- select a dataset to use and copy corresponding file from `consts-template` to `const.py`
-- run `python3.6 main.py`
-- the run will create multiple log files
+- use tools `tools/conv_*.py` to prepare datasets; read the headers of those files; data is expected to be in `../data`
+- pretrained HPC models are in `trained_hpc`, or you can use `tools/hpc_svm.py` to recreate them; they are needed in `../data`
+- run `python3.6 main.py --dataset [dataset] --flambda [lambda] --use_hpc [0|1] --pretrain [0|1]`, choose `dataset` from `config_datasets/`
+- the run will create multiple log files `run*.dat`
- you can use octave or matlab to analyze them with `tools/debug.m`
-- you can also evaluate the agent on the test set with `tools/eval_*.py`
+- you can also evaluate the agent on the test set with `eval.py --dataset [dataset] --flambda [lambda]`
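
For convenience, a small driver script along the following lines can sweep several cost trade-offs for one dataset. It is only an illustration and not part of the repository: the flag names come from the usage line above, while the dataset name and the lambda values are placeholders that must match a config present in `config_datasets/`.

```python
# Hypothetical sweep script, not part of the repository: runs main.py and then
# eval.py for several lambda values on one dataset, using the flags listed above.
import subprocess

DATASET = "miniboone"          # placeholder; must match a config in config_datasets/
LAMBDAS = [0.001, 0.01, 0.1]   # illustrative trade-off values

for lam in LAMBDAS:
    subprocess.run(["python3.6", "main.py",
                    "--dataset", DATASET, "--flambda", str(lam),
                    "--use_hpc", "1", "--pretrain", "1"], check=True)
    subprocess.run(["python3.6", "eval.py",
                    "--dataset", DATASET, "--flambda", str(lam)], check=True)
```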
145 changes: 108 additions & 37 deletions agent.py
@@ -1,52 +1,123 @@
import numpy as np
from env import Environment
-from consts import *
+from config import config

+all_agents = np.arange(config.AGENTS)
+
class Agent():
-    def __init__(self, env, pool, brain):
-        self.env = env
-        self.pool = pool
-        self.brain = brain

-        self.epsilon = EPSILON_START
-        self.s = self.env.reset()

-    def store(self, x):
-        self.pool.put(x)

-    def act(self, s):
-        m = np.zeros((AGENTS, ACTION_DIM))  # create max_mask
-        m[:, CLASSES:] = s[:, FEATURE_DIM:]

-        if self.epsilon < 1.0:
-            p = self.brain.predict_np(s) - MAX_MASK_CONST * m  # select an action not considering those already performed
-            a = np.argmax(p, axis=1)
-        else:
-            a = np.zeros(AGENTS, dtype=np.int32)

-        # override with random action
-        rand_agents = np.where( np.random.rand(AGENTS) < self.epsilon )[0]
-        rand_number = np.random.rand(len(rand_agents))

-        for i in range(len(rand_agents)):
-            agent = rand_agents[i]

-            possible_actions = np.where( m[agent] == 0. )[0]  # select a random action, don't repeat an action
-            w = int(rand_number[i] * len(possible_actions))
-            a[agent] = possible_actions[w]

-        return a

-    def step(self):
-        a = self.act(self.s)
-        s_, r = self.env.step(a)

-        self.store( (self.s, a, r, s_) )

-        self.s = s_

-    def update_epsilon(self, epoch):
-        if epoch >= EPSILON_EPOCHS:
-            self.epsilon = EPSILON_END
-        else:
-            self.epsilon = EPSILON_START + epoch * (EPSILON_END - EPSILON_START) / EPSILON_EPOCHS
+    def __init__(self, env, pool, brain):
+        self.env = env
+        self.pool = pool
+        self.brain = brain

+        self.epsilon = config.EPSILON_START

+        self.idx = np.zeros(config.AGENTS, dtype=np.int32)
+        self.S  = np.zeros((config.AGENTS, config.FEATURE_DIM+1, 2, config.FEATURE_DIM), dtype=np.float32)
+        self.A  = np.zeros((config.AGENTS, config.FEATURE_DIM+1), dtype=np.int64)
+        self.R  = np.zeros((config.AGENTS, config.FEATURE_DIM+1), dtype=np.float32)
+        self.U  = np.zeros((config.AGENTS, config.FEATURE_DIM+1), dtype=np.float32)
+        self.NA = np.zeros((config.AGENTS, config.FEATURE_DIM+1, config.ACTION_DIM), dtype=np.bool)

+        s, na = self.env.reset()
+        self.S[all_agents, self.idx] = s
+        self.NA[all_agents, self.idx] = na

+    def act(self, s, na):
+        q = self.brain.predict_np(s)
+        p = q - config.MAX_MASK_CONST * na  # select an action not considering those already performed
+        a = np.argmax(p, axis=1)

+        rand_agents = np.random.rand(config.AGENTS) < self.epsilon
+        rand_number = np.random.rand(config.AGENTS)  # rand() call is expensive, better to do it at once

+        possible_actions_count = config.ACTION_DIM - np.sum(na, axis=1)
+        u = (1 - self.epsilon) + (self.epsilon / possible_actions_count)

+        for i in range(config.AGENTS):
+            if rand_agents[i]:  # random action
+                possible_actions = np.where( na[i] == False )[0]  # select a random action, don't repeat an action

+                w = int(rand_number[i] * possible_actions_count[i])
+                a_ = possible_actions[w]

+                if a[i] == a_:
+                    u[i] = (1 - self.epsilon) + (self.epsilon / possible_actions_count[i])  # randomly selected the maximizing action

+                else:
+                    a[i] = a_
+                    u[i] = self.epsilon / possible_actions_count[i]  # probability of taking a random action

+        return a, u

+    def step(self):
+        s = self.S[all_agents, self.idx]
+        na = self.NA[all_agents, self.idx]

+        a, u = self.act(s, na)
+        s_, r, na_, done, info = self.env.step(a)

+        self.A[all_agents, self.idx] = a
+        self.R[all_agents, self.idx] = r
+        self.U[all_agents, self.idx] = u

+        for i in np.where(done)[0]:  # truncate & store the finished episode i
+            idx = self.idx[i]+1

+            _s = self.S[i, :idx].copy()
+            _a = self.A[i, :idx].copy()
+            _r = self.R[i, :idx].copy()
+            _u = self.U[i, :idx].copy()
+            _na = self.NA[i, :idx].copy()

+            # extract the true state
+            _x = np.broadcast_to(self.env.x[i].copy(), (idx, config.FEATURE_DIM))
+            _y = np.repeat(self.env.y[i], idx)

+            self.pool.put( (_s, _a, _r, _u, _na, _x, _y) )

+        self.idx = (done == 0) * (self.idx + 1)  # advance idx by 1 and reset to 0 for finished episodes

+        self.NA[all_agents, self.idx] = na_  # unavailable actions
+        self.S[all_agents, self.idx] = s_

+        return s, a, r, s_, done, info

+    def update_epsilon(self, epoch):
+        if epoch >= config.EPSILON_EPOCHS:
+            self.epsilon = config.EPSILON_END
+        else:
+            self.epsilon = config.EPSILON_START + epoch * (config.EPSILON_END - config.EPSILON_START) / config.EPSILON_EPOCHS

+class PerfAgent(Agent):
+    def __init__(self, env, brain):
+        self.env = env
+        self.brain = brain

+        self.idx = np.zeros(config.AGENTS, dtype=np.int32)
+        self.S  = np.zeros((config.AGENTS, config.FEATURE_DIM+1, 2, config.FEATURE_DIM), dtype=np.float32)
+        self.NA = np.zeros((config.AGENTS, config.FEATURE_DIM+1, config.ACTION_DIM), dtype=np.bool)

+        s, na = self.env.reset()
+        self.S[all_agents, self.idx] = s
+        self.NA[all_agents, self.idx] = na

+    def act(self, s, na):
+        q = self.brain.predict_np(s)
+        p = q - config.MAX_MASK_CONST * na  # select an action not considering those already performed
+        a = np.argmax(p, axis=1)

+        return a, 1.0

+    def step(self):
+        s = self.S[all_agents, self.idx]
+        na = self.NA[all_agents, self.idx]

+        a, u = self.act(s, na)
+        s_, r, na_, done, info = self.env.step(a)

+        self.idx = (done == 0) * (self.idx + 1)  # advance idx by 1 and reset to 0 for finished episodes

+        self.NA[all_agents, self.idx] = na_  # unavailable actions
+        self.S[all_agents, self.idx] = s_

+        return s, a, r, s_, done, info
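
The core of the new `Agent.act` above is a masked epsilon-greedy rule that also records the probability `u` with which the behaviour policy picked the chosen action, stored alongside each transition. The standalone sketch below reproduces that rule outside the repository's classes, written per-sample for clarity; the Q-values, mask and epsilon value are made up, so treat it purely as an illustration of the logic.

```python
import numpy as np

def masked_epsilon_greedy(q, na, epsilon, rng=np.random):
    """Pick one action per row of q, never choosing actions masked out in na
    (na[i, j] == True means action j is unavailable for sample i), and return
    the probability u with which this behaviour policy picked that action."""
    n, _ = q.shape
    a = np.empty(n, dtype=np.int64)
    u = np.empty(n, dtype=np.float64)

    for i in range(n):
        allowed = np.where(~na[i])[0]                # actions still available
        greedy = allowed[np.argmax(q[i, allowed])]   # best allowed action

        if rng.rand() < epsilon:                     # exploration branch
            choice = rng.choice(allowed)             # uniform over allowed actions
        else:
            choice = greedy

        a[i] = choice
        if choice == greedy:                         # the greedy action can also be drawn at random
            u[i] = (1 - epsilon) + epsilon / len(allowed)
        else:
            u[i] = epsilon / len(allowed)

    return a, u

# tiny usage example with made-up numbers
q = np.array([[0.2, 0.9, 0.1],
              [0.5, 0.4, 0.8]])
na = np.array([[False, True, False],
               [False, False, False]])
a, u = masked_epsilon_greedy(q, na, epsilon=0.3)
print(a, u)
```

Recording `u` this way lets downstream code know how likely each logged action was under the epsilon-greedy policy that generated it.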