Showing 52 changed files with 1,666 additions and 1,505 deletions.
@@ -1,15 +1,15 @@
-This is the source code for the paper *Classification with Costly Features using Deep Reinforcement Learning*, written by *Jaromír Janisch*, *Tomáš Pevný* and *Viliam Lisý*, available at https://arxiv.org/abs/1711.07364.
+This is the source code for the AAAI 2019 paper *Classification with Costly Features using Deep Reinforcement Learning*, written by *Jaromír Janisch*, *Tomáš Pevný* and *Viliam Lisý*, available at https://arxiv.org/abs/1711.07364.
 
 **Prerequisites:**
 - cuda capable hardware
 - ubuntu 16.04
 - cuda 8/9
-- python 3.6 (numpy, pandas, pytorch)
+- python 3.6 (numpy, pandas, pytorch 0.4)
 
 **Usage:**
-- use tools `tools/conv_*.py` to prepare datasets; read the headers of those files
-- select a dataset to use and copy the corresponding file from `consts-template` to `const.py`
-- run `python3.6 main.py`
-- the run will create multiple log files
+- use tools `tools/conv_*.py` to prepare datasets; read the headers of those files; data is expected to be in `../data`
+- pretrained HPC models are in `trained_hpc`, or you can use `tools/hpc_svm.py` to recreate them; they are needed in `../data`
+- run `python3.6 main.py --dataset [dataset] --flambda [lambda] --use_hpc [0|1] --pretrain [0|1]`, choosing `dataset` from `config_datasets/`
+- the run will create multiple log files `run*.dat`
 - you can use octave or matlab to analyze them with `tools/debug.m`
-- you can also evaluate the agent on the test set with `tools/eval_*.py`
+- you can also evaluate the agent on the test set with `eval.py --dataset [dataset] --flambda [lambda]`
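As context for the `--flambda [lambda]` argument above: in the paper, λ trades classification accuracy against feature cost, with each acquired feature penalized by λ times its cost and the episode ending with a classification reward. A minimal sketch of that reward structure, based on the paper rather than on this commit; the function names and cost values are illustrative only:

```python
import numpy as np

# Illustrative sketch of the per-step rewards in the costly-features MDP.
# flambda corresponds to the --flambda command-line argument above.
def acquisition_reward(feature_cost, flambda):
    # acquiring a feature f is penalized by lambda * c(f)
    return -flambda * feature_cost

def classification_reward(prediction, label):
    # terminal step: 0 for a correct prediction, -1 for an incorrect one
    return 0.0 if prediction == label else -1.0

costs = np.array([1.0, 1.0, 4.0])            # hypothetical per-feature costs
print(acquisition_reward(costs[2], 0.01))    # -0.04
print(classification_reward(1, 0))           # -1.0
```

Larger λ pushes the agent to classify after buying fewer features; smaller λ lets it acquire more of them first.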
@@ -1,52 +1,123 @@
 import numpy as np
 from env import Environment
-from consts import *
+from config import config
+
+all_agents = np.arange(config.AGENTS)
 
 class Agent():
-    def __init__(self, env, pool, brain):
-        self.env = env
-        self.pool = pool
-        self.brain = brain
-
-        self.epsilon = EPSILON_START
-        self.s = self.env.reset()
-
-    def store(self, x):
-        self.pool.put(x)
-
-    def act(self, s):
-        m = np.zeros((AGENTS, ACTION_DIM))    # create max_mask
-        m[:, CLASSES:] = s[:, FEATURE_DIM:]
-
-        if self.epsilon < 1.0:
-            p = self.brain.predict_np(s) - MAX_MASK_CONST * m    # select an action not considering those already performed
-            a = np.argmax(p, axis=1)
-        else:
-            a = np.zeros(AGENTS, dtype=np.int32)
-
-        # override with random action
-        rand_agents = np.where( np.random.rand(AGENTS) < self.epsilon )[0]
-        rand_number = np.random.rand(len(rand_agents))
-
-        for i in range(len(rand_agents)):
-            agent = rand_agents[i]
-
-            possible_actions = np.where( m[agent] == 0. )[0]    # select a random action, don't repeat an action
-            w = int(rand_number[i] * len(possible_actions))
-            a[agent] = possible_actions[w]
-
-        return a
-
-    def step(self):
-        a = self.act(self.s)
-        s_, r = self.env.step(a)
-
-        self.store( (self.s, a, r, s_) )
-
-        self.s = s_
-
-    def update_epsilon(self, epoch):
-        if epoch >= EPSILON_EPOCHS:
-            self.epsilon = EPSILON_END
-        else:
-            self.epsilon = EPSILON_START + epoch * (EPSILON_END - EPSILON_START) / EPSILON_EPOCHS
+    def __init__(self, env, pool, brain):
+        self.env = env
+        self.pool = pool
+        self.brain = brain
+
+        self.epsilon = config.EPSILON_START
+
+        self.idx = np.zeros(config.AGENTS, dtype=np.int32)
+        self.S = np.zeros((config.AGENTS, config.FEATURE_DIM+1, 2, config.FEATURE_DIM), dtype=np.float32)
+        self.A = np.zeros((config.AGENTS, config.FEATURE_DIM+1), dtype=np.int64)
+        self.R = np.zeros((config.AGENTS, config.FEATURE_DIM+1), dtype=np.float32)
+        self.U = np.zeros((config.AGENTS, config.FEATURE_DIM+1), dtype=np.float32)
+        self.NA = np.zeros((config.AGENTS, config.FEATURE_DIM+1, config.ACTION_DIM), dtype=np.bool)
+
+        s, na = self.env.reset()
+        self.S[all_agents, self.idx] = s
+        self.NA[all_agents, self.idx] = na
+
+    def act(self, s, na):
+        q = self.brain.predict_np(s)
+        p = q - config.MAX_MASK_CONST * na    # select an action not considering those already performed
+        a = np.argmax(p, axis=1)
+
+        rand_agents = np.random.rand(config.AGENTS) < self.epsilon
+        rand_number = np.random.rand(config.AGENTS)    # rand() call is expensive, better to do it at once
+
+        possible_actions_count = config.ACTION_DIM - np.sum(na, axis=1)
+        u = (1 - self.epsilon) + (self.epsilon / possible_actions_count)
+
+        for i in range(config.AGENTS):
+            if rand_agents[i]:    # random action
+                possible_actions = np.where( na[i] == False )[0]    # select a random action, don't repeat an action
+
+                w = int(rand_number[i] * possible_actions_count[i])
+                a_ = possible_actions[w]
+
+                if a[i] == a_:
+                    u[i] = (1 - self.epsilon) + (self.epsilon / possible_actions_count[i])    # randomly selected the maximizing action
+                else:
+                    a[i] = a_
+                    u[i] = self.epsilon / possible_actions_count[i]    # probability of taking a random action
+
+        return a, u
+
+    def step(self):
+        s = self.S[all_agents, self.idx]
+        na = self.NA[all_agents, self.idx]
+
+        a, u = self.act(s, na)
+        s_, r, na_, done, info = self.env.step(a)
+
+        self.A[all_agents, self.idx] = a
+        self.R[all_agents, self.idx] = r
+        self.U[all_agents, self.idx] = u
+
+        for i in np.where(done)[0]:    # truncate & store the finished episode i
+            idx = self.idx[i]+1
+
+            _s = self.S[i, :idx].copy()
+            _a = self.A[i, :idx].copy()
+            _r = self.R[i, :idx].copy()
+            _u = self.U[i, :idx].copy()
+            _na = self.NA[i, :idx].copy()
+
+            # extract the true state
+            _x = np.broadcast_to(self.env.x[i].copy(), (idx, config.FEATURE_DIM))
+            _y = np.repeat(self.env.y[i], idx)
+
+            self.pool.put( (_s, _a, _r, _u, _na, _x, _y) )
+
+        self.idx = (done == 0) * (self.idx + 1)    # advance idx by 1 and reset to 0 for finished episodes
+
+        self.NA[all_agents, self.idx] = na_    # unavailable actions
+        self.S[all_agents, self.idx] = s_
+
+        return s, a, r, s_, done, info
+
+    def update_epsilon(self, epoch):
+        if epoch >= config.EPSILON_EPOCHS:
+            self.epsilon = config.EPSILON_END
+        else:
+            self.epsilon = config.EPSILON_START + epoch * (config.EPSILON_END - config.EPSILON_START) / config.EPSILON_EPOCHS
+
+class PerfAgent(Agent):
+    def __init__(self, env, brain):
+        self.env = env
+        self.brain = brain
+
+        self.idx = np.zeros(config.AGENTS, dtype=np.int32)
+        self.S = np.zeros((config.AGENTS, config.FEATURE_DIM+1, 2, config.FEATURE_DIM), dtype=np.float32)
+        self.NA = np.zeros((config.AGENTS, config.FEATURE_DIM+1, config.ACTION_DIM), dtype=np.bool)
+
+        s, na = self.env.reset()
+        self.S[all_agents, self.idx] = s
+        self.NA[all_agents, self.idx] = na
+
+    def act(self, s, na):
+        q = self.brain.predict_np(s)
+        p = q - config.MAX_MASK_CONST * na    # select an action not considering those already performed
+        a = np.argmax(p, axis=1)
+
+        return a, 1.0
+
+    def step(self):
+        s = self.S[all_agents, self.idx]
+        na = self.NA[all_agents, self.idx]
+
+        a, u = self.act(s, na)
+        s_, r, na_, done, info = self.env.step(a)
+
+        self.idx = (done == 0) * (self.idx + 1)    # advance idx by 1 and reset to 0 for finished episodes
+
+        self.NA[all_agents, self.idx] = na_    # unavailable actions
+        self.S[all_agents, self.idx] = s_
+
+        return s, a, r, s_, done, info
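The rewritten `Agent.act` above combines Q-value masking of already-performed actions with vectorized ε-greedy exploration, and additionally returns `u`, the probability of the chosen action under the behaviour policy, which is stored with each transition (useful, for example, for off-policy corrections). A single-agent sketch of the same selection rule, where `q_values` stands in for the output of `brain.predict_np` and all names are illustrative:

```python
import numpy as np

MAX_MASK_CONST = 1.0e6  # large constant used to suppress unavailable actions

def masked_epsilon_greedy(q_values, unavailable, epsilon, rng=np.random):
    """Pick one action epsilon-greedily among available actions and return
    (action, probability of that action under the behaviour policy)."""
    q_values = np.asarray(q_values, dtype=np.float64)
    unavailable = np.asarray(unavailable, dtype=bool)

    # greedy action computed over available actions only
    masked_q = q_values - MAX_MASK_CONST * unavailable
    greedy = int(np.argmax(masked_q))

    n_avail = int(np.sum(~unavailable))
    if rng.rand() < epsilon:
        # uniform random choice among the actions not yet taken
        action = int(rng.choice(np.where(~unavailable)[0]))
    else:
        action = greedy

    # probability of the selected action under the epsilon-greedy policy
    if action == greedy:
        prob = (1.0 - epsilon) + epsilon / n_avail
    else:
        prob = epsilon / n_avail
    return action, prob

# example: 3 actions, the second one already performed
a, u = masked_epsilon_greedy([0.2, 0.9, 0.5], [False, True, False], epsilon=0.1)
```

`PerfAgent` is the degenerate case of the same rule with ε = 0, which is why its `act` simply returns the masked argmax and a probability of 1.0.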
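Both `step` implementations rely on the same vectorized bookkeeping: `self.idx = (done == 0) * (self.idx + 1)` advances every agent's write position by one and resets it to zero wherever an episode just finished, so the pre-allocated per-agent buffers are reused in place; finished episodes are first truncated to `idx[i]+1` steps and pushed to the pool. A small illustration of the indexing trick with hypothetical sizes:

```python
import numpy as np

agents, max_len = 4, 6                      # hypothetical sizes
idx = np.zeros(agents, dtype=np.int32)      # current step of each agent's episode
R = np.zeros((agents, max_len), dtype=np.float32)

all_agents = np.arange(agents)
for t in range(5):
    r = np.random.rand(agents)              # stand-in for environment rewards
    done = np.random.rand(agents) < 0.3     # stand-in for terminal flags

    R[all_agents, idx] = r                  # write at each agent's own position

    # finished agents would flush R[i, :idx[i]+1] to the replay pool here

    # advance by one, but reset to 0 wherever the episode ended
    idx = (done == 0) * (idx + 1)
```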