-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathActor_Net.py
68 lines (51 loc) · 2.51 KB
/
Actor_Net.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Concatenate
from tensorflow.keras.optimizers import Adam
import tensorflow_probability as tfp
class Actor():
def __init__(self, state_dim, action_dim, action_max, learning_rate = 0.001, tau = 0.05, log_std_min = -20, log_std_max = 2):
self.state_dim = state_dim
self.action_dim = action_dim
self.tau = tau
self.action_max = action_max
self.log_std_min = -20
self.log_std_max = 2
self.tfd = tfp.distributions
self.actor = self._bulid_model(state_dim, action_dim)
self.optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
def _bulid_model(self, state_dim, action_dim, units=(400, 300, 100)):
state = Input(shape=state_dim)
x = Dense(units[0], name="L0", activation="relu")(state)
for index in range(1, len(units)):
x = Dense(units[index], name="L{}".format(index), activation="relu")(x)
actions_mean = Dense(action_dim, name="Out_mean")(x)
actions_std = Dense(action_dim, name="Out_std")(x)
model = Model(inputs=state, outputs=[actions_mean, actions_std])
return model
def act(self, state, test=False, use_random=False):
state = np.expand_dims(state, axis=0).astype(np.float64)
if use_random:
a = tf.random.uniform(shape=[1, self.action_dim], minval = -1, maxval = 1, dtype=tf.float64)
a = a * self.action_max
else:
means, log_stds = self.actor.predict(state)
log_stds = tf.clip_by_value(log_stds, self.log_std_min, self.log_std_max)
a, _ = self.process_actions(means, log_stds, test=test)
return a
def process_actions(self, mean, log_std, test=False, eps=1e-6):
std = tf.math.exp(log_std)
raw_actions = mean
if not test:
raw_actions += tf.random.normal(shape=mean.shape, dtype=tf.float64) * std
log_prob_u = self.tfd.Normal(loc=mean, scale=std).log_prob(raw_actions)
actions = tf.math.tanh(raw_actions)
# ******** probability densitiy (Enforcing action bounds)******** #
log_prob = tf.reduce_sum(log_prob_u - tf.math.log(1 - actions ** 2 + eps), axis=1, keepdims=True)
actions = actions * self.action_max
return actions, log_prob
def save(self, name):
self.actor.save(name)
def load(self, name):
self.actor = tf.keras.models.load_model(name)