batch_size = 32
reward = ...+trackpos
batch_size = 32
reward = ...+trackpos
batch_size = 256
reward = ...
batch_size = 32
reward = ...
batch_size = 32
lr = 0.0001
buffer_size = 1000 ?
lr = 0.0003 ?
BUFFER_SIZE = 5000
BATCH_SIZE = 64
LRA = 0.0002
LRC = 0.0002
??
BUFFER_SIZE = 6000
BATCH_SIZE = 64
LRA = 0.0002
LRC = 0.0002
BUFFER_SIZE = 7000
BATCH_SIZE = 64
LRA = 0.0002
LRC = 0.0002
BUFFER_SIZE = 6000
BATCH_SIZE = 128
LRA = 0.0002
LRC = 0.0002
ppo_epoch = 10
BUFFER_SIZE = 6000
BATCH_SIZE = 32
LRA = 0.0002
LRC = 0.0002
ppo_epoch = 10
BUFFER_SIZE = 6000
BATCH_SIZE = 32
LRA = 0.0002
LRC = 0.0002
ppo_epoch = 15
BUFFER_SIZE = 6000
BATCH_SIZE = 32
LRA = 0.0002
LRC = 0.0002
ppo_epoch = 20
no OU (Ornstein-Uhlenbeck) noise
BUFFER_SIZE = 6000
BATCH_SIZE = 32
LRA = 0.0002
LRC = 0.0002
ppo_epoch = 15
no OU (Ornstein-Uhlenbeck) noise















