
Simulating the act on the trainer. (#65)

* Remove the use of daemon threads for Redis subscribe.
* Emulate act and observe on the trainer side to update internal variables.
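
The second bullet is the substance of the change: the trainer process feeds the transitions collected by rollout workers back through its own agent's act/observe path, so internal bookkeeping (step and episode counters, current-episode state) stays consistent with what the workers actually played. A minimal, hypothetical sketch of that pattern follows; ToyAgent, emulate_act_and_observe, and the transition dicts are invented for illustration and are not Coach APIs.

# Hypothetical sketch (not Coach's actual implementation): replay worker-collected
# transitions through the trainer's own agent so its internal counters stay in sync.

class ToyAgent:
    def __init__(self):
        self.total_steps = 0
        self.total_episodes = 0

    def observe(self, transition):
        # Update bookkeeping exactly as an acting agent would.
        self.total_steps += 1
        if transition["done"]:
            self.total_episodes += 1


def emulate_act_and_observe(agent, transitions):
    """Feed transitions gathered by rollout workers through the trainer's agent."""
    for transition in transitions:
        agent.observe(transition)


if __name__ == "__main__":
    trainer_agent = ToyAgent()
    episode = [{"done": False}, {"done": False}, {"done": True}]
    emulate_act_and_observe(trainer_agent, episode)
    print(trainer_agent.total_steps, trainer_agent.total_episodes)  # prints: 3 1
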
Authored by Ajay Deshpande on 2018-11-15 08:38:58 -08:00, committed by Balaji Subramaniam
parent fe6857eabd
commit fde73ced13
13 changed files with 221 additions and 55 deletions


@@ -121,6 +121,7 @@ class PPOAlgorithmParameters(AlgorithmParameters):
         self.use_kl_regularization = True
         self.beta_entropy = 0.01
         self.num_consecutive_playing_steps = EnvironmentSteps(5000)
+        self.act_for_full_episodes = True


 class PPOAgentParameters(AgentParameters):
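
The new act_for_full_episodes flag exposes the "collect whole episodes before training" behaviour as an algorithm parameter instead of a hard-coded agent override (see the next hunk). A sketch of how a preset could set it follows; the import paths and attribute layout are assumptions based on Coach's usual AgentParameters.algorithm structure, not taken from this commit.

# Hypothetical preset snippet; module paths and attributes are assumptions.
from rl_coach.agents.ppo_agent import PPOAgentParameters
from rl_coach.core_types import EnvironmentSteps

agent_params = PPOAgentParameters()
agent_params.algorithm.num_consecutive_playing_steps = EnvironmentSteps(5000)
# Collect full episodes before each training phase rather than stopping mid-episode.
agent_params.algorithm.act_for_full_episodes = True
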
@@ -354,12 +355,9 @@ class PPOAgent(ActorCriticAgent):
         # clean memory
         self.call_memory('clean')

-    def _should_train_helper(self, wait_for_full_episode=True):
-        return super()._should_train_helper(True)
-
     def train(self):
         loss = 0
-        if self._should_train(wait_for_full_episode=True):
+        if self._should_train():
             for network in self.networks.values():
                 network.set_is_training(True)
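
Dropping the _should_train_helper override and the explicit wait_for_full_episode=True argument means the PPO agent no longer hard-codes "train only at episode boundaries"; with act_for_full_episodes available in the parameters, the shared _should_train logic can read that decision from configuration. A hypothetical sketch of such a parameter-driven check (names mirror the diff, but the body is illustrative rather than Coach's actual logic):

# Hypothetical sketch of a parameter-driven _should_train; not Coach's real code.
class BaseAgentSketch:
    def __init__(self, act_for_full_episodes, steps_between_training=5000):
        self.act_for_full_episodes = act_for_full_episodes
        self.steps_between_training = steps_between_training
        self.steps_since_last_training = 0
        self.episode_in_progress = True

    def _should_train(self):
        # Train once enough steps have been played and, if configured, only at
        # an episode boundary instead of mid-episode.
        enough_steps = self.steps_since_last_training >= self.steps_between_training
        if self.act_for_full_episodes:
            return enough_steps and not self.episode_in_progress
        return enough_steps
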
@@ -391,3 +389,4 @@ class PPOAgent(ActorCriticAgent):
     def get_prediction(self, states):
         tf_input_state = self.prepare_batch_for_inference(states, "actor")
         return self.networks['actor'].online_network.predict(tf_input_state)