Mirror of https://github.com/gryf/coach.git (1 watcher, 0 stars) — synced 2025-12-17 11:10:20 +01:00

Simulating the act on the trainer. (#65)

* Remove the use of daemon threads for Redis subscribe.
* Emulate act and observe on trainer side to update internal vars.
This commit is contained in:
Ajay Deshpande
2018-11-15 08:38:58 -08:00
committed by Balaji Subramaniam
parent fe6857eabd
commit fde73ced13
13 changed files with 221 additions and 55 deletions

View File

@@ -112,6 +112,7 @@ class ClippedPPOAlgorithmParameters(AlgorithmParameters):
self.optimization_epochs = 10
self.normalization_stats = None
self.clipping_decay_schedule = ConstantSchedule(1)
self.act_for_full_episodes = True
class ClippedPPOAgentParameters(AgentParameters):
@@ -294,11 +295,8 @@ class ClippedPPOAgent(ActorCriticAgent):
# clean memory
self.call_memory('clean')
def _should_train_helper(self, wait_for_full_episode=True):
    # Clipped PPO only trains on complete episodes, so the caller-supplied
    # flag is deliberately ignored and `True` is always forwarded to the
    # parent implementation.
    return super()._should_train_helper(True)
def train(self):
if self._should_train(wait_for_full_episode=True):
if self._should_train():
for network in self.networks.values():
network.set_is_training(True)
@@ -334,3 +332,4 @@ class ClippedPPOAgent(ActorCriticAgent):
def choose_action(self, curr_state):
    """Step the clipping-decay schedule, then delegate action selection
    to the parent actor-critic agent."""
    decay_schedule = self.ap.algorithm.clipping_decay_schedule
    decay_schedule.step()
    return super().choose_action(curr_state)