
Simulating the act on the trainer. (#65)

* Remove the use of daemon threads for Redis subscribe.
* Emulate act and observe on trainer side to update internal vars.
Author: Ajay Deshpande
Date: 2018-11-15 08:38:58 -08:00
Committed by: Balaji Subramaniam
parent fe6857eabd
commit fde73ced13
13 changed files with 221 additions and 55 deletions
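
The intent of the change, read from the trainer's side: instead of stepping a live environment, the training worker replays transitions received from rollout workers, so the agent's internal bookkeeping (episode buffers, step counters, phase) stays in sync with what the rollout workers actually did. A minimal sketch of that loop, assuming a hypothetical memory_backend.fetch_transitions() helper and using end_episode()/train() as stand-ins for the agent's real episode-end and training hooks; none of these names are taken from this commit:

def trainer_loop(agent, memory_backend, training_iterations):
    for _ in range(training_iterations):
        # Transitions are produced by rollout workers and delivered through a
        # memory backend (e.g. Redis), not by stepping an environment locally.
        for transition in memory_backend.fetch_transitions():
            # Replay the rollout worker's step so the trainer-side agent updates
            # its internal state as if it had acted and observed itself.
            agent.emulate_act_on_trainer(transition)
            episode_done = agent.emulate_observe_on_trainer(transition)
            if episode_done:
                agent.end_episode()
        agent.train()

Returning the done signal from emulate_observe_on_trainer keeps the trainer's notion of episode boundaries aligned with what the rollout workers saw.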


@@ -18,7 +18,7 @@ from typing import Union, List, Dict
 import numpy as np
-from rl_coach.core_types import EnvResponse, ActionInfo, RunPhase, PredictionType, ActionType
+from rl_coach.core_types import EnvResponse, ActionInfo, RunPhase, PredictionType, ActionType, Transition
 class AgentInterface(object):
@@ -123,3 +123,33 @@ class AgentInterface(object):
         :return: None
         """
         raise NotImplementedError("")
+
+    # TODO-remove - this is a temporary flow, used by the trainer worker, duplicated from observe() - need to create
+    # an external trainer flow reusing the existing flow and methods [e.g. observe(), step(), act()]
+    def emulate_observe_on_trainer(self, transition: Transition) -> bool:
+        """
+        Emulates observe() on the training worker, in the case of distributed training, using a transition
+        obtained from a rollout worker instead of a response from the environment.
+        Processes this information for later use. For example, store the transition in memory.
+
+        :param transition: a Transition obtained from the rollout worker
+        :return: a done signal based on the agent's knowledge. This can differ from the done signal returned
+                 by the environment. For example, an agent can decide to finish the episode each time it gets
+                 some intrinsic reward
+        """
+        raise NotImplementedError("")
+
+    # TODO-remove - this is a temporary flow, used by the trainer worker, duplicated from act() - need to create
+    # an external trainer flow reusing the existing flow and methods [e.g. observe(), step(), act()]
+    def emulate_act_on_trainer(self, transition: Transition) -> ActionInfo:
+        """
+        Emulates act() on the training worker, in the case of distributed training, using a transition
+        obtained from a rollout worker instead of deciding on a new action locally.
+        The action is dependent on the current state which the agent holds from resetting the environment or
+        from the observe function.
+
+        :param transition: a Transition obtained from the rollout worker
+        :return: an ActionInfo containing the action and any additional info about the action decision process
+        """
+        raise NotImplementedError("")
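
For a concrete agent, the two new hooks would typically mirror the bookkeeping that observe() and act() perform in the regular single-worker flow. A minimal sketch of one possible implementation, assuming hypothetical internal fields (current_episode_buffer, total_steps_counter) and an assumed import path for AgentInterface; the actual Agent implementation touched by this commit may track different state:

from rl_coach.core_types import ActionInfo, Transition
from rl_coach.agents.agent_interface import AgentInterface  # import path assumed

class SketchAgent(AgentInterface):
    def __init__(self):
        super().__init__()
        self.current_episode_buffer = []   # hypothetical per-episode storage
        self.total_steps_counter = 0       # hypothetical step counter

    def emulate_observe_on_trainer(self, transition: Transition) -> bool:
        # Store the rollout worker's transition and advance the counters,
        # mirroring what observe() would have done if the trainer had stepped
        # the environment itself.
        self.current_episode_buffer.append(transition)
        self.total_steps_counter += 1
        return transition.game_over

    def emulate_act_on_trainer(self, transition: Transition) -> ActionInfo:
        # No action is actually chosen here; the action recorded by the rollout
        # worker is wrapped so downstream bookkeeping sees a normal ActionInfo.
        return ActionInfo(action=transition.action)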