Mirror of https://github.com/gryf/coach.git (synced 2025-12-17 19:20:19 +01:00)
Simulating the act on the trainer. (#65)

* Remove the use of daemon threads for Redis subscribe.
* Emulate act and observe on trainer side to update internal vars.
Committed by: Balaji Subramaniam
Parent: fe6857eabd
Commit: fde73ced13
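The first bullet refers to how the trainer subscribes to Redis pub/sub. A minimal sketch of the pattern implied by the commit message is shown below: a subscriber running on a regular (non-daemon) thread that is shut down explicitly rather than being killed when the process exits. The channel name, connection settings, and the stop-event wiring are illustrative assumptions, not Coach's actual memory-backend code.

import threading
import redis  # assumes the redis-py client is available

stop_event = threading.Event()

def handle(data):
    """Hypothetical handler; in Coach this would feed rollout data to the trainer."""
    print('received', data)

def subscribe_worker(channel='transitions', host='localhost', port=6379):
    """Poll a Redis pub/sub channel until asked to stop (illustrative only)."""
    pubsub = redis.Redis(host=host, port=port).pubsub()
    pubsub.subscribe(channel)
    while not stop_event.is_set():
        message = pubsub.get_message(timeout=1.0)  # returns None on timeout
        if message and message['type'] == 'message':
            handle(message['data'])
    pubsub.close()

# A non-daemon thread keeps running until it is joined explicitly,
# so shutdown is deterministic instead of relying on interpreter exit.
worker = threading.Thread(target=subscribe_worker, daemon=False)
worker.start()
# ... trainer work happens here ...
stop_event.set()
worker.join()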
@@ -18,7 +18,7 @@ from typing import Union, List, Dict
 
 import numpy as np
 
-from rl_coach.core_types import EnvResponse, ActionInfo, RunPhase, PredictionType, ActionType
+from rl_coach.core_types import EnvResponse, ActionInfo, RunPhase, PredictionType, ActionType, Transition
 
 
 class AgentInterface(object):
@@ -123,3 +123,33 @@ class AgentInterface(object):
         :return: None
         """
         raise NotImplementedError("")
+
+    # TODO-remove - this is a temporary flow, used by the trainer worker, duplicated from observe() - need to create
+    # an external trainer flow reusing the existing flow and methods [e.g. observe(), step(), act()]
+    def emulate_observe_on_trainer(self, transition: Transition) -> bool:
+        """
+        This emulates observe() on the training worker, using the transition obtained from the rollout worker,
+        in case of distributed training.
+        Gets a response from the environment.
+        Processes this information for later use. For example, creates a transition and stores it in memory.
+        The action info (a class containing any info the agent wants to store regarding its action decision
+        process) is stored by the agent itself when deciding on the action.
+        :param transition: a Transition containing the environment response and the action taken
+        :return: a done signal based on the agent's knowledge. This can differ from the done signal returned
+                 by the environment. For example, an agent can decide to finish the episode each time it gets
+                 some intrinsic reward
+        """
+        raise NotImplementedError("")
+
+    # TODO-remove - this is a temporary flow, used by the trainer worker, duplicated from act() - need to create
+    # an external trainer flow reusing the existing flow and methods [e.g. observe(), step(), act()]
+    def emulate_act_on_trainer(self, transition: Transition) -> ActionInfo:
+        """
+        This emulates act() on the training worker, using the transition obtained from the rollout worker,
+        in case of distributed training.
+        Gets a decision for the next action to take.
+        The action depends on the current state, which the agent holds either from resetting the environment
+        or from the observe function.
+        :return: an ActionInfo containing the chosen action and additional info on the action decision
+        """
+        raise NotImplementedError("")
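For context, the call pattern these hooks enable on the trainer side is sketched below. This is not the trainer loop from the repository; fetch_transitions(), agent, and agent.train() are hypothetical stand-ins for whatever memory backend and training step are in use. Only emulate_act_on_trainer() and emulate_observe_on_trainer() come from this diff.

# Hypothetical trainer-side loop (not part of this commit): replay rollout
# transitions through the new hooks so the agent's internal state stays
# consistent with what the rollout worker saw, then train at episode ends.
for transition in fetch_transitions():                    # e.g. transitions streamed via Redis
    agent.emulate_act_on_trainer(transition)              # update action-side internal vars
    done = agent.emulate_observe_on_trainer(transition)   # store transition, episode bookkeeping
    if done:
        agent.train()                                     # hypothetical training step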