Mirror of https://github.com/gryf/coach.git, synced 2025-12-17 19:20:19 +01:00
Adding target reward and target success (#58)

* Adding target reward
* Adding target success
* Addressing comments
* Using custom_reward_threshold and target_success_rate
* Adding exit message
* Moving success rate to environment
* Making target_success_rate optional
Committed by: Balaji Subramaniam
Parent: 0fe583186e
Commit: 875d6ef017
rl_coach/environments/environment.py

@@ -103,6 +103,9 @@ class EnvironmentParameters(Parameters):
         self.default_output_filter = None
         self.experiment_path = None
 
+        # Set target reward and target_success if present
+        self.target_success_rate = 1.0
+
     @property
     def path(self):
         return 'rl_coach.environments.environment:Environment'
@@ -111,7 +114,7 @@ class EnvironmentParameters(Parameters):
 class Environment(EnvironmentInterface):
     def __init__(self, level: LevelSelection, seed: int, frame_skip: int, human_control: bool,
                  custom_reward_threshold: Union[int, float], visualization_parameters: VisualizationParameters,
-                 **kwargs):
+                 target_success_rate: float=1.0, **kwargs):
         """
         :param level: The environment level. Each environment can have multiple levels
         :param seed: a seed for the random number generator of the environment
@@ -166,6 +169,9 @@ class Environment(EnvironmentInterface):
         if not self.native_rendering:
             self.renderer = Renderer()
 
+        # Set target reward and target_success if present
+        self.target_success_rate = target_success_rate
+
     @property
     def action_space(self) -> Union[List[ActionSpace], ActionSpace]:
         """
@@ -469,3 +475,5 @@ class Environment(EnvironmentInterface):
         """
         return np.transpose(self.state['observation'], [1, 2, 0])
 
+    def get_target_success_rate(self) -> float:
+        return self.target_success_rate
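For context, here is a minimal, hypothetical sketch of how the new plumbing could be used. It is not taken from this commit: ToyEnvironment and the surrounding loop are stand-ins, and only the target_success_rate attribute, its 1.0 default, and get_target_success_rate() come from the diff above.

# Hypothetical sketch of how target_success_rate flows through an Environment
# subclass and is read back by a caller. Everything except the attribute name,
# its 1.0 default, and get_target_success_rate() is illustrative.

class ToyEnvironment:
    """Stand-in for rl_coach's Environment, reduced to the new attribute."""
    def __init__(self, target_success_rate: float = 1.0, **kwargs):
        # Mirrors the diff: store the target so callers can query it later.
        self.target_success_rate = target_success_rate

    def get_target_success_rate(self) -> float:
        return self.target_success_rate


env = ToyEnvironment(target_success_rate=0.95)

# A training loop could compare a measured success rate against the target
# and print an exit message, as the "Adding exit message" item suggests.
measured_success_rate = 0.97
if measured_success_rate >= env.get_target_success_rate():
    print("Target success rate reached; stopping training.")

Keeping the default at 1.0 and the parameter optional means existing environments and presets that never set a target are unaffected, which matches the "Making target_success_rate optional" item in the commit message.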