Mirror of https://github.com/gryf/coach.git (synced 2025-12-17 19:20:19 +01:00)
Itaicaspi/episode reset refactoring (#105)
* reordering of the episode reset operation and allowing to store episodes only when they are terminated
* reordering of the episode reset operation and allowing to store episodes only when they are terminated
* revert tensorflow-gpu to 1.9.0 + bug fix in should_train()
* tests readme file and refactoring of policy optimization agent train function
* Update README.md
* Update README.md
* additional policy optimization train function simplifications
* Updated the traces after the reordering of the environment reset
* docker and jenkins files
* updated the traces to the ones from within the docker container
* updated traces and added control suite to the docker
* updated jenkins file with the intel proxy + updated doom basic a3c test params
* updated line breaks in jenkins file
* added a missing line break in jenkins file
* refining trace tests ignored presets + adding a configurable beta entropy value
* switch the order of trace and golden tests in jenkins + fix golden tests processes not killed issue
* updated benchmarks for dueling ddqn breakout and pong
* allowing dynamic updates to the loss weights + bug fix in episode.update_returns
* remove docker and jenkins file
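The bullet about storing episodes only when they are terminated comes down to buffering the transitions of the in-progress episode and committing the whole episode to memory only once a terminal transition arrives. The sketch below illustrates that idea under assumed names; Transition and EpisodicBuffer are hypothetical stand-ins for this illustration, not classes from this repository.

# Hypothetical sketch of "store episodes only when they are terminated".
# Transition and EpisodicBuffer are illustrative names, not coach classes.
from dataclasses import dataclass
from typing import Any, List


@dataclass
class Transition:
    state: Any
    action: Any
    reward: float
    game_over: bool


class EpisodicBuffer:
    def __init__(self) -> None:
        self._current_episode: List[Transition] = []
        self.stored_episodes: List[List[Transition]] = []

    def observe(self, transition: Transition) -> None:
        self._current_episode.append(transition)
        if transition.game_over:
            # The episode is committed to storage only once it has actually
            # terminated, so half-finished episodes never reach the memory.
            self.stored_episodes.append(self._current_episode)
            self._current_episode = []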
@@ -70,7 +70,7 @@ class LevelManager(EnvironmentInterface):
         self.should_reset_agent_state_after_time_limit_passes = should_reset_agent_state_after_time_limit_passes
         self.full_name_id = self.name = name
         self._phase = RunPhase.HEATUP
-        self.level_was_reset = True
+        self.reset_required = False

         # set self as the parent for all the composite agents
         for agent in self.agents.values():
@@ -104,7 +104,7 @@ class LevelManager(EnvironmentInterface):
         :return: the environment response as returned in get_last_env_response
         """
         [agent.reset_internal_state() for agent in self.agents.values()]
-        self.level_was_reset = True
+        self.reset_required = False
         if self.real_environment.current_episode_steps_counter == 0:
             self.last_env_response = self.real_environment.last_env_response
         return self.last_env_response
@@ -203,6 +203,9 @@ class LevelManager(EnvironmentInterface):
         for agent_name, agent in self.agents.items():
             agent.set_incoming_directive(action)

+        if self.reset_required:
+            self.reset_internal_state()
+
         # get last response or initial response from the environment
         env_response = copy.copy(self.environment.last_env_response)

@@ -238,7 +241,7 @@ class LevelManager(EnvironmentInterface):
             # this is the agent's only opportunity to observe this transition - he will not get another one
             acting_agent.observe(env_response) # TODO: acting agent? maybe all of the agents in the layer?
             self.handle_episode_ended()
-            self.reset_internal_state()
+            self.reset_required = True

         return env_response_for_upper_level
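Taken together, the hunks move the reset out of the episode-end path: when an episode ends, the manager now only flags self.reset_required = True, and the actual reset_internal_state() call is deferred to the start of the next step(), after the agents have observed the final transition. Below is a minimal standalone sketch of that deferred-reset flow; ToyEnv and ToyLevelManager are made-up stand-ins, not the actual coach classes.

# Minimal sketch of the deferred-reset pattern from the hunks above.
# ToyEnv and ToyLevelManager are hypothetical stand-ins, not coach classes.

class ToyEnv:
    """Toy environment whose episodes end after three steps."""
    def __init__(self):
        self.steps = 0

    def reset(self):
        self.steps = 0
        return {"observation": self.steps, "done": False}

    def step(self, action):
        self.steps += 1
        return {"observation": self.steps, "done": self.steps >= 3}


class ToyLevelManager:
    def __init__(self, env):
        self.env = env
        self.reset_required = False
        self.last_env_response = self.env.reset()

    def reset_internal_state(self):
        # In coach this would also reset every agent's internal state.
        self.last_env_response = self.env.reset()
        self.reset_required = False

    def step(self, action):
        # The reset runs lazily at the start of the next step instead of
        # immediately when the previous episode ended.
        if self.reset_required:
            self.reset_internal_state()

        env_response = self.env.step(action)
        if env_response["done"]:
            # Only flag the reset: the agent still gets to observe this
            # final transition before any state is cleared.
            self.reset_required = True
        self.last_env_response = env_response
        return env_response


if __name__ == "__main__":
    manager = ToyLevelManager(ToyEnv())
    for t in range(7):
        print(t, manager.step(action=None))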