Mirror of https://github.com/gryf/coach.git, synced 2025-12-18 03:30:19 +01:00
Itaicaspi/episode reset refactoring (#105)
* reordering of the episode reset operation and allowing to store episodes only when they are terminated
* reordering of the episode reset operation and allowing to store episodes only when they are terminated
* revert tensorflow-gpu to 1.9.0 + bug fix in should_train()
* tests readme file and refactoring of policy optimization agent train function
* Update README.md
* Update README.md
* additional policy optimization train function simplifications
* Updated the traces after the reordering of the environment reset
* docker and jenkins files
* updated the traces to the ones from within the docker container
* updated traces and added control suite to the docker
* updated jenkins file with the intel proxy + updated doom basic a3c test params
* updated line breaks in jenkins file
* added a missing line break in jenkins file
* refining trace tests ignored presets + adding a configurable beta entropy value
* switch the order of trace and golden tests in jenkins + fix golden tests processes not killed issue
* updated benchmarks for dueling ddqn breakout and pong
* allowing dynamic updates to the loss weights + bug fix in episode.update_returns
* remove docker and jenkins file
@@ -431,8 +431,13 @@ class Agent(AgentInterface):
         if self.phase != RunPhase.TEST or self.ap.task_parameters.evaluate_only:
             self.current_episode += 1
 
-        if self.phase != RunPhase.TEST and isinstance(self.memory, EpisodicExperienceReplay):
-            self.call_memory('store_episode', self.current_episode_buffer)
+        if self.phase != RunPhase.TEST:
+            if isinstance(self.memory, EpisodicExperienceReplay):
+                self.call_memory('store_episode', self.current_episode_buffer)
+            elif self.ap.algorithm.store_transitions_only_when_episodes_are_terminated:
+                self.current_episode_buffer.update_returns()
+                for transition in self.current_episode_buffer.transitions:
+                    self.call_memory('store', transition)
 
         if self.phase == RunPhase.TEST:
             self.accumulated_rewards_across_evaluation_episodes += self.total_reward_in_current_episode
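The added branch above changes when transitions reach a non-episodic memory: with store_transitions_only_when_episodes_are_terminated enabled, nothing is stored step by step; instead, once the episode ends, update_returns() fills in the per-transition returns and the finished episode is pushed transition by transition. Below is a minimal, self-contained sketch of that flow; the Transition and EpisodeBuffer classes and the flat_memory list are illustrative stand-ins, not Coach's actual Episode or memory classes.

# Illustrative sketch only -- not Coach's Episode/ExperienceReplay classes.
# It mimics the new end-of-episode path above: returns are computed over the
# finished episode, and only then are the transitions pushed one by one into
# a flat (non-episodic) replay memory.
from dataclasses import dataclass, field
from typing import List


@dataclass
class Transition:
    state: int
    action: int
    reward: float
    total_return: float = 0.0   # filled in by update_returns()


@dataclass
class EpisodeBuffer:
    transitions: List[Transition] = field(default_factory=list)

    def insert(self, transition: Transition) -> None:
        self.transitions.append(transition)

    def update_returns(self, discount: float = 0.99) -> None:
        # Backward pass: G_t = r_t + discount * G_{t+1}
        running_return = 0.0
        for transition in reversed(self.transitions):
            running_return = transition.reward + discount * running_return
            transition.total_return = running_return


flat_memory: List[Transition] = []          # stands in for a non-episodic replay memory
episode = EpisodeBuffer()
for t, reward in enumerate([0.0, 0.0, 1.0]):
    episode.insert(Transition(state=t, action=0, reward=reward))

# Episode terminated: compute returns first, then store transition by transition.
episode.update_returns()
for transition in episode.transitions:
    flat_memory.append(transition)
print([round(tr.total_return, 4) for tr in flat_memory])  # [0.9801, 0.99, 1.0]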
@@ -517,7 +522,7 @@ class Agent(AgentInterface):
         elif step_method.__class__ == EnvironmentSteps:
             should_update = (self.total_steps_counter - self.last_training_phase_step) >= step_method.num_steps
             if wait_for_full_episode:
-                should_update = should_update and self.current_episode_steps_counter == 0
+                should_update = should_update and self.current_episode_buffer.is_complete
             if should_update:
                 self.last_training_phase_step = self.total_steps_counter
         else:
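This hunk swaps the wait-for-full-episode check from the per-episode step counter (which only happens to read zero right after a reset) to an explicit is_complete flag on the episode buffer, which remains meaningful after the reset reordering. A rough sketch of that training-trigger condition, with illustrative parameter names rather than Coach's actual ones:

# Illustrative sketch of the training-trigger logic above; names like
# `num_steps_between_training` are placeholders, not Coach parameters.
def should_train(total_steps: int, last_training_step: int,
                 num_steps_between_training: int,
                 wait_for_full_episode: bool,
                 episode_is_complete: bool) -> bool:
    # Have enough environment steps elapsed since the last training phase?
    due = (total_steps - last_training_step) >= num_steps_between_training
    if wait_for_full_episode:
        # After the reset reordering, the episode buffer carries an explicit
        # completion flag, which is less ambiguous than checking that the
        # per-episode step counter happens to read zero.
        due = due and episode_is_complete
    return due


assert should_train(100, 0, 50, wait_for_full_episode=True, episode_is_complete=True)
assert not should_train(100, 0, 50, wait_for_full_episode=True, episode_is_complete=False)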
@@ -728,9 +733,9 @@ class Agent(AgentInterface):
         if self.phase in [RunPhase.TRAIN, RunPhase.HEATUP]:
             # for episodic memories we keep the transitions in a local buffer until the episode is ended.
             # for regular memories we insert the transitions directly to the memory
-            if isinstance(self.memory, EpisodicExperienceReplay):
-                self.current_episode_buffer.insert(transition)
-            else:
+            self.current_episode_buffer.insert(transition)
+            if not isinstance(self.memory, EpisodicExperienceReplay) \
+                    and not self.ap.algorithm.store_transitions_only_when_episodes_are_terminated:
                 self.call_memory('store', transition)
 
         if self.ap.visualization.dump_in_episode_signals:
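After this change every transition goes into the local episode buffer regardless of memory type, and direct per-step storage happens only for non-episodic memories that have not opted into end-of-episode storage. A small sketch of that decision, using stand-in classes and flags rather than Coach's real types:

# Illustrative sketch of the per-step storage decision above; the classes and
# flags are stand-ins for Coach's memory types and algorithm parameters.
class EpisodicMemory:            # episode-level replay (stores whole episodes)
    pass


class FlatMemory(list):          # step-level replay (stores single transitions)
    pass


def observe(transition, episode_buffer, memory,
            store_transitions_only_when_episodes_are_terminated: bool) -> None:
    # Every transition is kept in the local episode buffer, regardless of the
    # memory type, so end-of-episode processing always sees the full episode.
    episode_buffer.append(transition)
    # Only flat memories with immediate storage enabled receive the transition
    # now; episodic memories (and deferred flat memories) get it at episode end.
    if not isinstance(memory, EpisodicMemory) \
            and not store_transitions_only_when_episodes_are_terminated:
        memory.append(transition)


memory = FlatMemory()
episode_buffer = []
observe({'reward': 1.0}, episode_buffer, memory,
        store_transitions_only_when_episodes_are_terminated=False)
print(len(episode_buffer), len(memory))  # 1 1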