1
0
mirror of https://github.com/gryf/coach.git synced 2026-02-14 21:15:53 +01:00

imitation related bug fixes

This commit is contained in:
itaicaspi-intel
2018-09-12 14:54:33 +03:00
parent a9bd1047c4
commit 171fe97a3a
7 changed files with 21 additions and 22 deletions

View File

@@ -223,11 +223,13 @@ class LevelManager(EnvironmentInterface):
 # get action
 action_info = acting_agent.act()
-# step environment
-env_response = self.environment.step(action_info.action)
+# imitation agents will return no action since they don't play during training
+if action_info:
+    # step environment
+    env_response = self.environment.step(action_info.action)
-# accumulate rewards such that the master policy will see the total reward during the step phase
-accumulated_reward += env_response.reward
+    # accumulate rewards such that the master policy will see the total reward during the step phase
+    accumulated_reward += env_response.reward
 # update the env response that will be exposed to the parent agent
 env_response_for_upper_level = copy.copy(env_response)