Release 0.9
Main changes are detailed below.

New features -
* CARLA 0.7 simulator integration
* Human control of the game play
* Recording of human game play and storing / loading the replay buffer
* Behavioral cloning agent and presets
* Golden tests for several presets
* Selecting between deep / shallow image embedders
* Rendering through pygame (with some boost in performance)

API changes -
* Improved environment wrapper API
* Added an evaluate flag to allow convenient evaluation of existing checkpoints
* Improved frameskip definition in Gym

Bug fixes -
* Fixed loading of checkpoints for agents with more than one network
* Fixed Python 3 compatibility of the N-Step Q learning agent
The Python 3 compatibility fix for the N-Step Q learning agent replaces xrange with range in two places:
@@ -45,7 +45,7 @@ class NStepQAgent(ValueOptimizationAgent, PolicyOptimizationAgent):
         # 1-Step Q learning
         q_st_plus_1 = self.main_network.target_network.predict(next_states)
-        for i in reversed(xrange(num_transitions)):
+        for i in reversed(range(num_transitions)):
             state_value_head_targets[i][actions[i]] = \
                 rewards[i] + (1.0 - game_overs[i]) * self.tp.agent.discount * np.max(q_st_plus_1[i], 0)
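The loop in this first hunk fills in the standard 1-step Q-learning target, rewards[i] + (1 - done) * discount * max_a Q_target(s', a), using the target network's predictions for the next states. A minimal standalone sketch of that computation (NumPy only; the function name and arguments are illustrative, not Coach's actual API):

import numpy as np

def one_step_q_targets(rewards, game_overs, actions, q_next, q_current, discount):
    # q_next: target-network Q-values for the next states, shape (N, num_actions)
    # q_current: current-network predictions used as the base targets, same shape
    targets = q_current.copy()
    for i in reversed(range(len(rewards))):
        # Bootstrap from max_a Q_target(s', a) unless the episode ended at step i
        targets[i, actions[i]] = rewards[i] + \
            (1.0 - game_overs[i]) * discount * np.max(q_next[i])
    return targets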
@@ -56,7 +56,7 @@ class NStepQAgent(ValueOptimizationAgent, PolicyOptimizationAgent):
         else:
             R = np.max(self.main_network.target_network.predict(np.expand_dims(next_states[-1], 0)))
-        for i in reversed(xrange(num_transitions)):
+        for i in reversed(range(num_transitions)):
             R = rewards[i] + self.tp.agent.discount * R
             state_value_head_targets[i][actions[i]] = R
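The second hunk is the N-step branch: when the last transition in the batch is not terminal, the return is bootstrapped from the target network's value of the final next state, and it is then accumulated backwards through the batch as R = rewards[i] + discount * R. A hedged standalone sketch of that recursion (again, names are illustrative rather than Coach's actual API):

import numpy as np

def n_step_q_targets(rewards, game_overs, actions, q_last_next, q_current, discount):
    # q_last_next: target-network Q-values for the state following the last transition
    # q_current: current-network predictions used as the base targets
    targets = q_current.copy()
    # Nothing to bootstrap from if the episode ended on the last transition
    R = 0.0 if game_overs[-1] else np.max(q_last_next)
    for i in reversed(range(len(rewards))):
        # Accumulate the discounted n-step return backwards through the batch
        R = rewards[i] + discount * R
        targets[i, actions[i]] = R
    return targets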