mirror of
https://github.com/gryf/coach.git
synced 2025-12-17 19:20:19 +01:00
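Fix bug in DDPG: look up the critic's action gradients by input name ('action') instead of positional index 1, making gradients_wrt_inputs a dict keyed by input name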
This commit is contained in:
@@ -54,7 +54,7 @@ class DDPGAgent(ActorCriticAgent):
|
|||||||
actions_mean = self.actor_network.online_network.predict(current_states)
|
actions_mean = self.actor_network.online_network.predict(current_states)
|
||||||
critic_online_network = self.critic_network.online_network
|
critic_online_network = self.critic_network.online_network
|
||||||
# TODO: convert into call to predict, current method ignores lstm middleware for example
|
# TODO: convert into call to predict, current method ignores lstm middleware for example
|
||||||
action_gradients = self.critic_network.sess.run(critic_online_network.gradients_wrt_inputs[1],
|
action_gradients = self.critic_network.sess.run(critic_online_network.gradients_wrt_inputs['action'],
|
||||||
feed_dict=critic_online_network._feed_dict({
|
feed_dict=critic_online_network._feed_dict({
|
||||||
**current_states,
|
**current_states,
|
||||||
'action': actions_mean,
|
'action': actions_mean,
|
||||||
|
|||||||
@@ -108,8 +108,7 @@ class TensorFlowArchitecture(Architecture):
|
|||||||
# gradients of the outputs w.r.t. the inputs
|
# gradients of the outputs w.r.t. the inputs
|
||||||
# at the moment, this is only used by ddpg
|
# at the moment, this is only used by ddpg
|
||||||
if len(self.outputs) == 1:
|
if len(self.outputs) == 1:
|
||||||
# TODO: convert gradients_with_respect_to_inputs into dictionary?
|
self.gradients_wrt_inputs = {name: tf.gradients(self.outputs[0], input_ph) for name, input_ph in self.inputs.items()}
|
||||||
self.gradients_wrt_inputs = [tf.gradients(self.outputs[0], input_ph) for input_ph in self.inputs.values()]
|
|
||||||
self.gradients_weights_ph = tf.placeholder('float32', self.outputs[0].shape, 'output_gradient_weights')
|
self.gradients_weights_ph = tf.placeholder('float32', self.outputs[0].shape, 'output_gradient_weights')
|
||||||
self.weighted_gradients = tf.gradients(self.outputs[0], self.trainable_weights, self.gradients_weights_ph)
|
self.weighted_gradients = tf.gradients(self.outputs[0], self.trainable_weights, self.gradients_weights_ph)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user