Fix bug in DDPG: look up the critic's input gradients by input name ('action') instead of by list position

2026-02-15 05:25:55 +01:00 · 2018-02-16 20:18:03 -05:00
parent 8248caf35e
commit 5cf10e5f52
2 changed files with 2 additions and 3 deletions
--- a/agents/ddpg_agent.py
+++ b/agents/ddpg_agent.py
@@ -54,7 +54,7 @@ class DDPGAgent(ActorCriticAgent):
        actions_mean = self.actor_network.online_network.predict(current_states)
        critic_online_network = self.critic_network.online_network
        # TODO: convert into call to predict, current method ignores lstm middleware for example
-        action_gradients = self.critic_network.sess.run(critic_online_network.gradients_wrt_inputs[1],
+        action_gradients = self.critic_network.sess.run(critic_online_network.gradients_wrt_inputs['action'],
                                                        feed_dict=critic_online_network._feed_dict({
                                                            **current_states,
                                                            'action': actions_mean,
--- a/architectures/tensorflow_components/architecture.py
+++ b/architectures/tensorflow_components/architecture.py
@@ -108,8 +108,7 @@ class TensorFlowArchitecture(Architecture):
            # gradients of the outputs w.r.t. the inputs
            # at the moment, this is only used by ddpg
            if len(self.outputs) == 1:
-                # TODO: convert gradients_with_respect_to_inputs into dictionary?
+                self.gradients_wrt_inputs = {name: tf.gradients(self.outputs[0], input_ph) for name, input_ph in self.inputs.items()}
-                self.gradients_wrt_inputs = [tf.gradients(self.outputs[0], input_ph) for input_ph in self.inputs.values()]
                self.gradients_weights_ph = tf.placeholder('float32', self.outputs[0].shape, 'output_gradient_weights')
                self.weighted_gradients = tf.gradients(self.outputs[0], self.trainable_weights, self.gradients_weights_ph)