From ee6e0bdc3b91b5fb738e8278898ceb49e7080341 Mon Sep 17 00:00:00 2001
From: Zach Dwiel
Date: Fri, 16 Feb 2018 13:30:31 -0500
Subject: [PATCH] fix keep_dims -> keepdims

---
 agents/nec_agent.py                          | 3 ++-
 agents/ppo_agent.py                          | 6 +++++-
 architectures/tensorflow_components/heads.py | 6 +++---
 3 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/agents/nec_agent.py b/agents/nec_agent.py
index 85b2855..2e06338 100644
--- a/agents/nec_agent.py
+++ b/agents/nec_agent.py
@@ -16,7 +16,8 @@
 
 import numpy as np
 
-from agents.value_optimization_agent import *
+from agents.value_optimization_agent import ValueOptimizationAgent
+from logger import screen
 
 
 # Neural Episodic Control - https://arxiv.org/pdf/1703.01988.pdf
diff --git a/agents/ppo_agent.py b/agents/ppo_agent.py
index 4de648d..daef1b1 100644
--- a/agents/ppo_agent.py
+++ b/agents/ppo_agent.py
@@ -112,8 +112,12 @@ class PPOAgent(ActorCriticAgent):
             current_values = self.critic_network.online_network.predict(current_states_batch)
             targets = current_values * (1 - mix_fraction) + total_return_batch * mix_fraction
 
+            inputs = copy.copy(current_states_batch)
+            for input_index, input in enumerate(old_policy_values):
+                inputs['output_0_{}'.format(input_index)] = input
+
             value_loss = self.critic_network.online_network.\
-                accumulate_gradients([current_states_batch] + old_policy_values, targets)
+                accumulate_gradients(inputs, targets)
             self.critic_network.apply_gradients_to_online_network()
             if self.tp.distributed:
                 self.critic_network.apply_gradients_to_global_network()
diff --git a/architectures/tensorflow_components/heads.py b/architectures/tensorflow_components/heads.py
index de6d2f8..bae947b 100644
--- a/architectures/tensorflow_components/heads.py
+++ b/architectures/tensorflow_components/heads.py
@@ -23,7 +23,7 @@ from utils import force_list
 def normalized_columns_initializer(std=1.0):
     def _initializer(shape, dtype=None, partition_info=None):
         out = np.random.randn(*shape).astype(np.float32)
-        out *= std / np.sqrt(np.square(out).sum(axis=0, keep_dims=True))
+        out *= std / np.sqrt(np.square(out).sum(axis=0, keepdims=True))
         return tf.constant(out)
     return _initializer
 
@@ -250,7 +250,7 @@ class MeasurementsPredictionHead(Head):
                                             name='output')
             action_stream = tf.reshape(action_stream, (tf.shape(action_stream)[0], self.num_actions,
                                                        self.multi_step_measurements_size))
-            action_stream = action_stream - tf.reduce_mean(action_stream, reduction_indices=1, keep_dims=True)
+            action_stream = action_stream - tf.reduce_mean(action_stream, reduction_indices=1, keepdims=True)
 
             # merge to future measurements predictions
             self.output = tf.add(expectation_stream, action_stream, name='output')
@@ -302,7 +302,7 @@ class DNDQHead(Head):
         square_diff = tf.square(dnd_embeddings - tf.expand_dims(input_layer, 1))
         distances = tf.reduce_sum(square_diff, axis=2) + [self.l2_norm_added_delta]
         weights = 1.0 / distances
-        normalised_weights = weights / tf.reduce_sum(weights, axis=1, keep_dims=True)
+        normalised_weights = weights / tf.reduce_sum(weights, axis=1, keepdims=True)
         return tf.reduce_sum(dnd_values * normalised_weights, axis=1)
 
 
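
The heads.py hunks are a pure rename: numpy has only ever accepted
`keepdims` (passing keep_dims to ndarray.sum raises a TypeError), and
TensorFlow 1.5 made `keepdims` the canonical spelling for its reduce ops,
deprecating `keep_dims`. A minimal sketch of the argument's effect,
assuming numpy and TF >= 1.5:

    import numpy as np
    import tensorflow as tf

    x = np.random.randn(4, 3).astype(np.float32)

    # numpy: keepdims retains the reduced axis with size 1 -> shape (1, 3),
    # so the result broadcasts cleanly against the original array
    col_norms = np.sqrt(np.square(x).sum(axis=0, keepdims=True))
    assert col_norms.shape == (1, 3)
    normalized = x / col_norms  # column-normalized, as in the initializer

    # TF 1.5+: same spelling; keep_dims still works but emits a deprecation warning
    t = tf.constant(x)
    row_sums = tf.reduce_sum(t, axis=1, keepdims=True)  # static shape (4, 1)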
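
The ppo_agent.py hunk changes how the critic's extra inputs are fed:
instead of appending old_policy_values to a positional list, it
shallow-copies the state batch dict and registers each tensor under the
named key 'output_0_<i>' that the head's placeholder expects. A standalone
sketch of the dict construction, with hypothetical numpy batches standing
in for coach's real feed values:

    import copy
    import numpy as np

    # hypothetical batches; coach builds these from the sampled dataset
    current_states_batch = {'observation': np.zeros((32, 4), dtype=np.float32)}
    old_policy_values = [np.zeros((32, 2), dtype=np.float32),  # e.g. old policy means
                         np.ones((32, 2), dtype=np.float32)]   # e.g. old policy stds

    inputs = copy.copy(current_states_batch)  # shallow copy keeps the caller's dict intact
    for input_index, input_value in enumerate(old_policy_values):
        inputs['output_0_{}'.format(input_index)] = input_value

    print(sorted(inputs))  # ['observation', 'output_0_0', 'output_0_1']

Keying by placeholder name rather than by position means
accumulate_gradients no longer depends on the network listing its inputs
in a fixed order.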
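
For context on the DNDQHead hunk: the surrounding lines implement the
kernel-weighted DND read from the NEC paper, with inverse-distance weights
w_i = 1 / (d_i + delta) normalised to sum to 1 over the retrieved
neighbours. A numpy sketch of the same math, with made-up shapes
(2 queries, 5 neighbours, key dimension 3):

    import numpy as np

    queries = np.random.randn(2, 3)      # state embeddings
    dnd_keys = np.random.randn(2, 5, 3)  # retrieved neighbour keys per query
    dnd_values = np.random.randn(2, 5)   # Q-values stored with those keys
    delta = 0.001                        # l2_norm_added_delta: avoids division by zero

    square_diff = np.square(dnd_keys - queries[:, None, :])
    distances = square_diff.sum(axis=2) + delta               # shape (2, 5)
    weights = 1.0 / distances
    normalised_weights = weights / weights.sum(axis=1, keepdims=True)
    q_values = (dnd_values * normalised_weights).sum(axis=1)  # shape (2,)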