From ee6e0bdc3b91b5fb738e8278898ceb49e7080341 Mon Sep 17 00:00:00 2001
From: Zach Dwiel
Date: Fri, 16 Feb 2018 13:30:31 -0500
Subject: [PATCH] fix keep_dims -> keepdims

---
 agents/nec_agent.py                          | 3 ++-
 agents/ppo_agent.py                          | 6 +++++-
 architectures/tensorflow_components/heads.py | 6 +++---
 3 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/agents/nec_agent.py b/agents/nec_agent.py
index 85b2855..2e06338 100644
--- a/agents/nec_agent.py
+++ b/agents/nec_agent.py
@@ -16,7 +16,8 @@
 
 import numpy as np
 
-from agents.value_optimization_agent import *
+from agents.value_optimization_agent import ValueOptimizationAgent
+from logger import screen
 
 
 # Neural Episodic Control - https://arxiv.org/pdf/1703.01988.pdf
diff --git a/agents/ppo_agent.py b/agents/ppo_agent.py
index 4de648d..daef1b1 100644
--- a/agents/ppo_agent.py
+++ b/agents/ppo_agent.py
@@ -112,8 +112,12 @@ class PPOAgent(ActorCriticAgent):
             current_values = self.critic_network.online_network.predict(current_states_batch)
             targets = current_values * (1 - mix_fraction) + total_return_batch * mix_fraction
 
+            inputs = copy.copy(current_states_batch)
+            for input_index, input in enumerate(old_policy_values):
+                inputs['output_0_{}'.format(input_index)] = input
+
             value_loss = self.critic_network.online_network.\
-                accumulate_gradients([current_states_batch] + old_policy_values, targets)
+                accumulate_gradients(inputs, targets)
             self.critic_network.apply_gradients_to_online_network()
             if self.tp.distributed:
                 self.critic_network.apply_gradients_to_global_network()
diff --git a/architectures/tensorflow_components/heads.py b/architectures/tensorflow_components/heads.py
index de6d2f8..bae947b 100644
--- a/architectures/tensorflow_components/heads.py
+++ b/architectures/tensorflow_components/heads.py
@@ -23,7 +23,7 @@ from utils import force_list
 def normalized_columns_initializer(std=1.0):
     def _initializer(shape, dtype=None, partition_info=None):
         out = np.random.randn(*shape).astype(np.float32)
-        out *= std / np.sqrt(np.square(out).sum(axis=0, keep_dims=True))
+        out *= std / np.sqrt(np.square(out).sum(axis=0, keepdims=True))
         return tf.constant(out)
     return _initializer
 
@@ -250,7 +250,7 @@ class MeasurementsPredictionHead(Head):
                                             name='output')
             action_stream = tf.reshape(action_stream, (tf.shape(action_stream)[0], self.num_actions,
                                                        self.multi_step_measurements_size))
-            action_stream = action_stream - tf.reduce_mean(action_stream, reduction_indices=1, keep_dims=True)
+            action_stream = action_stream - tf.reduce_mean(action_stream, reduction_indices=1, keepdims=True)
 
             # merge to future measurements predictions
             self.output = tf.add(expectation_stream, action_stream, name='output')
@@ -302,7 +302,7 @@ class DNDQHead(Head):
         square_diff = tf.square(dnd_embeddings - tf.expand_dims(input_layer, 1))
         distances = tf.reduce_sum(square_diff, axis=2) + [self.l2_norm_added_delta]
         weights = 1.0 / distances
-        normalised_weights = weights / tf.reduce_sum(weights, axis=1, keep_dims=True)
+        normalised_weights = weights / tf.reduce_sum(weights, axis=1, keepdims=True)
         return tf.reduce_sum(dnd_values * normalised_weights, axis=1)
 
 
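
The heads.py hunks are a pure rename: numpy has only ever accepted
`keepdims` (passing keep_dims to ndarray.sum raises a TypeError), and
TensorFlow 1.5 made `keepdims` the canonical spelling for its reduce ops,
deprecating `keep_dims`. A minimal sketch of the argument's effect,
assuming numpy and TF >= 1.5:

    import numpy as np
    import tensorflow as tf

    x = np.random.randn(4, 3).astype(np.float32)

    # numpy: keepdims retains the reduced axis with size 1 -> shape (1, 3),
    # so the result broadcasts cleanly against the original array
    col_norms = np.sqrt(np.square(x).sum(axis=0, keepdims=True))
    assert col_norms.shape == (1, 3)
    normalized = x / col_norms  # column-normalized, as in the initializer

    # TF 1.5+: same spelling; keep_dims still works but emits a deprecation warning
    t = tf.constant(x)
    row_sums = tf.reduce_sum(t, axis=1, keepdims=True)  # static shape (4, 1)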
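
The ppo_agent.py hunk changes how the critic's extra inputs are fed:
instead of appending old_policy_values to a positional list, it
shallow-copies the state batch dict and registers each tensor under the
named key 'output_0_<i>' that the head's placeholder expects. A standalone
sketch of the dict construction, with hypothetical numpy batches standing
in for coach's real feed values:

    import copy
    import numpy as np

    # hypothetical batches; coach builds these from the sampled dataset
    current_states_batch = {'observation': np.zeros((32, 4), dtype=np.float32)}
    old_policy_values = [np.zeros((32, 2), dtype=np.float32),  # e.g. old policy means
                         np.ones((32, 2), dtype=np.float32)]   # e.g. old policy stds

    inputs = copy.copy(current_states_batch)  # shallow copy keeps the caller's dict intact
    for input_index, input_value in enumerate(old_policy_values):
        inputs['output_0_{}'.format(input_index)] = input_value

    print(sorted(inputs))  # ['observation', 'output_0_0', 'output_0_1']

Keying by placeholder name rather than by position means
accumulate_gradients no longer depends on the network listing its inputs
in a fixed order.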
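
For context on the DNDQHead hunk: the surrounding lines implement the
kernel-weighted DND read from the NEC paper, with inverse-distance weights
w_i = 1 / (d_i + delta) normalised to sum to 1 over the retrieved
neighbours. A numpy sketch of the same math, with made-up shapes
(2 queries, 5 neighbours, key dimension 3):

    import numpy as np

    queries = np.random.randn(2, 3)      # state embeddings
    dnd_keys = np.random.randn(2, 5, 3)  # retrieved neighbour keys per query
    dnd_values = np.random.randn(2, 5)   # Q-values stored with those keys
    delta = 0.001                        # l2_norm_added_delta: avoids division by zero

    square_diff = np.square(dnd_keys - queries[:, None, :])
    distances = square_diff.sum(axis=2) + delta               # shape (2, 5)
    weights = 1.0 / distances
    normalised_weights = weights / weights.sum(axis=1, keepdims=True)
    q_values = (dnd_values * normalised_weights).sum(axis=1)  # shape (2,)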