From 2c62a40466647960bad9dfdbee3e51e733acab64 Mon Sep 17 00:00:00 2001
From: itaicaspi-intel <itai.caspi@intel.com>
Date: Sun, 2 Sep 2018 13:38:16 +0300
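Subject: [PATCH] bug fix in dueling network + revert to TF 1.6 for CPU due to
 requirements compatibility issues

Dueling head: the mean of the action advantages is now computed with
keep_dims=True, so the reduced axis is kept with size 1 and the mean
broadcasts across the actions axis when it is subtracted from the
advantages. Previously the mean was taken over axis=1 without keep_dims,
which drops that axis before the subtraction. The state value, action
advantage and advantage mean tensors are also stored as attributes on
the head instead of being kept in local variables.

setup.py: on Linux without a GPU, the Intel-optimized wheel that gets
installed is now TensorFlow 1.6.0 instead of 1.10.0, and the non-GPU
requirement is pinned to tensorflow==1.6.0. The GPU requirement
(tensorflow-gpu==1.10.0) is unchanged.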

---
 .../tensorflow_components/heads/dueling_q_head.py  | 14 +++++++-------
 setup.py                                           |  4 ++--
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/rl_coach/architectures/tensorflow_components/heads/dueling_q_head.py b/rl_coach/architectures/tensorflow_components/heads/dueling_q_head.py
index 7090236..477fdf0 100644
--- a/rl_coach/architectures/tensorflow_components/heads/dueling_q_head.py
+++ b/rl_coach/architectures/tensorflow_components/heads/dueling_q_head.py
@@ -39,15 +39,15 @@ class DuelingQHead(QHead):
     def _build_module(self, input_layer):
         # state value tower - V
         with tf.variable_scope("state_value"):
-            state_value = self.dense_layer(512)(input_layer, activation=self.activation_function, name='fc1')
-            state_value = self.dense_layer(1)(state_value, name='fc2')
-            # state_value = tf.expand_dims(state_value, axis=-1)
+            self.state_value = self.dense_layer(512)(input_layer, activation=self.activation_function, name='fc1')
+            self.state_value = self.dense_layer(1)(self.state_value, name='fc2')
 
         # action advantage tower - A
         with tf.variable_scope("action_advantage"):
-            action_advantage = self.dense_layer(512)(input_layer, activation=self.activation_function, name='fc1')
-            action_advantage = self.dense_layer(self.num_actions)(action_advantage, name='fc2')
-            action_advantage = action_advantage - tf.reduce_mean(action_advantage, axis=1)
+            self.action_advantage = self.dense_layer(512)(input_layer, activation=self.activation_function, name='fc1')
+            self.action_advantage = self.dense_layer(self.num_actions)(self.action_advantage, name='fc2')
+            self.action_mean = tf.reduce_mean(self.action_advantage, axis=1, keep_dims=True)
+            self.action_advantage = self.action_advantage - self.action_mean
 
         # merge to state-action value function Q
-        self.output = tf.add(state_value, action_advantage, name='output')
+        self.output = tf.add(self.state_value, self.action_advantage, name='output')
diff --git a/setup.py b/setup.py
index bd779d2..3a48ce6 100644
--- a/setup.py
+++ b/setup.py
@@ -61,9 +61,9 @@ if not using_GPU:
     # For linux wth no GPU, we install the Intel optimized version of TensorFlow
     if sys.platform == "linux" or sys.platform == "linux2":
         subprocess.check_call(['pip install '
-                               'https://storage.googleapis.com/intel-optimized-tensorflow/tensorflow-1.10.0-cp35-cp35m-linux_x86_64.whl'],
+                               'https://anaconda.org/intel/tensorflow/1.6.0/download/tensorflow-1.6.0-cp35-cp35m-linux_x86_64.whl'],
                               shell=True)
-    install_requires.append('tensorflow==1.10.0')
+    install_requires.append('tensorflow==1.6.0')
 else:
     install_requires.append('tensorflow-gpu==1.10.0')