From 2c62a40466647960bad9dfdbee3e51e733acab64 Mon Sep 17 00:00:00 2001 From: itaicaspi-intel Date: Sun, 2 Sep 2018 13:38:16 +0300 Subject: [PATCH] bug fix in dueling network + revert to TF 1.6 for CPU due to requirements compatibility issues --- .../tensorflow_components/heads/dueling_q_head.py | 14 +++++++------- setup.py | 4 ++-- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/rl_coach/architectures/tensorflow_components/heads/dueling_q_head.py b/rl_coach/architectures/tensorflow_components/heads/dueling_q_head.py index 7090236..477fdf0 100644 --- a/rl_coach/architectures/tensorflow_components/heads/dueling_q_head.py +++ b/rl_coach/architectures/tensorflow_components/heads/dueling_q_head.py @@ -39,15 +39,15 @@ class DuelingQHead(QHead): def _build_module(self, input_layer): # state value tower - V with tf.variable_scope("state_value"): - state_value = self.dense_layer(512)(input_layer, activation=self.activation_function, name='fc1') - state_value = self.dense_layer(1)(state_value, name='fc2') - # state_value = tf.expand_dims(state_value, axis=-1) + self.state_value = self.dense_layer(512)(input_layer, activation=self.activation_function, name='fc1') + self.state_value = self.dense_layer(1)(self.state_value, name='fc2') # action advantage tower - A with tf.variable_scope("action_advantage"): - action_advantage = self.dense_layer(512)(input_layer, activation=self.activation_function, name='fc1') - action_advantage = self.dense_layer(self.num_actions)(action_advantage, name='fc2') - action_advantage = action_advantage - tf.reduce_mean(action_advantage, axis=1) + self.action_advantage = self.dense_layer(512)(input_layer, activation=self.activation_function, name='fc1') + self.action_advantage = self.dense_layer(self.num_actions)(self.action_advantage, name='fc2') + self.action_mean = tf.reduce_mean(self.action_advantage, axis=1, keep_dims=True) + self.action_advantage = self.action_advantage - self.action_mean # merge to state-action value function Q - self.output = tf.add(state_value, action_advantage, name='output') + self.output = tf.add(self.state_value, self.action_advantage, name='output') diff --git a/setup.py b/setup.py index bd779d2..3a48ce6 100644 --- a/setup.py +++ b/setup.py @@ -61,9 +61,9 @@ if not using_GPU: # For linux wth no GPU, we install the Intel optimized version of TensorFlow if sys.platform == "linux" or sys.platform == "linux2": subprocess.check_call(['pip install ' - 'https://storage.googleapis.com/intel-optimized-tensorflow/tensorflow-1.10.0-cp35-cp35m-linux_x86_64.whl'], + 'https://anaconda.org/intel/tensorflow/1.6.0/download/tensorflow-1.6.0-cp35-cp35m-linux_x86_64.whl'], shell=True) - install_requires.append('tensorflow==1.10.0') + install_requires.append('tensorflow==1.6.0') else: install_requires.append('tensorflow-gpu==1.10.0')