Mirror of https://github.com/gryf/coach.git
fix keep_dims -> keepdims
@@ -16,7 +16,8 @@
 
 import numpy as np
 
-from agents.value_optimization_agent import *
+from agents.value_optimization_agent import ValueOptimizationAgent
+from logger import screen
 
 
 # Neural Episodic Control - https://arxiv.org/pdf/1703.01988.pdf
@@ -112,8 +112,12 @@ class PPOAgent(ActorCriticAgent):
                 current_values = self.critic_network.online_network.predict(current_states_batch)
                 targets = current_values * (1 - mix_fraction) + total_return_batch * mix_fraction
 
+                inputs = copy.copy(current_states_batch)
+                for input_index, input in enumerate(old_policy_values):
+                    inputs['output_0_{}'.format(input_index)] = input
+
                 value_loss = self.critic_network.online_network.\
-                    accumulate_gradients([current_states_batch] + old_policy_values, targets)
+                    accumulate_gradients(inputs, targets)
                 self.critic_network.apply_gradients_to_online_network()
                 if self.tp.distributed:
                     self.critic_network.apply_gradients_to_global_network()
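
For context, the targets line in this hunk is the mixed value target used when training the critic: a convex combination of the critic's own predictions and the empirical returns. A minimal NumPy sketch of that formula (the array values and mix fraction are made up for illustration, not taken from coach):

import numpy as np

current_values = np.array([0.5, 1.0, -0.2, 0.3])   # critic predictions for a batch of states
total_return = np.array([0.8, 0.9, 0.1, 0.4])      # empirical discounted returns for the same batch
mix_fraction = 0.1                                  # how far to move the targets toward the returns

# Same expression as in the diff: blend the old values with the observed returns.
targets = current_values * (1 - mix_fraction) + total_return * mix_fraction
print(targets)   # approximately [0.53, 0.99, -0.17, 0.31]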
@@ -23,7 +23,7 @@ from utils import force_list
 def normalized_columns_initializer(std=1.0):
     def _initializer(shape, dtype=None, partition_info=None):
         out = np.random.randn(*shape).astype(np.float32)
-        out *= std / np.sqrt(np.square(out).sum(axis=0, keep_dims=True))
+        out *= std / np.sqrt(np.square(out).sum(axis=0, keepdims=True))
         return tf.constant(out)
     return _initializer
 
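
The rename in this hunk is a genuine bug fix: the sum here is NumPy's ndarray.sum, which only accepts keepdims, so passing keep_dims raises a TypeError. With keepdims=True the reduced axis is kept, giving one L2 norm per column that the division broadcasts across the matrix. A small standalone sketch of the same normalization, outside the coach code:

import numpy as np

std = 1.0
out = np.random.randn(5, 3).astype(np.float32)

# keepdims=True keeps the reduced axis: shape (1, 3) instead of (3,).
col_norms = np.sqrt(np.square(out).sum(axis=0, keepdims=True))
out *= std / col_norms

print(col_norms.shape)                  # (1, 3)
print(np.square(out).sum(axis=0))       # roughly [1. 1. 1.]: each column now has norm std

# np.square(out).sum(axis=0, keep_dims=True)   # raises TypeError (unknown keyword)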
@@ -250,7 +250,7 @@ class MeasurementsPredictionHead(Head):
                                             name='output')
             action_stream = tf.reshape(action_stream,
                                        (tf.shape(action_stream)[0], self.num_actions, self.multi_step_measurements_size))
-            action_stream = action_stream - tf.reduce_mean(action_stream, reduction_indices=1, keep_dims=True)
+            action_stream = action_stream - tf.reduce_mean(action_stream, reduction_indices=1, keepdims=True)
 
             # merge to future measurements predictions
             self.output = tf.add(expectation_stream, action_stream, name='output')
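
tf.reduce_mean got the same rename: keep_dims was deprecated in favor of keepdims during the TensorFlow 1.x series and later removed, so this hunk tracks the newer spelling. Functionally, the kept axis is what lets the per-action mean broadcast back against the full tensor when the advantage stream is centered, dueling-style. A NumPy sketch of that shape logic (the batch, action, and measurement sizes are made up):

import numpy as np

batch, num_actions, multi_step_measurements_size = 2, 3, 4
action_stream = np.random.randn(batch, num_actions, multi_step_measurements_size)

# The mean over the action axis with keepdims=True has shape (batch, 1, measurements),
# which broadcasts against (batch, actions, measurements); without the kept axis the
# (batch, measurements) result would not broadcast and the subtraction would fail.
centered = action_stream - action_stream.mean(axis=1, keepdims=True)
print(np.allclose(centered.mean(axis=1), 0.0))   # True: the stream is centered over actions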
@@ -302,7 +302,7 @@ class DNDQHead(Head):
         square_diff = tf.square(dnd_embeddings - tf.expand_dims(input_layer, 1))
         distances = tf.reduce_sum(square_diff, axis=2) + [self.l2_norm_added_delta]
         weights = 1.0 / distances
-        normalised_weights = weights / tf.reduce_sum(weights, axis=1, keep_dims=True)
+        normalised_weights = weights / tf.reduce_sum(weights, axis=1, keepdims=True)
         return tf.reduce_sum(dnd_values * normalised_weights, axis=1)
 
 
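
The DND head computes the NEC-style lookup from the paper linked above: squared distances from the query embedding to the stored keys, an inverse-distance kernel, and a normalized weighted sum of the stored values. A NumPy sketch of that lookup for a single query (the neighbor count, embedding size, and delta constant are illustrative):

import numpy as np

num_neighbors, embedding_size = 5, 8
l2_norm_added_delta = 1e-3              # keeps the inverse distance finite for exact matches

query = np.random.randn(embedding_size)                            # current state embedding
dnd_embeddings = np.random.randn(num_neighbors, embedding_size)    # keys retrieved from the DND
dnd_values = np.random.randn(num_neighbors)                        # stored value estimates

square_diff = np.square(dnd_embeddings - query)
distances = square_diff.sum(axis=1) + l2_norm_added_delta
weights = 1.0 / distances
normalised_weights = weights / weights.sum(keepdims=True)          # keepdims mirrors the fixed line

# The head's Q estimate is the kernel-weighted average of the stored values.
print((dnd_values * normalised_weights).sum())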