1
0
mirror of https://github.com/gryf/coach.git synced 2025-12-17 19:20:19 +01:00

improved API for getting / setting variables within the graph

This commit is contained in:
Itai Caspi
2017-10-25 16:07:58 +03:00
parent e33b0e8534
commit 1918f16079
5 changed files with 45 additions and 17 deletions

View File

@@ -351,15 +351,19 @@ class PPOHead(Head):
self.num_actions = tuning_parameters.env_instance.action_space_size
self.discrete_controls = tuning_parameters.env_instance.discrete_controls
self.output_scale = np.max(tuning_parameters.env_instance.action_space_abs_range)
# kl coefficient and its corresponding assignment operation and placeholder
self.kl_coefficient = tf.Variable(tuning_parameters.agent.initial_kl_coefficient,
trainable=False, name='kl_coefficient')
self.kl_coefficient_ph = tf.placeholder('float', name='kl_coefficient_ph')
self.assign_kl_coefficient = tf.assign(self.kl_coefficient, self.kl_coefficient_ph)
self.kl_cutoff = 2*tuning_parameters.agent.target_kl_divergence
self.high_kl_penalty_coefficient = tuning_parameters.agent.high_kl_penalty_coefficient
self.clip_likelihood_ratio_using_epsilon = tuning_parameters.agent.clip_likelihood_ratio_using_epsilon
self.use_kl_regularization = tuning_parameters.agent.use_kl_regularization
self.beta = tuning_parameters.agent.beta_entropy
def _build_module(self, input_layer):
eps = 1e-15