mirror of
https://github.com/gryf/coach.git
synced 2025-12-17 11:10:20 +01:00
Parallel agents fixes (#95)
* Parallel-agent bug fixes: checkpoint restore and TensorBoard integration. Adds support for narrow networks. Adds commented-out reference code for keeping an unlimited number of checkpoints.
This commit is contained in:
@@ -550,9 +550,10 @@ class Agent(object):
|
|||||||
if current_snapshot_period > model_snapshots_periods_passed:
|
if current_snapshot_period > model_snapshots_periods_passed:
|
||||||
model_snapshots_periods_passed = current_snapshot_period
|
model_snapshots_periods_passed = current_snapshot_period
|
||||||
self.save_model(model_snapshots_periods_passed)
|
self.save_model(model_snapshots_periods_passed)
|
||||||
to_pickle(self.running_observation_stats,
|
if self.running_observation_stats is not None:
|
||||||
os.path.join(self.tp.save_model_dir,
|
to_pickle(self.running_observation_stats,
|
||||||
"running_stats.p".format(model_snapshots_periods_passed)))
|
os.path.join(self.tp.save_model_dir,
|
||||||
|
"running_stats.p".format(model_snapshots_periods_passed)))
|
||||||
|
|
||||||
# play and record in replay buffer
|
# play and record in replay buffer
|
||||||
if self.tp.agent.collect_new_data:
|
if self.tp.agent.collect_new_data:
|
||||||
|
|||||||
@@ -69,7 +69,7 @@ class ClippedPPOAgent(ActorCriticAgent):
|
|||||||
screen.warning("WARNING: The requested policy gradient rescaler is not available")
|
screen.warning("WARNING: The requested policy gradient rescaler is not available")
|
||||||
|
|
||||||
# standardize
|
# standardize
|
||||||
advantages = (advantages - np.mean(advantages)) / np.std(advantages)
|
advantages = (advantages - np.mean(advantages)) / (np.std(advantages) + 1e-8)
|
||||||
|
|
||||||
for transition, advantage, value_target in zip(batch, advantages, value_targets):
|
for transition, advantage, value_target in zip(batch, advantages, value_targets):
|
||||||
transition.info['advantage'] = advantage
|
transition.info['advantage'] = advantage
|
||||||
|
|||||||
@@ -81,6 +81,7 @@ class NetworkWrapper(object):
|
|||||||
variables_to_restore = tf.global_variables()
|
variables_to_restore = tf.global_variables()
|
||||||
variables_to_restore = [v for v in variables_to_restore if '/online' in v.name]
|
variables_to_restore = [v for v in variables_to_restore if '/online' in v.name]
|
||||||
self.model_saver = tf.train.Saver(variables_to_restore)
|
self.model_saver = tf.train.Saver(variables_to_restore)
|
||||||
|
#, max_to_keep=None) # uncomment to unlimit number of stored checkpoints
|
||||||
if self.tp.sess and self.tp.checkpoint_restore_dir:
|
if self.tp.sess and self.tp.checkpoint_restore_dir:
|
||||||
checkpoint = tf.train.latest_checkpoint(self.tp.checkpoint_restore_dir)
|
checkpoint = tf.train.latest_checkpoint(self.tp.checkpoint_restore_dir)
|
||||||
screen.log_title("Loading checkpoint: {}".format(checkpoint))
|
screen.log_title("Loading checkpoint: {}".format(checkpoint))
|
||||||
|
|||||||
@@ -15,18 +15,20 @@
|
|||||||
#
|
#
|
||||||
|
|
||||||
import tensorflow as tf
|
import tensorflow as tf
|
||||||
from configurations import EmbedderComplexity
|
from configurations import EmbedderDepth, EmbedderWidth
|
||||||
|
|
||||||
|
|
||||||
class InputEmbedder(object):
|
class InputEmbedder(object):
|
||||||
def __init__(self, input_size, activation_function=tf.nn.relu,
|
def __init__(self, input_size, activation_function=tf.nn.relu,
|
||||||
embedder_complexity=EmbedderComplexity.Shallow, name="embedder"):
|
embedder_depth=EmbedderDepth.Shallow, embedder_width=EmbedderWidth.Wide,
|
||||||
|
name="embedder"):
|
||||||
self.name = name
|
self.name = name
|
||||||
self.input_size = input_size
|
self.input_size = input_size
|
||||||
self.activation_function = activation_function
|
self.activation_function = activation_function
|
||||||
self.input = None
|
self.input = None
|
||||||
self.output = None
|
self.output = None
|
||||||
self.embedder_complexity = embedder_complexity
|
self.embedder_depth = embedder_depth
|
||||||
|
self.embedder_width = embedder_width
|
||||||
|
|
||||||
def __call__(self, prev_input_placeholder=None):
|
def __call__(self, prev_input_placeholder=None):
|
||||||
with tf.variable_scope(self.get_name()):
|
with tf.variable_scope(self.get_name()):
|
||||||
@@ -47,15 +49,16 @@ class InputEmbedder(object):
|
|||||||
|
|
||||||
class ImageEmbedder(InputEmbedder):
|
class ImageEmbedder(InputEmbedder):
|
||||||
def __init__(self, input_size, input_rescaler=255.0, activation_function=tf.nn.relu,
|
def __init__(self, input_size, input_rescaler=255.0, activation_function=tf.nn.relu,
|
||||||
embedder_complexity=EmbedderComplexity.Shallow, name="embedder"):
|
embedder_depth=EmbedderDepth.Shallow, embedder_width=EmbedderWidth.Wide,
|
||||||
InputEmbedder.__init__(self, input_size, activation_function, embedder_complexity, name)
|
name="embedder"):
|
||||||
|
InputEmbedder.__init__(self, input_size, activation_function, embedder_depth, embedder_width, name)
|
||||||
self.input_rescaler = input_rescaler
|
self.input_rescaler = input_rescaler
|
||||||
|
|
||||||
def _build_module(self):
|
def _build_module(self):
|
||||||
# image observation
|
# image observation
|
||||||
rescaled_observation_stack = self.input / self.input_rescaler
|
rescaled_observation_stack = self.input / self.input_rescaler
|
||||||
|
|
||||||
if self.embedder_complexity == EmbedderComplexity.Shallow:
|
if self.embedder_depth == EmbedderDepth.Shallow:
|
||||||
# same embedder as used in the original DQN paper
|
# same embedder as used in the original DQN paper
|
||||||
self.observation_conv1 = tf.layers.conv2d(rescaled_observation_stack,
|
self.observation_conv1 = tf.layers.conv2d(rescaled_observation_stack,
|
||||||
filters=32, kernel_size=(8, 8), strides=(4, 4),
|
filters=32, kernel_size=(8, 8), strides=(4, 4),
|
||||||
@@ -73,7 +76,7 @@ class ImageEmbedder(InputEmbedder):
|
|||||||
|
|
||||||
self.output = tf.contrib.layers.flatten(self.observation_conv3)
|
self.output = tf.contrib.layers.flatten(self.observation_conv3)
|
||||||
|
|
||||||
elif self.embedder_complexity == EmbedderComplexity.Deep:
|
elif self.embedder_depth == EmbedderDepth.Deep:
|
||||||
# the embedder used in the CARLA papers
|
# the embedder used in the CARLA papers
|
||||||
self.observation_conv1 = tf.layers.conv2d(rescaled_observation_stack,
|
self.observation_conv1 = tf.layers.conv2d(rescaled_observation_stack,
|
||||||
filters=32, kernel_size=(5, 5), strides=(2, 2),
|
filters=32, kernel_size=(5, 5), strides=(2, 2),
|
||||||
@@ -115,24 +118,27 @@ class ImageEmbedder(InputEmbedder):
|
|||||||
|
|
||||||
class VectorEmbedder(InputEmbedder):
|
class VectorEmbedder(InputEmbedder):
|
||||||
def __init__(self, input_size, activation_function=tf.nn.relu,
|
def __init__(self, input_size, activation_function=tf.nn.relu,
|
||||||
embedder_complexity=EmbedderComplexity.Shallow, name="embedder"):
|
embedder_depth=EmbedderDepth.Shallow, embedder_width=EmbedderWidth.Wide,
|
||||||
InputEmbedder.__init__(self, input_size, activation_function, embedder_complexity, name)
|
name="embedder"):
|
||||||
|
InputEmbedder.__init__(self, input_size, activation_function, embedder_depth, embedder_width, name)
|
||||||
|
|
||||||
def _build_module(self):
|
def _build_module(self):
|
||||||
# vector observation
|
# vector observation
|
||||||
input_layer = tf.contrib.layers.flatten(self.input)
|
input_layer = tf.contrib.layers.flatten(self.input)
|
||||||
|
|
||||||
if self.embedder_complexity == EmbedderComplexity.Shallow:
|
width = 128 if self.embedder_width == EmbedderWidth.Wide else 32
|
||||||
self.output = tf.layers.dense(input_layer, 256, activation=self.activation_function,
|
|
||||||
|
if self.embedder_depth == EmbedderDepth.Shallow:
|
||||||
|
self.output = tf.layers.dense(input_layer, 2*width, activation=self.activation_function,
|
||||||
name='fc1')
|
name='fc1')
|
||||||
|
|
||||||
elif self.embedder_complexity == EmbedderComplexity.Deep:
|
elif self.embedder_depth == EmbedderDepth.Deep:
|
||||||
# the embedder used in the CARLA papers
|
# the embedder used in the CARLA papers
|
||||||
self.observation_fc1 = tf.layers.dense(input_layer, 128, activation=self.activation_function,
|
self.observation_fc1 = tf.layers.dense(input_layer, width, activation=self.activation_function,
|
||||||
name='fc1')
|
name='fc1')
|
||||||
self.observation_fc2 = tf.layers.dense(self.observation_fc1, 128, activation=self.activation_function,
|
self.observation_fc2 = tf.layers.dense(self.observation_fc1, width, activation=self.activation_function,
|
||||||
name='fc2')
|
name='fc2')
|
||||||
self.output = tf.layers.dense(self.observation_fc2, 128, activation=self.activation_function,
|
self.output = tf.layers.dense(self.observation_fc2, width, activation=self.activation_function,
|
||||||
name='fc3')
|
name='fc3')
|
||||||
else:
|
else:
|
||||||
raise ValueError("The defined embedder complexity value is invalid")
|
raise ValueError("The defined embedder complexity value is invalid")
|
||||||
|
|||||||
@@ -36,6 +36,7 @@ class GeneralTensorFlowNetwork(TensorFlowArchitecture):
|
|||||||
self.output_heads = []
|
self.output_heads = []
|
||||||
self.activation_function = self.get_activation_function(
|
self.activation_function = self.get_activation_function(
|
||||||
tuning_parameters.agent.hidden_layers_activation_function)
|
tuning_parameters.agent.hidden_layers_activation_function)
|
||||||
|
self.embedder_width = tuning_parameters.agent.embedder_width
|
||||||
|
|
||||||
TensorFlowArchitecture.__init__(self, tuning_parameters, name, global_network, network_is_local)
|
TensorFlowArchitecture.__init__(self, tuning_parameters, name, global_network, network_is_local)
|
||||||
|
|
||||||
@@ -57,22 +58,26 @@ class GeneralTensorFlowNetwork(TensorFlowArchitecture):
|
|||||||
def get_observation_embedding(with_timestep=False):
|
def get_observation_embedding(with_timestep=False):
|
||||||
if self.input_height > 1:
|
if self.input_height > 1:
|
||||||
return ImageEmbedder((self.input_height, self.input_width, self.input_depth), name="observation",
|
return ImageEmbedder((self.input_height, self.input_width, self.input_depth), name="observation",
|
||||||
input_rescaler=self.tp.agent.input_rescaler)
|
input_rescaler=self.tp.agent.input_rescaler, embedder_width=self.embedder_width)
|
||||||
else:
|
else:
|
||||||
return VectorEmbedder((self.input_width + int(with_timestep), self.input_depth), name="observation")
|
return VectorEmbedder((self.input_width + int(with_timestep), self.input_depth), name="observation",
|
||||||
|
embedder_width=self.embedder_width)
|
||||||
|
|
||||||
input_mapping = {
|
input_mapping = {
|
||||||
InputTypes.Observation: get_observation_embedding(),
|
InputTypes.Observation: get_observation_embedding(),
|
||||||
InputTypes.Measurements: VectorEmbedder(self.measurements_size, name="measurements"),
|
InputTypes.Measurements: VectorEmbedder(self.measurements_size, name="measurements",
|
||||||
InputTypes.GoalVector: VectorEmbedder(self.measurements_size, name="goal_vector"),
|
embedder_width=self.embedder_width),
|
||||||
InputTypes.Action: VectorEmbedder((self.num_actions,), name="action"),
|
InputTypes.GoalVector: VectorEmbedder(self.measurements_size, name="goal_vector",
|
||||||
|
embedder_width=self.embedder_width),
|
||||||
|
InputTypes.Action: VectorEmbedder((self.num_actions,), name="action",
|
||||||
|
embedder_width=self.embedder_width),
|
||||||
InputTypes.TimedObservation: get_observation_embedding(with_timestep=True),
|
InputTypes.TimedObservation: get_observation_embedding(with_timestep=True),
|
||||||
}
|
}
|
||||||
return input_mapping[embedder_type]
|
return input_mapping[embedder_type]
|
||||||
|
|
||||||
def get_middleware_embedder(self, middleware_type):
|
def get_middleware_embedder(self, middleware_type):
|
||||||
return {MiddlewareTypes.LSTM: LSTM_Embedder,
|
return {MiddlewareTypes.LSTM: LSTM_Embedder,
|
||||||
MiddlewareTypes.FC: FC_Embedder}.get(middleware_type)(self.activation_function)
|
MiddlewareTypes.FC: FC_Embedder}.get(middleware_type)(self.activation_function, self.embedder_width)
|
||||||
|
|
||||||
def get_output_head(self, head_type, head_idx, loss_weight=1.):
|
def get_output_head(self, head_type, head_idx, loss_weight=1.):
|
||||||
output_mapping = {
|
output_mapping = {
|
||||||
@@ -174,7 +179,8 @@ class GeneralTensorFlowNetwork(TensorFlowArchitecture):
|
|||||||
self.losses = tf.losses.get_losses(self.name)
|
self.losses = tf.losses.get_losses(self.name)
|
||||||
self.losses += tf.losses.get_regularization_losses(self.name)
|
self.losses += tf.losses.get_regularization_losses(self.name)
|
||||||
self.total_loss = tf.losses.compute_weighted_loss(self.losses, scope=self.name)
|
self.total_loss = tf.losses.compute_weighted_loss(self.losses, scope=self.name)
|
||||||
tf.summary.scalar('total_loss', self.total_loss)
|
if self.tp.visualization.tensorboard:
|
||||||
|
tf.summary.scalar('total_loss', self.total_loss)
|
||||||
|
|
||||||
|
|
||||||
# Learning rate
|
# Learning rate
|
||||||
|
|||||||
@@ -395,7 +395,6 @@ class PPOHead(Head):
|
|||||||
|
|
||||||
def _build_module(self, input_layer):
|
def _build_module(self, input_layer):
|
||||||
eps = 1e-15
|
eps = 1e-15
|
||||||
|
|
||||||
if self.discrete_controls:
|
if self.discrete_controls:
|
||||||
self.actions = tf.placeholder(tf.int32, [None], name="actions")
|
self.actions = tf.placeholder(tf.int32, [None], name="actions")
|
||||||
else:
|
else:
|
||||||
@@ -410,7 +409,7 @@ class PPOHead(Head):
|
|||||||
self.policy_mean = tf.nn.softmax(policy_values, name="policy")
|
self.policy_mean = tf.nn.softmax(policy_values, name="policy")
|
||||||
|
|
||||||
# define the distributions for the policy and the old policy
|
# define the distributions for the policy and the old policy
|
||||||
self.policy_distribution = tf.contrib.distributions.Categorical(probs=self.policy_mean)
|
self.policy_distribution = tf.contrib.distributions.Categorical(probs=(self.policy_mean + eps))
|
||||||
self.old_policy_distribution = tf.contrib.distributions.Categorical(probs=self.old_policy_mean)
|
self.old_policy_distribution = tf.contrib.distributions.Categorical(probs=self.old_policy_mean)
|
||||||
|
|
||||||
self.output = self.policy_mean
|
self.output = self.policy_mean
|
||||||
@@ -445,7 +444,7 @@ class PPOHead(Head):
|
|||||||
# calculate surrogate loss
|
# calculate surrogate loss
|
||||||
self.advantages = tf.placeholder(tf.float32, [None], name="advantages")
|
self.advantages = tf.placeholder(tf.float32, [None], name="advantages")
|
||||||
self.target = self.advantages
|
self.target = self.advantages
|
||||||
self.likelihood_ratio = self.action_probs_wrt_policy / self.action_probs_wrt_old_policy
|
self.likelihood_ratio = self.action_probs_wrt_policy / (self.action_probs_wrt_old_policy + eps)
|
||||||
if self.clip_likelihood_ratio_using_epsilon is not None:
|
if self.clip_likelihood_ratio_using_epsilon is not None:
|
||||||
max_value = 1 + self.clip_likelihood_ratio_using_epsilon
|
max_value = 1 + self.clip_likelihood_ratio_using_epsilon
|
||||||
min_value = 1 - self.clip_likelihood_ratio_using_epsilon
|
min_value = 1 - self.clip_likelihood_ratio_using_epsilon
|
||||||
|
|||||||
@@ -16,13 +16,15 @@
|
|||||||
|
|
||||||
import tensorflow as tf
|
import tensorflow as tf
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
from configurations import EmbedderWidth
|
||||||
|
|
||||||
|
|
||||||
class MiddlewareEmbedder(object):
|
class MiddlewareEmbedder(object):
|
||||||
def __init__(self, activation_function=tf.nn.relu, name="middleware_embedder"):
|
def __init__(self, activation_function=tf.nn.relu, embedder_width=EmbedderWidth.Wide, name="middleware_embedder"):
|
||||||
self.name = name
|
self.name = name
|
||||||
self.input = None
|
self.input = None
|
||||||
self.output = None
|
self.output = None
|
||||||
|
self.embedder_width = embedder_width
|
||||||
self.activation_function = activation_function
|
self.activation_function = activation_function
|
||||||
|
|
||||||
def __call__(self, input_layer):
|
def __call__(self, input_layer):
|
||||||
@@ -70,4 +72,6 @@ class LSTM_Embedder(MiddlewareEmbedder):
|
|||||||
|
|
||||||
class FC_Embedder(MiddlewareEmbedder):
|
class FC_Embedder(MiddlewareEmbedder):
|
||||||
def _build_module(self):
|
def _build_module(self):
|
||||||
self.output = tf.layers.dense(self.input, 512, activation=self.activation_function, name='fc1')
|
width = 512 if self.embedder_width == EmbedderWidth.Wide else 64
|
||||||
|
self.output = tf.layers.dense(self.input, width, activation=self.activation_function, name='fc1')
|
||||||
|
|
||||||
|
|||||||
@@ -32,11 +32,6 @@ class InputTypes(object):
|
|||||||
TimedObservation = 5
|
TimedObservation = 5
|
||||||
|
|
||||||
|
|
||||||
class EmbedderComplexity(object):
|
|
||||||
Shallow = 1
|
|
||||||
Deep = 2
|
|
||||||
|
|
||||||
|
|
||||||
class OutputTypes(object):
|
class OutputTypes(object):
|
||||||
Q = 1
|
Q = 1
|
||||||
DuelingQ = 2
|
DuelingQ = 2
|
||||||
@@ -51,6 +46,17 @@ class OutputTypes(object):
|
|||||||
QuantileRegressionQ = 11
|
QuantileRegressionQ = 11
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class EmbedderDepth(object):
|
||||||
|
Shallow = 1
|
||||||
|
Deep = 2
|
||||||
|
|
||||||
|
|
||||||
|
class EmbedderWidth(object):
|
||||||
|
Narrow = 1
|
||||||
|
Wide = 2
|
||||||
|
|
||||||
|
|
||||||
class MiddlewareTypes(object):
|
class MiddlewareTypes(object):
|
||||||
LSTM = 1
|
LSTM = 1
|
||||||
FC = 2
|
FC = 2
|
||||||
@@ -82,7 +88,8 @@ class AgentParameters(Parameters):
|
|||||||
middleware_type = MiddlewareTypes.FC
|
middleware_type = MiddlewareTypes.FC
|
||||||
loss_weights = [1.0]
|
loss_weights = [1.0]
|
||||||
stop_gradients_from_head = [False]
|
stop_gradients_from_head = [False]
|
||||||
embedder_complexity = EmbedderComplexity.Shallow
|
embedder_depth = EmbedderDepth.Shallow
|
||||||
|
embedder_width = EmbedderWidth.Wide
|
||||||
num_output_head_copies = 1
|
num_output_head_copies = 1
|
||||||
use_measurements = False
|
use_measurements = False
|
||||||
use_accumulated_reward_as_measurement = False
|
use_accumulated_reward_as_measurement = False
|
||||||
|
|||||||
@@ -128,11 +128,14 @@ if __name__ == "__main__":
|
|||||||
def init_fn(scaffold, session):
|
def init_fn(scaffold, session):
|
||||||
session.run(init_all_op)
|
session.run(init_all_op)
|
||||||
|
|
||||||
|
|
||||||
|
#saver = tf.train.Saver(max_to_keep=None) # uncomment to unlimit number of stored checkpoints
|
||||||
scaffold = tf.train.Scaffold(init_op=init_all_op,
|
scaffold = tf.train.Scaffold(init_op=init_all_op,
|
||||||
init_fn=init_fn,
|
init_fn=init_fn,
|
||||||
ready_op=ready_op,
|
ready_op=ready_op,
|
||||||
ready_for_local_init_op=ready_for_local_init_op,
|
ready_for_local_init_op=ready_for_local_init_op,
|
||||||
local_init_op=local_init_op)
|
local_init_op=local_init_op)
|
||||||
|
#saver=saver) # uncomment to unlimit number of stored checkpoints
|
||||||
|
|
||||||
# Due to awkward tensorflow behavior where the same variable is used to decide whether to restore a model
|
# Due to awkward tensorflow behavior where the same variable is used to decide whether to restore a model
|
||||||
# (and where from), or just save the model (and where to), we employ the below. In case where a restore folder
|
# (and where from), or just save the model (and where to), we employ the below. In case where a restore folder
|
||||||
@@ -156,6 +159,10 @@ if __name__ == "__main__":
|
|||||||
tuning_parameters.sess = sess
|
tuning_parameters.sess = sess
|
||||||
for network in agent.networks:
|
for network in agent.networks:
|
||||||
network.set_session(sess)
|
network.set_session(sess)
|
||||||
|
# if hasattr(network.global_network, 'lock_init'):
|
||||||
|
# sess.run(network.global_network.lock_init)
|
||||||
|
# if hasattr(network.global_network, 'release_init'):
|
||||||
|
# sess.run(network.global_network.release_init)
|
||||||
|
|
||||||
if tuning_parameters.visualization.tensorboard:
|
if tuning_parameters.visualization.tensorboard:
|
||||||
# Write the merged summaries to the current experiment directory
|
# Write the merged summaries to the current experiment directory
|
||||||
|
|||||||
11
presets.py
11
presets.py
@@ -664,8 +664,11 @@ class Humanoid_ClippedPPO(Preset):
|
|||||||
def __init__(self):
|
def __init__(self):
|
||||||
Preset.__init__(self, ClippedPPO, GymVectorObservation, ExplorationParameters)
|
Preset.__init__(self, ClippedPPO, GymVectorObservation, ExplorationParameters)
|
||||||
self.env.level = 'Humanoid-v1'
|
self.env.level = 'Humanoid-v1'
|
||||||
self.learning_rate = 0.0001
|
self.agent.embedder_width = EmbedderWidth.Narrow
|
||||||
|
self.learning_rate = 0.00001
|
||||||
self.num_heatup_steps = 0
|
self.num_heatup_steps = 0
|
||||||
|
self.evaluation_episodes = 1
|
||||||
|
self.evaluate_every_x_episodes = 1
|
||||||
self.agent.num_consecutive_training_steps = 1
|
self.agent.num_consecutive_training_steps = 1
|
||||||
self.agent.num_consecutive_playing_steps = 2048
|
self.agent.num_consecutive_playing_steps = 2048
|
||||||
self.agent.discount = 0.99
|
self.agent.discount = 0.99
|
||||||
@@ -1337,7 +1340,7 @@ class Breakout_A3C(Preset):
|
|||||||
class Carla_A3C(Preset):
|
class Carla_A3C(Preset):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
Preset.__init__(self, ActorCritic, Carla, EntropyExploration)
|
Preset.__init__(self, ActorCritic, Carla, EntropyExploration)
|
||||||
self.agent.embedder_complexity = EmbedderComplexity.Deep
|
self.agent.embedder_complexity = EmbedderDepth.Deep
|
||||||
self.agent.policy_gradient_rescaler = 'GAE'
|
self.agent.policy_gradient_rescaler = 'GAE'
|
||||||
self.learning_rate = 0.0001
|
self.learning_rate = 0.0001
|
||||||
self.num_heatup_steps = 0
|
self.num_heatup_steps = 0
|
||||||
@@ -1354,7 +1357,7 @@ class Carla_A3C(Preset):
|
|||||||
class Carla_DDPG(Preset):
|
class Carla_DDPG(Preset):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
Preset.__init__(self, DDPG, Carla, OUExploration)
|
Preset.__init__(self, DDPG, Carla, OUExploration)
|
||||||
self.agent.embedder_complexity = EmbedderComplexity.Deep
|
self.agent.embedder_complexity = EmbedderDepth.Deep
|
||||||
self.learning_rate = 0.0001
|
self.learning_rate = 0.0001
|
||||||
self.num_heatup_steps = 1000
|
self.num_heatup_steps = 1000
|
||||||
self.agent.num_consecutive_training_steps = 5
|
self.agent.num_consecutive_training_steps = 5
|
||||||
@@ -1363,7 +1366,7 @@ class Carla_DDPG(Preset):
|
|||||||
class Carla_BC(Preset):
|
class Carla_BC(Preset):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
Preset.__init__(self, BC, Carla, ExplorationParameters)
|
Preset.__init__(self, BC, Carla, ExplorationParameters)
|
||||||
self.agent.embedder_complexity = EmbedderComplexity.Deep
|
self.agent.embedder_complexity = EmbedderDepth.Deep
|
||||||
self.agent.load_memory_from_file_path = 'datasets/carla_town1.p'
|
self.agent.load_memory_from_file_path = 'datasets/carla_town1.p'
|
||||||
self.learning_rate = 0.0005
|
self.learning_rate = 0.0005
|
||||||
self.num_heatup_steps = 0
|
self.num_heatup_steps = 0
|
||||||
|
|||||||
Reference in New Issue
Block a user