mirror of
https://github.com/gryf/coach.git
synced 2025-12-18 03:30:19 +01:00
network_imporvements branch merge
This commit is contained in:
@@ -22,7 +22,8 @@ from collections import OrderedDict
|
||||
from enum import Enum
|
||||
from typing import Dict, List, Union
|
||||
|
||||
from rl_coach.core_types import TrainingSteps, EnvironmentSteps, GradientClippingMethod
|
||||
from rl_coach.core_types import TrainingSteps, EnvironmentSteps, GradientClippingMethod, RunPhase
|
||||
# from rl_coach.environments.environment import SelectedPhaseOnlyDumpMethod, MaxDumpMethod
|
||||
from rl_coach.filters.filter import NoInputFilter
|
||||
|
||||
|
||||
@@ -173,7 +174,71 @@ class PresetValidationParameters(Parameters):
|
||||
|
||||
|
||||
class NetworkParameters(Parameters):
|
||||
def __init__(self):
|
||||
def __init__(self,
|
||||
force_cpu = False,
|
||||
async_training = False,
|
||||
shared_optimizer = True,
|
||||
scale_down_gradients_by_number_of_workers_for_sync_training = True,
|
||||
clip_gradients = None,
|
||||
gradients_clipping_method = GradientClippingMethod.ClipByGlobalNorm,
|
||||
l2_regularization = 0,
|
||||
learning_rate = 0.00025,
|
||||
learning_rate_decay_rate = 0,
|
||||
learning_rate_decay_steps = 0,
|
||||
input_embedders_parameters = {},
|
||||
embedding_merger_type = EmbeddingMergerType.Concat,
|
||||
middleware_parameters = None,
|
||||
heads_parameters = [],
|
||||
use_separate_networks_per_head = False,
|
||||
optimizer_type = 'Adam',
|
||||
optimizer_epsilon = 0.0001,
|
||||
adam_optimizer_beta1 = 0.9,
|
||||
adam_optimizer_beta2 = 0.99,
|
||||
rms_prop_optimizer_decay = 0.9,
|
||||
batch_size = 32,
|
||||
replace_mse_with_huber_loss = False,
|
||||
create_target_network = False,
|
||||
tensorflow_support = True):
|
||||
"""
|
||||
:param force_cpu:
|
||||
Force the neural networks to run on the CPU even if a GPU is available
|
||||
:param async_training:
|
||||
If set to True, asynchronous training will be used, meaning that each worker will progress at its own
|
||||
speed, while not waiting for the rest of the workers to calculate their gradients.
|
||||
:param shared_optimizer:
|
||||
If set to True, a central optimizer which will be shared with all the workers will be used for applying
|
||||
gradients to the network. Otherwise, each worker will have its own optimizer with its own internal
|
||||
parameters that will only be affected by the gradients calculated by that worker
|
||||
:param scale_down_gradients_by_number_of_workers_for_sync_training:
|
||||
If set to True, in synchronous training, the gradients of each worker will be scaled down by the
|
||||
number of workers. This essentially means that the gradients applied to the network are the average
|
||||
of the gradients over all the workers.
|
||||
:param clip_gradients:
|
||||
A value that will be used for clipping the gradients of the network. If set to None, no gradient clipping
|
||||
will be applied. Otherwise, the gradients will be clipped according to the gradients_clipping_method.
|
||||
:param gradients_clipping_method:
|
||||
A gradient clipping method, defined by a GradientClippingMethod enum, and that will be used to clip the
|
||||
gradients of the network. This will only be used if the clip_gradients value is defined as a value other
|
||||
than None.
|
||||
:param l2_regularization:
|
||||
:param learning_rate:
|
||||
:param learning_rate_decay_rate:
|
||||
:param learning_rate_decay_steps:
|
||||
:param input_embedders_parameters:
|
||||
:param embedding_merger_type:
|
||||
:param middleware_parameters:
|
||||
:param heads_parameters:
|
||||
:param use_separate_networks_per_head:
|
||||
:param optimizer_type:
|
||||
:param optimizer_epsilon:
|
||||
:param adam_optimizer_beta1:
|
||||
:param adam_optimizer_beta2:
|
||||
:param rms_prop_optimizer_decay:
|
||||
:param batch_size:
|
||||
:param replace_mse_with_huber_loss:
|
||||
:param create_target_network:
|
||||
:param tensorflow_support:
|
||||
"""
|
||||
super().__init__()
|
||||
self.framework = Frameworks.tensorflow
|
||||
self.sess = None
|
||||
@@ -182,9 +247,6 @@ class NetworkParameters(Parameters):
|
||||
self.force_cpu = False
|
||||
|
||||
# distributed training options
|
||||
self.num_threads = 1
|
||||
self.synchronize_over_num_threads = 1
|
||||
self.distributed = False
|
||||
self.async_training = False
|
||||
self.shared_optimizer = True
|
||||
self.scale_down_gradients_by_number_of_workers_for_sync_training = True
|
||||
@@ -192,7 +254,6 @@ class NetworkParameters(Parameters):
|
||||
# regularization
|
||||
self.clip_gradients = None
|
||||
self.gradients_clipping_method = GradientClippingMethod.ClipByGlobalNorm
|
||||
self.kl_divergence_constraint = None
|
||||
self.l2_regularization = 0
|
||||
|
||||
# learning rate
|
||||
@@ -205,9 +266,6 @@ class NetworkParameters(Parameters):
|
||||
self.embedding_merger_type = EmbeddingMergerType.Concat
|
||||
self.middleware_parameters = None
|
||||
self.heads_parameters = []
|
||||
self.num_output_head_copies = 1
|
||||
self.loss_weights = []
|
||||
self.rescale_gradient_from_head_by_factor = [1]
|
||||
self.use_separate_networks_per_head = False
|
||||
self.optimizer_type = 'Adam'
|
||||
self.optimizer_epsilon = 0.0001
|
||||
@@ -227,35 +285,113 @@ class NetworkComponentParameters(Parameters):
|
||||
self.dense_layer = dense_layer
|
||||
|
||||
|
||||
|
||||
class VisualizationParameters(Parameters):
|
||||
def __init__(self):
|
||||
def __init__(self,
|
||||
print_networks_summary=False,
|
||||
dump_csv=True,
|
||||
dump_signals_to_csv_every_x_episodes=5,
|
||||
dump_gifs=False,
|
||||
dump_mp4=False,
|
||||
video_dump_methods=[],
|
||||
dump_in_episode_signals=False,
|
||||
dump_parameters_documentation=True,
|
||||
render=False,
|
||||
native_rendering=False,
|
||||
max_fps_for_human_control=10,
|
||||
tensorboard=False,
|
||||
add_rendered_image_to_env_response=False):
|
||||
"""
|
||||
:param print_networks_summary:
|
||||
If set to True, a summary of the structure of all the networks will be printed at the beginning of the experiment
|
||||
:param dump_csv:
|
||||
If set to True, the logger will dump logs to a csv file once in every dump_signals_to_csv_every_x_episodes
|
||||
episodes. The logs can be later used to visualize the training process using Coach Dashboard.
|
||||
:param dump_signals_to_csv_every_x_episodes:
|
||||
Defines the number of episodes between writing new data to the csv log files. Lower values can affect
|
||||
performance, as writing to disk may take time, and it is done synchronously.
|
||||
:param dump_gifs:
|
||||
If set to True, GIF videos of the environment will be stored into the experiment directory according to
|
||||
the filters defined in video_dump_methods.
|
||||
:param dump_mp4:
|
||||
If set to True, MP4 videos of the environment will be stored into the experiment directory according to
|
||||
the filters defined in video_dump_methods.
|
||||
:param dump_in_episode_signals:
|
||||
If set to True, csv files will be dumped for each episode for inspecting different metrics within the
|
||||
episode. This means that for each step in each episode, different metrics such as the reward, the
|
||||
future return, etc. will be saved. Setting this to True may affect performance severely, and therefore
|
||||
this should be used only for debugging purposes.
|
||||
:param dump_parameters_documentation:
|
||||
If set to True, a json file containing all the agent parameters will be saved in the experiment directory.
|
||||
This may be very useful for inspecting the values defined for each parameter and making sure that all
|
||||
the parameters are defined as expected.
|
||||
:param render:
|
||||
If set to True, the environment render function will be called for each step, rendering the image of the
|
||||
environment. This may affect the performance of training, and is highly dependent on the environment.
|
||||
By default, Coach uses PyGame to render the environment image instead of the environment-specific renderer.
|
||||
To change this, use the native_rendering flag.
|
||||
:param native_rendering:
|
||||
If set to True, the environment native renderer will be used for rendering the environment image.
|
||||
In some cases this can be slower than rendering using PyGame through Coach, but in other cases the
|
||||
environment opens its native renderer by default, so rendering with PyGame is an unnecessary overhead.
|
||||
:param max_fps_for_human_control:
|
||||
The maximum number of frames per second used while playing the environment as a human. This only has
|
||||
effect while using the --play flag for Coach.
|
||||
:param tensorboard:
|
||||
If set to True, TensorBoard summaries will be stored in the experiment directory. This can later be
|
||||
loaded in TensorBoard in order to visualize the training process.
|
||||
:param video_dump_methods:
|
||||
A list of dump methods that will be used as filters for deciding when to save videos.
|
||||
The filters in the list will be checked one after the other until the first dump method that returns
|
||||
false for should_dump() in the environment class. This list will only be used if dump_mp4 or dump_gifs are
|
||||
set to True.
|
||||
:param add_rendered_image_to_env_response:
|
||||
Some environments have a different observation compared to the one displayed while rendering.
|
||||
For some cases it can be useful to pass the rendered image to the agent for visualization purposes.
|
||||
If this flag is set to True, the rendered image will be added to the environment EnvResponse object,
|
||||
which will be passed to the agent and allow using those images.
|
||||
"""
|
||||
super().__init__()
|
||||
# Visualization parameters
|
||||
self.print_summary = True
|
||||
self.dump_csv = True
|
||||
self.dump_gifs = False
|
||||
self.dump_mp4 = False
|
||||
self.dump_signals_to_csv_every_x_episodes = 5
|
||||
self.dump_in_episode_signals = False
|
||||
self.dump_parameters_documentation = True
|
||||
self.render = False
|
||||
self.native_rendering = False
|
||||
self.max_fps_for_human_control = 10
|
||||
self.tensorboard = False
|
||||
self.video_dump_methods = [] # a list of dump methods which will be checked one after the other until the first
|
||||
# dump method that returns false for should_dump()
|
||||
self.add_rendered_image_to_env_response = False
|
||||
self.print_networks_summary = print_networks_summary
|
||||
self.dump_csv = dump_csv
|
||||
self.dump_gifs = dump_gifs
|
||||
self.dump_mp4 = dump_mp4
|
||||
self.dump_signals_to_csv_every_x_episodes = dump_signals_to_csv_every_x_episodes
|
||||
self.dump_in_episode_signals = dump_in_episode_signals
|
||||
self.dump_parameters_documentation = dump_parameters_documentation
|
||||
self.render = render
|
||||
self.native_rendering = native_rendering
|
||||
self.max_fps_for_human_control = max_fps_for_human_control
|
||||
self.tensorboard = tensorboard
|
||||
self.video_dump_methods = video_dump_methods
|
||||
self.add_rendered_image_to_env_response = add_rendered_image_to_env_response
|
||||
|
||||
|
||||
class AgentParameters(Parameters):
|
||||
def __init__(self, algorithm: AlgorithmParameters, exploration: 'ExplorationParameters', memory: 'MemoryParameters',
|
||||
networks: Dict[str, NetworkParameters], visualization: VisualizationParameters=VisualizationParameters()):
|
||||
"""
|
||||
:param algorithm: the algorithmic parameters
|
||||
:param exploration: the exploration policy parameters
|
||||
:param memory: the memory module parameters
|
||||
:param networks: the parameters for the networks of the agent
|
||||
:param visualization: the visualization parameters
|
||||
:param algorithm:
|
||||
A class inheriting AlgorithmParameters.
|
||||
The parameters used for the specific algorithm used by the agent.
|
||||
These parameters can be later referenced in the agent implementation through self.ap.algorithm.
|
||||
:param exploration:
|
||||
Either a class inheriting ExplorationParameters or a dictionary mapping between action
|
||||
space types and their corresponding ExplorationParameters. If a dictionary was used,
|
||||
when the agent is instantiated, the correct exploration policy parameters will be used
|
||||
according to the real type of the environment action space.
|
||||
These parameters will be used to instantiate the exploration policy.
|
||||
:param memory:
|
||||
A class inheriting MemoryParameters. It defines all the parameters used by the memory module.
|
||||
:param networks:
|
||||
A dictionary mapping between network names and their corresponding network parameters, defined
|
||||
as a class inheriting NetworkParameters. Each element will be used in order to instantiate
|
||||
a NetworkWrapper class, and all the network wrappers will be stored in the agent under
|
||||
self.network_wrappers. self.network_wrappers is a dict mapping between the network name that
|
||||
was given in the networks dict, and the instantiated network wrapper.
|
||||
:param visualization:
|
||||
A class inheriting VisualizationParameters and defining various parameters that can be
|
||||
used for visualization purposes, such as printing to the screen, rendering, and saving videos.
|
||||
"""
|
||||
super().__init__()
|
||||
self.visualization = visualization
|
||||
@@ -278,13 +414,14 @@ class AgentParameters(Parameters):
|
||||
|
||||
|
||||
class TaskParameters(Parameters):
|
||||
def __init__(self, framework_type: str, evaluate_only: bool=False, use_cpu: bool=False, experiment_path=None,
|
||||
seed=None):
|
||||
def __init__(self, framework_type: str="tensorflow", evaluate_only: bool=False, use_cpu: bool=False,
|
||||
experiment_path="./experiments/test/", seed=None, save_checkpoint_secs=None):
|
||||
"""
|
||||
:param framework_type: deep learning framework type. currently only tensorflow is supported
|
||||
:param evaluate_only: the task will be used only for evaluating the model
|
||||
:param use_cpu: use the cpu for this task
|
||||
:param experiment_path: the path to the directory which will store all the experiment outputs
|
||||
:param save_checkpoint_secs: the number of seconds between each checkpoint saving
|
||||
:param seed: a seed to use for the random numbers generator
|
||||
"""
|
||||
self.framework_type = framework_type
|
||||
@@ -292,6 +429,7 @@ class TaskParameters(Parameters):
|
||||
self.evaluate_only = evaluate_only
|
||||
self.use_cpu = use_cpu
|
||||
self.experiment_path = experiment_path
|
||||
self.save_checkpoint_secs = save_checkpoint_secs
|
||||
self.seed = seed
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user