network_imporvements branch merge

2026-02-17 06:35:47 +01:00 · 2018-10-02 13:41:46 +03:00
parent 72ea933384
commit 51726a5b80
110 changed files with 1639 additions and 1161 deletions
--- a/rl_coach/base_parameters.py
+++ b/rl_coach/base_parameters.py
@@ -22,7 +22,8 @@ from collections import OrderedDict
 from enum import Enum
 from typing import Dict, List, Union

-from rl_coach.core_types import TrainingSteps, EnvironmentSteps, GradientClippingMethod
+from rl_coach.core_types import TrainingSteps, EnvironmentSteps, GradientClippingMethod, RunPhase
+# from rl_coach.environments.environment import SelectedPhaseOnlyDumpMethod, MaxDumpMethod
 from rl_coach.filters.filter import NoInputFilter


@@ -173,7 +174,71 @@ class PresetValidationParameters(Parameters):


 class NetworkParameters(Parameters):
-    def __init__(self):
+    def __init__(self,
+                 force_cpu = False,
+                 async_training = False,
+                 shared_optimizer = True,
+                 scale_down_gradients_by_number_of_workers_for_sync_training = True,
+                 clip_gradients = None,
+                 gradients_clipping_method = GradientClippingMethod.ClipByGlobalNorm,
+                 l2_regularization = 0,
+                 learning_rate = 0.00025,
+                 learning_rate_decay_rate = 0,
+                 learning_rate_decay_steps = 0,
+                 input_embedders_parameters = {},
+                 embedding_merger_type = EmbeddingMergerType.Concat,
+                 middleware_parameters = None,
+                 heads_parameters = [],
+                 use_separate_networks_per_head = False,
+                 optimizer_type = 'Adam',
+                 optimizer_epsilon = 0.0001,
+                 adam_optimizer_beta1 = 0.9,
+                 adam_optimizer_beta2 = 0.99,
+                 rms_prop_optimizer_decay = 0.9,
+                 batch_size = 32,
+                 replace_mse_with_huber_loss = False,
+                 create_target_network = False,
+                 tensorflow_support = True):
+        """
+        :param force_cpu:
+            Force the neural networks to run on the CPU even if a GPU is available
+        :param async_training:
+            If set to True, asynchronous training will be used, meaning that each worker will progress at its own
+            speed, while not waiting for the rest of the workers to calculate their gradients.
+        :param shared_optimizer:
+            If set to True, a central optimizer which will be shared with all the workers will be used for applying
+            gradients to the network. Otherwise, each worker will have its own optimizer with its own internal
+            parameters that will only be affected by the gradients calculated by that worker
+        :param scale_down_gradients_by_number_of_workers_for_sync_training:
+            If set to True, in synchronous training, the gradients of each worker will be scaled down by the
+            number of workers. This essentially means that the gradients applied to the network are the average
+            of the gradients over all the workers.
+        :param clip_gradients:
+            A value that will be used for clipping the gradients of the network. If set to None, no gradient clipping
+            will be applied. Otherwise, the gradients will be clipped according to the gradients_clipping_method.
+        :param gradients_clipping_method:
+            A gradient clipping method, defined by a GradientClippingMethod enum, and that will be used to clip the
+            gradients of the network. This will only be used if the clip_gradients value is defined as a value other
+            than None.
+        :param l2_regularization:
+        :param learning_rate:
+        :param learning_rate_decay_rate:
+        :param learning_rate_decay_steps:
+        :param input_embedders_parameters:
+        :param embedding_merger_type:
+        :param middleware_parameters:
+        :param heads_parameters:
+        :param use_separate_networks_per_head:
+        :param optimizer_type:
+        :param optimizer_epsilon:
+        :param adam_optimizer_beta1:
+        :param adam_optimizer_beta2:
+        :param rms_prop_optimizer_decay:
+        :param batch_size:
+        :param replace_mse_with_huber_loss:
+        :param create_target_network:
+        :param tensorflow_support:
+        """
        super().__init__()
        self.framework = Frameworks.tensorflow
        self.sess = None
@@ -182,9 +247,6 @@ class NetworkParameters(Parameters):
        self.force_cpu = False

        # distributed training options
-        self.num_threads = 1
-        self.synchronize_over_num_threads = 1
-        self.distributed = False
        self.async_training = False
        self.shared_optimizer = True
        self.scale_down_gradients_by_number_of_workers_for_sync_training = True
@@ -192,7 +254,6 @@ class NetworkParameters(Parameters):
        # regularization
        self.clip_gradients = None
        self.gradients_clipping_method = GradientClippingMethod.ClipByGlobalNorm
-        self.kl_divergence_constraint = None
        self.l2_regularization = 0

        # learning rate
@@ -205,9 +266,6 @@ class NetworkParameters(Parameters):
        self.embedding_merger_type = EmbeddingMergerType.Concat
        self.middleware_parameters = None
        self.heads_parameters = []
-        self.num_output_head_copies = 1
-        self.loss_weights = []
-        self.rescale_gradient_from_head_by_factor = [1]
        self.use_separate_networks_per_head = False
        self.optimizer_type = 'Adam'
        self.optimizer_epsilon = 0.0001
@@ -227,35 +285,113 @@ class NetworkComponentParameters(Parameters):
        self.dense_layer = dense_layer


+
 class VisualizationParameters(Parameters):
-    def __init__(self):
+    def __init__(self,
+                 print_networks_summary=False,
+                 dump_csv=True,
+                 dump_signals_to_csv_every_x_episodes=5,
+                 dump_gifs=False,
+                 dump_mp4=False,
+                 video_dump_methods=[],
+                 dump_in_episode_signals=False,
+                 dump_parameters_documentation=True,
+                 render=False,
+                 native_rendering=False,
+                 max_fps_for_human_control=10,
+                 tensorboard=False,
+                 add_rendered_image_to_env_response=False):
+        """
+        :param print_networks_summary:
+            If set to True, a summary of all the networks' structure will be printed at the beginning of the experiment
+        :param dump_csv:
+            If set to True, the logger will dump logs to a csv file once in every dump_signals_to_csv_every_x_episodes
+            episodes. The logs can be later used to visualize the training process using Coach Dashboard.
+        :param dump_signals_to_csv_every_x_episodes:
+            Defines the number of episodes between writing new data to the csv log files. Lower values can affect
+            performance, as writing to disk may take time, and it is done synchronously.
+        :param dump_gifs:
+            If set to True, GIF videos of the environment will be stored into the experiment directory according to
+            the filters defined in video_dump_methods.
+        :param dump_mp4:
+            If set to True, MP4 videos of the environment will be stored into the experiment directory according to
+            the filters defined in video_dump_methods.
+        :param dump_in_episode_signals:
+            If set to True, csv files will be dumped for each episode for inspecting different metrics within the
+            episode. This means that for each step in each episode, different metrics such as the reward, the
+            future return, etc. will be saved. Setting this to True may affect performance severely, and therefore
+            this should be used only for debugging purposes.
+        :param dump_parameters_documentation:
+            If set to True, a json file containing all the agent parameters will be saved in the experiment directory.
+            This may be very useful for inspecting the values defined for each parameter and making sure that all
+            the parameters are defined as expected.
+        :param render:
+            If set to True, the environment render function will be called for each step, rendering the image of the
+            environment. This may affect the performance of training, and is highly dependent on the environment.
+            By default, Coach uses PyGame to render the environment image instead of the environment-specific renderer.
+            To change this, use the native_rendering flag.
+        :param native_rendering:
+            If set to True, the environment native renderer will be used for rendering the environment image.
+            In some cases this can be slower than rendering using PyGame through Coach, but in other cases the
+            environment opens its native renderer by default, so rendering with PyGame is an unnecessary overhead.
+        :param max_fps_for_human_control:
+            The maximum number of frames per second used while playing the environment as a human. This only has
+            effect while using the --play flag for Coach.
+        :param tensorboard:
+            If set to True, TensorBoard summaries will be stored in the experiment directory. This can later be
+            loaded in TensorBoard in order to visualize the training process.
+        :param video_dump_methods:
+            A list of dump methods that will be used as filters for deciding when to save videos.
+            The filters in the list will be checked one after the other until the first dump method that returns
+            false for should_dump() in the environment class. This list will only be used if dump_mp4 or dump_gifs are
+            set to True.
+        :param add_rendered_image_to_env_response:
+            Some environments have a different observation compared to the one displayed while rendering.
+            For some cases it can be useful to pass the rendered image to the agent for visualization purposes.
+            If this flag is set to True, the rendered image will be added to the environment EnvResponse object,
+            which will be passed to the agent and allow using those images.
+        """
        super().__init__()
-        # Visualization parameters
-        self.print_summary = True
-        self.dump_csv = True
-        self.dump_gifs = False
-        self.dump_mp4 = False
-        self.dump_signals_to_csv_every_x_episodes = 5
-        self.dump_in_episode_signals = False
-        self.dump_parameters_documentation = True
-        self.render = False
-        self.native_rendering = False
-        self.max_fps_for_human_control = 10
-        self.tensorboard = False
-        self.video_dump_methods = []  # a list of dump methods which will be checked one after the other until the first
-                                      # dump method that returns false for should_dump()
-        self.add_rendered_image_to_env_response = False
+        self.print_networks_summary = print_networks_summary
+        self.dump_csv = dump_csv
+        self.dump_gifs = dump_gifs
+        self.dump_mp4 = dump_mp4
+        self.dump_signals_to_csv_every_x_episodes = dump_signals_to_csv_every_x_episodes
+        self.dump_in_episode_signals = dump_in_episode_signals
+        self.dump_parameters_documentation = dump_parameters_documentation
+        self.render = render
+        self.native_rendering = native_rendering
+        self.max_fps_for_human_control = max_fps_for_human_control
+        self.tensorboard = tensorboard
+        self.video_dump_methods = video_dump_methods
+        self.add_rendered_image_to_env_response = add_rendered_image_to_env_response


 class AgentParameters(Parameters):
    def __init__(self, algorithm: AlgorithmParameters, exploration: 'ExplorationParameters', memory: 'MemoryParameters',
                 networks: Dict[str, NetworkParameters], visualization: VisualizationParameters=VisualizationParameters()):
        """
-        :param algorithm: the algorithmic parameters
-        :param exploration: the exploration policy parameters
-        :param memory: the memory module parameters
-        :param networks: the parameters for the networks of the agent
-        :param visualization: the visualization parameters
+        :param algorithm:
+            A class inheriting AlgorithmParameters.
+            The parameters used for the specific algorithm used by the agent.
+            These parameters can be later referenced in the agent implementation through self.ap.algorithm.
+        :param exploration:
+            Either a class inheriting ExplorationParameters or a dictionary mapping between action
+            space types and their corresponding ExplorationParameters. If a dictionary is used,
+            then when the agent is instantiated, the correct exploration policy parameters will be used
+            according to the real type of the environment action space.
+            These parameters will be used to instantiate the exploration policy.
+        :param memory:
+            A class inheriting MemoryParameters. It defines all the parameters used by the memory module.
+        :param networks:
+            A dictionary mapping between network names and their corresponding network parameters, defined
+            as a class inheriting NetworkParameters. Each element will be used in order to instantiate
+            a NetworkWrapper class, and all the network wrappers will be stored in the agent under
+            self.network_wrappers. self.network_wrappers is a dict mapping between the network name that
+            was given in the networks dict, and the instantiated network wrapper.
+        :param visualization:
+            A class inheriting VisualizationParameters and defining various parameters that can be
+            used for visualization purposes, such as printing to the screen, rendering, and saving videos.
        """
        super().__init__()
        self.visualization = visualization
@@ -278,13 +414,14 @@ class AgentParameters(Parameters):


 class TaskParameters(Parameters):
-    def __init__(self, framework_type: str, evaluate_only: bool=False, use_cpu: bool=False, experiment_path=None,
-                 seed=None):
+    def __init__(self, framework_type: str="tensorflow", evaluate_only: bool=False, use_cpu: bool=False,
+                 experiment_path="./experiments/test/", seed=None, save_checkpoint_secs=None):
        """
        :param framework_type: deep learning framework type. currently only tensorflow is supported
        :param evaluate_only: the task will be used only for evaluating the model
        :param use_cpu: use the cpu for this task
        :param experiment_path: the path to the directory which will store all the experiment outputs
+        :param save_checkpoint_secs: the number of seconds between each checkpoint saving
        :param seed: a seed to use for the random numbers generator
        """
        self.framework_type = framework_type
@@ -292,6 +429,7 @@ class TaskParameters(Parameters):
        self.evaluate_only = evaluate_only
        self.use_cpu = use_cpu
        self.experiment_path = experiment_path
+        self.save_checkpoint_secs = save_checkpoint_secs
        self.seed = seed