mirror of https://github.com/gryf/coach.git (synced 2025-12-17 19:20:19 +01:00)

#
# Copyright (c) 2017 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import inspect
import json
import os
import sys
import types
from collections import OrderedDict
from enum import Enum
from typing import Dict, List, Union

from rl_coach.core_types import TrainingSteps, EnvironmentSteps, GradientClippingMethod
from rl_coach.filters.filter import NoInputFilter


class Frameworks(Enum):
    tensorflow = "TensorFlow"


class EmbedderScheme(Enum):
    Empty = "Empty"
    Shallow = "Shallow"
    Medium = "Medium"
    Deep = "Deep"


class MiddlewareScheme(Enum):
    Empty = "Empty"
    Shallow = "Shallow"
    Medium = "Medium"
    Deep = "Deep"


class EmbeddingMergerType(Enum):
    Concat = 0
    Sum = 1
    # ConcatDepthWise = 2
    # Multiply = 3


def iterable_to_items(obj):
    if isinstance(obj, dict) or isinstance(obj, OrderedDict) or isinstance(obj, types.MappingProxyType):
        items = obj.items()
    elif isinstance(obj, list):
        items = enumerate(obj)
    else:
        raise ValueError("The given object is not a dict or a list")
    return items
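
# Illustrative note (not part of the original source): iterable_to_items()
# normalizes mappings and lists into a common (key, value) iterator, e.g.
#     list(iterable_to_items({'a': 1}))    -> [('a', 1)]
#     list(iterable_to_items(['x', 'y']))  -> [(0, 'x'), (1, 'y')]
# Anything else raises ValueError.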


def unfold_dict_or_list(obj: Union[Dict, List, OrderedDict]):
    """
    Recursively unfolds all the parameters in dictionaries and lists
    :param obj: a dictionary or list to unfold
    :return: the unfolded parameters dictionary
    """
    parameters = OrderedDict()
    items = iterable_to_items(obj)
    for k, v in items:
        if isinstance(v, dict) or isinstance(v, list) or isinstance(v, OrderedDict):
            if 'tensorflow.' not in str(v.__class__):
                parameters[k] = unfold_dict_or_list(v)
            elif 'tensorflow.' in str(v.__class__):
                parameters[k] = v
        elif hasattr(v, '__dict__'):
            sub_params = v.__dict__
            if '__objclass__' not in sub_params.keys():
                try:
                    parameters[k] = unfold_dict_or_list(sub_params)
                except RecursionError:
                    parameters[k] = sub_params
                parameters[k]['__class__'] = v.__class__.__name__
            else:
                # unfolding this type of object will result in infinite recursion
                parameters[k] = sub_params
        else:
            parameters[k] = v
    if not isinstance(obj, OrderedDict) and not isinstance(obj, list):
        parameters = OrderedDict(sorted(parameters.items()))
    return parameters
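
# Illustrative sketch (added for documentation, not in the original source):
# unfold_dict_or_list() is what Parameters.__str__ uses to turn a nested
# parameters object into a JSON-serializable OrderedDict, e.g.
#     params = {'algorithm': AlgorithmParameters(), 'discount': 0.99}
#     unfolded = unfold_dict_or_list(params)
#     # unfolded['discount'] == 0.99, and unfolded['algorithm'] is an
#     # OrderedDict of the AlgorithmParameters attributes, with an extra
#     # '__class__' key naming the original class.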


class Parameters(object):
    def __setattr__(self, key, value):
        caller_name = sys._getframe(1).f_code.co_name

        if caller_name != '__init__' and not hasattr(self, key):
            raise TypeError("Parameter '{}' does not exist in {}. Parameters are only to be defined in a constructor of"
                            " a class inheriting from Parameters. In order to explicitly register a new parameter "
                            "outside of a constructor use register_var().".format(key, self.__class__))
        object.__setattr__(self, key, value)

    @property
    def path(self):
        if hasattr(self, 'parameterized_class_name'):
            module_path = os.path.relpath(inspect.getfile(self.__class__), os.getcwd())[:-3] + '.py'

            return ':'.join([module_path, self.parameterized_class_name])
        else:
            raise ValueError("The parameters class does not have an attached class it parameterizes. "
                             "The self.parameterized_class_name should be set to the parameterized class.")

    def register_var(self, key, value):
        if hasattr(self, key):
            raise TypeError("Cannot register an already existing parameter '{}'.".format(key))
        object.__setattr__(self, key, value)

    def __str__(self):
        result = "\"{}\" {}\n".format(self.__class__.__name__,
                                      json.dumps(unfold_dict_or_list(self.__dict__), indent=4, default=repr))
        return result
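
# Illustrative sketch (not part of the original source): the __setattr__ guard
# above means attributes can only be created inside an __init__, which catches
# typos in presets early, e.g.
#     p = AlgorithmParameters()
#     p.discount = 0.95            # fine - defined in AlgorithmParameters.__init__
#     p.dicsount = 0.95            # raises TypeError - unknown parameter
#     p.register_var('extra', 1)   # explicit opt-in for a brand new parameter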


class AlgorithmParameters(Parameters):
    def __init__(self):
        # Architecture parameters
        self.use_accumulated_reward_as_measurement = False

        # Agent parameters
        self.num_consecutive_playing_steps = EnvironmentSteps(1)
        self.num_consecutive_training_steps = 1  # TODO: update this to TrainingSteps

        self.heatup_using_network_decisions = False
        self.discount = 0.99
        self.apply_gradients_every_x_episodes = 5
        self.num_steps_between_copying_online_weights_to_target = TrainingSteps(0)
        self.rate_for_copying_weights_to_target = 1.0
        self.load_memory_from_file_path = None
        self.collect_new_data = True

        # HRL / HER related params
        self.in_action_space = None

        # distributed agents params
        self.share_statistics_between_workers = True

        # intrinsic reward
        self.scale_external_reward_by_intrinsic_reward_value = False
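
# Illustrative sketch (not from the original source): agent-specific parameter
# classes typically subclass AlgorithmParameters and override defaults inside
# their own __init__, which keeps the __setattr__ guard satisfied, e.g.
#     class MyAlgorithmParameters(AlgorithmParameters):
#         def __init__(self):
#             super().__init__()
#             self.discount = 0.995
#             self.num_consecutive_playing_steps = EnvironmentSteps(4)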


class PresetValidationParameters(Parameters):
    def __init__(self):
        super().__init__()

        # setting a seed will only work for non-parallel algorithms. Parallel algorithms add uncontrollable noise in
        # the form of different workers starting at different times, and getting different assignments of CPU
        # time from the OS.

        # Testing parameters
        self.test = False
        self.min_reward_threshold = 0
        self.max_episodes_to_achieve_reward = 1
        self.num_workers = 1
        self.reward_test_level = None
        self.trace_test_levels = None
        self.trace_max_env_steps = 5000
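
# Illustrative sketch (not from the original source): presets that should be
# covered by regression tests usually flip `test` on and declare the reward the
# preset is expected to reach within a bounded number of episodes, e.g.
#     preset_validation_params = PresetValidationParameters()
#     preset_validation_params.test = True
#     preset_validation_params.min_reward_threshold = 150
#     preset_validation_params.max_episodes_to_achieve_reward = 250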


class NetworkParameters(Parameters):
    def __init__(self):
        super().__init__()
        self.framework = Frameworks.tensorflow
        self.sess = None

        # hardware parameters
        self.force_cpu = False

        # distributed training options
        self.num_threads = 1
        self.synchronize_over_num_threads = 1
        self.distributed = False
        self.async_training = False
        self.shared_optimizer = True
        self.scale_down_gradients_by_number_of_workers_for_sync_training = True

        # regularization
        self.clip_gradients = None
        self.gradients_clipping_method = GradientClippingMethod.ClipByGlobalNorm
        self.kl_divergence_constraint = None
        self.l2_regularization = 0

        # learning rate
        self.learning_rate = 0.00025
        self.learning_rate_decay_rate = 0
        self.learning_rate_decay_steps = 0

        # structure
        self.input_embedders_parameters = []
        self.embedding_merger_type = EmbeddingMergerType.Concat
        self.middleware_parameters = None
        self.heads_parameters = []
        self.num_output_head_copies = 1
        self.loss_weights = []
        self.rescale_gradient_from_head_by_factor = [1]
        self.use_separate_networks_per_head = False
        self.optimizer_type = 'Adam'
        self.optimizer_epsilon = 0.0001
        self.adam_optimizer_beta1 = 0.9
        self.adam_optimizer_beta2 = 0.99
        self.rms_prop_optimizer_decay = 0.9
        self.batch_size = 32
        self.replace_mse_with_huber_loss = False
        self.create_target_network = False

        # Framework support
        self.tensorflow_support = True
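
# Illustrative sketch (not from the original source): concrete network
# definitions subclass NetworkParameters and override the structure/optimizer
# fields in their constructor, e.g.
#     class MyNetworkParameters(NetworkParameters):
#         def __init__(self):
#             super().__init__()
#             self.learning_rate = 0.001
#             self.batch_size = 64
#             self.clip_gradients = 40.0
#             self.create_target_network = True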


class InputEmbedderParameters(Parameters):
    def __init__(self, activation_function: str='relu', scheme: Union[List, EmbedderScheme]=EmbedderScheme.Medium,
                 batchnorm: bool=False, dropout=False, name: str='embedder', input_rescaling=None, input_offset=None,
                 input_clipping=None):
        super().__init__()
        self.activation_function = activation_function
        self.scheme = scheme
        self.batchnorm = batchnorm
        self.dropout = dropout

        if input_rescaling is None:
            input_rescaling = {'image': 255.0, 'vector': 1.0}
        if input_offset is None:
            input_offset = {'image': 0.0, 'vector': 0.0}

        self.input_rescaling = input_rescaling
        self.input_offset = input_offset
        self.input_clipping = input_clipping
        self.name = name

    @property
    def path(self):
        return {
            "image": 'image_embedder:ImageEmbedder',
            "vector": 'vector_embedder:VectorEmbedder'
        }
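
# Illustrative usage (not from the original source): a deeper image embedder
# with batch normalization, keeping the default rescaling of image inputs
# by 255.0:
#     embedder = InputEmbedderParameters(scheme=EmbedderScheme.Deep, batchnorm=True)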


class VisualizationParameters(Parameters):
    def __init__(self):
        super().__init__()
        # Visualization parameters
        self.print_summary = True
        self.dump_csv = True
        self.dump_gifs = False
        self.dump_mp4 = False
        self.dump_signals_to_csv_every_x_episodes = 5
        self.dump_in_episode_signals = False
        self.dump_parameters_documentation = True
        self.render = False
        self.native_rendering = False
        self.max_fps_for_human_control = 10
        self.tensorboard = False
        self.video_dump_methods = []  # a list of dump methods which will be checked one after the other until the
                                      # first dump method that returns false for should_dump()
        self.add_rendered_image_to_env_response = False
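
# Illustrative usage (not from the original source): rendering the environment
# and dumping episode GIFs can be enabled by overriding the defaults, e.g.
#     vis_params = VisualizationParameters()
#     vis_params.render = True
#     vis_params.dump_gifs = True
#     vis_params.tensorboard = True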


class AgentParameters(Parameters):
    def __init__(self, algorithm: AlgorithmParameters, exploration: 'ExplorationParameters', memory: 'MemoryParameters',
                 networks: Dict[str, NetworkParameters], visualization: VisualizationParameters=VisualizationParameters()):
        """
        :param algorithm: the algorithmic parameters
        :param exploration: the exploration policy parameters
        :param memory: the memory module parameters
        :param networks: the parameters for the networks of the agent
        :param visualization: the visualization parameters
        """
        super().__init__()
        self.visualization = visualization
        self.algorithm = algorithm
        self.exploration = exploration
        self.memory = memory
        self.network_wrappers = networks
        self.input_filter = None
        self.output_filter = None
        self.pre_network_filter = NoInputFilter()
        self.full_name_id = None  # TODO: do we really want to hold this parameter here?
        self.name = None
        self.is_a_highest_level_agent = True
        self.is_a_lowest_level_agent = True
        self.task_parameters = None

    @property
    def path(self):
        return 'rl_coach.agents.agent:Agent'
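
# Illustrative sketch (not from the original source): agent implementations
# bundle their parameter classes through AgentParameters. The classes below are
# hypothetical stand-ins for the real AlgorithmParameters / ExplorationParameters /
# MemoryParameters / NetworkParameters subclasses defined elsewhere:
#     agent_params = AgentParameters(algorithm=MyAlgorithmParameters(),
#                                    exploration=MyExplorationParameters(),
#                                    memory=MyMemoryParameters(),
#                                    networks={'main': MyNetworkParameters()})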


class TaskParameters(Parameters):
    def __init__(self, framework_type: str, evaluate_only: bool=False, use_cpu: bool=False, experiment_path=None,
                 seed=None):
        """
        :param framework_type: deep learning framework type. currently only tensorflow is supported
        :param evaluate_only: the task will be used only for evaluating the model
        :param use_cpu: use the cpu for this task
        :param experiment_path: the path to the directory which will store all the experiment outputs
        :param seed: a seed to use for the random numbers generator
        """
        self.framework_type = framework_type
        self.task_index = None  # TODO: not really needed
        self.evaluate_only = evaluate_only
        self.use_cpu = use_cpu
        self.experiment_path = experiment_path
        self.seed = seed
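
# Illustrative usage (not from the original source): a single-process training
# task that writes its outputs under ./experiments/my_run (hypothetical path):
#     task_params = TaskParameters(framework_type="tensorflow",
#                                  experiment_path="./experiments/my_run",
#                                  seed=42)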


class DistributedTaskParameters(TaskParameters):
    def __init__(self, framework_type: str, parameters_server_hosts: str, worker_hosts: str, job_type: str,
                 task_index: int, evaluate_only: bool=False, num_tasks: int=None,
                 num_training_tasks: int=None, use_cpu: bool=False, experiment_path=None, dnd=None,
                 shared_memory_scratchpad=None, seed=None):
        """
        :param framework_type: deep learning framework type. currently only tensorflow is supported
        :param evaluate_only: the task will be used only for evaluating the model
        :param parameters_server_hosts: comma-separated list of hostname:port pairs to which the parameter servers are
            assigned
        :param worker_hosts: comma-separated list of hostname:port pairs to which the workers are assigned
        :param job_type: the job type - either ps (short for parameters server) or worker
        :param task_index: the index of the process
        :param num_tasks: the number of total tasks that are running (not including the parameters server)
        :param num_training_tasks: the number of tasks that are training (not including the parameters server)
        :param use_cpu: use the cpu for this task
        :param experiment_path: the path to the directory which will store all the experiment outputs
        :param dnd: an external DND to use for NEC. This is a workaround needed for a shared DND not using the
            scratchpad.
        :param seed: a seed to use for the random numbers generator
        """
        super().__init__(framework_type=framework_type, evaluate_only=evaluate_only, use_cpu=use_cpu,
                         experiment_path=experiment_path, seed=seed)
        self.parameters_server_hosts = parameters_server_hosts
        self.worker_hosts = worker_hosts
        self.job_type = job_type
        self.task_index = task_index
        self.num_tasks = num_tasks
        self.num_training_tasks = num_training_tasks
        self.device = None  # the replicated device which will be used for the global parameters
        self.worker_target = None
        self.dnd = dnd
        self.shared_memory_scratchpad = shared_memory_scratchpad
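
# Illustrative sketch (not from the original source): in a distributed run each
# process gets its own DistributedTaskParameters, differing mainly in job_type
# ('ps' or 'worker') and task_index. The hostnames/ports below are made up,
# e.g. for the first of two workers:
#     worker_params = DistributedTaskParameters(framework_type="tensorflow",
#                                               parameters_server_hosts="localhost:2222",
#                                               worker_hosts="localhost:2223,localhost:2224",
#                                               job_type="worker", task_index=0,
#                                               num_tasks=2, num_training_tasks=2)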