mirror of
https://github.com/gryf/coach.git
synced 2025-12-17 11:10:20 +01:00
Move embedder, middleware, and head parameters to framework agnostic modules. (#45)
Part of #28
This commit is contained in:
committed by
Scott Leishman
parent
16b3e99f37
commit
a888226641
@@ -20,9 +20,9 @@ import numpy as np
|
|||||||
import scipy.signal
|
import scipy.signal
|
||||||
|
|
||||||
from rl_coach.agents.policy_optimization_agent import PolicyOptimizationAgent, PolicyGradientRescaler
|
from rl_coach.agents.policy_optimization_agent import PolicyOptimizationAgent, PolicyGradientRescaler
|
||||||
from rl_coach.architectures.tensorflow_components.heads.policy_head import PolicyHeadParameters
|
from rl_coach.architectures.embedder_parameters import InputEmbedderParameters
|
||||||
from rl_coach.architectures.tensorflow_components.heads.v_head import VHeadParameters
|
from rl_coach.architectures.head_parameters import PolicyHeadParameters, VHeadParameters
|
||||||
from rl_coach.architectures.tensorflow_components.middlewares.fc_middleware import FCMiddlewareParameters
|
from rl_coach.architectures.middleware_parameters import FCMiddlewareParameters
|
||||||
from rl_coach.base_parameters import AlgorithmParameters, NetworkParameters, \
|
from rl_coach.base_parameters import AlgorithmParameters, NetworkParameters, \
|
||||||
AgentParameters
|
AgentParameters
|
||||||
from rl_coach.exploration_policies.categorical import CategoricalParameters
|
from rl_coach.exploration_policies.categorical import CategoricalParameters
|
||||||
@@ -31,7 +31,6 @@ from rl_coach.logger import screen
|
|||||||
from rl_coach.memories.episodic.single_episode_buffer import SingleEpisodeBufferParameters
|
from rl_coach.memories.episodic.single_episode_buffer import SingleEpisodeBufferParameters
|
||||||
from rl_coach.spaces import DiscreteActionSpace, BoxActionSpace
|
from rl_coach.spaces import DiscreteActionSpace, BoxActionSpace
|
||||||
from rl_coach.utils import last_sample
|
from rl_coach.utils import last_sample
|
||||||
from rl_coach.architectures.tensorflow_components.embedders.embedder import InputEmbedderParameters
|
|
||||||
|
|
||||||
|
|
||||||
class ActorCriticAlgorithmParameters(AlgorithmParameters):
|
class ActorCriticAlgorithmParameters(AlgorithmParameters):
|
||||||
|
|||||||
@@ -19,13 +19,13 @@ from typing import Union
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
from rl_coach.agents.imitation_agent import ImitationAgent
|
from rl_coach.agents.imitation_agent import ImitationAgent
|
||||||
from rl_coach.architectures.tensorflow_components.heads.policy_head import PolicyHeadParameters
|
from rl_coach.architectures.head_parameters import PolicyHeadParameters
|
||||||
from rl_coach.architectures.tensorflow_components.middlewares.fc_middleware import FCMiddlewareParameters
|
from rl_coach.architectures.middleware_parameters import FCMiddlewareParameters
|
||||||
|
from rl_coach.architectures.embedder_parameters import InputEmbedderParameters
|
||||||
from rl_coach.base_parameters import AgentParameters, AlgorithmParameters, NetworkParameters, \
|
from rl_coach.base_parameters import AgentParameters, AlgorithmParameters, NetworkParameters, \
|
||||||
MiddlewareScheme
|
MiddlewareScheme
|
||||||
from rl_coach.exploration_policies.e_greedy import EGreedyParameters
|
from rl_coach.exploration_policies.e_greedy import EGreedyParameters
|
||||||
from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplayParameters
|
from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplayParameters
|
||||||
from rl_coach.architectures.tensorflow_components.embedders.embedder import InputEmbedderParameters
|
|
||||||
from rl_coach.memories.non_episodic.experience_replay import ExperienceReplayParameters
|
from rl_coach.memories.non_episodic.experience_replay import ExperienceReplayParameters
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -20,7 +20,7 @@ import numpy as np
|
|||||||
|
|
||||||
from rl_coach.agents.dqn_agent import DQNNetworkParameters, DQNAlgorithmParameters, DQNAgentParameters
|
from rl_coach.agents.dqn_agent import DQNNetworkParameters, DQNAlgorithmParameters, DQNAgentParameters
|
||||||
from rl_coach.agents.value_optimization_agent import ValueOptimizationAgent
|
from rl_coach.agents.value_optimization_agent import ValueOptimizationAgent
|
||||||
from rl_coach.architectures.tensorflow_components.heads.categorical_q_head import CategoricalQHeadParameters
|
from rl_coach.architectures.head_parameters import CategoricalQHeadParameters
|
||||||
from rl_coach.base_parameters import AgentParameters
|
from rl_coach.base_parameters import AgentParameters
|
||||||
from rl_coach.core_types import StateType
|
from rl_coach.core_types import StateType
|
||||||
from rl_coach.exploration_policies.e_greedy import EGreedyParameters
|
from rl_coach.exploration_policies.e_greedy import EGreedyParameters
|
||||||
|
|||||||
@@ -17,9 +17,9 @@
|
|||||||
from typing import Union
|
from typing import Union
|
||||||
|
|
||||||
from rl_coach.agents.imitation_agent import ImitationAgent
|
from rl_coach.agents.imitation_agent import ImitationAgent
|
||||||
from rl_coach.architectures.tensorflow_components.embedders.embedder import InputEmbedderParameters
|
from rl_coach.architectures.embedder_parameters import InputEmbedderParameters
|
||||||
from rl_coach.architectures.tensorflow_components.heads.cil_head import RegressionHeadParameters
|
from rl_coach.architectures.head_parameters import RegressionHeadParameters
|
||||||
from rl_coach.architectures.tensorflow_components.middlewares.fc_middleware import FCMiddlewareParameters
|
from rl_coach.architectures.middleware_parameters import FCMiddlewareParameters
|
||||||
from rl_coach.base_parameters import AgentParameters, MiddlewareScheme, NetworkParameters, AlgorithmParameters
|
from rl_coach.base_parameters import AgentParameters, MiddlewareScheme, NetworkParameters, AlgorithmParameters
|
||||||
from rl_coach.exploration_policies.e_greedy import EGreedyParameters
|
from rl_coach.exploration_policies.e_greedy import EGreedyParameters
|
||||||
from rl_coach.memories.non_episodic.balanced_experience_replay import BalancedExperienceReplayParameters
|
from rl_coach.memories.non_episodic.balanced_experience_replay import BalancedExperienceReplayParameters
|
||||||
|
|||||||
@@ -23,12 +23,11 @@ import numpy as np
|
|||||||
|
|
||||||
from rl_coach.agents.actor_critic_agent import ActorCriticAgent
|
from rl_coach.agents.actor_critic_agent import ActorCriticAgent
|
||||||
from rl_coach.agents.policy_optimization_agent import PolicyGradientRescaler
|
from rl_coach.agents.policy_optimization_agent import PolicyGradientRescaler
|
||||||
from rl_coach.architectures.tensorflow_components.heads.ppo_head import PPOHeadParameters
|
from rl_coach.architectures.embedder_parameters import InputEmbedderParameters
|
||||||
from rl_coach.architectures.tensorflow_components.heads.v_head import VHeadParameters
|
from rl_coach.architectures.head_parameters import PPOHeadParameters, VHeadParameters
|
||||||
from rl_coach.architectures.tensorflow_components.middlewares.fc_middleware import FCMiddlewareParameters
|
from rl_coach.architectures.middleware_parameters import FCMiddlewareParameters
|
||||||
from rl_coach.base_parameters import AlgorithmParameters, NetworkParameters, \
|
from rl_coach.base_parameters import AlgorithmParameters, NetworkParameters, \
|
||||||
AgentParameters
|
AgentParameters
|
||||||
from rl_coach.architectures.tensorflow_components.embedders.embedder import InputEmbedderParameters
|
|
||||||
from rl_coach.core_types import EnvironmentSteps, Batch, EnvResponse, StateType
|
from rl_coach.core_types import EnvironmentSteps, Batch, EnvResponse, StateType
|
||||||
from rl_coach.exploration_policies.additive_noise import AdditiveNoiseParameters
|
from rl_coach.exploration_policies.additive_noise import AdditiveNoiseParameters
|
||||||
from rl_coach.exploration_policies.categorical import CategoricalParameters
|
from rl_coach.exploration_policies.categorical import CategoricalParameters
|
||||||
|
|||||||
@@ -22,10 +22,9 @@ import numpy as np
|
|||||||
|
|
||||||
from rl_coach.agents.actor_critic_agent import ActorCriticAgent
|
from rl_coach.agents.actor_critic_agent import ActorCriticAgent
|
||||||
from rl_coach.agents.agent import Agent
|
from rl_coach.agents.agent import Agent
|
||||||
from rl_coach.architectures.tensorflow_components.embedders.embedder import InputEmbedderParameters
|
from rl_coach.architectures.embedder_parameters import InputEmbedderParameters
|
||||||
from rl_coach.architectures.tensorflow_components.heads.ddpg_actor_head import DDPGActorHeadParameters
|
from rl_coach.architectures.head_parameters import DDPGActorHeadParameters, VHeadParameters
|
||||||
from rl_coach.architectures.tensorflow_components.heads.v_head import VHeadParameters
|
from rl_coach.architectures.middleware_parameters import FCMiddlewareParameters
|
||||||
from rl_coach.architectures.tensorflow_components.middlewares.fc_middleware import FCMiddlewareParameters
|
|
||||||
from rl_coach.base_parameters import NetworkParameters, AlgorithmParameters, \
|
from rl_coach.base_parameters import NetworkParameters, AlgorithmParameters, \
|
||||||
AgentParameters, EmbedderScheme
|
AgentParameters, EmbedderScheme
|
||||||
from rl_coach.core_types import ActionInfo, EnvironmentSteps
|
from rl_coach.core_types import ActionInfo, EnvironmentSteps
|
||||||
|
|||||||
@@ -21,14 +21,13 @@ from typing import Union
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
from rl_coach.agents.agent import Agent
|
from rl_coach.agents.agent import Agent
|
||||||
|
from rl_coach.architectures.head_parameters import MeasurementsPredictionHeadParameters
|
||||||
|
from rl_coach.architectures.embedder_parameters import InputEmbedderParameters
|
||||||
|
from rl_coach.architectures.middleware_parameters import FCMiddlewareParameters
|
||||||
from rl_coach.architectures.tensorflow_components.layers import Conv2d, Dense
|
from rl_coach.architectures.tensorflow_components.layers import Conv2d, Dense
|
||||||
from rl_coach.architectures.tensorflow_components.heads.measurements_prediction_head import \
|
|
||||||
MeasurementsPredictionHeadParameters
|
|
||||||
from rl_coach.architectures.tensorflow_components.middlewares.fc_middleware import FCMiddlewareParameters
|
|
||||||
from rl_coach.base_parameters import AlgorithmParameters, AgentParameters, NetworkParameters, \
|
from rl_coach.base_parameters import AlgorithmParameters, AgentParameters, NetworkParameters, \
|
||||||
MiddlewareScheme
|
MiddlewareScheme
|
||||||
from rl_coach.core_types import ActionInfo, EnvironmentSteps, RunPhase
|
from rl_coach.core_types import ActionInfo, EnvironmentSteps, RunPhase
|
||||||
from rl_coach.architectures.tensorflow_components.embedders.embedder import InputEmbedderParameters
|
|
||||||
from rl_coach.exploration_policies.e_greedy import EGreedyParameters
|
from rl_coach.exploration_policies.e_greedy import EGreedyParameters
|
||||||
from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplayParameters
|
from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplayParameters
|
||||||
from rl_coach.memories.memory import MemoryGranularity
|
from rl_coach.memories.memory import MemoryGranularity
|
||||||
|
|||||||
@@ -19,11 +19,11 @@ from typing import Union
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
from rl_coach.agents.value_optimization_agent import ValueOptimizationAgent
|
from rl_coach.agents.value_optimization_agent import ValueOptimizationAgent
|
||||||
from rl_coach.architectures.tensorflow_components.heads.q_head import QHeadParameters
|
from rl_coach.architectures.embedder_parameters import InputEmbedderParameters
|
||||||
from rl_coach.architectures.tensorflow_components.middlewares.fc_middleware import FCMiddlewareParameters
|
from rl_coach.architectures.head_parameters import QHeadParameters
|
||||||
|
from rl_coach.architectures.middleware_parameters import FCMiddlewareParameters
|
||||||
from rl_coach.base_parameters import AlgorithmParameters, NetworkParameters, AgentParameters, \
|
from rl_coach.base_parameters import AlgorithmParameters, NetworkParameters, AgentParameters, \
|
||||||
MiddlewareScheme
|
MiddlewareScheme
|
||||||
from rl_coach.architectures.tensorflow_components.embedders.embedder import InputEmbedderParameters
|
|
||||||
from rl_coach.core_types import EnvironmentSteps
|
from rl_coach.core_types import EnvironmentSteps
|
||||||
from rl_coach.exploration_policies.e_greedy import EGreedyParameters
|
from rl_coach.exploration_policies.e_greedy import EGreedyParameters
|
||||||
from rl_coach.memories.non_episodic.experience_replay import ExperienceReplayParameters
|
from rl_coach.memories.non_episodic.experience_replay import ExperienceReplayParameters
|
||||||
|
|||||||
@@ -23,11 +23,11 @@ from pandas import to_pickle
|
|||||||
|
|
||||||
from rl_coach.agents.agent import Agent
|
from rl_coach.agents.agent import Agent
|
||||||
from rl_coach.agents.bc_agent import BCNetworkParameters
|
from rl_coach.agents.bc_agent import BCNetworkParameters
|
||||||
from rl_coach.architectures.tensorflow_components.heads.policy_head import PolicyHeadParameters
|
from rl_coach.architectures.embedder_parameters import InputEmbedderParameters
|
||||||
from rl_coach.architectures.tensorflow_components.middlewares.fc_middleware import FCMiddlewareParameters
|
from rl_coach.architectures.head_parameters import PolicyHeadParameters
|
||||||
|
from rl_coach.architectures.middleware_parameters import FCMiddlewareParameters
|
||||||
from rl_coach.base_parameters import AlgorithmParameters, NetworkParameters, EmbedderScheme, \
|
from rl_coach.base_parameters import AlgorithmParameters, NetworkParameters, EmbedderScheme, \
|
||||||
AgentParameters
|
AgentParameters
|
||||||
from rl_coach.architectures.tensorflow_components.embedders.embedder import InputEmbedderParameters
|
|
||||||
from rl_coach.core_types import ActionInfo
|
from rl_coach.core_types import ActionInfo
|
||||||
from rl_coach.exploration_policies.e_greedy import EGreedyParameters
|
from rl_coach.exploration_policies.e_greedy import EGreedyParameters
|
||||||
from rl_coach.logger import screen
|
from rl_coach.logger import screen
|
||||||
|
|||||||
@@ -20,10 +20,10 @@ import numpy as np
|
|||||||
|
|
||||||
from rl_coach.agents.policy_optimization_agent import PolicyOptimizationAgent
|
from rl_coach.agents.policy_optimization_agent import PolicyOptimizationAgent
|
||||||
from rl_coach.agents.value_optimization_agent import ValueOptimizationAgent
|
from rl_coach.agents.value_optimization_agent import ValueOptimizationAgent
|
||||||
from rl_coach.architectures.tensorflow_components.heads.q_head import QHeadParameters
|
from rl_coach.architectures.embedder_parameters import InputEmbedderParameters
|
||||||
from rl_coach.architectures.tensorflow_components.middlewares.fc_middleware import FCMiddlewareParameters
|
from rl_coach.architectures.head_parameters import QHeadParameters
|
||||||
|
from rl_coach.architectures.middleware_parameters import FCMiddlewareParameters
|
||||||
from rl_coach.base_parameters import AlgorithmParameters, AgentParameters, NetworkParameters
|
from rl_coach.base_parameters import AlgorithmParameters, AgentParameters, NetworkParameters
|
||||||
from rl_coach.architectures.tensorflow_components.embedders.embedder import InputEmbedderParameters
|
|
||||||
|
|
||||||
from rl_coach.core_types import EnvironmentSteps
|
from rl_coach.core_types import EnvironmentSteps
|
||||||
from rl_coach.exploration_policies.e_greedy import EGreedyParameters
|
from rl_coach.exploration_policies.e_greedy import EGreedyParameters
|
||||||
|
|||||||
@@ -19,11 +19,11 @@ from typing import Union
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
from rl_coach.agents.value_optimization_agent import ValueOptimizationAgent
|
from rl_coach.agents.value_optimization_agent import ValueOptimizationAgent
|
||||||
from rl_coach.architectures.tensorflow_components.heads.naf_head import NAFHeadParameters
|
from rl_coach.architectures.embedder_parameters import InputEmbedderParameters
|
||||||
from rl_coach.architectures.tensorflow_components.middlewares.fc_middleware import FCMiddlewareParameters
|
from rl_coach.architectures.head_parameters import NAFHeadParameters
|
||||||
|
from rl_coach.architectures.middleware_parameters import FCMiddlewareParameters
|
||||||
from rl_coach.base_parameters import AlgorithmParameters, AgentParameters, \
|
from rl_coach.base_parameters import AlgorithmParameters, AgentParameters, \
|
||||||
NetworkParameters
|
NetworkParameters
|
||||||
from rl_coach.architectures.tensorflow_components.embedders.embedder import InputEmbedderParameters
|
|
||||||
|
|
||||||
from rl_coach.core_types import ActionInfo, EnvironmentSteps
|
from rl_coach.core_types import ActionInfo, EnvironmentSteps
|
||||||
from rl_coach.exploration_policies.ou_process import OUProcessParameters
|
from rl_coach.exploration_policies.ou_process import OUProcessParameters
|
||||||
|
|||||||
@@ -21,10 +21,10 @@ from typing import Union
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
from rl_coach.agents.value_optimization_agent import ValueOptimizationAgent
|
from rl_coach.agents.value_optimization_agent import ValueOptimizationAgent
|
||||||
from rl_coach.architectures.tensorflow_components.heads.dnd_q_head import DNDQHeadParameters
|
from rl_coach.architectures.embedder_parameters import InputEmbedderParameters
|
||||||
from rl_coach.architectures.tensorflow_components.middlewares.fc_middleware import FCMiddlewareParameters
|
from rl_coach.architectures.head_parameters import DNDQHeadParameters
|
||||||
|
from rl_coach.architectures.middleware_parameters import FCMiddlewareParameters
|
||||||
from rl_coach.base_parameters import AlgorithmParameters, NetworkParameters, AgentParameters
|
from rl_coach.base_parameters import AlgorithmParameters, NetworkParameters, AgentParameters
|
||||||
from rl_coach.architectures.tensorflow_components.embedders.embedder import InputEmbedderParameters
|
|
||||||
|
|
||||||
from rl_coach.core_types import RunPhase, EnvironmentSteps, Episode, StateType
|
from rl_coach.core_types import RunPhase, EnvironmentSteps, Episode, StateType
|
||||||
from rl_coach.exploration_policies.e_greedy import EGreedyParameters
|
from rl_coach.exploration_policies.e_greedy import EGreedyParameters
|
||||||
|
|||||||
@@ -19,11 +19,11 @@ from typing import Union
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
from rl_coach.agents.policy_optimization_agent import PolicyOptimizationAgent, PolicyGradientRescaler
|
from rl_coach.agents.policy_optimization_agent import PolicyOptimizationAgent, PolicyGradientRescaler
|
||||||
from rl_coach.architectures.tensorflow_components.heads.policy_head import PolicyHeadParameters
|
from rl_coach.architectures.embedder_parameters import InputEmbedderParameters
|
||||||
from rl_coach.architectures.tensorflow_components.middlewares.fc_middleware import FCMiddlewareParameters
|
from rl_coach.architectures.head_parameters import PolicyHeadParameters
|
||||||
|
from rl_coach.architectures.middleware_parameters import FCMiddlewareParameters
|
||||||
from rl_coach.base_parameters import NetworkParameters, AlgorithmParameters, \
|
from rl_coach.base_parameters import NetworkParameters, AlgorithmParameters, \
|
||||||
AgentParameters
|
AgentParameters
|
||||||
from rl_coach.architectures.tensorflow_components.embedders.embedder import InputEmbedderParameters
|
|
||||||
|
|
||||||
from rl_coach.exploration_policies.additive_noise import AdditiveNoiseParameters
|
from rl_coach.exploration_policies.additive_noise import AdditiveNoiseParameters
|
||||||
from rl_coach.exploration_policies.categorical import CategoricalParameters
|
from rl_coach.exploration_policies.categorical import CategoricalParameters
|
||||||
|
|||||||
@@ -22,12 +22,11 @@ import numpy as np
|
|||||||
|
|
||||||
from rl_coach.agents.actor_critic_agent import ActorCriticAgent
|
from rl_coach.agents.actor_critic_agent import ActorCriticAgent
|
||||||
from rl_coach.agents.policy_optimization_agent import PolicyGradientRescaler
|
from rl_coach.agents.policy_optimization_agent import PolicyGradientRescaler
|
||||||
from rl_coach.architectures.tensorflow_components.heads.ppo_head import PPOHeadParameters
|
from rl_coach.architectures.embedder_parameters import InputEmbedderParameters
|
||||||
from rl_coach.architectures.tensorflow_components.heads.v_head import VHeadParameters
|
from rl_coach.architectures.head_parameters import PPOHeadParameters, VHeadParameters
|
||||||
from rl_coach.architectures.tensorflow_components.middlewares.fc_middleware import FCMiddlewareParameters
|
from rl_coach.architectures.middleware_parameters import FCMiddlewareParameters
|
||||||
from rl_coach.base_parameters import AlgorithmParameters, NetworkParameters, \
|
from rl_coach.base_parameters import AlgorithmParameters, NetworkParameters, \
|
||||||
AgentParameters, DistributedTaskParameters
|
AgentParameters, DistributedTaskParameters
|
||||||
from rl_coach.architectures.tensorflow_components.embedders.embedder import InputEmbedderParameters
|
|
||||||
|
|
||||||
from rl_coach.core_types import EnvironmentSteps, Batch
|
from rl_coach.core_types import EnvironmentSteps, Batch
|
||||||
from rl_coach.exploration_policies.additive_noise import AdditiveNoiseParameters
|
from rl_coach.exploration_policies.additive_noise import AdditiveNoiseParameters
|
||||||
|
|||||||
@@ -20,8 +20,7 @@ import numpy as np
|
|||||||
|
|
||||||
from rl_coach.agents.dqn_agent import DQNAgentParameters, DQNNetworkParameters, DQNAlgorithmParameters
|
from rl_coach.agents.dqn_agent import DQNAgentParameters, DQNNetworkParameters, DQNAlgorithmParameters
|
||||||
from rl_coach.agents.value_optimization_agent import ValueOptimizationAgent
|
from rl_coach.agents.value_optimization_agent import ValueOptimizationAgent
|
||||||
from rl_coach.architectures.tensorflow_components.heads.quantile_regression_q_head import \
|
from rl_coach.architectures.head_parameters import QuantileRegressionQHeadParameters
|
||||||
QuantileRegressionQHeadParameters
|
|
||||||
from rl_coach.core_types import StateType
|
from rl_coach.core_types import StateType
|
||||||
from rl_coach.schedules import LinearSchedule
|
from rl_coach.schedules import LinearSchedule
|
||||||
|
|
||||||
|
|||||||
@@ -21,8 +21,8 @@ import numpy as np
|
|||||||
from rl_coach.agents.categorical_dqn_agent import CategoricalDQNAlgorithmParameters, \
|
from rl_coach.agents.categorical_dqn_agent import CategoricalDQNAlgorithmParameters, \
|
||||||
CategoricalDQNAgent, CategoricalDQNAgentParameters
|
CategoricalDQNAgent, CategoricalDQNAgentParameters
|
||||||
from rl_coach.agents.dqn_agent import DQNNetworkParameters
|
from rl_coach.agents.dqn_agent import DQNNetworkParameters
|
||||||
from rl_coach.architectures.tensorflow_components.heads.rainbow_q_head import RainbowQHeadParameters
|
from rl_coach.architectures.head_parameters import RainbowQHeadParameters
|
||||||
from rl_coach.architectures.tensorflow_components.middlewares.fc_middleware import FCMiddlewareParameters
|
from rl_coach.architectures.middleware_parameters import FCMiddlewareParameters
|
||||||
from rl_coach.base_parameters import MiddlewareScheme
|
from rl_coach.base_parameters import MiddlewareScheme
|
||||||
from rl_coach.exploration_policies.parameter_noise import ParameterNoiseParameters
|
from rl_coach.exploration_policies.parameter_noise import ParameterNoiseParameters
|
||||||
from rl_coach.memories.non_episodic.prioritized_experience_replay import PrioritizedExperienceReplayParameters, \
|
from rl_coach.memories.non_episodic.prioritized_experience_replay import PrioritizedExperienceReplayParameters, \
|
||||||
|
|||||||
41
rl_coach/architectures/embedder_parameters.py
Normal file
41
rl_coach/architectures/embedder_parameters.py
Normal file
@@ -0,0 +1,41 @@
|
|||||||
|
#
|
||||||
|
# Copyright (c) 2017 Intel Corporation
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
#
|
||||||
|
|
||||||
|
from typing import List, Union
|
||||||
|
|
||||||
|
from rl_coach.base_parameters import EmbedderScheme, NetworkComponentParameters
|
||||||
|
|
||||||
|
|
||||||
|
class InputEmbedderParameters(NetworkComponentParameters):
    """Framework-agnostic settings container for an input embedder.

    The constructor only records its arguments as attributes; it builds no
    network objects itself.
    """

    def __init__(self,
                 activation_function: str='relu',
                 scheme: Union[List, EmbedderScheme]=EmbedderScheme.Medium,
                 batchnorm: bool=False,
                 dropout=False,
                 name: str='embedder',
                 input_rescaling=None,
                 input_offset=None,
                 input_clipping=None,
                 dense_layer=None,
                 is_training=False):
        """Store the embedder configuration.

        :param activation_function: name of the activation function, kept as given
        :param scheme: either an explicit list or an EmbedderScheme member
        :param batchnorm: stored as-is
        :param dropout: stored as-is
        :param name: name recorded for this embedder
        :param input_rescaling: when None, a fresh dict
            {'image': 255.0, 'vector': 1.0} is used
        :param input_offset: when None, a fresh dict
            {'image': 0.0, 'vector': 0.0} is used
        :param input_clipping: stored as-is (None by default)
        :param dense_layer: forwarded to NetworkComponentParameters
        :param is_training: stored as-is
        """
        # Build the dict defaults per call so that instances never share a
        # single mutable default object.
        rescaling = {'image': 255.0, 'vector': 1.0} if input_rescaling is None else input_rescaling
        offset = {'image': 0.0, 'vector': 0.0} if input_offset is None else input_offset

        super().__init__(dense_layer=dense_layer)

        self.activation_function = activation_function
        self.scheme = scheme
        self.batchnorm = batchnorm
        self.dropout = dropout
        self.input_rescaling = rescaling
        self.input_offset = offset
        self.input_clipping = input_clipping
        self.name = name
        self.is_training = is_training
|
||||||
173
rl_coach/architectures/head_parameters.py
Normal file
173
rl_coach/architectures/head_parameters.py
Normal file
@@ -0,0 +1,173 @@
|
|||||||
|
#
|
||||||
|
# Copyright (c) 2017 Intel Corporation
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
#
|
||||||
|
|
||||||
|
from typing import Type
|
||||||
|
|
||||||
|
from rl_coach.base_parameters import NetworkComponentParameters
|
||||||
|
|
||||||
|
|
||||||
|
class HeadParameters(NetworkComponentParameters):
    """Common settings container for network heads.

    The concrete head is identified by ``parameterized_class_name``, a plain
    string rather than a class object.
    NOTE(review): presumably the string is resolved to a framework-specific
    head implementation elsewhere -- confirm against the code that consumes it.
    """

    def __init__(self,
                 parameterized_class_name: str,
                 activation_function: str = 'relu',
                 name: str= 'head',
                 num_output_head_copies: int=1,
                 rescale_gradient_from_head_by_factor: float=1.0,
                 loss_weight: float=1.0,
                 dense_layer=None):
        """Store the head configuration.

        :param parameterized_class_name: name of the head class these parameters describe
        :param activation_function: name of the activation function, kept as given
        :param name: name recorded for this head
        :param num_output_head_copies: stored as-is
        :param rescale_gradient_from_head_by_factor: stored as-is
        :param loss_weight: stored as-is
        :param dense_layer: forwarded to NetworkComponentParameters
        """
        super().__init__(dense_layer=dense_layer)

        self.activation_function = activation_function
        self.name = name
        self.num_output_head_copies = num_output_head_copies
        self.rescale_gradient_from_head_by_factor = rescale_gradient_from_head_by_factor
        self.loss_weight = loss_weight
        self.parameterized_class_name = parameterized_class_name
|
||||||
|
|
||||||
|
|
||||||
|
class PPOHeadParameters(HeadParameters):
    """Head parameters bound to the class name "PPOHead"."""

    def __init__(self,
                 activation_function: str ='tanh',
                 name: str='ppo_head_params',
                 num_output_head_copies: int = 1,
                 rescale_gradient_from_head_by_factor: float = 1.0,
                 loss_weight: float = 1.0,
                 dense_layer=None):
        """Forward every argument to HeadParameters with the head class name fixed."""
        super().__init__(
            parameterized_class_name="PPOHead",
            activation_function=activation_function,
            name=name,
            num_output_head_copies=num_output_head_copies,
            rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
            loss_weight=loss_weight,
            dense_layer=dense_layer,
        )
|
||||||
|
|
||||||
|
|
||||||
|
class VHeadParameters(HeadParameters):
    """Head parameters bound to the class name "VHead"."""

    def __init__(self,
                 activation_function: str ='relu',
                 name: str='v_head_params',
                 num_output_head_copies: int = 1,
                 rescale_gradient_from_head_by_factor: float = 1.0,
                 loss_weight: float = 1.0,
                 dense_layer=None):
        """Forward every argument to HeadParameters with the head class name fixed."""
        super().__init__(
            parameterized_class_name="VHead",
            activation_function=activation_function,
            name=name,
            num_output_head_copies=num_output_head_copies,
            rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
            loss_weight=loss_weight,
            dense_layer=dense_layer,
        )
|
||||||
|
|
||||||
|
|
||||||
|
class CategoricalQHeadParameters(HeadParameters):
    """Head parameters bound to the class name "CategoricalQHead"."""

    def __init__(self,
                 activation_function: str ='relu',
                 name: str='categorical_q_head_params',
                 num_output_head_copies: int = 1,
                 rescale_gradient_from_head_by_factor: float = 1.0,
                 loss_weight: float = 1.0,
                 dense_layer=None):
        """Forward every argument to HeadParameters with the head class name fixed."""
        super().__init__(
            parameterized_class_name="CategoricalQHead",
            activation_function=activation_function,
            name=name,
            num_output_head_copies=num_output_head_copies,
            rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
            loss_weight=loss_weight,
            dense_layer=dense_layer,
        )
|
||||||
|
|
||||||
|
|
||||||
|
class RegressionHeadParameters(HeadParameters):
    """Head parameters bound to the class name "RegressionHead"."""

    # NOTE(review): the default name is 'q_head_params', which looks carried
    # over from a Q head -- confirm whether that is intended.
    def __init__(self,
                 activation_function: str ='relu',
                 name: str='q_head_params',
                 num_output_head_copies: int = 1,
                 rescale_gradient_from_head_by_factor: float = 1.0,
                 loss_weight: float = 1.0,
                 dense_layer=None,
                 scheme=None):
        """Forward the shared arguments to HeadParameters with the head class name fixed.

        NOTE(review): ``scheme`` is accepted but neither stored on the instance
        nor forwarded to the base class -- confirm whether that is intentional.
        """
        super().__init__(
            parameterized_class_name="RegressionHead",
            activation_function=activation_function,
            name=name,
            num_output_head_copies=num_output_head_copies,
            rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
            loss_weight=loss_weight,
            dense_layer=dense_layer,
        )
|
||||||
|
|
||||||
|
|
||||||
|
class DDPGActorHeadParameters(HeadParameters):
    """Head parameters bound to the class name "DDPGActor", plus a batchnorm flag."""

    def __init__(self,
                 activation_function: str ='tanh',
                 name: str='policy_head_params',
                 batchnorm: bool=True,
                 num_output_head_copies: int = 1,
                 rescale_gradient_from_head_by_factor: float = 1.0,
                 loss_weight: float = 1.0,
                 dense_layer=None):
        """Forward the shared arguments to HeadParameters, then record ``batchnorm``."""
        super().__init__(
            parameterized_class_name="DDPGActor",
            activation_function=activation_function,
            name=name,
            num_output_head_copies=num_output_head_copies,
            rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
            loss_weight=loss_weight,
            dense_layer=dense_layer,
        )
        # The only setting this head adds on top of the shared ones.
        self.batchnorm = batchnorm
|
||||||
|
|
||||||
|
|
||||||
|
class DNDQHeadParameters(HeadParameters):
    """Head parameters bound to the class name "DNDQHead"."""

    def __init__(self,
                 activation_function: str ='relu',
                 name: str='dnd_q_head_params',
                 num_output_head_copies: int = 1,
                 rescale_gradient_from_head_by_factor: float = 1.0,
                 loss_weight: float = 1.0,
                 dense_layer=None):
        """Forward every argument to HeadParameters with the head class name fixed."""
        super().__init__(
            parameterized_class_name="DNDQHead",
            activation_function=activation_function,
            name=name,
            num_output_head_copies=num_output_head_copies,
            rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
            loss_weight=loss_weight,
            dense_layer=dense_layer,
        )
|
||||||
|
|
||||||
|
|
||||||
|
class DuelingQHeadParameters(HeadParameters):
    """Head parameters bound to the class name "DuelingQHead"."""

    def __init__(self,
                 activation_function: str ='relu',
                 name: str='dueling_q_head_params',
                 num_output_head_copies: int = 1,
                 rescale_gradient_from_head_by_factor: float = 1.0,
                 loss_weight: float = 1.0,
                 dense_layer=None):
        """Forward every argument to HeadParameters with the head class name fixed."""
        super().__init__(
            parameterized_class_name="DuelingQHead",
            activation_function=activation_function,
            name=name,
            num_output_head_copies=num_output_head_copies,
            rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
            loss_weight=loss_weight,
            dense_layer=dense_layer,
        )
|
||||||
|
|
||||||
|
|
||||||
|
class MeasurementsPredictionHeadParameters(HeadParameters):
    """
    Parameters for the head whose implementation class is named "MeasurementsPredictionHead".

    The constructor adds nothing of its own: every argument is handed to the parent
    constructor together with the head class name.
    """

    def __init__(self, activation_function: str = 'relu', name: str = 'measurements_prediction_head_params',
                 num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
                 loss_weight: float = 1.0, dense_layer=None):
        super().__init__(
            parameterized_class_name="MeasurementsPredictionHead",
            name=name,
            activation_function=activation_function,
            dense_layer=dense_layer,
            loss_weight=loss_weight,
            num_output_head_copies=num_output_head_copies,
            rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
        )
|
||||||
|
|
||||||
|
|
||||||
|
class NAFHeadParameters(HeadParameters):
    """
    Parameters for the head whose implementation class is named "NAFHead".

    The constructor adds nothing of its own: every argument is handed to the parent
    constructor together with the head class name.
    """

    def __init__(self, activation_function: str = 'tanh', name: str = 'naf_head_params',
                 num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
                 loss_weight: float = 1.0, dense_layer=None):
        super().__init__(
            parameterized_class_name="NAFHead",
            name=name,
            activation_function=activation_function,
            dense_layer=dense_layer,
            loss_weight=loss_weight,
            num_output_head_copies=num_output_head_copies,
            rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
        )
|
||||||
|
|
||||||
|
|
||||||
|
class PolicyHeadParameters(HeadParameters):
    """
    Parameters for the head whose implementation class is named "PolicyHead".

    The constructor adds nothing of its own: every argument is handed to the parent
    constructor together with the head class name.
    """

    def __init__(self, activation_function: str = 'tanh', name: str = 'policy_head_params',
                 num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
                 loss_weight: float = 1.0, dense_layer=None):
        super().__init__(
            parameterized_class_name="PolicyHead",
            name=name,
            activation_function=activation_function,
            dense_layer=dense_layer,
            loss_weight=loss_weight,
            num_output_head_copies=num_output_head_copies,
            rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
        )
|
||||||
|
|
||||||
|
|
||||||
|
class PPOVHeadParameters(HeadParameters):
    """
    Parameters for the head whose implementation class is named "PPOVHead".

    The constructor adds nothing of its own: every argument is handed to the parent
    constructor together with the head class name.
    """

    def __init__(self, activation_function: str = 'relu', name: str = 'ppo_v_head_params',
                 num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
                 loss_weight: float = 1.0, dense_layer=None):
        super().__init__(
            parameterized_class_name="PPOVHead",
            name=name,
            activation_function=activation_function,
            dense_layer=dense_layer,
            loss_weight=loss_weight,
            num_output_head_copies=num_output_head_copies,
            rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
        )
|
||||||
|
|
||||||
|
|
||||||
|
class QHeadParameters(HeadParameters):
    """
    Parameters for the head whose implementation class is named "QHead".

    The constructor adds nothing of its own: every argument is handed to the parent
    constructor together with the head class name.
    """

    def __init__(self, activation_function: str = 'relu', name: str = 'q_head_params',
                 num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
                 loss_weight: float = 1.0, dense_layer=None):
        super().__init__(
            parameterized_class_name="QHead",
            name=name,
            activation_function=activation_function,
            dense_layer=dense_layer,
            loss_weight=loss_weight,
            num_output_head_copies=num_output_head_copies,
            rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
        )
|
||||||
|
|
||||||
|
|
||||||
|
class QuantileRegressionQHeadParameters(HeadParameters):
    """
    Parameters for the head whose implementation class is named "QuantileRegressionQHead".

    The constructor adds nothing of its own: every argument is handed to the parent
    constructor together with the head class name.
    """

    def __init__(self, activation_function: str = 'relu', name: str = 'quantile_regression_q_head_params',
                 num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
                 loss_weight: float = 1.0, dense_layer=None):
        super().__init__(
            parameterized_class_name="QuantileRegressionQHead",
            name=name,
            activation_function=activation_function,
            dense_layer=dense_layer,
            loss_weight=loss_weight,
            num_output_head_copies=num_output_head_copies,
            rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
        )
|
||||||
|
|
||||||
|
|
||||||
|
class RainbowQHeadParameters(HeadParameters):
    """
    Parameters for the head whose implementation class is named "RainbowQHead".

    The constructor adds nothing of its own: every argument is handed to the parent
    constructor together with the head class name.
    """

    def __init__(self, activation_function: str = 'relu', name: str = 'rainbow_q_head_params',
                 num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
                 loss_weight: float = 1.0, dense_layer=None):
        super().__init__(
            parameterized_class_name="RainbowQHead",
            name=name,
            activation_function=activation_function,
            dense_layer=dense_layer,
            loss_weight=loss_weight,
            num_output_head_copies=num_output_head_copies,
            rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
        )
|
||||||
54
rl_coach/architectures/middleware_parameters.py
Normal file
54
rl_coach/architectures/middleware_parameters.py
Normal file
@@ -0,0 +1,54 @@
|
|||||||
|
#
|
||||||
|
# Copyright (c) 2017 Intel Corporation
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
#
|
||||||
|
|
||||||
|
from typing import List, Type, Union
|
||||||
|
|
||||||
|
from rl_coach.base_parameters import MiddlewareScheme, NetworkComponentParameters
|
||||||
|
|
||||||
|
|
||||||
|
class MiddlewareParameters(NetworkComponentParameters):
    """
    Base parameter container for middleware components.

    ``dense_layer`` is handed to the parent constructor; every other argument is
    recorded on the instance under an attribute of the same name.
    """

    def __init__(self, parameterized_class_name: str,
                 activation_function: str = 'relu', scheme: Union[List, MiddlewareScheme] = MiddlewareScheme.Medium,
                 batchnorm: bool = False, dropout: bool = False, name='middleware', dense_layer=None,
                 is_training=False):
        """
        :param parameterized_class_name: name of the middleware class these parameters describe
        :param activation_function: stored as ``self.activation_function``
        :param scheme: stored as ``self.scheme``
        :param batchnorm: stored as ``self.batchnorm``
        :param dropout: stored as ``self.dropout``
        :param name: stored as ``self.name``
        :param dense_layer: passed through to the parent constructor
        :param is_training: stored as ``self.is_training``
        """
        super().__init__(dense_layer=dense_layer)

        # identity of the component
        self.parameterized_class_name = parameterized_class_name
        self.name = name

        # layer configuration
        self.scheme = scheme
        self.activation_function = activation_function
        self.batchnorm = batchnorm
        self.dropout = dropout
        self.is_training = is_training
|
||||||
|
|
||||||
|
|
||||||
|
class FCMiddlewareParameters(MiddlewareParameters):
    """
    Parameters for the middleware whose implementation class is named "FCMiddleware".

    The constructor adds nothing of its own: every argument is handed to the parent
    constructor together with the middleware class name.
    """

    def __init__(self, activation_function='relu',
                 scheme: Union[List, MiddlewareScheme] = MiddlewareScheme.Medium,
                 batchnorm: bool = False, dropout: bool = False,
                 name="middleware_fc_embedder", dense_layer=None, is_training=False):
        super().__init__(
            parameterized_class_name="FCMiddleware",
            name=name,
            scheme=scheme,
            activation_function=activation_function,
            batchnorm=batchnorm,
            dropout=dropout,
            is_training=is_training,
            dense_layer=dense_layer,
        )
|
||||||
|
|
||||||
|
|
||||||
|
class LSTMMiddlewareParameters(MiddlewareParameters):
    """
    Parameters for the middleware whose implementation class is named "LSTMMiddleware".

    The common middleware arguments are handed to the parent constructor; the
    ``number_of_lstm_cells`` value is kept on the instance.
    """

    def __init__(self, activation_function='relu', number_of_lstm_cells=256,
                 scheme: MiddlewareScheme = MiddlewareScheme.Medium,
                 batchnorm: bool = False, dropout: bool = False,
                 name="middleware_lstm_embedder", dense_layer=None, is_training=False):
        """
        :param activation_function: passed through to the parent constructor
        :param number_of_lstm_cells: stored as ``self.number_of_lstm_cells``
        :param scheme: passed through to the parent constructor
        :param batchnorm: passed through to the parent constructor
        :param dropout: passed through to the parent constructor
        :param name: passed through to the parent constructor
        :param dense_layer: passed through to the parent constructor
        :param is_training: passed through to the parent constructor
        """
        super().__init__(
            parameterized_class_name="LSTMMiddleware",
            name=name,
            scheme=scheme,
            activation_function=activation_function,
            batchnorm=batchnorm,
            dropout=dropout,
            is_training=is_training,
            dense_layer=dense_layer,
        )
        # set after the parent constructor has run, as in the original ordering
        self.number_of_lstm_cells = number_of_lstm_cells
|
||||||
@@ -0,0 +1,4 @@
|
|||||||
|
from .image_embedder import ImageEmbedder
|
||||||
|
from .vector_embedder import VectorEmbedder
|
||||||
|
|
||||||
|
__all__ = ['ImageEmbedder', 'VectorEmbedder']
|
||||||
|
|||||||
@@ -28,35 +28,6 @@ from rl_coach.core_types import InputEmbedding
|
|||||||
from rl_coach.utils import force_list
|
from rl_coach.utils import force_list
|
||||||
|
|
||||||
|
|
||||||
class InputEmbedderParameters(NetworkComponentParameters):
|
|
||||||
def __init__(self, activation_function: str='relu', scheme: Union[List, EmbedderScheme]=EmbedderScheme.Medium,
|
|
||||||
batchnorm: bool=False, dropout=False, name: str='embedder', input_rescaling=None, input_offset=None,
|
|
||||||
input_clipping=None, dense_layer=Dense, is_training=False):
|
|
||||||
super().__init__(dense_layer=dense_layer)
|
|
||||||
self.activation_function = activation_function
|
|
||||||
self.scheme = scheme
|
|
||||||
self.batchnorm = batchnorm
|
|
||||||
self.dropout = dropout
|
|
||||||
|
|
||||||
if input_rescaling is None:
|
|
||||||
input_rescaling = {'image': 255.0, 'vector': 1.0}
|
|
||||||
if input_offset is None:
|
|
||||||
input_offset = {'image': 0.0, 'vector': 0.0}
|
|
||||||
|
|
||||||
self.input_rescaling = input_rescaling
|
|
||||||
self.input_offset = input_offset
|
|
||||||
self.input_clipping = input_clipping
|
|
||||||
self.name = name
|
|
||||||
self.is_training = is_training
|
|
||||||
|
|
||||||
@property
|
|
||||||
def path(self):
|
|
||||||
return {
|
|
||||||
"image": 'image_embedder:ImageEmbedder',
|
|
||||||
"vector": 'vector_embedder:VectorEmbedder'
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
class InputEmbedder(object):
|
class InputEmbedder(object):
|
||||||
"""
|
"""
|
||||||
An input embedder is the first part of the network, which takes the input from the state and produces a vector
|
An input embedder is the first part of the network, which takes the input from the state and produces a vector
|
||||||
@@ -83,6 +54,8 @@ class InputEmbedder(object):
|
|||||||
self.input_offset = input_offset
|
self.input_offset = input_offset
|
||||||
self.input_clipping = input_clipping
|
self.input_clipping = input_clipping
|
||||||
self.dense_layer = dense_layer
|
self.dense_layer = dense_layer
|
||||||
|
if self.dense_layer is None:
|
||||||
|
self.dense_layer = Dense
|
||||||
self.is_training = is_training
|
self.is_training = is_training
|
||||||
|
|
||||||
# layers order is conv -> batchnorm -> activation -> dropout
|
# layers order is conv -> batchnorm -> activation -> dropout
|
||||||
|
|||||||
@@ -20,10 +20,10 @@ from typing import Dict
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
import tensorflow as tf
|
import tensorflow as tf
|
||||||
|
|
||||||
from rl_coach.architectures.tensorflow_components.embedders.embedder import InputEmbedderParameters
|
from rl_coach.architectures.embedder_parameters import InputEmbedderParameters
|
||||||
|
from rl_coach.architectures.head_parameters import HeadParameters
|
||||||
|
from rl_coach.architectures.middleware_parameters import MiddlewareParameters
|
||||||
from rl_coach.architectures.tensorflow_components.architecture import TensorFlowArchitecture
|
from rl_coach.architectures.tensorflow_components.architecture import TensorFlowArchitecture
|
||||||
from rl_coach.architectures.tensorflow_components.heads.head import HeadParameters
|
|
||||||
from rl_coach.architectures.tensorflow_components.middlewares.middleware import MiddlewareParameters
|
|
||||||
from rl_coach.base_parameters import AgentParameters, EmbeddingMergerType
|
from rl_coach.base_parameters import AgentParameters, EmbeddingMergerType
|
||||||
from rl_coach.core_types import PredictionType
|
from rl_coach.core_types import PredictionType
|
||||||
from rl_coach.spaces import SpacesDefinition, PlanarMapsObservationSpace
|
from rl_coach.spaces import SpacesDefinition, PlanarMapsObservationSpace
|
||||||
@@ -136,15 +136,17 @@ class GeneralTensorFlowNetwork(TensorFlowArchitecture):
|
|||||||
raise ValueError("The key for the input embedder ({}) must match one of the following keys: {}"
|
raise ValueError("The key for the input embedder ({}) must match one of the following keys: {}"
|
||||||
.format(input_name, allowed_inputs.keys()))
|
.format(input_name, allowed_inputs.keys()))
|
||||||
|
|
||||||
type = "vector"
|
mod_names = {'image': 'ImageEmbedder', 'vector': 'VectorEmbedder'}
|
||||||
if isinstance(allowed_inputs[input_name], PlanarMapsObservationSpace):
|
|
||||||
type = "image"
|
|
||||||
|
|
||||||
embedder_path = 'rl_coach.architectures.tensorflow_components.embedders.' + embedder_params.path[type]
|
emb_type = "vector"
|
||||||
|
if isinstance(allowed_inputs[input_name], PlanarMapsObservationSpace):
|
||||||
|
emb_type = "image"
|
||||||
|
|
||||||
|
embedder_path = 'rl_coach.architectures.tensorflow_components.embedders:' + mod_names[emb_type]
|
||||||
embedder_params_copy = copy.copy(embedder_params)
|
embedder_params_copy = copy.copy(embedder_params)
|
||||||
embedder_params_copy.activation_function = self.get_activation_function(embedder_params.activation_function)
|
embedder_params_copy.activation_function = self.get_activation_function(embedder_params.activation_function)
|
||||||
embedder_params_copy.input_rescaling = embedder_params_copy.input_rescaling[type]
|
embedder_params_copy.input_rescaling = embedder_params_copy.input_rescaling[emb_type]
|
||||||
embedder_params_copy.input_offset = embedder_params_copy.input_offset[type]
|
embedder_params_copy.input_offset = embedder_params_copy.input_offset[emb_type]
|
||||||
embedder_params_copy.name = input_name
|
embedder_params_copy.name = input_name
|
||||||
module = dynamic_import_and_instantiate_module_from_params(embedder_params_copy,
|
module = dynamic_import_and_instantiate_module_from_params(embedder_params_copy,
|
||||||
path=embedder_path,
|
path=embedder_path,
|
||||||
@@ -157,25 +159,25 @@ class GeneralTensorFlowNetwork(TensorFlowArchitecture):
|
|||||||
:param middleware_params: the paramaeters of the middleware class
|
:param middleware_params: the paramaeters of the middleware class
|
||||||
:return: the middleware instance
|
:return: the middleware instance
|
||||||
"""
|
"""
|
||||||
|
mod_name = middleware_params.parameterized_class_name
|
||||||
|
middleware_path = 'rl_coach.architectures.tensorflow_components.middlewares:' + mod_name
|
||||||
middleware_params_copy = copy.copy(middleware_params)
|
middleware_params_copy = copy.copy(middleware_params)
|
||||||
middleware_params_copy.activation_function = self.get_activation_function(middleware_params.activation_function)
|
middleware_params_copy.activation_function = self.get_activation_function(middleware_params.activation_function)
|
||||||
module = dynamic_import_and_instantiate_module_from_params(middleware_params_copy)
|
module = dynamic_import_and_instantiate_module_from_params(middleware_params_copy, path=middleware_path)
|
||||||
return module
|
return module
|
||||||
|
|
||||||
def get_output_head(self, head_params: HeadParameters, head_idx: int):
|
def get_output_head(self, head_params: HeadParameters, head_idx: int):
|
||||||
"""
|
"""
|
||||||
Given a head type, creates the head and returns it
|
Given a head type, creates the head and returns it
|
||||||
:param head_params: the parameters of the head to create
|
:param head_params: the parameters of the head to create
|
||||||
:param head_type: the path to the class of the head under the embedders directory or a full path to a head class.
|
|
||||||
the path should be in the following structure: <module_path>:<class_path>
|
|
||||||
:param head_idx: the head index
|
:param head_idx: the head index
|
||||||
:param loss_weight: the weight to assign for the embedders loss
|
|
||||||
:return: the head
|
:return: the head
|
||||||
"""
|
"""
|
||||||
|
mod_name = head_params.parameterized_class_name
|
||||||
|
head_path = 'rl_coach.architectures.tensorflow_components.heads:' + mod_name
|
||||||
head_params_copy = copy.copy(head_params)
|
head_params_copy = copy.copy(head_params)
|
||||||
head_params_copy.activation_function = self.get_activation_function(head_params_copy.activation_function)
|
head_params_copy.activation_function = self.get_activation_function(head_params_copy.activation_function)
|
||||||
return dynamic_import_and_instantiate_module_from_params(head_params_copy, extra_kwargs={
|
return dynamic_import_and_instantiate_module_from_params(head_params_copy, path=head_path, extra_kwargs={
|
||||||
'agent_parameters': self.ap, 'spaces': self.spaces, 'network_name': self.network_wrapper_name,
|
'agent_parameters': self.ap, 'spaces': self.spaces, 'network_name': self.network_wrapper_name,
|
||||||
'head_idx': head_idx, 'is_local': self.network_is_local})
|
'head_idx': head_idx, 'is_local': self.network_is_local})
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,29 @@
|
|||||||
|
from .categorical_q_head import CategoricalQHead
|
||||||
|
from .ddpg_actor_head import DDPGActor
|
||||||
|
from .dnd_q_head import DNDQHead
|
||||||
|
from .dueling_q_head import DuelingQHead
|
||||||
|
from .measurements_prediction_head import MeasurementsPredictionHead
|
||||||
|
from .naf_head import NAFHead
|
||||||
|
from .policy_head import PolicyHead
|
||||||
|
from .ppo_head import PPOHead
|
||||||
|
from .ppo_v_head import PPOVHead
|
||||||
|
from .q_head import QHead
|
||||||
|
from .quantile_regression_q_head import QuantileRegressionQHead
|
||||||
|
from .rainbow_q_head import RainbowQHead
|
||||||
|
from .v_head import VHead
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
'CategoricalQHead',
|
||||||
|
'DDPGActor',
|
||||||
|
'DNDQHead',
|
||||||
|
'DuelingQHead',
|
||||||
|
'MeasurementsPredictionHead',
|
||||||
|
'NAFHead',
|
||||||
|
'PolicyHead',
|
||||||
|
'PPOHead',
|
||||||
|
'PPOVHead',
|
||||||
|
'QHead',
|
||||||
|
'QuantileRegressionQHead',
|
||||||
|
'RainbowQHead',
|
||||||
|
'VHead'
|
||||||
|
]
|
||||||
|
|||||||
@@ -18,22 +18,12 @@ import tensorflow as tf
|
|||||||
|
|
||||||
from rl_coach.architectures.tensorflow_components.layers import Dense
|
from rl_coach.architectures.tensorflow_components.layers import Dense
|
||||||
|
|
||||||
from rl_coach.architectures.tensorflow_components.heads.head import Head, HeadParameters
|
from rl_coach.architectures.tensorflow_components.heads.head import Head
|
||||||
from rl_coach.base_parameters import AgentParameters
|
from rl_coach.base_parameters import AgentParameters
|
||||||
from rl_coach.core_types import QActionStateValue
|
from rl_coach.core_types import QActionStateValue
|
||||||
from rl_coach.spaces import SpacesDefinition
|
from rl_coach.spaces import SpacesDefinition
|
||||||
|
|
||||||
|
|
||||||
class CategoricalQHeadParameters(HeadParameters):
|
|
||||||
def __init__(self, activation_function: str ='relu', name: str='categorical_q_head_params',
|
|
||||||
num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
|
|
||||||
loss_weight: float = 1.0, dense_layer=Dense):
|
|
||||||
super().__init__(parameterized_class=CategoricalQHead, activation_function=activation_function, name=name,
|
|
||||||
dense_layer=dense_layer, num_output_head_copies=num_output_head_copies,
|
|
||||||
rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
|
|
||||||
loss_weight=loss_weight)
|
|
||||||
|
|
||||||
|
|
||||||
class CategoricalQHead(Head):
|
class CategoricalQHead(Head):
|
||||||
def __init__(self, agent_parameters: AgentParameters, spaces: SpacesDefinition, network_name: str,
|
def __init__(self, agent_parameters: AgentParameters, spaces: SpacesDefinition, network_name: str,
|
||||||
head_idx: int = 0, loss_weight: float = 1., is_local: bool = True, activation_function: str ='relu',
|
head_idx: int = 0, loss_weight: float = 1., is_local: bool = True, activation_function: str ='relu',
|
||||||
|
|||||||
@@ -16,25 +16,14 @@
|
|||||||
|
|
||||||
import tensorflow as tf
|
import tensorflow as tf
|
||||||
|
|
||||||
from rl_coach.architectures.tensorflow_components.layers import Dense, batchnorm_activation_dropout
|
from rl_coach.architectures.tensorflow_components.layers import Dense
|
||||||
|
from rl_coach.architectures.tensorflow_components.heads.head import Head
|
||||||
from rl_coach.architectures.tensorflow_components.heads.head import Head, HeadParameters
|
|
||||||
from rl_coach.base_parameters import AgentParameters
|
from rl_coach.base_parameters import AgentParameters
|
||||||
from rl_coach.core_types import QActionStateValue
|
from rl_coach.core_types import QActionStateValue
|
||||||
from rl_coach.spaces import SpacesDefinition, BoxActionSpace, DiscreteActionSpace
|
from rl_coach.spaces import SpacesDefinition, BoxActionSpace, DiscreteActionSpace
|
||||||
from rl_coach.utils import force_list
|
from rl_coach.utils import force_list
|
||||||
|
|
||||||
|
|
||||||
class RegressionHeadParameters(HeadParameters):
|
|
||||||
def __init__(self, activation_function: str ='relu', name: str='q_head_params',
|
|
||||||
num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
|
|
||||||
loss_weight: float = 1.0, dense_layer=Dense, scheme=[Dense(256), Dense(256)]):
|
|
||||||
super().__init__(parameterized_class=RegressionHead, activation_function=activation_function, name=name,
|
|
||||||
dense_layer=dense_layer, num_output_head_copies=num_output_head_copies,
|
|
||||||
rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
|
|
||||||
loss_weight=loss_weight)
|
|
||||||
|
|
||||||
|
|
||||||
class RegressionHead(Head):
|
class RegressionHead(Head):
|
||||||
def __init__(self, agent_parameters: AgentParameters, spaces: SpacesDefinition, network_name: str,
|
def __init__(self, agent_parameters: AgentParameters, spaces: SpacesDefinition, network_name: str,
|
||||||
head_idx: int = 0, loss_weight: float = 1., is_local: bool = True, activation_function: str='relu',
|
head_idx: int = 0, loss_weight: float = 1., is_local: bool = True, activation_function: str='relu',
|
||||||
|
|||||||
@@ -17,23 +17,12 @@
|
|||||||
import tensorflow as tf
|
import tensorflow as tf
|
||||||
|
|
||||||
from rl_coach.architectures.tensorflow_components.layers import batchnorm_activation_dropout, Dense
|
from rl_coach.architectures.tensorflow_components.layers import batchnorm_activation_dropout, Dense
|
||||||
from rl_coach.architectures.tensorflow_components.heads.head import Head, HeadParameters
|
from rl_coach.architectures.tensorflow_components.heads.head import Head
|
||||||
from rl_coach.base_parameters import AgentParameters
|
from rl_coach.base_parameters import AgentParameters
|
||||||
from rl_coach.core_types import ActionProbabilities
|
from rl_coach.core_types import ActionProbabilities
|
||||||
from rl_coach.spaces import SpacesDefinition
|
from rl_coach.spaces import SpacesDefinition
|
||||||
|
|
||||||
|
|
||||||
class DDPGActorHeadParameters(HeadParameters):
|
|
||||||
def __init__(self, activation_function: str ='tanh', name: str='policy_head_params', batchnorm: bool=True,
|
|
||||||
num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
|
|
||||||
loss_weight: float = 1.0, dense_layer=Dense):
|
|
||||||
super().__init__(parameterized_class=DDPGActor, activation_function=activation_function, name=name,
|
|
||||||
dense_layer=dense_layer, num_output_head_copies=num_output_head_copies,
|
|
||||||
rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
|
|
||||||
loss_weight=loss_weight)
|
|
||||||
self.batchnorm = batchnorm
|
|
||||||
|
|
||||||
|
|
||||||
class DDPGActor(Head):
|
class DDPGActor(Head):
|
||||||
def __init__(self, agent_parameters: AgentParameters, spaces: SpacesDefinition, network_name: str,
|
def __init__(self, agent_parameters: AgentParameters, spaces: SpacesDefinition, network_name: str,
|
||||||
head_idx: int = 0, loss_weight: float = 1., is_local: bool = True, activation_function: str='tanh',
|
head_idx: int = 0, loss_weight: float = 1., is_local: bool = True, activation_function: str='tanh',
|
||||||
|
|||||||
@@ -16,23 +16,12 @@
|
|||||||
import tensorflow as tf
|
import tensorflow as tf
|
||||||
|
|
||||||
from rl_coach.architectures.tensorflow_components.layers import Dense
|
from rl_coach.architectures.tensorflow_components.layers import Dense
|
||||||
from rl_coach.architectures.tensorflow_components.heads.head import HeadParameters
|
|
||||||
from rl_coach.architectures.tensorflow_components.heads.q_head import QHead
|
from rl_coach.architectures.tensorflow_components.heads.q_head import QHead
|
||||||
from rl_coach.base_parameters import AgentParameters
|
from rl_coach.base_parameters import AgentParameters
|
||||||
from rl_coach.memories.non_episodic import differentiable_neural_dictionary
|
from rl_coach.memories.non_episodic import differentiable_neural_dictionary
|
||||||
from rl_coach.spaces import SpacesDefinition
|
from rl_coach.spaces import SpacesDefinition
|
||||||
|
|
||||||
|
|
||||||
class DNDQHeadParameters(HeadParameters):
|
|
||||||
def __init__(self, activation_function: str ='relu', name: str='dnd_q_head_params',
|
|
||||||
num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
|
|
||||||
loss_weight: float = 1.0, dense_layer=Dense):
|
|
||||||
super().__init__(parameterized_class=DNDQHead, activation_function=activation_function, name=name,
|
|
||||||
dense_layer=dense_layer, num_output_head_copies=num_output_head_copies,
|
|
||||||
rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
|
|
||||||
loss_weight=loss_weight)
|
|
||||||
|
|
||||||
|
|
||||||
class DNDQHead(QHead):
|
class DNDQHead(QHead):
|
||||||
def __init__(self, agent_parameters: AgentParameters, spaces: SpacesDefinition, network_name: str,
|
def __init__(self, agent_parameters: AgentParameters, spaces: SpacesDefinition, network_name: str,
|
||||||
head_idx: int = 0, loss_weight: float = 1., is_local: bool = True, activation_function: str='relu',
|
head_idx: int = 0, loss_weight: float = 1., is_local: bool = True, activation_function: str='relu',
|
||||||
|
|||||||
@@ -17,21 +17,11 @@
|
|||||||
import tensorflow as tf
|
import tensorflow as tf
|
||||||
|
|
||||||
from rl_coach.architectures.tensorflow_components.layers import Dense
|
from rl_coach.architectures.tensorflow_components.layers import Dense
|
||||||
from rl_coach.architectures.tensorflow_components.heads.head import HeadParameters
|
|
||||||
from rl_coach.architectures.tensorflow_components.heads.q_head import QHead
|
from rl_coach.architectures.tensorflow_components.heads.q_head import QHead
|
||||||
from rl_coach.base_parameters import AgentParameters
|
from rl_coach.base_parameters import AgentParameters
|
||||||
from rl_coach.spaces import SpacesDefinition
|
from rl_coach.spaces import SpacesDefinition
|
||||||
|
|
||||||
|
|
||||||
class DuelingQHeadParameters(HeadParameters):
|
|
||||||
def __init__(self, activation_function: str ='relu', name: str='dueling_q_head_params',
|
|
||||||
num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
|
|
||||||
loss_weight: float = 1.0, dense_layer=Dense):
|
|
||||||
super().__init__(parameterized_class=DuelingQHead, activation_function=activation_function, name=name,
|
|
||||||
dense_layer=dense_layer, num_output_head_copies=num_output_head_copies,
|
|
||||||
rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
|
|
||||||
loss_weight=loss_weight)
|
|
||||||
|
|
||||||
class DuelingQHead(QHead):
|
class DuelingQHead(QHead):
|
||||||
def __init__(self, agent_parameters: AgentParameters, spaces: SpacesDefinition, network_name: str,
|
def __init__(self, agent_parameters: AgentParameters, spaces: SpacesDefinition, network_name: str,
|
||||||
head_idx: int = 0, loss_weight: float = 1., is_local: bool = True, activation_function: str='relu',
|
head_idx: int = 0, loss_weight: float = 1., is_local: bool = True, activation_function: str='relu',
|
||||||
|
|||||||
@@ -33,19 +33,6 @@ def normalized_columns_initializer(std=1.0):
|
|||||||
return _initializer
|
return _initializer
|
||||||
|
|
||||||
|
|
||||||
class HeadParameters(NetworkComponentParameters):
|
|
||||||
def __init__(self, parameterized_class: Type['Head'], activation_function: str = 'relu', name: str= 'head',
|
|
||||||
num_output_head_copies: int=1, rescale_gradient_from_head_by_factor: float=1.0,
|
|
||||||
loss_weight: float=1.0, dense_layer=Dense):
|
|
||||||
super().__init__(dense_layer=dense_layer)
|
|
||||||
self.activation_function = activation_function
|
|
||||||
self.name = name
|
|
||||||
self.num_output_head_copies = num_output_head_copies
|
|
||||||
self.rescale_gradient_from_head_by_factor = rescale_gradient_from_head_by_factor
|
|
||||||
self.loss_weight = loss_weight
|
|
||||||
self.parameterized_class_name = parameterized_class.__name__
|
|
||||||
|
|
||||||
|
|
||||||
class Head(object):
|
class Head(object):
|
||||||
"""
|
"""
|
||||||
A head is the final part of the network. It takes the embedding from the middleware embedder and passes it through
|
A head is the final part of the network. It takes the embedding from the middleware embedder and passes it through
|
||||||
@@ -74,6 +61,8 @@ class Head(object):
|
|||||||
self.return_type = None
|
self.return_type = None
|
||||||
self.activation_function = activation_function
|
self.activation_function = activation_function
|
||||||
self.dense_layer = dense_layer
|
self.dense_layer = dense_layer
|
||||||
|
if self.dense_layer is None:
|
||||||
|
self.dense_layer = Dense
|
||||||
|
|
||||||
def __call__(self, input_layer):
|
def __call__(self, input_layer):
|
||||||
"""
|
"""
|
||||||
|
|||||||
@@ -17,23 +17,12 @@
|
|||||||
import tensorflow as tf
|
import tensorflow as tf
|
||||||
|
|
||||||
from rl_coach.architectures.tensorflow_components.layers import Dense
|
from rl_coach.architectures.tensorflow_components.layers import Dense
|
||||||
|
from rl_coach.architectures.tensorflow_components.heads.head import Head
|
||||||
from rl_coach.architectures.tensorflow_components.heads.head import Head, HeadParameters
|
|
||||||
from rl_coach.base_parameters import AgentParameters
|
from rl_coach.base_parameters import AgentParameters
|
||||||
from rl_coach.core_types import Measurements
|
from rl_coach.core_types import Measurements
|
||||||
from rl_coach.spaces import SpacesDefinition
|
from rl_coach.spaces import SpacesDefinition
|
||||||
|
|
||||||
|
|
||||||
class MeasurementsPredictionHeadParameters(HeadParameters):
|
|
||||||
def __init__(self, activation_function: str ='relu', name: str='measurements_prediction_head_params',
|
|
||||||
num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
|
|
||||||
loss_weight: float = 1.0, dense_layer=Dense):
|
|
||||||
super().__init__(parameterized_class=MeasurementsPredictionHead, activation_function=activation_function, name=name,
|
|
||||||
dense_layer=dense_layer, num_output_head_copies=num_output_head_copies,
|
|
||||||
rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
|
|
||||||
loss_weight=loss_weight)
|
|
||||||
|
|
||||||
|
|
||||||
class MeasurementsPredictionHead(Head):
|
class MeasurementsPredictionHead(Head):
|
||||||
def __init__(self, agent_parameters: AgentParameters, spaces: SpacesDefinition, network_name: str,
|
def __init__(self, agent_parameters: AgentParameters, spaces: SpacesDefinition, network_name: str,
|
||||||
head_idx: int = 0, loss_weight: float = 1., is_local: bool = True, activation_function: str='relu',
|
head_idx: int = 0, loss_weight: float = 1., is_local: bool = True, activation_function: str='relu',
|
||||||
|
|||||||
@@ -17,23 +17,13 @@
|
|||||||
import tensorflow as tf
|
import tensorflow as tf
|
||||||
|
|
||||||
from rl_coach.architectures.tensorflow_components.layers import Dense
|
from rl_coach.architectures.tensorflow_components.layers import Dense
|
||||||
from rl_coach.architectures.tensorflow_components.heads.head import Head, HeadParameters
|
from rl_coach.architectures.tensorflow_components.heads.head import Head
|
||||||
from rl_coach.base_parameters import AgentParameters
|
from rl_coach.base_parameters import AgentParameters
|
||||||
from rl_coach.core_types import QActionStateValue
|
from rl_coach.core_types import QActionStateValue
|
||||||
from rl_coach.spaces import BoxActionSpace
|
from rl_coach.spaces import BoxActionSpace
|
||||||
from rl_coach.spaces import SpacesDefinition
|
from rl_coach.spaces import SpacesDefinition
|
||||||
|
|
||||||
|
|
||||||
class NAFHeadParameters(HeadParameters):
|
|
||||||
def __init__(self, activation_function: str ='tanh', name: str='naf_head_params',
|
|
||||||
num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
|
|
||||||
loss_weight: float = 1.0, dense_layer=Dense):
|
|
||||||
super().__init__(parameterized_class=NAFHead, activation_function=activation_function, name=name,
|
|
||||||
dense_layer=dense_layer, num_output_head_copies=num_output_head_copies,
|
|
||||||
rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
|
|
||||||
loss_weight=loss_weight)
|
|
||||||
|
|
||||||
|
|
||||||
class NAFHead(Head):
|
class NAFHead(Head):
|
||||||
def __init__(self, agent_parameters: AgentParameters, spaces: SpacesDefinition, network_name: str,
|
def __init__(self, agent_parameters: AgentParameters, spaces: SpacesDefinition, network_name: str,
|
||||||
head_idx: int = 0, loss_weight: float = 1., is_local: bool = True,activation_function: str='relu',
|
head_idx: int = 0, loss_weight: float = 1., is_local: bool = True,activation_function: str='relu',
|
||||||
|
|||||||
@@ -18,7 +18,7 @@ import numpy as np
|
|||||||
import tensorflow as tf
|
import tensorflow as tf
|
||||||
|
|
||||||
from rl_coach.architectures.tensorflow_components.layers import Dense
|
from rl_coach.architectures.tensorflow_components.layers import Dense
|
||||||
from rl_coach.architectures.tensorflow_components.heads.head import Head, normalized_columns_initializer, HeadParameters
|
from rl_coach.architectures.tensorflow_components.heads.head import Head, normalized_columns_initializer
|
||||||
from rl_coach.base_parameters import AgentParameters
|
from rl_coach.base_parameters import AgentParameters
|
||||||
from rl_coach.core_types import ActionProbabilities
|
from rl_coach.core_types import ActionProbabilities
|
||||||
from rl_coach.exploration_policies.continuous_entropy import ContinuousEntropyParameters
|
from rl_coach.exploration_policies.continuous_entropy import ContinuousEntropyParameters
|
||||||
@@ -27,17 +27,6 @@ from rl_coach.spaces import SpacesDefinition
|
|||||||
from rl_coach.utils import eps, indent_string
|
from rl_coach.utils import eps, indent_string
|
||||||
|
|
||||||
|
|
||||||
class PolicyHeadParameters(HeadParameters):
|
|
||||||
def __init__(self, activation_function: str ='tanh', name: str='policy_head_params',
|
|
||||||
num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
|
|
||||||
loss_weight: float = 1.0, dense_layer=Dense):
|
|
||||||
super().__init__(parameterized_class=PolicyHead, activation_function=activation_function, name=name,
|
|
||||||
dense_layer=dense_layer, num_output_head_copies=num_output_head_copies,
|
|
||||||
rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
|
|
||||||
loss_weight=loss_weight)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class PolicyHead(Head):
|
class PolicyHead(Head):
|
||||||
def __init__(self, agent_parameters: AgentParameters, spaces: SpacesDefinition, network_name: str,
|
def __init__(self, agent_parameters: AgentParameters, spaces: SpacesDefinition, network_name: str,
|
||||||
head_idx: int = 0, loss_weight: float = 1., is_local: bool = True, activation_function: str='tanh',
|
head_idx: int = 0, loss_weight: float = 1., is_local: bool = True, activation_function: str='tanh',
|
||||||
|
|||||||
@@ -18,7 +18,7 @@ import numpy as np
|
|||||||
import tensorflow as tf
|
import tensorflow as tf
|
||||||
|
|
||||||
from rl_coach.architectures.tensorflow_components.layers import Dense
|
from rl_coach.architectures.tensorflow_components.layers import Dense
|
||||||
from rl_coach.architectures.tensorflow_components.heads.head import Head, HeadParameters, normalized_columns_initializer
|
from rl_coach.architectures.tensorflow_components.heads.head import Head, normalized_columns_initializer
|
||||||
from rl_coach.base_parameters import AgentParameters
|
from rl_coach.base_parameters import AgentParameters
|
||||||
from rl_coach.core_types import ActionProbabilities
|
from rl_coach.core_types import ActionProbabilities
|
||||||
from rl_coach.spaces import BoxActionSpace, DiscreteActionSpace
|
from rl_coach.spaces import BoxActionSpace, DiscreteActionSpace
|
||||||
@@ -26,16 +26,6 @@ from rl_coach.spaces import SpacesDefinition
|
|||||||
from rl_coach.utils import eps
|
from rl_coach.utils import eps
|
||||||
|
|
||||||
|
|
||||||
class PPOHeadParameters(HeadParameters):
|
|
||||||
def __init__(self, activation_function: str ='tanh', name: str='ppo_head_params',
|
|
||||||
num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
|
|
||||||
loss_weight: float = 1.0, dense_layer=Dense):
|
|
||||||
super().__init__(parameterized_class=PPOHead, activation_function=activation_function, name=name,
|
|
||||||
dense_layer=dense_layer, num_output_head_copies=num_output_head_copies,
|
|
||||||
rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
|
|
||||||
loss_weight=loss_weight)
|
|
||||||
|
|
||||||
|
|
||||||
class PPOHead(Head):
|
class PPOHead(Head):
|
||||||
def __init__(self, agent_parameters: AgentParameters, spaces: SpacesDefinition, network_name: str,
|
def __init__(self, agent_parameters: AgentParameters, spaces: SpacesDefinition, network_name: str,
|
||||||
head_idx: int = 0, loss_weight: float = 1., is_local: bool = True, activation_function: str='tanh',
|
head_idx: int = 0, loss_weight: float = 1., is_local: bool = True, activation_function: str='tanh',
|
||||||
|
|||||||
@@ -17,23 +17,12 @@
|
|||||||
import tensorflow as tf
|
import tensorflow as tf
|
||||||
|
|
||||||
from rl_coach.architectures.tensorflow_components.layers import Dense
|
from rl_coach.architectures.tensorflow_components.layers import Dense
|
||||||
|
from rl_coach.architectures.tensorflow_components.heads.head import Head, normalized_columns_initializer
|
||||||
from rl_coach.architectures.tensorflow_components.heads.head import Head, normalized_columns_initializer, HeadParameters
|
|
||||||
from rl_coach.base_parameters import AgentParameters
|
from rl_coach.base_parameters import AgentParameters
|
||||||
from rl_coach.core_types import ActionProbabilities
|
from rl_coach.core_types import ActionProbabilities
|
||||||
from rl_coach.spaces import SpacesDefinition
|
from rl_coach.spaces import SpacesDefinition
|
||||||
|
|
||||||
|
|
||||||
class PPOVHeadParameters(HeadParameters):
|
|
||||||
def __init__(self, activation_function: str ='relu', name: str='ppo_v_head_params',
|
|
||||||
num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
|
|
||||||
loss_weight: float = 1.0, dense_layer=Dense):
|
|
||||||
super().__init__(parameterized_class=PPOVHead, activation_function=activation_function, name=name,
|
|
||||||
dense_layer=dense_layer, num_output_head_copies=num_output_head_copies,
|
|
||||||
rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
|
|
||||||
loss_weight=loss_weight)
|
|
||||||
|
|
||||||
|
|
||||||
class PPOVHead(Head):
|
class PPOVHead(Head):
|
||||||
def __init__(self, agent_parameters: AgentParameters, spaces: SpacesDefinition, network_name: str,
|
def __init__(self, agent_parameters: AgentParameters, spaces: SpacesDefinition, network_name: str,
|
||||||
head_idx: int = 0, loss_weight: float = 1., is_local: bool = True, activation_function: str='relu',
|
head_idx: int = 0, loss_weight: float = 1., is_local: bool = True, activation_function: str='relu',
|
||||||
|
|||||||
@@ -17,23 +17,12 @@
|
|||||||
import tensorflow as tf
|
import tensorflow as tf
|
||||||
|
|
||||||
from rl_coach.architectures.tensorflow_components.layers import Dense
|
from rl_coach.architectures.tensorflow_components.layers import Dense
|
||||||
|
from rl_coach.architectures.tensorflow_components.heads.head import Head
|
||||||
from rl_coach.architectures.tensorflow_components.heads.head import Head, HeadParameters
|
|
||||||
from rl_coach.base_parameters import AgentParameters
|
from rl_coach.base_parameters import AgentParameters
|
||||||
from rl_coach.core_types import QActionStateValue
|
from rl_coach.core_types import QActionStateValue
|
||||||
from rl_coach.spaces import SpacesDefinition, BoxActionSpace, DiscreteActionSpace
|
from rl_coach.spaces import SpacesDefinition, BoxActionSpace, DiscreteActionSpace
|
||||||
|
|
||||||
|
|
||||||
class QHeadParameters(HeadParameters):
|
|
||||||
def __init__(self, activation_function: str ='relu', name: str='q_head_params',
|
|
||||||
num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
|
|
||||||
loss_weight: float = 1.0, dense_layer=Dense):
|
|
||||||
super().__init__(parameterized_class=QHead, activation_function=activation_function, name=name,
|
|
||||||
dense_layer=dense_layer, num_output_head_copies=num_output_head_copies,
|
|
||||||
rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
|
|
||||||
loss_weight=loss_weight)
|
|
||||||
|
|
||||||
|
|
||||||
class QHead(Head):
|
class QHead(Head):
|
||||||
def __init__(self, agent_parameters: AgentParameters, spaces: SpacesDefinition, network_name: str,
|
def __init__(self, agent_parameters: AgentParameters, spaces: SpacesDefinition, network_name: str,
|
||||||
head_idx: int = 0, loss_weight: float = 1., is_local: bool = True, activation_function: str='relu',
|
head_idx: int = 0, loss_weight: float = 1., is_local: bool = True, activation_function: str='relu',
|
||||||
|
|||||||
@@ -17,23 +17,12 @@
|
|||||||
import tensorflow as tf
|
import tensorflow as tf
|
||||||
|
|
||||||
from rl_coach.architectures.tensorflow_components.layers import Dense
|
from rl_coach.architectures.tensorflow_components.layers import Dense
|
||||||
|
from rl_coach.architectures.tensorflow_components.heads.head import Head
|
||||||
from rl_coach.architectures.tensorflow_components.heads.head import Head, HeadParameters
|
|
||||||
from rl_coach.base_parameters import AgentParameters
|
from rl_coach.base_parameters import AgentParameters
|
||||||
from rl_coach.core_types import QActionStateValue
|
from rl_coach.core_types import QActionStateValue
|
||||||
from rl_coach.spaces import SpacesDefinition
|
from rl_coach.spaces import SpacesDefinition
|
||||||
|
|
||||||
|
|
||||||
class QuantileRegressionQHeadParameters(HeadParameters):
|
|
||||||
def __init__(self, activation_function: str ='relu', name: str='quantile_regression_q_head_params',
|
|
||||||
num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
|
|
||||||
loss_weight: float = 1.0, dense_layer=Dense):
|
|
||||||
super().__init__(parameterized_class=QuantileRegressionQHead, activation_function=activation_function, name=name,
|
|
||||||
dense_layer=dense_layer, num_output_head_copies=num_output_head_copies,
|
|
||||||
rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
|
|
||||||
loss_weight=loss_weight)
|
|
||||||
|
|
||||||
|
|
||||||
class QuantileRegressionQHead(Head):
|
class QuantileRegressionQHead(Head):
|
||||||
def __init__(self, agent_parameters: AgentParameters, spaces: SpacesDefinition, network_name: str,
|
def __init__(self, agent_parameters: AgentParameters, spaces: SpacesDefinition, network_name: str,
|
||||||
head_idx: int = 0, loss_weight: float = 1., is_local: bool = True, activation_function: str='relu',
|
head_idx: int = 0, loss_weight: float = 1., is_local: bool = True, activation_function: str='relu',
|
||||||
|
|||||||
@@ -17,22 +17,12 @@
|
|||||||
import tensorflow as tf
|
import tensorflow as tf
|
||||||
|
|
||||||
from rl_coach.architectures.tensorflow_components.layers import Dense
|
from rl_coach.architectures.tensorflow_components.layers import Dense
|
||||||
from rl_coach.architectures.tensorflow_components.heads.head import HeadParameters, Head
|
from rl_coach.architectures.tensorflow_components.heads.head import Head
|
||||||
from rl_coach.base_parameters import AgentParameters
|
from rl_coach.base_parameters import AgentParameters
|
||||||
from rl_coach.core_types import QActionStateValue
|
from rl_coach.core_types import QActionStateValue
|
||||||
from rl_coach.spaces import SpacesDefinition
|
from rl_coach.spaces import SpacesDefinition
|
||||||
|
|
||||||
|
|
||||||
class RainbowQHeadParameters(HeadParameters):
|
|
||||||
def __init__(self, activation_function: str ='relu', name: str='rainbow_q_head_params',
|
|
||||||
num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
|
|
||||||
loss_weight: float = 1.0, dense_layer=Dense):
|
|
||||||
super().__init__(parameterized_class=RainbowQHead, activation_function=activation_function, name=name,
|
|
||||||
dense_layer=dense_layer, num_output_head_copies=num_output_head_copies,
|
|
||||||
rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
|
|
||||||
loss_weight=loss_weight)
|
|
||||||
|
|
||||||
|
|
||||||
class RainbowQHead(Head):
|
class RainbowQHead(Head):
|
||||||
def __init__(self, agent_parameters: AgentParameters, spaces: SpacesDefinition, network_name: str,
|
def __init__(self, agent_parameters: AgentParameters, spaces: SpacesDefinition, network_name: str,
|
||||||
head_idx: int = 0, loss_weight: float = 1., is_local: bool = True, activation_function: str='relu',
|
head_idx: int = 0, loss_weight: float = 1., is_local: bool = True, activation_function: str='relu',
|
||||||
|
|||||||
@@ -17,23 +17,12 @@
|
|||||||
import tensorflow as tf
|
import tensorflow as tf
|
||||||
|
|
||||||
from rl_coach.architectures.tensorflow_components.layers import Dense
|
from rl_coach.architectures.tensorflow_components.layers import Dense
|
||||||
|
from rl_coach.architectures.tensorflow_components.heads.head import Head, normalized_columns_initializer
|
||||||
from rl_coach.architectures.tensorflow_components.heads.head import Head, normalized_columns_initializer, HeadParameters
|
|
||||||
from rl_coach.base_parameters import AgentParameters
|
from rl_coach.base_parameters import AgentParameters
|
||||||
from rl_coach.core_types import VStateValue
|
from rl_coach.core_types import VStateValue
|
||||||
from rl_coach.spaces import SpacesDefinition
|
from rl_coach.spaces import SpacesDefinition
|
||||||
|
|
||||||
|
|
||||||
class VHeadParameters(HeadParameters):
|
|
||||||
def __init__(self, activation_function: str ='relu', name: str='v_head_params',
|
|
||||||
num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
|
|
||||||
loss_weight: float = 1.0, dense_layer=Dense):
|
|
||||||
super().__init__(parameterized_class=VHead, activation_function=activation_function, name=name,
|
|
||||||
dense_layer=dense_layer, num_output_head_copies=num_output_head_copies,
|
|
||||||
rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
|
|
||||||
loss_weight=loss_weight)
|
|
||||||
|
|
||||||
|
|
||||||
class VHead(Head):
|
class VHead(Head):
|
||||||
def __init__(self, agent_parameters: AgentParameters, spaces: SpacesDefinition, network_name: str,
|
def __init__(self, agent_parameters: AgentParameters, spaces: SpacesDefinition, network_name: str,
|
||||||
head_idx: int = 0, loss_weight: float = 1., is_local: bool = True, activation_function: str='relu',
|
head_idx: int = 0, loss_weight: float = 1., is_local: bool = True, activation_function: str='relu',
|
||||||
|
|||||||
@@ -0,0 +1,4 @@
|
|||||||
|
from .fc_middleware import FCMiddleware
|
||||||
|
from .lstm_middleware import LSTMMiddleware
|
||||||
|
|
||||||
|
__all__ = ["FCMiddleware", "LSTMMiddleware"]
|
||||||
|
|||||||
@@ -18,22 +18,12 @@ from typing import Union, List
|
|||||||
import tensorflow as tf
|
import tensorflow as tf
|
||||||
|
|
||||||
from rl_coach.architectures.tensorflow_components.layers import batchnorm_activation_dropout, Dense
|
from rl_coach.architectures.tensorflow_components.layers import batchnorm_activation_dropout, Dense
|
||||||
from rl_coach.architectures.tensorflow_components.middlewares.middleware import Middleware, MiddlewareParameters
|
from rl_coach.architectures.tensorflow_components.middlewares.middleware import Middleware
|
||||||
from rl_coach.base_parameters import MiddlewareScheme
|
from rl_coach.base_parameters import MiddlewareScheme
|
||||||
from rl_coach.core_types import Middleware_FC_Embedding
|
from rl_coach.core_types import Middleware_FC_Embedding
|
||||||
from rl_coach.utils import force_list
|
from rl_coach.utils import force_list
|
||||||
|
|
||||||
|
|
||||||
class FCMiddlewareParameters(MiddlewareParameters):
|
|
||||||
def __init__(self, activation_function='relu',
|
|
||||||
scheme: Union[List, MiddlewareScheme] = MiddlewareScheme.Medium,
|
|
||||||
batchnorm: bool = False, dropout: bool = False,
|
|
||||||
name="middleware_fc_embedder", dense_layer=Dense, is_training=False):
|
|
||||||
super().__init__(parameterized_class=FCMiddleware, activation_function=activation_function,
|
|
||||||
scheme=scheme, batchnorm=batchnorm, dropout=dropout, name=name, dense_layer=dense_layer,
|
|
||||||
is_training=is_training)
|
|
||||||
|
|
||||||
|
|
||||||
class FCMiddleware(Middleware):
|
class FCMiddleware(Middleware):
|
||||||
def __init__(self, activation_function=tf.nn.relu,
|
def __init__(self, activation_function=tf.nn.relu,
|
||||||
scheme: MiddlewareScheme = MiddlewareScheme.Medium,
|
scheme: MiddlewareScheme = MiddlewareScheme.Medium,
|
||||||
|
|||||||
@@ -19,23 +19,12 @@ import numpy as np
|
|||||||
import tensorflow as tf
|
import tensorflow as tf
|
||||||
|
|
||||||
from rl_coach.architectures.tensorflow_components.layers import batchnorm_activation_dropout, Dense
|
from rl_coach.architectures.tensorflow_components.layers import batchnorm_activation_dropout, Dense
|
||||||
from rl_coach.architectures.tensorflow_components.middlewares.middleware import Middleware, MiddlewareParameters
|
from rl_coach.architectures.tensorflow_components.middlewares.middleware import Middleware
|
||||||
from rl_coach.base_parameters import MiddlewareScheme
|
from rl_coach.base_parameters import MiddlewareScheme
|
||||||
from rl_coach.core_types import Middleware_LSTM_Embedding
|
from rl_coach.core_types import Middleware_LSTM_Embedding
|
||||||
from rl_coach.utils import force_list
|
from rl_coach.utils import force_list
|
||||||
|
|
||||||
|
|
||||||
class LSTMMiddlewareParameters(MiddlewareParameters):
|
|
||||||
def __init__(self, activation_function='relu', number_of_lstm_cells=256,
|
|
||||||
scheme: MiddlewareScheme = MiddlewareScheme.Medium,
|
|
||||||
batchnorm: bool = False, dropout: bool = False,
|
|
||||||
name="middleware_lstm_embedder", dense_layer=Dense, is_training=False):
|
|
||||||
super().__init__(parameterized_class=LSTMMiddleware, activation_function=activation_function,
|
|
||||||
scheme=scheme, batchnorm=batchnorm, dropout=dropout, name=name, dense_layer=dense_layer,
|
|
||||||
is_training=is_training)
|
|
||||||
self.number_of_lstm_cells = number_of_lstm_cells
|
|
||||||
|
|
||||||
|
|
||||||
class LSTMMiddleware(Middleware):
|
class LSTMMiddleware(Middleware):
|
||||||
def __init__(self, activation_function=tf.nn.relu, number_of_lstm_cells: int=256,
|
def __init__(self, activation_function=tf.nn.relu, number_of_lstm_cells: int=256,
|
||||||
scheme: MiddlewareScheme = MiddlewareScheme.Medium,
|
scheme: MiddlewareScheme = MiddlewareScheme.Medium,
|
||||||
|
|||||||
@@ -14,7 +14,6 @@
|
|||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
#
|
#
|
||||||
import copy
|
import copy
|
||||||
from typing import Type, Union, List
|
|
||||||
|
|
||||||
import tensorflow as tf
|
import tensorflow as tf
|
||||||
|
|
||||||
@@ -23,20 +22,6 @@ from rl_coach.base_parameters import MiddlewareScheme, NetworkComponentParameter
|
|||||||
from rl_coach.core_types import MiddlewareEmbedding
|
from rl_coach.core_types import MiddlewareEmbedding
|
||||||
|
|
||||||
|
|
||||||
class MiddlewareParameters(NetworkComponentParameters):
|
|
||||||
def __init__(self, parameterized_class: Type['Middleware'],
|
|
||||||
activation_function: str='relu', scheme: Union[List, MiddlewareScheme]=MiddlewareScheme.Medium,
|
|
||||||
batchnorm: bool=False, dropout: bool=False, name='middleware', dense_layer=Dense, is_training=False):
|
|
||||||
super().__init__(dense_layer=dense_layer)
|
|
||||||
self.activation_function = activation_function
|
|
||||||
self.scheme = scheme
|
|
||||||
self.batchnorm = batchnorm
|
|
||||||
self.dropout = dropout
|
|
||||||
self.name = name
|
|
||||||
self.is_training = is_training
|
|
||||||
self.parameterized_class_name = parameterized_class.__name__
|
|
||||||
|
|
||||||
|
|
||||||
class Middleware(object):
|
class Middleware(object):
|
||||||
"""
|
"""
|
||||||
A middleware embedder is the middle part of the network. It takes the embeddings from the input embedders,
|
A middleware embedder is the middle part of the network. It takes the embeddings from the input embedders,
|
||||||
@@ -57,6 +42,8 @@ class Middleware(object):
|
|||||||
self.scheme = scheme
|
self.scheme = scheme
|
||||||
self.return_type = MiddlewareEmbedding
|
self.return_type = MiddlewareEmbedding
|
||||||
self.dense_layer = dense_layer
|
self.dense_layer = dense_layer
|
||||||
|
if self.dense_layer is None:
|
||||||
|
self.dense_layer = Dense
|
||||||
self.is_training = is_training
|
self.is_training = is_training
|
||||||
|
|
||||||
# layers order is conv -> batchnorm -> activation -> dropout
|
# layers order is conv -> batchnorm -> activation -> dropout
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
from rl_coach.agents.actor_critic_agent import ActorCriticAgentParameters
|
from rl_coach.agents.actor_critic_agent import ActorCriticAgentParameters
|
||||||
from rl_coach.architectures.tensorflow_components.middlewares.fc_middleware import FCMiddlewareParameters
|
from rl_coach.architectures.middleware_parameters import FCMiddlewareParameters
|
||||||
from rl_coach.base_parameters import VisualizationParameters, PresetValidationParameters
|
from rl_coach.base_parameters import VisualizationParameters, PresetValidationParameters
|
||||||
from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps
|
from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps
|
||||||
from rl_coach.environments.environment import SingleLevelSelection
|
from rl_coach.environments.environment import SingleLevelSelection
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
from rl_coach.agents.actor_critic_agent import ActorCriticAgentParameters
|
from rl_coach.agents.actor_critic_agent import ActorCriticAgentParameters
|
||||||
from rl_coach.architectures.tensorflow_components.middlewares.lstm_middleware import LSTMMiddlewareParameters
|
from rl_coach.architectures.middleware_parameters import LSTMMiddlewareParameters
|
||||||
from rl_coach.base_parameters import VisualizationParameters, MiddlewareScheme, PresetValidationParameters
|
from rl_coach.base_parameters import VisualizationParameters, MiddlewareScheme, PresetValidationParameters
|
||||||
from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps
|
from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps
|
||||||
from rl_coach.environments.environment import SingleLevelSelection
|
from rl_coach.environments.environment import SingleLevelSelection
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
import math
|
import math
|
||||||
|
|
||||||
from rl_coach.agents.ddqn_agent import DDQNAgentParameters
|
from rl_coach.agents.ddqn_agent import DDQNAgentParameters
|
||||||
from rl_coach.architectures.tensorflow_components.heads.dueling_q_head import DuelingQHeadParameters
|
from rl_coach.architectures.head_parameters import DuelingQHeadParameters
|
||||||
from rl_coach.base_parameters import VisualizationParameters, MiddlewareScheme, PresetValidationParameters
|
from rl_coach.base_parameters import VisualizationParameters, MiddlewareScheme, PresetValidationParameters
|
||||||
from rl_coach.environments.environment import SingleLevelSelection
|
from rl_coach.environments.environment import SingleLevelSelection
|
||||||
from rl_coach.environments.gym_environment import Atari, atari_deterministic_v4, atari_schedule
|
from rl_coach.environments.gym_environment import Atari, atari_deterministic_v4, atari_schedule
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
from rl_coach.agents.ddqn_agent import DDQNAgentParameters
|
from rl_coach.agents.ddqn_agent import DDQNAgentParameters
|
||||||
from rl_coach.architectures.tensorflow_components.heads.dueling_q_head import DuelingQHeadParameters
|
from rl_coach.architectures.head_parameters import DuelingQHeadParameters
|
||||||
from rl_coach.base_parameters import VisualizationParameters, MiddlewareScheme, PresetValidationParameters
|
from rl_coach.base_parameters import VisualizationParameters, MiddlewareScheme, PresetValidationParameters
|
||||||
from rl_coach.core_types import EnvironmentSteps
|
from rl_coach.core_types import EnvironmentSteps
|
||||||
from rl_coach.environments.environment import SingleLevelSelection
|
from rl_coach.environments.environment import SingleLevelSelection
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
from rl_coach.agents.dqn_agent import DQNAgentParameters
|
from rl_coach.agents.dqn_agent import DQNAgentParameters
|
||||||
from rl_coach.architectures.tensorflow_components.embedders.embedder import InputEmbedderParameters
|
from rl_coach.architectures.embedder_parameters import InputEmbedderParameters
|
||||||
from rl_coach.architectures.tensorflow_components.layers import Dense
|
from rl_coach.architectures.tensorflow_components.layers import Dense
|
||||||
from rl_coach.base_parameters import VisualizationParameters, EmbedderScheme, \
|
from rl_coach.base_parameters import VisualizationParameters, EmbedderScheme, \
|
||||||
PresetValidationParameters
|
PresetValidationParameters
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
from rl_coach.agents.dqn_agent import DQNAgentParameters
|
from rl_coach.agents.dqn_agent import DQNAgentParameters
|
||||||
from rl_coach.architectures.tensorflow_components.embedders.embedder import InputEmbedderParameters
|
from rl_coach.architectures.embedder_parameters import InputEmbedderParameters
|
||||||
from rl_coach.architectures.tensorflow_components.layers import Dense
|
from rl_coach.architectures.tensorflow_components.layers import Dense
|
||||||
from rl_coach.base_parameters import VisualizationParameters, EmbedderScheme, \
|
from rl_coach.base_parameters import VisualizationParameters, EmbedderScheme, \
|
||||||
PresetValidationParameters
|
PresetValidationParameters
|
||||||
|
|||||||
@@ -7,10 +7,10 @@ from carla.driving_benchmark.experiment_suites import CoRL2017
|
|||||||
from rl_coach.logger import screen
|
from rl_coach.logger import screen
|
||||||
|
|
||||||
from rl_coach.agents.cil_agent import CILAgentParameters
|
from rl_coach.agents.cil_agent import CILAgentParameters
|
||||||
from rl_coach.architectures.tensorflow_components.embedders.embedder import InputEmbedderParameters
|
from rl_coach.architectures.embedder_parameters import InputEmbedderParameters
|
||||||
from rl_coach.architectures.tensorflow_components.heads.cil_head import RegressionHeadParameters
|
from rl_coach.architectures.head_parameters import RegressionHeadParameters
|
||||||
|
from rl_coach.architectures.middleware_parameters import FCMiddlewareParameters
|
||||||
from rl_coach.architectures.tensorflow_components.layers import Conv2d, Dense, BatchnormActivationDropout
|
from rl_coach.architectures.tensorflow_components.layers import Conv2d, Dense, BatchnormActivationDropout
|
||||||
from rl_coach.architectures.tensorflow_components.middlewares.fc_middleware import FCMiddlewareParameters
|
|
||||||
from rl_coach.base_parameters import VisualizationParameters
|
from rl_coach.base_parameters import VisualizationParameters
|
||||||
from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps
|
from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps
|
||||||
from rl_coach.environments.carla_environment import CarlaEnvironmentParameters
|
from rl_coach.environments.carla_environment import CarlaEnvironmentParameters
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
import math
|
import math
|
||||||
|
|
||||||
from rl_coach.agents.ddqn_agent import DDQNAgentParameters
|
from rl_coach.agents.ddqn_agent import DDQNAgentParameters
|
||||||
from rl_coach.architectures.tensorflow_components.heads.dueling_q_head import DuelingQHeadParameters
|
from rl_coach.architectures.head_parameters import DuelingQHeadParameters
|
||||||
from rl_coach.base_parameters import VisualizationParameters, MiddlewareScheme
|
from rl_coach.base_parameters import VisualizationParameters, MiddlewareScheme
|
||||||
from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps
|
from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps
|
||||||
from rl_coach.environments.carla_environment import CarlaEnvironmentParameters
|
from rl_coach.environments.carla_environment import CarlaEnvironmentParameters
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
import math
|
import math
|
||||||
|
|
||||||
from rl_coach.agents.ddqn_agent import DDQNAgentParameters
|
from rl_coach.agents.ddqn_agent import DDQNAgentParameters
|
||||||
from rl_coach.architectures.tensorflow_components.heads.dueling_q_head import DuelingQHeadParameters
|
from rl_coach.architectures.head_parameters import DuelingQHeadParameters
|
||||||
from rl_coach.base_parameters import VisualizationParameters, PresetValidationParameters
|
from rl_coach.base_parameters import VisualizationParameters, PresetValidationParameters
|
||||||
from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps
|
from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps
|
||||||
from rl_coach.environments.gym_environment import GymVectorEnvironment
|
from rl_coach.environments.gym_environment import GymVectorEnvironment
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
from rl_coach.agents.ddqn_agent import DDQNAgentParameters
|
from rl_coach.agents.ddqn_agent import DDQNAgentParameters
|
||||||
from rl_coach.architectures.tensorflow_components.heads.dueling_q_head import DuelingQHeadParameters
|
from rl_coach.architectures.head_parameters import DuelingQHeadParameters
|
||||||
from rl_coach.base_parameters import VisualizationParameters
|
from rl_coach.base_parameters import VisualizationParameters
|
||||||
from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps
|
from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps
|
||||||
from rl_coach.environments.doom_environment import DoomEnvironmentParameters
|
from rl_coach.environments.doom_environment import DoomEnvironmentParameters
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
from rl_coach.agents.ddqn_agent import DDQNAgentParameters
|
from rl_coach.agents.ddqn_agent import DDQNAgentParameters
|
||||||
from rl_coach.architectures.tensorflow_components.heads.dueling_q_head import DuelingQHeadParameters
|
from rl_coach.architectures.head_parameters import DuelingQHeadParameters
|
||||||
from rl_coach.base_parameters import VisualizationParameters
|
from rl_coach.base_parameters import VisualizationParameters
|
||||||
from rl_coach.core_types import EnvironmentEpisodes, EnvironmentSteps
|
from rl_coach.core_types import EnvironmentEpisodes, EnvironmentSteps
|
||||||
from rl_coach.environments.gym_environment import GymEnvironmentParameters
|
from rl_coach.environments.gym_environment import GymEnvironmentParameters
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
from rl_coach.agents.ddpg_agent import DDPGAgentParameters
|
from rl_coach.agents.ddpg_agent import DDPGAgentParameters
|
||||||
from rl_coach.architectures.tensorflow_components.embedders.embedder import InputEmbedderParameters
|
from rl_coach.architectures.embedder_parameters import InputEmbedderParameters
|
||||||
|
from rl_coach.architectures.middleware_parameters import FCMiddlewareParameters
|
||||||
from rl_coach.architectures.tensorflow_components.layers import Dense
|
from rl_coach.architectures.tensorflow_components.layers import Dense
|
||||||
from rl_coach.architectures.tensorflow_components.middlewares.fc_middleware import FCMiddlewareParameters
|
|
||||||
from rl_coach.base_parameters import VisualizationParameters, EmbedderScheme, PresetValidationParameters
|
from rl_coach.base_parameters import VisualizationParameters, EmbedderScheme, PresetValidationParameters
|
||||||
from rl_coach.core_types import EnvironmentEpisodes, EnvironmentSteps, TrainingSteps
|
from rl_coach.core_types import EnvironmentEpisodes, EnvironmentSteps, TrainingSteps
|
||||||
from rl_coach.environments.environment import SingleLevelSelection
|
from rl_coach.environments.environment import SingleLevelSelection
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
from rl_coach.agents.actor_critic_agent import ActorCriticAgentParameters
|
from rl_coach.agents.actor_critic_agent import ActorCriticAgentParameters
|
||||||
from rl_coach.architectures.tensorflow_components.embedders.embedder import InputEmbedderParameters
|
from rl_coach.architectures.embedder_parameters import InputEmbedderParameters
|
||||||
|
from rl_coach.architectures.middleware_parameters import LSTMMiddlewareParameters
|
||||||
from rl_coach.architectures.tensorflow_components.layers import Dense
|
from rl_coach.architectures.tensorflow_components.layers import Dense
|
||||||
from rl_coach.architectures.tensorflow_components.middlewares.lstm_middleware import LSTMMiddlewareParameters
|
|
||||||
from rl_coach.base_parameters import VisualizationParameters, MiddlewareScheme, PresetValidationParameters
|
from rl_coach.base_parameters import VisualizationParameters, MiddlewareScheme, PresetValidationParameters
|
||||||
from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps
|
from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps
|
||||||
from rl_coach.environments.environment import SingleLevelSelection
|
from rl_coach.environments.environment import SingleLevelSelection
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
from rl_coach.agents.hac_ddpg_agent import HACDDPGAgentParameters
|
from rl_coach.agents.hac_ddpg_agent import HACDDPGAgentParameters
|
||||||
from rl_coach.architectures.tensorflow_components.embedders.embedder import InputEmbedderParameters
|
from rl_coach.architectures.embedder_parameters import InputEmbedderParameters
|
||||||
from rl_coach.architectures.tensorflow_components.layers import Dense
|
from rl_coach.architectures.tensorflow_components.layers import Dense
|
||||||
from rl_coach.base_parameters import VisualizationParameters, EmbeddingMergerType, EmbedderScheme
|
from rl_coach.base_parameters import VisualizationParameters, EmbeddingMergerType, EmbedderScheme
|
||||||
from rl_coach.core_types import EnvironmentEpisodes, EnvironmentSteps, TrainingSteps
|
from rl_coach.core_types import EnvironmentEpisodes, EnvironmentSteps, TrainingSteps
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
from rl_coach.agents.actor_critic_agent import ActorCriticAgentParameters
|
from rl_coach.agents.actor_critic_agent import ActorCriticAgentParameters
|
||||||
from rl_coach.agents.policy_optimization_agent import PolicyGradientRescaler
|
from rl_coach.agents.policy_optimization_agent import PolicyGradientRescaler
|
||||||
from rl_coach.architectures.tensorflow_components.embedders.embedder import InputEmbedderParameters
|
from rl_coach.architectures.embedder_parameters import InputEmbedderParameters
|
||||||
from rl_coach.base_parameters import VisualizationParameters
|
from rl_coach.base_parameters import VisualizationParameters
|
||||||
from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps
|
from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps
|
||||||
from rl_coach.environments.starcraft2_environment import StarCraft2EnvironmentParameters
|
from rl_coach.environments.starcraft2_environment import StarCraft2EnvironmentParameters
|
||||||
|
|||||||
@@ -1,8 +1,8 @@
|
|||||||
from collections import OrderedDict
|
from collections import OrderedDict
|
||||||
|
|
||||||
from rl_coach.agents.ddqn_agent import DDQNAgentParameters
|
from rl_coach.agents.ddqn_agent import DDQNAgentParameters
|
||||||
from rl_coach.architectures.tensorflow_components.embedders.embedder import InputEmbedderParameters
|
from rl_coach.architectures.embedder_parameters import InputEmbedderParameters
|
||||||
from rl_coach.architectures.tensorflow_components.heads.dueling_q_head import DuelingQHeadParameters
|
from rl_coach.architectures.head_parameters import DuelingQHeadParameters
|
||||||
from rl_coach.base_parameters import VisualizationParameters
|
from rl_coach.base_parameters import VisualizationParameters
|
||||||
from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps
|
from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps
|
||||||
from rl_coach.environments.starcraft2_environment import StarCraft2EnvironmentParameters
|
from rl_coach.environments.starcraft2_environment import StarCraft2EnvironmentParameters
|
||||||
|
|||||||
@@ -134,7 +134,7 @@
|
|||||||
"from rl_coach.environments.gym_environment import GymVectorEnvironment\n",
|
"from rl_coach.environments.gym_environment import GymVectorEnvironment\n",
|
||||||
"from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager\n",
|
"from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager\n",
|
||||||
"from rl_coach.graph_managers.graph_manager import SimpleSchedule\n",
|
"from rl_coach.graph_managers.graph_manager import SimpleSchedule\n",
|
||||||
"from rl_coach.architectures.tensorflow_components.embedders.embedder import InputEmbedderParameters\n",
|
"from rl_coach.architectures.embedder_parameters import InputEmbedderParameters\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# define the environment parameters\n",
|
"# define the environment parameters\n",
|
||||||
"bit_length = 10\n",
|
"bit_length = 10\n",
|
||||||
|
|||||||
@@ -162,9 +162,9 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"from rl_coach.architectures.tensorflow_components.architecture import Dense\n",
|
"from rl_coach.architectures.tensorflow_components.layers import Dense\n",
|
||||||
"from rl_coach.base_parameters import VisualizationParameters, EmbeddingMergerType, EmbedderScheme\n",
|
"from rl_coach.base_parameters import VisualizationParameters, EmbeddingMergerType, EmbedderScheme\n",
|
||||||
"from rl_coach.architectures.tensorflow_components.embedders.embedder import InputEmbedderParameters\n",
|
"from rl_coach.architectures.embedder_parameters import InputEmbedderParameters\n",
|
||||||
"from rl_coach.memories.episodic.episodic_hindsight_experience_replay import HindsightGoalSelectionMethod, \\\n",
|
"from rl_coach.memories.episodic.episodic_hindsight_experience_replay import HindsightGoalSelectionMethod, \\\n",
|
||||||
" EpisodicHindsightExperienceReplayParameters\n",
|
" EpisodicHindsightExperienceReplayParameters\n",
|
||||||
"from rl_coach.memories.episodic.episodic_hrl_hindsight_experience_replay import \\\n",
|
"from rl_coach.memories.episodic.episodic_hrl_hindsight_experience_replay import \\\n",
|
||||||
|
|||||||
Reference in New Issue
Block a user