diff --git a/rl_coach/agents/actor_critic_agent.py b/rl_coach/agents/actor_critic_agent.py index df549b7..ac65c41 100644 --- a/rl_coach/agents/actor_critic_agent.py +++ b/rl_coach/agents/actor_critic_agent.py @@ -18,18 +18,17 @@ from typing import Union import numpy as np import scipy.signal + from rl_coach.agents.policy_optimization_agent import PolicyOptimizationAgent, PolicyGradientRescaler from rl_coach.architectures.tensorflow_components.heads.policy_head import PolicyHeadParameters from rl_coach.architectures.tensorflow_components.heads.v_head import VHeadParameters from rl_coach.architectures.tensorflow_components.middlewares.fc_middleware import FCMiddlewareParameters from rl_coach.base_parameters import AlgorithmParameters, NetworkParameters, \ AgentParameters, InputEmbedderParameters -from rl_coach.core_types import QActionStateValue -from rl_coach.spaces import DiscreteActionSpace -from rl_coach.utils import last_sample - from rl_coach.logger import screen from rl_coach.memories.episodic.single_episode_buffer import SingleEpisodeBufferParameters +from rl_coach.spaces import DiscreteActionSpace +from rl_coach.utils import last_sample class ActorCriticAlgorithmParameters(AlgorithmParameters): diff --git a/rl_coach/agents/agent.py b/rl_coach/agents/agent.py index b6c7241..8f48d34 100644 --- a/rl_coach/agents/agent.py +++ b/rl_coach/agents/agent.py @@ -20,20 +20,19 @@ from collections import OrderedDict from typing import Dict, List, Union, Tuple import numpy as np +from pandas import read_pickle +from six.moves import range from rl_coach.agents.agent_interface import AgentInterface +from rl_coach.architectures.network_wrapper import NetworkWrapper from rl_coach.base_parameters import AgentParameters, DistributedTaskParameters from rl_coach.core_types import RunPhase, PredictionType, EnvironmentEpisodes, ActionType, Batch, Episode, StateType from rl_coach.core_types import Transition, ActionInfo, TrainingSteps, EnvironmentSteps, EnvResponse -from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplay -from pandas import read_pickle -from six.moves import range -from rl_coach.spaces import SpacesDefinition, VectorObservationSpace, GoalsSpace, AttentionActionSpace -from rl_coach.utils import Signal, force_list, set_cpu -from rl_coach.utils import dynamic_import_and_instantiate_module_from_params - -from rl_coach.architectures.network_wrapper import NetworkWrapper from rl_coach.logger import screen, Logger, EpisodeLogger +from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplay +from rl_coach.spaces import SpacesDefinition, VectorObservationSpace, GoalsSpace, AttentionActionSpace +from rl_coach.utils import Signal, force_list +from rl_coach.utils import dynamic_import_and_instantiate_module_from_params class Agent(AgentInterface): diff --git a/rl_coach/agents/bc_agent.py b/rl_coach/agents/bc_agent.py index 37e1aef..2499bfb 100644 --- a/rl_coach/agents/bc_agent.py +++ b/rl_coach/agents/bc_agent.py @@ -17,14 +17,14 @@ from typing import Union import numpy as np + from rl_coach.agents.imitation_agent import ImitationAgent from rl_coach.architectures.tensorflow_components.heads.policy_head import PolicyHeadParameters from rl_coach.architectures.tensorflow_components.middlewares.fc_middleware import FCMiddlewareParameters -from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplayParameters - from rl_coach.base_parameters import AgentParameters, AlgorithmParameters, NetworkParameters, 
InputEmbedderParameters, \ MiddlewareScheme from rl_coach.exploration_policies.e_greedy import EGreedyParameters +from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplayParameters class BCAlgorithmParameters(AlgorithmParameters): diff --git a/rl_coach/agents/bootstrapped_dqn_agent.py b/rl_coach/agents/bootstrapped_dqn_agent.py index bef2233..b5814d3 100644 --- a/rl_coach/agents/bootstrapped_dqn_agent.py +++ b/rl_coach/agents/bootstrapped_dqn_agent.py @@ -17,9 +17,9 @@ from typing import Union import numpy as np + from rl_coach.agents.dqn_agent import DQNAgentParameters, DQNNetworkParameters from rl_coach.agents.value_optimization_agent import ValueOptimizationAgent - from rl_coach.exploration_policies.bootstrapped import BootstrappedParameters diff --git a/rl_coach/agents/categorical_dqn_agent.py b/rl_coach/agents/categorical_dqn_agent.py index 191ec2f..c463d94 100644 --- a/rl_coach/agents/categorical_dqn_agent.py +++ b/rl_coach/agents/categorical_dqn_agent.py @@ -17,15 +17,15 @@ from typing import Union import numpy as np + from rl_coach.agents.dqn_agent import DQNNetworkParameters, DQNAlgorithmParameters from rl_coach.agents.value_optimization_agent import ValueOptimizationAgent from rl_coach.architectures.tensorflow_components.heads.categorical_q_head import CategoricalQHeadParameters from rl_coach.base_parameters import AgentParameters -from rl_coach.memories.non_episodic.experience_replay import ExperienceReplayParameters -from rl_coach.schedules import LinearSchedule - from rl_coach.core_types import StateType from rl_coach.exploration_policies.e_greedy import EGreedyParameters +from rl_coach.memories.non_episodic.experience_replay import ExperienceReplayParameters +from rl_coach.schedules import LinearSchedule class CategoricalDQNNetworkParameters(DQNNetworkParameters): diff --git a/rl_coach/agents/clipped_ppo_agent.py b/rl_coach/agents/clipped_ppo_agent.py index 4588035..f6f3796 100644 --- a/rl_coach/agents/clipped_ppo_agent.py +++ b/rl_coach/agents/clipped_ppo_agent.py @@ -20,21 +20,21 @@ from random import shuffle from typing import Union import numpy as np + from rl_coach.agents.actor_critic_agent import ActorCriticAgent from rl_coach.agents.policy_optimization_agent import PolicyGradientRescaler +from rl_coach.architectures.tensorflow_components.heads.ppo_head import PPOHeadParameters from rl_coach.architectures.tensorflow_components.heads.v_head import VHeadParameters from rl_coach.architectures.tensorflow_components.middlewares.fc_middleware import FCMiddlewareParameters from rl_coach.base_parameters import AlgorithmParameters, NetworkParameters, \ AgentParameters, InputEmbedderParameters from rl_coach.core_types import EnvironmentSteps, Batch, EnvResponse, StateType from rl_coach.exploration_policies.additive_noise import AdditiveNoiseParameters +from rl_coach.logger import screen from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplayParameters from rl_coach.schedules import ConstantSchedule from rl_coach.spaces import DiscreteActionSpace -from rl_coach.architectures.tensorflow_components.heads.ppo_head import PPOHeadParameters -from rl_coach.logger import screen - class ClippedPPONetworkParameters(NetworkParameters): def __init__(self): diff --git a/rl_coach/agents/composite_agent.py b/rl_coach/agents/composite_agent.py index d44246f..5f8f53f 100644 --- a/rl_coach/agents/composite_agent.py +++ b/rl_coach/agents/composite_agent.py @@ -20,15 +20,14 @@ from enum import Enum from typing import Union, List, 
Dict import numpy as np + from rl_coach.agents.agent_interface import AgentInterface from rl_coach.base_parameters import AgentParameters, VisualizationParameters -# from rl_coach.environments.environment_interface import ActionSpace -from rl_coach.spaces import ActionSpace -from rl_coach.spaces import AgentSelection, AttentionActionSpace, ObservationSpace, SpacesDefinition -from rl_coach.utils import short_dynamic_import - from rl_coach.core_types import ActionInfo, EnvResponse, ActionType, RunPhase from rl_coach.filters.observation.observation_crop_filter import ObservationCropFilter +from rl_coach.spaces import ActionSpace +from rl_coach.spaces import AgentSelection, AttentionActionSpace, SpacesDefinition +from rl_coach.utils import short_dynamic_import class DecisionPolicy(object): diff --git a/rl_coach/agents/ddpg_agent.py b/rl_coach/agents/ddpg_agent.py index 4efeafd..dd08069 100644 --- a/rl_coach/agents/ddpg_agent.py +++ b/rl_coach/agents/ddpg_agent.py @@ -19,19 +19,19 @@ from typing import Union from collections import OrderedDict import numpy as np + from rl_coach.agents.actor_critic_agent import ActorCriticAgent from rl_coach.agents.agent import Agent +from rl_coach.architectures.tensorflow_components.heads.ddpg_actor_head import DDPGActorHeadParameters from rl_coach.architectures.tensorflow_components.heads.v_head import VHeadParameters from rl_coach.architectures.tensorflow_components.middlewares.fc_middleware import FCMiddlewareParameters from rl_coach.base_parameters import NetworkParameters, AlgorithmParameters, \ AgentParameters, InputEmbedderParameters, EmbedderScheme +from rl_coach.core_types import ActionInfo, EnvironmentSteps from rl_coach.exploration_policies.ou_process import OUProcessParameters from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplayParameters from rl_coach.spaces import BoxActionSpace, GoalsSpace -from rl_coach.architectures.tensorflow_components.heads.ddpg_actor_head import DDPGActorHeadParameters -from rl_coach.core_types import ActionInfo, EnvironmentSteps - class DDPGCriticNetworkParameters(NetworkParameters): def __init__(self): diff --git a/rl_coach/agents/ddqn_agent.py b/rl_coach/agents/ddqn_agent.py index 3c93f12..5268e6d 100644 --- a/rl_coach/agents/ddqn_agent.py +++ b/rl_coach/agents/ddqn_agent.py @@ -17,11 +17,11 @@ from typing import Union import numpy as np -from rl_coach.schedules import LinearSchedule from rl_coach.agents.dqn_agent import DQNAgentParameters from rl_coach.agents.value_optimization_agent import ValueOptimizationAgent from rl_coach.core_types import EnvironmentSteps +from rl_coach.schedules import LinearSchedule class DDQNAgentParameters(DQNAgentParameters): diff --git a/rl_coach/agents/dfp_agent.py b/rl_coach/agents/dfp_agent.py index d5bd4c7..425bf8c 100644 --- a/rl_coach/agents/dfp_agent.py +++ b/rl_coach/agents/dfp_agent.py @@ -19,19 +19,20 @@ from enum import Enum from typing import Union import numpy as np + from rl_coach.agents.agent import Agent from rl_coach.architectures.tensorflow_components.architecture import Conv2d, Dense -from rl_coach.architectures.tensorflow_components.heads.measurements_prediction_head import MeasurementsPredictionHeadParameters +from rl_coach.architectures.tensorflow_components.heads.measurements_prediction_head import \ + MeasurementsPredictionHeadParameters from rl_coach.architectures.tensorflow_components.middlewares.fc_middleware import FCMiddlewareParameters from rl_coach.base_parameters import AlgorithmParameters, AgentParameters, 
NetworkParameters, \ InputEmbedderParameters, MiddlewareScheme +from rl_coach.core_types import ActionInfo, EnvironmentSteps, RunPhase +from rl_coach.exploration_policies.e_greedy import EGreedyParameters from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplayParameters from rl_coach.memories.memory import MemoryGranularity from rl_coach.spaces import SpacesDefinition, VectorObservationSpace -from rl_coach.core_types import ActionInfo, EnvironmentSteps, RunPhase -from rl_coach.exploration_policies.e_greedy import EGreedyParameters - class HandlingTargetsAfterEpisodeEnd(Enum): LastStep = 0 diff --git a/rl_coach/agents/dqn_agent.py b/rl_coach/agents/dqn_agent.py index 4858f86..f78f464 100644 --- a/rl_coach/agents/dqn_agent.py +++ b/rl_coach/agents/dqn_agent.py @@ -17,16 +17,16 @@ from typing import Union import numpy as np + from rl_coach.agents.value_optimization_agent import ValueOptimizationAgent from rl_coach.architectures.tensorflow_components.heads.q_head import QHeadParameters from rl_coach.architectures.tensorflow_components.middlewares.fc_middleware import FCMiddlewareParameters from rl_coach.base_parameters import AlgorithmParameters, NetworkParameters, AgentParameters, \ InputEmbedderParameters, MiddlewareScheme -from rl_coach.memories.non_episodic.experience_replay import ExperienceReplayParameters -from rl_coach.schedules import LinearSchedule - from rl_coach.core_types import EnvironmentSteps from rl_coach.exploration_policies.e_greedy import EGreedyParameters +from rl_coach.memories.non_episodic.experience_replay import ExperienceReplayParameters +from rl_coach.schedules import LinearSchedule class DQNAlgorithmParameters(AlgorithmParameters): diff --git a/rl_coach/agents/hac_ddpg_agent.py b/rl_coach/agents/hac_ddpg_agent.py index e87969e..5313da8 100644 --- a/rl_coach/agents/hac_ddpg_agent.py +++ b/rl_coach/agents/hac_ddpg_agent.py @@ -17,7 +17,6 @@ from typing import Union import numpy as np -import copy from rl_coach.agents.ddpg_agent import DDPGAgent, DDPGAgentParameters, DDPGAlgorithmParameters from rl_coach.core_types import RunPhase diff --git a/rl_coach/agents/human_agent.py b/rl_coach/agents/human_agent.py index d839fe3..d3c54c9 100644 --- a/rl_coach/agents/human_agent.py +++ b/rl_coach/agents/human_agent.py @@ -19,6 +19,8 @@ from collections import OrderedDict from typing import Union import pygame +from pandas import to_pickle + from rl_coach.agents.agent import Agent from rl_coach.agents.bc_agent import BCNetworkParameters from rl_coach.architectures.tensorflow_components.heads.policy_head import PolicyHeadParameters @@ -26,11 +28,9 @@ from rl_coach.architectures.tensorflow_components.middlewares.fc_middleware impo from rl_coach.base_parameters import AlgorithmParameters, NetworkParameters, InputEmbedderParameters, EmbedderScheme, \ AgentParameters from rl_coach.core_types import ActionInfo -from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplayParameters -from pandas import to_pickle - from rl_coach.exploration_policies.e_greedy import EGreedyParameters from rl_coach.logger import screen +from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplayParameters class HumanAlgorithmParameters(AlgorithmParameters): diff --git a/rl_coach/agents/imitation_agent.py b/rl_coach/agents/imitation_agent.py index 9136726..19ae834 100644 --- a/rl_coach/agents/imitation_agent.py +++ b/rl_coach/agents/imitation_agent.py @@ -17,11 +17,10 @@ from collections import OrderedDict from 
typing import Union -from rl_coach.core_types import RunPhase, ActionInfo -from rl_coach.spaces import DiscreteActionSpace - from rl_coach.agents.agent import Agent +from rl_coach.core_types import RunPhase, ActionInfo from rl_coach.logger import screen +from rl_coach.spaces import DiscreteActionSpace ## This is an abstract agent - there is no learn_from_batch method ## diff --git a/rl_coach/agents/n_step_q_agent.py b/rl_coach/agents/n_step_q_agent.py index 227c122..39a9b3a 100644 --- a/rl_coach/agents/n_step_q_agent.py +++ b/rl_coach/agents/n_step_q_agent.py @@ -17,17 +17,17 @@ from typing import Union import numpy as np + from rl_coach.agents.policy_optimization_agent import PolicyOptimizationAgent from rl_coach.agents.value_optimization_agent import ValueOptimizationAgent from rl_coach.architectures.tensorflow_components.heads.q_head import QHeadParameters from rl_coach.architectures.tensorflow_components.middlewares.fc_middleware import FCMiddlewareParameters from rl_coach.base_parameters import AlgorithmParameters, AgentParameters, NetworkParameters, \ InputEmbedderParameters -from rl_coach.exploration_policies.e_greedy import EGreedyParameters -from rl_coach.utils import last_sample - from rl_coach.core_types import EnvironmentSteps +from rl_coach.exploration_policies.e_greedy import EGreedyParameters from rl_coach.memories.episodic.single_episode_buffer import SingleEpisodeBufferParameters +from rl_coach.utils import last_sample class NStepQNetworkParameters(NetworkParameters): diff --git a/rl_coach/agents/naf_agent.py b/rl_coach/agents/naf_agent.py index 7a218fb..8c00dc9 100644 --- a/rl_coach/agents/naf_agent.py +++ b/rl_coach/agents/naf_agent.py @@ -17,16 +17,16 @@ from typing import Union import numpy as np + from rl_coach.agents.value_optimization_agent import ValueOptimizationAgent from rl_coach.architectures.tensorflow_components.heads.naf_head import NAFHeadParameters from rl_coach.architectures.tensorflow_components.middlewares.fc_middleware import FCMiddlewareParameters from rl_coach.base_parameters import AlgorithmParameters, AgentParameters, \ NetworkParameters, InputEmbedderParameters -from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplayParameters -from rl_coach.spaces import BoxActionSpace - from rl_coach.core_types import ActionInfo, EnvironmentSteps from rl_coach.exploration_policies.ou_process import OUProcessParameters +from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplayParameters +from rl_coach.spaces import BoxActionSpace class NAFNetworkParameters(NetworkParameters): diff --git a/rl_coach/agents/nec_agent.py b/rl_coach/agents/nec_agent.py index c135b3c..faf31de 100644 --- a/rl_coach/agents/nec_agent.py +++ b/rl_coach/agents/nec_agent.py @@ -19,17 +19,17 @@ import pickle from typing import Union import numpy as np + from rl_coach.agents.value_optimization_agent import ValueOptimizationAgent from rl_coach.architectures.tensorflow_components.heads.dnd_q_head import DNDQHeadParameters from rl_coach.architectures.tensorflow_components.middlewares.fc_middleware import FCMiddlewareParameters from rl_coach.base_parameters import AlgorithmParameters, NetworkParameters, AgentParameters, \ InputEmbedderParameters from rl_coach.core_types import RunPhase, EnvironmentSteps, Episode, StateType -from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplayParameters, MemoryGranularity -from rl_coach.schedules import ConstantSchedule - from 
rl_coach.exploration_policies.e_greedy import EGreedyParameters from rl_coach.logger import screen +from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplayParameters, MemoryGranularity +from rl_coach.schedules import ConstantSchedule class NECNetworkParameters(NetworkParameters): diff --git a/rl_coach/agents/pal_agent.py b/rl_coach/agents/pal_agent.py index 3c1d657..cb928e7 100644 --- a/rl_coach/agents/pal_agent.py +++ b/rl_coach/agents/pal_agent.py @@ -20,8 +20,7 @@ import numpy as np from rl_coach.agents.dqn_agent import DQNAgentParameters, DQNAlgorithmParameters from rl_coach.agents.value_optimization_agent import ValueOptimizationAgent -from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplay, \ - EpisodicExperienceReplayParameters +from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplayParameters class PALAlgorithmParameters(DQNAlgorithmParameters): diff --git a/rl_coach/agents/policy_gradients_agent.py b/rl_coach/agents/policy_gradients_agent.py index 4d67e42..d14073b 100644 --- a/rl_coach/agents/policy_gradients_agent.py +++ b/rl_coach/agents/policy_gradients_agent.py @@ -17,16 +17,16 @@ from typing import Union import numpy as np + from rl_coach.agents.policy_optimization_agent import PolicyOptimizationAgent, PolicyGradientRescaler from rl_coach.architectures.tensorflow_components.heads.policy_head import PolicyHeadParameters from rl_coach.architectures.tensorflow_components.middlewares.fc_middleware import FCMiddlewareParameters from rl_coach.base_parameters import NetworkParameters, AlgorithmParameters, \ AgentParameters, InputEmbedderParameters from rl_coach.exploration_policies.additive_noise import AdditiveNoiseParameters -from rl_coach.spaces import DiscreteActionSpace - from rl_coach.logger import screen from rl_coach.memories.episodic.single_episode_buffer import SingleEpisodeBufferParameters +from rl_coach.spaces import DiscreteActionSpace class PolicyGradientNetworkParameters(NetworkParameters): diff --git a/rl_coach/agents/policy_optimization_agent.py b/rl_coach/agents/policy_optimization_agent.py index 1c16ce8..70ec43d 100644 --- a/rl_coach/agents/policy_optimization_agent.py +++ b/rl_coach/agents/policy_optimization_agent.py @@ -19,12 +19,12 @@ from enum import Enum from typing import Union import numpy as np -from rl_coach.core_types import Batch, ActionInfo -from rl_coach.spaces import DiscreteActionSpace, BoxActionSpace -from rl_coach.utils import eps from rl_coach.agents.agent import Agent +from rl_coach.core_types import Batch, ActionInfo from rl_coach.logger import screen +from rl_coach.spaces import DiscreteActionSpace, BoxActionSpace +from rl_coach.utils import eps class PolicyGradientRescaler(Enum): diff --git a/rl_coach/agents/ppo_agent.py b/rl_coach/agents/ppo_agent.py index 8380525..ebbe971 100644 --- a/rl_coach/agents/ppo_agent.py +++ b/rl_coach/agents/ppo_agent.py @@ -19,21 +19,21 @@ from collections import OrderedDict from typing import Union import numpy as np + from rl_coach.agents.actor_critic_agent import ActorCriticAgent from rl_coach.agents.policy_optimization_agent import PolicyGradientRescaler +from rl_coach.architectures.tensorflow_components.heads.ppo_head import PPOHeadParameters from rl_coach.architectures.tensorflow_components.heads.v_head import VHeadParameters from rl_coach.architectures.tensorflow_components.middlewares.fc_middleware import FCMiddlewareParameters from rl_coach.base_parameters import AlgorithmParameters, 
NetworkParameters, \ AgentParameters, InputEmbedderParameters, DistributedTaskParameters from rl_coach.core_types import EnvironmentSteps, Batch from rl_coach.exploration_policies.additive_noise import AdditiveNoiseParameters +from rl_coach.logger import screen from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplayParameters from rl_coach.spaces import DiscreteActionSpace from rl_coach.utils import force_list -from rl_coach.architectures.tensorflow_components.heads.ppo_head import PPOHeadParameters -from rl_coach.logger import screen - class PPOCriticNetworkParameters(NetworkParameters): def __init__(self): diff --git a/rl_coach/agents/qr_dqn_agent.py b/rl_coach/agents/qr_dqn_agent.py index 7a3cdc1..479cd80 100644 --- a/rl_coach/agents/qr_dqn_agent.py +++ b/rl_coach/agents/qr_dqn_agent.py @@ -17,12 +17,13 @@ from typing import Union import numpy as np -from rl_coach.architectures.tensorflow_components.heads.quantile_regression_q_head import QuantileRegressionQHeadParameters -from rl_coach.schedules import LinearSchedule from rl_coach.agents.dqn_agent import DQNAgentParameters, DQNNetworkParameters, DQNAlgorithmParameters from rl_coach.agents.value_optimization_agent import ValueOptimizationAgent +from rl_coach.architectures.tensorflow_components.heads.quantile_regression_q_head import \ + QuantileRegressionQHeadParameters from rl_coach.core_types import StateType +from rl_coach.schedules import LinearSchedule class QuantileRegressionDQNNetworkParameters(DQNNetworkParameters): diff --git a/rl_coach/agents/value_optimization_agent.py b/rl_coach/agents/value_optimization_agent.py index afd242b..9771ae5 100644 --- a/rl_coach/agents/value_optimization_agent.py +++ b/rl_coach/agents/value_optimization_agent.py @@ -17,11 +17,11 @@ from typing import Union import numpy as np -from rl_coach.memories.non_episodic.prioritized_experience_replay import PrioritizedExperienceReplay -from rl_coach.spaces import DiscreteActionSpace from rl_coach.agents.agent import Agent from rl_coach.core_types import ActionInfo, StateType +from rl_coach.memories.non_episodic.prioritized_experience_replay import PrioritizedExperienceReplay +from rl_coach.spaces import DiscreteActionSpace ## This is an abstract agent - there is no learn_from_batch method ## diff --git a/rl_coach/architectures/network_wrapper.py b/rl_coach/architectures/network_wrapper.py index 6f21f9c..0660f24 100644 --- a/rl_coach/architectures/network_wrapper.py +++ b/rl_coach/architectures/network_wrapper.py @@ -17,9 +17,8 @@ from typing import List, Tuple from rl_coach.base_parameters import Frameworks, AgentParameters -from rl_coach.spaces import SpacesDefinition - from rl_coach.logger import failed_imports +from rl_coach.spaces import SpacesDefinition try: import tensorflow as tf diff --git a/rl_coach/architectures/tensorflow_components/architecture.py b/rl_coach/architectures/tensorflow_components/architecture.py index 6caf6c8..b84fbb8 100644 --- a/rl_coach/architectures/tensorflow_components/architecture.py +++ b/rl_coach/architectures/tensorflow_components/architecture.py @@ -19,12 +19,12 @@ from typing import List import numpy as np import tensorflow as tf -from rl_coach.base_parameters import AgentParameters, DistributedTaskParameters -from rl_coach.spaces import SpacesDefinition -from rl_coach.utils import force_list, squeeze_list from rl_coach.architectures.architecture import Architecture +from rl_coach.base_parameters import AgentParameters, DistributedTaskParameters from rl_coach.core_types import 
GradientClippingMethod +from rl_coach.spaces import SpacesDefinition +from rl_coach.utils import force_list, squeeze_list def batchnorm_activation_dropout(input_layer, batchnorm, activation_function, dropout, dropout_rate, layer_idx): diff --git a/rl_coach/architectures/tensorflow_components/embedders/embedder.py b/rl_coach/architectures/tensorflow_components/embedders/embedder.py index 430f053..88b3bb3 100644 --- a/rl_coach/architectures/tensorflow_components/embedders/embedder.py +++ b/rl_coach/architectures/tensorflow_components/embedders/embedder.py @@ -18,10 +18,9 @@ from typing import List, Union import numpy as np import tensorflow as tf -from rl_coach.architectures.tensorflow_components.shared_variables import SharedRunningStats -from rl_coach.base_parameters import EmbedderScheme from rl_coach.architectures.tensorflow_components.architecture import batchnorm_activation_dropout +from rl_coach.base_parameters import EmbedderScheme from rl_coach.core_types import InputEmbedding diff --git a/rl_coach/architectures/tensorflow_components/embedders/image_embedder.py b/rl_coach/architectures/tensorflow_components/embedders/image_embedder.py index 3301180..0cd5084 100644 --- a/rl_coach/architectures/tensorflow_components/embedders/image_embedder.py +++ b/rl_coach/architectures/tensorflow_components/embedders/image_embedder.py @@ -17,10 +17,10 @@ from typing import List import tensorflow as tf -from rl_coach.architectures.tensorflow_components.architecture import Conv2d -from rl_coach.base_parameters import EmbedderScheme +from rl_coach.architectures.tensorflow_components.architecture import Conv2d from rl_coach.architectures.tensorflow_components.embedders.embedder import InputEmbedder +from rl_coach.base_parameters import EmbedderScheme from rl_coach.core_types import InputImageEmbedding diff --git a/rl_coach/architectures/tensorflow_components/embedders/vector_embedder.py b/rl_coach/architectures/tensorflow_components/embedders/vector_embedder.py index f0bde1f..8328aa5 100644 --- a/rl_coach/architectures/tensorflow_components/embedders/vector_embedder.py +++ b/rl_coach/architectures/tensorflow_components/embedders/vector_embedder.py @@ -17,10 +17,10 @@ from typing import List import tensorflow as tf -from rl_coach.architectures.tensorflow_components.architecture import Dense -from rl_coach.base_parameters import EmbedderScheme +from rl_coach.architectures.tensorflow_components.architecture import Dense from rl_coach.architectures.tensorflow_components.embedders.embedder import InputEmbedder +from rl_coach.base_parameters import EmbedderScheme from rl_coach.core_types import InputVectorEmbedding diff --git a/rl_coach/architectures/tensorflow_components/general_network.py b/rl_coach/architectures/tensorflow_components/general_network.py index 2808ede..0ee4f06 100644 --- a/rl_coach/architectures/tensorflow_components/general_network.py +++ b/rl_coach/architectures/tensorflow_components/general_network.py @@ -19,15 +19,15 @@ from typing import Dict import numpy as np import tensorflow as tf + +from rl_coach.architectures.tensorflow_components.architecture import TensorFlowArchitecture from rl_coach.architectures.tensorflow_components.heads.head import HeadParameters from rl_coach.architectures.tensorflow_components.middlewares.middleware import MiddlewareParameters from rl_coach.base_parameters import AgentParameters, InputEmbedderParameters, EmbeddingMergerType +from rl_coach.core_types import PredictionType from rl_coach.spaces import SpacesDefinition, PlanarMapsObservationSpace from 
rl_coach.utils import get_all_subclasses, dynamic_import_and_instantiate_module_from_params -from rl_coach.architectures.tensorflow_components.architecture import TensorFlowArchitecture -from rl_coach.core_types import PredictionType - class GeneralTensorFlowNetwork(TensorFlowArchitecture): """ diff --git a/rl_coach/architectures/tensorflow_components/heads/categorical_q_head.py b/rl_coach/architectures/tensorflow_components/heads/categorical_q_head.py index c759eb3..89ae4c8 100644 --- a/rl_coach/architectures/tensorflow_components/heads/categorical_q_head.py +++ b/rl_coach/architectures/tensorflow_components/heads/categorical_q_head.py @@ -15,11 +15,11 @@ # import tensorflow as tf -from rl_coach.base_parameters import AgentParameters -from rl_coach.spaces import SpacesDefinition from rl_coach.architectures.tensorflow_components.heads.head import Head, HeadParameters +from rl_coach.base_parameters import AgentParameters from rl_coach.core_types import QActionStateValue +from rl_coach.spaces import SpacesDefinition class CategoricalQHeadParameters(HeadParameters): diff --git a/rl_coach/architectures/tensorflow_components/heads/ddpg_actor_head.py b/rl_coach/architectures/tensorflow_components/heads/ddpg_actor_head.py index 8b7c0dc..802c0de 100644 --- a/rl_coach/architectures/tensorflow_components/heads/ddpg_actor_head.py +++ b/rl_coach/architectures/tensorflow_components/heads/ddpg_actor_head.py @@ -15,13 +15,12 @@ # import tensorflow as tf -from rl_coach.architectures.tensorflow_components.architecture import batchnorm_activation_dropout +from rl_coach.architectures.tensorflow_components.architecture import batchnorm_activation_dropout from rl_coach.architectures.tensorflow_components.heads.head import Head, HeadParameters from rl_coach.base_parameters import AgentParameters -from rl_coach.spaces import SpacesDefinition - from rl_coach.core_types import ActionProbabilities +from rl_coach.spaces import SpacesDefinition class DDPGActorHeadParameters(HeadParameters): diff --git a/rl_coach/architectures/tensorflow_components/heads/dnd_q_head.py b/rl_coach/architectures/tensorflow_components/heads/dnd_q_head.py index affb23b..964189c 100644 --- a/rl_coach/architectures/tensorflow_components/heads/dnd_q_head.py +++ b/rl_coach/architectures/tensorflow_components/heads/dnd_q_head.py @@ -14,12 +14,12 @@ # limitations under the License. 
# import tensorflow as tf -from rl_coach.architectures.tensorflow_components.heads.head import HeadParameters -from rl_coach.base_parameters import AgentParameters +from rl_coach.architectures.tensorflow_components.heads.head import HeadParameters from rl_coach.architectures.tensorflow_components.heads.q_head import QHead -from rl_coach.spaces import SpacesDefinition +from rl_coach.base_parameters import AgentParameters from rl_coach.memories.non_episodic import differentiable_neural_dictionary +from rl_coach.spaces import SpacesDefinition class DNDQHeadParameters(HeadParameters): diff --git a/rl_coach/architectures/tensorflow_components/heads/dueling_q_head.py b/rl_coach/architectures/tensorflow_components/heads/dueling_q_head.py index 495cae3..f6f2424 100644 --- a/rl_coach/architectures/tensorflow_components/heads/dueling_q_head.py +++ b/rl_coach/architectures/tensorflow_components/heads/dueling_q_head.py @@ -15,10 +15,10 @@ # import tensorflow as tf -from rl_coach.architectures.tensorflow_components.heads.head import HeadParameters -from rl_coach.base_parameters import AgentParameters +from rl_coach.architectures.tensorflow_components.heads.head import HeadParameters from rl_coach.architectures.tensorflow_components.heads.q_head import QHead +from rl_coach.base_parameters import AgentParameters from rl_coach.spaces import SpacesDefinition diff --git a/rl_coach/architectures/tensorflow_components/heads/head.py b/rl_coach/architectures/tensorflow_components/heads/head.py index 891e304..54c294a 100644 --- a/rl_coach/architectures/tensorflow_components/heads/head.py +++ b/rl_coach/architectures/tensorflow_components/heads/head.py @@ -17,10 +17,10 @@ from typing import Type import numpy as np import tensorflow as tf -from rl_coach.base_parameters import AgentParameters, Parameters -from rl_coach.spaces import SpacesDefinition from tensorflow.python.ops.losses.losses_impl import Reduction +from rl_coach.base_parameters import AgentParameters, Parameters +from rl_coach.spaces import SpacesDefinition from rl_coach.utils import force_list diff --git a/rl_coach/architectures/tensorflow_components/heads/measurements_prediction_head.py b/rl_coach/architectures/tensorflow_components/heads/measurements_prediction_head.py index e8b9cc5..5f1cfba 100644 --- a/rl_coach/architectures/tensorflow_components/heads/measurements_prediction_head.py +++ b/rl_coach/architectures/tensorflow_components/heads/measurements_prediction_head.py @@ -15,11 +15,11 @@ # import tensorflow as tf -from rl_coach.base_parameters import AgentParameters -from rl_coach.spaces import SpacesDefinition from rl_coach.architectures.tensorflow_components.heads.head import Head, HeadParameters +from rl_coach.base_parameters import AgentParameters from rl_coach.core_types import Measurements +from rl_coach.spaces import SpacesDefinition class MeasurementsPredictionHeadParameters(HeadParameters): diff --git a/rl_coach/architectures/tensorflow_components/heads/naf_head.py b/rl_coach/architectures/tensorflow_components/heads/naf_head.py index 4e5c6b5..d315648 100644 --- a/rl_coach/architectures/tensorflow_components/heads/naf_head.py +++ b/rl_coach/architectures/tensorflow_components/heads/naf_head.py @@ -15,12 +15,12 @@ # import tensorflow as tf -from rl_coach.base_parameters import AgentParameters -from rl_coach.spaces import BoxActionSpace -from rl_coach.spaces import SpacesDefinition from rl_coach.architectures.tensorflow_components.heads.head import Head, HeadParameters +from rl_coach.base_parameters import AgentParameters from 
rl_coach.core_types import QActionStateValue +from rl_coach.spaces import BoxActionSpace +from rl_coach.spaces import SpacesDefinition class NAFHeadParameters(HeadParameters): diff --git a/rl_coach/architectures/tensorflow_components/heads/policy_head.py b/rl_coach/architectures/tensorflow_components/heads/policy_head.py index 106ae8e..8bf4aa8 100644 --- a/rl_coach/architectures/tensorflow_components/heads/policy_head.py +++ b/rl_coach/architectures/tensorflow_components/heads/policy_head.py @@ -16,15 +16,15 @@ import numpy as np import tensorflow as tf + from rl_coach.architectures.tensorflow_components.heads.head import Head, normalized_columns_initializer, HeadParameters from rl_coach.base_parameters import AgentParameters +from rl_coach.core_types import ActionProbabilities +from rl_coach.exploration_policies.continuous_entropy import ContinuousEntropyParameters from rl_coach.spaces import DiscreteActionSpace, BoxActionSpace, CompoundActionSpace from rl_coach.spaces import SpacesDefinition from rl_coach.utils import eps -from rl_coach.core_types import ActionProbabilities -from rl_coach.exploration_policies.continuous_entropy import ContinuousEntropyParameters - class PolicyHeadParameters(HeadParameters): def __init__(self, activation_function: str ='tanh', name: str='policy_head_params'): diff --git a/rl_coach/architectures/tensorflow_components/heads/ppo_head.py b/rl_coach/architectures/tensorflow_components/heads/ppo_head.py index 755ffa6..cb8777d 100644 --- a/rl_coach/architectures/tensorflow_components/heads/ppo_head.py +++ b/rl_coach/architectures/tensorflow_components/heads/ppo_head.py @@ -16,14 +16,14 @@ import numpy as np import tensorflow as tf + +from rl_coach.architectures.tensorflow_components.heads.head import Head, HeadParameters, normalized_columns_initializer from rl_coach.base_parameters import AgentParameters +from rl_coach.core_types import ActionProbabilities from rl_coach.spaces import BoxActionSpace, DiscreteActionSpace from rl_coach.spaces import SpacesDefinition from rl_coach.utils import eps -from rl_coach.architectures.tensorflow_components.heads.head import Head, HeadParameters, normalized_columns_initializer -from rl_coach.core_types import ActionProbabilities - class PPOHeadParameters(HeadParameters): def __init__(self, activation_function: str ='tanh', name: str='ppo_head_params'): diff --git a/rl_coach/architectures/tensorflow_components/heads/ppo_v_head.py b/rl_coach/architectures/tensorflow_components/heads/ppo_v_head.py index 0e04edd..fff4525 100644 --- a/rl_coach/architectures/tensorflow_components/heads/ppo_v_head.py +++ b/rl_coach/architectures/tensorflow_components/heads/ppo_v_head.py @@ -15,11 +15,11 @@ # import tensorflow as tf -from rl_coach.base_parameters import AgentParameters -from rl_coach.spaces import SpacesDefinition from rl_coach.architectures.tensorflow_components.heads.head import Head, normalized_columns_initializer, HeadParameters +from rl_coach.base_parameters import AgentParameters from rl_coach.core_types import ActionProbabilities +from rl_coach.spaces import SpacesDefinition class PPOVHeadParameters(HeadParameters): diff --git a/rl_coach/architectures/tensorflow_components/heads/q_head.py b/rl_coach/architectures/tensorflow_components/heads/q_head.py index 41a697e..1393875 100644 --- a/rl_coach/architectures/tensorflow_components/heads/q_head.py +++ b/rl_coach/architectures/tensorflow_components/heads/q_head.py @@ -15,11 +15,11 @@ # import tensorflow as tf -from rl_coach.base_parameters import AgentParameters -from 
rl_coach.spaces import SpacesDefinition, BoxActionSpace, DiscreteActionSpace from rl_coach.architectures.tensorflow_components.heads.head import Head, HeadParameters +from rl_coach.base_parameters import AgentParameters from rl_coach.core_types import QActionStateValue +from rl_coach.spaces import SpacesDefinition, BoxActionSpace, DiscreteActionSpace class QHeadParameters(HeadParameters): diff --git a/rl_coach/architectures/tensorflow_components/heads/quantile_regression_q_head.py b/rl_coach/architectures/tensorflow_components/heads/quantile_regression_q_head.py index 86ed3b6..039855b 100644 --- a/rl_coach/architectures/tensorflow_components/heads/quantile_regression_q_head.py +++ b/rl_coach/architectures/tensorflow_components/heads/quantile_regression_q_head.py @@ -15,11 +15,11 @@ # import tensorflow as tf -from rl_coach.base_parameters import AgentParameters -from rl_coach.spaces import SpacesDefinition from rl_coach.architectures.tensorflow_components.heads.head import Head, HeadParameters +from rl_coach.base_parameters import AgentParameters from rl_coach.core_types import QActionStateValue +from rl_coach.spaces import SpacesDefinition class QuantileRegressionQHeadParameters(HeadParameters): diff --git a/rl_coach/architectures/tensorflow_components/heads/v_head.py b/rl_coach/architectures/tensorflow_components/heads/v_head.py index 458447c..f7e4ad6 100644 --- a/rl_coach/architectures/tensorflow_components/heads/v_head.py +++ b/rl_coach/architectures/tensorflow_components/heads/v_head.py @@ -15,11 +15,11 @@ # import tensorflow as tf -from rl_coach.base_parameters import AgentParameters -from rl_coach.spaces import SpacesDefinition from rl_coach.architectures.tensorflow_components.heads.head import Head, normalized_columns_initializer, HeadParameters +from rl_coach.base_parameters import AgentParameters from rl_coach.core_types import VStateValue +from rl_coach.spaces import SpacesDefinition class VHeadParameters(HeadParameters): diff --git a/rl_coach/architectures/tensorflow_components/middlewares/fc_middleware.py b/rl_coach/architectures/tensorflow_components/middlewares/fc_middleware.py index a6c9b79..f3b728d 100644 --- a/rl_coach/architectures/tensorflow_components/middlewares/fc_middleware.py +++ b/rl_coach/architectures/tensorflow_components/middlewares/fc_middleware.py @@ -16,10 +16,10 @@ from typing import Union, List import tensorflow as tf -from rl_coach.architectures.tensorflow_components.middlewares.middleware import Middleware, MiddlewareParameters -from rl_coach.base_parameters import MiddlewareScheme from rl_coach.architectures.tensorflow_components.architecture import batchnorm_activation_dropout, Dense +from rl_coach.architectures.tensorflow_components.middlewares.middleware import Middleware, MiddlewareParameters +from rl_coach.base_parameters import MiddlewareScheme from rl_coach.core_types import Middleware_FC_Embedding diff --git a/rl_coach/architectures/tensorflow_components/middlewares/lstm_middleware.py b/rl_coach/architectures/tensorflow_components/middlewares/lstm_middleware.py index 9078a64..2cd9924 100644 --- a/rl_coach/architectures/tensorflow_components/middlewares/lstm_middleware.py +++ b/rl_coach/architectures/tensorflow_components/middlewares/lstm_middleware.py @@ -17,10 +17,10 @@ import numpy as np import tensorflow as tf -from rl_coach.architectures.tensorflow_components.middlewares.middleware import Middleware, MiddlewareParameters -from rl_coach.base_parameters import MiddlewareScheme from rl_coach.architectures.tensorflow_components.architecture 
import batchnorm_activation_dropout +from rl_coach.architectures.tensorflow_components.middlewares.middleware import Middleware, MiddlewareParameters +from rl_coach.base_parameters import MiddlewareScheme from rl_coach.core_types import Middleware_LSTM_Embedding diff --git a/rl_coach/architectures/tensorflow_components/middlewares/middleware.py b/rl_coach/architectures/tensorflow_components/middlewares/middleware.py index 6f59309..5a01025 100644 --- a/rl_coach/architectures/tensorflow_components/middlewares/middleware.py +++ b/rl_coach/architectures/tensorflow_components/middlewares/middleware.py @@ -16,8 +16,8 @@ from typing import Type, Union, List import tensorflow as tf -from rl_coach.base_parameters import MiddlewareScheme, Parameters +from rl_coach.base_parameters import MiddlewareScheme, Parameters from rl_coach.core_types import MiddlewareEmbedding diff --git a/rl_coach/core_types.py b/rl_coach/core_types.py index f75ee5f..a46a2c8 100644 --- a/rl_coach/core_types.py +++ b/rl_coach/core_types.py @@ -15,12 +15,12 @@ # +import copy from enum import Enum -from typing import List, Union, Dict, Any, Type from random import shuffle +from typing import List, Union, Dict, Any, Type import numpy as np -import copy ActionType = Union[int, float, np.ndarray, List] GoalType = Union[None, np.ndarray] diff --git a/rl_coach/datasets/README.md b/rl_coach/datasets/README.md deleted file mode 100644 index e69de29..0000000 diff --git a/rl_coach/datasets/doom_basic.tar.gz b/rl_coach/datasets/doom_basic.tar.gz deleted file mode 100644 index c1250e0..0000000 Binary files a/rl_coach/datasets/doom_basic.tar.gz and /dev/null differ diff --git a/rl_coach/datasets/montezuma_revenge.tar.gz b/rl_coach/datasets/montezuma_revenge.tar.gz deleted file mode 100644 index cdf7d3d..0000000 Binary files a/rl_coach/datasets/montezuma_revenge.tar.gz and /dev/null differ diff --git a/rl_coach/debug_utils.py b/rl_coach/debug_utils.py index 21e2faa..d72ed33 100644 --- a/rl_coach/debug_utils.py +++ b/rl_coach/debug_utils.py @@ -18,6 +18,7 @@ import math import matplotlib.pyplot as plt import numpy as np + from rl_coach.filters.observation.observation_stacking_filter import LazyStack diff --git a/rl_coach/environments/carla_environment.py b/rl_coach/environments/carla_environment.py index 4e22352..73fee2c 100644 --- a/rl_coach/environments/carla_environment.py +++ b/rl_coach/environments/carla_environment.py @@ -1,10 +1,25 @@ +# +# Copyright (c) 2017 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + import random import sys from os import path, environ -from rl_coach.filters.observation.observation_to_uint8_filter import ObservationToUInt8Filter - from rl_coach.filters.observation.observation_rgb_to_y_filter import ObservationRGBToYFilter +from rl_coach.filters.observation.observation_to_uint8_filter import ObservationToUInt8Filter try: if 'CARLA_ROOT' in environ: diff --git a/rl_coach/environments/doom_environment.py b/rl_coach/environments/doom_environment.py index 822b5f1..66adc8b 100644 --- a/rl_coach/environments/doom_environment.py +++ b/rl_coach/environments/doom_environment.py @@ -26,18 +26,18 @@ from os import path, environ from typing import Union, List import numpy as np + from rl_coach.base_parameters import VisualizationParameters from rl_coach.environments.environment import Environment, EnvironmentParameters, LevelSelection from rl_coach.filters.action.full_discrete_action_space_map import FullDiscreteActionSpaceMap from rl_coach.filters.filter import InputFilter, OutputFilter from rl_coach.filters.observation.observation_rescale_to_size_filter import ObservationRescaleToSizeFilter +from rl_coach.filters.observation.observation_rgb_to_y_filter import ObservationRGBToYFilter from rl_coach.filters.observation.observation_stacking_filter import ObservationStackingFilter from rl_coach.filters.observation.observation_to_uint8_filter import ObservationToUInt8Filter from rl_coach.spaces import MultiSelectActionSpace, ImageObservationSpace, \ VectorObservationSpace, StateSpace -from rl_coach.filters.observation.observation_rgb_to_y_filter import ObservationRGBToYFilter - # enum of the available levels and their path class DoomLevel(Enum): diff --git a/rl_coach/environments/environment.py b/rl_coach/environments/environment.py index 473dca0..17ac00b 100644 --- a/rl_coach/environments/environment.py +++ b/rl_coach/environments/environment.py @@ -20,17 +20,17 @@ from collections import OrderedDict from typing import Union, List, Tuple, Dict import numpy as np + +from rl_coach import logger from rl_coach.base_parameters import Parameters from rl_coach.base_parameters import VisualizationParameters from rl_coach.core_types import GoalType, ActionType, EnvResponse, RunPhase +from rl_coach.environments.environment_interface import EnvironmentInterface +from rl_coach.logger import screen from rl_coach.renderer import Renderer from rl_coach.spaces import ActionSpace, ObservationSpace, DiscreteActionSpace, RewardSpace, StateSpace from rl_coach.utils import squeeze_list, force_list -from rl_coach import logger -from rl_coach.environments.environment_interface import EnvironmentInterface -from rl_coach.logger import screen - class LevelSelection(object): def __init__(self, level: str): diff --git a/rl_coach/environments/environment_interface.py b/rl_coach/environments/environment_interface.py index f92d1ca..a1fdb2a 100644 --- a/rl_coach/environments/environment_interface.py +++ b/rl_coach/environments/environment_interface.py @@ -16,9 +16,8 @@ from typing import Union, Dict -from rl_coach.spaces import ActionSpace - from rl_coach.core_types import ActionType, EnvResponse, RunPhase +from rl_coach.spaces import ActionSpace class EnvironmentInterface(object): diff --git a/rl_coach/environments/mujoco/pendulum_with_goals.py b/rl_coach/environments/mujoco/pendulum_with_goals.py index 84eb227..777ed74 100644 --- a/rl_coach/environments/mujoco/pendulum_with_goals.py +++ b/rl_coach/environments/mujoco/pendulum_with_goals.py @@ -1,10 +1,26 @@ -import numpy as np -import gym +# +# Copyright (c) 
2017 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + import os + +import gym +import numpy as np from gym import spaces from gym.envs.registration import EnvSpec - -from mujoco_py import load_model_from_path, MjSim , MjViewer, MjRenderContextOffscreen +from mujoco_py import load_model_from_path, MjSim, MjViewer, MjRenderContextOffscreen class PendulumWithGoals(gym.Env): diff --git a/rl_coach/environments/starcraft2_environment.py b/rl_coach/environments/starcraft2_environment.py index 6c8593f..eba32ab 100644 --- a/rl_coach/environments/starcraft2_environment.py +++ b/rl_coach/environments/starcraft2_environment.py @@ -19,6 +19,7 @@ from enum import Enum from typing import Union, List import numpy as np + from rl_coach.filters.observation.observation_move_axis_filter import ObservationMoveAxisFilter try: diff --git a/rl_coach/environments/toy_problems/bit_flip.py b/rl_coach/environments/toy_problems/bit_flip.py index d674ab5..6084ef4 100644 --- a/rl_coach/environments/toy_problems/bit_flip.py +++ b/rl_coach/environments/toy_problems/bit_flip.py @@ -1,8 +1,25 @@ -import numpy as np -import gym -from gym import spaces +# +# Copyright (c) 2017 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + import random +import gym +import numpy as np +from gym import spaces + class BitFlip(gym.Env): metadata = { diff --git a/rl_coach/environments/toy_problems/exploration_chain.py b/rl_coach/environments/toy_problems/exploration_chain.py index 2c83a47..a6efc6c 100644 --- a/rl_coach/environments/toy_problems/exploration_chain.py +++ b/rl_coach/environments/toy_problems/exploration_chain.py @@ -1,8 +1,25 @@ -import numpy as np -import gym -from gym import spaces +# +# Copyright (c) 2017 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + from enum import Enum +import gym +import numpy as np +from gym import spaces + class ExplorationChain(gym.Env): metadata = { diff --git a/rl_coach/exploration_policies/additive_noise.py b/rl_coach/exploration_policies/additive_noise.py index 7912a31..c3e7fb9 100644 --- a/rl_coach/exploration_policies/additive_noise.py +++ b/rl_coach/exploration_policies/additive_noise.py @@ -17,11 +17,11 @@ from typing import List import numpy as np -from rl_coach.schedules import Schedule, LinearSchedule -from rl_coach.spaces import ActionSpace, BoxActionSpace from rl_coach.core_types import RunPhase, ActionType from rl_coach.exploration_policies.exploration_policy import ExplorationPolicy, ExplorationParameters +from rl_coach.schedules import Schedule, LinearSchedule +from rl_coach.spaces import ActionSpace, BoxActionSpace # TODO: consider renaming to gaussian sampling diff --git a/rl_coach/exploration_policies/boltzmann.py b/rl_coach/exploration_policies/boltzmann.py index a0e3854..61a2e92 100644 --- a/rl_coach/exploration_policies/boltzmann.py +++ b/rl_coach/exploration_policies/boltzmann.py @@ -17,11 +17,11 @@ from typing import List import numpy as np -from rl_coach.schedules import Schedule -from rl_coach.spaces import ActionSpace from rl_coach.core_types import RunPhase, ActionType from rl_coach.exploration_policies.exploration_policy import ExplorationPolicy, ExplorationParameters +from rl_coach.schedules import Schedule +from rl_coach.spaces import ActionSpace class BoltzmannParameters(ExplorationParameters): diff --git a/rl_coach/exploration_policies/bootstrapped.py b/rl_coach/exploration_policies/bootstrapped.py index c3061b1..96a5ff4 100644 --- a/rl_coach/exploration_policies/bootstrapped.py +++ b/rl_coach/exploration_policies/bootstrapped.py @@ -17,13 +17,13 @@ from typing import List import numpy as np -from rl_coach.exploration_policies.additive_noise import AdditiveNoiseParameters -from rl_coach.exploration_policies.e_greedy import EGreedy, EGreedyParameters -from rl_coach.schedules import Schedule, LinearSchedule -from rl_coach.spaces import ActionSpace from rl_coach.core_types import RunPhase, ActionType +from rl_coach.exploration_policies.additive_noise import AdditiveNoiseParameters +from rl_coach.exploration_policies.e_greedy import EGreedy, EGreedyParameters from rl_coach.exploration_policies.exploration_policy import ExplorationParameters +from rl_coach.schedules import Schedule, LinearSchedule +from rl_coach.spaces import ActionSpace class BootstrappedParameters(EGreedyParameters): diff --git a/rl_coach/exploration_policies/categorical.py b/rl_coach/exploration_policies/categorical.py index c4c1d0f..4c99e50 100644 --- a/rl_coach/exploration_policies/categorical.py +++ b/rl_coach/exploration_policies/categorical.py @@ -17,10 +17,10 @@ from typing import List import numpy as np -from rl_coach.spaces import ActionSpace from rl_coach.core_types import RunPhase, ActionType from rl_coach.exploration_policies.exploration_policy import ExplorationPolicy, ExplorationParameters +from rl_coach.spaces import ActionSpace class CategoricalParameters(ExplorationParameters): diff --git a/rl_coach/exploration_policies/e_greedy.py b/rl_coach/exploration_policies/e_greedy.py index b1c934d..884f36d 100644 --- a/rl_coach/exploration_policies/e_greedy.py +++ b/rl_coach/exploration_policies/e_greedy.py @@ -17,15 +17,15 @@ from typing import List import numpy as np + +from rl_coach.core_types import RunPhase, ActionType from rl_coach.exploration_policies.additive_noise import AdditiveNoiseParameters 
+from rl_coach.exploration_policies.exploration_policy import ExplorationParameters +from rl_coach.exploration_policies.exploration_policy import ExplorationPolicy from rl_coach.schedules import Schedule, LinearSchedule from rl_coach.spaces import ActionSpace, DiscreteActionSpace, BoxActionSpace from rl_coach.utils import dynamic_import_and_instantiate_module_from_params -from rl_coach.core_types import RunPhase, ActionType -from rl_coach.exploration_policies.exploration_policy import ExplorationParameters -from rl_coach.exploration_policies.exploration_policy import ExplorationPolicy - class EGreedyParameters(ExplorationParameters): def __init__(self): diff --git a/rl_coach/exploration_policies/exploration_policy.py b/rl_coach/exploration_policies/exploration_policy.py index 4dcd6ef..b4b5a6d 100644 --- a/rl_coach/exploration_policies/exploration_policy.py +++ b/rl_coach/exploration_policies/exploration_policy.py @@ -17,9 +17,8 @@ from typing import List from rl_coach.base_parameters import Parameters -from rl_coach.spaces import ActionSpace - from rl_coach.core_types import RunPhase, ActionType +from rl_coach.spaces import ActionSpace class ExplorationParameters(Parameters): diff --git a/rl_coach/exploration_policies/greedy.py b/rl_coach/exploration_policies/greedy.py index f5b402c..b0d788e 100644 --- a/rl_coach/exploration_policies/greedy.py +++ b/rl_coach/exploration_policies/greedy.py @@ -17,10 +17,10 @@ from typing import List import numpy as np -from rl_coach.spaces import ActionSpace, DiscreteActionSpace, BoxActionSpace from rl_coach.core_types import ActionType from rl_coach.exploration_policies.exploration_policy import ExplorationPolicy, ExplorationParameters +from rl_coach.spaces import ActionSpace, DiscreteActionSpace, BoxActionSpace class GreedyParameters(ExplorationParameters): diff --git a/rl_coach/exploration_policies/ou_process.py b/rl_coach/exploration_policies/ou_process.py index 98286bc..fd8cf77 100644 --- a/rl_coach/exploration_policies/ou_process.py +++ b/rl_coach/exploration_policies/ou_process.py @@ -17,10 +17,10 @@ from typing import List import numpy as np -from rl_coach.spaces import ActionSpace, BoxActionSpace, GoalsSpace from rl_coach.core_types import RunPhase, ActionType from rl_coach.exploration_policies.exploration_policy import ExplorationPolicy, ExplorationParameters +from rl_coach.spaces import ActionSpace, BoxActionSpace, GoalsSpace # Based on on the description in: diff --git a/rl_coach/exploration_policies/truncated_normal.py b/rl_coach/exploration_policies/truncated_normal.py index 47c141d..4113274 100644 --- a/rl_coach/exploration_policies/truncated_normal.py +++ b/rl_coach/exploration_policies/truncated_normal.py @@ -17,12 +17,12 @@ from typing import List import numpy as np -from rl_coach.schedules import Schedule, LinearSchedule from scipy.stats import truncnorm -from rl_coach.spaces import ActionSpace, BoxActionSpace from rl_coach.core_types import RunPhase, ActionType from rl_coach.exploration_policies.exploration_policy import ExplorationPolicy, ExplorationParameters +from rl_coach.schedules import Schedule, LinearSchedule +from rl_coach.spaces import ActionSpace, BoxActionSpace class TruncatedNormalParameters(ExplorationParameters): diff --git a/rl_coach/exploration_policies/ucb.py b/rl_coach/exploration_policies/ucb.py index a9d5ead..41abc1b 100644 --- a/rl_coach/exploration_policies/ucb.py +++ b/rl_coach/exploration_policies/ucb.py @@ -17,13 +17,13 @@ from typing import List import numpy as np -from 
-from rl_coach.exploration_policies.e_greedy import EGreedy, EGreedyParameters
-from rl_coach.schedules import Schedule, LinearSchedule, PieceWiseSchedule
-from rl_coach.spaces import ActionSpace
 from rl_coach.core_types import RunPhase, ActionType, EnvironmentSteps
+from rl_coach.exploration_policies.additive_noise import AdditiveNoiseParameters
+from rl_coach.exploration_policies.e_greedy import EGreedy, EGreedyParameters
 from rl_coach.exploration_policies.exploration_policy import ExplorationParameters
+from rl_coach.schedules import Schedule, LinearSchedule, PieceWiseSchedule
+from rl_coach.spaces import ActionSpace
 class UCBParameters(EGreedyParameters):
diff --git a/rl_coach/filters/action/action_filter.py b/rl_coach/filters/action/action_filter.py
index 49d1442..9556afd 100644
--- a/rl_coach/filters/action/action_filter.py
+++ b/rl_coach/filters/action/action_filter.py
@@ -14,10 +14,9 @@
 # limitations under the License.
 #
-from rl_coach.spaces import ActionSpace
-
 from rl_coach.core_types import ActionType
 from rl_coach.filters.filter import Filter
+from rl_coach.spaces import ActionSpace
 class ActionFilter(Filter):
diff --git a/rl_coach/filters/action/attention_discretization.py b/rl_coach/filters/action/attention_discretization.py
index 0cd4928..32d8bf0 100644
--- a/rl_coach/filters/action/attention_discretization.py
+++ b/rl_coach/filters/action/attention_discretization.py
@@ -17,8 +17,8 @@ from typing import Union, List
 import numpy as np
-from rl_coach.filters.action.box_discretization import BoxDiscretization
+from rl_coach.filters.action.box_discretization import BoxDiscretization
 from rl_coach.filters.action.partial_discrete_action_space_map import PartialDiscreteActionSpaceMap
 from rl_coach.spaces import AttentionActionSpace, BoxActionSpace, DiscreteActionSpace
diff --git a/rl_coach/filters/action/box_masking.py b/rl_coach/filters/action/box_masking.py
index e533a55..c6b6a4b 100644
--- a/rl_coach/filters/action/box_masking.py
+++ b/rl_coach/filters/action/box_masking.py
@@ -17,10 +17,10 @@ from typing import Union
 import numpy as np
-from rl_coach.spaces import BoxActionSpace
 from rl_coach.core_types import ActionType
 from rl_coach.filters.action.action_filter import ActionFilter
+from rl_coach.spaces import BoxActionSpace
 class BoxMasking(ActionFilter):
diff --git a/rl_coach/filters/action/linear_box_to_box_map.py b/rl_coach/filters/action/linear_box_to_box_map.py
index 9cafb6a..e739f95 100644
--- a/rl_coach/filters/action/linear_box_to_box_map.py
+++ b/rl_coach/filters/action/linear_box_to_box_map.py
@@ -17,10 +17,10 @@ from typing import Union
 import numpy as np
-from rl_coach.spaces import BoxActionSpace
 from rl_coach.core_types import ActionType
 from rl_coach.filters.action.action_filter import ActionFilter
+from rl_coach.spaces import BoxActionSpace
 class LinearBoxToBoxMap(ActionFilter):
diff --git a/rl_coach/filters/action/partial_discrete_action_space_map.py b/rl_coach/filters/action/partial_discrete_action_space_map.py
index b4caf80..3e3c92b 100644
--- a/rl_coach/filters/action/partial_discrete_action_space_map.py
+++ b/rl_coach/filters/action/partial_discrete_action_space_map.py
@@ -16,10 +16,9 @@ from typing import List
-from rl_coach.spaces import DiscreteActionSpace, ActionSpace
-
 from rl_coach.core_types import ActionType
 from rl_coach.filters.action.action_filter import ActionFilter
+from rl_coach.spaces import DiscreteActionSpace, ActionSpace
 class PartialDiscreteActionSpaceMap(ActionFilter):
diff --git a/rl_coach/filters/filter.py b/rl_coach/filters/filter.py
index 42d0b8e..35d7e7c 100644
--- a/rl_coach/filters/filter.py
+++ b/rl_coach/filters/filter.py
@@ -19,8 +19,8 @@ from collections import OrderedDict
 from copy import deepcopy
 from typing import Dict, Union, List
-from rl_coach.spaces import ActionSpace, RewardSpace, ObservationSpace
 from rl_coach.core_types import EnvResponse, ActionInfo, Transition
+from rl_coach.spaces import ActionSpace, RewardSpace, ObservationSpace
 from rl_coach.utils import force_list
diff --git a/rl_coach/filters/observation/observation_clipping_filter.py b/rl_coach/filters/observation/observation_clipping_filter.py
index ff6d9bf..82e3105 100644
--- a/rl_coach/filters/observation/observation_clipping_filter.py
+++ b/rl_coach/filters/observation/observation_clipping_filter.py
@@ -16,10 +16,10 @@ import numpy as np
-from rl_coach.spaces import ObservationSpace
 from rl_coach.core_types import ObservationType
 from rl_coach.filters.observation.observation_filter import ObservationFilter
+from rl_coach.spaces import ObservationSpace
 class ObservationClippingFilter(ObservationFilter):
diff --git a/rl_coach/filters/observation/observation_crop_filter.py b/rl_coach/filters/observation/observation_crop_filter.py
index d702011..4f3e066 100644
--- a/rl_coach/filters/observation/observation_crop_filter.py
+++ b/rl_coach/filters/observation/observation_crop_filter.py
@@ -16,10 +16,10 @@ from typing import Union, Tuple
 import numpy as np
-from rl_coach.spaces import ObservationSpace
 from rl_coach.core_types import ObservationType
 from rl_coach.filters.observation.observation_filter import ObservationFilter
+from rl_coach.spaces import ObservationSpace
 class ObservationCropFilter(ObservationFilter):
diff --git a/rl_coach/filters/observation/observation_move_axis_filter.py b/rl_coach/filters/observation/observation_move_axis_filter.py
index 378caae..288f10d 100644
--- a/rl_coach/filters/observation/observation_move_axis_filter.py
+++ b/rl_coach/filters/observation/observation_move_axis_filter.py
@@ -15,10 +15,10 @@
 #
 import numpy as np
-from rl_coach.spaces import ObservationSpace, PlanarMapsObservationSpace
 from rl_coach.core_types import ObservationType
 from rl_coach.filters.observation.observation_filter import ObservationFilter
+from rl_coach.spaces import ObservationSpace, PlanarMapsObservationSpace
 class ObservationMoveAxisFilter(ObservationFilter):
diff --git a/rl_coach/filters/observation/observation_normalization_filter.py b/rl_coach/filters/observation/observation_normalization_filter.py
index 479f8e7..178036d 100644
--- a/rl_coach/filters/observation/observation_normalization_filter.py
+++ b/rl_coach/filters/observation/observation_normalization_filter.py
@@ -16,11 +16,11 @@ from typing import List
 import numpy as np
-from rl_coach.spaces import ObservationSpace
 from rl_coach.architectures.tensorflow_components.shared_variables import SharedRunningStats
 from rl_coach.core_types import ObservationType
 from rl_coach.filters.observation.observation_filter import ObservationFilter
+from rl_coach.spaces import ObservationSpace
 class ObservationNormalizationFilter(ObservationFilter):
diff --git a/rl_coach/filters/observation/observation_reduction_by_sub_parts_name_filter.py b/rl_coach/filters/observation/observation_reduction_by_sub_parts_name_filter.py
index 61a9b17..701687d 100644
--- a/rl_coach/filters/observation/observation_reduction_by_sub_parts_name_filter.py
+++ b/rl_coach/filters/observation/observation_reduction_by_sub_parts_name_filter.py
@@ -17,10 +17,9 @@ import copy
 from enum import Enum
 from typing import List
-from rl_coach.spaces import ObservationSpace, VectorObservationSpace
-
 from rl_coach.core_types import ObservationType
 from rl_coach.filters.observation.observation_filter import ObservationFilter
+from rl_coach.spaces import ObservationSpace, VectorObservationSpace
 class ObservationReductionBySubPartsNameFilter(ObservationFilter):
diff --git a/rl_coach/filters/observation/observation_rescale_size_by_factor_filter.py b/rl_coach/filters/observation/observation_rescale_size_by_factor_filter.py
index 7ece865..4cd0770 100644
--- a/rl_coach/filters/observation/observation_rescale_size_by_factor_filter.py
+++ b/rl_coach/filters/observation/observation_rescale_size_by_factor_filter.py
@@ -17,10 +17,10 @@ from enum import Enum
 import scipy.ndimage
-from rl_coach.spaces import ObservationSpace
 from rl_coach.core_types import ObservationType
 from rl_coach.filters.observation.observation_filter import ObservationFilter
+from rl_coach.spaces import ObservationSpace
 # imresize interpolation types as defined by scipy here:
diff --git a/rl_coach/filters/observation/observation_rescale_to_size_filter.py b/rl_coach/filters/observation/observation_rescale_to_size_filter.py
index 4ed559f..630fb74 100644
--- a/rl_coach/filters/observation/observation_rescale_to_size_filter.py
+++ b/rl_coach/filters/observation/observation_rescale_to_size_filter.py
@@ -19,10 +19,10 @@ from enum import Enum
 import numpy as np
 import scipy.ndimage
-from rl_coach.spaces import ObservationSpace, PlanarMapsObservationSpace, ImageObservationSpace
 from rl_coach.core_types import ObservationType
 from rl_coach.filters.observation.observation_filter import ObservationFilter
+from rl_coach.spaces import ObservationSpace, PlanarMapsObservationSpace, ImageObservationSpace
 # imresize interpolation types as defined by scipy here:
diff --git a/rl_coach/filters/observation/observation_rgb_to_y_filter.py b/rl_coach/filters/observation/observation_rgb_to_y_filter.py
index 82ebb0a..ed8ea83 100644
--- a/rl_coach/filters/observation/observation_rgb_to_y_filter.py
+++ b/rl_coach/filters/observation/observation_rgb_to_y_filter.py
@@ -14,10 +14,9 @@
 # limitations under the License.
 #
-from rl_coach.spaces import ObservationSpace
-
 from rl_coach.core_types import ObservationType
 from rl_coach.filters.observation.observation_filter import ObservationFilter
+from rl_coach.spaces import ObservationSpace
 class ObservationRGBToYFilter(ObservationFilter):
diff --git a/rl_coach/filters/observation/observation_squeeze_filter.py b/rl_coach/filters/observation/observation_squeeze_filter.py
index df258b1..865abc7 100644
--- a/rl_coach/filters/observation/observation_squeeze_filter.py
+++ b/rl_coach/filters/observation/observation_squeeze_filter.py
@@ -15,10 +15,10 @@
 #
 import numpy as np
-from rl_coach.spaces import ObservationSpace
 from rl_coach.core_types import ObservationType
 from rl_coach.filters.observation.observation_filter import ObservationFilter
+from rl_coach.spaces import ObservationSpace
 class ObservationSqueezeFilter(ObservationFilter):
diff --git a/rl_coach/filters/observation/observation_stacking_filter.py b/rl_coach/filters/observation/observation_stacking_filter.py
index f6f5f83..77f84b6 100644
--- a/rl_coach/filters/observation/observation_stacking_filter.py
+++ b/rl_coach/filters/observation/observation_stacking_filter.py
@@ -18,10 +18,10 @@ import copy
 from collections import deque
 import numpy as np
-from rl_coach.spaces import ObservationSpace
 from rl_coach.core_types import ObservationType
 from rl_coach.filters.observation.observation_filter import ObservationFilter
+from rl_coach.spaces import ObservationSpace
 class LazyStack(object):
diff --git a/rl_coach/filters/observation/observation_to_uint8_filter.py b/rl_coach/filters/observation/observation_to_uint8_filter.py
index 057167b..12cb8ee 100644
--- a/rl_coach/filters/observation/observation_to_uint8_filter.py
+++ b/rl_coach/filters/observation/observation_to_uint8_filter.py
@@ -15,10 +15,10 @@
 #
 import numpy as np
-from rl_coach.spaces import ObservationSpace
 from rl_coach.core_types import ObservationType
 from rl_coach.filters.observation.observation_filter import ObservationFilter
+from rl_coach.spaces import ObservationSpace
 class ObservationToUInt8Filter(ObservationFilter):
diff --git a/rl_coach/filters/reward/reward_clipping_filter.py b/rl_coach/filters/reward/reward_clipping_filter.py
index 77d3202..14499e2 100644
--- a/rl_coach/filters/reward/reward_clipping_filter.py
+++ b/rl_coach/filters/reward/reward_clipping_filter.py
@@ -15,10 +15,10 @@
 #
 import numpy as np
-from rl_coach.spaces import RewardSpace
 from rl_coach.core_types import RewardType
 from rl_coach.filters.reward.reward_filter import RewardFilter
+from rl_coach.spaces import RewardSpace
 class RewardClippingFilter(RewardFilter):
diff --git a/rl_coach/filters/reward/reward_normalization_filter.py b/rl_coach/filters/reward/reward_normalization_filter.py
index ebb5967..fa33a4e 100644
--- a/rl_coach/filters/reward/reward_normalization_filter.py
+++ b/rl_coach/filters/reward/reward_normalization_filter.py
@@ -16,11 +16,11 @@ import numpy as np
-from rl_coach.spaces import RewardSpace
 from rl_coach.architectures.tensorflow_components.shared_variables import SharedRunningStats
 from rl_coach.core_types import RewardType
 from rl_coach.filters.reward.reward_filter import RewardFilter
+from rl_coach.spaces import RewardSpace
 class RewardNormalizationFilter(RewardFilter):
diff --git a/rl_coach/filters/reward/reward_rescale_filter.py b/rl_coach/filters/reward/reward_rescale_filter.py
index a8530d7..3b0be61 100644
--- a/rl_coach/filters/reward/reward_rescale_filter.py
+++ b/rl_coach/filters/reward/reward_rescale_filter.py
@@ -14,10 +14,9 @@
 # limitations under the License.
 #
-from rl_coach.spaces import RewardSpace
-
 from rl_coach.core_types import RewardType
 from rl_coach.filters.reward.reward_filter import RewardFilter
+from rl_coach.spaces import RewardSpace
 class RewardRescaleFilter(RewardFilter):
diff --git a/rl_coach/graph_managers/basic_rl_graph_manager.py b/rl_coach/graph_managers/basic_rl_graph_manager.py
index 2bd3214..3eb9604 100644
--- a/rl_coach/graph_managers/basic_rl_graph_manager.py
+++ b/rl_coach/graph_managers/basic_rl_graph_manager.py
@@ -15,11 +15,11 @@
 #
 from typing import Tuple, List
-from rl_coach.base_parameters import AgentParameters, VisualizationParameters, TaskParameters, PresetValidationParameters
+from rl_coach.base_parameters import AgentParameters, VisualizationParameters, TaskParameters, \
+    PresetValidationParameters
 from rl_coach.environments.environment import EnvironmentParameters, Environment
-from rl_coach.level_manager import LevelManager
-
 from rl_coach.graph_managers.graph_manager import GraphManager, ScheduleParameters
+from rl_coach.level_manager import LevelManager
 from rl_coach.utils import short_dynamic_import
diff --git a/rl_coach/graph_managers/graph_manager.py b/rl_coach/graph_managers/graph_manager.py
index cc889c4..394a31a 100644
--- a/rl_coach/graph_managers/graph_manager.py
+++ b/rl_coach/graph_managers/graph_manager.py
@@ -18,20 +18,19 @@ import copy
 import os
 import time
 from collections import OrderedDict
-from typing import List, Tuple
 from distutils.dir_util import copy_tree, remove_tree
+from typing import List, Tuple
-import numpy as np
-from rl_coach.base_parameters import iterable_to_items, TaskParameters, DistributedTaskParameters, VisualizationParameters, \
+from rl_coach.base_parameters import iterable_to_items, TaskParameters, DistributedTaskParameters, \
+    VisualizationParameters, \
     Parameters, PresetValidationParameters
 from rl_coach.core_types import TotalStepsCounter, RunPhase, PlayingStepsType, TrainingSteps, EnvironmentEpisodes, \
     EnvironmentSteps, \
     StepMethod
 from rl_coach.environments.environment import Environment
 from rl_coach.level_manager import LevelManager
-from rl_coach.utils import set_cpu
-
 from rl_coach.logger import screen, Logger
+from rl_coach.utils import set_cpu
 class ScheduleParameters(Parameters):
diff --git a/rl_coach/graph_managers/hac_graph_manager.py b/rl_coach/graph_managers/hac_graph_manager.py
index 9fcd88a..390ee92 100644
--- a/rl_coach/graph_managers/hac_graph_manager.py
+++ b/rl_coach/graph_managers/hac_graph_manager.py
@@ -13,17 +13,16 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-import numpy as np
 from typing import List, Union, Tuple
-from rl_coach.base_parameters import AgentParameters, VisualizationParameters, TaskParameters, PresetValidationParameters
+from rl_coach.base_parameters import AgentParameters, VisualizationParameters, TaskParameters, \
+    PresetValidationParameters
+from rl_coach.core_types import EnvironmentSteps
 from rl_coach.environments.environment import EnvironmentParameters, Environment
+from rl_coach.graph_managers.graph_manager import GraphManager, ScheduleParameters
 from rl_coach.level_manager import LevelManager
 from rl_coach.utils import short_dynamic_import
-from rl_coach.core_types import EnvironmentSteps
-from rl_coach.graph_managers.graph_manager import GraphManager, ScheduleParameters
-
 class HACGraphManager(GraphManager):
     """
diff --git a/rl_coach/graph_managers/hrl_graph_manager.py b/rl_coach/graph_managers/hrl_graph_manager.py
index f8b81a5..d75687d 100644
--- a/rl_coach/graph_managers/hrl_graph_manager.py
+++ b/rl_coach/graph_managers/hrl_graph_manager.py
@@ -16,14 +16,14 @@ from typing import List, Union, Tuple
-from rl_coach.base_parameters import AgentParameters, VisualizationParameters, TaskParameters, PresetValidationParameters
+from rl_coach.base_parameters import AgentParameters, VisualizationParameters, TaskParameters, \
+    PresetValidationParameters
+from rl_coach.core_types import EnvironmentSteps
 from rl_coach.environments.environment import EnvironmentParameters, Environment
+from rl_coach.graph_managers.graph_manager import GraphManager, ScheduleParameters
 from rl_coach.level_manager import LevelManager
 from rl_coach.utils import short_dynamic_import
-from rl_coach.core_types import EnvironmentSteps
-from rl_coach.graph_managers.graph_manager import GraphManager, ScheduleParameters
-
 class HRLGraphManager(GraphManager):
     """
diff --git a/rl_coach/level_manager.py b/rl_coach/level_manager.py
index 52cc410..bb4ee60 100644
--- a/rl_coach/level_manager.py
+++ b/rl_coach/level_manager.py
@@ -14,14 +14,13 @@
 # limitations under the License.
 #
 import copy
-from typing import Union, Dict, Tuple, Type
-
-from rl_coach.environments.environment import Environment
-from rl_coach.environments.environment_interface import EnvironmentInterface
-from rl_coach.spaces import ActionSpace, SpacesDefinition
+from typing import Union, Dict
 from rl_coach.agents.composite_agent import CompositeAgent
 from rl_coach.core_types import EnvResponse, ActionInfo, RunPhase, ActionType, EnvironmentSteps
+from rl_coach.environments.environment import Environment
+from rl_coach.environments.environment_interface import EnvironmentInterface
+from rl_coach.spaces import ActionSpace, SpacesDefinition
 class LevelManager(EnvironmentInterface):
diff --git a/rl_coach/memories/episodic/episodic_experience_replay.py b/rl_coach/memories/episodic/episodic_experience_replay.py
index 9520320..2c86c5f 100644
--- a/rl_coach/memories/episodic/episodic_experience_replay.py
+++ b/rl_coach/memories/episodic/episodic_experience_replay.py
@@ -17,10 +17,10 @@ from typing import List, Tuple, Union, Dict, Any
 import numpy as np
-from rl_coach.utils import ReaderWriterLock
 from rl_coach.core_types import Transition, Episode
 from rl_coach.memories.memory import Memory, MemoryGranularity, MemoryParameters
+from rl_coach.utils import ReaderWriterLock
 class EpisodicExperienceReplayParameters(MemoryParameters):
diff --git a/rl_coach/memories/episodic/episodic_hindsight_experience_replay.py b/rl_coach/memories/episodic/episodic_hindsight_experience_replay.py
index 7a9ac7b..c30f451 100644
--- a/rl_coach/memories/episodic/episodic_hindsight_experience_replay.py
+++ b/rl_coach/memories/episodic/episodic_hindsight_experience_replay.py
@@ -21,7 +21,8 @@ from typing import Tuple, List
 import numpy as np
 from rl_coach.core_types import Episode, Transition
-from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplayParameters, EpisodicExperienceReplay
+from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplayParameters, \
+    EpisodicExperienceReplay
 from rl_coach.memories.non_episodic.experience_replay import MemoryGranularity
 from rl_coach.spaces import GoalsSpace
diff --git a/rl_coach/memories/episodic/single_episode_buffer.py b/rl_coach/memories/episodic/single_episode_buffer.py
index f1cd64b..e64167b 100644
--- a/rl_coach/memories/episodic/single_episode_buffer.py
+++ b/rl_coach/memories/episodic/single_episode_buffer.py
@@ -14,9 +14,8 @@
 # limitations under the License.
 #
-from rl_coach.memories.memory import MemoryGranularity, MemoryParameters
-
 from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplay
+from rl_coach.memories.memory import MemoryGranularity, MemoryParameters
 class SingleEpisodeBufferParameters(MemoryParameters):
diff --git a/rl_coach/memories/non_episodic/experience_replay.py b/rl_coach/memories/non_episodic/experience_replay.py
index 2e8d22f..6b8653f 100644
--- a/rl_coach/memories/non_episodic/experience_replay.py
+++ b/rl_coach/memories/non_episodic/experience_replay.py
@@ -17,10 +17,10 @@ from typing import List, Tuple, Union, Dict, Any
 import numpy as np
-from rl_coach.utils import ReaderWriterLock
 from rl_coach.core_types import Transition
 from rl_coach.memories.memory import Memory, MemoryGranularity, MemoryParameters
+from rl_coach.utils import ReaderWriterLock
 class ExperienceReplayParameters(MemoryParameters):
diff --git a/rl_coach/memories/non_episodic/prioritized_experience_replay.py b/rl_coach/memories/non_episodic/prioritized_experience_replay.py
index 2a6c2e8..a8fdcfc 100644
--- a/rl_coach/memories/non_episodic/prioritized_experience_replay.py
+++ b/rl_coach/memories/non_episodic/prioritized_experience_replay.py
@@ -20,11 +20,11 @@ from enum import Enum
 from typing import List, Tuple, Any
 import numpy as np
-from rl_coach.memories.memory import MemoryGranularity
-from rl_coach.schedules import Schedule, ConstantSchedule
 from rl_coach.core_types import Transition
+from rl_coach.memories.memory import MemoryGranularity
 from rl_coach.memories.non_episodic.experience_replay import ExperienceReplayParameters, ExperienceReplay
+from rl_coach.schedules import Schedule, ConstantSchedule
 class PrioritizedExperienceReplayParameters(ExperienceReplayParameters):
diff --git a/rl_coach/presets/Atari_A3C.py b/rl_coach/presets/Atari_A3C.py
index e203b31..72a12ae 100644
--- a/rl_coach/presets/Atari_A3C.py
+++ b/rl_coach/presets/Atari_A3C.py
@@ -1,14 +1,13 @@
 from rl_coach.agents.actor_critic_agent import ActorCriticAgentParameters
 from rl_coach.architectures.tensorflow_components.middlewares.fc_middleware import FCMiddlewareParameters
 from rl_coach.base_parameters import VisualizationParameters, PresetValidationParameters
+from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps, RunPhase
 from rl_coach.environments.environment import SingleLevelSelection, SelectedPhaseOnlyDumpMethod, MaxDumpMethod
 from rl_coach.environments.gym_environment import Atari, atari_deterministic_v4
+from rl_coach.exploration_policies.categorical import CategoricalParameters
 from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
 from rl_coach.graph_managers.graph_manager import ScheduleParameters
-from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps, RunPhase
-from rl_coach.exploration_policies.categorical import CategoricalParameters
-
 ####################
 # Graph Scheduling #
 ####################
diff --git a/rl_coach/presets/Atari_A3C_LSTM.py b/rl_coach/presets/Atari_A3C_LSTM.py
index 7b51c2d..268ac3a 100644
--- a/rl_coach/presets/Atari_A3C_LSTM.py
+++ b/rl_coach/presets/Atari_A3C_LSTM.py
@@ -1,14 +1,13 @@
 from rl_coach.agents.actor_critic_agent import ActorCriticAgentParameters
 from rl_coach.architectures.tensorflow_components.middlewares.lstm_middleware import LSTMMiddlewareParameters
 from rl_coach.base_parameters import VisualizationParameters, MiddlewareScheme, PresetValidationParameters
+from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps, RunPhase
 from rl_coach.environments.environment import SingleLevelSelection, SelectedPhaseOnlyDumpMethod, MaxDumpMethod
 from rl_coach.environments.gym_environment import Atari, atari_deterministic_v4, AtariInputFilter
+from rl_coach.exploration_policies.categorical import CategoricalParameters
 from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
 from rl_coach.graph_managers.graph_manager import ScheduleParameters
-from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps, RunPhase
-from rl_coach.exploration_policies.categorical import CategoricalParameters
-
 ####################
 # Graph Scheduling #
 ####################
diff --git a/rl_coach/presets/Atari_Bootstrapped_DQN.py b/rl_coach/presets/Atari_Bootstrapped_DQN.py
index 51eac41..8523d55 100644
--- a/rl_coach/presets/Atari_Bootstrapped_DQN.py
+++ b/rl_coach/presets/Atari_Bootstrapped_DQN.py
@@ -1,12 +1,11 @@
+from rl_coach.agents.bootstrapped_dqn_agent import BootstrappedDQNAgentParameters
 from rl_coach.base_parameters import VisualizationParameters, PresetValidationParameters
+from rl_coach.core_types import EnvironmentSteps, RunPhase
 from rl_coach.environments.environment import MaxDumpMethod, SelectedPhaseOnlyDumpMethod, SingleLevelSelection
 from rl_coach.environments.gym_environment import Atari, atari_deterministic_v4
 from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
 from rl_coach.graph_managers.graph_manager import ScheduleParameters
-from rl_coach.agents.bootstrapped_dqn_agent import BootstrappedDQNAgentParameters
-from rl_coach.core_types import EnvironmentSteps, RunPhase
-
 ####################
 # Graph Scheduling #
 ####################
diff --git a/rl_coach/presets/Atari_C51.py b/rl_coach/presets/Atari_C51.py
index 9f1d97b..2fc2978 100644
--- a/rl_coach/presets/Atari_C51.py
+++ b/rl_coach/presets/Atari_C51.py
@@ -1,12 +1,11 @@
+from rl_coach.agents.categorical_dqn_agent import CategoricalDQNAgentParameters
 from rl_coach.base_parameters import VisualizationParameters, PresetValidationParameters
+from rl_coach.core_types import EnvironmentSteps, RunPhase
 from rl_coach.environments.environment import MaxDumpMethod, SelectedPhaseOnlyDumpMethod, SingleLevelSelection
 from rl_coach.environments.gym_environment import Atari, atari_deterministic_v4
 from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
 from rl_coach.graph_managers.graph_manager import ScheduleParameters
-from rl_coach.agents.categorical_dqn_agent import CategoricalDQNAgentParameters
-from rl_coach.core_types import EnvironmentSteps, RunPhase
-
 ####################
 # Graph Scheduling #
 ####################
diff --git a/rl_coach/presets/Atari_DDQN.py b/rl_coach/presets/Atari_DDQN.py
index ff6fa75..a96811d 100644
--- a/rl_coach/presets/Atari_DDQN.py
+++ b/rl_coach/presets/Atari_DDQN.py
@@ -1,12 +1,11 @@
+from rl_coach.agents.ddqn_agent import DDQNAgentParameters
 from rl_coach.base_parameters import VisualizationParameters, PresetValidationParameters
+from rl_coach.core_types import EnvironmentSteps, RunPhase
 from rl_coach.environments.environment import MaxDumpMethod, SelectedPhaseOnlyDumpMethod, SingleLevelSelection
 from rl_coach.environments.gym_environment import Atari, atari_deterministic_v4
 from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
 from rl_coach.graph_managers.graph_manager import ScheduleParameters
-from rl_coach.agents.ddqn_agent import DDQNAgentParameters
-from rl_coach.core_types import EnvironmentSteps, RunPhase
-
 ####################
 # Graph Scheduling #
 ####################
diff --git a/rl_coach/presets/Atari_DDQN_with_PER.py b/rl_coach/presets/Atari_DDQN_with_PER.py
index 525d2fd..b8df216 100644
--- a/rl_coach/presets/Atari_DDQN_with_PER.py
+++ b/rl_coach/presets/Atari_DDQN_with_PER.py
@@ -1,4 +1,6 @@
+from rl_coach.agents.ddqn_agent import DDQNAgentParameters
 from rl_coach.base_parameters import VisualizationParameters, PresetValidationParameters
+from rl_coach.core_types import EnvironmentSteps, RunPhase
 from rl_coach.environments.environment import MaxDumpMethod, SelectedPhaseOnlyDumpMethod, SingleLevelSelection
 from rl_coach.environments.gym_environment import Atari, atari_deterministic_v4
 from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
@@ -6,9 +8,6 @@ from rl_coach.graph_managers.graph_manager import ScheduleParameters
 from rl_coach.memories.non_episodic.prioritized_experience_replay import PrioritizedExperienceReplayParameters
 from rl_coach.schedules import LinearSchedule
-from rl_coach.agents.ddqn_agent import DDQNAgentParameters
-from rl_coach.core_types import EnvironmentSteps, RunPhase
-
 ####################
 # Graph Scheduling #
 ####################
diff --git a/rl_coach/presets/Atari_DQN.py b/rl_coach/presets/Atari_DQN.py
index a055785..c500dd0 100644
--- a/rl_coach/presets/Atari_DQN.py
+++ b/rl_coach/presets/Atari_DQN.py
@@ -1,12 +1,11 @@
+from rl_coach.agents.dqn_agent import DQNAgentParameters
 from rl_coach.base_parameters import VisualizationParameters, PresetValidationParameters
+from rl_coach.core_types import EnvironmentSteps, RunPhase
 from rl_coach.environments.environment import MaxDumpMethod, SelectedPhaseOnlyDumpMethod, SingleLevelSelection
 from rl_coach.environments.gym_environment import Atari, atari_deterministic_v4
 from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
 from rl_coach.graph_managers.graph_manager import ScheduleParameters
-from rl_coach.agents.dqn_agent import DQNAgentParameters
-from rl_coach.core_types import EnvironmentSteps, RunPhase
-
 ####################
 # Graph Scheduling #
 ####################
diff --git a/rl_coach/presets/Atari_DQN_with_PER.py b/rl_coach/presets/Atari_DQN_with_PER.py
index 5253d3d..ee5c133 100644
--- a/rl_coach/presets/Atari_DQN_with_PER.py
+++ b/rl_coach/presets/Atari_DQN_with_PER.py
@@ -1,4 +1,6 @@
+from rl_coach.agents.dqn_agent import DQNAgentParameters
 from rl_coach.base_parameters import VisualizationParameters, PresetValidationParameters
+from rl_coach.core_types import EnvironmentSteps, RunPhase
 from rl_coach.environments.environment import MaxDumpMethod, SelectedPhaseOnlyDumpMethod, SingleLevelSelection
 from rl_coach.environments.gym_environment import Atari, atari_deterministic_v4
 from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
@@ -6,9 +8,6 @@ from rl_coach.graph_managers.graph_manager import ScheduleParameters
 from rl_coach.memories.non_episodic.prioritized_experience_replay import PrioritizedExperienceReplayParameters
 from rl_coach.schedules import LinearSchedule
-from rl_coach.agents.dqn_agent import DQNAgentParameters
-from rl_coach.core_types import EnvironmentSteps, RunPhase
-
 ####################
 # Graph Scheduling #
 ####################
diff --git a/rl_coach/presets/Atari_Dueling_DDQN.py b/rl_coach/presets/Atari_Dueling_DDQN.py
index 3ed48cd..d7fc252 100644
--- a/rl_coach/presets/Atari_Dueling_DDQN.py
+++ b/rl_coach/presets/Atari_Dueling_DDQN.py
@@ -1,15 +1,14 @@
 import math
+from rl_coach.agents.ddqn_agent import DDQNAgentParameters
 from rl_coach.architectures.tensorflow_components.heads.dueling_q_head import DuelingQHeadParameters
 from rl_coach.base_parameters import VisualizationParameters, MiddlewareScheme, PresetValidationParameters
+from rl_coach.core_types import EnvironmentSteps, RunPhase
 from rl_coach.environments.environment import MaxDumpMethod, SelectedPhaseOnlyDumpMethod, SingleLevelSelection
 from rl_coach.environments.gym_environment import Atari, atari_deterministic_v4
 from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
 from rl_coach.graph_managers.graph_manager import ScheduleParameters
-from rl_coach.agents.ddqn_agent import DDQNAgentParameters
-from rl_coach.core_types import EnvironmentSteps, RunPhase
-
 ####################
 # Graph Scheduling #
 ####################
diff --git a/rl_coach/presets/Atari_Dueling_DDQN_with_PER_OpenAI.py b/rl_coach/presets/Atari_Dueling_DDQN_with_PER_OpenAI.py
index 07a904f..3c8d9d0 100644
--- a/rl_coach/presets/Atari_Dueling_DDQN_with_PER_OpenAI.py
+++ b/rl_coach/presets/Atari_Dueling_DDQN_with_PER_OpenAI.py
@@ -1,5 +1,7 @@
+from rl_coach.agents.ddqn_agent import DDQNAgentParameters
 from rl_coach.architectures.tensorflow_components.heads.dueling_q_head import DuelingQHeadParameters
 from rl_coach.base_parameters import VisualizationParameters, MiddlewareScheme, PresetValidationParameters
+from rl_coach.core_types import EnvironmentSteps, RunPhase
 from rl_coach.environments.environment import MaxDumpMethod, SelectedPhaseOnlyDumpMethod, SingleLevelSelection
 from rl_coach.environments.gym_environment import Atari, atari_deterministic_v4
 from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
@@ -7,9 +9,6 @@ from rl_coach.graph_managers.graph_manager import ScheduleParameters
 from rl_coach.memories.non_episodic.prioritized_experience_replay import PrioritizedExperienceReplayParameters
 from rl_coach.schedules import LinearSchedule, PieceWiseSchedule, ConstantSchedule
-from rl_coach.agents.ddqn_agent import DDQNAgentParameters
-from rl_coach.core_types import EnvironmentSteps, RunPhase
-
 ####################
 # Graph Scheduling #
 ####################
diff --git a/rl_coach/presets/Atari_NEC.py b/rl_coach/presets/Atari_NEC.py
index cb172bd..158e408 100644
--- a/rl_coach/presets/Atari_NEC.py
+++ b/rl_coach/presets/Atari_NEC.py
@@ -1,12 +1,11 @@
+from rl_coach.agents.nec_agent import NECAgentParameters
 from rl_coach.base_parameters import VisualizationParameters, PresetValidationParameters
+from rl_coach.core_types import EnvironmentEpisodes, EnvironmentSteps, RunPhase
 from rl_coach.environments.environment import SingleLevelSelection, SelectedPhaseOnlyDumpMethod, MaxDumpMethod
 from rl_coach.environments.gym_environment import Atari, AtariInputFilter, atari_deterministic_v4
 from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
 from rl_coach.graph_managers.graph_manager import ScheduleParameters
-from rl_coach.agents.nec_agent import NECAgentParameters
-from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps, RunPhase
-
 ####################
 # Graph Scheduling #
 ####################
diff --git a/rl_coach/presets/Atari_NStepQ.py b/rl_coach/presets/Atari_NStepQ.py
index cff99fb..91d4f8c 100644
--- a/rl_coach/presets/Atari_NStepQ.py
+++ b/rl_coach/presets/Atari_NStepQ.py
@@ -1,13 +1,12 @@
+from rl_coach.agents.n_step_q_agent import NStepQAgentParameters
 from rl_coach.architectures.tensorflow_components.architecture import Conv2d, Dense
 from rl_coach.base_parameters import VisualizationParameters, PresetValidationParameters
+from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps, RunPhase
 from rl_coach.environments.environment import SingleLevelSelection, SelectedPhaseOnlyDumpMethod, MaxDumpMethod
 from rl_coach.environments.gym_environment import Atari, atari_deterministic_v4
 from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
 from rl_coach.graph_managers.graph_manager import ScheduleParameters
-from rl_coach.agents.n_step_q_agent import NStepQAgentParameters
-from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps, RunPhase
-
 ####################
 # Graph Scheduling #
 ####################
diff --git a/rl_coach/presets/Atari_QR_DQN.py b/rl_coach/presets/Atari_QR_DQN.py
index ba787f7..d4c01f9 100644
--- a/rl_coach/presets/Atari_QR_DQN.py
+++ b/rl_coach/presets/Atari_QR_DQN.py
@@ -1,12 +1,11 @@
+from rl_coach.agents.qr_dqn_agent import QuantileRegressionDQNAgentParameters
 from rl_coach.base_parameters import VisualizationParameters, PresetValidationParameters
+from rl_coach.core_types import EnvironmentSteps, RunPhase
 from rl_coach.environments.environment import MaxDumpMethod, SelectedPhaseOnlyDumpMethod, SingleLevelSelection
 from rl_coach.environments.gym_environment import Atari, atari_deterministic_v4
 from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
 from rl_coach.graph_managers.graph_manager import ScheduleParameters
-from rl_coach.agents.qr_dqn_agent import QuantileRegressionDQNAgentParameters
-from rl_coach.core_types import EnvironmentSteps, RunPhase
-
 ####################
 # Graph Scheduling #
 ####################
diff --git a/rl_coach/presets/Atari_UCB_with_Q_Ensembles.py b/rl_coach/presets/Atari_UCB_with_Q_Ensembles.py
index 41c4515..59041b4 100644
--- a/rl_coach/presets/Atari_UCB_with_Q_Ensembles.py
+++ b/rl_coach/presets/Atari_UCB_with_Q_Ensembles.py
@@ -1,13 +1,12 @@
 from rl_coach.agents.bootstrapped_dqn_agent import BootstrappedDQNAgentParameters
 from rl_coach.base_parameters import VisualizationParameters, PresetValidationParameters
+from rl_coach.core_types import EnvironmentSteps, RunPhase
 from rl_coach.environments.environment import MaxDumpMethod, SelectedPhaseOnlyDumpMethod, SingleLevelSelection
 from rl_coach.environments.gym_environment import Atari, atari_deterministic_v4
+from rl_coach.exploration_policies.ucb import UCBParameters
 from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
 from rl_coach.graph_managers.graph_manager import ScheduleParameters
-from rl_coach.core_types import EnvironmentSteps, RunPhase
-from rl_coach.exploration_policies.ucb import UCBParameters
-
 ####################
 # Graph Scheduling #
 ####################
diff --git a/rl_coach/presets/BitFlip_DQN.py b/rl_coach/presets/BitFlip_DQN.py
index fc0cf01..25d99fd 100644
--- a/rl_coach/presets/BitFlip_DQN.py
+++ b/rl_coach/presets/BitFlip_DQN.py
@@ -1,15 +1,14 @@
+from rl_coach.agents.dqn_agent import DQNAgentParameters
 from rl_coach.architectures.tensorflow_components.architecture import Dense
 from rl_coach.base_parameters import VisualizationParameters, EmbedderScheme, InputEmbedderParameters, \
     PresetValidationParameters
+from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps
 from rl_coach.environments.gym_environment import Mujoco
 from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
 from rl_coach.graph_managers.graph_manager import ScheduleParameters
 from rl_coach.memories.memory import MemoryGranularity
 from rl_coach.schedules import ConstantSchedule
-from rl_coach.agents.dqn_agent import DQNAgentParameters
-from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps
-
 bit_length = 8
 ####################
diff --git a/rl_coach/presets/BitFlip_DQN_HER.py b/rl_coach/presets/BitFlip_DQN_HER.py
index df74366..2b38e74 100644
--- a/rl_coach/presets/BitFlip_DQN_HER.py
+++ b/rl_coach/presets/BitFlip_DQN_HER.py
@@ -1,6 +1,8 @@
+from rl_coach.agents.dqn_agent import DQNAgentParameters
 from rl_coach.architectures.tensorflow_components.architecture import Dense
 from rl_coach.base_parameters import VisualizationParameters, EmbedderScheme, InputEmbedderParameters, \
     PresetValidationParameters
+from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps
 from rl_coach.environments.gym_environment import Mujoco
 from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
 from rl_coach.graph_managers.graph_manager import ScheduleParameters
@@ -10,9 +12,6 @@ from rl_coach.memories.memory import MemoryGranularity
 from rl_coach.schedules import ConstantSchedule
 from rl_coach.spaces import GoalsSpace, ReachingGoal
-from rl_coach.agents.dqn_agent import DQNAgentParameters
-from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps
-
 bit_length = 20
 ####################
diff --git a/rl_coach/presets/Carla_3_Cameras_DDPG.py b/rl_coach/presets/Carla_3_Cameras_DDPG.py
index 19ca370..b2f5c67 100644
--- a/rl_coach/presets/Carla_3_Cameras_DDPG.py
+++ b/rl_coach/presets/Carla_3_Cameras_DDPG.py
@@ -1,14 +1,13 @@
 import copy
+from rl_coach.agents.ddpg_agent import DDPGAgentParameters
 from rl_coach.base_parameters import VisualizationParameters
+from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps, RunPhase
 from rl_coach.environments.carla_environment import CarlaEnvironmentParameters, CameraTypes, CarlaInputFilter
 from rl_coach.environments.environment import MaxDumpMethod, SelectedPhaseOnlyDumpMethod
 from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
 from rl_coach.graph_managers.graph_manager import ScheduleParameters
-from rl_coach.agents.ddpg_agent import DDPGAgentParameters
-from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps, RunPhase
-
 ####################
 # Graph Scheduling #
 ####################
diff --git a/rl_coach/presets/Carla_DDPG.py b/rl_coach/presets/Carla_DDPG.py
index 820d22a..4f31979 100644
--- a/rl_coach/presets/Carla_DDPG.py
+++ b/rl_coach/presets/Carla_DDPG.py
@@ -1,12 +1,11 @@
+from rl_coach.agents.ddpg_agent import DDPGAgentParameters
 from rl_coach.base_parameters import VisualizationParameters
+from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps, RunPhase
 from rl_coach.environments.carla_environment import CarlaEnvironmentParameters
 from rl_coach.environments.environment import MaxDumpMethod, SelectedPhaseOnlyDumpMethod
 from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
 from rl_coach.graph_managers.graph_manager import ScheduleParameters
-from rl_coach.agents.ddpg_agent import DDPGAgentParameters
-from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps, RunPhase
-
 ####################
 # Graph Scheduling #
 ####################
diff --git a/rl_coach/presets/Carla_Dueling_DDQN.py b/rl_coach/presets/Carla_Dueling_DDQN.py
index 7c40820..cf18ce8 100644
--- a/rl_coach/presets/Carla_Dueling_DDQN.py
+++ b/rl_coach/presets/Carla_Dueling_DDQN.py
@@ -1,7 +1,9 @@
 import math
+from rl_coach.agents.ddqn_agent import DDQNAgentParameters
 from rl_coach.architectures.tensorflow_components.heads.dueling_q_head import DuelingQHeadParameters
 from rl_coach.base_parameters import VisualizationParameters, MiddlewareScheme
+from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps, RunPhase
 from rl_coach.environments.carla_environment import CarlaEnvironmentParameters
 from rl_coach.environments.environment import MaxDumpMethod, SelectedPhaseOnlyDumpMethod
 from rl_coach.filters.action.box_discretization import BoxDiscretization
@@ -9,9 +11,6 @@ from rl_coach.filters.filter import OutputFilter
 from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
 from rl_coach.graph_managers.graph_manager import ScheduleParameters
-from rl_coach.agents.ddqn_agent import DDQNAgentParameters
-from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps, RunPhase
-
 ####################
 # Graph Scheduling #
 ####################
diff --git a/rl_coach/presets/CartPole_A3C.py b/rl_coach/presets/CartPole_A3C.py
index 9b063d7..04bf233 100644
--- a/rl_coach/presets/CartPole_A3C.py
+++ b/rl_coach/presets/CartPole_A3C.py
@@ -1,15 +1,14 @@
 from rl_coach.agents.actor_critic_agent import ActorCriticAgentParameters
 from rl_coach.agents.policy_optimization_agent import PolicyGradientRescaler
 from rl_coach.base_parameters import VisualizationParameters, PresetValidationParameters
+from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps, RunPhase
 from rl_coach.environments.environment import SelectedPhaseOnlyDumpMethod, MaxDumpMethod
 from rl_coach.environments.gym_environment import MujocoInputFilter, Mujoco
 from rl_coach.exploration_policies.categorical import CategoricalParameters
+from rl_coach.filters.reward.reward_rescale_filter import RewardRescaleFilter
 from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
 from rl_coach.graph_managers.graph_manager import ScheduleParameters
-from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps, RunPhase
-from rl_coach.filters.reward.reward_rescale_filter import RewardRescaleFilter
-
 ####################
 # Graph Scheduling #
 ####################
diff --git a/rl_coach/presets/CartPole_DFP.py b/rl_coach/presets/CartPole_DFP.py
index 4bd88de..d263e40 100644
--- a/rl_coach/presets/CartPole_DFP.py
+++ b/rl_coach/presets/CartPole_DFP.py
@@ -1,13 +1,12 @@
+from rl_coach.agents.dfp_agent import DFPAgentParameters, HandlingTargetsAfterEpisodeEnd
 from rl_coach.base_parameters import VisualizationParameters, EmbedderScheme, PresetValidationParameters
+from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps, RunPhase
 from rl_coach.environments.environment import SelectedPhaseOnlyDumpMethod, MaxDumpMethod
 from rl_coach.environments.gym_environment import Mujoco
 from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
 from rl_coach.graph_managers.graph_manager import ScheduleParameters
 from rl_coach.schedules import LinearSchedule
-from rl_coach.agents.dfp_agent import DFPAgentParameters, HandlingTargetsAfterEpisodeEnd
-from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps, RunPhase
-
 ####################
 # Graph Scheduling #
 ####################
diff --git a/rl_coach/presets/CartPole_DQN.py b/rl_coach/presets/CartPole_DQN.py
index 22cec3b..4bee20d 100644
--- a/rl_coach/presets/CartPole_DQN.py
+++ b/rl_coach/presets/CartPole_DQN.py
@@ -1,4 +1,6 @@
+from rl_coach.agents.dqn_agent import DQNAgentParameters
 from rl_coach.base_parameters import VisualizationParameters, PresetValidationParameters
+from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps, RunPhase
 from rl_coach.environments.environment import SelectedPhaseOnlyDumpMethod, MaxDumpMethod
 from rl_coach.environments.gym_environment import Mujoco
 from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
@@ -6,9 +8,6 @@ from rl_coach.graph_managers.graph_manager import ScheduleParameters
 from rl_coach.memories.memory import MemoryGranularity
 from rl_coach.schedules import LinearSchedule
-from rl_coach.agents.dqn_agent import DQNAgentParameters
-from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps, RunPhase
-
 ####################
 # Graph Scheduling #
 ####################
diff --git a/rl_coach/presets/CartPole_Dueling_DDQN.py b/rl_coach/presets/CartPole_Dueling_DDQN.py
index 8ee65a2..5a2caa2 100644
--- a/rl_coach/presets/CartPole_Dueling_DDQN.py
+++ b/rl_coach/presets/CartPole_Dueling_DDQN.py
@@ -1,7 +1,9 @@
 import math
+from rl_coach.agents.ddqn_agent import DDQNAgentParameters
 from rl_coach.architectures.tensorflow_components.heads.dueling_q_head import DuelingQHeadParameters
 from rl_coach.base_parameters import VisualizationParameters, PresetValidationParameters
+from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps, RunPhase
 from rl_coach.environments.environment import SelectedPhaseOnlyDumpMethod, MaxDumpMethod
 from rl_coach.environments.gym_environment import Mujoco
 from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
@@ -9,9 +11,6 @@ from rl_coach.graph_managers.graph_manager import ScheduleParameters
 from rl_coach.memories.memory import MemoryGranularity
 from rl_coach.schedules import LinearSchedule
-from rl_coach.agents.ddqn_agent import DDQNAgentParameters
-from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps, RunPhase
-
 ####################
 # Graph Scheduling #
 ####################
diff --git a/rl_coach/presets/CartPole_NEC.py b/rl_coach/presets/CartPole_NEC.py
index 7023ae3..7a961aa 100644
--- a/rl_coach/presets/CartPole_NEC.py
+++ b/rl_coach/presets/CartPole_NEC.py
@@ -1,15 +1,14 @@
 from rl_coach.agents.nec_agent import NECAgentParameters
 from rl_coach.base_parameters import VisualizationParameters, PresetValidationParameters
+from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps, RunPhase
 from rl_coach.environments.environment import SelectedPhaseOnlyDumpMethod, MaxDumpMethod
 from rl_coach.environments.gym_environment import Atari, MujocoInputFilter
+from rl_coach.filters.reward.reward_rescale_filter import RewardRescaleFilter
 from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
 from rl_coach.graph_managers.graph_manager import ScheduleParameters
 from rl_coach.memories.memory import MemoryGranularity
 from rl_coach.schedules import LinearSchedule
-from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps, RunPhase
-from rl_coach.filters.reward.reward_rescale_filter import RewardRescaleFilter
-
 ####################
 # Graph Scheduling #
 ####################
diff --git a/rl_coach/presets/CartPole_NStepQ.py b/rl_coach/presets/CartPole_NStepQ.py
index 8e7e210..f91142f 100644
--- a/rl_coach/presets/CartPole_NStepQ.py
+++ b/rl_coach/presets/CartPole_NStepQ.py
@@ -1,13 +1,12 @@
 from rl_coach.agents.n_step_q_agent import NStepQAgentParameters
 from rl_coach.base_parameters import VisualizationParameters, PresetValidationParameters
+from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps, RunPhase
 from rl_coach.environments.environment import SelectedPhaseOnlyDumpMethod, MaxDumpMethod
 from rl_coach.environments.gym_environment import MujocoInputFilter, Mujoco
+from rl_coach.filters.reward.reward_rescale_filter import RewardRescaleFilter
 from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
 from rl_coach.graph_managers.graph_manager import ScheduleParameters
-from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps, RunPhase
-from rl_coach.filters.reward.reward_rescale_filter import RewardRescaleFilter
-
 ####################
 # Graph Scheduling #
 ####################
diff --git a/rl_coach/presets/CartPole_PAL.py b/rl_coach/presets/CartPole_PAL.py
index f25843c..cedb2b1 100644
--- a/rl_coach/presets/CartPole_PAL.py
+++ b/rl_coach/presets/CartPole_PAL.py
@@ -1,5 +1,6 @@
 from rl_coach.agents.pal_agent import PALAgentParameters
 from rl_coach.base_parameters import VisualizationParameters, PresetValidationParameters
+from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps, RunPhase
 from rl_coach.environments.environment import SelectedPhaseOnlyDumpMethod, MaxDumpMethod
 from rl_coach.environments.gym_environment import Mujoco
 from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
@@ -7,9 +8,6 @@ from rl_coach.graph_managers.graph_manager import ScheduleParameters
 from rl_coach.memories.memory import MemoryGranularity
 from rl_coach.schedules import LinearSchedule
-from rl_coach.agents.dqn_agent import DQNAgentParameters
-from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps, RunPhase
-
 ####################
 # Graph Scheduling #
 ####################
diff --git a/rl_coach/presets/CartPole_PG.py b/rl_coach/presets/CartPole_PG.py
index 614d2eb..2d578de 100644
--- a/rl_coach/presets/CartPole_PG.py
+++ b/rl_coach/presets/CartPole_PG.py
@@ -1,14 +1,13 @@
 from rl_coach.agents.policy_gradients_agent import PolicyGradientsAgentParameters
 from rl_coach.base_parameters import VisualizationParameters, PresetValidationParameters
+from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps, RunPhase
 from rl_coach.environments.environment import SelectedPhaseOnlyDumpMethod, MaxDumpMethod
 from rl_coach.environments.gym_environment import MujocoInputFilter, Mujoco
 from rl_coach.exploration_policies.categorical import CategoricalParameters
+from rl_coach.filters.reward.reward_rescale_filter import RewardRescaleFilter
 from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
 from rl_coach.graph_managers.graph_manager import ScheduleParameters
-from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps, RunPhase
-from rl_coach.filters.reward.reward_rescale_filter import RewardRescaleFilter
-
 ####################
 # Graph Scheduling #
 ####################
diff --git a/rl_coach/presets/ControlSuite_DDPG.py b/rl_coach/presets/ControlSuite_DDPG.py
index 1923507..7357e4e 100644
--- a/rl_coach/presets/ControlSuite_DDPG.py
+++ b/rl_coach/presets/ControlSuite_DDPG.py
@@ -1,15 +1,14 @@
 from rl_coach.agents.ddpg_agent import DDPGAgentParameters
 from rl_coach.architectures.tensorflow_components.architecture import Dense
-from rl_coach.base_parameters import VisualizationParameters, EmbedderScheme, PresetValidationParameters
+from rl_coach.base_parameters import VisualizationParameters, EmbedderScheme
+from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps, RunPhase
 from rl_coach.environments.control_suite_environment import ControlSuiteEnvironmentParameters, control_suite_envs
 from rl_coach.environments.environment import MaxDumpMethod, SelectedPhaseOnlyDumpMethod, SingleLevelSelection
 from rl_coach.environments.gym_environment import MujocoInputFilter
+from rl_coach.filters.reward.reward_rescale_filter import RewardRescaleFilter
 from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
 from rl_coach.graph_managers.graph_manager import ScheduleParameters
-from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps, RunPhase
-from rl_coach.filters.reward.reward_rescale_filter import RewardRescaleFilter
-
 ####################
 # Graph Scheduling #
 ####################
diff --git a/rl_coach/presets/Doom_Basic_A3C.py b/rl_coach/presets/Doom_Basic_A3C.py
index ac973ca..a95e9a2 100644
--- a/rl_coach/presets/Doom_Basic_A3C.py
+++ b/rl_coach/presets/Doom_Basic_A3C.py
@@ -1,18 +1,14 @@
 from rl_coach.agents.actor_critic_agent import ActorCriticAgentParameters
-from rl_coach.agents.dqn_agent import DQNAgentParameters
 from rl_coach.agents.policy_optimization_agent import PolicyGradientRescaler
 from rl_coach.base_parameters import VisualizationParameters, PresetValidationParameters
+from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps, RunPhase
+from rl_coach.environments.doom_environment import DoomEnvironmentParameters
 from rl_coach.environments.environment import SelectedPhaseOnlyDumpMethod, MaxDumpMethod
 from rl_coach.environments.gym_environment import MujocoInputFilter
 from rl_coach.exploration_policies.categorical import CategoricalParameters
 from rl_coach.filters.reward.reward_rescale_filter import RewardRescaleFilter
 from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
 from rl_coach.graph_managers.graph_manager import ScheduleParameters
-from rl_coach.memories.memory import MemoryGranularity
-from rl_coach.schedules import LinearSchedule
-
-from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps, RunPhase
-from rl_coach.environments.doom_environment import DoomEnvironmentParameters
 ####################
 # Graph Scheduling #
diff --git a/rl_coach/presets/Doom_Basic_BC.py b/rl_coach/presets/Doom_Basic_BC.py
index aaa2f95..cf5e16c 100644
--- a/rl_coach/presets/Doom_Basic_BC.py
+++ b/rl_coach/presets/Doom_Basic_BC.py
@@ -1,12 +1,11 @@
 from rl_coach.agents.bc_agent import BCAgentParameters
 from rl_coach.base_parameters import VisualizationParameters
+from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps
+from rl_coach.environments.doom_environment import DoomEnvironmentParameters
 from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
 from rl_coach.graph_managers.graph_manager import ScheduleParameters
 from rl_coach.schedules import LinearSchedule
-from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps
-from rl_coach.environments.doom_environment import DoomEnvironmentParameters
-
 ####################
 # Graph Scheduling #
 ####################
diff --git a/rl_coach/presets/Doom_Basic_DFP.py b/rl_coach/presets/Doom_Basic_DFP.py
index c1e4211..dac99e5 100644
--- a/rl_coach/presets/Doom_Basic_DFP.py
+++ b/rl_coach/presets/Doom_Basic_DFP.py
@@ -1,13 +1,12 @@
 from rl_coach.agents.dfp_agent import DFPAgentParameters, HandlingTargetsAfterEpisodeEnd
 from rl_coach.base_parameters import VisualizationParameters
+from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps, RunPhase
+from rl_coach.environments.doom_environment import DoomEnvironmentParameters
 from rl_coach.environments.environment import SelectedPhaseOnlyDumpMethod, MaxDumpMethod
 from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
 from rl_coach.graph_managers.graph_manager import ScheduleParameters
 from rl_coach.schedules import LinearSchedule
-from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps, RunPhase
-from rl_coach.environments.doom_environment import DoomEnvironmentParameters
-
 ####################
 # Graph Scheduling #
 ####################
diff --git a/rl_coach/presets/Doom_Basic_DQN.py b/rl_coach/presets/Doom_Basic_DQN.py
index 8f89ab4..83268df 100644
--- a/rl_coach/presets/Doom_Basic_DQN.py
+++ b/rl_coach/presets/Doom_Basic_DQN.py
@@ -1,14 +1,13 @@
 from rl_coach.agents.dqn_agent import DQNAgentParameters
 from rl_coach.base_parameters import VisualizationParameters, PresetValidationParameters
+from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps, RunPhase
+from rl_coach.environments.doom_environment import DoomEnvironmentParameters
 from rl_coach.environments.environment import SelectedPhaseOnlyDumpMethod, MaxDumpMethod
 from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
 from rl_coach.graph_managers.graph_manager import ScheduleParameters
 from rl_coach.memories.memory import MemoryGranularity
 from rl_coach.schedules import LinearSchedule
-from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps, RunPhase
-from rl_coach.environments.doom_environment import DoomEnvironmentParameters
-
 ####################
 # Graph Scheduling #
 ####################
diff --git a/rl_coach/presets/Doom_Basic_Dueling_DDQN.py b/rl_coach/presets/Doom_Basic_Dueling_DDQN.py
index 2771715..715af8b 100644
--- a/rl_coach/presets/Doom_Basic_Dueling_DDQN.py
+++ b/rl_coach/presets/Doom_Basic_Dueling_DDQN.py
@@ -1,15 +1,14 @@
 from rl_coach.agents.ddqn_agent import DDQNAgentParameters
 from rl_coach.architectures.tensorflow_components.heads.dueling_q_head import DuelingQHeadParameters
 from rl_coach.base_parameters import VisualizationParameters
+from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps, RunPhase
+from rl_coach.environments.doom_environment import DoomEnvironmentParameters
 from rl_coach.environments.environment import SelectedPhaseOnlyDumpMethod, MaxDumpMethod
 from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
 from rl_coach.graph_managers.graph_manager import ScheduleParameters
 from rl_coach.memories.memory import MemoryGranularity
 from rl_coach.schedules import LinearSchedule
-from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps, RunPhase
-from rl_coach.environments.doom_environment import DoomEnvironmentParameters
-
 ####################
 # Graph Scheduling #
 ####################
diff --git a/rl_coach/presets/Doom_Battle_DFP.py b/rl_coach/presets/Doom_Battle_DFP.py
index 58fd76d..3dac6e2 100644
--- a/rl_coach/presets/Doom_Battle_DFP.py
+++ b/rl_coach/presets/Doom_Battle_DFP.py
@@ -1,13 +1,12 @@
 from rl_coach.agents.dfp_agent import DFPAgentParameters
 from rl_coach.base_parameters import VisualizationParameters
+from rl_coach.core_types import EnvironmentSteps, RunPhase
+from rl_coach.environments.doom_environment import DoomEnvironmentParameters, DoomEnvironment
 from rl_coach.environments.environment import MaxDumpMethod, SelectedPhaseOnlyDumpMethod
 from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
 from rl_coach.graph_managers.graph_manager import ScheduleParameters
 from rl_coach.schedules import LinearSchedule
-from rl_coach.core_types import TrainingSteps, EnvironmentSteps, RunPhase
-from rl_coach.environments.doom_environment import DoomEnvironmentParameters, DoomEnvironment
-
 
 ####################
 # Graph Scheduling #
 ####################
diff --git a/rl_coach/presets/Doom_Health_DFP.py b/rl_coach/presets/Doom_Health_DFP.py
index 1cf358c..94603f3 100644
--- a/rl_coach/presets/Doom_Health_DFP.py
+++ b/rl_coach/presets/Doom_Health_DFP.py
@@ -1,14 +1,13 @@
 from rl_coach.agents.dfp_agent import DFPAgentParameters
 from rl_coach.base_parameters import VisualizationParameters, EmbedderScheme, MiddlewareScheme, \
     PresetValidationParameters
+from rl_coach.core_types import EnvironmentSteps, RunPhase
+from rl_coach.environments.doom_environment import DoomEnvironmentParameters
 from rl_coach.environments.environment import SelectedPhaseOnlyDumpMethod, MaxDumpMethod
 from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
 from rl_coach.graph_managers.graph_manager import ScheduleParameters
 from rl_coach.schedules import LinearSchedule
-from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps, RunPhase
-from rl_coach.environments.doom_environment import DoomEnvironmentParameters
-
 
 ####################
 # Graph Scheduling #
 ####################
diff --git a/rl_coach/presets/Doom_Health_MMC.py b/rl_coach/presets/Doom_Health_MMC.py
index 2aa6737..1a8210e 100644
--- a/rl_coach/presets/Doom_Health_MMC.py
+++ b/rl_coach/presets/Doom_Health_MMC.py
@@ -1,16 +1,13 @@
-from rl_coach.agents.dfp_agent import DFPAgentParameters
 from rl_coach.agents.mmc_agent import MixedMonteCarloAgentParameters
-from rl_coach.base_parameters import VisualizationParameters, EmbedderScheme, MiddlewareScheme, \
-    PresetValidationParameters
+from rl_coach.base_parameters import VisualizationParameters, PresetValidationParameters
+from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps, RunPhase
+from rl_coach.environments.doom_environment import DoomEnvironmentParameters
 from rl_coach.environments.environment import SelectedPhaseOnlyDumpMethod, MaxDumpMethod
 from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
 from rl_coach.graph_managers.graph_manager import ScheduleParameters
 from rl_coach.memories.memory import MemoryGranularity
 from rl_coach.schedules import LinearSchedule
-from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps, RunPhase
-from rl_coach.environments.doom_environment import DoomEnvironmentParameters
-
 
 ####################
 # Graph Scheduling #
 ####################
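Hunks like Doom_Health_MMC above also drop imports that were never referenced (DFPAgentParameters, EmbedderScheme, MiddlewareScheme). One way to confirm that only dead imports were removed is pyflakes' unused-import check via flake8, assuming it is installed:

flake8 --select=F401 rl_coach/presets/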
diff --git a/rl_coach/presets/Doom_Health_Supreme_DFP.py b/rl_coach/presets/Doom_Health_Supreme_DFP.py
index 157e838..14c7873 100644
--- a/rl_coach/presets/Doom_Health_Supreme_DFP.py
+++ b/rl_coach/presets/Doom_Health_Supreme_DFP.py
@@ -1,14 +1,13 @@
 from rl_coach.agents.dfp_agent import DFPAgentParameters
 from rl_coach.base_parameters import VisualizationParameters, EmbedderScheme, MiddlewareScheme, \
     PresetValidationParameters
+from rl_coach.core_types import EnvironmentSteps, RunPhase
+from rl_coach.environments.doom_environment import DoomEnvironmentParameters
 from rl_coach.environments.environment import SelectedPhaseOnlyDumpMethod, MaxDumpMethod
 from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
 from rl_coach.graph_managers.graph_manager import ScheduleParameters
 from rl_coach.schedules import LinearSchedule
-from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps, RunPhase
-from rl_coach.environments.doom_environment import DoomEnvironmentParameters
-
 
 ####################
 # Graph Scheduling #
 ####################
diff --git a/rl_coach/presets/ExplorationChain_Bootstrapped_DQN.py b/rl_coach/presets/ExplorationChain_Bootstrapped_DQN.py
index 4dc63dc..7ba8651 100644
--- a/rl_coach/presets/ExplorationChain_Bootstrapped_DQN.py
+++ b/rl_coach/presets/ExplorationChain_Bootstrapped_DQN.py
@@ -1,4 +1,6 @@
-from rl_coach.base_parameters import VisualizationParameters, PresetValidationParameters
+from rl_coach.agents.bootstrapped_dqn_agent import BootstrappedDQNAgentParameters
+from rl_coach.base_parameters import VisualizationParameters
+from rl_coach.core_types import EnvironmentEpisodes, EnvironmentSteps
 from rl_coach.environments.gym_environment import Mujoco
 from rl_coach.filters.filter import NoInputFilter, NoOutputFilter
 from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
@@ -6,9 +8,6 @@
 from rl_coach.graph_managers.graph_manager import ScheduleParameters
 from rl_coach.memories.memory import MemoryGranularity
 from rl_coach.schedules import ConstantSchedule
-from rl_coach.agents.bootstrapped_dqn_agent import BootstrappedDQNAgentParameters
-from rl_coach.core_types import EnvironmentEpisodes, EnvironmentSteps
-
 
 N = 20
 num_output_head_copies = 20
diff --git a/rl_coach/presets/ExplorationChain_Dueling_DDQN.py b/rl_coach/presets/ExplorationChain_Dueling_DDQN.py
index 021edef..a75670f 100644
--- a/rl_coach/presets/ExplorationChain_Dueling_DDQN.py
+++ b/rl_coach/presets/ExplorationChain_Dueling_DDQN.py
@@ -1,5 +1,7 @@
+from rl_coach.agents.ddqn_agent import DDQNAgentParameters
 from rl_coach.architectures.tensorflow_components.heads.dueling_q_head import DuelingQHeadParameters
-from rl_coach.base_parameters import VisualizationParameters, PresetValidationParameters
+from rl_coach.base_parameters import VisualizationParameters
+from rl_coach.core_types import EnvironmentEpisodes, EnvironmentSteps
 from rl_coach.environments.gym_environment import GymEnvironmentParameters
 from rl_coach.filters.filter import NoInputFilter, NoOutputFilter
 from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
@@ -7,9 +9,6 @@
 from rl_coach.graph_managers.graph_manager import ScheduleParameters
 from rl_coach.memories.memory import MemoryGranularity
 from rl_coach.schedules import LinearSchedule
-from rl_coach.agents.ddqn_agent import DDQNAgentParameters
-from rl_coach.core_types import EnvironmentEpisodes, EnvironmentSteps
-
 
 N = 20
 num_output_head_copies = 20
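The reordering within each block is mechanical. Expressed as a sort key, it is roughly the following (an illustrative sketch, not project tooling; the stdlib list is abbreviated and the grouping is inferred from the hunks):

# Sketch of the ordering rule these hunks apply.
def import_sort_key(line):
    module = line.split()[1]            # 'import x ...' / 'from x import y' -> x
    root = module.split('.')[0]
    if root in {'collections', 'enum', 'os', 'random', 'typing'}:
        group = 0                       # standard library first
    elif root == 'rl_coach':
        group = 2                       # first-party last
    else:
        group = 1                       # third party (numpy, scipy, pandas, six)
    return (group, module)

lines = [
    'from rl_coach.schedules import ConstantSchedule',
    'import numpy as np',
    'from rl_coach.core_types import EnvironmentEpisodes, EnvironmentSteps',
]
for line in sorted(lines, key=import_sort_key):
    print(line)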
diff --git a/rl_coach/presets/ExplorationChain_UCB_Q_ensembles.py b/rl_coach/presets/ExplorationChain_UCB_Q_ensembles.py
index e4ac446..6695773 100644
--- a/rl_coach/presets/ExplorationChain_UCB_Q_ensembles.py
+++ b/rl_coach/presets/ExplorationChain_UCB_Q_ensembles.py
@@ -1,15 +1,14 @@
 from rl_coach.agents.bootstrapped_dqn_agent import BootstrappedDQNAgentParameters
 from rl_coach.base_parameters import VisualizationParameters
+from rl_coach.core_types import EnvironmentEpisodes, EnvironmentSteps
 from rl_coach.environments.gym_environment import Mujoco
+from rl_coach.exploration_policies.ucb import UCBParameters
 from rl_coach.filters.filter import NoInputFilter, NoOutputFilter
 from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
 from rl_coach.graph_managers.graph_manager import ScheduleParameters
 from rl_coach.memories.memory import MemoryGranularity
 from rl_coach.schedules import ConstantSchedule
-from rl_coach.core_types import EnvironmentEpisodes, EnvironmentSteps
-from rl_coach.exploration_policies.ucb import UCBParameters
-
 
 N = 20
 num_output_head_copies = 20
diff --git a/rl_coach/presets/Fetch_DDPG_HER_baselines.py b/rl_coach/presets/Fetch_DDPG_HER_baselines.py
index 09f17ac..4b2fd62 100644
--- a/rl_coach/presets/Fetch_DDPG_HER_baselines.py
+++ b/rl_coach/presets/Fetch_DDPG_HER_baselines.py
@@ -1,9 +1,12 @@
 from rl_coach.agents.ddpg_agent import DDPGAgentParameters
 from rl_coach.architectures.tensorflow_components.architecture import Dense
 from rl_coach.architectures.tensorflow_components.middlewares.fc_middleware import FCMiddlewareParameters
-from rl_coach.base_parameters import VisualizationParameters, EmbedderScheme, InputEmbedderParameters, PresetValidationParameters
+from rl_coach.base_parameters import VisualizationParameters, EmbedderScheme, InputEmbedderParameters, \
+    PresetValidationParameters
+from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps, RunPhase
 from rl_coach.environments.environment import SelectedPhaseOnlyDumpMethod, MaxDumpMethod, SingleLevelSelection
 from rl_coach.environments.gym_environment import Mujoco, MujocoInputFilter, fetch_v1
+from rl_coach.exploration_policies.e_greedy import EGreedyParameters
 from rl_coach.filters.observation.observation_clipping_filter import ObservationClippingFilter
 from rl_coach.filters.observation.observation_normalization_filter import ObservationNormalizationFilter
 from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
@@ -14,9 +17,6 @@
 from rl_coach.memories.memory import MemoryGranularity
 from rl_coach.schedules import ConstantSchedule
 from rl_coach.spaces import GoalsSpace, ReachingGoal
-from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps, RunPhase
-from rl_coach.exploration_policies.e_greedy import EGreedyParameters
-
 
 cycles = 100 # 20 for reach. for others it's 100
 ####################
diff --git a/rl_coach/presets/InvertedPendulum_PG.py b/rl_coach/presets/InvertedPendulum_PG.py
index 144513d..2b53b91 100644
--- a/rl_coach/presets/InvertedPendulum_PG.py
+++ b/rl_coach/presets/InvertedPendulum_PG.py
@@ -1,14 +1,13 @@
 from rl_coach.agents.policy_gradients_agent import PolicyGradientsAgentParameters
 from rl_coach.base_parameters import VisualizationParameters
+from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps, RunPhase
 from rl_coach.environments.environment import MaxDumpMethod, SelectedPhaseOnlyDumpMethod
 from rl_coach.environments.gym_environment import Mujoco, MujocoInputFilter
 from rl_coach.filters.observation.observation_normalization_filter import ObservationNormalizationFilter
+from rl_coach.filters.reward.reward_rescale_filter import RewardRescaleFilter
 from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
 from rl_coach.graph_managers.graph_manager import ScheduleParameters
-from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps, RunPhase
-from rl_coach.filters.reward.reward_rescale_filter import RewardRescaleFilter
-
 
 ####################
 # Graph Scheduling #
 ####################
diff --git a/rl_coach/presets/MontezumaRevenge_BC.py b/rl_coach/presets/MontezumaRevenge_BC.py
index 056849c..0e026ca 100644
--- a/rl_coach/presets/MontezumaRevenge_BC.py
+++ b/rl_coach/presets/MontezumaRevenge_BC.py
@@ -1,13 +1,12 @@
+from rl_coach.agents.bc_agent import BCAgentParameters
 from rl_coach.base_parameters import VisualizationParameters
+from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps, RunPhase
 from rl_coach.environments.environment import MaxDumpMethod, SelectedPhaseOnlyDumpMethod
 from rl_coach.environments.gym_environment import Atari
 from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
 from rl_coach.graph_managers.graph_manager import ScheduleParameters
 from rl_coach.memories.memory import MemoryGranularity
-from rl_coach.agents.bc_agent import BCAgentParameters
-from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps, RunPhase
-
 
 ####################
 # Graph Scheduling #
 ####################
diff --git a/rl_coach/presets/Mujoco_A3C.py b/rl_coach/presets/Mujoco_A3C.py
index 86287f9..c8831c9 100644
--- a/rl_coach/presets/Mujoco_A3C.py
+++ b/rl_coach/presets/Mujoco_A3C.py
@@ -1,15 +1,14 @@
 from rl_coach.agents.actor_critic_agent import ActorCriticAgentParameters
 from rl_coach.base_parameters import VisualizationParameters, PresetValidationParameters
+from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps, RunPhase
 from rl_coach.environments.environment import MaxDumpMethod, SelectedPhaseOnlyDumpMethod, SingleLevelSelection
 from rl_coach.environments.gym_environment import Mujoco, mujoco_v2, MujocoInputFilter
 from rl_coach.exploration_policies.continuous_entropy import ContinuousEntropyParameters
 from rl_coach.filters.observation.observation_normalization_filter import ObservationNormalizationFilter
+from rl_coach.filters.reward.reward_rescale_filter import RewardRescaleFilter
 from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
 from rl_coach.graph_managers.graph_manager import ScheduleParameters
-from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps, RunPhase
-from rl_coach.filters.reward.reward_rescale_filter import RewardRescaleFilter
-
 
 ####################
 # Graph Scheduling #
 ####################
diff --git a/rl_coach/presets/Mujoco_A3C_LSTM.py b/rl_coach/presets/Mujoco_A3C_LSTM.py
index 0df4181..bf7f265 100644
--- a/rl_coach/presets/Mujoco_A3C_LSTM.py
+++ b/rl_coach/presets/Mujoco_A3C_LSTM.py
@@ -1,17 +1,17 @@
 from rl_coach.agents.actor_critic_agent import ActorCriticAgentParameters
 from rl_coach.architectures.tensorflow_components.architecture import Dense
 from rl_coach.architectures.tensorflow_components.middlewares.lstm_middleware import LSTMMiddlewareParameters
-from rl_coach.base_parameters import VisualizationParameters, InputEmbedderParameters, MiddlewareScheme, PresetValidationParameters
+from rl_coach.base_parameters import VisualizationParameters, InputEmbedderParameters, MiddlewareScheme, \
+    PresetValidationParameters
+from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps, RunPhase
 from rl_coach.environments.environment import MaxDumpMethod, SelectedPhaseOnlyDumpMethod, SingleLevelSelection
 from rl_coach.environments.gym_environment import Mujoco, mujoco_v2, MujocoInputFilter
 from rl_coach.exploration_policies.continuous_entropy import ContinuousEntropyParameters
 from rl_coach.filters.observation.observation_normalization_filter import ObservationNormalizationFilter
+from rl_coach.filters.reward.reward_rescale_filter import RewardRescaleFilter
 from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
 from rl_coach.graph_managers.graph_manager import ScheduleParameters
-from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps, RunPhase
-from rl_coach.filters.reward.reward_rescale_filter import RewardRescaleFilter
-
 
 ####################
 # Graph Scheduling #
 ####################
diff --git a/rl_coach/presets/Mujoco_ClippedPPO.py b/rl_coach/presets/Mujoco_ClippedPPO.py
index d4f2bdc..2930dc8 100644
--- a/rl_coach/presets/Mujoco_ClippedPPO.py
+++ b/rl_coach/presets/Mujoco_ClippedPPO.py
@@ -1,17 +1,15 @@
-from rl_coach.exploration_policies .additive_noise import AdditiveNoiseParameters
-
 from rl_coach.agents.clipped_ppo_agent import ClippedPPOAgentParameters
 from rl_coach.architectures.tensorflow_components.architecture import Dense
 from rl_coach.base_parameters import VisualizationParameters, PresetValidationParameters
+from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps, RunPhase
 from rl_coach.environments.environment import MaxDumpMethod, SelectedPhaseOnlyDumpMethod, SingleLevelSelection
 from rl_coach.environments.gym_environment import Mujoco, mujoco_v2, MujocoInputFilter
+from rl_coach.exploration_policies.additive_noise import AdditiveNoiseParameters
+from rl_coach.filters.observation.observation_normalization_filter import ObservationNormalizationFilter
 from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
 from rl_coach.graph_managers.graph_manager import ScheduleParameters
 from rl_coach.schedules import LinearSchedule
-from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps, RunPhase
-from rl_coach.filters.observation.observation_normalization_filter import ObservationNormalizationFilter
-
 
 ####################
 # Graph Scheduling #
 ####################
diff --git a/rl_coach/presets/Mujoco_DDPG.py b/rl_coach/presets/Mujoco_DDPG.py
index 1703910..762a70a 100644
--- a/rl_coach/presets/Mujoco_DDPG.py
+++ b/rl_coach/presets/Mujoco_DDPG.py
@@ -1,13 +1,12 @@
 from rl_coach.agents.ddpg_agent import DDPGAgentParameters
 from rl_coach.architectures.tensorflow_components.architecture import Dense
 from rl_coach.base_parameters import VisualizationParameters, PresetValidationParameters, EmbedderScheme
+from rl_coach.core_types import EnvironmentEpisodes, EnvironmentSteps, RunPhase
 from rl_coach.environments.environment import MaxDumpMethod, SelectedPhaseOnlyDumpMethod, SingleLevelSelection
 from rl_coach.environments.gym_environment import Mujoco, mujoco_v2
 from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
 from rl_coach.graph_managers.graph_manager import ScheduleParameters
-from rl_coach.core_types import EnvironmentEpisodes, EnvironmentSteps, RunPhase
-
 
 ####################
 # Graph Scheduling #
 ####################
diff --git a/rl_coach/presets/Mujoco_NAF.py b/rl_coach/presets/Mujoco_NAF.py
index d725b8b..534556c 100644
--- a/rl_coach/presets/Mujoco_NAF.py
+++ b/rl_coach/presets/Mujoco_NAF.py
@@ -1,13 +1,12 @@
+from rl_coach.agents.naf_agent import NAFAgentParameters
 from rl_coach.architectures.tensorflow_components.architecture import Dense
 from rl_coach.base_parameters import VisualizationParameters, PresetValidationParameters
+from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps, RunPhase, GradientClippingMethod
 from rl_coach.environments.environment import MaxDumpMethod, SelectedPhaseOnlyDumpMethod, SingleLevelSelection
 from rl_coach.environments.gym_environment import Mujoco, mujoco_v2
 from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
 from rl_coach.graph_managers.graph_manager import ScheduleParameters
-from rl_coach.agents.naf_agent import NAFAgentParameters
-from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps, RunPhase, GradientClippingMethod
-
 
 ####################
 # Graph Scheduling #
 ####################
diff --git a/rl_coach/presets/Mujoco_PPO.py b/rl_coach/presets/Mujoco_PPO.py
index 983e470..3dc72e9 100644
--- a/rl_coach/presets/Mujoco_PPO.py
+++ b/rl_coach/presets/Mujoco_PPO.py
@@ -1,15 +1,14 @@
 from rl_coach.agents.ppo_agent import PPOAgentParameters
 from rl_coach.architectures.tensorflow_components.architecture import Dense
 from rl_coach.base_parameters import VisualizationParameters, PresetValidationParameters
+from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps, RunPhase
 from rl_coach.environments.environment import MaxDumpMethod, SelectedPhaseOnlyDumpMethod, SingleLevelSelection
 from rl_coach.environments.gym_environment import Mujoco, mujoco_v2, MujocoInputFilter
+from rl_coach.exploration_policies.continuous_entropy import ContinuousEntropyParameters
 from rl_coach.filters.observation.observation_normalization_filter import ObservationNormalizationFilter
 from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
 from rl_coach.graph_managers.graph_manager import ScheduleParameters
-from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps, RunPhase
-from rl_coach.exploration_policies.continuous_entropy import ContinuousEntropyParameters
-
 
 ####################
 # Graph Scheduling #
 ####################
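One more recurring pattern: reordered imports that exceed the line limit are wrapped with a backslash continuation instead of being left over-long, as in Mujoco_A3C_LSTM and Fetch_DDPG_HER_baselines above. The resulting style, reproduced here for illustration (assumes rl_coach is installed):

from rl_coach.base_parameters import VisualizationParameters, InputEmbedderParameters, MiddlewareScheme, \
    PresetValidationParameters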
diff --git a/rl_coach/presets/Pendulum_HAC.py b/rl_coach/presets/Pendulum_HAC.py
index 265c294..6cff907 100644
--- a/rl_coach/presets/Pendulum_HAC.py
+++ b/rl_coach/presets/Pendulum_HAC.py
@@ -1,11 +1,14 @@
 import numpy as np
-from rl_coach.agents.ddpg_agent import DDPGAgentParameters
+
 from rl_coach.agents.hac_ddpg_agent import HACDDPGAgentParameters
 from rl_coach.architectures.tensorflow_components.architecture import Dense
 from rl_coach.base_parameters import VisualizationParameters, EmbeddingMergerType, EmbedderScheme, \
     InputEmbedderParameters
+from rl_coach.core_types import EnvironmentEpisodes, EnvironmentSteps, RunPhase, TrainingSteps
 from rl_coach.environments.environment import SelectedPhaseOnlyDumpMethod
 from rl_coach.environments.gym_environment import Mujoco
+from rl_coach.exploration_policies.e_greedy import EGreedyParameters
+from rl_coach.exploration_policies.ou_process import OUProcessParameters
 from rl_coach.graph_managers.graph_manager import ScheduleParameters
 from rl_coach.graph_managers.hac_graph_manager import HACGraphManager
 from rl_coach.memories.episodic.episodic_hindsight_experience_replay import HindsightGoalSelectionMethod, \
@@ -13,14 +16,9 @@ from rl_coach.memories.episodic.episodic_hindsight_experience_replay import Hind
 from rl_coach.memories.episodic.episodic_hrl_hindsight_experience_replay import \
     EpisodicHRLHindsightExperienceReplayParameters
 from rl_coach.memories.memory import MemoryGranularity
-
 from rl_coach.schedules import ConstantSchedule
 from rl_coach.spaces import GoalsSpace, ReachingGoal
-from rl_coach.core_types import EnvironmentEpisodes, EnvironmentSteps, RunPhase, TrainingSteps
-from rl_coach.exploration_policies.e_greedy import EGreedyParameters
-from rl_coach.exploration_policies.ou_process import OUProcessParameters
-
 
 ####################
 # Graph Scheduling #
 ####################
diff --git a/rl_coach/presets/Starcraft_CollectMinerals_A3C.py b/rl_coach/presets/Starcraft_CollectMinerals_A3C.py
index ad41a7e..33d2a05 100644
--- a/rl_coach/presets/Starcraft_CollectMinerals_A3C.py
+++ b/rl_coach/presets/Starcraft_CollectMinerals_A3C.py
@@ -1,15 +1,14 @@
 from rl_coach.agents.actor_critic_agent import ActorCriticAgentParameters
 from rl_coach.agents.policy_optimization_agent import PolicyGradientRescaler
 from rl_coach.base_parameters import VisualizationParameters, InputEmbedderParameters
-from rl_coach.environments.environment import MaxDumpMethod, SelectedPhaseOnlyDumpMethod, AlwaysDumpMethod
-from rl_coach.environments.starcraft2_environment import StarCraft2EnvironmentParameters
-from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
-from rl_coach.graph_managers.graph_manager import ScheduleParameters
-from rl_coach.schedules import LinearSchedule, ConstantSchedule
-
 from rl_coach.core_types import RunPhase
 from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps
+from rl_coach.environments.environment import MaxDumpMethod, SelectedPhaseOnlyDumpMethod
+from rl_coach.environments.starcraft2_environment import StarCraft2EnvironmentParameters
 from rl_coach.exploration_policies.additive_noise import AdditiveNoiseParameters
+from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
+from rl_coach.graph_managers.graph_manager import ScheduleParameters
+from rl_coach.schedules import ConstantSchedule
 
 ####################
 # Graph Scheduling #
diff --git a/rl_coach/presets/Starcraft_CollectMinerals_Dueling_DDQN.py b/rl_coach/presets/Starcraft_CollectMinerals_Dueling_DDQN.py
index ebc7f49..38df3e4 100644
--- a/rl_coach/presets/Starcraft_CollectMinerals_Dueling_DDQN.py
+++ b/rl_coach/presets/Starcraft_CollectMinerals_Dueling_DDQN.py
@@ -1,7 +1,10 @@
 from collections import OrderedDict
 
+from rl_coach.agents.ddqn_agent import DDQNAgentParameters
 from rl_coach.architectures.tensorflow_components.heads.dueling_q_head import DuelingQHeadParameters
 from rl_coach.base_parameters import VisualizationParameters, InputEmbedderParameters
+from rl_coach.core_types import RunPhase
+from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps
 from rl_coach.environments.environment import MaxDumpMethod, SelectedPhaseOnlyDumpMethod
 from rl_coach.environments.starcraft2_environment import StarCraft2EnvironmentParameters
 from rl_coach.filters.action.box_discretization import BoxDiscretization
@@ -11,10 +14,6 @@
 from rl_coach.graph_managers.graph_manager import ScheduleParameters
 from rl_coach.memories.memory import MemoryGranularity
 from rl_coach.schedules import LinearSchedule
-from rl_coach.agents.ddqn_agent import DDQNAgentParameters
-from rl_coach.core_types import RunPhase
-from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps
-
 ####################
 # Graph Scheduling #
 ####################
diff --git a/rl_coach/spaces.py b/rl_coach/spaces.py
index 404ee7d..26e20a4 100644
--- a/rl_coach/spaces.py
+++ b/rl_coach/spaces.py
@@ -22,9 +22,9 @@
 import numpy as np
 import scipy
 import scipy.spatial
-from rl_coach.utils import eps
 
 from rl_coach.core_types import ActionType, ActionInfo
+from rl_coach.utils import eps
 
 
 class Space(object):
diff --git a/rl_coach/utils.py b/rl_coach/utils.py
index c65ad87..dc1c071 100644
--- a/rl_coach/utils.py
+++ b/rl_coach/utils.py
@@ -22,11 +22,11 @@
 import os
 import signal
 import threading
 import time
+from multiprocessing import Manager
 from subprocess import Popen
 from typing import List, Tuple
 
 import numpy as np
-from multiprocessing import Manager
 
 killed_processes = []
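Since every hunk only moves or removes imports, a reasonable smoke test is that each touched module still imports cleanly. A hypothetical sketch (module list abbreviated; assumes rl_coach and its dependencies are installed):

import importlib

# A few of the modules touched by this patch.
modules = [
    'rl_coach.utils',
    'rl_coach.spaces',
    'rl_coach.presets.Mujoco_DDPG',
    'rl_coach.presets.Doom_Basic_DQN',
]
for name in modules:
    importlib.import_module(name)  # raises ImportError if a needed import was dropped
    print('ok:', name)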