mirror of https://github.com/gryf/coach.git
synced 2025-12-17 11:10:20 +01:00

removing datasets + imports optimization
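Note on the change below: each hunk collapses a module's imports into grouped, alphabetized blocks (stdlib, third-party, then rl_coach), and the two "Binary file not shown." entries are the removed dataset files. The diff does not say which tool, if any, produced the reordering; as a hedged illustration only, an equivalent cleanup could be scripted with isort 5 (the commit may equally have used PyCharm's "Optimize Imports" or been done by hand; the file path is just an example):

    # Illustrative sketch, not the commit's actual tooling.
    import isort

    # Treat rl_coach as first-party so its imports form their own
    # section after the standard-library and third-party groups.
    isort.file("rl_coach/agents/agent.py", known_first_party=["rl_coach"])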
@@ -18,18 +18,17 @@ from typing import Union

import numpy as np
import scipy.signal

from rl_coach.agents.policy_optimization_agent import PolicyOptimizationAgent, PolicyGradientRescaler
from rl_coach.architectures.tensorflow_components.heads.policy_head import PolicyHeadParameters
from rl_coach.architectures.tensorflow_components.heads.v_head import VHeadParameters
from rl_coach.architectures.tensorflow_components.middlewares.fc_middleware import FCMiddlewareParameters
from rl_coach.base_parameters import AlgorithmParameters, NetworkParameters, \
    AgentParameters, InputEmbedderParameters
from rl_coach.core_types import QActionStateValue
from rl_coach.spaces import DiscreteActionSpace
from rl_coach.utils import last_sample

from rl_coach.logger import screen
from rl_coach.memories.episodic.single_episode_buffer import SingleEpisodeBufferParameters
from rl_coach.spaces import DiscreteActionSpace
from rl_coach.utils import last_sample


class ActorCriticAlgorithmParameters(AlgorithmParameters):

@@ -20,20 +20,19 @@ from collections import OrderedDict
from typing import Dict, List, Union, Tuple

import numpy as np
from pandas import read_pickle
from six.moves import range

from rl_coach.agents.agent_interface import AgentInterface
from rl_coach.architectures.network_wrapper import NetworkWrapper
from rl_coach.base_parameters import AgentParameters, DistributedTaskParameters
from rl_coach.core_types import RunPhase, PredictionType, EnvironmentEpisodes, ActionType, Batch, Episode, StateType
from rl_coach.core_types import Transition, ActionInfo, TrainingSteps, EnvironmentSteps, EnvResponse
from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplay
from pandas import read_pickle
from six.moves import range
from rl_coach.spaces import SpacesDefinition, VectorObservationSpace, GoalsSpace, AttentionActionSpace
from rl_coach.utils import Signal, force_list, set_cpu
from rl_coach.utils import dynamic_import_and_instantiate_module_from_params

from rl_coach.architectures.network_wrapper import NetworkWrapper
from rl_coach.logger import screen, Logger, EpisodeLogger
from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplay
from rl_coach.spaces import SpacesDefinition, VectorObservationSpace, GoalsSpace, AttentionActionSpace
from rl_coach.utils import Signal, force_list
from rl_coach.utils import dynamic_import_and_instantiate_module_from_params


class Agent(AgentInterface):

@@ -17,14 +17,14 @@
from typing import Union

import numpy as np

from rl_coach.agents.imitation_agent import ImitationAgent
from rl_coach.architectures.tensorflow_components.heads.policy_head import PolicyHeadParameters
from rl_coach.architectures.tensorflow_components.middlewares.fc_middleware import FCMiddlewareParameters
from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplayParameters

from rl_coach.base_parameters import AgentParameters, AlgorithmParameters, NetworkParameters, InputEmbedderParameters, \
    MiddlewareScheme
from rl_coach.exploration_policies.e_greedy import EGreedyParameters
from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplayParameters


class BCAlgorithmParameters(AlgorithmParameters):

@@ -17,9 +17,9 @@
from typing import Union

import numpy as np

from rl_coach.agents.dqn_agent import DQNAgentParameters, DQNNetworkParameters
from rl_coach.agents.value_optimization_agent import ValueOptimizationAgent

from rl_coach.exploration_policies.bootstrapped import BootstrappedParameters


@@ -17,15 +17,15 @@
from typing import Union

import numpy as np

from rl_coach.agents.dqn_agent import DQNNetworkParameters, DQNAlgorithmParameters
from rl_coach.agents.value_optimization_agent import ValueOptimizationAgent
from rl_coach.architectures.tensorflow_components.heads.categorical_q_head import CategoricalQHeadParameters
from rl_coach.base_parameters import AgentParameters
from rl_coach.memories.non_episodic.experience_replay import ExperienceReplayParameters
from rl_coach.schedules import LinearSchedule

from rl_coach.core_types import StateType
from rl_coach.exploration_policies.e_greedy import EGreedyParameters
from rl_coach.memories.non_episodic.experience_replay import ExperienceReplayParameters
from rl_coach.schedules import LinearSchedule


class CategoricalDQNNetworkParameters(DQNNetworkParameters):

@@ -20,21 +20,21 @@ from random import shuffle
from typing import Union

import numpy as np

from rl_coach.agents.actor_critic_agent import ActorCriticAgent
from rl_coach.agents.policy_optimization_agent import PolicyGradientRescaler
from rl_coach.architectures.tensorflow_components.heads.ppo_head import PPOHeadParameters
from rl_coach.architectures.tensorflow_components.heads.v_head import VHeadParameters
from rl_coach.architectures.tensorflow_components.middlewares.fc_middleware import FCMiddlewareParameters
from rl_coach.base_parameters import AlgorithmParameters, NetworkParameters, \
    AgentParameters, InputEmbedderParameters
from rl_coach.core_types import EnvironmentSteps, Batch, EnvResponse, StateType
from rl_coach.exploration_policies.additive_noise import AdditiveNoiseParameters
from rl_coach.logger import screen
from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplayParameters
from rl_coach.schedules import ConstantSchedule
from rl_coach.spaces import DiscreteActionSpace

from rl_coach.architectures.tensorflow_components.heads.ppo_head import PPOHeadParameters
from rl_coach.logger import screen


class ClippedPPONetworkParameters(NetworkParameters):
    def __init__(self):

@@ -20,15 +20,14 @@ from enum import Enum
from typing import Union, List, Dict

import numpy as np

from rl_coach.agents.agent_interface import AgentInterface
from rl_coach.base_parameters import AgentParameters, VisualizationParameters
# from rl_coach.environments.environment_interface import ActionSpace
from rl_coach.spaces import ActionSpace
from rl_coach.spaces import AgentSelection, AttentionActionSpace, ObservationSpace, SpacesDefinition
from rl_coach.utils import short_dynamic_import

from rl_coach.core_types import ActionInfo, EnvResponse, ActionType, RunPhase
from rl_coach.filters.observation.observation_crop_filter import ObservationCropFilter
from rl_coach.spaces import ActionSpace
from rl_coach.spaces import AgentSelection, AttentionActionSpace, SpacesDefinition
from rl_coach.utils import short_dynamic_import


class DecisionPolicy(object):

@@ -19,19 +19,19 @@ from typing import Union
from collections import OrderedDict

import numpy as np

from rl_coach.agents.actor_critic_agent import ActorCriticAgent
from rl_coach.agents.agent import Agent
from rl_coach.architectures.tensorflow_components.heads.ddpg_actor_head import DDPGActorHeadParameters
from rl_coach.architectures.tensorflow_components.heads.v_head import VHeadParameters
from rl_coach.architectures.tensorflow_components.middlewares.fc_middleware import FCMiddlewareParameters
from rl_coach.base_parameters import NetworkParameters, AlgorithmParameters, \
    AgentParameters, InputEmbedderParameters, EmbedderScheme
from rl_coach.core_types import ActionInfo, EnvironmentSteps
from rl_coach.exploration_policies.ou_process import OUProcessParameters
from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplayParameters
from rl_coach.spaces import BoxActionSpace, GoalsSpace

from rl_coach.architectures.tensorflow_components.heads.ddpg_actor_head import DDPGActorHeadParameters
from rl_coach.core_types import ActionInfo, EnvironmentSteps


class DDPGCriticNetworkParameters(NetworkParameters):
    def __init__(self):

@@ -17,11 +17,11 @@
from typing import Union

import numpy as np
from rl_coach.schedules import LinearSchedule

from rl_coach.agents.dqn_agent import DQNAgentParameters
from rl_coach.agents.value_optimization_agent import ValueOptimizationAgent
from rl_coach.core_types import EnvironmentSteps
from rl_coach.schedules import LinearSchedule


class DDQNAgentParameters(DQNAgentParameters):

@@ -19,19 +19,20 @@ from enum import Enum
from typing import Union

import numpy as np

from rl_coach.agents.agent import Agent
from rl_coach.architectures.tensorflow_components.architecture import Conv2d, Dense
from rl_coach.architectures.tensorflow_components.heads.measurements_prediction_head import MeasurementsPredictionHeadParameters
from rl_coach.architectures.tensorflow_components.heads.measurements_prediction_head import \
    MeasurementsPredictionHeadParameters
from rl_coach.architectures.tensorflow_components.middlewares.fc_middleware import FCMiddlewareParameters
from rl_coach.base_parameters import AlgorithmParameters, AgentParameters, NetworkParameters, \
    InputEmbedderParameters, MiddlewareScheme
from rl_coach.core_types import ActionInfo, EnvironmentSteps, RunPhase
from rl_coach.exploration_policies.e_greedy import EGreedyParameters
from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplayParameters
from rl_coach.memories.memory import MemoryGranularity
from rl_coach.spaces import SpacesDefinition, VectorObservationSpace

from rl_coach.core_types import ActionInfo, EnvironmentSteps, RunPhase
from rl_coach.exploration_policies.e_greedy import EGreedyParameters


class HandlingTargetsAfterEpisodeEnd(Enum):
    LastStep = 0

@@ -17,16 +17,16 @@
from typing import Union

import numpy as np

from rl_coach.agents.value_optimization_agent import ValueOptimizationAgent
from rl_coach.architectures.tensorflow_components.heads.q_head import QHeadParameters
from rl_coach.architectures.tensorflow_components.middlewares.fc_middleware import FCMiddlewareParameters
from rl_coach.base_parameters import AlgorithmParameters, NetworkParameters, AgentParameters, \
    InputEmbedderParameters, MiddlewareScheme
from rl_coach.memories.non_episodic.experience_replay import ExperienceReplayParameters
from rl_coach.schedules import LinearSchedule

from rl_coach.core_types import EnvironmentSteps
from rl_coach.exploration_policies.e_greedy import EGreedyParameters
from rl_coach.memories.non_episodic.experience_replay import ExperienceReplayParameters
from rl_coach.schedules import LinearSchedule


class DQNAlgorithmParameters(AlgorithmParameters):

@@ -17,7 +17,6 @@
from typing import Union

import numpy as np
import copy

from rl_coach.agents.ddpg_agent import DDPGAgent, DDPGAgentParameters, DDPGAlgorithmParameters
from rl_coach.core_types import RunPhase

@@ -19,6 +19,8 @@ from collections import OrderedDict
from typing import Union

import pygame
from pandas import to_pickle

from rl_coach.agents.agent import Agent
from rl_coach.agents.bc_agent import BCNetworkParameters
from rl_coach.architectures.tensorflow_components.heads.policy_head import PolicyHeadParameters
@@ -26,11 +28,9 @@ from rl_coach.architectures.tensorflow_components.middlewares.fc_middleware impo
from rl_coach.base_parameters import AlgorithmParameters, NetworkParameters, InputEmbedderParameters, EmbedderScheme, \
    AgentParameters
from rl_coach.core_types import ActionInfo
from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplayParameters
from pandas import to_pickle

from rl_coach.exploration_policies.e_greedy import EGreedyParameters
from rl_coach.logger import screen
from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplayParameters


class HumanAlgorithmParameters(AlgorithmParameters):

@@ -17,11 +17,10 @@
from collections import OrderedDict
from typing import Union

from rl_coach.core_types import RunPhase, ActionInfo
from rl_coach.spaces import DiscreteActionSpace

from rl_coach.agents.agent import Agent
from rl_coach.core_types import RunPhase, ActionInfo
from rl_coach.logger import screen
from rl_coach.spaces import DiscreteActionSpace


## This is an abstract agent - there is no learn_from_batch method ##

@@ -17,17 +17,17 @@
from typing import Union

import numpy as np

from rl_coach.agents.policy_optimization_agent import PolicyOptimizationAgent
from rl_coach.agents.value_optimization_agent import ValueOptimizationAgent
from rl_coach.architectures.tensorflow_components.heads.q_head import QHeadParameters
from rl_coach.architectures.tensorflow_components.middlewares.fc_middleware import FCMiddlewareParameters
from rl_coach.base_parameters import AlgorithmParameters, AgentParameters, NetworkParameters, \
    InputEmbedderParameters
from rl_coach.exploration_policies.e_greedy import EGreedyParameters
from rl_coach.utils import last_sample

from rl_coach.core_types import EnvironmentSteps
from rl_coach.exploration_policies.e_greedy import EGreedyParameters
from rl_coach.memories.episodic.single_episode_buffer import SingleEpisodeBufferParameters
from rl_coach.utils import last_sample


class NStepQNetworkParameters(NetworkParameters):

@@ -17,16 +17,16 @@
from typing import Union

import numpy as np

from rl_coach.agents.value_optimization_agent import ValueOptimizationAgent
from rl_coach.architectures.tensorflow_components.heads.naf_head import NAFHeadParameters
from rl_coach.architectures.tensorflow_components.middlewares.fc_middleware import FCMiddlewareParameters
from rl_coach.base_parameters import AlgorithmParameters, AgentParameters, \
    NetworkParameters, InputEmbedderParameters
from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplayParameters
from rl_coach.spaces import BoxActionSpace

from rl_coach.core_types import ActionInfo, EnvironmentSteps
from rl_coach.exploration_policies.ou_process import OUProcessParameters
from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplayParameters
from rl_coach.spaces import BoxActionSpace


class NAFNetworkParameters(NetworkParameters):

@@ -19,17 +19,17 @@ import pickle
from typing import Union

import numpy as np

from rl_coach.agents.value_optimization_agent import ValueOptimizationAgent
from rl_coach.architectures.tensorflow_components.heads.dnd_q_head import DNDQHeadParameters
from rl_coach.architectures.tensorflow_components.middlewares.fc_middleware import FCMiddlewareParameters
from rl_coach.base_parameters import AlgorithmParameters, NetworkParameters, AgentParameters, \
    InputEmbedderParameters
from rl_coach.core_types import RunPhase, EnvironmentSteps, Episode, StateType
from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplayParameters, MemoryGranularity
from rl_coach.schedules import ConstantSchedule

from rl_coach.exploration_policies.e_greedy import EGreedyParameters
from rl_coach.logger import screen
from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplayParameters, MemoryGranularity
from rl_coach.schedules import ConstantSchedule


class NECNetworkParameters(NetworkParameters):

@@ -20,8 +20,7 @@ import numpy as np

from rl_coach.agents.dqn_agent import DQNAgentParameters, DQNAlgorithmParameters
from rl_coach.agents.value_optimization_agent import ValueOptimizationAgent
from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplay, \
    EpisodicExperienceReplayParameters
from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplayParameters


class PALAlgorithmParameters(DQNAlgorithmParameters):

@@ -17,16 +17,16 @@
from typing import Union

import numpy as np

from rl_coach.agents.policy_optimization_agent import PolicyOptimizationAgent, PolicyGradientRescaler
from rl_coach.architectures.tensorflow_components.heads.policy_head import PolicyHeadParameters
from rl_coach.architectures.tensorflow_components.middlewares.fc_middleware import FCMiddlewareParameters
from rl_coach.base_parameters import NetworkParameters, AlgorithmParameters, \
    AgentParameters, InputEmbedderParameters
from rl_coach.exploration_policies.additive_noise import AdditiveNoiseParameters
from rl_coach.spaces import DiscreteActionSpace

from rl_coach.logger import screen
from rl_coach.memories.episodic.single_episode_buffer import SingleEpisodeBufferParameters
from rl_coach.spaces import DiscreteActionSpace


class PolicyGradientNetworkParameters(NetworkParameters):

@@ -19,12 +19,12 @@ from enum import Enum
from typing import Union

import numpy as np
from rl_coach.core_types import Batch, ActionInfo
from rl_coach.spaces import DiscreteActionSpace, BoxActionSpace
from rl_coach.utils import eps

from rl_coach.agents.agent import Agent
from rl_coach.core_types import Batch, ActionInfo
from rl_coach.logger import screen
from rl_coach.spaces import DiscreteActionSpace, BoxActionSpace
from rl_coach.utils import eps


class PolicyGradientRescaler(Enum):

@@ -19,21 +19,21 @@ from collections import OrderedDict
from typing import Union

import numpy as np

from rl_coach.agents.actor_critic_agent import ActorCriticAgent
from rl_coach.agents.policy_optimization_agent import PolicyGradientRescaler
from rl_coach.architectures.tensorflow_components.heads.ppo_head import PPOHeadParameters
from rl_coach.architectures.tensorflow_components.heads.v_head import VHeadParameters
from rl_coach.architectures.tensorflow_components.middlewares.fc_middleware import FCMiddlewareParameters
from rl_coach.base_parameters import AlgorithmParameters, NetworkParameters, \
    AgentParameters, InputEmbedderParameters, DistributedTaskParameters
from rl_coach.core_types import EnvironmentSteps, Batch
from rl_coach.exploration_policies.additive_noise import AdditiveNoiseParameters
from rl_coach.logger import screen
from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplayParameters
from rl_coach.spaces import DiscreteActionSpace
from rl_coach.utils import force_list

from rl_coach.architectures.tensorflow_components.heads.ppo_head import PPOHeadParameters
from rl_coach.logger import screen


class PPOCriticNetworkParameters(NetworkParameters):
    def __init__(self):

@@ -17,12 +17,13 @@
from typing import Union

import numpy as np
from rl_coach.architectures.tensorflow_components.heads.quantile_regression_q_head import QuantileRegressionQHeadParameters
from rl_coach.schedules import LinearSchedule

from rl_coach.agents.dqn_agent import DQNAgentParameters, DQNNetworkParameters, DQNAlgorithmParameters
from rl_coach.agents.value_optimization_agent import ValueOptimizationAgent
from rl_coach.architectures.tensorflow_components.heads.quantile_regression_q_head import \
    QuantileRegressionQHeadParameters
from rl_coach.core_types import StateType
from rl_coach.schedules import LinearSchedule


class QuantileRegressionDQNNetworkParameters(DQNNetworkParameters):

@@ -17,11 +17,11 @@
from typing import Union

import numpy as np
from rl_coach.memories.non_episodic.prioritized_experience_replay import PrioritizedExperienceReplay
from rl_coach.spaces import DiscreteActionSpace

from rl_coach.agents.agent import Agent
from rl_coach.core_types import ActionInfo, StateType
from rl_coach.memories.non_episodic.prioritized_experience_replay import PrioritizedExperienceReplay
from rl_coach.spaces import DiscreteActionSpace


## This is an abstract agent - there is no learn_from_batch method ##

@@ -17,9 +17,8 @@
from typing import List, Tuple

from rl_coach.base_parameters import Frameworks, AgentParameters
from rl_coach.spaces import SpacesDefinition

from rl_coach.logger import failed_imports
from rl_coach.spaces import SpacesDefinition

try:
    import tensorflow as tf

@@ -19,12 +19,12 @@ from typing import List

import numpy as np
import tensorflow as tf
from rl_coach.base_parameters import AgentParameters, DistributedTaskParameters
from rl_coach.spaces import SpacesDefinition
from rl_coach.utils import force_list, squeeze_list

from rl_coach.architectures.architecture import Architecture
from rl_coach.base_parameters import AgentParameters, DistributedTaskParameters
from rl_coach.core_types import GradientClippingMethod
from rl_coach.spaces import SpacesDefinition
from rl_coach.utils import force_list, squeeze_list


def batchnorm_activation_dropout(input_layer, batchnorm, activation_function, dropout, dropout_rate, layer_idx):

@@ -18,10 +18,9 @@ from typing import List, Union

import numpy as np
import tensorflow as tf
from rl_coach.architectures.tensorflow_components.shared_variables import SharedRunningStats
from rl_coach.base_parameters import EmbedderScheme

from rl_coach.architectures.tensorflow_components.architecture import batchnorm_activation_dropout
from rl_coach.base_parameters import EmbedderScheme
from rl_coach.core_types import InputEmbedding


@@ -17,10 +17,10 @@
from typing import List

import tensorflow as tf
from rl_coach.architectures.tensorflow_components.architecture import Conv2d
from rl_coach.base_parameters import EmbedderScheme

from rl_coach.architectures.tensorflow_components.architecture import Conv2d
from rl_coach.architectures.tensorflow_components.embedders.embedder import InputEmbedder
from rl_coach.base_parameters import EmbedderScheme
from rl_coach.core_types import InputImageEmbedding


@@ -17,10 +17,10 @@
from typing import List

import tensorflow as tf
from rl_coach.architectures.tensorflow_components.architecture import Dense
from rl_coach.base_parameters import EmbedderScheme

from rl_coach.architectures.tensorflow_components.architecture import Dense
from rl_coach.architectures.tensorflow_components.embedders.embedder import InputEmbedder
from rl_coach.base_parameters import EmbedderScheme
from rl_coach.core_types import InputVectorEmbedding

@@ -19,15 +19,15 @@ from typing import Dict

import numpy as np
import tensorflow as tf

from rl_coach.architectures.tensorflow_components.architecture import TensorFlowArchitecture
from rl_coach.architectures.tensorflow_components.heads.head import HeadParameters
from rl_coach.architectures.tensorflow_components.middlewares.middleware import MiddlewareParameters
from rl_coach.base_parameters import AgentParameters, InputEmbedderParameters, EmbeddingMergerType
from rl_coach.core_types import PredictionType
from rl_coach.spaces import SpacesDefinition, PlanarMapsObservationSpace
from rl_coach.utils import get_all_subclasses, dynamic_import_and_instantiate_module_from_params

from rl_coach.architectures.tensorflow_components.architecture import TensorFlowArchitecture
from rl_coach.core_types import PredictionType


class GeneralTensorFlowNetwork(TensorFlowArchitecture):
    """

@@ -15,11 +15,11 @@
#

import tensorflow as tf
from rl_coach.base_parameters import AgentParameters
from rl_coach.spaces import SpacesDefinition

from rl_coach.architectures.tensorflow_components.heads.head import Head, HeadParameters
from rl_coach.base_parameters import AgentParameters
from rl_coach.core_types import QActionStateValue
from rl_coach.spaces import SpacesDefinition


class CategoricalQHeadParameters(HeadParameters):

@@ -15,13 +15,12 @@
#

import tensorflow as tf
from rl_coach.architectures.tensorflow_components.architecture import batchnorm_activation_dropout

from rl_coach.architectures.tensorflow_components.architecture import batchnorm_activation_dropout
from rl_coach.architectures.tensorflow_components.heads.head import Head, HeadParameters
from rl_coach.base_parameters import AgentParameters
from rl_coach.spaces import SpacesDefinition

from rl_coach.core_types import ActionProbabilities
from rl_coach.spaces import SpacesDefinition


class DDPGActorHeadParameters(HeadParameters):

@@ -14,12 +14,12 @@
# limitations under the License.
#
import tensorflow as tf
from rl_coach.architectures.tensorflow_components.heads.head import HeadParameters

from rl_coach.base_parameters import AgentParameters
from rl_coach.architectures.tensorflow_components.heads.head import HeadParameters
from rl_coach.architectures.tensorflow_components.heads.q_head import QHead
from rl_coach.spaces import SpacesDefinition
from rl_coach.base_parameters import AgentParameters
from rl_coach.memories.non_episodic import differentiable_neural_dictionary
from rl_coach.spaces import SpacesDefinition


class DNDQHeadParameters(HeadParameters):

@@ -15,10 +15,10 @@
#

import tensorflow as tf
from rl_coach.architectures.tensorflow_components.heads.head import HeadParameters

from rl_coach.base_parameters import AgentParameters
from rl_coach.architectures.tensorflow_components.heads.head import HeadParameters
from rl_coach.architectures.tensorflow_components.heads.q_head import QHead
from rl_coach.base_parameters import AgentParameters
from rl_coach.spaces import SpacesDefinition

@@ -17,10 +17,10 @@ from typing import Type

import numpy as np
import tensorflow as tf
from rl_coach.base_parameters import AgentParameters, Parameters
from rl_coach.spaces import SpacesDefinition
from tensorflow.python.ops.losses.losses_impl import Reduction

from rl_coach.base_parameters import AgentParameters, Parameters
from rl_coach.spaces import SpacesDefinition
from rl_coach.utils import force_list


@@ -15,11 +15,11 @@
#

import tensorflow as tf
from rl_coach.base_parameters import AgentParameters
from rl_coach.spaces import SpacesDefinition

from rl_coach.architectures.tensorflow_components.heads.head import Head, HeadParameters
from rl_coach.base_parameters import AgentParameters
from rl_coach.core_types import Measurements
from rl_coach.spaces import SpacesDefinition


class MeasurementsPredictionHeadParameters(HeadParameters):

@@ -15,12 +15,12 @@
#

import tensorflow as tf
from rl_coach.base_parameters import AgentParameters
from rl_coach.spaces import BoxActionSpace
from rl_coach.spaces import SpacesDefinition

from rl_coach.architectures.tensorflow_components.heads.head import Head, HeadParameters
from rl_coach.base_parameters import AgentParameters
from rl_coach.core_types import QActionStateValue
from rl_coach.spaces import BoxActionSpace
from rl_coach.spaces import SpacesDefinition


class NAFHeadParameters(HeadParameters):

@@ -16,15 +16,15 @@

import numpy as np
import tensorflow as tf

from rl_coach.architectures.tensorflow_components.heads.head import Head, normalized_columns_initializer, HeadParameters
from rl_coach.base_parameters import AgentParameters
from rl_coach.core_types import ActionProbabilities
from rl_coach.exploration_policies.continuous_entropy import ContinuousEntropyParameters
from rl_coach.spaces import DiscreteActionSpace, BoxActionSpace, CompoundActionSpace
from rl_coach.spaces import SpacesDefinition
from rl_coach.utils import eps

from rl_coach.core_types import ActionProbabilities
from rl_coach.exploration_policies.continuous_entropy import ContinuousEntropyParameters


class PolicyHeadParameters(HeadParameters):
    def __init__(self, activation_function: str ='tanh', name: str='policy_head_params'):

@@ -16,14 +16,14 @@

import numpy as np
import tensorflow as tf

from rl_coach.architectures.tensorflow_components.heads.head import Head, HeadParameters, normalized_columns_initializer
from rl_coach.base_parameters import AgentParameters
from rl_coach.core_types import ActionProbabilities
from rl_coach.spaces import BoxActionSpace, DiscreteActionSpace
from rl_coach.spaces import SpacesDefinition
from rl_coach.utils import eps

from rl_coach.architectures.tensorflow_components.heads.head import Head, HeadParameters, normalized_columns_initializer
from rl_coach.core_types import ActionProbabilities


class PPOHeadParameters(HeadParameters):
    def __init__(self, activation_function: str ='tanh', name: str='ppo_head_params'):

@@ -15,11 +15,11 @@
#

import tensorflow as tf
from rl_coach.base_parameters import AgentParameters
from rl_coach.spaces import SpacesDefinition

from rl_coach.architectures.tensorflow_components.heads.head import Head, normalized_columns_initializer, HeadParameters
from rl_coach.base_parameters import AgentParameters
from rl_coach.core_types import ActionProbabilities
from rl_coach.spaces import SpacesDefinition


class PPOVHeadParameters(HeadParameters):

@@ -15,11 +15,11 @@
#

import tensorflow as tf
from rl_coach.base_parameters import AgentParameters
from rl_coach.spaces import SpacesDefinition, BoxActionSpace, DiscreteActionSpace

from rl_coach.architectures.tensorflow_components.heads.head import Head, HeadParameters
from rl_coach.base_parameters import AgentParameters
from rl_coach.core_types import QActionStateValue
from rl_coach.spaces import SpacesDefinition, BoxActionSpace, DiscreteActionSpace


class QHeadParameters(HeadParameters):

@@ -15,11 +15,11 @@
#

import tensorflow as tf
from rl_coach.base_parameters import AgentParameters
from rl_coach.spaces import SpacesDefinition

from rl_coach.architectures.tensorflow_components.heads.head import Head, HeadParameters
from rl_coach.base_parameters import AgentParameters
from rl_coach.core_types import QActionStateValue
from rl_coach.spaces import SpacesDefinition


class QuantileRegressionQHeadParameters(HeadParameters):

@@ -15,11 +15,11 @@
#

import tensorflow as tf
from rl_coach.base_parameters import AgentParameters
from rl_coach.spaces import SpacesDefinition

from rl_coach.architectures.tensorflow_components.heads.head import Head, normalized_columns_initializer, HeadParameters
from rl_coach.base_parameters import AgentParameters
from rl_coach.core_types import VStateValue
from rl_coach.spaces import SpacesDefinition


class VHeadParameters(HeadParameters):

@@ -16,10 +16,10 @@
from typing import Union, List

import tensorflow as tf
from rl_coach.architectures.tensorflow_components.middlewares.middleware import Middleware, MiddlewareParameters
from rl_coach.base_parameters import MiddlewareScheme

from rl_coach.architectures.tensorflow_components.architecture import batchnorm_activation_dropout, Dense
from rl_coach.architectures.tensorflow_components.middlewares.middleware import Middleware, MiddlewareParameters
from rl_coach.base_parameters import MiddlewareScheme
from rl_coach.core_types import Middleware_FC_Embedding


@@ -17,10 +17,10 @@

import numpy as np
import tensorflow as tf
from rl_coach.architectures.tensorflow_components.middlewares.middleware import Middleware, MiddlewareParameters
from rl_coach.base_parameters import MiddlewareScheme

from rl_coach.architectures.tensorflow_components.architecture import batchnorm_activation_dropout
from rl_coach.architectures.tensorflow_components.middlewares.middleware import Middleware, MiddlewareParameters
from rl_coach.base_parameters import MiddlewareScheme
from rl_coach.core_types import Middleware_LSTM_Embedding

@@ -16,8 +16,8 @@
from typing import Type, Union, List

import tensorflow as tf
from rl_coach.base_parameters import MiddlewareScheme, Parameters

from rl_coach.base_parameters import MiddlewareScheme, Parameters
from rl_coach.core_types import MiddlewareEmbedding


@@ -15,12 +15,12 @@
#


import copy
from enum import Enum
from typing import List, Union, Dict, Any, Type
from random import shuffle
from typing import List, Union, Dict, Any, Type

import numpy as np
import copy

ActionType = Union[int, float, np.ndarray, List]
GoalType = Union[None, np.ndarray]

Binary file not shown.
Binary file not shown.
@@ -18,6 +18,7 @@ import math

import matplotlib.pyplot as plt
import numpy as np

from rl_coach.filters.observation.observation_stacking_filter import LazyStack

@@ -1,10 +1,25 @@
#
# Copyright (c) 2017 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import random
import sys
from os import path, environ

from rl_coach.filters.observation.observation_to_uint8_filter import ObservationToUInt8Filter

from rl_coach.filters.observation.observation_rgb_to_y_filter import ObservationRGBToYFilter
from rl_coach.filters.observation.observation_to_uint8_filter import ObservationToUInt8Filter

try:
    if 'CARLA_ROOT' in environ:

@@ -26,18 +26,18 @@ from os import path, environ
from typing import Union, List

import numpy as np

from rl_coach.base_parameters import VisualizationParameters
from rl_coach.environments.environment import Environment, EnvironmentParameters, LevelSelection
from rl_coach.filters.action.full_discrete_action_space_map import FullDiscreteActionSpaceMap
from rl_coach.filters.filter import InputFilter, OutputFilter
from rl_coach.filters.observation.observation_rescale_to_size_filter import ObservationRescaleToSizeFilter
from rl_coach.filters.observation.observation_rgb_to_y_filter import ObservationRGBToYFilter
from rl_coach.filters.observation.observation_stacking_filter import ObservationStackingFilter
from rl_coach.filters.observation.observation_to_uint8_filter import ObservationToUInt8Filter
from rl_coach.spaces import MultiSelectActionSpace, ImageObservationSpace, \
    VectorObservationSpace, StateSpace

from rl_coach.filters.observation.observation_rgb_to_y_filter import ObservationRGBToYFilter


# enum of the available levels and their path
class DoomLevel(Enum):

@@ -20,17 +20,17 @@ from collections import OrderedDict
from typing import Union, List, Tuple, Dict

import numpy as np

from rl_coach import logger
from rl_coach.base_parameters import Parameters
from rl_coach.base_parameters import VisualizationParameters
from rl_coach.core_types import GoalType, ActionType, EnvResponse, RunPhase
from rl_coach.environments.environment_interface import EnvironmentInterface
from rl_coach.logger import screen
from rl_coach.renderer import Renderer
from rl_coach.spaces import ActionSpace, ObservationSpace, DiscreteActionSpace, RewardSpace, StateSpace
from rl_coach.utils import squeeze_list, force_list

from rl_coach import logger
from rl_coach.environments.environment_interface import EnvironmentInterface
from rl_coach.logger import screen


class LevelSelection(object):
    def __init__(self, level: str):

@@ -16,9 +16,8 @@

from typing import Union, Dict

from rl_coach.spaces import ActionSpace

from rl_coach.core_types import ActionType, EnvResponse, RunPhase
from rl_coach.spaces import ActionSpace


class EnvironmentInterface(object):

@@ -1,9 +1,25 @@
import numpy as np
import gym
#
# Copyright (c) 2017 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import os

import gym
import numpy as np
from gym import spaces
from gym.envs.registration import EnvSpec

from mujoco_py import load_model_from_path, MjSim, MjViewer, MjRenderContextOffscreen


@@ -19,6 +19,7 @@ from enum import Enum
from typing import Union, List

import numpy as np

from rl_coach.filters.observation.observation_move_axis_filter import ObservationMoveAxisFilter

try:

@@ -1,8 +1,25 @@
import numpy as np
import gym
from gym import spaces
#
# Copyright (c) 2017 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import random

import gym
import numpy as np
from gym import spaces


class BitFlip(gym.Env):
    metadata = {

@@ -1,8 +1,25 @@
import numpy as np
import gym
from gym import spaces
#
# Copyright (c) 2017 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from enum import Enum

import gym
import numpy as np
from gym import spaces


class ExplorationChain(gym.Env):
    metadata = {

@@ -17,11 +17,11 @@
from typing import List

import numpy as np
from rl_coach.schedules import Schedule, LinearSchedule
from rl_coach.spaces import ActionSpace, BoxActionSpace

from rl_coach.core_types import RunPhase, ActionType
from rl_coach.exploration_policies.exploration_policy import ExplorationPolicy, ExplorationParameters
from rl_coach.schedules import Schedule, LinearSchedule
from rl_coach.spaces import ActionSpace, BoxActionSpace


# TODO: consider renaming to gaussian sampling

@@ -17,11 +17,11 @@
from typing import List

import numpy as np
from rl_coach.schedules import Schedule
from rl_coach.spaces import ActionSpace

from rl_coach.core_types import RunPhase, ActionType
from rl_coach.exploration_policies.exploration_policy import ExplorationPolicy, ExplorationParameters
from rl_coach.schedules import Schedule
from rl_coach.spaces import ActionSpace


class BoltzmannParameters(ExplorationParameters):

@@ -17,13 +17,13 @@
from typing import List

import numpy as np
from rl_coach.exploration_policies.additive_noise import AdditiveNoiseParameters
from rl_coach.exploration_policies.e_greedy import EGreedy, EGreedyParameters
from rl_coach.schedules import Schedule, LinearSchedule
from rl_coach.spaces import ActionSpace

from rl_coach.core_types import RunPhase, ActionType
from rl_coach.exploration_policies.additive_noise import AdditiveNoiseParameters
from rl_coach.exploration_policies.e_greedy import EGreedy, EGreedyParameters
from rl_coach.exploration_policies.exploration_policy import ExplorationParameters
from rl_coach.schedules import Schedule, LinearSchedule
from rl_coach.spaces import ActionSpace


class BootstrappedParameters(EGreedyParameters):

@@ -17,10 +17,10 @@
from typing import List

import numpy as np
from rl_coach.spaces import ActionSpace

from rl_coach.core_types import RunPhase, ActionType
from rl_coach.exploration_policies.exploration_policy import ExplorationPolicy, ExplorationParameters
from rl_coach.spaces import ActionSpace


class CategoricalParameters(ExplorationParameters):

@@ -17,15 +17,15 @@
from typing import List

import numpy as np

from rl_coach.core_types import RunPhase, ActionType
from rl_coach.exploration_policies.additive_noise import AdditiveNoiseParameters
from rl_coach.exploration_policies.exploration_policy import ExplorationParameters
from rl_coach.exploration_policies.exploration_policy import ExplorationPolicy
from rl_coach.schedules import Schedule, LinearSchedule
from rl_coach.spaces import ActionSpace, DiscreteActionSpace, BoxActionSpace
from rl_coach.utils import dynamic_import_and_instantiate_module_from_params

from rl_coach.core_types import RunPhase, ActionType
from rl_coach.exploration_policies.exploration_policy import ExplorationParameters
from rl_coach.exploration_policies.exploration_policy import ExplorationPolicy


class EGreedyParameters(ExplorationParameters):
    def __init__(self):

@@ -17,9 +17,8 @@
from typing import List

from rl_coach.base_parameters import Parameters
from rl_coach.spaces import ActionSpace

from rl_coach.core_types import RunPhase, ActionType
from rl_coach.spaces import ActionSpace


class ExplorationParameters(Parameters):

@@ -17,10 +17,10 @@
from typing import List

import numpy as np
from rl_coach.spaces import ActionSpace, DiscreteActionSpace, BoxActionSpace

from rl_coach.core_types import ActionType
from rl_coach.exploration_policies.exploration_policy import ExplorationPolicy, ExplorationParameters
from rl_coach.spaces import ActionSpace, DiscreteActionSpace, BoxActionSpace


class GreedyParameters(ExplorationParameters):

@@ -17,10 +17,10 @@
from typing import List

import numpy as np
from rl_coach.spaces import ActionSpace, BoxActionSpace, GoalsSpace

from rl_coach.core_types import RunPhase, ActionType
from rl_coach.exploration_policies.exploration_policy import ExplorationPolicy, ExplorationParameters
from rl_coach.spaces import ActionSpace, BoxActionSpace, GoalsSpace


# Based on on the description in:

@@ -17,12 +17,12 @@
from typing import List

import numpy as np
from rl_coach.schedules import Schedule, LinearSchedule
from scipy.stats import truncnorm
from rl_coach.spaces import ActionSpace, BoxActionSpace

from rl_coach.core_types import RunPhase, ActionType
from rl_coach.exploration_policies.exploration_policy import ExplorationPolicy, ExplorationParameters
from rl_coach.schedules import Schedule, LinearSchedule
from rl_coach.spaces import ActionSpace, BoxActionSpace


class TruncatedNormalParameters(ExplorationParameters):

@@ -17,13 +17,13 @@
from typing import List

import numpy as np
from rl_coach.exploration_policies.additive_noise import AdditiveNoiseParameters
from rl_coach.exploration_policies.e_greedy import EGreedy, EGreedyParameters
from rl_coach.schedules import Schedule, LinearSchedule, PieceWiseSchedule
from rl_coach.spaces import ActionSpace

from rl_coach.core_types import RunPhase, ActionType, EnvironmentSteps
from rl_coach.exploration_policies.additive_noise import AdditiveNoiseParameters
from rl_coach.exploration_policies.e_greedy import EGreedy, EGreedyParameters
from rl_coach.exploration_policies.exploration_policy import ExplorationParameters
from rl_coach.schedules import Schedule, LinearSchedule, PieceWiseSchedule
from rl_coach.spaces import ActionSpace


class UCBParameters(EGreedyParameters):

@@ -14,10 +14,9 @@
# limitations under the License.
#

from rl_coach.spaces import ActionSpace

from rl_coach.core_types import ActionType
from rl_coach.filters.filter import Filter
from rl_coach.spaces import ActionSpace


class ActionFilter(Filter):

@@ -17,8 +17,8 @@
from typing import Union, List

import numpy as np
from rl_coach.filters.action.box_discretization import BoxDiscretization

from rl_coach.filters.action.box_discretization import BoxDiscretization
from rl_coach.filters.action.partial_discrete_action_space_map import PartialDiscreteActionSpaceMap
from rl_coach.spaces import AttentionActionSpace, BoxActionSpace, DiscreteActionSpace


@@ -17,10 +17,10 @@
from typing import Union

import numpy as np
from rl_coach.spaces import BoxActionSpace

from rl_coach.core_types import ActionType
from rl_coach.filters.action.action_filter import ActionFilter
from rl_coach.spaces import BoxActionSpace


class BoxMasking(ActionFilter):

@@ -17,10 +17,10 @@
from typing import Union

import numpy as np
from rl_coach.spaces import BoxActionSpace

from rl_coach.core_types import ActionType
from rl_coach.filters.action.action_filter import ActionFilter
from rl_coach.spaces import BoxActionSpace


class LinearBoxToBoxMap(ActionFilter):

@@ -16,10 +16,9 @@

from typing import List

from rl_coach.spaces import DiscreteActionSpace, ActionSpace

from rl_coach.core_types import ActionType
from rl_coach.filters.action.action_filter import ActionFilter
from rl_coach.spaces import DiscreteActionSpace, ActionSpace


class PartialDiscreteActionSpaceMap(ActionFilter):

@@ -19,8 +19,8 @@ from collections import OrderedDict
from copy import deepcopy
from typing import Dict, Union, List

from rl_coach.spaces import ActionSpace, RewardSpace, ObservationSpace
from rl_coach.core_types import EnvResponse, ActionInfo, Transition
from rl_coach.spaces import ActionSpace, RewardSpace, ObservationSpace
from rl_coach.utils import force_list

@@ -16,10 +16,10 @@


import numpy as np
from rl_coach.spaces import ObservationSpace

from rl_coach.core_types import ObservationType
from rl_coach.filters.observation.observation_filter import ObservationFilter
from rl_coach.spaces import ObservationSpace


class ObservationClippingFilter(ObservationFilter):

@@ -16,10 +16,10 @@
from typing import Union, Tuple

import numpy as np
from rl_coach.spaces import ObservationSpace

from rl_coach.core_types import ObservationType
from rl_coach.filters.observation.observation_filter import ObservationFilter
from rl_coach.spaces import ObservationSpace


class ObservationCropFilter(ObservationFilter):

@@ -15,10 +15,10 @@
#

import numpy as np
from rl_coach.spaces import ObservationSpace, PlanarMapsObservationSpace

from rl_coach.core_types import ObservationType
from rl_coach.filters.observation.observation_filter import ObservationFilter
from rl_coach.spaces import ObservationSpace, PlanarMapsObservationSpace


class ObservationMoveAxisFilter(ObservationFilter):

@@ -16,11 +16,11 @@
from typing import List

import numpy as np
from rl_coach.spaces import ObservationSpace

from rl_coach.architectures.tensorflow_components.shared_variables import SharedRunningStats
from rl_coach.core_types import ObservationType
from rl_coach.filters.observation.observation_filter import ObservationFilter
from rl_coach.spaces import ObservationSpace


class ObservationNormalizationFilter(ObservationFilter):

@@ -17,10 +17,9 @@ import copy
from enum import Enum
from typing import List

from rl_coach.spaces import ObservationSpace, VectorObservationSpace

from rl_coach.core_types import ObservationType
from rl_coach.filters.observation.observation_filter import ObservationFilter
from rl_coach.spaces import ObservationSpace, VectorObservationSpace


class ObservationReductionBySubPartsNameFilter(ObservationFilter):

@@ -17,10 +17,10 @@
from enum import Enum

import scipy.ndimage
from rl_coach.spaces import ObservationSpace

from rl_coach.core_types import ObservationType
from rl_coach.filters.observation.observation_filter import ObservationFilter
from rl_coach.spaces import ObservationSpace


# imresize interpolation types as defined by scipy here:

@@ -19,10 +19,10 @@ from enum import Enum

import numpy as np
import scipy.ndimage
from rl_coach.spaces import ObservationSpace, PlanarMapsObservationSpace, ImageObservationSpace

from rl_coach.core_types import ObservationType
from rl_coach.filters.observation.observation_filter import ObservationFilter
from rl_coach.spaces import ObservationSpace, PlanarMapsObservationSpace, ImageObservationSpace


# imresize interpolation types as defined by scipy here:

@@ -14,10 +14,9 @@
# limitations under the License.
#

from rl_coach.spaces import ObservationSpace

from rl_coach.core_types import ObservationType
from rl_coach.filters.observation.observation_filter import ObservationFilter
from rl_coach.spaces import ObservationSpace


class ObservationRGBToYFilter(ObservationFilter):

@@ -15,10 +15,10 @@
#

import numpy as np
from rl_coach.spaces import ObservationSpace

from rl_coach.core_types import ObservationType
from rl_coach.filters.observation.observation_filter import ObservationFilter
from rl_coach.spaces import ObservationSpace


class ObservationSqueezeFilter(ObservationFilter):

@@ -18,10 +18,10 @@ import copy
from collections import deque

import numpy as np
from rl_coach.spaces import ObservationSpace

from rl_coach.core_types import ObservationType
from rl_coach.filters.observation.observation_filter import ObservationFilter
from rl_coach.spaces import ObservationSpace


class LazyStack(object):

@@ -15,10 +15,10 @@
#

import numpy as np
from rl_coach.spaces import ObservationSpace
from rl_coach.core_types import ObservationType
from rl_coach.filters.observation.observation_filter import ObservationFilter
from rl_coach.spaces import ObservationSpace


class ObservationToUInt8Filter(ObservationFilter):
@@ -15,10 +15,10 @@
#

import numpy as np
from rl_coach.spaces import RewardSpace
from rl_coach.core_types import RewardType
from rl_coach.filters.reward.reward_filter import RewardFilter
from rl_coach.spaces import RewardSpace


class RewardClippingFilter(RewardFilter):
@@ -16,11 +16,11 @@

import numpy as np
from rl_coach.spaces import RewardSpace
from rl_coach.architectures.tensorflow_components.shared_variables import SharedRunningStats
from rl_coach.core_types import RewardType
from rl_coach.filters.reward.reward_filter import RewardFilter
from rl_coach.spaces import RewardSpace


class RewardNormalizationFilter(RewardFilter):
@@ -14,10 +14,9 @@
# limitations under the License.
#

from rl_coach.spaces import RewardSpace
from rl_coach.core_types import RewardType
from rl_coach.filters.reward.reward_filter import RewardFilter
from rl_coach.spaces import RewardSpace


class RewardRescaleFilter(RewardFilter):
@@ -15,11 +15,11 @@
#
from typing import Tuple, List

from rl_coach.base_parameters import AgentParameters, VisualizationParameters, TaskParameters, PresetValidationParameters
from rl_coach.base_parameters import AgentParameters, VisualizationParameters, TaskParameters, \
    PresetValidationParameters
from rl_coach.environments.environment import EnvironmentParameters, Environment
from rl_coach.level_manager import LevelManager
from rl_coach.graph_managers.graph_manager import GraphManager, ScheduleParameters
from rl_coach.level_manager import LevelManager
from rl_coach.utils import short_dynamic_import
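The graph-manager hunks also shorten over-long import lines by splitting them with a backslash continuation. The two forms below are equivalent at runtime (assuming rl_coach is importable); the parenthesized variant is a common alternative that tolerates re-wrapping without editing continuation characters:

# Backslash continuation, as used in this commit:
from rl_coach.base_parameters import AgentParameters, VisualizationParameters, TaskParameters, \
    PresetValidationParameters

# Parenthesized form, an equivalent alternative:
from rl_coach.base_parameters import (AgentParameters, VisualizationParameters,
                                      TaskParameters, PresetValidationParameters)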
@@ -18,20 +18,19 @@ import copy
import os
import time
from collections import OrderedDict
from typing import List, Tuple
from distutils.dir_util import copy_tree, remove_tree
from typing import List, Tuple

import numpy as np
from rl_coach.base_parameters import iterable_to_items, TaskParameters, DistributedTaskParameters, VisualizationParameters, \
from rl_coach.base_parameters import iterable_to_items, TaskParameters, DistributedTaskParameters, \
    VisualizationParameters, \
    Parameters, PresetValidationParameters
from rl_coach.core_types import TotalStepsCounter, RunPhase, PlayingStepsType, TrainingSteps, EnvironmentEpisodes, \
    EnvironmentSteps, \
    StepMethod
from rl_coach.environments.environment import Environment
from rl_coach.level_manager import LevelManager
from rl_coach.utils import set_cpu

from rl_coach.logger import screen, Logger
from rl_coach.utils import set_cpu


class ScheduleParameters(Parameters):
@@ -13,17 +13,16 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
import numpy as np
from typing import List, Union, Tuple

from rl_coach.base_parameters import AgentParameters, VisualizationParameters, TaskParameters, PresetValidationParameters
from rl_coach.base_parameters import AgentParameters, VisualizationParameters, TaskParameters, \
    PresetValidationParameters
from rl_coach.core_types import EnvironmentSteps
from rl_coach.environments.environment import EnvironmentParameters, Environment
from rl_coach.graph_managers.graph_manager import GraphManager, ScheduleParameters
from rl_coach.level_manager import LevelManager
from rl_coach.utils import short_dynamic_import

from rl_coach.core_types import EnvironmentSteps
from rl_coach.graph_managers.graph_manager import GraphManager, ScheduleParameters


class HACGraphManager(GraphManager):
    """
@@ -16,14 +16,14 @@

from typing import List, Union, Tuple

from rl_coach.base_parameters import AgentParameters, VisualizationParameters, TaskParameters, PresetValidationParameters
from rl_coach.base_parameters import AgentParameters, VisualizationParameters, TaskParameters, \
    PresetValidationParameters
from rl_coach.core_types import EnvironmentSteps
from rl_coach.environments.environment import EnvironmentParameters, Environment
from rl_coach.graph_managers.graph_manager import GraphManager, ScheduleParameters
from rl_coach.level_manager import LevelManager
from rl_coach.utils import short_dynamic_import

from rl_coach.core_types import EnvironmentSteps
from rl_coach.graph_managers.graph_manager import GraphManager, ScheduleParameters


class HRLGraphManager(GraphManager):
    """
@@ -14,14 +14,13 @@
# limitations under the License.
#
import copy
from typing import Union, Dict, Tuple, Type

from rl_coach.environments.environment import Environment
from rl_coach.environments.environment_interface import EnvironmentInterface
from rl_coach.spaces import ActionSpace, SpacesDefinition
from typing import Union, Dict

from rl_coach.agents.composite_agent import CompositeAgent
from rl_coach.core_types import EnvResponse, ActionInfo, RunPhase, ActionType, EnvironmentSteps
from rl_coach.environments.environment import Environment
from rl_coach.environments.environment_interface import EnvironmentInterface
from rl_coach.spaces import ActionSpace, SpacesDefinition


class LevelManager(EnvironmentInterface):
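The level_manager hunk also narrows `from typing import Union, Dict, Tuple, Type` down to the two names the module still uses. A rough sketch of how such dead imports can be flagged mechanically; this is a heuristic only, not a substitute for pyflakes or isort, and `check_imports.py <file>` is a hypothetical invocation:

import ast
import sys

# Parse the target module and collect every name bound by a from-import.
source = open(sys.argv[1]).read()
tree = ast.parse(source)

imported = set()
for node in ast.walk(tree):
    if isinstance(node, ast.ImportFrom):
        imported.update(alias.asname or alias.name for alias in node.names)

# Collect every bare name and attribute actually referenced in the module.
used = {node.id for node in ast.walk(tree) if isinstance(node, ast.Name)}
used |= {node.attr for node in ast.walk(tree) if isinstance(node, ast.Attribute)}

for name in sorted(imported - used):
    print("possibly unused import:", name)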
@@ -17,10 +17,10 @@
from typing import List, Tuple, Union, Dict, Any

import numpy as np
from rl_coach.utils import ReaderWriterLock
from rl_coach.core_types import Transition, Episode
from rl_coach.memories.memory import Memory, MemoryGranularity, MemoryParameters
from rl_coach.utils import ReaderWriterLock


class EpisodicExperienceReplayParameters(MemoryParameters):
@@ -21,7 +21,8 @@ from typing import Tuple, List
import numpy as np

from rl_coach.core_types import Episode, Transition
from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplayParameters, EpisodicExperienceReplay
from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplayParameters, \
    EpisodicExperienceReplay
from rl_coach.memories.non_episodic.experience_replay import MemoryGranularity
from rl_coach.spaces import GoalsSpace
@@ -14,9 +14,8 @@
# limitations under the License.
#

from rl_coach.memories.memory import MemoryGranularity, MemoryParameters

from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplay
from rl_coach.memories.memory import MemoryGranularity, MemoryParameters


class SingleEpisodeBufferParameters(MemoryParameters):
@@ -17,10 +17,10 @@
from typing import List, Tuple, Union, Dict, Any

import numpy as np
from rl_coach.utils import ReaderWriterLock
from rl_coach.core_types import Transition
from rl_coach.memories.memory import Memory, MemoryGranularity, MemoryParameters
from rl_coach.utils import ReaderWriterLock


class ExperienceReplayParameters(MemoryParameters):
@@ -20,11 +20,11 @@ from enum import Enum
from typing import List, Tuple, Any

import numpy as np
from rl_coach.memories.memory import MemoryGranularity
from rl_coach.schedules import Schedule, ConstantSchedule
from rl_coach.core_types import Transition
from rl_coach.memories.memory import MemoryGranularity
from rl_coach.memories.non_episodic.experience_replay import ExperienceReplayParameters, ExperienceReplay
from rl_coach.schedules import Schedule, ConstantSchedule


class PrioritizedExperienceReplayParameters(ExperienceReplayParameters):
@@ -1,14 +1,13 @@
from rl_coach.agents.actor_critic_agent import ActorCriticAgentParameters
from rl_coach.architectures.tensorflow_components.middlewares.fc_middleware import FCMiddlewareParameters
from rl_coach.base_parameters import VisualizationParameters, PresetValidationParameters
from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps, RunPhase
from rl_coach.environments.environment import SingleLevelSelection, SelectedPhaseOnlyDumpMethod, MaxDumpMethod
from rl_coach.environments.gym_environment import Atari, atari_deterministic_v4
from rl_coach.exploration_policies.categorical import CategoricalParameters
from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
from rl_coach.graph_managers.graph_manager import ScheduleParameters

from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps, RunPhase
from rl_coach.exploration_policies.categorical import CategoricalParameters

####################
# Graph Scheduling #
####################
@@ -1,14 +1,13 @@
from rl_coach.agents.actor_critic_agent import ActorCriticAgentParameters
from rl_coach.architectures.tensorflow_components.middlewares.lstm_middleware import LSTMMiddlewareParameters
from rl_coach.base_parameters import VisualizationParameters, MiddlewareScheme, PresetValidationParameters
from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps, RunPhase
from rl_coach.environments.environment import SingleLevelSelection, SelectedPhaseOnlyDumpMethod, MaxDumpMethod
from rl_coach.environments.gym_environment import Atari, atari_deterministic_v4, AtariInputFilter
from rl_coach.exploration_policies.categorical import CategoricalParameters
from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
from rl_coach.graph_managers.graph_manager import ScheduleParameters

from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps, RunPhase
from rl_coach.exploration_policies.categorical import CategoricalParameters

####################
# Graph Scheduling #
####################
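In both preset hunks the import block shrinks from 14 lines to 13, and the flattened view lists the core_types and exploration_policies imports twice, consistent with duplicate imports being collapsed. A small sketch for spotting such duplicates; `find_dup_imports.py <file>` is a hypothetical invocation, and the line-based check is naive, so aliased or wrapped imports would need smarter handling:

from collections import Counter
import sys

# Count syntactically identical import lines, ignoring surrounding whitespace.
with open(sys.argv[1]) as f:
    imports = [line.strip() for line in f
               if line.lstrip().startswith(("import ", "from "))]

for line, count in Counter(imports).items():
    if count > 1:
        print("%dx %s" % (count, line))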
Some files were not shown because too many files have changed in this diff.