mirror of https://github.com/gryf/coach.git, synced 2025-12-17 11:10:20 +01:00

Commit: removing datasets + imports optimization
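Note on the change pattern: the "imports optimization" in the hunks below is mechanical. In each touched file the rl_coach imports are re-sorted alphabetically by module path, third-party imports (numpy, pandas, six) stay in their own group, and import lines that exceed the line-length limit are wrapped. The commit message does not name a tool; the result is consistent with an isort-style alphabetical sort. A minimal runnable sketch of that ordering rule (sort_key is a hypothetical helper written for this note, not an rl_coach function):

import re

def sort_key(line: str) -> str:
    # Order by the dotted module path that follows "from" or "import".
    match = re.match(r"(?:from|import)\s+([\w.]+)", line)
    return match.group(1) if match else line

imports = [
    "from rl_coach.spaces import DiscreteActionSpace",
    "from rl_coach.logger import screen",
    "from rl_coach.core_types import EnvironmentSteps",
]

# Prints core_types, then logger, then spaces -- the order the hunks below arrive at.
for line in sorted(imports, key=sort_key):
    print(line)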
@@ -18,18 +18,17 @@ from typing import Union

 import numpy as np
 import scipy.signal

 from rl_coach.agents.policy_optimization_agent import PolicyOptimizationAgent, PolicyGradientRescaler
 from rl_coach.architectures.tensorflow_components.heads.policy_head import PolicyHeadParameters
 from rl_coach.architectures.tensorflow_components.heads.v_head import VHeadParameters
 from rl_coach.architectures.tensorflow_components.middlewares.fc_middleware import FCMiddlewareParameters
 from rl_coach.base_parameters import AlgorithmParameters, NetworkParameters, \
     AgentParameters, InputEmbedderParameters
-from rl_coach.core_types import QActionStateValue
-from rl_coach.spaces import DiscreteActionSpace
-from rl_coach.utils import last_sample

 from rl_coach.logger import screen
 from rl_coach.memories.episodic.single_episode_buffer import SingleEpisodeBufferParameters
+from rl_coach.spaces import DiscreteActionSpace
+from rl_coach.utils import last_sample


 class ActorCriticAlgorithmParameters(AlgorithmParameters):
@@ -20,20 +20,19 @@ from collections import OrderedDict
 from typing import Dict, List, Union, Tuple

 import numpy as np
+from pandas import read_pickle
+from six.moves import range

 from rl_coach.agents.agent_interface import AgentInterface
+from rl_coach.architectures.network_wrapper import NetworkWrapper
 from rl_coach.base_parameters import AgentParameters, DistributedTaskParameters
 from rl_coach.core_types import RunPhase, PredictionType, EnvironmentEpisodes, ActionType, Batch, Episode, StateType
 from rl_coach.core_types import Transition, ActionInfo, TrainingSteps, EnvironmentSteps, EnvResponse
-from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplay
-from pandas import read_pickle
-from six.moves import range
-from rl_coach.spaces import SpacesDefinition, VectorObservationSpace, GoalsSpace, AttentionActionSpace
-from rl_coach.utils import Signal, force_list, set_cpu
-from rl_coach.utils import dynamic_import_and_instantiate_module_from_params
-
-from rl_coach.architectures.network_wrapper import NetworkWrapper
 from rl_coach.logger import screen, Logger, EpisodeLogger
+from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplay
+from rl_coach.spaces import SpacesDefinition, VectorObservationSpace, GoalsSpace, AttentionActionSpace
+from rl_coach.utils import Signal, force_list
+from rl_coach.utils import dynamic_import_and_instantiate_module_from_params


 class Agent(AgentInterface):
@@ -17,14 +17,14 @@
 from typing import Union

 import numpy as np

 from rl_coach.agents.imitation_agent import ImitationAgent
 from rl_coach.architectures.tensorflow_components.heads.policy_head import PolicyHeadParameters
 from rl_coach.architectures.tensorflow_components.middlewares.fc_middleware import FCMiddlewareParameters
-from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplayParameters

 from rl_coach.base_parameters import AgentParameters, AlgorithmParameters, NetworkParameters, InputEmbedderParameters, \
     MiddlewareScheme
 from rl_coach.exploration_policies.e_greedy import EGreedyParameters
+from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplayParameters


 class BCAlgorithmParameters(AlgorithmParameters):
@@ -17,9 +17,9 @@
 from typing import Union

 import numpy as np

 from rl_coach.agents.dqn_agent import DQNAgentParameters, DQNNetworkParameters
 from rl_coach.agents.value_optimization_agent import ValueOptimizationAgent

 from rl_coach.exploration_policies.bootstrapped import BootstrappedParameters


@@ -17,15 +17,15 @@
 from typing import Union

 import numpy as np

 from rl_coach.agents.dqn_agent import DQNNetworkParameters, DQNAlgorithmParameters
 from rl_coach.agents.value_optimization_agent import ValueOptimizationAgent
 from rl_coach.architectures.tensorflow_components.heads.categorical_q_head import CategoricalQHeadParameters
 from rl_coach.base_parameters import AgentParameters
-from rl_coach.memories.non_episodic.experience_replay import ExperienceReplayParameters
-from rl_coach.schedules import LinearSchedule

 from rl_coach.core_types import StateType
 from rl_coach.exploration_policies.e_greedy import EGreedyParameters
+from rl_coach.memories.non_episodic.experience_replay import ExperienceReplayParameters
+from rl_coach.schedules import LinearSchedule


 class CategoricalDQNNetworkParameters(DQNNetworkParameters):
@@ -20,21 +20,21 @@ from random import shuffle
 from typing import Union

 import numpy as np

 from rl_coach.agents.actor_critic_agent import ActorCriticAgent
 from rl_coach.agents.policy_optimization_agent import PolicyGradientRescaler
+from rl_coach.architectures.tensorflow_components.heads.ppo_head import PPOHeadParameters
 from rl_coach.architectures.tensorflow_components.heads.v_head import VHeadParameters
 from rl_coach.architectures.tensorflow_components.middlewares.fc_middleware import FCMiddlewareParameters
 from rl_coach.base_parameters import AlgorithmParameters, NetworkParameters, \
     AgentParameters, InputEmbedderParameters
 from rl_coach.core_types import EnvironmentSteps, Batch, EnvResponse, StateType
 from rl_coach.exploration_policies.additive_noise import AdditiveNoiseParameters
+from rl_coach.logger import screen
 from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplayParameters
 from rl_coach.schedules import ConstantSchedule
 from rl_coach.spaces import DiscreteActionSpace

-from rl_coach.architectures.tensorflow_components.heads.ppo_head import PPOHeadParameters
-from rl_coach.logger import screen


 class ClippedPPONetworkParameters(NetworkParameters):
     def __init__(self):
@@ -20,15 +20,14 @@ from enum import Enum
 from typing import Union, List, Dict

 import numpy as np

 from rl_coach.agents.agent_interface import AgentInterface
 from rl_coach.base_parameters import AgentParameters, VisualizationParameters
-# from rl_coach.environments.environment_interface import ActionSpace
-from rl_coach.spaces import ActionSpace
-from rl_coach.spaces import AgentSelection, AttentionActionSpace, ObservationSpace, SpacesDefinition
-from rl_coach.utils import short_dynamic_import

 from rl_coach.core_types import ActionInfo, EnvResponse, ActionType, RunPhase
 from rl_coach.filters.observation.observation_crop_filter import ObservationCropFilter
+from rl_coach.spaces import ActionSpace
+from rl_coach.spaces import AgentSelection, AttentionActionSpace, SpacesDefinition
+from rl_coach.utils import short_dynamic_import


 class DecisionPolicy(object):
@@ -19,19 +19,19 @@ from typing import Union
 from collections import OrderedDict

 import numpy as np

 from rl_coach.agents.actor_critic_agent import ActorCriticAgent
 from rl_coach.agents.agent import Agent
+from rl_coach.architectures.tensorflow_components.heads.ddpg_actor_head import DDPGActorHeadParameters
 from rl_coach.architectures.tensorflow_components.heads.v_head import VHeadParameters
 from rl_coach.architectures.tensorflow_components.middlewares.fc_middleware import FCMiddlewareParameters
 from rl_coach.base_parameters import NetworkParameters, AlgorithmParameters, \
     AgentParameters, InputEmbedderParameters, EmbedderScheme
+from rl_coach.core_types import ActionInfo, EnvironmentSteps
 from rl_coach.exploration_policies.ou_process import OUProcessParameters
 from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplayParameters
 from rl_coach.spaces import BoxActionSpace, GoalsSpace

-from rl_coach.architectures.tensorflow_components.heads.ddpg_actor_head import DDPGActorHeadParameters
-from rl_coach.core_types import ActionInfo, EnvironmentSteps


 class DDPGCriticNetworkParameters(NetworkParameters):
     def __init__(self):
@@ -17,11 +17,11 @@
 from typing import Union

 import numpy as np
-from rl_coach.schedules import LinearSchedule

 from rl_coach.agents.dqn_agent import DQNAgentParameters
 from rl_coach.agents.value_optimization_agent import ValueOptimizationAgent
 from rl_coach.core_types import EnvironmentSteps
+from rl_coach.schedules import LinearSchedule


 class DDQNAgentParameters(DQNAgentParameters):
@@ -19,19 +19,20 @@ from enum import Enum
 from typing import Union

 import numpy as np

 from rl_coach.agents.agent import Agent
 from rl_coach.architectures.tensorflow_components.architecture import Conv2d, Dense
-from rl_coach.architectures.tensorflow_components.heads.measurements_prediction_head import MeasurementsPredictionHeadParameters
+from rl_coach.architectures.tensorflow_components.heads.measurements_prediction_head import \
+    MeasurementsPredictionHeadParameters
 from rl_coach.architectures.tensorflow_components.middlewares.fc_middleware import FCMiddlewareParameters
 from rl_coach.base_parameters import AlgorithmParameters, AgentParameters, NetworkParameters, \
     InputEmbedderParameters, MiddlewareScheme
+from rl_coach.core_types import ActionInfo, EnvironmentSteps, RunPhase
+from rl_coach.exploration_policies.e_greedy import EGreedyParameters
 from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplayParameters
 from rl_coach.memories.memory import MemoryGranularity
 from rl_coach.spaces import SpacesDefinition, VectorObservationSpace

-from rl_coach.core_types import ActionInfo, EnvironmentSteps, RunPhase
-from rl_coach.exploration_policies.e_greedy import EGreedyParameters


 class HandlingTargetsAfterEpisodeEnd(Enum):
     LastStep = 0
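The hunk above also re-wraps an import that had grown past the line-length limit. For reference, these are the two standard ways to continue a long import line in Python, shown with stdlib names so the snippet runs anywhere (whether the project enforces a specific column limit is an assumption here); this commit uses the backslash style:

# Backslash continuation, the style this commit applies to over-long imports.
from os.path import join, split, \
    dirname, basename

# Parenthesized continuation (PEP 328), the common alternative.
from os.path import (join, split,
                     dirname, basename)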
@@ -17,16 +17,16 @@
 from typing import Union

 import numpy as np

 from rl_coach.agents.value_optimization_agent import ValueOptimizationAgent
 from rl_coach.architectures.tensorflow_components.heads.q_head import QHeadParameters
 from rl_coach.architectures.tensorflow_components.middlewares.fc_middleware import FCMiddlewareParameters
 from rl_coach.base_parameters import AlgorithmParameters, NetworkParameters, AgentParameters, \
     InputEmbedderParameters, MiddlewareScheme
-from rl_coach.memories.non_episodic.experience_replay import ExperienceReplayParameters
-from rl_coach.schedules import LinearSchedule

 from rl_coach.core_types import EnvironmentSteps
 from rl_coach.exploration_policies.e_greedy import EGreedyParameters
+from rl_coach.memories.non_episodic.experience_replay import ExperienceReplayParameters
+from rl_coach.schedules import LinearSchedule


 class DQNAlgorithmParameters(AlgorithmParameters):
@@ -17,7 +17,6 @@
 from typing import Union

 import numpy as np
-import copy

 from rl_coach.agents.ddpg_agent import DDPGAgent, DDPGAgentParameters, DDPGAlgorithmParameters
 from rl_coach.core_types import RunPhase
@@ -19,6 +19,8 @@ from collections import OrderedDict
 from typing import Union

 import pygame
+from pandas import to_pickle

 from rl_coach.agents.agent import Agent
 from rl_coach.agents.bc_agent import BCNetworkParameters
 from rl_coach.architectures.tensorflow_components.heads.policy_head import PolicyHeadParameters
@@ -26,11 +28,9 @@ from rl_coach.architectures.tensorflow_components.middlewares.fc_middleware impo
 from rl_coach.base_parameters import AlgorithmParameters, NetworkParameters, InputEmbedderParameters, EmbedderScheme, \
     AgentParameters
 from rl_coach.core_types import ActionInfo
-from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplayParameters
-from pandas import to_pickle

 from rl_coach.exploration_policies.e_greedy import EGreedyParameters
 from rl_coach.logger import screen
+from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplayParameters


 class HumanAlgorithmParameters(AlgorithmParameters):
@@ -17,11 +17,10 @@
 from collections import OrderedDict
 from typing import Union

-from rl_coach.core_types import RunPhase, ActionInfo
-from rl_coach.spaces import DiscreteActionSpace

 from rl_coach.agents.agent import Agent
+from rl_coach.core_types import RunPhase, ActionInfo
 from rl_coach.logger import screen
+from rl_coach.spaces import DiscreteActionSpace


 ## This is an abstract agent - there is no learn_from_batch method ##
@@ -17,17 +17,17 @@
 from typing import Union

 import numpy as np

 from rl_coach.agents.policy_optimization_agent import PolicyOptimizationAgent
 from rl_coach.agents.value_optimization_agent import ValueOptimizationAgent
 from rl_coach.architectures.tensorflow_components.heads.q_head import QHeadParameters
 from rl_coach.architectures.tensorflow_components.middlewares.fc_middleware import FCMiddlewareParameters
 from rl_coach.base_parameters import AlgorithmParameters, AgentParameters, NetworkParameters, \
     InputEmbedderParameters
-from rl_coach.exploration_policies.e_greedy import EGreedyParameters
-from rl_coach.utils import last_sample

 from rl_coach.core_types import EnvironmentSteps
+from rl_coach.exploration_policies.e_greedy import EGreedyParameters
 from rl_coach.memories.episodic.single_episode_buffer import SingleEpisodeBufferParameters
+from rl_coach.utils import last_sample


 class NStepQNetworkParameters(NetworkParameters):
@@ -17,16 +17,16 @@
 from typing import Union

 import numpy as np

 from rl_coach.agents.value_optimization_agent import ValueOptimizationAgent
 from rl_coach.architectures.tensorflow_components.heads.naf_head import NAFHeadParameters
 from rl_coach.architectures.tensorflow_components.middlewares.fc_middleware import FCMiddlewareParameters
 from rl_coach.base_parameters import AlgorithmParameters, AgentParameters, \
     NetworkParameters, InputEmbedderParameters
-from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplayParameters
-from rl_coach.spaces import BoxActionSpace

 from rl_coach.core_types import ActionInfo, EnvironmentSteps
 from rl_coach.exploration_policies.ou_process import OUProcessParameters
+from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplayParameters
+from rl_coach.spaces import BoxActionSpace


 class NAFNetworkParameters(NetworkParameters):
@@ -19,17 +19,17 @@ import pickle
 from typing import Union

 import numpy as np

 from rl_coach.agents.value_optimization_agent import ValueOptimizationAgent
 from rl_coach.architectures.tensorflow_components.heads.dnd_q_head import DNDQHeadParameters
 from rl_coach.architectures.tensorflow_components.middlewares.fc_middleware import FCMiddlewareParameters
 from rl_coach.base_parameters import AlgorithmParameters, NetworkParameters, AgentParameters, \
     InputEmbedderParameters
 from rl_coach.core_types import RunPhase, EnvironmentSteps, Episode, StateType
-from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplayParameters, MemoryGranularity
-from rl_coach.schedules import ConstantSchedule

 from rl_coach.exploration_policies.e_greedy import EGreedyParameters
 from rl_coach.logger import screen
+from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplayParameters, MemoryGranularity
+from rl_coach.schedules import ConstantSchedule


 class NECNetworkParameters(NetworkParameters):
@@ -20,8 +20,7 @@ import numpy as np

 from rl_coach.agents.dqn_agent import DQNAgentParameters, DQNAlgorithmParameters
 from rl_coach.agents.value_optimization_agent import ValueOptimizationAgent
-from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplay, \
-    EpisodicExperienceReplayParameters
+from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplayParameters


 class PALAlgorithmParameters(DQNAlgorithmParameters):
@@ -17,16 +17,16 @@
 from typing import Union

 import numpy as np

 from rl_coach.agents.policy_optimization_agent import PolicyOptimizationAgent, PolicyGradientRescaler
 from rl_coach.architectures.tensorflow_components.heads.policy_head import PolicyHeadParameters
 from rl_coach.architectures.tensorflow_components.middlewares.fc_middleware import FCMiddlewareParameters
 from rl_coach.base_parameters import NetworkParameters, AlgorithmParameters, \
     AgentParameters, InputEmbedderParameters
 from rl_coach.exploration_policies.additive_noise import AdditiveNoiseParameters
-from rl_coach.spaces import DiscreteActionSpace

 from rl_coach.logger import screen
 from rl_coach.memories.episodic.single_episode_buffer import SingleEpisodeBufferParameters
+from rl_coach.spaces import DiscreteActionSpace


 class PolicyGradientNetworkParameters(NetworkParameters):
@@ -19,12 +19,12 @@ from enum import Enum
 from typing import Union

 import numpy as np
-from rl_coach.core_types import Batch, ActionInfo
-from rl_coach.spaces import DiscreteActionSpace, BoxActionSpace
-from rl_coach.utils import eps

 from rl_coach.agents.agent import Agent
+from rl_coach.core_types import Batch, ActionInfo
 from rl_coach.logger import screen
+from rl_coach.spaces import DiscreteActionSpace, BoxActionSpace
+from rl_coach.utils import eps


 class PolicyGradientRescaler(Enum):
@@ -19,21 +19,21 @@ from collections import OrderedDict
 from typing import Union

 import numpy as np

 from rl_coach.agents.actor_critic_agent import ActorCriticAgent
 from rl_coach.agents.policy_optimization_agent import PolicyGradientRescaler
+from rl_coach.architectures.tensorflow_components.heads.ppo_head import PPOHeadParameters
 from rl_coach.architectures.tensorflow_components.heads.v_head import VHeadParameters
 from rl_coach.architectures.tensorflow_components.middlewares.fc_middleware import FCMiddlewareParameters
 from rl_coach.base_parameters import AlgorithmParameters, NetworkParameters, \
     AgentParameters, InputEmbedderParameters, DistributedTaskParameters
 from rl_coach.core_types import EnvironmentSteps, Batch
 from rl_coach.exploration_policies.additive_noise import AdditiveNoiseParameters
+from rl_coach.logger import screen
 from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplayParameters
 from rl_coach.spaces import DiscreteActionSpace
 from rl_coach.utils import force_list

-from rl_coach.architectures.tensorflow_components.heads.ppo_head import PPOHeadParameters
-from rl_coach.logger import screen


 class PPOCriticNetworkParameters(NetworkParameters):
     def __init__(self):
@@ -17,12 +17,13 @@
 from typing import Union

 import numpy as np
-from rl_coach.architectures.tensorflow_components.heads.quantile_regression_q_head import QuantileRegressionQHeadParameters
-from rl_coach.schedules import LinearSchedule

 from rl_coach.agents.dqn_agent import DQNAgentParameters, DQNNetworkParameters, DQNAlgorithmParameters
 from rl_coach.agents.value_optimization_agent import ValueOptimizationAgent
+from rl_coach.architectures.tensorflow_components.heads.quantile_regression_q_head import \
+    QuantileRegressionQHeadParameters
 from rl_coach.core_types import StateType
+from rl_coach.schedules import LinearSchedule


 class QuantileRegressionDQNNetworkParameters(DQNNetworkParameters):
@@ -17,11 +17,11 @@
 from typing import Union

 import numpy as np
-from rl_coach.memories.non_episodic.prioritized_experience_replay import PrioritizedExperienceReplay
-from rl_coach.spaces import DiscreteActionSpace

 from rl_coach.agents.agent import Agent
 from rl_coach.core_types import ActionInfo, StateType
+from rl_coach.memories.non_episodic.prioritized_experience_replay import PrioritizedExperienceReplay
+from rl_coach.spaces import DiscreteActionSpace


 ## This is an abstract agent - there is no learn_from_batch method ##
@@ -17,9 +17,8 @@
 from typing import List, Tuple

 from rl_coach.base_parameters import Frameworks, AgentParameters
-from rl_coach.spaces import SpacesDefinition

 from rl_coach.logger import failed_imports
+from rl_coach.spaces import SpacesDefinition

 try:
     import tensorflow as tf
@@ -19,12 +19,12 @@ from typing import List

 import numpy as np
 import tensorflow as tf
-from rl_coach.base_parameters import AgentParameters, DistributedTaskParameters
-from rl_coach.spaces import SpacesDefinition
-from rl_coach.utils import force_list, squeeze_list

 from rl_coach.architectures.architecture import Architecture
+from rl_coach.base_parameters import AgentParameters, DistributedTaskParameters
 from rl_coach.core_types import GradientClippingMethod
+from rl_coach.spaces import SpacesDefinition
+from rl_coach.utils import force_list, squeeze_list


 def batchnorm_activation_dropout(input_layer, batchnorm, activation_function, dropout, dropout_rate, layer_idx):
@@ -18,10 +18,9 @@ from typing import List, Union

 import numpy as np
 import tensorflow as tf
-from rl_coach.architectures.tensorflow_components.shared_variables import SharedRunningStats
-from rl_coach.base_parameters import EmbedderScheme

 from rl_coach.architectures.tensorflow_components.architecture import batchnorm_activation_dropout
+from rl_coach.base_parameters import EmbedderScheme
 from rl_coach.core_types import InputEmbedding


@@ -17,10 +17,10 @@
 from typing import List

 import tensorflow as tf
-from rl_coach.architectures.tensorflow_components.architecture import Conv2d
-from rl_coach.base_parameters import EmbedderScheme

+from rl_coach.architectures.tensorflow_components.architecture import Conv2d
 from rl_coach.architectures.tensorflow_components.embedders.embedder import InputEmbedder
+from rl_coach.base_parameters import EmbedderScheme
 from rl_coach.core_types import InputImageEmbedding


@@ -17,10 +17,10 @@
 from typing import List

 import tensorflow as tf
-from rl_coach.architectures.tensorflow_components.architecture import Dense
-from rl_coach.base_parameters import EmbedderScheme

+from rl_coach.architectures.tensorflow_components.architecture import Dense
 from rl_coach.architectures.tensorflow_components.embedders.embedder import InputEmbedder
+from rl_coach.base_parameters import EmbedderScheme
 from rl_coach.core_types import InputVectorEmbedding


@@ -19,15 +19,15 @@ from typing import Dict

 import numpy as np
 import tensorflow as tf

+from rl_coach.architectures.tensorflow_components.architecture import TensorFlowArchitecture
 from rl_coach.architectures.tensorflow_components.heads.head import HeadParameters
 from rl_coach.architectures.tensorflow_components.middlewares.middleware import MiddlewareParameters
 from rl_coach.base_parameters import AgentParameters, InputEmbedderParameters, EmbeddingMergerType
+from rl_coach.core_types import PredictionType
 from rl_coach.spaces import SpacesDefinition, PlanarMapsObservationSpace
 from rl_coach.utils import get_all_subclasses, dynamic_import_and_instantiate_module_from_params

-from rl_coach.architectures.tensorflow_components.architecture import TensorFlowArchitecture
-from rl_coach.core_types import PredictionType


 class GeneralTensorFlowNetwork(TensorFlowArchitecture):
     """
@@ -15,11 +15,11 @@
 #

 import tensorflow as tf
-from rl_coach.base_parameters import AgentParameters
-from rl_coach.spaces import SpacesDefinition

 from rl_coach.architectures.tensorflow_components.heads.head import Head, HeadParameters
+from rl_coach.base_parameters import AgentParameters
 from rl_coach.core_types import QActionStateValue
+from rl_coach.spaces import SpacesDefinition


 class CategoricalQHeadParameters(HeadParameters):
@@ -15,13 +15,12 @@
 #

 import tensorflow as tf
-from rl_coach.architectures.tensorflow_components.architecture import batchnorm_activation_dropout

+from rl_coach.architectures.tensorflow_components.architecture import batchnorm_activation_dropout
 from rl_coach.architectures.tensorflow_components.heads.head import Head, HeadParameters
 from rl_coach.base_parameters import AgentParameters
-from rl_coach.spaces import SpacesDefinition

 from rl_coach.core_types import ActionProbabilities
+from rl_coach.spaces import SpacesDefinition


 class DDPGActorHeadParameters(HeadParameters):
@@ -14,12 +14,12 @@
 # limitations under the License.
 #
 import tensorflow as tf
-from rl_coach.architectures.tensorflow_components.heads.head import HeadParameters

-from rl_coach.base_parameters import AgentParameters
+from rl_coach.architectures.tensorflow_components.heads.head import HeadParameters
 from rl_coach.architectures.tensorflow_components.heads.q_head import QHead
-from rl_coach.spaces import SpacesDefinition
+from rl_coach.base_parameters import AgentParameters
 from rl_coach.memories.non_episodic import differentiable_neural_dictionary
+from rl_coach.spaces import SpacesDefinition


 class DNDQHeadParameters(HeadParameters):
@@ -15,10 +15,10 @@
 #

 import tensorflow as tf
-from rl_coach.architectures.tensorflow_components.heads.head import HeadParameters

-from rl_coach.base_parameters import AgentParameters
+from rl_coach.architectures.tensorflow_components.heads.head import HeadParameters
 from rl_coach.architectures.tensorflow_components.heads.q_head import QHead
+from rl_coach.base_parameters import AgentParameters
 from rl_coach.spaces import SpacesDefinition


@@ -17,10 +17,10 @@ from typing import Type

 import numpy as np
 import tensorflow as tf
-from rl_coach.base_parameters import AgentParameters, Parameters
-from rl_coach.spaces import SpacesDefinition
 from tensorflow.python.ops.losses.losses_impl import Reduction

+from rl_coach.base_parameters import AgentParameters, Parameters
+from rl_coach.spaces import SpacesDefinition
 from rl_coach.utils import force_list


@@ -15,11 +15,11 @@
 #

 import tensorflow as tf
-from rl_coach.base_parameters import AgentParameters
-from rl_coach.spaces import SpacesDefinition

 from rl_coach.architectures.tensorflow_components.heads.head import Head, HeadParameters
+from rl_coach.base_parameters import AgentParameters
 from rl_coach.core_types import Measurements
+from rl_coach.spaces import SpacesDefinition


 class MeasurementsPredictionHeadParameters(HeadParameters):
@@ -15,12 +15,12 @@
 #

 import tensorflow as tf
-from rl_coach.base_parameters import AgentParameters
-from rl_coach.spaces import BoxActionSpace
-from rl_coach.spaces import SpacesDefinition

 from rl_coach.architectures.tensorflow_components.heads.head import Head, HeadParameters
+from rl_coach.base_parameters import AgentParameters
 from rl_coach.core_types import QActionStateValue
+from rl_coach.spaces import BoxActionSpace
+from rl_coach.spaces import SpacesDefinition


 class NAFHeadParameters(HeadParameters):
@@ -16,15 +16,15 @@

 import numpy as np
 import tensorflow as tf

 from rl_coach.architectures.tensorflow_components.heads.head import Head, normalized_columns_initializer, HeadParameters
 from rl_coach.base_parameters import AgentParameters
+from rl_coach.core_types import ActionProbabilities
+from rl_coach.exploration_policies.continuous_entropy import ContinuousEntropyParameters
 from rl_coach.spaces import DiscreteActionSpace, BoxActionSpace, CompoundActionSpace
 from rl_coach.spaces import SpacesDefinition
 from rl_coach.utils import eps

-from rl_coach.core_types import ActionProbabilities
-from rl_coach.exploration_policies.continuous_entropy import ContinuousEntropyParameters


 class PolicyHeadParameters(HeadParameters):
     def __init__(self, activation_function: str ='tanh', name: str='policy_head_params'):
@@ -16,14 +16,14 @@

 import numpy as np
 import tensorflow as tf

+from rl_coach.architectures.tensorflow_components.heads.head import Head, HeadParameters, normalized_columns_initializer
 from rl_coach.base_parameters import AgentParameters
+from rl_coach.core_types import ActionProbabilities
 from rl_coach.spaces import BoxActionSpace, DiscreteActionSpace
 from rl_coach.spaces import SpacesDefinition
 from rl_coach.utils import eps

-from rl_coach.architectures.tensorflow_components.heads.head import Head, HeadParameters, normalized_columns_initializer
-from rl_coach.core_types import ActionProbabilities


 class PPOHeadParameters(HeadParameters):
     def __init__(self, activation_function: str ='tanh', name: str='ppo_head_params'):
@@ -15,11 +15,11 @@
 #

 import tensorflow as tf
-from rl_coach.base_parameters import AgentParameters
-from rl_coach.spaces import SpacesDefinition

 from rl_coach.architectures.tensorflow_components.heads.head import Head, normalized_columns_initializer, HeadParameters
+from rl_coach.base_parameters import AgentParameters
 from rl_coach.core_types import ActionProbabilities
+from rl_coach.spaces import SpacesDefinition


 class PPOVHeadParameters(HeadParameters):
@@ -15,11 +15,11 @@
 #

 import tensorflow as tf
-from rl_coach.base_parameters import AgentParameters
-from rl_coach.spaces import SpacesDefinition, BoxActionSpace, DiscreteActionSpace

 from rl_coach.architectures.tensorflow_components.heads.head import Head, HeadParameters
+from rl_coach.base_parameters import AgentParameters
 from rl_coach.core_types import QActionStateValue
+from rl_coach.spaces import SpacesDefinition, BoxActionSpace, DiscreteActionSpace


 class QHeadParameters(HeadParameters):
|||||||
@@ -15,11 +15,11 @@
|
|||||||
#
|
#
|
||||||
|
|
||||||
import tensorflow as tf
|
import tensorflow as tf
|
||||||
from rl_coach.base_parameters import AgentParameters
|
|
||||||
from rl_coach.spaces import SpacesDefinition
|
|
||||||
|
|
||||||
from rl_coach.architectures.tensorflow_components.heads.head import Head, HeadParameters
|
from rl_coach.architectures.tensorflow_components.heads.head import Head, HeadParameters
|
||||||
|
from rl_coach.base_parameters import AgentParameters
|
||||||
from rl_coach.core_types import QActionStateValue
|
from rl_coach.core_types import QActionStateValue
|
||||||
|
from rl_coach.spaces import SpacesDefinition
|
||||||
|
|
||||||
|
|
||||||
class QuantileRegressionQHeadParameters(HeadParameters):
|
class QuantileRegressionQHeadParameters(HeadParameters):
|
||||||
|
|||||||
@@ -15,11 +15,11 @@
 #

 import tensorflow as tf
-from rl_coach.base_parameters import AgentParameters
-from rl_coach.spaces import SpacesDefinition

 from rl_coach.architectures.tensorflow_components.heads.head import Head, normalized_columns_initializer, HeadParameters
+from rl_coach.base_parameters import AgentParameters
 from rl_coach.core_types import VStateValue
+from rl_coach.spaces import SpacesDefinition


 class VHeadParameters(HeadParameters):
@@ -16,10 +16,10 @@
 from typing import Union, List

 import tensorflow as tf
-from rl_coach.architectures.tensorflow_components.middlewares.middleware import Middleware, MiddlewareParameters
-from rl_coach.base_parameters import MiddlewareScheme

 from rl_coach.architectures.tensorflow_components.architecture import batchnorm_activation_dropout, Dense
+from rl_coach.architectures.tensorflow_components.middlewares.middleware import Middleware, MiddlewareParameters
+from rl_coach.base_parameters import MiddlewareScheme
 from rl_coach.core_types import Middleware_FC_Embedding


@@ -17,10 +17,10 @@

 import numpy as np
 import tensorflow as tf
-from rl_coach.architectures.tensorflow_components.middlewares.middleware import Middleware, MiddlewareParameters
-from rl_coach.base_parameters import MiddlewareScheme

 from rl_coach.architectures.tensorflow_components.architecture import batchnorm_activation_dropout
+from rl_coach.architectures.tensorflow_components.middlewares.middleware import Middleware, MiddlewareParameters
+from rl_coach.base_parameters import MiddlewareScheme
 from rl_coach.core_types import Middleware_LSTM_Embedding


@@ -16,8 +16,8 @@
 from typing import Type, Union, List

 import tensorflow as tf
-from rl_coach.base_parameters import MiddlewareScheme, Parameters

+from rl_coach.base_parameters import MiddlewareScheme, Parameters
 from rl_coach.core_types import MiddlewareEmbedding


@@ -15,12 +15,12 @@
 #


+import copy
 from enum import Enum
-from typing import List, Union, Dict, Any, Type
 from random import shuffle
+from typing import List, Union, Dict, Any, Type

 import numpy as np
-import copy


 ActionType = Union[int, float, np.ndarray, List]
 GoalType = Union[None, np.ndarray]
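The hunk above moves "import copy" from the third-party block up into the standard-library block. That matches PEP 8's convention of three import groups, each separated by a blank line: standard library, then third-party, then local application imports. A short illustration mirroring the reordered file (treating numpy as the only third-party dependency in play):

# Standard library imports, ordered alphabetically by module.
import copy
from enum import Enum
from random import shuffle
from typing import List, Union, Dict, Any, Type

# Third-party imports in their own group.
import numpy as np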
Binary file not shown.
Binary file not shown.
@@ -18,6 +18,7 @@ import math

 import matplotlib.pyplot as plt
 import numpy as np

 from rl_coach.filters.observation.observation_stacking_filter import LazyStack

+
@@ -1,10 +1,25 @@
+#
+# Copyright (c) 2017 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
 import random
 import sys
 from os import path, environ
 
-from rl_coach.filters.observation.observation_to_uint8_filter import ObservationToUInt8Filter
 
 from rl_coach.filters.observation.observation_rgb_to_y_filter import ObservationRGBToYFilter
+from rl_coach.filters.observation.observation_to_uint8_filter import ObservationToUInt8Filter
 
 try:
     if 'CARLA_ROOT' in environ:
@@ -26,18 +26,18 @@ from os import path, environ
 from typing import Union, List
 
 import numpy as np
 
 from rl_coach.base_parameters import VisualizationParameters
 from rl_coach.environments.environment import Environment, EnvironmentParameters, LevelSelection
 from rl_coach.filters.action.full_discrete_action_space_map import FullDiscreteActionSpaceMap
 from rl_coach.filters.filter import InputFilter, OutputFilter
 from rl_coach.filters.observation.observation_rescale_to_size_filter import ObservationRescaleToSizeFilter
+from rl_coach.filters.observation.observation_rgb_to_y_filter import ObservationRGBToYFilter
 from rl_coach.filters.observation.observation_stacking_filter import ObservationStackingFilter
 from rl_coach.filters.observation.observation_to_uint8_filter import ObservationToUInt8Filter
 from rl_coach.spaces import MultiSelectActionSpace, ImageObservationSpace, \
     VectorObservationSpace, StateSpace
 
-from rl_coach.filters.observation.observation_rgb_to_y_filter import ObservationRGBToYFilter
 
 
 # enum of the available levels and their path
 class DoomLevel(Enum):
@@ -20,17 +20,17 @@ from collections import OrderedDict
 from typing import Union, List, Tuple, Dict
 
 import numpy as np
 
+from rl_coach import logger
 from rl_coach.base_parameters import Parameters
 from rl_coach.base_parameters import VisualizationParameters
 from rl_coach.core_types import GoalType, ActionType, EnvResponse, RunPhase
+from rl_coach.environments.environment_interface import EnvironmentInterface
+from rl_coach.logger import screen
 from rl_coach.renderer import Renderer
 from rl_coach.spaces import ActionSpace, ObservationSpace, DiscreteActionSpace, RewardSpace, StateSpace
 from rl_coach.utils import squeeze_list, force_list
 
-from rl_coach import logger
-from rl_coach.environments.environment_interface import EnvironmentInterface
-from rl_coach.logger import screen
 
 
 class LevelSelection(object):
     def __init__(self, level: str):
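The hunk above keeps two different imports of the same logging module next to each other: "from rl_coach import logger" binds the module object, while "from rl_coach.logger import screen" binds a single name out of it. Both survive the reordering because they are distinct bindings, as this small illustration (hypothetical, not from the commit) shows:

# Hypothetical illustration: the two import forms coexist because each binds a
# different name in the importing module's namespace.
from rl_coach import logger          # binds the module itself as "logger"
from rl_coach.logger import screen   # binds the "screen" object from that module

print(type(logger))  # <class 'module'>
print(type(screen))  # the screen helper object defined in rl_coach.logger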
@@ -16,9 +16,8 @@
 
 from typing import Union, Dict
 
-from rl_coach.spaces import ActionSpace
-
 from rl_coach.core_types import ActionType, EnvResponse, RunPhase
+from rl_coach.spaces import ActionSpace
 
 
 class EnvironmentInterface(object):
@@ -1,9 +1,25 @@
-import numpy as np
-import gym
+#
+# Copyright (c) 2017 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
 
 import os
 
+import gym
+import numpy as np
 from gym import spaces
 from gym.envs.registration import EnvSpec
 
 from mujoco_py import load_model_from_path, MjSim, MjViewer, MjRenderContextOffscreen
 
 
@@ -19,6 +19,7 @@ from enum import Enum
 from typing import Union, List
 
 import numpy as np
+
 from rl_coach.filters.observation.observation_move_axis_filter import ObservationMoveAxisFilter
 
 try:
@@ -1,8 +1,25 @@
-import numpy as np
-import gym
-from gym import spaces
+#
+# Copyright (c) 2017 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
 
 import random
 
+import gym
+import numpy as np
+from gym import spaces
 
 
 class BitFlip(gym.Env):
     metadata = {
@@ -1,8 +1,25 @@
-import numpy as np
-import gym
-from gym import spaces
+#
+# Copyright (c) 2017 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
 
 from enum import Enum
 
+import gym
+import numpy as np
+from gym import spaces
 
 
 class ExplorationChain(gym.Env):
     metadata = {
@@ -17,11 +17,11 @@
 from typing import List
 
 import numpy as np
-from rl_coach.schedules import Schedule, LinearSchedule
-from rl_coach.spaces import ActionSpace, BoxActionSpace
 
 from rl_coach.core_types import RunPhase, ActionType
 from rl_coach.exploration_policies.exploration_policy import ExplorationPolicy, ExplorationParameters
+from rl_coach.schedules import Schedule, LinearSchedule
+from rl_coach.spaces import ActionSpace, BoxActionSpace
 
 
 # TODO: consider renaming to gaussian sampling

@@ -17,11 +17,11 @@
 from typing import List
 
 import numpy as np
-from rl_coach.schedules import Schedule
-from rl_coach.spaces import ActionSpace
 
 from rl_coach.core_types import RunPhase, ActionType
 from rl_coach.exploration_policies.exploration_policy import ExplorationPolicy, ExplorationParameters
+from rl_coach.schedules import Schedule
+from rl_coach.spaces import ActionSpace
 
 
 class BoltzmannParameters(ExplorationParameters):

@@ -17,13 +17,13 @@
 from typing import List
 
 import numpy as np
-from rl_coach.exploration_policies.additive_noise import AdditiveNoiseParameters
-from rl_coach.exploration_policies.e_greedy import EGreedy, EGreedyParameters
-from rl_coach.schedules import Schedule, LinearSchedule
-from rl_coach.spaces import ActionSpace
 
 from rl_coach.core_types import RunPhase, ActionType
+from rl_coach.exploration_policies.additive_noise import AdditiveNoiseParameters
+from rl_coach.exploration_policies.e_greedy import EGreedy, EGreedyParameters
 from rl_coach.exploration_policies.exploration_policy import ExplorationParameters
+from rl_coach.schedules import Schedule, LinearSchedule
+from rl_coach.spaces import ActionSpace
 
 
 class BootstrappedParameters(EGreedyParameters):

@@ -17,10 +17,10 @@
 from typing import List
 
 import numpy as np
-from rl_coach.spaces import ActionSpace
 
 from rl_coach.core_types import RunPhase, ActionType
 from rl_coach.exploration_policies.exploration_policy import ExplorationPolicy, ExplorationParameters
+from rl_coach.spaces import ActionSpace
 
 
 class CategoricalParameters(ExplorationParameters):

@@ -17,15 +17,15 @@
 from typing import List
 
 import numpy as np
 
+from rl_coach.core_types import RunPhase, ActionType
 from rl_coach.exploration_policies.additive_noise import AdditiveNoiseParameters
+from rl_coach.exploration_policies.exploration_policy import ExplorationParameters
+from rl_coach.exploration_policies.exploration_policy import ExplorationPolicy
 from rl_coach.schedules import Schedule, LinearSchedule
 from rl_coach.spaces import ActionSpace, DiscreteActionSpace, BoxActionSpace
 from rl_coach.utils import dynamic_import_and_instantiate_module_from_params
 
-from rl_coach.core_types import RunPhase, ActionType
-from rl_coach.exploration_policies.exploration_policy import ExplorationParameters
-from rl_coach.exploration_policies.exploration_policy import ExplorationPolicy
 
 
 class EGreedyParameters(ExplorationParameters):
     def __init__(self):
@@ -17,9 +17,8 @@
 from typing import List
 
 from rl_coach.base_parameters import Parameters
-from rl_coach.spaces import ActionSpace
-
 from rl_coach.core_types import RunPhase, ActionType
+from rl_coach.spaces import ActionSpace
 
 
 class ExplorationParameters(Parameters):

@@ -17,10 +17,10 @@
 from typing import List
 
 import numpy as np
-from rl_coach.spaces import ActionSpace, DiscreteActionSpace, BoxActionSpace
 
 from rl_coach.core_types import ActionType
 from rl_coach.exploration_policies.exploration_policy import ExplorationPolicy, ExplorationParameters
+from rl_coach.spaces import ActionSpace, DiscreteActionSpace, BoxActionSpace
 
 
 class GreedyParameters(ExplorationParameters):

@@ -17,10 +17,10 @@
 from typing import List
 
 import numpy as np
-from rl_coach.spaces import ActionSpace, BoxActionSpace, GoalsSpace
 
 from rl_coach.core_types import RunPhase, ActionType
 from rl_coach.exploration_policies.exploration_policy import ExplorationPolicy, ExplorationParameters
+from rl_coach.spaces import ActionSpace, BoxActionSpace, GoalsSpace
 
 
 # Based on on the description in:

@@ -17,12 +17,12 @@
 from typing import List
 
 import numpy as np
-from rl_coach.schedules import Schedule, LinearSchedule
 from scipy.stats import truncnorm
-from rl_coach.spaces import ActionSpace, BoxActionSpace
 
 from rl_coach.core_types import RunPhase, ActionType
 from rl_coach.exploration_policies.exploration_policy import ExplorationPolicy, ExplorationParameters
+from rl_coach.schedules import Schedule, LinearSchedule
+from rl_coach.spaces import ActionSpace, BoxActionSpace
 
 
 class TruncatedNormalParameters(ExplorationParameters):

@@ -17,13 +17,13 @@
 from typing import List
 
 import numpy as np
-from rl_coach.exploration_policies.additive_noise import AdditiveNoiseParameters
-from rl_coach.exploration_policies.e_greedy import EGreedy, EGreedyParameters
-from rl_coach.schedules import Schedule, LinearSchedule, PieceWiseSchedule
-from rl_coach.spaces import ActionSpace
 
 from rl_coach.core_types import RunPhase, ActionType, EnvironmentSteps
+from rl_coach.exploration_policies.additive_noise import AdditiveNoiseParameters
+from rl_coach.exploration_policies.e_greedy import EGreedy, EGreedyParameters
 from rl_coach.exploration_policies.exploration_policy import ExplorationParameters
+from rl_coach.schedules import Schedule, LinearSchedule, PieceWiseSchedule
+from rl_coach.spaces import ActionSpace
 
 
 class UCBParameters(EGreedyParameters):
@@ -14,10 +14,9 @@
 # limitations under the License.
 #
 
-from rl_coach.spaces import ActionSpace
-
 from rl_coach.core_types import ActionType
 from rl_coach.filters.filter import Filter
+from rl_coach.spaces import ActionSpace
 
 
 class ActionFilter(Filter):

@@ -17,8 +17,8 @@
 from typing import Union, List
 
 import numpy as np
-from rl_coach.filters.action.box_discretization import BoxDiscretization
 
+from rl_coach.filters.action.box_discretization import BoxDiscretization
 from rl_coach.filters.action.partial_discrete_action_space_map import PartialDiscreteActionSpaceMap
 from rl_coach.spaces import AttentionActionSpace, BoxActionSpace, DiscreteActionSpace
 

@@ -17,10 +17,10 @@
 from typing import Union
 
 import numpy as np
-from rl_coach.spaces import BoxActionSpace
 
 from rl_coach.core_types import ActionType
 from rl_coach.filters.action.action_filter import ActionFilter
+from rl_coach.spaces import BoxActionSpace
 
 
 class BoxMasking(ActionFilter):

@@ -17,10 +17,10 @@
 from typing import Union
 
 import numpy as np
-from rl_coach.spaces import BoxActionSpace
 
 from rl_coach.core_types import ActionType
 from rl_coach.filters.action.action_filter import ActionFilter
+from rl_coach.spaces import BoxActionSpace
 
 
 class LinearBoxToBoxMap(ActionFilter):

@@ -16,10 +16,9 @@
 
 from typing import List
 
-from rl_coach.spaces import DiscreteActionSpace, ActionSpace
-
 from rl_coach.core_types import ActionType
 from rl_coach.filters.action.action_filter import ActionFilter
+from rl_coach.spaces import DiscreteActionSpace, ActionSpace
 
 
 class PartialDiscreteActionSpaceMap(ActionFilter):

@@ -19,8 +19,8 @@ from collections import OrderedDict
 from copy import deepcopy
 from typing import Dict, Union, List
 
-from rl_coach.spaces import ActionSpace, RewardSpace, ObservationSpace
 from rl_coach.core_types import EnvResponse, ActionInfo, Transition
+from rl_coach.spaces import ActionSpace, RewardSpace, ObservationSpace
 from rl_coach.utils import force_list
 
 
@@ -16,10 +16,10 @@
 
 
 import numpy as np
-from rl_coach.spaces import ObservationSpace
 
 from rl_coach.core_types import ObservationType
 from rl_coach.filters.observation.observation_filter import ObservationFilter
+from rl_coach.spaces import ObservationSpace
 
 
 class ObservationClippingFilter(ObservationFilter):

@@ -16,10 +16,10 @@
 from typing import Union, Tuple
 
 import numpy as np
-from rl_coach.spaces import ObservationSpace
 
 from rl_coach.core_types import ObservationType
 from rl_coach.filters.observation.observation_filter import ObservationFilter
+from rl_coach.spaces import ObservationSpace
 
 
 class ObservationCropFilter(ObservationFilter):

@@ -15,10 +15,10 @@
 #
 
 import numpy as np
-from rl_coach.spaces import ObservationSpace, PlanarMapsObservationSpace
 
 from rl_coach.core_types import ObservationType
 from rl_coach.filters.observation.observation_filter import ObservationFilter
+from rl_coach.spaces import ObservationSpace, PlanarMapsObservationSpace
 
 
 class ObservationMoveAxisFilter(ObservationFilter):

@@ -16,11 +16,11 @@
 from typing import List
 
 import numpy as np
-from rl_coach.spaces import ObservationSpace
 
 from rl_coach.architectures.tensorflow_components.shared_variables import SharedRunningStats
 from rl_coach.core_types import ObservationType
 from rl_coach.filters.observation.observation_filter import ObservationFilter
+from rl_coach.spaces import ObservationSpace
 
 
 class ObservationNormalizationFilter(ObservationFilter):

@@ -17,10 +17,9 @@ import copy
 from enum import Enum
 from typing import List
 
-from rl_coach.spaces import ObservationSpace, VectorObservationSpace
-
 from rl_coach.core_types import ObservationType
 from rl_coach.filters.observation.observation_filter import ObservationFilter
+from rl_coach.spaces import ObservationSpace, VectorObservationSpace
 
 
 class ObservationReductionBySubPartsNameFilter(ObservationFilter):

@@ -17,10 +17,10 @@
 from enum import Enum
 
 import scipy.ndimage
-from rl_coach.spaces import ObservationSpace
 
 from rl_coach.core_types import ObservationType
 from rl_coach.filters.observation.observation_filter import ObservationFilter
+from rl_coach.spaces import ObservationSpace
 
 
 # imresize interpolation types as defined by scipy here:

@@ -19,10 +19,10 @@ from enum import Enum
 
 import numpy as np
 import scipy.ndimage
-from rl_coach.spaces import ObservationSpace, PlanarMapsObservationSpace, ImageObservationSpace
 
 from rl_coach.core_types import ObservationType
 from rl_coach.filters.observation.observation_filter import ObservationFilter
+from rl_coach.spaces import ObservationSpace, PlanarMapsObservationSpace, ImageObservationSpace
 
 
 # imresize interpolation types as defined by scipy here:

@@ -14,10 +14,9 @@
 # limitations under the License.
 #
 
-from rl_coach.spaces import ObservationSpace
-
 from rl_coach.core_types import ObservationType
 from rl_coach.filters.observation.observation_filter import ObservationFilter
+from rl_coach.spaces import ObservationSpace
 
 
 class ObservationRGBToYFilter(ObservationFilter):

@@ -15,10 +15,10 @@
 #
 
 import numpy as np
-from rl_coach.spaces import ObservationSpace
 
 from rl_coach.core_types import ObservationType
 from rl_coach.filters.observation.observation_filter import ObservationFilter
+from rl_coach.spaces import ObservationSpace
 
 
 class ObservationSqueezeFilter(ObservationFilter):

@@ -18,10 +18,10 @@ import copy
 from collections import deque
 
 import numpy as np
-from rl_coach.spaces import ObservationSpace
 
 from rl_coach.core_types import ObservationType
 from rl_coach.filters.observation.observation_filter import ObservationFilter
+from rl_coach.spaces import ObservationSpace
 
 
 class LazyStack(object):

@@ -15,10 +15,10 @@
 #
 
 import numpy as np
-from rl_coach.spaces import ObservationSpace
 
 from rl_coach.core_types import ObservationType
 from rl_coach.filters.observation.observation_filter import ObservationFilter
+from rl_coach.spaces import ObservationSpace
 
 
 class ObservationToUInt8Filter(ObservationFilter):
@@ -15,10 +15,10 @@
 #
 
 import numpy as np
-from rl_coach.spaces import RewardSpace
 
 from rl_coach.core_types import RewardType
 from rl_coach.filters.reward.reward_filter import RewardFilter
+from rl_coach.spaces import RewardSpace
 
 
 class RewardClippingFilter(RewardFilter):

@@ -16,11 +16,11 @@
 
 
 import numpy as np
-from rl_coach.spaces import RewardSpace
 
 from rl_coach.architectures.tensorflow_components.shared_variables import SharedRunningStats
 from rl_coach.core_types import RewardType
 from rl_coach.filters.reward.reward_filter import RewardFilter
+from rl_coach.spaces import RewardSpace
 
 
 class RewardNormalizationFilter(RewardFilter):

@@ -14,10 +14,9 @@
 # limitations under the License.
 #
 
-from rl_coach.spaces import RewardSpace
-
 from rl_coach.core_types import RewardType
 from rl_coach.filters.reward.reward_filter import RewardFilter
+from rl_coach.spaces import RewardSpace
 
 
 class RewardRescaleFilter(RewardFilter):
@@ -15,11 +15,11 @@
 #
 from typing import Tuple, List
 
-from rl_coach.base_parameters import AgentParameters, VisualizationParameters, TaskParameters, PresetValidationParameters
+from rl_coach.base_parameters import AgentParameters, VisualizationParameters, TaskParameters, \
+    PresetValidationParameters
 from rl_coach.environments.environment import EnvironmentParameters, Environment
-from rl_coach.level_manager import LevelManager
-
 from rl_coach.graph_managers.graph_manager import GraphManager, ScheduleParameters
+from rl_coach.level_manager import LevelManager
 from rl_coach.utils import short_dynamic_import
 
 

@@ -18,20 +18,19 @@ import copy
 import os
 import time
 from collections import OrderedDict
-from typing import List, Tuple
 from distutils.dir_util import copy_tree, remove_tree
+from typing import List, Tuple
 
-import numpy as np
-from rl_coach.base_parameters import iterable_to_items, TaskParameters, DistributedTaskParameters, VisualizationParameters, \
+from rl_coach.base_parameters import iterable_to_items, TaskParameters, DistributedTaskParameters, \
+    VisualizationParameters, \
     Parameters, PresetValidationParameters
 from rl_coach.core_types import TotalStepsCounter, RunPhase, PlayingStepsType, TrainingSteps, EnvironmentEpisodes, \
     EnvironmentSteps, \
     StepMethod
 from rl_coach.environments.environment import Environment
 from rl_coach.level_manager import LevelManager
-from rl_coach.utils import set_cpu
-
 from rl_coach.logger import screen, Logger
+from rl_coach.utils import set_cpu
 
 
 class ScheduleParameters(Parameters):

@@ -13,17 +13,16 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-import numpy as np
 from typing import List, Union, Tuple
 
-from rl_coach.base_parameters import AgentParameters, VisualizationParameters, TaskParameters, PresetValidationParameters
+from rl_coach.base_parameters import AgentParameters, VisualizationParameters, TaskParameters, \
+    PresetValidationParameters
+from rl_coach.core_types import EnvironmentSteps
 from rl_coach.environments.environment import EnvironmentParameters, Environment
+from rl_coach.graph_managers.graph_manager import GraphManager, ScheduleParameters
 from rl_coach.level_manager import LevelManager
 from rl_coach.utils import short_dynamic_import
 
-from rl_coach.core_types import EnvironmentSteps
-from rl_coach.graph_managers.graph_manager import GraphManager, ScheduleParameters
-
 
 class HACGraphManager(GraphManager):
     """
@@ -16,14 +16,14 @@
 
 from typing import List, Union, Tuple
 
-from rl_coach.base_parameters import AgentParameters, VisualizationParameters, TaskParameters, PresetValidationParameters
+from rl_coach.base_parameters import AgentParameters, VisualizationParameters, TaskParameters, \
+    PresetValidationParameters
+from rl_coach.core_types import EnvironmentSteps
 from rl_coach.environments.environment import EnvironmentParameters, Environment
+from rl_coach.graph_managers.graph_manager import GraphManager, ScheduleParameters
 from rl_coach.level_manager import LevelManager
 from rl_coach.utils import short_dynamic_import
 
-from rl_coach.core_types import EnvironmentSteps
-from rl_coach.graph_managers.graph_manager import GraphManager, ScheduleParameters
-
 
 class HRLGraphManager(GraphManager):
     """

@@ -14,14 +14,13 @@
 # limitations under the License.
 #
 import copy
-from typing import Union, Dict, Tuple, Type
+from typing import Union, Dict
 
-from rl_coach.environments.environment import Environment
-from rl_coach.environments.environment_interface import EnvironmentInterface
-from rl_coach.spaces import ActionSpace, SpacesDefinition
-
 from rl_coach.agents.composite_agent import CompositeAgent
 from rl_coach.core_types import EnvResponse, ActionInfo, RunPhase, ActionType, EnvironmentSteps
+from rl_coach.environments.environment import Environment
+from rl_coach.environments.environment_interface import EnvironmentInterface
+from rl_coach.spaces import ActionSpace, SpacesDefinition
 
 
 class LevelManager(EnvironmentInterface):
@@ -17,10 +17,10 @@
 from typing import List, Tuple, Union, Dict, Any
 
 import numpy as np
-from rl_coach.utils import ReaderWriterLock
 
 from rl_coach.core_types import Transition, Episode
 from rl_coach.memories.memory import Memory, MemoryGranularity, MemoryParameters
+from rl_coach.utils import ReaderWriterLock
 
 
 class EpisodicExperienceReplayParameters(MemoryParameters):

@@ -21,7 +21,8 @@ from typing import Tuple, List
 import numpy as np
 
 from rl_coach.core_types import Episode, Transition
-from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplayParameters, EpisodicExperienceReplay
+from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplayParameters, \
+    EpisodicExperienceReplay
 from rl_coach.memories.non_episodic.experience_replay import MemoryGranularity
 from rl_coach.spaces import GoalsSpace
 

@@ -14,9 +14,8 @@
 # limitations under the License.
 #
 
-from rl_coach.memories.memory import MemoryGranularity, MemoryParameters
-
 from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplay
+from rl_coach.memories.memory import MemoryGranularity, MemoryParameters
 
 
 class SingleEpisodeBufferParameters(MemoryParameters):

@@ -17,10 +17,10 @@
 from typing import List, Tuple, Union, Dict, Any
 
 import numpy as np
-from rl_coach.utils import ReaderWriterLock
 
 from rl_coach.core_types import Transition
 from rl_coach.memories.memory import Memory, MemoryGranularity, MemoryParameters
+from rl_coach.utils import ReaderWriterLock
 
 
 class ExperienceReplayParameters(MemoryParameters):

@@ -20,11 +20,11 @@ from enum import Enum
 from typing import List, Tuple, Any
 
 import numpy as np
-from rl_coach.memories.memory import MemoryGranularity
-from rl_coach.schedules import Schedule, ConstantSchedule
 
 from rl_coach.core_types import Transition
+from rl_coach.memories.memory import MemoryGranularity
 from rl_coach.memories.non_episodic.experience_replay import ExperienceReplayParameters, ExperienceReplay
+from rl_coach.schedules import Schedule, ConstantSchedule
 
 
 class PrioritizedExperienceReplayParameters(ExperienceReplayParameters):
@@ -1,14 +1,13 @@
 from rl_coach.agents.actor_critic_agent import ActorCriticAgentParameters
 from rl_coach.architectures.tensorflow_components.middlewares.fc_middleware import FCMiddlewareParameters
 from rl_coach.base_parameters import VisualizationParameters, PresetValidationParameters
+from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps, RunPhase
 from rl_coach.environments.environment import SingleLevelSelection, SelectedPhaseOnlyDumpMethod, MaxDumpMethod
 from rl_coach.environments.gym_environment import Atari, atari_deterministic_v4
+from rl_coach.exploration_policies.categorical import CategoricalParameters
 from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
 from rl_coach.graph_managers.graph_manager import ScheduleParameters
 
-from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps, RunPhase
-from rl_coach.exploration_policies.categorical import CategoricalParameters
-
 ####################
 # Graph Scheduling #
 ####################

@@ -1,14 +1,13 @@
 from rl_coach.agents.actor_critic_agent import ActorCriticAgentParameters
 from rl_coach.architectures.tensorflow_components.middlewares.lstm_middleware import LSTMMiddlewareParameters
 from rl_coach.base_parameters import VisualizationParameters, MiddlewareScheme, PresetValidationParameters
+from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps, RunPhase
 from rl_coach.environments.environment import SingleLevelSelection, SelectedPhaseOnlyDumpMethod, MaxDumpMethod
 from rl_coach.environments.gym_environment import Atari, atari_deterministic_v4, AtariInputFilter
+from rl_coach.exploration_policies.categorical import CategoricalParameters
 from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
 from rl_coach.graph_managers.graph_manager import ScheduleParameters
 
-from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps, RunPhase
-from rl_coach.exploration_policies.categorical import CategoricalParameters
-
 ####################
 # Graph Scheduling #
 ####################
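Since the commit only moves import statements, drops a few unused ones (for example the numpy import removed from the graph manager hunk above), and prepends license headers, the main regression risk is a newly introduced circular import or a name that no longer resolves at import time. A quick smoke test is to import every module in the package and report failures; the sketch below assumes rl_coach is importable in the current environment and is not a script shipped with this commit. Failures from optional backends (mujoco_py, CARLA, ViZDoom) are expected when those extras are not installed.

# Hypothetical smoke test: import every rl_coach module and list the failures.
# A circular import or missing name introduced by the reshuffle surfaces here.
import importlib
import pkgutil

import rl_coach

for mod in pkgutil.walk_packages(rl_coach.__path__, prefix="rl_coach."):
    try:
        importlib.import_module(mod.name)
    except Exception as exc:  # keep going so every failing module is reported
        print(f"{mod.name}: {exc}")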
Some files were not shown because too many files have changed in this diff.