mirror of https://github.com/gryf/coach.git (synced 2026-02-20 00:35:56 +01:00)
removing datasets + imports optimization
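
The import changes in the hunks below all follow the same clean-up pattern: symbols that were imported in more than one block are collapsed into a single occurrence and the remaining imports are regrouped. A rough before/after sketch of that pattern, using only module and symbol names that appear in the first hunk (the exact final grouping is not visible in this extract):

    # Before: DiscreteActionSpace and last_sample are imported twice, in two separate blocks.
    from rl_coach.spaces import DiscreteActionSpace
    from rl_coach.utils import last_sample

    from rl_coach.logger import screen
    from rl_coach.spaces import DiscreteActionSpace   # duplicate
    from rl_coach.utils import last_sample             # duplicate

    # After: each symbol is imported exactly once, in a single block.
    from rl_coach.logger import screen
    from rl_coach.spaces import DiscreteActionSpace
    from rl_coach.utils import last_sample
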
@@ -18,18 +18,17 @@ from typing import Union

import numpy as np
import scipy.signal

from rl_coach.agents.policy_optimization_agent import PolicyOptimizationAgent, PolicyGradientRescaler
from rl_coach.architectures.tensorflow_components.heads.policy_head import PolicyHeadParameters
from rl_coach.architectures.tensorflow_components.heads.v_head import VHeadParameters
from rl_coach.architectures.tensorflow_components.middlewares.fc_middleware import FCMiddlewareParameters
from rl_coach.base_parameters import AlgorithmParameters, NetworkParameters, \
    AgentParameters, InputEmbedderParameters
from rl_coach.core_types import QActionStateValue
from rl_coach.spaces import DiscreteActionSpace
from rl_coach.utils import last_sample

from rl_coach.logger import screen
from rl_coach.memories.episodic.single_episode_buffer import SingleEpisodeBufferParameters
from rl_coach.spaces import DiscreteActionSpace
from rl_coach.utils import last_sample


class ActorCriticAlgorithmParameters(AlgorithmParameters):

@@ -20,20 +20,19 @@ from collections import OrderedDict
from typing import Dict, List, Union, Tuple

import numpy as np
from pandas import read_pickle
from six.moves import range

from rl_coach.agents.agent_interface import AgentInterface
from rl_coach.architectures.network_wrapper import NetworkWrapper
from rl_coach.base_parameters import AgentParameters, DistributedTaskParameters
from rl_coach.core_types import RunPhase, PredictionType, EnvironmentEpisodes, ActionType, Batch, Episode, StateType
from rl_coach.core_types import Transition, ActionInfo, TrainingSteps, EnvironmentSteps, EnvResponse
from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplay
from pandas import read_pickle
from six.moves import range
from rl_coach.spaces import SpacesDefinition, VectorObservationSpace, GoalsSpace, AttentionActionSpace
from rl_coach.utils import Signal, force_list, set_cpu
from rl_coach.utils import dynamic_import_and_instantiate_module_from_params

from rl_coach.architectures.network_wrapper import NetworkWrapper
from rl_coach.logger import screen, Logger, EpisodeLogger
from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplay
from rl_coach.spaces import SpacesDefinition, VectorObservationSpace, GoalsSpace, AttentionActionSpace
from rl_coach.utils import Signal, force_list
from rl_coach.utils import dynamic_import_and_instantiate_module_from_params


class Agent(AgentInterface):

@@ -17,14 +17,14 @@
from typing import Union

import numpy as np

from rl_coach.agents.imitation_agent import ImitationAgent
from rl_coach.architectures.tensorflow_components.heads.policy_head import PolicyHeadParameters
from rl_coach.architectures.tensorflow_components.middlewares.fc_middleware import FCMiddlewareParameters
from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplayParameters

from rl_coach.base_parameters import AgentParameters, AlgorithmParameters, NetworkParameters, InputEmbedderParameters, \
    MiddlewareScheme
from rl_coach.exploration_policies.e_greedy import EGreedyParameters
from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplayParameters


class BCAlgorithmParameters(AlgorithmParameters):

@@ -17,9 +17,9 @@
from typing import Union

import numpy as np

from rl_coach.agents.dqn_agent import DQNAgentParameters, DQNNetworkParameters
from rl_coach.agents.value_optimization_agent import ValueOptimizationAgent

from rl_coach.exploration_policies.bootstrapped import BootstrappedParameters

@@ -17,15 +17,15 @@
from typing import Union

import numpy as np

from rl_coach.agents.dqn_agent import DQNNetworkParameters, DQNAlgorithmParameters
from rl_coach.agents.value_optimization_agent import ValueOptimizationAgent
from rl_coach.architectures.tensorflow_components.heads.categorical_q_head import CategoricalQHeadParameters
from rl_coach.base_parameters import AgentParameters
from rl_coach.memories.non_episodic.experience_replay import ExperienceReplayParameters
from rl_coach.schedules import LinearSchedule

from rl_coach.core_types import StateType
from rl_coach.exploration_policies.e_greedy import EGreedyParameters
from rl_coach.memories.non_episodic.experience_replay import ExperienceReplayParameters
from rl_coach.schedules import LinearSchedule


class CategoricalDQNNetworkParameters(DQNNetworkParameters):

@@ -20,21 +20,21 @@ from random import shuffle
from typing import Union

import numpy as np

from rl_coach.agents.actor_critic_agent import ActorCriticAgent
from rl_coach.agents.policy_optimization_agent import PolicyGradientRescaler
from rl_coach.architectures.tensorflow_components.heads.ppo_head import PPOHeadParameters
from rl_coach.architectures.tensorflow_components.heads.v_head import VHeadParameters
from rl_coach.architectures.tensorflow_components.middlewares.fc_middleware import FCMiddlewareParameters
from rl_coach.base_parameters import AlgorithmParameters, NetworkParameters, \
    AgentParameters, InputEmbedderParameters
from rl_coach.core_types import EnvironmentSteps, Batch, EnvResponse, StateType
from rl_coach.exploration_policies.additive_noise import AdditiveNoiseParameters
from rl_coach.logger import screen
from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplayParameters
from rl_coach.schedules import ConstantSchedule
from rl_coach.spaces import DiscreteActionSpace

from rl_coach.architectures.tensorflow_components.heads.ppo_head import PPOHeadParameters
from rl_coach.logger import screen


class ClippedPPONetworkParameters(NetworkParameters):
    def __init__(self):

@@ -20,15 +20,14 @@ from enum import Enum
from typing import Union, List, Dict

import numpy as np

from rl_coach.agents.agent_interface import AgentInterface
from rl_coach.base_parameters import AgentParameters, VisualizationParameters
# from rl_coach.environments.environment_interface import ActionSpace
from rl_coach.spaces import ActionSpace
from rl_coach.spaces import AgentSelection, AttentionActionSpace, ObservationSpace, SpacesDefinition
from rl_coach.utils import short_dynamic_import

from rl_coach.core_types import ActionInfo, EnvResponse, ActionType, RunPhase
from rl_coach.filters.observation.observation_crop_filter import ObservationCropFilter
from rl_coach.spaces import ActionSpace
from rl_coach.spaces import AgentSelection, AttentionActionSpace, SpacesDefinition
from rl_coach.utils import short_dynamic_import


class DecisionPolicy(object):

@@ -19,19 +19,19 @@ from typing import Union
from collections import OrderedDict

import numpy as np

from rl_coach.agents.actor_critic_agent import ActorCriticAgent
from rl_coach.agents.agent import Agent
from rl_coach.architectures.tensorflow_components.heads.ddpg_actor_head import DDPGActorHeadParameters
from rl_coach.architectures.tensorflow_components.heads.v_head import VHeadParameters
from rl_coach.architectures.tensorflow_components.middlewares.fc_middleware import FCMiddlewareParameters
from rl_coach.base_parameters import NetworkParameters, AlgorithmParameters, \
    AgentParameters, InputEmbedderParameters, EmbedderScheme
from rl_coach.core_types import ActionInfo, EnvironmentSteps
from rl_coach.exploration_policies.ou_process import OUProcessParameters
from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplayParameters
from rl_coach.spaces import BoxActionSpace, GoalsSpace

from rl_coach.architectures.tensorflow_components.heads.ddpg_actor_head import DDPGActorHeadParameters
from rl_coach.core_types import ActionInfo, EnvironmentSteps


class DDPGCriticNetworkParameters(NetworkParameters):
    def __init__(self):

@@ -17,11 +17,11 @@
from typing import Union

import numpy as np
from rl_coach.schedules import LinearSchedule

from rl_coach.agents.dqn_agent import DQNAgentParameters
from rl_coach.agents.value_optimization_agent import ValueOptimizationAgent
from rl_coach.core_types import EnvironmentSteps
from rl_coach.schedules import LinearSchedule


class DDQNAgentParameters(DQNAgentParameters):

@@ -19,19 +19,20 @@ from enum import Enum
from typing import Union

import numpy as np

from rl_coach.agents.agent import Agent
from rl_coach.architectures.tensorflow_components.architecture import Conv2d, Dense
from rl_coach.architectures.tensorflow_components.heads.measurements_prediction_head import MeasurementsPredictionHeadParameters
from rl_coach.architectures.tensorflow_components.heads.measurements_prediction_head import \
    MeasurementsPredictionHeadParameters
from rl_coach.architectures.tensorflow_components.middlewares.fc_middleware import FCMiddlewareParameters
from rl_coach.base_parameters import AlgorithmParameters, AgentParameters, NetworkParameters, \
    InputEmbedderParameters, MiddlewareScheme
from rl_coach.core_types import ActionInfo, EnvironmentSteps, RunPhase
from rl_coach.exploration_policies.e_greedy import EGreedyParameters
from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplayParameters
from rl_coach.memories.memory import MemoryGranularity
from rl_coach.spaces import SpacesDefinition, VectorObservationSpace

from rl_coach.core_types import ActionInfo, EnvironmentSteps, RunPhase
from rl_coach.exploration_policies.e_greedy import EGreedyParameters


class HandlingTargetsAfterEpisodeEnd(Enum):
    LastStep = 0

@@ -17,16 +17,16 @@
from typing import Union

import numpy as np

from rl_coach.agents.value_optimization_agent import ValueOptimizationAgent
from rl_coach.architectures.tensorflow_components.heads.q_head import QHeadParameters
from rl_coach.architectures.tensorflow_components.middlewares.fc_middleware import FCMiddlewareParameters
from rl_coach.base_parameters import AlgorithmParameters, NetworkParameters, AgentParameters, \
    InputEmbedderParameters, MiddlewareScheme
from rl_coach.memories.non_episodic.experience_replay import ExperienceReplayParameters
from rl_coach.schedules import LinearSchedule

from rl_coach.core_types import EnvironmentSteps
from rl_coach.exploration_policies.e_greedy import EGreedyParameters
from rl_coach.memories.non_episodic.experience_replay import ExperienceReplayParameters
from rl_coach.schedules import LinearSchedule


class DQNAlgorithmParameters(AlgorithmParameters):

@@ -17,7 +17,6 @@
from typing import Union

import numpy as np
import copy

from rl_coach.agents.ddpg_agent import DDPGAgent, DDPGAgentParameters, DDPGAlgorithmParameters
from rl_coach.core_types import RunPhase

@@ -19,6 +19,8 @@ from collections import OrderedDict
from typing import Union

import pygame
from pandas import to_pickle

from rl_coach.agents.agent import Agent
from rl_coach.agents.bc_agent import BCNetworkParameters
from rl_coach.architectures.tensorflow_components.heads.policy_head import PolicyHeadParameters
@@ -26,11 +28,9 @@ from rl_coach.architectures.tensorflow_components.middlewares.fc_middleware impo
from rl_coach.base_parameters import AlgorithmParameters, NetworkParameters, InputEmbedderParameters, EmbedderScheme, \
    AgentParameters
from rl_coach.core_types import ActionInfo
from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplayParameters
from pandas import to_pickle

from rl_coach.exploration_policies.e_greedy import EGreedyParameters
from rl_coach.logger import screen
from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplayParameters


class HumanAlgorithmParameters(AlgorithmParameters):

@@ -17,11 +17,10 @@
from collections import OrderedDict
from typing import Union

from rl_coach.core_types import RunPhase, ActionInfo
from rl_coach.spaces import DiscreteActionSpace

from rl_coach.agents.agent import Agent
from rl_coach.core_types import RunPhase, ActionInfo
from rl_coach.logger import screen
from rl_coach.spaces import DiscreteActionSpace


## This is an abstract agent - there is no learn_from_batch method ##

@@ -17,17 +17,17 @@
from typing import Union

import numpy as np

from rl_coach.agents.policy_optimization_agent import PolicyOptimizationAgent
from rl_coach.agents.value_optimization_agent import ValueOptimizationAgent
from rl_coach.architectures.tensorflow_components.heads.q_head import QHeadParameters
from rl_coach.architectures.tensorflow_components.middlewares.fc_middleware import FCMiddlewareParameters
from rl_coach.base_parameters import AlgorithmParameters, AgentParameters, NetworkParameters, \
    InputEmbedderParameters
from rl_coach.exploration_policies.e_greedy import EGreedyParameters
from rl_coach.utils import last_sample

from rl_coach.core_types import EnvironmentSteps
from rl_coach.exploration_policies.e_greedy import EGreedyParameters
from rl_coach.memories.episodic.single_episode_buffer import SingleEpisodeBufferParameters
from rl_coach.utils import last_sample


class NStepQNetworkParameters(NetworkParameters):

@@ -17,16 +17,16 @@
from typing import Union

import numpy as np

from rl_coach.agents.value_optimization_agent import ValueOptimizationAgent
from rl_coach.architectures.tensorflow_components.heads.naf_head import NAFHeadParameters
from rl_coach.architectures.tensorflow_components.middlewares.fc_middleware import FCMiddlewareParameters
from rl_coach.base_parameters import AlgorithmParameters, AgentParameters, \
    NetworkParameters, InputEmbedderParameters
from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplayParameters
from rl_coach.spaces import BoxActionSpace

from rl_coach.core_types import ActionInfo, EnvironmentSteps
from rl_coach.exploration_policies.ou_process import OUProcessParameters
from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplayParameters
from rl_coach.spaces import BoxActionSpace


class NAFNetworkParameters(NetworkParameters):

@@ -19,17 +19,17 @@ import pickle
from typing import Union

import numpy as np

from rl_coach.agents.value_optimization_agent import ValueOptimizationAgent
from rl_coach.architectures.tensorflow_components.heads.dnd_q_head import DNDQHeadParameters
from rl_coach.architectures.tensorflow_components.middlewares.fc_middleware import FCMiddlewareParameters
from rl_coach.base_parameters import AlgorithmParameters, NetworkParameters, AgentParameters, \
    InputEmbedderParameters
from rl_coach.core_types import RunPhase, EnvironmentSteps, Episode, StateType
from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplayParameters, MemoryGranularity
from rl_coach.schedules import ConstantSchedule

from rl_coach.exploration_policies.e_greedy import EGreedyParameters
from rl_coach.logger import screen
from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplayParameters, MemoryGranularity
from rl_coach.schedules import ConstantSchedule


class NECNetworkParameters(NetworkParameters):

@@ -20,8 +20,7 @@ import numpy as np

from rl_coach.agents.dqn_agent import DQNAgentParameters, DQNAlgorithmParameters
from rl_coach.agents.value_optimization_agent import ValueOptimizationAgent
from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplay, \
    EpisodicExperienceReplayParameters
from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplayParameters


class PALAlgorithmParameters(DQNAlgorithmParameters):

@@ -17,16 +17,16 @@
from typing import Union

import numpy as np

from rl_coach.agents.policy_optimization_agent import PolicyOptimizationAgent, PolicyGradientRescaler
from rl_coach.architectures.tensorflow_components.heads.policy_head import PolicyHeadParameters
from rl_coach.architectures.tensorflow_components.middlewares.fc_middleware import FCMiddlewareParameters
from rl_coach.base_parameters import NetworkParameters, AlgorithmParameters, \
    AgentParameters, InputEmbedderParameters
from rl_coach.exploration_policies.additive_noise import AdditiveNoiseParameters
from rl_coach.spaces import DiscreteActionSpace

from rl_coach.logger import screen
from rl_coach.memories.episodic.single_episode_buffer import SingleEpisodeBufferParameters
from rl_coach.spaces import DiscreteActionSpace


class PolicyGradientNetworkParameters(NetworkParameters):

@@ -19,12 +19,12 @@ from enum import Enum
from typing import Union

import numpy as np
from rl_coach.core_types import Batch, ActionInfo
from rl_coach.spaces import DiscreteActionSpace, BoxActionSpace
from rl_coach.utils import eps

from rl_coach.agents.agent import Agent
from rl_coach.core_types import Batch, ActionInfo
from rl_coach.logger import screen
from rl_coach.spaces import DiscreteActionSpace, BoxActionSpace
from rl_coach.utils import eps


class PolicyGradientRescaler(Enum):

@@ -19,21 +19,21 @@ from collections import OrderedDict
from typing import Union

import numpy as np

from rl_coach.agents.actor_critic_agent import ActorCriticAgent
from rl_coach.agents.policy_optimization_agent import PolicyGradientRescaler
from rl_coach.architectures.tensorflow_components.heads.ppo_head import PPOHeadParameters
from rl_coach.architectures.tensorflow_components.heads.v_head import VHeadParameters
from rl_coach.architectures.tensorflow_components.middlewares.fc_middleware import FCMiddlewareParameters
from rl_coach.base_parameters import AlgorithmParameters, NetworkParameters, \
    AgentParameters, InputEmbedderParameters, DistributedTaskParameters
from rl_coach.core_types import EnvironmentSteps, Batch
from rl_coach.exploration_policies.additive_noise import AdditiveNoiseParameters
from rl_coach.logger import screen
from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplayParameters
from rl_coach.spaces import DiscreteActionSpace
from rl_coach.utils import force_list

from rl_coach.architectures.tensorflow_components.heads.ppo_head import PPOHeadParameters
from rl_coach.logger import screen


class PPOCriticNetworkParameters(NetworkParameters):
    def __init__(self):

@@ -17,12 +17,13 @@
from typing import Union

import numpy as np
from rl_coach.architectures.tensorflow_components.heads.quantile_regression_q_head import QuantileRegressionQHeadParameters
from rl_coach.schedules import LinearSchedule

from rl_coach.agents.dqn_agent import DQNAgentParameters, DQNNetworkParameters, DQNAlgorithmParameters
from rl_coach.agents.value_optimization_agent import ValueOptimizationAgent
from rl_coach.architectures.tensorflow_components.heads.quantile_regression_q_head import \
    QuantileRegressionQHeadParameters
from rl_coach.core_types import StateType
from rl_coach.schedules import LinearSchedule


class QuantileRegressionDQNNetworkParameters(DQNNetworkParameters):

@@ -17,11 +17,11 @@
from typing import Union

import numpy as np
from rl_coach.memories.non_episodic.prioritized_experience_replay import PrioritizedExperienceReplay
from rl_coach.spaces import DiscreteActionSpace

from rl_coach.agents.agent import Agent
from rl_coach.core_types import ActionInfo, StateType
from rl_coach.memories.non_episodic.prioritized_experience_replay import PrioritizedExperienceReplay
from rl_coach.spaces import DiscreteActionSpace


## This is an abstract agent - there is no learn_from_batch method ##