Fix for issue #128 - circular DQN import (#130)

2026-02-16 05:55:46 +01:00 · 2018-12-16 16:06:44 +02:00
parent e08accdc22
commit f9ee526536
4 changed files with 7 additions and 2 deletions
--- a/rl_coach/agents/dqn_agent.py
+++ b/rl_coach/agents/dqn_agent.py
@@ -36,6 +36,7 @@ class DQNAlgorithmParameters(AlgorithmParameters):
        self.num_steps_between_copying_online_weights_to_target = EnvironmentSteps(10000)
        self.num_consecutive_playing_steps = EnvironmentSteps(4)
        self.discount = 0.99
+        self.supports_parameter_noise = True
 class DQNNetworkParameters(NetworkParameters):
--- a/rl_coach/base_parameters.py
+++ b/rl_coach/base_parameters.py
@@ -211,6 +211,9 @@ class AlgorithmParameters(Parameters):
        # Should the workers wait for full episode
        self.act_for_full_episodes = False
+        # Support for parameter noise
+        self.supports_parameter_noise = False
 class PresetValidationParameters(Parameters):
    def __init__(self,
--- a/rl_coach/exploration_policies/parameter_noise.py
+++ b/rl_coach/exploration_policies/parameter_noise.py
@@ -18,7 +18,6 @@ from typing import List, Dict
 import numpy as np
-from rl_coach.agents.dqn_agent import DQNAgentParameters
 from rl_coach.architectures.layers import NoisyNetDense
 from rl_coach.base_parameters import AgentParameters, NetworkParameters
 from rl_coach.spaces import ActionSpace, BoxActionSpace, DiscreteActionSpace
@@ -30,7 +29,8 @@ from rl_coach.exploration_policies.exploration_policy import ExplorationPolicy,
 class ParameterNoiseParameters(ExplorationParameters):
    def __init__(self, agent_params: AgentParameters):
        super().__init__()
-        if not isinstance(agent_params, DQNAgentParameters):
+
+        if not agent_params.algorithm.supports_parameter_noise:
            raise ValueError("Currently only DQN variants are supported for using an exploration type of "
                             "ParameterNoise.")
--- a/rl_coach/filters/observation/observation_normalization_filter.py
+++ b/rl_coach/filters/observation/observation_normalization_filter.py
@@ -87,3 +87,4 @@ class ObservationNormalizationFilter(ObservationFilter):
    def restore_state_from_checkpoint(self, checkpoint_dir: str, checkpoint_prefix: str):
        self.running_observation_stats.restore_state_from_checkpoint(checkpoint_dir, checkpoint_prefix)
`@@ -87,3 +87,4 @@ class ObservationNormalizationFilter(ObservationFilter):`

	`def restore_state_from_checkpoint(self, checkpoint_dir: str, checkpoint_prefix: str):`	`def restore_state_from_checkpoint(self, checkpoint_dir: str, checkpoint_prefix: str):`
	`self.running_observation_stats.restore_state_from_checkpoint(checkpoint_dir, checkpoint_prefix)`	`self.running_observation_stats.restore_state_from_checkpoint(checkpoint_dir, checkpoint_prefix)`