mirror of
https://github.com/gryf/coach.git
synced 2026-02-15 05:25:55 +01:00
Clean up imports.
Until now, most of the modules imported all of the module objects (variables, classes, functions, other imports) into the module namespace, which could (and did) cause unintentional use of classes or methods that were only indirectly imported. With this patch, all of the star imports were replaced with the top-level module that provides the desired class or function. In addition, all imports were sorted (where possible) in the way PEP 8[1] suggests — first come imports from the standard library, then third-party imports (like numpy, tensorflow, etc.), and finally coach modules. All of these sections are separated by one empty line. [1] https://www.python.org/dev/peps/pep-0008/#imports
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (c) 2017 Intel Corporation
|
||||
# Copyright (c) 2017 Intel Corporation
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
@@ -13,16 +13,29 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
from exploration_policies.additive_noise import AdditiveNoise
|
||||
from exploration_policies.approximated_thompson_sampling_using_dropout import ApproximatedThompsonSamplingUsingDropout
|
||||
from exploration_policies.bayesian import Bayesian
|
||||
from exploration_policies.boltzmann import Boltzmann
|
||||
from exploration_policies.bootstrapped import Bootstrapped
|
||||
from exploration_policies.categorical import Categorical
|
||||
from exploration_policies.continuous_entropy import ContinuousEntropy
|
||||
from exploration_policies.e_greedy import EGreedy
|
||||
from exploration_policies.exploration_policy import ExplorationPolicy
|
||||
from exploration_policies.greedy import Greedy
|
||||
from exploration_policies.ou_process import OUProcess
|
||||
from exploration_policies.thompson_sampling import ThompsonSampling
|
||||
|
||||
from exploration_policies.additive_noise import *
|
||||
from exploration_policies.approximated_thompson_sampling_using_dropout import *
|
||||
from exploration_policies.bayesian import *
|
||||
from exploration_policies.boltzmann import *
|
||||
from exploration_policies.bootstrapped import *
|
||||
from exploration_policies.categorical import *
|
||||
from exploration_policies.continuous_entropy import *
|
||||
from exploration_policies.e_greedy import *
|
||||
from exploration_policies.exploration_policy import *
|
||||
from exploration_policies.greedy import *
|
||||
from exploration_policies.ou_process import *
|
||||
from exploration_policies.thompson_sampling import *
|
||||
|
||||
__all__ = [AdditiveNoise,
|
||||
ApproximatedThompsonSamplingUsingDropout,
|
||||
Bayesian,
|
||||
Boltzmann,
|
||||
Bootstrapped,
|
||||
Categorical,
|
||||
ContinuousEntropy,
|
||||
EGreedy,
|
||||
ExplorationPolicy,
|
||||
Greedy,
|
||||
OUProcess,
|
||||
ThompsonSampling]
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (c) 2017 Intel Corporation
|
||||
# Copyright (c) 2017 Intel Corporation
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
@@ -13,18 +13,19 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
import numpy as np
|
||||
from exploration_policies.exploration_policy import *
|
||||
|
||||
from exploration_policies import exploration_policy
|
||||
import utils
|
||||
|
||||
|
||||
class AdditiveNoise(ExplorationPolicy):
|
||||
class AdditiveNoise(exploration_policy.ExplorationPolicy):
|
||||
def __init__(self, tuning_parameters):
|
||||
"""
|
||||
:param tuning_parameters: A Preset class instance with all the running paramaters
|
||||
:type tuning_parameters: Preset
|
||||
"""
|
||||
ExplorationPolicy.__init__(self, tuning_parameters)
|
||||
exploration_policy.ExplorationPolicy.__init__(self, tuning_parameters)
|
||||
self.variance = tuning_parameters.exploration.initial_noise_variance_percentage
|
||||
self.final_variance = tuning_parameters.exploration.final_noise_variance_percentage
|
||||
self.decay_steps = tuning_parameters.exploration.noise_variance_decay_steps
|
||||
@@ -37,7 +38,7 @@ class AdditiveNoise(ExplorationPolicy):
|
||||
self.variance = self.final_variance
|
||||
|
||||
def get_action(self, action_values):
|
||||
if self.phase == RunPhase.TRAIN:
|
||||
if self.phase == utils.RunPhase.TRAIN:
|
||||
self.decay_exploration()
|
||||
action = np.random.normal(action_values, 2 * self.variance * self.action_abs_range)
|
||||
return action #np.clip(action, -self.action_abs_range, self.action_abs_range).squeeze()
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (c) 2017 Intel Corporation
|
||||
# Copyright (c) 2017 Intel Corporation
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
@@ -13,17 +13,18 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
import numpy as np
|
||||
|
||||
from exploration_policies.exploration_policy import *
|
||||
from exploration_policies import exploration_policy
|
||||
|
||||
|
||||
class ApproximatedThompsonSamplingUsingDropout(ExplorationPolicy):
|
||||
class ApproximatedThompsonSamplingUsingDropout(exploration_policy.ExplorationPolicy):
|
||||
def __init__(self, tuning_parameters):
|
||||
"""
|
||||
:param tuning_parameters: A Preset class instance with all the running paramaters
|
||||
:type tuning_parameters: Preset
|
||||
"""
|
||||
ExplorationPolicy.__init__(self, tuning_parameters)
|
||||
exploration_policy.ExplorationPolicy.__init__(self, tuning_parameters)
|
||||
self.dropout_discard_probability = tuning_parameters.exploration.dropout_discard_probability
|
||||
self.network = tuning_parameters.network
|
||||
self.assign_op = self.network.dropout_discard_probability.assign(self.dropout_discard_probability)
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (c) 2017 Intel Corporation
|
||||
# Copyright (c) 2017 Intel Corporation
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
@@ -13,18 +13,19 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
import numpy as np
|
||||
|
||||
from exploration_policies.exploration_policy import *
|
||||
import tensorflow as tf
|
||||
from exploration_policies import exploration_policy
|
||||
import utils
|
||||
|
||||
|
||||
class Bayesian(ExplorationPolicy):
|
||||
class Bayesian(exploration_policy.ExplorationPolicy):
|
||||
def __init__(self, tuning_parameters):
|
||||
"""
|
||||
:param tuning_parameters: A Preset class instance with all the running paramaters
|
||||
:type tuning_parameters: Preset
|
||||
"""
|
||||
ExplorationPolicy.__init__(self, tuning_parameters)
|
||||
exploration_policy.ExplorationPolicy.__init__(self, tuning_parameters)
|
||||
self.keep_probability = tuning_parameters.exploration.initial_keep_probability
|
||||
self.final_keep_probability = tuning_parameters.exploration.final_keep_probability
|
||||
self.keep_probability_decay_delta = (
|
||||
@@ -40,7 +41,7 @@ class Bayesian(ExplorationPolicy):
|
||||
self.keep_probability -= self.keep_probability_decay_delta
|
||||
|
||||
def get_action(self, action_values):
|
||||
if self.phase == RunPhase.TRAIN:
|
||||
if self.phase == utils.RunPhase.TRAIN:
|
||||
self.decay_keep_probability()
|
||||
# dropout = self.network.get_layer('variable_dropout_1')
|
||||
# with tf.Session() as sess:
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (c) 2017 Intel Corporation
|
||||
# Copyright (c) 2017 Intel Corporation
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
@@ -13,17 +13,18 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
import numpy as np
|
||||
|
||||
from exploration_policies.exploration_policy import *
|
||||
from exploration_policies import exploration_policy
|
||||
import utils
|
||||
|
||||
|
||||
class Boltzmann(ExplorationPolicy):
|
||||
class Boltzmann(exploration_policy.ExplorationPolicy):
|
||||
def __init__(self, tuning_parameters):
|
||||
"""
|
||||
:param tuning_parameters: A Preset class instance with all the running paramaters
|
||||
:type tuning_parameters: Preset
|
||||
"""
|
||||
ExplorationPolicy.__init__(self, tuning_parameters)
|
||||
exploration_policy.ExplorationPolicy.__init__(self, tuning_parameters)
|
||||
self.temperature = tuning_parameters.exploration.initial_temperature
|
||||
self.final_temperature = tuning_parameters.exploration.final_temperature
|
||||
self.temperature_decay_delta = (
|
||||
@@ -35,7 +36,7 @@ class Boltzmann(ExplorationPolicy):
|
||||
self.temperature -= self.temperature_decay_delta
|
||||
|
||||
def get_action(self, action_values):
|
||||
if self.phase == RunPhase.TRAIN:
|
||||
if self.phase == utils.RunPhase.TRAIN:
|
||||
self.decay_temperature()
|
||||
# softmax calculation
|
||||
exp_probabilities = np.exp(action_values / self.temperature)
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (c) 2017 Intel Corporation
|
||||
# Copyright (c) 2017 Intel Corporation
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
@@ -13,17 +13,18 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
import numpy as np
|
||||
|
||||
from exploration_policies.e_greedy import *
|
||||
from exploration_policies import e_greedy
|
||||
|
||||
|
||||
class Bootstrapped(EGreedy):
|
||||
class Bootstrapped(e_greedy.EGreedy):
|
||||
def __init__(self, tuning_parameters):
|
||||
"""
|
||||
:param tuning_parameters: A Preset class instance with all the running parameters
|
||||
:type tuning_parameters: Preset
|
||||
"""
|
||||
EGreedy.__init__(self, tuning_parameters)
|
||||
e_greedy.EGreedy.__init__(self, tuning_parameters)
|
||||
self.num_heads = tuning_parameters.exploration.architecture_num_q_heads
|
||||
self.selected_head = 0
|
||||
|
||||
@@ -31,7 +32,7 @@ class Bootstrapped(EGreedy):
|
||||
self.selected_head = np.random.randint(self.num_heads)
|
||||
|
||||
def get_action(self, action_values):
|
||||
return EGreedy.get_action(self, action_values[self.selected_head])
|
||||
return e_greedy.EGreedy.get_action(self, action_values[self.selected_head])
|
||||
|
||||
def get_control_param(self):
|
||||
return self.selected_head
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (c) 2017 Intel Corporation
|
||||
# Copyright (c) 2017 Intel Corporation
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
@@ -13,17 +13,18 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
import numpy as np
|
||||
|
||||
from exploration_policies.exploration_policy import *
|
||||
from exploration_policies import exploration_policy
|
||||
|
||||
|
||||
class Categorical(ExplorationPolicy):
|
||||
class Categorical(exploration_policy.ExplorationPolicy):
|
||||
def __init__(self, tuning_parameters):
|
||||
"""
|
||||
:param tuning_parameters: A Preset class instance with all the running paramaters
|
||||
:type tuning_parameters: Preset
|
||||
"""
|
||||
ExplorationPolicy.__init__(self, tuning_parameters)
|
||||
exploration_policy.ExplorationPolicy.__init__(self, tuning_parameters)
|
||||
|
||||
def get_action(self, action_values):
|
||||
# choose actions according to the probabilities
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (c) 2017 Intel Corporation
|
||||
# Copyright (c) 2017 Intel Corporation
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
@@ -13,10 +13,8 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
import numpy as np
|
||||
from exploration_policies.exploration_policy import *
|
||||
from exploration_policies import exploration_policy
|
||||
|
||||
|
||||
class ContinuousEntropy(ExplorationPolicy):
|
||||
class ContinuousEntropy(exploration_policy.ExplorationPolicy):
|
||||
pass
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (c) 2017 Intel Corporation
|
||||
# Copyright (c) 2017 Intel Corporation
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
@@ -13,17 +13,19 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
import numpy as np
|
||||
|
||||
from exploration_policies.exploration_policy import *
|
||||
from exploration_policies import exploration_policy
|
||||
import utils
|
||||
|
||||
|
||||
class EGreedy(ExplorationPolicy):
|
||||
class EGreedy(exploration_policy.ExplorationPolicy):
|
||||
def __init__(self, tuning_parameters):
|
||||
"""
|
||||
:param tuning_parameters: A Preset class instance with all the running paramaters
|
||||
:type tuning_parameters: Preset
|
||||
"""
|
||||
ExplorationPolicy.__init__(self, tuning_parameters)
|
||||
exploration_policy.ExplorationPolicy.__init__(self, tuning_parameters)
|
||||
self.epsilon = tuning_parameters.exploration.initial_epsilon
|
||||
self.final_epsilon = tuning_parameters.exploration.final_epsilon
|
||||
self.epsilon_decay_delta = (
|
||||
@@ -52,9 +54,9 @@ class EGreedy(ExplorationPolicy):
|
||||
self.variance = self.final_variance
|
||||
|
||||
def get_action(self, action_values):
|
||||
if self.phase == RunPhase.TRAIN:
|
||||
if self.phase == utils.RunPhase.TRAIN:
|
||||
self.decay_exploration()
|
||||
epsilon = self.evaluation_epsilon if self.phase == RunPhase.TEST else self.epsilon
|
||||
epsilon = self.evaluation_epsilon if self.phase == utils.RunPhase.TEST else self.epsilon
|
||||
|
||||
if self.discrete_controls:
|
||||
top_action = np.argmax(action_values)
|
||||
@@ -67,4 +69,4 @@ class EGreedy(ExplorationPolicy):
|
||||
return np.squeeze(action_values + (np.random.rand() < epsilon) * noise)
|
||||
|
||||
def get_control_param(self):
|
||||
return self.evaluation_epsilon if self.phase == RunPhase.TEST else self.epsilon
|
||||
return self.evaluation_epsilon if self.phase == utils.RunPhase.TEST else self.epsilon
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (c) 2017 Intel Corporation
|
||||
# Copyright (c) 2017 Intel Corporation
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
@@ -13,10 +13,7 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
import numpy as np
|
||||
from utils import *
|
||||
from configurations import *
|
||||
import utils
|
||||
|
||||
|
||||
class ExplorationPolicy(object):
|
||||
@@ -25,7 +22,7 @@ class ExplorationPolicy(object):
|
||||
:param tuning_parameters: A Preset class instance with all the running paramaters
|
||||
:type tuning_parameters: Preset
|
||||
"""
|
||||
self.phase = RunPhase.HEATUP
|
||||
self.phase = utils.RunPhase.HEATUP
|
||||
self.action_space_size = tuning_parameters.env.action_space_size
|
||||
self.action_abs_range = tuning_parameters.env_instance.action_space_abs_range
|
||||
self.discrete_controls = tuning_parameters.env_instance.discrete_controls
|
||||
@@ -39,7 +36,7 @@ class ExplorationPolicy(object):
|
||||
|
||||
def get_action(self, action_values):
|
||||
"""
|
||||
Given a list of values corresponding to each action,
|
||||
Given a list of values corresponding to each action,
|
||||
choose one actions according to the exploration policy
|
||||
:param action_values: A list of action values
|
||||
:return: The chosen action
|
||||
@@ -55,4 +52,4 @@ class ExplorationPolicy(object):
|
||||
self.phase = phase
|
||||
|
||||
def get_control_param(self):
|
||||
return 0
|
||||
return 0
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (c) 2017 Intel Corporation
|
||||
# Copyright (c) 2017 Intel Corporation
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
@@ -13,17 +13,18 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
import numpy as np
|
||||
|
||||
from exploration_policies.exploration_policy import *
|
||||
from exploration_policies import exploration_policy
|
||||
|
||||
|
||||
class Greedy(ExplorationPolicy):
|
||||
class Greedy(exploration_policy.ExplorationPolicy):
|
||||
def __init__(self, tuning_parameters):
|
||||
"""
|
||||
:param tuning_parameters: A Preset class instance with all the running paramaters
|
||||
:type tuning_parameters: Preset
|
||||
"""
|
||||
ExplorationPolicy.__init__(self, tuning_parameters)
|
||||
exploration_policy.ExplorationPolicy.__init__(self, tuning_parameters)
|
||||
|
||||
def get_action(self, action_values):
|
||||
return np.argmax(action_values)
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (c) 2017 Intel Corporation
|
||||
# Copyright (c) 2017 Intel Corporation
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
@@ -13,21 +13,21 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
import numpy as np
|
||||
from exploration_policies.exploration_policy import *
|
||||
|
||||
from exploration_policies import exploration_policy
|
||||
|
||||
# Based on on the description in:
|
||||
# https://math.stackexchange.com/questions/1287634/implementing-ornstein-uhlenbeck-in-matlab
|
||||
|
||||
# Ornstein-Uhlenbeck process
|
||||
class OUProcess(ExplorationPolicy):
|
||||
class OUProcess(exploration_policy.ExplorationPolicy):
|
||||
def __init__(self, tuning_parameters):
|
||||
"""
|
||||
:param tuning_parameters: A Preset class instance with all the running paramaters
|
||||
:type tuning_parameters: Preset
|
||||
"""
|
||||
ExplorationPolicy.__init__(self, tuning_parameters)
|
||||
exploration_policy.ExplorationPolicy.__init__(self, tuning_parameters)
|
||||
self.action_space_size = tuning_parameters.env.action_space_size
|
||||
self.mu = float(tuning_parameters.exploration.mu) * np.ones(self.action_space_size)
|
||||
self.theta = tuning_parameters.exploration.theta
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (c) 2017 Intel Corporation
|
||||
# Copyright (c) 2017 Intel Corporation
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
@@ -13,17 +13,18 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
import numpy as np
|
||||
|
||||
from exploration_policies.exploration_policy import *
|
||||
from exploration_policies import exploration_policy
|
||||
|
||||
|
||||
class ThompsonSampling(ExplorationPolicy):
|
||||
class ThompsonSampling(exploration_policy.ExplorationPolicy):
|
||||
def __init__(self, tuning_parameters):
|
||||
"""
|
||||
:param tuning_parameters: A Preset class instance with all the running paramaters
|
||||
:type tuning_parameters: Preset
|
||||
"""
|
||||
ExplorationPolicy.__init__(self, tuning_parameters)
|
||||
exploration_policy.ExplorationPolicy.__init__(self, tuning_parameters)
|
||||
self.action_space_size = tuning_parameters.env.action_space_size
|
||||
|
||||
def get_action(self, action_values):
|
||||
|
||||
Reference in New Issue
Block a user