
Clean up imports.

Until now, most of the modules imported all of another module's objects
(variables, classes, functions, and even other imports) into their own
namespace, which could lead to (and in some places did lead to) unintentional
use of classes or methods that were only imported indirectly.

With this patch, all star imports are replaced with an import of the
top-level module that provides the desired class or function.
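
For example, the value-optimization based agents below switch from a star
import to an aliased module import. A minimal, schematic sketch of the
pattern (the real DQNAgent in this patch defines a constructor and
learn_from_batch; the pass body here only keeps the example short):

    # Before: the star import pulls every public name of the module into
    # this namespace, including anything that module itself imported.
    from agents.value_optimization_agent import *

    class DQNAgent(ValueOptimizationAgent):
        pass

    # After: import the providing module under a short alias and refer to
    # the class through it, so the origin of every name stays explicit.
    from agents import value_optimization_agent as voa

    class DQNAgent(voa.ValueOptimizationAgent):
        pass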

In addition, all imports were sorted (where possible) the way PEP 8 [1]
suggests: imports from the standard library come first, then third-party
imports (numpy, tensorflow, etc.), and finally coach modules. Each of these
sections is separated by one empty line.
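
Applied to a typical module header, the three groups look roughly as follows
(a sketch assembled from imports that appear in the hunks below; any single
module of course uses only a subset of them):

    # Standard library imports come first.
    import collections
    import random
    import time

    # Third-party imports come second.
    import numpy as np
    from scipy import signal

    # Coach modules come last, imported as modules instead of via "import *".
    from agents import policy_optimization_agent as poa
    import logger
    import utils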

[1] https://www.python.org/dev/peps/pep-0008/#imports
Roman Dobosz
2018-04-12 19:46:32 +02:00
parent cafa152382
commit 1b095aeeca
75 changed files with 1169 additions and 1139 deletions


@@ -13,26 +13,48 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-from agents.actor_critic_agent import *
-from agents.agent import *
-from agents.bc_agent import *
-from agents.bootstrapped_dqn_agent import *
-from agents.clipped_ppo_agent import *
-from agents.ddpg_agent import *
-from agents.ddqn_agent import *
-from agents.dfp_agent import *
-from agents.dqn_agent import *
-from agents.categorical_dqn_agent import *
-from agents.human_agent import *
-from agents.imitation_agent import *
-from agents.mmc_agent import *
-from agents.n_step_q_agent import *
-from agents.naf_agent import *
-from agents.nec_agent import *
-from agents.pal_agent import *
-from agents.policy_gradients_agent import *
-from agents.policy_optimization_agent import *
-from agents.ppo_agent import *
-from agents.value_optimization_agent import *
-from agents.qr_dqn_agent import *
+from agents.actor_critic_agent import ActorCriticAgent
+from agents.agent import Agent
+from agents.bc_agent import BCAgent
+from agents.bootstrapped_dqn_agent import BootstrappedDQNAgent
+from agents.categorical_dqn_agent import CategoricalDQNAgent
+from agents.clipped_ppo_agent import ClippedPPOAgent
+from agents.ddpg_agent import DDPGAgent
+from agents.ddqn_agent import DDQNAgent
+from agents.dfp_agent import DFPAgent
+from agents.dqn_agent import DQNAgent
+from agents.human_agent import HumanAgent
+from agents.imitation_agent import ImitationAgent
+from agents.mmc_agent import MixedMonteCarloAgent
+from agents.n_step_q_agent import NStepQAgent
+from agents.naf_agent import NAFAgent
+from agents.nec_agent import NECAgent
+from agents.pal_agent import PALAgent
+from agents.policy_gradients_agent import PolicyGradientsAgent
+from agents.policy_optimization_agent import PolicyOptimizationAgent
+from agents.ppo_agent import PPOAgent
+from agents.qr_dqn_agent import QuantileRegressionDQNAgent
+from agents.value_optimization_agent import ValueOptimizationAgent
+__all__ = [ActorCriticAgent,
+           Agent,
+           BCAgent,
+           BootstrappedDQNAgent,
+           CategoricalDQNAgent,
+           ClippedPPOAgent,
+           DDPGAgent,
+           DDQNAgent,
+           DFPAgent,
+           DQNAgent,
+           HumanAgent,
+           ImitationAgent,
+           MixedMonteCarloAgent,
+           NAFAgent,
+           NECAgent,
+           NStepQAgent,
+           PALAgent,
+           PPOAgent,
+           PolicyGradientsAgent,
+           PolicyOptimizationAgent,
+           QuantileRegressionDQNAgent,
+           ValueOptimizationAgent]


@@ -13,23 +13,24 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# #
import numpy as np
from scipy import signal
from agents.policy_optimization_agent import * from agents import policy_optimization_agent as poa
from logger import * import utils
from utils import * import logger
import scipy.signal
# Actor Critic - https://arxiv.org/abs/1602.01783 # Actor Critic - https://arxiv.org/abs/1602.01783
class ActorCriticAgent(PolicyOptimizationAgent): class ActorCriticAgent(poa.PolicyOptimizationAgent):
def __init__(self, env, tuning_parameters, replicated_device=None, thread_id=0, create_target_network = False): def __init__(self, env, tuning_parameters, replicated_device=None, thread_id=0, create_target_network = False):
PolicyOptimizationAgent.__init__(self, env, tuning_parameters, replicated_device, thread_id, create_target_network) poa.PolicyOptimizationAgent.__init__(self, env, tuning_parameters, replicated_device, thread_id, create_target_network)
self.last_gradient_update_step_idx = 0 self.last_gradient_update_step_idx = 0
self.action_advantages = Signal('Advantages') self.action_advantages = utils.Signal('Advantages')
self.state_values = Signal('Values') self.state_values = utils.Signal('Values')
self.unclipped_grads = Signal('Grads (unclipped)') self.unclipped_grads = utils.Signal('Grads (unclipped)')
self.value_loss = Signal('Value Loss') self.value_loss = utils.Signal('Value Loss')
self.policy_loss = Signal('Policy Loss') self.policy_loss = utils.Signal('Policy Loss')
self.signals.append(self.action_advantages) self.signals.append(self.action_advantages)
self.signals.append(self.state_values) self.signals.append(self.state_values)
self.signals.append(self.unclipped_grads) self.signals.append(self.unclipped_grads)
@@ -38,7 +39,7 @@ class ActorCriticAgent(PolicyOptimizationAgent):
# Discounting function used to calculate discounted returns. # Discounting function used to calculate discounted returns.
def discount(self, x, gamma): def discount(self, x, gamma):
return scipy.signal.lfilter([1], [1, -gamma], x[::-1], axis=0)[::-1] return signal.lfilter([1], [1, -gamma], x[::-1], axis=0)[::-1]
def get_general_advantage_estimation_values(self, rewards, values): def get_general_advantage_estimation_values(self, rewards, values):
# values contain n+1 elements (t ... t+n+1), rewards contain n elements (t ... t + n) # values contain n+1 elements (t ... t+n+1), rewards contain n elements (t ... t + n)
@@ -72,20 +73,20 @@ class ActorCriticAgent(PolicyOptimizationAgent):
# estimate the advantage function # estimate the advantage function
action_advantages = np.zeros((num_transitions, 1)) action_advantages = np.zeros((num_transitions, 1))
if self.policy_gradient_rescaler == PolicyGradientRescaler.A_VALUE: if self.policy_gradient_rescaler == poa.PolicyGradientRescaler.A_VALUE:
if game_overs[-1]: if game_overs[-1]:
R = 0 R = 0
else: else:
R = self.main_network.online_network.predict(last_sample(next_states))[0] R = self.main_network.online_network.predict(utils.last_sample(next_states))[0]
for i in reversed(range(num_transitions)): for i in reversed(range(num_transitions)):
R = rewards[i] + self.tp.agent.discount * R R = rewards[i] + self.tp.agent.discount * R
state_value_head_targets[i] = R state_value_head_targets[i] = R
action_advantages[i] = R - current_state_values[i] action_advantages[i] = R - current_state_values[i]
elif self.policy_gradient_rescaler == PolicyGradientRescaler.GAE: elif self.policy_gradient_rescaler == poa.PolicyGradientRescaler.GAE:
# get bootstraps # get bootstraps
bootstrapped_value = self.main_network.online_network.predict(last_sample(next_states))[0] bootstrapped_value = self.main_network.online_network.predict(utils.last_sample(next_states))[0]
values = np.append(current_state_values, bootstrapped_value) values = np.append(current_state_values, bootstrapped_value)
if game_overs[-1]: if game_overs[-1]:
values[-1] = 0 values[-1] = 0
@@ -94,7 +95,7 @@ class ActorCriticAgent(PolicyOptimizationAgent):
gae_values, state_value_head_targets = self.get_general_advantage_estimation_values(rewards, values) gae_values, state_value_head_targets = self.get_general_advantage_estimation_values(rewards, values)
action_advantages = np.vstack(gae_values) action_advantages = np.vstack(gae_values)
else: else:
screen.warning("WARNING: The requested policy gradient rescaler is not available") logger.screen.warning("WARNING: The requested policy gradient rescaler is not available")
action_advantages = action_advantages.squeeze(axis=-1) action_advantages = action_advantages.squeeze(axis=-1)
if not self.env.discrete_controls and len(actions.shape) < 2: if not self.env.discrete_controls and len(actions.shape) < 2:
@@ -113,7 +114,7 @@ class ActorCriticAgent(PolicyOptimizationAgent):
return total_loss return total_loss
def choose_action(self, curr_state, phase=RunPhase.TRAIN): def choose_action(self, curr_state, phase=utils.RunPhase.TRAIN):
# TODO: rename curr_state -> state # TODO: rename curr_state -> state
# convert to batch so we can run it through the network # convert to batch so we can run it through the network
@@ -126,7 +127,7 @@ class ActorCriticAgent(PolicyOptimizationAgent):
# DISCRETE # DISCRETE
state_value, action_probabilities = self.main_network.online_network.predict(curr_state) state_value, action_probabilities = self.main_network.online_network.predict(curr_state)
action_probabilities = action_probabilities.squeeze() action_probabilities = action_probabilities.squeeze()
if phase == RunPhase.TRAIN: if phase == utils.RunPhase.TRAIN:
action = self.exploration_policy.get_action(action_probabilities) action = self.exploration_policy.get_action(action_probabilities)
else: else:
action = np.argmax(action_probabilities) action = np.argmax(action_probabilities)
@@ -137,7 +138,7 @@ class ActorCriticAgent(PolicyOptimizationAgent):
state_value, action_values_mean, action_values_std = self.main_network.online_network.predict(curr_state) state_value, action_values_mean, action_values_std = self.main_network.online_network.predict(curr_state)
action_values_mean = action_values_mean.squeeze() action_values_mean = action_values_mean.squeeze()
action_values_std = action_values_std.squeeze() action_values_std = action_values_std.squeeze()
if phase == RunPhase.TRAIN: if phase == utils.RunPhase.TRAIN:
action = np.squeeze(np.random.randn(1, self.action_space_size) * action_values_std + action_values_mean) action = np.squeeze(np.random.randn(1, self.action_space_size) * action_values_std + action_values_mean)
else: else:
action = action_values_mean action = action_values_mean


@@ -13,32 +13,28 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# #
import collections
import scipy.ndimage
try:
import matplotlib.pyplot as plt
except:
from logger import failed_imports
failed_imports.append("matplotlib")
import copy
from renderer import Renderer
from configurations import Preset
from collections import deque
from utils import LazyStack
from collections import OrderedDict
from utils import RunPhase, Signal, is_empty, RunningStat
from architectures import *
from exploration_policies import *
from memories import *
from memories.memory import *
from logger import logger, screen
import random import random
import time import time
import os
import itertools import logger
from architectures.tensorflow_components.shared_variables import SharedRunningStats try:
import matplotlib.pyplot as plt
except ImportError:
logger.failed_imports.append("matplotlib")
import numpy as np
from pandas.io import pickle
from six.moves import range from six.moves import range
import scipy
from architectures.tensorflow_components import shared_variables as sv
import configurations
import exploration_policies as ep
import memories
from memories import memory
import renderer
import utils
class Agent(object): class Agent(object):
@@ -54,7 +50,7 @@ class Agent(object):
:param thread_id: int :param thread_id: int
""" """
screen.log_title("Creating agent {}".format(task_id)) logger.screen.log_title("Creating agent {}".format(task_id))
self.task_id = task_id self.task_id = task_id
self.sess = tuning_parameters.sess self.sess = tuning_parameters.sess
self.env = tuning_parameters.env_instance = env self.env = tuning_parameters.env_instance = env
@@ -71,21 +67,20 @@ class Agent(object):
# modules # modules
if tuning_parameters.agent.load_memory_from_file_path: if tuning_parameters.agent.load_memory_from_file_path:
screen.log_title("Loading replay buffer from pickle. Pickle path: {}" logger.screen.log_title("Loading replay buffer from pickle. Pickle path: {}"
.format(tuning_parameters.agent.load_memory_from_file_path)) .format(tuning_parameters.agent.load_memory_from_file_path))
self.memory = read_pickle(tuning_parameters.agent.load_memory_from_file_path) self.memory = pickle.read_pickle(tuning_parameters.agent.load_memory_from_file_path)
else: else:
self.memory = eval(tuning_parameters.memory + '(tuning_parameters)') self.memory = eval('memories.' + tuning_parameters.memory + '(tuning_parameters)')
# self.architecture = eval(tuning_parameters.architecture)
self.has_global = replicated_device is not None self.has_global = replicated_device is not None
self.replicated_device = replicated_device self.replicated_device = replicated_device
self.worker_device = "/job:worker/task:{}/cpu:0".format(task_id) if replicated_device is not None else "/gpu:0" self.worker_device = "/job:worker/task:{}/cpu:0".format(task_id) if replicated_device is not None else "/gpu:0"
self.exploration_policy = eval(tuning_parameters.exploration.policy + '(tuning_parameters)') self.exploration_policy = eval('ep.' + tuning_parameters.exploration.policy + '(tuning_parameters)')
self.evaluation_exploration_policy = eval(tuning_parameters.exploration.evaluation_policy self.evaluation_exploration_policy = eval('ep.' + tuning_parameters.exploration.evaluation_policy
+ '(tuning_parameters)') + '(tuning_parameters)')
self.evaluation_exploration_policy.change_phase(RunPhase.TEST) self.evaluation_exploration_policy.change_phase(utils.RunPhase.TEST)
# initialize all internal variables # initialize all internal variables
self.tp = tuning_parameters self.tp = tuning_parameters
@@ -100,30 +95,30 @@ class Agent(object):
self.episode_running_info = {} self.episode_running_info = {}
self.last_episode_evaluation_ran = 0 self.last_episode_evaluation_ran = 0
self.running_observations = [] self.running_observations = []
logger.set_current_time(self.current_episode) logger.logger.set_current_time(self.current_episode)
self.main_network = None self.main_network = None
self.networks = [] self.networks = []
self.last_episode_images = [] self.last_episode_images = []
self.renderer = Renderer() self.renderer = renderer.Renderer()
# signals # signals
self.signals = [] self.signals = []
self.loss = Signal('Loss') self.loss = utils.Signal('Loss')
self.signals.append(self.loss) self.signals.append(self.loss)
self.curr_learning_rate = Signal('Learning Rate') self.curr_learning_rate = utils.Signal('Learning Rate')
self.signals.append(self.curr_learning_rate) self.signals.append(self.curr_learning_rate)
if self.tp.env.normalize_observation and not self.env.is_state_type_image: if self.tp.env.normalize_observation and not self.env.is_state_type_image:
if not self.tp.distributed or not self.tp.agent.share_statistics_between_workers: if not self.tp.distributed or not self.tp.agent.share_statistics_between_workers:
self.running_observation_stats = RunningStat((self.tp.env.desired_observation_width,)) self.running_observation_stats = utils.RunningStat((self.tp.env.desired_observation_width,))
self.running_reward_stats = RunningStat(()) self.running_reward_stats = utils.RunningStat(())
else: else:
self.running_observation_stats = SharedRunningStats(self.tp, replicated_device, self.running_observation_stats = sv.SharedRunningStats(self.tp, replicated_device,
shape=(self.tp.env.desired_observation_width,), shape=(self.tp.env.desired_observation_width,),
name='observation_stats') name='observation_stats')
self.running_reward_stats = SharedRunningStats(self.tp, replicated_device, self.running_reward_stats = sv.SharedRunningStats(self.tp, replicated_device,
shape=(), shape=(),
name='reward_stats') name='reward_stats')
# env is already reset at this point. Otherwise we're getting an error where you cannot # env is already reset at this point. Otherwise we're getting an error where you cannot
# reset an env which is not done # reset an env which is not done
@@ -137,13 +132,13 @@ class Agent(object):
def log_to_screen(self, phase): def log_to_screen(self, phase):
# log to screen # log to screen
if self.current_episode >= 0: if self.current_episode >= 0:
if phase == RunPhase.TRAIN: if phase == utils.RunPhase.TRAIN:
exploration = self.exploration_policy.get_control_param() exploration = self.exploration_policy.get_control_param()
else: else:
exploration = self.evaluation_exploration_policy.get_control_param() exploration = self.evaluation_exploration_policy.get_control_param()
screen.log_dict( logger.screen.log_dict(
OrderedDict([ collections.OrderedDict([
("Worker", self.task_id), ("Worker", self.task_id),
("Episode", self.current_episode), ("Episode", self.current_episode),
("total reward", self.total_reward_in_current_episode), ("total reward", self.total_reward_in_current_episode),
@@ -154,37 +149,37 @@ class Agent(object):
prefix=phase prefix=phase
) )
def update_log(self, phase=RunPhase.TRAIN): def update_log(self, phase=utils.RunPhase.TRAIN):
""" """
Writes logging messages to screen and updates the log file with all the signal values. Writes logging messages to screen and updates the log file with all the signal values.
:return: None :return: None
""" """
# log all the signals to file # log all the signals to file
logger.set_current_time(self.current_episode) logger.logger.set_current_time(self.current_episode)
logger.create_signal_value('Training Iter', self.training_iteration) logger.logger.create_signal_value('Training Iter', self.training_iteration)
logger.create_signal_value('In Heatup', int(phase == RunPhase.HEATUP)) logger.logger.create_signal_value('In Heatup', int(phase == utils.RunPhase.HEATUP))
logger.create_signal_value('ER #Transitions', self.memory.num_transitions()) logger.logger.create_signal_value('ER #Transitions', self.memory.num_transitions())
logger.create_signal_value('ER #Episodes', self.memory.length()) logger.logger.create_signal_value('ER #Episodes', self.memory.length())
logger.create_signal_value('Episode Length', self.current_episode_steps_counter) logger.logger.create_signal_value('Episode Length', self.current_episode_steps_counter)
logger.create_signal_value('Total steps', self.total_steps_counter) logger.logger.create_signal_value('Total steps', self.total_steps_counter)
logger.create_signal_value("Epsilon", self.exploration_policy.get_control_param()) logger.logger.create_signal_value("Epsilon", self.exploration_policy.get_control_param())
logger.create_signal_value("Training Reward", self.total_reward_in_current_episode logger.logger.create_signal_value("Training Reward", self.total_reward_in_current_episode
if phase == RunPhase.TRAIN else np.nan) if phase == utils.RunPhase.TRAIN else np.nan)
logger.create_signal_value('Evaluation Reward', self.total_reward_in_current_episode logger.logger.create_signal_value('Evaluation Reward', self.total_reward_in_current_episode
if phase == RunPhase.TEST else np.nan) if phase == utils.RunPhase.TEST else np.nan)
logger.create_signal_value('Update Target Network', 0, overwrite=False) logger.logger.create_signal_value('Update Target Network', 0, overwrite=False)
logger.update_wall_clock_time(self.current_episode) logger.logger.update_wall_clock_time(self.current_episode)
for signal in self.signals: for signal in self.signals:
logger.create_signal_value("{}/Mean".format(signal.name), signal.get_mean()) logger.logger.create_signal_value("{}/Mean".format(signal.name), signal.get_mean())
logger.create_signal_value("{}/Stdev".format(signal.name), signal.get_stdev()) logger.logger.create_signal_value("{}/Stdev".format(signal.name), signal.get_stdev())
logger.create_signal_value("{}/Max".format(signal.name), signal.get_max()) logger.logger.create_signal_value("{}/Max".format(signal.name), signal.get_max())
logger.create_signal_value("{}/Min".format(signal.name), signal.get_min()) logger.logger.create_signal_value("{}/Min".format(signal.name), signal.get_min())
# dump # dump
if self.current_episode % self.tp.visualization.dump_signals_to_csv_every_x_episodes == 0 \ if self.current_episode % self.tp.visualization.dump_signals_to_csv_every_x_episodes == 0 \
and self.current_episode > 0: and self.current_episode > 0:
logger.dump_output_csv() logger.logger.dump_output_csv()
def reset_game(self, do_not_reset_env=False): def reset_game(self, do_not_reset_env=False):
""" """
@@ -211,7 +206,7 @@ class Agent(object):
self.episode_running_info[action] = [] self.episode_running_info[action] = []
plt.clf() plt.clf()
if self.tp.agent.middleware_type == MiddlewareTypes.LSTM: if self.tp.agent.middleware_type == configurations.MiddlewareTypes.LSTM:
for network in self.networks: for network in self.networks:
network.online_network.curr_rnn_c_in = network.online_network.middleware_embedder.c_init network.online_network.curr_rnn_c_in = network.online_network.middleware_embedder.c_init
network.online_network.curr_rnn_h_in = network.online_network.middleware_embedder.h_init network.online_network.curr_rnn_h_in = network.online_network.middleware_embedder.h_init
@@ -281,9 +276,9 @@ class Agent(object):
if self.total_steps_counter % self.tp.agent.num_steps_between_copying_online_weights_to_target == 0: if self.total_steps_counter % self.tp.agent.num_steps_between_copying_online_weights_to_target == 0:
for network in self.networks: for network in self.networks:
network.update_target_network(self.tp.agent.rate_for_copying_weights_to_target) network.update_target_network(self.tp.agent.rate_for_copying_weights_to_target)
logger.create_signal_value('Update Target Network', 1) logger.logger.create_signal_value('Update Target Network', 1)
else: else:
logger.create_signal_value('Update Target Network', 0, overwrite=False) logger.logger.create_signal_value('Update Target Network', 0, overwrite=False)
return loss return loss
@@ -321,7 +316,7 @@ class Agent(object):
plt.legend() plt.legend()
plt.pause(0.00000001) plt.pause(0.00000001)
def choose_action(self, curr_state, phase=RunPhase.TRAIN): def choose_action(self, curr_state, phase=utils.RunPhase.TRAIN):
""" """
choose an action to act with in the current episode being played. Different behavior might be exhibited when training choose an action to act with in the current episode being played. Different behavior might be exhibited when training
or testing. or testing.
@@ -358,8 +353,8 @@ class Agent(object):
:return: None :return: None
""" """
observation = self.preprocess_observation(self.env.state['observation']) observation = self.preprocess_observation(self.env.state['observation'])
self.curr_stack = deque([observation]*self.tp.env.observation_stack_size, maxlen=self.tp.env.observation_stack_size) self.curr_stack = collections.deque([observation]*self.tp.env.observation_stack_size, maxlen=self.tp.env.observation_stack_size)
observation = LazyStack(self.curr_stack, -1) observation = utils.LazyStack(self.curr_stack, -1)
self.curr_state = { self.curr_state = {
'observation': observation 'observation': observation
@@ -369,21 +364,21 @@ class Agent(object):
if self.tp.agent.use_accumulated_reward_as_measurement: if self.tp.agent.use_accumulated_reward_as_measurement:
self.curr_state['measurements'] = np.append(self.curr_state['measurements'], 0) self.curr_state['measurements'] = np.append(self.curr_state['measurements'], 0)
def act(self, phase=RunPhase.TRAIN): def act(self, phase=utils.RunPhase.TRAIN):
""" """
Take one step in the environment according to the network prediction and store the transition in memory Take one step in the environment according to the network prediction and store the transition in memory
:param phase: Either Train or Test to specify if greedy actions should be used and if transitions should be stored :param phase: Either Train or Test to specify if greedy actions should be used and if transitions should be stored
:return: A boolean value that signals an episode termination :return: A boolean value that signals an episode termination
""" """
if phase != RunPhase.TEST: if phase != utils.RunPhase.TEST:
self.total_steps_counter += 1 self.total_steps_counter += 1
self.current_episode_steps_counter += 1 self.current_episode_steps_counter += 1
# get new action # get new action
action_info = {"action_probability": 1.0 / self.env.action_space_size, "action_value": 0, "max_action_value": 0} action_info = {"action_probability": 1.0 / self.env.action_space_size, "action_value": 0, "max_action_value": 0}
if phase == RunPhase.HEATUP and not self.tp.heatup_using_network_decisions: if phase == utils.RunPhase.HEATUP and not self.tp.heatup_using_network_decisions:
action = self.env.get_random_action() action = self.env.get_random_action()
else: else:
action, action_info = self.choose_action(self.curr_state, phase=phase) action, action_info = self.choose_action(self.curr_state, phase=phase)
@@ -402,13 +397,13 @@ class Agent(object):
next_state['observation'] = self.preprocess_observation(next_state['observation']) next_state['observation'] = self.preprocess_observation(next_state['observation'])
# plot action values online # plot action values online
if self.tp.visualization.plot_action_values_online and phase != RunPhase.HEATUP: if self.tp.visualization.plot_action_values_online and phase != utils.RunPhase.HEATUP:
self.plot_action_values_online() self.plot_action_values_online()
# initialize the next state # initialize the next state
# TODO: provide option to stack more than just the observation # TODO: provide option to stack more than just the observation
self.curr_stack.append(next_state['observation']) self.curr_stack.append(next_state['observation'])
observation = LazyStack(self.curr_stack, -1) observation = utils.LazyStack(self.curr_stack, -1)
next_state['observation'] = observation next_state['observation'] = observation
if self.tp.agent.use_measurements and 'measurements' in result.keys(): if self.tp.agent.use_measurements and 'measurements' in result.keys():
@@ -417,14 +412,14 @@ class Agent(object):
next_state['measurements'] = np.append(next_state['measurements'], self.total_reward_in_current_episode) next_state['measurements'] = np.append(next_state['measurements'], self.total_reward_in_current_episode)
# store the transition only if we are training # store the transition only if we are training
if phase == RunPhase.TRAIN or phase == RunPhase.HEATUP: if phase == utils.RunPhase.TRAIN or phase == utils.RunPhase.HEATUP:
transition = Transition(self.curr_state, result['action'], shaped_reward, next_state, result['done']) transition = memory.Transition(self.curr_state, result['action'], shaped_reward, next_state, result['done'])
for key in action_info.keys(): for key in action_info.keys():
transition.info[key] = action_info[key] transition.info[key] = action_info[key]
if self.tp.agent.add_a_normalized_timestep_to_the_observation: if self.tp.agent.add_a_normalized_timestep_to_the_observation:
transition.info['timestep'] = float(self.current_episode_steps_counter) / self.env.timestep_limit transition.info['timestep'] = float(self.current_episode_steps_counter) / self.env.timestep_limit
self.memory.store(transition) self.memory.store(transition)
elif phase == RunPhase.TEST and self.tp.visualization.dump_gifs: elif phase == utils.RunPhase.TEST and self.tp.visualization.dump_gifs:
# we store the transitions only for saving gifs # we store the transitions only for saving gifs
self.last_episode_images.append(self.env.get_rendered_image()) self.last_episode_images.append(self.env.get_rendered_image())
@@ -437,7 +432,7 @@ class Agent(object):
self.update_log(phase=phase) self.update_log(phase=phase)
self.log_to_screen(phase=phase) self.log_to_screen(phase=phase)
if phase == RunPhase.TRAIN or phase == RunPhase.HEATUP: if phase == utils.RunPhase.TRAIN or phase == utils.RunPhase.HEATUP:
self.reset_game() self.reset_game()
self.current_episode += 1 self.current_episode += 1
@@ -456,8 +451,8 @@ class Agent(object):
max_reward_achieved = -float('inf') max_reward_achieved = -float('inf')
average_evaluation_reward = 0 average_evaluation_reward = 0
screen.log_title("Running evaluation") logger.screen.log_title("Running evaluation")
self.env.change_phase(RunPhase.TEST) self.env.change_phase(utils.RunPhase.TEST)
for i in range(num_episodes): for i in range(num_episodes):
# keep the online network in sync with the global network # keep the online network in sync with the global network
if keep_networks_synced: if keep_networks_synced:
@@ -466,7 +461,7 @@ class Agent(object):
episode_ended = False episode_ended = False
while not episode_ended: while not episode_ended:
episode_ended = self.act(phase=RunPhase.TEST) episode_ended = self.act(phase=utils.RunPhase.TEST)
if keep_networks_synced \ if keep_networks_synced \
and self.total_steps_counter % self.tp.agent.update_evaluation_agent_network_after_every_num_steps: and self.total_steps_counter % self.tp.agent.update_evaluation_agent_network_after_every_num_steps:
@@ -477,7 +472,7 @@ class Agent(object):
max_reward_achieved = self.total_reward_in_current_episode max_reward_achieved = self.total_reward_in_current_episode
frame_skipping = int(5/self.tp.env.frame_skip) frame_skipping = int(5/self.tp.env.frame_skip)
if self.tp.visualization.dump_gifs: if self.tp.visualization.dump_gifs:
logger.create_gif(self.last_episode_images[::frame_skipping], logger.logger.create_gif(self.last_episode_images[::frame_skipping],
name='score-{}'.format(max_reward_achieved), fps=10) name='score-{}'.format(max_reward_achieved), fps=10)
average_evaluation_reward += self.total_reward_in_current_episode average_evaluation_reward += self.total_reward_in_current_episode
@@ -485,8 +480,8 @@ class Agent(object):
average_evaluation_reward /= float(num_episodes) average_evaluation_reward /= float(num_episodes)
self.env.change_phase(RunPhase.TRAIN) self.env.change_phase(utils.RunPhase.TRAIN)
screen.log_title("Evaluation done. Average reward = {}.".format(average_evaluation_reward)) logger.screen.log_title("Evaluation done. Average reward = {}.".format(average_evaluation_reward))
def post_training_commands(self): def post_training_commands(self):
pass pass
@@ -505,15 +500,15 @@ class Agent(object):
# heatup phase # heatup phase
if self.tp.num_heatup_steps != 0: if self.tp.num_heatup_steps != 0:
self.in_heatup = True self.in_heatup = True
screen.log_title("Starting heatup {}".format(self.task_id)) logger.screen.log_title("Starting heatup {}".format(self.task_id))
num_steps_required_for_one_training_batch = self.tp.batch_size * self.tp.env.observation_stack_size num_steps_required_for_one_training_batch = self.tp.batch_size * self.tp.env.observation_stack_size
for step in range(max(self.tp.num_heatup_steps, num_steps_required_for_one_training_batch)): for step in range(max(self.tp.num_heatup_steps, num_steps_required_for_one_training_batch)):
self.act(phase=RunPhase.HEATUP) self.act(phase=utils.RunPhase.HEATUP)
# training phase # training phase
self.in_heatup = False self.in_heatup = False
screen.log_title("Starting training {}".format(self.task_id)) logger.screen.log_title("Starting training {}".format(self.task_id))
self.exploration_policy.change_phase(RunPhase.TRAIN) self.exploration_policy.change_phase(utils.RunPhase.TRAIN)
training_start_time = time.time() training_start_time = time.time()
model_snapshots_periods_passed = -1 model_snapshots_periods_passed = -1
self.reset_game() self.reset_game()
@@ -557,7 +552,7 @@ class Agent(object):
self.loss.add_sample(loss) self.loss.add_sample(loss)
self.training_iteration += 1 self.training_iteration += 1
if self.imitation: if self.imitation:
self.log_to_screen(RunPhase.TRAIN) self.log_to_screen(utils.RunPhase.TRAIN)
self.post_training_commands() self.post_training_commands()
def save_model(self, model_id): def save_model(self, model_id):


@@ -13,16 +13,15 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# #
import numpy as np import numpy as np
from agents.imitation_agent import ImitationAgent from agents import imitation_agent
# Behavioral Cloning Agent # Behavioral Cloning Agent
class BCAgent(ImitationAgent): class BCAgent(imitation_agent.ImitationAgent):
def __init__(self, env, tuning_parameters, replicated_device=None, thread_id=0): def __init__(self, env, tuning_parameters, replicated_device=None, thread_id=0):
ImitationAgent.__init__(self, env, tuning_parameters, replicated_device, thread_id) imitation_agent.ImitationAgent.__init__(self, env, tuning_parameters, replicated_device, thread_id)
def learn_from_batch(self, batch): def learn_from_batch(self, batch):
current_states, _, actions, _, _, _ = self.extract_batch(batch) current_states, _, actions, _, _, _ = self.extract_batch(batch)


@@ -13,17 +13,18 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# #
import numpy as np
from agents.value_optimization_agent import * from agents import value_optimization_agent as voa
import utils
# Bootstrapped DQN - https://arxiv.org/pdf/1602.04621.pdf # Bootstrapped DQN - https://arxiv.org/pdf/1602.04621.pdf
class BootstrappedDQNAgent(ValueOptimizationAgent): class BootstrappedDQNAgent(voa.ValueOptimizationAgent):
def __init__(self, env, tuning_parameters, replicated_device=None, thread_id=0): def __init__(self, env, tuning_parameters, replicated_device=None, thread_id=0):
ValueOptimizationAgent.__init__(self, env, tuning_parameters, replicated_device, thread_id) voa.ValueOptimizationAgent.__init__(self, env, tuning_parameters, replicated_device, thread_id)
def reset_game(self, do_not_reset_env=False): def reset_game(self, do_not_reset_env=False):
ValueOptimizationAgent.reset_game(self, do_not_reset_env) voa.ValueOptimizationAgent.reset_game(self, do_not_reset_env)
self.exploration_policy.select_head() self.exploration_policy.select_head()
def learn_from_batch(self, batch): def learn_from_batch(self, batch):
@@ -51,8 +52,8 @@ class BootstrappedDQNAgent(ValueOptimizationAgent):
return total_loss return total_loss
def act(self, phase=RunPhase.TRAIN): def act(self, phase=utils.RunPhase.TRAIN):
ValueOptimizationAgent.act(self, phase) voa.ValueOptimizationAgent.act(self, phase)
mask = np.random.binomial(1, self.tp.exploration.bootstrapped_data_sharing_probability, mask = np.random.binomial(1, self.tp.exploration.bootstrapped_data_sharing_probability,
self.tp.exploration.architecture_num_q_heads) self.tp.exploration.architecture_num_q_heads)
self.memory.update_last_transition_info({'mask': mask}) self.memory.update_last_transition_info({'mask': mask})


@@ -13,14 +13,15 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# #
import numpy as np
from agents.value_optimization_agent import * from agents import value_optimization_agent as voa
# Categorical Deep Q Network - https://arxiv.org/pdf/1707.06887.pdf # Categorical Deep Q Network - https://arxiv.org/pdf/1707.06887.pdf
class CategoricalDQNAgent(ValueOptimizationAgent): class CategoricalDQNAgent(voa.ValueOptimizationAgent):
def __init__(self, env, tuning_parameters, replicated_device=None, thread_id=0): def __init__(self, env, tuning_parameters, replicated_device=None, thread_id=0):
ValueOptimizationAgent.__init__(self, env, tuning_parameters, replicated_device, thread_id) voa.ValueOptimizationAgent.__init__(self, env, tuning_parameters, replicated_device, thread_id)
self.z_values = np.linspace(self.tp.agent.v_min, self.tp.agent.v_max, self.tp.agent.atoms) self.z_values = np.linspace(self.tp.agent.v_min, self.tp.agent.v_max, self.tp.agent.atoms)
# prediction's format is (batch,actions,atoms) # prediction's format is (batch,actions,atoms)
@@ -57,4 +58,3 @@ class CategoricalDQNAgent(ValueOptimizationAgent):
total_loss = result[0] total_loss = result[0]
return total_loss return total_loss


@@ -13,27 +13,34 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# #
import collections
from agents.actor_critic_agent import * import copy
from random import shuffle from random import shuffle
import numpy as np
from agents import actor_critic_agent as aca
from agents import policy_optimization_agent as poa
import logger
import utils
# Clipped Proximal Policy Optimization - https://arxiv.org/abs/1707.06347 # Clipped Proximal Policy Optimization - https://arxiv.org/abs/1707.06347
class ClippedPPOAgent(ActorCriticAgent): class ClippedPPOAgent(aca.ActorCriticAgent):
def __init__(self, env, tuning_parameters, replicated_device=None, thread_id=0): def __init__(self, env, tuning_parameters, replicated_device=None, thread_id=0):
ActorCriticAgent.__init__(self, env, tuning_parameters, replicated_device, thread_id, aca.ActorCriticAgent.__init__(self, env, tuning_parameters, replicated_device, thread_id,
create_target_network=True) create_target_network=True)
# signals definition # signals definition
self.value_loss = Signal('Value Loss') self.value_loss = utils.Signal('Value Loss')
self.signals.append(self.value_loss) self.signals.append(self.value_loss)
self.policy_loss = Signal('Policy Loss') self.policy_loss = utils.Signal('Policy Loss')
self.signals.append(self.policy_loss) self.signals.append(self.policy_loss)
self.total_kl_divergence_during_training_process = 0.0 self.total_kl_divergence_during_training_process = 0.0
self.unclipped_grads = Signal('Grads (unclipped)') self.unclipped_grads = utils.Signal('Grads (unclipped)')
self.signals.append(self.unclipped_grads) self.signals.append(self.unclipped_grads)
self.value_targets = Signal('Value Targets') self.value_targets = utils.Signal('Value Targets')
self.signals.append(self.value_targets) self.signals.append(self.value_targets)
self.kl_divergence = Signal('KL Divergence') self.kl_divergence = utils.Signal('KL Divergence')
self.signals.append(self.kl_divergence) self.signals.append(self.kl_divergence)
def fill_advantages(self, batch): def fill_advantages(self, batch):
@@ -46,9 +53,9 @@ class ClippedPPOAgent(ActorCriticAgent):
# calculate advantages # calculate advantages
advantages = [] advantages = []
value_targets = [] value_targets = []
if self.policy_gradient_rescaler == PolicyGradientRescaler.A_VALUE: if self.policy_gradient_rescaler == poa.PolicyGradientRescaler.A_VALUE:
advantages = total_return - current_state_values advantages = total_return - current_state_values
elif self.policy_gradient_rescaler == PolicyGradientRescaler.GAE: elif self.policy_gradient_rescaler == poa.PolicyGradientRescaler.GAE:
# get bootstraps # get bootstraps
episode_start_idx = 0 episode_start_idx = 0
advantages = np.array([]) advantages = np.array([])
@@ -66,7 +73,7 @@ class ClippedPPOAgent(ActorCriticAgent):
advantages = np.append(advantages, rollout_advantages) advantages = np.append(advantages, rollout_advantages)
value_targets = np.append(value_targets, gae_based_value_targets) value_targets = np.append(value_targets, gae_based_value_targets)
else: else:
screen.warning("WARNING: The requested policy gradient rescaler is not available") logger.screen.warning("WARNING: The requested policy gradient rescaler is not available")
# standardize # standardize
advantages = (advantages - np.mean(advantages)) / np.std(advantages) advantages = (advantages - np.mean(advantages)) / np.std(advantages)
@@ -144,8 +151,8 @@ class ClippedPPOAgent(ActorCriticAgent):
curr_learning_rate = self.tp.learning_rate curr_learning_rate = self.tp.learning_rate
# log training parameters # log training parameters
screen.log_dict( logger.screen.log_dict(
OrderedDict([ collections.OrderedDict([
("Surrogate loss", loss['policy_losses'][0]), ("Surrogate loss", loss['policy_losses'][0]),
("KL divergence", loss['fetch_result'][0]), ("KL divergence", loss['fetch_result'][0]),
("Entropy", loss['fetch_result'][1]), ("Entropy", loss['fetch_result'][1]),
@@ -184,13 +191,13 @@ class ClippedPPOAgent(ActorCriticAgent):
self.update_log() # should be done in order to update the data that has been accumulated * while not playing * self.update_log() # should be done in order to update the data that has been accumulated * while not playing *
return np.append(losses[0], losses[1]) return np.append(losses[0], losses[1])
def choose_action(self, current_state, phase=RunPhase.TRAIN): def choose_action(self, current_state, phase=utils.RunPhase.TRAIN):
if self.env.discrete_controls: if self.env.discrete_controls:
# DISCRETE # DISCRETE
_, action_values = self.main_network.online_network.predict(self.tf_input_state(current_state)) _, action_values = self.main_network.online_network.predict(self.tf_input_state(current_state))
action_values = action_values.squeeze() action_values = action_values.squeeze()
if phase == RunPhase.TRAIN: if phase == utils.RunPhase.TRAIN:
action = self.exploration_policy.get_action(action_values) action = self.exploration_policy.get_action(action_values)
else: else:
action = np.argmax(action_values) action = np.argmax(action_values)
@@ -201,7 +208,7 @@ class ClippedPPOAgent(ActorCriticAgent):
_, action_values_mean, action_values_std = self.main_network.online_network.predict(self.tf_input_state(current_state)) _, action_values_mean, action_values_std = self.main_network.online_network.predict(self.tf_input_state(current_state))
action_values_mean = action_values_mean.squeeze() action_values_mean = action_values_mean.squeeze()
action_values_std = action_values_std.squeeze() action_values_std = action_values_std.squeeze()
if phase == RunPhase.TRAIN: if phase == utils.RunPhase.TRAIN:
action = np.squeeze(np.random.randn(1, self.action_space_size) * action_values_std + action_values_mean) action = np.squeeze(np.random.randn(1, self.action_space_size) * action_values_std + action_values_mean)
# if self.current_episode % 5 == 0 and self.current_episode_steps_counter < 5: # if self.current_episode % 5 == 0 and self.current_episode_steps_counter < 5:
# print action # print action


@@ -13,28 +13,34 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# #
import copy
from agents.actor_critic_agent import * import numpy as np
from configurations import *
from agents import actor_critic_agent as aca
from agents import agent
from architectures import network_wrapper as nw
import configurations as conf
import utils
# Deep Deterministic Policy Gradients Network - https://arxiv.org/pdf/1509.02971.pdf # Deep Deterministic Policy Gradients Network - https://arxiv.org/pdf/1509.02971.pdf
class DDPGAgent(ActorCriticAgent): class DDPGAgent(aca.ActorCriticAgent):
def __init__(self, env, tuning_parameters, replicated_device=None, thread_id=0): def __init__(self, env, tuning_parameters, replicated_device=None, thread_id=0):
ActorCriticAgent.__init__(self, env, tuning_parameters, replicated_device, thread_id, aca.ActorCriticAgent.__init__(self, env, tuning_parameters, replicated_device, thread_id,
create_target_network=True) create_target_network=True)
# define critic network # define critic network
self.critic_network = self.main_network self.critic_network = self.main_network
# self.networks.append(self.critic_network) # self.networks.append(self.critic_network)
# define actor network # define actor network
tuning_parameters.agent.input_types = {'observation': InputTypes.Observation} tuning_parameters.agent.input_types = {'observation': conf.InputTypes.Observation}
tuning_parameters.agent.output_types = [OutputTypes.Pi] tuning_parameters.agent.output_types = [conf.OutputTypes.Pi]
self.actor_network = NetworkWrapper(tuning_parameters, True, self.has_global, 'actor', self.actor_network = nw.NetworkWrapper(tuning_parameters, True, self.has_global, 'actor',
self.replicated_device, self.worker_device) self.replicated_device, self.worker_device)
self.networks.append(self.actor_network) self.networks.append(self.actor_network)
self.q_values = Signal("Q") self.q_values = utils.Signal("Q")
self.signals.append(self.q_values) self.signals.append(self.q_values)
self.reset_game(do_not_reset_env=True) self.reset_game(do_not_reset_env=True)
@@ -82,14 +88,14 @@ class DDPGAgent(ActorCriticAgent):
return total_loss return total_loss
def train(self): def train(self):
return Agent.train(self) return agent.Agent.train(self)
def choose_action(self, curr_state, phase=RunPhase.TRAIN): def choose_action(self, curr_state, phase=utils.RunPhase.TRAIN):
assert not self.env.discrete_controls, 'DDPG works only for continuous control problems' assert not self.env.discrete_controls, 'DDPG works only for continuous control problems'
result = self.actor_network.online_network.predict(self.tf_input_state(curr_state)) result = self.actor_network.online_network.predict(self.tf_input_state(curr_state))
action_values = result[0].squeeze() action_values = result[0].squeeze()
if phase == RunPhase.TRAIN: if phase == utils.RunPhase.TRAIN:
action = self.exploration_policy.get_action(action_values) action = self.exploration_policy.get_action(action_values)
else: else:
action = action_values action = action_values


@@ -13,14 +13,15 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# #
import numpy as np
from agents.value_optimization_agent import * from agents import value_optimization_agent as voa
# Double DQN - https://arxiv.org/abs/1509.06461 # Double DQN - https://arxiv.org/abs/1509.06461
class DDQNAgent(ValueOptimizationAgent): class DDQNAgent(voa.ValueOptimizationAgent):
def __init__(self, env, tuning_parameters, replicated_device=None, thread_id=0): def __init__(self, env, tuning_parameters, replicated_device=None, thread_id=0):
ValueOptimizationAgent.__init__(self, env, tuning_parameters, replicated_device, thread_id) voa.ValueOptimizationAgent.__init__(self, env, tuning_parameters, replicated_device, thread_id)
def learn_from_batch(self, batch): def learn_from_batch(self, batch):
current_states, next_states, actions, rewards, game_overs, _ = self.extract_batch(batch) current_states, next_states, actions, rewards, game_overs, _ = self.extract_batch(batch)


@@ -13,17 +13,20 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# #
import numpy as np
from agents.agent import * from agents import agent
from architectures import network_wrapper as nw
import utils
# Direct Future Prediction Agent - http://vladlen.info/papers/learning-to-act.pdf # Direct Future Prediction Agent - http://vladlen.info/papers/learning-to-act.pdf
class DFPAgent(Agent): class DFPAgent(agent.Agent):
def __init__(self, env, tuning_parameters, replicated_device=None, thread_id=0): def __init__(self, env, tuning_parameters, replicated_device=None, thread_id=0):
Agent.__init__(self, env, tuning_parameters, replicated_device, thread_id) agent.Agent.__init__(self, env, tuning_parameters, replicated_device, thread_id)
self.current_goal = self.tp.agent.goal_vector self.current_goal = self.tp.agent.goal_vector
self.main_network = NetworkWrapper(tuning_parameters, False, self.has_global, 'main', self.main_network = nw.NetworkWrapper(tuning_parameters, False, self.has_global, 'main',
self.replicated_device, self.worker_device) self.replicated_device, self.worker_device)
self.networks.append(self.main_network) self.networks.append(self.main_network)
def learn_from_batch(self, batch): def learn_from_batch(self, batch):
@@ -45,7 +48,7 @@ class DFPAgent(Agent):
return total_loss return total_loss
def choose_action(self, curr_state, phase=RunPhase.TRAIN): def choose_action(self, curr_state, phase=utils.RunPhase.TRAIN):
# convert to batch so we can run it through the network # convert to batch so we can run it through the network
observation = np.expand_dims(np.array(curr_state['observation']), 0) observation = np.expand_dims(np.array(curr_state['observation']), 0)
measurements = np.expand_dims(np.array(curr_state['measurements']), 0) measurements = np.expand_dims(np.array(curr_state['measurements']), 0)
@@ -66,7 +69,7 @@ class DFPAgent(Agent):
self.tp.agent.future_measurements_weights) self.tp.agent.future_measurements_weights)
# choose action according to the exploration policy and the current phase (evaluating or training the agent) # choose action according to the exploration policy and the current phase (evaluating or training the agent)
if phase == RunPhase.TRAIN: if phase == utils.RunPhase.TRAIN:
action = self.exploration_policy.get_action(action_values) action = self.exploration_policy.get_action(action_values)
else: else:
action = np.argmax(action_values) action = np.argmax(action_values)


@@ -13,14 +13,15 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# #
import numpy as np
from agents.value_optimization_agent import * from agents import value_optimization_agent as voa
# Distributional Deep Q Network - https://arxiv.org/pdf/1707.06887.pdf # Distributional Deep Q Network - https://arxiv.org/pdf/1707.06887.pdf
class DistributionalDQNAgent(ValueOptimizationAgent): class DistributionalDQNAgent(voa.ValueOptimizationAgent):
def __init__(self, env, tuning_parameters, replicated_device=None, thread_id=0): def __init__(self, env, tuning_parameters, replicated_device=None, thread_id=0):
ValueOptimizationAgent.__init__(self, env, tuning_parameters, replicated_device, thread_id) voa.ValueOptimizationAgent.__init__(self, env, tuning_parameters, replicated_device, thread_id)
self.z_values = np.linspace(self.tp.agent.v_min, self.tp.agent.v_max, self.tp.agent.atoms) self.z_values = np.linspace(self.tp.agent.v_min, self.tp.agent.v_max, self.tp.agent.atoms)
# prediction's format is (batch,actions,atoms) # prediction's format is (batch,actions,atoms)
@@ -57,4 +58,3 @@ class DistributionalDQNAgent(ValueOptimizationAgent):
total_loss = result[0] total_loss = result[0]
return total_loss return total_loss


@@ -13,14 +13,15 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# #
import numpy as np
from agents.value_optimization_agent import * from agents import value_optimization_agent as voa
# Deep Q Network - https://www.cs.toronto.edu/~vmnih/docs/dqn.pdf # Deep Q Network - https://www.cs.toronto.edu/~vmnih/docs/dqn.pdf
class DQNAgent(ValueOptimizationAgent): class DQNAgent(voa.ValueOptimizationAgent):
def __init__(self, env, tuning_parameters, replicated_device=None, thread_id=0): def __init__(self, env, tuning_parameters, replicated_device=None, thread_id=0):
ValueOptimizationAgent.__init__(self, env, tuning_parameters, replicated_device, thread_id) voa.ValueOptimizationAgent.__init__(self, env, tuning_parameters, replicated_device, thread_id)
def learn_from_batch(self, batch): def learn_from_batch(self, batch):
current_states, next_states, actions, rewards, game_overs, _ = self.extract_batch(batch) current_states, next_states, actions, rewards, game_overs, _ = self.extract_batch(batch)


@@ -13,31 +13,37 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# #
import collections
import os
from agents.agent import *
import pygame import pygame
from pandas.io import pickle
from agents import agent
import logger
import utils
class HumanAgent(Agent): class HumanAgent(agent.Agent):
def __init__(self, env, tuning_parameters, replicated_device=None, thread_id=0): def __init__(self, env, tuning_parameters, replicated_device=None, thread_id=0):
Agent.__init__(self, env, tuning_parameters, replicated_device, thread_id) agent.Agent.__init__(self, env, tuning_parameters, replicated_device, thread_id)
self.clock = pygame.time.Clock() self.clock = pygame.time.Clock()
self.max_fps = int(self.tp.visualization.max_fps_for_human_control) self.max_fps = int(self.tp.visualization.max_fps_for_human_control)
screen.log_title("Human Control Mode") utils.screen.log_title("Human Control Mode")
available_keys = self.env.get_available_keys() available_keys = self.env.get_available_keys()
if available_keys: if available_keys:
screen.log("Use keyboard keys to move. Press escape to quit. Available keys:") utils.screen.log("Use keyboard keys to move. Press escape to quit. Available keys:")
screen.log("") utils.screen.log("")
for action, key in self.env.get_available_keys(): for action, key in self.env.get_available_keys():
screen.log("\t- {}: {}".format(action, key)) utils.screen.log("\t- {}: {}".format(action, key))
screen.separator() utils.screen.separator()
def train(self): def train(self):
return 0 return 0
def choose_action(self, curr_state, phase=RunPhase.TRAIN): def choose_action(self, curr_state, phase=utils.RunPhase.TRAIN):
action = self.env.get_action_from_user() action = self.env.get_action_from_user()
# keep constant fps # keep constant fps
@@ -49,16 +55,16 @@ class HumanAgent(Agent):
return action, {"action_value": 0} return action, {"action_value": 0}
def save_replay_buffer_and_exit(self): def save_replay_buffer_and_exit(self):
replay_buffer_path = os.path.join(logger.experiments_path, 'replay_buffer.p') replay_buffer_path = os.path.join(logger.logger.experiments_path, 'replay_buffer.p')
self.memory.tp = None self.memory.tp = None
to_pickle(self.memory, replay_buffer_path) pickle.to_pickle(self.memory, replay_buffer_path)
screen.log_title("Replay buffer was stored in {}".format(replay_buffer_path)) utils.screen.log_title("Replay buffer was stored in {}".format(replay_buffer_path))
exit() exit()
def log_to_screen(self, phase): def log_to_screen(self, phase):
# log to screen # log to utils.screen
screen.log_dict( utils.screen.log_dict(
OrderedDict([ collections.OrderedDict([
("Episode", self.current_episode), ("Episode", self.current_episode),
("total reward", self.total_reward_in_current_episode), ("total reward", self.total_reward_in_current_episode),
("steps", self.total_steps_counter) ("steps", self.total_steps_counter)


@@ -13,23 +13,27 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# #
import collections
from agents.agent import * from agents import agent
from architectures import network_wrapper as nw
import utils
import logging
# Imitation Agent # Imitation Agent
class ImitationAgent(Agent): class ImitationAgent(agent.Agent):
def __init__(self, env, tuning_parameters, replicated_device=None, thread_id=0): def __init__(self, env, tuning_parameters, replicated_device=None, thread_id=0):
Agent.__init__(self, env, tuning_parameters, replicated_device, thread_id) agent.Agent.__init__(self, env, tuning_parameters, replicated_device, thread_id)
self.main_network = NetworkWrapper(tuning_parameters, False, self.has_global, 'main', self.main_network = nw.NetworkWrapper(tuning_parameters, False, self.has_global, 'main',
self.replicated_device, self.worker_device) self.replicated_device, self.worker_device)
self.networks.append(self.main_network) self.networks.append(self.main_network)
self.imitation = True self.imitation = True
def extract_action_values(self, prediction): def extract_action_values(self, prediction):
return prediction.squeeze() return prediction.squeeze()
def choose_action(self, curr_state, phase=RunPhase.TRAIN): def choose_action(self, curr_state, phase=utils.RunPhase.TRAIN):
# convert to batch so we can run it through the network # convert to batch so we can run it through the network
prediction = self.main_network.online_network.predict(self.tf_input_state(curr_state)) prediction = self.main_network.online_network.predict(self.tf_input_state(curr_state))
@@ -49,10 +53,10 @@ class ImitationAgent(Agent):
def log_to_screen(self, phase): def log_to_screen(self, phase):
# log to screen # log to screen
if phase == RunPhase.TRAIN: if phase == utils.RunPhase.TRAIN:
# for the training phase - we log during the episode to visualize the progress in training # for the training phase - we log during the episode to visualize the progress in training
screen.log_dict( logger.screen.log_dict(
OrderedDict([ collections.OrderedDict([
("Worker", self.task_id), ("Worker", self.task_id),
("Episode", self.current_episode), ("Episode", self.current_episode),
("Loss", self.loss.values[-1]), ("Loss", self.loss.values[-1]),
@@ -62,4 +66,4 @@ class ImitationAgent(Agent):
) )
else: else:
# for the evaluation phase - logging as in regular RL # for the evaluation phase - logging as in regular RL
Agent.log_to_screen(self, phase) agent.Agent.log_to_screen(self, phase)

View File

@@ -13,13 +13,14 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# #
import numpy as np
from agents.value_optimization_agent import * from agents import value_optimization_agent as voa
class MixedMonteCarloAgent(ValueOptimizationAgent): class MixedMonteCarloAgent(voa.ValueOptimizationAgent):
def __init__(self, env, tuning_parameters, replicated_device=None, thread_id=0): def __init__(self, env, tuning_parameters, replicated_device=None, thread_id=0):
ValueOptimizationAgent.__init__(self, env, tuning_parameters, replicated_device, thread_id) voa.ValueOptimizationAgent.__init__(self, env, tuning_parameters, replicated_device, thread_id)
self.mixing_rate = tuning_parameters.agent.monte_carlo_mixing_rate self.mixing_rate = tuning_parameters.agent.monte_carlo_mixing_rate
def learn_from_batch(self, batch): def learn_from_batch(self, batch):

View File

@@ -14,22 +14,21 @@
# limitations under the License. # limitations under the License.
# #
import numpy as np import numpy as np
import scipy.signal
from agents.value_optimization_agent import ValueOptimizationAgent from agents import value_optimization_agent as voa
from agents.policy_optimization_agent import PolicyOptimizationAgent from agents import policy_optimization_agent as poa
from logger import logger import logger
from utils import Signal, last_sample import utils
# N Step Q Learning Agent - https://arxiv.org/abs/1602.01783 # N Step Q Learning Agent - https://arxiv.org/abs/1602.01783
class NStepQAgent(ValueOptimizationAgent, PolicyOptimizationAgent): class NStepQAgent(voa.ValueOptimizationAgent, poa.PolicyOptimizationAgent):
def __init__(self, env, tuning_parameters, replicated_device=None, thread_id=0): def __init__(self, env, tuning_parameters, replicated_device=None, thread_id=0):
ValueOptimizationAgent.__init__(self, env, tuning_parameters, replicated_device, thread_id, create_target_network=True) voa.ValueOptimizationAgent.__init__(self, env, tuning_parameters, replicated_device, thread_id, create_target_network=True)
self.last_gradient_update_step_idx = 0 self.last_gradient_update_step_idx = 0
self.q_values = Signal('Q Values') self.q_values = utils.Signal('Q Values')
self.unclipped_grads = Signal('Grads (unclipped)') self.unclipped_grads = utils.Signal('Grads (unclipped)')
self.value_loss = Signal('Value Loss') self.value_loss = utils.Signal('Value Loss')
self.signals.append(self.q_values) self.signals.append(self.q_values)
self.signals.append(self.unclipped_grads) self.signals.append(self.unclipped_grads)
self.signals.append(self.value_loss) self.signals.append(self.value_loss)
@@ -57,7 +56,7 @@ class NStepQAgent(ValueOptimizationAgent, PolicyOptimizationAgent):
if game_overs[-1]: if game_overs[-1]:
R = 0 R = 0
else: else:
R = np.max(self.main_network.target_network.predict(last_sample(next_states))) R = np.max(self.main_network.target_network.predict(utils.last_sample(next_states)))
for i in reversed(range(num_transitions)): for i in reversed(range(num_transitions)):
R = rewards[i] + self.tp.agent.discount * R R = rewards[i] + self.tp.agent.discount * R
@@ -85,4 +84,4 @@ class NStepQAgent(ValueOptimizationAgent, PolicyOptimizationAgent):
else: else:
logger.create_signal_value('Update Target Network', 0, overwrite=False) logger.create_signal_value('Update Target Network', 0, overwrite=False)
return PolicyOptimizationAgent.train(self) return poa.PolicyOptimizationAgent.train(self)

View File

@@ -13,21 +13,20 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# #
import numpy as np import numpy as np
from agents.value_optimization_agent import ValueOptimizationAgent from agents.value_optimization_agent import ValueOptimizationAgent
from utils import RunPhase, Signal import utils
# Normalized Advantage Functions - https://arxiv.org/pdf/1603.00748.pdf # Normalized Advantage Functions - https://arxiv.org/pdf/1603.00748.pdf
class NAFAgent(ValueOptimizationAgent): class NAFAgent(ValueOptimizationAgent):
def __init__(self, env, tuning_parameters, replicated_device=None, thread_id=0): def __init__(self, env, tuning_parameters, replicated_device=None, thread_id=0):
ValueOptimizationAgent.__init__(self, env, tuning_parameters, replicated_device, thread_id) ValueOptimizationAgent.__init__(self, env, tuning_parameters, replicated_device, thread_id)
self.l_values = Signal("L") self.l_values = utils.Signal("L")
self.a_values = Signal("Advantage") self.a_values = utils.Signal("Advantage")
self.mu_values = Signal("Action") self.mu_values = utils.Signal("Action")
self.v_values = Signal("V") self.v_values = utils.Signal("V")
self.signals += [self.l_values, self.a_values, self.mu_values, self.v_values] self.signals += [self.l_values, self.a_values, self.mu_values, self.v_values]
def learn_from_batch(self, batch): def learn_from_batch(self, batch):
@@ -49,7 +48,7 @@ class NAFAgent(ValueOptimizationAgent):
return total_loss return total_loss
def choose_action(self, curr_state, phase=RunPhase.TRAIN): def choose_action(self, curr_state, phase=utils.RunPhase.TRAIN):
assert not self.env.discrete_controls, 'NAF works only for continuous control problems' assert not self.env.discrete_controls, 'NAF works only for continuous control problems'
# convert to batch so we can run it through the network # convert to batch so we can run it through the network
@@ -60,7 +59,7 @@ class NAFAgent(ValueOptimizationAgent):
outputs=naf_head.mu, outputs=naf_head.mu,
squeeze_output=False, squeeze_output=False,
) )
if phase == RunPhase.TRAIN: if phase == utils.RunPhase.TRAIN:
action = self.exploration_policy.get_action(action_values) action = self.exploration_policy.get_action(action_values)
else: else:
action = action_values action = action_values

View File

@@ -13,19 +13,16 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# #
from agents import value_optimization_agent as voa
import numpy as np
from agents.value_optimization_agent import ValueOptimizationAgent
from logger import screen from logger import screen
from utils import RunPhase import utils
# Neural Episodic Control - https://arxiv.org/pdf/1703.01988.pdf # Neural Episodic Control - https://arxiv.org/pdf/1703.01988.pdf
class NECAgent(ValueOptimizationAgent): class NECAgent(voa.ValueOptimizationAgent):
def __init__(self, env, tuning_parameters, replicated_device=None, thread_id=0): def __init__(self, env, tuning_parameters, replicated_device=None, thread_id=0):
ValueOptimizationAgent.__init__(self, env, tuning_parameters, replicated_device, thread_id, voa.ValueOptimizationAgent.__init__(self, env, tuning_parameters, replicated_device, thread_id,
create_target_network=False) create_target_network=False)
self.current_episode_state_embeddings = [] self.current_episode_state_embeddings = []
self.training_started = False self.training_started = False
@@ -52,7 +49,7 @@ class NECAgent(ValueOptimizationAgent):
return total_loss return total_loss
def act(self, phase=RunPhase.TRAIN): def act(self, phase=utils.RunPhase.TRAIN):
if self.in_heatup: if self.in_heatup:
# get embedding in heatup (otherwise we get it through choose_action) # get embedding in heatup (otherwise we get it through choose_action)
embedding = self.main_network.online_network.predict( embedding = self.main_network.online_network.predict(

View File

@@ -13,14 +13,15 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# #
import numpy as np
from agents.value_optimization_agent import * from agents import value_optimization_agent as voa
# Persistent Advantage Learning - https://arxiv.org/pdf/1512.04860.pdf # Persistent Advantage Learning - https://arxiv.org/pdf/1512.04860.pdf
class PALAgent(ValueOptimizationAgent): class PALAgent(voa.ValueOptimizationAgent):
def __init__(self, env, tuning_parameters, replicated_device=None, thread_id=0): def __init__(self, env, tuning_parameters, replicated_device=None, thread_id=0):
ValueOptimizationAgent.__init__(self, env, tuning_parameters, replicated_device, thread_id) voa.ValueOptimizationAgent.__init__(self, env, tuning_parameters, replicated_device, thread_id)
self.alpha = tuning_parameters.agent.pal_alpha self.alpha = tuning_parameters.agent.pal_alpha
self.persistent = tuning_parameters.agent.persistent_advantage_learning self.persistent = tuning_parameters.agent.persistent_advantage_learning
self.monte_carlo_mixing_rate = tuning_parameters.agent.monte_carlo_mixing_rate self.monte_carlo_mixing_rate = tuning_parameters.agent.monte_carlo_mixing_rate

View File

@@ -13,25 +13,18 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# #
from agents.policy_optimization_agent import *
import numpy as np import numpy as np
from logger import *
import tensorflow as tf
try:
import matplotlib.pyplot as plt
except:
from logger import failed_imports
failed_imports.append("matplotlib")
from utils import * from agents import policy_optimization_agent as poa
import logger
import utils
class PolicyGradientsAgent(PolicyOptimizationAgent): class PolicyGradientsAgent(poa.PolicyOptimizationAgent):
def __init__(self, env, tuning_parameters, replicated_device=None, thread_id=0): def __init__(self, env, tuning_parameters, replicated_device=None, thread_id=0):
PolicyOptimizationAgent.__init__(self, env, tuning_parameters, replicated_device, thread_id) poa.PolicyOptimizationAgent.__init__(self, env, tuning_parameters, replicated_device, thread_id)
self.returns_mean = Signal('Returns Mean') self.returns_mean = utils.Signal('Returns Mean')
self.returns_variance = Signal('Returns Variance') self.returns_variance = utils.Signal('Returns Variance')
self.signals.append(self.returns_mean) self.signals.append(self.returns_mean)
self.signals.append(self.returns_variance) self.signals.append(self.returns_variance)
self.last_gradient_update_step_idx = 0 self.last_gradient_update_step_idx = 0
@@ -41,21 +34,21 @@ class PolicyGradientsAgent(PolicyOptimizationAgent):
current_states, next_states, actions, rewards, game_overs, total_returns = self.extract_batch(batch) current_states, next_states, actions, rewards, game_overs, total_returns = self.extract_batch(batch)
for i in reversed(range(len(total_returns))): for i in reversed(range(len(total_returns))):
if self.policy_gradient_rescaler == PolicyGradientRescaler.TOTAL_RETURN: if self.policy_gradient_rescaler == poa.PolicyGradientRescaler.TOTAL_RETURN:
total_returns[i] = total_returns[0] total_returns[i] = total_returns[0]
elif self.policy_gradient_rescaler == PolicyGradientRescaler.FUTURE_RETURN: elif self.policy_gradient_rescaler == poa.PolicyGradientRescaler.FUTURE_RETURN:
# just take the total return as it is # just take the total return as it is
pass pass
elif self.policy_gradient_rescaler == PolicyGradientRescaler.FUTURE_RETURN_NORMALIZED_BY_EPISODE: elif self.policy_gradient_rescaler == poa.PolicyGradientRescaler.FUTURE_RETURN_NORMALIZED_BY_EPISODE:
# we can get a single transition episode while playing Doom Basic, causing the std to be 0 # we can get a single transition episode while playing Doom Basic, causing the std to be 0
if self.std_discounted_return != 0: if self.std_discounted_return != 0:
total_returns[i] = (total_returns[i] - self.mean_discounted_return) / self.std_discounted_return total_returns[i] = (total_returns[i] - self.mean_discounted_return) / self.std_discounted_return
else: else:
total_returns[i] = 0 total_returns[i] = 0
elif self.policy_gradient_rescaler == PolicyGradientRescaler.FUTURE_RETURN_NORMALIZED_BY_TIMESTEP: elif self.policy_gradient_rescaler == poa.PolicyGradientRescaler.FUTURE_RETURN_NORMALIZED_BY_TIMESTEP:
total_returns[i] -= self.mean_return_over_multiple_episodes[i] total_returns[i] -= self.mean_return_over_multiple_episodes[i]
else: else:
screen.warning("WARNING: The requested policy gradient rescaler is not available") logger.screen.warning("WARNING: The requested policy gradient rescaler is not available")
targets = total_returns targets = total_returns
if not self.env.discrete_controls and len(actions.shape) < 2: if not self.env.discrete_controls and len(actions.shape) < 2:
@@ -69,12 +62,12 @@ class PolicyGradientsAgent(PolicyOptimizationAgent):
return total_loss return total_loss
def choose_action(self, curr_state, phase=RunPhase.TRAIN): def choose_action(self, curr_state, phase=utils.RunPhase.TRAIN):
# convert to batch so we can run it through the network # convert to batch so we can run it through the network
if self.env.discrete_controls: if self.env.discrete_controls:
# DISCRETE # DISCRETE
action_values = self.main_network.online_network.predict(self.tf_input_state(curr_state)).squeeze() action_values = self.main_network.online_network.predict(self.tf_input_state(curr_state)).squeeze()
if phase == RunPhase.TRAIN: if phase == utils.RunPhase.TRAIN:
action = self.exploration_policy.get_action(action_values) action = self.exploration_policy.get_action(action_values)
else: else:
action = np.argmax(action_values) action = np.argmax(action_values)
@@ -84,7 +77,7 @@ class PolicyGradientsAgent(PolicyOptimizationAgent):
# CONTINUOUS # CONTINUOUS
result = self.main_network.online_network.predict(self.tf_input_state(curr_state)) result = self.main_network.online_network.predict(self.tf_input_state(curr_state))
action_values = result[0].squeeze() action_values = result[0].squeeze()
if phase == RunPhase.TRAIN: if phase == utils.RunPhase.TRAIN:
action = self.exploration_policy.get_action(action_values) action = self.exploration_policy.get_action(action_values)
else: else:
action = action_values action = action_values

View File

@@ -13,12 +13,17 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# #
import collections
from agents.agent import * import numpy as np
from memories.memory import Episode
from agents import agent
from architectures import network_wrapper as nw
import logger
import utils
class PolicyGradientRescaler(Enum): class PolicyGradientRescaler(utils.Enum):
TOTAL_RETURN = 0 TOTAL_RETURN = 0
FUTURE_RETURN = 1 FUTURE_RETURN = 1
FUTURE_RETURN_NORMALIZED_BY_EPISODE = 2 FUTURE_RETURN_NORMALIZED_BY_EPISODE = 2
@@ -30,11 +35,11 @@ class PolicyGradientRescaler(Enum):
GAE = 8 GAE = 8
class PolicyOptimizationAgent(Agent): class PolicyOptimizationAgent(agent.Agent):
def __init__(self, env, tuning_parameters, replicated_device=None, thread_id=0, create_target_network=False): def __init__(self, env, tuning_parameters, replicated_device=None, thread_id=0, create_target_network=False):
Agent.__init__(self, env, tuning_parameters, replicated_device, thread_id) agent.Agent.__init__(self, env, tuning_parameters, replicated_device, thread_id)
self.main_network = NetworkWrapper(tuning_parameters, create_target_network, self.has_global, 'main', self.main_network = nw.NetworkWrapper(tuning_parameters, create_target_network, self.has_global, 'main',
self.replicated_device, self.worker_device) self.replicated_device, self.worker_device)
self.networks.append(self.main_network) self.networks.append(self.main_network)
self.policy_gradient_rescaler = PolicyGradientRescaler().get(self.tp.agent.policy_gradient_rescaler) self.policy_gradient_rescaler = PolicyGradientRescaler().get(self.tp.agent.policy_gradient_rescaler)
@@ -44,7 +49,7 @@ class PolicyOptimizationAgent(Agent):
self.max_episode_length = 100000 self.max_episode_length = 100000
self.mean_return_over_multiple_episodes = np.zeros(self.max_episode_length) self.mean_return_over_multiple_episodes = np.zeros(self.max_episode_length)
self.num_episodes_where_step_has_been_seen = np.zeros(self.max_episode_length) self.num_episodes_where_step_has_been_seen = np.zeros(self.max_episode_length)
self.entropy = Signal('Entropy') self.entropy = utils.Signal('Entropy')
self.signals.append(self.entropy) self.signals.append(self.entropy)
self.reset_game(do_not_reset_env=True) self.reset_game(do_not_reset_env=True)
@@ -52,8 +57,8 @@ class PolicyOptimizationAgent(Agent):
def log_to_screen(self, phase): def log_to_screen(self, phase):
# log to screen # log to screen
if self.current_episode > 0: if self.current_episode > 0:
screen.log_dict( logger.screen.log_dict(
OrderedDict([ collections.OrderedDict([
("Worker", self.task_id), ("Worker", self.task_id),
("Episode", self.current_episode), ("Episode", self.current_episode),
("total reward", self.total_reward_in_current_episode), ("total reward", self.total_reward_in_current_episode),

View File

@@ -13,36 +13,44 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# #
import collections
import copy
from agents.actor_critic_agent import * import numpy as np
from random import shuffle
from agents import actor_critic_agent as aca
from agents import policy_optimization_agent as poa
from architectures import network_wrapper as nw
import configurations
import logger
import utils
# Proximal Policy Optimization - https://arxiv.org/pdf/1707.06347.pdf # Proximal Policy Optimization - https://arxiv.org/pdf/1707.06347.pdf
class PPOAgent(ActorCriticAgent): class PPOAgent(aca.ActorCriticAgent):
def __init__(self, env, tuning_parameters, replicated_device=None, thread_id=0): def __init__(self, env, tuning_parameters, replicated_device=None, thread_id=0):
ActorCriticAgent.__init__(self, env, tuning_parameters, replicated_device, thread_id, aca.ActorCriticAgent.__init__(self, env, tuning_parameters, replicated_device, thread_id,
create_target_network=True) create_target_network=True)
self.critic_network = self.main_network self.critic_network = self.main_network
# define the policy network # define the policy network
tuning_parameters.agent.input_types = {'observation': InputTypes.Observation} tuning_parameters.agent.input_types = {'observation': configurations.InputTypes.Observation}
tuning_parameters.agent.output_types = [OutputTypes.PPO] tuning_parameters.agent.output_types = [configurations.OutputTypes.PPO]
tuning_parameters.agent.optimizer_type = 'Adam' tuning_parameters.agent.optimizer_type = 'Adam'
tuning_parameters.agent.l2_regularization = 0 tuning_parameters.agent.l2_regularization = 0
self.policy_network = NetworkWrapper(tuning_parameters, True, self.has_global, 'policy', self.policy_network = nw.NetworkWrapper(tuning_parameters, True, self.has_global, 'policy',
self.replicated_device, self.worker_device) self.replicated_device, self.worker_device)
self.networks.append(self.policy_network) self.networks.append(self.policy_network)
# signals definition # signals definition
self.value_loss = Signal('Value Loss') self.value_loss = utils.Signal('Value Loss')
self.signals.append(self.value_loss) self.signals.append(self.value_loss)
self.policy_loss = Signal('Policy Loss') self.policy_loss = utils.Signal('Policy Loss')
self.signals.append(self.policy_loss) self.signals.append(self.policy_loss)
self.kl_divergence = Signal('KL Divergence') self.kl_divergence = utils.Signal('KL Divergence')
self.signals.append(self.kl_divergence) self.signals.append(self.kl_divergence)
self.total_kl_divergence_during_training_process = 0.0 self.total_kl_divergence_during_training_process = 0.0
self.unclipped_grads = Signal('Grads (unclipped)') self.unclipped_grads = utils.Signal('Grads (unclipped)')
self.signals.append(self.unclipped_grads) self.signals.append(self.unclipped_grads)
self.reset_game(do_not_reset_env=True) self.reset_game(do_not_reset_env=True)
@@ -57,9 +65,9 @@ class PPOAgent(ActorCriticAgent):
# calculate advantages # calculate advantages
advantages = [] advantages = []
if self.policy_gradient_rescaler == PolicyGradientRescaler.A_VALUE: if self.policy_gradient_rescaler == poa.PolicyGradientRescaler.A_VALUE:
advantages = total_return - current_state_values advantages = total_return - current_state_values
elif self.policy_gradient_rescaler == PolicyGradientRescaler.GAE: elif self.policy_gradient_rescaler == poa.PolicyGradientRescaler.GAE:
# get bootstraps # get bootstraps
episode_start_idx = 0 episode_start_idx = 0
advantages = np.array([]) advantages = np.array([])
@@ -76,7 +84,7 @@ class PPOAgent(ActorCriticAgent):
episode_start_idx = idx + 1 episode_start_idx = idx + 1
advantages = np.append(advantages, rollout_advantages) advantages = np.append(advantages, rollout_advantages)
else: else:
screen.warning("WARNING: The requested policy gradient rescaler is not available") logger.screen.warning("WARNING: The requested policy gradient rescaler is not available")
# standardize # standardize
advantages = (advantages - np.mean(advantages)) / np.std(advantages) advantages = (advantages - np.mean(advantages)) / np.std(advantages)
@@ -107,7 +115,7 @@ class PPOAgent(ActorCriticAgent):
for k, v in current_states.items() for k, v in current_states.items()
} }
total_return_batch = total_return[i * batch_size:(i + 1) * batch_size] total_return_batch = total_return[i * batch_size:(i + 1) * batch_size]
old_policy_values = force_list(self.critic_network.target_network.predict( old_policy_values = utils.force_list(self.critic_network.target_network.predict(
current_states_batch).squeeze()) current_states_batch).squeeze())
if self.critic_network.online_network.optimizer_type != 'LBFGS': if self.critic_network.online_network.optimizer_type != 'LBFGS':
targets = total_return_batch targets = total_return_batch
@@ -155,7 +163,7 @@ class PPOAgent(ActorCriticAgent):
actions = np.expand_dims(actions, -1) actions = np.expand_dims(actions, -1)
# get old policy probabilities and distribution # get old policy probabilities and distribution
old_policy = force_list(self.policy_network.target_network.predict(current_states)) old_policy = utils.force_list(self.policy_network.target_network.predict(current_states))
# calculate gradients and apply on both the local policy network and on the global policy network # calculate gradients and apply on both the local policy network and on the global policy network
fetches = [self.policy_network.online_network.output_heads[0].kl_divergence, fetches = [self.policy_network.online_network.output_heads[0].kl_divergence,
@@ -196,8 +204,8 @@ class PPOAgent(ActorCriticAgent):
curr_learning_rate = self.tp.learning_rate curr_learning_rate = self.tp.learning_rate
# log training parameters # log training parameters
screen.log_dict( logger.screen.log_dict(
OrderedDict([ collections.OrderedDict([
("Surrogate loss", loss['policy_losses'][0]), ("Surrogate loss", loss['policy_losses'][0]),
("KL divergence", loss['fetch_result'][0]), ("KL divergence", loss['fetch_result'][0]),
("Entropy", loss['fetch_result'][1]), ("Entropy", loss['fetch_result'][1]),
@@ -215,7 +223,7 @@ class PPOAgent(ActorCriticAgent):
def update_kl_coefficient(self): def update_kl_coefficient(self):
# John Schulman takes the mean kl divergence only over the last epoch which is strange but we will follow # John Schulman takes the mean kl divergence only over the last epoch which is strange but we will follow
# his implementation for now because we know it works well # his implementation for now because we know it works well
screen.log_title("KL = {}".format(self.total_kl_divergence_during_training_process)) logger.screen.log_title("KL = {}".format(self.total_kl_divergence_during_training_process))
# update kl coefficient # update kl coefficient
kl_target = self.tp.agent.target_kl_divergence kl_target = self.tp.agent.target_kl_divergence
@@ -236,7 +244,7 @@ class PPOAgent(ActorCriticAgent):
new_kl_coefficient, new_kl_coefficient,
self.policy_network.online_network.output_heads[0].kl_coefficient_ph) self.policy_network.online_network.output_heads[0].kl_coefficient_ph)
screen.log_title("KL penalty coefficient change = {} -> {}".format(kl_coefficient, new_kl_coefficient)) logger.screen.log_title("KL penalty coefficient change = {} -> {}".format(kl_coefficient, new_kl_coefficient))
def post_training_commands(self): def post_training_commands(self):
if self.tp.agent.use_kl_regularization: if self.tp.agent.use_kl_regularization:
@@ -264,12 +272,12 @@ class PPOAgent(ActorCriticAgent):
self.update_log() # should be done in order to update the data that has been accumulated * while not playing * self.update_log() # should be done in order to update the data that has been accumulated * while not playing *
return np.append(value_loss, policy_loss) return np.append(value_loss, policy_loss)
def choose_action(self, curr_state, phase=RunPhase.TRAIN): def choose_action(self, curr_state, phase=utils.RunPhase.TRAIN):
if self.env.discrete_controls: if self.env.discrete_controls:
# DISCRETE # DISCRETE
action_values = self.policy_network.online_network.predict(self.tf_input_state(curr_state)).squeeze() action_values = self.policy_network.online_network.predict(self.tf_input_state(curr_state)).squeeze()
if phase == RunPhase.TRAIN: if phase == utils.RunPhase.TRAIN:
action = self.exploration_policy.get_action(action_values) action = self.exploration_policy.get_action(action_values)
else: else:
action = np.argmax(action_values) action = np.argmax(action_values)
@@ -280,7 +288,7 @@ class PPOAgent(ActorCriticAgent):
action_values_mean, action_values_std = self.policy_network.online_network.predict(self.tf_input_state(curr_state)) action_values_mean, action_values_std = self.policy_network.online_network.predict(self.tf_input_state(curr_state))
action_values_mean = action_values_mean.squeeze() action_values_mean = action_values_mean.squeeze()
action_values_std = action_values_std.squeeze() action_values_std = action_values_std.squeeze()
if phase == RunPhase.TRAIN: if phase == utils.RunPhase.TRAIN:
action = np.squeeze(np.random.randn(1, self.action_space_size) * action_values_std + action_values_mean) action = np.squeeze(np.random.randn(1, self.action_space_size) * action_values_std + action_values_mean)
else: else:
action = action_values_mean action = action_values_mean

View File

@@ -13,14 +13,15 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# #
import numpy as np
from agents.value_optimization_agent import * from agents import value_optimization_agent as voa
# Quantile Regression Deep Q Network - https://arxiv.org/pdf/1710.10044v1.pdf # Quantile Regression Deep Q Network - https://arxiv.org/pdf/1710.10044v1.pdf
class QuantileRegressionDQNAgent(ValueOptimizationAgent): class QuantileRegressionDQNAgent(voa.ValueOptimizationAgent):
def __init__(self, env, tuning_parameters, replicated_device=None, thread_id=0): def __init__(self, env, tuning_parameters, replicated_device=None, thread_id=0):
ValueOptimizationAgent.__init__(self, env, tuning_parameters, replicated_device, thread_id) voa.ValueOptimizationAgent.__init__(self, env, tuning_parameters, replicated_device, thread_id)
self.quantile_probabilities = np.ones(self.tp.agent.atoms) / float(self.tp.agent.atoms) self.quantile_probabilities = np.ones(self.tp.agent.atoms) / float(self.tp.agent.atoms)
# prediction's format is (batch,actions,atoms) # prediction's format is (batch,actions,atoms)

View File

@@ -13,21 +13,20 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# #
import numpy as np import numpy as np
from agents.agent import Agent from agents import agent
from architectures.network_wrapper import NetworkWrapper from architectures import network_wrapper as nw
from utils import RunPhase, Signal import utils
class ValueOptimizationAgent(Agent): class ValueOptimizationAgent(agent.Agent):
def __init__(self, env, tuning_parameters, replicated_device=None, thread_id=0, create_target_network=True): def __init__(self, env, tuning_parameters, replicated_device=None, thread_id=0, create_target_network=True):
Agent.__init__(self, env, tuning_parameters, replicated_device, thread_id) agent.Agent.__init__(self, env, tuning_parameters, replicated_device, thread_id)
self.main_network = NetworkWrapper(tuning_parameters, create_target_network, self.has_global, 'main', self.main_network = nw.NetworkWrapper(tuning_parameters, create_target_network, self.has_global, 'main',
self.replicated_device, self.worker_device) self.replicated_device, self.worker_device)
self.networks.append(self.main_network) self.networks.append(self.main_network)
self.q_values = Signal("Q") self.q_values = utils.Signal("Q")
self.signals.append(self.q_values) self.signals.append(self.q_values)
self.reset_game(do_not_reset_env=True) self.reset_game(do_not_reset_env=True)
@@ -47,12 +46,12 @@ class ValueOptimizationAgent(Agent):
'require exploration policies which return a single action.' 'require exploration policies which return a single action.'
).format(policy.__class__.__name__)) ).format(policy.__class__.__name__))
def choose_action(self, curr_state, phase=RunPhase.TRAIN): def choose_action(self, curr_state, phase=utils.RunPhase.TRAIN):
prediction = self.get_prediction(curr_state) prediction = self.get_prediction(curr_state)
actions_q_values = self.get_q_values(prediction) actions_q_values = self.get_q_values(prediction)
# choose action according to the exploration policy and the current phase (evaluating or training the agent) # choose action according to the exploration policy and the current phase (evaluating or training the agent)
if phase == RunPhase.TRAIN: if phase == utils.RunPhase.TRAIN:
exploration_policy = self.exploration_policy exploration_policy = self.exploration_policy
else: else:
exploration_policy = self.evaluation_exploration_policy exploration_policy = self.evaluation_exploration_policy

View File

@@ -13,19 +13,16 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# #
import logger
from architectures.architecture import *
from logger import failed_imports
try:
from architectures.tensorflow_components.general_network import *
from architectures.tensorflow_components.architecture import *
except ImportError:
failed_imports.append("TensorFlow")
try: try:
from architectures.neon_components.general_network import * from architectures.tensorflow_components import general_network as ts_gn
from architectures.neon_components.architecture import * from architectures.tensorflow_components import architecture as ts_arch
except ImportError: except ImportError:
failed_imports.append("Neon") logger.failed_imports.append("TensorFlow")
from architectures.network_wrapper import * try:
from architectures.neon_components import general_network as neon_gn
from architectures.neon_components import architecture as neon_arch
except ImportError:
logger.failed_imports.append("Neon")

View File

@@ -14,8 +14,6 @@
# limitations under the License. # limitations under the License.
# #
from configurations import Preset
class Architecture(object): class Architecture(object):
def __init__(self, tuning_parameters, name=""): def __init__(self, tuning_parameters, name=""):

View File

@@ -13,19 +13,16 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# #
import sys
import copy
from ngraph.frontends.neon import *
import ngraph as ng import ngraph as ng
from architectures.architecture import *
import numpy as np import numpy as np
from utils import *
from architectures import architecture
import utils
class NeonArchitecture(Architecture): class NeonArchitecture(architecture.Architecture):
def __init__(self, tuning_parameters, name="", global_network=None, network_is_local=True): def __init__(self, tuning_parameters, name="", global_network=None, network_is_local=True):
Architecture.__init__(self, tuning_parameters, name) architecture.Architecture.__init__(self, tuning_parameters, name)
assert tuning_parameters.agent.neon_support, 'Neon is not supported for this agent' assert tuning_parameters.agent.neon_support, 'Neon is not supported for this agent'
self.clip_error = tuning_parameters.clip_gradients self.clip_error = tuning_parameters.clip_gradients
self.total_loss = None self.total_loss = None
@@ -113,8 +110,8 @@ class NeonArchitecture(Architecture):
def accumulate_gradients(self, inputs, targets): def accumulate_gradients(self, inputs, targets):
# Neon doesn't currently allow separating the grads calculation and grad apply operations # Neon doesn't currently allow separating the grads calculation and grad apply operations
# so this feature is not currently available. instead we do a full training iteration # so this feature is not currently available. instead we do a full training iteration
inputs = force_list(inputs) inputs = utils.force_list(inputs)
targets = force_list(targets) targets = utils.force_list(targets)
for idx, input in enumerate(inputs): for idx, input in enumerate(inputs):
inputs[idx] = input.swapaxes(0, -1) inputs[idx] = input.swapaxes(0, -1)

View File

@@ -13,10 +13,9 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# #
import ngraph.frontends.neon as neon
import ngraph as ng import ngraph as ng
from ngraph.util.names import name_scope import ngraph.frontends.neon as neon
import ngraph.util.names as ngraph_names
class InputEmbedder(object): class InputEmbedder(object):
@@ -31,7 +30,7 @@ class InputEmbedder(object):
self.output = None self.output = None
def __call__(self, prev_input_placeholder=None): def __call__(self, prev_input_placeholder=None):
with name_scope(self.get_name()): with ngraph_names.name_scope(self.get_name()):
# create the input axes # create the input axes
axes = [] axes = []
if len(self.input_size) == 2: if len(self.input_size) == 2:

View File

@@ -13,15 +13,18 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# #
import ngraph as ng
from ngraph.frontends import neon
from ngraph.util import names as ngraph_names
from architectures.neon_components.embedders import * from architectures.neon_components import architecture
from architectures.neon_components.heads import * from architectures.neon_components import embedders
from architectures.neon_components.middleware import * from architectures.neon_components import middleware
from architectures.neon_components.architecture import * from architectures.neon_components import heads
from configurations import InputTypes, OutputTypes, MiddlewareTypes import configurations as conf
class GeneralNeonNetwork(NeonArchitecture): class GeneralNeonNetwork(architecture.NeonArchitecture):
def __init__(self, tuning_parameters, name="", global_network=None, network_is_local=True): def __init__(self, tuning_parameters, name="", global_network=None, network_is_local=True):
self.global_network = global_network self.global_network = global_network
self.network_is_local = network_is_local self.network_is_local = network_is_local
@@ -34,7 +37,7 @@ class GeneralNeonNetwork(NeonArchitecture):
self.activation_function = self.get_activation_function( self.activation_function = self.get_activation_function(
tuning_parameters.agent.hidden_layers_activation_function) tuning_parameters.agent.hidden_layers_activation_function)
NeonArchitecture.__init__(self, tuning_parameters, name, global_network, network_is_local) architecture.NeonArchitecture.__init__(self, tuning_parameters, name, global_network, network_is_local)
def get_activation_function(self, activation_function_string): def get_activation_function(self, activation_function_string):
activation_functions = { activation_functions = {
@@ -53,36 +56,36 @@ class GeneralNeonNetwork(NeonArchitecture):
# the observation can be either an image or a vector # the observation can be either an image or a vector
def get_observation_embedding(with_timestep=False): def get_observation_embedding(with_timestep=False):
if self.input_height > 1: if self.input_height > 1:
return ImageEmbedder((self.input_depth, self.input_height, self.input_width), self.batch_size, return embedders.ImageEmbedder((self.input_depth, self.input_height, self.input_width), self.batch_size,
name="observation") name="observation")
else: else:
return VectorEmbedder((self.input_depth, self.input_width + int(with_timestep)), self.batch_size, return embedders.VectorEmbedder((self.input_depth, self.input_width + int(with_timestep)), self.batch_size,
name="observation") name="observation")
input_mapping = { input_mapping = {
InputTypes.Observation: get_observation_embedding(), conf.InputTypes.Observation: get_observation_embedding(),
InputTypes.Measurements: VectorEmbedder(self.measurements_size, self.batch_size, name="measurements"), conf.InputTypes.Measurements: embedders.VectorEmbedder(self.measurements_size, self.batch_size, name="measurements"),
InputTypes.GoalVector: VectorEmbedder(self.measurements_size, self.batch_size, name="goal_vector"), conf.InputTypes.GoalVector: embedders.VectorEmbedder(self.measurements_size, self.batch_size, name="goal_vector"),
InputTypes.Action: VectorEmbedder((self.num_actions,), self.batch_size, name="action"), conf.InputTypes.Action: embedders.VectorEmbedder((self.num_actions,), self.batch_size, name="action"),
InputTypes.TimedObservation: get_observation_embedding(with_timestep=True), conf.InputTypes.TimedObservation: get_observation_embedding(with_timestep=True),
} }
return input_mapping[embedder_type] return input_mapping[embedder_type]
def get_middleware_embedder(self, middleware_type): def get_middleware_embedder(self, middleware_type):
return {MiddlewareTypes.LSTM: None, # LSTM over Neon is currently not supported in Coach return {conf.MiddlewareTypes.LSTM: None, # LSTM over Neon is currently not supported in Coach
MiddlewareTypes.FC: FC_Embedder}.get(middleware_type)(self.activation_function) conf.MiddlewareTypes.FC: middleware.FC_Embedder}.get(middleware_type)(self.activation_function)
def get_output_head(self, head_type, head_idx, loss_weight=1.): def get_output_head(self, head_type, head_idx, loss_weight=1.):
output_mapping = { output_mapping = {
OutputTypes.Q: QHead, conf.OutputTypes.Q: heads.QHead,
OutputTypes.DuelingQ: DuelingQHead, conf.OutputTypes.DuelingQ: heads.DuelingQHead,
OutputTypes.V: None, # Policy Optimization algorithms over Neon are currently not supported in Coach conf.OutputTypes.V: None, # Policy Optimization algorithms over Neon are currently not supported in Coach
OutputTypes.Pi: None, # Policy Optimization algorithms over Neon are currently not supported in Coach conf.OutputTypes.Pi: None, # Policy Optimization algorithms over Neon are currently not supported in Coach
OutputTypes.MeasurementsPrediction: None, # DFP over Neon is currently not supported in Coach conf.OutputTypes.MeasurementsPrediction: None, # DFP over Neon is currently not supported in Coach
OutputTypes.DNDQ: None, # NEC over Neon is currently not supported in Coach conf.OutputTypes.DNDQ: None, # NEC over Neon is currently not supported in Coach
OutputTypes.NAF: None, # NAF over Neon is currently not supported in Coach conf.OutputTypes.NAF: None, # NAF over Neon is currently not supported in Coach
OutputTypes.PPO: None, # PPO over Neon is currently not supported in Coach conf.OutputTypes.PPO: None, # PPO over Neon is currently not supported in Coach
OutputTypes.PPO_V: None # PPO over Neon is currently not supported in Coach conf.OutputTypes.PPO_V: None # PPO over Neon is currently not supported in Coach
} }
return output_mapping[head_type](self.tp, head_idx, loss_weight, self.network_is_local) return output_mapping[head_type](self.tp, head_idx, loss_weight, self.network_is_local)
@@ -104,7 +107,7 @@ class GeneralNeonNetwork(NeonArchitecture):
done_creating_input_placeholders = False done_creating_input_placeholders = False
for network_idx in range(self.num_networks): for network_idx in range(self.num_networks):
with name_scope('network_{}'.format(network_idx)): with ngraph_names.name_scope('network_{}'.format(network_idx)):
#################### ####################
# Input Embeddings # # Input Embeddings #
#################### ####################

View File

@@ -13,13 +13,12 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# #
import ngraph as ng import ngraph as ng
from ngraph.util.names import name_scope from ngraph.frontends import neon
import ngraph.frontends.neon as neon from ngraph.util import names as ngraph_names
import numpy as np
from utils import force_list import utils
from architectures.neon_components.losses import * from architectures.neon_components import losses
class Head(object): class Head(object):
@@ -30,7 +29,7 @@ class Head(object):
self.loss = [] self.loss = []
self.loss_type = [] self.loss_type = []
self.regularizations = [] self.regularizations = []
self.loss_weight = force_list(loss_weight) self.loss_weight = utils.force_list(loss_weight)
self.weights_init = neon.GlorotInit() self.weights_init = neon.GlorotInit()
self.biases_init = neon.ConstantInit() self.biases_init = neon.ConstantInit()
self.target = [] self.target = []
@@ -44,15 +43,15 @@ class Head(object):
:param input_layer: the input to the graph :param input_layer: the input to the graph
:return: the output of the last layer and the target placeholder :return: the output of the last layer and the target placeholder
""" """
with name_scope(self.get_name()): with ngraph_names.name_scope(self.get_name()):
self._build_module(input_layer) self._build_module(input_layer)
self.output = force_list(self.output) self.output = utils.force_list(self.output)
self.target = force_list(self.target) self.target = utils.force_list(self.target)
self.input = force_list(self.input) self.input = utils.force_list(self.input)
self.loss_type = force_list(self.loss_type) self.loss_type = utils.force_list(self.loss_type)
self.loss = force_list(self.loss) self.loss = utils.force_list(self.loss)
self.regularizations = force_list(self.regularizations) self.regularizations = utils.force_list(self.regularizations)
if self.is_local: if self.is_local:
self.set_loss() self.set_loss()
@@ -106,7 +105,7 @@ class QHead(Head):
if tuning_parameters.agent.replace_mse_with_huber_loss: if tuning_parameters.agent.replace_mse_with_huber_loss:
raise Exception("huber loss is not supported in neon") raise Exception("huber loss is not supported in neon")
else: else:
self.loss_type = mean_squared_error self.loss_type = losses.mean_squared_error
def _build_module(self, input_layer): def _build_module(self, input_layer):
# Standard Q Network # Standard Q Network
@@ -159,7 +158,7 @@ class MeasurementsPredictionHead(Head):
if tuning_parameters.agent.replace_mse_with_huber_loss: if tuning_parameters.agent.replace_mse_with_huber_loss:
raise Exception("huber loss is not supported in neon") raise Exception("huber loss is not supported in neon")
else: else:
self.loss_type = mean_squared_error self.loss_type = losses.mean_squared_error
def _build_module(self, input_layer): def _build_module(self, input_layer):
# This is almost exactly the same as Dueling Network but we predict the future measurements for each action # This is almost exactly the same as Dueling Network but we predict the future measurements for each action
@@ -167,7 +166,7 @@ class MeasurementsPredictionHead(Head):
multistep_measurements_size = self.measurements_size[0] * self.num_predicted_steps_ahead multistep_measurements_size = self.measurements_size[0] * self.num_predicted_steps_ahead
# actions expectation tower (expectation stream) - E # actions expectation tower (expectation stream) - E
with name_scope("expectation_stream"): with ngraph_names.name_scope("expectation_stream"):
expectation_stream = neon.Sequential([ expectation_stream = neon.Sequential([
neon.Affine(nout=256, activation=neon.Rectlin(), neon.Affine(nout=256, activation=neon.Rectlin(),
weight_init=self.weights_init, bias_init=self.biases_init), weight_init=self.weights_init, bias_init=self.biases_init),
@@ -176,7 +175,7 @@ class MeasurementsPredictionHead(Head):
])(input_layer) ])(input_layer)
# action fine differences tower (action stream) - A # action fine differences tower (action stream) - A
with name_scope("action_stream"): with ngraph_names.name_scope("action_stream"):
action_stream_unnormalized = neon.Sequential([ action_stream_unnormalized = neon.Sequential([
neon.Affine(nout=256, activation=neon.Rectlin(), neon.Affine(nout=256, activation=neon.Rectlin(),
weight_init=self.weights_init, bias_init=self.biases_init), weight_init=self.weights_init, bias_init=self.biases_init),
@@ -191,4 +190,3 @@ class MeasurementsPredictionHead(Head):
# merge to future measurements predictions # merge to future measurements predictions
self.output = repeated_expectation_stream + action_stream self.output = repeated_expectation_stream + action_stream

View File

@@ -13,15 +13,12 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# #
import ngraph as ng import ngraph as ng
import ngraph.frontends.neon as neon from ngraph.util import names as ngraph_names
from ngraph.util.names import name_scope
import numpy as np
def mean_squared_error(targets, outputs, weights=1.0, scope=""): def mean_squared_error(targets, outputs, weights=1.0, scope=""):
with name_scope(scope): with ngraph_names.name_scope(scope):
# TODO: reduce mean over the action axis # TODO: reduce mean over the action axis
loss = ng.squared_L2(targets - outputs) loss = ng.squared_L2(targets - outputs)
weighted_loss = loss * weights weighted_loss = loss * weights

View File

@@ -13,11 +13,8 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# #
import ngraph as ng
import ngraph.frontends.neon as neon import ngraph.frontends.neon as neon
from ngraph.util.names import name_scope from ngraph.util import names as ngraph_names
import numpy as np
class MiddlewareEmbedder(object): class MiddlewareEmbedder(object):
@@ -30,7 +27,7 @@ class MiddlewareEmbedder(object):
self.activation_function = activation_function self.activation_function = activation_function
def __call__(self, input_layer): def __call__(self, input_layer):
with name_scope(self.get_name()): with ngraph_names.name_scope(self.get_name()):
self.input = input_layer self.input = input_layer
self._build_module() self._build_module()

View File

@@ -13,20 +13,21 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# #
import os
import collections
from collections import OrderedDict import configurations as conf
from configurations import Preset, Frameworks import logger
from logger import *
try: try:
import tensorflow as tf import tensorflow as tf
from architectures.tensorflow_components.general_network import GeneralTensorFlowNetwork from architectures.tensorflow_components import general_network as tf_net
except ImportError: except ImportError:
failed_imports.append("TensorFlow") logger.failed_imports.append("TensorFlow")
try: try:
from architectures.neon_components.general_network import GeneralNeonNetwork from architectures.neon_components import general_network as neon_net
except ImportError: except ImportError:
failed_imports.append("Neon") logger.failed_imports.append("Neon")
class NetworkWrapper(object): class NetworkWrapper(object):
@@ -50,12 +51,12 @@ class NetworkWrapper(object):
self.name = name self.name = name
self.sess = tuning_parameters.sess self.sess = tuning_parameters.sess
if self.tp.framework == Frameworks.TensorFlow: if self.tp.framework == conf.Frameworks.TensorFlow:
general_network = GeneralTensorFlowNetwork general_network = tf_net.GeneralTensorFlowNetwork
elif self.tp.framework == Frameworks.Neon: elif self.tp.framework == conf.Frameworks.Neon:
general_network = GeneralNeonNetwork general_network = neon_net.GeneralNeonNetwork
else: else:
raise Exception("{} Framework is not supported".format(Frameworks().to_string(self.tp.framework))) raise Exception("{} Framework is not supported".format(conf.Frameworks().to_string(self.tp.framework)))
# Global network - the main network shared between threads # Global network - the main network shared between threads
self.global_network = None self.global_network = None
@@ -77,13 +78,13 @@ class NetworkWrapper(object):
self.target_network = general_network(tuning_parameters, '{}/target'.format(name), self.target_network = general_network(tuning_parameters, '{}/target'.format(name),
network_is_local=True) network_is_local=True)
if not self.tp.distributed and self.tp.framework == Frameworks.TensorFlow: if not self.tp.distributed and self.tp.framework == conf.Frameworks.TensorFlow:
variables_to_restore = tf.global_variables() variables_to_restore = tf.global_variables()
variables_to_restore = [v for v in variables_to_restore if '/online' in v.name] variables_to_restore = [v for v in variables_to_restore if '/online' in v.name]
self.model_saver = tf.train.Saver(variables_to_restore) self.model_saver = tf.train.Saver(variables_to_restore)
if self.tp.sess and self.tp.checkpoint_restore_dir: if self.tp.sess and self.tp.checkpoint_restore_dir:
checkpoint = tf.train.latest_checkpoint(self.tp.checkpoint_restore_dir) checkpoint = tf.train.latest_checkpoint(self.tp.checkpoint_restore_dir)
screen.log_title("Loading checkpoint: {}".format(checkpoint)) logger.screen.log_title("Loading checkpoint: {}".format(checkpoint))
self.model_saver.restore(self.tp.sess, checkpoint) self.model_saver.restore(self.tp.sess, checkpoint)
self.update_target_network() self.update_target_network()
@@ -178,8 +179,8 @@ class NetworkWrapper(object):
def save_model(self, model_id): def save_model(self, model_id):
saved_model_path = self.model_saver.save(self.tp.sess, os.path.join(self.tp.save_model_dir, saved_model_path = self.model_saver.save(self.tp.sess, os.path.join(self.tp.save_model_dir,
str(model_id) + '.ckpt')) str(model_id) + '.ckpt'))
screen.log_dict( logger.screen.log_dict(
OrderedDict([ collections.OrderedDict([
("Saving model", saved_model_path), ("Saving model", saved_model_path),
]), ]),
prefix="Checkpoint" prefix="Checkpoint"

View File

@@ -15,12 +15,11 @@
# #
import time import time
import numpy as np
import tensorflow as tf import tensorflow as tf
from architectures.architecture import Architecture from architectures import architecture
from utils import force_list, squeeze_list import configurations as conf
from configurations import Preset, MiddlewareTypes import utils
def variable_summaries(var): def variable_summaries(var):
"""Attach a lot of summaries to a Tensor (for TensorBoard visualization).""" """Attach a lot of summaries to a Tensor (for TensorBoard visualization)."""
@@ -37,14 +36,14 @@ def variable_summaries(var):
tf.summary.scalar('min', tf.reduce_min(var)) tf.summary.scalar('min', tf.reduce_min(var))
tf.summary.histogram('histogram', var) tf.summary.histogram('histogram', var)
class TensorFlowArchitecture(Architecture): class TensorFlowArchitecture(architecture.Architecture):
def __init__(self, tuning_parameters, name="", global_network=None, network_is_local=True): def __init__(self, tuning_parameters, name="", global_network=None, network_is_local=True):
""" """
:param tuning_parameters: The parameters used for running the algorithm :param tuning_parameters: The parameters used for running the algorithm
:type tuning_parameters: Preset :type tuning_parameters: Preset
:param name: The name of the network :param name: The name of the network
""" """
Architecture.__init__(self, tuning_parameters, name) architecture.Architecture.__init__(self, tuning_parameters, name)
self.middleware_embedder = None self.middleware_embedder = None
self.network_is_local = network_is_local self.network_is_local = network_is_local
assert tuning_parameters.agent.tensorflow_support, 'TensorFlow is not supported for this agent' assert tuning_parameters.agent.tensorflow_support, 'TensorFlow is not supported for this agent'
@@ -174,7 +173,7 @@ class TensorFlowArchitecture(Architecture):
feed_dict = self._feed_dict(inputs) feed_dict = self._feed_dict(inputs)
# feed targets # feed targets
targets = force_list(targets) targets = utils.force_list(targets)
for placeholder_idx, target in enumerate(targets): for placeholder_idx, target in enumerate(targets):
feed_dict[self.targets[placeholder_idx]] = target feed_dict[self.targets[placeholder_idx]] = target
@@ -186,13 +185,13 @@ class TensorFlowArchitecture(Architecture):
else: else:
fetches.append(self.tensor_gradients) fetches.append(self.tensor_gradients)
fetches += [self.total_loss, self.losses] fetches += [self.total_loss, self.losses]
if self.tp.agent.middleware_type == MiddlewareTypes.LSTM: if self.tp.agent.middleware_type == conf.MiddlewareTypes.LSTM:
fetches.append(self.middleware_embedder.state_out) fetches.append(self.middleware_embedder.state_out)
additional_fetches_start_idx = len(fetches) additional_fetches_start_idx = len(fetches)
fetches += additional_fetches fetches += additional_fetches
# feed the lstm state if necessary # feed the lstm state if necessary
if self.tp.agent.middleware_type == MiddlewareTypes.LSTM: if self.tp.agent.middleware_type == conf.MiddlewareTypes.LSTM:
# we can't always assume that we are starting from scratch here can we? # we can't always assume that we are starting from scratch here can we?
feed_dict[self.middleware_embedder.c_in] = self.middleware_embedder.c_init feed_dict[self.middleware_embedder.c_in] = self.middleware_embedder.c_init
feed_dict[self.middleware_embedder.h_in] = self.middleware_embedder.h_init feed_dict[self.middleware_embedder.h_in] = self.middleware_embedder.h_init
@@ -206,7 +205,7 @@ class TensorFlowArchitecture(Architecture):
# extract the fetches # extract the fetches
norm_unclipped_grads, grads, total_loss, losses = result[:4] norm_unclipped_grads, grads, total_loss, losses = result[:4]
if self.tp.agent.middleware_type == MiddlewareTypes.LSTM: if self.tp.agent.middleware_type == conf.MiddlewareTypes.LSTM:
(self.curr_rnn_c_in, self.curr_rnn_h_in) = result[4] (self.curr_rnn_c_in, self.curr_rnn_h_in) = result[4]
fetched_tensors = [] fetched_tensors = []
if len(additional_fetches) > 0: if len(additional_fetches) > 0:
@@ -308,7 +307,7 @@ class TensorFlowArchitecture(Architecture):
if outputs is None: if outputs is None:
outputs = self.outputs outputs = self.outputs
if self.tp.agent.middleware_type == MiddlewareTypes.LSTM: if self.tp.agent.middleware_type == conf.MiddlewareTypes.LSTM:
feed_dict[self.middleware_embedder.c_in] = self.curr_rnn_c_in feed_dict[self.middleware_embedder.c_in] = self.curr_rnn_c_in
feed_dict[self.middleware_embedder.h_in] = self.curr_rnn_h_in feed_dict[self.middleware_embedder.h_in] = self.curr_rnn_h_in
@@ -317,7 +316,7 @@ class TensorFlowArchitecture(Architecture):
output = self.tp.sess.run(outputs, feed_dict) output = self.tp.sess.run(outputs, feed_dict)
if squeeze_output: if squeeze_output:
output = squeeze_list(output) output = utils.squeeze_list(output)
return output return output


@@ -13,8 +13,8 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# #
import tensorflow as tf import tensorflow as tf
from configurations import EmbedderComplexity from configurations import EmbedderComplexity


@@ -13,15 +13,16 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# #
import tensorflow as tf
from architectures.tensorflow_components.embedders import * from architectures.tensorflow_components import architecture
from architectures.tensorflow_components.heads import * from architectures.tensorflow_components import embedders
from architectures.tensorflow_components.middleware import * from architectures.tensorflow_components import middleware
from architectures.tensorflow_components.architecture import * from architectures.tensorflow_components import heads
from configurations import InputTypes, OutputTypes, MiddlewareTypes import configurations as conf
class GeneralTensorFlowNetwork(TensorFlowArchitecture): class GeneralTensorFlowNetwork(architecture.TensorFlowArchitecture):
""" """
A generalized version of all possible networks implemented using tensorflow. A generalized version of all possible networks implemented using tensorflow.
""" """
@@ -37,7 +38,7 @@ class GeneralTensorFlowNetwork(TensorFlowArchitecture):
self.activation_function = self.get_activation_function( self.activation_function = self.get_activation_function(
tuning_parameters.agent.hidden_layers_activation_function) tuning_parameters.agent.hidden_layers_activation_function)
TensorFlowArchitecture.__init__(self, tuning_parameters, name, global_network, network_is_local) architecture.TensorFlowArchitecture.__init__(self, tuning_parameters, name, global_network, network_is_local)
def get_activation_function(self, activation_function_string): def get_activation_function(self, activation_function_string):
activation_functions = { activation_functions = {
@@ -56,37 +57,37 @@ class GeneralTensorFlowNetwork(TensorFlowArchitecture):
# the observation can be either an image or a vector # the observation can be either an image or a vector
def get_observation_embedding(with_timestep=False): def get_observation_embedding(with_timestep=False):
if self.input_height > 1: if self.input_height > 1:
return ImageEmbedder((self.input_height, self.input_width, self.input_depth), name="observation", return embedders.ImageEmbedder((self.input_height, self.input_width, self.input_depth), name="observation",
input_rescaler=self.tp.agent.input_rescaler) input_rescaler=self.tp.agent.input_rescaler)
else: else:
return VectorEmbedder((self.input_width + int(with_timestep), self.input_depth), name="observation") return embedders.VectorEmbedder((self.input_width + int(with_timestep), self.input_depth), name="observation")
input_mapping = { input_mapping = {
InputTypes.Observation: get_observation_embedding(), conf.InputTypes.Observation: get_observation_embedding(),
InputTypes.Measurements: VectorEmbedder(self.measurements_size, name="measurements"), conf.InputTypes.Measurements: embedders.VectorEmbedder(self.measurements_size, name="measurements"),
InputTypes.GoalVector: VectorEmbedder(self.measurements_size, name="goal_vector"), conf.InputTypes.GoalVector: embedders.VectorEmbedder(self.measurements_size, name="goal_vector"),
InputTypes.Action: VectorEmbedder((self.num_actions,), name="action"), conf.InputTypes.Action: embedders.VectorEmbedder((self.num_actions,), name="action"),
InputTypes.TimedObservation: get_observation_embedding(with_timestep=True), conf.InputTypes.TimedObservation: get_observation_embedding(with_timestep=True),
} }
return input_mapping[embedder_type] return input_mapping[embedder_type]
def get_middleware_embedder(self, middleware_type): def get_middleware_embedder(self, middleware_type):
return {MiddlewareTypes.LSTM: LSTM_Embedder, return {conf.MiddlewareTypes.LSTM: middleware.LSTM_Embedder,
MiddlewareTypes.FC: FC_Embedder}.get(middleware_type)(self.activation_function) conf.MiddlewareTypes.FC: middleware.FC_Embedder}.get(middleware_type)(self.activation_function)
def get_output_head(self, head_type, head_idx, loss_weight=1.): def get_output_head(self, head_type, head_idx, loss_weight=1.):
output_mapping = { output_mapping = {
OutputTypes.Q: QHead, conf.OutputTypes.Q: heads.QHead,
OutputTypes.DuelingQ: DuelingQHead, conf.OutputTypes.DuelingQ: heads.DuelingQHead,
OutputTypes.V: VHead, conf.OutputTypes.V: heads.VHead,
OutputTypes.Pi: PolicyHead, conf.OutputTypes.Pi: heads.PolicyHead,
OutputTypes.MeasurementsPrediction: MeasurementsPredictionHead, conf.OutputTypes.MeasurementsPrediction: heads.MeasurementsPredictionHead,
OutputTypes.DNDQ: DNDQHead, conf.OutputTypes.DNDQ: heads.DNDQHead,
OutputTypes.NAF: NAFHead, conf.OutputTypes.NAF: heads.NAFHead,
OutputTypes.PPO: PPOHead, conf.OutputTypes.PPO: heads.PPOHead,
OutputTypes.PPO_V: PPOVHead, conf.OutputTypes.PPO_V: heads.PPOVHead,
OutputTypes.CategoricalQ: CategoricalQHead, conf.OutputTypes.CategoricalQ: heads.CategoricalQHead,
OutputTypes.QuantileRegressionQ: QuantileRegressionQHead conf.OutputTypes.QuantileRegressionQ: heads.QuantileRegressionQHead
} }
return output_mapping[head_type](self.tp, head_idx, loss_weight, self.network_is_local) return output_mapping[head_type](self.tp, head_idx, loss_weight, self.network_is_local)
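
The hunk above swaps bare names for module-qualified ones, but the dispatch mechanism itself is unchanged: a plain dictionary from configuration enum members to head classes. A minimal, self-contained sketch of that pattern follows; every name in it is hypothetical and only stands in for Coach's real classes.

import enum

class OutputTypes(enum.Enum):
    Q = 1
    V = 2

class QHead:
    pass

class VHead:
    pass

def get_output_head(head_type):
    # dispatch on the enum member instead of an if/elif chain
    output_mapping = {
        OutputTypes.Q: QHead,
        OutputTypes.V: VHead,
    }
    return output_mapping[head_type]()

print(type(get_output_head(OutputTypes.Q)).__name__)  # -> QHead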


@@ -13,10 +13,10 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# #
import tensorflow as tf import tensorflow as tf
import numpy as np import numpy as np
from utils import force_list
import utils
# Used to initialize weights for policy and value output layers # Used to initialize weights for policy and value output layers
@@ -36,7 +36,7 @@ class Head(object):
self.loss = [] self.loss = []
self.loss_type = [] self.loss_type = []
self.regularizations = [] self.regularizations = []
self.loss_weight = force_list(loss_weight) self.loss_weight = utils.force_list(loss_weight)
self.target = [] self.target = []
self.input = [] self.input = []
self.is_local = is_local self.is_local = is_local
@@ -50,12 +50,12 @@ class Head(object):
with tf.variable_scope(self.get_name(), initializer=tf.contrib.layers.xavier_initializer()): with tf.variable_scope(self.get_name(), initializer=tf.contrib.layers.xavier_initializer()):
self._build_module(input_layer) self._build_module(input_layer)
self.output = force_list(self.output) self.output = utils.force_list(self.output)
self.target = force_list(self.target) self.target = utils.force_list(self.target)
self.input = force_list(self.input) self.input = utils.force_list(self.input)
self.loss_type = force_list(self.loss_type) self.loss_type = utils.force_list(self.loss_type)
self.loss = force_list(self.loss) self.loss = utils.force_list(self.loss)
self.regularizations = force_list(self.regularizations) self.regularizations = utils.force_list(self.regularizations)
if self.is_local: if self.is_local:
self.set_loss() self.set_loss()
self._post_build() self._post_build()
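
The Head class above normalizes every attribute through utils.force_list. The helper itself is not part of this diff; its usual shape is roughly the following (assumed behaviour, shown only for context):

def force_list(value):
    """Return value unchanged if it is already a list, otherwise wrap it in one."""
    return value if isinstance(value, list) else [value]

print(force_list(3))       # [3]
print(force_list([1, 2]))  # [1, 2]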


@@ -13,7 +13,6 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# #
import tensorflow as tf import tensorflow as tf
import numpy as np import numpy as np


@@ -13,7 +13,6 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# #
import tensorflow as tf import tensorflow as tf
import numpy as np import numpy as np


@@ -13,46 +13,42 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# #
import sys, inspect, re
import os
import json
import presets
from presets import *
from utils import set_gpu, list_all_classes_in_module
from architectures import *
from environments import *
from agents import *
from utils import *
from logger import screen, logger
import argparse
from subprocess import Popen
import datetime
import presets
import atexit import atexit
import sys import json
import os
import re
import subprocess import subprocess
from threading import Thread import sys
import time
if len(set(failed_imports)) > 0: import agents
screen.warning("Warning: failed to import the following packages - {}".format(', '.join(set(failed_imports)))) import argparse
import configurations as conf
import environments
import logger
import presets
import utils
if len(set(logger.failed_imports)) > 0:
logger.screen.warning("Warning: failed to import the following packages - {}".format(', '.join(set(logger.failed_imports))))
def set_framework(framework_type): def set_framework(framework_type):
# choosing neural network framework # choosing neural network framework
framework = Frameworks().get(framework_type) framework = conf.Frameworks().get(framework_type)
sess = None sess = None
if framework == Frameworks.TensorFlow: if framework == conf.Frameworks.TensorFlow:
import tensorflow as tf import tensorflow as tf
config = tf.ConfigProto() config = tf.ConfigProto()
config.allow_soft_placement = True config.allow_soft_placement = True
config.gpu_options.allow_growth = True config.gpu_options.allow_growth = True
config.gpu_options.per_process_gpu_memory_fraction = 0.2 config.gpu_options.per_process_gpu_memory_fraction = 0.2
sess = tf.Session(config=config) sess = tf.Session(config=config)
elif framework == Frameworks.Neon: elif framework == conf.Frameworks.Neon:
import ngraph as ng import ngraph as ng
sess = ng.transformers.make_transformer() sess = ng.transformers.make_transformer()
screen.log_title("Using {} framework".format(Frameworks().to_string(framework))) logger.screen.log_title("Using {} framework".format(conf.Frameworks().to_string(framework)))
return sess return sess
@@ -66,8 +62,8 @@ def check_input_and_fill_run_dict(parser):
# list available presets # list available presets
if args.list: if args.list:
presets_lists = list_all_classes_in_module(presets) presets_lists = utils.list_all_classes_in_module(presets)
screen.log_title("Available Presets:") logger.screen.log_title("Available Presets:")
for preset in presets_lists: for preset in presets_lists:
print(preset) print(preset)
sys.exit(0) sys.exit(0)
@@ -77,28 +73,28 @@ def check_input_and_fill_run_dict(parser):
# num_workers = int(args.num_workers) # num_workers = int(args.num_workers)
num_workers = int(re.match("^\d+$", args.num_workers).group(0)) num_workers = int(re.match("^\d+$", args.num_workers).group(0))
except ValueError: except ValueError:
screen.error("Parameter num_workers should be an integer.") logger.screen.error("Parameter num_workers should be an integer.")
preset_names = list_all_classes_in_module(presets) preset_names = utils.list_all_classes_in_module(presets)
if args.preset is not None and args.preset not in preset_names: if args.preset is not None and args.preset not in preset_names:
screen.error("A non-existing preset was selected. ") logger.screen.error("A non-existing preset was selected. ")
if args.checkpoint_restore_dir is not None and not os.path.exists(args.checkpoint_restore_dir): if args.checkpoint_restore_dir is not None and not os.path.exists(args.checkpoint_restore_dir):
screen.error("The requested checkpoint folder to load from does not exist. ") logger.screen.error("The requested checkpoint folder to load from does not exist. ")
if args.save_model_sec is not None: if args.save_model_sec is not None:
try: try:
args.save_model_sec = int(args.save_model_sec) args.save_model_sec = int(args.save_model_sec)
except ValueError: except ValueError:
screen.error("Parameter save_model_sec should be an integer.") logger.screen.error("Parameter save_model_sec should be an integer.")
if args.preset is None and (args.agent_type is None or args.environment_type is None if args.preset is None and (args.agent_type is None or args.environment_type is None
or args.exploration_policy_type is None) and not args.play: or args.exploration_policy_type is None) and not args.play:
screen.error('When no preset is given for Coach to run, the user is expected to input the desired agent_type,' logger.screen.error('When no preset is given for Coach to run, the user is expected to input the desired agent_type,'
' environment_type and exploration_policy_type to assemble a preset. ' ' environment_type and exploration_policy_type to assemble a preset. '
'\nAt least one of these parameters was not given.') '\nAt least one of these parameters was not given.')
elif args.preset is None and args.play and args.environment_type is None: elif args.preset is None and args.play and args.environment_type is None:
screen.error('When no preset is given for Coach to run, and the user requests human control over the environment,' logger.screen.error('When no preset is given for Coach to run, and the user requests human control over the environment,'
' the user is expected to input the desired environment_type and level.' ' the user is expected to input the desired environment_type and level.'
'\nAt least one of these parameters was not given.') '\nAt least one of these parameters was not given.')
elif args.preset is None and args.play and args.environment_type: elif args.preset is None and args.play and args.environment_type:
@@ -106,11 +102,11 @@ def check_input_and_fill_run_dict(parser):
args.exploration_policy_type = 'ExplorationParameters' args.exploration_policy_type = 'ExplorationParameters'
# get experiment name and path # get experiment name and path
experiment_name = logger.get_experiment_name(args.experiment_name) experiment_name = logger.logger.get_experiment_name(args.experiment_name)
experiment_path = logger.get_experiment_path(experiment_name) experiment_path = logger.logger.get_experiment_path(experiment_name)
if args.play and num_workers > 1: if args.play and num_workers > 1:
screen.warning("Playing the game as a human is only available with a single worker. " logger.screen.warning("Playing the game as a human is only available with a single worker. "
"The number of workers will be reduced to 1") "The number of workers will be reduced to 1")
num_workers = 1 num_workers = 1
@@ -123,7 +119,7 @@ def check_input_and_fill_run_dict(parser):
run_dict['preset'] = args.preset run_dict['preset'] = args.preset
run_dict['custom_parameter'] = args.custom_parameter run_dict['custom_parameter'] = args.custom_parameter
run_dict['experiment_path'] = experiment_path run_dict['experiment_path'] = experiment_path
run_dict['framework'] = Frameworks().get(args.framework) run_dict['framework'] = conf.Frameworks().get(args.framework)
run_dict['play'] = args.play run_dict['play'] = args.play
run_dict['evaluate'] = args.evaluate# or args.play run_dict['evaluate'] = args.evaluate# or args.play
@@ -251,16 +247,16 @@ if __name__ == "__main__":
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
# dump documentation # dump documentation
logger.set_dump_dir(run_dict['experiment_path'], add_timestamp=True) logger.logger.set_dump_dir(run_dict['experiment_path'], add_timestamp=True)
if not args.no_summary: if not args.no_summary:
atexit.register(logger.summarize_experiment) atexit.register(logger.logger.summarize_experiment)
screen.change_terminal_title(logger.experiment_name) logger.screen.change_terminal_title(logger.logger.experiment_name)
# Single-threaded runs # Single-threaded runs
if run_dict['num_threads'] == 1: if run_dict['num_threads'] == 1:
# set tuning parameters # set tuning parameters
json_run_dict_path = run_dict_to_json(run_dict) json_run_dict_path = run_dict_to_json(run_dict)
tuning_parameters = json_to_preset(json_run_dict_path) tuning_parameters = presets.json_to_preset(json_run_dict_path)
tuning_parameters.sess = set_framework(args.framework) tuning_parameters.sess = set_framework(args.framework)
if args.print_parameters: if args.print_parameters:
@@ -268,8 +264,9 @@ if __name__ == "__main__":
# Single-thread runs # Single-thread runs
tuning_parameters.task_index = 0 tuning_parameters.task_index = 0
env_instance = create_environment(tuning_parameters) env_instance = environments.create_environment(tuning_parameters)
agent = eval(tuning_parameters.agent.type + '(env_instance, tuning_parameters)') agent = eval('agents.' + tuning_parameters.agent.type +
'(env_instance, tuning_parameters)')
# Start the training or evaluation # Start the training or evaluation
if tuning_parameters.evaluate: if tuning_parameters.evaluate:
@@ -282,11 +279,11 @@ if __name__ == "__main__":
assert args.framework.lower() == 'tensorflow', "Distributed training works only with TensorFlow" assert args.framework.lower() == 'tensorflow', "Distributed training works only with TensorFlow"
os.environ["OMP_NUM_THREADS"]="1" os.environ["OMP_NUM_THREADS"]="1"
# set parameter server and workers addresses # set parameter server and workers addresses
ps_hosts = "localhost:{}".format(get_open_port()) ps_hosts = "localhost:{}".format(utils.get_open_port())
worker_hosts = ",".join(["localhost:{}".format(get_open_port()) for i in range(run_dict['num_threads'] + 1)]) worker_hosts = ",".join(["localhost:{}".format(utils.get_open_port()) for i in range(run_dict['num_threads'] + 1)])
# Make sure to disable GPU so that all the workers will use the CPU # Make sure to disable GPU so that all the workers will use the CPU
set_cpu() utils.set_cpu()
# create a parameter server # create a parameter server
cmd = [ cmd = [
@@ -296,9 +293,9 @@ if __name__ == "__main__":
"--worker_hosts={}".format(worker_hosts), "--worker_hosts={}".format(worker_hosts),
"--job_name=ps", "--job_name=ps",
] ]
parameter_server = Popen(cmd) parameter_server = subprocess.Popen(cmd)
screen.log_title("*** Distributed Training ***") logger.screen.log_title("*** Distributed Training ***")
time.sleep(1) time.sleep(1)
# create N training workers and 1 evaluating worker # create N training workers and 1 evaluating worker
@@ -321,7 +318,7 @@ if __name__ == "__main__":
"--job_name=worker", "--job_name=worker",
"--load_json={}".format(json_run_dict_path)] "--load_json={}".format(json_run_dict_path)]
p = Popen(workers_args) p = subprocess.Popen(workers_args)
if i != run_dict['num_threads']: if i != run_dict['num_threads']:
workers.append(p) workers.append(p)
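
The single-threaded branch above still builds the agent with eval('agents.' + tuning_parameters.agent.type + ...). For comparison only, here is a self-contained sketch of resolving a class by name without eval, using just the standard library; collections.Counter is used purely so the example runs on its own.

import importlib

def create_by_name(module_name, class_name, *args, **kwargs):
    # look the class up as an attribute of the imported module
    module = importlib.import_module(module_name)
    cls = getattr(module, class_name)
    return cls(*args, **kwargs)

counter = create_by_name('collections', 'Counter', 'abracadabra')
print(counter.most_common(1))  # [('a', 5)]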


@@ -13,13 +13,13 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# #
from utils import Enum
import json import json
import types import types
import utils
class Frameworks(Enum): class Frameworks(utils.Enum):
TensorFlow = 1 TensorFlow = 1
Neon = 2 Neon = 2


@@ -19,29 +19,24 @@ To run Coach Dashboard, run the following command:
python3 dashboard.py python3 dashboard.py
""" """
from utils import *
import os
import datetime
import sys
import wx
import random
import pandas as pd
from pandas.io.common import EmptyDataError
import numpy as np
import colorsys import colorsys
from bokeh.palettes import Dark2 import datetime
from bokeh.layouts import row, column, widgetbox, Spacer import enum
from bokeh.models import ColumnDataSource, Range1d, LinearAxis, HoverTool, WheelZoomTool, PanTool, Legend import itertools
from bokeh.models.widgets import RadioButtonGroup, MultiSelect, Button, Select, Slider, Div, CheckboxGroup import os
from bokeh.models.glyphs import Patch import random
from bokeh.plotting import figure, show, curdoc
from utils import force_list from bokeh import palettes
from utils import squeeze_list from bokeh import layouts as bl
from itertools import cycle from bokeh import models as bm
from os import listdir from bokeh.models import widgets as bw
from os.path import isfile, join, isdir, basename from bokeh import plotting as bp
from enum import Enum import numpy as np
import pandas as pd
from pandas.io import pandas_common
import wx
import utils
class DialogApp(wx.App): class DialogApp(wx.App):
@@ -67,7 +62,7 @@ class Signal:
self.name = name self.name = name
self.full_name = "{}/{}".format(parent.filename, self.name) self.full_name = "{}/{}".format(parent.filename, self.name)
self.selected = False self.selected = False
self.color = random.choice(Dark2[8]) self.color = random.choice(palettes.Dark2[8])
self.line = None self.line = None
self.bands = None self.bands = None
self.bokeh_source = parent.bokeh_source self.bokeh_source = parent.bokeh_source
@@ -79,12 +74,12 @@ class Signal:
if (len(name.split('/')) == 1 and name == self.name) or '/'.join(name.split('/')[:-1]) == self.name: if (len(name.split('/')) == 1 and name == self.name) or '/'.join(name.split('/')[:-1]) == self.name:
self.sub_signals.append(name) self.sub_signals.append(name)
if len(self.sub_signals) > 1: if len(self.sub_signals) > 1:
self.mean_signal = squeeze_list([name for name in self.sub_signals if 'Mean' in name.split('/')[-1]]) self.mean_signal = utils.squeeze_list([name for name in self.sub_signals if 'Mean' in name.split('/')[-1]])
self.stdev_signal = squeeze_list([name for name in self.sub_signals if 'Stdev' in name.split('/')[-1]]) self.stdev_signal = utils.squeeze_list([name for name in self.sub_signals if 'Stdev' in name.split('/')[-1]])
self.min_signal = squeeze_list([name for name in self.sub_signals if 'Min' in name.split('/')[-1]]) self.min_signal = utils.squeeze_list([name for name in self.sub_signals if 'Min' in name.split('/')[-1]])
self.max_signal = squeeze_list([name for name in self.sub_signals if 'Max' in name.split('/')[-1]]) self.max_signal = utils.squeeze_list([name for name in self.sub_signals if 'Max' in name.split('/')[-1]])
else: else:
self.mean_signal = squeeze_list(self.name) self.mean_signal = utils.squeeze_list(self.name)
self.stdev_signal = None self.stdev_signal = None
self.min_signal = None self.min_signal = None
self.max_signal = None self.max_signal = None
@@ -107,16 +102,16 @@ class Signal:
if self.selected != val: if self.selected != val:
self.selected = val self.selected = val
if self.line: if self.line:
# self.set_color(Dark2[8][current_color]) # self.set_color(palettes.Dark2[8][current_color])
# current_color = (current_color + 1) % len(Dark2[8]) # current_color = (current_color + 1) % len(palettes.Dark2[8])
self.line.visible = self.selected self.line.visible = self.selected
if self.bands: if self.bands:
self.bands.visible = self.selected and self.show_bollinger_bands self.bands.visible = self.selected and self.show_bollinger_bands
elif self.selected: elif self.selected:
# lazy plotting - plot only when selected for the first time # lazy plotting - plot only when selected for the first time
show_spinner() show_spinner()
self.set_color(Dark2[8][current_color]) self.set_color(palettes.Dark2[8][current_color])
current_color = (current_color + 1) % len(Dark2[8]) current_color = (current_color + 1) % len(palettes.Dark2[8])
if self.has_bollinger_bands: if self.has_bollinger_bands:
self.set_bands_source() self.set_bands_source()
self.create_bands() self.create_bands()
@@ -149,7 +144,7 @@ class Signal:
if self.bollinger_bands_source: if self.bollinger_bands_source:
self.bollinger_bands_source.data = source_data self.bollinger_bands_source.data = source_data
else: else:
self.bollinger_bands_source = ColumnDataSource(source_data) self.bollinger_bands_source = bm.ColumnDataSource(source_data)
def change_bollinger_bands_state(self, new_state): def change_bollinger_bands_state(self, new_state):
self.show_bollinger_bands = new_state self.show_bollinger_bands = new_state
@@ -192,11 +187,11 @@ class SignalsFileBase:
def update_source_and_signals(self): def update_source_and_signals(self):
# create bokeh data sources # create bokeh data sources
self.bokeh_source_orig = ColumnDataSource(self.csv) self.bokeh_source_orig = bm.ColumnDataSource(self.csv)
self.bokeh_source_orig.data['index'] = self.bokeh_source_orig.data[x_axis] self.bokeh_source_orig.data['index'] = self.bokeh_source_orig.data[x_axis]
if self.bokeh_source is None: if self.bokeh_source is None:
self.bokeh_source = ColumnDataSource(self.csv) self.bokeh_source = bm.ColumnDataSource(self.csv)
else: else:
# self.bokeh_source.data = self.bokeh_source_orig.data # self.bokeh_source.data = self.bokeh_source_orig.data
# smooth the data if necessary # smooth the data if necessary
@@ -282,7 +277,7 @@ class SignalsFile(SignalsFileBase):
def __init__(self, csv_path, load=True): def __init__(self, csv_path, load=True):
SignalsFileBase.__init__(self) SignalsFileBase.__init__(self)
self.full_csv_path = csv_path self.full_csv_path = csv_path
self.dir, self.filename, _ = break_file_path(csv_path) self.dir, self.filename, _ = utils.break_file_path(csv_path)
if load: if load:
self.load() self.load()
# this helps set the correct x axis # this helps set the correct x axis
@@ -296,7 +291,7 @@ class SignalsFile(SignalsFileBase):
try: try:
self.csv = pd.read_csv(self.full_csv_path) self.csv = pd.read_csv(self.full_csv_path)
break break
except EmptyDataError: except pandas_common.EmptyDataError:
self.csv = None self.csv = None
continue continue
self.csv = self.csv.interpolate() self.csv = self.csv.interpolate()
@@ -327,7 +322,7 @@ class SignalsFilesGroup(SignalsFileBase):
else: else:
# get the common directory for all the experiments # get the common directory for all the experiments
self.dir = os.path.dirname(os.path.commonprefix(csv_paths)) self.dir = os.path.dirname(os.path.commonprefix(csv_paths))
self.filename = '{} - Group({})'.format(basename(self.dir), len(self.signals_files)) self.filename = '{} - Group({})'.format(os.path.basename(self.dir), len(self.signals_files))
self.load() self.load()
# this helps set the correct x axis # this helps set the correct x axis
@@ -425,7 +420,7 @@ class SignalsFilesGroup(SignalsFileBase):
pass pass
class RunType(Enum): class RunType(enum.Enum):
SINGLE_FOLDER_SINGLE_FILE = 1 SINGLE_FOLDER_SINGLE_FILE = 1
SINGLE_FOLDER_MULTIPLE_FILES = 2 SINGLE_FOLDER_MULTIPLE_FILES = 2
MULTIPLE_FOLDERS_SINGLE_FILES = 3 MULTIPLE_FOLDERS_SINGLE_FILES = 3
@@ -433,7 +428,7 @@ class RunType(Enum):
UNKNOWN = 0 UNKNOWN = 0
class FolderType(Enum): class FolderType(enum.Enum):
SINGLE_FILE = 1 SINGLE_FILE = 1
MULTIPLE_FILES = 2 MULTIPLE_FILES = 2
MULTIPLE_FOLDERS = 3 MULTIPLE_FOLDERS = 3
@@ -454,24 +449,24 @@ root_dir = os.path.dirname(os.path.abspath(__file__))
with open(os.path.join(root_dir, 'spinner.css'), 'r') as f: with open(os.path.join(root_dir, 'spinner.css'), 'r') as f:
spinner_style = """<style>{}</style>""".format(f.read()) spinner_style = """<style>{}</style>""".format(f.read())
spinner_html = """<ul class="spinner"><li></li><li></li><li></li><li></li></ul>""" spinner_html = """<ul class="spinner"><li></li><li></li><li></li><li></li></ul>"""
spinner = Div(text="""""") spinner = bw.Div(text="""""")
# file refresh time placeholder # file refresh time placeholder
refresh_info = Div(text="""""", width=210) refresh_info = bw.Div(text="""""", width=210)
# create figures # create figures
plot = figure(plot_width=1200, plot_height=800, plot = bp.figure(plot_width=1200, plot_height=800,
tools='pan,box_zoom,wheel_zoom,crosshair,undo,redo,reset,save', tools='pan,box_zoom,wheel_zoom,crosshair,undo,redo,reset,save',
toolbar_location='above', x_axis_label='Episodes', toolbar_location='above', x_axis_label='Episodes',
x_range=Range1d(0, 10000), y_range=Range1d(0, 100000)) x_range=bm.Range1d(0, 10000), y_range=bm.Range1d(0, 100000))
plot.extra_y_ranges = {"secondary": Range1d(start=-100, end=200)} plot.extra_y_ranges = {"secondary": bm.Range1d(start=-100, end=200)}
plot.add_layout(LinearAxis(y_range_name="secondary"), 'right') plot.add_layout(bm.LinearAxis(y_range_name="secondary"), 'right')
# legend # legend
div = Div(text="""""") div = bw.Div(text="""""")
legend = widgetbox([div]) legend = bl.widgetbox([div])
bokeh_legend = Legend( bokeh_legend = bm.Legend(
items=[("12345678901234567890123456789012345678901234567890", [])], # 50 letters items=[("12345678901234567890123456789012345678901234567890", [])], # 50 letters
# items=[(" ", [])], # 50 letters # items=[(" ", [])], # 50 letters
location=(-20, 0), orientation="vertical", location=(-20, 0), orientation="vertical",
@@ -605,8 +600,8 @@ def load_files_group():
# classify the folder as containing a single file, multiple files or only folders # classify the folder as containing a single file, multiple files or only folders
def classify_folder(dir_path): def classify_folder(dir_path):
files = [f for f in listdir(dir_path) if isfile(join(dir_path, f)) and f.endswith('.csv')] files = [f for f in os.listdir(dir_path) if os.path.isfile(os.path.join(dir_path, f)) and f.endswith('.csv')]
folders = [d for d in listdir(dir_path) if isdir(join(dir_path, d))] folders = [d for d in os.listdir(dir_path) if os.path.isdir(os.path.join(dir_path, d))]
if len(files) == 1: if len(files) == 1:
return FolderType.SINGLE_FILE return FolderType.SINGLE_FILE
elif len(files) > 1: elif len(files) > 1:
@@ -628,7 +623,7 @@ def get_run_type(dir_path):
elif folder_type == FolderType.MULTIPLE_FOLDERS: elif folder_type == FolderType.MULTIPLE_FOLDERS:
# folder contains sub dirs -> we assume we can classify the folder using only the first sub dir # folder contains sub dirs -> we assume we can classify the folder using only the first sub dir
sub_dirs = [d for d in listdir(dir_path) if isdir(join(dir_path, d))] sub_dirs = [d for d in os.listdir(dir_path) if os.path.isdir(os.path.join(dir_path, d))]
# checking only the first folder in the root dir for its type, since we assume that all sub dirs will share the # checking only the first folder in the root dir for its type, since we assume that all sub dirs will share the
# same structure (i.e. if one is a result of multi-threaded run, so will all the other). # same structure (i.e. if one is a result of multi-threaded run, so will all the other).
@@ -645,12 +640,12 @@ def add_directory_csv_files(dir_path, paths=None):
if not paths: if not paths:
paths = [] paths = []
for p in listdir(dir_path): for p in os.listdir(dir_path):
path = join(dir_path, p) path = os.path.join(dir_path, p)
if isdir(path): if os.path.isdir(path):
# call recursively for each dir # call recursively for each dir
paths = add_directory_csv_files(path, paths) paths = add_directory_csv_files(path, paths)
elif isfile(path) and path.endswith('.csv'): elif os.path.isfile(path) and path.endswith('.csv'):
# add every file to the list # add every file to the list
paths.append(path) paths.append(path)
@@ -667,7 +662,7 @@ def handle_dir(dir_path, run_type):
elif run_type == RunType.MULTIPLE_FOLDERS_SINGLE_FILES: elif run_type == RunType.MULTIPLE_FOLDERS_SINGLE_FILES:
create_files_group_signal(paths) create_files_group_signal(paths)
elif run_type == RunType.MULTIPLE_FOLDERS_MULTIPLE_FILES: elif run_type == RunType.MULTIPLE_FOLDERS_MULTIPLE_FILES:
sub_dirs = [d for d in listdir(dir_path) if isdir(join(dir_path, d))] sub_dirs = [d for d in os.listdir(dir_path) if os.path.isdir(os.path.join(dir_path, d))]
# for d in sub_dirs: # for d in sub_dirs:
# paths = add_directory_csv_files(os.path.join(dir_path, d)) # paths = add_directory_csv_files(os.path.join(dir_path, d))
# create_files_group_signal(paths) # create_files_group_signal(paths)
@@ -731,7 +726,7 @@ def unload_file():
selected_file.hide_all_signals() selected_file.hide_all_signals()
del signals_files[selected_file.filename] del signals_files[selected_file.filename]
data_selector.options = [""] data_selector.options = [""]
filenames = cycle(files_selector.options) filenames = itertools.cycle(files_selector.options)
files_selector.options.remove(selected_file.filename) files_selector.options.remove(selected_file.filename)
if len(files_selector.options) > 0: if len(files_selector.options) > 0:
files_selector.value = next(filenames) files_selector.value = next(filenames)
@@ -869,48 +864,48 @@ crcolor, crRGBs = generate_color_range(color_resolution, brightness) # produce
# ---------------- Build Website Layout ------------------- # ---------------- Build Website Layout -------------------
# select file # select file
file_selection_button = Button(label="Select Files", button_type="success", width=120) file_selection_button = bw.Button(label="Select Files", button_type="success", width=120)
file_selection_button.on_click(load_files_group) file_selection_button.on_click(load_files_group)
files_selector_spacer = Spacer(width=10) files_selector_spacer = bl.Spacer(width=10)
group_selection_button = Button(label="Select Directory", button_type="primary", width=140) group_selection_button = bw.Button(label="Select Directory", button_type="primary", width=140)
group_selection_button.on_click(load_directory_group) group_selection_button.on_click(load_directory_group)
unload_file_button = Button(label="Unload", button_type="danger", width=50) unload_file_button = bw.Button(label="Unload", button_type="danger", width=50)
unload_file_button.on_click(unload_file) unload_file_button.on_click(unload_file)
# files selection box # files selection box
files_selector = Select(title="Files:", options=[], width=200) files_selector = bw.Select(title="Files:", options=[], width=200)
files_selector.on_change('value', change_data_selector) files_selector.on_change('value', change_data_selector)
# data selection box # data selection box
data_selector = MultiSelect(title="Data:", options=[], size=12) data_selector = bw.MultiSelect(title="Data:", options=[], size=12)
data_selector.on_change('value', select_data) data_selector.on_change('value', select_data)
# x axis selection box # x axis selection box
x_axis_selector_title = Div(text="""X Axis:""") x_axis_selector_title = bw.Div(text="""X Axis:""")
x_axis_selector = RadioButtonGroup(labels=x_axis_options, active=0) x_axis_selector = bw.RadioButtonGroup(labels=x_axis_options, active=0)
x_axis_selector.on_click(change_x_axis) x_axis_selector.on_click(change_x_axis)
# toggle second axis button # toggle second axis button
toggle_second_axis_button = Button(label="Toggle Second Axis", button_type="success") toggle_second_axis_button = bw.Button(label="Toggle Second Axis", button_type="success")
toggle_second_axis_button.on_click(toggle_second_axis) toggle_second_axis_button.on_click(toggle_second_axis)
# averaging slider # averaging slider
averaging_slider = Slider(title="Averaging window", start=1, end=101, step=10) averaging_slider = bw.Slider(title="Averaging window", start=1, end=101, step=10)
averaging_slider.on_change('value', update_averaging) averaging_slider.on_change('value', update_averaging)
# group properties checkbox # group properties checkbox
group_cb = CheckboxGroup(labels=["Show statistics bands", "Ungroup signals"], active=[]) group_cb = bw.CheckboxGroup(labels=["Show statistics bands", "Ungroup signals"], active=[])
group_cb.on_click(toggle_group_property) group_cb.on_click(toggle_group_property)
# color selector # color selector
color_selector_title = Div(text="""Select Color:""") color_selector_title = bw.Div(text="""Select Color:""")
crsource = ColumnDataSource(data=dict(x=crx, y=cry, crcolor=crcolor, RGBs=crRGBs)) crsource = bm.ColumnDataSource(data=dict(x=crx, y=cry, crcolor=crcolor, RGBs=crRGBs))
color_selector = figure(x_range=(0, color_resolution), y_range=(0, 10), color_selector = bp.figure(x_range=(0, color_resolution), y_range=(0, 10),
plot_width=300, plot_height=40, plot_width=300, plot_height=40,
tools='tap') tools='tap')
color_selector.axis.visible = False color_selector.axis.visible = False
color_range = color_selector.rect(x='x', y='y', width=1, height=10, color_range = color_selector.rect(x='x', y='y', width=1, height=10,
color='crcolor', source=crsource) color='crcolor', source=crsource)
@@ -920,43 +915,43 @@ color_selector.toolbar.logo = None
color_selector.toolbar_location = None color_selector.toolbar_location = None
# title # title
title = Div(text="""<h1>Coach Dashboard</h1>""") title = bw.Div(text="""<h1>Coach Dashboard</h1>""")
# landing page # landing page
landing_page_description = Div(text="""<h3>Start by selecting an experiment file or directory to open:</h3>""") landing_page_description = bw.Div(text="""<h3>Start by selecting an experiment file or directory to open:</h3>""")
center = Div(text="""<style>html { text-align: center; } </style>""") center = bw.Div(text="""<style>html { text-align: center; } </style>""")
center_buttons = Div(text="""<style>.bk-grid-row .bk-layout-fixed { margin: 0 auto; }</style>""", width=0) center_buttons = bw.Div(text="""<style>.bk-grid-row .bk-layout-fixed { margin: 0 auto; }</style>""", width=0)
landing_page = column(center, landing_page = bl.column(center,
title, title,
landing_page_description, landing_page_description,
row(center_buttons), bl.row(center_buttons),
row(file_selection_button, sizing_mode='scale_width'), bl.row(file_selection_button, sizing_mode='scale_width'),
row(group_selection_button, sizing_mode='scale_width'), bl.row(group_selection_button, sizing_mode='scale_width'),
sizing_mode='scale_width') sizing_mode='scale_width')
# main layout of the document # main layout of the document
layout = row(file_selection_button, files_selector_spacer, group_selection_button, width=300) layout = bl.row(file_selection_button, files_selector_spacer, group_selection_button, width=300)
layout = column(layout, files_selector) layout = bl.column(layout, files_selector)
layout = column(layout, row(refresh_info, unload_file_button)) layout = bl.column(layout, bl.row(refresh_info, unload_file_button))
layout = column(layout, data_selector) layout = bl.column(layout, data_selector)
layout = column(layout, color_selector_title) layout = bl.column(layout, color_selector_title)
layout = column(layout, color_selector) layout = bl.column(layout, color_selector)
layout = column(layout, x_axis_selector_title) layout = bl.column(layout, x_axis_selector_title)
layout = column(layout, x_axis_selector) layout = bl.column(layout, x_axis_selector)
layout = column(layout, group_cb) layout = bl.column(layout, group_cb)
layout = column(layout, toggle_second_axis_button) layout = bl.column(layout, toggle_second_axis_button)
layout = column(layout, averaging_slider) layout = bl.column(layout, averaging_slider)
# layout = column(layout, legend) # layout = bl.column(layout, legend)
layout = row(layout, plot) layout = bl.row(layout, plot)
layout = column(title, layout) layout = bl.column(title, layout)
layout = column(layout, spinner) layout = bl.column(layout, spinner)
doc = curdoc() doc = bp.curdoc()
doc.add_root(landing_page) doc.add_root(landing_page)
doc.add_periodic_callback(reload_all_files, 20000) doc.add_periodic_callback(reload_all_files, 20000)
plot.y_range = Range1d(0, 100) plot.y_range = bm.Range1d(0, 100)
plot.extra_y_ranges['secondary'] = Range1d(0, 100) plot.extra_y_ranges['secondary'] = bm.Range1d(0, 100)
# show load file dialog immediately on start # show load file dialog immediately on start
#doc.add_timeout_callback(load_files, 1000) #doc.add_timeout_callback(load_files, 1000)
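
add_directory_csv_files above collects experiment CSVs by recursing manually with os.listdir and os.path. An equivalent standalone sketch using os.walk; the '.' root is only an example path.

import os

def find_csv_files(root):
    paths = []
    for dirpath, _dirnames, filenames in os.walk(root):
        for name in filenames:
            if name.endswith('.csv'):
                paths.append(os.path.join(dirpath, name))
    return paths

print(find_csv_files('.'))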


@@ -13,7 +13,6 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# #
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
import numpy as np import numpy as np


@@ -24,9 +24,9 @@ Adds support for displaying math formulas using [MathJax](http://www.mathjax.org
Author: 2015, Dmitry Shachnev <mitya57@gmail.com>. Author: 2015, Dmitry Shachnev <mitya57@gmail.com>.
''' '''
import markdown import markdown
class MathExtension(markdown.extensions.Extension): class MathExtension(markdown.extensions.Extension):
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
self.config = { self.config = {


@@ -1,3 +1,4 @@
#!/usr/bin/env python3
# #
# Copyright (c) 2017 Intel Corporation # Copyright (c) 2017 Intel Corporation
# #
@@ -13,11 +14,9 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# #
#!/usr/bin/env python3
from distutils.core import setup from distutils.core import setup
long_description = \ long_description = \
"""This extension adds math formulas support to Python-Markdown_ """This extension adds math formulas support to Python-Markdown_
(works with version 2.6 or newer). (works with version 2.6 or newer).


@@ -13,8 +13,10 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# #
import fnmatch
import os
import os, fnmatch, sys
def findReplace(directory, find, replace, filePattern): def findReplace(directory, find, replace, filePattern):
for path, dirs, files in os.walk(os.path.abspath(directory)): for path, dirs, files in os.walk(os.path.abspath(directory)):
for filename in fnmatch.filter(files, filePattern): for filename in fnmatch.filter(files, filePattern):
@@ -25,7 +27,8 @@ def findReplace(directory, find, replace, filePattern):
with open(filepath, "w") as f: with open(filepath, "w") as f:
f.write(s) f.write(s)
if __name__=="__main__":
if __name__ == "__main__":
findReplace('./site/', '/"', '/index.html"', "*.html") findReplace('./site/', '/"', '/index.html"', "*.html")
findReplace('./site/', '"/index.html"', '"./index.html"', "*.html") findReplace('./site/', '"/index.html"', '"./index.html"', "*.html")
findReplace('./site/', '"."', '"./index.html"', "*.html") findReplace('./site/', '"."', '"./index.html"', "*.html")


@@ -13,15 +13,13 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# #
from environments.gym_environment_wrapper import GymEnvironmentWrapper
from logger import * from environments.doom_environment_wrapper import DoomEnvironmentWrapper
from utils import Enum, get_open_port from environments.carla_environment_wrapper import CarlaEnvironmentWrapper
from environments.gym_environment_wrapper import * import utils
from environments.doom_environment_wrapper import *
from environments.carla_environment_wrapper import *
class EnvTypes(Enum): class EnvTypes(utils.Enum):
Doom = "DoomEnvironmentWrapper" Doom = "DoomEnvironmentWrapper"
Gym = "GymEnvironmentWrapper" Gym = "GymEnvironmentWrapper"
Carla = "CarlaEnvironmentWrapper" Carla = "CarlaEnvironmentWrapper"
@@ -31,6 +29,3 @@ def create_environment(tuning_parameters):
env_type_name, env_type = EnvTypes().verify(tuning_parameters.env.type) env_type_name, env_type = EnvTypes().verify(tuning_parameters.env.type)
env = eval(env_type)(tuning_parameters) env = eval(env_type)(tuning_parameters)
return env return env
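
create_environment above resolves the wrapper class by calling eval() on the string stored in the EnvTypes enum. The same lookup can be sketched with an explicit registry dictionary; the stub classes below are placeholders, not Coach's wrappers.

class GymWrapperStub:
    pass

class DoomWrapperStub:
    pass

ENV_REGISTRY = {
    'GymEnvironmentWrapper': GymWrapperStub,
    'DoomEnvironmentWrapper': DoomWrapperStub,
}

def create_environment(type_name):
    # a KeyError here plays the role of the enum verification step
    return ENV_REGISTRY[type_name]()

print(type(create_environment('GymEnvironmentWrapper')).__name__)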


@@ -1,34 +1,31 @@
import logging
import os
import signal
import subprocess
import sys import sys
from os import path, environ
try:
if 'CARLA_ROOT' in environ:
sys.path.append(path.join(environ.get('CARLA_ROOT'), 'PythonClient'))
from carla.client import CarlaClient
from carla.settings import CarlaSettings
from carla.tcp import TCPConnectionError
from carla.sensor import Camera
from carla.client import VehicleControl
except ImportError:
from logger import failed_imports
failed_imports.append("CARLA")
import numpy as np import numpy as np
import time
import logging import logger
import subprocess try:
import signal if 'CARLA_ROOT' in os.environ:
from environments.environment_wrapper import EnvironmentWrapper sys.path.append(os.path.join(os.environ.get('CARLA_ROOT'),
from utils import * 'PythonClient'))
from logger import screen, logger from carla import client as carla_client
from PIL import Image from carla import settings as carla_settings
from carla import sensor as carla_sensor
except ImportError:
logger.failed_imports.append("CARLA")
from environments import environment_wrapper as ew
import utils
# enum of the available levels and their path # enum of the available levels and their path
class CarlaLevel(Enum): class CarlaLevel(utils.Enum):
TOWN1 = "/Game/Maps/Town01" TOWN1 = "/Game/Maps/Town01"
TOWN2 = "/Game/Maps/Town02" TOWN2 = "/Game/Maps/Town02"
key_map = { key_map = {
'BRAKE': (274,), # down arrow 'BRAKE': (274,), # down arrow
'GAS': (273,), # up arrow 'GAS': (273,), # up arrow
@@ -41,16 +38,16 @@ key_map = {
} }
class CarlaEnvironmentWrapper(EnvironmentWrapper): class CarlaEnvironmentWrapper(ew.EnvironmentWrapper):
def __init__(self, tuning_parameters): def __init__(self, tuning_parameters):
EnvironmentWrapper.__init__(self, tuning_parameters) ew.EnvironmentWrapper.__init__(self, tuning_parameters)
self.tp = tuning_parameters self.tp = tuning_parameters
# server configuration # server configuration
self.server_height = self.tp.env.server_height self.server_height = self.tp.env.server_height
self.server_width = self.tp.env.server_width self.server_width = self.tp.env.server_width
self.port = get_open_port() self.port = utils.get_open_port()
self.host = 'localhost' self.host = 'localhost'
self.map = CarlaLevel().get(self.tp.env.level) self.map = CarlaLevel().get(self.tp.env.level)
@@ -70,7 +67,7 @@ class CarlaEnvironmentWrapper(EnvironmentWrapper):
self.settings = fp.read() self.settings = fp.read()
else: else:
# hard coded settings # hard coded settings
self.settings = CarlaSettings() self.settings = carla_settings.CarlaSettings()
self.settings.set( self.settings.set(
SynchronousMode=True, SynchronousMode=True,
SendNonPlayerAgentsInfo=False, SendNonPlayerAgentsInfo=False,
@@ -80,7 +77,7 @@ class CarlaEnvironmentWrapper(EnvironmentWrapper):
self.settings.randomize_seeds() self.settings.randomize_seeds()
# add cameras # add cameras
camera = Camera('CameraRGB') camera = carla_sensor.Camera('CameraRGB')
camera.set_image_size(self.width, self.height) camera.set_image_size(self.width, self.height)
camera.set_position(200, 0, 140) camera.set_position(200, 0, 140)
camera.set_rotation(0, 0, 0) camera.set_rotation(0, 0, 0)
@@ -92,7 +89,7 @@ class CarlaEnvironmentWrapper(EnvironmentWrapper):
logging.disable(40) logging.disable(40)
# open the client # open the client
self.game = CarlaClient(self.host, self.port, timeout=99999999) self.game = carla_client.CarlaClient(self.host, self.port, timeout=99999999)
self.game.connect() self.game.connect()
scene = self.game.load_settings(self.settings) scene = self.game.load_settings(self.settings)
@@ -141,12 +138,12 @@ class CarlaEnvironmentWrapper(EnvironmentWrapper):
self.renderer.create_screen(image.shape[1], image.shape[0]) self.renderer.create_screen(image.shape[1], image.shape[0])
def _open_server(self): def _open_server(self):
log_path = path.join(logger.experiments_path, "CARLA_LOG_{}.txt".format(self.port)) log_path = os.path.join(logger.logger.experiments_path, "CARLA_LOG_{}.txt".format(self.port))
with open(log_path, "wb") as out: with open(log_path, "wb") as out:
cmd = [path.join(environ.get('CARLA_ROOT'), 'CarlaUE4.sh'), self.map, cmd = [os.path.join(os.environ.get('CARLA_ROOT'), 'CarlaUE4.sh'), self.map,
"-benchmark", "-carla-server", "-fps=10", "-world-port={}".format(self.port), "-benchmark", "-carla-server", "-fps=10", "-world-port={}".format(self.port),
"-windowed -ResX={} -ResY={}".format(self.server_width, self.server_height), "-windowed -ResX={} -ResY={}".format(self.server_width, self.server_height),
"-carla-no-hud"] "-carla-no-hud"]
if self.config: if self.config:
cmd.append("-carla-settings={}".format(self.config)) cmd.append("-carla-settings={}".format(self.config))
p = subprocess.Popen(cmd, stdout=out, stderr=out) p = subprocess.Popen(cmd, stdout=out, stderr=out)
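
_open_server above starts the CARLA binary with stdout and stderr redirected into a per-port log file. The redirection pattern in isolation looks like this; the path and the echo command are placeholders for the real CarlaUE4.sh invocation.

import subprocess

log_path = '/tmp/server_log.txt'        # placeholder log location
with open(log_path, 'wb') as out:
    cmd = ['echo', 'server started']    # stand-in for the real launch command
    proc = subprocess.Popen(cmd, stdout=out, stderr=out)
    proc.wait()
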
@@ -201,7 +198,7 @@ class CarlaEnvironmentWrapper(EnvironmentWrapper):
action = action_idx action = action_idx
self.last_action_idx = action self.last_action_idx = action
self.control = VehicleControl() self.control = carla_client.VehicleControl()
self.control.throttle = np.clip(action[0], 0, 1) self.control.throttle = np.clip(action[0], 0, 1)
self.control.steer = np.clip(action[1], -1, 1) self.control.steer = np.clip(action[1], -1, 1)
self.control.brake = np.abs(np.clip(action[0], -1, 0)) self.control.brake = np.abs(np.clip(action[0], -1, 0))


@@ -13,23 +13,23 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# #
import enum
import os
import numpy as np
import logger
try: try:
import vizdoom import vizdoom
except ImportError: except ImportError:
from logger import failed_imports logger.failed_imports.append("ViZDoom")
failed_imports.append("ViZDoom")
import numpy as np from environments import environment_wrapper as ew
from environments.environment_wrapper import EnvironmentWrapper import utils
from os import path, environ
from utils import *
from logger import *
# enum of the available levels and their path # enum of the available levels and their path
class DoomLevel(Enum): class DoomLevel(utils.Enum):
BASIC = "basic.cfg" BASIC = "basic.cfg"
DEFEND = "defend_the_center.cfg" DEFEND = "defend_the_center.cfg"
DEATHMATCH = "deathmatch.cfg" DEATHMATCH = "deathmatch.cfg"
@@ -40,6 +40,7 @@ class DoomLevel(Enum):
DEFEND_THE_LINE = "defend_the_line.cfg" DEFEND_THE_LINE = "defend_the_line.cfg"
DEADLY_CORRIDOR = "deadly_corridor.cfg" DEADLY_CORRIDOR = "deadly_corridor.cfg"
key_map = { key_map = {
'NO-OP': 96, # ` 'NO-OP': 96, # `
'ATTACK': 13, # enter 'ATTACK': 13, # enter
@@ -78,15 +79,16 @@ key_map = {
} }
class DoomEnvironmentWrapper(EnvironmentWrapper): class DoomEnvironmentWrapper(ew.EnvironmentWrapper):
def __init__(self, tuning_parameters): def __init__(self, tuning_parameters):
EnvironmentWrapper.__init__(self, tuning_parameters) ew.EnvironmentWrapper.__init__(self, tuning_parameters)
# load the emulator with the required level # load the emulator with the required level
self.level = DoomLevel().get(self.tp.env.level) self.level = DoomLevel().get(self.tp.env.level)
self.scenarios_dir = path.join(environ.get('VIZDOOM_ROOT'), 'scenarios') self.scenarios_dir = os.path.join(os.environ.get('VIZDOOM_ROOT'),
'scenarios')
self.game = vizdoom.DoomGame() self.game = vizdoom.DoomGame()
self.game.load_config(path.join(self.scenarios_dir, self.level)) self.game.load_config(os.path.join(self.scenarios_dir, self.level))
self.game.set_window_visible(False) self.game.set_window_visible(False)
self.game.add_game_args("+vid_forcesurface 1") self.game.add_game_args("+vid_forcesurface 1")
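
Like the CARLA wrapper, the Doom wrapper guards its optional dependency with try/except ImportError and records the failure in logger.failed_imports. The pattern on its own, with a deliberately nonexistent package so the except branch actually runs:

failed_imports = []

try:
    import some_missing_backend  # deliberately nonexistent package
except ImportError:
    failed_imports.append('some_missing_backend')

if failed_imports:
    print('Warning: failed to import the following packages - {}'.format(
        ', '.join(failed_imports)))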


@@ -13,14 +13,14 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# #
import numpy as np
from utils import *
from configurations import Preset
from renderer import Renderer
import operator import operator
import time import time
import numpy as np
import renderer
import utils
class EnvironmentWrapper(object): class EnvironmentWrapper(object):
def __init__(self, tuning_parameters): def __init__(self, tuning_parameters):
@@ -50,7 +50,7 @@ class EnvironmentWrapper(object):
self.height = 1 self.height = 1
self.is_state_type_image = True self.is_state_type_image = True
self.measurements_size = 0 self.measurements_size = 0
self.phase = RunPhase.TRAIN self.phase = utils.RunPhase.TRAIN
self.tp = tuning_parameters self.tp = tuning_parameters
self.record_video_every = self.tp.visualization.record_video_every self.record_video_every = self.tp.visualization.record_video_every
self.env_id = self.tp.env.level self.env_id = self.tp.env.level
@@ -62,7 +62,7 @@ class EnvironmentWrapper(object):
self.wait_for_explicit_human_action = False self.wait_for_explicit_human_action = False
self.is_rendered = self.is_rendered or self.human_control self.is_rendered = self.is_rendered or self.human_control
self.game_is_open = True self.game_is_open = True
self.renderer = Renderer() self.renderer = renderer.Renderer()
@property @property
def measurements(self): def measurements(self):


@@ -13,40 +13,18 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# #
import random
import sys
from logger import *
import gym import gym
import numpy as np import numpy as np
import time
import random
try:
import roboschool
from OpenGL import GL
except ImportError:
from logger import failed_imports
failed_imports.append("RoboSchool")
try: from environments import environment_wrapper as ew
from gym_extensions.continuous import mujoco import utils
except:
from logger import failed_imports
failed_imports.append("GymExtensions")
try:
import pybullet_envs
except ImportError:
from logger import failed_imports
failed_imports.append("PyBullet")
from gym import wrappers
from utils import force_list, RunPhase
from environments.environment_wrapper import EnvironmentWrapper
class GymEnvironmentWrapper(EnvironmentWrapper): class GymEnvironmentWrapper(ew.EnvironmentWrapper):
def __init__(self, tuning_parameters): def __init__(self, tuning_parameters):
EnvironmentWrapper.__init__(self, tuning_parameters) ew.EnvironmentWrapper.__init__(self, tuning_parameters)
# env parameters # env parameters
if ':' in self.env_id: if ':' in self.env_id:
@@ -124,7 +102,7 @@ class GymEnvironmentWrapper(EnvironmentWrapper):
def _update_state(self): def _update_state(self):
if hasattr(self.env, 'env') and hasattr(self.env.env, 'ale'): if hasattr(self.env, 'env') and hasattr(self.env.env, 'ale'):
if self.phase == RunPhase.TRAIN and hasattr(self, 'current_ale_lives'): if self.phase == utils.RunPhase.TRAIN and hasattr(self, 'current_ale_lives'):
# signal termination for life loss # signal termination for life loss
if self.current_ale_lives != self.env.env.ale.lives(): if self.current_ale_lives != self.env.env.ale.lives():
self.done = True self.done = True
@@ -146,7 +124,7 @@ class GymEnvironmentWrapper(EnvironmentWrapper):
if type(action_idx) == int and action_idx == 0: if type(action_idx) == int and action_idx == 0:
# deal with the "reset" action 0 # deal with the "reset" action 0
action = [0] * self.env.action_space.shape[0] action = [0] * self.env.action_space.shape[0]
action = np.array(force_list(action)) action = np.array(utils.force_list(action))
# removing redundant dimensions such that the action size will match the expected action size from gym # removing redundant dimensions such that the action size will match the expected action size from gym
if action.shape != self.env.action_space.shape: if action.shape != self.env.action_space.shape:
action = np.squeeze(action) action = np.squeeze(action)


@@ -13,16 +13,29 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# #
from exploration_policies.additive_noise import AdditiveNoise
from exploration_policies.approximated_thompson_sampling_using_dropout import ApproximatedThompsonSamplingUsingDropout
from exploration_policies.bayesian import Bayesian
from exploration_policies.boltzmann import Boltzmann
from exploration_policies.bootstrapped import Bootstrapped
from exploration_policies.categorical import Categorical
from exploration_policies.continuous_entropy import ContinuousEntropy
from exploration_policies.e_greedy import EGreedy
from exploration_policies.exploration_policy import ExplorationPolicy
from exploration_policies.greedy import Greedy
from exploration_policies.ou_process import OUProcess
from exploration_policies.thompson_sampling import ThompsonSampling
from exploration_policies.additive_noise import *
from exploration_policies.approximated_thompson_sampling_using_dropout import * __all__ = [AdditiveNoise,
from exploration_policies.bayesian import * ApproximatedThompsonSamplingUsingDropout,
from exploration_policies.boltzmann import * Bayesian,
from exploration_policies.bootstrapped import * Boltzmann,
from exploration_policies.categorical import * Bootstrapped,
from exploration_policies.continuous_entropy import * Categorical,
from exploration_policies.e_greedy import * ContinuousEntropy,
from exploration_policies.exploration_policy import * EGreedy,
from exploration_policies.greedy import * ExplorationPolicy,
from exploration_policies.ou_process import * Greedy,
from exploration_policies.thompson_sampling import * OUProcess,
ThompsonSampling]
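One detail worth noting about this __init__ pattern: a star import resolves each entry of __all__ by name, so the list conventionally holds strings rather than the class objects themselves. A short sketch of the string-based form, trimmed to two of the modules shown above:

# exploration_policies/__init__.py -- string-based __all__ sketch
from exploration_policies.additive_noise import AdditiveNoise
from exploration_policies.e_greedy import EGreedy

__all__ = [
    'AdditiveNoise',  # strings, so "from exploration_policies import *"
    'EGreedy',        # can look each name up on the package
]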

View File

@@ -13,18 +13,19 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# #
import numpy as np import numpy as np
from exploration_policies.exploration_policy import *
from exploration_policies import exploration_policy
import utils
class AdditiveNoise(ExplorationPolicy): class AdditiveNoise(exploration_policy.ExplorationPolicy):
def __init__(self, tuning_parameters): def __init__(self, tuning_parameters):
""" """
:param tuning_parameters: A Preset class instance with all the running paramaters :param tuning_parameters: A Preset class instance with all the running paramaters
:type tuning_parameters: Preset :type tuning_parameters: Preset
""" """
ExplorationPolicy.__init__(self, tuning_parameters) exploration_policy.ExplorationPolicy.__init__(self, tuning_parameters)
self.variance = tuning_parameters.exploration.initial_noise_variance_percentage self.variance = tuning_parameters.exploration.initial_noise_variance_percentage
self.final_variance = tuning_parameters.exploration.final_noise_variance_percentage self.final_variance = tuning_parameters.exploration.final_noise_variance_percentage
self.decay_steps = tuning_parameters.exploration.noise_variance_decay_steps self.decay_steps = tuning_parameters.exploration.noise_variance_decay_steps
@@ -37,7 +38,7 @@ class AdditiveNoise(ExplorationPolicy):
self.variance = self.final_variance self.variance = self.final_variance
def get_action(self, action_values): def get_action(self, action_values):
if self.phase == RunPhase.TRAIN: if self.phase == utils.RunPhase.TRAIN:
self.decay_exploration() self.decay_exploration()
action = np.random.normal(action_values, 2 * self.variance * self.action_abs_range) action = np.random.normal(action_values, 2 * self.variance * self.action_abs_range)
return action #np.clip(action, -self.action_abs_range, self.action_abs_range).squeeze() return action #np.clip(action, -self.action_abs_range, self.action_abs_range).squeeze()
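A standalone sketch of the additive-noise behaviour shown above: the variance decays linearly while training and the chosen action is perturbed with zero-mean Gaussian noise scaled by the action range (all hyperparameter values are illustrative):

import numpy as np


class AdditiveNoiseSketch:
    def __init__(self, action_abs_range=1.0,
                 initial_variance=0.1, final_variance=0.01, decay_steps=10000):
        self.action_abs_range = action_abs_range
        self.variance = initial_variance
        self.final_variance = final_variance
        self.decay_delta = (initial_variance - final_variance) / decay_steps

    def decay_exploration(self):
        # linear decay, clamped at the final variance
        if self.variance > self.final_variance:
            self.variance = max(self.final_variance,
                                self.variance - self.decay_delta)

    def get_action(self, action_values, training=True):
        if training:
            self.decay_exploration()
        # perturb the deterministic action with zero-mean Gaussian noise
        return np.random.normal(action_values,
                                2 * self.variance * self.action_abs_range)


policy = AdditiveNoiseSketch()
print(policy.get_action(np.array([0.3, -0.7])))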

View File

@@ -13,17 +13,18 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# #
import numpy as np
from exploration_policies.exploration_policy import * from exploration_policies import exploration_policy
class ApproximatedThompsonSamplingUsingDropout(ExplorationPolicy): class ApproximatedThompsonSamplingUsingDropout(exploration_policy.ExplorationPolicy):
def __init__(self, tuning_parameters): def __init__(self, tuning_parameters):
""" """
:param tuning_parameters: A Preset class instance with all the running paramaters :param tuning_parameters: A Preset class instance with all the running paramaters
:type tuning_parameters: Preset :type tuning_parameters: Preset
""" """
ExplorationPolicy.__init__(self, tuning_parameters) exploration_policy.ExplorationPolicy.__init__(self, tuning_parameters)
self.dropout_discard_probability = tuning_parameters.exploration.dropout_discard_probability self.dropout_discard_probability = tuning_parameters.exploration.dropout_discard_probability
self.network = tuning_parameters.network self.network = tuning_parameters.network
self.assign_op = self.network.dropout_discard_probability.assign(self.dropout_discard_probability) self.assign_op = self.network.dropout_discard_probability.assign(self.dropout_discard_probability)

View File

@@ -13,18 +13,19 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# #
import numpy as np
from exploration_policies.exploration_policy import * from exploration_policies import exploration_policy
import tensorflow as tf import utils
class Bayesian(ExplorationPolicy): class Bayesian(exploration_policy.ExplorationPolicy):
def __init__(self, tuning_parameters): def __init__(self, tuning_parameters):
""" """
:param tuning_parameters: A Preset class instance with all the running paramaters :param tuning_parameters: A Preset class instance with all the running paramaters
:type tuning_parameters: Preset :type tuning_parameters: Preset
""" """
ExplorationPolicy.__init__(self, tuning_parameters) exploration_policy.ExplorationPolicy.__init__(self, tuning_parameters)
self.keep_probability = tuning_parameters.exploration.initial_keep_probability self.keep_probability = tuning_parameters.exploration.initial_keep_probability
self.final_keep_probability = tuning_parameters.exploration.final_keep_probability self.final_keep_probability = tuning_parameters.exploration.final_keep_probability
self.keep_probability_decay_delta = ( self.keep_probability_decay_delta = (
@@ -40,7 +41,7 @@ class Bayesian(ExplorationPolicy):
self.keep_probability -= self.keep_probability_decay_delta self.keep_probability -= self.keep_probability_decay_delta
def get_action(self, action_values): def get_action(self, action_values):
if self.phase == RunPhase.TRAIN: if self.phase == utils.RunPhase.TRAIN:
self.decay_keep_probability() self.decay_keep_probability()
# dropout = self.network.get_layer('variable_dropout_1') # dropout = self.network.get_layer('variable_dropout_1')
# with tf.Session() as sess: # with tf.Session() as sess:
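The policy above linearly anneals a dropout keep probability during training and keeps dropout active at action time, so each forward pass acts roughly like a posterior sample. A sketch of just that schedule, detached from the network plumbing (all numbers below are illustrative):

class KeepProbabilitySchedule:
    def __init__(self, initial=0.9, final=0.99, decay_steps=10000):
        self.keep_probability = initial
        self.final_keep_probability = final
        # signed step, so the schedule works in either direction
        self.delta = (final - initial) / float(decay_steps)

    def step(self):
        self.keep_probability += self.delta
        overshoot_up = self.delta >= 0 and self.keep_probability > self.final_keep_probability
        overshoot_down = self.delta < 0 and self.keep_probability < self.final_keep_probability
        if overshoot_up or overshoot_down:
            # clamp once the final value is reached
            self.keep_probability = self.final_keep_probability


schedule = KeepProbabilitySchedule()
for _ in range(100):
    schedule.step()
print(schedule.keep_probability)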

View File

@@ -13,17 +13,18 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# #
import numpy as np
from exploration_policies.exploration_policy import * from exploration_policies import exploration_policy
import utils
class Boltzmann(ExplorationPolicy): class Boltzmann(exploration_policy.ExplorationPolicy):
def __init__(self, tuning_parameters): def __init__(self, tuning_parameters):
""" """
:param tuning_parameters: A Preset class instance with all the running paramaters :param tuning_parameters: A Preset class instance with all the running paramaters
:type tuning_parameters: Preset :type tuning_parameters: Preset
""" """
ExplorationPolicy.__init__(self, tuning_parameters) exploration_policy.ExplorationPolicy.__init__(self, tuning_parameters)
self.temperature = tuning_parameters.exploration.initial_temperature self.temperature = tuning_parameters.exploration.initial_temperature
self.final_temperature = tuning_parameters.exploration.final_temperature self.final_temperature = tuning_parameters.exploration.final_temperature
self.temperature_decay_delta = ( self.temperature_decay_delta = (
@@ -35,7 +36,7 @@ class Boltzmann(ExplorationPolicy):
self.temperature -= self.temperature_decay_delta self.temperature -= self.temperature_decay_delta
def get_action(self, action_values): def get_action(self, action_values):
if self.phase == RunPhase.TRAIN: if self.phase == utils.RunPhase.TRAIN:
self.decay_temperature() self.decay_temperature()
# softmax calculation # softmax calculation
exp_probabilities = np.exp(action_values / self.temperature) exp_probabilities = np.exp(action_values / self.temperature)
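A standalone sketch of the softmax sampling shown above; the max subtraction is an extra numerical-stability step that is not in the hunk itself, and the temperature values are illustrative:

import numpy as np


def boltzmann_action(action_values, temperature):
    # subtract the max before exponentiating to avoid overflow
    scaled = (action_values - np.max(action_values)) / temperature
    exp_probabilities = np.exp(scaled)
    probabilities = exp_probabilities / np.sum(exp_probabilities)
    # sample an action index according to the softmax distribution
    return np.random.choice(len(action_values), p=probabilities)


q_values = np.array([1.0, 2.0, 0.5])
print(boltzmann_action(q_values, temperature=1.0))    # mostly action 1
print(boltzmann_action(q_values, temperature=100.0))  # close to uniform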

View File

@@ -13,17 +13,18 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# #
import numpy as np
from exploration_policies.e_greedy import * from exploration_policies import e_greedy
class Bootstrapped(EGreedy): class Bootstrapped(e_greedy.EGreedy):
def __init__(self, tuning_parameters): def __init__(self, tuning_parameters):
""" """
:param tuning_parameters: A Preset class instance with all the running parameters :param tuning_parameters: A Preset class instance with all the running parameters
:type tuning_parameters: Preset :type tuning_parameters: Preset
""" """
EGreedy.__init__(self, tuning_parameters) e_greedy.EGreedy.__init__(self, tuning_parameters)
self.num_heads = tuning_parameters.exploration.architecture_num_q_heads self.num_heads = tuning_parameters.exploration.architecture_num_q_heads
self.selected_head = 0 self.selected_head = 0
@@ -31,7 +32,7 @@ class Bootstrapped(EGreedy):
self.selected_head = np.random.randint(self.num_heads) self.selected_head = np.random.randint(self.num_heads)
def get_action(self, action_values): def get_action(self, action_values):
return EGreedy.get_action(self, action_values[self.selected_head]) return e_greedy.EGreedy.get_action(self, action_values[self.selected_head])
def get_control_param(self): def get_control_param(self):
return self.selected_head return self.selected_head
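A self-contained sketch of the bootstrapped scheme: one head is drawn at random (typically once per episode) and epsilon-greedy is applied to that head's Q-values; the head count and epsilon below are illustrative:

import numpy as np


class BootstrappedSketch:
    def __init__(self, num_heads=10, epsilon=0.05):
        self.num_heads = num_heads
        self.epsilon = epsilon
        self.selected_head = 0

    def select_head(self):
        # called at the start of every episode
        self.selected_head = np.random.randint(self.num_heads)

    def get_action(self, action_values_per_head):
        # action_values_per_head: array of shape (num_heads, num_actions)
        q_values = action_values_per_head[self.selected_head]
        if np.random.rand() < self.epsilon:
            return np.random.randint(len(q_values))
        return int(np.argmax(q_values))


policy = BootstrappedSketch()
policy.select_head()
print(policy.get_action(np.random.randn(10, 4)))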

View File

@@ -13,17 +13,18 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# #
import numpy as np
from exploration_policies.exploration_policy import * from exploration_policies import exploration_policy
class Categorical(ExplorationPolicy): class Categorical(exploration_policy.ExplorationPolicy):
def __init__(self, tuning_parameters): def __init__(self, tuning_parameters):
""" """
:param tuning_parameters: A Preset class instance with all the running paramaters :param tuning_parameters: A Preset class instance with all the running paramaters
:type tuning_parameters: Preset :type tuning_parameters: Preset
""" """
ExplorationPolicy.__init__(self, tuning_parameters) exploration_policy.ExplorationPolicy.__init__(self, tuning_parameters)
def get_action(self, action_values): def get_action(self, action_values):
# choose actions according to the probabilities # choose actions according to the probabilities

View File

@@ -13,10 +13,8 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# #
from exploration_policies import exploration_policy
import numpy as np
from exploration_policies.exploration_policy import *
class ContinuousEntropy(ExplorationPolicy): class ContinuousEntropy(exploration_policy.ExplorationPolicy):
pass pass

View File

@@ -13,17 +13,19 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# #
import numpy as np
from exploration_policies.exploration_policy import * from exploration_policies import exploration_policy
import utils
class EGreedy(ExplorationPolicy): class EGreedy(exploration_policy.ExplorationPolicy):
def __init__(self, tuning_parameters): def __init__(self, tuning_parameters):
""" """
:param tuning_parameters: A Preset class instance with all the running paramaters :param tuning_parameters: A Preset class instance with all the running paramaters
:type tuning_parameters: Preset :type tuning_parameters: Preset
""" """
ExplorationPolicy.__init__(self, tuning_parameters) exploration_policy.ExplorationPolicy.__init__(self, tuning_parameters)
self.epsilon = tuning_parameters.exploration.initial_epsilon self.epsilon = tuning_parameters.exploration.initial_epsilon
self.final_epsilon = tuning_parameters.exploration.final_epsilon self.final_epsilon = tuning_parameters.exploration.final_epsilon
self.epsilon_decay_delta = ( self.epsilon_decay_delta = (
@@ -52,9 +54,9 @@ class EGreedy(ExplorationPolicy):
self.variance = self.final_variance self.variance = self.final_variance
def get_action(self, action_values): def get_action(self, action_values):
if self.phase == RunPhase.TRAIN: if self.phase == utils.RunPhase.TRAIN:
self.decay_exploration() self.decay_exploration()
epsilon = self.evaluation_epsilon if self.phase == RunPhase.TEST else self.epsilon epsilon = self.evaluation_epsilon if self.phase == utils.RunPhase.TEST else self.epsilon
if self.discrete_controls: if self.discrete_controls:
top_action = np.argmax(action_values) top_action = np.argmax(action_values)
@@ -67,4 +69,4 @@ class EGreedy(ExplorationPolicy):
return np.squeeze(action_values + (np.random.rand() < epsilon) * noise) return np.squeeze(action_values + (np.random.rand() < epsilon) * noise)
def get_control_param(self): def get_control_param(self):
return self.evaluation_epsilon if self.phase == RunPhase.TEST else self.epsilon return self.evaluation_epsilon if self.phase == utils.RunPhase.TEST else self.epsilon
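A standalone sketch of the discrete epsilon-greedy behaviour shown above: epsilon decays linearly while training and a fixed evaluation epsilon is used otherwise. The continuous branch (adding noise with probability epsilon) is omitted and all hyperparameters are illustrative:

import numpy as np


class EGreedySketch:
    def __init__(self, initial_epsilon=1.0, final_epsilon=0.01,
                 decay_steps=10000, evaluation_epsilon=0.0):
        self.epsilon = initial_epsilon
        self.final_epsilon = final_epsilon
        self.epsilon_decay_delta = (initial_epsilon - final_epsilon) / decay_steps
        self.evaluation_epsilon = evaluation_epsilon

    def decay_exploration(self):
        self.epsilon = max(self.final_epsilon,
                           self.epsilon - self.epsilon_decay_delta)

    def get_action(self, action_values, training=True):
        if training:
            self.decay_exploration()
        epsilon = self.epsilon if training else self.evaluation_epsilon
        if np.random.rand() < epsilon:
            return np.random.randint(len(action_values))  # explore
        return int(np.argmax(action_values))              # exploit


policy = EGreedySketch()
print(policy.get_action(np.array([0.1, 0.9, 0.3])))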

View File

@@ -13,10 +13,7 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# #
import utils
import numpy as np
from utils import *
from configurations import *
class ExplorationPolicy(object): class ExplorationPolicy(object):
@@ -25,7 +22,7 @@ class ExplorationPolicy(object):
:param tuning_parameters: A Preset class instance with all the running paramaters :param tuning_parameters: A Preset class instance with all the running paramaters
:type tuning_parameters: Preset :type tuning_parameters: Preset
""" """
self.phase = RunPhase.HEATUP self.phase = utils.RunPhase.HEATUP
self.action_space_size = tuning_parameters.env.action_space_size self.action_space_size = tuning_parameters.env.action_space_size
self.action_abs_range = tuning_parameters.env_instance.action_space_abs_range self.action_abs_range = tuning_parameters.env_instance.action_space_abs_range
self.discrete_controls = tuning_parameters.env_instance.discrete_controls self.discrete_controls = tuning_parameters.env_instance.discrete_controls

View File

@@ -13,17 +13,18 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# #
import numpy as np
from exploration_policies.exploration_policy import * from exploration_policies import exploration_policy
class Greedy(ExplorationPolicy): class Greedy(exploration_policy.ExplorationPolicy):
def __init__(self, tuning_parameters): def __init__(self, tuning_parameters):
""" """
:param tuning_parameters: A Preset class instance with all the running paramaters :param tuning_parameters: A Preset class instance with all the running paramaters
:type tuning_parameters: Preset :type tuning_parameters: Preset
""" """
ExplorationPolicy.__init__(self, tuning_parameters) exploration_policy.ExplorationPolicy.__init__(self, tuning_parameters)
def get_action(self, action_values): def get_action(self, action_values):
return np.argmax(action_values) return np.argmax(action_values)

View File

@@ -13,21 +13,21 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# #
import numpy as np import numpy as np
from exploration_policies.exploration_policy import *
from exploration_policies import exploration_policy
# Based on the description in: # Based on the description in:
# https://math.stackexchange.com/questions/1287634/implementing-ornstein-uhlenbeck-in-matlab # https://math.stackexchange.com/questions/1287634/implementing-ornstein-uhlenbeck-in-matlab
# Ornstein-Uhlenbeck process # Ornstein-Uhlenbeck process
class OUProcess(ExplorationPolicy): class OUProcess(exploration_policy.ExplorationPolicy):
def __init__(self, tuning_parameters): def __init__(self, tuning_parameters):
""" """
:param tuning_parameters: A Preset class instance with all the running paramaters :param tuning_parameters: A Preset class instance with all the running paramaters
:type tuning_parameters: Preset :type tuning_parameters: Preset
""" """
ExplorationPolicy.__init__(self, tuning_parameters) exploration_policy.ExplorationPolicy.__init__(self, tuning_parameters)
self.action_space_size = tuning_parameters.env.action_space_size self.action_space_size = tuning_parameters.env.action_space_size
self.mu = float(tuning_parameters.exploration.mu) * np.ones(self.action_space_size) self.mu = float(tuning_parameters.exploration.mu) * np.ones(self.action_space_size)
self.theta = tuning_parameters.exploration.theta self.theta = tuning_parameters.exploration.theta
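For reference, the standard discretized Ornstein-Uhlenbeck update the linked answer describes, as a standalone sketch producing temporally correlated noise (theta, sigma and dt values are illustrative):

import numpy as np


class OUProcessSketch:
    def __init__(self, action_space_size, mu=0.0, theta=0.15, sigma=0.2, dt=1.0):
        self.mu = mu * np.ones(action_space_size)
        self.theta = theta
        self.sigma = sigma
        self.dt = dt
        self.state = np.copy(self.mu)

    def reset(self):
        self.state = np.copy(self.mu)

    def sample(self):
        # dx = theta * (mu - x) * dt + sigma * sqrt(dt) * N(0, 1)
        noise = np.random.normal(size=self.state.shape)
        self.state += self.theta * (self.mu - self.state) * self.dt \
                      + self.sigma * np.sqrt(self.dt) * noise
        return self.state.copy()


ou = OUProcessSketch(action_space_size=2)
print([ou.sample() for _ in range(3)])  # successive samples are correlated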

View File

@@ -13,17 +13,18 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# #
import numpy as np
from exploration_policies.exploration_policy import * from exploration_policies import exploration_policy
class ThompsonSampling(ExplorationPolicy): class ThompsonSampling(exploration_policy.ExplorationPolicy):
def __init__(self, tuning_parameters): def __init__(self, tuning_parameters):
""" """
:param tuning_parameters: A Preset class instance with all the running paramaters :param tuning_parameters: A Preset class instance with all the running paramaters
:type tuning_parameters: Preset :type tuning_parameters: Preset
""" """
ExplorationPolicy.__init__(self, tuning_parameters) exploration_policy.ExplorationPolicy.__init__(self, tuning_parameters)
self.action_space_size = tuning_parameters.env.action_space_size self.action_space_size = tuning_parameters.env.action_space_size
def get_action(self, action_values): def get_action(self, action_values):

View File

@@ -13,19 +13,16 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# #
import datetime
from pandas import *
import os import os
import re import re
from pprint import pprint
import threading
from subprocess import Popen, PIPE
import time
import datetime
from six.moves import input
from PIL import Image
from typing import Union
import shutil import shutil
import time
import typing
import pandas
import PIL
from six.moves import input
global failed_imports global failed_imports
failed_imports = [] failed_imports = []
@@ -90,7 +87,7 @@ class ScreenLogger(object):
def ask_input(self, title): def ask_input(self, title):
return input("{}{}{}".format(Colors.BG_CYAN, title, Colors.END)) return input("{}{}{}".format(Colors.BG_CYAN, title, Colors.END))
def ask_yes_no(self, title: str, default: Union[None, bool]=None): def ask_yes_no(self, title: str, default: typing.Union[None, bool]=None):
""" """
Ask the user for a yes / no question and return True if the answer is yes and False otherwise. Ask the user for a yes / no question and return True if the answer is yes and False otherwise.
The function will keep asking the user for an answer until he answers one of the possible responses. The function will keep asking the user for an answer until he answers one of the possible responses.
@@ -156,7 +153,7 @@ class BaseLogger(object):
class Logger(BaseLogger): class Logger(BaseLogger):
def __init__(self): def __init__(self):
BaseLogger.__init__(self) BaseLogger.__init__(self)
self.data = DataFrame() self.data = pandas.DataFrame()
self.csv_path = '' self.csv_path = ''
self.doc_path = '' self.doc_path = ''
self.aggregated_data_across_threads = None self.aggregated_data_across_threads = None
@@ -249,7 +246,7 @@ class Logger(BaseLogger):
if not os.path.exists(output_dir): if not os.path.exists(output_dir):
os.makedirs(output_dir) os.makedirs(output_dir)
output_path = os.path.join(output_dir, output_file) output_path = os.path.join(output_dir, output_file)
pil_images = [Image.fromarray(image) for image in images] pil_images = [PIL.Image.fromarray(image) for image in images]
pil_images[0].save(output_path, save_all=True, append_images=pil_images[1:], duration=1.0 / fps, loop=0) pil_images[0].save(output_path, save_all=True, append_images=pil_images[1:], duration=1.0 / fps, loop=0)
def remove_experiment_dir(self): def remove_experiment_dir(self):
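A sketch mirroring the frames-to-GIF code above with Pillow; the output path and fps are illustrative, and the per-frame duration is passed in milliseconds, which is what Pillow expects:

import os

import numpy as np
import PIL.Image


def save_gif(images, output_dir='./gifs', output_file='episode.gif', fps=10):
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    output_path = os.path.join(output_dir, output_file)
    pil_images = [PIL.Image.fromarray(image) for image in images]
    # Pillow interprets duration as milliseconds per frame
    pil_images[0].save(output_path, save_all=True, append_images=pil_images[1:],
                       duration=int(1000 / fps), loop=0)
    return output_path


frames = [np.random.randint(0, 255, (64, 64, 3), dtype=np.uint8) for _ in range(5)]
print(save_gif(frames))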

View File

@@ -13,7 +13,18 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# #
from memories.differentiable_neural_dictionary import AnnoyDictionary
from memories.differentiable_neural_dictionary import AnnoyIndex
from memories.differentiable_neural_dictionary import QDND
from memories.episodic_experience_replay import EpisodicExperienceReplay
from memories.memory import Episode
from memories.memory import Memory
from memories.memory import Transition
from memories.differentiable_neural_dictionary import * __all__ = [AnnoyDictionary,
from memories.episodic_experience_replay import * AnnoyIndex,
from memories.memory import * Episode,
EpisodicExperienceReplay,
Memory,
QDND,
Transition]

View File

@@ -13,10 +13,11 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# #
import os
import pickle
import numpy as np import numpy as np
from annoy import AnnoyIndex from annoy import AnnoyIndex
import os, pickle
class AnnoyDictionary(object): class AnnoyDictionary(object):

View File

@@ -13,24 +13,25 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# #
import typing
from memories.memory import * import numpy as np
import threading
from typing import Union from memories import memory
class EpisodicExperienceReplay(Memory): class EpisodicExperienceReplay(memory.Memory):
def __init__(self, tuning_parameters): def __init__(self, tuning_parameters):
""" """
:param tuning_parameters: A Preset class instance with all the running paramaters :param tuning_parameters: A Preset class instance with all the running paramaters
:type tuning_parameters: Preset :type tuning_parameters: Preset
""" """
Memory.__init__(self, tuning_parameters) memory.Memory.__init__(self, tuning_parameters)
self.tp = tuning_parameters self.tp = tuning_parameters
self.max_size_in_episodes = tuning_parameters.agent.num_episodes_in_experience_replay self.max_size_in_episodes = tuning_parameters.agent.num_episodes_in_experience_replay
self.max_size_in_transitions = tuning_parameters.agent.num_transitions_in_experience_replay self.max_size_in_transitions = tuning_parameters.agent.num_transitions_in_experience_replay
self.discount = tuning_parameters.agent.discount self.discount = tuning_parameters.agent.discount
self.buffer = [Episode()] # list of episodes self.buffer = [memory.Episode()] # list of episodes
self.transitions = [] self.transitions = []
self._length = 1 self._length = 1
self._num_transitions = 0 self._num_transitions = 0
@@ -96,7 +97,7 @@ class EpisodicExperienceReplay(Memory):
def store(self, transition): def store(self, transition):
if len(self.buffer) == 0: if len(self.buffer) == 0:
self.buffer.append(Episode()) self.buffer.append(memory.Episode())
last_episode = self.buffer[-1] last_episode = self.buffer[-1]
last_episode.insert(transition) last_episode.insert(transition)
self.transitions.append(transition) self.transitions.append(transition)
@@ -109,7 +110,7 @@ class EpisodicExperienceReplay(Memory):
n_step_return=self.tp.agent.n_step) n_step_return=self.tp.agent.n_step)
self.buffer[-1].update_measurements_targets(self.tp.agent.num_predicted_steps_ahead) self.buffer[-1].update_measurements_targets(self.tp.agent.num_predicted_steps_ahead)
# self.buffer[-1].update_actions_probabilities() # used for off-policy policy optimization # self.buffer[-1].update_actions_probabilities() # used for off-policy policy optimization
self.buffer.append(Episode()) self.buffer.append(memory.Episode())
self.enforce_length() self.enforce_length()
@@ -148,7 +149,7 @@ class EpisodicExperienceReplay(Memory):
def get(self, index): def get(self, index):
return self.get_episode(index) return self.get_episode(index)
def get_last_complete_episode(self) -> Union[None, Episode]: def get_last_complete_episode(self) -> typing.Union[None, memory.Episode]:
""" """
Returns the last complete episode in the memory or None if there are no complete episodes Returns the last complete episode in the memory or None if there are no complete episodes
:return: None or the last complete episode :return: None or the last complete episode
@@ -170,7 +171,7 @@ class EpisodicExperienceReplay(Memory):
def clean(self): def clean(self):
self.transitions = [] self.transitions = []
self.buffer = [Episode()] self.buffer = [memory.Episode()]
self._length = 1 self._length = 1
self._num_transitions = 0 self._num_transitions = 0
self._num_transitions_in_complete_episodes = 0 self._num_transitions_in_complete_episodes = 0
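A reduced sketch of the episode-grouped storage above, using plain Python containers; the Episode class and capacity handling are simplified stand-ins, not Coach's implementation:

import random


class EpisodeSketch:
    def __init__(self):
        self.transitions = []

    def insert(self, transition):
        self.transitions.append(transition)


class EpisodicReplaySketch:
    def __init__(self, max_episodes=100):
        self.max_episodes = max_episodes
        self.buffer = [EpisodeSketch()]   # last entry is the episode in progress

    def store(self, transition, episode_ended=False):
        self.buffer[-1].insert(transition)
        if episode_ended:
            # close the running episode and open a fresh one
            self.buffer.append(EpisodeSketch())
            while len(self.buffer) > self.max_episodes + 1:
                self.buffer.pop(0)        # drop the oldest complete episode

    def last_complete_episode(self):
        return self.buffer[-2] if len(self.buffer) > 1 else None

    def sample_transition(self):
        complete = [t for e in self.buffer[:-1] for t in e.transitions]
        return random.choice(complete) if complete else None


replay = EpisodicReplaySketch()
for step in range(5):
    replay.store({'reward': step}, episode_ended=(step == 4))
print(replay.last_complete_episode().transitions[-1])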

View File

@@ -13,10 +13,7 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# #
import numpy as np import numpy as np
import copy
from configurations import *
class Memory(object): class Memory(object):

View File

@@ -13,19 +13,16 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# #
import argparse import argparse
import tensorflow as tf import os
from architectures import *
from environments import *
from agents import *
from utils import *
import time import time
import copy
from logger import * import tensorflow as tf
from configurations import *
from presets import * import agents
import shutil import environments
import logger
import presets
start_time = time.time() start_time = time.time()
@@ -66,15 +63,15 @@ if __name__ == "__main__":
elif args.job_name == "worker": elif args.job_name == "worker":
# get tuning parameters # get tuning parameters
tuning_parameters = json_to_preset(args.load_json_path) tuning_parameters = presets.json_to_preset(args.load_json_path)
# dump documentation # dump documentation
if not os.path.exists(tuning_parameters.experiment_path): if not os.path.exists(tuning_parameters.experiment_path):
os.makedirs(tuning_parameters.experiment_path) os.makedirs(tuning_parameters.experiment_path)
if tuning_parameters.evaluate_only: if tuning_parameters.evaluate_only:
logger.set_dump_dir(tuning_parameters.experiment_path, tuning_parameters.task_id, filename='evaluator') logger.logger.set_dump_dir(tuning_parameters.experiment_path, tuning_parameters.task_id, filename='evaluator')
else: else:
logger.set_dump_dir(tuning_parameters.experiment_path, tuning_parameters.task_id) logger.logger.set_dump_dir(tuning_parameters.experiment_path, tuning_parameters.task_id)
# multi-threading parameters # multi-threading parameters
tuning_parameters.start_time = start_time tuning_parameters.start_time = start_time
@@ -98,8 +95,8 @@ if __name__ == "__main__":
cluster=cluster) cluster=cluster)
# create the agent and the environment # create the agent and the environment
env_instance = create_environment(tuning_parameters) env_instance = environments.create_environment(tuning_parameters)
exec('agent = ' + tuning_parameters.agent.type + '(env_instance, tuning_parameters, replicated_device=device, ' exec('agent = agents.' + tuning_parameters.agent.type + '(env_instance, tuning_parameters, replicated_device=device, '
'thread_id=tuning_parameters.task_id)') 'thread_id=tuning_parameters.task_id)')
# building the scaffold # building the scaffold
@@ -169,6 +166,6 @@ if __name__ == "__main__":
else: else:
agent.improve() agent.improve()
else: else:
screen.error("Invalid mode requested for parallel_actor.") logger.screen.error("Invalid mode requested for parallel_actor.")
exit(1) exit(1)
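The worker above builds "agent = agents.<type>(...)" as a string for exec(); a getattr() lookup is the usual alternative for this kind of name-driven construction. A self-contained sketch with stub classes standing in for the real agents package (all names are illustrative):

class DQNAgentStub:
    def __init__(self, env, params, **kwargs):
        self.env, self.params, self.kwargs = env, params, kwargs


class AgentsPackageStub:
    DQNAgent = DQNAgentStub


def create_agent(agents_module, agent_type, env, params, **kwargs):
    # raises AttributeError for unknown names instead of failing inside exec()
    agent_class = getattr(agents_module, agent_type)
    return agent_class(env, params, **kwargs)


agent = create_agent(AgentsPackageStub, 'DQNAgent',
                     env='CartPole', params='preset', thread_id=0)
print(type(agent).__name__)   # DQNAgentStub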

View File

@@ -1,8 +1,10 @@
import argparse import argparse
import os
import matplotlib import matplotlib
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
from dashboard import SignalsFile from dashboard import SignalsFile
import os
class FigureMaker(object): class FigureMaker(object):

File diff suppressed because it is too large

View File

@@ -1,6 +1,6 @@
import pygame
from pygame.locals import *
import numpy as np import numpy as np
import pygame
from pygame import locals as loc
class Renderer(object): class Renderer(object):
@@ -21,7 +21,8 @@ class Renderer(object):
:return: None :return: None
""" """
self.size = (width, height) self.size = (width, height)
self.screen = self.display.set_mode(self.size, HWSURFACE | DOUBLEBUF) self.screen = self.display.set_mode(self.size,
loc.HWSURFACE | loc.DOUBLEBUF)
self.display.set_caption("Coach") self.display.set_caption("Coach")
self.is_open = True self.is_open = True
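A runnable variant of the display setup above, keeping the flag constants behind the loc alias as the new code does; it assumes a machine with an actual display, and the frame-blitting helper is an illustrative extra:

import numpy as np
import pygame
from pygame import locals as loc


def open_window(width=320, height=240):
    pygame.init()
    screen = pygame.display.set_mode((width, height),
                                     loc.HWSURFACE | loc.DOUBLEBUF)
    pygame.display.set_caption("Coach")
    return screen


def show_frame(screen, frame):
    # frame: HxWx3 uint8 array; pygame surfaces are (width, height) ordered
    surface = pygame.surfarray.make_surface(np.swapaxes(frame, 0, 1))
    screen.blit(surface, (0, 0))
    pygame.display.flip()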

View File

@@ -13,23 +13,21 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# #
import argparse
# -*- coding: utf-8 -*-
import presets
import numpy as np
import pandas as pd
from os import path
import os
import glob import glob
import os
import shutil import shutil
import signal
import subprocess
import sys import sys
import time import time
from logger import screen
from utils import list_all_classes_in_module, threaded_cmd_line_run, killed_processes
import subprocess
import signal
import argparse
import numpy as np
import pandas as pd
import logger
import presets
import utils
if __name__ == '__main__': if __name__ == '__main__':
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
@@ -61,7 +59,7 @@ if __name__ == '__main__':
if args.preset is not None: if args.preset is not None:
presets_lists = [args.preset] presets_lists = [args.preset]
else: else:
presets_lists = list_all_classes_in_module(presets) presets_lists = utils.list_all_classes_in_module(presets)
win_size = 10 win_size = 10
fail_count = 0 fail_count = 0
test_count = 0 test_count = 0
@@ -70,7 +68,7 @@ if __name__ == '__main__':
# create a clean experiment directory # create a clean experiment directory
test_name = '__test' test_name = '__test'
test_path = os.path.join('./experiments', test_name) test_path = os.path.join('./experiments', test_name)
if path.exists(test_path): if os.path.exists(test_path):
shutil.rmtree(test_path) shutil.rmtree(test_path)
if args.ignore_presets is not None: if args.ignore_presets is not None:
presets_to_ignore = args.ignore_presets.split(',') presets_to_ignore = args.ignore_presets.split(',')
@@ -100,7 +98,7 @@ if __name__ == '__main__':
test_count += 1 test_count += 1
# run the experiment in a separate thread # run the experiment in a separate thread
screen.log_title("Running test {} - {}".format(preset_name, framework)) logger.screen.log_title("Running test {} - {}".format(preset_name, framework))
log_file_name = 'test_log_{preset_name}_{framework}.txt'.format( log_file_name = 'test_log_{preset_name}_{framework}.txt'.format(
preset_name=preset_name, preset_name=preset_name,
framework=framework, framework=framework,
@@ -139,7 +137,7 @@ if __name__ == '__main__':
tries_counter = 0 tries_counter = 0
while not csv_paths: while not csv_paths:
csv_paths = glob.glob(path.join(test_path, '*', filename_pattern)) csv_paths = glob.glob(os.path.join(test_path, '*', filename_pattern))
if tries_counter > read_csv_tries: if tries_counter > read_csv_tries:
break break
tries_counter += 1 tries_counter += 1
@@ -195,26 +193,26 @@ if __name__ == '__main__':
# kill test and print result # kill test and print result
os.killpg(os.getpgid(p.pid), signal.SIGTERM) os.killpg(os.getpgid(p.pid), signal.SIGTERM)
if test_passed: if test_passed:
screen.success("Passed successfully") logger.screen.success("Passed successfully")
else: else:
if csv_paths: if csv_paths:
screen.error("Failed due to insufficient reward", crash=False) logger.screen.error("Failed due to insufficient reward", crash=False)
screen.error("preset.test_max_step_threshold: {}".format(preset.test_max_step_threshold), crash=False) logger.screen.error("preset.test_max_step_threshold: {}".format(preset.test_max_step_threshold), crash=False)
screen.error("preset.test_min_return_threshold: {}".format(preset.test_min_return_threshold), crash=False) logger.screen.error("preset.test_min_return_threshold: {}".format(preset.test_min_return_threshold), crash=False)
screen.error("averaged_rewards: {}".format(averaged_rewards), crash=False) logger.screen.error("averaged_rewards: {}".format(averaged_rewards), crash=False)
screen.error("episode number: {}".format(csv['Episode #'].values[-1]), crash=False) logger.screen.error("episode number: {}".format(csv['Episode #'].values[-1]), crash=False)
else: else:
screen.error("csv file never found", crash=False) logger.screen.error("csv file never found", crash=False)
if args.verbose: if args.verbose:
screen.error("command exitcode: {}".format(p.returncode), crash=False) logger.screen.error("command exitcode: {}".format(p.returncode), crash=False)
screen.error(open(log_file_name).read(), crash=False) logger.screen.error(open(log_file_name).read(), crash=False)
fail_count += 1 fail_count += 1
shutil.rmtree(test_path) shutil.rmtree(test_path)
screen.separator() logger.screen.separator()
if fail_count == 0: if fail_count == 0:
screen.success(" Summary: " + str(test_count) + "/" + str(test_count) + " tests passed successfully") logger.screen.success(" Summary: " + str(test_count) + "/" + str(test_count) + " tests passed successfully")
else: else:
screen.error(" Summary: " + str(test_count - fail_count) + "/" + str(test_count) + " tests passed successfully") logger.screen.error(" Summary: " + str(test_count - fail_count) + "/" + str(test_count) + " tests passed successfully")
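The test loop above waits for the experiment CSV by repeatedly globbing with a retry counter. The same idea as a small helper (pattern, retry count and delay are illustrative):

import glob
import os
import time


def wait_for_csv(test_path, filename_pattern='*.csv', retries=100, delay=1.0):
    for _ in range(retries):
        csv_paths = glob.glob(os.path.join(test_path, '*', filename_pattern))
        if csv_paths:
            return csv_paths
        time.sleep(delay)
    return []   # caller treats an empty list as "csv file never found"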

View File

@@ -13,20 +13,22 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# #
import json
import inspect
import os
import numpy as np
import threading
from subprocess import call, Popen
import signal
import copy import copy
import inspect
import json
import os
import signal
import subprocess
import threading
import numpy as np
killed_processes = [] killed_processes = []
eps = np.finfo(np.float32).eps eps = np.finfo(np.float32).eps
class Enum(object): class Enum(object):
def __init__(self): def __init__(self):
pass pass
@@ -161,7 +163,7 @@ def ClassToDict(x):
def cmd_line_run(result, run_cmd, id=-1): def cmd_line_run(result, run_cmd, id=-1):
p = Popen(run_cmd, shell=True, executable="/bin/bash") p = subprocess.Popen(run_cmd, shell=True, executable="/bin/bash")
while result[0] is None or result[0] == [None]: while result[0] is None or result[0] == [None]:
if id in killed_processes: if id in killed_processes:
p.kill() p.kill()
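For context, the launcher pattern this hunk belongs to, reduced to a standalone, POSIX-only sketch (the shell command and polling interval are illustrative):

import subprocess
import threading
import time

killed_processes = []


def cmd_line_run(result, run_cmd, id=-1):
    p = subprocess.Popen(run_cmd, shell=True, executable="/bin/bash")
    while p.poll() is None:
        # cooperative kill: another thread appends this id to killed_processes
        if id in killed_processes:
            p.kill()
        time.sleep(0.1)
    result[0] = p.returncode


result = [None]
thread = threading.Thread(target=cmd_line_run, args=(result, "sleep 1", 0))
thread.start()
thread.join()
print(result[0])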