From 1b095aeecacfb9bccef69ac9cf6b5da1ed1183f8 Mon Sep 17 00:00:00 2001
From: Roman Dobosz
Date: Thu, 12 Apr 2018 19:46:32 +0200
Subject: [PATCH] Cleanup imports.

Until now, most modules imported all of another module's objects (variables, classes, functions, and even its own imports) into their namespace, which could lead, and in places did lead, to unintentional use of classes or methods that were only imported indirectly.

With this patch, all star imports were replaced with imports of the top-level module that provides the desired class or function. In addition, all imports were sorted (where possible) as PEP 8 [1] suggests: standard library imports first, then third-party imports (numpy, tensorflow, etc.), and finally coach modules, with each group separated by one blank line.

[1] https://www.python.org/dev/peps/pep-0008/#imports
---
 agents/__init__.py | 68 ++- agents/actor_critic_agent.py | 41 +- agents/agent.py | 181 ++++-- agents/bc_agent.py | 7 +- agents/bootstrapped_dqn_agent.py | 17 +- agents/categorical_dqn_agent.py | 10 +- agents/clipped_ppo_agent.py | 43 +- agents/ddpg_agent.py | 32 +- agents/ddqn_agent.py | 9 +- agents/dfp_agent.py | 17 +- agents/distributional_dqn_agent.py | 10 +- agents/dqn_agent.py | 9 +- agents/human_agent.py | 36 +- agents/imitation_agent.py | 24 +- agents/mmc_agent.py | 9 +- agents/n_step_q_agent.py | 23 +- agents/naf_agent.py | 15 +- agents/nec_agent.py | 15 +- agents/pal_agent.py | 9 +- agents/policy_gradients_agent.py | 37 +- agents/policy_optimization_agent.py | 27 +- agents/ppo_agent.py | 58 +-- agents/qr_dqn_agent.py | 7 +- agents/value_optimization_agent.py | 21 +- architectures/__init__.py | 23 +- architectures/architecture.py | 6 +- architectures/neon_components/architecture.py | 19 +- architectures/neon_components/embedders.py | 9 +- .../neon_components/general_network.py | 59 +-- architectures/neon_components/heads.py | 38 +- architectures/neon_components/losses.py | 9 +- architectures/neon_components/middleware.py | 9 +- architectures/network_wrapper.py | 33 +- .../tensorflow_components/architecture.py | 23 +- .../tensorflow_components/embedders.py | 4 +- .../tensorflow_components/general_network.py | 57 +-- architectures/tensorflow_components/heads.py | 18 +- .../tensorflow_components/middleware.py | 1 - .../tensorflow_components/shared_variables.py | 5 +- coach.py | 95 ++-- configurations.py | 6 +- dashboard.py | 203 +++++---- debug_utils.py | 3 +- docs/docs/mdx_math.py | 4 +- docs/docs/setup.py | 7 +- docs/fix_index.py | 11 +- environments/__init__.py | 17 +- environments/carla_environment_wrapper.py | 65 ++- environments/doom_environment_wrapper.py | 26 +- environments/environment_wrapper.py | 14 +- environments/gym_environment_wrapper.py | 36 +- exploration_policies/__init__.py | 39 +- exploration_policies/additive_noise.py | 13 +- ...ximated_thompson_sampling_using_dropout.py | 9 +- exploration_policies/bayesian.py | 13 +- exploration_policies/boltzmann.py | 13 +- exploration_policies/bootstrapped.py | 11 +- exploration_policies/categorical.py | 9 +- exploration_policies/continuous_entropy.py | 8 +- exploration_policies/e_greedy.py | 16 +- exploration_policies/exploration_policy.py | 13 +- exploration_policies/greedy.py | 9 +- exploration_policies/ou_process.py | 10 +- exploration_policies/thompson_sampling.py | 9 +- logger.py | 23 +- memories/__init__.py | 19 +- memories/differentiable_neural_dictionary.py | 5 +- memories/episodic_experience_replay.py | 23 +- memories/memory.py | 5 +- parallel_actor.py | 33 +- plot_atari.py | 4
+- presets.py | 418 +++++++++--------- renderer.py | 7 +- run_test.py | 54 ++- utils.py | 20 +- 75 files changed, 1169 insertions(+), 1139 deletions(-) diff --git a/agents/__init__.py b/agents/__init__.py index fdbd13e..6be2538 100644 --- a/agents/__init__.py +++ b/agents/__init__.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2017 Intel Corporation +# Copyright (c) 2017 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,26 +13,48 @@ # See the License for the specific language governing permissions and # limitations under the License. # +from agents.actor_critic_agent import ActorCriticAgent +from agents.agent import Agent +from agents.bc_agent import BCAgent +from agents.bootstrapped_dqn_agent import BootstrappedDQNAgent +from agents.categorical_dqn_agent import CategoricalDQNAgent +from agents.clipped_ppo_agent import ClippedPPOAgent +from agents.ddpg_agent import DDPGAgent +from agents.ddqn_agent import DDQNAgent +from agents.dfp_agent import DFPAgent +from agents.dqn_agent import DQNAgent +from agents.human_agent import HumanAgent +from agents.imitation_agent import ImitationAgent +from agents.mmc_agent import MixedMonteCarloAgent +from agents.n_step_q_agent import NStepQAgent +from agents.naf_agent import NAFAgent +from agents.nec_agent import NECAgent +from agents.pal_agent import PALAgent +from agents.policy_gradients_agent import PolicyGradientsAgent +from agents.policy_optimization_agent import PolicyOptimizationAgent +from agents.ppo_agent import PPOAgent +from agents.qr_dqn_agent import QuantileRegressionDQNAgent +from agents.value_optimization_agent import ValueOptimizationAgent -from agents.actor_critic_agent import * -from agents.agent import * -from agents.bc_agent import * -from agents.bootstrapped_dqn_agent import * -from agents.clipped_ppo_agent import * -from agents.ddpg_agent import * -from agents.ddqn_agent import * -from agents.dfp_agent import * -from agents.dqn_agent import * -from agents.categorical_dqn_agent import * -from agents.human_agent import * -from agents.imitation_agent import * -from agents.mmc_agent import * -from agents.n_step_q_agent import * -from agents.naf_agent import * -from agents.nec_agent import * -from agents.pal_agent import * -from agents.policy_gradients_agent import * -from agents.policy_optimization_agent import * -from agents.ppo_agent import * -from agents.value_optimization_agent import * -from agents.qr_dqn_agent import * +__all__ = [ActorCriticAgent, + Agent, + BCAgent, + BootstrappedDQNAgent, + CategoricalDQNAgent, + ClippedPPOAgent, + DDPGAgent, + DDQNAgent, + DFPAgent, + DQNAgent, + HumanAgent, + ImitationAgent, + MixedMonteCarloAgent, + NAFAgent, + NECAgent, + NStepQAgent, + PALAgent, + PPOAgent, + PolicyGradientsAgent, + PolicyOptimizationAgent, + QuantileRegressionDQNAgent, + ValueOptimizationAgent] diff --git a/agents/actor_critic_agent.py b/agents/actor_critic_agent.py index 729e67f..d514acd 100644 --- a/agents/actor_critic_agent.py +++ b/agents/actor_critic_agent.py @@ -13,23 +13,24 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# +import numpy as np +from scipy import signal -from agents.policy_optimization_agent import * -from logger import * -from utils import * -import scipy.signal +from agents import policy_optimization_agent as poa +import utils +import logger # Actor Critic - https://arxiv.org/abs/1602.01783 -class ActorCriticAgent(PolicyOptimizationAgent): +class ActorCriticAgent(poa.PolicyOptimizationAgent): def __init__(self, env, tuning_parameters, replicated_device=None, thread_id=0, create_target_network = False): - PolicyOptimizationAgent.__init__(self, env, tuning_parameters, replicated_device, thread_id, create_target_network) + poa.PolicyOptimizationAgent.__init__(self, env, tuning_parameters, replicated_device, thread_id, create_target_network) self.last_gradient_update_step_idx = 0 - self.action_advantages = Signal('Advantages') - self.state_values = Signal('Values') - self.unclipped_grads = Signal('Grads (unclipped)') - self.value_loss = Signal('Value Loss') - self.policy_loss = Signal('Policy Loss') + self.action_advantages = utils.Signal('Advantages') + self.state_values = utils.Signal('Values') + self.unclipped_grads = utils.Signal('Grads (unclipped)') + self.value_loss = utils.Signal('Value Loss') + self.policy_loss = utils.Signal('Policy Loss') self.signals.append(self.action_advantages) self.signals.append(self.state_values) self.signals.append(self.unclipped_grads) @@ -38,7 +39,7 @@ class ActorCriticAgent(PolicyOptimizationAgent): # Discounting function used to calculate discounted returns. def discount(self, x, gamma): - return scipy.signal.lfilter([1], [1, -gamma], x[::-1], axis=0)[::-1] + return signal.lfilter([1], [1, -gamma], x[::-1], axis=0)[::-1] def get_general_advantage_estimation_values(self, rewards, values): # values contain n+1 elements (t ... t+n+1), rewards contain n elements (t ... 
t + n) @@ -72,20 +73,20 @@ class ActorCriticAgent(PolicyOptimizationAgent): # estimate the advantage function action_advantages = np.zeros((num_transitions, 1)) - if self.policy_gradient_rescaler == PolicyGradientRescaler.A_VALUE: + if self.policy_gradient_rescaler == poa.PolicyGradientRescaler.A_VALUE: if game_overs[-1]: R = 0 else: - R = self.main_network.online_network.predict(last_sample(next_states))[0] + R = self.main_network.online_network.predict(utils.last_sample(next_states))[0] for i in reversed(range(num_transitions)): R = rewards[i] + self.tp.agent.discount * R state_value_head_targets[i] = R action_advantages[i] = R - current_state_values[i] - elif self.policy_gradient_rescaler == PolicyGradientRescaler.GAE: + elif self.policy_gradient_rescaler == poa.PolicyGradientRescaler.GAE: # get bootstraps - bootstrapped_value = self.main_network.online_network.predict(last_sample(next_states))[0] + bootstrapped_value = self.main_network.online_network.predict(utils.last_sample(next_states))[0] values = np.append(current_state_values, bootstrapped_value) if game_overs[-1]: values[-1] = 0 @@ -94,7 +95,7 @@ class ActorCriticAgent(PolicyOptimizationAgent): gae_values, state_value_head_targets = self.get_general_advantage_estimation_values(rewards, values) action_advantages = np.vstack(gae_values) else: - screen.warning("WARNING: The requested policy gradient rescaler is not available") + logger.screen.warning("WARNING: The requested policy gradient rescaler is not available") action_advantages = action_advantages.squeeze(axis=-1) if not self.env.discrete_controls and len(actions.shape) < 2: @@ -113,7 +114,7 @@ class ActorCriticAgent(PolicyOptimizationAgent): return total_loss - def choose_action(self, curr_state, phase=RunPhase.TRAIN): + def choose_action(self, curr_state, phase=utils.RunPhase.TRAIN): # TODO: rename curr_state -> state # convert to batch so we can run it through the network @@ -126,7 +127,7 @@ class ActorCriticAgent(PolicyOptimizationAgent): # DISCRETE state_value, action_probabilities = self.main_network.online_network.predict(curr_state) action_probabilities = action_probabilities.squeeze() - if phase == RunPhase.TRAIN: + if phase == utils.RunPhase.TRAIN: action = self.exploration_policy.get_action(action_probabilities) else: action = np.argmax(action_probabilities) @@ -137,7 +138,7 @@ class ActorCriticAgent(PolicyOptimizationAgent): state_value, action_values_mean, action_values_std = self.main_network.online_network.predict(curr_state) action_values_mean = action_values_mean.squeeze() action_values_std = action_values_std.squeeze() - if phase == RunPhase.TRAIN: + if phase == utils.RunPhase.TRAIN: action = np.squeeze(np.random.randn(1, self.action_space_size) * action_values_std + action_values_mean) else: action = action_values_mean diff --git a/agents/agent.py b/agents/agent.py index 006544e..c1f5262 100644 --- a/agents/agent.py +++ b/agents/agent.py @@ -13,32 +13,28 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# - -import scipy.ndimage -try: - import matplotlib.pyplot as plt -except: - from logger import failed_imports - failed_imports.append("matplotlib") - -import copy -from renderer import Renderer -from configurations import Preset -from collections import deque -from utils import LazyStack -from collections import OrderedDict -from utils import RunPhase, Signal, is_empty, RunningStat -from architectures import * -from exploration_policies import * -from memories import * -from memories.memory import * -from logger import logger, screen +import collections import random import time -import os -import itertools -from architectures.tensorflow_components.shared_variables import SharedRunningStats + +import logger +try: + import matplotlib.pyplot as plt +except ImportError: + logger.failed_imports.append("matplotlib") + +import numpy as np +from pandas.io import pickle from six.moves import range +import scipy + +from architectures.tensorflow_components import shared_variables as sv +import configurations +import exploration_policies as ep +import memories +from memories import memory +import renderer +import utils class Agent(object): @@ -54,7 +50,7 @@ class Agent(object): :param thread_id: int """ - screen.log_title("Creating agent {}".format(task_id)) + logger.screen.log_title("Creating agent {}".format(task_id)) self.task_id = task_id self.sess = tuning_parameters.sess self.env = tuning_parameters.env_instance = env @@ -71,21 +67,20 @@ class Agent(object): # modules if tuning_parameters.agent.load_memory_from_file_path: - screen.log_title("Loading replay buffer from pickle. Pickle path: {}" + logger.screen.log_title("Loading replay buffer from pickle. Pickle path: {}" .format(tuning_parameters.agent.load_memory_from_file_path)) - self.memory = read_pickle(tuning_parameters.agent.load_memory_from_file_path) + self.memory = pickle.read_pickle(tuning_parameters.agent.load_memory_from_file_path) else: - self.memory = eval(tuning_parameters.memory + '(tuning_parameters)') - # self.architecture = eval(tuning_parameters.architecture) + self.memory = eval('memories.' + tuning_parameters.memory + '(tuning_parameters)') self.has_global = replicated_device is not None self.replicated_device = replicated_device self.worker_device = "/job:worker/task:{}/cpu:0".format(task_id) if replicated_device is not None else "/gpu:0" - self.exploration_policy = eval(tuning_parameters.exploration.policy + '(tuning_parameters)') - self.evaluation_exploration_policy = eval(tuning_parameters.exploration.evaluation_policy + self.exploration_policy = eval('ep.' + tuning_parameters.exploration.policy + '(tuning_parameters)') + self.evaluation_exploration_policy = eval('ep.' 
+ tuning_parameters.exploration.evaluation_policy + '(tuning_parameters)') - self.evaluation_exploration_policy.change_phase(RunPhase.TEST) + self.evaluation_exploration_policy.change_phase(utils.RunPhase.TEST) # initialize all internal variables self.tp = tuning_parameters @@ -100,30 +95,30 @@ class Agent(object): self.episode_running_info = {} self.last_episode_evaluation_ran = 0 self.running_observations = [] - logger.set_current_time(self.current_episode) + logger.logger.set_current_time(self.current_episode) self.main_network = None self.networks = [] self.last_episode_images = [] - self.renderer = Renderer() + self.renderer = renderer.Renderer() # signals self.signals = [] - self.loss = Signal('Loss') + self.loss = utils.Signal('Loss') self.signals.append(self.loss) - self.curr_learning_rate = Signal('Learning Rate') + self.curr_learning_rate = utils.Signal('Learning Rate') self.signals.append(self.curr_learning_rate) if self.tp.env.normalize_observation and not self.env.is_state_type_image: if not self.tp.distributed or not self.tp.agent.share_statistics_between_workers: - self.running_observation_stats = RunningStat((self.tp.env.desired_observation_width,)) - self.running_reward_stats = RunningStat(()) + self.running_observation_stats = utils.RunningStat((self.tp.env.desired_observation_width,)) + self.running_reward_stats = utils.RunningStat(()) else: - self.running_observation_stats = SharedRunningStats(self.tp, replicated_device, - shape=(self.tp.env.desired_observation_width,), - name='observation_stats') - self.running_reward_stats = SharedRunningStats(self.tp, replicated_device, - shape=(), - name='reward_stats') + self.running_observation_stats = sv.SharedRunningStats(self.tp, replicated_device, + shape=(self.tp.env.desired_observation_width,), + name='observation_stats') + self.running_reward_stats = sv.SharedRunningStats(self.tp, replicated_device, + shape=(), + name='reward_stats') # env is already reset at this point. Otherwise we're getting an error where you cannot # reset an env which is not done @@ -137,13 +132,13 @@ class Agent(object): def log_to_screen(self, phase): # log to screen if self.current_episode >= 0: - if phase == RunPhase.TRAIN: + if phase == utils.RunPhase.TRAIN: exploration = self.exploration_policy.get_control_param() else: exploration = self.evaluation_exploration_policy.get_control_param() - screen.log_dict( - OrderedDict([ + logger.screen.log_dict( + collections.OrderedDict([ ("Worker", self.task_id), ("Episode", self.current_episode), ("total reward", self.total_reward_in_current_episode), @@ -154,37 +149,37 @@ class Agent(object): prefix=phase ) - def update_log(self, phase=RunPhase.TRAIN): + def update_log(self, phase=utils.RunPhase.TRAIN): """ Writes logging messages to screen and updates the log file with all the signal values. 
:return: None """ # log all the signals to file - logger.set_current_time(self.current_episode) - logger.create_signal_value('Training Iter', self.training_iteration) - logger.create_signal_value('In Heatup', int(phase == RunPhase.HEATUP)) - logger.create_signal_value('ER #Transitions', self.memory.num_transitions()) - logger.create_signal_value('ER #Episodes', self.memory.length()) - logger.create_signal_value('Episode Length', self.current_episode_steps_counter) - logger.create_signal_value('Total steps', self.total_steps_counter) - logger.create_signal_value("Epsilon", self.exploration_policy.get_control_param()) - logger.create_signal_value("Training Reward", self.total_reward_in_current_episode - if phase == RunPhase.TRAIN else np.nan) - logger.create_signal_value('Evaluation Reward', self.total_reward_in_current_episode - if phase == RunPhase.TEST else np.nan) - logger.create_signal_value('Update Target Network', 0, overwrite=False) - logger.update_wall_clock_time(self.current_episode) + logger.logger.set_current_time(self.current_episode) + logger.logger.create_signal_value('Training Iter', self.training_iteration) + logger.logger.create_signal_value('In Heatup', int(phase == utils.RunPhase.HEATUP)) + logger.logger.create_signal_value('ER #Transitions', self.memory.num_transitions()) + logger.logger.create_signal_value('ER #Episodes', self.memory.length()) + logger.logger.create_signal_value('Episode Length', self.current_episode_steps_counter) + logger.logger.create_signal_value('Total steps', self.total_steps_counter) + logger.logger.create_signal_value("Epsilon", self.exploration_policy.get_control_param()) + logger.logger.create_signal_value("Training Reward", self.total_reward_in_current_episode + if phase == utils.RunPhase.TRAIN else np.nan) + logger.logger.create_signal_value('Evaluation Reward', self.total_reward_in_current_episode + if phase == utils.RunPhase.TEST else np.nan) + logger.logger.create_signal_value('Update Target Network', 0, overwrite=False) + logger.logger.update_wall_clock_time(self.current_episode) for signal in self.signals: - logger.create_signal_value("{}/Mean".format(signal.name), signal.get_mean()) - logger.create_signal_value("{}/Stdev".format(signal.name), signal.get_stdev()) - logger.create_signal_value("{}/Max".format(signal.name), signal.get_max()) - logger.create_signal_value("{}/Min".format(signal.name), signal.get_min()) + logger.logger.create_signal_value("{}/Mean".format(signal.name), signal.get_mean()) + logger.logger.create_signal_value("{}/Stdev".format(signal.name), signal.get_stdev()) + logger.logger.create_signal_value("{}/Max".format(signal.name), signal.get_max()) + logger.logger.create_signal_value("{}/Min".format(signal.name), signal.get_min()) # dump if self.current_episode % self.tp.visualization.dump_signals_to_csv_every_x_episodes == 0 \ and self.current_episode > 0: - logger.dump_output_csv() + logger.logger.dump_output_csv() def reset_game(self, do_not_reset_env=False): """ @@ -211,7 +206,7 @@ class Agent(object): self.episode_running_info[action] = [] plt.clf() - if self.tp.agent.middleware_type == MiddlewareTypes.LSTM: + if self.tp.agent.middleware_type == configurations.MiddlewareTypes.LSTM: for network in self.networks: network.online_network.curr_rnn_c_in = network.online_network.middleware_embedder.c_init network.online_network.curr_rnn_h_in = network.online_network.middleware_embedder.h_init @@ -281,9 +276,9 @@ class Agent(object): if self.total_steps_counter % 
self.tp.agent.num_steps_between_copying_online_weights_to_target == 0: for network in self.networks: network.update_target_network(self.tp.agent.rate_for_copying_weights_to_target) - logger.create_signal_value('Update Target Network', 1) + logger.logger.create_signal_value('Update Target Network', 1) else: - logger.create_signal_value('Update Target Network', 0, overwrite=False) + logger.logger.create_signal_value('Update Target Network', 0, overwrite=False) return loss @@ -321,7 +316,7 @@ class Agent(object): plt.legend() plt.pause(0.00000001) - def choose_action(self, curr_state, phase=RunPhase.TRAIN): + def choose_action(self, curr_state, phase=utils.RunPhase.TRAIN): """ choose an action to act with in the current episode being played. Different behavior might be exhibited when training or testing. @@ -351,15 +346,15 @@ class Agent(object): for input_name in self.tp.agent.input_types.keys(): input_state[input_name] = np.expand_dims(np.array(curr_state[input_name]), 0) return input_state - + def prepare_initial_state(self): """ Create an initial state when starting a new episode :return: None """ observation = self.preprocess_observation(self.env.state['observation']) - self.curr_stack = deque([observation]*self.tp.env.observation_stack_size, maxlen=self.tp.env.observation_stack_size) - observation = LazyStack(self.curr_stack, -1) + self.curr_stack = collections.deque([observation]*self.tp.env.observation_stack_size, maxlen=self.tp.env.observation_stack_size) + observation = utils.LazyStack(self.curr_stack, -1) self.curr_state = { 'observation': observation @@ -369,21 +364,21 @@ class Agent(object): if self.tp.agent.use_accumulated_reward_as_measurement: self.curr_state['measurements'] = np.append(self.curr_state['measurements'], 0) - def act(self, phase=RunPhase.TRAIN): + def act(self, phase=utils.RunPhase.TRAIN): """ Take one step in the environment according to the network prediction and store the transition in memory :param phase: Either Train or Test to specify if greedy actions should be used and if transitions should be stored :return: A boolean value that signals an episode termination """ - if phase != RunPhase.TEST: + if phase != utils.RunPhase.TEST: self.total_steps_counter += 1 self.current_episode_steps_counter += 1 # get new action action_info = {"action_probability": 1.0 / self.env.action_space_size, "action_value": 0, "max_action_value": 0} - if phase == RunPhase.HEATUP and not self.tp.heatup_using_network_decisions: + if phase == utils.RunPhase.HEATUP and not self.tp.heatup_using_network_decisions: action = self.env.get_random_action() else: action, action_info = self.choose_action(self.curr_state, phase=phase) @@ -402,13 +397,13 @@ class Agent(object): next_state['observation'] = self.preprocess_observation(next_state['observation']) # plot action values online - if self.tp.visualization.plot_action_values_online and phase != RunPhase.HEATUP: + if self.tp.visualization.plot_action_values_online and phase != utils.RunPhase.HEATUP: self.plot_action_values_online() # initialize the next state # TODO: provide option to stack more than just the observation self.curr_stack.append(next_state['observation']) - observation = LazyStack(self.curr_stack, -1) + observation = utils.LazyStack(self.curr_stack, -1) next_state['observation'] = observation if self.tp.agent.use_measurements and 'measurements' in result.keys(): @@ -417,14 +412,14 @@ class Agent(object): next_state['measurements'] = np.append(next_state['measurements'], self.total_reward_in_current_episode) # store the 
transition only if we are training - if phase == RunPhase.TRAIN or phase == RunPhase.HEATUP: - transition = Transition(self.curr_state, result['action'], shaped_reward, next_state, result['done']) + if phase == utils.RunPhase.TRAIN or phase == utils.RunPhase.HEATUP: + transition = memory.Transition(self.curr_state, result['action'], shaped_reward, next_state, result['done']) for key in action_info.keys(): transition.info[key] = action_info[key] if self.tp.agent.add_a_normalized_timestep_to_the_observation: transition.info['timestep'] = float(self.current_episode_steps_counter) / self.env.timestep_limit self.memory.store(transition) - elif phase == RunPhase.TEST and self.tp.visualization.dump_gifs: + elif phase == utils.RunPhase.TEST and self.tp.visualization.dump_gifs: # we store the transitions only for saving gifs self.last_episode_images.append(self.env.get_rendered_image()) @@ -437,7 +432,7 @@ class Agent(object): self.update_log(phase=phase) self.log_to_screen(phase=phase) - if phase == RunPhase.TRAIN or phase == RunPhase.HEATUP: + if phase == utils.RunPhase.TRAIN or phase == utils.RunPhase.HEATUP: self.reset_game() self.current_episode += 1 @@ -456,8 +451,8 @@ class Agent(object): max_reward_achieved = -float('inf') average_evaluation_reward = 0 - screen.log_title("Running evaluation") - self.env.change_phase(RunPhase.TEST) + logger.screen.log_title("Running evaluation") + self.env.change_phase(utils.RunPhase.TEST) for i in range(num_episodes): # keep the online network in sync with the global network if keep_networks_synced: @@ -466,7 +461,7 @@ class Agent(object): episode_ended = False while not episode_ended: - episode_ended = self.act(phase=RunPhase.TEST) + episode_ended = self.act(phase=utils.RunPhase.TEST) if keep_networks_synced \ and self.total_steps_counter % self.tp.agent.update_evaluation_agent_network_after_every_num_steps: @@ -477,7 +472,7 @@ class Agent(object): max_reward_achieved = self.total_reward_in_current_episode frame_skipping = int(5/self.tp.env.frame_skip) if self.tp.visualization.dump_gifs: - logger.create_gif(self.last_episode_images[::frame_skipping], + logger.logger.create_gif(self.last_episode_images[::frame_skipping], name='score-{}'.format(max_reward_achieved), fps=10) average_evaluation_reward += self.total_reward_in_current_episode @@ -485,8 +480,8 @@ class Agent(object): average_evaluation_reward /= float(num_episodes) - self.env.change_phase(RunPhase.TRAIN) - screen.log_title("Evaluation done. Average reward = {}.".format(average_evaluation_reward)) + self.env.change_phase(utils.RunPhase.TRAIN) + logger.screen.log_title("Evaluation done. 
Average reward = {}.".format(average_evaluation_reward)) def post_training_commands(self): pass @@ -505,15 +500,15 @@ class Agent(object): # heatup phase if self.tp.num_heatup_steps != 0: self.in_heatup = True - screen.log_title("Starting heatup {}".format(self.task_id)) + logger.screen.log_title("Starting heatup {}".format(self.task_id)) num_steps_required_for_one_training_batch = self.tp.batch_size * self.tp.env.observation_stack_size for step in range(max(self.tp.num_heatup_steps, num_steps_required_for_one_training_batch)): - self.act(phase=RunPhase.HEATUP) + self.act(phase=utils.RunPhase.HEATUP) # training phase self.in_heatup = False - screen.log_title("Starting training {}".format(self.task_id)) - self.exploration_policy.change_phase(RunPhase.TRAIN) + logger.screen.log_title("Starting training {}".format(self.task_id)) + self.exploration_policy.change_phase(utils.RunPhase.TRAIN) training_start_time = time.time() model_snapshots_periods_passed = -1 self.reset_game() @@ -557,7 +552,7 @@ class Agent(object): self.loss.add_sample(loss) self.training_iteration += 1 if self.imitation: - self.log_to_screen(RunPhase.TRAIN) + self.log_to_screen(utils.RunPhase.TRAIN) self.post_training_commands() def save_model(self, model_id): diff --git a/agents/bc_agent.py b/agents/bc_agent.py index 70fe3e6..af01720 100644 --- a/agents/bc_agent.py +++ b/agents/bc_agent.py @@ -13,16 +13,15 @@ # See the License for the specific language governing permissions and # limitations under the License. # - import numpy as np -from agents.imitation_agent import ImitationAgent +from agents import imitation_agent # Behavioral Cloning Agent -class BCAgent(ImitationAgent): +class BCAgent(imitation_agent.ImitationAgent): def __init__(self, env, tuning_parameters, replicated_device=None, thread_id=0): - ImitationAgent.__init__(self, env, tuning_parameters, replicated_device, thread_id) + imitation_agent.ImitationAgent.__init__(self, env, tuning_parameters, replicated_device, thread_id) def learn_from_batch(self, batch): current_states, _, actions, _, _, _ = self.extract_batch(batch) diff --git a/agents/bootstrapped_dqn_agent.py b/agents/bootstrapped_dqn_agent.py index 3476022..41aea9f 100644 --- a/agents/bootstrapped_dqn_agent.py +++ b/agents/bootstrapped_dqn_agent.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2017 Intel Corporation +# Copyright (c) 2017 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,17 +13,18 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# +import numpy as np -from agents.value_optimization_agent import * - +from agents import value_optimization_agent as voa +import utils # Bootstrapped DQN - https://arxiv.org/pdf/1602.04621.pdf -class BootstrappedDQNAgent(ValueOptimizationAgent): +class BootstrappedDQNAgent(voa.ValueOptimizationAgent): def __init__(self, env, tuning_parameters, replicated_device=None, thread_id=0): - ValueOptimizationAgent.__init__(self, env, tuning_parameters, replicated_device, thread_id) + voa.ValueOptimizationAgent.__init__(self, env, tuning_parameters, replicated_device, thread_id) def reset_game(self, do_not_reset_env=False): - ValueOptimizationAgent.reset_game(self, do_not_reset_env) + voa.ValueOptimizationAgent.reset_game(self, do_not_reset_env) self.exploration_policy.select_head() def learn_from_batch(self, batch): @@ -51,8 +52,8 @@ class BootstrappedDQNAgent(ValueOptimizationAgent): return total_loss - def act(self, phase=RunPhase.TRAIN): - ValueOptimizationAgent.act(self, phase) + def act(self, phase=utils.RunPhase.TRAIN): + voa.ValueOptimizationAgent.act(self, phase) mask = np.random.binomial(1, self.tp.exploration.bootstrapped_data_sharing_probability, self.tp.exploration.architecture_num_q_heads) self.memory.update_last_transition_info({'mask': mask}) diff --git a/agents/categorical_dqn_agent.py b/agents/categorical_dqn_agent.py index dec8ba2..8e442fc 100644 --- a/agents/categorical_dqn_agent.py +++ b/agents/categorical_dqn_agent.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2017 Intel Corporation +# Copyright (c) 2017 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,14 +13,15 @@ # See the License for the specific language governing permissions and # limitations under the License. # +import numpy as np -from agents.value_optimization_agent import * +from agents import value_optimization_agent as voa # Categorical Deep Q Network - https://arxiv.org/pdf/1707.06887.pdf -class CategoricalDQNAgent(ValueOptimizationAgent): +class CategoricalDQNAgent(voa.ValueOptimizationAgent): def __init__(self, env, tuning_parameters, replicated_device=None, thread_id=0): - ValueOptimizationAgent.__init__(self, env, tuning_parameters, replicated_device, thread_id) + voa.ValueOptimizationAgent.__init__(self, env, tuning_parameters, replicated_device, thread_id) self.z_values = np.linspace(self.tp.agent.v_min, self.tp.agent.v_max, self.tp.agent.atoms) # prediction's format is (batch,actions,atoms) @@ -57,4 +58,3 @@ class CategoricalDQNAgent(ValueOptimizationAgent): total_loss = result[0] return total_loss - diff --git a/agents/clipped_ppo_agent.py b/agents/clipped_ppo_agent.py index 88a70b0..07343c8 100644 --- a/agents/clipped_ppo_agent.py +++ b/agents/clipped_ppo_agent.py @@ -13,27 +13,34 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# - -from agents.actor_critic_agent import * +import collections +import copy from random import shuffle +import numpy as np + +from agents import actor_critic_agent as aca +from agents import policy_optimization_agent as poa +import logger +import utils + # Clipped Proximal Policy Optimization - https://arxiv.org/abs/1707.06347 -class ClippedPPOAgent(ActorCriticAgent): +class ClippedPPOAgent(aca.ActorCriticAgent): def __init__(self, env, tuning_parameters, replicated_device=None, thread_id=0): - ActorCriticAgent.__init__(self, env, tuning_parameters, replicated_device, thread_id, - create_target_network=True) + aca.ActorCriticAgent.__init__(self, env, tuning_parameters, replicated_device, thread_id, + create_target_network=True) # signals definition - self.value_loss = Signal('Value Loss') + self.value_loss = utils.Signal('Value Loss') self.signals.append(self.value_loss) - self.policy_loss = Signal('Policy Loss') + self.policy_loss = utils.Signal('Policy Loss') self.signals.append(self.policy_loss) self.total_kl_divergence_during_training_process = 0.0 - self.unclipped_grads = Signal('Grads (unclipped)') + self.unclipped_grads = utils.Signal('Grads (unclipped)') self.signals.append(self.unclipped_grads) - self.value_targets = Signal('Value Targets') + self.value_targets = utils.Signal('Value Targets') self.signals.append(self.value_targets) - self.kl_divergence = Signal('KL Divergence') + self.kl_divergence = utils.Signal('KL Divergence') self.signals.append(self.kl_divergence) def fill_advantages(self, batch): @@ -46,9 +53,9 @@ class ClippedPPOAgent(ActorCriticAgent): # calculate advantages advantages = [] value_targets = [] - if self.policy_gradient_rescaler == PolicyGradientRescaler.A_VALUE: + if self.policy_gradient_rescaler == poa.PolicyGradientRescaler.A_VALUE: advantages = total_return - current_state_values - elif self.policy_gradient_rescaler == PolicyGradientRescaler.GAE: + elif self.policy_gradient_rescaler == poa.PolicyGradientRescaler.GAE: # get bootstraps episode_start_idx = 0 advantages = np.array([]) @@ -66,7 +73,7 @@ class ClippedPPOAgent(ActorCriticAgent): advantages = np.append(advantages, rollout_advantages) value_targets = np.append(value_targets, gae_based_value_targets) else: - screen.warning("WARNING: The requested policy gradient rescaler is not available") + logger.screen.warning("WARNING: The requested policy gradient rescaler is not available") # standardize advantages = (advantages - np.mean(advantages)) / np.std(advantages) @@ -144,8 +151,8 @@ class ClippedPPOAgent(ActorCriticAgent): curr_learning_rate = self.tp.learning_rate # log training parameters - screen.log_dict( - OrderedDict([ + logger.screen.log_dict( + collections.OrderedDict([ ("Surrogate loss", loss['policy_losses'][0]), ("KL divergence", loss['fetch_result'][0]), ("Entropy", loss['fetch_result'][1]), @@ -184,13 +191,13 @@ class ClippedPPOAgent(ActorCriticAgent): self.update_log() # should be done in order to update the data that has been accumulated * while not playing * return np.append(losses[0], losses[1]) - def choose_action(self, current_state, phase=RunPhase.TRAIN): + def choose_action(self, current_state, phase=utils.RunPhase.TRAIN): if self.env.discrete_controls: # DISCRETE _, action_values = self.main_network.online_network.predict(self.tf_input_state(current_state)) action_values = action_values.squeeze() - if phase == RunPhase.TRAIN: + if phase == utils.RunPhase.TRAIN: action = self.exploration_policy.get_action(action_values) else: action = np.argmax(action_values) @@ -201,7 +208,7 
@@ class ClippedPPOAgent(ActorCriticAgent): _, action_values_mean, action_values_std = self.main_network.online_network.predict(self.tf_input_state(current_state)) action_values_mean = action_values_mean.squeeze() action_values_std = action_values_std.squeeze() - if phase == RunPhase.TRAIN: + if phase == utils.RunPhase.TRAIN: action = np.squeeze(np.random.randn(1, self.action_space_size) * action_values_std + action_values_mean) # if self.current_episode % 5 == 0 and self.current_episode_steps_counter < 5: # print action diff --git a/agents/ddpg_agent.py b/agents/ddpg_agent.py index 425f1de..df05395 100644 --- a/agents/ddpg_agent.py +++ b/agents/ddpg_agent.py @@ -13,28 +13,34 @@ # See the License for the specific language governing permissions and # limitations under the License. # +import copy -from agents.actor_critic_agent import * -from configurations import * +import numpy as np + +from agents import actor_critic_agent as aca +from agents import agent +from architectures import network_wrapper as nw +import configurations as conf +import utils # Deep Deterministic Policy Gradients Network - https://arxiv.org/pdf/1509.02971.pdf -class DDPGAgent(ActorCriticAgent): +class DDPGAgent(aca.ActorCriticAgent): def __init__(self, env, tuning_parameters, replicated_device=None, thread_id=0): - ActorCriticAgent.__init__(self, env, tuning_parameters, replicated_device, thread_id, - create_target_network=True) + aca.ActorCriticAgent.__init__(self, env, tuning_parameters, replicated_device, thread_id, + create_target_network=True) # define critic network self.critic_network = self.main_network # self.networks.append(self.critic_network) # define actor network - tuning_parameters.agent.input_types = {'observation': InputTypes.Observation} - tuning_parameters.agent.output_types = [OutputTypes.Pi] - self.actor_network = NetworkWrapper(tuning_parameters, True, self.has_global, 'actor', - self.replicated_device, self.worker_device) + tuning_parameters.agent.input_types = {'observation': conf.InputTypes.Observation} + tuning_parameters.agent.output_types = [conf.OutputTypes.Pi] + self.actor_network = nw.NetworkWrapper(tuning_parameters, True, self.has_global, 'actor', + self.replicated_device, self.worker_device) self.networks.append(self.actor_network) - self.q_values = Signal("Q") + self.q_values = utils.Signal("Q") self.signals.append(self.q_values) self.reset_game(do_not_reset_env=True) @@ -82,14 +88,14 @@ class DDPGAgent(ActorCriticAgent): return total_loss def train(self): - return Agent.train(self) + return agent.Agent.train(self) - def choose_action(self, curr_state, phase=RunPhase.TRAIN): + def choose_action(self, curr_state, phase=utils.RunPhase.TRAIN): assert not self.env.discrete_controls, 'DDPG works only for continuous control problems' result = self.actor_network.online_network.predict(self.tf_input_state(curr_state)) action_values = result[0].squeeze() - if phase == RunPhase.TRAIN: + if phase == utils.RunPhase.TRAIN: action = self.exploration_policy.get_action(action_values) else: action = action_values diff --git a/agents/ddqn_agent.py b/agents/ddqn_agent.py index 838ae3f..9f19e8a 100644 --- a/agents/ddqn_agent.py +++ b/agents/ddqn_agent.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2017 Intel Corporation +# Copyright (c) 2017 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -13,14 +13,15 @@ # See the License for the specific language governing permissions and # limitations under the License. # +import numpy as np -from agents.value_optimization_agent import * +from agents import value_optimization_agent as voa # Double DQN - https://arxiv.org/abs/1509.06461 -class DDQNAgent(ValueOptimizationAgent): +class DDQNAgent(voa.ValueOptimizationAgent): def __init__(self, env, tuning_parameters, replicated_device=None, thread_id=0): - ValueOptimizationAgent.__init__(self, env, tuning_parameters, replicated_device, thread_id) + voa.ValueOptimizationAgent.__init__(self, env, tuning_parameters, replicated_device, thread_id) def learn_from_batch(self, batch): current_states, next_states, actions, rewards, game_overs, _ = self.extract_batch(batch) diff --git a/agents/dfp_agent.py b/agents/dfp_agent.py index c055d2c..f778fff 100644 --- a/agents/dfp_agent.py +++ b/agents/dfp_agent.py @@ -13,17 +13,20 @@ # See the License for the specific language governing permissions and # limitations under the License. # +import numpy as np -from agents.agent import * +from agents import agent +from architectures import network_wrapper as nw +import utils # Direct Future Prediction Agent - http://vladlen.info/papers/learning-to-act.pdf -class DFPAgent(Agent): +class DFPAgent(agent.Agent): def __init__(self, env, tuning_parameters, replicated_device=None, thread_id=0): - Agent.__init__(self, env, tuning_parameters, replicated_device, thread_id) + agent.Agent.__init__(self, env, tuning_parameters, replicated_device, thread_id) self.current_goal = self.tp.agent.goal_vector - self.main_network = NetworkWrapper(tuning_parameters, False, self.has_global, 'main', - self.replicated_device, self.worker_device) + self.main_network = nw.NetworkWrapper(tuning_parameters, False, self.has_global, 'main', + self.replicated_device, self.worker_device) self.networks.append(self.main_network) def learn_from_batch(self, batch): @@ -45,7 +48,7 @@ class DFPAgent(Agent): return total_loss - def choose_action(self, curr_state, phase=RunPhase.TRAIN): + def choose_action(self, curr_state, phase=utils.RunPhase.TRAIN): # convert to batch so we can run it through the network observation = np.expand_dims(np.array(curr_state['observation']), 0) measurements = np.expand_dims(np.array(curr_state['measurements']), 0) @@ -66,7 +69,7 @@ class DFPAgent(Agent): self.tp.agent.future_measurements_weights) # choose action according to the exploration policy and the current phase (evaluating or training the agent) - if phase == RunPhase.TRAIN: + if phase == utils.RunPhase.TRAIN: action = self.exploration_policy.get_action(action_values) else: action = np.argmax(action_values) diff --git a/agents/distributional_dqn_agent.py b/agents/distributional_dqn_agent.py index d7c0088..33e7a5e 100644 --- a/agents/distributional_dqn_agent.py +++ b/agents/distributional_dqn_agent.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2017 Intel Corporation +# Copyright (c) 2017 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,14 +13,15 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# +import numpy as np -from agents.value_optimization_agent import * +from agents import value_optimization_agent as voa # Distributional Deep Q Network - https://arxiv.org/pdf/1707.06887.pdf -class DistributionalDQNAgent(ValueOptimizationAgent): +class DistributionalDQNAgent(voa.ValueOptimizationAgent): def __init__(self, env, tuning_parameters, replicated_device=None, thread_id=0): - ValueOptimizationAgent.__init__(self, env, tuning_parameters, replicated_device, thread_id) + voa.ValueOptimizationAgent.__init__(self, env, tuning_parameters, replicated_device, thread_id) self.z_values = np.linspace(self.tp.agent.v_min, self.tp.agent.v_max, self.tp.agent.atoms) # prediction's format is (batch,actions,atoms) @@ -57,4 +58,3 @@ class DistributionalDQNAgent(ValueOptimizationAgent): total_loss = result[0] return total_loss - diff --git a/agents/dqn_agent.py b/agents/dqn_agent.py index 70c0c7d..8660def 100644 --- a/agents/dqn_agent.py +++ b/agents/dqn_agent.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2017 Intel Corporation +# Copyright (c) 2017 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,14 +13,15 @@ # See the License for the specific language governing permissions and # limitations under the License. # +import numpy as np -from agents.value_optimization_agent import * +from agents import value_optimization_agent as voa # Deep Q Network - https://www.cs.toronto.edu/~vmnih/docs/dqn.pdf -class DQNAgent(ValueOptimizationAgent): +class DQNAgent(voa.ValueOptimizationAgent): def __init__(self, env, tuning_parameters, replicated_device=None, thread_id=0): - ValueOptimizationAgent.__init__(self, env, tuning_parameters, replicated_device, thread_id) + voa.ValueOptimizationAgent.__init__(self, env, tuning_parameters, replicated_device, thread_id) def learn_from_batch(self, batch): current_states, next_states, actions, rewards, game_overs, _ = self.extract_batch(batch) diff --git a/agents/human_agent.py b/agents/human_agent.py index c75c2a2..7f8e491 100644 --- a/agents/human_agent.py +++ b/agents/human_agent.py @@ -13,31 +13,37 @@ # See the License for the specific language governing permissions and # limitations under the License. # +import collections +import os -from agents.agent import * import pygame +from pandas.io import pickle + +from agents import agent +import logger +import utils -class HumanAgent(Agent): +class HumanAgent(agent.Agent): def __init__(self, env, tuning_parameters, replicated_device=None, thread_id=0): - Agent.__init__(self, env, tuning_parameters, replicated_device, thread_id) + agent.Agent.__init__(self, env, tuning_parameters, replicated_device, thread_id) self.clock = pygame.time.Clock() self.max_fps = int(self.tp.visualization.max_fps_for_human_control) - screen.log_title("Human Control Mode") + utils.screen.log_title("Human Control Mode") available_keys = self.env.get_available_keys() if available_keys: - screen.log("Use keyboard keys to move. Press escape to quit. Available keys:") - screen.log("") + utils.screen.log("Use keyboard keys to move. Press escape to quit. 
Available keys:") + utils.screen.log("") for action, key in self.env.get_available_keys(): - screen.log("\t- {}: {}".format(action, key)) - screen.separator() + utils.screen.log("\t- {}: {}".format(action, key)) + utils.screen.separator() def train(self): return 0 - def choose_action(self, curr_state, phase=RunPhase.TRAIN): + def choose_action(self, curr_state, phase=utils.RunPhase.TRAIN): action = self.env.get_action_from_user() # keep constant fps @@ -49,16 +55,16 @@ class HumanAgent(Agent): return action, {"action_value": 0} def save_replay_buffer_and_exit(self): - replay_buffer_path = os.path.join(logger.experiments_path, 'replay_buffer.p') + replay_buffer_path = os.path.join(logger.logger.experiments_path, 'replay_buffer.p') self.memory.tp = None - to_pickle(self.memory, replay_buffer_path) - screen.log_title("Replay buffer was stored in {}".format(replay_buffer_path)) + pickle.to_pickle(self.memory, replay_buffer_path) + utils.screen.log_title("Replay buffer was stored in {}".format(replay_buffer_path)) exit() def log_to_screen(self, phase): - # log to screen - screen.log_dict( - OrderedDict([ + # log to utils.screen + utils.screen.log_dict( + collections.OrderedDict([ ("Episode", self.current_episode), ("total reward", self.total_reward_in_current_episode), ("steps", self.total_steps_counter) diff --git a/agents/imitation_agent.py b/agents/imitation_agent.py index f893fbe..522c569 100644 --- a/agents/imitation_agent.py +++ b/agents/imitation_agent.py @@ -13,23 +13,27 @@ # See the License for the specific language governing permissions and # limitations under the License. # +import collections -from agents.agent import * +from agents import agent +from architectures import network_wrapper as nw +import utils +import logging # Imitation Agent -class ImitationAgent(Agent): +class ImitationAgent(agent.Agent): def __init__(self, env, tuning_parameters, replicated_device=None, thread_id=0): - Agent.__init__(self, env, tuning_parameters, replicated_device, thread_id) - self.main_network = NetworkWrapper(tuning_parameters, False, self.has_global, 'main', - self.replicated_device, self.worker_device) + agent.Agent.__init__(self, env, tuning_parameters, replicated_device, thread_id) + self.main_network = nw.NetworkWrapper(tuning_parameters, False, self.has_global, 'main', + self.replicated_device, self.worker_device) self.networks.append(self.main_network) self.imitation = True def extract_action_values(self, prediction): return prediction.squeeze() - def choose_action(self, curr_state, phase=RunPhase.TRAIN): + def choose_action(self, curr_state, phase=utils.RunPhase.TRAIN): # convert to batch so we can run it through the network prediction = self.main_network.online_network.predict(self.tf_input_state(curr_state)) @@ -49,10 +53,10 @@ class ImitationAgent(Agent): def log_to_screen(self, phase): # log to screen - if phase == RunPhase.TRAIN: + if phase == utils.RunPhase.TRAIN: # for the training phase - we log during the episode to visualize the progress in training - screen.log_dict( - OrderedDict([ + logging.screen.log_dict( + collections.OrderedDict([ ("Worker", self.task_id), ("Episode", self.current_episode), ("Loss", self.loss.values[-1]), @@ -62,4 +66,4 @@ class ImitationAgent(Agent): ) else: # for the evaluation phase - logging as in regular RL - Agent.log_to_screen(self, phase) + agent.Agent.log_to_screen(self, phase) diff --git a/agents/mmc_agent.py b/agents/mmc_agent.py index 2b5a2cb..4473b06 100644 --- a/agents/mmc_agent.py +++ b/agents/mmc_agent.py @@ -1,5 +1,5 @@ # -# Copyright 
(c) 2017 Intel Corporation +# Copyright (c) 2017 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,13 +13,14 @@ # See the License for the specific language governing permissions and # limitations under the License. # +import numpy as np -from agents.value_optimization_agent import * +from agents import value_optimization_agent as voa -class MixedMonteCarloAgent(ValueOptimizationAgent): +class MixedMonteCarloAgent(voa.ValueOptimizationAgent): def __init__(self, env, tuning_parameters, replicated_device=None, thread_id=0): - ValueOptimizationAgent.__init__(self, env, tuning_parameters, replicated_device, thread_id) + voa.ValueOptimizationAgent.__init__(self, env, tuning_parameters, replicated_device, thread_id) self.mixing_rate = tuning_parameters.agent.monte_carlo_mixing_rate def learn_from_batch(self, batch): diff --git a/agents/n_step_q_agent.py b/agents/n_step_q_agent.py index 5a74fb5..a5b773c 100644 --- a/agents/n_step_q_agent.py +++ b/agents/n_step_q_agent.py @@ -14,22 +14,21 @@ # limitations under the License. # import numpy as np -import scipy.signal -from agents.value_optimization_agent import ValueOptimizationAgent -from agents.policy_optimization_agent import PolicyOptimizationAgent -from logger import logger -from utils import Signal, last_sample +from agents import value_optimization_agent as voa +from agents import policy_optimization_agent as poa +import logger +import utils # N Step Q Learning Agent - https://arxiv.org/abs/1602.01783 -class NStepQAgent(ValueOptimizationAgent, PolicyOptimizationAgent): +class NStepQAgent(voa.ValueOptimizationAgent, poa.PolicyOptimizationAgent): def __init__(self, env, tuning_parameters, replicated_device=None, thread_id=0): - ValueOptimizationAgent.__init__(self, env, tuning_parameters, replicated_device, thread_id, create_target_network=True) + voa.ValueOptimizationAgent.__init__(self, env, tuning_parameters, replicated_device, thread_id, create_target_network=True) self.last_gradient_update_step_idx = 0 - self.q_values = Signal('Q Values') - self.unclipped_grads = Signal('Grads (unclipped)') - self.value_loss = Signal('Value Loss') + self.q_values = utils.Signal('Q Values') + self.unclipped_grads = utils.Signal('Grads (unclipped)') + self.value_loss = utils.Signal('Value Loss') self.signals.append(self.q_values) self.signals.append(self.unclipped_grads) self.signals.append(self.value_loss) @@ -57,7 +56,7 @@ class NStepQAgent(ValueOptimizationAgent, PolicyOptimizationAgent): if game_overs[-1]: R = 0 else: - R = np.max(self.main_network.target_network.predict(last_sample(next_states))) + R = np.max(self.main_network.target_network.predict(utils.last_sample(next_states))) for i in reversed(range(num_transitions)): R = rewards[i] + self.tp.agent.discount * R @@ -85,4 +84,4 @@ class NStepQAgent(ValueOptimizationAgent, PolicyOptimizationAgent): else: logger.create_signal_value('Update Target Network', 0, overwrite=False) - return PolicyOptimizationAgent.train(self) + return poa.PolicyOptimizationAgent.train(self) diff --git a/agents/naf_agent.py b/agents/naf_agent.py index 65ca83c..35072f7 100644 --- a/agents/naf_agent.py +++ b/agents/naf_agent.py @@ -13,21 +13,20 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# - import numpy as np from agents.value_optimization_agent import ValueOptimizationAgent -from utils import RunPhase, Signal +import utils # Normalized Advantage Functions - https://arxiv.org/pdf/1603.00748.pdf class NAFAgent(ValueOptimizationAgent): def __init__(self, env, tuning_parameters, replicated_device=None, thread_id=0): ValueOptimizationAgent.__init__(self, env, tuning_parameters, replicated_device, thread_id) - self.l_values = Signal("L") - self.a_values = Signal("Advantage") - self.mu_values = Signal("Action") - self.v_values = Signal("V") + self.l_values = utils.Signal("L") + self.a_values = utils.Signal("Advantage") + self.mu_values = utils.Signal("Action") + self.v_values = utils.Signal("V") self.signals += [self.l_values, self.a_values, self.mu_values, self.v_values] def learn_from_batch(self, batch): @@ -49,7 +48,7 @@ class NAFAgent(ValueOptimizationAgent): return total_loss - def choose_action(self, curr_state, phase=RunPhase.TRAIN): + def choose_action(self, curr_state, phase=utils.RunPhase.TRAIN): assert not self.env.discrete_controls, 'NAF works only for continuous control problems' # convert to batch so we can run it through the network @@ -60,7 +59,7 @@ class NAFAgent(ValueOptimizationAgent): outputs=naf_head.mu, squeeze_output=False, ) - if phase == RunPhase.TRAIN: + if phase == utils.RunPhase.TRAIN: action = self.exploration_policy.get_action(action_values) else: action = action_values diff --git a/agents/nec_agent.py b/agents/nec_agent.py index 77520a4..47aa33f 100644 --- a/agents/nec_agent.py +++ b/agents/nec_agent.py @@ -13,19 +13,16 @@ # See the License for the specific language governing permissions and # limitations under the License. # - -import numpy as np - -from agents.value_optimization_agent import ValueOptimizationAgent +from agents import value_optimization_agent as voa from logger import screen -from utils import RunPhase +import utils # Neural Episodic Control - https://arxiv.org/pdf/1703.01988.pdf -class NECAgent(ValueOptimizationAgent): +class NECAgent(voa.ValueOptimizationAgent): def __init__(self, env, tuning_parameters, replicated_device=None, thread_id=0): - ValueOptimizationAgent.__init__(self, env, tuning_parameters, replicated_device, thread_id, - create_target_network=False) + voa.ValueOptimizationAgent.__init__(self, env, tuning_parameters, replicated_device, thread_id, + create_target_network=False) self.current_episode_state_embeddings = [] self.training_started = False @@ -52,7 +49,7 @@ class NECAgent(ValueOptimizationAgent): return total_loss - def act(self, phase=RunPhase.TRAIN): + def act(self, phase=utils.RunPhase.TRAIN): if self.in_heatup: # get embedding in heatup (otherwise we get it through choose_action) embedding = self.main_network.online_network.predict( diff --git a/agents/pal_agent.py b/agents/pal_agent.py index 68ff675..9a11e00 100644 --- a/agents/pal_agent.py +++ b/agents/pal_agent.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2017 Intel Corporation +# Copyright (c) 2017 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,14 +13,15 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# +import numpy as np -from agents.value_optimization_agent import * +from agents import value_optimization_agent as voa # Persistent Advantage Learning - https://arxiv.org/pdf/1512.04860.pdf -class PALAgent(ValueOptimizationAgent): +class PALAgent(voa.ValueOptimizationAgent): def __init__(self, env, tuning_parameters, replicated_device=None, thread_id=0): - ValueOptimizationAgent.__init__(self, env, tuning_parameters, replicated_device, thread_id) + voa.ValueOptimizationAgent.__init__(self, env, tuning_parameters, replicated_device, thread_id) self.alpha = tuning_parameters.agent.pal_alpha self.persistent = tuning_parameters.agent.persistent_advantage_learning self.monte_carlo_mixing_rate = tuning_parameters.agent.monte_carlo_mixing_rate diff --git a/agents/policy_gradients_agent.py b/agents/policy_gradients_agent.py index 3a592d1..037dc88 100644 --- a/agents/policy_gradients_agent.py +++ b/agents/policy_gradients_agent.py @@ -13,25 +13,18 @@ # See the License for the specific language governing permissions and # limitations under the License. # - -from agents.policy_optimization_agent import * import numpy as np -from logger import * -import tensorflow as tf -try: - import matplotlib.pyplot as plt -except: - from logger import failed_imports - failed_imports.append("matplotlib") -from utils import * +from agents import policy_optimization_agent as poa +import logger +import utils -class PolicyGradientsAgent(PolicyOptimizationAgent): +class PolicyGradientsAgent(poa.PolicyOptimizationAgent): def __init__(self, env, tuning_parameters, replicated_device=None, thread_id=0): - PolicyOptimizationAgent.__init__(self, env, tuning_parameters, replicated_device, thread_id) - self.returns_mean = Signal('Returns Mean') - self.returns_variance = Signal('Returns Variance') + poa.PolicyOptimizationAgent.__init__(self, env, tuning_parameters, replicated_device, thread_id) + self.returns_mean = utils.Signal('Returns Mean') + self.returns_variance = utils.Signal('Returns Variance') self.signals.append(self.returns_mean) self.signals.append(self.returns_variance) self.last_gradient_update_step_idx = 0 @@ -41,21 +34,21 @@ class PolicyGradientsAgent(PolicyOptimizationAgent): current_states, next_states, actions, rewards, game_overs, total_returns = self.extract_batch(batch) for i in reversed(range(len(total_returns))): - if self.policy_gradient_rescaler == PolicyGradientRescaler.TOTAL_RETURN: + if self.policy_gradient_rescaler == poa.PolicyGradientRescaler.TOTAL_RETURN: total_returns[i] = total_returns[0] - elif self.policy_gradient_rescaler == PolicyGradientRescaler.FUTURE_RETURN: + elif self.policy_gradient_rescaler == poa.PolicyGradientRescaler.FUTURE_RETURN: # just take the total return as it is pass - elif self.policy_gradient_rescaler == PolicyGradientRescaler.FUTURE_RETURN_NORMALIZED_BY_EPISODE: + elif self.policy_gradient_rescaler == poa.PolicyGradientRescaler.FUTURE_RETURN_NORMALIZED_BY_EPISODE: # we can get a single transition episode while playing Doom Basic, causing the std to be 0 if self.std_discounted_return != 0: total_returns[i] = (total_returns[i] - self.mean_discounted_return) / self.std_discounted_return else: total_returns[i] = 0 - elif self.policy_gradient_rescaler == PolicyGradientRescaler.FUTURE_RETURN_NORMALIZED_BY_TIMESTEP: + elif self.policy_gradient_rescaler == poa.PolicyGradientRescaler.FUTURE_RETURN_NORMALIZED_BY_TIMESTEP: total_returns[i] -= self.mean_return_over_multiple_episodes[i] else: - screen.warning("WARNING: The requested policy gradient rescaler is not available") + 
logger.screen.warning("WARNING: The requested policy gradient rescaler is not available") targets = total_returns if not self.env.discrete_controls and len(actions.shape) < 2: @@ -69,12 +62,12 @@ class PolicyGradientsAgent(PolicyOptimizationAgent): return total_loss - def choose_action(self, curr_state, phase=RunPhase.TRAIN): + def choose_action(self, curr_state, phase=utils.RunPhase.TRAIN): # convert to batch so we can run it through the network if self.env.discrete_controls: # DISCRETE action_values = self.main_network.online_network.predict(self.tf_input_state(curr_state)).squeeze() - if phase == RunPhase.TRAIN: + if phase == utils.RunPhase.TRAIN: action = self.exploration_policy.get_action(action_values) else: action = np.argmax(action_values) @@ -84,7 +77,7 @@ class PolicyGradientsAgent(PolicyOptimizationAgent): # CONTINUOUS result = self.main_network.online_network.predict(self.tf_input_state(curr_state)) action_values = result[0].squeeze() - if phase == RunPhase.TRAIN: + if phase == utils.RunPhase.TRAIN: action = self.exploration_policy.get_action(action_values) else: action = action_values diff --git a/agents/policy_optimization_agent.py b/agents/policy_optimization_agent.py index be23760..175ca7f 100644 --- a/agents/policy_optimization_agent.py +++ b/agents/policy_optimization_agent.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2017 Intel Corporation +# Copyright (c) 2017 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,12 +13,17 @@ # See the License for the specific language governing permissions and # limitations under the License. # +import collections -from agents.agent import * -from memories.memory import Episode +import numpy as np + +from agents import agent +from architectures import network_wrapper as nw +import logger +import utils -class PolicyGradientRescaler(Enum): +class PolicyGradientRescaler(utils.Enum): TOTAL_RETURN = 0 FUTURE_RETURN = 1 FUTURE_RETURN_NORMALIZED_BY_EPISODE = 2 @@ -30,11 +35,11 @@ class PolicyGradientRescaler(Enum): GAE = 8 -class PolicyOptimizationAgent(Agent): +class PolicyOptimizationAgent(agent.Agent): def __init__(self, env, tuning_parameters, replicated_device=None, thread_id=0, create_target_network=False): - Agent.__init__(self, env, tuning_parameters, replicated_device, thread_id) - self.main_network = NetworkWrapper(tuning_parameters, create_target_network, self.has_global, 'main', - self.replicated_device, self.worker_device) + agent.Agent.__init__(self, env, tuning_parameters, replicated_device, thread_id) + self.main_network = nw.NetworkWrapper(tuning_parameters, create_target_network, self.has_global, 'main', + self.replicated_device, self.worker_device) self.networks.append(self.main_network) self.policy_gradient_rescaler = PolicyGradientRescaler().get(self.tp.agent.policy_gradient_rescaler) @@ -44,7 +49,7 @@ class PolicyOptimizationAgent(Agent): self.max_episode_length = 100000 self.mean_return_over_multiple_episodes = np.zeros(self.max_episode_length) self.num_episodes_where_step_has_been_seen = np.zeros(self.max_episode_length) - self.entropy = Signal('Entropy') + self.entropy = utils.Signal('Entropy') self.signals.append(self.entropy) self.reset_game(do_not_reset_env=True) @@ -52,8 +57,8 @@ class PolicyOptimizationAgent(Agent): def log_to_screen(self, phase): # log to screen if self.current_episode > 0: - screen.log_dict( - OrderedDict([ + logger.screen.log_dict( + collections.OrderedDict([ ("Worker", self.task_id), ("Episode", 
self.current_episode), ("total reward", self.total_reward_in_current_episode), diff --git a/agents/ppo_agent.py b/agents/ppo_agent.py index 4a37e69..35d1b98 100644 --- a/agents/ppo_agent.py +++ b/agents/ppo_agent.py @@ -13,36 +13,44 @@ # See the License for the specific language governing permissions and # limitations under the License. # +import collections +import copy -from agents.actor_critic_agent import * -from random import shuffle +import numpy as np + +from agents import actor_critic_agent as aca +from agents import policy_optimization_agent as poa +from architectures import network_wrapper as nw +import configurations +import logger +import utils # Proximal Policy Optimization - https://arxiv.org/pdf/1707.06347.pdf -class PPOAgent(ActorCriticAgent): +class PPOAgent(aca.ActorCriticAgent): def __init__(self, env, tuning_parameters, replicated_device=None, thread_id=0): - ActorCriticAgent.__init__(self, env, tuning_parameters, replicated_device, thread_id, - create_target_network=True) + aca.ActorCriticAgent.__init__(self, env, tuning_parameters, replicated_device, thread_id, + create_target_network=True) self.critic_network = self.main_network # define the policy network - tuning_parameters.agent.input_types = {'observation': InputTypes.Observation} - tuning_parameters.agent.output_types = [OutputTypes.PPO] + tuning_parameters.agent.input_types = {'observation': configurations.InputTypes.Observation} + tuning_parameters.agent.output_types = [configurations.OutputTypes.PPO] tuning_parameters.agent.optimizer_type = 'Adam' tuning_parameters.agent.l2_regularization = 0 - self.policy_network = NetworkWrapper(tuning_parameters, True, self.has_global, 'policy', - self.replicated_device, self.worker_device) + self.policy_network = nw.NetworkWrapper(tuning_parameters, True, self.has_global, 'policy', + self.replicated_device, self.worker_device) self.networks.append(self.policy_network) # signals definition - self.value_loss = Signal('Value Loss') + self.value_loss = utils.Signal('Value Loss') self.signals.append(self.value_loss) - self.policy_loss = Signal('Policy Loss') + self.policy_loss = utils.Signal('Policy Loss') self.signals.append(self.policy_loss) - self.kl_divergence = Signal('KL Divergence') + self.kl_divergence = utils.Signal('KL Divergence') self.signals.append(self.kl_divergence) self.total_kl_divergence_during_training_process = 0.0 - self.unclipped_grads = Signal('Grads (unclipped)') + self.unclipped_grads = utils.Signal('Grads (unclipped)') self.signals.append(self.unclipped_grads) self.reset_game(do_not_reset_env=True) @@ -57,9 +65,9 @@ class PPOAgent(ActorCriticAgent): # calculate advantages advantages = [] - if self.policy_gradient_rescaler == PolicyGradientRescaler.A_VALUE: + if self.policy_gradient_rescaler == poa.PolicyGradientRescaler.A_VALUE: advantages = total_return - current_state_values - elif self.policy_gradient_rescaler == PolicyGradientRescaler.GAE: + elif self.policy_gradient_rescaler == poa.PolicyGradientRescaler.GAE: # get bootstraps episode_start_idx = 0 advantages = np.array([]) @@ -76,7 +84,7 @@ class PPOAgent(ActorCriticAgent): episode_start_idx = idx + 1 advantages = np.append(advantages, rollout_advantages) else: - screen.warning("WARNING: The requested policy gradient rescaler is not available") + logger.screen.warning("WARNING: The requested policy gradient rescaler is not available") # standardize advantages = (advantages - np.mean(advantages)) / np.std(advantages) @@ -107,7 +115,7 @@ class PPOAgent(ActorCriticAgent): for k, v in 
current_states.items() } total_return_batch = total_return[i * batch_size:(i + 1) * batch_size] - old_policy_values = force_list(self.critic_network.target_network.predict( + old_policy_values = utils.force_list(self.critic_network.target_network.predict( current_states_batch).squeeze()) if self.critic_network.online_network.optimizer_type != 'LBFGS': targets = total_return_batch @@ -155,7 +163,7 @@ class PPOAgent(ActorCriticAgent): actions = np.expand_dims(actions, -1) # get old policy probabilities and distribution - old_policy = force_list(self.policy_network.target_network.predict(current_states)) + old_policy = utils.force_list(self.policy_network.target_network.predict(current_states)) # calculate gradients and apply on both the local policy network and on the global policy network fetches = [self.policy_network.online_network.output_heads[0].kl_divergence, @@ -196,8 +204,8 @@ class PPOAgent(ActorCriticAgent): curr_learning_rate = self.tp.learning_rate # log training parameters - screen.log_dict( - OrderedDict([ + logger.screen.log_dict( + collections.OrderedDict([ ("Surrogate loss", loss['policy_losses'][0]), ("KL divergence", loss['fetch_result'][0]), ("Entropy", loss['fetch_result'][1]), @@ -215,7 +223,7 @@ class PPOAgent(ActorCriticAgent): def update_kl_coefficient(self): # John Schulman takes the mean kl divergence only over the last epoch which is strange but we will follow # his implementation for now because we know it works well - screen.log_title("KL = {}".format(self.total_kl_divergence_during_training_process)) + logger.screen.log_title("KL = {}".format(self.total_kl_divergence_during_training_process)) # update kl coefficient kl_target = self.tp.agent.target_kl_divergence @@ -236,7 +244,7 @@ class PPOAgent(ActorCriticAgent): new_kl_coefficient, self.policy_network.online_network.output_heads[0].kl_coefficient_ph) - screen.log_title("KL penalty coefficient change = {} -> {}".format(kl_coefficient, new_kl_coefficient)) + logger.screen.log_title("KL penalty coefficient change = {} -> {}".format(kl_coefficient, new_kl_coefficient)) def post_training_commands(self): if self.tp.agent.use_kl_regularization: @@ -264,12 +272,12 @@ class PPOAgent(ActorCriticAgent): self.update_log() # should be done in order to update the data that has been accumulated * while not playing * return np.append(value_loss, policy_loss) - def choose_action(self, curr_state, phase=RunPhase.TRAIN): + def choose_action(self, curr_state, phase=utils.RunPhase.TRAIN): if self.env.discrete_controls: # DISCRETE action_values = self.policy_network.online_network.predict(self.tf_input_state(curr_state)).squeeze() - if phase == RunPhase.TRAIN: + if phase == utils.RunPhase.TRAIN: action = self.exploration_policy.get_action(action_values) else: action = np.argmax(action_values) @@ -280,7 +288,7 @@ class PPOAgent(ActorCriticAgent): action_values_mean, action_values_std = self.policy_network.online_network.predict(self.tf_input_state(curr_state)) action_values_mean = action_values_mean.squeeze() action_values_std = action_values_std.squeeze() - if phase == RunPhase.TRAIN: + if phase == utils.RunPhase.TRAIN: action = np.squeeze(np.random.randn(1, self.action_space_size) * action_values_std + action_values_mean) else: action = action_values_mean diff --git a/agents/qr_dqn_agent.py b/agents/qr_dqn_agent.py index 8888d18..c36b861 100644 --- a/agents/qr_dqn_agent.py +++ b/agents/qr_dqn_agent.py @@ -13,14 +13,15 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# +import numpy as np -from agents.value_optimization_agent import * +from agents import value_optimization_agent as voa # Quantile Regression Deep Q Network - https://arxiv.org/pdf/1710.10044v1.pdf -class QuantileRegressionDQNAgent(ValueOptimizationAgent): +class QuantileRegressionDQNAgent(voa.ValueOptimizationAgent): def __init__(self, env, tuning_parameters, replicated_device=None, thread_id=0): - ValueOptimizationAgent.__init__(self, env, tuning_parameters, replicated_device, thread_id) + voa.ValueOptimizationAgent.__init__(self, env, tuning_parameters, replicated_device, thread_id) self.quantile_probabilities = np.ones(self.tp.agent.atoms) / float(self.tp.agent.atoms) # prediction's format is (batch,actions,atoms) diff --git a/agents/value_optimization_agent.py b/agents/value_optimization_agent.py index 75708d7..d91a98e 100644 --- a/agents/value_optimization_agent.py +++ b/agents/value_optimization_agent.py @@ -13,21 +13,20 @@ # See the License for the specific language governing permissions and # limitations under the License. # - import numpy as np -from agents.agent import Agent -from architectures.network_wrapper import NetworkWrapper -from utils import RunPhase, Signal +from agents import agent +from architectures import network_wrapper as nw +import utils -class ValueOptimizationAgent(Agent): +class ValueOptimizationAgent(agent.Agent): def __init__(self, env, tuning_parameters, replicated_device=None, thread_id=0, create_target_network=True): - Agent.__init__(self, env, tuning_parameters, replicated_device, thread_id) - self.main_network = NetworkWrapper(tuning_parameters, create_target_network, self.has_global, 'main', - self.replicated_device, self.worker_device) + agent.Agent.__init__(self, env, tuning_parameters, replicated_device, thread_id) + self.main_network = nw.NetworkWrapper(tuning_parameters, create_target_network, self.has_global, 'main', + self.replicated_device, self.worker_device) self.networks.append(self.main_network) - self.q_values = Signal("Q") + self.q_values = utils.Signal("Q") self.signals.append(self.q_values) self.reset_game(do_not_reset_env=True) @@ -47,12 +46,12 @@ class ValueOptimizationAgent(Agent): 'require exploration policies which return a single action.' ).format(policy.__class__.__name__)) - def choose_action(self, curr_state, phase=RunPhase.TRAIN): + def choose_action(self, curr_state, phase=utils.RunPhase.TRAIN): prediction = self.get_prediction(curr_state) actions_q_values = self.get_q_values(prediction) # choose action according to the exploration policy and the current phase (evaluating or training the agent) - if phase == RunPhase.TRAIN: + if phase == utils.RunPhase.TRAIN: exploration_policy = self.exploration_policy else: exploration_policy = self.evaluation_exploration_policy diff --git a/architectures/__init__.py b/architectures/__init__.py index cbf2ac5..e72fb00 100644 --- a/architectures/__init__.py +++ b/architectures/__init__.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2017 Intel Corporation +# Copyright (c) 2017 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,19 +13,16 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# - -from architectures.architecture import * -from logger import failed_imports -try: - from architectures.tensorflow_components.general_network import * - from architectures.tensorflow_components.architecture import * -except ImportError: - failed_imports.append("TensorFlow") +import logger try: - from architectures.neon_components.general_network import * - from architectures.neon_components.architecture import * + from architectures.tensorflow_components import general_network as ts_gn + from architectures.tensorflow_components import architecture as ts_arch except ImportError: - failed_imports.append("Neon") + logger.failed_imports.append("TensorFlow") -from architectures.network_wrapper import * \ No newline at end of file +try: + from architectures.neon_components import general_network as neon_gn + from architectures.neon_components import architecture as neon_arch +except ImportError: + logger.failed_imports.append("Neon") diff --git a/architectures/architecture.py b/architectures/architecture.py index d3175b7..03c48d8 100644 --- a/architectures/architecture.py +++ b/architectures/architecture.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2017 Intel Corporation +# Copyright (c) 2017 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,8 +14,6 @@ # limitations under the License. # -from configurations import Preset - class Architecture(object): def __init__(self, tuning_parameters, name=""): @@ -73,4 +71,4 @@ class Architecture(object): pass def set_variable_value(self, assign_op, value, placeholder=None): - pass \ No newline at end of file + pass diff --git a/architectures/neon_components/architecture.py b/architectures/neon_components/architecture.py index de600c1..1577ed8 100644 --- a/architectures/neon_components/architecture.py +++ b/architectures/neon_components/architecture.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2017 Intel Corporation +# Copyright (c) 2017 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,19 +13,16 @@ # See the License for the specific language governing permissions and # limitations under the License. # - -import sys -import copy -from ngraph.frontends.neon import * import ngraph as ng -from architectures.architecture import * import numpy as np -from utils import * + +from architectures import architecture +import utils -class NeonArchitecture(Architecture): +class NeonArchitecture(architecture.Architecture): def __init__(self, tuning_parameters, name="", global_network=None, network_is_local=True): - Architecture.__init__(self, tuning_parameters, name) + architecture.Architecture.__init__(self, tuning_parameters, name) assert tuning_parameters.agent.neon_support, 'Neon is not supported for this agent' self.clip_error = tuning_parameters.clip_gradients self.total_loss = None @@ -113,8 +110,8 @@ class NeonArchitecture(Architecture): def accumulate_gradients(self, inputs, targets): # Neon doesn't currently allow separating the grads calculation and grad apply operations # so this feature is not currently available. 
instead we do a full training iteration - inputs = force_list(inputs) - targets = force_list(targets) + inputs = utils.force_list(inputs) + targets = utils.force_list(targets) for idx, input in enumerate(inputs): inputs[idx] = input.swapaxes(0, -1) diff --git a/architectures/neon_components/embedders.py b/architectures/neon_components/embedders.py index 5f594a3..9d20a9d 100644 --- a/architectures/neon_components/embedders.py +++ b/architectures/neon_components/embedders.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2017 Intel Corporation +# Copyright (c) 2017 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,10 +13,9 @@ # See the License for the specific language governing permissions and # limitations under the License. # - -import ngraph.frontends.neon as neon import ngraph as ng -from ngraph.util.names import name_scope +import ngraph.frontends.neon as neon +import ngraph.util.names as ngraph_names class InputEmbedder(object): @@ -31,7 +30,7 @@ class InputEmbedder(object): self.output = None def __call__(self, prev_input_placeholder=None): - with name_scope(self.get_name()): + with ngraph_names.name_scope(self.get_name()): # create the input axes axes = [] if len(self.input_size) == 2: diff --git a/architectures/neon_components/general_network.py b/architectures/neon_components/general_network.py index 99ac6e9..f837f71 100644 --- a/architectures/neon_components/general_network.py +++ b/architectures/neon_components/general_network.py @@ -13,15 +13,18 @@ # See the License for the specific language governing permissions and # limitations under the License. # +import ngraph as ng +from ngraph.frontends import neon +from ngraph.util import names as ngraph_names -from architectures.neon_components.embedders import * -from architectures.neon_components.heads import * -from architectures.neon_components.middleware import * -from architectures.neon_components.architecture import * -from configurations import InputTypes, OutputTypes, MiddlewareTypes +from architectures.neon_components import architecture +from architectures.neon_components import embedders +from architectures.neon_components import middleware +from architectures.neon_components import heads +import configurations as conf -class GeneralNeonNetwork(NeonArchitecture): +class GeneralNeonNetwork(architecture.NeonArchitecture): def __init__(self, tuning_parameters, name="", global_network=None, network_is_local=True): self.global_network = global_network self.network_is_local = network_is_local @@ -34,7 +37,7 @@ class GeneralNeonNetwork(NeonArchitecture): self.activation_function = self.get_activation_function( tuning_parameters.agent.hidden_layers_activation_function) - NeonArchitecture.__init__(self, tuning_parameters, name, global_network, network_is_local) + architecture.NeonArchitecture.__init__(self, tuning_parameters, name, global_network, network_is_local) def get_activation_function(self, activation_function_string): activation_functions = { @@ -53,36 +56,36 @@ class GeneralNeonNetwork(NeonArchitecture): # the observation can be either an image or a vector def get_observation_embedding(with_timestep=False): if self.input_height > 1: - return ImageEmbedder((self.input_depth, self.input_height, self.input_width), self.batch_size, - name="observation") + return embedders.ImageEmbedder((self.input_depth, self.input_height, self.input_width), self.batch_size, + name="observation") else: - return VectorEmbedder((self.input_depth, 
self.input_width + int(with_timestep)), self.batch_size, - name="observation") + return embedders.VectorEmbedder((self.input_depth, self.input_width + int(with_timestep)), self.batch_size, + name="observation") input_mapping = { - InputTypes.Observation: get_observation_embedding(), - InputTypes.Measurements: VectorEmbedder(self.measurements_size, self.batch_size, name="measurements"), - InputTypes.GoalVector: VectorEmbedder(self.measurements_size, self.batch_size, name="goal_vector"), - InputTypes.Action: VectorEmbedder((self.num_actions,), self.batch_size, name="action"), - InputTypes.TimedObservation: get_observation_embedding(with_timestep=True), + conf.InputTypes.Observation: get_observation_embedding(), + conf.InputTypes.Measurements: embedders.VectorEmbedder(self.measurements_size, self.batch_size, name="measurements"), + conf.InputTypes.GoalVector: embedders.VectorEmbedder(self.measurements_size, self.batch_size, name="goal_vector"), + conf.InputTypes.Action: embedders.VectorEmbedder((self.num_actions,), self.batch_size, name="action"), + conf.InputTypes.TimedObservation: get_observation_embedding(with_timestep=True), } return input_mapping[embedder_type] def get_middleware_embedder(self, middleware_type): - return {MiddlewareTypes.LSTM: None, # LSTM over Neon is currently not supported in Coach - MiddlewareTypes.FC: FC_Embedder}.get(middleware_type)(self.activation_function) + return {conf.MiddlewareTypes.LSTM: None, # LSTM over Neon is currently not supported in Coach + conf.MiddlewareTypes.FC: middleware.FC_Embedder}.get(middleware_type)(self.activation_function) def get_output_head(self, head_type, head_idx, loss_weight=1.): output_mapping = { - OutputTypes.Q: QHead, - OutputTypes.DuelingQ: DuelingQHead, - OutputTypes.V: None, # Policy Optimization algorithms over Neon are currently not supported in Coach - OutputTypes.Pi: None, # Policy Optimization algorithms over Neon are currently not supported in Coach - OutputTypes.MeasurementsPrediction: None, # DFP over Neon is currently not supported in Coach - OutputTypes.DNDQ: None, # NEC over Neon is currently not supported in Coach - OutputTypes.NAF: None, # NAF over Neon is currently not supported in Coach - OutputTypes.PPO: None, # PPO over Neon is currently not supported in Coach - OutputTypes.PPO_V: None # PPO over Neon is currently not supported in Coach + conf.OutputTypes.Q: heads.QHead, + conf.OutputTypes.DuelingQ: heads.DuelingQHead, + conf.OutputTypes.V: None, # Policy Optimization algorithms over Neon are currently not supported in Coach + conf.OutputTypes.Pi: None, # Policy Optimization algorithms over Neon are currently not supported in Coach + conf.OutputTypes.MeasurementsPrediction: None, # DFP over Neon is currently not supported in Coach + conf.OutputTypes.DNDQ: None, # NEC over Neon is currently not supported in Coach + conf.OutputTypes.NAF: None, # NAF over Neon is currently not supported in Coach + conf.OutputTypes.PPO: None, # PPO over Neon is currently not supported in Coach + conf.OutputTypes.PPO_V: None # PPO over Neon is currently not supported in Coach } return output_mapping[head_type](self.tp, head_idx, loss_weight, self.network_is_local) @@ -104,7 +107,7 @@ class GeneralNeonNetwork(NeonArchitecture): done_creating_input_placeholders = False for network_idx in range(self.num_networks): - with name_scope('network_{}'.format(network_idx)): + with ngraph_names.name_scope('network_{}'.format(network_idx)): #################### # Input Embeddings # #################### diff --git 
a/architectures/neon_components/heads.py b/architectures/neon_components/heads.py index df49867..21af758 100644 --- a/architectures/neon_components/heads.py +++ b/architectures/neon_components/heads.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2017 Intel Corporation +# Copyright (c) 2017 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,13 +13,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # - import ngraph as ng -from ngraph.util.names import name_scope -import ngraph.frontends.neon as neon -import numpy as np -from utils import force_list -from architectures.neon_components.losses import * +from ngraph.frontends import neon +from ngraph.util import names as ngraph_names + +import utils +from architectures.neon_components import losses class Head(object): @@ -30,7 +29,7 @@ class Head(object): self.loss = [] self.loss_type = [] self.regularizations = [] - self.loss_weight = force_list(loss_weight) + self.loss_weight = utils.force_list(loss_weight) self.weights_init = neon.GlorotInit() self.biases_init = neon.ConstantInit() self.target = [] @@ -44,15 +43,15 @@ class Head(object): :param input_layer: the input to the graph :return: the output of the last layer and the target placeholder """ - with name_scope(self.get_name()): + with ngraph_names.name_scope(self.get_name()): self._build_module(input_layer) - self.output = force_list(self.output) - self.target = force_list(self.target) - self.input = force_list(self.input) - self.loss_type = force_list(self.loss_type) - self.loss = force_list(self.loss) - self.regularizations = force_list(self.regularizations) + self.output = utils.force_list(self.output) + self.target = utils.force_list(self.target) + self.input = utils.force_list(self.input) + self.loss_type = utils.force_list(self.loss_type) + self.loss = utils.force_list(self.loss) + self.regularizations = utils.force_list(self.regularizations) if self.is_local: self.set_loss() @@ -106,7 +105,7 @@ class QHead(Head): if tuning_parameters.agent.replace_mse_with_huber_loss: raise Exception("huber loss is not supported in neon") else: - self.loss_type = mean_squared_error + self.loss_type = losses.mean_squared_error def _build_module(self, input_layer): # Standard Q Network @@ -159,7 +158,7 @@ class MeasurementsPredictionHead(Head): if tuning_parameters.agent.replace_mse_with_huber_loss: raise Exception("huber loss is not supported in neon") else: - self.loss_type = mean_squared_error + self.loss_type = losses.mean_squared_error def _build_module(self, input_layer): # This is almost exactly the same as Dueling Network but we predict the future measurements for each action @@ -167,7 +166,7 @@ class MeasurementsPredictionHead(Head): multistep_measurements_size = self.measurements_size[0] * self.num_predicted_steps_ahead # actions expectation tower (expectation stream) - E - with name_scope("expectation_stream"): + with ngraph_names.name_scope("expectation_stream"): expectation_stream = neon.Sequential([ neon.Affine(nout=256, activation=neon.Rectlin(), weight_init=self.weights_init, bias_init=self.biases_init), @@ -176,7 +175,7 @@ class MeasurementsPredictionHead(Head): ])(input_layer) # action fine differences tower (action stream) - A - with name_scope("action_stream"): + with ngraph_names.name_scope("action_stream"): action_stream_unnormalized = neon.Sequential([ neon.Affine(nout=256, activation=neon.Rectlin(), 
weight_init=self.weights_init, bias_init=self.biases_init), @@ -191,4 +190,3 @@ class MeasurementsPredictionHead(Head): # merge to future measurements predictions self.output = repeated_expectation_stream + action_stream - diff --git a/architectures/neon_components/losses.py b/architectures/neon_components/losses.py index 26e8644..a6fc064 100644 --- a/architectures/neon_components/losses.py +++ b/architectures/neon_components/losses.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2017 Intel Corporation +# Copyright (c) 2017 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,15 +13,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # - import ngraph as ng -import ngraph.frontends.neon as neon -from ngraph.util.names import name_scope -import numpy as np +from ngraph.util import names as ngraph_names def mean_squared_error(targets, outputs, weights=1.0, scope=""): - with name_scope(scope): + with ngraph_names.name_scope(scope): # TODO: reduce mean over the action axis loss = ng.squared_L2(targets - outputs) weighted_loss = loss * weights diff --git a/architectures/neon_components/middleware.py b/architectures/neon_components/middleware.py index 2aa02fd..fad7b9c 100644 --- a/architectures/neon_components/middleware.py +++ b/architectures/neon_components/middleware.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2017 Intel Corporation +# Copyright (c) 2017 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,11 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. # - -import ngraph as ng import ngraph.frontends.neon as neon -from ngraph.util.names import name_scope -import numpy as np +from ngraph.util import names as ngraph_names class MiddlewareEmbedder(object): @@ -30,7 +27,7 @@ class MiddlewareEmbedder(object): self.activation_function = activation_function def __call__(self, input_layer): - with name_scope(self.get_name()): + with ngraph_names.name_scope(self.get_name()): self.input = input_layer self._build_module() diff --git a/architectures/network_wrapper.py b/architectures/network_wrapper.py index ef026e6..d21bd77 100644 --- a/architectures/network_wrapper.py +++ b/architectures/network_wrapper.py @@ -13,20 +13,21 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# +import os +import collections -from collections import OrderedDict -from configurations import Preset, Frameworks -from logger import * +import configurations as conf +import logger try: import tensorflow as tf - from architectures.tensorflow_components.general_network import GeneralTensorFlowNetwork + from architectures.tensorflow_components import general_network as tf_net #import GeneralTensorFlowNetwork except ImportError: - failed_imports.append("TensorFlow") + logger.failed_imports.append("TensorFlow") try: - from architectures.neon_components.general_network import GeneralNeonNetwork + from architectures.neon_components import general_network as neon_net except ImportError: - failed_imports.append("Neon") + logger.failed_imports.append("Neon") class NetworkWrapper(object): @@ -50,12 +51,12 @@ class NetworkWrapper(object): self.name = name self.sess = tuning_parameters.sess - if self.tp.framework == Frameworks.TensorFlow: - general_network = GeneralTensorFlowNetwork - elif self.tp.framework == Frameworks.Neon: - general_network = GeneralNeonNetwork + if self.tp.framework == conf.Frameworks.TensorFlow: + general_network = tf_net.GeneralTensorFlowNetwork + elif self.tp.framework == conf.Frameworks.Neon: + general_network = neon_net.GeneralNeonNetwork else: - raise Exception("{} Framework is not supported".format(Frameworks().to_string(self.tp.framework))) + raise Exception("{} Framework is not supported".format(conf.Frameworks().to_string(self.tp.framework))) # Global network - the main network shared between threads self.global_network = None @@ -77,13 +78,13 @@ class NetworkWrapper(object): self.target_network = general_network(tuning_parameters, '{}/target'.format(name), network_is_local=True) - if not self.tp.distributed and self.tp.framework == Frameworks.TensorFlow: + if not self.tp.distributed and self.tp.framework == conf.Frameworks.TensorFlow: variables_to_restore = tf.global_variables() variables_to_restore = [v for v in variables_to_restore if '/online' in v.name] self.model_saver = tf.train.Saver(variables_to_restore) if self.tp.sess and self.tp.checkpoint_restore_dir: checkpoint = tf.train.latest_checkpoint(self.tp.checkpoint_restore_dir) - screen.log_title("Loading checkpoint: {}".format(checkpoint)) + logger.screen.log_title("Loading checkpoint: {}".format(checkpoint)) self.model_saver.restore(self.tp.sess, checkpoint) self.update_target_network() @@ -178,8 +179,8 @@ class NetworkWrapper(object): def save_model(self, model_id): saved_model_path = self.model_saver.save(self.tp.sess, os.path.join(self.tp.save_model_dir, str(model_id) + '.ckpt')) - screen.log_dict( - OrderedDict([ + logger.screen.log_dict( + collections.OrderedDict([ ("Saving model", saved_model_path), ]), prefix="Checkpoint" diff --git a/architectures/tensorflow_components/architecture.py b/architectures/tensorflow_components/architecture.py index 3474ff4..41396b1 100644 --- a/architectures/tensorflow_components/architecture.py +++ b/architectures/tensorflow_components/architecture.py @@ -15,12 +15,11 @@ # import time -import numpy as np import tensorflow as tf -from architectures.architecture import Architecture -from utils import force_list, squeeze_list -from configurations import Preset, MiddlewareTypes +from architectures import architecture +import configurations as conf +import utils def variable_summaries(var): """Attach a lot of summaries to a Tensor (for TensorBoard visualization).""" @@ -37,14 +36,14 @@ def variable_summaries(var): tf.summary.scalar('min', tf.reduce_min(var)) 
tf.summary.histogram('histogram', var) -class TensorFlowArchitecture(Architecture): +class TensorFlowArchitecture(architecture.Architecture): def __init__(self, tuning_parameters, name="", global_network=None, network_is_local=True): """ :param tuning_parameters: The parameters used for running the algorithm :type tuning_parameters: Preset :param name: The name of the network """ - Architecture.__init__(self, tuning_parameters, name) + architecture.Architecture.__init__(self, tuning_parameters, name) self.middleware_embedder = None self.network_is_local = network_is_local assert tuning_parameters.agent.tensorflow_support, 'TensorFlow is not supported for this agent' @@ -174,7 +173,7 @@ class TensorFlowArchitecture(Architecture): feed_dict = self._feed_dict(inputs) # feed targets - targets = force_list(targets) + targets = utils.force_list(targets) for placeholder_idx, target in enumerate(targets): feed_dict[self.targets[placeholder_idx]] = target @@ -186,13 +185,13 @@ class TensorFlowArchitecture(Architecture): else: fetches.append(self.tensor_gradients) fetches += [self.total_loss, self.losses] - if self.tp.agent.middleware_type == MiddlewareTypes.LSTM: + if self.tp.agent.middleware_type == conf.MiddlewareTypes.LSTM: fetches.append(self.middleware_embedder.state_out) additional_fetches_start_idx = len(fetches) fetches += additional_fetches # feed the lstm state if necessary - if self.tp.agent.middleware_type == MiddlewareTypes.LSTM: + if self.tp.agent.middleware_type == conf.MiddlewareTypes.LSTM: # we can't always assume that we are starting from scratch here can we? feed_dict[self.middleware_embedder.c_in] = self.middleware_embedder.c_init feed_dict[self.middleware_embedder.h_in] = self.middleware_embedder.h_init @@ -206,7 +205,7 @@ class TensorFlowArchitecture(Architecture): # extract the fetches norm_unclipped_grads, grads, total_loss, losses = result[:4] - if self.tp.agent.middleware_type == MiddlewareTypes.LSTM: + if self.tp.agent.middleware_type == conf.MiddlewareTypes.LSTM: (self.curr_rnn_c_in, self.curr_rnn_h_in) = result[4] fetched_tensors = [] if len(additional_fetches) > 0: @@ -308,7 +307,7 @@ class TensorFlowArchitecture(Architecture): if outputs is None: outputs = self.outputs - if self.tp.agent.middleware_type == MiddlewareTypes.LSTM: + if self.tp.agent.middleware_type == conf.MiddlewareTypes.LSTM: feed_dict[self.middleware_embedder.c_in] = self.curr_rnn_c_in feed_dict[self.middleware_embedder.h_in] = self.curr_rnn_h_in @@ -317,7 +316,7 @@ class TensorFlowArchitecture(Architecture): output = self.tp.sess.run(outputs, feed_dict) if squeeze_output: - output = squeeze_list(output) + output = utils.squeeze_list(output) return output diff --git a/architectures/tensorflow_components/embedders.py b/architectures/tensorflow_components/embedders.py index 880de2f..b0a4ff1 100644 --- a/architectures/tensorflow_components/embedders.py +++ b/architectures/tensorflow_components/embedders.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2017 Intel Corporation +# Copyright (c) 2017 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,8 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# - import tensorflow as tf + from configurations import EmbedderComplexity diff --git a/architectures/tensorflow_components/general_network.py b/architectures/tensorflow_components/general_network.py index 03bb2a9..b9b4a78 100644 --- a/architectures/tensorflow_components/general_network.py +++ b/architectures/tensorflow_components/general_network.py @@ -13,15 +13,16 @@ # See the License for the specific language governing permissions and # limitations under the License. # +import tensorflow as tf -from architectures.tensorflow_components.embedders import * -from architectures.tensorflow_components.heads import * -from architectures.tensorflow_components.middleware import * -from architectures.tensorflow_components.architecture import * -from configurations import InputTypes, OutputTypes, MiddlewareTypes +from architectures.tensorflow_components import architecture +from architectures.tensorflow_components import embedders +from architectures.tensorflow_components import middleware +from architectures.tensorflow_components import heads +import configurations as conf -class GeneralTensorFlowNetwork(TensorFlowArchitecture): +class GeneralTensorFlowNetwork(architecture.TensorFlowArchitecture): """ A generalized version of all possible networks implemented using tensorflow. """ @@ -37,7 +38,7 @@ class GeneralTensorFlowNetwork(TensorFlowArchitecture): self.activation_function = self.get_activation_function( tuning_parameters.agent.hidden_layers_activation_function) - TensorFlowArchitecture.__init__(self, tuning_parameters, name, global_network, network_is_local) + architecture.TensorFlowArchitecture.__init__(self, tuning_parameters, name, global_network, network_is_local) def get_activation_function(self, activation_function_string): activation_functions = { @@ -56,37 +57,37 @@ class GeneralTensorFlowNetwork(TensorFlowArchitecture): # the observation can be either an image or a vector def get_observation_embedding(with_timestep=False): if self.input_height > 1: - return ImageEmbedder((self.input_height, self.input_width, self.input_depth), name="observation", - input_rescaler=self.tp.agent.input_rescaler) + return embedders.ImageEmbedder((self.input_height, self.input_width, self.input_depth), name="observation", + input_rescaler=self.tp.agent.input_rescaler) else: - return VectorEmbedder((self.input_width + int(with_timestep), self.input_depth), name="observation") + return embedders.VectorEmbedder((self.input_width + int(with_timestep), self.input_depth), name="observation") input_mapping = { - InputTypes.Observation: get_observation_embedding(), - InputTypes.Measurements: VectorEmbedder(self.measurements_size, name="measurements"), - InputTypes.GoalVector: VectorEmbedder(self.measurements_size, name="goal_vector"), - InputTypes.Action: VectorEmbedder((self.num_actions,), name="action"), - InputTypes.TimedObservation: get_observation_embedding(with_timestep=True), + conf.InputTypes.Observation: get_observation_embedding(), + conf.InputTypes.Measurements: embedders.VectorEmbedder(self.measurements_size, name="measurements"), + conf.InputTypes.GoalVector: embedders.VectorEmbedder(self.measurements_size, name="goal_vector"), + conf.InputTypes.Action: embedders.VectorEmbedder((self.num_actions,), name="action"), + conf.InputTypes.TimedObservation: get_observation_embedding(with_timestep=True), } return input_mapping[embedder_type] def get_middleware_embedder(self, middleware_type): - return {MiddlewareTypes.LSTM: LSTM_Embedder, - MiddlewareTypes.FC: 
FC_Embedder}.get(middleware_type)(self.activation_function) + return {conf.MiddlewareTypes.LSTM: middleware.LSTM_Embedder, + conf.MiddlewareTypes.FC: middleware.FC_Embedder}.get(middleware_type)(self.activation_function) def get_output_head(self, head_type, head_idx, loss_weight=1.): output_mapping = { - OutputTypes.Q: QHead, - OutputTypes.DuelingQ: DuelingQHead, - OutputTypes.V: VHead, - OutputTypes.Pi: PolicyHead, - OutputTypes.MeasurementsPrediction: MeasurementsPredictionHead, - OutputTypes.DNDQ: DNDQHead, - OutputTypes.NAF: NAFHead, - OutputTypes.PPO: PPOHead, - OutputTypes.PPO_V: PPOVHead, - OutputTypes.CategoricalQ: CategoricalQHead, - OutputTypes.QuantileRegressionQ: QuantileRegressionQHead + conf.OutputTypes.Q: heads.QHead, + conf.OutputTypes.DuelingQ: heads.DuelingQHead, + conf.OutputTypes.V: heads.VHead, + conf.OutputTypes.Pi: heads.PolicyHead, + conf.OutputTypes.MeasurementsPrediction: heads.MeasurementsPredictionHead, + conf.OutputTypes.DNDQ: heads.DNDQHead, + conf.OutputTypes.NAF: heads.NAFHead, + conf.OutputTypes.PPO: heads.PPOHead, + conf.OutputTypes.PPO_V: heads.PPOVHead, + conf.OutputTypes.CategoricalQ: heads.CategoricalQHead, + conf.OutputTypes.QuantileRegressionQ: heads.QuantileRegressionQHead } return output_mapping[head_type](self.tp, head_idx, loss_weight, self.network_is_local) diff --git a/architectures/tensorflow_components/heads.py b/architectures/tensorflow_components/heads.py index 616ab13..fdf1919 100644 --- a/architectures/tensorflow_components/heads.py +++ b/architectures/tensorflow_components/heads.py @@ -13,10 +13,10 @@ # See the License for the specific language governing permissions and # limitations under the License. # - import tensorflow as tf import numpy as np -from utils import force_list + +import utils # Used to initialize weights for policy and value output layers @@ -36,7 +36,7 @@ class Head(object): self.loss = [] self.loss_type = [] self.regularizations = [] - self.loss_weight = force_list(loss_weight) + self.loss_weight = utils.force_list(loss_weight) self.target = [] self.input = [] self.is_local = is_local @@ -50,12 +50,12 @@ class Head(object): with tf.variable_scope(self.get_name(), initializer=tf.contrib.layers.xavier_initializer()): self._build_module(input_layer) - self.output = force_list(self.output) - self.target = force_list(self.target) - self.input = force_list(self.input) - self.loss_type = force_list(self.loss_type) - self.loss = force_list(self.loss) - self.regularizations = force_list(self.regularizations) + self.output = utils.force_list(self.output) + self.target = utils.force_list(self.target) + self.input = utils.force_list(self.input) + self.loss_type = utils.force_list(self.loss_type) + self.loss = utils.force_list(self.loss) + self.regularizations = utils.force_list(self.regularizations) if self.is_local: self.set_loss() self._post_build() diff --git a/architectures/tensorflow_components/middleware.py b/architectures/tensorflow_components/middleware.py index dfe1597..3ef2631 100644 --- a/architectures/tensorflow_components/middleware.py +++ b/architectures/tensorflow_components/middleware.py @@ -13,7 +13,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# - import tensorflow as tf import numpy as np diff --git a/architectures/tensorflow_components/shared_variables.py b/architectures/tensorflow_components/shared_variables.py index 2775251..23d179e 100644 --- a/architectures/tensorflow_components/shared_variables.py +++ b/architectures/tensorflow_components/shared_variables.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2017 Intel Corporation +# Copyright (c) 2017 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,7 +13,6 @@ # See the License for the specific language governing permissions and # limitations under the License. # - import tensorflow as tf import numpy as np @@ -79,4 +78,4 @@ class SharedRunningStats(object): @property def shape(self): - return self._shape \ No newline at end of file + return self._shape diff --git a/coach.py b/coach.py index 8ba8cf3..73f40dc 100644 --- a/coach.py +++ b/coach.py @@ -13,46 +13,42 @@ # See the License for the specific language governing permissions and # limitations under the License. # - -import sys, inspect, re -import os -import json -import presets -from presets import * -from utils import set_gpu, list_all_classes_in_module -from architectures import * -from environments import * -from agents import * -from utils import * -from logger import screen, logger -import argparse -from subprocess import Popen -import datetime -import presets import atexit -import sys +import json +import os +import re import subprocess -from threading import Thread +import sys +import time -if len(set(failed_imports)) > 0: - screen.warning("Warning: failed to import the following packages - {}".format(', '.join(set(failed_imports)))) +import agents +import argparse +import configurations as conf +import environments +import logger +import presets +import utils + + +if len(set(logger.failed_imports)) > 0: + logger.screen.warning("Warning: failed to import the following packages - {}".format(', '.join(set(logger.failed_imports)))) def set_framework(framework_type): # choosing neural network framework - framework = Frameworks().get(framework_type) + framework = conf.Frameworks().get(framework_type) sess = None - if framework == Frameworks.TensorFlow: + if framework == conf.Frameworks.TensorFlow: import tensorflow as tf config = tf.ConfigProto() config.allow_soft_placement = True config.gpu_options.allow_growth = True config.gpu_options.per_process_gpu_memory_fraction = 0.2 sess = tf.Session(config=config) - elif framework == Frameworks.Neon: + elif framework == conf.Frameworks.Neon: import ngraph as ng sess = ng.transformers.make_transformer() - screen.log_title("Using {} framework".format(Frameworks().to_string(framework))) + logger.screen.log_title("Using {} framework".format(conf.Frameworks().to_string(framework))) return sess @@ -66,8 +62,8 @@ def check_input_and_fill_run_dict(parser): # list available presets if args.list: - presets_lists = list_all_classes_in_module(presets) - screen.log_title("Available Presets:") + presets_lists = utils.list_all_classes_in_module(presets) + logger.screen.log_title("Available Presets:") for preset in presets_lists: print(preset) sys.exit(0) @@ -77,28 +73,28 @@ def check_input_and_fill_run_dict(parser): # num_workers = int(args.num_workers) num_workers = int(re.match("^\d+$", args.num_workers).group(0)) except ValueError: - screen.error("Parameter num_workers should be an integer.") + logger.screen.error("Parameter num_workers should be an integer.") - preset_names = 
list_all_classes_in_module(presets) + preset_names = utils.list_all_classes_in_module(presets) if args.preset is not None and args.preset not in preset_names: - screen.error("A non-existing preset was selected. ") + logger.screen.error("A non-existing preset was selected. ") if args.checkpoint_restore_dir is not None and not os.path.exists(args.checkpoint_restore_dir): - screen.error("The requested checkpoint folder to load from does not exist. ") + logger.screen.error("The requested checkpoint folder to load from does not exist. ") if args.save_model_sec is not None: try: args.save_model_sec = int(args.save_model_sec) except ValueError: - screen.error("Parameter save_model_sec should be an integer.") + logger.screen.error("Parameter save_model_sec should be an integer.") if args.preset is None and (args.agent_type is None or args.environment_type is None or args.exploration_policy_type is None) and not args.play: - screen.error('When no preset is given for Coach to run, the user is expected to input the desired agent_type,' + logger.screen.error('When no preset is given for Coach to run, the user is expected to input the desired agent_type,' ' environment_type and exploration_policy_type to assemble a preset. ' '\nAt least one of these parameters was not given.') elif args.preset is None and args.play and args.environment_type is None: - screen.error('When no preset is given for Coach to run, and the user requests human control over the environment,' + logger.screen.error('When no preset is given for Coach to run, and the user requests human control over the environment,' ' the user is expected to input the desired environment_type and level.' '\nAt least one of these parameters was not given.') elif args.preset is None and args.play and args.environment_type: @@ -106,11 +102,11 @@ def check_input_and_fill_run_dict(parser): args.exploration_policy_type = 'ExplorationParameters' # get experiment name and path - experiment_name = logger.get_experiment_name(args.experiment_name) - experiment_path = logger.get_experiment_path(experiment_name) + experiment_name = logger.logger.get_experiment_name(args.experiment_name) + experiment_path = logger.logger.get_experiment_path(experiment_name) if args.play and num_workers > 1: - screen.warning("Playing the game as a human is only available with a single worker. " + logger.screen.warning("Playing the game as a human is only available with a single worker. 
" "The number of workers will be reduced to 1") num_workers = 1 @@ -123,7 +119,7 @@ def check_input_and_fill_run_dict(parser): run_dict['preset'] = args.preset run_dict['custom_parameter'] = args.custom_parameter run_dict['experiment_path'] = experiment_path - run_dict['framework'] = Frameworks().get(args.framework) + run_dict['framework'] = conf.Frameworks().get(args.framework) run_dict['play'] = args.play run_dict['evaluate'] = args.evaluate# or args.play @@ -251,16 +247,16 @@ if __name__ == "__main__": os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' # dump documentation - logger.set_dump_dir(run_dict['experiment_path'], add_timestamp=True) + logger.logger.set_dump_dir(run_dict['experiment_path'], add_timestamp=True) if not args.no_summary: - atexit.register(logger.summarize_experiment) - screen.change_terminal_title(logger.experiment_name) + atexit.register(logger.logger.summarize_experiment) + logger.screen.change_terminal_title(logger.logger.experiment_name) # Single-threaded runs if run_dict['num_threads'] == 1: # set tuning parameters json_run_dict_path = run_dict_to_json(run_dict) - tuning_parameters = json_to_preset(json_run_dict_path) + tuning_parameters = presets.json_to_preset(json_run_dict_path) tuning_parameters.sess = set_framework(args.framework) if args.print_parameters: @@ -268,8 +264,9 @@ if __name__ == "__main__": # Single-thread runs tuning_parameters.task_index = 0 - env_instance = create_environment(tuning_parameters) - agent = eval(tuning_parameters.agent.type + '(env_instance, tuning_parameters)') + env_instance = environments.create_environment(tuning_parameters) + agent = eval('agents.' + tuning_parameters.agent.type + + '(env_instance, tuning_parameters)') # Start the training or evaluation if tuning_parameters.evaluate: @@ -282,11 +279,11 @@ if __name__ == "__main__": assert args.framework.lower() == 'tensorflow', "Distributed training works only with TensorFlow" os.environ["OMP_NUM_THREADS"]="1" # set parameter server and workers addresses - ps_hosts = "localhost:{}".format(get_open_port()) - worker_hosts = ",".join(["localhost:{}".format(get_open_port()) for i in range(run_dict['num_threads'] + 1)]) + ps_hosts = "localhost:{}".format(utils.get_open_port()) + worker_hosts = ",".join(["localhost:{}".format(utils.get_open_port()) for i in range(run_dict['num_threads'] + 1)]) # Make sure to disable GPU so that all the workers will use the CPU - set_cpu() + utils.set_cpu() # create a parameter server cmd = [ @@ -296,9 +293,9 @@ if __name__ == "__main__": "--worker_hosts={}".format(worker_hosts), "--job_name=ps", ] - parameter_server = Popen(cmd) + parameter_server = subprocess.Popen(cmd) - screen.log_title("*** Distributed Training ***") + logger.screen.log_title("*** Distributed Training ***") time.sleep(1) # create N training workers and 1 evaluating worker @@ -321,7 +318,7 @@ if __name__ == "__main__": "--job_name=worker", "--load_json={}".format(json_run_dict_path)] - p = Popen(workers_args) + p = subprocess.Popen(workers_args) if i != run_dict['num_threads']: workers.append(p) diff --git a/configurations.py b/configurations.py index 5e553d8..f2054a3 100644 --- a/configurations.py +++ b/configurations.py @@ -13,13 +13,13 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# - -from utils import Enum import json + import types +import utils -class Frameworks(Enum): +class Frameworks(utils.Enum): TensorFlow = 1 Neon = 2 diff --git a/dashboard.py b/dashboard.py index f21b13b..b2aaa0f 100644 --- a/dashboard.py +++ b/dashboard.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2017 Intel Corporation +# Copyright (c) 2017 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -19,29 +19,24 @@ To run Coach Dashboard, run the following command: python3 dashboard.py """ -from utils import * -import os -import datetime - -import sys -import wx -import random -import pandas as pd -from pandas.io.common import EmptyDataError -import numpy as np import colorsys -from bokeh.palettes import Dark2 -from bokeh.layouts import row, column, widgetbox, Spacer -from bokeh.models import ColumnDataSource, Range1d, LinearAxis, HoverTool, WheelZoomTool, PanTool, Legend -from bokeh.models.widgets import RadioButtonGroup, MultiSelect, Button, Select, Slider, Div, CheckboxGroup -from bokeh.models.glyphs import Patch -from bokeh.plotting import figure, show, curdoc -from utils import force_list -from utils import squeeze_list -from itertools import cycle -from os import listdir -from os.path import isfile, join, isdir, basename -from enum import Enum +import datetime +import enum +import itertools +import os +import random + +from bokeh import palettes +from bokeh import layouts as bl +from bokeh import models as bm +from bokeh.models import widgets as bw +from bokeh import plotting as bp +import numpy as np +import pandas as pd +from pandas.io import common as pandas_common +import wx + +import utils class DialogApp(wx.App): @@ -67,7 +62,7 @@ class Signal: self.name = name self.full_name = "{}/{}".format(parent.filename, self.name) self.selected = False - self.color = random.choice(Dark2[8]) + self.color = random.choice(palettes.Dark2[8]) self.line = None self.bands = None self.bokeh_source = parent.bokeh_source @@ -79,12 +74,12 @@ class Signal: if (len(name.split('/')) == 1 and name == self.name) or '/'.join(name.split('/')[:-1]) == self.name: self.sub_signals.append(name) if len(self.sub_signals) > 1: - self.mean_signal = squeeze_list([name for name in self.sub_signals if 'Mean' in name.split('/')[-1]]) - self.stdev_signal = squeeze_list([name for name in self.sub_signals if 'Stdev' in name.split('/')[-1]]) - self.min_signal = squeeze_list([name for name in self.sub_signals if 'Min' in name.split('/')[-1]]) - self.max_signal = squeeze_list([name for name in self.sub_signals if 'Max' in name.split('/')[-1]]) + self.mean_signal = utils.squeeze_list([name for name in self.sub_signals if 'Mean' in name.split('/')[-1]]) + self.stdev_signal = utils.squeeze_list([name for name in self.sub_signals if 'Stdev' in name.split('/')[-1]]) + self.min_signal = utils.squeeze_list([name for name in self.sub_signals if 'Min' in name.split('/')[-1]]) + self.max_signal = utils.squeeze_list([name for name in self.sub_signals if 'Max' in name.split('/')[-1]]) else: - self.mean_signal = squeeze_list(self.name) + self.mean_signal = utils.squeeze_list(self.name) self.stdev_signal = None self.min_signal = None self.max_signal = None @@ -107,16 +102,16 @@ class Signal: if self.selected != val: self.selected = val if self.line: - # self.set_color(Dark2[8][current_color]) - # current_color = (current_color + 1) % len(Dark2[8]) + # self.set_color(palettes.Dark2[8][current_color]) + # current_color = (current_color + 1) %
len(palettes.Dark2[8]) self.line.visible = self.selected if self.bands: self.bands.visible = self.selected and self.show_bollinger_bands elif self.selected: # lazy plotting - plot only when selected for the first time show_spinner() - self.set_color(Dark2[8][current_color]) - current_color = (current_color + 1) % len(Dark2[8]) + self.set_color(palettes.Dark2[8][current_color]) + current_color = (current_color + 1) % len(palettes.Dark2[8]) if self.has_bollinger_bands: self.set_bands_source() self.create_bands() @@ -149,7 +144,7 @@ class Signal: if self.bollinger_bands_source: self.bollinger_bands_source.data = source_data else: - self.bollinger_bands_source = ColumnDataSource(source_data) + self.bollinger_bands_source = bm.ColumnDataSource(source_data) def change_bollinger_bands_state(self, new_state): self.show_bollinger_bands = new_state @@ -192,11 +187,11 @@ class SignalsFileBase: def update_source_and_signals(self): # create bokeh data sources - self.bokeh_source_orig = ColumnDataSource(self.csv) + self.bokeh_source_orig = bm.ColumnDataSource(self.csv) self.bokeh_source_orig.data['index'] = self.bokeh_source_orig.data[x_axis] if self.bokeh_source is None: - self.bokeh_source = ColumnDataSource(self.csv) + self.bokeh_source = bm.ColumnDataSource(self.csv) else: # self.bokeh_source.data = self.bokeh_source_orig.data # smooth the data if necessary @@ -282,7 +277,7 @@ class SignalsFile(SignalsFileBase): def __init__(self, csv_path, load=True): SignalsFileBase.__init__(self) self.full_csv_path = csv_path - self.dir, self.filename, _ = break_file_path(csv_path) + self.dir, self.filename, _ = utils.break_file_path(csv_path) if load: self.load() # this helps set the correct x axis @@ -296,7 +291,7 @@ class SignalsFile(SignalsFileBase): try: self.csv = pd.read_csv(self.full_csv_path) break - except EmptyDataError: + except pandas_common.EmptyDataError: self.csv = None continue self.csv = self.csv.interpolate() @@ -327,7 +322,7 @@ class SignalsFilesGroup(SignalsFileBase): else: # get the common directory for all the experiments self.dir = os.path.dirname(os.path.commonprefix(csv_paths)) - self.filename = '{} - Group({})'.format(basename(self.dir), len(self.signals_files)) + self.filename = '{} - Group({})'.format(os.path.basename(self.dir), len(self.signals_files)) self.load() # this helps set the correct x axis @@ -425,7 +420,7 @@ class SignalsFilesGroup(SignalsFileBase): pass -class RunType(Enum): +class RunType(enum.Enum): SINGLE_FOLDER_SINGLE_FILE = 1 SINGLE_FOLDER_MULTIPLE_FILES = 2 MULTIPLE_FOLDERS_SINGLE_FILES = 3 @@ -433,7 +428,7 @@ class RunType(Enum): UNKNOWN = 0 -class FolderType(Enum): +class FolderType(enum.Enum): SINGLE_FILE = 1 MULTIPLE_FILES = 2 MULTIPLE_FOLDERS = 3 @@ -454,24 +449,24 @@ root_dir = os.path.dirname(os.path.abspath(__file__)) with open(os.path.join(root_dir, 'spinner.css'), 'r') as f: spinner_style = """""".format(f.read()) spinner_html = """""" -spinner = Div(text="""""") +spinner = bw.Div(text="""""") # file refresh time placeholder -refresh_info = Div(text="""""", width=210) +refresh_info = bw.Div(text="""""", width=210) # create figures -plot = figure(plot_width=1200, plot_height=800, - tools='pan,box_zoom,wheel_zoom,crosshair,undo,redo,reset,save', - toolbar_location='above', x_axis_label='Episodes', - x_range=Range1d(0, 10000), y_range=Range1d(0, 100000)) -plot.extra_y_ranges = {"secondary": Range1d(start=-100, end=200)} -plot.add_layout(LinearAxis(y_range_name="secondary"), 'right') +plot = bp.figure(plot_width=1200, plot_height=800, + 
tools='pan,box_zoom,wheel_zoom,crosshair,undo,redo,reset,save', + toolbar_location='above', x_axis_label='Episodes', + x_range=bm.Range1d(0, 10000), y_range=bm.Range1d(0, 100000)) +plot.extra_y_ranges = {"secondary": bm.Range1d(start=-100, end=200)} +plot.add_layout(bm.LinearAxis(y_range_name="secondary"), 'right') # legend -div = Div(text="""""") -legend = widgetbox([div]) +div = bw.Div(text="""""") +legend = bl.widgetbox([div]) -bokeh_legend = Legend( +bokeh_legend = bm.Legend( items=[("12345678901234567890123456789012345678901234567890", [])], # 50 letters # items=[(" ", [])], # 50 letters location=(-20, 0), orientation="vertical", @@ -605,8 +600,8 @@ def load_files_group(): # classify the folder as containing a single file, multiple files or only folders def classify_folder(dir_path): - files = [f for f in listdir(dir_path) if isfile(join(dir_path, f)) and f.endswith('.csv')] - folders = [d for d in listdir(dir_path) if isdir(join(dir_path, d))] + files = [f for f in os.listdir(dir_path) if os.path.isfile(os.path.join(dir_path, f)) and f.endswith('.csv')] + folders = [d for d in os.listdir(dir_path) if os.path.isdir(os.path.join(dir_path, d))] if len(files) == 1: return FolderType.SINGLE_FILE elif len(files) > 1: @@ -628,7 +623,7 @@ def get_run_type(dir_path): elif folder_type == FolderType.MULTIPLE_FOLDERS: # folder contains sub dirs -> we assume we can classify the folder using only the first sub dir - sub_dirs = [d for d in listdir(dir_path) if isdir(join(dir_path, d))] + sub_dirs = [d for d in os.listdir(dir_path) if os.path.isdir(os.path.join(dir_path, d))] # checking only the first folder in the root dir for its type, since we assume that all sub dirs will share the # same structure (i.e. if one is a result of multi-threaded run, so will all the other). 
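As an aside for readers following the dashboard.py hunks: the convention applied here keeps standard-library imports module-qualified (import os, import itertools) instead of pulling individual helpers such as listdir, isfile, join or cycle into the module namespace. Below is a minimal, self-contained sketch of the same directory-filtering logic, assuming nothing beyond the standard library; list_csv_files and list_sub_dirs are illustrative names, not functions from this patch.

import os

def list_csv_files(dir_path):
    # Keep only regular files ending in .csv, as classify_folder() does above.
    return [f for f in os.listdir(dir_path)
            if os.path.isfile(os.path.join(dir_path, f)) and f.endswith('.csv')]

def list_sub_dirs(dir_path):
    # The matching check for sub directories, used when classifying grouped runs.
    return [d for d in os.listdir(dir_path) if os.path.isdir(os.path.join(dir_path, d))]

The fully qualified calls make the origin of each helper visible at the call site, which is what the replacements in the hunks that follow continue to do.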
@@ -645,12 +640,12 @@ def add_directory_csv_files(dir_path, paths=None): if not paths: paths = [] - for p in listdir(dir_path): - path = join(dir_path, p) - if isdir(path): + for p in os.listdir(dir_path): + path = os.path.join(dir_path, p) + if os.path.isdir(path): # call recursively for each dir paths = add_directory_csv_files(path, paths) - elif isfile(path) and path.endswith('.csv'): + elif os.path.isfile(path) and path.endswith('.csv'): # add every file to the list paths.append(path) @@ -667,7 +662,7 @@ def handle_dir(dir_path, run_type): elif run_type == RunType.MULTIPLE_FOLDERS_SINGLE_FILES: create_files_group_signal(paths) elif run_type == RunType.MULTIPLE_FOLDERS_MULTIPLE_FILES: - sub_dirs = [d for d in listdir(dir_path) if isdir(join(dir_path, d))] + sub_dirs = [d for d in os.listdir(dir_path) if os.path.isdir(os.path.join(dir_path, d))] # for d in sub_dirs: # paths = add_directory_csv_files(os.path.join(dir_path, d)) # create_files_group_signal(paths) @@ -731,7 +726,7 @@ def unload_file(): selected_file.hide_all_signals() del signals_files[selected_file.filename] data_selector.options = [""] - filenames = cycle(files_selector.options) + filenames = itertools.cycle(files_selector.options) files_selector.options.remove(selected_file.filename) if len(files_selector.options) > 0: files_selector.value = next(filenames) @@ -869,48 +864,48 @@ crcolor, crRGBs = generate_color_range(color_resolution, brightness) # produce # ---------------- Build Website Layout ------------------- # select file -file_selection_button = Button(label="Select Files", button_type="success", width=120) +file_selection_button = bw.Button(label="Select Files", button_type="success", width=120) file_selection_button.on_click(load_files_group) -files_selector_spacer = Spacer(width=10) +files_selector_spacer = bl.Spacer(width=10) -group_selection_button = Button(label="Select Directory", button_type="primary", width=140) +group_selection_button = bw.Button(label="Select Directory", button_type="primary", width=140) group_selection_button.on_click(load_directory_group) -unload_file_button = Button(label="Unload", button_type="danger", width=50) +unload_file_button = bw.Button(label="Unload", button_type="danger", width=50) unload_file_button.on_click(unload_file) # files selection box -files_selector = Select(title="Files:", options=[], width=200) +files_selector = bw.Select(title="Files:", options=[], width=200) files_selector.on_change('value', change_data_selector) # data selection box -data_selector = MultiSelect(title="Data:", options=[], size=12) +data_selector = bw.MultiSelect(title="Data:", options=[], size=12) data_selector.on_change('value', select_data) # x axis selection box -x_axis_selector_title = Div(text="""X Axis:""") -x_axis_selector = RadioButtonGroup(labels=x_axis_options, active=0) +x_axis_selector_title = bw.Div(text="""X Axis:""") +x_axis_selector = bw.RadioButtonGroup(labels=x_axis_options, active=0) x_axis_selector.on_click(change_x_axis) -# toggle second axis button -toggle_second_axis_button = Button(label="Toggle Second Axis", button_type="success") +# toggle second axis bw.button +toggle_second_axis_button = bw.Button(label="Toggle Second Axis", button_type="success") toggle_second_axis_button.on_click(toggle_second_axis) # averaging slider -averaging_slider = Slider(title="Averaging window", start=1, end=101, step=10) +averaging_slider = bw.Slider(title="Averaging window", start=1, end=101, step=10) averaging_slider.on_change('value', update_averaging) # group properties checkbox 
-group_cb = CheckboxGroup(labels=["Show statistics bands", "Ungroup signals"], active=[]) +group_cb = bw.CheckboxGroup(labels=["Show statistics bands", "Ungroup signals"], active=[]) group_cb.on_click(toggle_group_property) # color selector -color_selector_title = Div(text="""Select Color:""") -crsource = ColumnDataSource(data=dict(x=crx, y=cry, crcolor=crcolor, RGBs=crRGBs)) -color_selector = figure(x_range=(0, color_resolution), y_range=(0, 10), - plot_width=300, plot_height=40, - tools='tap') +color_selector_title = bw.Div(text="""Select Color:""") +crsource = bm.ColumnDataSource(data=dict(x=crx, y=cry, crcolor=crcolor, RGBs=crRGBs)) +color_selector = bp.figure(x_range=(0, color_resolution), y_range=(0, 10), + plot_width=300, plot_height=40, + tools='tap') color_selector.axis.visible = False color_range = color_selector.rect(x='x', y='y', width=1, height=10, color='crcolor', source=crsource) @@ -920,43 +915,43 @@ color_selector.toolbar.logo = None color_selector.toolbar_location = None # title -title = Div(text="""

Coach Dashboard

""") +title = bw.Div(text="""

Coach Dashboard

""") # landing page -landing_page_description = Div(text="""

Start by selecting an experiment file or directory to open:

""") -center = Div(text="""""") -center_buttons = Div(text="""""", width=0) -landing_page = column(center, +landing_page_description = bw.Div(text="""

Start by selecting an experiment file or directory to open:

""") +center = bw.Div(text="""""") +center_buttons = bw.Div(text="""""", width=0) +landing_page = bl.column(center, title, landing_page_description, - row(center_buttons), - row(file_selection_button, sizing_mode='scale_width'), - row(group_selection_button, sizing_mode='scale_width'), + bl.row(center_buttons), + bl.row(file_selection_button, sizing_mode='scale_width'), + bl.row(group_selection_button, sizing_mode='scale_width'), sizing_mode='scale_width') # main layout of the document -layout = row(file_selection_button, files_selector_spacer, group_selection_button, width=300) -layout = column(layout, files_selector) -layout = column(layout, row(refresh_info, unload_file_button)) -layout = column(layout, data_selector) -layout = column(layout, color_selector_title) -layout = column(layout, color_selector) -layout = column(layout, x_axis_selector_title) -layout = column(layout, x_axis_selector) -layout = column(layout, group_cb) -layout = column(layout, toggle_second_axis_button) -layout = column(layout, averaging_slider) -# layout = column(layout, legend) -layout = row(layout, plot) -layout = column(title, layout) -layout = column(layout, spinner) +layout = bl.row(file_selection_button, files_selector_spacer, group_selection_button, width=300) +layout = bl.column(layout, files_selector) +layout = bl.column(layout, bl.row(refresh_info, unload_file_button)) +layout = bl.column(layout, data_selector) +layout = bl.column(layout, color_selector_title) +layout = bl.column(layout, color_selector) +layout = bl.column(layout, x_axis_selector_title) +layout = bl.column(layout, x_axis_selector) +layout = bl.column(layout, group_cb) +layout = bl.column(layout, toggle_second_axis_button) +layout = bl.column(layout, averaging_slider) +# layout = bl.column(layout, legend) +layout = bl.row(layout, plot) +layout = bl.column(title, layout) +layout = bl.column(layout, spinner) -doc = curdoc() +doc = bp.curdoc() doc.add_root(landing_page) doc.add_periodic_callback(reload_all_files, 20000) -plot.y_range = Range1d(0, 100) -plot.extra_y_ranges['secondary'] = Range1d(0, 100) +plot.y_range = bm.Range1d(0, 100) +plot.extra_y_ranges['secondary'] = bm.Range1d(0, 100) # show load file dialog immediately on start #doc.add_timeout_callback(load_files, 1000) diff --git a/debug_utils.py b/debug_utils.py index 11db9fc..9ee79a1 100644 --- a/debug_utils.py +++ b/debug_utils.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2017 Intel Corporation +# Copyright (c) 2017 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,7 +13,6 @@ # See the License for the specific language governing permissions and # limitations under the License. # - import matplotlib.pyplot as plt import numpy as np diff --git a/docs/docs/mdx_math.py b/docs/docs/mdx_math.py index fe28d11..b59158e 100644 --- a/docs/docs/mdx_math.py +++ b/docs/docs/mdx_math.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2017 Intel Corporation +# Copyright (c) 2017 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -24,9 +24,9 @@ Adds support for displaying math formulas using [MathJax](http://www.mathjax.org Author: 2015, Dmitry Shachnev . 
''' - import markdown + class MathExtension(markdown.extensions.Extension): def __init__(self, *args, **kwargs): self.config = { diff --git a/docs/docs/setup.py b/docs/docs/setup.py index bfac4df..a45ed50 100644 --- a/docs/docs/setup.py +++ b/docs/docs/setup.py @@ -1,5 +1,6 @@ +#!/usr/bin/env python3 # -# Copyright (c) 2017 Intel Corporation +# Copyright (c) 2017 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,11 +14,9 @@ # See the License for the specific language governing permissions and # limitations under the License. # - -#!/usr/bin/env python3 - from distutils.core import setup + long_description = \ """This extension adds math formulas support to Python-Markdown_ (works with version 2.6 or newer). diff --git a/docs/fix_index.py b/docs/fix_index.py index 45a79c4..87e8373 100644 --- a/docs/fix_index.py +++ b/docs/fix_index.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2017 Intel Corporation +# Copyright (c) 2017 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,8 +13,10 @@ # See the License for the specific language governing permissions and # limitations under the License. # +import fnmatch +import os + -import os, fnmatch, sys def findReplace(directory, find, replace, filePattern): for path, dirs, files in os.walk(os.path.abspath(directory)): for filename in fnmatch.filter(files, filePattern): @@ -25,7 +27,8 @@ def findReplace(directory, find, replace, filePattern): with open(filepath, "w") as f: f.write(s) -if __name__=="__main__": + +if __name__ == "__main__": findReplace('./site/', '/"', '/index.html"', "*.html") findReplace('./site/', '"/index.html"', '"./index.html"', "*.html") findReplace('./site/', '"."', '"./index.html"', "*.html") @@ -34,4 +37,4 @@ if __name__=="__main__": findReplace('./site/', '/#', '/index.html#', "search_index.json") findReplace('./site/assets/javascripts/', 'search_index.json', 'search_index.txt', "*.js") findReplace('./site/mkdocs/js/', 'search_index.json', 'search_index.txt', "search.js") - os.rename("./site/mkdocs/search_index.json", "./site/mkdocs/search_index.txt") \ No newline at end of file + os.rename("./site/mkdocs/search_index.json", "./site/mkdocs/search_index.txt") diff --git a/environments/__init__.py b/environments/__init__.py index 1dd8d1d..0b03f0a 100644 --- a/environments/__init__.py +++ b/environments/__init__.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2017 Intel Corporation +# Copyright (c) 2017 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,15 +13,13 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# - -from logger import * -from utils import Enum, get_open_port -from environments.gym_environment_wrapper import * -from environments.doom_environment_wrapper import * -from environments.carla_environment_wrapper import * +from environments.gym_environment_wrapper import GymEnvironmentWrapper +from environments.doom_environment_wrapper import DoomEnvironmentWrapper +from environments.carla_environment_wrapper import CarlaEnvironmentWrapper +import utils -class EnvTypes(Enum): +class EnvTypes(utils.Enum): Doom = "DoomEnvironmentWrapper" Gym = "GymEnvironmentWrapper" Carla = "CarlaEnvironmentWrapper" @@ -31,6 +29,3 @@ def create_environment(tuning_parameters): env_type_name, env_type = EnvTypes().verify(tuning_parameters.env.type) env = eval(env_type)(tuning_parameters) return env - - - diff --git a/environments/carla_environment_wrapper.py b/environments/carla_environment_wrapper.py index b4657a3..60c4b38 100644 --- a/environments/carla_environment_wrapper.py +++ b/environments/carla_environment_wrapper.py @@ -1,34 +1,31 @@ +import logging +import os +import signal +import subprocess import sys -from os import path, environ - -try: - if 'CARLA_ROOT' in environ: - sys.path.append(path.join(environ.get('CARLA_ROOT'), 'PythonClient')) - from carla.client import CarlaClient - from carla.settings import CarlaSettings - from carla.tcp import TCPConnectionError - from carla.sensor import Camera - from carla.client import VehicleControl -except ImportError: - from logger import failed_imports - failed_imports.append("CARLA") import numpy as np -import time -import logging -import subprocess -import signal -from environments.environment_wrapper import EnvironmentWrapper -from utils import * -from logger import screen, logger -from PIL import Image + +import logger +try: + if 'CARLA_ROOT' in os.environ: + sys.path.append(os.path.join(os.environ.get('CARLA_ROOT'), + 'PythonClient')) + from carla import client as carla_client + from carla import settings as carla_settings + from carla import sensor as carla_sensor +except ImportError: + logger.failed_imports.append("CARLA") +from environments import environment_wrapper as ew +import utils # enum of the available levels and their path -class CarlaLevel(Enum): +class CarlaLevel(utils.Enum): TOWN1 = "/Game/Maps/Town01" TOWN2 = "/Game/Maps/Town02" + key_map = { 'BRAKE': (274,), # down arrow 'GAS': (273,), # up arrow @@ -41,16 +38,16 @@ key_map = { } -class CarlaEnvironmentWrapper(EnvironmentWrapper): +class CarlaEnvironmentWrapper(ew.EnvironmentWrapper): def __init__(self, tuning_parameters): - EnvironmentWrapper.__init__(self, tuning_parameters) + ew.EnvironmentWrapper.__init__(self, tuning_parameters) self.tp = tuning_parameters # server configuration self.server_height = self.tp.env.server_height self.server_width = self.tp.env.server_width - self.port = get_open_port() + self.port = utils.get_open_port() self.host = 'localhost' self.map = CarlaLevel().get(self.tp.env.level) @@ -70,7 +67,7 @@ class CarlaEnvironmentWrapper(EnvironmentWrapper): self.settings = fp.read() else: # hard coded settings - self.settings = CarlaSettings() + self.settings = carla_settings.CarlaSettings() self.settings.set( SynchronousMode=True, SendNonPlayerAgentsInfo=False, @@ -80,7 +77,7 @@ class CarlaEnvironmentWrapper(EnvironmentWrapper): self.settings.randomize_seeds() # add cameras - camera = Camera('CameraRGB') + camera = carla_sensor.Camera('CameraRGB') camera.set_image_size(self.width, self.height) camera.set_position(200, 0, 140) camera.set_rotation(0, 0, 0) @@ -92,7 
+89,7 @@ class CarlaEnvironmentWrapper(EnvironmentWrapper): logging.disable(40) # open the client - self.game = CarlaClient(self.host, self.port, timeout=99999999) + self.game = carla_client.CarlaClient(self.host, self.port, timeout=99999999) self.game.connect() scene = self.game.load_settings(self.settings) @@ -141,12 +138,12 @@ class CarlaEnvironmentWrapper(EnvironmentWrapper): self.renderer.create_screen(image.shape[1], image.shape[0]) def _open_server(self): - log_path = path.join(logger.experiments_path, "CARLA_LOG_{}.txt".format(self.port)) + log_path = os.path.join(logger.logger.experiments_path, "CARLA_LOG_{}.txt".format(self.port)) with open(log_path, "wb") as out: - cmd = [path.join(environ.get('CARLA_ROOT'), 'CarlaUE4.sh'), self.map, - "-benchmark", "-carla-server", "-fps=10", "-world-port={}".format(self.port), - "-windowed -ResX={} -ResY={}".format(self.server_width, self.server_height), - "-carla-no-hud"] + cmd = [os.path.join(os.environ.get('CARLA_ROOT'), 'CarlaUE4.sh'), self.map, + "-benchmark", "-carla-server", "-fps=10", "-world-port={}".format(self.port), + "-windowed -ResX={} -ResY={}".format(self.server_width, self.server_height), + "-carla-no-hud"] if self.config: cmd.append("-carla-settings={}".format(self.config)) p = subprocess.Popen(cmd, stdout=out, stderr=out) @@ -201,7 +198,7 @@ class CarlaEnvironmentWrapper(EnvironmentWrapper): action = action_idx self.last_action_idx = action - self.control = VehicleControl() + self.control = carla_client.VehicleControl() self.control.throttle = np.clip(action[0], 0, 1) self.control.steer = np.clip(action[1], -1, 1) self.control.brake = np.abs(np.clip(action[0], -1, 0)) diff --git a/environments/doom_environment_wrapper.py b/environments/doom_environment_wrapper.py index a0c618d..716a6d0 100644 --- a/environments/doom_environment_wrapper.py +++ b/environments/doom_environment_wrapper.py @@ -13,23 +13,23 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# +import enum +import os +import numpy as np +import logger try: import vizdoom except ImportError: - from logger import failed_imports - failed_imports.append("ViZDoom") + logger.failed_imports.append("ViZDoom") -import numpy as np -from environments.environment_wrapper import EnvironmentWrapper -from os import path, environ -from utils import * -from logger import * +from environments import environment_wrapper as ew +import utils # enum of the available levels and their path -class DoomLevel(Enum): +class DoomLevel(utils.Enum): BASIC = "basic.cfg" DEFEND = "defend_the_center.cfg" DEATHMATCH = "deathmatch.cfg" @@ -40,6 +40,7 @@ class DoomLevel(Enum): DEFEND_THE_LINE = "defend_the_line.cfg" DEADLY_CORRIDOR = "deadly_corridor.cfg" + key_map = { 'NO-OP': 96, # ` 'ATTACK': 13, # enter @@ -78,15 +79,16 @@ key_map = { } -class DoomEnvironmentWrapper(EnvironmentWrapper): +class DoomEnvironmentWrapper(ew.EnvironmentWrapper): def __init__(self, tuning_parameters): - EnvironmentWrapper.__init__(self, tuning_parameters) + ew.EnvironmentWrapper.__init__(self, tuning_parameters) # load the emulator with the required level self.level = DoomLevel().get(self.tp.env.level) - self.scenarios_dir = path.join(environ.get('VIZDOOM_ROOT'), 'scenarios') + self.scenarios_dir = os.path.join(os.environ.get('VIZDOOM_ROOT'), + 'scenarios') self.game = vizdoom.DoomGame() - self.game.load_config(path.join(self.scenarios_dir, self.level)) + self.game.load_config(os.path.join(self.scenarios_dir, self.level)) self.game.set_window_visible(False) self.game.add_game_args("+vid_forcesurface 1") diff --git a/environments/environment_wrapper.py b/environments/environment_wrapper.py index d077c39..640ac91 100644 --- a/environments/environment_wrapper.py +++ b/environments/environment_wrapper.py @@ -13,14 +13,14 @@ # See the License for the specific language governing permissions and # limitations under the License. # - -import numpy as np -from utils import * -from configurations import Preset -from renderer import Renderer import operator import time +import numpy as np + +import renderer +import utils + class EnvironmentWrapper(object): def __init__(self, tuning_parameters): @@ -50,7 +50,7 @@ class EnvironmentWrapper(object): self.height = 1 self.is_state_type_image = True self.measurements_size = 0 - self.phase = RunPhase.TRAIN + self.phase = utils.RunPhase.TRAIN self.tp = tuning_parameters self.record_video_every = self.tp.visualization.record_video_every self.env_id = self.tp.env.level @@ -62,7 +62,7 @@ class EnvironmentWrapper(object): self.wait_for_explicit_human_action = False self.is_rendered = self.is_rendered or self.human_control self.game_is_open = True - self.renderer = Renderer() + self.renderer = renderer.Renderer() @property def measurements(self): diff --git a/environments/gym_environment_wrapper.py b/environments/gym_environment_wrapper.py index c821bf8..0ec22d8 100644 --- a/environments/gym_environment_wrapper.py +++ b/environments/gym_environment_wrapper.py @@ -13,40 +13,18 @@ # See the License for the specific language governing permissions and # limitations under the License. 
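A note on the optional-dependency handling used by the Doom and CARLA wrappers above: the heavyweight simulator package is imported inside a try block, and on failure a human-readable name is appended to coach's logger.failed_imports so the rest of the framework keeps working without that backend. A minimal sketch of the pattern, assuming only a module-level failed_imports list like the one defined in logger.py later in this patch:

failed_imports = []  # in coach this list lives in logger.py

try:
    import vizdoom  # optional simulator backend
except ImportError:
    # Record the missing backend by name instead of failing at import time.
    failed_imports.append("ViZDoom")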
# +import random -import sys -from logger import * import gym import numpy as np -import time -import random -try: - import roboschool - from OpenGL import GL -except ImportError: - from logger import failed_imports - failed_imports.append("RoboSchool") -try: - from gym_extensions.continuous import mujoco -except: - from logger import failed_imports - failed_imports.append("GymExtensions") - -try: - import pybullet_envs -except ImportError: - from logger import failed_imports - failed_imports.append("PyBullet") - -from gym import wrappers -from utils import force_list, RunPhase -from environments.environment_wrapper import EnvironmentWrapper +from environments import environment_wrapper as ew +import utils -class GymEnvironmentWrapper(EnvironmentWrapper): +class GymEnvironmentWrapper(ew.EnvironmentWrapper): def __init__(self, tuning_parameters): - EnvironmentWrapper.__init__(self, tuning_parameters) + ew.EnvironmentWrapper.__init__(self, tuning_parameters) # env parameters if ':' in self.env_id: @@ -124,7 +102,7 @@ class GymEnvironmentWrapper(EnvironmentWrapper): def _update_state(self): if hasattr(self.env, 'env') and hasattr(self.env.env, 'ale'): - if self.phase == RunPhase.TRAIN and hasattr(self, 'current_ale_lives'): + if self.phase == utils.RunPhase.TRAIN and hasattr(self, 'current_ale_lives'): # signal termination for life loss if self.current_ale_lives != self.env.env.ale.lives(): self.done = True @@ -146,7 +124,7 @@ class GymEnvironmentWrapper(EnvironmentWrapper): if type(action_idx) == int and action_idx == 0: # deal with the "reset" action 0 action = [0] * self.env.action_space.shape[0] - action = np.array(force_list(action)) + action = np.array(utils.force_list(action)) # removing redundant dimensions such that the action size will match the expected action size from gym if action.shape != self.env.action_space.shape: action = np.squeeze(action) diff --git a/exploration_policies/__init__.py b/exploration_policies/__init__.py index 260d958..101794a 100644 --- a/exploration_policies/__init__.py +++ b/exploration_policies/__init__.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2017 Intel Corporation +# Copyright (c) 2017 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,16 +13,29 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# +from exploration_policies.additive_noise import AdditiveNoise +from exploration_policies.approximated_thompson_sampling_using_dropout import ApproximatedThompsonSamplingUsingDropout +from exploration_policies.bayesian import Bayesian +from exploration_policies.boltzmann import Boltzmann +from exploration_policies.bootstrapped import Bootstrapped +from exploration_policies.categorical import Categorical +from exploration_policies.continuous_entropy import ContinuousEntropy +from exploration_policies.e_greedy import EGreedy +from exploration_policies.exploration_policy import ExplorationPolicy +from exploration_policies.greedy import Greedy +from exploration_policies.ou_process import OUProcess +from exploration_policies.thompson_sampling import ThompsonSampling -from exploration_policies.additive_noise import * -from exploration_policies.approximated_thompson_sampling_using_dropout import * -from exploration_policies.bayesian import * -from exploration_policies.boltzmann import * -from exploration_policies.bootstrapped import * -from exploration_policies.categorical import * -from exploration_policies.continuous_entropy import * -from exploration_policies.e_greedy import * -from exploration_policies.exploration_policy import * -from exploration_policies.greedy import * -from exploration_policies.ou_process import * -from exploration_policies.thompson_sampling import * + +__all__ = [AdditiveNoise, + ApproximatedThompsonSamplingUsingDropout, + Bayesian, + Boltzmann, + Bootstrapped, + Categorical, + ContinuousEntropy, + EGreedy, + ExplorationPolicy, + Greedy, + OUProcess, + ThompsonSampling] diff --git a/exploration_policies/additive_noise.py b/exploration_policies/additive_noise.py index d8cd7c9..124daa3 100644 --- a/exploration_policies/additive_noise.py +++ b/exploration_policies/additive_noise.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2017 Intel Corporation +# Copyright (c) 2017 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,18 +13,19 @@ # See the License for the specific language governing permissions and # limitations under the License. 
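A note on the __all__ lists introduced in these package __init__ modules: they enumerate the exported classes as objects. The conventional form is a list of strings naming the public attributes; a sketch of the string-based variant for exploration_policies/__init__.py, using the same names as the hunk above, only quoted:

__all__ = [
    'AdditiveNoise',
    'ApproximatedThompsonSamplingUsingDropout',
    'Bayesian',
    'Boltzmann',
    'Bootstrapped',
    'Categorical',
    'ContinuousEntropy',
    'EGreedy',
    'ExplorationPolicy',
    'Greedy',
    'OUProcess',
    'ThompsonSampling',
]

With strings, wildcard imports and static tools resolve the exported names as usual, whereas a list of class objects serves mainly as documentation and may trip up a from-package wildcard import.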
# - import numpy as np -from exploration_policies.exploration_policy import * + +from exploration_policies import exploration_policy +import utils -class AdditiveNoise(ExplorationPolicy): +class AdditiveNoise(exploration_policy.ExplorationPolicy): def __init__(self, tuning_parameters): """ :param tuning_parameters: A Preset class instance with all the running paramaters :type tuning_parameters: Preset """ - ExplorationPolicy.__init__(self, tuning_parameters) + exploration_policy.ExplorationPolicy.__init__(self, tuning_parameters) self.variance = tuning_parameters.exploration.initial_noise_variance_percentage self.final_variance = tuning_parameters.exploration.final_noise_variance_percentage self.decay_steps = tuning_parameters.exploration.noise_variance_decay_steps @@ -37,7 +38,7 @@ class AdditiveNoise(ExplorationPolicy): self.variance = self.final_variance def get_action(self, action_values): - if self.phase == RunPhase.TRAIN: + if self.phase == utils.RunPhase.TRAIN: self.decay_exploration() action = np.random.normal(action_values, 2 * self.variance * self.action_abs_range) return action #np.clip(action, -self.action_abs_range, self.action_abs_range).squeeze() diff --git a/exploration_policies/approximated_thompson_sampling_using_dropout.py b/exploration_policies/approximated_thompson_sampling_using_dropout.py index 2a51ae2..13be0d6 100644 --- a/exploration_policies/approximated_thompson_sampling_using_dropout.py +++ b/exploration_policies/approximated_thompson_sampling_using_dropout.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2017 Intel Corporation +# Copyright (c) 2017 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,17 +13,18 @@ # See the License for the specific language governing permissions and # limitations under the License. # +import numpy as np -from exploration_policies.exploration_policy import * +from exploration_policies import exploration_policy -class ApproximatedThompsonSamplingUsingDropout(ExplorationPolicy): +class ApproximatedThompsonSamplingUsingDropout(exploration_policy.ExplorationPolicy): def __init__(self, tuning_parameters): """ :param tuning_parameters: A Preset class instance with all the running paramaters :type tuning_parameters: Preset """ - ExplorationPolicy.__init__(self, tuning_parameters) + exploration_policy.ExplorationPolicy.__init__(self, tuning_parameters) self.dropout_discard_probability = tuning_parameters.exploration.dropout_discard_probability self.network = tuning_parameters.network self.assign_op = self.network.dropout_discard_probability.assign(self.dropout_discard_probability) diff --git a/exploration_policies/bayesian.py b/exploration_policies/bayesian.py index f5ab6b3..75f394d 100644 --- a/exploration_policies/bayesian.py +++ b/exploration_policies/bayesian.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2017 Intel Corporation +# Copyright (c) 2017 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,18 +13,19 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# +import numpy as np -from exploration_policies.exploration_policy import * -import tensorflow as tf +from exploration_policies import exploration_policy +import utils -class Bayesian(ExplorationPolicy): +class Bayesian(exploration_policy.ExplorationPolicy): def __init__(self, tuning_parameters): """ :param tuning_parameters: A Preset class instance with all the running paramaters :type tuning_parameters: Preset """ - ExplorationPolicy.__init__(self, tuning_parameters) + exploration_policy.ExplorationPolicy.__init__(self, tuning_parameters) self.keep_probability = tuning_parameters.exploration.initial_keep_probability self.final_keep_probability = tuning_parameters.exploration.final_keep_probability self.keep_probability_decay_delta = ( @@ -40,7 +41,7 @@ class Bayesian(ExplorationPolicy): self.keep_probability -= self.keep_probability_decay_delta def get_action(self, action_values): - if self.phase == RunPhase.TRAIN: + if self.phase == utils.RunPhase.TRAIN: self.decay_keep_probability() # dropout = self.network.get_layer('variable_dropout_1') # with tf.Session() as sess: diff --git a/exploration_policies/boltzmann.py b/exploration_policies/boltzmann.py index de954be..c8bc351 100644 --- a/exploration_policies/boltzmann.py +++ b/exploration_policies/boltzmann.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2017 Intel Corporation +# Copyright (c) 2017 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,17 +13,18 @@ # See the License for the specific language governing permissions and # limitations under the License. # +import numpy as np -from exploration_policies.exploration_policy import * +from exploration_policies import exploration_policy +import utils - -class Boltzmann(ExplorationPolicy): +class Boltzmann(exploration_policy.ExplorationPolicy): def __init__(self, tuning_parameters): """ :param tuning_parameters: A Preset class instance with all the running paramaters :type tuning_parameters: Preset """ - ExplorationPolicy.__init__(self, tuning_parameters) + exploration_policy.ExplorationPolicy.__init__(self, tuning_parameters) self.temperature = tuning_parameters.exploration.initial_temperature self.final_temperature = tuning_parameters.exploration.final_temperature self.temperature_decay_delta = ( @@ -35,7 +36,7 @@ class Boltzmann(ExplorationPolicy): self.temperature -= self.temperature_decay_delta def get_action(self, action_values): - if self.phase == RunPhase.TRAIN: + if self.phase == utils.RunPhase.TRAIN: self.decay_temperature() # softmax calculation exp_probabilities = np.exp(action_values / self.temperature) diff --git a/exploration_policies/bootstrapped.py b/exploration_policies/bootstrapped.py index 14b6d2c..5d17e75 100644 --- a/exploration_policies/bootstrapped.py +++ b/exploration_policies/bootstrapped.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2017 Intel Corporation +# Copyright (c) 2017 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,17 +13,18 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# +import numpy as np -from exploration_policies.e_greedy import * +from exploration_policies import e_greedy -class Bootstrapped(EGreedy): +class Bootstrapped(e_greedy.EGreedy): def __init__(self, tuning_parameters): """ :param tuning_parameters: A Preset class instance with all the running parameters :type tuning_parameters: Preset """ - EGreedy.__init__(self, tuning_parameters) + e_greedy.EGreedy.__init__(self, tuning_parameters) self.num_heads = tuning_parameters.exploration.architecture_num_q_heads self.selected_head = 0 @@ -31,7 +32,7 @@ class Bootstrapped(EGreedy): self.selected_head = np.random.randint(self.num_heads) def get_action(self, action_values): - return EGreedy.get_action(self, action_values[self.selected_head]) + return e_greedy.EGreedy.get_action(self, action_values[self.selected_head]) def get_control_param(self): return self.selected_head diff --git a/exploration_policies/categorical.py b/exploration_policies/categorical.py index 1be5509..cf8e0a5 100644 --- a/exploration_policies/categorical.py +++ b/exploration_policies/categorical.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2017 Intel Corporation +# Copyright (c) 2017 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,17 +13,18 @@ # See the License for the specific language governing permissions and # limitations under the License. # +import numpy as np -from exploration_policies.exploration_policy import * +from exploration_policies import exploration_policy -class Categorical(ExplorationPolicy): +class Categorical(exploration_policy.ExplorationPolicy): def __init__(self, tuning_parameters): """ :param tuning_parameters: A Preset class instance with all the running paramaters :type tuning_parameters: Preset """ - ExplorationPolicy.__init__(self, tuning_parameters) + exploration_policy.ExplorationPolicy.__init__(self, tuning_parameters) def get_action(self, action_values): # choose actions according to the probabilities diff --git a/exploration_policies/continuous_entropy.py b/exploration_policies/continuous_entropy.py index 8d7ee66..edfc3b5 100644 --- a/exploration_policies/continuous_entropy.py +++ b/exploration_policies/continuous_entropy.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2017 Intel Corporation +# Copyright (c) 2017 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,10 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. # - -import numpy as np -from exploration_policies.exploration_policy import * +from exploration_policies import exploration_policy -class ContinuousEntropy(ExplorationPolicy): +class ContinuousEntropy(exploration_policy.ExplorationPolicy): pass diff --git a/exploration_policies/e_greedy.py b/exploration_policies/e_greedy.py index f0cb01f..9aa2dd7 100644 --- a/exploration_policies/e_greedy.py +++ b/exploration_policies/e_greedy.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2017 Intel Corporation +# Copyright (c) 2017 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,17 +13,19 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# +import numpy as np -from exploration_policies.exploration_policy import * +from exploration_policies import exploration_policy +import utils -class EGreedy(ExplorationPolicy): +class EGreedy(exploration_policy.ExplorationPolicy): def __init__(self, tuning_parameters): """ :param tuning_parameters: A Preset class instance with all the running paramaters :type tuning_parameters: Preset """ - ExplorationPolicy.__init__(self, tuning_parameters) + exploration_policy.ExplorationPolicy.__init__(self, tuning_parameters) self.epsilon = tuning_parameters.exploration.initial_epsilon self.final_epsilon = tuning_parameters.exploration.final_epsilon self.epsilon_decay_delta = ( @@ -52,9 +54,9 @@ class EGreedy(ExplorationPolicy): self.variance = self.final_variance def get_action(self, action_values): - if self.phase == RunPhase.TRAIN: + if self.phase == utils.RunPhase.TRAIN: self.decay_exploration() - epsilon = self.evaluation_epsilon if self.phase == RunPhase.TEST else self.epsilon + epsilon = self.evaluation_epsilon if self.phase == utils.RunPhase.TEST else self.epsilon if self.discrete_controls: top_action = np.argmax(action_values) @@ -67,4 +69,4 @@ class EGreedy(ExplorationPolicy): return np.squeeze(action_values + (np.random.rand() < epsilon) * noise) def get_control_param(self): - return self.evaluation_epsilon if self.phase == RunPhase.TEST else self.epsilon + return self.evaluation_epsilon if self.phase == utils.RunPhase.TEST else self.epsilon diff --git a/exploration_policies/exploration_policy.py b/exploration_policies/exploration_policy.py index d211054..6a9d9a7 100644 --- a/exploration_policies/exploration_policy.py +++ b/exploration_policies/exploration_policy.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2017 Intel Corporation +# Copyright (c) 2017 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,10 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # - -import numpy as np -from utils import * -from configurations import * +import utils class ExplorationPolicy(object): @@ -25,7 +22,7 @@ class ExplorationPolicy(object): :param tuning_parameters: A Preset class instance with all the running paramaters :type tuning_parameters: Preset """ - self.phase = RunPhase.HEATUP + self.phase = utils.RunPhase.HEATUP self.action_space_size = tuning_parameters.env.action_space_size self.action_abs_range = tuning_parameters.env_instance.action_space_abs_range self.discrete_controls = tuning_parameters.env_instance.discrete_controls @@ -39,7 +36,7 @@ class ExplorationPolicy(object): def get_action(self, action_values): """ - Given a list of values corresponding to each action, + Given a list of values corresponding to each action, choose one actions according to the exploration policy :param action_values: A list of action values :return: The chosen action @@ -55,4 +52,4 @@ class ExplorationPolicy(object): self.phase = phase def get_control_param(self): - return 0 \ No newline at end of file + return 0 diff --git a/exploration_policies/greedy.py b/exploration_policies/greedy.py index 34acf1b..1306731 100644 --- a/exploration_policies/greedy.py +++ b/exploration_policies/greedy.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2017 Intel Corporation +# Copyright (c) 2017 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -13,17 +13,18 @@ # See the License for the specific language governing permissions and # limitations under the License. # +import numpy as np -from exploration_policies.exploration_policy import * +from exploration_policies import exploration_policy -class Greedy(ExplorationPolicy): +class Greedy(exploration_policy.ExplorationPolicy): def __init__(self, tuning_parameters): """ :param tuning_parameters: A Preset class instance with all the running paramaters :type tuning_parameters: Preset """ - ExplorationPolicy.__init__(self, tuning_parameters) + exploration_policy.ExplorationPolicy.__init__(self, tuning_parameters) def get_action(self, action_values): return np.argmax(action_values) diff --git a/exploration_policies/ou_process.py b/exploration_policies/ou_process.py index c7d5851..fcc8a9f 100644 --- a/exploration_policies/ou_process.py +++ b/exploration_policies/ou_process.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2017 Intel Corporation +# Copyright (c) 2017 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,21 +13,21 @@ # See the License for the specific language governing permissions and # limitations under the License. # - import numpy as np -from exploration_policies.exploration_policy import * + +from exploration_policies import exploration_policy # Based on on the description in: # https://math.stackexchange.com/questions/1287634/implementing-ornstein-uhlenbeck-in-matlab # Ornstein-Uhlenbeck process -class OUProcess(ExplorationPolicy): +class OUProcess(exploration_policy.ExplorationPolicy): def __init__(self, tuning_parameters): """ :param tuning_parameters: A Preset class instance with all the running paramaters :type tuning_parameters: Preset """ - ExplorationPolicy.__init__(self, tuning_parameters) + exploration_policy.ExplorationPolicy.__init__(self, tuning_parameters) self.action_space_size = tuning_parameters.env.action_space_size self.mu = float(tuning_parameters.exploration.mu) * np.ones(self.action_space_size) self.theta = tuning_parameters.exploration.theta diff --git a/exploration_policies/thompson_sampling.py b/exploration_policies/thompson_sampling.py index 265ce60..8324d87 100644 --- a/exploration_policies/thompson_sampling.py +++ b/exploration_policies/thompson_sampling.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2017 Intel Corporation +# Copyright (c) 2017 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,17 +13,18 @@ # See the License for the specific language governing permissions and # limitations under the License. # +import numpy as np -from exploration_policies.exploration_policy import * +from exploration_policies import exploration_policy -class ThompsonSampling(ExplorationPolicy): +class ThompsonSampling(exploration_policy.ExplorationPolicy): def __init__(self, tuning_parameters): """ :param tuning_parameters: A Preset class instance with all the running paramaters :type tuning_parameters: Preset """ - ExplorationPolicy.__init__(self, tuning_parameters) + exploration_policy.ExplorationPolicy.__init__(self, tuning_parameters) self.action_space_size = tuning_parameters.env.action_space_size def get_action(self, action_values): diff --git a/logger.py b/logger.py index c070cc0..4fca581 100644 --- a/logger.py +++ b/logger.py @@ -13,19 +13,16 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# - -from pandas import * +import datetime import os import re -from pprint import pprint -import threading -from subprocess import Popen, PIPE -import time -import datetime -from six.moves import input -from PIL import Image -from typing import Union import shutil +import time +import typing + +import pandas +import PIL +from six.moves import input global failed_imports failed_imports = [] @@ -90,7 +87,7 @@ class ScreenLogger(object): def ask_input(self, title): return input("{}{}{}".format(Colors.BG_CYAN, title, Colors.END)) - def ask_yes_no(self, title: str, default: Union[None, bool]=None): + def ask_yes_no(self, title: str, default: typing.Union[None, bool]=None): """ Ask the user for a yes / no question and return True if the answer is yes and False otherwise. The function will keep asking the user for an answer until he answers one of the possible responses. @@ -156,7 +153,7 @@ class BaseLogger(object): class Logger(BaseLogger): def __init__(self): BaseLogger.__init__(self) - self.data = DataFrame() + self.data = pandas.DataFrame() self.csv_path = '' self.doc_path = '' self.aggregated_data_across_threads = None @@ -249,7 +246,7 @@ class Logger(BaseLogger): if not os.path.exists(output_dir): os.makedirs(output_dir) output_path = os.path.join(output_dir, output_file) - pil_images = [Image.fromarray(image) for image in images] + pil_images = [PIL.Image.fromarray(image) for image in images] pil_images[0].save(output_path, save_all=True, append_images=pil_images[1:], duration=1.0 / fps, loop=0) def remove_experiment_dir(self): diff --git a/memories/__init__.py b/memories/__init__.py index 29a0894..0468453 100644 --- a/memories/__init__.py +++ b/memories/__init__.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2017 Intel Corporation +# Copyright (c) 2017 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,7 +13,18 @@ # See the License for the specific language governing permissions and # limitations under the License. # +from memories.differentiable_neural_dictionary import AnnoyDictionary +from memories.differentiable_neural_dictionary import AnnoyIndex +from memories.differentiable_neural_dictionary import QDND +from memories.episodic_experience_replay import EpisodicExperienceReplay +from memories.memory import Episode +from memories.memory import Memory +from memories.memory import Transition -from memories.differentiable_neural_dictionary import * -from memories.episodic_experience_replay import * -from memories.memory import * +__all__ = [AnnoyDictionary, + AnnoyIndex, + Episode, + EpisodicExperienceReplay, + Memory, + QDND, + Transition] diff --git a/memories/differentiable_neural_dictionary.py b/memories/differentiable_neural_dictionary.py index 1a1fdc7..d5f0246 100644 --- a/memories/differentiable_neural_dictionary.py +++ b/memories/differentiable_neural_dictionary.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2017 Intel Corporation +# Copyright (c) 2017 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,10 +13,11 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# +import os +import pickle import numpy as np from annoy import AnnoyIndex -import os, pickle class AnnoyDictionary(object): diff --git a/memories/episodic_experience_replay.py b/memories/episodic_experience_replay.py index 5930d78..936b084 100644 --- a/memories/episodic_experience_replay.py +++ b/memories/episodic_experience_replay.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2017 Intel Corporation +# Copyright (c) 2017 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,24 +13,25 @@ # See the License for the specific language governing permissions and # limitations under the License. # +import typing -from memories.memory import * -import threading -from typing import Union +import numpy as np + +from memories import memory -class EpisodicExperienceReplay(Memory): +class EpisodicExperienceReplay(memory.Memory): def __init__(self, tuning_parameters): """ :param tuning_parameters: A Preset class instance with all the running paramaters :type tuning_parameters: Preset """ - Memory.__init__(self, tuning_parameters) + memory.Memory.__init__(self, tuning_parameters) self.tp = tuning_parameters self.max_size_in_episodes = tuning_parameters.agent.num_episodes_in_experience_replay self.max_size_in_transitions = tuning_parameters.agent.num_transitions_in_experience_replay self.discount = tuning_parameters.agent.discount - self.buffer = [Episode()] # list of episodes + self.buffer = [memory.Episode()] # list of episodes self.transitions = [] self._length = 1 self._num_transitions = 0 @@ -96,7 +97,7 @@ class EpisodicExperienceReplay(Memory): def store(self, transition): if len(self.buffer) == 0: - self.buffer.append(Episode()) + self.buffer.append(memory.Episode()) last_episode = self.buffer[-1] last_episode.insert(transition) self.transitions.append(transition) @@ -109,7 +110,7 @@ class EpisodicExperienceReplay(Memory): n_step_return=self.tp.agent.n_step) self.buffer[-1].update_measurements_targets(self.tp.agent.num_predicted_steps_ahead) # self.buffer[-1].update_actions_probabilities() # used for off-policy policy optimization - self.buffer.append(Episode()) + self.buffer.append(memory.Episode()) self.enforce_length() @@ -148,7 +149,7 @@ class EpisodicExperienceReplay(Memory): def get(self, index): return self.get_episode(index) - def get_last_complete_episode(self) -> Union[None, Episode]: + def get_last_complete_episode(self) -> typing.Union[None, memory.Episode]: """ Returns the last complete episode in the memory or None if there are no complete episodes :return: None or the last complete episode @@ -170,7 +171,7 @@ class EpisodicExperienceReplay(Memory): def clean(self): self.transitions = [] - self.buffer = [Episode()] + self.buffer = [memory.Episode()] self._length = 1 self._num_transitions = 0 self._num_transitions_in_complete_episodes = 0 diff --git a/memories/memory.py b/memories/memory.py index f4c6a87..88be6c5 100644 --- a/memories/memory.py +++ b/memories/memory.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2017 Intel Corporation +# Copyright (c) 2017 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,10 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# - import numpy as np -import copy -from configurations import * class Memory(object): diff --git a/parallel_actor.py b/parallel_actor.py index c988e49..581a357 100644 --- a/parallel_actor.py +++ b/parallel_actor.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2017 Intel Corporation +# Copyright (c) 2017 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,19 +13,16 @@ # See the License for the specific language governing permissions and # limitations under the License. # - import argparse -import tensorflow as tf -from architectures import * -from environments import * -from agents import * -from utils import * +import os import time -import copy -from logger import * -from configurations import * -from presets import * -import shutil + +import tensorflow as tf + +import agents +import environments +import logger +import presets start_time = time.time() @@ -66,15 +63,15 @@ if __name__ == "__main__": elif args.job_name == "worker": # get tuning parameters - tuning_parameters = json_to_preset(args.load_json_path) + tuning_parameters = presets.json_to_preset(args.load_json_path) # dump documentation if not os.path.exists(tuning_parameters.experiment_path): os.makedirs(tuning_parameters.experiment_path) if tuning_parameters.evaluate_only: - logger.set_dump_dir(tuning_parameters.experiment_path, tuning_parameters.task_id, filename='evaluator') + logger.logger.set_dump_dir(tuning_parameters.experiment_path, tuning_parameters.task_id, filename='evaluator') else: - logger.set_dump_dir(tuning_parameters.experiment_path, tuning_parameters.task_id) + logger.logger.set_dump_dir(tuning_parameters.experiment_path, tuning_parameters.task_id) # multi-threading parameters tuning_parameters.start_time = start_time @@ -98,8 +95,8 @@ if __name__ == "__main__": cluster=cluster) # create the agent and the environment - env_instance = create_environment(tuning_parameters) - exec('agent = ' + tuning_parameters.agent.type + '(env_instance, tuning_parameters, replicated_device=device, ' + env_instance = environments.create_environment(tuning_parameters) + exec('agent = agents.' + tuning_parameters.agent.type + '(env_instance, tuning_parameters, replicated_device=device, ' 'thread_id=tuning_parameters.task_id)') # building the scaffold @@ -169,6 +166,6 @@ if __name__ == "__main__": else: agent.improve() else: - screen.error("Invalid mode requested for parallel_actor.") + logger.screen.error("Invalid mode requested for parallel_actor.") exit(1) diff --git a/plot_atari.py b/plot_atari.py index 8732fb0..7241000 100644 --- a/plot_atari.py +++ b/plot_atari.py @@ -1,8 +1,10 @@ import argparse +import os + import matplotlib import matplotlib.pyplot as plt + from dashboard import SignalsFile -import os class FigureMaker(object): diff --git a/presets.py b/presets.py index dcfb765..3e24c38 100644 --- a/presets.py +++ b/presets.py @@ -13,37 +13,43 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# - -from configurations import * import ast +import json import sys +import agents +import configurations as conf +import environments as env +import exploration_policies as ep +import presets + def json_to_preset(json_path): with open(json_path, 'r') as json_file: run_dict = json.loads(json_file.read()) if run_dict['preset'] is None: - tuning_parameters = Preset(eval(run_dict['agent_type']), eval(run_dict['environment_type']), - eval(run_dict['exploration_policy_type'])) + tuning_parameters = conf.Preset(eval('agents.' + run_dict['agent_type']), + eval('env.' + run_dict['environment_type']), + eval('ep.' + run_dict['exploration_policy_type'])) else: - tuning_parameters = eval(run_dict['preset'])() + tuning_parameters = eval('presets.' + run_dict['preset'])() # Override existing parts of the preset if run_dict['agent_type'] is not None: - tuning_parameters.agent = eval(run_dict['agent_type'])() + tuning_parameters.agent = eval('agents.' + run_dict['agent_type'])() if run_dict['environment_type'] is not None: - tuning_parameters.env = eval(run_dict['environment_type'])() + tuning_parameters.env = eval('env.' + run_dict['environment_type'])() if run_dict['exploration_policy_type'] is not None: - tuning_parameters.exploration = eval(run_dict['exploration_policy_type'])() + tuning_parameters.exploration = eval('ep.' + run_dict['exploration_policy_type'])() # human control if run_dict['play']: tuning_parameters.agent.type = 'HumanAgent' tuning_parameters.env.human_control = True tuning_parameters.num_heatup_steps = 0 - + if run_dict['level']: tuning_parameters.env.level = run_dict['level'] @@ -69,9 +75,9 @@ def json_to_preset(json_path): return tuning_parameters -class Doom_Basic_DQN(Preset): +class Doom_Basic_DQN(conf.Preset): def __init__(self): - Preset.__init__(self, DQN, Doom, ExplorationParameters) + conf.Preset.__init__(self, conf.DQN, conf.Doom, conf.ExplorationParameters) self.env.level = 'basic' self.agent.num_episodes_in_experience_replay = 200 self.learning_rate = 0.00025 @@ -79,9 +85,9 @@ class Doom_Basic_DQN(Preset): self.num_heatup_steps = 1000 -class Doom_Basic_QRDQN(Preset): +class Doom_Basic_QRDQN(conf.Preset): def __init__(self): - Preset.__init__(self, QuantileRegressionDQN, Doom, ExplorationParameters) + conf.Preset.__init__(self, conf.QuantileRegressionDQN, conf.Doom, conf.ExplorationParameters) self.env.level = 'basic' self.agent.num_steps_between_copying_online_weights_to_target = 1000 self.learning_rate = 0.00025 @@ -89,9 +95,9 @@ class Doom_Basic_QRDQN(Preset): self.num_heatup_steps = 1000 -class Doom_Basic_OneStepQ(Preset): +class Doom_Basic_OneStepQ(conf.Preset): def __init__(self): - Preset.__init__(self, NStepQ, Doom, ExplorationParameters) + conf.Preset.__init__(self, conf.NStepQ, conf.Doom, conf.ExplorationParameters) self.env.level = 'basic' self.learning_rate = 0.00025 self.num_heatup_steps = 0 @@ -101,9 +107,9 @@ class Doom_Basic_OneStepQ(Preset): self.agent.targets_horizon = '1-Step' -class Doom_Basic_NStepQ(Preset): +class Doom_Basic_NStepQ(conf.Preset): def __init__(self): - Preset.__init__(self, NStepQ, Doom, ExplorationParameters) + conf.Preset.__init__(self, conf.NStepQ, conf.Doom, conf.ExplorationParameters) self.env.level = 'basic' self.learning_rate = 0.000025 self.num_heatup_steps = 0 @@ -112,9 +118,9 @@ class Doom_Basic_NStepQ(Preset): self.clip_gradients = 1000 -class Doom_Basic_A2C(Preset): +class Doom_Basic_A2C(conf.Preset): def __init__(self): - Preset.__init__(self, ActorCritic, Doom, CategoricalExploration) + conf.Preset.__init__(self, 
conf.ActorCritic, conf.Doom, conf.CategoricalExploration) self.env.level = 'basic' self.agent.policy_gradient_rescaler = 'A_VALUE' self.learning_rate = 0.00025 @@ -122,19 +128,19 @@ class Doom_Basic_A2C(Preset): self.env.reward_scaling = 100. -class Doom_Basic_Dueling_DDQN(Preset): +class Doom_Basic_Dueling_DDQN(conf.Preset): def __init__(self): - Preset.__init__(self, DDQN, Doom, ExplorationParameters) + conf.Preset.__init__(self, conf.DDQN, conf.Doom, conf.ExplorationParameters) self.env.level = 'basic' - self.agent.output_types = [OutputTypes.DuelingQ] + self.agent.output_types = [conf.OutputTypes.DuelingQ] self.agent.num_episodes_in_experience_replay = 200 self.learning_rate = 0.00025 self.agent.num_steps_between_copying_online_weights_to_target = 1000 self.num_heatup_steps = 1000 -class Doom_Basic_Dueling_DQN(Preset): +class Doom_Basic_Dueling_DQN(conf.Preset): def __init__(self): - Preset.__init__(self, DuelingDQN, Doom, ExplorationParameters) + conf.Preset.__init__(self, conf.DuelingDQN, conf.Doom, conf.ExplorationParameters) self.env.level = 'basic' self.agent.num_episodes_in_experience_replay = 200 self.learning_rate = 0.00025 @@ -142,11 +148,11 @@ class Doom_Basic_Dueling_DQN(Preset): self.num_heatup_steps = 1000 -class CartPole_Dueling_DDQN(Preset): +class CartPole_Dueling_DDQN(conf.Preset): def __init__(self): - Preset.__init__(self, DDQN, GymVectorObservation, ExplorationParameters) + conf.Preset.__init__(self, conf.DDQN, conf.GymVectorObservation, conf.ExplorationParameters) self.env.level = 'CartPole-v0' - self.agent.output_types = [OutputTypes.DuelingQ] + self.agent.output_types = [conf.OutputTypes.DuelingQ] self.agent.num_episodes_in_experience_replay = 200 self.learning_rate = 0.00025 self.agent.num_steps_between_copying_online_weights_to_target = 100 @@ -159,9 +165,9 @@ class CartPole_Dueling_DDQN(Preset): self.test_min_return_threshold = 150 -class Doom_Health_MMC(Preset): +class Doom_Health_MMC(conf.Preset): def __init__(self): - Preset.__init__(self, MMC, Doom, ExplorationParameters) + conf.Preset.__init__(self, conf.MMC, conf.Doom, conf.ExplorationParameters) self.env.level = 'HEALTH_GATHERING' self.agent.num_episodes_in_experience_replay = 200 self.learning_rate = 0.00025 @@ -169,9 +175,9 @@ class Doom_Health_MMC(Preset): self.num_heatup_steps = 1000 self.exploration.epsilon_decay_steps = 10000 -class CartPole_MMC(Preset): +class CartPole_MMC(conf.Preset): def __init__(self): - Preset.__init__(self, MMC, GymVectorObservation, ExplorationParameters) + conf.Preset.__init__(self, conf.MMC, conf.GymVectorObservation, conf.ExplorationParameters) self.env.level = 'CartPole-v0' self.agent.num_steps_between_copying_online_weights_to_target = 100 self.learning_rate = 0.00025 @@ -185,9 +191,9 @@ class CartPole_MMC(Preset): self.test_min_return_threshold = 150 -class CartPole_PAL(Preset): +class CartPole_PAL(conf.Preset): def __init__(self): - Preset.__init__(self, PAL, GymVectorObservation, ExplorationParameters) + conf.Preset.__init__(self, conf.PAL, conf.GymVectorObservation, conf.ExplorationParameters) self.env.level = 'CartPole-v0' self.agent.num_steps_between_copying_online_weights_to_target = 100 self.learning_rate = 0.00025 @@ -200,9 +206,9 @@ class CartPole_PAL(Preset): self.test_max_step_threshold = 100 self.test_min_return_threshold = 150 -class Doom_Basic_DFP(Preset): +class Doom_Basic_DFP(conf.Preset): def __init__(self): - Preset.__init__(self, DFP, Doom, ExplorationParameters) + conf.Preset.__init__(self, conf.DFP, conf.Doom, conf.ExplorationParameters) 
self.env.level = 'BASIC' self.agent.num_episodes_in_experience_replay = 200 self.learning_rate = 0.0001 @@ -213,9 +219,9 @@ class Doom_Basic_DFP(Preset): # self.agent.num_consecutive_playing_steps = 10 -class Doom_Health_DFP(Preset): +class Doom_Health_DFP(conf.Preset): def __init__(self): - Preset.__init__(self, DFP, Doom, ExplorationParameters) + conf.Preset.__init__(self, conf.DFP, conf.Doom, conf.ExplorationParameters) self.env.level = 'HEALTH_GATHERING' self.agent.num_episodes_in_experience_replay = 200 self.learning_rate = 0.00025 @@ -224,9 +230,9 @@ class Doom_Health_DFP(Preset): self.agent.use_accumulated_reward_as_measurement = True -class Doom_Deadly_Corridor_Bootstrapped_DQN(Preset): +class Doom_Deadly_Corridor_Bootstrapped_DQN(conf.Preset): def __init__(self): - Preset.__init__(self, BootstrappedDQN, Doom, BootstrappedDQNExploration) + conf.Preset.__init__(self, conf.BootstrappedDQN, conf.Doom, conf.BootstrappedDQNExploration) self.env.level = 'deadly_corridor' self.agent.num_episodes_in_experience_replay = 200 self.learning_rate = 0.00025 @@ -234,9 +240,9 @@ class Doom_Deadly_Corridor_Bootstrapped_DQN(Preset): self.num_heatup_steps = 1000 -class CartPole_Bootstrapped_DQN(Preset): +class CartPole_Bootstrapped_DQN(conf.Preset): def __init__(self): - Preset.__init__(self, BootstrappedDQN, GymVectorObservation, BootstrappedDQNExploration) + conf.Preset.__init__(self, conf.BootstrappedDQN, conf.GymVectorObservation, conf.BootstrappedDQNExploration) self.env.level = 'CartPole-v0' self.agent.num_steps_between_copying_online_weights_to_target = 200 self.learning_rate = 0.00025 @@ -249,9 +255,9 @@ class CartPole_Bootstrapped_DQN(Preset): self.test_max_step_threshold = 200 self.test_min_return_threshold = 150 -class CartPole_PG(Preset): +class CartPole_PG(conf.Preset): def __init__(self): - Preset.__init__(self, PolicyGradient, GymVectorObservation, CategoricalExploration) + conf.Preset.__init__(self, conf.PolicyGradient, conf.GymVectorObservation, conf.CategoricalExploration) self.env.level = 'CartPole-v0' self.agent.policy_gradient_rescaler = 'FUTURE_RETURN_NORMALIZED_BY_TIMESTEP' self.learning_rate = 0.001 @@ -263,9 +269,9 @@ class CartPole_PG(Preset): self.test_min_return_threshold = 150 -class CartPole_PPO(Preset): +class CartPole_PPO(conf.Preset): def __init__(self): - Preset.__init__(self, PPO, GymVectorObservation, CategoricalExploration) + conf.Preset.__init__(self, conf.PPO, conf.GymVectorObservation, conf.CategoricalExploration) self.env.level = 'CartPole-v0' self.learning_rate = 0.0001 self.num_heatup_steps = 0 @@ -281,9 +287,9 @@ class CartPole_PPO(Preset): self.test_max_step_threshold = 200 self.test_min_return_threshold = 150 -class CartPole_ClippedPPO(Preset): +class CartPole_ClippedPPO(conf.Preset): def __init__(self): - Preset.__init__(self, ClippedPPO, GymVectorObservation, CategoricalExploration) + conf.Preset.__init__(self, conf.ClippedPPO, conf.GymVectorObservation, conf.CategoricalExploration) self.env.level = 'CartPole-v0' self.learning_rate = 0.0001 self.num_heatup_steps = 0 @@ -301,9 +307,9 @@ class CartPole_ClippedPPO(Preset): self.test_max_step_threshold = 200 self.test_min_return_threshold = 150 -class CartPole_A2C(Preset): +class CartPole_A2C(conf.Preset): def __init__(self): - Preset.__init__(self, ActorCritic, GymVectorObservation, CategoricalExploration) + conf.Preset.__init__(self, conf.ActorCritic, conf.GymVectorObservation, conf.CategoricalExploration) self.env.level = 'CartPole-v0' self.agent.policy_gradient_rescaler = 'A_VALUE' self.learning_rate = 
0.001 @@ -316,9 +322,9 @@ class CartPole_A2C(Preset): self.test_min_return_threshold = 150 -class CartPole_OneStepQ(Preset): +class CartPole_OneStepQ(conf.Preset): def __init__(self): - Preset.__init__(self, NStepQ, GymVectorObservation, ExplorationParameters) + conf.Preset.__init__(self, conf.NStepQ, conf.GymVectorObservation, conf.ExplorationParameters) self.env.level = 'CartPole-v0' self.agent.num_steps_between_copying_online_weights_to_target = 100 self.learning_rate = 0.0001 @@ -327,9 +333,9 @@ class CartPole_OneStepQ(Preset): self.agent.targets_horizon = '1-Step' -class CartPole_NStepQ(Preset): +class CartPole_NStepQ(conf.Preset): def __init__(self): - Preset.__init__(self, NStepQ, GymVectorObservation, ExplorationParameters) + conf.Preset.__init__(self, conf.NStepQ, conf.GymVectorObservation, conf.ExplorationParameters) self.env.level = 'CartPole-v0' self.agent.num_steps_between_copying_online_weights_to_target = 100 self.learning_rate = 0.0001 @@ -343,9 +349,9 @@ class CartPole_NStepQ(Preset): self.test_min_return_threshold = 150 self.test_num_workers = 8 -class CartPole_DQN(Preset): +class CartPole_DQN(conf.Preset): def __init__(self): - Preset.__init__(self, DQN, GymVectorObservation, ExplorationParameters) + conf.Preset.__init__(self, conf.DQN, conf.GymVectorObservation, conf.ExplorationParameters) self.env.level = 'CartPole-v0' self.agent.num_steps_between_copying_online_weights_to_target = 100 self.learning_rate = 0.00025 @@ -359,9 +365,9 @@ class CartPole_DQN(Preset): self.test_min_return_threshold = 150 -class CartPole_C51(Preset): +class CartPole_C51(conf.Preset): def __init__(self): - Preset.__init__(self, CategoricalDQN, GymVectorObservation, ExplorationParameters) + conf.Preset.__init__(self, conf.CategoricalDQN, conf.GymVectorObservation, conf.ExplorationParameters) self.env.level = 'CartPole-v0' self.agent.num_steps_between_copying_online_weights_to_target = 100 self.learning_rate = 0.00025 @@ -378,9 +384,9 @@ class CartPole_C51(Preset): self.test_min_return_threshold = 150 -class CartPole_QRDQN(Preset): +class CartPole_QRDQN(conf.Preset): def __init__(self): - Preset.__init__(self, QuantileRegressionDQN, GymVectorObservation, ExplorationParameters) + conf.Preset.__init__(self, conf.QuantileRegressionDQN, conf.GymVectorObservation, conf.ExplorationParameters) self.env.level = 'CartPole-v0' self.agent.num_steps_between_copying_online_weights_to_target = 100 self.learning_rate = 0.00025 @@ -394,9 +400,9 @@ class CartPole_QRDQN(Preset): # This a very resource intensive preset, and might easily blow up your RAM (> 100GB of usage). # Try reducing the number of transitions in the experience replay (50e3 might be a reasonable number to start with), # so to make sure it fits your RAM. 
-class Breakout_DQN(Preset): +class Breakout_DQN(conf.Preset): def __init__(self): - Preset.__init__(self, DQN, Atari, ExplorationParameters) + conf.Preset.__init__(self, conf.DQN, conf.Atari, conf.ExplorationParameters) self.env.level = 'BreakoutDeterministic-v4' self.agent.num_steps_between_copying_online_weights_to_target = 10000 self.learning_rate = 0.00025 @@ -415,9 +421,9 @@ class Breakout_DQN(Preset): # self.rescaling_interpolation_type = 'nearest' # TODO: remove -class Breakout_DDQN(Preset): +class Breakout_DDQN(conf.Preset): def __init__(self): - Preset.__init__(self, DDQN, Atari, ExplorationParameters) + conf.Preset.__init__(self, conf.DDQN, conf.Atari, conf.ExplorationParameters) self.env.level = 'BreakoutDeterministic-v4' self.agent.num_steps_between_copying_online_weights_to_target = 30000 self.learning_rate = 0.00025 @@ -434,11 +440,11 @@ class Breakout_DDQN(Preset): self.agent.replace_mse_with_huber_loss = True -class Breakout_Dueling_DDQN(Preset): +class Breakout_Dueling_DDQN(conf.Preset): def __init__(self): - Preset.__init__(self, DDQN, Atari, ExplorationParameters) + conf.Preset.__init__(self, conf.DDQN, conf.Atari, conf.ExplorationParameters) self.env.level = 'BreakoutDeterministic-v4' - self.agent.output_types = [OutputTypes.DuelingQ] + self.agent.output_types = [conf.OutputTypes.DuelingQ] self.agent.num_steps_between_copying_online_weights_to_target = 30000 self.learning_rate = 0.00025 self.agent.num_transitions_in_experience_replay = 1000000 @@ -453,9 +459,9 @@ class Breakout_Dueling_DDQN(Preset): self.evaluate_every_x_episodes = 25 self.agent.replace_mse_with_huber_loss = True -class Alien_DQN(Preset): +class Alien_DQN(conf.Preset): def __init__(self): - Preset.__init__(self, DQN, Atari, ExplorationParameters) + conf.Preset.__init__(self, conf.DQN, conf.Atari, conf.ExplorationParameters) self.env.level = 'AlienDeterministic-v4' self.agent.num_steps_between_copying_online_weights_to_target = 10000 self.learning_rate = 0.00025 @@ -470,9 +476,9 @@ class Alien_DQN(Preset): self.evaluate_every_x_episodes = 5 -class Breakout_C51(Preset): +class Breakout_C51(conf.Preset): def __init__(self): - Preset.__init__(self, CategoricalDQN, Atari, ExplorationParameters) + conf.Preset.__init__(self, conf.CategoricalDQN, conf.Atari, conf.ExplorationParameters) self.env.level = 'BreakoutDeterministic-v4' self.agent.num_steps_between_copying_online_weights_to_target = 10000 self.learning_rate = 0.00025 @@ -490,9 +496,9 @@ class Breakout_C51(Preset): -class Breakout_QRDQN(Preset): +class Breakout_QRDQN(conf.Preset): def __init__(self): - Preset.__init__(self, QuantileRegressionDQN, Atari, ExplorationParameters) + conf.Preset.__init__(self, conf.QuantileRegressionDQN, conf.Atari, conf.ExplorationParameters) self.env.level = 'BreakoutDeterministic-v4' self.agent.num_steps_between_copying_online_weights_to_target = 10000 self.learning_rate = 0.00025 @@ -507,9 +513,9 @@ class Breakout_QRDQN(Preset): self.evaluate_every_x_episodes = 50 -class Atari_DQN_TestBench(Preset): +class Atari_DQN_TestBench(conf.Preset): def __init__(self): - Preset.__init__(self, DQN, Atari, ExplorationParameters) + conf.Preset.__init__(self, conf.DQN, conf.Atari, conf.ExplorationParameters) self.env.level = 'BreakoutDeterministic-v4' self.agent.num_steps_between_copying_online_weights_to_target = 10000 self.learning_rate = 0.00025 @@ -525,9 +531,9 @@ class Atari_DQN_TestBench(Preset): self.num_training_iterations = 500 -class Doom_Basic_PG(Preset): +class Doom_Basic_PG(conf.Preset): def __init__(self): - 
Preset.__init__(self, PolicyGradient, Doom, CategoricalExploration) + conf.Preset.__init__(self, conf.PolicyGradient, conf.Doom, conf.CategoricalExploration) self.env.level = 'basic' self.agent.policy_gradient_rescaler = 'FUTURE_RETURN_NORMALIZED_BY_TIMESTEP' self.learning_rate = 0.00001 @@ -535,18 +541,18 @@ class Doom_Basic_PG(Preset): self.agent.beta_entropy = 0.01 -class InvertedPendulum_PG(Preset): +class InvertedPendulum_PG(conf.Preset): def __init__(self): - Preset.__init__(self, PolicyGradient, GymVectorObservation, AdditiveNoiseExploration) + conf.Preset.__init__(self, conf.PolicyGradient, conf.GymVectorObservation, conf.AdditiveNoiseExploration) self.env.level = 'InvertedPendulum-v1' self.agent.policy_gradient_rescaler = 'FUTURE_RETURN_NORMALIZED_BY_TIMESTEP' self.learning_rate = 0.001 self.num_heatup_steps = 0 -class Pendulum_PG(Preset): +class Pendulum_PG(conf.Preset): def __init__(self): - Preset.__init__(self, PolicyGradient, GymVectorObservation, AdditiveNoiseExploration) + conf.Preset.__init__(self, conf.PolicyGradient, conf.GymVectorObservation, conf.AdditiveNoiseExploration) self.env.level = 'Pendulum-v0' self.agent.policy_gradient_rescaler = 'FUTURE_RETURN_NORMALIZED_BY_TIMESTEP' self.learning_rate = 0.001 @@ -554,9 +560,9 @@ class Pendulum_PG(Preset): self.agent.apply_gradients_every_x_episodes = 10 -class Pendulum_DDPG(Preset): +class Pendulum_DDPG(conf.Preset): def __init__(self): - Preset.__init__(self, DDPG, GymVectorObservation, AdditiveNoiseExploration) + conf.Preset.__init__(self, conf.DDPG, conf.GymVectorObservation, conf.AdditiveNoiseExploration) self.env.level = 'Pendulum-v0' self.learning_rate = 0.001 self.num_heatup_steps = 1000 @@ -567,18 +573,18 @@ class Pendulum_DDPG(Preset): self.test_min_return_threshold = -250 -class InvertedPendulum_DDPG(Preset): +class InvertedPendulum_DDPG(conf.Preset): def __init__(self): - Preset.__init__(self, DDPG, GymVectorObservation, OUExploration) + conf.Preset.__init__(self, conf.DDPG, conf.GymVectorObservation, conf.OUExploration) self.env.level = 'InvertedPendulum-v1' self.learning_rate = 0.00025 self.num_heatup_steps = 100 self.env.normalize_observation = True -class InvertedPendulum_PPO(Preset): +class InvertedPendulum_PPO(conf.Preset): def __init__(self): - Preset.__init__(self, PPO, GymVectorObservation, ExplorationParameters) + conf.Preset.__init__(self, conf.PPO, conf.GymVectorObservation, conf.ExplorationParameters) self.env.level = 'InvertedPendulum-v1' self.learning_rate = 0.001 self.num_heatup_steps = 0 @@ -595,9 +601,9 @@ class InvertedPendulum_PPO(Preset): self.env.normalize_observation = True -class Pendulum_ClippedPPO(Preset): +class Pendulum_ClippedPPO(conf.Preset): def __init__(self): - Preset.__init__(self, ClippedPPO, GymVectorObservation, ExplorationParameters) + conf.Preset.__init__(self, conf.ClippedPPO, conf.GymVectorObservation, conf.ExplorationParameters) self.env.level = 'Pendulum-v0' self.learning_rate = 0.00005 self.num_heatup_steps = 0 @@ -613,9 +619,9 @@ class Pendulum_ClippedPPO(Preset): self.agent.beta_entropy = 0.01 -class Hopper_DPPO(Preset): +class Hopper_DPPO(conf.Preset): def __init__(self): - Preset.__init__(self, PPO, GymVectorObservation, ExplorationParameters) + conf.Preset.__init__(self, conf.PPO, conf.GymVectorObservation, conf.ExplorationParameters) self.env.level = 'Hopper-v1' self.learning_rate = 0.00001 self.num_heatup_steps = 0 @@ -631,9 +637,9 @@ class Hopper_DPPO(Preset): self.env.normalize_observation = True -class InvertedPendulum_ClippedPPO(Preset): +class 
InvertedPendulum_ClippedPPO(conf.Preset): def __init__(self): - Preset.__init__(self, ClippedPPO, GymVectorObservation, ExplorationParameters) + conf.Preset.__init__(self, conf.ClippedPPO, conf.GymVectorObservation, conf.ExplorationParameters) self.env.level = 'InvertedPendulum-v1' self.learning_rate = 0.00005 self.num_heatup_steps = 0 @@ -647,9 +653,9 @@ class InvertedPendulum_ClippedPPO(Preset): self.agent.optimizer_type = 'Adam' self.env.normalize_observation = True -class Humanoid_ClippedPPO(Preset): +class Humanoid_ClippedPPO(conf.Preset): def __init__(self): - Preset.__init__(self, ClippedPPO, GymVectorObservation, ExplorationParameters) + conf.Preset.__init__(self, conf.ClippedPPO, conf.GymVectorObservation, conf.ExplorationParameters) self.env.level = 'Humanoid-v1' self.learning_rate = 0.0001 self.num_heatup_steps = 0 @@ -664,9 +670,9 @@ class Humanoid_ClippedPPO(Preset): self.env.normalize_observation = True -class Hopper_ClippedPPO(Preset): +class Hopper_ClippedPPO(conf.Preset): def __init__(self): - Preset.__init__(self, ClippedPPO, GymVectorObservation, ExplorationParameters) + conf.Preset.__init__(self, conf.ClippedPPO, conf.GymVectorObservation, conf.ExplorationParameters) self.env.level = 'Hopper-v1' self.learning_rate = 0.0001 self.num_heatup_steps = 0 @@ -681,9 +687,9 @@ class Hopper_ClippedPPO(Preset): self.env.normalize_observation = True -class InvertedPendulum_ClippedPPO_Roboschool(Preset): +class InvertedPendulum_ClippedPPO_Roboschool(conf.Preset): def __init__(self): - Preset.__init__(self, ClippedPPO, Roboschool, ExplorationParameters) + conf.Preset.__init__(self, conf.ClippedPPO, conf.Roboschool, conf.ExplorationParameters) self.env.level = 'RoboschoolInvertedPendulum-v1' self.learning_rate = 0.0001 self.num_heatup_steps = 0 @@ -698,9 +704,9 @@ class InvertedPendulum_ClippedPPO_Roboschool(Preset): self.env.normalize_observation = True -class HalfCheetah_ClippedPPO_Roboschool(Preset): +class HalfCheetah_ClippedPPO_Roboschool(conf.Preset): def __init__(self): - Preset.__init__(self, ClippedPPO, Roboschool, ExplorationParameters) + conf.Preset.__init__(self, conf.ClippedPPO, conf.Roboschool, conf.ExplorationParameters) self.env.level = 'RoboschoolHalfCheetah-v1' self.learning_rate = 0.0001 self.num_heatup_steps = 0 @@ -715,9 +721,9 @@ class HalfCheetah_ClippedPPO_Roboschool(Preset): self.env.normalize_observation = True -class Hopper_ClippedPPO_Roboschool(Preset): +class Hopper_ClippedPPO_Roboschool(conf.Preset): def __init__(self): - Preset.__init__(self, ClippedPPO, Roboschool, ExplorationParameters) + conf.Preset.__init__(self, conf.ClippedPPO, conf.Roboschool, conf.ExplorationParameters) self.env.level = 'RoboschoolHopper-v1' self.learning_rate = 0.0001 self.num_heatup_steps = 0 @@ -732,9 +738,9 @@ class Hopper_ClippedPPO_Roboschool(Preset): self.env.normalize_observation = True -class Ant_ClippedPPO(Preset): +class Ant_ClippedPPO(conf.Preset): def __init__(self): - Preset.__init__(self, ClippedPPO, GymVectorObservation, ExplorationParameters) + conf.Preset.__init__(self, conf.ClippedPPO, conf.GymVectorObservation, conf.ExplorationParameters) self.env.level = 'Ant-v1' self.learning_rate = 0.0001 self.num_heatup_steps = 0 @@ -749,9 +755,9 @@ class Ant_ClippedPPO(Preset): self.env.normalize_observation = True -class Hopper_ClippedPPO_Distributed(Preset): +class Hopper_ClippedPPO_Distributed(conf.Preset): def __init__(self): - Preset.__init__(self, ClippedPPO, GymVectorObservation, ExplorationParameters) + conf.Preset.__init__(self, conf.ClippedPPO, 
conf.GymVectorObservation, conf.ExplorationParameters) self.env.level = 'Hopper-v1' self.learning_rate = 0.00001 self.num_heatup_steps = 0 @@ -766,17 +772,17 @@ class Hopper_ClippedPPO_Distributed(Preset): self.env.normalize_observation = True -class Hopper_DDPG_Roboschool(Preset): +class Hopper_DDPG_Roboschool(conf.Preset): def __init__(self): - Preset.__init__(self, DDPG, Roboschool, OUExploration) + conf.Preset.__init__(self, conf.DDPG, conf.Roboschool, conf.OUExploration) self.env.level = 'RoboschoolHopper-v1' self.learning_rate = 0.00025 self.num_heatup_steps = 100 -class Hopper_PPO_Roboschool(Preset): +class Hopper_PPO_Roboschool(conf.Preset): def __init__(self): - Preset.__init__(self, PPO, Roboschool, ExplorationParameters) + conf.Preset.__init__(self, conf.PPO, conf.Roboschool, conf.ExplorationParameters) self.env.level = 'RoboschoolHopper-v1' self.learning_rate = 0.001 self.num_heatup_steps = 0 @@ -790,27 +796,27 @@ class Hopper_PPO_Roboschool(Preset): self.agent.optimizer_type = 'LBFGS' -class Hopper_DDPG(Preset): +class Hopper_DDPG(conf.Preset): def __init__(self): - Preset.__init__(self, DDPG, GymVectorObservation, OUExploration) + conf.Preset.__init__(self, conf.DDPG, conf.GymVectorObservation, conf.OUExploration) self.env.level = 'Hopper-v1' self.learning_rate = 0.00025 self.num_heatup_steps = 100 self.env.normalize_observation = True -class Hopper_DDDPG(Preset): +class Hopper_DDDPG(conf.Preset): def __init__(self): - Preset.__init__(self, DDDPG, GymVectorObservation, OUExploration) + conf.Preset.__init__(self, conf.DDDPG, conf.GymVectorObservation, conf.OUExploration) self.env.level = 'Hopper-v1' self.learning_rate = 0.00025 self.num_heatup_steps = 100 self.env.normalize_observation = True -class Hopper_PPO(Preset): +class Hopper_PPO(conf.Preset): def __init__(self): - Preset.__init__(self, PPO, GymVectorObservation, ExplorationParameters) + conf.Preset.__init__(self, conf.PPO, conf.GymVectorObservation, conf.ExplorationParameters) self.env.level = 'Hopper-v1' self.learning_rate = 0.001 self.num_heatup_steps = 0 @@ -826,9 +832,9 @@ class Hopper_PPO(Preset): self.env.normalize_observation = True -class Walker_PPO(Preset): +class Walker_PPO(conf.Preset): def __init__(self): - Preset.__init__(self, PPO, GymVectorObservation, AdditiveNoiseExploration) + conf.Preset.__init__(self, conf.PPO, conf.GymVectorObservation, conf.AdditiveNoiseExploration) self.env.level = 'Walker2d-v1' self.learning_rate = 0.001 self.num_heatup_steps = 0 @@ -843,27 +849,27 @@ class Walker_PPO(Preset): self.env.normalize_observation = True -class HalfCheetah_DDPG(Preset): +class HalfCheetah_DDPG(conf.Preset): def __init__(self): - Preset.__init__(self, DDPG, GymVectorObservation, OUExploration) + conf.Preset.__init__(self, conf.DDPG, conf.GymVectorObservation, conf.OUExploration) self.env.level = 'HalfCheetah-v1' self.learning_rate = 0.00025 self.num_heatup_steps = 1000 self.env.normalize_observation = True -class Ant_DDPG(Preset): +class Ant_DDPG(conf.Preset): def __init__(self): - Preset.__init__(self, DDPG, GymVectorObservation, OUExploration) + conf.Preset.__init__(self, conf.DDPG, conf.GymVectorObservation, conf.OUExploration) self.env.level = 'Ant-v1' self.learning_rate = 0.00025 self.num_heatup_steps = 1000 self.env.normalize_observation = True -class Pendulum_NAF(Preset): +class Pendulum_NAF(conf.Preset): def __init__(self): - Preset.__init__(self, NAF, GymVectorObservation, AdditiveNoiseExploration) + conf.Preset.__init__(self, conf.NAF, conf.GymVectorObservation, conf.AdditiveNoiseExploration) 
self.env.level = 'Pendulum-v0' self.learning_rate = 0.001 self.num_heatup_steps = 1000 @@ -875,18 +881,18 @@ class Pendulum_NAF(Preset): self.test_min_return_threshold = -250 -class InvertedPendulum_NAF(Preset): +class InvertedPendulum_NAF(conf.Preset): def __init__(self): - Preset.__init__(self, NAF, GymVectorObservation, AdditiveNoiseExploration) + conf.Preset.__init__(self, conf.NAF, conf.GymVectorObservation, conf.AdditiveNoiseExploration) self.env.level = 'InvertedPendulum-v1' self.learning_rate = 0.001 self.num_heatup_steps = 1000 self.batch_size = 100 -class Hopper_NAF(Preset): +class Hopper_NAF(conf.Preset): def __init__(self): - Preset.__init__(self, NAF, GymVectorObservation, AdditiveNoiseExploration) + conf.Preset.__init__(self, conf.NAF, conf.GymVectorObservation, conf.AdditiveNoiseExploration) self.env.level = 'Hopper-v1' self.learning_rate = 0.0005 self.num_heatup_steps = 1000 @@ -895,9 +901,9 @@ class Hopper_NAF(Preset): self.env.normalize_observation = True -class CartPole_NEC(Preset): +class CartPole_NEC(conf.Preset): def __init__(self): - Preset.__init__(self, NEC, GymVectorObservation, ExplorationParameters) + conf.Preset.__init__(self, conf.NEC, conf.GymVectorObservation, conf.ExplorationParameters) self.env.level = 'CartPole-v0' self.learning_rate = 0.00025 self.agent.num_episodes_in_experience_replay = 200 @@ -912,9 +918,9 @@ class CartPole_NEC(Preset): self.test_min_return_threshold = 150 -class Doom_Basic_NEC(Preset): +class Doom_Basic_NEC(conf.Preset): def __init__(self): - Preset.__init__(self, NEC, Doom, ExplorationParameters) + conf.Preset.__init__(self, conf.NEC, conf.Doom, conf.ExplorationParameters) self.env.level = 'basic' self.learning_rate = 0.00001 self.agent.num_transitions_in_experience_replay = 100000 @@ -928,9 +934,9 @@ class Doom_Basic_NEC(Preset): -class Montezuma_NEC(Preset): +class Montezuma_NEC(conf.Preset): def __init__(self): - Preset.__init__(self, NEC, Atari, ExplorationParameters) + conf.Preset.__init__(self, conf.NEC, conf.Atari, conf.ExplorationParameters) self.env.level = 'MontezumaRevenge-v0' self.agent.num_episodes_in_experience_replay = 200 self.learning_rate = 0.00025 @@ -938,9 +944,9 @@ class Montezuma_NEC(Preset): self.agent.num_playing_steps_between_two_training_steps = 1 -class Breakout_NEC(Preset): +class Breakout_NEC(conf.Preset): def __init__(self): - Preset.__init__(self, NEC, Atari, ExplorationParameters) + conf.Preset.__init__(self, conf.NEC, conf.Atari, conf.ExplorationParameters) self.env.level = 'BreakoutDeterministic-v4' self.agent.num_steps_between_copying_online_weights_to_target = 10000 self.learning_rate = 0.00001 @@ -958,9 +964,9 @@ class Breakout_NEC(Preset): self.seed = 123 -class Doom_Health_NEC(Preset): +class Doom_Health_NEC(conf.Preset): def __init__(self): - Preset.__init__(self, NEC, Doom, ExplorationParameters) + conf.Preset.__init__(self, conf.NEC, conf.Doom, conf.ExplorationParameters) self.env.level = 'HEALTH_GATHERING' self.agent.num_episodes_in_experience_replay = 200 self.learning_rate = 0.00025 @@ -969,9 +975,9 @@ class Doom_Health_NEC(Preset): self.agent.num_playing_steps_between_two_training_steps = 1 -class Doom_Health_DQN(Preset): +class Doom_Health_DQN(conf.Preset): def __init__(self): - Preset.__init__(self, DQN, Doom, ExplorationParameters) + conf.Preset.__init__(self, conf.DQN, conf.Doom, conf.ExplorationParameters) self.env.level = 'HEALTH_GATHERING' self.agent.num_episodes_in_experience_replay = 200 self.learning_rate = 0.00025 @@ -980,22 +986,22 @@ class Doom_Health_DQN(Preset): 
self.agent.num_steps_between_copying_online_weights_to_target = 1000 -class Pong_NEC_LSTM(Preset): +class Pong_NEC_LSTM(conf.Preset): def __init__(self): - Preset.__init__(self, NEC, Atari, ExplorationParameters) + conf.Preset.__init__(self, conf.NEC, conf.Atari, conf.ExplorationParameters) self.env.level = 'PongDeterministic-v4' self.learning_rate = 0.001 self.agent.num_transitions_in_experience_replay = 1000000 - self.agent.middleware_type = MiddlewareTypes.LSTM + self.agent.middleware_type = conf.MiddlewareTypes.LSTM self.exploration.initial_epsilon = 0.5 self.exploration.final_epsilon = 0.1 self.exploration.epsilon_decay_steps = 1000000 self.num_heatup_steps = 500 -class Pong_NEC(Preset): +class Pong_NEC(conf.Preset): def __init__(self): - Preset.__init__(self, NEC, Atari, ExplorationParameters) + conf.Preset.__init__(self, conf.NEC, conf.Atari, conf.ExplorationParameters) self.env.level = 'PongDeterministic-v4' self.learning_rate = 0.00001 self.agent.num_transitions_in_experience_replay = 100000 @@ -1012,9 +1018,9 @@ class Pong_NEC(Preset): # self.seed = 123 -class Alien_NEC(Preset): +class Alien_NEC(conf.Preset): def __init__(self): - Preset.__init__(self, NEC, Atari, ExplorationParameters) + conf.Preset.__init__(self, conf.NEC, conf.Atari, conf.ExplorationParameters) self.env.level = 'AlienDeterministic-v4' self.learning_rate = 0.0001 self.agent.num_transitions_in_experience_replay = 100000 @@ -1029,9 +1035,9 @@ class Alien_NEC(Preset): self.seed = 123 -class Pong_DQN(Preset): +class Pong_DQN(conf.Preset): def __init__(self): - Preset.__init__(self, DQN, Atari, ExplorationParameters) + conf.Preset.__init__(self, conf.DQN, conf.Atari, conf.ExplorationParameters) self.env.level = 'PongDeterministic-v4' self.agent.num_steps_between_copying_online_weights_to_target = 10000 self.learning_rate = 0.00025 @@ -1047,9 +1053,9 @@ class Pong_DQN(Preset): self.seed = 123 -class CartPole_A3C(Preset): +class CartPole_A3C(conf.Preset): def __init__(self): - Preset.__init__(self, ActorCritic, GymVectorObservation, CategoricalExploration) + conf.Preset.__init__(self, conf.ActorCritic, conf.GymVectorObservation, conf.CategoricalExploration) self.env.level = 'CartPole-v0' self.agent.policy_gradient_rescaler = 'GAE' self.learning_rate = 0.0001 @@ -1060,7 +1066,7 @@ class CartPole_A3C(Preset): self.agent.gae_lambda = 1 self.agent.beta_entropy = 0.01 self.agent.num_steps_between_gradient_updates = 5 - self.agent.middleware_type = MiddlewareTypes.FC + self.agent.middleware_type = conf.MiddlewareTypes.FC self.test = True self.test_max_step_threshold = 1000 @@ -1068,9 +1074,9 @@ class CartPole_A3C(Preset): self.test_num_workers = 8 -class MountainCar_A3C(Preset): +class MountainCar_A3C(conf.Preset): def __init__(self): - Preset.__init__(self, ActorCritic, GymVectorObservation, CategoricalExploration) + conf.Preset.__init__(self, conf.ActorCritic, conf.GymVectorObservation, conf.CategoricalExploration) self.env.level = 'MountainCar-v0' self.agent.policy_gradient_rescaler = 'GAE' self.learning_rate = 0.0001 @@ -1081,12 +1087,12 @@ class MountainCar_A3C(Preset): self.agent.gae_lambda = 1 self.agent.beta_entropy = 0.01 self.agent.num_steps_between_gradient_updates = 5 - self.agent.middleware_type = MiddlewareTypes.FC + self.agent.middleware_type = conf.MiddlewareTypes.FC -class InvertedPendulum_A3C(Preset): +class InvertedPendulum_A3C(conf.Preset): def __init__(self): - Preset.__init__(self, ActorCritic, GymVectorObservation, EntropyExploration) + conf.Preset.__init__(self, conf.ActorCritic, 
conf.GymVectorObservation, conf.EntropyExploration) self.env.level = 'InvertedPendulum-v1' self.agent.policy_gradient_rescaler = 'A_VALUE' self.agent.optimizer_type = 'Adam' @@ -1099,12 +1105,12 @@ class InvertedPendulum_A3C(Preset): self.agent.gae_lambda = 1 self.agent.beta_entropy = 0.005 self.clip_gradients = 40 - self.agent.middleware_type = MiddlewareTypes.FC + self.agent.middleware_type = conf.MiddlewareTypes.FC -class Hopper_A3C(Preset): +class Hopper_A3C(conf.Preset): def __init__(self): - Preset.__init__(self, ActorCritic, GymVectorObservation, EntropyExploration) + conf.Preset.__init__(self, conf.ActorCritic, conf.GymVectorObservation, conf.EntropyExploration) self.env.level = 'Hopper-v1' self.agent.policy_gradient_rescaler = 'GAE' self.agent.optimizer_type = 'Adam' @@ -1117,7 +1123,7 @@ class Hopper_A3C(Preset): self.agent.gae_lambda = 0.98 self.agent.beta_entropy = 0.005 self.clip_gradients = 40 - self.agent.middleware_type = MiddlewareTypes.FC + self.agent.middleware_type = conf.MiddlewareTypes.FC class HopperIceWall_A3C(Hopper_A3C): @@ -1138,9 +1144,9 @@ class HopperBullet_A3C(Hopper_A3C): self.env.level = 'HopperBulletEnv-v0' -class Kuka_ClippedPPO(Preset): +class Kuka_ClippedPPO(conf.Preset): def __init__(self): - Preset.__init__(self, ClippedPPO, GymVectorObservation, ExplorationParameters) + conf.Preset.__init__(self, conf.ClippedPPO, conf.GymVectorObservation, conf.ExplorationParameters) self.env.level = 'KukaBulletEnv-v0' self.learning_rate = 0.0001 self.num_heatup_steps = 0 @@ -1155,9 +1161,9 @@ class Kuka_ClippedPPO(Preset): self.env.normalize_observation = True -class Minitaur_ClippedPPO(Preset): +class Minitaur_ClippedPPO(conf.Preset): def __init__(self): - Preset.__init__(self, ClippedPPO, GymVectorObservation, ExplorationParameters) + conf.Preset.__init__(self, conf.ClippedPPO, conf.GymVectorObservation, conf.ExplorationParameters) self.env.level = 'MinitaurBulletEnv-v0' self.learning_rate = 0.0001 self.num_heatup_steps = 0 @@ -1172,9 +1178,9 @@ class Minitaur_ClippedPPO(Preset): self.env.normalize_observation = True -class Walker_A3C(Preset): +class Walker_A3C(conf.Preset): def __init__(self): - Preset.__init__(self, ActorCritic, GymVectorObservation, EntropyExploration) + conf.Preset.__init__(self, conf.ActorCritic, conf.GymVectorObservation, conf.EntropyExploration) self.env.level = 'Walker2d-v1' self.agent.policy_gradient_rescaler = 'A_VALUE' self.agent.optimizer_type = 'Adam' @@ -1187,12 +1193,12 @@ class Walker_A3C(Preset): self.agent.gae_lambda = 1 self.agent.beta_entropy = 0.005 self.clip_gradients = 40 - self.agent.middleware_type = MiddlewareTypes.FC + self.agent.middleware_type = conf.MiddlewareTypes.FC -class Ant_A3C(Preset): +class Ant_A3C(conf.Preset): def __init__(self): - Preset.__init__(self, ActorCritic, GymVectorObservation, EntropyExploration) + conf.Preset.__init__(self, conf.ActorCritic, conf.GymVectorObservation, conf.EntropyExploration) self.env.level = 'Ant-v1' self.agent.policy_gradient_rescaler = 'A_VALUE' self.agent.optimizer_type = 'Adam' @@ -1205,7 +1211,7 @@ class Ant_A3C(Preset): self.agent.gae_lambda = 1 self.agent.beta_entropy = 0.005 self.clip_gradients = 40 - self.agent.middleware_type = MiddlewareTypes.FC + self.agent.middleware_type = conf.MiddlewareTypes.FC self.env.normalize_observation = True @@ -1221,9 +1227,9 @@ class AntMaze_A3C(Ant_A3C): self.env.level = 'AntMaze-v0' -class Humanoid_A3C(Preset): +class Humanoid_A3C(conf.Preset): def __init__(self): - Preset.__init__(self, ActorCritic, GymVectorObservation, 
EntropyExploration) + conf.Preset.__init__(self, conf.ActorCritic, conf.GymVectorObservation, conf.EntropyExploration) self.env.level = 'Humanoid-v1' self.agent.policy_gradient_rescaler = 'A_VALUE' self.agent.optimizer_type = 'Adam' @@ -1236,13 +1242,13 @@ class Humanoid_A3C(Preset): self.agent.gae_lambda = 1 self.agent.beta_entropy = 0.005 self.clip_gradients = 40 - self.agent.middleware_type = MiddlewareTypes.FC + self.agent.middleware_type = conf.MiddlewareTypes.FC self.env.normalize_observation = True -class Pendulum_A3C(Preset): +class Pendulum_A3C(conf.Preset): def __init__(self): - Preset.__init__(self, ActorCritic, GymVectorObservation, EntropyExploration) + conf.Preset.__init__(self, conf.ActorCritic, conf.GymVectorObservation, conf.EntropyExploration) self.env.level = 'Pendulum-v0' self.agent.policy_gradient_rescaler = 'GAE' self.agent.optimizer_type = 'Adam' @@ -1254,9 +1260,9 @@ class Pendulum_A3C(Preset): -class BipedalWalker_A3C(Preset): +class BipedalWalker_A3C(conf.Preset): def __init__(self): - Preset.__init__(self, ActorCritic, GymVectorObservation, EntropyExploration) + conf.Preset.__init__(self, conf.ActorCritic, conf.GymVectorObservation, conf.EntropyExploration) self.env.level = 'BipedalWalker-v2' self.agent.policy_gradient_rescaler = 'A_VALUE' self.agent.optimizer_type = 'RMSProp' @@ -1269,12 +1275,12 @@ class BipedalWalker_A3C(Preset): self.agent.gae_lambda = 1 self.agent.beta_entropy = 0.005 self.clip_gradients = None - self.agent.middleware_type = MiddlewareTypes.FC + self.agent.middleware_type = conf.MiddlewareTypes.FC -class Doom_Basic_A3C(Preset): +class Doom_Basic_A3C(conf.Preset): def __init__(self): - Preset.__init__(self, ActorCritic, Doom, CategoricalExploration) + conf.Preset.__init__(self, conf.ActorCritic, conf.Doom, conf.CategoricalExploration) self.env.level = 'basic' self.agent.policy_gradient_rescaler = 'GAE' self.learning_rate = 0.0001 @@ -1286,12 +1292,12 @@ class Doom_Basic_A3C(Preset): self.agent.gae_lambda = 1 self.agent.beta_entropy = 0.01 self.clip_gradients = 40 - self.agent.middleware_type = MiddlewareTypes.FC + self.agent.middleware_type = conf.MiddlewareTypes.FC -class Pong_A3C(Preset): +class Pong_A3C(conf.Preset): def __init__(self): - Preset.__init__(self, ActorCritic, Atari, CategoricalExploration) + conf.Preset.__init__(self, conf.ActorCritic, conf.Atari, conf.CategoricalExploration) self.env.level = 'PongDeterministic-v4' self.agent.policy_gradient_rescaler = 'GAE' self.learning_rate = 0.0001 @@ -1302,12 +1308,12 @@ class Pong_A3C(Preset): self.agent.gae_lambda = 1. 
self.agent.beta_entropy = 0.01 self.clip_gradients = 40.0 - self.agent.middleware_type = MiddlewareTypes.FC + self.agent.middleware_type = conf.MiddlewareTypes.FC -class Breakout_A3C(Preset): +class Breakout_A3C(conf.Preset): def __init__(self): - Preset.__init__(self, ActorCritic, Atari, CategoricalExploration) + conf.Preset.__init__(self, conf.ActorCritic, conf.Atari, conf.CategoricalExploration) self.env.level = 'BreakoutDeterministic-v4' self.agent.policy_gradient_rescaler = 'GAE' self.learning_rate = 0.0001 @@ -1318,13 +1324,13 @@ class Breakout_A3C(Preset): self.agent.gae_lambda = 1 self.agent.beta_entropy = 0.05 self.clip_gradients = 40.0 - self.agent.middleware_type = MiddlewareTypes.FC + self.agent.middleware_type = conf.MiddlewareTypes.FC -class Carla_A3C(Preset): +class Carla_A3C(conf.Preset): def __init__(self): - Preset.__init__(self, ActorCritic, Carla, EntropyExploration) - self.agent.embedder_complexity = EmbedderComplexity.Deep + conf.Preset.__init__(self, conf.ActorCritic, conf.Carla, conf.EntropyExploration) + self.agent.embedder_complexity = conf.EmbedderComplexity.Deep self.agent.policy_gradient_rescaler = 'GAE' self.learning_rate = 0.0001 self.num_heatup_steps = 0 @@ -1335,22 +1341,22 @@ class Carla_A3C(Preset): self.agent.gae_lambda = 1 self.agent.beta_entropy = 0.01 self.clip_gradients = 40 - self.agent.middleware_type = MiddlewareTypes.FC + self.agent.middleware_type = conf.MiddlewareTypes.FC -class Carla_DDPG(Preset): +class Carla_DDPG(conf.Preset): def __init__(self): - Preset.__init__(self, DDPG, Carla, OUExploration) - self.agent.embedder_complexity = EmbedderComplexity.Deep + conf.Preset.__init__(self, conf.DDPG, conf.Carla, conf.OUExploration) + self.agent.embedder_complexity = conf.EmbedderComplexity.Deep self.learning_rate = 0.0001 self.num_heatup_steps = 1000 self.agent.num_consecutive_training_steps = 5 -class Carla_BC(Preset): +class Carla_BC(conf.Preset): def __init__(self): - Preset.__init__(self, BC, Carla, ExplorationParameters) - self.agent.embedder_complexity = EmbedderComplexity.Deep + conf.Preset.__init__(self, conf.BC, conf.Carla, conf.ExplorationParameters) + self.agent.embedder_complexity = conf.EmbedderComplexity.Deep self.agent.load_memory_from_file_path = 'datasets/carla_town1.p' self.learning_rate = 0.0005 self.num_heatup_steps = 0 @@ -1359,9 +1365,9 @@ class Carla_BC(Preset): self.evaluate_every_x_training_iterations = 5000 -class Doom_Basic_BC(Preset): +class Doom_Basic_BC(conf.Preset): def __init__(self): - Preset.__init__(self, BC, Doom, ExplorationParameters) + conf.Preset.__init__(self, conf.BC, conf.Doom, conf.ExplorationParameters) self.env.level = 'basic' self.agent.load_memory_from_file_path = 'datasets/doom_basic.p' self.learning_rate = 0.0005 @@ -1372,9 +1378,9 @@ class Doom_Basic_BC(Preset): self.num_training_iterations = 2000 -class Doom_Defend_BC(Preset): +class Doom_Defend_BC(conf.Preset): def __init__(self): - Preset.__init__(self, BC, Doom, ExplorationParameters) + conf.Preset.__init__(self, conf.BC, conf.Doom, conf.ExplorationParameters) self.env.level = 'defend' self.agent.load_memory_from_file_path = 'datasets/doom_defend.p' self.learning_rate = 0.0005 @@ -1384,9 +1390,9 @@ class Doom_Defend_BC(Preset): self.evaluate_every_x_training_iterations = 100 -class Doom_Deathmatch_BC(Preset): +class Doom_Deathmatch_BC(conf.Preset): def __init__(self): - Preset.__init__(self, BC, Doom, ExplorationParameters) + conf.Preset.__init__(self, conf.BC, conf.Doom, conf.ExplorationParameters) self.env.level = 'deathmatch' 
self.agent.load_memory_from_file_path = 'datasets/doom_deathmatch.p' self.learning_rate = 0.0005 @@ -1396,9 +1402,9 @@ class Doom_Deathmatch_BC(Preset): self.evaluate_every_x_training_iterations = 100 -class MontezumaRevenge_BC(Preset): +class MontezumaRevenge_BC(conf.Preset): def __init__(self): - Preset.__init__(self, BC, Atari, ExplorationParameters) + conf.Preset.__init__(self, conf.BC, conf.Atari, conf.ExplorationParameters) self.env.level = 'MontezumaRevenge-v0' self.agent.load_memory_from_file_path = 'datasets/montezuma_revenge.p' self.learning_rate = 0.0005 diff --git a/renderer.py b/renderer.py index fee19af..2a1637f 100644 --- a/renderer.py +++ b/renderer.py @@ -1,6 +1,6 @@ -import pygame -from pygame.locals import * import numpy as np +import pygame +from pygame import locals as loc class Renderer(object): @@ -21,7 +21,8 @@ class Renderer(object): :return: None """ self.size = (width, height) - self.screen = self.display.set_mode(self.size, HWSURFACE | DOUBLEBUF) + self.screen = self.display.set_mode(self.size, + loc.HWSURFACE | loc.DOUBLEBUF) self.display.set_caption("Coach") self.is_open = True diff --git a/run_test.py b/run_test.py index 196056e..36cb2c6 100644 --- a/run_test.py +++ b/run_test.py @@ -13,23 +13,21 @@ # See the License for the specific language governing permissions and # limitations under the License. # - -# -*- coding: utf-8 -*- -import presets -import numpy as np -import pandas as pd -from os import path -import os +import argparse import glob +import os import shutil +import signal +import subprocess import sys import time -from logger import screen -from utils import list_all_classes_in_module, threaded_cmd_line_run, killed_processes -import subprocess -import signal -import argparse +import numpy as np +import pandas as pd + +import logger +import presets +import utils if __name__ == '__main__': parser = argparse.ArgumentParser() @@ -61,7 +59,7 @@ if __name__ == '__main__': if args.preset is not None: presets_lists = [args.preset] else: - presets_lists = list_all_classes_in_module(presets) + presets_lists = utils.list_all_classes_in_module(presets) win_size = 10 fail_count = 0 test_count = 0 @@ -70,7 +68,7 @@ if __name__ == '__main__': # create a clean experiment directory test_name = '__test' test_path = os.path.join('./experiments', test_name) - if path.exists(test_path): + if os.path.exists(test_path): shutil.rmtree(test_path) if args.ignore_presets is not None: presets_to_ignore = args.ignore_presets.split(',') @@ -100,7 +98,7 @@ if __name__ == '__main__': test_count += 1 # run the experiment in a separate thread - screen.log_title("Running test {} - {}".format(preset_name, framework)) + logger.screen.log_title("Running test {} - {}".format(preset_name, framework)) log_file_name = 'test_log_{preset_name}_{framework}.txt'.format( preset_name=preset_name, framework=framework, @@ -139,7 +137,7 @@ if __name__ == '__main__': tries_counter = 0 while not csv_paths: - csv_paths = glob.glob(path.join(test_path, '*', filename_pattern)) + csv_paths = glob.glob(os.path.join(test_path, '*', filename_pattern)) if tries_counter > read_csv_tries: break tries_counter += 1 @@ -195,26 +193,26 @@ if __name__ == '__main__': # kill test and print result os.killpg(os.getpgid(p.pid), signal.SIGTERM) if test_passed: - screen.success("Passed successfully") + logger.screen.success("Passed successfully") else: if csv_paths: - screen.error("Failed due to insufficient reward", crash=False) - screen.error("preset.test_max_step_threshold: {}".format(preset.test_max_step_threshold), 
crash=False) - screen.error("preset.test_min_return_threshold: {}".format(preset.test_min_return_threshold), crash=False) - screen.error("averaged_rewards: {}".format(averaged_rewards), crash=False) - screen.error("episode number: {}".format(csv['Episode #'].values[-1]), crash=False) + logger.screen.error("Failed due to insufficient reward", crash=False) + logger.screen.error("preset.test_max_step_threshold: {}".format(preset.test_max_step_threshold), crash=False) + logger.screen.error("preset.test_min_return_threshold: {}".format(preset.test_min_return_threshold), crash=False) + logger.screen.error("averaged_rewards: {}".format(averaged_rewards), crash=False) + logger.screen.error("episode number: {}".format(csv['Episode #'].values[-1]), crash=False) else: - screen.error("csv file never found", crash=False) + logger.screen.error("csv file never found", crash=False) if args.verbose: - screen.error("command exitcode: {}".format(p.returncode), crash=False) - screen.error(open(log_file_name).read(), crash=False) + logger.screen.error("command exitcode: {}".format(p.returncode), crash=False) + logger.screen.error(open(log_file_name).read(), crash=False) fail_count += 1 shutil.rmtree(test_path) - screen.separator() + logger.screen.separator() if fail_count == 0: - screen.success(" Summary: " + str(test_count) + "/" + str(test_count) + " tests passed successfully") + logger.screen.success(" Summary: " + str(test_count) + "/" + str(test_count) + " tests passed successfully") else: - screen.error(" Summary: " + str(test_count - fail_count) + "/" + str(test_count) + " tests passed successfully") + logger.screen.error(" Summary: " + str(test_count - fail_count) + "/" + str(test_count) + " tests passed successfully") diff --git a/utils.py b/utils.py index 7f75ac5..f6cfc7a 100644 --- a/utils.py +++ b/utils.py @@ -13,20 +13,22 @@ # See the License for the specific language governing permissions and # limitations under the License. # - -import json -import inspect -import os -import numpy as np -import threading -from subprocess import call, Popen -import signal import copy +import inspect +import json +import os +import signal +import subprocess +import threading + +import numpy as np + killed_processes = [] eps = np.finfo(np.float32).eps + class Enum(object): def __init__(self): pass @@ -161,7 +163,7 @@ def ClassToDict(x): def cmd_line_run(result, run_cmd, id=-1): - p = Popen(run_cmd, shell=True, executable="/bin/bash") + p = subprocess.Popen(run_cmd, shell=True, executable="/bin/bash") while result[0] is None or result[0] == [None]: if id in killed_processes: p.kill()
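
A note on the lookup pattern visible in json_to_preset above: the evaluated names are now prefixed with the module that provides them (eval('agents.' + run_dict['agent_type']), eval('presets.' + run_dict['preset']), and so on), so a name taken from the run dictionary is resolved against one explicit namespace rather than against whatever is present in the caller's globals. The snippet below is only a minimal sketch of that idea, assuming the run dictionary carries a plain class name such as 'DQNAgent'; getattr is shown as an equivalent, slightly stricter alternative to the eval call, not as a change that is part of this patch.

    import agents

    def resolve_agent_class(name):
        # Same effect as eval('agents.' + name): the class is looked up on the
        # agents package itself, so only names that package actually exports
        # can be returned, and an unknown name raises AttributeError instead
        # of a NameError from eval.
        return getattr(agents, name)

    # 'DQNAgent' is an assumed example value for run_dict['agent_type']
    agent_cls = resolve_agent_class('DQNAgent')
    agent = agent_cls()

Since the names come from a user-edited JSON file, either form is a dynamic lookup by string; the only difference is which namespace the string is resolved in.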