diff --git a/agents/agent.py b/agents/agent.py
index 8a9cc12..0c41c5d 100644
--- a/agents/agent.py
+++ b/agents/agent.py
@@ -14,6 +14,7 @@
 # limitations under the License.
 #
 
 import collections
+import copy
 import random
 import time
@@ -30,8 +31,8 @@ import scipy
 
 from architectures.tensorflow_components import shared_variables as sv
 import configurations
-import exploration_policies as ep
-import memories
+import exploration_policies as ep  # noqa, used in eval()
+import memories  # noqa, used in eval()
 from memories import memory
 import renderer
 import utils
diff --git a/agents/human_agent.py b/agents/human_agent.py
index 7f8e491..4d16819 100644
--- a/agents/human_agent.py
+++ b/agents/human_agent.py
@@ -31,14 +31,14 @@ class HumanAgent(agent.Agent):
         self.clock = pygame.time.Clock()
         self.max_fps = int(self.tp.visualization.max_fps_for_human_control)
 
-        utils.screen.log_title("Human Control Mode")
+        logger.screen.log_title("Human Control Mode")
         available_keys = self.env.get_available_keys()
         if available_keys:
-            utils.screen.log("Use keyboard keys to move. Press escape to quit. Available keys:")
-            utils.screen.log("")
+            logger.screen.log("Use keyboard keys to move. Press escape to quit. Available keys:")
+            logger.screen.log("")
             for action, key in self.env.get_available_keys():
-                utils.screen.log("\t- {}: {}".format(action, key))
-            utils.screen.separator()
+                logger.screen.log("\t- {}: {}".format(action, key))
+            logger.screen.separator()
 
     def train(self):
         return 0
@@ -58,12 +58,12 @@ class HumanAgent(agent.Agent):
             replay_buffer_path = os.path.join(logger.logger.experiments_path, 'replay_buffer.p')
             self.memory.tp = None
             pickle.to_pickle(self.memory, replay_buffer_path)
-            utils.screen.log_title("Replay buffer was stored in {}".format(replay_buffer_path))
+            logger.screen.log_title("Replay buffer was stored in {}".format(replay_buffer_path))
             exit()
 
     def log_to_screen(self, phase):
-        # log to utils.screen
-        utils.screen.log_dict(
+        # log to logger.screen
+        logger.screen.log_dict(
             collections.OrderedDict([
                 ("Episode", self.current_episode),
                 ("total reward", self.total_reward_in_current_episode),
diff --git a/coach.py b/coach.py
index 73f40dc..3452f79 100644
--- a/coach.py
+++ b/coach.py
@@ -1,3 +1,4 @@
+#!/usr/bin/env python
 #
 # Copyright (c) 2017 Intel Corporation
 #
diff --git a/parallel_actor.py b/parallel_actor.py
index 581a357..ddbb163 100644
--- a/parallel_actor.py
+++ b/parallel_actor.py
@@ -15,6 +15,7 @@
 #
 import argparse
 import os
+import sys
 import time
 
 import tensorflow as tf