1
0
mirror of https://github.com/gryf/coach.git synced 2025-12-17 11:10:20 +01:00

Release 0.9

Main changes are detailed below:

New features -
* CARLA 0.7 simulator integration
* Human control of the game play
* Recording of human game play and storing / loading the replay buffer
* Behavioral cloning agent and presets
* Golden tests for several presets
* Selecting between deep / shallow image embedders
* Rendering through pygame (with some boost in performance)

API changes -
* Improved environment wrapper API
* Added an evaluate flag to allow convenient evaluation of existing checkpoints
* Improve frameskip definition in Gym

Bug fixes -
* Fixed loading of checkpoints for agents with more than one network
* Fixed the N Step Q learning agent python3 compatibility
This commit is contained in:
Itai Caspi
2017-12-19 19:27:16 +02:00
committed by GitHub
parent 11faf19649
commit 125c7ee38d
41 changed files with 1713 additions and 260 deletions

View File

@@ -15,15 +15,18 @@
#
import tensorflow as tf
from configurations import EmbedderComplexity
class InputEmbedder(object):
def __init__(self, input_size, activation_function=tf.nn.relu, name="embedder"):
def __init__(self, input_size, activation_function=tf.nn.relu,
embedder_complexity=EmbedderComplexity.Shallow, name="embedder"):
self.name = name
self.input_size = input_size
self.activation_function = activation_function
self.input = None
self.output = None
self.embedder_complexity = embedder_complexity
def __call__(self, prev_input_placeholder=None):
with tf.variable_scope(self.get_name()):
@@ -43,31 +46,77 @@ class InputEmbedder(object):
class ImageEmbedder(InputEmbedder):
def __init__(self, input_size, input_rescaler=255.0, activation_function=tf.nn.relu, name="embedder"):
InputEmbedder.__init__(self, input_size, activation_function, name)
def __init__(self, input_size, input_rescaler=255.0, activation_function=tf.nn.relu,
embedder_complexity=EmbedderComplexity.Shallow, name="embedder"):
InputEmbedder.__init__(self, input_size, activation_function, embedder_complexity, name)
self.input_rescaler = input_rescaler
def _build_module(self):
# image observation
rescaled_observation_stack = self.input / self.input_rescaler
self.observation_conv1 = tf.layers.conv2d(rescaled_observation_stack,
filters=32, kernel_size=(8, 8), strides=(4, 4),
activation=self.activation_function, data_format='channels_last')
self.observation_conv2 = tf.layers.conv2d(self.observation_conv1,
filters=64, kernel_size=(4, 4), strides=(2, 2),
activation=self.activation_function, data_format='channels_last')
self.observation_conv3 = tf.layers.conv2d(self.observation_conv2,
filters=64, kernel_size=(3, 3), strides=(1, 1),
activation=self.activation_function, data_format='channels_last')
self.output = tf.contrib.layers.flatten(self.observation_conv3)
if self.embedder_complexity == EmbedderComplexity.Shallow:
# same embedder as used in the original DQN paper
self.observation_conv1 = tf.layers.conv2d(rescaled_observation_stack,
filters=32, kernel_size=(8, 8), strides=(4, 4),
activation=self.activation_function, data_format='channels_last')
self.observation_conv2 = tf.layers.conv2d(self.observation_conv1,
filters=64, kernel_size=(4, 4), strides=(2, 2),
activation=self.activation_function, data_format='channels_last')
self.observation_conv3 = tf.layers.conv2d(self.observation_conv2,
filters=64, kernel_size=(3, 3), strides=(1, 1),
activation=self.activation_function, data_format='channels_last')
self.output = tf.contrib.layers.flatten(self.observation_conv3)
elif self.embedder_complexity == EmbedderComplexity.Deep:
# the embedder used in the CARLA papers
self.observation_conv1 = tf.layers.conv2d(rescaled_observation_stack,
filters=32, kernel_size=(5, 5), strides=(2, 2),
activation=self.activation_function, data_format='channels_last')
self.observation_conv2 = tf.layers.conv2d(self.observation_conv1,
filters=32, kernel_size=(3, 3), strides=(1, 1),
activation=self.activation_function, data_format='channels_last')
self.observation_conv3 = tf.layers.conv2d(self.observation_conv2,
filters=64, kernel_size=(3, 3), strides=(2, 2),
activation=self.activation_function, data_format='channels_last')
self.observation_conv4 = tf.layers.conv2d(self.observation_conv3,
filters=64, kernel_size=(3, 3), strides=(1, 1),
activation=self.activation_function, data_format='channels_last')
self.observation_conv5 = tf.layers.conv2d(self.observation_conv4,
filters=128, kernel_size=(3, 3), strides=(2, 2),
activation=self.activation_function, data_format='channels_last')
self.observation_conv6 = tf.layers.conv2d(self.observation_conv5,
filters=128, kernel_size=(3, 3), strides=(1, 1),
activation=self.activation_function, data_format='channels_last')
self.observation_conv7 = tf.layers.conv2d(self.observation_conv6,
filters=256, kernel_size=(3, 3), strides=(2, 2),
activation=self.activation_function, data_format='channels_last')
self.observation_conv8 = tf.layers.conv2d(self.observation_conv7,
filters=256, kernel_size=(3, 3), strides=(1, 1),
activation=self.activation_function, data_format='channels_last')
self.output = tf.contrib.layers.flatten(self.observation_conv8)
else:
raise ValueError("The defined embedder complexity value is invalid")
class VectorEmbedder(InputEmbedder):
def __init__(self, input_size, activation_function=tf.nn.relu, name="embedder"):
InputEmbedder.__init__(self, input_size, activation_function, name)
def __init__(self, input_size, activation_function=tf.nn.relu,
embedder_complexity=EmbedderComplexity.Shallow, name="embedder"):
InputEmbedder.__init__(self, input_size, activation_function, embedder_complexity, name)
def _build_module(self):
# vector observation
input_layer = tf.contrib.layers.flatten(self.input)
self.output = tf.layers.dense(input_layer, 256, activation=self.activation_function)
if self.embedder_complexity == EmbedderComplexity.Shallow:
self.output = tf.layers.dense(input_layer, 256, activation=self.activation_function)
elif self.embedder_complexity == EmbedderComplexity.Deep:
# the embedder used in the CARLA papers
self.observation_fc1 = tf.layers.dense(input_layer, 128, activation=self.activation_function)
self.observation_fc2 = tf.layers.dense(self.observation_fc1, 128, activation=self.activation_function)
self.output = tf.layers.dense(self.observation_fc2, 128, activation=self.activation_function)
else:
raise ValueError("The defined embedder complexity value is invalid")