
Release 0.9

Main changes are detailed below:

New features -
* CARLA 0.7 simulator integration
* Human control of the game
* Recording of human gameplay and storing/loading the replay buffer
* Behavioral cloning agent and presets
* Golden tests for several presets
* Selecting between deep and shallow image embedders (see the sketch after this list)
* Rendering through pygame (with some performance boost)
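
A minimal sketch of how two of these features might be combined in a preset, assuming the `configurations` module shown in the diff below is importable; the subclass name and the pickle path are invented for illustration:

```python
# Hypothetical preset sketch, not code from this commit. It only uses
# class and field names that appear in the diff below.
from configurations import DQN, EmbedderComplexity


class DQNWithDeepEmbedder(DQN):
    # select the deeper image embedder introduced in this release
    embedder_complexity = EmbedderComplexity.Deep
    # train from a replay buffer recorded during human play
    # (the file path is a made-up example)
    load_memory_from_file_path = 'human_replay_buffer.p'
    # do not collect new experience; rely on the stored buffer only
    collect_new_data = False
```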

API changes -
* Improved environment wrapper API
* Added an evaluate flag to allow convenient evaluation of existing checkpoints
* Improved frame skip definition in Gym (see the sketch below)
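
Since the Atari preset's frame skip changes from 1 to 4 in the diff below, here is a generic sketch of what a frame-skipping Gym wrapper does. It illustrates the mechanism only (it is not Coach's actual implementation) and is written against the 4-tuple step() API that Gym used at the time:

```python
import gym


class FrameSkip(gym.Wrapper):
    """Repeat each action `skip` times, summing the rewards along the way."""

    def __init__(self, env, skip=4):
        super(FrameSkip, self).__init__(env)
        self.skip = skip

    def step(self, action):
        total_reward = 0.0
        observation, done, info = None, False, {}
        for _ in range(self.skip):
            observation, reward, done, info = self.env.step(action)
            total_reward += reward
            if done:  # stop repeating once the episode ends
                break
        return observation, total_reward, done, info
```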

Bug fixes -
* Fixed loading of checkpoints for agents with more than one network
* Fixed Python 3 compatibility of the N-Step Q learning agent
Author: Itai Caspi
Date: 2017-12-19 19:27:16 +02:00
Committed by: GitHub
Parent commit: 11faf19649
Commit: 125c7ee38d
41 changed files with 1713 additions and 260 deletions


@@ -32,6 +32,11 @@ class InputTypes(object):
     TimedObservation = 5
 
 
+class EmbedderComplexity(object):
+    Shallow = 1
+    Deep = 2
+
+
 class OutputTypes(object):
     Q = 1
     DuelingQ = 2
@@ -60,6 +65,7 @@ class AgentParameters(object):
     middleware_type = MiddlewareTypes.FC
     loss_weights = [1.0]
     stop_gradients_from_head = [False]
+    embedder_complexity = EmbedderComplexity.Shallow
     num_output_head_copies = 1
     use_measurements = False
     use_accumulated_reward_as_measurement = False
@@ -90,6 +96,8 @@ class AgentParameters(object):
     step_until_collecting_full_episodes = False
     targets_horizon = 'N-Step'
     replace_mse_with_huber_loss = False
+    load_memory_from_file_path = None
+    collect_new_data = True
 
     # PPO related params
     target_kl_divergence = 0.01
@@ -132,6 +140,7 @@ class EnvironmentParameters(object):
     reward_scaling = 1.0
     reward_clipping_min = None
     reward_clipping_max = None
+    human_control = False
 
 
 class ExplorationParameters(object):
@@ -188,6 +197,7 @@ class GeneralParameters(object):
     kl_divergence_constraint = 100000
     num_training_iterations = 10000000000
     num_heatup_steps = 1000
+    heatup_using_network_decisions = False
     batch_size = 32
     save_model_sec = None
     save_model_dir = None
@@ -197,6 +207,7 @@ class GeneralParameters(object):
     learning_rate_decay_steps = 0
     evaluation_episodes = 5
     evaluate_every_x_episodes = 1000000
+    evaluate_every_x_training_iterations = 0
     rescaling_interpolation_type = 'bilinear'
 
     # setting a seed will only work for non-parallel algorithms. Parallel algorithms add uncontrollable noise in
@@ -224,6 +235,7 @@ class VisualizationParameters(object):
     dump_signals_to_csv_every_x_episodes = 10
     render = False
     dump_gifs = True
+    max_fps_for_human_control = 10
 
 
 class Roboschool(EnvironmentParameters):
@@ -252,7 +264,7 @@ class Bullet(EnvironmentParameters):
 
 class Atari(EnvironmentParameters):
     type = 'Gym'
-    frame_skip = 1
+    frame_skip = 4
     observation_stack_size = 4
     desired_observation_height = 84
     desired_observation_width = 84
@@ -268,6 +280,31 @@ class Doom(EnvironmentParameters):
     desired_observation_width = 76
 
 
+class Carla(EnvironmentParameters):
+    type = 'Carla'
+    frame_skip = 1
+    observation_stack_size = 4
+    desired_observation_height = 128
+    desired_observation_width = 180
+    normalize_observation = False
+    server_height = 256
+    server_width = 360
+    config = 'environments/CarlaSettings.ini'
+    level = 'town1'
+    verbose = True
+    stereo = False
+    semantic_segmentation = False
+    depth = False
+    episode_max_time = 100000  # milliseconds for each episode
+    continuous_to_bool_threshold = 0.5
+    allow_braking = False
+
+
+class Human(AgentParameters):
+    type = 'HumanAgent'
+    num_episodes_in_experience_replay = 10000000
+
+
 class NStepQ(AgentParameters):
     type = 'NStepQAgent'
     input_types = [InputTypes.Observation]
@@ -299,10 +336,12 @@ class DQN(AgentParameters):
 class DDQN(DQN):
     type = 'DDQNAgent'
 
+
 class DuelingDQN(DQN):
     type = 'DQNAgent'
     output_types = [OutputTypes.DuelingQ]
 
+
 class BootstrappedDQN(DQN):
     type = 'BootstrappedDQNAgent'
     num_output_head_copies = 10
@@ -314,6 +353,7 @@ class CategoricalDQN(DQN):
     v_min = -10.0
     v_max = 10.0
     atoms = 51
+    neon_support = False
 
 
 class QuantileRegressionDQN(DQN):
@@ -452,6 +492,7 @@ class ClippedPPO(AgentParameters):
     step_until_collecting_full_episodes = True
     beta_entropy = 0.01
 
+
 class DFP(AgentParameters):
     type = 'DFPAgent'
     input_types = [InputTypes.Observation, InputTypes.Measurements, InputTypes.GoalVector]
@@ -485,6 +526,15 @@ class PAL(AgentParameters):
     neon_support = True
 
 
+class BC(AgentParameters):
+    type = 'BCAgent'
+    input_types = [InputTypes.Observation]
+    output_types = [OutputTypes.Q]
+    loss_weights = [1.0]
+    collect_new_data = False
+    evaluate_every_x_training_iterations = 50000
+
+
 class EGreedyExploration(ExplorationParameters):
     policy = 'EGreedy'
     initial_epsilon = 0.5
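
As a closing illustration, a hypothetical configuration that wires the new pieces together: recording human driving in CARLA and then cloning it with the new BC agent. Only the class and field names come from the diff above; the subclass names and the demo file path are invented.

```python
# Hypothetical sketch, not code from this commit.
from configurations import BC, Carla


class CarlaWithHumanControl(Carla):
    # let a person drive; their episodes are recorded into the replay buffer
    human_control = True


class CarlaBC(BC):
    # behavioral cloning trains only on recorded demonstrations
    # (BC already sets collect_new_data = False; the path is a made-up example)
    load_memory_from_file_path = 'carla_human_demos.p'
```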