Mirror of https://github.com/gryf/coach.git
load and save function for non-episodic replay buffers + carla improvements + network bug fixes
@@ -74,12 +74,12 @@ class Agent(AgentInterface):
             self.memory = self.shared_memory_scratchpad.get(self.memory_lookup_name)
         else:
             # modules
+            self.memory = dynamic_import_and_instantiate_module_from_params(self.ap.memory)
+
             if agent_parameters.memory.load_memory_from_file_path:
                 screen.log_title("Loading replay buffer from pickle. Pickle path: {}"
                                  .format(agent_parameters.memory.load_memory_from_file_path))
-                self.memory = read_pickle(agent_parameters.memory.load_memory_from_file_path)
-            else:
-                self.memory = dynamic_import_and_instantiate_module_from_params(self.ap.memory)
+                self.memory.load(agent_parameters.memory.load_memory_from_file_path)
 
             if self.shared_memory and self.is_chief:
                 self.shared_memory_scratchpad.add(self.memory_lookup_name, self.memory)
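
With this change the agent always instantiates its memory from the memory parameters and, when load_memory_from_file_path is set, fills it in place through the new memory.load() instead of replacing the whole object with an unpickled one. A minimal sketch of how a preset might point an agent at a saved buffer; the DQNAgentParameters choice and the path are illustrative, only the load_memory_from_file_path attribute is taken from the diff above:

    # Hypothetical preset snippet (not part of this commit): reuse a previously
    # saved replay buffer. The file is expected to hold a pickled list of
    # transitions, e.g. one written by ExperienceReplay.save() further down.
    from rl_coach.agents.dqn_agent import DQNAgentParameters

    agent_params = DQNAgentParameters()
    agent_params.memory.load_memory_from_file_path = '/path/to/replay_buffer.p'
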
@@ -149,6 +149,7 @@ class Agent(AgentInterface):
         self.unclipped_grads = self.register_signal('Grads (unclipped)')
         self.reward = self.register_signal('Reward', dump_one_value_per_episode=False, dump_one_value_per_step=True)
         self.shaped_reward = self.register_signal('Shaped Reward', dump_one_value_per_episode=False, dump_one_value_per_step=True)
+        self.discounted_return = self.register_signal('Discounted Return')
         if isinstance(self.in_action_space, GoalsSpace):
             self.distance_from_goal = self.register_signal('Distance From Goal', dump_one_value_per_step=True)
 
@@ -427,6 +428,10 @@ class Agent(AgentInterface):
         :return: None
         """
         self.current_episode_buffer.is_complete = True
+        self.current_episode_buffer.update_returns()
+
+        for transition in self.current_episode_buffer.transitions:
+            self.discounted_return.add_sample(transition.total_return)
 
         if self.phase != RunPhase.TEST or self.ap.task_parameters.evaluate_only:
             self.current_episode += 1
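
The 'Discounted Return' signal registered above is fed with transition.total_return once update_returns() has run on the finished episode. A small self-contained sketch of the backward recursion this bookkeeping is assumed to perform (G_t = r_t + discount * G_{t+1}); the helper name and discount value are illustrative:

    # Sketch of the discounted-return bookkeeping assumed behind update_returns():
    # each step's return is the discounted sum of the rewards that follow it.
    def compute_returns(rewards, discount=0.99):
        returns = [0.0] * len(rewards)
        running = 0.0
        for t in reversed(range(len(rewards))):
            running = rewards[t] + discount * running
            returns[t] = running
        return returns

    # compute_returns([1, 0, 1], discount=0.5) == [1.25, 0.5, 1.0]
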
@@ -435,7 +440,6 @@ class Agent(AgentInterface):
         if isinstance(self.memory, EpisodicExperienceReplay):
             self.call_memory('store_episode', self.current_episode_buffer)
         elif self.ap.algorithm.store_transitions_only_when_episodes_are_terminated:
-            self.current_episode_buffer.update_returns()
             for transition in self.current_episode_buffer.transitions:
                 self.call_memory('store', transition)
 
@@ -32,6 +32,7 @@ from rl_coach.core_types import ActionInfo
 from rl_coach.exploration_policies.e_greedy import EGreedyParameters
 from rl_coach.logger import screen
 from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplayParameters
+from rl_coach.memories.non_episodic.experience_replay import ExperienceReplayParameters
 
 
 class HumanAlgorithmParameters(AlgorithmParameters):
@@ -57,7 +58,7 @@ class HumanAgentParameters(AgentParameters):
     def __init__(self):
         super().__init__(algorithm=HumanAlgorithmParameters(),
                          exploration=EGreedyParameters(),
-                         memory=EpisodicExperienceReplayParameters(),
+                         memory=ExperienceReplayParameters(),
                          networks={"main": BCNetworkParameters()})
 
     @property
@@ -103,7 +104,7 @@ class HumanAgent(Agent):
     def save_replay_buffer_and_exit(self):
         replay_buffer_path = os.path.join(self.agent_logger.experiments_path, 'replay_buffer.p')
         self.memory.tp = None
-        to_pickle(self.memory, replay_buffer_path)
+        self.memory.save(replay_buffer_path)
         screen.log_title("Replay buffer was stored in {}".format(replay_buffer_path))
         exit()
 
@@ -61,7 +61,7 @@ class Conv2d(object):
         """
         self.params = params
 
-    def __call__(self, input_layer, name: str):
+    def __call__(self, input_layer, name: str=None):
         """
         returns a tensorflow conv2d layer
         :param input_layer: previous layer
@@ -79,7 +79,7 @@ class Dense(object):
         """
         self.params = force_list(params)
 
-    def __call__(self, input_layer, name: str, kernel_initializer=None, activation=None):
+    def __call__(self, input_layer, name: str=None, kernel_initializer=None, activation=None):
        """
         returns a tensorflow dense layer
         :param input_layer: previous layer
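
Both Conv2d.__call__ and Dense.__call__ now default name to None, presumably so callers can omit the scope name and let TensorFlow auto-generate one, as the underlying tf.layers functions already do. A tiny TF 1.x sketch of that behavior (the layer sizes are illustrative):

    import tensorflow as tf  # TF 1.x API, as used by this version of coach

    # With name=None, tf.layers picks a unique scope ("dense", "dense_1", ...) by
    # itself, so the wrappers above can now be called without an explicit name.
    x = tf.placeholder(tf.float32, shape=[None, 8])
    h = tf.layers.dense(x, 16)                # auto-named scope
    y = tf.layers.dense(h, 4, name='output')  # explicit name still works
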
@@ -253,9 +253,11 @@ class GeneralTensorFlowNetwork(TensorFlowArchitecture):
                 else:
                     # if we use a single network with multiple embedders, then the head type is the current head idx
                     head_type_idx = head_idx
+
+                # create output head and add it to the output heads list
                 self.output_heads.append(
                     self.get_output_head(self.network_parameters.heads_parameters[head_type_idx],
-                                         head_copy_idx,
+                                         head_idx*self.network_parameters.num_output_head_copies + head_copy_idx,
                                          self.network_parameters.loss_weights[head_type_idx])
                 )
 
@@ -59,8 +59,8 @@ class Head(object):
         self.loss = []
         self.loss_type = []
         self.regularizations = []
-        # self.loss_weight = force_list(loss_weight)
-        self.loss_weight = tf.Variable(force_list(loss_weight), trainable=False, collections=[tf.GraphKeys.LOCAL_VARIABLES])
+        self.loss_weight = tf.Variable([float(w) for w in force_list(loss_weight)],
+                                       trainable=False, collections=[tf.GraphKeys.LOCAL_VARIABLES])
         self.loss_weight_placeholder = tf.placeholder("float")
         self.set_loss_weight = tf.assign(self.loss_weight, self.loss_weight_placeholder)
         self.target = []
@@ -49,7 +49,7 @@ class PolicyHead(Head):
         # a scalar weight that penalizes low entropy values to encourage exploration
         if hasattr(agent_parameters.algorithm, 'beta_entropy'):
             # we set the beta value as a tf variable so it can be updated later if needed
-            self.beta = tf.Variable(agent_parameters.algorithm.beta_entropy,
+            self.beta = tf.Variable(float(agent_parameters.algorithm.beta_entropy),
                                     trainable=False, collections=[tf.GraphKeys.LOCAL_VARIABLES])
             self.beta_placeholder = tf.placeholder('float')
             self.set_beta = tf.assign(self.beta, self.beta_placeholder)
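
Both the Head.loss_weight and PolicyHead.beta fixes cast the initial value to float before wrapping it in a tf.Variable, presumably because an integer initial value would create an int32 variable that the float placeholder can no longer be assigned to. A minimal TF 1.x sketch of the failure mode and the fix:

    import tensorflow as tf  # TF 1.x

    # An integer initial value yields an int32 variable; assigning a float
    # placeholder to it fails with a dtype mismatch when building the graph.
    w_int = tf.Variable([1], trainable=False)        # dtype int32
    # tf.assign(w_int, tf.placeholder("float"))      # -> dtype mismatch error

    # Casting to float first gives a float32 variable that matches the placeholder.
    w = tf.Variable([float(v) for v in [1]], trainable=False)
    w_ph = tf.placeholder("float")
    set_w = tf.assign(w, w_ph)
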
@@ -18,6 +18,7 @@ import random
 import sys
 from os import path, environ
 
+from rl_coach.logger import screen
 from rl_coach.filters.action.partial_discrete_action_space_map import PartialDiscreteActionSpaceMap
 from rl_coach.filters.observation.observation_rgb_to_y_filter import ObservationRGBToYFilter
 from rl_coach.filters.observation.observation_to_uint8_filter import ObservationToUInt8Filter
@@ -25,6 +26,8 @@ from rl_coach.filters.observation.observation_to_uint8_filter import Observation
 try:
     if 'CARLA_ROOT' in environ:
         sys.path.append(path.join(environ.get('CARLA_ROOT'), 'PythonClient'))
+    else:
+        screen.error("CARLA_ROOT was not defined. Please set it to point to the CARLA root directory and try again.")
     from carla.client import CarlaClient
     from carla.settings import CarlaSettings
     from carla.tcp import TCPConnectionError
@@ -237,25 +240,28 @@ class CarlaEnvironment(Environment):
         # add a front facing camera
         if CameraTypes.FRONT in cameras:
             camera = Camera(CameraTypes.FRONT.value)
+            camera.set(FOV=100)
             camera.set_image_size(camera_width, camera_height)
-            camera.set_position(0.2, 0, 1.3)
-            camera.set_rotation(8, 0, 0)
+            camera.set_position(2.0, 0, 1.4)
+            camera.set_rotation(-15.0, 0, 0)
             settings.add_sensor(camera)
 
         # add a left facing camera
         if CameraTypes.LEFT in cameras:
             camera = Camera(CameraTypes.LEFT.value)
+            camera.set(FOV=100)
             camera.set_image_size(camera_width, camera_height)
-            camera.set_position(0.2, 0, 1.3)
-            camera.set_rotation(8, -30, 0)
+            camera.set_position(2.0, 0, 1.4)
+            camera.set_rotation(-15.0, -30, 0)
             settings.add_sensor(camera)
 
         # add a right facing camera
         if CameraTypes.RIGHT in cameras:
             camera = Camera(CameraTypes.RIGHT.value)
+            camera.set(FOV=100)
             camera.set_image_size(camera_width, camera_height)
-            camera.set_position(0.2, 0, 1.3)
-            camera.set_rotation(8, 30, 0)
+            camera.set_position(2.0, 0, 1.4)
+            camera.set_rotation(-15.0, 30, 0)
             settings.add_sensor(camera)
 
         # add a front facing depth camera
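
The RGB cameras now get an explicit 100-degree FOV, are mounted further forward and slightly higher on the car (x=2.0, z=1.4 instead of x=0.2, z=1.3), and appear to be pitched 15 degrees down rather than 8 degrees up. A standalone sketch of the same front-camera geometry using the legacy CARLA PythonClient API; the carla.sensor import path, camera name, and image size are assumptions, not part of this commit:

    from carla.sensor import Camera
    from carla.settings import CarlaSettings

    settings = CarlaSettings()
    camera = Camera('forward_camera')
    camera.set(FOV=100)                  # wide field of view
    camera.set_image_size(256, 128)      # width, height in pixels (illustrative)
    camera.set_position(2.0, 0, 1.4)     # x forward, y right, z up, in meters
    camera.set_rotation(-15.0, 0, 0)     # pitch, yaw, roll in degrees (pitched down)
    settings.add_sensor(camera)
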
@@ -15,6 +15,7 @@
 #
 
 from typing import List, Tuple, Union, Dict, Any
+import pickle
 
 import numpy as np
 
@@ -218,3 +219,21 @@ class ExperienceReplay(Memory):
             self.reader_writer_lock.release_writing()
 
         return mean
+
+    def save(self, file_path: str) -> None:
+        """
+        Save the replay buffer contents to a pickle file
+        :param file_path: the path to the file that will be used to store the pickled transitions
+        """
+        with open(file_path, 'wb') as file:
+            pickle.dump(self.transitions, file)
+
+    def load(self, file_path: str) -> None:
+        """
+        Restore the replay buffer contents from a pickle file.
+        The pickle file is assumed to include a list of transitions.
+        :param file_path: The path to a pickle file to restore
+        """
+        with open(file_path, 'rb') as file:
+            self.transitions = pickle.load(file)
+            self._num_transitions = len(self.transitions)
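
The new save()/load() pair round-trips only the transitions list through pickle, which is what lets the human agent's save_replay_buffer_and_exit() output be reloaded by any non-episodic agent. A minimal round-trip sketch; the ExperienceReplay constructor arguments and the Transition fields below are assumptions based on the public coach API, not part of this commit:

    import numpy as np

    from rl_coach.core_types import Transition
    from rl_coach.memories.memory import MemoryGranularity
    from rl_coach.memories.non_episodic.experience_replay import ExperienceReplay

    memory = ExperienceReplay(max_size=(MemoryGranularity.Transitions, 1000))
    memory.store(Transition(state={'observation': np.zeros(4)},
                            action=0, reward=1.0, game_over=False))
    memory.save('/tmp/replay_buffer.p')      # pickles memory.transitions only

    restored = ExperienceReplay(max_size=(MemoryGranularity.Transitions, 1000))
    restored.load('/tmp/replay_buffer.p')    # restores transitions and the counter
    assert restored.num_transitions() == 1
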