# coach/rl_coach/environments/environment_group.py
########################################################################################################################
####### Currently we are ignoring more complex cases including EnvironmentGroups - DO NOT USE THIS FILE ****************
########################################################################################################################
# #
# # Copyright (c) 2017 Intel Corporation
# #
# # Licensed under the Apache License, Version 2.0 (the "License");
# # you may not use this file except in compliance with the License.
# # You may obtain a copy of the License at
# #
# # http://www.apache.org/licenses/LICENSE-2.0
# #
# # Unless required by applicable law or agreed to in writing, software
# # distributed under the License is distributed on an "AS IS" BASIS,
# # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# # See the License for the specific language governing permissions and
# # limitations under the License.
# #
#
# from typing import Union, List, Dict
# import numpy as np
# from environments import create_environment
# from environments.environment import Environment
# from environments.environment_interface import EnvironmentInterface, ActionType, ActionSpace
# from core_types import GoalType, RunPhase, Transition
# # NOTE: the module below is an assumption -- force_list, squeeze_list and call_method_for_all are
# # the project's list/broadcast helpers; they are used throughout this file but were not imported
# from utils import call_method_for_all, force_list, squeeze_list
#
#
# class EnvironmentGroup(EnvironmentInterface):
# """
# An EnvironmentGroup is a group of different environments.
# In the simple case, it will contain a single environment. But it can also contain multiple environments,
# where the agent can then act on them as a batch, such that the prediction of the action is more efficient.
# """
# def __init__(self, environments_parameters: List[Environment]):
# self.environments_parameters = environments_parameters
# self.environments = []
# self.action_space = []
# self.outgoing_control = []
# self._last_env_response = []
#
#     @property
#     def action_space(self) -> Union[List[ActionSpace], ActionSpace]:
#         """
#         Get the action spaces of the environments in the group
#         :return: the action spaces
#         """
#         return self._action_space
#
#     @action_space.setter
#     def action_space(self, val: Union[List[ActionSpace], ActionSpace]):
#         """
#         Set the action spaces of the environments in the group
#         :return: None
#         """
#         self._action_space = val
#
#     @property
#     def phase(self) -> RunPhase:
#         """
#         Get the phase of the environments group
#         :return: the current phase
#         """
#         return self._phase
#
#     @phase.setter
#     def phase(self, val: RunPhase):
#         """
#         Change the phase of each one of the environments in the group
#         :param val: the new phase
#         :return: None
#         """
#         self._phase = val
#         # phase is a property on each environment rather than a method, so assign it directly
#         for environment in self.environments:
#             environment.phase = val
#
#     def _create_environments(self):
#         """
#         Create the environments using the given parameters and update the environments list
#         :return: None
#         """
#         for environment_parameters in self.environments_parameters:
#             environment = create_environment(environment_parameters)
#             # list.append returns None, so append in place instead of reassigning action_space
#             self.action_space.append(environment.action_space)
#             self.environments.append(environment)
#
#     @property
#     def last_env_response(self) -> Union[List[Transition], Transition]:
#         """
#         Get the last environment response
#         :return: the last response (state, reward, etc.) from each environment in the group
#         """
#         return squeeze_list(self._last_env_response)
#
#     @last_env_response.setter
#     def last_env_response(self, val: Union[List[Transition], Transition]):
#         """
#         Set the last environment response
#         :param val: the last environment response
#         """
#         self._last_env_response = force_list(val)
#
#     def step(self, actions: Union[List[ActionType], ActionType]) -> List[Transition]:
#         """
#         Act in all the environments in the group.
#         :param actions: can be either a single action if there is a single environment in the group, or a list of
#                         actions in case there are multiple environments in the group. Each action can be an action
#                         index or a numpy array representing a continuous action for example.
#         :return: the responses from all the environments in the group
#         """
#
#         actions = force_list(actions)
#         if len(actions) != len(self.environments):
#             raise ValueError("The number of actions does not match the number of environments in the group")
#
#         result = []
#         for environment, action in zip(self.environments, actions):
#             result.append(environment.step(action))
#
#         self.last_env_response = result
#
#         return result
#
#     def reset(self, force_environment_reset: bool=False) -> List[Transition]:
#         """
#         Reset all the environments in the group
#         :param force_environment_reset: force the reset of each one of the environments
#         :return: a list of the environments' responses
#         """
#         return call_method_for_all(self.environments, 'reset', force_environment_reset)
#
#     def get_random_action(self) -> List[ActionType]:
#         """
#         Get a list of random actions that can be applied to the environments in the group
#         :return: a list of random actions
#         """
#         return call_method_for_all(self.environments, 'get_random_action')
#
#     def set_goal(self, goal: GoalType) -> None:
#         """
#         Set the goal of each one of the environments in the group to be the given goal
#         :param goal: a goal vector
#         :return: None
#         """
#         # TODO: maybe enable setting multiple goals?
#         call_method_for_all(self.environments, 'set_goal', goal)
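#
#
# # ------------------------------------------------------------------------------------------------
# # Hypothetical usage sketch (not part of the original module, kept commented out like the rest of
# # this file): it illustrates the intended batched-stepping flow. `env_params_a` / `env_params_b`
# # are placeholder names for whatever parameter objects create_environment() expects.
# # ------------------------------------------------------------------------------------------------
# def _example_usage(env_params_a, env_params_b):
#     # group two environments so the agent can act on them as a batch
#     group = EnvironmentGroup(environments_parameters=[env_params_a, env_params_b])
#     group._create_environments()
#
#     # one action per environment in the group; each can be an action index or a numpy array
#     actions = group.get_random_action()
#     transitions = group.step(actions)
#
#     # the group also caches the last responses
#     return transitions, group.last_env_response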