mirror of
https://github.com/gryf/coach.git
synced 2025-12-18 03:30:19 +01:00
SAC algorithm (#282)
* SAC algorithm * SAC - updates to agent (learn_from_batch), sac_head and sac_q_head to fix problem in gradient calculation. Now SAC agents is able to train. gym_environment - fixing an error in access to gym.spaces * Soft Actor Critic - code cleanup * code cleanup * V-head initialization fix * SAC benchmarks * SAC Documentation * typo fix * documentation fixes * documentation and version update * README typo
This commit is contained in:
@@ -382,26 +382,14 @@
|
||||
<td style="width: 33%; vertical-align: top;"><ul>
|
||||
<li><a href="components/exploration_policies/index.html#rl_coach.exploration_policies.e_greedy.EGreedy">EGreedy (class in rl_coach.exploration_policies.e_greedy)</a>
|
||||
</li>
|
||||
<li><a href="components/agents/index.html#rl_coach.agents.agent.Agent.emulate_act_on_trainer">emulate_act_on_trainer() (rl_coach.agents.agent.Agent method)</a>
|
||||
|
||||
<ul>
|
||||
<li><a href="test.html#rl_coach.agents.dqn_agent.DQNAgent.emulate_act_on_trainer">(rl_coach.agents.dqn_agent.DQNAgent method)</a>
|
||||
</li>
|
||||
</ul></li>
|
||||
<li><a href="components/agents/index.html#rl_coach.agents.agent.Agent.emulate_observe_on_trainer">emulate_observe_on_trainer() (rl_coach.agents.agent.Agent method)</a>
|
||||
|
||||
<ul>
|
||||
<li><a href="test.html#rl_coach.agents.dqn_agent.DQNAgent.emulate_observe_on_trainer">(rl_coach.agents.dqn_agent.DQNAgent method)</a>
|
||||
</li>
|
||||
</ul></li>
|
||||
<li><a href="components/environments/index.html#rl_coach.environments.environment.Environment">Environment (class in rl_coach.environments.environment)</a>
|
||||
</li>
|
||||
</ul></td>
|
||||
<td style="width: 33%; vertical-align: top;"><ul>
|
||||
<li><a href="components/core_types.html#rl_coach.core_types.EnvResponse">EnvResponse (class in rl_coach.core_types)</a>
|
||||
</li>
|
||||
<li><a href="components/core_types.html#rl_coach.core_types.Episode">Episode (class in rl_coach.core_types)</a>
|
||||
</li>
|
||||
</ul></td>
|
||||
<td style="width: 33%; vertical-align: top;"><ul>
|
||||
<li><a href="components/memories/index.html#rl_coach.memories.episodic.EpisodicExperienceReplay">EpisodicExperienceReplay (class in rl_coach.memories.episodic)</a>
|
||||
</li>
|
||||
<li><a href="components/memories/index.html#rl_coach.memories.episodic.EpisodicHindsightExperienceReplay">EpisodicHindsightExperienceReplay (class in rl_coach.memories.episodic)</a>
|
||||
@@ -503,6 +491,8 @@
|
||||
<table style="width: 100%" class="indextable genindextable"><tr>
|
||||
<td style="width: 33%; vertical-align: top;"><ul>
|
||||
<li><a href="components/spaces.html#rl_coach.spaces.ImageObservationSpace">ImageObservationSpace (class in rl_coach.spaces)</a>
|
||||
</li>
|
||||
<li><a href="test.html#rl_coach.agents.dqn_agent.DQNAgent.improve_reward_model">improve_reward_model() (rl_coach.agents.dqn_agent.DQNAgent method)</a>
|
||||
</li>
|
||||
<li><a href="components/core_types.html#rl_coach.core_types.Batch.info">info() (rl_coach.core_types.Batch method)</a>
|
||||
</li>
|
||||
@@ -738,8 +728,6 @@
|
||||
<li><a href="test.html#rl_coach.agents.dqn_agent.DQNAgent.reset_evaluation_state">(rl_coach.agents.dqn_agent.DQNAgent method)</a>
|
||||
</li>
|
||||
</ul></li>
|
||||
</ul></td>
|
||||
<td style="width: 33%; vertical-align: top;"><ul>
|
||||
<li><a href="components/agents/index.html#rl_coach.agents.agent.Agent.reset_internal_state">reset_internal_state() (rl_coach.agents.agent.Agent method)</a>
|
||||
|
||||
<ul>
|
||||
@@ -748,6 +736,8 @@
|
||||
<li><a href="components/environments/index.html#rl_coach.environments.environment.Environment.reset_internal_state">(rl_coach.environments.environment.Environment method)</a>
|
||||
</li>
|
||||
</ul></li>
|
||||
</ul></td>
|
||||
<td style="width: 33%; vertical-align: top;"><ul>
|
||||
<li><a href="components/agents/index.html#rl_coach.agents.agent.Agent.restore_checkpoint">restore_checkpoint() (rl_coach.agents.agent.Agent method)</a>
|
||||
|
||||
<ul>
|
||||
@@ -762,6 +752,12 @@
|
||||
</li>
|
||||
<li><a href="components/core_types.html#rl_coach.core_types.Batch.rewards">rewards() (rl_coach.core_types.Batch method)</a>
|
||||
</li>
|
||||
<li><a href="components/agents/index.html#rl_coach.agents.agent.Agent.run_off_policy_evaluation">run_off_policy_evaluation() (rl_coach.agents.agent.Agent method)</a>
|
||||
|
||||
<ul>
|
||||
<li><a href="test.html#rl_coach.agents.dqn_agent.DQNAgent.run_off_policy_evaluation">(rl_coach.agents.dqn_agent.DQNAgent method)</a>
|
||||
</li>
|
||||
</ul></li>
|
||||
<li><a href="components/agents/index.html#rl_coach.agents.agent.Agent.run_pre_network_filter_for_inference">run_pre_network_filter_for_inference() (rl_coach.agents.agent.Agent method)</a>
|
||||
|
||||
<ul>
|
||||
@@ -839,6 +835,8 @@
|
||||
<li><a href="components/core_types.html#rl_coach.core_types.Batch.size">size (rl_coach.core_types.Batch attribute)</a>
|
||||
</li>
|
||||
<li><a href="components/core_types.html#rl_coach.core_types.Batch.slice">slice() (rl_coach.core_types.Batch method)</a>
|
||||
</li>
|
||||
<li><a href="components/agents/policy_optimization/sac.html#rl_coach.agents.soft_actor_critic_agent.SoftActorCriticAlgorithmParameters">SoftActorCriticAlgorithmParameters (class in rl_coach.agents.soft_actor_critic_agent)</a>
|
||||
</li>
|
||||
<li><a href="components/spaces.html#rl_coach.spaces.Space">Space (class in rl_coach.spaces)</a>
|
||||
</li>
|
||||
|
||||
Reference in New Issue
Block a user