mirror of
https://github.com/gryf/coach.git
synced 2025-12-17 11:10:20 +01:00
Updating PPO references per issue #11
This commit is contained in:
@@ -19,7 +19,7 @@ from random import shuffle
|
||||
import tensorflow as tf
|
||||
|
||||
|
||||
-# Proximal Policy Optimization - https://arxiv.org/pdf/1707.02286.pdf
+# Proximal Policy Optimization - https://arxiv.org/pdf/1707.06347.pdf
|
||||
class PPOAgent(ActorCriticAgent):
|
||||
def __init__(self, env, tuning_parameters, replicated_device=None, thread_id=0):
|
||||
ActorCriticAgent.__init__(self, env, tuning_parameters, replicated_device, thread_id,
|
||||
|
||||
Reference in New Issue
Block a user