Mirror of https://github.com/gryf/coach.git, synced 2025-12-17 19:20:19 +01:00
fix nec_agent
@@ -18,6 +18,7 @@ import numpy as np
 
 from agents.value_optimization_agent import ValueOptimizationAgent
 from logger import screen
+from utils import RunPhase
 
 
 # Neural Episodic Control - https://arxiv.org/pdf/1703.01988.pdf
@@ -40,7 +41,7 @@ class NECAgent(ValueOptimizationAgent):
             screen.log_title("Finished collecting initial entries in DND. Starting to train network...")
 
         current_states, next_states, actions, rewards, game_overs, total_return = self.extract_batch(batch)
-        result = self.main_network.train_and_sync_networks([current_states, actions], total_return)
+        result = self.main_network.train_and_sync_networks(current_states, total_return)
         total_loss = result[0]
 
         return total_loss
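
For readers who want the change in context, below is a minimal, self-contained sketch of what the fixed call site does. StubNetwork, its dummy loss, and the sample arrays are illustrative assumptions made up for this sketch, not Coach's actual classes or data; the only thing taken from the commit is the shape of the call: train_and_sync_networks(inputs, targets) receiving the states batch directly rather than a [current_states, actions] list.

import numpy as np

# Stand-in for Coach's network wrapper (assumption: a single-input network
# whose train_and_sync_networks(inputs, targets) takes one batch array).
class StubNetwork:
    def train_and_sync_networks(self, inputs, targets):
        inputs = np.asarray(inputs, dtype=float)
        targets = np.asarray(targets, dtype=float)
        assert inputs.shape[0] == targets.shape[0], "batch sizes must match"
        # Dummy loss standing in for a real training step.
        loss = float(np.mean((inputs.sum(axis=1) - targets) ** 2))
        return [loss]

main_network = StubNetwork()
current_states = np.random.rand(4, 8)       # batch of 4 states, 8 features each
actions = np.random.randint(0, 2, size=4)   # per-transition actions (unused after the fix)
total_return = np.random.rand(4)            # per-transition returns used as targets

# Before the fix: wrapping two differently shaped arrays in a list does not
# form a single well-shaped input batch for a one-input network.
# result = main_network.train_and_sync_networks([current_states, actions], total_return)

# After the fix: pass the states batch directly, as in the commit.
result = main_network.train_and_sync_networks(current_states, total_return)
total_loss = result[0]
print(total_loss)

The old [current_states, actions] form would only make sense for a network whose graph consumes the action as a second input; the commit title and the new call suggest the NEC network here is fed states alone.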