1
0
mirror of https://github.com/gryf/coach.git synced 2026-01-01 19:42:31 +01:00
Files
coach/rl_coach/traces/Pendulum_HAC/trace.csv
2018-08-20 13:01:30 +03:00

7 lines
1.7 KiB
CSV

Episode #,Training Iter,In Heatup,ER #Transitions,ER #Episodes,Episode Length,Total steps,Epsilon,Shaped Training Reward,Training Reward,Update Target Network,Evaluation Reward,Shaped Evaluation Reward,Success Rate,Loss/Mean,Loss/Stdev,Loss/Max,Loss/Min,Learning Rate/Mean,Learning Rate/Stdev,Learning Rate/Max,Learning Rate/Min,Grads (unclipped)/Mean,Grads (unclipped)/Stdev,Grads (unclipped)/Max,Grads (unclipped)/Min,Entropy/Mean,Entropy/Stdev,Entropy/Max,Entropy/Min,Advantages/Mean,Advantages/Stdev,Advantages/Max,Advantages/Min,Values/Mean,Values/Stdev,Values/Max,Values/Min,Value Loss/Mean,Value Loss/Stdev,Value Loss/Max,Value Loss/Min,Policy Loss/Mean,Policy Loss/Stdev,Policy Loss/Max,Policy Loss/Min,Q/Mean,Q/Stdev,Q/Max,Q/Min,TD targets/Mean,TD targets/Stdev,TD targets/Max,TD targets/Min,actions/Mean,actions/Stdev,actions/Max,actions/Min
1,0.0,1.0,97.0,1.0,25.0,25.0,0.0,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,0.0,1.0,194.0,2.0,25.0,50.0,0.0,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,0.0,0.0,291.0,3.0,25.0,75.0,-0.03819695695002292,-1000.0,-1000.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,-0.05867912,0.040427182,0.038633604,-0.13119522,,,,,-0.5875804651149715,0.9883034640881114,0.2924503923099136,-3.1955509185791016
4,0.0,0.0,388.0,4.0,25.0,100.0,0.008508156342542239,-1000.0,-1000.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,-0.04915462,0.027965656000000002,0.015574882,-0.11603892,,,,,-0.5310139374222866,0.9150246753002113,0.2726461971315715,-2.9480751131842533
5,0.0,0.0,485.0,5.0,25.0,125.0,0.0,-1000.0,-1000.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,-0.047291752,0.027684617999999998,0.030320742999999997,-0.11130883,,,,,-0.5612901779256286,0.929480152044698,0.23112091422080994,-2.8455907461559957