1
0
mirror of https://github.com/gryf/coach.git synced 2026-04-20 15:11:24 +02:00

Trace tests update

This commit is contained in:
Shadi Endrawis
2018-08-20 13:01:17 +03:00
parent c1f428666e
commit 3abb6cd415
99 changed files with 12876 additions and 39 deletions
@@ -0,0 +1,6 @@
Episode #,Training Iter,In Heatup,ER #Transitions,ER #Episodes,Episode Length,Total steps,Epsilon,Shaped Training Reward,Training Reward,Update Target Network,Evaluation Reward,Shaped Evaluation Reward,Success Rate,Loss/Mean,Loss/Stdev,Loss/Max,Loss/Min,Learning Rate/Mean,Learning Rate/Stdev,Learning Rate/Max,Learning Rate/Min,Grads (unclipped)/Mean,Grads (unclipped)/Stdev,Grads (unclipped)/Max,Grads (unclipped)/Min,Q/Mean,Q/Stdev,Q/Max,Q/Min
1,0.0,1.0,1117.0,1117.0,1117.0,1117.0,1.0,,,0.0,,,,,,,,,,,,,,,,,,,
2,221.0,0.0,2002.0,2002.0,885.0,2002.0,0.999123850000019,-21.0,-21.0,0.0,,,,0.006624795104714366,0.00394576811971849,0.01863841339945793,6.383289291989058e-05,6.250000000000003e-05,2.7105054312137605e-20,6.25e-05,6.25e-05,0.032127135,0.014603343000000001,0.12838697,0.005512589,,,,
3,455.0,0.0,2938.0,2938.0,936.0,2938.0,0.9981972100000392,-20.0,-20.0,0.0,,,,0.006993958523544746,0.0031627418936934102,0.01826494000852108,0.000633664894849062,6.250000000000003e-05,2.7105054312137605e-20,6.25e-05,6.25e-05,0.026382675,0.010049541,0.06018944,0.009578557,-0.08102258,0.054663535,-0.0028564844,-0.15667786
4,659.0,0.0,3754.0,3754.0,816.0,3754.0,0.9973893700000568,-21.0,-21.0,0.0,,,,0.00653242061713065,0.0030014368076197325,0.014597361907362938,3.4910688555100926e-05,6.250000000000001e-05,1.3552527156068802e-20,6.25e-05,6.25e-05,0.019908648,0.0060336159999999995,0.03786578,0.003926692,,,,
5,906.0,0.0,4739.0,4739.0,985.0,4739.0,0.9964142200000778,-20.0,-20.0,0.0,,,,0.005325366398493989,0.00258031872854336,0.01823988556861877,6.391682836692779e-05,6.250000000000003e-05,2.7105054312137605e-20,6.25e-05,6.25e-05,0.016708475,0.006444646,0.051227405999999996,0.0036940586,-0.042256642000000004,0.010646114,-0.030611286,-0.06712968
1 Episode # Training Iter In Heatup ER #Transitions ER #Episodes Episode Length Total steps Epsilon Shaped Training Reward Training Reward Update Target Network Evaluation Reward Shaped Evaluation Reward Success Rate Loss/Mean Loss/Stdev Loss/Max Loss/Min Learning Rate/Mean Learning Rate/Stdev Learning Rate/Max Learning Rate/Min Grads (unclipped)/Mean Grads (unclipped)/Stdev Grads (unclipped)/Max Grads (unclipped)/Min Q/Mean Q/Stdev Q/Max Q/Min
2 1 0.0 1.0 1117.0 1117.0 1117.0 1117.0 1.0 0.0
3 2 221.0 0.0 2002.0 2002.0 885.0 2002.0 0.999123850000019 -21.0 -21.0 0.0 0.006624795104714366 0.00394576811971849 0.01863841339945793 6.383289291989058e-05 6.250000000000003e-05 2.7105054312137605e-20 6.25e-05 6.25e-05 0.032127135 0.014603343000000001 0.12838697 0.005512589
4 3 455.0 0.0 2938.0 2938.0 936.0 2938.0 0.9981972100000392 -20.0 -20.0 0.0 0.006993958523544746 0.0031627418936934102 0.01826494000852108 0.000633664894849062 6.250000000000003e-05 2.7105054312137605e-20 6.25e-05 6.25e-05 0.026382675 0.010049541 0.06018944 0.009578557 -0.08102258 0.054663535 -0.0028564844 -0.15667786
5 4 659.0 0.0 3754.0 3754.0 816.0 3754.0 0.9973893700000568 -21.0 -21.0 0.0 0.00653242061713065 0.0030014368076197325 0.014597361907362938 3.4910688555100926e-05 6.250000000000001e-05 1.3552527156068802e-20 6.25e-05 6.25e-05 0.019908648 0.0060336159999999995 0.03786578 0.003926692
6 5 906.0 0.0 4739.0 4739.0 985.0 4739.0 0.9964142200000778 -20.0 -20.0 0.0 0.005325366398493989 0.00258031872854336 0.01823988556861877 6.391682836692779e-05 6.250000000000003e-05 2.7105054312137605e-20 6.25e-05 6.25e-05 0.016708475 0.006444646 0.051227405999999996 0.0036940586 -0.042256642000000004 0.010646114 -0.030611286 -0.06712968