1
0
mirror of https://github.com/gryf/coach.git synced 2026-04-20 23:41:24 +02:00

Trace tests update

This commit is contained in:
Shadi Endrawis
2018-08-20 13:01:17 +03:00
parent c1f428666e
commit 3abb6cd415
99 changed files with 12876 additions and 39 deletions
@@ -0,0 +1,6 @@
Episode #,Training Iter,In Heatup,ER #Transitions,ER #Episodes,Episode Length,Total steps,Epsilon,Shaped Training Reward,Training Reward,Update Target Network,Evaluation Reward,Shaped Evaluation Reward,Success Rate,Loss/Mean,Loss/Stdev,Loss/Max,Loss/Min,Learning Rate/Mean,Learning Rate/Stdev,Learning Rate/Max,Learning Rate/Min,Grads (unclipped)/Mean,Grads (unclipped)/Stdev,Grads (unclipped)/Max,Grads (unclipped)/Min,Q/Mean,Q/Stdev,Q/Max,Q/Min
1,0.0,1.0,1117.0,1117.0,1117.0,1117.0,1.0,,,0.0,,,,,,,,,,,,,,,,,,,
2,210.0,0.0,1958.0,1958.0,841.0,1958.0,0.999167410000018,-20.0,-20.0,0.0,,,,0.011338375554208012,0.012271934396749055,0.04895064979791641,4.0612991142552346e-05,0.00010000000000000002,1.3552527156068802e-20,0.0001,0.0001,0.089302726,0.052687183,0.26947329999999997,0.008767666,,,,
3,402.0,0.0,2726.0,2726.0,768.0,2726.0,0.9984070900000346,-21.0,-21.0,0.0,,,,0.012148191395510821,0.0140916556236684,0.08563371002674103,4.236549284541979e-05,0.00010000000000000003,2.7105054312137605e-20,0.0001,0.0001,0.07570279,0.04711025,0.3658558,0.004744183,0.0034259886,0.0050672004,0.010562051000000001,-0.004941341
4,601.0,0.0,3519.0,3519.0,793.0,3519.0,0.9976220200000516,-21.0,-21.0,0.0,,,,0.013526306782753192,0.013285856236359452,0.048545010387897485,6.407919136108829e-05,0.0001,0.0,0.0001,0.0001,0.061247554,0.032466255,0.1804012,0.009472755,-0.0073855095999999995,0.0022593734,-0.0044954764,-0.009999375999999999
5,809.0,0.0,4352.0,4352.0,833.0,4352.0,0.9967973500000696,-21.0,-21.0,0.0,,,,0.011593266384177415,0.0126054028157575,0.06050398200750351,2.748135375441052e-05,0.00010000000000000003,2.7105054312137605e-20,0.0001,0.0001,0.058888417,0.03245456,0.21228382,0.00490136,0.050208718,0.0025627778,0.05198091,0.04461886
1 Episode # Training Iter In Heatup ER #Transitions ER #Episodes Episode Length Total steps Epsilon Shaped Training Reward Training Reward Update Target Network Evaluation Reward Shaped Evaluation Reward Success Rate Loss/Mean Loss/Stdev Loss/Max Loss/Min Learning Rate/Mean Learning Rate/Stdev Learning Rate/Max Learning Rate/Min Grads (unclipped)/Mean Grads (unclipped)/Stdev Grads (unclipped)/Max Grads (unclipped)/Min Q/Mean Q/Stdev Q/Max Q/Min
2 1 0.0 1.0 1117.0 1117.0 1117.0 1117.0 1.0 0.0
3 2 210.0 0.0 1958.0 1958.0 841.0 1958.0 0.999167410000018 -20.0 -20.0 0.0 0.011338375554208012 0.012271934396749055 0.04895064979791641 4.0612991142552346e-05 0.00010000000000000002 1.3552527156068802e-20 0.0001 0.0001 0.089302726 0.052687183 0.26947329999999997 0.008767666
4 3 402.0 0.0 2726.0 2726.0 768.0 2726.0 0.9984070900000346 -21.0 -21.0 0.0 0.012148191395510821 0.0140916556236684 0.08563371002674103 4.236549284541979e-05 0.00010000000000000003 2.7105054312137605e-20 0.0001 0.0001 0.07570279 0.04711025 0.3658558 0.004744183 0.0034259886 0.0050672004 0.010562051000000001 -0.004941341
5 4 601.0 0.0 3519.0 3519.0 793.0 3519.0 0.9976220200000516 -21.0 -21.0 0.0 0.013526306782753192 0.013285856236359452 0.048545010387897485 6.407919136108829e-05 0.0001 0.0 0.0001 0.0001 0.061247554 0.032466255 0.1804012 0.009472755 -0.0073855095999999995 0.0022593734 -0.0044954764 -0.009999375999999999
6 5 809.0 0.0 4352.0 4352.0 833.0 4352.0 0.9967973500000696 -21.0 -21.0 0.0 0.011593266384177415 0.0126054028157575 0.06050398200750351 2.748135375441052e-05 0.00010000000000000003 2.7105054312137605e-20 0.0001 0.0001 0.058888417 0.03245456 0.21228382 0.00490136 0.050208718 0.0025627778 0.05198091 0.04461886