1
0
mirror of https://github.com/gryf/coach.git synced 2026-04-07 13:43:32 +02:00

Trace tests update

This commit is contained in:
Shadi Endrawis
2018-08-20 13:01:17 +03:00
parent c1f428666e
commit 3abb6cd415
99 changed files with 12876 additions and 39 deletions

View File

@@ -0,0 +1,6 @@
Episode #,Training Iter,In Heatup,ER #Transitions,ER #Episodes,Episode Length,Total steps,Epsilon,Shaped Training Reward,Training Reward,Update Target Network,Evaluation Reward,Shaped Evaluation Reward,Success Rate,Loss/Mean,Loss/Stdev,Loss/Max,Loss/Min,Learning Rate/Mean,Learning Rate/Stdev,Learning Rate/Max,Learning Rate/Min,Grads (unclipped)/Mean,Grads (unclipped)/Stdev,Grads (unclipped)/Max,Grads (unclipped)/Min,Q/Mean,Q/Stdev,Q/Max,Q/Min
1,0.0,1.0,1117.0,1117.0,1117.0,1117.0,1.0,,,0.0,,,,,,,,,,,,,,,,,,,
2,210.0,0.0,1958.0,1958.0,841.0,1958.0,0.9992431000000248,-20.0,-20.0,0.0,,,,0.011158549779723952,0.01233800718156463,0.04892086610198021,7.747895870124921e-05,0.00010000000000000002,1.3552527156068802e-20,0.0001,0.0001,0.07765737,0.051450502,0.27204409999999996,0.016480377,,,,
3,402.0,0.0,2726.0,2726.0,768.0,2726.0,0.9985519000000476,-21.0,-21.0,0.0,,,,0.011682878495226607,0.013976986698806206,0.07550939172506332,3.554971408448182e-05,0.00010000000000000003,2.7105054312137605e-20,0.0001,0.0001,0.054967567,0.03760215,0.23677647,0.007137654300000001,0.059924055,0.010001821999999999,0.070588365,0.045257278
4,601.0,0.0,3519.0,3519.0,793.0,3519.0,0.9978382000000712,-21.0,-21.0,0.0,,,,0.013331305195076387,0.013162853602752194,0.0471726730465889,9.195879101753236e-05,0.0001,0.0,0.0001,0.0001,0.05391158,0.02641614,0.14699543,0.017958568,0.038910400000000005,0.006119223000000001,0.046009037999999995,0.030036567000000004
5,837.0,0.0,4466.0,4466.0,947.0,4466.0,0.9969859000000992,-20.0,-20.0,0.0,,,,0.011204646104627085,0.012869155071181351,0.06053701043128967,6.284505798248574e-05,0.00010000000000000002,1.3552527156068802e-20,0.0001,0.0001,0.047131248,0.026914247999999998,0.13275696,0.010900318999999999,,,,
1 Episode # Training Iter In Heatup ER #Transitions ER #Episodes Episode Length Total steps Epsilon Shaped Training Reward Training Reward Update Target Network Evaluation Reward Shaped Evaluation Reward Success Rate Loss/Mean Loss/Stdev Loss/Max Loss/Min Learning Rate/Mean Learning Rate/Stdev Learning Rate/Max Learning Rate/Min Grads (unclipped)/Mean Grads (unclipped)/Stdev Grads (unclipped)/Max Grads (unclipped)/Min Q/Mean Q/Stdev Q/Max Q/Min
2 1 0.0 1.0 1117.0 1117.0 1117.0 1117.0 1.0 0.0
3 2 210.0 0.0 1958.0 1958.0 841.0 1958.0 0.9992431000000248 -20.0 -20.0 0.0 0.011158549779723952 0.01233800718156463 0.04892086610198021 7.747895870124921e-05 0.00010000000000000002 1.3552527156068802e-20 0.0001 0.0001 0.07765737 0.051450502 0.27204409999999996 0.016480377
4 3 402.0 0.0 2726.0 2726.0 768.0 2726.0 0.9985519000000476 -21.0 -21.0 0.0 0.011682878495226607 0.013976986698806206 0.07550939172506332 3.554971408448182e-05 0.00010000000000000003 2.7105054312137605e-20 0.0001 0.0001 0.054967567 0.03760215 0.23677647 0.007137654300000001 0.059924055 0.010001821999999999 0.070588365 0.045257278
5 4 601.0 0.0 3519.0 3519.0 793.0 3519.0 0.9978382000000712 -21.0 -21.0 0.0 0.013331305195076387 0.013162853602752194 0.0471726730465889 9.195879101753236e-05 0.0001 0.0 0.0001 0.0001 0.05391158 0.02641614 0.14699543 0.017958568 0.038910400000000005 0.006119223000000001 0.046009037999999995 0.030036567000000004
6 5 837.0 0.0 4466.0 4466.0 947.0 4466.0 0.9969859000000992 -20.0 -20.0 0.0 0.011204646104627085 0.012869155071181351 0.06053701043128967 6.284505798248574e-05 0.00010000000000000002 1.3552527156068802e-20 0.0001 0.0001 0.047131248 0.026914247999999998 0.13275696 0.010900318999999999