1
0
mirror of https://github.com/gryf/coach.git synced 2026-04-11 07:33:37 +02:00

Trace tests update

This commit is contained in:
Shadi Endrawis
2018-08-20 13:01:17 +03:00
parent c1f428666e
commit 3abb6cd415
99 changed files with 12876 additions and 39 deletions

View File

@@ -0,0 +1,6 @@
Episode #,Training Iter,In Heatup,ER #Transitions,ER #Episodes,Episode Length,Total steps,Epsilon,Shaped Training Reward,Training Reward,Update Target Network,Evaluation Reward,Shaped Evaluation Reward,Success Rate,Loss/Mean,Loss/Stdev,Loss/Max,Loss/Min,Learning Rate/Mean,Learning Rate/Stdev,Learning Rate/Max,Learning Rate/Min,Grads (unclipped)/Mean,Grads (unclipped)/Stdev,Grads (unclipped)/Max,Grads (unclipped)/Min,Q/Mean,Q/Stdev,Q/Max,Q/Min
1,0.0,1.0,1117.0,1117.0,1117.0,1117.0,1.0,,,0.0,,,,,,,,,,,,,,,,,,,
2,210.0,0.0,1958.0,1958.0,841.0,1958.0,0.999167410000018,-20.0,-20.0,0.0,,,,0.011756908099604993,0.01245646310720048,0.05387234315276146,0.00010689756891224532,0.0002500000000000001,1.0842021724855042e-19,0.00025,0.00025,0.057962038,0.04616896400000001,0.26208854,0.0071766186,,,,
3,402.0,0.0,2726.0,2726.0,768.0,2726.0,0.9984070900000346,-21.0,-21.0,0.0,,,,0.012809355009267165,0.013771132011321113,0.07975033670663834,5.99101695115678e-05,0.0002500000000000001,5.421010862427521e-20,0.00025,0.00025,0.052051324,0.028359309,0.17658195,0.008862591,-0.017426128,0.0060299635,-0.008042792,-0.026319288
4,601.0,0.0,3519.0,3519.0,793.0,3519.0,0.9976220200000516,-21.0,-21.0,0.0,,,,0.015272312543569037,0.013672084153799915,0.05628284066915512,0.00023415754549205303,0.0002500000000000001,5.421010862427521e-20,0.00025,0.00025,0.052314125,0.023336997,0.1473458,0.012913031,-0.031559315,0.0042713494,-0.023393027,-0.036418874
5,809.0,0.0,4352.0,4352.0,833.0,4352.0,0.9967973500000696,-21.0,-21.0,0.0,,,,0.013082799735107424,0.01255374334846098,0.06567259877920151,0.0004701522993855178,0.0002500000000000001,1.0842021724855042e-19,0.00025,0.00025,0.043265857000000005,0.014534917,0.089655906,0.017195849,-0.0053307074,0.0027605025,-0.0019208845999999999,-0.00974094
1 Episode # Training Iter In Heatup ER #Transitions ER #Episodes Episode Length Total steps Epsilon Shaped Training Reward Training Reward Update Target Network Evaluation Reward Shaped Evaluation Reward Success Rate Loss/Mean Loss/Stdev Loss/Max Loss/Min Learning Rate/Mean Learning Rate/Stdev Learning Rate/Max Learning Rate/Min Grads (unclipped)/Mean Grads (unclipped)/Stdev Grads (unclipped)/Max Grads (unclipped)/Min Q/Mean Q/Stdev Q/Max Q/Min
2 1 0.0 1.0 1117.0 1117.0 1117.0 1117.0 1.0 0.0
3 2 210.0 0.0 1958.0 1958.0 841.0 1958.0 0.999167410000018 -20.0 -20.0 0.0 0.011756908099604993 0.01245646310720048 0.05387234315276146 0.00010689756891224532 0.0002500000000000001 1.0842021724855042e-19 0.00025 0.00025 0.057962038 0.04616896400000001 0.26208854 0.0071766186
4 3 402.0 0.0 2726.0 2726.0 768.0 2726.0 0.9984070900000346 -21.0 -21.0 0.0 0.012809355009267165 0.013771132011321113 0.07975033670663834 5.99101695115678e-05 0.0002500000000000001 5.421010862427521e-20 0.00025 0.00025 0.052051324 0.028359309 0.17658195 0.008862591 -0.017426128 0.0060299635 -0.008042792 -0.026319288
5 4 601.0 0.0 3519.0 3519.0 793.0 3519.0 0.9976220200000516 -21.0 -21.0 0.0 0.015272312543569037 0.013672084153799915 0.05628284066915512 0.00023415754549205303 0.0002500000000000001 5.421010862427521e-20 0.00025 0.00025 0.052314125 0.023336997 0.1473458 0.012913031 -0.031559315 0.0042713494 -0.023393027 -0.036418874
6 5 809.0 0.0 4352.0 4352.0 833.0 4352.0 0.9967973500000696 -21.0 -21.0 0.0 0.013082799735107424 0.01255374334846098 0.06567259877920151 0.0004701522993855178 0.0002500000000000001 1.0842021724855042e-19 0.00025 0.00025 0.043265857000000005 0.014534917 0.089655906 0.017195849 -0.0053307074 0.0027605025 -0.0019208845999999999 -0.00974094