1
0
mirror of https://github.com/gryf/coach.git synced 2026-03-03 23:35:51 +01:00

trace tests update

This commit is contained in:
Shadi Endrawis
2018-10-02 17:55:16 +03:00
parent 51726a5b80
commit f7990d4003
79 changed files with 10105 additions and 9539 deletions

View File

@@ -1,6 +1,6 @@
Episode #,Training Iter,In Heatup,ER #Transitions,ER #Episodes,Episode Length,Total steps,Epsilon,Shaped Training Reward,Training Reward,Update Target Network,Evaluation Reward,Shaped Evaluation Reward,Success Rate,Loss/Mean,Loss/Stdev,Loss/Max,Loss/Min,Learning Rate/Mean,Learning Rate/Stdev,Learning Rate/Max,Learning Rate/Min,Grads (unclipped)/Mean,Grads (unclipped)/Stdev,Grads (unclipped)/Max,Grads (unclipped)/Min,Discounted Return/Mean,Discounted Return/Stdev,Discounted Return/Max,Discounted Return/Min,Entropy/Mean,Entropy/Stdev,Entropy/Max,Entropy/Min,Q/Mean,Q/Stdev,Q/Max,Q/Min,Q Values/Mean,Q Values/Stdev,Q Values/Max,Q Values/Min,Value Loss/Mean,Value Loss/Stdev,Value Loss/Max,Value Loss/Min
1,0.0,1.0,1117.0,1.0,1117.0,1117.0,0.5,,,0.0,,,,,,,,,,,,,,,,-1.5180229894995567,0.6998808293377133,-0.08930329112720292,-3.148474706421977,,,,,,,,,,,,,,,,
2,163.0,0.0,821.0,1.0,821.0,1938.0,0.4919541999999965,-21.0,-21.0,0.0,,,,,,,,,,,,,,,,-2.405652578063971,0.6237147471281423,-0.7105532272722921,-3.3691179328950627,,,,,,,,,0.25339470000000003,0.06996354,0.40677336,-0.35897204,0.035283737,0.10252844,1.0475135,1.1831225500000001e-05
3,320.0,0.0,782.0,1.0,782.0,2720.0,0.4842905999999932,-21.0,-21.0,0.0,,,,,,,,,,,,,,,,-2.4614277069600043,0.5586658402302739,-0.7105532272722921,-3.354852824180864,,,,,,,,,0.20715186,0.062277785999999995,0.35004243,0.0036477323,0.05950941,0.13284620000000003,0.55984885,1.9053832e-05
4,522.0,0.0,1009.0,1.0,1009.0,3729.0,0.4744023999999889,-19.0,-19.0,0.0,,,,,,,,,,,,,,,,-1.74034851817599,0.8736518980911252,0.29537702481737355,-3.229858453919355,,,,,,,,,0.1964524,0.06919237,0.40447715,0.0016004617,0.08728501,0.21507107,0.96532106,3.7585607e-05
5,673.0,0.0,755.0,1.0,755.0,4484.0,0.4670033999999857,-21.0,-21.0,0.0,,,,,,,,,,,,,,,,-2.5246431129611286,0.5835765895797549,-0.7105532272722921,-3.3699982440767453,,,,,,,,,0.16121916,0.030521521,0.26771998,0.09214279,0.11407282,0.2374467,0.7852985,0.00861873
2,151.0,0.0,760.0,1.0,760.0,1877.0,0.4925519999999968,-21.0,-21.0,0.0,,,,,,,,,,,,,,,,-2.5205372468300253,0.5838419974113738,-0.7105532272722921,-3.3699982440767453,,,,,,,,,0.07112427,0.07581978,0.32078072,-0.06063268,0.11560041,0.23668602,0.97285825,0.00011849090000000001
3,353.0,0.0,1008.0,1.0,1008.0,2885.0,0.4826735999999925,-20.0,-20.0,0.0,,,,,,,,,,,,,,,,-1.8720954986783211,0.7097144372888278,-0.3754689651451796,-3.3225778431943085,,,,,,,,,0.12676543,0.06586319,0.30209106,-0.04302346,0.06763674,0.17496337,0.88473463,9.743495999999999e-05
4,516.0,0.0,814.0,1.0,814.0,3699.0,0.474696399999989,-20.0,-20.0,0.0,,,,,,,,,,,,,,,,-2.193174392327956,0.69198455704147,-0.4780715307780122,-3.3372597252516765,,,,,,,,,0.10700438,0.038330942,0.31717354,0.03721839,0.10371645,0.23203200000000002,1.0052496,0.00057114265
5,703.0,0.0,932.0,1.0,932.0,4631.0,0.465562799999985,-21.0,-21.0,0.0,,,,,,,,,,,,,,,,-2.122317833023211,0.6454978854674346,-0.7105532272722921,-3.354852824180864,,,,,,,,,0.16040073,0.044896505999999996,0.29440245,0.063172355,0.04608961,0.11784693,0.5444492,0.00014167171000000002
1 Episode # Training Iter In Heatup ER #Transitions ER #Episodes Episode Length Total steps Epsilon Shaped Training Reward Training Reward Update Target Network Evaluation Reward Shaped Evaluation Reward Success Rate Loss/Mean Loss/Stdev Loss/Max Loss/Min Learning Rate/Mean Learning Rate/Stdev Learning Rate/Max Learning Rate/Min Grads (unclipped)/Mean Grads (unclipped)/Stdev Grads (unclipped)/Max Grads (unclipped)/Min Discounted Return/Mean Discounted Return/Stdev Discounted Return/Max Discounted Return/Min Entropy/Mean Entropy/Stdev Entropy/Max Entropy/Min Q/Mean Q/Stdev Q/Max Q/Min Q Values/Mean Q Values/Stdev Q Values/Max Q Values/Min Value Loss/Mean Value Loss/Stdev Value Loss/Max Value Loss/Min
2 1 0.0 1.0 1117.0 1.0 1117.0 1117.0 0.5 0.0 -1.5180229894995567 0.6998808293377133 -0.08930329112720292 -3.148474706421977
3 2 163.0 151.0 0.0 821.0 760.0 1.0 821.0 760.0 1938.0 1877.0 0.4919541999999965 0.4925519999999968 -21.0 -21.0 0.0 -2.405652578063971 -2.5205372468300253 0.6237147471281423 0.5838419974113738 -0.7105532272722921 -3.3691179328950627 -3.3699982440767453 0.25339470000000003 0.07112427 0.06996354 0.07581978 0.40677336 0.32078072 -0.35897204 -0.06063268 0.035283737 0.11560041 0.10252844 0.23668602 1.0475135 0.97285825 1.1831225500000001e-05 0.00011849090000000001
4 3 320.0 353.0 0.0 782.0 1008.0 1.0 782.0 1008.0 2720.0 2885.0 0.4842905999999932 0.4826735999999925 -21.0 -20.0 -21.0 -20.0 0.0 -2.4614277069600043 -1.8720954986783211 0.5586658402302739 0.7097144372888278 -0.7105532272722921 -0.3754689651451796 -3.354852824180864 -3.3225778431943085 0.20715186 0.12676543 0.062277785999999995 0.06586319 0.35004243 0.30209106 0.0036477323 -0.04302346 0.05950941 0.06763674 0.13284620000000003 0.17496337 0.55984885 0.88473463 1.9053832e-05 9.743495999999999e-05
5 4 522.0 516.0 0.0 1009.0 814.0 1.0 1009.0 814.0 3729.0 3699.0 0.4744023999999889 0.474696399999989 -19.0 -20.0 -19.0 -20.0 0.0 -1.74034851817599 -2.193174392327956 0.8736518980911252 0.69198455704147 0.29537702481737355 -0.4780715307780122 -3.229858453919355 -3.3372597252516765 0.1964524 0.10700438 0.06919237 0.038330942 0.40447715 0.31717354 0.0016004617 0.03721839 0.08728501 0.10371645 0.21507107 0.23203200000000002 0.96532106 1.0052496 3.7585607e-05 0.00057114265
6 5 673.0 703.0 0.0 755.0 932.0 1.0 755.0 932.0 4484.0 4631.0 0.4670033999999857 0.465562799999985 -21.0 -21.0 0.0 -2.5246431129611286 -2.122317833023211 0.5835765895797549 0.6454978854674346 -0.7105532272722921 -3.3699982440767453 -3.354852824180864 0.16121916 0.16040073 0.030521521 0.044896505999999996 0.26771998 0.29440245 0.09214279 0.063172355 0.11407282 0.04608961 0.2374467 0.11784693 0.7852985 0.5444492 0.00861873 0.00014167171000000002