1
0
mirror of https://github.com/gryf/coach.git synced 2026-03-16 06:33:36 +01:00

trace tests update

This commit is contained in:
Shadi Endrawis
2018-10-02 17:55:16 +03:00
parent 51726a5b80
commit f7990d4003
79 changed files with 10105 additions and 9539 deletions

View File

@@ -1,6 +1,6 @@
Episode #,Training Iter,In Heatup,ER #Transitions,ER #Episodes,Episode Length,Total steps,Epsilon,Shaped Training Reward,Training Reward,Update Target Network,Evaluation Reward,Shaped Evaluation Reward,Success Rate,Loss/Mean,Loss/Stdev,Loss/Max,Loss/Min,Learning Rate/Mean,Learning Rate/Stdev,Learning Rate/Max,Learning Rate/Min,Grads (unclipped)/Mean,Grads (unclipped)/Stdev,Grads (unclipped)/Max,Grads (unclipped)/Min,Discounted Return/Mean,Discounted Return/Stdev,Discounted Return/Max,Discounted Return/Min,Entropy/Mean,Entropy/Stdev,Entropy/Max,Entropy/Min,Advantages/Mean,Advantages/Stdev,Advantages/Max,Advantages/Min,Values/Mean,Values/Stdev,Values/Max,Values/Min,Value Loss/Mean,Value Loss/Stdev,Value Loss/Max,Value Loss/Min,Policy Loss/Mean,Policy Loss/Stdev,Policy Loss/Max,Policy Loss/Min
1,0.0,1.0,881.0,1.0,881.0,881.0,0.0,,,0.0,,,,,,,,,,,,,,,,-2.041213323423532,0.9183659584454216,0.4796594773496936,-3.352701688899176,,,,,,,,,,,,,,,,,,,,
2,0.0,1.0,1043.0,1.0,1043.0,1924.0,0.0,,,0.0,,,,,,,,,,,,,,,,-1.7995206029952229,0.6440801924897366,-0.3927560490055896,-3.2471439401326068,,,,,,,,,,,,,,,,,,,,
3,38.0,0.0,763.0,1.0,763.0,2687.0,0.0,-21.0,-21.0,0.0,,,,,,,,,,,,1.3813618,1.3755096000000002,5.5379830000000005,0.0015163413000000001,-2.5178046202451694,0.5843148195084643,-0.7105532272722921,-3.3699982440767453,1.7471887,0.055258866,1.7901558999999998,1.5866796,-0.1592098490531115,0.4154601793782861,0.5181965827941895,-1.0076508522033691,-1.0213921000000001,0.9139581,0.05576827400000001,-2.8376002000000002,0.09897748,0.11257933,0.3836943,2.9006495999999997e-06,-0.28488272,0.5594197,0.49081215,-1.4856113000000002
4,75.0,0.0,740.0,1.0,740.0,3427.0,0.0,-21.0,-21.0,0.0,,,,,,,,,,,,3.2764843,1.857115,7.888277499999999,0.25566658,-2.533184641659896,0.5861942513660167,-0.7105532272722921,-3.3699982440767453,1.1448658,0.26420638,1.5718536,0.75876945,-0.053371320168177284,0.4283249731162562,0.6155099868774414,-0.9759833812713624,-2.7395887,0.34059632,-2.3099797000000004,-3.3791809999999995,0.09315539,0.06426737,0.24503651,0.033598172999999995,-0.07731818,0.43035093,0.41724620000000007,-1.0952158
5,113.0,0.0,755.0,1.0,755.0,4182.0,0.0,-21.0,-21.0,0.0,,,,,,,,,,,,2.9129169999999998,1.8344038,7.434381,0.2987832,-2.5246431129611286,0.5835765895797549,-0.7105532272722921,-3.3699982440767453,0.9606895000000001,0.09958161,1.0971520000000001,0.79424876,-0.08736807929502953,0.4281761045632924,0.5726819038391113,-0.9797717332839966,-2.3036256,0.30664575,-1.9442793999999999,-3.1548097,0.09548396599999999,0.0768301,0.26862615,0.023751104,-0.08123907,0.3593619,0.44418138,-0.9607479999999999
3,38.0,0.0,763.0,1.0,763.0,2687.0,0.0,-21.0,-21.0,0.0,,,,,,,,,,,,1.6584430000000001,1.5743899,6.016515,0.0040417006,-2.5178046202451694,0.5843148195084643,-0.7105532272722921,-3.3699982440767453,1.7365953,0.060906168,1.789465,1.5938344,-0.13552558197345782,0.4155380274318002,0.5708987712860107,-1.000117301940918,-1.2616656000000002,1.0273496,-0.025292996,-3.1295607000000003,0.09551952,0.105760135,0.37662047,3.6489364000000002e-06,-0.24352022,0.5668899000000001,0.5707099,-1.4730957
4,75.0,0.0,740.0,1.0,740.0,3427.0,0.0,-21.0,-21.0,0.0,,,,,,,,,,,,3.23845,1.8577351999999998,7.4290956999999995,0.28597927,-2.533184641659896,0.5861942513660167,-0.7105532272722921,-3.3699982440767453,1.3982204,0.15588406,1.5950133999999998,1.0844505,-0.06379161493645774,0.4273890014930976,0.5925705432891846,-0.9768838882446288,-2.6351327999999996,0.30348834,-2.275514,-3.2641757,0.093365364,0.06693241,0.24693722,0.032394797,-0.09340415,0.49059078,0.462054,-1.0635808
5,113.0,0.0,755.0,1.0,755.0,4182.0,0.0,-21.0,-21.0,0.0,,,,,,,,,,,,3.0537505,1.9605529,8.232577000000001,0.2008676,-2.5246431129611286,0.5835765895797549,-0.7105532272722921,-3.3699982440767453,1.265182,0.080454364,1.3671972,1.1099908,-0.08276949251020277,0.4270986000616472,0.5576775074005127,-0.9796819686889648,-2.353897,0.2969966,-2.0010118,-3.076977,0.09463200000000001,0.07529009,0.26480454,0.025184255,-0.10958841400000001,0.47672352,0.5033947,-1.1684946000000003
1 Episode # Training Iter In Heatup ER #Transitions ER #Episodes Episode Length Total steps Epsilon Shaped Training Reward Training Reward Update Target Network Evaluation Reward Shaped Evaluation Reward Success Rate Loss/Mean Loss/Stdev Loss/Max Loss/Min Learning Rate/Mean Learning Rate/Stdev Learning Rate/Max Learning Rate/Min Grads (unclipped)/Mean Grads (unclipped)/Stdev Grads (unclipped)/Max Grads (unclipped)/Min Discounted Return/Mean Discounted Return/Stdev Discounted Return/Max Discounted Return/Min Entropy/Mean Entropy/Stdev Entropy/Max Entropy/Min Advantages/Mean Advantages/Stdev Advantages/Max Advantages/Min Values/Mean Values/Stdev Values/Max Values/Min Value Loss/Mean Value Loss/Stdev Value Loss/Max Value Loss/Min Policy Loss/Mean Policy Loss/Stdev Policy Loss/Max Policy Loss/Min
2 1 0.0 1.0 881.0 1.0 881.0 881.0 0.0 0.0 -2.041213323423532 0.9183659584454216 0.4796594773496936 -3.352701688899176
3 2 0.0 1.0 1043.0 1.0 1043.0 1924.0 0.0 0.0 -1.7995206029952229 0.6440801924897366 -0.3927560490055896 -3.2471439401326068
4 3 38.0 0.0 763.0 1.0 763.0 2687.0 0.0 -21.0 -21.0 0.0 1.3813618 1.6584430000000001 1.3755096000000002 1.5743899 5.5379830000000005 6.016515 0.0015163413000000001 0.0040417006 -2.5178046202451694 0.5843148195084643 -0.7105532272722921 -3.3699982440767453 1.7471887 1.7365953 0.055258866 0.060906168 1.7901558999999998 1.789465 1.5866796 1.5938344 -0.1592098490531115 -0.13552558197345782 0.4154601793782861 0.4155380274318002 0.5181965827941895 0.5708987712860107 -1.0076508522033691 -1.000117301940918 -1.0213921000000001 -1.2616656000000002 0.9139581 1.0273496 0.05576827400000001 -0.025292996 -2.8376002000000002 -3.1295607000000003 0.09897748 0.09551952 0.11257933 0.105760135 0.3836943 0.37662047 2.9006495999999997e-06 3.6489364000000002e-06 -0.28488272 -0.24352022 0.5594197 0.5668899000000001 0.49081215 0.5707099 -1.4856113000000002 -1.4730957
5 4 75.0 0.0 740.0 1.0 740.0 3427.0 0.0 -21.0 -21.0 0.0 3.2764843 3.23845 1.857115 1.8577351999999998 7.888277499999999 7.4290956999999995 0.25566658 0.28597927 -2.533184641659896 0.5861942513660167 -0.7105532272722921 -3.3699982440767453 1.1448658 1.3982204 0.26420638 0.15588406 1.5718536 1.5950133999999998 0.75876945 1.0844505 -0.053371320168177284 -0.06379161493645774 0.4283249731162562 0.4273890014930976 0.6155099868774414 0.5925705432891846 -0.9759833812713624 -0.9768838882446288 -2.7395887 -2.6351327999999996 0.34059632 0.30348834 -2.3099797000000004 -2.275514 -3.3791809999999995 -3.2641757 0.09315539 0.093365364 0.06426737 0.06693241 0.24503651 0.24693722 0.033598172999999995 0.032394797 -0.07731818 -0.09340415 0.43035093 0.49059078 0.41724620000000007 0.462054 -1.0952158 -1.0635808
6 5 113.0 0.0 755.0 1.0 755.0 4182.0 0.0 -21.0 -21.0 0.0 2.9129169999999998 3.0537505 1.8344038 1.9605529 7.434381 8.232577000000001 0.2987832 0.2008676 -2.5246431129611286 0.5835765895797549 -0.7105532272722921 -3.3699982440767453 0.9606895000000001 1.265182 0.09958161 0.080454364 1.0971520000000001 1.3671972 0.79424876 1.1099908 -0.08736807929502953 -0.08276949251020277 0.4281761045632924 0.4270986000616472 0.5726819038391113 0.5576775074005127 -0.9797717332839966 -0.9796819686889648 -2.3036256 -2.353897 0.30664575 0.2969966 -1.9442793999999999 -2.0010118 -3.1548097 -3.076977 0.09548396599999999 0.09463200000000001 0.0768301 0.07529009 0.26862615 0.26480454 0.023751104 0.025184255 -0.08123907 -0.10958841400000001 0.3593619 0.47672352 0.44418138 0.5033947 -0.9607479999999999 -1.1684946000000003