1
0
mirror of https://github.com/gryf/coach.git synced 2026-04-01 01:23:33 +02:00

new traces

This commit is contained in:
itaicaspi-intel
2018-09-12 15:29:42 +03:00
parent 673911ff7f
commit fa4895f840
76 changed files with 12786 additions and 12606 deletions

View File

@@ -1,6 +1,6 @@
Episode #,Training Iter,In Heatup,ER #Transitions,ER #Episodes,Episode Length,Total steps,Epsilon,Shaped Training Reward,Training Reward,Update Target Network,Evaluation Reward,Shaped Evaluation Reward,Success Rate,Loss/Mean,Loss/Stdev,Loss/Max,Loss/Min,Learning Rate/Mean,Learning Rate/Stdev,Learning Rate/Max,Learning Rate/Min,Grads (unclipped)/Mean,Grads (unclipped)/Stdev,Grads (unclipped)/Max,Grads (unclipped)/Min,Entropy/Mean,Entropy/Stdev,Entropy/Max,Entropy/Min,Advantages/Mean,Advantages/Stdev,Advantages/Max,Advantages/Min,Values/Mean,Values/Stdev,Values/Max,Values/Min,Value Loss/Mean,Value Loss/Stdev,Value Loss/Max,Value Loss/Min,Policy Loss/Mean,Policy Loss/Stdev,Policy Loss/Max,Policy Loss/Min
1,0.0,1.0,881.0,1.0,881.0,881.0,0.0,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,0.0,1.0,1043.0,1.0,1043.0,1924.0,0.0,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,38.0,0.0,763.0,1.0,763.0,2687.0,0.0,-21.0,-21.0,0.0,,,,,,,,,,,,1.4867063,1.4938432,5.887912,0.0015561687,1.7584827,0.03291289,1.788348,1.6738943,-0.15541847978173265,0.4162753016651965,0.5290131568908691,-1.0030009746551514,-1.0561148,0.93491656,0.021942224,-2.8995342,0.09872001400000001,0.11163085,0.38036227,1.426808e-06,-0.27536926,0.5678659,0.53289455,-1.4932774
4,75.0,0.0,740.0,1.0,740.0,3427.0,0.0,-21.0,-21.0,0.0,,,,,,,,,,,,3.4039152,1.9638362,7.991121000000001,0.21933316,1.4528251,0.16249819,1.6673243999999998,1.1318555000000001,-0.06318947109911177,0.4280228160756264,0.6142082214355469,-0.9774222373962402,-2.6464324,0.35272834,-2.2407157000000004,-3.3780959999999998,0.09359822,0.06729852,0.24981685,0.03076201,-0.096310705,0.51300126,0.43143788,-1.0997788999999998
5,113.0,0.0,755.0,1.0,755.0,4182.0,0.0,-21.0,-21.0,0.0,,,,,,,,,,,,3.233333,1.986448,7.6551165999999995,0.19970839,1.2935143999999998,0.11847049,1.439909,1.0601448,-0.08421046411668932,0.4278507532658671,0.5715954303741455,-0.980087161064148,-2.3434882000000004,0.31644145,-1.9696671000000001,-3.1619172000000004,0.09507382,0.07568375,0.26689446,0.024116684,-0.1209513,0.48729447,0.5198732,-1.1845143
Episode #,Training Iter,In Heatup,ER #Transitions,ER #Episodes,Episode Length,Total steps,Epsilon,Shaped Training Reward,Training Reward,Update Target Network,Evaluation Reward,Shaped Evaluation Reward,Success Rate,Loss/Mean,Loss/Stdev,Loss/Max,Loss/Min,Learning Rate/Mean,Learning Rate/Stdev,Learning Rate/Max,Learning Rate/Min,Grads (unclipped)/Mean,Grads (unclipped)/Stdev,Grads (unclipped)/Max,Grads (unclipped)/Min,Discounted Return/Mean,Discounted Return/Stdev,Discounted Return/Max,Discounted Return/Min,Entropy/Mean,Entropy/Stdev,Entropy/Max,Entropy/Min,Advantages/Mean,Advantages/Stdev,Advantages/Max,Advantages/Min,Values/Mean,Values/Stdev,Values/Max,Values/Min,Value Loss/Mean,Value Loss/Stdev,Value Loss/Max,Value Loss/Min,Policy Loss/Mean,Policy Loss/Stdev,Policy Loss/Max,Policy Loss/Min
1,0.0,1.0,881.0,1.0,881.0,881.0,0.0,,,0.0,,,,,,,,,,,,,,,,-2.041213323423532,0.9183659584454216,0.4796594773496936,-3.352701688899176,,,,,,,,,,,,,,,,,,,,
2,0.0,1.0,1043.0,1.0,1043.0,1924.0,0.0,,,0.0,,,,,,,,,,,,,,,,-1.7995206029952229,0.6440801924897366,-0.3927560490055896,-3.2471439401326068,,,,,,,,,,,,,,,,,,,,
3,38.0,0.0,763.0,1.0,763.0,2687.0,0.0,-21.0,-21.0,0.0,,,,,,,,,,,,1.3813618,1.3755096000000002,5.5379830000000005,0.0015163413000000001,-2.5178046202451694,0.5843148195084643,-0.7105532272722921,-3.3699982440767453,1.7471887,0.055258866,1.7901558999999998,1.5866796,-0.1592098490531115,0.4154601793782861,0.5181965827941895,-1.0076508522033691,-1.0213921000000001,0.9139581,0.05576827400000001,-2.8376002000000002,0.09897748,0.11257933,0.3836943,2.9006495999999997e-06,-0.28488272,0.5594197,0.49081215,-1.4856113000000002
4,75.0,0.0,740.0,1.0,740.0,3427.0,0.0,-21.0,-21.0,0.0,,,,,,,,,,,,3.2764843,1.857115,7.888277499999999,0.25566658,-2.533184641659896,0.5861942513660167,-0.7105532272722921,-3.3699982440767453,1.1448658,0.26420638,1.5718536,0.75876945,-0.053371320168177284,0.4283249731162562,0.6155099868774414,-0.9759833812713624,-2.7395887,0.34059632,-2.3099797000000004,-3.3791809999999995,0.09315539,0.06426737,0.24503651,0.033598172999999995,-0.07731818,0.43035093,0.41724620000000007,-1.0952158
5,113.0,0.0,755.0,1.0,755.0,4182.0,0.0,-21.0,-21.0,0.0,,,,,,,,,,,,2.9129169999999998,1.8344038,7.434381,0.2987832,-2.5246431129611286,0.5835765895797549,-0.7105532272722921,-3.3699982440767453,0.9606895000000001,0.09958161,1.0971520000000001,0.79424876,-0.08736807929502953,0.4281761045632924,0.5726819038391113,-0.9797717332839966,-2.3036256,0.30664575,-1.9442793999999999,-3.1548097,0.09548396599999999,0.0768301,0.26862615,0.023751104,-0.08123907,0.3593619,0.44418138,-0.9607479999999999
1 Episode # Training Iter In Heatup ER #Transitions ER #Episodes Episode Length Total steps Epsilon Shaped Training Reward Training Reward Update Target Network Evaluation Reward Shaped Evaluation Reward Success Rate Loss/Mean Loss/Stdev Loss/Max Loss/Min Learning Rate/Mean Learning Rate/Stdev Learning Rate/Max Learning Rate/Min Grads (unclipped)/Mean Grads (unclipped)/Stdev Grads (unclipped)/Max Grads (unclipped)/Min Discounted Return/Mean Discounted Return/Stdev Discounted Return/Max Discounted Return/Min Entropy/Mean Entropy/Stdev Entropy/Max Entropy/Min Advantages/Mean Advantages/Stdev Advantages/Max Advantages/Min Values/Mean Values/Stdev Values/Max Values/Min Value Loss/Mean Value Loss/Stdev Value Loss/Max Value Loss/Min Policy Loss/Mean Policy Loss/Stdev Policy Loss/Max Policy Loss/Min
2 1 0.0 1.0 881.0 1.0 881.0 881.0 0.0 0.0 -2.041213323423532 0.9183659584454216 0.4796594773496936 -3.352701688899176
3 2 0.0 1.0 1043.0 1.0 1043.0 1924.0 0.0 0.0 -1.7995206029952229 0.6440801924897366 -0.3927560490055896 -3.2471439401326068
4 3 38.0 0.0 763.0 1.0 763.0 2687.0 0.0 -21.0 -21.0 0.0 1.4867063 1.3813618 1.4938432 1.3755096000000002 5.887912 5.5379830000000005 0.0015561687 0.0015163413000000001 -2.5178046202451694 0.5843148195084643 -0.7105532272722921 -3.3699982440767453 1.7584827 1.7471887 0.03291289 0.055258866 1.788348 1.7901558999999998 1.6738943 1.5866796 -0.15541847978173265 -0.1592098490531115 0.4162753016651965 0.4154601793782861 0.5290131568908691 0.5181965827941895 -1.0030009746551514 -1.0076508522033691 -1.0561148 -1.0213921000000001 0.93491656 0.9139581 0.021942224 0.05576827400000001 -2.8995342 -2.8376002000000002 0.09872001400000001 0.09897748 0.11163085 0.11257933 0.38036227 0.3836943 1.426808e-06 2.9006495999999997e-06 -0.27536926 -0.28488272 0.5678659 0.5594197 0.53289455 0.49081215 -1.4932774 -1.4856113000000002
5 4 75.0 0.0 740.0 1.0 740.0 3427.0 0.0 -21.0 -21.0 0.0 3.4039152 3.2764843 1.9638362 1.857115 7.991121000000001 7.888277499999999 0.21933316 0.25566658 -2.533184641659896 0.5861942513660167 -0.7105532272722921 -3.3699982440767453 1.4528251 1.1448658 0.16249819 0.26420638 1.6673243999999998 1.5718536 1.1318555000000001 0.75876945 -0.06318947109911177 -0.053371320168177284 0.4280228160756264 0.4283249731162562 0.6142082214355469 0.6155099868774414 -0.9774222373962402 -0.9759833812713624 -2.6464324 -2.7395887 0.35272834 0.34059632 -2.2407157000000004 -2.3099797000000004 -3.3780959999999998 -3.3791809999999995 0.09359822 0.09315539 0.06729852 0.06426737 0.24981685 0.24503651 0.03076201 0.033598172999999995 -0.096310705 -0.07731818 0.51300126 0.43035093 0.43143788 0.41724620000000007 -1.0997788999999998 -1.0952158
6 5 113.0 0.0 755.0 1.0 755.0 4182.0 0.0 -21.0 -21.0 0.0 3.233333 2.9129169999999998 1.986448 1.8344038 7.6551165999999995 7.434381 0.19970839 0.2987832 -2.5246431129611286 0.5835765895797549 -0.7105532272722921 -3.3699982440767453 1.2935143999999998 0.9606895000000001 0.11847049 0.09958161 1.439909 1.0971520000000001 1.0601448 0.79424876 -0.08421046411668932 -0.08736807929502953 0.4278507532658671 0.4281761045632924 0.5715954303741455 0.5726819038391113 -0.980087161064148 -0.9797717332839966 -2.3434882000000004 -2.3036256 0.31644145 0.30664575 -1.9696671000000001 -1.9442793999999999 -3.1619172000000004 -3.1548097 0.09507382 0.09548396599999999 0.07568375 0.0768301 0.26689446 0.26862615 0.024116684 0.023751104 -0.1209513 -0.08123907 0.48729447 0.3593619 0.5198732 0.44418138 -1.1845143 -0.9607479999999999