1
0
mirror of https://github.com/gryf/coach.git synced 2026-04-07 13:43:32 +02:00

new traces

This commit is contained in:
itaicaspi-intel
2018-09-12 15:29:42 +03:00
parent 673911ff7f
commit fa4895f840
76 changed files with 12786 additions and 12606 deletions

View File

@@ -1,6 +1,6 @@
Episode #,Training Iter,In Heatup,ER #Transitions,ER #Episodes,Episode Length,Total steps,Epsilon,Shaped Training Reward,Training Reward,Update Target Network,Evaluation Reward,Shaped Evaluation Reward,Success Rate,Loss/Mean,Loss/Stdev,Loss/Max,Loss/Min,Learning Rate/Mean,Learning Rate/Stdev,Learning Rate/Max,Learning Rate/Min,Grads (unclipped)/Mean,Grads (unclipped)/Stdev,Grads (unclipped)/Max,Grads (unclipped)/Min,Q/Mean,Q/Stdev,Q/Max,Q/Min
1,0.0,1.0,1117.0,1117.0,1117.0,1117.0,1.0,,,0.0,,,,,,,,,,,,,,,,,,,
2,205.0,0.0,1937.0,1937.0,820.0,1937.0,0.9992620000000244,-21.0,-21.0,0.0,,,,0.011010780938079,0.013098460400306485,0.06118807196617127,6.86898929416202e-05,0.00010000000000000002,1.3552527156068802e-20,0.0001,0.0001,0.08733994,0.06833449,0.47135752,0.016372742,,,,
3,413.0,0.0,2768.0,2768.0,831.0,2768.0,0.9985141000000488,-21.0,-21.0,0.0,,,,0.01163802880151147,0.013571124716079436,0.08714678883552551,3.9931001083459705e-05,0.00010000000000000003,2.7105054312137605e-20,0.0001,0.0001,0.06724033,0.035371285,0.2241408,0.011829718999999999,0.10583201,0.011610512,0.12072124,0.08555735
4,667.0,0.0,3783.0,3783.0,1015.0,3783.0,0.9976006000000791,-20.0,-20.0,0.0,,,,0.01136319609350886,0.012043113812065086,0.049625951796770096,9.354137000627816e-05,0.00010000000000000002,1.3552527156068802e-20,0.0001,0.0001,0.060902383,0.032815605,0.17838788,0.015925674,0.0978057,0.014090337,0.123560354,0.07580207
5,947.0,0.0,4906.0,4906.0,1123.0,4906.0,0.9965899000001124,-18.0,-18.0,0.0,,,,0.010341535720908724,0.011934284708938809,0.06498207896947861,6.708659930154681e-05,0.00010000000000000002,1.3552527156068802e-20,0.0001,0.0001,0.054970358,0.03215441,0.26232755,0.009252935,0.09154041,0.009532932,0.10656521,0.07300271
Episode #,Training Iter,In Heatup,ER #Transitions,ER #Episodes,Episode Length,Total steps,Epsilon,Shaped Training Reward,Training Reward,Update Target Network,Evaluation Reward,Shaped Evaluation Reward,Success Rate,Loss/Mean,Loss/Stdev,Loss/Max,Loss/Min,Learning Rate/Mean,Learning Rate/Stdev,Learning Rate/Max,Learning Rate/Min,Grads (unclipped)/Mean,Grads (unclipped)/Stdev,Grads (unclipped)/Max,Grads (unclipped)/Min,Discounted Return/Mean,Discounted Return/Stdev,Discounted Return/Max,Discounted Return/Min,Q/Mean,Q/Stdev,Q/Max,Q/Min
1,0.0,1.0,1117.0,1117.0,1117.0,1117.0,1.0,,,0.0,,,,,,,,,,,,,,,,-1.5180229894995567,0.6998808293377133,-0.08930329112720292,-3.148474706421977,,,,
2,205.0,0.0,1937.0,1937.0,820.0,1937.0,0.9992620000000244,-21.0,-21.0,0.0,,,,0.010741595556704321,0.012929108810701093,0.059776991605758674,1.955357765837107e-05,0.00010000000000000002,1.3552527156068802e-20,0.0001,0.0001,0.06700998,0.05211416,0.28223783,0.010616378999999999,-2.3361342922088504,0.784322378590693,-0.38878391807422696,-3.369599601005491,,,,
3,413.0,0.0,2768.0,2768.0,831.0,2768.0,0.9985141000000488,-21.0,-21.0,0.0,,,,0.011494234373143487,0.013620432539034891,0.08901263028383255,7.457365427399054e-05,0.00010000000000000003,2.7105054312137605e-20,0.0001,0.0001,0.049767,0.03138836,0.17539442,0.011668723999999998,-2.320394201181889,0.6047235028955231,-0.7105532272722921,-3.350537576335216,0.025700996,0.008655511,0.0347657,0.008379098
4,667.0,0.0,3783.0,3783.0,1015.0,3783.0,0.9976006000000791,-20.0,-20.0,0.0,,,,0.011251820331667623,0.01194864814747996,0.04630041867494583,9.452360245632008e-05,0.00010000000000000002,1.3552527156068802e-20,0.0001,0.0001,0.046036932999999995,0.02848018,0.14712586,0.010549873999999999,-1.7531357837449677,0.7448577440634202,-0.1288331810939122,-3.2971074888190803,0.014692268999999999,0.00800893,0.026871104,-0.00019203185000000002
5,954.0,0.0,4934.0,4934.0,1151.0,4934.0,0.9965647000001132,-18.0,-18.0,0.0,,,,0.010213364634658,0.011762000707730695,0.06160044670104981,0.00010682888387236744,0.00010000000000000002,1.3552527156068802e-20,0.0001,0.0001,0.041461904,0.025549995,0.14279638,0.012242042,-1.491249294691959,1.0312726923259208,0.5895735538220139,-3.345591480809256,0.021241617999999997,0.008987491,0.036087215,-0.0034641959999999997
1 Episode # Training Iter In Heatup ER #Transitions ER #Episodes Episode Length Total steps Epsilon Shaped Training Reward Training Reward Update Target Network Evaluation Reward Shaped Evaluation Reward Success Rate Loss/Mean Loss/Stdev Loss/Max Loss/Min Learning Rate/Mean Learning Rate/Stdev Learning Rate/Max Learning Rate/Min Grads (unclipped)/Mean Grads (unclipped)/Stdev Grads (unclipped)/Max Grads (unclipped)/Min Discounted Return/Mean Discounted Return/Stdev Discounted Return/Max Discounted Return/Min Q/Mean Q/Stdev Q/Max Q/Min
2 1 0.0 1.0 1117.0 1117.0 1117.0 1117.0 1.0 0.0 -1.5180229894995567 0.6998808293377133 -0.08930329112720292 -3.148474706421977
3 2 205.0 0.0 1937.0 1937.0 820.0 1937.0 0.9992620000000244 -21.0 -21.0 0.0 0.011010780938079 0.010741595556704321 0.013098460400306485 0.012929108810701093 0.06118807196617127 0.059776991605758674 6.86898929416202e-05 1.955357765837107e-05 0.00010000000000000002 1.3552527156068802e-20 0.0001 0.0001 0.08733994 0.06700998 0.06833449 0.05211416 0.47135752 0.28223783 0.016372742 0.010616378999999999 -2.3361342922088504 0.784322378590693 -0.38878391807422696 -3.369599601005491
4 3 413.0 0.0 2768.0 2768.0 831.0 2768.0 0.9985141000000488 -21.0 -21.0 0.0 0.01163802880151147 0.011494234373143487 0.013571124716079436 0.013620432539034891 0.08714678883552551 0.08901263028383255 3.9931001083459705e-05 7.457365427399054e-05 0.00010000000000000003 2.7105054312137605e-20 0.0001 0.0001 0.06724033 0.049767 0.035371285 0.03138836 0.2241408 0.17539442 0.011829718999999999 0.011668723999999998 -2.320394201181889 0.6047235028955231 -0.7105532272722921 -3.350537576335216 0.10583201 0.025700996 0.011610512 0.008655511 0.12072124 0.0347657 0.08555735 0.008379098
5 4 667.0 0.0 3783.0 3783.0 1015.0 3783.0 0.9976006000000791 -20.0 -20.0 0.0 0.01136319609350886 0.011251820331667623 0.012043113812065086 0.01194864814747996 0.049625951796770096 0.04630041867494583 9.354137000627816e-05 9.452360245632008e-05 0.00010000000000000002 1.3552527156068802e-20 0.0001 0.0001 0.060902383 0.046036932999999995 0.032815605 0.02848018 0.17838788 0.14712586 0.015925674 0.010549873999999999 -1.7531357837449677 0.7448577440634202 -0.1288331810939122 -3.2971074888190803 0.0978057 0.014692268999999999 0.014090337 0.00800893 0.123560354 0.026871104 0.07580207 -0.00019203185000000002
6 5 947.0 954.0 0.0 4906.0 4934.0 4906.0 4934.0 1123.0 1151.0 4906.0 4934.0 0.9965899000001124 0.9965647000001132 -18.0 -18.0 0.0 0.010341535720908724 0.010213364634658 0.011934284708938809 0.011762000707730695 0.06498207896947861 0.06160044670104981 6.708659930154681e-05 0.00010682888387236744 0.00010000000000000002 1.3552527156068802e-20 0.0001 0.0001 0.054970358 0.041461904 0.03215441 0.025549995 0.26232755 0.14279638 0.009252935 0.012242042 -1.491249294691959 1.0312726923259208 0.5895735538220139 -3.345591480809256 0.09154041 0.021241617999999997 0.009532932 0.008987491 0.10656521 0.036087215 0.07300271 -0.0034641959999999997