1
0
mirror of https://github.com/gryf/coach.git synced 2026-04-30 04:24:09 +02:00

new traces

This commit is contained in:
itaicaspi-intel
2018-09-12 15:29:42 +03:00
parent 673911ff7f
commit fa4895f840
76 changed files with 12786 additions and 12606 deletions
@@ -1,6 +1,6 @@
Episode #,Training Iter,In Heatup,ER #Transitions,ER #Episodes,Episode Length,Total steps,Epsilon,Shaped Training Reward,Training Reward,Update Target Network,Evaluation Reward,Shaped Evaluation Reward,Success Rate,Loss/Mean,Loss/Stdev,Loss/Max,Loss/Min,Learning Rate/Mean,Learning Rate/Stdev,Learning Rate/Max,Learning Rate/Min,Grads (unclipped)/Mean,Grads (unclipped)/Stdev,Grads (unclipped)/Max,Grads (unclipped)/Min,Q/Mean,Q/Stdev,Q/Max,Q/Min
1,0.0,1.0,1117.0,1117.0,1117.0,1117.0,1.0,,,0.0,,,,,,,,,,,,,,,,,,,
2,197.0,0.0,1905.0,1905.0,788.0,1905.0,0.9992198800000168,-21.0,-21.0,0.0,,,,0.0065035605150511894,0.004365216942868011,0.04185768589377403,1.6300582501571625e-05,6.250000000000001e-05,1.3552527156068802e-20,6.25e-05,6.25e-05,0.04899958,0.035690054,0.465425,0.0031771401,,,,
3,436.0,0.0,2862.0,2862.0,957.0,2862.0,0.9982724500000376,-20.0,-20.0,0.0,,,,0.006882304690776307,0.0032755384482328074,0.018768906593322757,0.00028316525276750326,6.250000000000003e-05,2.7105054312137605e-20,6.25e-05,6.25e-05,0.037334877999999995,0.016123397,0.11000781,0.007852386,-0.25035575,0.057181817,-0.1695276,-0.34914327
4,627.0,0.0,3623.0,3623.0,761.0,3623.0,0.997519060000054,-21.0,-21.0,0.0,,,,0.004881470595769075,0.0024654802506201947,0.01351994462311268,3.340750481584109e-05,6.250000000000001e-05,1.3552527156068802e-20,6.25e-05,6.25e-05,0.028977735,0.016445445,0.09510474,0.0037849140000000003,,,,
5,855.0,0.0,4535.0,4535.0,912.0,4535.0,0.9966161800000736,-20.0,-20.0,0.0,,,,0.004249975731765612,0.0017149519969122415,0.01000758446753025,5.5568867537658655e-05,6.250000000000003e-05,2.7105054312137605e-20,6.25e-05,6.25e-05,0.020409843,0.013720203,0.084716946,0.005521884,-0.11609744,0.011784006000000001,-0.10053374,-0.13682899
Episode #,Training Iter,In Heatup,ER #Transitions,ER #Episodes,Episode Length,Total steps,Epsilon,Shaped Training Reward,Training Reward,Update Target Network,Evaluation Reward,Shaped Evaluation Reward,Success Rate,Loss/Mean,Loss/Stdev,Loss/Max,Loss/Min,Learning Rate/Mean,Learning Rate/Stdev,Learning Rate/Max,Learning Rate/Min,Grads (unclipped)/Mean,Grads (unclipped)/Stdev,Grads (unclipped)/Max,Grads (unclipped)/Min,Discounted Return/Mean,Discounted Return/Stdev,Discounted Return/Max,Discounted Return/Min,Q/Mean,Q/Stdev,Q/Max,Q/Min
1,0.0,1.0,1117.0,1117.0,1117.0,1117.0,1.0,,,0.0,,,,,,,,,,,,,,,,-1.5180229894995567,0.6998808293377133,-0.08930329112720292,-3.148474706421977,,,,
2,197.0,0.0,1905.0,1905.0,788.0,1905.0,0.9992198800000168,-21.0,-21.0,0.0,,,,0.0064866334039760465,0.004131804922553068,0.03104443661868572,1.4448891306528823e-05,6.250000000000001e-05,1.3552527156068802e-20,6.25e-05,6.25e-05,0.03878765,0.023484444,0.22815436,0.00323429,-2.4312329564518818,0.5717677860635341,-0.7105532272722921,-3.3662833646890835,,,,
3,436.0,0.0,2862.0,2862.0,957.0,2862.0,0.9982724500000376,-20.0,-20.0,0.0,,,,0.005787130516174904,0.00260959432086106,0.01427764445543289,0.0001548390282550827,6.250000000000003e-05,2.7105054312137605e-20,6.25e-05,6.25e-05,0.028759224,0.011704876000000001,0.07163235,0.008223475,-1.9651299496437336,0.7810357358559473,-0.3655772928295825,-3.2941461345643885,-0.16823806,0.05095485599999999,-0.08356075,-0.26015088
4,627.0,0.0,3623.0,3623.0,761.0,3623.0,0.997519060000054,-21.0,-21.0,0.0,,,,0.005963045392452745,0.0028514600960427012,0.014360946603119372,2.424295598757453e-05,6.250000000000001e-05,1.3552527156068802e-20,6.25e-05,6.25e-05,0.027299996,0.012375978999999999,0.07347562,0.0030203340000000004,-2.5196481268264272,0.5839729089128289,-0.7105532272722921,-3.3699982440767453,,,,
5,855.0,0.0,4535.0,4535.0,912.0,4535.0,0.9966161800000736,-20.0,-20.0,0.0,,,,0.006235880403357697,0.002686495318714779,0.015876490622758862,0.00016267428873106837,6.250000000000003e-05,2.7105054312137605e-20,6.25e-05,6.25e-05,0.028169753,0.01084809,0.07740243,0.011433218,-1.9480557195901917,0.7908203737498453,-0.21371412100620468,-3.2726053764291825,-0.19339468,0.031096203,-0.15218829999999997,-0.26763937
1 Episode # Training Iter In Heatup ER #Transitions ER #Episodes Episode Length Total steps Epsilon Shaped Training Reward Training Reward Update Target Network Evaluation Reward Shaped Evaluation Reward Success Rate Loss/Mean Loss/Stdev Loss/Max Loss/Min Learning Rate/Mean Learning Rate/Stdev Learning Rate/Max Learning Rate/Min Grads (unclipped)/Mean Grads (unclipped)/Stdev Grads (unclipped)/Max Grads (unclipped)/Min Discounted Return/Mean Discounted Return/Stdev Discounted Return/Max Discounted Return/Min Q/Mean Q/Stdev Q/Max Q/Min
2 1 0.0 1.0 1117.0 1117.0 1117.0 1117.0 1.0 0.0 -1.5180229894995567 0.6998808293377133 -0.08930329112720292 -3.148474706421977
3 2 197.0 0.0 1905.0 1905.0 788.0 1905.0 0.9992198800000168 -21.0 -21.0 0.0 0.0065035605150511894 0.0064866334039760465 0.004365216942868011 0.004131804922553068 0.04185768589377403 0.03104443661868572 1.6300582501571625e-05 1.4448891306528823e-05 6.250000000000001e-05 1.3552527156068802e-20 6.25e-05 6.25e-05 0.04899958 0.03878765 0.035690054 0.023484444 0.465425 0.22815436 0.0031771401 0.00323429 -2.4312329564518818 0.5717677860635341 -0.7105532272722921 -3.3662833646890835
4 3 436.0 0.0 2862.0 2862.0 957.0 2862.0 0.9982724500000376 -20.0 -20.0 0.0 0.006882304690776307 0.005787130516174904 0.0032755384482328074 0.00260959432086106 0.018768906593322757 0.01427764445543289 0.00028316525276750326 0.0001548390282550827 6.250000000000003e-05 2.7105054312137605e-20 6.25e-05 6.25e-05 0.037334877999999995 0.028759224 0.016123397 0.011704876000000001 0.11000781 0.07163235 0.007852386 0.008223475 -1.9651299496437336 0.7810357358559473 -0.3655772928295825 -3.2941461345643885 -0.25035575 -0.16823806 0.057181817 0.05095485599999999 -0.1695276 -0.08356075 -0.34914327 -0.26015088
5 4 627.0 0.0 3623.0 3623.0 761.0 3623.0 0.997519060000054 -21.0 -21.0 0.0 0.004881470595769075 0.005963045392452745 0.0024654802506201947 0.0028514600960427012 0.01351994462311268 0.014360946603119372 3.340750481584109e-05 2.424295598757453e-05 6.250000000000001e-05 1.3552527156068802e-20 6.25e-05 6.25e-05 0.028977735 0.027299996 0.016445445 0.012375978999999999 0.09510474 0.07347562 0.0037849140000000003 0.0030203340000000004 -2.5196481268264272 0.5839729089128289 -0.7105532272722921 -3.3699982440767453
6 5 855.0 0.0 4535.0 4535.0 912.0 4535.0 0.9966161800000736 -20.0 -20.0 0.0 0.004249975731765612 0.006235880403357697 0.0017149519969122415 0.002686495318714779 0.01000758446753025 0.015876490622758862 5.5568867537658655e-05 0.00016267428873106837 6.250000000000003e-05 2.7105054312137605e-20 6.25e-05 6.25e-05 0.020409843 0.028169753 0.013720203 0.01084809 0.084716946 0.07740243 0.005521884 0.011433218 -1.9480557195901917 0.7908203737498453 -0.21371412100620468 -3.2726053764291825 -0.11609744 -0.19339468 0.011784006000000001 0.031096203 -0.10053374 -0.15218829999999997 -0.13682899 -0.26763937