1
0
mirror of https://github.com/gryf/coach.git synced 2026-04-19 22:23:32 +02:00

new traces

This commit is contained in:
itaicaspi-intel
2018-09-12 15:29:42 +03:00
parent 673911ff7f
commit fa4895f840
76 changed files with 12786 additions and 12606 deletions

View File

@@ -1,6 +1,6 @@
Episode #,Training Iter,In Heatup,ER #Transitions,ER #Episodes,Episode Length,Total steps,Epsilon,Shaped Training Reward,Training Reward,Update Target Network,Evaluation Reward,Shaped Evaluation Reward,Success Rate,Loss/Mean,Loss/Stdev,Loss/Max,Loss/Min,Learning Rate/Mean,Learning Rate/Stdev,Learning Rate/Max,Learning Rate/Min,Grads (unclipped)/Mean,Grads (unclipped)/Stdev,Grads (unclipped)/Max,Grads (unclipped)/Min,Q/Mean,Q/Stdev,Q/Max,Q/Min
1,0.0,1.0,1117.0,1117.0,1117.0,1117.0,1.0,,,0.0,,,,,,,,,,,,,,,,,,,
2,197.0,0.0,1905.0,1905.0,788.0,1905.0,0.9992908000000232,-21.0,-21.0,0.0,,,,0.0051924175274927565,0.003918679938872439,0.04185768589377403,2.9565440854639746e-05,0.0002500000000000001,5.421010862427521e-20,0.00025,0.00025,0.01784605,0.03255357,0.465425,0.0038900522,,,,
3,436.0,0.0,2862.0,2862.0,957.0,2862.0,0.9984295000000516,-20.0,-20.0,0.0,,,,0.004909432677758631,0.0024521858486776424,0.012306905351579191,0.00032079339143820107,0.0002500000000000001,1.0842021724855042e-19,0.00025,0.00025,0.0113589475,0.0037933819,0.025680352000000004,0.0035025426,0.030741736000000002,0.025549445,0.07848698,-0.02225282
4,627.0,0.0,3623.0,3623.0,761.0,3623.0,0.9977446000000744,-21.0,-21.0,0.0,,,,0.0052940571797080345,0.002501595309474277,0.012016894295811651,0.0003992373822256922,0.0002500000000000001,5.421010862427521e-20,0.00025,0.00025,0.010990373999999999,0.0038335419,0.027035048,0.005245461,,,,
5,855.0,0.0,4535.0,4535.0,912.0,4535.0,0.9969238000001012,-20.0,-20.0,0.0,,,,0.004946799854224082,0.0024341152117377785,0.013126095756888391,0.0003701391979120672,0.0002500000000000001,1.0842021724855042e-19,0.00025,0.00025,0.010130615,0.0032620803,0.022317264,0.0045093056,0.026840469,0.01787639,0.051877695999999994,-0.005629579
Episode #,Training Iter,In Heatup,ER #Transitions,ER #Episodes,Episode Length,Total steps,Epsilon,Shaped Training Reward,Training Reward,Update Target Network,Evaluation Reward,Shaped Evaluation Reward,Success Rate,Loss/Mean,Loss/Stdev,Loss/Max,Loss/Min,Learning Rate/Mean,Learning Rate/Stdev,Learning Rate/Max,Learning Rate/Min,Grads (unclipped)/Mean,Grads (unclipped)/Stdev,Grads (unclipped)/Max,Grads (unclipped)/Min,Discounted Return/Mean,Discounted Return/Stdev,Discounted Return/Max,Discounted Return/Min,Q/Mean,Q/Stdev,Q/Max,Q/Min
1,0.0,1.0,1117.0,1117.0,1117.0,1117.0,1.0,,,0.0,,,,,,,,,,,,,,,,-1.5180229894995567,0.6998808293377133,-0.08930329112720292,-3.148474706421977,,,,
2,197.0,0.0,1905.0,1905.0,788.0,1905.0,0.9992908000000232,-21.0,-21.0,0.0,,,,0.0067403961892786675,0.0043218267539128705,0.03104443661868572,4.4621716369874775e-05,0.0002500000000000001,5.421010862427521e-20,0.00025,0.00025,0.023866987000000003,0.01743193,0.22815436,0.004472835,-2.4312329564518818,0.5717677860635341,-0.7105532272722921,-3.3662833646890835,,,,
3,436.0,0.0,2862.0,2862.0,957.0,2862.0,0.9984295000000516,-20.0,-20.0,0.0,,,,0.006779984783655944,0.003341004393095993,0.01628851890563965,0.00037204055115580564,0.0002500000000000001,1.0842021724855042e-19,0.00025,0.00025,0.01701229,0.0063794130000000004,0.042816594000000006,0.0057905857,-1.9651299496437336,0.7810357358559473,-0.3655772928295825,-3.2941461345643885,-0.031879205,0.019740151,-0.0044416133,-0.077873565
4,627.0,0.0,3623.0,3623.0,761.0,3623.0,0.9977446000000744,-21.0,-21.0,0.0,,,,0.006753587473251269,0.003223489523106914,0.014178510755300522,0.0004225387529004365,0.0002500000000000001,5.421010862427521e-20,0.00025,0.00025,0.014921344,0.0052625714,0.032878317000000004,0.0076742154,-2.5196481268264272,0.5839729089128289,-0.7105532272722921,-3.3699982440767453,,,,
5,855.0,0.0,4535.0,4535.0,912.0,4535.0,0.9969238000001012,-20.0,-20.0,0.0,,,,0.006566466095376343,0.0032564465770561412,0.01596076600253582,0.00040981321944855154,0.0002500000000000001,1.0842021724855042e-19,0.00025,0.00025,0.013728661999999999,0.00475778,0.04037358,0.00424472,-1.9480557195901917,0.7908203737498453,-0.21371412100620468,-3.2726053764291825,-0.038136218,0.013395666,-0.016326109,-0.0594768
1 Episode # Training Iter In Heatup ER #Transitions ER #Episodes Episode Length Total steps Epsilon Shaped Training Reward Training Reward Update Target Network Evaluation Reward Shaped Evaluation Reward Success Rate Loss/Mean Loss/Stdev Loss/Max Loss/Min Learning Rate/Mean Learning Rate/Stdev Learning Rate/Max Learning Rate/Min Grads (unclipped)/Mean Grads (unclipped)/Stdev Grads (unclipped)/Max Grads (unclipped)/Min Discounted Return/Mean Discounted Return/Stdev Discounted Return/Max Discounted Return/Min Q/Mean Q/Stdev Q/Max Q/Min
2 1 0.0 1.0 1117.0 1117.0 1117.0 1117.0 1.0 0.0 -1.5180229894995567 0.6998808293377133 -0.08930329112720292 -3.148474706421977
3 2 197.0 0.0 1905.0 1905.0 788.0 1905.0 0.9992908000000232 -21.0 -21.0 0.0 0.0051924175274927565 0.0067403961892786675 0.003918679938872439 0.0043218267539128705 0.04185768589377403 0.03104443661868572 2.9565440854639746e-05 4.4621716369874775e-05 0.0002500000000000001 5.421010862427521e-20 0.00025 0.00025 0.01784605 0.023866987000000003 0.03255357 0.01743193 0.465425 0.22815436 0.0038900522 0.004472835 -2.4312329564518818 0.5717677860635341 -0.7105532272722921 -3.3662833646890835
4 3 436.0 0.0 2862.0 2862.0 957.0 2862.0 0.9984295000000516 -20.0 -20.0 0.0 0.004909432677758631 0.006779984783655944 0.0024521858486776424 0.003341004393095993 0.012306905351579191 0.01628851890563965 0.00032079339143820107 0.00037204055115580564 0.0002500000000000001 1.0842021724855042e-19 0.00025 0.00025 0.0113589475 0.01701229 0.0037933819 0.0063794130000000004 0.025680352000000004 0.042816594000000006 0.0035025426 0.0057905857 -1.9651299496437336 0.7810357358559473 -0.3655772928295825 -3.2941461345643885 0.030741736000000002 -0.031879205 0.025549445 0.019740151 0.07848698 -0.0044416133 -0.02225282 -0.077873565
5 4 627.0 0.0 3623.0 3623.0 761.0 3623.0 0.9977446000000744 -21.0 -21.0 0.0 0.0052940571797080345 0.006753587473251269 0.002501595309474277 0.003223489523106914 0.012016894295811651 0.014178510755300522 0.0003992373822256922 0.0004225387529004365 0.0002500000000000001 5.421010862427521e-20 0.00025 0.00025 0.010990373999999999 0.014921344 0.0038335419 0.0052625714 0.027035048 0.032878317000000004 0.005245461 0.0076742154 -2.5196481268264272 0.5839729089128289 -0.7105532272722921 -3.3699982440767453
6 5 855.0 0.0 4535.0 4535.0 912.0 4535.0 0.9969238000001012 -20.0 -20.0 0.0 0.004946799854224082 0.006566466095376343 0.0024341152117377785 0.0032564465770561412 0.013126095756888391 0.01596076600253582 0.0003701391979120672 0.00040981321944855154 0.0002500000000000001 1.0842021724855042e-19 0.00025 0.00025 0.010130615 0.013728661999999999 0.0032620803 0.00475778 0.022317264 0.04037358 0.0045093056 0.00424472 -1.9480557195901917 0.7908203737498453 -0.21371412100620468 -3.2726053764291825 0.026840469 -0.038136218 0.01787639 0.013395666 0.051877695999999994 -0.016326109 -0.005629579 -0.0594768