mirror of
https://github.com/gryf/coach.git
synced 2026-04-19 22:23:32 +02:00
new traces
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
Episode #,Training Iter,In Heatup,ER #Transitions,ER #Episodes,Episode Length,Total steps,Epsilon,Shaped Training Reward,Training Reward,Update Target Network,Evaluation Reward,Shaped Evaluation Reward,Success Rate,Loss/Mean,Loss/Stdev,Loss/Max,Loss/Min,Learning Rate/Mean,Learning Rate/Stdev,Learning Rate/Max,Learning Rate/Min,Grads (unclipped)/Mean,Grads (unclipped)/Stdev,Grads (unclipped)/Max,Grads (unclipped)/Min,Q/Mean,Q/Stdev,Q/Max,Q/Min
|
||||
1,0.0,1.0,1117.0,1117.0,1117.0,1117.0,1.0,,,0.0,,,,,,,,,,,,,,,,,,,
|
||||
2,197.0,0.0,1905.0,1905.0,788.0,1905.0,0.9992908000000232,-21.0,-21.0,0.0,,,,0.0051924175274927565,0.003918679938872439,0.04185768589377403,2.9565440854639746e-05,0.0002500000000000001,5.421010862427521e-20,0.00025,0.00025,0.01784605,0.03255357,0.465425,0.0038900522,,,,
|
||||
3,436.0,0.0,2862.0,2862.0,957.0,2862.0,0.9984295000000516,-20.0,-20.0,0.0,,,,0.004909432677758631,0.0024521858486776424,0.012306905351579191,0.00032079339143820107,0.0002500000000000001,1.0842021724855042e-19,0.00025,0.00025,0.0113589475,0.0037933819,0.025680352000000004,0.0035025426,0.030741736000000002,0.025549445,0.07848698,-0.02225282
|
||||
4,627.0,0.0,3623.0,3623.0,761.0,3623.0,0.9977446000000744,-21.0,-21.0,0.0,,,,0.0052940571797080345,0.002501595309474277,0.012016894295811651,0.0003992373822256922,0.0002500000000000001,5.421010862427521e-20,0.00025,0.00025,0.010990373999999999,0.0038335419,0.027035048,0.005245461,,,,
|
||||
5,855.0,0.0,4535.0,4535.0,912.0,4535.0,0.9969238000001012,-20.0,-20.0,0.0,,,,0.004946799854224082,0.0024341152117377785,0.013126095756888391,0.0003701391979120672,0.0002500000000000001,1.0842021724855042e-19,0.00025,0.00025,0.010130615,0.0032620803,0.022317264,0.0045093056,0.026840469,0.01787639,0.051877695999999994,-0.005629579
|
||||
Episode #,Training Iter,In Heatup,ER #Transitions,ER #Episodes,Episode Length,Total steps,Epsilon,Shaped Training Reward,Training Reward,Update Target Network,Evaluation Reward,Shaped Evaluation Reward,Success Rate,Loss/Mean,Loss/Stdev,Loss/Max,Loss/Min,Learning Rate/Mean,Learning Rate/Stdev,Learning Rate/Max,Learning Rate/Min,Grads (unclipped)/Mean,Grads (unclipped)/Stdev,Grads (unclipped)/Max,Grads (unclipped)/Min,Discounted Return/Mean,Discounted Return/Stdev,Discounted Return/Max,Discounted Return/Min,Q/Mean,Q/Stdev,Q/Max,Q/Min
|
||||
1,0.0,1.0,1117.0,1117.0,1117.0,1117.0,1.0,,,0.0,,,,,,,,,,,,,,,,-1.5180229894995567,0.6998808293377133,-0.08930329112720292,-3.148474706421977,,,,
|
||||
2,197.0,0.0,1905.0,1905.0,788.0,1905.0,0.9992908000000232,-21.0,-21.0,0.0,,,,0.0067403961892786675,0.0043218267539128705,0.03104443661868572,4.4621716369874775e-05,0.0002500000000000001,5.421010862427521e-20,0.00025,0.00025,0.023866987000000003,0.01743193,0.22815436,0.004472835,-2.4312329564518818,0.5717677860635341,-0.7105532272722921,-3.3662833646890835,,,,
|
||||
3,436.0,0.0,2862.0,2862.0,957.0,2862.0,0.9984295000000516,-20.0,-20.0,0.0,,,,0.006779984783655944,0.003341004393095993,0.01628851890563965,0.00037204055115580564,0.0002500000000000001,1.0842021724855042e-19,0.00025,0.00025,0.01701229,0.0063794130000000004,0.042816594000000006,0.0057905857,-1.9651299496437336,0.7810357358559473,-0.3655772928295825,-3.2941461345643885,-0.031879205,0.019740151,-0.0044416133,-0.077873565
|
||||
4,627.0,0.0,3623.0,3623.0,761.0,3623.0,0.9977446000000744,-21.0,-21.0,0.0,,,,0.006753587473251269,0.003223489523106914,0.014178510755300522,0.0004225387529004365,0.0002500000000000001,5.421010862427521e-20,0.00025,0.00025,0.014921344,0.0052625714,0.032878317000000004,0.0076742154,-2.5196481268264272,0.5839729089128289,-0.7105532272722921,-3.3699982440767453,,,,
|
||||
5,855.0,0.0,4535.0,4535.0,912.0,4535.0,0.9969238000001012,-20.0,-20.0,0.0,,,,0.006566466095376343,0.0032564465770561412,0.01596076600253582,0.00040981321944855154,0.0002500000000000001,1.0842021724855042e-19,0.00025,0.00025,0.013728661999999999,0.00475778,0.04037358,0.00424472,-1.9480557195901917,0.7908203737498453,-0.21371412100620468,-3.2726053764291825,-0.038136218,0.013395666,-0.016326109,-0.0594768
|
||||
|
||||
|
Reference in New Issue
Block a user