1
0
mirror of https://github.com/gryf/coach.git synced 2026-03-30 16:43:33 +02:00

Trace tests update

This commit is contained in:
Shadi Endrawis
2018-08-20 13:01:17 +03:00
parent c1f428666e
commit 3abb6cd415
99 changed files with 12876 additions and 39 deletions

View File

@@ -0,0 +1,6 @@
Episode #,Training Iter,In Heatup,ER #Transitions,ER #Episodes,Episode Length,Total steps,Epsilon,Shaped Training Reward,Training Reward,Update Target Network,Evaluation Reward,Shaped Evaluation Reward,Success Rate,Loss/Mean,Loss/Stdev,Loss/Max,Loss/Min,Learning Rate/Mean,Learning Rate/Stdev,Learning Rate/Max,Learning Rate/Min,Grads (unclipped)/Mean,Grads (unclipped)/Stdev,Grads (unclipped)/Max,Grads (unclipped)/Min,Q/Mean,Q/Stdev,Q/Max,Q/Min
1,0.0,1.0,1117.0,1117.0,1117.0,1117.0,1.0,,,0.0,,,,,,,,,,,,,,,,,,,
2,210.0,0.0,1958.0,1958.0,841.0,1958.0,0.999167410000018,-20.0,-20.0,0.0,,,,3.9302484875633596,0.000980246273321835,3.9315416812896733,3.926891326904297,0.0002500000000000001,1.0842021724855042e-19,0.00025,0.00025,0.0021740668,0.0023227779999999997,0.014739634,0.0009191924499999999,,,,
3,402.0,0.0,2726.0,2726.0,768.0,2726.0,0.9984070900000346,-21.0,-21.0,0.0,,,,3.928837850689888,0.0010547064317355432,3.9301910400390634,3.9240779876708975,0.0002500000000000001,5.421010862427521e-20,0.00025,0.00025,0.0015834095000000002,0.0025085623000000003,0.016421815,0.0005392361,0.06382764484733404,0.02358938873903284,0.10002454034984172,0.024584384262562403
4,601.0,0.0,3519.0,3519.0,793.0,3519.0,0.9976220200000516,-21.0,-21.0,0.0,,,,3.927992107880176,0.0010090422055890882,3.929178953170776,3.925344705581665,0.0002500000000000001,5.421010862427521e-20,0.00025,0.00025,0.0015888658,0.002871752,0.017285319,0.00037317397000000005,0.06921376281728404,0.007428921215239665,0.08169361427426401,0.05617701336741498
5,809.0,0.0,4352.0,4352.0,833.0,4352.0,0.9967973500000696,-21.0,-21.0,0.0,,,,3.928017064929009,0.0009430171418315623,3.9289817810058594,3.92388129234314,0.0002500000000000001,1.0842021724855042e-19,0.00025,0.00025,0.0016348981,0.0031759862,0.016966071,0.0003033526,0.07216475835690954,0.00436285987925874,0.07676253654062795,0.06509595513343866
1 Episode # Training Iter In Heatup ER #Transitions ER #Episodes Episode Length Total steps Epsilon Shaped Training Reward Training Reward Update Target Network Evaluation Reward Shaped Evaluation Reward Success Rate Loss/Mean Loss/Stdev Loss/Max Loss/Min Learning Rate/Mean Learning Rate/Stdev Learning Rate/Max Learning Rate/Min Grads (unclipped)/Mean Grads (unclipped)/Stdev Grads (unclipped)/Max Grads (unclipped)/Min Q/Mean Q/Stdev Q/Max Q/Min
2 1 0.0 1.0 1117.0 1117.0 1117.0 1117.0 1.0 0.0
3 2 210.0 0.0 1958.0 1958.0 841.0 1958.0 0.999167410000018 -20.0 -20.0 0.0 3.9302484875633596 0.000980246273321835 3.9315416812896733 3.926891326904297 0.0002500000000000001 1.0842021724855042e-19 0.00025 0.00025 0.0021740668 0.0023227779999999997 0.014739634 0.0009191924499999999
4 3 402.0 0.0 2726.0 2726.0 768.0 2726.0 0.9984070900000346 -21.0 -21.0 0.0 3.928837850689888 0.0010547064317355432 3.9301910400390634 3.9240779876708975 0.0002500000000000001 5.421010862427521e-20 0.00025 0.00025 0.0015834095000000002 0.0025085623000000003 0.016421815 0.0005392361 0.06382764484733404 0.02358938873903284 0.10002454034984172 0.024584384262562403
5 4 601.0 0.0 3519.0 3519.0 793.0 3519.0 0.9976220200000516 -21.0 -21.0 0.0 3.927992107880176 0.0010090422055890882 3.929178953170776 3.925344705581665 0.0002500000000000001 5.421010862427521e-20 0.00025 0.00025 0.0015888658 0.002871752 0.017285319 0.00037317397000000005 0.06921376281728404 0.007428921215239665 0.08169361427426401 0.05617701336741498
6 5 809.0 0.0 4352.0 4352.0 833.0 4352.0 0.9967973500000696 -21.0 -21.0 0.0 3.928017064929009 0.0009430171418315623 3.9289817810058594 3.92388129234314 0.0002500000000000001 1.0842021724855042e-19 0.00025 0.00025 0.0016348981 0.0031759862 0.016966071 0.0003033526 0.07216475835690954 0.00436285987925874 0.07676253654062795 0.06509595513343866