1
0
mirror of https://github.com/gryf/coach.git synced 2026-04-19 22:23:32 +02:00

Trace tests update

This commit is contained in:
Shadi Endrawis
2018-08-20 13:01:17 +03:00
parent c1f428666e
commit 3abb6cd415
99 changed files with 12876 additions and 39 deletions

View File

@@ -0,0 +1,11 @@
Episode #,Training Iter,In Heatup,ER #Transitions,ER #Episodes,Episode Length,Total steps,Epsilon,Shaped Training Reward,Training Reward,Update Target Network,Evaluation Reward,Shaped Evaluation Reward,Success Rate,Loss/Mean,Loss/Stdev,Loss/Max,Loss/Min,Learning Rate/Mean,Learning Rate/Stdev,Learning Rate/Max,Learning Rate/Min,Grads (unclipped)/Mean,Grads (unclipped)/Stdev,Grads (unclipped)/Max,Grads (unclipped)/Min,Q/Mean,Q/Stdev,Q/Max,Q/Min
1,0.0,1.0,478.0,478.0,478.0,478.0,1.0,,,0.0,,,,,,,,,,,,,,,,,,,
2,0.0,1.0,956.0,956.0,478.0,956.0,1.0,,,0.0,,,,,,,,,,,,,,,,,,,
3,0.0,1.0,1434.0,1434.0,478.0,1434.0,1.0,,,0.0,,,,,,,,,,,,,,,,,,,
4,120.0,0.0,1912.0,1912.0,478.0,1912.0,0.9995698000000142,9.0,9.0,0.0,,,,0.009663151060431118,0.013676663237646413,0.060224123299121864,4.512001760303974e-05,0.00010000000000000003,2.7105054312137605e-20,0.0001,0.0001,0.06051638,0.042551804000000006,0.22626723,0.0057771252,,,,
5,239.0,0.0,2390.0,2390.0,478.0,2390.0,0.9991396000000284,14.0,14.0,0.0,,,,0.010185969424235415,0.01273001908156945,0.05133835971355438,0.0001901108626043424,0.0001,1.3552527156068802e-20,0.0001,0.0001,0.071132325,0.042716417,0.23749977,0.014687435,,,,
6,359.0,0.0,2868.0,2868.0,478.0,2868.0,0.9987094000000424,5.0,5.0,0.0,,,,0.009524562051471246,0.009567511935832385,0.036678917706012726,0.00025706662563607097,0.00010000000000000003,2.7105054312137605e-20,0.0001,0.0001,0.076720946,0.039715942000000004,0.18440181,0.012763031999999999,,,,
7,478.0,0.0,3346.0,3346.0,478.0,3346.0,0.9982792000000568,7.0,7.0,0.0,,,,0.008778206794962929,0.01083872213612712,0.05636880546808243,6.587376992683858e-05,0.0001,1.3552527156068802e-20,0.0001,0.0001,0.07076845,0.041428506000000004,0.20715882,0.006995787,,,,
8,598.0,0.0,3824.0,3824.0,478.0,3824.0,0.9978490000000708,9.0,9.0,0.0,,,,0.009846380206727191,0.013011878941093766,0.06951349228620529,0.00021574157290160656,0.00010000000000000003,2.7105054312137605e-20,0.0001,0.0001,0.077692814,0.045143295,0.22578509,0.013560652,,,,
9,717.0,0.0,4302.0,4302.0,478.0,4302.0,0.997418800000085,5.0,5.0,0.0,,,,0.007978798927589584,0.008021061916904503,0.029912894591689113,0.0002371315349591896,0.0001,1.3552527156068802e-20,0.0001,0.0001,0.07165227,0.037239626,0.16855638,0.012386461000000001,0.043533697999999996,0.021518622999999997,0.08633506,-0.007646310999999999
10,837.0,0.0,4780.0,4780.0,478.0,4780.0,0.9969886000000991,7.0,7.0,0.0,,,,0.006560324567666006,0.009694367656889826,0.0584893636405468,0.00016549747670069337,0.00010000000000000003,2.7105054312137605e-20,0.0001,0.0001,0.062156662,0.042624008,0.20502545,0.010010223,,,,
1 Episode # Training Iter In Heatup ER #Transitions ER #Episodes Episode Length Total steps Epsilon Shaped Training Reward Training Reward Update Target Network Evaluation Reward Shaped Evaluation Reward Success Rate Loss/Mean Loss/Stdev Loss/Max Loss/Min Learning Rate/Mean Learning Rate/Stdev Learning Rate/Max Learning Rate/Min Grads (unclipped)/Mean Grads (unclipped)/Stdev Grads (unclipped)/Max Grads (unclipped)/Min Q/Mean Q/Stdev Q/Max Q/Min
2 1 0.0 1.0 478.0 478.0 478.0 478.0 1.0 0.0
3 2 0.0 1.0 956.0 956.0 478.0 956.0 1.0 0.0
4 3 0.0 1.0 1434.0 1434.0 478.0 1434.0 1.0 0.0
5 4 120.0 0.0 1912.0 1912.0 478.0 1912.0 0.9995698000000142 9.0 9.0 0.0 0.009663151060431118 0.013676663237646413 0.060224123299121864 4.512001760303974e-05 0.00010000000000000003 2.7105054312137605e-20 0.0001 0.0001 0.06051638 0.042551804000000006 0.22626723 0.0057771252
6 5 239.0 0.0 2390.0 2390.0 478.0 2390.0 0.9991396000000284 14.0 14.0 0.0 0.010185969424235415 0.01273001908156945 0.05133835971355438 0.0001901108626043424 0.0001 1.3552527156068802e-20 0.0001 0.0001 0.071132325 0.042716417 0.23749977 0.014687435
7 6 359.0 0.0 2868.0 2868.0 478.0 2868.0 0.9987094000000424 5.0 5.0 0.0 0.009524562051471246 0.009567511935832385 0.036678917706012726 0.00025706662563607097 0.00010000000000000003 2.7105054312137605e-20 0.0001 0.0001 0.076720946 0.039715942000000004 0.18440181 0.012763031999999999
8 7 478.0 0.0 3346.0 3346.0 478.0 3346.0 0.9982792000000568 7.0 7.0 0.0 0.008778206794962929 0.01083872213612712 0.05636880546808243 6.587376992683858e-05 0.0001 1.3552527156068802e-20 0.0001 0.0001 0.07076845 0.041428506000000004 0.20715882 0.006995787
9 8 598.0 0.0 3824.0 3824.0 478.0 3824.0 0.9978490000000708 9.0 9.0 0.0 0.009846380206727191 0.013011878941093766 0.06951349228620529 0.00021574157290160656 0.00010000000000000003 2.7105054312137605e-20 0.0001 0.0001 0.077692814 0.045143295 0.22578509 0.013560652
10 9 717.0 0.0 4302.0 4302.0 478.0 4302.0 0.997418800000085 5.0 5.0 0.0 0.007978798927589584 0.008021061916904503 0.029912894591689113 0.0002371315349591896 0.0001 1.3552527156068802e-20 0.0001 0.0001 0.07165227 0.037239626 0.16855638 0.012386461000000001 0.043533697999999996 0.021518622999999997 0.08633506 -0.007646310999999999
11 10 837.0 0.0 4780.0 4780.0 478.0 4780.0 0.9969886000000991 7.0 7.0 0.0 0.006560324567666006 0.009694367656889826 0.0584893636405468 0.00016549747670069337 0.00010000000000000003 2.7105054312137605e-20 0.0001 0.0001 0.062156662 0.042624008 0.20502545 0.010010223