1
0
mirror of https://github.com/gryf/coach.git synced 2026-04-01 17:43:32 +02:00

Trace tests update

This commit is contained in:
Shadi Endrawis
2018-08-20 13:01:17 +03:00
parent c1f428666e
commit 3abb6cd415
99 changed files with 12876 additions and 39 deletions

View File

@@ -0,0 +1,11 @@
Episode #,Training Iter,In Heatup,ER #Transitions,ER #Episodes,Episode Length,Total steps,Epsilon,Shaped Training Reward,Training Reward,Update Target Network,Evaluation Reward,Shaped Evaluation Reward,Success Rate,Loss/Mean,Loss/Stdev,Loss/Max,Loss/Min,Learning Rate/Mean,Learning Rate/Stdev,Learning Rate/Max,Learning Rate/Min,Grads (unclipped)/Mean,Grads (unclipped)/Stdev,Grads (unclipped)/Max,Grads (unclipped)/Min,Entropy/Mean,Entropy/Stdev,Entropy/Max,Entropy/Min,Advantages/Mean,Advantages/Stdev,Advantages/Max,Advantages/Min,Values/Mean,Values/Stdev,Values/Max,Values/Min,Value Loss/Mean,Value Loss/Stdev,Value Loss/Max,Value Loss/Min,Policy Loss/Mean,Policy Loss/Stdev,Policy Loss/Max,Policy Loss/Min
1,0.0,1.0,478.0,1.0,478.0,478.0,0.05,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,0.0,1.0,478.0,1.0,478.0,956.0,0.05,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,0.0,1.0,478.0,1.0,478.0,1434.0,0.05,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,24.0,0.0,478.0,1.0,478.0,1912.0,0.05,8.0,8.0,0.0,,,,,,,,,,,,9.03846,22.764229,91.399254,0.024588391,,,,,0.19621490005836226,0.6458532366327383,5.482244629827941,-0.005247424216452507,0.00640432,0.002160495,0.012249463,0.0014542785999999998,0.14904597,0.37052974,1.4923493,3.4976929999999994e-07,8.977317999999999,32.553540000000005,162.44003,-0.028575617999999997
5,48.0,0.0,478.0,1.0,478.0,2390.0,0.05,9.0,9.0,0.0,,,,,,,,,,,,4.4456,17.140404,84.14791,0.023326423,,,,,0.1479198162277525,0.71010967842704,5.625428104407833,-0.007421561740338802,0.0076782983,0.0021980035,0.02260917,0.0039483183,0.41337219999999997,1.9008543000000002,9.521527,1.8114433999999999e-07,2.4626303,9.565386,46.66413,-0.12873393
6,72.0,0.0,478.0,1.0,478.0,2868.0,0.05,3.0,3.0,0.0,,,,,,,,,,,,2.1008675,9.690727,48.575404999999996,0.018254806999999998,,,,,0.035332021420209865,0.2248095502289586,2.490547376300775,-0.00784981157630682,0.010131956999999999,0.0010065025,0.012743896000000001,0.006272370500000001,0.03812732,0.18283993,0.91499686,3.5925550000000004e-07,0.58153045,2.8835547000000004,14.410379,-0.0739578
7,96.0,0.0,478.0,1.0,478.0,3346.0,0.05,2.0,2.0,0.0,,,,,,,,,,,,1.6284056999999998,7.452083999999999,37.36667,0.026381112999999998,,,,,0.011138008777318895,0.13505660278131726,1.9015993173338024,-0.010887796059250832,0.00682558,0.0011339751,0.011535725,0.0042418690000000005,0.010128246,0.04855813,0.24300486,1.8838693e-07,0.25838378,1.3140138000000001,6.558439999999999,-0.1505112
8,120.0,0.0,478.0,1.0,478.0,3824.0,0.05,5.0,5.0,0.0,,,,,,,,,,,,11.444039,54.49880600000001,272.81097,0.027892927,,,,,0.07993672026650593,0.5031299601404577,4.384975101059878,-0.006901365310698747,0.013585217,0.0016256317000000002,0.020009885,0.005179128,0.1915621,0.9186873,4.5974317000000005,3.5950129e-07,1.7022394,8.294214,41.47974,-0.121058926
9,144.0,0.0,478.0,1.0,478.0,4302.0,0.05,3.0,3.0,0.0,,,,,,,,,,,,2.7104049999999997,9.594693,45.367016,0.027722575,,,,,0.02490674868837548,0.18166402576452345,1.7803836216389222,-0.014499503944814203,0.002085986,0.002495002,0.015049544,-0.0033446013,0.02151707,0.09809124,0.49133897,1.7442491e-07,0.5284148,2.2715082000000004,11.310697,-0.19066198
10,168.0,0.0,478.0,1.0,478.0,4780.0,0.05,4.0,4.0,0.0,,,,,,,,,,,,4.791616,22.659098,113.46068999999999,0.014692583,,,,,0.058044653194176676,0.40381801054561617,3.535810493184707,-0.003858987061708015,0.011001096,0.0020611994,0.014140483999999998,-0.00013872093,0.11461395,0.54966277,2.7507042999999998,4.3972415000000004e-07,1.1093427,5.3802943,26.912289,-0.025421416000000002
1 Episode # Training Iter In Heatup ER #Transitions ER #Episodes Episode Length Total steps Epsilon Shaped Training Reward Training Reward Update Target Network Evaluation Reward Shaped Evaluation Reward Success Rate Loss/Mean Loss/Stdev Loss/Max Loss/Min Learning Rate/Mean Learning Rate/Stdev Learning Rate/Max Learning Rate/Min Grads (unclipped)/Mean Grads (unclipped)/Stdev Grads (unclipped)/Max Grads (unclipped)/Min Entropy/Mean Entropy/Stdev Entropy/Max Entropy/Min Advantages/Mean Advantages/Stdev Advantages/Max Advantages/Min Values/Mean Values/Stdev Values/Max Values/Min Value Loss/Mean Value Loss/Stdev Value Loss/Max Value Loss/Min Policy Loss/Mean Policy Loss/Stdev Policy Loss/Max Policy Loss/Min
2 1 0.0 1.0 478.0 1.0 478.0 478.0 0.05 0.0
3 2 0.0 1.0 478.0 1.0 478.0 956.0 0.05 0.0
4 3 0.0 1.0 478.0 1.0 478.0 1434.0 0.05 0.0
5 4 24.0 0.0 478.0 1.0 478.0 1912.0 0.05 8.0 8.0 0.0 9.03846 22.764229 91.399254 0.024588391 0.19621490005836226 0.6458532366327383 5.482244629827941 -0.005247424216452507 0.00640432 0.002160495 0.012249463 0.0014542785999999998 0.14904597 0.37052974 1.4923493 3.4976929999999994e-07 8.977317999999999 32.553540000000005 162.44003 -0.028575617999999997
6 5 48.0 0.0 478.0 1.0 478.0 2390.0 0.05 9.0 9.0 0.0 4.4456 17.140404 84.14791 0.023326423 0.1479198162277525 0.71010967842704 5.625428104407833 -0.007421561740338802 0.0076782983 0.0021980035 0.02260917 0.0039483183 0.41337219999999997 1.9008543000000002 9.521527 1.8114433999999999e-07 2.4626303 9.565386 46.66413 -0.12873393
7 6 72.0 0.0 478.0 1.0 478.0 2868.0 0.05 3.0 3.0 0.0 2.1008675 9.690727 48.575404999999996 0.018254806999999998 0.035332021420209865 0.2248095502289586 2.490547376300775 -0.00784981157630682 0.010131956999999999 0.0010065025 0.012743896000000001 0.006272370500000001 0.03812732 0.18283993 0.91499686 3.5925550000000004e-07 0.58153045 2.8835547000000004 14.410379 -0.0739578
8 7 96.0 0.0 478.0 1.0 478.0 3346.0 0.05 2.0 2.0 0.0 1.6284056999999998 7.452083999999999 37.36667 0.026381112999999998 0.011138008777318895 0.13505660278131726 1.9015993173338024 -0.010887796059250832 0.00682558 0.0011339751 0.011535725 0.0042418690000000005 0.010128246 0.04855813 0.24300486 1.8838693e-07 0.25838378 1.3140138000000001 6.558439999999999 -0.1505112
9 8 120.0 0.0 478.0 1.0 478.0 3824.0 0.05 5.0 5.0 0.0 11.444039 54.49880600000001 272.81097 0.027892927 0.07993672026650593 0.5031299601404577 4.384975101059878 -0.006901365310698747 0.013585217 0.0016256317000000002 0.020009885 0.005179128 0.1915621 0.9186873 4.5974317000000005 3.5950129e-07 1.7022394 8.294214 41.47974 -0.121058926
10 9 144.0 0.0 478.0 1.0 478.0 4302.0 0.05 3.0 3.0 0.0 2.7104049999999997 9.594693 45.367016 0.027722575 0.02490674868837548 0.18166402576452345 1.7803836216389222 -0.014499503944814203 0.002085986 0.002495002 0.015049544 -0.0033446013 0.02151707 0.09809124 0.49133897 1.7442491e-07 0.5284148 2.2715082000000004 11.310697 -0.19066198
11 10 168.0 0.0 478.0 1.0 478.0 4780.0 0.05 4.0 4.0 0.0 4.791616 22.659098 113.46068999999999 0.014692583 0.058044653194176676 0.40381801054561617 3.535810493184707 -0.003858987061708015 0.011001096 0.0020611994 0.014140483999999998 -0.00013872093 0.11461395 0.54966277 2.7507042999999998 4.3972415000000004e-07 1.1093427 5.3802943 26.912289 -0.025421416000000002