1
0
mirror of https://github.com/gryf/coach.git synced 2026-03-13 04:55:47 +01:00

trace tests update

This commit is contained in:
Shadi Endrawis
2018-10-02 17:55:16 +03:00
parent 51726a5b80
commit f7990d4003
79 changed files with 10105 additions and 9539 deletions

View File

@@ -1,11 +1,11 @@
Episode #,Training Iter,In Heatup,ER #Transitions,ER #Episodes,Episode Length,Total steps,Epsilon,Shaped Training Reward,Training Reward,Update Target Network,Evaluation Reward,Shaped Evaluation Reward,Success Rate,Loss/Mean,Loss/Stdev,Loss/Max,Loss/Min,Learning Rate/Mean,Learning Rate/Stdev,Learning Rate/Max,Learning Rate/Min,Grads (unclipped)/Mean,Grads (unclipped)/Stdev,Grads (unclipped)/Max,Grads (unclipped)/Min,Entropy/Mean,Entropy/Stdev,Entropy/Max,Entropy/Min,Advantages/Mean,Advantages/Stdev,Advantages/Max,Advantages/Min,Values/Mean,Values/Stdev,Values/Max,Values/Min,Value Loss/Mean,Value Loss/Stdev,Value Loss/Max,Value Loss/Min,Policy Loss/Mean,Policy Loss/Stdev,Policy Loss/Max,Policy Loss/Min
1,0.0,1.0,478.0,1.0,478.0,478.0,0.05,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,0.0,1.0,478.0,1.0,478.0,956.0,0.05,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,0.0,1.0,478.0,1.0,478.0,1434.0,0.05,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,24.0,0.0,478.0,1.0,478.0,1912.0,0.05,8.0,8.0,0.0,,,,,,,,,,,,9.03846,22.764229,91.399254,0.024588391,,,,,0.19621490005836226,0.6458532366327383,5.482244629827941,-0.005247424216452507,0.00640432,0.002160495,0.012249463,0.0014542785999999998,0.14904597,0.37052974,1.4923493,3.4976929999999994e-07,8.977317999999999,32.553540000000005,162.44003,-0.028575617999999997
5,48.0,0.0,478.0,1.0,478.0,2390.0,0.05,9.0,9.0,0.0,,,,,,,,,,,,4.4456,17.140404,84.14791,0.023326423,,,,,0.1479198162277525,0.71010967842704,5.625428104407833,-0.007421561740338802,0.0076782983,0.0021980035,0.02260917,0.0039483183,0.41337219999999997,1.9008543000000002,9.521527,1.8114433999999999e-07,2.4626303,9.565386,46.66413,-0.12873393
6,72.0,0.0,478.0,1.0,478.0,2868.0,0.05,3.0,3.0,0.0,,,,,,,,,,,,2.1008675,9.690727,48.575404999999996,0.018254806999999998,,,,,0.035332021420209865,0.2248095502289586,2.490547376300775,-0.00784981157630682,0.010131956999999999,0.0010065025,0.012743896000000001,0.006272370500000001,0.03812732,0.18283993,0.91499686,3.5925550000000004e-07,0.58153045,2.8835547000000004,14.410379,-0.0739578
7,96.0,0.0,478.0,1.0,478.0,3346.0,0.05,2.0,2.0,0.0,,,,,,,,,,,,1.6284056999999998,7.452083999999999,37.36667,0.026381112999999998,,,,,0.011138008777318895,0.13505660278131726,1.9015993173338024,-0.010887796059250832,0.00682558,0.0011339751,0.011535725,0.0042418690000000005,0.010128246,0.04855813,0.24300486,1.8838693e-07,0.25838378,1.3140138000000001,6.558439999999999,-0.1505112
8,120.0,0.0,478.0,1.0,478.0,3824.0,0.05,5.0,5.0,0.0,,,,,,,,,,,,11.444039,54.49880600000001,272.81097,0.027892927,,,,,0.07993672026650593,0.5031299601404577,4.384975101059878,-0.006901365310698747,0.013585217,0.0016256317000000002,0.020009885,0.005179128,0.1915621,0.9186873,4.5974317000000005,3.5950129e-07,1.7022394,8.294214,41.47974,-0.121058926
9,144.0,0.0,478.0,1.0,478.0,4302.0,0.05,3.0,3.0,0.0,,,,,,,,,,,,2.7104049999999997,9.594693,45.367016,0.027722575,,,,,0.02490674868837548,0.18166402576452345,1.7803836216389222,-0.014499503944814203,0.002085986,0.002495002,0.015049544,-0.0033446013,0.02151707,0.09809124,0.49133897,1.7442491e-07,0.5284148,2.2715082000000004,11.310697,-0.19066198
10,168.0,0.0,478.0,1.0,478.0,4780.0,0.05,4.0,4.0,0.0,,,,,,,,,,,,4.791616,22.659098,113.46068999999999,0.014692583,,,,,0.058044653194176676,0.40381801054561617,3.535810493184707,-0.003858987061708015,0.011001096,0.0020611994,0.014140483999999998,-0.00013872093,0.11461395,0.54966277,2.7507042999999998,4.3972415000000004e-07,1.1093427,5.3802943,26.912289,-0.025421416000000002
Episode #,Training Iter,In Heatup,ER #Transitions,ER #Episodes,Episode Length,Total steps,Epsilon,Shaped Training Reward,Training Reward,Update Target Network,Evaluation Reward,Shaped Evaluation Reward,Success Rate,Loss/Mean,Loss/Stdev,Loss/Max,Loss/Min,Learning Rate/Mean,Learning Rate/Stdev,Learning Rate/Max,Learning Rate/Min,Grads (unclipped)/Mean,Grads (unclipped)/Stdev,Grads (unclipped)/Max,Grads (unclipped)/Min,Discounted Return/Mean,Discounted Return/Stdev,Discounted Return/Max,Discounted Return/Min,Entropy/Mean,Entropy/Stdev,Entropy/Max,Entropy/Min,Advantages/Mean,Advantages/Stdev,Advantages/Max,Advantages/Min,Values/Mean,Values/Stdev,Values/Max,Values/Min,Value Loss/Mean,Value Loss/Stdev,Value Loss/Max,Value Loss/Min,Policy Loss/Mean,Policy Loss/Stdev,Policy Loss/Max,Policy Loss/Min
1,0.0,1.0,478.0,1.0,478.0,478.0,0.05,,,0.0,,,,,,,,,,,,,,,,0.6772166738139332,1.3606583998522768,7.111435392915562,0.0,,,,,,,,,,,,,,,,,,,,
2,0.0,1.0,478.0,1.0,478.0,956.0,0.05,,,0.0,,,,,,,,,,,,,,,,0.3461585865957836,0.7129541964353258,3.4825898934060247,0.0,,,,,,,,,,,,,,,,,,,,
3,0.0,1.0,478.0,1.0,478.0,1434.0,0.05,,,0.0,,,,,,,,,,,,,,,,0.8221885517216162,1.0710747025505476,7.5093869236316815,0.0,,,,,,,,,,,,,,,,,,,,
4,23.0,0.0,478.0,1.0,478.0,1912.0,0.05,3.0,3.0,0.0,,,,,,,,,,,,6.035387999999999,19.725208,82.50349,0.039248765,0.12383280129798925,0.5223793934675833,2.79990211919977,0.0,,,,,0.0452739237450773,0.2183428467460978,1.8627784317196168,-0.0030643000370221317,-0.004365872,0.0017351342000000002,-0.0009706963,-0.013218256000000001,0.03401077,0.11164869,0.451847,4.2722320000000005e-07,1.0746696000000002,3.4710016,13.569623000000002,-0.042516552
5,47.0,0.0,478.0,1.0,478.0,2390.0,0.05,4.0,4.0,0.0,,,,,,,,,,,,4.0586433,13.706188000000001,61.88896,0.01625376,0.1100642112599555,0.4374646870965546,3.5802294655332134,0.0,,,,,0.06605603117614679,0.2828204733180097,2.495711163345754,-0.003739734889965058,-0.014320336000000001,0.001017332,-0.011700756999999999,-0.019422526000000002,0.06243892400000001,0.22280176,1.0344708,5.7007526e-07,1.0248749,3.348449,14.245454999999998,0.0033072747999999996
6,71.0,0.0,478.0,1.0,478.0,2868.0,0.05,5.0,5.0,0.0,,,,,,,,,,,,5.894111,25.05461,122.94565,0.023589091,0.09171214216631716,0.4844599433567191,4.654235241755948,0.0,,,,,0.049990671231890695,0.3035343549912095,3.2950532226726725,-0.002476382634522309,-0.013464768,0.0015044829999999998,-0.0038845115000000004,-0.017055605,0.06480691,0.29358354,1.4410233000000001,3.734214e-07,0.8669504000000001,3.7694442,18.52095,-0.0042024343
7,95.0,0.0,478.0,1.0,478.0,3346.0,0.05,5.0,5.0,0.0,,,,,,,,,,,,7.687636,20.715212,84.180954,0.028861007,0.3429683427703999,0.5788530473857294,3.4984647932875417,0.0,,,,,0.08723572006555756,0.3080234455301096,2.525255932768265,-0.0040773123184797465,0.0049094097,0.0043921572999999995,0.011444922,-0.007210325,0.07753111,0.24353394,1.1304462,3.6353214e-07,2.1490667,5.9981136,24.37817,-0.043448977
8,119.0,0.0,478.0,1.0,478.0,3824.0,0.05,2.0,2.0,0.0,,,,,,,,,,,,1.5874879,6.5180235,32.02797,0.023313208,0.04601948363571996,0.22472693959922946,1.8345137614500877,0.0,,,,,0.008022510747359511,0.08955419064160207,1.0005814651570817,-0.003758238096650968,0.0059175556999999995,0.0011794145,0.008881162,0.0026903595,0.0042831437,0.015627237,0.073625945,4.8217976e-07,0.14996052,0.7111329000000001,3.4817169000000003,-0.056617767
9,143.0,0.0,478.0,1.0,478.0,4302.0,0.05,2.0,2.0,0.0,,,,,,,,,,,,2.9201734,9.209149,34.853527,0.024623917000000002,0.09876412763598223,0.3162928485152334,1.6894490858690778,0.0,,,,,0.030283883503414787,0.15808164771534552,1.0039386003974171,-0.004561017796980071,0.0048816567,0.0016686192000000002,0.0079843905,-0.0026530582,0.017186822,0.056464136,0.22929375,2.3640719e-07,0.52893436,1.7419220000000002,6.477185700000001,-0.04866889
10,167.0,0.0,478.0,1.0,478.0,4780.0,0.05,0.0,0.0,0.0,,,,,,,,,,,,0.07378042,0.035627235,0.1401871,0.011221955,0.0,0.0,0.0,0.0,,,,,0.0006671480381455103,0.0011843871302212494,0.0037133864868810437,-0.003517640307545661,-0.008119860999999999,0.0009026574000000001,-0.005373695,-0.010452669,1.3184773000000002e-06,1.1577918e-06,4.8367815e-06,3.1812826e-07,0.012648877,0.012855935,0.034687527,-0.009518709
1 Episode # Training Iter In Heatup ER #Transitions ER #Episodes Episode Length Total steps Epsilon Shaped Training Reward Training Reward Update Target Network Evaluation Reward Shaped Evaluation Reward Success Rate Loss/Mean Loss/Stdev Loss/Max Loss/Min Learning Rate/Mean Learning Rate/Stdev Learning Rate/Max Learning Rate/Min Grads (unclipped)/Mean Grads (unclipped)/Stdev Grads (unclipped)/Max Grads (unclipped)/Min Discounted Return/Mean Discounted Return/Stdev Discounted Return/Max Discounted Return/Min Entropy/Mean Entropy/Stdev Entropy/Max Entropy/Min Advantages/Mean Advantages/Stdev Advantages/Max Advantages/Min Values/Mean Values/Stdev Values/Max Values/Min Value Loss/Mean Value Loss/Stdev Value Loss/Max Value Loss/Min Policy Loss/Mean Policy Loss/Stdev Policy Loss/Max Policy Loss/Min
2 1 0.0 1.0 478.0 1.0 478.0 478.0 0.05 0.0 0.6772166738139332 1.3606583998522768 7.111435392915562 0.0
3 2 0.0 1.0 478.0 1.0 478.0 956.0 0.05 0.0 0.3461585865957836 0.7129541964353258 3.4825898934060247 0.0
4 3 0.0 1.0 478.0 1.0 478.0 1434.0 0.05 0.0 0.8221885517216162 1.0710747025505476 7.5093869236316815 0.0
5 4 24.0 23.0 0.0 478.0 1.0 478.0 1912.0 0.05 8.0 3.0 8.0 3.0 0.0 9.03846 6.035387999999999 22.764229 19.725208 91.399254 82.50349 0.024588391 0.039248765 0.12383280129798925 0.5223793934675833 2.79990211919977 0.0 0.19621490005836226 0.0452739237450773 0.6458532366327383 0.2183428467460978 5.482244629827941 1.8627784317196168 -0.005247424216452507 -0.0030643000370221317 0.00640432 -0.004365872 0.002160495 0.0017351342000000002 0.012249463 -0.0009706963 0.0014542785999999998 -0.013218256000000001 0.14904597 0.03401077 0.37052974 0.11164869 1.4923493 0.451847 3.4976929999999994e-07 4.2722320000000005e-07 8.977317999999999 1.0746696000000002 32.553540000000005 3.4710016 162.44003 13.569623000000002 -0.028575617999999997 -0.042516552
6 5 48.0 47.0 0.0 478.0 1.0 478.0 2390.0 0.05 9.0 4.0 9.0 4.0 0.0 4.4456 4.0586433 17.140404 13.706188000000001 84.14791 61.88896 0.023326423 0.01625376 0.1100642112599555 0.4374646870965546 3.5802294655332134 0.0 0.1479198162277525 0.06605603117614679 0.71010967842704 0.2828204733180097 5.625428104407833 2.495711163345754 -0.007421561740338802 -0.003739734889965058 0.0076782983 -0.014320336000000001 0.0021980035 0.001017332 0.02260917 -0.011700756999999999 0.0039483183 -0.019422526000000002 0.41337219999999997 0.06243892400000001 1.9008543000000002 0.22280176 9.521527 1.0344708 1.8114433999999999e-07 5.7007526e-07 2.4626303 1.0248749 9.565386 3.348449 46.66413 14.245454999999998 -0.12873393 0.0033072747999999996
7 6 72.0 71.0 0.0 478.0 1.0 478.0 2868.0 0.05 3.0 5.0 3.0 5.0 0.0 2.1008675 5.894111 9.690727 25.05461 48.575404999999996 122.94565 0.018254806999999998 0.023589091 0.09171214216631716 0.4844599433567191 4.654235241755948 0.0 0.035332021420209865 0.049990671231890695 0.2248095502289586 0.3035343549912095 2.490547376300775 3.2950532226726725 -0.00784981157630682 -0.002476382634522309 0.010131956999999999 -0.013464768 0.0010065025 0.0015044829999999998 0.012743896000000001 -0.0038845115000000004 0.006272370500000001 -0.017055605 0.03812732 0.06480691 0.18283993 0.29358354 0.91499686 1.4410233000000001 3.5925550000000004e-07 3.734214e-07 0.58153045 0.8669504000000001 2.8835547000000004 3.7694442 14.410379 18.52095 -0.0739578 -0.0042024343
8 7 96.0 95.0 0.0 478.0 1.0 478.0 3346.0 0.05 2.0 5.0 2.0 5.0 0.0 1.6284056999999998 7.687636 7.452083999999999 20.715212 37.36667 84.180954 0.026381112999999998 0.028861007 0.3429683427703999 0.5788530473857294 3.4984647932875417 0.0 0.011138008777318895 0.08723572006555756 0.13505660278131726 0.3080234455301096 1.9015993173338024 2.525255932768265 -0.010887796059250832 -0.0040773123184797465 0.00682558 0.0049094097 0.0011339751 0.0043921572999999995 0.011535725 0.011444922 0.0042418690000000005 -0.007210325 0.010128246 0.07753111 0.04855813 0.24353394 0.24300486 1.1304462 1.8838693e-07 3.6353214e-07 0.25838378 2.1490667 1.3140138000000001 5.9981136 6.558439999999999 24.37817 -0.1505112 -0.043448977
9 8 120.0 119.0 0.0 478.0 1.0 478.0 3824.0 0.05 5.0 2.0 5.0 2.0 0.0 11.444039 1.5874879 54.49880600000001 6.5180235 272.81097 32.02797 0.027892927 0.023313208 0.04601948363571996 0.22472693959922946 1.8345137614500877 0.0 0.07993672026650593 0.008022510747359511 0.5031299601404577 0.08955419064160207 4.384975101059878 1.0005814651570817 -0.006901365310698747 -0.003758238096650968 0.013585217 0.0059175556999999995 0.0016256317000000002 0.0011794145 0.020009885 0.008881162 0.005179128 0.0026903595 0.1915621 0.0042831437 0.9186873 0.015627237 4.5974317000000005 0.073625945 3.5950129e-07 4.8217976e-07 1.7022394 0.14996052 8.294214 0.7111329000000001 41.47974 3.4817169000000003 -0.121058926 -0.056617767
10 9 144.0 143.0 0.0 478.0 1.0 478.0 4302.0 0.05 3.0 2.0 3.0 2.0 0.0 2.7104049999999997 2.9201734 9.594693 9.209149 45.367016 34.853527 0.027722575 0.024623917000000002 0.09876412763598223 0.3162928485152334 1.6894490858690778 0.0 0.02490674868837548 0.030283883503414787 0.18166402576452345 0.15808164771534552 1.7803836216389222 1.0039386003974171 -0.014499503944814203 -0.004561017796980071 0.002085986 0.0048816567 0.002495002 0.0016686192000000002 0.015049544 0.0079843905 -0.0033446013 -0.0026530582 0.02151707 0.017186822 0.09809124 0.056464136 0.49133897 0.22929375 1.7442491e-07 2.3640719e-07 0.5284148 0.52893436 2.2715082000000004 1.7419220000000002 11.310697 6.477185700000001 -0.19066198 -0.04866889
11 10 168.0 167.0 0.0 478.0 1.0 478.0 4780.0 0.05 4.0 0.0 4.0 0.0 0.0 4.791616 0.07378042 22.659098 0.035627235 113.46068999999999 0.1401871 0.014692583 0.011221955 0.0 0.0 0.0 0.0 0.058044653194176676 0.0006671480381455103 0.40381801054561617 0.0011843871302212494 3.535810493184707 0.0037133864868810437 -0.003858987061708015 -0.003517640307545661 0.011001096 -0.008119860999999999 0.0020611994 0.0009026574000000001 0.014140483999999998 -0.005373695 -0.00013872093 -0.010452669 0.11461395 1.3184773000000002e-06 0.54966277 1.1577918e-06 2.7507042999999998 4.8367815e-06 4.3972415000000004e-07 3.1812826e-07 1.1093427 0.012648877 5.3802943 0.012855935 26.912289 0.034687527 -0.025421416000000002 -0.009518709