1
0
mirror of https://github.com/gryf/coach.git synced 2026-03-04 07:45:53 +01:00

Itaicaspi/episode reset refactoring (#105)

* reordering of the episode reset operation and allowing episodes to be stored only when they are terminated

* reordering of the episode reset operation and allowing episodes to be stored only when they are terminated

* revert tensorflow-gpu to 1.9.0 + bug fix in should_train()

* tests readme file and refactoring of policy optimization agent train function

* Update README.md

* Update README.md

* additional policy optimization train function simplifications

* Updated the traces after the reordering of the environment reset

* docker and jenkins files

* updated the traces to the ones from within the docker container

* updated traces and added control suite to the docker

* updated jenkins file with the intel proxy + updated doom basic a3c test params

* updated line breaks in jenkins file

* added a missing line break in jenkins file

* refining the list of presets ignored by the trace tests + adding a configurable beta entropy value

* switch the order of trace and golden tests in jenkins + fix the issue of golden test processes not being killed

* updated benchmarks for dueling ddqn breakout and pong

* allowing dynamic updates to the loss weights + bug fix in episode.update_returns

* remove docker and jenkins files
This commit is contained in:
Itai Caspi
2018-09-04 15:07:54 +03:00
committed by GitHub
parent 7086492127
commit 72a1d9d426
92 changed files with 9803 additions and 9740 deletions

View File

@@ -3,19 +3,19 @@ Episode #,Training Iter,In Heatup,ER #Transitions,ER #Episodes,Episode Length,To
2,0.0,1.0,87.0,1.0,87.0,573.0,0.5,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,0.0,1.0,149.0,1.0,149.0,722.0,0.5,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,0.0,1.0,335.0,1.0,335.0,1057.0,0.5,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
5,36.0,0.0,180.0,1.0,180.0,1237.0,0.4982359999999992,3.0,30.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,0.0709828,0.02884768,0.14847249,-0.025073245,0.15536511,0.67742556,4.0763674,0.00025629386
6,51.0,0.0,74.0,1.0,74.0,1311.0,0.4975107999999989,2.0,15.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,0.09154996,0.034854878,0.17196007,0.029762035,0.06300321,0.1426023,0.49494484,0.0034788419
7,77.0,0.0,131.0,1.0,131.0,1442.0,0.4962269999999984,2.0,35.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,0.07617202,0.023897866,0.14918622,0.035134307999999996,0.016550515,0.04922439,0.18912512,0.00011201962
8,118.0,0.0,204.0,1.0,204.0,1646.0,0.4942277999999975,2.0,15.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,0.059103607999999995,0.020852849,0.11587171,0.006847885400000001,0.03776522,0.16741602,0.9484094999999999,7.4577442e-06
9,137.0,0.0,92.0,1.0,92.0,1738.0,0.4933261999999971,1.0,5.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,0.07352363,0.034035592999999996,0.15143472,0.0044105817,0.05140639,0.20671843,0.9284075999999999,0.00020915868000000003
10,201.0,0.0,321.0,1.0,321.0,2059.0,0.4901803999999957,10.0,115.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,0.08682183,0.04259716,0.30125758,0.0020528187,0.08377568,0.18461238,0.6967797,0.00044282363
11,265.0,0.0,317.0,1.0,317.0,2376.0,0.4870737999999944,8.0,130.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,0.107165605,0.036707986,0.23180877,0.01380832,0.09232898,0.22525571,0.87342286,0.0018040526999999998
12,310.0,0.0,224.0,1.0,224.0,2600.0,0.4848785999999934,3.0,20.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,0.086089715,0.027629882,0.19709364,0.025783142000000002,0.025214866000000002,0.08048548,0.39741653,9.443184499999999e-05
13,338.0,0.0,138.0,1.0,138.0,2738.0,0.4835261999999929,1.0,10.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,0.056624517,0.014742057,0.09184578,0.023095844,0.013557022,0.06671045,0.36011392,7.6326745e-05
14,378.0,0.0,200.0,1.0,200.0,2938.0,0.481566199999992,1.0,5.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,0.069043785,0.030751564,0.16467288,0.0038135927,0.01971127,0.1051151,0.6758529999999999,4.7275649999999995e-05
15,422.0,0.0,221.0,1.0,221.0,3159.0,0.4794003999999911,3.0,30.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,0.06917530000000001,0.027813602000000003,0.13201289,0.016976256000000002,0.04862738,0.18498637,1.0439113000000002,6.747579e-05
16,460.0,0.0,190.0,1.0,190.0,3349.0,0.4775383999999903,2.0,45.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,0.056344293,0.023635779,0.10303167,-0.009649781,0.032846852999999995,0.14394106,0.83443564,0.00015374989
17,511.0,0.0,255.0,1.0,255.0,3604.0,0.4750393999999892,6.0,75.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,0.07982238,0.03705571,0.15894309,-0.003064227,0.07116045,0.20118825,0.8639594999999999,0.00021242326000000001
18,583.0,0.0,360.0,1.0,360.0,3964.0,0.4715113999999877,4.0,50.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,0.0702058,0.03336375,0.18290229,-0.0028483917,0.027493622000000002,0.12953442,1.0179706,4.404120299999999e-06
19,623.0,0.0,199.0,1.0,199.0,4163.0,0.4695611999999868,2.0,35.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,0.06575828,0.020313479,0.1296886,0.031751662,0.02896362,0.14715028,0.930523,9.032046999999998e-05
20,673.0,0.0,247.0,1.0,247.0,4410.0,0.4671405999999858,5.0,35.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,0.08191794,0.025515838,0.16764577,0.02347238,0.051037148,0.15077179999999998,0.6975503000000001,2.4204688e-05
5,30.0,0.0,152.0,1.0,152.0,1209.0,0.4985103999999994,2.0,15.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,0.01806015,0.031154681,0.11256089999999999,-0.03153646,0.040528998,0.15495184,0.7766681,7.3362275999999995e-06
6,84.0,0.0,270.0,1.0,270.0,1479.0,0.4958643999999982,8.0,120.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,0.04832644,0.029433122000000003,0.116728835,-0.013950496000000001,0.054705366,0.14291170000000003,0.70854425,0.00038360796
7,149.0,0.0,324.0,1.0,324.0,1803.0,0.4926891999999968,9.0,120.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,0.06519938,0.036996773999999996,0.20431875,-0.0006479138,0.09192595,0.23194770000000003,0.8836251,9.27005e-05
8,197.0,0.0,237.0,1.0,237.0,2040.0,0.4903665999999958,6.0,70.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,0.081993505,0.031750474,0.17067611,0.01893028,0.06609978,0.17276828,0.84518427,0.00049587624
9,231.0,0.0,171.0,1.0,171.0,2211.0,0.4886907999999951,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,0.06561219,0.027578448999999998,0.16583520000000002,0.0023947426000000003,0.0045568603,0.0040728809999999996,0.019272441,0.0012047348
10,352.0,0.0,604.0,1.0,604.0,2815.0,0.4827715999999925,16.0,240.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,0.054065555,0.029770117000000002,0.14167584,-0.025165185,0.06984026,0.19431692,0.8947495999999999,1.5888494e-05
11,399.0,0.0,232.0,1.0,232.0,3047.0,0.4804979999999915,4.0,25.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,0.09317397,0.037268302999999996,0.1879414,0.017247636,0.04507253,0.13425689999999998,0.7788928,0.0016535529999999999
12,430.0,0.0,154.0,1.0,154.0,3201.0,0.4789887999999909,2.0,15.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,0.060374584,0.026983725,0.1327358,0.00017325346999999997,0.03700112,0.15603235,0.8584324000000001,9.365492e-05
13,464.0,0.0,169.0,1.0,169.0,3370.0,0.4773325999999902,3.0,60.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,0.07076912,0.024960317000000003,0.171489,0.022848563,0.07708849,0.23528506,0.8999446,0.0009268887
14,502.0,0.0,189.0,1.0,189.0,3559.0,0.4754803999999894,4.0,50.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,0.08175371599999999,0.059707563,0.23806223,-0.0022388997,0.080376275,0.21826938,0.9014337,0.000156738
15,530.0,0.0,138.0,1.0,138.0,3697.0,0.4741279999999888,1.0,25.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,0.06588258599999999,0.031772457000000004,0.16913122,-0.0033009246000000004,0.037671477,0.17314273,0.92035943,0.0008084267599999999
16,549.0,0.0,95.0,1.0,95.0,3792.0,0.4731969999999884,1.0,30.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,0.08419551,0.021721134,0.13456126,0.042178745999999996,0.022773635,0.08062003,0.35514408,0.00152435
17,630.0,0.0,404.0,1.0,404.0,4196.0,0.4692377999999866,9.0,75.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,0.06739886,0.03467486,0.14949478,-0.03615028,0.063086614,0.17489205,0.7053564,0.00030611295
18,714.0,0.0,420.0,1.0,420.0,4616.0,0.4651217999999849,10.0,160.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,0.06760059,0.022386358999999998,0.1480442,0.0114746755,0.065966256,0.18684588,0.90956885,0.00010415676
19,809.0,0.0,473.0,1.0,473.0,5089.0,0.4604863999999829,7.0,135.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,0.057970256,0.020290807,0.13571687,0.012275728,0.04331644,0.16250839999999997,0.8834267,0.00020417363000000002
20,850.0,0.0,204.0,1.0,204.0,5293.0,0.45848719999998205,3.0,20.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,0.07100958,0.030987637000000002,0.14825977,0.011810873000000001,0.047851723,0.16509958,0.8592968000000001,0.00032094717999999996
1 Episode # Training Iter In Heatup ER #Transitions ER #Episodes Episode Length Total steps Epsilon Shaped Training Reward Training Reward Update Target Network Evaluation Reward Shaped Evaluation Reward Success Rate Loss/Mean Loss/Stdev Loss/Max Loss/Min Learning Rate/Mean Learning Rate/Stdev Learning Rate/Max Learning Rate/Min Grads (unclipped)/Mean Grads (unclipped)/Stdev Grads (unclipped)/Max Grads (unclipped)/Min Entropy/Mean Entropy/Stdev Entropy/Max Entropy/Min Q/Mean Q/Stdev Q/Max Q/Min Q Values/Mean Q Values/Stdev Q Values/Max Q Values/Min Value Loss/Mean Value Loss/Stdev Value Loss/Max Value Loss/Min
3 2 0.0 1.0 87.0 1.0 87.0 573.0 0.5 0.0
4 3 0.0 1.0 149.0 1.0 149.0 722.0 0.5 0.0
5 4 0.0 1.0 335.0 1.0 335.0 1057.0 0.5 0.0
6 5 36.0 30.0 0.0 180.0 152.0 1.0 180.0 152.0 1237.0 1209.0 0.4982359999999992 0.4985103999999994 3.0 2.0 30.0 15.0 0.0 0.0709828 0.01806015 0.02884768 0.031154681 0.14847249 0.11256089999999999 -0.025073245 -0.03153646 0.15536511 0.040528998 0.67742556 0.15495184 4.0763674 0.7766681 0.00025629386 7.3362275999999995e-06
7 6 51.0 84.0 0.0 74.0 270.0 1.0 74.0 270.0 1311.0 1479.0 0.4975107999999989 0.4958643999999982 2.0 8.0 15.0 120.0 0.0 0.09154996 0.04832644 0.034854878 0.029433122000000003 0.17196007 0.116728835 0.029762035 -0.013950496000000001 0.06300321 0.054705366 0.1426023 0.14291170000000003 0.49494484 0.70854425 0.0034788419 0.00038360796
8 7 77.0 149.0 0.0 131.0 324.0 1.0 131.0 324.0 1442.0 1803.0 0.4962269999999984 0.4926891999999968 2.0 9.0 35.0 120.0 0.0 0.07617202 0.06519938 0.023897866 0.036996773999999996 0.14918622 0.20431875 0.035134307999999996 -0.0006479138 0.016550515 0.09192595 0.04922439 0.23194770000000003 0.18912512 0.8836251 0.00011201962 9.27005e-05
9 8 118.0 197.0 0.0 204.0 237.0 1.0 204.0 237.0 1646.0 2040.0 0.4942277999999975 0.4903665999999958 2.0 6.0 15.0 70.0 0.0 0.059103607999999995 0.081993505 0.020852849 0.031750474 0.11587171 0.17067611 0.006847885400000001 0.01893028 0.03776522 0.06609978 0.16741602 0.17276828 0.9484094999999999 0.84518427 7.4577442e-06 0.00049587624
10 9 137.0 231.0 0.0 92.0 171.0 1.0 92.0 171.0 1738.0 2211.0 0.4933261999999971 0.4886907999999951 1.0 0.0 5.0 0.0 0.0 0.07352363 0.06561219 0.034035592999999996 0.027578448999999998 0.15143472 0.16583520000000002 0.0044105817 0.0023947426000000003 0.05140639 0.0045568603 0.20671843 0.0040728809999999996 0.9284075999999999 0.019272441 0.00020915868000000003 0.0012047348
11 10 201.0 352.0 0.0 321.0 604.0 1.0 321.0 604.0 2059.0 2815.0 0.4901803999999957 0.4827715999999925 10.0 16.0 115.0 240.0 0.0 0.08682183 0.054065555 0.04259716 0.029770117000000002 0.30125758 0.14167584 0.0020528187 -0.025165185 0.08377568 0.06984026 0.18461238 0.19431692 0.6967797 0.8947495999999999 0.00044282363 1.5888494e-05
12 11 265.0 399.0 0.0 317.0 232.0 1.0 317.0 232.0 2376.0 3047.0 0.4870737999999944 0.4804979999999915 8.0 4.0 130.0 25.0 0.0 0.107165605 0.09317397 0.036707986 0.037268302999999996 0.23180877 0.1879414 0.01380832 0.017247636 0.09232898 0.04507253 0.22525571 0.13425689999999998 0.87342286 0.7788928 0.0018040526999999998 0.0016535529999999999
13 12 310.0 430.0 0.0 224.0 154.0 1.0 224.0 154.0 2600.0 3201.0 0.4848785999999934 0.4789887999999909 3.0 2.0 20.0 15.0 0.0 0.086089715 0.060374584 0.027629882 0.026983725 0.19709364 0.1327358 0.025783142000000002 0.00017325346999999997 0.025214866000000002 0.03700112 0.08048548 0.15603235 0.39741653 0.8584324000000001 9.443184499999999e-05 9.365492e-05
14 13 338.0 464.0 0.0 138.0 169.0 1.0 138.0 169.0 2738.0 3370.0 0.4835261999999929 0.4773325999999902 1.0 3.0 10.0 60.0 0.0 0.056624517 0.07076912 0.014742057 0.024960317000000003 0.09184578 0.171489 0.023095844 0.022848563 0.013557022 0.07708849 0.06671045 0.23528506 0.36011392 0.8999446 7.6326745e-05 0.0009268887
15 14 378.0 502.0 0.0 200.0 189.0 1.0 200.0 189.0 2938.0 3559.0 0.481566199999992 0.4754803999999894 1.0 4.0 5.0 50.0 0.0 0.069043785 0.08175371599999999 0.030751564 0.059707563 0.16467288 0.23806223 0.0038135927 -0.0022388997 0.01971127 0.080376275 0.1051151 0.21826938 0.6758529999999999 0.9014337 4.7275649999999995e-05 0.000156738
16 15 422.0 530.0 0.0 221.0 138.0 1.0 221.0 138.0 3159.0 3697.0 0.4794003999999911 0.4741279999999888 3.0 1.0 30.0 25.0 0.0 0.06917530000000001 0.06588258599999999 0.027813602000000003 0.031772457000000004 0.13201289 0.16913122 0.016976256000000002 -0.0033009246000000004 0.04862738 0.037671477 0.18498637 0.17314273 1.0439113000000002 0.92035943 6.747579e-05 0.0008084267599999999
17 16 460.0 549.0 0.0 190.0 95.0 1.0 190.0 95.0 3349.0 3792.0 0.4775383999999903 0.4731969999999884 2.0 1.0 45.0 30.0 0.0 0.056344293 0.08419551 0.023635779 0.021721134 0.10303167 0.13456126 -0.009649781 0.042178745999999996 0.032846852999999995 0.022773635 0.14394106 0.08062003 0.83443564 0.35514408 0.00015374989 0.00152435
18 17 511.0 630.0 0.0 255.0 404.0 1.0 255.0 404.0 3604.0 4196.0 0.4750393999999892 0.4692377999999866 6.0 9.0 75.0 0.0 0.07982238 0.06739886 0.03705571 0.03467486 0.15894309 0.14949478 -0.003064227 -0.03615028 0.07116045 0.063086614 0.20118825 0.17489205 0.8639594999999999 0.7053564 0.00021242326000000001 0.00030611295
19 18 583.0 714.0 0.0 360.0 420.0 1.0 360.0 420.0 3964.0 4616.0 0.4715113999999877 0.4651217999999849 4.0 10.0 50.0 160.0 0.0 0.0702058 0.06760059 0.03336375 0.022386358999999998 0.18290229 0.1480442 -0.0028483917 0.0114746755 0.027493622000000002 0.065966256 0.12953442 0.18684588 1.0179706 0.90956885 4.404120299999999e-06 0.00010415676
20 19 623.0 809.0 0.0 199.0 473.0 1.0 199.0 473.0 4163.0 5089.0 0.4695611999999868 0.4604863999999829 2.0 7.0 35.0 135.0 0.0 0.06575828 0.057970256 0.020313479 0.020290807 0.1296886 0.13571687 0.031751662 0.012275728 0.02896362 0.04331644 0.14715028 0.16250839999999997 0.930523 0.8834267 9.032046999999998e-05 0.00020417363000000002
21 20 673.0 850.0 0.0 247.0 204.0 1.0 247.0 204.0 4410.0 5293.0 0.4671405999999858 0.45848719999998205 5.0 3.0 35.0 20.0 0.0 0.08191794 0.07100958 0.025515838 0.030987637000000002 0.16764577 0.14825977 0.02347238 0.011810873000000001 0.051037148 0.047851723 0.15077179999999998 0.16509958 0.6975503000000001 0.8592968000000001 2.4204688e-05 0.00032094717999999996