1
0
mirror of https://github.com/gryf/coach.git synced 2026-04-18 13:43:32 +02:00

Itaicaspi/episode reset refactoring (#105)

* reordering of the episode reset operation and allowing episodes to be stored only when they are terminated

* reordering of the episode reset operation and allowing episodes to be stored only when they are terminated

* revert tensorflow-gpu to 1.9.0 + bug fix in should_train()

* tests readme file and refactoring of policy optimization agent train function

* Update README.md

* Update README.md

* additional policy optimization train function simplifications

* Updated the traces after the reordering of the environment reset

* docker and jenkins files

* updated the traces to the ones from within the docker container

* updated traces and added control suite to the docker

* updated jenkins file with the Intel proxy + updated doom basic a3c test params

* updated line breaks in jenkins file

* added a missing line break in jenkins file

* refining the presets ignored by the trace tests + adding a configurable beta entropy value

* switch the order of trace and golden tests in jenkins + fix issue where golden test processes were not killed

* updated benchmarks for dueling ddqn breakout and pong

* allowing dynamic updates to the loss weights + bug fix in episode.update_returns

* remove docker and jenkins files
This commit is contained in:
Itai Caspi
2018-09-04 15:07:54 +03:00
committed by GitHub
parent 7086492127
commit 72a1d9d426
92 changed files with 9803 additions and 9740 deletions

View File

@@ -1,6 +1,6 @@
Episode #,Training Iter,In Heatup,ER #Transitions,ER #Episodes,Episode Length,Total steps,Epsilon,Shaped Training Reward,Training Reward,Update Target Network,Evaluation Reward,Shaped Evaluation Reward,Success Rate,Loss/Mean,Loss/Stdev,Loss/Max,Loss/Min,Learning Rate/Mean,Learning Rate/Stdev,Learning Rate/Max,Learning Rate/Min,Grads (unclipped)/Mean,Grads (unclipped)/Stdev,Grads (unclipped)/Max,Grads (unclipped)/Min,Q/Mean,Q/Stdev,Q/Max,Q/Min
1,0.0,1.0,1117.0,1117.0,1117.0,1117.0,1.0,,,0.0,,,,,,,,,,,,,,,,,,,
2,210.0,0.0,1958.0,1958.0,841.0,1958.0,0.999167410000018,-20.0,-20.0,0.0,,,,37.30497363862537,40.199281603456505,153.4302520751953,2.467848777770996,5.000000000000001e-05,6.776263578034403e-21,5e-05,5e-05,14.603501000000001,10.578437,80.69334,3.9762766000000003,,,,
3,402.0,0.0,2726.0,2726.0,768.0,2726.0,0.9984070900000346,-21.0,-21.0,0.0,,,,38.07947255298495,43.23459368266095,241.53515625,2.320526123046875,5.0000000000000016e-05,1.3552527156068802e-20,5e-05,5e-05,32.867527,23.103817000000003,127.04106999999999,8.249042,-0.01784491027499219,0.007088911611692895,-0.009264047715696506,-0.027098445154260846
4,601.0,0.0,3519.0,3519.0,793.0,3519.0,0.9976220200000516,-21.0,-21.0,0.0,,,,40.78985584680758,36.92834222767065,138.93878173828122,2.9669189453125,5e-05,0.0,5e-05,5e-05,62.59441999999999,33.358902,183.7286,26.568246999999996,-0.039346434755522436,0.004771626651866583,-0.03437434455496259,-0.04845772998523898
5,809.0,0.0,4352.0,4352.0,833.0,4352.0,0.9967973500000696,-21.0,-21.0,0.0,,,,34.69845709204674,35.17935046195014,175.6295623779297,2.969444751739502,5.0000000000000016e-05,1.3552527156068802e-20,5e-05,5e-05,54.865233999999994,28.737910999999997,232.94142000000002,26.412553999999997,-0.03589245805727842,0.005320110982296958,-0.028433983605063988,-0.045204031605389904
2,205.0,0.0,1937.0,1937.0,820.0,1937.0,0.9991882000000176,-21.0,-21.0,0.0,,,,36.60464997756772,42.04124769391064,201.15611267089844,2.788020610809326,5.000000000000001e-05,6.776263578034403e-21,5e-05,5e-05,14.734329999999998,11.578652,83.24656999999999,3.6869566,,,,
3,413.0,0.0,2768.0,2768.0,831.0,2768.0,0.9983655100000356,-21.0,-21.0,0.0,,,,37.448825304324814,40.97555825854826,265.18701171875,2.7428863048553467,5.0000000000000016e-05,1.3552527156068802e-20,5e-05,5e-05,46.146587,37.73792,313.11514,12.797323,-0.02228271633396313,0.010482918460358506,-0.008034438502509147,-0.03863051085398183
4,667.0,0.0,3783.0,3783.0,1015.0,3783.0,0.9973606600000572,-20.0,-20.0,0.0,,,,35.222983159418185,33.638557732845605,134.39295959472656,3.3111674785614014,5.000000000000001e-05,6.776263578034403e-21,5e-05,5e-05,54.700793999999995,28.679327,185.94606000000002,25.897139000000003,-0.05276434649310735,0.013212184652596557,-0.03154730399168329,-0.06887179555138573
5,867.0,0.0,4585.0,4585.0,802.0,4585.0,0.9965666800000744,-21.0,-21.0,0.0,,,,33.36415538668633,33.794293936783085,170.81182861328125,3.2840056419372563,5.000000000000001e-05,6.776263578034403e-21,5e-05,5e-05,53.996002000000004,31.833138,239.36745,27.415855,-0.03878277134735982,0.010679782367249705,-0.01826882790250238,-0.05715514831594193
1 Episode # Training Iter In Heatup ER #Transitions ER #Episodes Episode Length Total steps Epsilon Shaped Training Reward Training Reward Update Target Network Evaluation Reward Shaped Evaluation Reward Success Rate Loss/Mean Loss/Stdev Loss/Max Loss/Min Learning Rate/Mean Learning Rate/Stdev Learning Rate/Max Learning Rate/Min Grads (unclipped)/Mean Grads (unclipped)/Stdev Grads (unclipped)/Max Grads (unclipped)/Min Q/Mean Q/Stdev Q/Max Q/Min
2 1 0.0 1.0 1117.0 1117.0 1117.0 1117.0 1.0 0.0
3 2 210.0 205.0 0.0 1958.0 1937.0 1958.0 1937.0 841.0 820.0 1958.0 1937.0 0.999167410000018 0.9991882000000176 -20.0 -21.0 -20.0 -21.0 0.0 37.30497363862537 36.60464997756772 40.199281603456505 42.04124769391064 153.4302520751953 201.15611267089844 2.467848777770996 2.788020610809326 5.000000000000001e-05 6.776263578034403e-21 5e-05 5e-05 14.603501000000001 14.734329999999998 10.578437 11.578652 80.69334 83.24656999999999 3.9762766000000003 3.6869566
4 3 402.0 413.0 0.0 2726.0 2768.0 2726.0 2768.0 768.0 831.0 2726.0 2768.0 0.9984070900000346 0.9983655100000356 -21.0 -21.0 0.0 38.07947255298495 37.448825304324814 43.23459368266095 40.97555825854826 241.53515625 265.18701171875 2.320526123046875 2.7428863048553467 5.0000000000000016e-05 1.3552527156068802e-20 5e-05 5e-05 32.867527 46.146587 23.103817000000003 37.73792 127.04106999999999 313.11514 8.249042 12.797323 -0.01784491027499219 -0.02228271633396313 0.007088911611692895 0.010482918460358506 -0.009264047715696506 -0.008034438502509147 -0.027098445154260846 -0.03863051085398183
5 4 601.0 667.0 0.0 3519.0 3783.0 3519.0 3783.0 793.0 1015.0 3519.0 3783.0 0.9976220200000516 0.9973606600000572 -21.0 -20.0 -21.0 -20.0 0.0 40.78985584680758 35.222983159418185 36.92834222767065 33.638557732845605 138.93878173828122 134.39295959472656 2.9669189453125 3.3111674785614014 5e-05 5.000000000000001e-05 0.0 6.776263578034403e-21 5e-05 5e-05 62.59441999999999 54.700793999999995 33.358902 28.679327 183.7286 185.94606000000002 26.568246999999996 25.897139000000003 -0.039346434755522436 -0.05276434649310735 0.004771626651866583 0.013212184652596557 -0.03437434455496259 -0.03154730399168329 -0.04845772998523898 -0.06887179555138573
6 5 809.0 867.0 0.0 4352.0 4585.0 4352.0 4585.0 833.0 802.0 4352.0 4585.0 0.9967973500000696 0.9965666800000744 -21.0 -21.0 0.0 34.69845709204674 33.36415538668633 35.17935046195014 33.794293936783085 175.6295623779297 170.81182861328125 2.969444751739502 3.2840056419372563 5.0000000000000016e-05 5.000000000000001e-05 1.3552527156068802e-20 6.776263578034403e-21 5e-05 5e-05 54.865233999999994 53.996002000000004 28.737910999999997 31.833138 232.94142000000002 239.36745 26.412553999999997 27.415855 -0.03589245805727842 -0.03878277134735982 0.005320110982296958 0.010679782367249705 -0.028433983605063988 -0.01826882790250238 -0.045204031605389904 -0.05715514831594193