1
0
mirror of https://github.com/gryf/coach.git synced 2026-02-28 21:35:46 +01:00

trace tests update

This commit is contained in:
Shadi Endrawis
2018-10-02 17:55:16 +03:00
parent 51726a5b80
commit f7990d4003
79 changed files with 10105 additions and 9539 deletions

View File

@@ -1,6 +1,6 @@
Episode #,Training Iter,In Heatup,ER #Transitions,ER #Episodes,Episode Length,Total steps,Epsilon,Shaped Training Reward,Training Reward,Update Target Network,Evaluation Reward,Shaped Evaluation Reward,Success Rate,Loss/Mean,Loss/Stdev,Loss/Max,Loss/Min,Learning Rate/Mean,Learning Rate/Stdev,Learning Rate/Max,Learning Rate/Min,Grads (unclipped)/Mean,Grads (unclipped)/Stdev,Grads (unclipped)/Max,Grads (unclipped)/Min,Discounted Return/Mean,Discounted Return/Stdev,Discounted Return/Max,Discounted Return/Min,Entropy/Mean,Entropy/Stdev,Entropy/Max,Entropy/Min,Advantages/Mean,Advantages/Stdev,Advantages/Max,Advantages/Min,Values/Mean,Values/Stdev,Values/Max,Values/Min,Value Loss/Mean,Value Loss/Stdev,Value Loss/Max,Value Loss/Min,Policy Loss/Mean,Policy Loss/Stdev,Policy Loss/Max,Policy Loss/Min,Q/Mean,Q/Stdev,Q/Max,Q/Min,TD targets/Mean,TD targets/Stdev,TD targets/Max,TD targets/Min,actions/Mean,actions/Stdev,actions/Max,actions/Min
1,0.0,1.0,1001.0,1.0,1001.0,1001.0,0.0,,,0.0,,,,,,,,,,,,,,,,0.1810549437584988,0.08342612458204374,0.3657155727590055,0.012114535848885052,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,0.0,1.0,2002.0,2.0,1001.0,2002.0,0.0,,,1.0,,,,,,,,,,,,,,,,0.10514369548395547,0.05043065738920054,0.21524430347618226,0.0011643643789458708,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,1000.0,0.0,3003.0,3.0,1001.0,3003.0,-0.1185302492771778,6.74600433432463,67.46004334324633,1.0,,,,1.0688050798876248e-05,1.766983092613708e-05,0.0002591642551124096,1.702799409031286e-06,0.00010000000000000003,2.7105054312137605e-20,0.0001,0.0001,0.0045520826,0.0030055756,0.023246742999999997,0.00061706273,0.6303898598111053,0.14191577052955434,0.8002050670805612,0.012917920234129009,,,,,,,,,,,,,,,,,,,,,0.00010149915000000001,0.17838655,0.24224899999999996,-0.3742638,0.031711048067017016,0.06663038905079713,0.16946774334467746,-0.1425395913783225,-1.1330065308041135,0.2760152561198482,-0.11754712369609001,-1.564363479104263
4,2001.0,0.0,4004.0,4.0,1001.0,4004.0,-0.2048510260598676,7.668551490997389,76.68551490997385,1.0,,,,3.408961573131819e-05,6.750553695485743e-05,0.0007010267581790687,1.4872452993586194e-06,0.00010000000000000003,2.7105054312137605e-20,0.0001,0.0001,0.010960271,0.013024458000000001,0.09523078,0.0007027931,0.6965929526733899,0.1622994335943064,0.9453308568504694,0.013324665458032537,,,,,,,,,,,,,,,,,,,,,0.0027730353,0.18788128,0.250535,-0.3431186,0.029557790599657345,0.11711363926377175,0.2386419371743873,-0.3346987397531008,-1.113269411960027,0.1980527596764508,-0.6249308459515377,-1.5458447591062914
5,3002.0,0.0,5005.0,5.0,1001.0,5005.0,-0.02134772535498328,7.368122753870011,73.6812275387001,0.0,,,,1.2014473524686764e-05,1.2546101794472948e-05,0.00015411879576276988,1.0827171763594379e-06,0.00010000000000000003,2.7105054312137605e-20,0.0001,0.0001,0.007843023000000001,0.0051095295,0.038092513,0.0006715836400000001,0.66996138932216,0.15703584718374808,0.8908932610369035,0.0060312519674541746,,,,,,,,,,,,,,,,,,,,,0.015175661000000002,0.18127811,0.24180134,-0.31560785,0.03080747100578105,0.13684940389057226,0.2399127439483701,-0.3243346170643305,-0.9330713833476759,0.12577776040520755,-0.5994658138545264,-1.21464647257472
3,1000.0,0.0,3003.0,3.0,1001.0,3003.0,-0.1185302492771778,7.715022587137692,77.15022587137695,1.0,,,,2.392776927604245e-05,0.0001238392879134552,0.003135734703391791,1.3632770787808113e-06,0.00010000000000000003,2.7105054312137605e-20,0.0001,0.0001,0.0050078793,0.0064642574,0.098884284,0.0005620557,0.7529912365203498,0.4617358686541096,1.594227125555209,0.00353194497002051,,,,,,,,,,,,,,,,,,,,,0.050345387000000005,0.05729694,0.19809167,-0.08215243,0.05447568218516847,0.04079396823019403,0.1626849260711809,-0.023734710598228542,-0.34397406029780203,0.5815794471343353,1.0123427704556696,-1.4948724317067326
4,2001.0,0.0,4004.0,4.0,1001.0,4004.0,-0.2048510260598676,11.15149430448684,111.51494304486856,1.0,,,,4.744879189274797e-05,0.00012290942505022024,0.0018345331773161886,1.965395085790078e-06,0.00010000000000000003,2.7105054312137605e-20,0.0001,0.0001,0.011079958999999999,0.012011923,0.13484268,0.0006308630000000001,1.0427006689416267,0.4698636052853145,2.4480673370988217,0.02353826061555796,,,,,,,,,,,,,,,,,,,,,0.08695571,0.039015066,0.20370862,-0.04777467,0.07752333968630509,0.04623246871067645,0.2261723560740124,-0.0225689002695848,-0.8518067738026681,0.6092545155137064,1.0646579642019018,-1.5449345365759264
5,3002.0,0.0,5005.0,5.0,1001.0,5005.0,-0.02134772535498328,13.93309848305012,139.33098483050114,0.0,,,,4.3079992066850536e-05,6.204749497676766e-05,0.0006331046461127697,2.68419535132125e-06,0.00010000000000000003,2.7105054312137605e-20,0.0001,0.0001,0.012529638000000001,0.010803898999999999,0.07596945,0.0010158704,1.2820087653786647,0.3980341965211882,2.1725203141180294,6.57125185393007e-05,,,,,,,,,,,,,,,,,,,,,0.42543635,0.20454627,0.8917187,0.05104744,0.08902145835758844,0.04616376194044128,0.2426698236415496,-0.00472626581328319,-0.5524111559396501,0.6382799035600116,1.0798330140144352,-1.2145754834427926
1 Episode # Training Iter In Heatup ER #Transitions ER #Episodes Episode Length Total steps Epsilon Shaped Training Reward Training Reward Update Target Network Evaluation Reward Shaped Evaluation Reward Success Rate Loss/Mean Loss/Stdev Loss/Max Loss/Min Learning Rate/Mean Learning Rate/Stdev Learning Rate/Max Learning Rate/Min Grads (unclipped)/Mean Grads (unclipped)/Stdev Grads (unclipped)/Max Grads (unclipped)/Min Discounted Return/Mean Discounted Return/Stdev Discounted Return/Max Discounted Return/Min Entropy/Mean Entropy/Stdev Entropy/Max Entropy/Min Advantages/Mean Advantages/Stdev Advantages/Max Advantages/Min Values/Mean Values/Stdev Values/Max Values/Min Value Loss/Mean Value Loss/Stdev Value Loss/Max Value Loss/Min Policy Loss/Mean Policy Loss/Stdev Policy Loss/Max Policy Loss/Min Q/Mean Q/Stdev Q/Max Q/Min TD targets/Mean TD targets/Stdev TD targets/Max TD targets/Min actions/Mean actions/Stdev actions/Max actions/Min
2 1 0.0 1.0 1001.0 1.0 1001.0 1001.0 0.0 0.0 0.1810549437584988 0.08342612458204374 0.3657155727590055 0.012114535848885052
3 2 0.0 1.0 2002.0 2.0 1001.0 2002.0 0.0 1.0 0.10514369548395547 0.05043065738920054 0.21524430347618226 0.0011643643789458708
4 3 1000.0 0.0 3003.0 3.0 1001.0 3003.0 -0.1185302492771778 6.74600433432463 7.715022587137692 67.46004334324633 77.15022587137695 1.0 1.0688050798876248e-05 2.392776927604245e-05 1.766983092613708e-05 0.0001238392879134552 0.0002591642551124096 0.003135734703391791 1.702799409031286e-06 1.3632770787808113e-06 0.00010000000000000003 2.7105054312137605e-20 0.0001 0.0001 0.0045520826 0.0050078793 0.0030055756 0.0064642574 0.023246742999999997 0.098884284 0.00061706273 0.0005620557 0.6303898598111053 0.7529912365203498 0.14191577052955434 0.4617358686541096 0.8002050670805612 1.594227125555209 0.012917920234129009 0.00353194497002051 0.00010149915000000001 0.050345387000000005 0.17838655 0.05729694 0.24224899999999996 0.19809167 -0.3742638 -0.08215243 0.031711048067017016 0.05447568218516847 0.06663038905079713 0.04079396823019403 0.16946774334467746 0.1626849260711809 -0.1425395913783225 -0.023734710598228542 -1.1330065308041135 -0.34397406029780203 0.2760152561198482 0.5815794471343353 -0.11754712369609001 1.0123427704556696 -1.564363479104263 -1.4948724317067326
5 4 2001.0 0.0 4004.0 4.0 1001.0 4004.0 -0.2048510260598676 7.668551490997389 11.15149430448684 76.68551490997385 111.51494304486856 1.0 3.408961573131819e-05 4.744879189274797e-05 6.750553695485743e-05 0.00012290942505022024 0.0007010267581790687 0.0018345331773161886 1.4872452993586194e-06 1.965395085790078e-06 0.00010000000000000003 2.7105054312137605e-20 0.0001 0.0001 0.010960271 0.011079958999999999 0.013024458000000001 0.012011923 0.09523078 0.13484268 0.0007027931 0.0006308630000000001 0.6965929526733899 1.0427006689416267 0.1622994335943064 0.4698636052853145 0.9453308568504694 2.4480673370988217 0.013324665458032537 0.02353826061555796 0.0027730353 0.08695571 0.18788128 0.039015066 0.250535 0.20370862 -0.3431186 -0.04777467 0.029557790599657345 0.07752333968630509 0.11711363926377175 0.04623246871067645 0.2386419371743873 0.2261723560740124 -0.3346987397531008 -0.0225689002695848 -1.113269411960027 -0.8518067738026681 0.1980527596764508 0.6092545155137064 -0.6249308459515377 1.0646579642019018 -1.5458447591062914 -1.5449345365759264
6 5 3002.0 0.0 5005.0 5.0 1001.0 5005.0 -0.02134772535498328 7.368122753870011 13.93309848305012 73.6812275387001 139.33098483050114 0.0 1.2014473524686764e-05 4.3079992066850536e-05 1.2546101794472948e-05 6.204749497676766e-05 0.00015411879576276988 0.0006331046461127697 1.0827171763594379e-06 2.68419535132125e-06 0.00010000000000000003 2.7105054312137605e-20 0.0001 0.0001 0.007843023000000001 0.012529638000000001 0.0051095295 0.010803898999999999 0.038092513 0.07596945 0.0006715836400000001 0.0010158704 0.66996138932216 1.2820087653786647 0.15703584718374808 0.3980341965211882 0.8908932610369035 2.1725203141180294 0.0060312519674541746 6.57125185393007e-05 0.015175661000000002 0.42543635 0.18127811 0.20454627 0.24180134 0.8917187 -0.31560785 0.05104744 0.03080747100578105 0.08902145835758844 0.13684940389057226 0.04616376194044128 0.2399127439483701 0.2426698236415496 -0.3243346170643305 -0.00472626581328319 -0.9330713833476759 -0.5524111559396501 0.12577776040520755 0.6382799035600116 -0.5994658138545264 1.0798330140144352 -1.21464647257472 -1.2145754834427926