1
0
mirror of https://github.com/gryf/coach.git synced 2026-03-15 22:23:36 +01:00

trace tests update

This commit is contained in:
Shadi Endrawis
2018-10-02 17:55:16 +03:00
parent 51726a5b80
commit f7990d4003
79 changed files with 10105 additions and 9539 deletions

View File

@@ -1,6 +1,6 @@
Episode #,Training Iter,In Heatup,ER #Transitions,ER #Episodes,Episode Length,Total steps,Epsilon,Shaped Training Reward,Training Reward,Update Target Network,Evaluation Reward,Shaped Evaluation Reward,Success Rate,Loss/Mean,Loss/Stdev,Loss/Max,Loss/Min,Learning Rate/Mean,Learning Rate/Stdev,Learning Rate/Max,Learning Rate/Min,Grads (unclipped)/Mean,Grads (unclipped)/Stdev,Grads (unclipped)/Max,Grads (unclipped)/Min,Discounted Return/Mean,Discounted Return/Stdev,Discounted Return/Max,Discounted Return/Min,Q/Mean,Q/Stdev,Q/Max,Q/Min
1,0.0,1.0,986.0,986.0,986.0,986.0,7.0,,,0.0,,,,,,,,,,,,,,,,-1.8205545076821419,0.7192845707051421,-0.2081522550905921,-3.1698994392478896,,,,
2,0.0,1.0,1806.0,1806.0,820.0,1806.0,4.0,,,0.0,,,,,,,,,,,,,,,,-2.3370969394351864,0.575288014748253,-0.7105532272722921,-3.355172823288848,,,,
3,206.0,0.0,2629.0,2629.0,823.0,2629.0,5.0,-21.0,-21.0,0.0,,,,0.013241646022737044,0.013426115799074972,0.06692679971456528,0.000705955782905221,0.0002500000000000001,1.0842021724855042e-19,0.00025,0.00025,0.012983278999999999,0.004881826,0.032856163,0.00533005,-2.3342722836314502,0.7834970909114538,-0.38878391807422696,-3.369599601005491,,,,
4,398.0,0.0,3397.0,3397.0,768.0,3397.0,3.0,-21.0,-21.0,0.0,,,,0.014017146643709566,0.013581066769959666,0.06306872516870499,0.0006765050929971039,0.0002500000000000001,5.421010862427521e-20,0.00025,0.00025,0.012632045,0.004335136,0.02522624,0.005467761,-2.4495140411664926,0.5558315778011723,-0.7105532272722921,-3.354852824180864,,,,
5,617.0,0.0,4274.0,4274.0,877.0,4274.0,6.0,-21.0,-21.0,0.0,,,,0.014781689254851953,0.014562911817935488,0.07851850241422652,0.0011200609151273966,0.0002500000000000001,1.0842021724855042e-19,0.00025,0.00025,0.012727573999999998,0.004514621,0.035298187,0.0072377953000000005,-2.24386277951745,0.7822289069788971,-0.3810471181045498,-3.3685376079191567,-0.017959576,0.016207013,0.0050252294,-0.04572457
3,206.0,0.0,2629.0,2629.0,823.0,2629.0,5.0,-21.0,-21.0,0.0,,,,0.014186631905104856,0.013655308200271828,0.06909694522619247,0.0005460917018353938,0.0002500000000000001,1.0842021724855042e-19,0.00025,0.00025,0.014938818000000001,0.0055247187,0.034780357000000005,0.0049935523,-2.3342722836314502,0.7834970909114538,-0.38878391807422696,-3.369599601005491,,,,
4,398.0,0.0,3397.0,3397.0,768.0,3397.0,3.0,-21.0,-21.0,0.0,,,,0.014518419023564396,0.013256214475088386,0.06440683454275131,0.0005935237277299166,0.0002500000000000001,5.421010862427521e-20,0.00025,0.00025,0.013618752,0.003883305,0.028320136,0.0057370984,-2.4495140411664926,0.5558315778011723,-0.7105532272722921,-3.354852824180864,,,,
5,705.0,0.0,4626.0,4626.0,1229.0,4626.0,6.0,-19.0,-19.0,0.0,,,,0.013912314557241342,0.013573258327554268,0.08049257844686508,0.00038326982758007933,0.0002500000000000001,5.421010862427521e-20,0.00025,0.00025,0.0129638435,0.004854921,0.035924666,0.0042663114,-1.4469428047536403,0.7634920719307412,-0.008604775224526406,-3.170625540860168,-0.013995509,0.012983983999999999,0.019298933,-0.037532326
1 Episode # Training Iter In Heatup ER #Transitions ER #Episodes Episode Length Total steps Epsilon Shaped Training Reward Training Reward Update Target Network Evaluation Reward Shaped Evaluation Reward Success Rate Loss/Mean Loss/Stdev Loss/Max Loss/Min Learning Rate/Mean Learning Rate/Stdev Learning Rate/Max Learning Rate/Min Grads (unclipped)/Mean Grads (unclipped)/Stdev Grads (unclipped)/Max Grads (unclipped)/Min Discounted Return/Mean Discounted Return/Stdev Discounted Return/Max Discounted Return/Min Q/Mean Q/Stdev Q/Max Q/Min
2 1 0.0 1.0 986.0 986.0 986.0 986.0 7.0 0.0 -1.8205545076821419 0.7192845707051421 -0.2081522550905921 -3.1698994392478896
3 2 0.0 1.0 1806.0 1806.0 820.0 1806.0 4.0 0.0 -2.3370969394351864 0.575288014748253 -0.7105532272722921 -3.355172823288848
4 3 206.0 0.0 2629.0 2629.0 823.0 2629.0 5.0 -21.0 -21.0 0.0 0.013241646022737044 0.014186631905104856 0.013426115799074972 0.013655308200271828 0.06692679971456528 0.06909694522619247 0.000705955782905221 0.0005460917018353938 0.0002500000000000001 1.0842021724855042e-19 0.00025 0.00025 0.012983278999999999 0.014938818000000001 0.004881826 0.0055247187 0.032856163 0.034780357000000005 0.00533005 0.0049935523 -2.3342722836314502 0.7834970909114538 -0.38878391807422696 -3.369599601005491
5 4 398.0 0.0 3397.0 3397.0 768.0 3397.0 3.0 -21.0 -21.0 0.0 0.014017146643709566 0.014518419023564396 0.013581066769959666 0.013256214475088386 0.06306872516870499 0.06440683454275131 0.0006765050929971039 0.0005935237277299166 0.0002500000000000001 5.421010862427521e-20 0.00025 0.00025 0.012632045 0.013618752 0.004335136 0.003883305 0.02522624 0.028320136 0.005467761 0.0057370984 -2.4495140411664926 0.5558315778011723 -0.7105532272722921 -3.354852824180864
6 5 617.0 705.0 0.0 4274.0 4626.0 4274.0 4626.0 877.0 1229.0 4274.0 4626.0 6.0 -21.0 -19.0 -21.0 -19.0 0.0 0.014781689254851953 0.013912314557241342 0.014562911817935488 0.013573258327554268 0.07851850241422652 0.08049257844686508 0.0011200609151273966 0.00038326982758007933 0.0002500000000000001 1.0842021724855042e-19 5.421010862427521e-20 0.00025 0.00025 0.012727573999999998 0.0129638435 0.004514621 0.004854921 0.035298187 0.035924666 0.0072377953000000005 0.0042663114 -2.24386277951745 -1.4469428047536403 0.7822289069788971 0.7634920719307412 -0.3810471181045498 -0.008604775224526406 -3.3685376079191567 -3.170625540860168 -0.017959576 -0.013995509 0.016207013 0.012983983999999999 0.0050252294 0.019298933 -0.04572457 -0.037532326