1
0
mirror of https://github.com/gryf/coach.git synced 2026-02-20 16:55:48 +01:00

trace tests update

This commit is contained in:
Shadi Endrawis
2018-10-02 17:55:16 +03:00
parent 51726a5b80
commit f7990d4003
79 changed files with 10105 additions and 9539 deletions

View File

@@ -1,6 +1,6 @@
Episode #,Training Iter,In Heatup,ER #Transitions,ER #Episodes,Episode Length,Total steps,Epsilon,Shaped Training Reward,Training Reward,Update Target Network,Evaluation Reward,Shaped Evaluation Reward,Success Rate,Loss/Mean,Loss/Stdev,Loss/Max,Loss/Min,Learning Rate/Mean,Learning Rate/Stdev,Learning Rate/Max,Learning Rate/Min,Grads (unclipped)/Mean,Grads (unclipped)/Stdev,Grads (unclipped)/Max,Grads (unclipped)/Min,Discounted Return/Mean,Discounted Return/Stdev,Discounted Return/Max,Discounted Return/Min,Entropy/Mean,Entropy/Stdev,Entropy/Max,Entropy/Min,Advantages/Mean,Advantages/Stdev,Advantages/Max,Advantages/Min,Values/Mean,Values/Stdev,Values/Max,Values/Min,Value Loss/Mean,Value Loss/Stdev,Value Loss/Max,Value Loss/Min,Policy Loss/Mean,Policy Loss/Stdev,Policy Loss/Max,Policy Loss/Min,Q/Mean,Q/Stdev,Q/Max,Q/Min,TD targets/Mean,TD targets/Stdev,TD targets/Max,TD targets/Min,actions/Mean,actions/Stdev,actions/Max,actions/Min
1,0.0,1.0,1000.0,1.0,1000.0,1000.0,0.0,,,0.0,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,0.0,1.0,2000.0,2.0,1000.0,2000.0,0.0,,,1.0,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,999.0,0.0,3000.0,3.0,1000.0,3000.0,-0.017666830179174003,0.0,0.0,1.0,,,,0.0035039535888519192,0.0036899070860429047,0.0448087714612484,0.0006857202388346195,0.00010000000000000003,4.0657581468206416e-20,0.0001,0.0001,0.15666896,0.14784017,1.7027674,0.027932363999999998,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,0.28616783,0.1527039,0.82195866,-0.052169282000000004,0.12254468997797058,0.14238915773279914,0.9096015518903732,-0.459991791844368,0.07060378249719693,0.9005342625617344,1.6849354556578502,-1.5135477528043664
4,1999.0,0.0,4000.0,4.0,1000.0,4000.0,-0.039999362478752916,0.0,0.0,1.0,,,,0.0023975625592941143,0.0020512967193832047,0.03644856810569763,0.0006296735955402255,0.00010000000000000003,4.0657581468206416e-20,0.0001,0.0001,0.10159315,0.08087424,1.1576957,0.023222,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,0.24290629,0.11069059,0.7463965,-0.0065027475,0.2942720267621687,0.14379040300676074,1.032085758447647,-0.2381104633212089,0.4210885653083537,0.7995310847152353,1.7607054373862634,-1.3770179740736286
5,2999.0,0.0,5000.0,5.0,1000.0,5000.0,0.17145601483403705,0.0,0.0,0.0,,,,0.0013986770755837895,0.0011338123535962153,0.018688598647713658,0.00039429025491699576,0.00010000000000000003,4.0657581468206416e-20,0.0001,0.0001,0.057997093,0.035725467000000004,0.5853672,0.015558452,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,0.19842595,0.07881614,0.6942527,-0.16243972,0.367736402077882,0.1450020289455392,1.0550748002529144,-0.04278800502419472,-0.009665988609877104,0.8992571498573005,1.8188017021946057,-1.4079581912324373
3,999.0,0.0,3000.0,3.0,1000.0,3000.0,-0.017666830179174003,0.0,0.0,1.0,,,,0.0029464550291862087,0.0025701377750570642,0.02788718044757843,0.0006394493393599987,0.00010000000000000003,4.0657581468206416e-20,0.0001,0.0001,0.13357769,0.117093444,1.2991068000000001,0.026759505,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,0.15686034,0.0627305,0.39373034,-0.3585922,-0.0033900204140713077,0.15875771875068714,0.5218342781066895,-0.5829120719432831,0.018559266145446292,0.8379639652873171,1.3133153498255412,-1.2431993702510542
4,1999.0,0.0,4000.0,4.0,1000.0,4000.0,-0.039999362478752916,0.0021780076323496323,0.02178007632349632,1.0,,,,0.0006978411426705012,0.00034956689330895783,0.002974547212943435,0.0001349833473796025,0.00010000000000000003,4.0657581468206416e-20,0.0001,0.0001,0.041489832000000004,0.02103676,0.1902087,0.010260164,2.3785131604782346e-05,0.00021786301549431403,0.002166953447005496,0.0,,,,,,,,,,,,,,,,,,,,,0.10348537,0.037828527,0.58358335,-0.20245944,0.1538198277351862,0.12114966051922052,0.6158200460672378,-0.3744521605968476,0.08143989407802325,0.8094344175263435,1.2337204414679308,-1.3201327969582874
5,2999.0,0.0,5000.0,5.0,1000.0,5000.0,0.17145601483403705,0.0,0.0,0.0,,,,0.0004858621195543909,0.0005712790351431513,0.00709826499223709,9.616250463295728e-05,0.00010000000000000003,4.0657581468206416e-20,0.0001,0.0001,0.028758908,0.02100075,0.23695771,0.007761135,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,-0.082560025,0.042022076,0.32945228,-0.19682288,0.20584864377714734,0.1282641644604344,0.6959143048524856,-0.15129065528511998,-0.22636502595053595,0.7716659678121603,1.6171153782369072,-1.244061515013705
1 Episode # Training Iter In Heatup ER #Transitions ER #Episodes Episode Length Total steps Epsilon Shaped Training Reward Training Reward Update Target Network Evaluation Reward Shaped Evaluation Reward Success Rate Loss/Mean Loss/Stdev Loss/Max Loss/Min Learning Rate/Mean Learning Rate/Stdev Learning Rate/Max Learning Rate/Min Grads (unclipped)/Mean Grads (unclipped)/Stdev Grads (unclipped)/Max Grads (unclipped)/Min Discounted Return/Mean Discounted Return/Stdev Discounted Return/Max Discounted Return/Min Entropy/Mean Entropy/Stdev Entropy/Max Entropy/Min Advantages/Mean Advantages/Stdev Advantages/Max Advantages/Min Values/Mean Values/Stdev Values/Max Values/Min Value Loss/Mean Value Loss/Stdev Value Loss/Max Value Loss/Min Policy Loss/Mean Policy Loss/Stdev Policy Loss/Max Policy Loss/Min Q/Mean Q/Stdev Q/Max Q/Min TD targets/Mean TD targets/Stdev TD targets/Max TD targets/Min actions/Mean actions/Stdev actions/Max actions/Min
2 1 0.0 1.0 1000.0 1.0 1000.0 1000.0 0.0 0.0 0.0 0.0 0.0 0.0
3 2 0.0 1.0 2000.0 2.0 1000.0 2000.0 0.0 1.0 0.0 0.0 0.0 0.0
4 3 999.0 0.0 3000.0 3.0 1000.0 3000.0 -0.017666830179174003 0.0 0.0 1.0 0.0035039535888519192 0.0029464550291862087 0.0036899070860429047 0.0025701377750570642 0.0448087714612484 0.02788718044757843 0.0006857202388346195 0.0006394493393599987 0.00010000000000000003 4.0657581468206416e-20 0.0001 0.0001 0.15666896 0.13357769 0.14784017 0.117093444 1.7027674 1.2991068000000001 0.027932363999999998 0.026759505 0.0 0.0 0.0 0.0 0.28616783 0.15686034 0.1527039 0.0627305 0.82195866 0.39373034 -0.052169282000000004 -0.3585922 0.12254468997797058 -0.0033900204140713077 0.14238915773279914 0.15875771875068714 0.9096015518903732 0.5218342781066895 -0.459991791844368 -0.5829120719432831 0.07060378249719693 0.018559266145446292 0.9005342625617344 0.8379639652873171 1.6849354556578502 1.3133153498255412 -1.5135477528043664 -1.2431993702510542
5 4 1999.0 0.0 4000.0 4.0 1000.0 4000.0 -0.039999362478752916 0.0 0.0021780076323496323 0.0 0.02178007632349632 1.0 0.0023975625592941143 0.0006978411426705012 0.0020512967193832047 0.00034956689330895783 0.03644856810569763 0.002974547212943435 0.0006296735955402255 0.0001349833473796025 0.00010000000000000003 4.0657581468206416e-20 0.0001 0.0001 0.10159315 0.041489832000000004 0.08087424 0.02103676 1.1576957 0.1902087 0.023222 0.010260164 0.0 2.3785131604782346e-05 0.0 0.00021786301549431403 0.0 0.002166953447005496 0.0 0.24290629 0.10348537 0.11069059 0.037828527 0.7463965 0.58358335 -0.0065027475 -0.20245944 0.2942720267621687 0.1538198277351862 0.14379040300676074 0.12114966051922052 1.032085758447647 0.6158200460672378 -0.2381104633212089 -0.3744521605968476 0.4210885653083537 0.08143989407802325 0.7995310847152353 0.8094344175263435 1.7607054373862634 1.2337204414679308 -1.3770179740736286 -1.3201327969582874
6 5 2999.0 0.0 5000.0 5.0 1000.0 5000.0 0.17145601483403705 0.0 0.0 0.0 0.0013986770755837895 0.0004858621195543909 0.0011338123535962153 0.0005712790351431513 0.018688598647713658 0.00709826499223709 0.00039429025491699576 9.616250463295728e-05 0.00010000000000000003 4.0657581468206416e-20 0.0001 0.0001 0.057997093 0.028758908 0.035725467000000004 0.02100075 0.5853672 0.23695771 0.015558452 0.007761135 0.0 0.0 0.0 0.0 0.19842595 -0.082560025 0.07881614 0.042022076 0.6942527 0.32945228 -0.16243972 -0.19682288 0.367736402077882 0.20584864377714734 0.1450020289455392 0.1282641644604344 1.0550748002529144 0.6959143048524856 -0.04278800502419472 -0.15129065528511998 -0.009665988609877104 -0.22636502595053595 0.8992571498573005 0.7716659678121603 1.8188017021946057 1.6171153782369072 -1.4079581912324373 -1.244061515013705