1
0
mirror of https://github.com/gryf/coach.git synced 2026-04-20 06:33:31 +02:00

trace tests update

This commit is contained in:
Shadi Endrawis
2018-10-02 17:55:16 +03:00
parent 51726a5b80
commit f7990d4003
79 changed files with 10105 additions and 9539 deletions

View File

@@ -1,6 +1,6 @@
Episode #,Training Iter,In Heatup,ER #Transitions,ER #Episodes,Episode Length,Total steps,Epsilon,Shaped Training Reward,Training Reward,Update Target Network,Evaluation Reward,Shaped Evaluation Reward,Success Rate,Loss/Mean,Loss/Stdev,Loss/Max,Loss/Min,Learning Rate/Mean,Learning Rate/Stdev,Learning Rate/Max,Learning Rate/Min,Grads (unclipped)/Mean,Grads (unclipped)/Stdev,Grads (unclipped)/Max,Grads (unclipped)/Min,Discounted Return/Mean,Discounted Return/Stdev,Discounted Return/Max,Discounted Return/Min,Q/Mean,Q/Stdev,Q/Max,Q/Min
1,0.0,1.0,909.0,909.0,909.0,909.0,0.0,,,0.0,,,,,,,,,,,,,,,,-1.9655950472199328,0.7232623981585671,-0.33416993016801505,-3.2856761435887236,,,,
2,0.0,1.0,1849.0,1849.0,940.0,1849.0,0.0,,,0.0,,,,,,,,,,,,,,,,-2.0850702358812647,0.5206151988478068,-0.7105532272722921,-3.229858453919355,,,,
3,191.0,0.0,2612.0,2612.0,763.0,2612.0,0.0,-21.0,-21.0,0.0,,,,0.012449520095178359,0.013048937023542005,0.048593450337648385,0.0007832169649191201,0.0002500000000000001,5.421010862427521e-20,0.00025,0.00025,0.011991171,0.0049315933,0.031664677,0.005639204,-2.5178046202451694,0.5843148195084643,-0.7105532272722921,-3.3699982440767453,,,,
4,460.0,0.0,3688.0,3688.0,1076.0,3688.0,0.06158000603318215,-19.0,-19.0,0.0,,,,0.013090697613537835,0.01423496703653869,0.06296952068805695,0.0003786738670896739,0.0002500000000000001,5.421010862427521e-20,0.00025,0.00025,0.01207951,0.0049282457,0.02845837,0.0044667325,-1.600130842171531,0.7841052063633746,-0.05879559276456229,-3.1930866215011884,-0.027162252,0.0089824805,-0.0103284065,-0.038634557
5,679.0,0.0,4563.0,4563.0,875.0,4563.0,0.0686592236161232,-21.0,-21.0,0.0,,,,0.01267829067451182,0.01314711143634318,0.06590334326028824,0.0006290889577940106,0.0002500000000000001,1.0842021724855042e-19,0.00025,0.00025,0.012010485,0.0045945253,0.02634775,0.005362396,-2.206191047518212,0.5577254452906393,-0.7105532272722921,-3.3372597252516765,-0.020140575,0.010422909000000001,-0.0047886134,-0.037010644
3,191.0,0.0,2612.0,2612.0,763.0,2612.0,0.0,-21.0,-21.0,0.0,,,,0.013129071715587741,0.013057997576226256,0.04954143986105919,0.0009260990773327649,0.0002500000000000001,5.421010862427521e-20,0.00025,0.00025,0.01426667,0.0059959437,0.044108193,0.0058790944,-2.5178046202451694,0.5843148195084643,-0.7105532272722921,-3.3699982440767453,,,,
4,460.0,0.0,3688.0,3688.0,1076.0,3688.0,0.07349783927202225,-19.0,-19.0,0.0,,,,0.014194289974731544,0.014401563172422728,0.07057971507310867,0.0008334509911946952,0.0002500000000000001,5.421010862427521e-20,0.00025,0.00025,0.013837245,0.0044770753,0.030951055,0.0056485827,-1.600130842171531,0.7841052063633746,-0.05879559276456229,-3.1930866215011884,0.020253738,0.008782792,0.036346525,0.008415546999999999
5,679.0,0.0,4563.0,4563.0,875.0,4563.0,0.0275859609246254,-21.0,-21.0,0.0,,,,0.013575718098917559,0.013540301212523092,0.06875357776880264,0.0007445287192240357,0.0002500000000000001,1.0842021724855042e-19,0.00025,0.00025,0.013481666000000002,0.0047130687,0.03047333,0.005769696600000001,-2.206191047518212,0.5577254452906393,-0.7105532272722921,-3.3372597252516765,-0.007968453,0.008165936,0.011851054,-0.01858479
1 Episode # Training Iter In Heatup ER #Transitions ER #Episodes Episode Length Total steps Epsilon Shaped Training Reward Training Reward Update Target Network Evaluation Reward Shaped Evaluation Reward Success Rate Loss/Mean Loss/Stdev Loss/Max Loss/Min Learning Rate/Mean Learning Rate/Stdev Learning Rate/Max Learning Rate/Min Grads (unclipped)/Mean Grads (unclipped)/Stdev Grads (unclipped)/Max Grads (unclipped)/Min Discounted Return/Mean Discounted Return/Stdev Discounted Return/Max Discounted Return/Min Q/Mean Q/Stdev Q/Max Q/Min
2 1 0.0 1.0 909.0 909.0 909.0 909.0 0.0 0.0 -1.9655950472199328 0.7232623981585671 -0.33416993016801505 -3.2856761435887236
3 2 0.0 1.0 1849.0 1849.0 940.0 1849.0 0.0 0.0 -2.0850702358812647 0.5206151988478068 -0.7105532272722921 -3.229858453919355
4 3 191.0 0.0 2612.0 2612.0 763.0 2612.0 0.0 -21.0 -21.0 0.0 0.012449520095178359 0.013129071715587741 0.013048937023542005 0.013057997576226256 0.048593450337648385 0.04954143986105919 0.0007832169649191201 0.0009260990773327649 0.0002500000000000001 5.421010862427521e-20 0.00025 0.00025 0.011991171 0.01426667 0.0049315933 0.0059959437 0.031664677 0.044108193 0.005639204 0.0058790944 -2.5178046202451694 0.5843148195084643 -0.7105532272722921 -3.3699982440767453
5 4 460.0 0.0 3688.0 3688.0 1076.0 3688.0 0.06158000603318215 0.07349783927202225 -19.0 -19.0 0.0 0.013090697613537835 0.014194289974731544 0.01423496703653869 0.014401563172422728 0.06296952068805695 0.07057971507310867 0.0003786738670896739 0.0008334509911946952 0.0002500000000000001 5.421010862427521e-20 0.00025 0.00025 0.01207951 0.013837245 0.0049282457 0.0044770753 0.02845837 0.030951055 0.0044667325 0.0056485827 -1.600130842171531 0.7841052063633746 -0.05879559276456229 -3.1930866215011884 -0.027162252 0.020253738 0.0089824805 0.008782792 -0.0103284065 0.036346525 -0.038634557 0.008415546999999999
6 5 679.0 0.0 4563.0 4563.0 875.0 4563.0 0.0686592236161232 0.0275859609246254 -21.0 -21.0 0.0 0.01267829067451182 0.013575718098917559 0.01314711143634318 0.013540301212523092 0.06590334326028824 0.06875357776880264 0.0006290889577940106 0.0007445287192240357 0.0002500000000000001 1.0842021724855042e-19 0.00025 0.00025 0.012010485 0.013481666000000002 0.0045945253 0.0047130687 0.02634775 0.03047333 0.005362396 0.005769696600000001 -2.206191047518212 0.5577254452906393 -0.7105532272722921 -3.3372597252516765 -0.020140575 -0.007968453 0.010422909000000001 0.008165936 -0.0047886134 0.011851054 -0.037010644 -0.01858479