1
0
mirror of https://github.com/gryf/coach.git synced 2026-04-08 06:03:33 +02:00

new traces

This commit is contained in:
itaicaspi-intel
2018-09-12 15:29:42 +03:00
parent 673911ff7f
commit fa4895f840
76 changed files with 12786 additions and 12606 deletions

View File

@@ -1,6 +1,6 @@
Episode #,Training Iter,In Heatup,ER #Transitions,ER #Episodes,Episode Length,Total steps,Epsilon,Shaped Training Reward,Training Reward,Update Target Network,Evaluation Reward,Shaped Evaluation Reward,Success Rate,Loss/Mean,Loss/Stdev,Loss/Max,Loss/Min,Learning Rate/Mean,Learning Rate/Stdev,Learning Rate/Max,Learning Rate/Min,Grads (unclipped)/Mean,Grads (unclipped)/Stdev,Grads (unclipped)/Max,Grads (unclipped)/Min,Q/Mean,Q/Stdev,Q/Max,Q/Min
1,0.0,1.0,1117.0,1117.0,1117.0,1117.0,1.0,,,0.0,,,,,,,,,,,,,,,,,,,
2,205.0,0.0,1937.0,1937.0,820.0,1937.0,0.9991882000000176,-21.0,-21.0,0.0,,,,0.013271789207150194,0.014381215654183937,0.08661144971847534,7.284892490133643e-05,0.0002500000000000001,1.0842021724855042e-19,0.00025,0.00025,0.09793413,0.109029554,1.2459028,0.010081228000000001,,,,
3,413.0,0.0,2768.0,2768.0,831.0,2768.0,0.9983655100000356,-21.0,-21.0,0.0,,,,0.013095782662258687,0.014563835652836424,0.09017306566238403,4.85398450109642e-05,0.0002500000000000001,1.0842021724855042e-19,0.00025,0.00025,0.06699568,0.10204898,0.9738844000000001,0.005621953000000001,-0.06337769,0.006071376999999999,-0.05691424,-0.07540042
4,667.0,0.0,3783.0,3783.0,1015.0,3783.0,0.9973606600000572,-20.0,-20.0,0.0,,,,0.014243900448040163,0.012460161619208224,0.05600857362151146,8.375291145057417e-06,0.0002500000000000001,5.421010862427521e-20,0.00025,0.00025,0.08014218,0.05026457,0.24418142,0.0018464670999999999,-0.08484802400000001,0.007937772,-0.07532068,-0.09821871
5,867.0,0.0,4585.0,4585.0,802.0,4585.0,0.9965666800000744,-21.0,-21.0,0.0,,,,0.0149451127843804,0.012661744241431476,0.057885006070137024,2.08603323699208e-05,0.0002500000000000001,5.421010862427521e-20,0.00025,0.00025,0.084665276,0.07432766,0.39534,0.0034519034000000002,-0.09767585,0.029707237999999997,-0.061746947,-0.13731477
Episode #,Training Iter,In Heatup,ER #Transitions,ER #Episodes,Episode Length,Total steps,Epsilon,Shaped Training Reward,Training Reward,Update Target Network,Evaluation Reward,Shaped Evaluation Reward,Success Rate,Loss/Mean,Loss/Stdev,Loss/Max,Loss/Min,Learning Rate/Mean,Learning Rate/Stdev,Learning Rate/Max,Learning Rate/Min,Grads (unclipped)/Mean,Grads (unclipped)/Stdev,Grads (unclipped)/Max,Grads (unclipped)/Min,Discounted Return/Mean,Discounted Return/Stdev,Discounted Return/Max,Discounted Return/Min,Q/Mean,Q/Stdev,Q/Max,Q/Min
1,0.0,1.0,1117.0,1117.0,1117.0,1117.0,1.0,,,0.0,,,,,,,,,,,,,,,,-1.5180229894995567,0.6998808293377133,-0.08930329112720292,-3.148474706421977,,,,
2,205.0,0.0,1937.0,1937.0,820.0,1937.0,0.9991882000000176,-21.0,-21.0,0.0,,,,0.012876360744334441,0.013169937079938372,0.058244716376066215,3.0656796297989786e-05,0.0002500000000000001,1.0842021724855042e-19,0.00025,0.00025,0.08459491,0.07130137,0.5982875,0.0070038475,-2.3361342922088504,0.784322378590693,-0.38878391807422696,-3.369599601005491,,,,
3,413.0,0.0,2768.0,2768.0,831.0,2768.0,0.9983655100000356,-21.0,-21.0,0.0,,,,0.013282082582778595,0.013458619568678002,0.08588293939828873,0.00010849477257579564,0.0002500000000000001,1.0842021724855042e-19,0.00025,0.00025,0.05503054,0.028424903999999997,0.1483146,0.009207340999999999,-2.320394201181889,0.6047235028955231,-0.7105532272722921,-3.350537576335216,-0.015621655,0.008273302,-0.0005000639,-0.026964545
4,667.0,0.0,3783.0,3783.0,1015.0,3783.0,0.9973606600000572,-20.0,-20.0,0.0,,,,0.013194311608587424,0.012947686004709887,0.054273433983325965,7.553023169748485e-05,0.0002500000000000001,5.421010862427521e-20,0.00025,0.00025,0.051557817,0.026205682999999997,0.15292059,0.007897207,-1.7531357837449677,0.7448577440634202,-0.1288331810939122,-3.2971074888190803,-0.03597512,0.008356134,-0.020397818,-0.04812158
5,867.0,0.0,4585.0,4585.0,802.0,4585.0,0.9965666800000744,-21.0,-21.0,0.0,,,,0.012997063024049566,0.012349733954071369,0.05852360650897026,3.451433076406829e-05,0.0002500000000000001,5.421010862427521e-20,0.00025,0.00025,0.055003084,0.041335538,0.28613466,0.0044473526,-2.406465837413259,0.5636980823469648,-0.7105532272722921,-3.36383697254212,-0.07864461,0.018148461,-0.04457518,-0.106537335
1 Episode # Training Iter In Heatup ER #Transitions ER #Episodes Episode Length Total steps Epsilon Shaped Training Reward Training Reward Update Target Network Evaluation Reward Shaped Evaluation Reward Success Rate Loss/Mean Loss/Stdev Loss/Max Loss/Min Learning Rate/Mean Learning Rate/Stdev Learning Rate/Max Learning Rate/Min Grads (unclipped)/Mean Grads (unclipped)/Stdev Grads (unclipped)/Max Grads (unclipped)/Min Discounted Return/Mean Discounted Return/Stdev Discounted Return/Max Discounted Return/Min Q/Mean Q/Stdev Q/Max Q/Min
2 1 0.0 1.0 1117.0 1117.0 1117.0 1117.0 1.0 0.0 -1.5180229894995567 0.6998808293377133 -0.08930329112720292 -3.148474706421977
3 2 205.0 0.0 1937.0 1937.0 820.0 1937.0 0.9991882000000176 -21.0 -21.0 0.0 0.013271789207150194 0.012876360744334441 0.014381215654183937 0.013169937079938372 0.08661144971847534 0.058244716376066215 7.284892490133643e-05 3.0656796297989786e-05 0.0002500000000000001 1.0842021724855042e-19 0.00025 0.00025 0.09793413 0.08459491 0.109029554 0.07130137 1.2459028 0.5982875 0.010081228000000001 0.0070038475 -2.3361342922088504 0.784322378590693 -0.38878391807422696 -3.369599601005491
4 3 413.0 0.0 2768.0 2768.0 831.0 2768.0 0.9983655100000356 -21.0 -21.0 0.0 0.013095782662258687 0.013282082582778595 0.014563835652836424 0.013458619568678002 0.09017306566238403 0.08588293939828873 4.85398450109642e-05 0.00010849477257579564 0.0002500000000000001 1.0842021724855042e-19 0.00025 0.00025 0.06699568 0.05503054 0.10204898 0.028424903999999997 0.9738844000000001 0.1483146 0.005621953000000001 0.009207340999999999 -2.320394201181889 0.6047235028955231 -0.7105532272722921 -3.350537576335216 -0.06337769 -0.015621655 0.006071376999999999 0.008273302 -0.05691424 -0.0005000639 -0.07540042 -0.026964545
5 4 667.0 0.0 3783.0 3783.0 1015.0 3783.0 0.9973606600000572 -20.0 -20.0 0.0 0.014243900448040163 0.013194311608587424 0.012460161619208224 0.012947686004709887 0.05600857362151146 0.054273433983325965 8.375291145057417e-06 7.553023169748485e-05 0.0002500000000000001 5.421010862427521e-20 0.00025 0.00025 0.08014218 0.051557817 0.05026457 0.026205682999999997 0.24418142 0.15292059 0.0018464670999999999 0.007897207 -1.7531357837449677 0.7448577440634202 -0.1288331810939122 -3.2971074888190803 -0.08484802400000001 -0.03597512 0.007937772 0.008356134 -0.07532068 -0.020397818 -0.09821871 -0.04812158
6 5 867.0 0.0 4585.0 4585.0 802.0 4585.0 0.9965666800000744 -21.0 -21.0 0.0 0.0149451127843804 0.012997063024049566 0.012661744241431476 0.012349733954071369 0.057885006070137024 0.05852360650897026 2.08603323699208e-05 3.451433076406829e-05 0.0002500000000000001 5.421010862427521e-20 0.00025 0.00025 0.084665276 0.055003084 0.07432766 0.041335538 0.39534 0.28613466 0.0034519034000000002 0.0044473526 -2.406465837413259 0.5636980823469648 -0.7105532272722921 -3.36383697254212 -0.09767585 -0.07864461 0.029707237999999997 0.018148461 -0.061746947 -0.04457518 -0.13731477 -0.106537335