1
0
mirror of https://github.com/gryf/coach.git synced 2026-04-20 06:33:31 +02:00

Trace tests update

This commit is contained in:
Shadi Endrawis
2018-08-20 13:01:17 +03:00
parent c1f428666e
commit 3abb6cd415
99 changed files with 12876 additions and 39 deletions

View File

@@ -0,0 +1,6 @@
Episode #,Training Iter,In Heatup,ER #Transitions,ER #Episodes,Episode Length,Total steps,Epsilon,Shaped Training Reward,Training Reward,Update Target Network,Evaluation Reward,Shaped Evaluation Reward,Success Rate,Loss/Mean,Loss/Stdev,Loss/Max,Loss/Min,Learning Rate/Mean,Learning Rate/Stdev,Learning Rate/Max,Learning Rate/Min,Grads (unclipped)/Mean,Grads (unclipped)/Stdev,Grads (unclipped)/Max,Grads (unclipped)/Min,Q/Mean,Q/Stdev,Q/Max,Q/Min
1,0.0,1.0,1117.0,1117.0,1117.0,1117.0,1.0,,,0.0,,,,,,,,,,,,,,,,,,,
2,221.0,0.0,2002.0,2002.0,885.0,2002.0,0.9992035000000262,-21.0,-21.0,0.0,,,,0.0066113567236284546,0.003946234120878863,0.016941886395215988,3.0340672310558148e-05,0.0002500000000000001,1.0842021724855042e-19,0.00025,0.00025,0.020578874,0.011285608,0.12838697,0.003849274,,,,
3,455.0,0.0,2938.0,2938.0,936.0,2938.0,0.9983611000000541,-20.0,-20.0,0.0,,,,0.007220610483817191,0.00384386883256313,0.02201320417225361,0.0004259901470504701,0.0002500000000000001,1.0842021724855042e-19,0.00025,0.00025,0.014196658000000001,0.0053113990000000005,0.040406343,0.005419724599999999,-0.012426703,0.021457887999999998,0.023741005,-0.051037904
4,659.0,0.0,3754.0,3754.0,816.0,3754.0,0.997626700000078,-21.0,-21.0,0.0,,,,0.007067595686713306,0.00349683739085928,0.016786431893706318,0.0004974190378561616,0.0002500000000000001,5.421010862427521e-20,0.00025,0.00025,0.012732236999999999,0.0038257977000000004,0.02420173,0.00600734,,,,
5,961.0,0.0,4961.0,4961.0,1207.0,4961.0,0.996540400000114,-18.0,-18.0,0.0,,,,0.007034662726550326,0.003637364351878082,0.022078890353441242,0.0004736386181320995,0.0002500000000000001,5.421010862427521e-20,0.00025,0.00025,0.012767965,0.0043293815,0.031500462,0.0063609104,-0.01521363,0.011859578,0.006441065,-0.04179345
1 Episode # Training Iter In Heatup ER #Transitions ER #Episodes Episode Length Total steps Epsilon Shaped Training Reward Training Reward Update Target Network Evaluation Reward Shaped Evaluation Reward Success Rate Loss/Mean Loss/Stdev Loss/Max Loss/Min Learning Rate/Mean Learning Rate/Stdev Learning Rate/Max Learning Rate/Min Grads (unclipped)/Mean Grads (unclipped)/Stdev Grads (unclipped)/Max Grads (unclipped)/Min Q/Mean Q/Stdev Q/Max Q/Min
2 1 0.0 1.0 1117.0 1117.0 1117.0 1117.0 1.0 0.0
3 2 221.0 0.0 2002.0 2002.0 885.0 2002.0 0.9992035000000262 -21.0 -21.0 0.0 0.0066113567236284546 0.003946234120878863 0.016941886395215988 3.0340672310558148e-05 0.0002500000000000001 1.0842021724855042e-19 0.00025 0.00025 0.020578874 0.011285608 0.12838697 0.003849274
4 3 455.0 0.0 2938.0 2938.0 936.0 2938.0 0.9983611000000541 -20.0 -20.0 0.0 0.007220610483817191 0.00384386883256313 0.02201320417225361 0.0004259901470504701 0.0002500000000000001 1.0842021724855042e-19 0.00025 0.00025 0.014196658000000001 0.0053113990000000005 0.040406343 0.005419724599999999 -0.012426703 0.021457887999999998 0.023741005 -0.051037904
5 4 659.0 0.0 3754.0 3754.0 816.0 3754.0 0.997626700000078 -21.0 -21.0 0.0 0.007067595686713306 0.00349683739085928 0.016786431893706318 0.0004974190378561616 0.0002500000000000001 5.421010862427521e-20 0.00025 0.00025 0.012732236999999999 0.0038257977000000004 0.02420173 0.00600734
6 5 961.0 0.0 4961.0 4961.0 1207.0 4961.0 0.996540400000114 -18.0 -18.0 0.0 0.007034662726550326 0.003637364351878082 0.022078890353441242 0.0004736386181320995 0.0002500000000000001 5.421010862427521e-20 0.00025 0.00025 0.012767965 0.0043293815 0.031500462 0.0063609104 -0.01521363 0.011859578 0.006441065 -0.04179345