1
0
mirror of https://github.com/gryf/coach.git synced 2026-01-06 05:44:14 +01:00

trace tests update

This commit is contained in:
Shadi Endrawis
2018-10-02 17:55:16 +03:00
parent 51726a5b80
commit f7990d4003
79 changed files with 10105 additions and 9539 deletions

View File

@@ -1,6 +1,6 @@
Episode #,Training Iter,In Heatup,ER #Transitions,ER #Episodes,Episode Length,Total steps,Epsilon,Shaped Training Reward,Training Reward,Update Target Network,Evaluation Reward,Shaped Evaluation Reward,Success Rate,Loss/Mean,Loss/Stdev,Loss/Max,Loss/Min,Learning Rate/Mean,Learning Rate/Stdev,Learning Rate/Max,Learning Rate/Min,Grads (unclipped)/Mean,Grads (unclipped)/Stdev,Grads (unclipped)/Max,Grads (unclipped)/Min,Discounted Return/Mean,Discounted Return/Stdev,Discounted Return/Max,Discounted Return/Min,Entropy/Mean,Entropy/Stdev,Entropy/Max,Entropy/Min,Advantages/Mean,Advantages/Stdev,Advantages/Max,Advantages/Min,Values/Mean,Values/Stdev,Values/Max,Values/Min,Value Loss/Mean,Value Loss/Stdev,Value Loss/Max,Value Loss/Min,Policy Loss/Mean,Policy Loss/Stdev,Policy Loss/Max,Policy Loss/Min,Q/Mean,Q/Stdev,Q/Max,Q/Min,TD targets/Mean,TD targets/Stdev,TD targets/Max,TD targets/Min,actions/Mean,actions/Stdev,actions/Max,actions/Min
1,0.0,1.0,97.0,1.0,25.0,25.0,0.0,,,0.0,,,,,,,,,,,,,,,,-480.6903328824848,254.92315277284558,-40.0,-888.7145624034126,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,0.0,1.0,194.0,2.0,25.0,50.0,0.0,,,0.0,,,,,,,,,,,,,,,,-480.6903328824848,254.92315277284558,-40.0,-888.7145624034126,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,0.0,0.0,291.0,3.0,25.0,75.0,-0.013705192291281485,-1000.0,-1000.0,0.0,,,,,,,,,,,,,,,,-480.6903328824848,254.92315277284558,-40.0,-888.7145624034126,,,,,,,,,,,,,,,,,,,,,-0.21267389,0.09754460000000001,-0.103998035,-0.46418846,,,,,-0.2677615218140108,0.9221390139573292,1.6445876359939575,-4.0093758958623535
4,0.0,0.0,388.0,4.0,25.0,100.0,-0.02430443169727376,-1000.0,-1000.0,0.0,,,,,,,,,,,,,,,,-480.6903328824848,254.92315277284558,-40.0,-888.7145624034126,,,,,,,,,,,,,,,,,,,,,-0.5520972,0.33804613,-0.16024278,-1.3700855,,,,,-0.059834032790882175,2.0965905392737207,5.847123362995334,-9.20645523071289
5,0.0,0.0,485.0,5.0,25.0,125.0,0.0,-1000.0,-1000.0,0.0,,,,,,,,,,,,,,,,-480.6903328824848,254.92315277284558,-40.0,-888.7145624034126,,,,,,,,,,,,,,,,,,,,,-0.27497244,0.19105045,-0.08130584,-0.90415394,,,,,-0.3390362190323586,1.3539567588293069,1.7026247944415995,-7.820933549975827
3,0.0,0.0,291.0,3.0,25.0,75.0,-0.013705192291281485,-1000.0,-1000.0,0.0,,,,,,,,,,,,,,,,-480.6903328824848,254.92315277284558,-40.0,-888.7145624034126,,,,,,,,,,,,,,,,,,,,,-0.04823625,0.023806227000000003,0.021652615,-0.10660829,,,,,-0.12859384303253607,0.18325901915279744,0.04875503852963448,-0.7279058694839478
4,0.0,0.0,388.0,4.0,25.0,100.0,-0.02430443169727376,-1000.0,-1000.0,0.0,,,,,,,,,,,,,,,,-480.6903328824848,254.92315277284558,-40.0,-888.7145624034126,,,,,,,,,,,,,,,,,,,,,-0.04093397,0.045165304,0.09869731,-0.120250694,,,,,-0.16543949867939364,0.2667459507740165,0.1111396551132202,-1.4467874369949862
5,0.0,0.0,485.0,5.0,25.0,125.0,0.0,-1000.0,-1000.0,0.0,,,,,,,,,,,,,,,,-480.6903328824848,254.92315277284558,-40.0,-888.7145624034126,,,,,,,,,,,,,,,,,,,,,-0.050059527,0.0325627,0.057268333,-0.10848339,,,,,-0.10842234288811206,0.1848078590593072,0.10341470901523106,-0.6624982986866047
1 Episode # Training Iter In Heatup ER #Transitions ER #Episodes Episode Length Total steps Epsilon Shaped Training Reward Training Reward Update Target Network Evaluation Reward Shaped Evaluation Reward Success Rate Loss/Mean Loss/Stdev Loss/Max Loss/Min Learning Rate/Mean Learning Rate/Stdev Learning Rate/Max Learning Rate/Min Grads (unclipped)/Mean Grads (unclipped)/Stdev Grads (unclipped)/Max Grads (unclipped)/Min Discounted Return/Mean Discounted Return/Stdev Discounted Return/Max Discounted Return/Min Entropy/Mean Entropy/Stdev Entropy/Max Entropy/Min Advantages/Mean Advantages/Stdev Advantages/Max Advantages/Min Values/Mean Values/Stdev Values/Max Values/Min Value Loss/Mean Value Loss/Stdev Value Loss/Max Value Loss/Min Policy Loss/Mean Policy Loss/Stdev Policy Loss/Max Policy Loss/Min Q/Mean Q/Stdev Q/Max Q/Min TD targets/Mean TD targets/Stdev TD targets/Max TD targets/Min actions/Mean actions/Stdev actions/Max actions/Min
2 1 0.0 1.0 97.0 1.0 25.0 25.0 0.0 0.0 -480.6903328824848 254.92315277284558 -40.0 -888.7145624034126
3 2 0.0 1.0 194.0 2.0 25.0 50.0 0.0 0.0 -480.6903328824848 254.92315277284558 -40.0 -888.7145624034126
4 3 0.0 0.0 291.0 3.0 25.0 75.0 -0.013705192291281485 -1000.0 -1000.0 0.0 -480.6903328824848 254.92315277284558 -40.0 -888.7145624034126 -0.21267389 -0.04823625 0.09754460000000001 0.023806227000000003 -0.103998035 0.021652615 -0.46418846 -0.10660829 -0.2677615218140108 -0.12859384303253607 0.9221390139573292 0.18325901915279744 1.6445876359939575 0.04875503852963448 -4.0093758958623535 -0.7279058694839478
5 4 0.0 0.0 388.0 4.0 25.0 100.0 -0.02430443169727376 -1000.0 -1000.0 0.0 -480.6903328824848 254.92315277284558 -40.0 -888.7145624034126 -0.5520972 -0.04093397 0.33804613 0.045165304 -0.16024278 0.09869731 -1.3700855 -0.120250694 -0.059834032790882175 -0.16543949867939364 2.0965905392737207 0.2667459507740165 5.847123362995334 0.1111396551132202 -9.20645523071289 -1.4467874369949862
6 5 0.0 0.0 485.0 5.0 25.0 125.0 0.0 -1000.0 -1000.0 0.0 -480.6903328824848 254.92315277284558 -40.0 -888.7145624034126 -0.27497244 -0.050059527 0.19105045 0.0325627 -0.08130584 0.057268333 -0.90415394 -0.10848339 -0.3390362190323586 -0.10842234288811206 1.3539567588293069 0.1848078590593072 1.7026247944415995 0.10341470901523106 -7.820933549975827 -0.6624982986866047