1
0
mirror of https://github.com/gryf/coach.git synced 2026-02-28 13:25:46 +01:00

new traces

This commit is contained in:
itaicaspi-intel
2018-09-12 15:29:42 +03:00
parent 673911ff7f
commit fa4895f840
76 changed files with 12786 additions and 12606 deletions

View File

@@ -1,6 +1,6 @@
Episode #,Training Iter,In Heatup,ER #Transitions,ER #Episodes,Episode Length,Total steps,Epsilon,Shaped Training Reward,Training Reward,Update Target Network,Evaluation Reward,Shaped Evaluation Reward,Success Rate,Loss/Mean,Loss/Stdev,Loss/Max,Loss/Min,Learning Rate/Mean,Learning Rate/Stdev,Learning Rate/Max,Learning Rate/Min,Grads (unclipped)/Mean,Grads (unclipped)/Stdev,Grads (unclipped)/Max,Grads (unclipped)/Min,Entropy/Mean,Entropy/Stdev,Entropy/Max,Entropy/Min,Advantages/Mean,Advantages/Stdev,Advantages/Max,Advantages/Min,Values/Mean,Values/Stdev,Values/Max,Values/Min,Value Loss/Mean,Value Loss/Stdev,Value Loss/Max,Value Loss/Min,Policy Loss/Mean,Policy Loss/Stdev,Policy Loss/Max,Policy Loss/Min,Q/Mean,Q/Stdev,Q/Max,Q/Min,TD targets/Mean,TD targets/Stdev,TD targets/Max,TD targets/Min,actions/Mean,actions/Stdev,actions/Max,actions/Min
1,0.0,1.0,1000.0,1.0,1000.0,1000.0,0.0,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,0.0,1.0,2000.0,2.0,1000.0,2000.0,0.0,,,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,999.0,0.0,3000.0,3.0,1000.0,3000.0,-0.017666830179174003,0.0,0.0,1.0,,,,0.005126546850151572,0.004660130005352106,0.05132860690355301,0.0005627279169857502,0.00010000000000000003,4.0657581468206416e-20,0.0001,0.0001,0.1678458,0.12852536,1.5926425,0.024081124,,,,,,,,,,,,,,,,,,,,,0.24388833,0.11236252,0.42840713,-0.8727883,-0.01800971130044855,0.1942566799457346,0.5385999780893326,-0.9172834092378616,-0.11966089038501045,0.8962365587209448,1.3716363433793126,-1.5680451743766328
4,1999.0,0.0,4000.0,4.0,1000.0,4000.0,-0.039999362478752916,0.0,0.0,1.0,,,,0.0008180646479820358,0.000529273102626917,0.0054473504424095145,0.00014673141413368285,0.00010000000000000003,4.0657581468206416e-20,0.0001,0.0001,0.0469651,0.025094092000000002,0.22221590000000002,0.010784525,,,,,,,,,,,,,,,,,,,,,0.14337498,0.17592207,0.33719423,-0.28446856,0.20208258858056294,0.13431578391837634,0.5768654608726501,-0.5833876812458039,-0.2705900161928217,0.9272508528236816,1.9572209345620784,-1.4727463554915825
5,2999.0,0.0,5000.0,5.0,1000.0,5000.0,0.17145601483403705,0.0,0.0,0.0,,,,0.0003958249435308753,0.00031769597300822634,0.0040870513767004004,0.00010442566417623311,0.00010000000000000003,4.0657581468206416e-20,0.0001,0.0001,0.025218817999999997,0.013975793500000002,0.14064097,0.0070197446999999994,,,,,,,,,,,,,,,,,,,,,-0.04435015,0.030164617999999997,0.124313995,-0.2207815,0.26081367032274555,0.12723809202247516,0.5931554335355759,-0.2620022776722908,-0.2959362262223715,0.6939703144112135,1.0669463809202309,-1.416814717430604
Episode #,Training Iter,In Heatup,ER #Transitions,ER #Episodes,Episode Length,Total steps,Epsilon,Shaped Training Reward,Training Reward,Update Target Network,Evaluation Reward,Shaped Evaluation Reward,Success Rate,Loss/Mean,Loss/Stdev,Loss/Max,Loss/Min,Learning Rate/Mean,Learning Rate/Stdev,Learning Rate/Max,Learning Rate/Min,Grads (unclipped)/Mean,Grads (unclipped)/Stdev,Grads (unclipped)/Max,Grads (unclipped)/Min,Discounted Return/Mean,Discounted Return/Stdev,Discounted Return/Max,Discounted Return/Min,Entropy/Mean,Entropy/Stdev,Entropy/Max,Entropy/Min,Advantages/Mean,Advantages/Stdev,Advantages/Max,Advantages/Min,Values/Mean,Values/Stdev,Values/Max,Values/Min,Value Loss/Mean,Value Loss/Stdev,Value Loss/Max,Value Loss/Min,Policy Loss/Mean,Policy Loss/Stdev,Policy Loss/Max,Policy Loss/Min,Q/Mean,Q/Stdev,Q/Max,Q/Min,TD targets/Mean,TD targets/Stdev,TD targets/Max,TD targets/Min,actions/Mean,actions/Stdev,actions/Max,actions/Min
1,0.0,1.0,1000.0,1.0,1000.0,1000.0,0.0,,,0.0,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,0.0,1.0,2000.0,2.0,1000.0,2000.0,0.0,,,1.0,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,999.0,0.0,3000.0,3.0,1000.0,3000.0,-0.017666830179174003,0.0,0.0,1.0,,,,0.0035039535888519192,0.0036899070860429047,0.0448087714612484,0.0006857202388346195,0.00010000000000000003,4.0657581468206416e-20,0.0001,0.0001,0.15666896,0.14784017,1.7027674,0.027932363999999998,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,0.28616783,0.1527039,0.82195866,-0.052169282000000004,0.12254468997797058,0.14238915773279914,0.9096015518903732,-0.459991791844368,0.07060378249719693,0.9005342625617344,1.6849354556578502,-1.5135477528043664
4,1999.0,0.0,4000.0,4.0,1000.0,4000.0,-0.039999362478752916,0.0,0.0,1.0,,,,0.0023975625592941143,0.0020512967193832047,0.03644856810569763,0.0006296735955402255,0.00010000000000000003,4.0657581468206416e-20,0.0001,0.0001,0.10159315,0.08087424,1.1576957,0.023222,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,0.24290629,0.11069059,0.7463965,-0.0065027475,0.2942720267621687,0.14379040300676074,1.032085758447647,-0.2381104633212089,0.4210885653083537,0.7995310847152353,1.7607054373862634,-1.3770179740736286
5,2999.0,0.0,5000.0,5.0,1000.0,5000.0,0.17145601483403705,0.0,0.0,0.0,,,,0.0013986770755837895,0.0011338123535962153,0.018688598647713658,0.00039429025491699576,0.00010000000000000003,4.0657581468206416e-20,0.0001,0.0001,0.057997093,0.035725467000000004,0.5853672,0.015558452,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,0.19842595,0.07881614,0.6942527,-0.16243972,0.367736402077882,0.1450020289455392,1.0550748002529144,-0.04278800502419472,-0.009665988609877104,0.8992571498573005,1.8188017021946057,-1.4079581912324373
1 Episode # Training Iter In Heatup ER #Transitions ER #Episodes Episode Length Total steps Epsilon Shaped Training Reward Training Reward Update Target Network Evaluation Reward Shaped Evaluation Reward Success Rate Loss/Mean Loss/Stdev Loss/Max Loss/Min Learning Rate/Mean Learning Rate/Stdev Learning Rate/Max Learning Rate/Min Grads (unclipped)/Mean Grads (unclipped)/Stdev Grads (unclipped)/Max Grads (unclipped)/Min Discounted Return/Mean Discounted Return/Stdev Discounted Return/Max Discounted Return/Min Entropy/Mean Entropy/Stdev Entropy/Max Entropy/Min Advantages/Mean Advantages/Stdev Advantages/Max Advantages/Min Values/Mean Values/Stdev Values/Max Values/Min Value Loss/Mean Value Loss/Stdev Value Loss/Max Value Loss/Min Policy Loss/Mean Policy Loss/Stdev Policy Loss/Max Policy Loss/Min Q/Mean Q/Stdev Q/Max Q/Min TD targets/Mean TD targets/Stdev TD targets/Max TD targets/Min actions/Mean actions/Stdev actions/Max actions/Min
2 1 0.0 1.0 1000.0 1.0 1000.0 1000.0 0.0 0.0 0.0 0.0 0.0 0.0
3 2 0.0 1.0 2000.0 2.0 1000.0 2000.0 0.0 1.0 0.0 0.0 0.0 0.0
4 3 999.0 0.0 3000.0 3.0 1000.0 3000.0 -0.017666830179174003 0.0 0.0 1.0 0.005126546850151572 0.0035039535888519192 0.004660130005352106 0.0036899070860429047 0.05132860690355301 0.0448087714612484 0.0005627279169857502 0.0006857202388346195 0.00010000000000000003 4.0657581468206416e-20 0.0001 0.0001 0.1678458 0.15666896 0.12852536 0.14784017 1.5926425 1.7027674 0.024081124 0.027932363999999998 0.0 0.0 0.0 0.0 0.24388833 0.28616783 0.11236252 0.1527039 0.42840713 0.82195866 -0.8727883 -0.052169282000000004 -0.01800971130044855 0.12254468997797058 0.1942566799457346 0.14238915773279914 0.5385999780893326 0.9096015518903732 -0.9172834092378616 -0.459991791844368 -0.11966089038501045 0.07060378249719693 0.8962365587209448 0.9005342625617344 1.3716363433793126 1.6849354556578502 -1.5680451743766328 -1.5135477528043664
5 4 1999.0 0.0 4000.0 4.0 1000.0 4000.0 -0.039999362478752916 0.0 0.0 1.0 0.0008180646479820358 0.0023975625592941143 0.000529273102626917 0.0020512967193832047 0.0054473504424095145 0.03644856810569763 0.00014673141413368285 0.0006296735955402255 0.00010000000000000003 4.0657581468206416e-20 0.0001 0.0001 0.0469651 0.10159315 0.025094092000000002 0.08087424 0.22221590000000002 1.1576957 0.010784525 0.023222 0.0 0.0 0.0 0.0 0.14337498 0.24290629 0.17592207 0.11069059 0.33719423 0.7463965 -0.28446856 -0.0065027475 0.20208258858056294 0.2942720267621687 0.13431578391837634 0.14379040300676074 0.5768654608726501 1.032085758447647 -0.5833876812458039 -0.2381104633212089 -0.2705900161928217 0.4210885653083537 0.9272508528236816 0.7995310847152353 1.9572209345620784 1.7607054373862634 -1.4727463554915825 -1.3770179740736286
6 5 2999.0 0.0 5000.0 5.0 1000.0 5000.0 0.17145601483403705 0.0 0.0 0.0 0.0003958249435308753 0.0013986770755837895 0.00031769597300822634 0.0011338123535962153 0.0040870513767004004 0.018688598647713658 0.00010442566417623311 0.00039429025491699576 0.00010000000000000003 4.0657581468206416e-20 0.0001 0.0001 0.025218817999999997 0.057997093 0.013975793500000002 0.035725467000000004 0.14064097 0.5853672 0.0070197446999999994 0.015558452 0.0 0.0 0.0 0.0 -0.04435015 0.19842595 0.030164617999999997 0.07881614 0.124313995 0.6942527 -0.2207815 -0.16243972 0.26081367032274555 0.367736402077882 0.12723809202247516 0.1450020289455392 0.5931554335355759 1.0550748002529144 -0.2620022776722908 -0.04278800502419472 -0.2959362262223715 -0.009665988609877104 0.6939703144112135 0.8992571498573005 1.0669463809202309 1.8188017021946057 -1.416814717430604 -1.4079581912324373