Episode #,Training Iter,In Heatup,ER #Transitions,ER #Episodes,Episode Length,Total steps,Epsilon,Shaped Training Reward,Training Reward,Update Target Network,Evaluation Reward,Shaped Evaluation Reward,Success Rate,Loss/Mean,Loss/Stdev,Loss/Max,Loss/Min,Learning Rate/Mean,Learning Rate/Stdev,Learning Rate/Max,Learning Rate/Min,Grads (unclipped)/Mean,Grads (unclipped)/Stdev,Grads (unclipped)/Max,Grads (unclipped)/Min,Q/Mean,Q/Stdev,Q/Max,Q/Min 1,0.0,1.0,1117.0,1117.0,1117.0,1117.0,1.0,,,0.0,,,,,,,,,,,,,,,,,,, 2,221.0,0.0,2002.0,2002.0,885.0,2002.0,0.999123850000019,-21.0,-21.0,0.0,,,,0.006624795104714366,0.00394576811971849,0.01863841339945793,6.383289291989058e-05,6.250000000000003e-05,2.7105054312137605e-20,6.25e-05,6.25e-05,0.032127135,0.014603343000000001,0.12838697,0.005512589,,,, 3,455.0,0.0,2938.0,2938.0,936.0,2938.0,0.9981972100000392,-20.0,-20.0,0.0,,,,0.006993958523544746,0.0031627418936934102,0.01826494000852108,0.000633664894849062,6.250000000000003e-05,2.7105054312137605e-20,6.25e-05,6.25e-05,0.026382675,0.010049541,0.06018944,0.009578557,-0.08102258,0.054663535,-0.0028564844,-0.15667786 4,659.0,0.0,3754.0,3754.0,816.0,3754.0,0.9973893700000568,-21.0,-21.0,0.0,,,,0.00653242061713065,0.0030014368076197325,0.014597361907362938,3.4910688555100926e-05,6.250000000000001e-05,1.3552527156068802e-20,6.25e-05,6.25e-05,0.019908648,0.0060336159999999995,0.03786578,0.003926692,,,, 5,906.0,0.0,4739.0,4739.0,985.0,4739.0,0.9964142200000778,-20.0,-20.0,0.0,,,,0.005325366398493989,0.00258031872854336,0.01823988556861877,6.391682836692779e-05,6.250000000000003e-05,2.7105054312137605e-20,6.25e-05,6.25e-05,0.016708475,0.006444646,0.051227405999999996,0.0036940586,-0.042256642000000004,0.010646114,-0.030611286,-0.06712968