n-step の定義が間違っている(nstep = n のとき割引報酬の和は \gamma^{n-1} の項までの n 項であるべきだが、現状は \gamma^{n} の項まで含む n+1 項になっている)
[今]
nstep = 4
R_t + \gamma R_{t+1} + \gamma ^2 R_{t+2} + \gamma ^3 R_{t+3} + \gamma ^4 R_{t+4}
[正]
nstep = 4
R_t + \gamma R_{t+1} + \gamma ^2 R_{t+2} + \gamma ^3 R_{t+3}
[今]
nstep = 4
R_t + \gamma R_{t+1} + \gamma ^2 R_{t+2} + \gamma ^3 R_{t+3} + \gamma ^4 R_{t+4}
[正]
nstep = 4
R_t + \gamma R_{t+1} + \gamma ^2 R_{t+2} + \gamma ^3 R_{t+3}