PERにて weights が更新されない
下記のサンプルスクリプトのように、add するときに priorities=1e-4 を指定し、さらに update_priorities でも同じ値を設定しているが、weights が 1 のままで更新されない。
import numpy as np
from cpprb.experimental import PrioritizedReplayBuffer
def make_replay_buffer(dim_state, dim_action):
    """Construct the prioritized replay buffer used by this script.

    The buffer is created with ``size=1_000_000``, ``np.float32`` as the
    default dtype, and five fields: ``obs`` / ``next_obs`` with shape
    ``(dim_state,)``, ``act`` with shape ``(dim_action,)``, and ``rew`` /
    ``done`` with no explicit options.

    Args:
        dim_state: Length of a single observation vector.
        dim_action: Length of a single action vector.

    Returns:
        The ``PrioritizedReplayBuffer`` instance built from that layout.
    """
    # Each field gets its own spec dict, as in a literal nested definition.
    env_dict = {
        "obs": {"shape": (dim_state,)},
        "next_obs": {"shape": (dim_state,)},
        "act": {"shape": (dim_action,)},
        "rew": {},
        "done": {},
    }
    return PrioritizedReplayBuffer(
        size=1_000_000,
        default_dtype=np.float32,
        env_dict=env_dict,
    )
# Problem dimensions and the buffer under test.
dim_state = 10
dim_action = 3
buf = make_replay_buffer(dim_state, dim_action)

# One batch of dummy transitions: zero observations/rewards/dones, unit actions.
batch_size = 4
obses = np.zeros((batch_size, dim_state), dtype=np.float32)
next_obses = obses.copy()
actions = np.ones((batch_size, dim_action))
dones = np.zeros(batch_size, dtype=np.float32)
rews = np.zeros(batch_size, dtype=np.float32)

# Every transition is given the same priority, 1e-4.
priorities = np.full(batch_size, 1e-4, dtype=np.float32)

buf.add(
    obs=obses,
    act=actions,
    rew=rews,
    next_obs=next_obses,
    done=dones,
    priorities=priorities,
)

# Weights reported by the first sample.
samples = buf.sample(batch_size)
print(samples["weights"])

# Re-assign the same priorities to the sampled indexes, then sample again
# and print the weights once more for comparison.
buf.update_priorities(samples["indexes"], priorities)
samples = buf.sample(batch_size)
print(samples["weights"])