Merge branch 'Feature_Numpy2.0' (9dae17d8) · Commits · Yamada Hiroyuki / cpprb

.github/workflows/build.yml

+6 −0

Original line number	Diff line number	Diff line
		@@ -21,7 +21,11 @@ jobs:
		matrix:
		runs-on: [ubuntu-latest, macos-13, macos-14, windows-latest]
		python: ['3.8', '3.9', '3.10', '3.11', '3.12']
		numpy: ["numpy<2.0.0", "numpy>=2.0.0rc2"]
		gym: ['gym', 'gymnasium']
		exclude:
		- python: "3.8"
		numpy: "numpy>=2.0.0rc2"
		fail-fast: false
		runs-on: ${{ matrix.runs-on }}
		steps:
		@@ -39,6 +43,8 @@ jobs:
		path: ${{ steps.pip-cache.outputs.dir }}
		key: ${{ runner.os }}-pip${{ matrix.python }}
		restore-keys: ${{ runner.os }}-pip${{ matrix.python }}
		- name: Install NumPy
		run: pip install '${{ matrix.numpy }}'
		- name: Install cpprb
		run: pip install '.[all]'
		- name: Install Gym(nasium)

Dockerfile

+5 −2

Original line number	Diff line number	Diff line
		@@ -12,8 +12,11 @@ COPY --from=README /work/README.md /work/README.md
		COPY pyproject.toml setup.py LICENSE MANIFEST.in .
		COPY cpprb cpprb/
		ARG ON_CI
		RUN ON_CI=${ON_CI} find /opt/python -name 'cp*' \
		-exec {}/bin/pip wheel . -w /work/wheel --no-deps \; && \
		RUN ON_CI=${ON_CI} /opt/python/cp38-cp38/bin/pip wheel . -w /work/wheel --no-deps && \
		ON_CI=${ON_CI} /opt/python/cp39-cp39/bin/pip wheel . -w /work/wheel --no-deps && \
		ON_CI=${ON_CI} /opt/python/cp310-cp310/bin/pip wheel . -w /work/wheel --no-deps && \
		ON_CI=${ON_CI} /opt/python/cp311-cp311/bin/pip wheel . -w /work/wheel --no-deps && \
		ON_CI=${ON_CI} /opt/python/cp312-cp312/bin/pip wheel . -w /work/wheel --no-deps && \
		auditwheel repair /work/wheel/cpprb-*.whl -w /dist

cpprb/PyReplayBuffer.pyx

+37 −25

Original line number	Diff line number	Diff line
		@@ -22,6 +22,9 @@ from .VectorWrapper import (VectorWrapper,
		VectorDouble,PointerDouble,VectorFloat)


		COPY_ONLY_NECESSARY: bool | None = False if int(np.version.version[0]) < 2 else None


		def default_logger(level=INFO):
		"""
		Create default logger for cpprb
		@@ -43,14 +46,17 @@ def default_logger(level=INFO):
		return logger

		cdef double [::1] Cdouble(array):
		return np.ravel(np.array(array,copy=False,dtype=np.double,ndmin=1,order='C'))
		return np.ravel(np.array(array, copy=COPY_ONLY_NECESSARY,
		dtype=np.double, ndmin=1, order='C'))

		cdef inline const size_t [::1] Csize(array):
		return np.ravel(np.array(array,copy=False,dtype=np.uint64,ndmin=1,order='C'))
		return np.ravel(np.array(array, copy=COPY_ONLY_NECESSARY,
		dtype=np.uint64, ndmin=1, order='C'))

		@cython.embedsignature(True)
		cdef inline const float [::1] Cfloat(array):
		return np.ravel(np.array(array,copy=False,dtype=np.single,ndmin=1,order='C'))
		return np.ravel(np.array(array, copy=COPY_ONLY_NECESSARY,
		dtype=np.single, ndmin=1, order='C'))


		def unwrap(d):
		@@ -444,7 +450,7 @@ cdef class SharedBuffer:
		else:
		raise

		len = int(np.array(shape,copy=False,dtype="int").prod())
		len = int(np.array(shape, copy=COPY_ONLY_NECESSARY, dtype="int").prod())
		self.data = RawArray(ctx,ctype,len,self.backend)
		else:
		self.data = data
		@@ -552,7 +558,8 @@ def find_array(dict,key):
		If `dict` has `key`, returns the values with numpy.ndarray with the minimum
		dimension of 1. Otherwise, returns `None`.
		"""
		return None if not key in dict else np.array(dict[key],ndmin=1,copy=False)
		return None if not key in dict else np.array(dict[key], ndmin=1,
		copy=COPY_ONLY_NECESSARY)

		@cython.embedsignature(True)
		cdef class StepChecker:
		@@ -584,7 +591,7 @@ cdef class StepChecker:
		kwargs: dict
		Added values.
		"""
		return np.reshape(np.array(kwargs[self.check_str], copy=False),
		return np.reshape(np.asarray(kwargs[self.check_str]),
		self.check_shape,order='A').shape[0]

		@cython.embedsignature(True)
		@@ -614,7 +621,7 @@ cdef class NstepBuffer:
		self.default_dtype = default_dtype or np.single

		if next_of is not None: # next_of is not support yet.
		for name in np.array(next_of,copy=False,ndmin=1):
		for name in np.array(next_of, copy=COPY_ONLY_NECESSARY, ndmin=1):
		self.env_dict[f"next_{name}"] = self.env_dict[name]

		self.Nstep_size = Nstep["size"]
		@@ -784,7 +791,7 @@ cdef class NstepBuffer:

		cdef _extract(self,kwargs,name):
		_dict = self.env_dict[name]
		return np.reshape(np.array(kwargs[name],copy=False,ndmin=2,
		return np.reshape(np.array(kwargs[name], copy=COPY_ONLY_NECESSARY, ndmin=2,
		dtype=_dict.get("dtype",self.default_dtype)),
		_dict["add_shape"])

		@@ -963,13 +970,13 @@ cdef class ReplayBuffer:
		self.episode_len = 0

		self.compress_any = stack_compress
		self.stack_compress = np.array(stack_compress,ndmin=1,copy=False)
		self.stack_compress = np.array(stack_compress, ndmin=1, copy=COPY_ONLY_NECESSARY)

		self.default_dtype = default_dtype or np.single

		self.has_next_of = next_of
		self.next_of = np.array(next_of,
		ndmin=1,copy=False) if self.has_next_of else None
		self.next_of = np.array(next_of, ndmin=1,
		copy=COPY_ONLY_NECESSARY) if self.has_next_of else None
		self.next_ = {}
		self.cache = {} if (self.has_next_of or self.compress_any) else None

		@@ -1001,7 +1008,7 @@ cdef class ReplayBuffer:
		for name in self.stack_compress:
		self.cache_size = max(self.cache_size,
		np.array(self.env_dict[name]["shape"],
		ndmin=1,copy=False)[-1] -1)
		ndmin=1, copy=COPY_ONLY_NECESSARY)[-1] -1)

		if self.has_next_of:
		self.cache_size += 1
		@@ -1164,13 +1171,14 @@ cdef class ReplayBuffer:
		self.add_cache_i(key, index)

		for name, b in self.buffer.items():
		b[add_idx] = np.reshape(np.array(kwargs[name],copy=False,ndmin=2),
		b[add_idx] = np.reshape(np.array(kwargs[name],
		copy=COPY_ONLY_NECESSARY, ndmin=2),
		self.env_dict[name]["add_shape"])

		if self.has_next_of:
		for name in self.next_of:
		self.next_[name][...]=np.reshape(np.array(kwargs[f"next_{name}"],
		copy=False,
		copy=COPY_ONLY_NECESSARY,
		ndmin=2),
		self.env_dict[name]["add_shape"])[-1]

		@@ -1324,7 +1332,7 @@ cdef class ReplayBuffer:
		cdef cache_idx
		cdef bool use_cache

		idx = np.array(idx,copy=False,ndmin=1)
		idx = np.array(idx,copy=COPY_ONLY_NECESSARY, ndmin=1)
		for name, b in self.buffer.items():
		sample[name] = b[idx]

		@@ -1582,7 +1590,7 @@ cdef class PrioritizedReplayBuffer(ReplayBuffer):
		self.check_for_update = check_for_update
		if self.check_for_update:
		self.unchange_since_sample = np.ones(np.array(size,
		copy=False,
		copy=COPY_ONLY_NECESSARY,
		dtype='int'),
		dtype='bool')

		@@ -1659,7 +1667,7 @@ cdef class PrioritizedReplayBuffer(ReplayBuffer):
		"""
		cdef size_t N = self.size_check.step_size(kwargs)
		if priorities is not None:
		priorities = np.ravel(np.array(priorities,copy=False,
		priorities = np.ravel(np.array(priorities, copy=COPY_ONLY_NECESSARY,
		ndmin=1, dtype=np.single))
		if N != priorities.shape[0]:
		raise ValueError("`priorities` shape is incompatible")
		@@ -1683,7 +1691,8 @@ cdef class PrioritizedReplayBuffer(ReplayBuffer):
		cdef const float [:] ps

		if priorities is not None:
		ps = np.ravel(np.array(priorities,copy=False,ndmin=1,dtype=np.single))
		ps = np.ravel(np.array(priorities, copy=COPY_ONLY_NECESSARY,
		ndmin=1, dtype=np.single))
		self.per.set_priorities(index,&ps[0],N,self.get_buffer_size())
		else:
		self.per.set_priorities(index,N,self.get_buffer_size())
		@@ -2098,7 +2107,8 @@ cdef class MPReplayBuffer:
		self._lock_explorer()

		for name, b in self.buffer.items():
		b[add_idx] = np.reshape(np.array(kwargs[name],copy=False,ndmin=2),
		b[add_idx] = np.reshape(np.array(kwargs[name], ndmin=2,
		copy=COPY_ONLY_NECESSARY),
		self.env_dict[name]["add_shape"])

		self._unlock_explorer()
		@@ -2132,7 +2142,7 @@ cdef class MPReplayBuffer:
		def _encode_sample(self,idx):
		cdef sample = {}

		idx = np.array(idx,copy=False,ndmin=1)
		idx = np.array(idx, copy=COPY_ONLY_NECESSARY, ndmin=1)

		for name, b in self.buffer.items():
		sample[name] = b[idx]
		@@ -2454,8 +2464,8 @@ cdef class MPPrioritizedReplayBuffer(MPReplayBuffer):
		cdef const float [:] ps

		if priorities is not None:
		priorities = np.ravel(np.array(priorities,copy=False,
		ndmin=1,dtype=np.single))
		priorities = np.ravel(np.array(priorities, ndmin=1, dtype=np.single,
		copy=COPY_ONLY_NECESSARY))
		if N != priorities.shape[0]:
		raise ValueError("`priorities` shape is incompatible")

		@@ -2470,7 +2480,8 @@ cdef class MPPrioritizedReplayBuffer(MPReplayBuffer):
		self._lock_explorer_per()

		if priorities is not None:
		ps = np.ravel(np.array(priorities,copy=False,ndmin=1,dtype=np.single))
		ps = np.ravel(np.array(priorities, copy=COPY_ONLY_NECESSARY,
		ndmin=1, dtype=np.single))
		self.per.ptr().set_priorities(index,&ps[0],N,self.get_buffer_size())
		else:
		self.per.ptr().set_priorities(index,N,self.get_buffer_size())
		@@ -2485,7 +2496,8 @@ cdef class MPPrioritizedReplayBuffer(MPReplayBuffer):
		self._unlock_explorer_per()

		for name, b in self.buffer.items():
		b[add_idx] = np.reshape(np.array(kwargs[name],copy=False,ndmin=2),
		b[add_idx] = np.reshape(np.array(kwargs[name], ndmin=2,
		copy=COPY_ONLY_NECESSARY),
		self.env_dict[name]["add_shape"])

		self._unlock_explorer()

pyproject.toml

+27 −2

Original line number	Diff line number	Diff line
		@@ -18,21 +18,45 @@ dependencies = [
		DEBUG_CPPRB = "1"

		[tool.hatch.envs.test.scripts]
		install = "pip install -e ."
		run = "python -m unittest discover test"
		run-cov = "coverage run -m unittest discover test"

		[[tool.hatch.envs.test.matrix]]
		python = ["3.8", "3.9", "3.10", "3.11", "3.12"]
		python = ["3.9", "3.10", "3.11", "3.12"]
		gym = ["gym", "gymnasium"]
		numpy = ["numpy1", "numpy2"]

		[tool.hatch.envs.test.overrides]
		matrix.python.dependencies = [
		{ value = "ray", if = ["3.8", "3.9", "3.10", "3.11"] },
		{ value = "ray", if = ["3.9", "3.10", "3.11"] },
		]
		matrix.gym.dependencies = [
		{ value = "gym", if = ["gym"] },
		{ value = "gymnasium", if = ["gymnasium"] },
		]
		matrix.numpy.dependencies = [
		{ value = "numpy<2.0.0", if = ["numpy1"] },
		{ value = "numpy>=2.0.0rc2", if = ["numpy2"] },
		]

		[tool.hatch.envs.test-38]
		template = "test"
		python = "3.8"
		extra-dependencies = [
		"numpy<2.0.0",
		"ray",
		]

		[[tool.hatch.envs.test-38.matrix]]
		gym = ["gym", "gymnasium"]

		[tool.hatch.envs.test-38.overrides]
		matrix.gym.dependencies = [
		{ value = "gym", if = ["gym"] },
		{ value = "gymnasium", if = ["gymnasium"] },
		]


		[tool.hatch.envs.cov]
		dependencies = [
		@@ -77,6 +101,7 @@ extend-exclude = [
		]

		[tool.ruff.lint]
		select = ["NPY201"]
		ignore = [
		"PT009", # We still use standard unittest
		"N999" , # This breaks API compatibility

run-test.sh

+6 −0

Original line number	Diff line number	Diff line
		@@ -3,7 +3,13 @@
		set -eux

		hatch run cov:erase

		hatch run test:install
		hatch run test-38:install

		hatch run test:run-cov
		hatch run test-38:run-cov

		hatch run cov:combine
		hatch run cov:report