Merge branch '2.0.x' into 'master' (b76573fa) · Commits · datafold-dev / datafold

.gitlab-ci.yml

+6 −6

Original line number	Diff line number	Diff line
		@@ -132,17 +132,17 @@ pages:


		# For infos on testPyPI see https://packaging.python.org/guides/using-testpypi/
		# Note that
		# Note that
		# 1) It is required to set the variables TWINE_USERNAME and TWINE_PASSWORD in gitlab CI/CD
		# 2) The database for TestPyPI may be periodically pruned, so it can happen that user
		# accounts are deleted on testPyPI.
		# 3) It is best to use the same account and password in testPyPI as for the "true PyPI".
		# This way the twine variables set up in point 1) can be re-used.
		pypi_upload_test:
		stage: deploy
		script:
		- make test_pypi
		when: manual
		#pypi_upload_test:
		# stage: deploy
		# script:
		# - make test_pypi
		# when: manual

		pypi_upload:
		stage: deploy

.pre-commit-config.yaml

+6 −6

Original line number	Diff line number	Diff line
		repos:

		- repo: https://github.com/pre-commit/pre-commit-hooks
		rev: v4.4.0
		rev: v4.5.0
		hooks:
		- id: end-of-file-fixer
		exclude: .ipynb
		@@ -26,18 +26,18 @@ repos:
		# - id: gitlab-ci-linter

		- repo: https://github.com/asottile/pyupgrade
		rev: v3.10.1
		rev: v3.15.0
		hooks:
		- id: pyupgrade
		args: [ --py39-plus ]

		- repo: https://github.com/python/black
		rev: 23.10.1
		rev: 23.12.0
		hooks:
		- id: black

		- repo: https://github.com/pycqa/isort
		rev: 5.12.0
		rev: 5.13.2
		hooks:
		- id: isort

		@@ -67,11 +67,11 @@ repos:
		- id: nbqa-black
		additional_dependencies: [black==23.10.1]
		- id: nbqa-ruff
		additional_dependencies: [ruff==0.0.286]
		additional_dependencies: [ruff==0.1.8]
		args: [--ignore=B018, --ignore=W605]

		- repo: https://github.com/pre-commit/mirrors-mypy
		rev: v1.0.1
		rev: v1.7.1
		hooks:
		- id: mypy
		args: [--ignore-missing-imports, --no-implicit-optional, --allow-redefinition]

CONTRIBUTORS

+1 −0

Original line number	Diff line number	Diff line
		@@ -4,6 +4,7 @@ Software maintainer
		Last name, First name; <email>; Years of contribution
		Cukarska, Ana; <ana.cukarska@tum.de>; 2026-
		Dietrich, Felix; <felix.dietrich@tum.de>; 2019-
		Kern, Sabrina; <sabrina.kern@hm.edu>; 2026-
		Lehmberg, Daniel; <daniel.lehmberg@hm.edu>; 2019-

datafold/appfold/edmd.py

+88 −24

Original line number	Diff line number	Diff line
		@@ -433,6 +433,20 @@ class EDMD(
		# TODO: should TSCPredictMixin include feature names for prediction?
		return self._feature_names_pred

		@property
		def n_parameter_in_(self):
		if self.is_parametric_:
		return self.dmd_model.n_parameter_in_
		else:
		return None

		@property
		def parameter_names_in_(self):
		if self.is_parametric_:
		return self.dmd_model.parameter_names_in_
		else:
		return None

		def _validate_dictionary(self) -> bool:
		"""Validates that all elements in the EDMD dictionary.

		@@ -709,7 +723,13 @@ class EDMD(
		self.steps[step_idx] = (name, fitted_transformer)
		return X

		def _reconstruct(self, X: TSCDataFrame, U: Optional[TSCDataFrame], qois):
		def _reconstruct(
		self,
		X: TSCDataFrame,
		U: Optional[TSCDataFrame],
		P: Optional[pd.DataFrame],
		qois,
		):
		X_reconstruct = []

		if self.dmd_model.is_time_invariant and not X.is_datetime_index():
		@@ -729,15 +749,24 @@ class EDMD(
		else:
		U_select = None

		if P is not None:
		P_select = P.loc[X_ic.ids, :]
		else:
		P_select = None

		if self.stepwise_transform:
		X_est_ts = self._predict_stepwise_transform(
		X=X_ic, U=U_select, time_values=time_values, qois=qois
		X=X_ic, U=U_select, P=P_select, time_values=time_values, qois=qois
		)
		else:
		# transform initial condition to EDMD-dictionary space
		X_dict_ic = self.transform(X_ic)
		X_est_ts = self._predict_dict_ic(
		X_dict=X_dict_ic, U=U_select, time_values=time_values, qois=qois
		X_dict=X_dict_ic,
		U=U_select,
		P=P_select,
		time_values=time_values,
		qois=qois,
		)

		X_reconstruct.append(X_est_ts)
		@@ -772,7 +801,7 @@ class EDMD(
		return X_reconstruct

		def _predict_dict_ic(
		self, X_dict: TSCDataFrame, U, time_values, qois
		self, X_dict: TSCDataFrame, U, P, time_values, qois
		) -> TSCDataFrame:
		"""Prediction with initial condition in dictionary states.

		@@ -820,6 +849,7 @@ class EDMD(

		dmd_params = dict(
		U=U,
		P=P,
		time_values=time_values,
		modes=modes,
		feature_columns=feature_columns,
		@@ -834,7 +864,7 @@ class EDMD(
		# system.

		# computes system in EDMD-dictionary space
		dmd_params = dict(U=U, time_values=time_values)
		dmd_params = dict(U=U, P=P, time_values=time_values)
		dmd_params = {k: v for k, v in dmd_params.items() if v is not None}

		X_ts = self.dmd_model.predict(X_dict, **dmd_params)
		@@ -857,7 +887,7 @@ class EDMD(

		else:
		# predict all EDMD-dictionary time series
		X_ts = self.dmd_model.predict(X_dict, time_values=time_values)
		X_ts = self.dmd_model.predict(X_dict, U=U, P=P, time_values=time_values)

		# transform from EDMD-dictionary space by pipeline inverse_transform
		X_ts = self.inverse_transform(X_ts)
		@@ -865,20 +895,24 @@ class EDMD(

		return X_ts

		def _predict_stepwise_transform(self, X: TSCDataFrame, U, time_values, qois):
		def _predict_stepwise_transform(self, X: TSCDataFrame, U, P, time_values, qois):
		if self.is_state_transition_map_ and not self.id_state_in_transition_map_:
		raise ValueError(
		"It is not possible tp perform stepwise transform predictions with "
		"It is not possible to perform stepwise transform predictions with "
		"state transition map that does not include the original full-state."
		)
		elif self.is_state_transition_map_:
		raise NotImplementedError(
		"stepwise transform with state transition map is not implemented yet"
		)
		extract_id_columns = self.feature_names_in_
		else:
		extract_id_columns = None

		# if P is not None:
		# raise NotImplementedError(
		# "Parametric EDMD with stepwise transform is not implemented yet"
		# )

		X = X.tsc.expand_time_values(time_values=time_values[1:])
		all_time_values = X.time_values()

		@@ -894,10 +928,16 @@ class EDMD(
		else:
		_U_current = None

		if P is not None:
		_P_current = P.loc[_X_current.ids, :]
		else:
		_P_current = None

		_X_dict = self.transform(_X_current)
		_X_predict = self._predict_dict_ic(
		_X_dict,
		U=_U_current,
		P=_P_current,
		time_values=all_time_values[i - 1 : i + 1],
		qois=None,
		)
		@@ -944,7 +984,9 @@ class EDMD(
		DMD model, at which point the time indices must be identical to the states in `X`.

		P
		ignored -- reservered for parameters
		Parameter for each time series in ``X``. Currently, this input is
		ignored for the dictionary and only passed to the internal DMD
		method.

		y
		A different set of target values than the original states to map to with
		@@ -994,6 +1036,7 @@ class EDMD(
		self.is_partial_fit_ = False

		self.is_controlled_ = U is not None
		self.is_parametric_ = P is not None
		self.is_dict_learning_ = isinstance(self.dmd_model, DMDDictLearning)

		# 1) first get the EDMD fit_params, 2) validate the fit_params for the pipeline,
		@@ -1045,10 +1088,8 @@ class EDMD(
		U = U.loc[inters_keys, :] # type: ignore

		with _print_elapsed_time("Pipeline", self._log_message(len(self.steps) - 1)):
		if self.is_controlled_:
		self.dmd_model.fit(X=X_dict, U=U, y=y, **dmd_fit_params)
		else:
		self.dmd_model.fit(X=X_dict, y=y, **dmd_fit_params)
		# note that DMD models do not support y input yet
		self.dmd_model.fit(X=X_dict, U=U, P=P, y=y, **dmd_fit_params)

		if self.is_dict_learning_:
		# apply final transformation based on trained dictionary
		@@ -1209,12 +1250,12 @@ class EDMD(

		if self.stepwise_transform:
		X_ts = self._predict_stepwise_transform(
		X=X, U=U, time_values=time_values, qois=qois
		X=X, U=U, P=P, time_values=time_values, qois=qois
		)
		else:
		X_dict = self.transform(X)
		X_ts = self._predict_dict_ic(
		X_dict=X_dict, U=U, time_values=time_values, qois=qois
		X_dict=X_dict, U=U, P=P, time_values=time_values, qois=qois
		)
		return X_ts

		@@ -1267,7 +1308,9 @@ class EDMD(
		Time series collection restrictions in X: (1) time delta must be constant
		(2) all values must be finite (no `NaN` or `inf`)
		"""
		return self.fit(X=X, U=U, y=y, **fit_params).reconstruct(X=X, U=U, qois=qois)
		return self.fit(X=X, U=U, P=P, y=y, **fit_params).reconstruct(
		X=X, U=U, P=P, qois=qois
		)

		def __getitem__(self, ind):
		# Overwrite the super class to distinguish the
		@@ -1316,10 +1359,10 @@ class EDMD(
		setting with control.

		P
		ignored -- reserved for parameters
		Currently, there is no implementation that supports parametric learning.

		y
		ignored
		Currently, transfer learning is not supported.

		fit_params
		Parameters passed to the ``fit`` method of each step, where
		@@ -1348,6 +1391,11 @@ class EDMD(
		"Currently there are no DMD models that that support both streaming "
		"and control."
		)
		if P is not None:
		raise NotImplementedError(
		"Currently there are no DMD models that support both streaming "
		"and parametric learning"
		)

		self.is_controlled_ = False

		@@ -1495,6 +1543,7 @@ class EDMD(
		X: TSCDataFrame,
		*,
		U: Optional[TSCDataFrame] = None,
		P: Optional[pd.DataFrame] = None,
		qois: Optional[Union[pd.Index, list[str]]] = None,
		) -> TSCDataFrame:
		"""Reconstruct existing time series collection.
		@@ -1535,12 +1584,13 @@ class EDMD(
		not validated)

		"""
		# TODO: support y!

		check_is_fitted(self)

		X = self._validate_datafold_data(
		X,
		ensure_tsc=True,
		#
		tsc_kwargs={"ensure_min_timesteps": self.n_samples_ic_ + 1}
		# Note: no const_delta_time required here. The required const samples for
		# time series initial conditions is included in the predict method.
		@@ -1548,7 +1598,7 @@ class EDMD(
		self._validate_feature_names(X=X, U=U)
		self._validate_qois(qois=qois, valid_feature_names=self.feature_names_pred_)

		return self._reconstruct(X=X, U=U, qois=qois)
		return self._reconstruct(X=X, U=U, P=P, qois=qois)

		def inverse_transform(self, X: TransformType) -> TransformType:
		"""Perform inverse dictionary transformations on dictionary time series.
		@@ -1589,6 +1639,7 @@ class EDMD(
		self,
		X: TSCDataFrame,
		U=None,
		P=None,
		y=None,
		sample_weight: Optional[np.ndarray] = None,
		):
		@@ -1600,8 +1651,14 @@ class EDMD(
		The time series collection to reconstruct. The first ``n_samples_ic_`` of
		each time series must fulfill the requirements of an initial condition.

		U
		Control input per time series in X.

		P
		Parameter input per time series in X.

		y: None
		ignored
		ignored - not supported yet (implementation required)

		sample_weight
		If not None, this argument is passed as ``sample_weight`` keyword
		@@ -1622,7 +1679,7 @@ class EDMD(
		self._check_attributes_set_up(check_attributes=["_score_eval"])

		# does all the checks:
		X_reconstruct = self.reconstruct(X=X, U=U)
		X_reconstruct = self.reconstruct(X=X, U=U, P=P)

		if self.n_samples_ic_ > 1:
		# Note that during `reconstruct` samples can be discarded (e.g. when
		@@ -2131,6 +2188,7 @@ class EDMDWindowPrediction:
		offset: int,
		*,
		U: Optional[TSCDataFrame] = None,
		P: Optional[pd.DataFrame] = None,
		y=None,
		qois=None,
		return_windows: bool = False,
		@@ -2182,6 +2240,7 @@ class EDMDWindowPrediction:
		)

		is_controlled = edmd.is_controlled_
		is_parametric = edmd.is_parametric_

		if is_controlled and U is None:
		raise ValueError(
		@@ -2189,6 +2248,11 @@ class EDMDWindowPrediction:
		f"({edmd.is_controlled_=}), but no control input was provided ({U=})"
		)

		if is_parametric:
		raise NotImplementedError(
		"Parametric is not yet implemented. Code contributions welcome!"
		)

		X = edmd._validate_datafold_data(
		X,
		ensure_tsc=True,
		@@ -2283,7 +2347,7 @@ class EDMDWindowPrediction:
		U_windows, index_final_windows_U = None, None

		# finally reconstruct the data
		X_reconstruct = edmd._reconstruct(X=X_windows, U=U_windows, qois=qois)
		X_reconstruct = edmd._reconstruct(X=X_windows, U=U_windows, P=P, qois=qois)

		# recover true index:
		X_reconstruct.index = index_final_reconstruct_X

datafold/appfold/tests/test_edmd.py

+64 −2

Original line number	Diff line number	Diff line
		@@ -24,6 +24,7 @@ from datafold import (
		DMDStandard,
		EDMDWindowPrediction,
		OnlineDMD,
		PartitionedDMD,
		StreamingDMD,
		TSCColumnTransformer,
		TSCDataFrame,
		@@ -766,7 +767,7 @@ class EDMDTest(unittest.TestCase):

		diff = inverse_dict - self.sine_wave_tsc
		# sort out the removed rows from Takens (NaN values)
		self.assertTrue((diff.dropna() < 1e-14).to_numpy().all())
		self.assertTrue((diff.dropna().to_numpy() < 1e-14).all().all())

		if plot:
		ax = self.sine_wave_tsc.plot()
		@@ -805,7 +806,7 @@ class EDMDTest(unittest.TestCase):

		diff = inverse_dict - self.sine_wave_tsc
		# sort out the removed rows from Takens (NaN values)
		self.assertTrue((diff.dropna() < 1e-14).to_numpy().all())
		self.assertTrue((diff.dropna().to_numpy() < 1e-14).all().all())

		# test that the fit_param dmd__store_generator_matrix was really passed to the
		# DMD model.
		@@ -1173,6 +1174,7 @@ class EDMDTest(unittest.TestCase):
		n_latent_states = X_tsc.shape[1] * (n_delays + 1)

		edmd.fit(X=X_tsc, U=U_tsc)

		actual_transform = edmd.transform(X_tsc)

		n_final = comb(n_latent_states, n_degrees) + 2 * n_latent_states
		@@ -1198,6 +1200,9 @@ class EDMDTest(unittest.TestCase):
		)
		pdtest.assert_frame_equal(actual_predict, actual_predict2)

		self.assertEqual(edmd.n_control_in_, U.shape[1])
		self.assertEqual(edmd.control_names_in_, U.columns)

		def test_edmdcontrol_id(self):
		ic = np.array([0, 0, np.pi, 0])
		X_tsc, U_tsc = EDMDTest.setup_inverted_pendulum(training_size=1)
		@@ -1316,6 +1321,63 @@ class EDMDTest(unittest.TestCase):
		actual = edmdid.fit_predict(X_tsc, U=U_tsc)
		pdtest.assert_frame_equal(expected, actual)

		def test_edmd_parametric_pipeline(self, plot=False):
		from datafold.dynfold.tests.test_dmd import PartitionedDMDTest

		X_train, P_train = PartitionedDMDTest.sample_parametrized_linear_system()
		X_test, P_test = PartitionedDMDTest.sample_parametrized_linear_system(
		n_time_steps=10, n_param=3
		)

		edmd_standard = EDMD(
		dict_steps=[("id", TSCIdentity())],
		dmd_model=PartitionedDMD(),
		stepwise_transform=False,
		)

		edmd_stepwise = EDMD(
		dict_steps=[("id", TSCIdentity())],
		dmd_model=PartitionedDMD(),
		stepwise_transform=True,
		)

		edmd_standard.fit(X_train, P=P_train)

		self.assertEqual(edmd_standard.n_parameter_in_, 1)
		self.assertEqual(edmd_standard.parameter_names_in_, P_test.columns.to_numpy())

		predict1 = edmd_standard.reconstruct(X_train, P=P_train)
		predict2 = edmd_standard.predict(
		X_train.initial_states(), P=P_train, time_values=X_train.time_values()
		)

		pdtest.assert_frame_equal(predict1, predict2)

		predict1 = edmd_standard.reconstruct(X_test, P=P_test)
		predict2 = edmd_standard.predict(
		X_test.initial_states(), P=P_test, time_values=X_test.time_values()
		)

		pdtest.assert_frame_equal(predict1, predict2)

		score_train = edmd_standard.score(X_train, P=P_train)
		score_test = edmd_standard.score(X_test, P=P_test)

		# adapt if necessary
		self.assertLessEqual(score_train, -1.0e-14)
		self.assertLessEqual(score_test, -3.0e-05)

		edmd_stepwise.fit(X_train, P=P_train)
		score_test = edmd_stepwise.score(X_test, P=P_test)

		self.assertLessEqual(score_test, -3.0e-13)

		if plot:
		ax = X_test.plot()
		predict1.plot(c="blue", ax=ax, linestyle="--")

		plt.show()


		class EDMDPredictionTest(unittest.TestCase):
		def setUp(self) -> None: