Commit b76573fa authored by Ana Cukarska
Browse files

Merge branch '2.0.x' into 'master'

2.0.2

See merge request !124
parents c599bb85 e27a70a2
Loading
Loading
Loading
Loading
Loading
+6 −6
Original line number Diff line number Diff line
@@ -132,17 +132,17 @@ pages:


# For infos on testPyPI see https://packaging.python.org/guides/using-testpypi/
# Note that
# Note that:
# 1) It is required to set the variables TWINE_USERNAME and TWINE_PASSWORD in gitlab CI/CD
# 2) The database for TestPyPI may be periodically pruned, so it can happen that user
#    accounts are deleted on testPyPI.
# 3) It is best to use the same account and password in testPyPI as for the "true PyPI".
#    This way the twine variables set up in point 1) can be re-used.
pypi_upload_test:
  stage: deploy
  script:
    - make test_pypi
  when: manual
#pypi_upload_test:
#  stage: deploy
#  script:
#    - make test_pypi
#  when: manual

pypi_upload:
  stage: deploy
+6 −6
Original line number Diff line number Diff line
repos:

-   repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v4.4.0
    rev: v4.5.0
    hooks:
    -   id: end-of-file-fixer
        exclude: .ipynb
@@ -26,18 +26,18 @@ repos:
#     -   id: gitlab-ci-linter

-   repo: https://github.com/asottile/pyupgrade
    rev: v3.10.1
    rev: v3.15.0
    hooks:
    -   id: pyupgrade
        args: [ --py39-plus ]

-   repo: https://github.com/python/black
    rev: 23.10.1
    rev: 23.12.0
    hooks:
    - id: black

-   repo: https://github.com/pycqa/isort
    rev: 5.12.0
    rev: 5.13.2
    hooks:
    - id: isort

@@ -67,11 +67,11 @@ repos:
    - id: nbqa-black
      additional_dependencies: [black==23.10.1]
    - id: nbqa-ruff
      additional_dependencies: [ruff==0.0.286]
      additional_dependencies: [ruff==0.1.8]
      args: [--ignore=B018, --ignore=W605]

-   repo: https://github.com/pre-commit/mirrors-mypy
    rev: v1.0.1
    rev: v1.7.1
    hooks:
    -   id: mypy
        args: [--ignore-missing-imports, --no-implicit-optional, --allow-redefinition]
+1 −0
Original line number Diff line number Diff line
@@ -4,6 +4,7 @@ Software maintainer
Last name, First name; <email>; Years of contribution
Cukarska, Ana; <ana.cukarska@tum.de>; 2026-
Dietrich, Felix; <felix.dietrich@tum.de>; 2019-
Kern, Sabrina; <sabrina.kern@hm.edu>; 2026-
Lehmberg, Daniel; <daniel.lehmberg@hm.edu>; 2019-


+88 −24
Original line number Diff line number Diff line
@@ -433,6 +433,20 @@ class EDMD(
        # TODO: should TSCPredictMixin include feature names for prediction?
        return self._feature_names_pred

    @property
    def n_parameter_in_(self):
        """Forward ``n_parameter_in_`` of the wrapped DMD model.

        Returns
        -------
        The value of ``self.dmd_model.n_parameter_in_`` if ``self.is_parametric_``
        is truthy, otherwise ``None``.
        """
        return self.dmd_model.n_parameter_in_ if self.is_parametric_ else None

    @property
    def parameter_names_in_(self):
        """Forward ``parameter_names_in_`` of the wrapped DMD model.

        Returns
        -------
        ``None`` if ``self.is_parametric_`` is falsy, otherwise the value of
        ``self.dmd_model.parameter_names_in_``.
        """
        if not self.is_parametric_:
            return None
        return self.dmd_model.parameter_names_in_

    def _validate_dictionary(self) -> bool:
        """Validates that all elements in the EDMD dictionary.

@@ -709,7 +723,13 @@ class EDMD(
            self.steps[step_idx] = (name, fitted_transformer)
        return X

    def _reconstruct(self, X: TSCDataFrame, U: Optional[TSCDataFrame], qois):
    def _reconstruct(
        self,
        X: TSCDataFrame,
        U: Optional[TSCDataFrame],
        P: Optional[pd.DataFrame],
        qois,
    ):
        X_reconstruct = []

        if self.dmd_model.is_time_invariant and not X.is_datetime_index():
@@ -729,15 +749,24 @@ class EDMD(
            else:
                U_select = None

            if P is not None:
                P_select = P.loc[X_ic.ids, :]
            else:
                P_select = None

            if self.stepwise_transform:
                X_est_ts = self._predict_stepwise_transform(
                    X=X_ic, U=U_select, time_values=time_values, qois=qois
                    X=X_ic, U=U_select, P=P_select, time_values=time_values, qois=qois
                )
            else:
                # transform initial condition to EDMD-dictionary space
                X_dict_ic = self.transform(X_ic)
                X_est_ts = self._predict_dict_ic(
                    X_dict=X_dict_ic, U=U_select, time_values=time_values, qois=qois
                    X_dict=X_dict_ic,
                    U=U_select,
                    P=P_select,
                    time_values=time_values,
                    qois=qois,
                )

            X_reconstruct.append(X_est_ts)
@@ -772,7 +801,7 @@ class EDMD(
        return X_reconstruct

    def _predict_dict_ic(
        self, X_dict: TSCDataFrame, U, time_values, qois
        self, X_dict: TSCDataFrame, U, P, time_values, qois
    ) -> TSCDataFrame:
        """Prediction with initial condition in dictionary states.

@@ -820,6 +849,7 @@ class EDMD(

                dmd_params = dict(
                    U=U,
                    P=P,
                    time_values=time_values,
                    modes=modes,
                    feature_columns=feature_columns,
@@ -834,7 +864,7 @@ class EDMD(
                # system.

                # computes system in EDMD-dictionary space
                dmd_params = dict(U=U, time_values=time_values)
                dmd_params = dict(U=U, P=P, time_values=time_values)
                dmd_params = {k: v for k, v in dmd_params.items() if v is not None}

                X_ts = self.dmd_model.predict(X_dict, **dmd_params)
@@ -857,7 +887,7 @@ class EDMD(

        else:
            # predict all EDMD-dictionary time series
            X_ts = self.dmd_model.predict(X_dict, time_values=time_values)
            X_ts = self.dmd_model.predict(X_dict, U=U, P=P, time_values=time_values)

            # transform from EDMD-dictionary space by pipeline inverse_transform
            X_ts = self.inverse_transform(X_ts)
@@ -865,20 +895,24 @@ class EDMD(

        return X_ts

    def _predict_stepwise_transform(self, X: TSCDataFrame, U, time_values, qois):
    def _predict_stepwise_transform(self, X: TSCDataFrame, U, P, time_values, qois):
        if self.is_state_transition_map_ and not self.id_state_in_transition_map_:
            raise ValueError(
                "It is not possible tp perform stepwise transform predictions with "
                "It is not possible to perform stepwise transform predictions with "
                "state transition map that does not include the original full-state."
            )
        elif self.is_state_transition_map_:
            raise NotImplementedError(
                "stepwise transform with state transition map is not implemented yet"
            )
            extract_id_columns = self.feature_names_in_
        else:
            extract_id_columns = None

        # if P is not None:
        #     raise NotImplementedError(
        #         "Parametric EDMD with stepwise transform is not implemented yet"
        #     )

        X = X.tsc.expand_time_values(time_values=time_values[1:])
        all_time_values = X.time_values()

@@ -894,10 +928,16 @@ class EDMD(
            else:
                _U_current = None

            if P is not None:
                _P_current = P.loc[_X_current.ids, :]
            else:
                _P_current = None

            _X_dict = self.transform(_X_current)
            _X_predict = self._predict_dict_ic(
                _X_dict,
                U=_U_current,
                P=_P_current,
                time_values=all_time_values[i - 1 : i + 1],
                qois=None,
            )
@@ -944,7 +984,9 @@ class EDMD(
            DMD model, at which point the time indices must be identical to the states in `X`.

        P
            ignored -- reservered for parameters
            Parameter for each time series in ``X``. Currently, this input is
            ignored for the dictionary and only passed to the internal DMD
            method.

        y
            A different set of target values than the original states to map to with
@@ -994,6 +1036,7 @@ class EDMD(
            self.is_partial_fit_ = False

        self.is_controlled_ = U is not None
        self.is_parametric_ = P is not None
        self.is_dict_learning_ = isinstance(self.dmd_model, DMDDictLearning)

        # 1) first get the EDMD fit_params, 2) validate the fit_params for the pipeline,
@@ -1045,10 +1088,8 @@ class EDMD(
            U = U.loc[inters_keys, :]  # type: ignore

        with _print_elapsed_time("Pipeline", self._log_message(len(self.steps) - 1)):
            if self.is_controlled_:
                self.dmd_model.fit(X=X_dict, U=U, y=y, **dmd_fit_params)
            else:
                self.dmd_model.fit(X=X_dict, y=y, **dmd_fit_params)
            # note that DMD models do not support y input yet
            self.dmd_model.fit(X=X_dict, U=U, P=P, y=y, **dmd_fit_params)

        if self.is_dict_learning_:
            # apply final transformation based on trained dictionary
@@ -1209,12 +1250,12 @@ class EDMD(

        if self.stepwise_transform:
            X_ts = self._predict_stepwise_transform(
                X=X, U=U, time_values=time_values, qois=qois
                X=X, U=U, P=P, time_values=time_values, qois=qois
            )
        else:
            X_dict = self.transform(X)
            X_ts = self._predict_dict_ic(
                X_dict=X_dict, U=U, time_values=time_values, qois=qois
                X_dict=X_dict, U=U, P=P, time_values=time_values, qois=qois
            )
        return X_ts

@@ -1267,7 +1308,9 @@ class EDMD(
            Time series collection restrictions in **X**: (1) time delta must be constant
            (2) all values must be finite (no `NaN` or `inf`)
        """
        return self.fit(X=X, U=U, y=y, **fit_params).reconstruct(X=X, U=U, qois=qois)
        return self.fit(X=X, U=U, P=P, y=y, **fit_params).reconstruct(
            X=X, U=U, P=P, qois=qois
        )

    def __getitem__(self, ind):
        # Overwrite the super class to distinguish the
@@ -1316,10 +1359,10 @@ class EDMD(
            setting with control.

        P
            ignored -- reserved for parameters
            Currently, there is no implementation that supports parametric learning.

        y
            ignored
            Currently, transfer learning is not supported.

        fit_params
            Parameters passed to the ``fit`` method of each step, where
@@ -1348,6 +1391,11 @@ class EDMD(
                "Currently there are no DMD models that that support both streaming "
                "and control."
            )
        if P is not None:
            raise NotImplementedError(
                "Currently there are no DMD models that support both streaming "
                "and parametric learning"
            )

        self.is_controlled_ = False

@@ -1495,6 +1543,7 @@ class EDMD(
        X: TSCDataFrame,
        *,
        U: Optional[TSCDataFrame] = None,
        P: Optional[pd.DataFrame] = None,
        qois: Optional[Union[pd.Index, list[str]]] = None,
    ) -> TSCDataFrame:
        """Reconstruct existing time series collection.
@@ -1535,12 +1584,13 @@ class EDMD(
              not validated)

        """
        # TODO: support y!

        check_is_fitted(self)

        X = self._validate_datafold_data(
            X,
            ensure_tsc=True,
            #
            tsc_kwargs={"ensure_min_timesteps": self.n_samples_ic_ + 1}
            # Note: no const_delta_time required here. The required const samples for
            # time series initial conditions is included in the predict method.
@@ -1548,7 +1598,7 @@ class EDMD(
        self._validate_feature_names(X=X, U=U)
        self._validate_qois(qois=qois, valid_feature_names=self.feature_names_pred_)

        return self._reconstruct(X=X, U=U, qois=qois)
        return self._reconstruct(X=X, U=U, P=P, qois=qois)

    def inverse_transform(self, X: TransformType) -> TransformType:
        """Perform inverse dictionary transformations on dictionary time series.
@@ -1589,6 +1639,7 @@ class EDMD(
        self,
        X: TSCDataFrame,
        U=None,
        P=None,
        y=None,
        sample_weight: Optional[np.ndarray] = None,
    ):
@@ -1600,8 +1651,14 @@ class EDMD(
            The time series collection to reconstruct. The first ``n_samples_ic_`` of
            each time series must fulfill the requirements of an initial condition.

        U
            Control input per time series in X.

        P
            Parameter input per time series in X.

        y: None
            ignored
            ignored - not supported yet (implementation required)

        sample_weight
            If not None, this argument is passed as ``sample_weight`` keyword
@@ -1622,7 +1679,7 @@ class EDMD(
        self._check_attributes_set_up(check_attributes=["_score_eval"])

        # does all the checks:
        X_reconstruct = self.reconstruct(X=X, U=U)
        X_reconstruct = self.reconstruct(X=X, U=U, P=P)

        if self.n_samples_ic_ > 1:
            # Note that during `reconstruct` samples can be discarded (e.g. when
@@ -2131,6 +2188,7 @@ class EDMDWindowPrediction:
        offset: int,
        *,
        U: Optional[TSCDataFrame] = None,
        P: Optional[pd.DataFrame] = None,
        y=None,
        qois=None,
        return_windows: bool = False,
@@ -2182,6 +2240,7 @@ class EDMDWindowPrediction:
            )

        is_controlled = edmd.is_controlled_
        is_parametric = edmd.is_parametric_

        if is_controlled and U is None:
            raise ValueError(
@@ -2189,6 +2248,11 @@ class EDMDWindowPrediction:
                f"({edmd.is_controlled_=}), but no control input was provided ({U=})"
            )

        if is_parametric:
            raise NotImplementedError(
                "Parametric is not yet implemented. Code contributions welcome!"
            )

        X = edmd._validate_datafold_data(
            X,
            ensure_tsc=True,
@@ -2283,7 +2347,7 @@ class EDMDWindowPrediction:
            U_windows, index_final_windows_U = None, None

        # finally reconstruct the data
        X_reconstruct = edmd._reconstruct(X=X_windows, U=U_windows, qois=qois)
        X_reconstruct = edmd._reconstruct(X=X_windows, U=U_windows, P=P, qois=qois)

        # recover true index:
        X_reconstruct.index = index_final_reconstruct_X
+64 −2
Original line number Diff line number Diff line
@@ -24,6 +24,7 @@ from datafold import (
    DMDStandard,
    EDMDWindowPrediction,
    OnlineDMD,
    PartitionedDMD,
    StreamingDMD,
    TSCColumnTransformer,
    TSCDataFrame,
@@ -766,7 +767,7 @@ class EDMDTest(unittest.TestCase):

        diff = inverse_dict - self.sine_wave_tsc
        # sort out the removed rows from Takens (NaN values)
        self.assertTrue((diff.dropna() < 1e-14).to_numpy().all())
        self.assertTrue((diff.dropna().to_numpy() < 1e-14).all().all())

        if plot:
            ax = self.sine_wave_tsc.plot()
@@ -805,7 +806,7 @@ class EDMDTest(unittest.TestCase):

        diff = inverse_dict - self.sine_wave_tsc
        # sort out the removed rows from Takens (NaN values)
        self.assertTrue((diff.dropna() < 1e-14).to_numpy().all())
        self.assertTrue((diff.dropna().to_numpy() < 1e-14).all().all())

        # test that the fit_param dmd__store_generator_matrix was really passed to the
        # DMD model.
@@ -1173,6 +1174,7 @@ class EDMDTest(unittest.TestCase):
        n_latent_states = X_tsc.shape[1] * (n_delays + 1)

        edmd.fit(X=X_tsc, U=U_tsc)

        actual_transform = edmd.transform(X_tsc)

        n_final = comb(n_latent_states, n_degrees) + 2 * n_latent_states
@@ -1198,6 +1200,9 @@ class EDMDTest(unittest.TestCase):
        )
        pdtest.assert_frame_equal(actual_predict, actual_predict2)

        self.assertEqual(edmd.n_control_in_, U.shape[1])
        self.assertEqual(edmd.control_names_in_, U.columns)

    def test_edmdcontrol_id(self):
        ic = np.array([0, 0, np.pi, 0])
        X_tsc, U_tsc = EDMDTest.setup_inverted_pendulum(training_size=1)
@@ -1316,6 +1321,63 @@ class EDMDTest(unittest.TestCase):
        actual = edmdid.fit_predict(X_tsc, U=U_tsc)
        pdtest.assert_frame_equal(expected, actual)

    def test_edmd_parametric_pipeline(self, plot=False):
        """Exercise EDMD with a parametric DMD model (``PartitionedDMD``).

        Two EDMD pipelines with an identity dictionary are built, one with
        ``stepwise_transform=False`` and one with ``stepwise_transform=True``.
        The test checks that

        * ``n_parameter_in_`` / ``parameter_names_in_`` are exposed after ``fit``,
        * ``reconstruct`` and ``predict`` (started from the initial states) give
          identical frames on both the training and the test data,
        * ``score`` stays below fixed bounds for both pipelines.

        Parameters
        ----------
        plot
            If True, plot the test data together with the last reconstruction.
        """
        # imported locally; the sampling helper lives in the DMD test module
        from datafold.dynfold.tests.test_dmd import PartitionedDMDTest

        # training data uses the helper's defaults, test data uses explicit settings
        X_train, P_train = PartitionedDMDTest.sample_parametrized_linear_system()
        X_test, P_test = PartitionedDMDTest.sample_parametrized_linear_system(
            n_time_steps=10, n_param=3
        )

        edmd_standard = EDMD(
            dict_steps=[("id", TSCIdentity())],
            dmd_model=PartitionedDMD(),
            stepwise_transform=False,
        )

        edmd_stepwise = EDMD(
            dict_steps=[("id", TSCIdentity())],
            dmd_model=PartitionedDMD(),
            stepwise_transform=True,
        )

        edmd_standard.fit(X_train, P=P_train)

        self.assertEqual(edmd_standard.n_parameter_in_, 1)
        # NOTE(review): assertEqual on array-like values relies on the comparison
        # yielding a single truth value; presumably fine while there is exactly
        # one parameter column (asserted above) -- confirm. The expected names
        # are taken from P_test although the model was fitted with P_train.
        self.assertEqual(edmd_standard.parameter_names_in_, P_test.columns.to_numpy())

        # reconstruct must agree with predict started from the initial states
        # (training data)
        predict1 = edmd_standard.reconstruct(X_train, P=P_train)
        predict2 = edmd_standard.predict(
            X_train.initial_states(), P=P_train, time_values=X_train.time_values()
        )

        pdtest.assert_frame_equal(predict1, predict2)

        # same consistency check on the test data
        predict1 = edmd_standard.reconstruct(X_test, P=P_test)
        predict2 = edmd_standard.predict(
            X_test.initial_states(), P=P_test, time_values=X_test.time_values()
        )

        pdtest.assert_frame_equal(predict1, predict2)

        score_train = edmd_standard.score(X_train, P=P_train)
        score_test = edmd_standard.score(X_test, P=P_test)

        # adapt if necessary
        # NOTE(review): these are upper bounds on the score with negative
        # thresholds; the sign convention of ``score`` is not visible here --
        # confirm that the bounds express the intended accuracy requirement.
        self.assertLessEqual(score_train, -1.0e-14)
        self.assertLessEqual(score_test, -3.0e-05)

        # repeat the scoring with the stepwise-transform pipeline
        edmd_stepwise.fit(X_train, P=P_train)
        score_test = edmd_stepwise.score(X_test, P=P_test)

        self.assertLessEqual(score_test, -3.0e-13)

        if plot:
            # predict1 holds the reconstruction of X_test from edmd_standard
            ax = X_test.plot()
            predict1.plot(c="blue", ax=ax, linestyle="--")

            plt.show()


class EDMDPredictionTest(unittest.TestCase):
    def setUp(self) -> None:
Loading