[EHN] Inclusion the whosmat here

At the moment, scipy doesn't provide support for the whosmat file, and I think we should push for it here.

I created this functionality for mne, and could push for here if you accept this contribution.


def _whosmat_hdf5(fname: str):
    """List variables in a MATLAB v7.3 (HDF5) .mat file without loading data.

    This function provides similar functionality to :func:`scipy.io.whosmat` but
    for MATLAB v7.3 files stored in HDF5 format, which are not supported by SciPy.

    Parameters
    ----------
    fname : str | PathLike
        Path to the MATLAB v7.3 (.mat) file.

    Returns
    -------
    variables : list of tuple
        A list of (name, shape, class) tuples for each variable in the file.
        The name is a string, shape is a tuple of ints, and class is a string
        indicating the MATLAB data type (e.g., 'double', 'int32', 'struct').

    Notes
    -----
    This function only works with MATLAB v7.3 (HDF5) files. For earlier versions,
    use :func:`scipy.io.whosmat` instead.

    See Also
    --------
    scipy.io.whosmat : List variables in classic MATLAB files.
    """
    h5py = _soft_import("h5py", purpose="MATLAB v7.3 I/O")
    if h5py is None:
        raise ModuleNotFoundError(
            "h5py is required to inspect MATLAB v7.3 files preload=`False` "
            "Please install h5py to use this functionality."
        )

    variables = []

    with h5py.File(str(fname), "r") as f:
        for name in f.keys():
            node = f[name]

            # Extract shape from HDF5 object
            if isinstance(node, h5py.Dataset):
                shape = tuple(int(x) for x in node.shape)
            else:
                shape = ()
                for attr_key in (
                    "MATLAB_shape",
                    "MATLAB_Size",
                    "MATLAB_size",
                    "dims",
                    "MATLAB_dims",
                ):
                    shp = node.attrs.get(attr_key)
                    if shp is not None:
                        try:
                            shape = tuple(int(x) for x in shp)
                            break
                        except Exception:
                            pass
                if not shape and "size" in node:
                    try:
                        shape = tuple(int(x) for x in node["size"][()])
                    except Exception:
                        pass

            # Infer MATLAB class from HDF5 object
            mcls = node.attrs.get("MATLAB_class", "").lower()
            if mcls:
                matlab_class = "char" if mcls == "string" else mcls
            elif isinstance(node, h5py.Dataset):
                dt = node.dtype
                # Handle complex numbers stored as {real, imag} struct
                if getattr(dt, "names", None) and {"real", "imag"} <= set(dt.names):
                    matlab_class = (
                        "double" if dt["real"].base.itemsize == 8 else "single"
                    )
                # Map NumPy dtype to MATLAB class
                elif (kind := dt.kind) == "f":
                    matlab_class = "double" if dt.itemsize == 8 else "single"
                elif kind == "i":
                    matlab_class = f"int{8 * dt.itemsize}"
                elif kind == "u":
                    matlab_class = f"uint{8 * dt.itemsize}"
                elif kind == "b":
                    matlab_class = "logical"
                elif kind in ("S", "U", "O"):
                    matlab_class = "char"
                else:
                    matlab_class = "unknown"
            # Check for sparse matrix structure
            elif {"ir", "jc", "data"}.issubset(set(node.keys())):
                matlab_class = "sparse"
            else:
                matlab_class = "unknown"

            variables.append((name, shape, matlab_class))

    return variables
Assignee Loading
Time tracking Loading