Decouple data saving with Quantify from MeasurementControl (MC)

Description

Below is an example implementation of data saving that works independently of MeasurementControl.

Discussed with @AdriaanRol and @kel85uk in project meeting.

"""data_saving.py
This module contains functions for saving data using Quantify.
Two things in this module are most relevant to the user:
- Quantity: a named tuple that holds the data to be saved.
- save_experiment: a function that takes a list of quantities and saves them
  to the Quantify datastore.

Matteo Pompili, 2021
"""
import logging
import re
import time
from os import getenv
from os.path import join
from typing import Any, Dict, List, NamedTuple, Optional, Tuple, Union

import numpy as np
from qcodes import ManualParameter
from quantify_core.data.handling import (
    create_exp_folder,
    gen_tuid,
    initialize_dataset,
    locate_experiment_container,
    set_datadir,
    to_gridded_dataset,
    write_dataset,
)
from quantify_core.data.types import TUID
from quantify_core.measurement.control import grid_setpoints

# Module-level logger; this module uses it to warn about attributes that
# cannot be saved and to report where a dataset was written.
_logger = logging.getLogger(__name__)


class Quantity(NamedTuple):
    """Named tuple describing one quantity of an experiment.

    Combines information about the physical quantity (name, plot label,
    unit) with the actual data (values).

    Attributes:
        name: Name of the quantity.
        values: The data belonging to the quantity.
        label: Optional plot label; defaults to None.
        unit: Optional unit; defaults to None.
    """

    name: str
    values: np.ndarray
    label: Optional[str] = None
    unit: Optional[str] = None


def _sanitize_attributes(attrs: Dict[str, Any]) -> Dict[str, Any]:
    """Must be a number, a string, an ndarray or a list/tuple of numbers/strings
    for serialization to netCDF files.
    """
    new_attrs: Dict[str, Any] = {}
    for key, value in attrs.items():
        if isinstance(value, (int, float, complex, str, np.ndarray)):
            new_attrs[key] = value
        elif isinstance(value, (list, tuple)):
            all_good = True
            for item in value:
                if not isinstance(item, (int, float, complex, str)):
                    all_good = False
            if all_good:
                new_attrs[key] = value
            else:
                _logger.warning(f"Could not save attribute `{key}`, not homogeneous." + f"\nHere the value: {value}")

    return new_attrs


def _standardize_name(name: str) -> str:
    """
    ConvertsAWeird measurement NameFormat -> converts_a_weird_measurement_name_format
    """
    # https://regex101.com/r/2vEI3M/1
    regex = r"[A-Z]{2,}(?=[A-Z][a-z]+[0-9]*|\b)|[A-Z]?[a-z]+[0-9]*|[A-Z]|[0-9]+"
    return "_".join(map(str.lower, re.findall(regex, name)))


def unix_timestamp_from_tuid(tuid: TUID) -> float:
    """
    Convert the date and time encoded in the given TUID to a unix timestamp.

    The datetime obtained from the TUID is turned into a time tuple and
    passed to `time.mktime`.
    """
    tuid_datetime = TUID.datetime(tuid)
    time_struct = tuid_datetime.timetuple()
    return time.mktime(time_struct)


def init_exp_folder(name: str = "", standardize_name: bool = True) -> Tuple[TUID, str]:
    """
    Generate a fresh TUID and create the matching experiment folder.

    Args:
        name: Name of the experiment folder, default is empty string.
        standardize_name: If True, `name` is first passed through
            `_standardize_name`; otherwise it is used unchanged.

    Returns:
        Tuple of the new TUID and the value returned by `create_exp_folder`
        (the path to the experiment folder).

    Example:
        >>> tuid, exp_folder = init_exp_folder(name="my_experiment")
    """
    new_tuid = gen_tuid()
    folder_name = _standardize_name(name) if standardize_name else name
    folder_path = create_exp_folder(tuid=new_tuid, name=folder_name)
    return new_tuid, folder_path


def _quantity_to_parameter(quantity: Quantity) -> ManualParameter:
    """Build the ManualParameter describing `quantity` (standardized name, label, unit)."""
    # A single space instead of None prevents quantify plotting from adding [ ] to the label.
    unit = " " if quantity.unit is None else quantity.unit
    return ManualParameter(_standardize_name(quantity.name), label=quantity.label, unit=unit)


def save_experiment(
    swept: Union[Quantity, List[Quantity]],
    measured: List[Quantity],
    name: str = "",
    tuid: Optional[TUID] = None,
    attrs: Optional[dict] = None,
    dataset_name: str = "dataset.hdf5",
) -> Tuple[TUID, str]:
    """Save the experiment data using quantify as a backend.

    If no `tuid` is given, a new TUID is generated and a new experiment
    folder is created with `name`. If a `tuid` is given, the existing
    experiment folder is located and the experiment name is derived from
    that folder (the `name` argument is then not used).

    The dataset attributes `tuid`, `name` and `timestamp` are always set;
    the sanitized entries of `attrs` are applied afterwards.

    Args:
        swept: Swept Quantity (or Quantities).
        measured: Measured Quantities. The values of the i-th measured
            quantity are stored in the dataset variable ``y{i}``.
        name: Name of the experiment folder, default is empty string.
        tuid: TUID of the experiment, if None a new one will be generated.
        attrs: Additional attributes to be saved in the HDF5 file.
        dataset_name: Name of the HDF5 file.

    Returns:
        Tuple of TUID and path to the experiment folder.

    Example:
        >>> step_numbers = Quantity(
        ...     name="eom_step_number",
        ...     label=r"EOM bias voltage optimization",
        ...     values=step_number,
        ... )
        >>> voltages_applied = Quantity(
        ...     name="eom_voltage_applied",
        ...     label=r"EOM bias voltage optimization",
        ...     unit="V",
        ...     values=voltage_applied,
        ... )
        >>> powers_measured = Quantity(
        ...     name="eom_power_measured",
        ...     label=r"EOM bias voltage optimization",
        ...     unit="W",
        ...     values=power_measured,
        ... )

        >>> save_experiment(
        ...     swept=step_numbers,
        ...     measured=[voltages_applied, powers_measured],
        ...     dataset_name="dataset.hdf5",
        ... )
    """

    if tuid is None:
        tuid = gen_tuid()
        exp_folder = create_exp_folder(tuid=tuid, name=name)
    else:
        exp_folder = locate_experiment_container(tuid=tuid)
        # partition() yields "" when the folder has no "-<name>" suffix,
        # where split(...)[1] would raise an IndexError.
        name = exp_folder.partition(tuid + "-")[2]

    if not isinstance(swept, list):
        swept = [swept]

    swept_parameters: List[ManualParameter] = [_quantity_to_parameter(s) for s in swept]
    measured_parameters: List[ManualParameter] = [_quantity_to_parameter(m) for m in measured]

    setpoints = grid_setpoints(setpoints=[s.values for s in swept], settables=swept_parameters)

    dataset = initialize_dataset(swept_parameters, setpoints, measured_parameters)
    dataset = to_gridded_dataset(dataset)

    dataset.attrs["tuid"] = tuid
    dataset.attrs["name"] = name
    dataset.attrs["timestamp"] = unix_timestamp_from_tuid(tuid)

    if attrs is not None:
        dataset.attrs.update(_sanitize_attributes(attrs))

    for i, m in enumerate(measured):
        dataset[f"y{i}"].values = m.values

    dataset_path = join(exp_folder, dataset_name)
    write_dataset(path=dataset_path, dataset=dataset)
    _logger.info("Saved dataset at %s", dataset_path)
    return tuid, exp_folder