Commit b8da58b3 authored by Anne Hommelberg's avatar Anne Hommelberg

Add NetCDFMixin

Adds a NetCDFMixin to import and export data to
and from NetCDF files.
parent eb5e4b7f
This diff is collapsed.
import logging
import os
import rtctools.data.netcdf as netcdf
from rtctools.data import rtc
from rtctools.optimization.io_mixin import IOMixin
logger = logging.getLogger("rtctools")
# todo add support for ensembles
class NetCDFMixin(IOMixin):
    """
    Adds NetCDF I/O to your optimization problem.

    During preprocessing, a file named timeseries_import.nc is read from the ``input`` subfolder.
    During postprocessing a file named timeseries_export.nc is written to the ``output`` subfolder.

    Both the input and output nc files are expected to follow the FEWS format for scalar data in
    a NetCDF file, i.e.:

    - They must contain a variable with the station id's (location id's) which can be recognized
      by the attribute 'cf_role' set to 'timeseries_id'.
    - They must contain a time variable with attributes 'standard_name' = 'time' and 'axis' = 'T'

    From the input file, all 2d variables with dimensions equal to the station id's and time
    variable are read.

    To determine the rtc-tools variable name, the NetCDF mixin uses the station id (location id)
    and name of the timeseries variable in the file (parameter). An rtcDataConfig.xml file can be
    given in the input folder to configure variable names for specific location and parameter
    combinations. If this file is present, and contains a configured variable name for a read
    timeseries, this variable name will be used. If the file is present, but does not contain a
    configured variable name, a default variable name is constructed and a warning is given to
    alert the user that the current rtcDataConfig may contain a mistake. To suppress this warning
    if this is intentional, set the check_missing_variable_names attribute to False. Finally, if
    no file is present, the default variable name will always be used, and no warnings will be
    given. With debug logging enabled, the NetCDF mixin will report the chosen variable name for
    each location and parameter combination.

    To construct the default variable name, the station id is concatenated with the name of the
    variable in the NetCDF file, separated by the location_parameter_delimiter (set to a double
    underscore - '__' - by default). For example, if a NetCDF file contains two stations 'loc_1'
    and 'loc_2', and a timeseries variable called 'water_level', this will result in two
    rtc-tools variables called 'loc_1__water_level' and 'loc_2__water_level' (with the default
    location_parameter_delimiter of '__').

    :cvar location_parameter_delimiter:
        Delimiter used between location and parameter id when constructing the variable name.
    :cvar check_missing_variable_names:
        Warn if an rtcDataConfig.xml file is given but does not contain a variable name for a
        read timeseries. Default is ``True``
    :cvar netcdf_validate_timeseries:
        Check consistency of timeseries. Default is ``True``
    """

    #: Delimiter used between location and parameter id when constructing the variable name.
    location_parameter_delimiter = '__'

    #: Warn if an rtcDataConfig.xml file is given but does not contain a variable name for a
    #: read timeseries.
    check_missing_variable_names = True

    #: Check consistency of timeseries.
    netcdf_validate_timeseries = True

    def __init__(self, **kwargs):
        # Call parent class for default behaviour.
        super().__init__(**kwargs)

        # Only parse rtcDataConfig.xml when it is actually present; when absent,
        # default variable names are constructed (and no warnings are given).
        path = os.path.join(self._input_folder, "rtcDataConfig.xml")
        self.__data_config = rtc.DataConfig(self._input_folder) if os.path.isfile(path) else None

    def read(self):
        """
        Read timeseries_import.nc from the input folder and store every
        (station, parameter) timeseries in the internal data store.

        :raises ValueError: if ``netcdf_validate_timeseries`` is set and the
            import times are not strictly increasing.
        """
        # Call parent class first for default behaviour.
        super().read()

        dataset = netcdf.ImportDataset(self._input_folder, self.timeseries_import_basename)

        # Convert and store the import times (seconds relative to the forecast time).
        self.__import_datetimes = dataset.read_import_times()
        times = self.io.datetime_to_sec(
            self.__import_datetimes, self.__import_datetimes[self.io.get_forecast_index()])
        self.io.set_times(times)

        if self.netcdf_validate_timeseries:
            # Check if strictly increasing.
            for i in range(len(times) - 1):
                if times[i] >= times[i + 1]:
                    # ValueError instead of bare Exception; still caught by
                    # any caller handling Exception.
                    raise ValueError('NetCDFMixin: Time stamps must be strictly increasing.')

        # Determine whether the grid is equidistant; __dt stays None when any
        # step deviates from the first one (see the equidistant property).
        self.__dt = times[1] - times[0] if len(times) >= 2 else 0
        for i in range(len(times) - 1):
            if times[i + 1] - times[i] != self.__dt:
                self.__dt = None
                break

        # Store the station data for later use (written back out in write()).
        self.__stations = dataset.read_station_data()

        # Read all available timeseries from the dataset.
        timeseries_var_keys = dataset.find_timeseries_variables()

        # todo add support for ensembles
        for parameter in timeseries_var_keys:
            for i, location_id in enumerate(self.__stations.station_ids):
                default_name = location_id + self.location_parameter_delimiter + parameter

                if self.__data_config is not None:
                    try:
                        name = self.__data_config.parameter(parameter, location_id)
                    except KeyError:
                        if self.check_missing_variable_names:
                            logger.warning(
                                'No configured variable name found in rtcDataConfig.xml for location id "{}"'
                                ' and parameter id "{}", using default variable name "{}" instead. '
                                '(To suppress this warning set check_missing_variable_names to False.)'
                                .format(location_id, parameter, default_name))
                        name = default_name
                else:
                    name = default_name

                values = dataset.read_timeseries_values(i, parameter)
                self.io.set_timeseries_values(name, values)
                logger.debug('Read timeseries data for location id "{}" and parameter "{}", '
                             'stored under variable name "{}"'
                             .format(location_id, parameter, name))

        logger.debug("NetCDFMixin: Read timeseries")

    def write(self):
        """
        Write the optimization results for all output variables to
        timeseries_export.nc in the output folder.
        """
        dataset = netcdf.ExportDataset(self._output_folder, self.timeseries_export_basename)

        times = self.times()
        forecast_index = self.io.get_forecast_index()
        dataset.write_times(times, self.initial_time, self.__import_datetimes[forecast_index])

        # Map each output variable to its (station id, parameter id) pair.
        output_variables = [sym.name() for sym in self.output_variables]
        output_location_parameter_ids = {
            var_name: self.extract_station_id(var_name) for var_name in output_variables}
        output_station_ids = {loc_par[0] for loc_par in output_location_parameter_ids.values()}
        dataset.write_station_data(self.__stations, output_station_ids)

        output_parameter_ids = {loc_par[1] for loc_par in output_location_parameter_ids.values()}
        dataset.create_variables(output_parameter_ids)

        for ensemble_member in range(self.ensemble_size):
            results = self.extract_results(ensemble_member)

            for var_name in output_variables:
                # Determine the output values: prefer optimization results,
                # fall back to an input/derived timeseries of the same name.
                try:
                    values = results[var_name]
                    if len(values) != len(times):
                        values = self.interpolate(
                            times, self.times(var_name), values,
                            self.interpolation_method(var_name))
                except KeyError:
                    try:
                        ts = self.get_timeseries(var_name, ensemble_member)
                        if len(ts.times) != len(times):
                            # NOTE(review): unlike the results branch above, this call
                            # uses the default interpolation method rather than
                            # self.interpolation_method(var_name) — confirm intent.
                            values = self.interpolate(
                                times, ts.times, ts.values)
                        else:
                            values = ts.values
                    except KeyError:
                        logger.error(
                            'NetCDFMixin: Output requested for non-existent variable {}. '
                            'Will not be in output file.'.format(var_name))
                        continue

                # Determine where to put this output.
                location_parameter_id = output_location_parameter_ids[var_name]
                location_id = location_parameter_id[0]
                parameter_id = location_parameter_id[1]
                dataset.write_output_values(location_id, parameter_id, values)

        dataset.close()

    def extract_station_id(self, variable_name: str) -> tuple:
        """
        Returns the station id and parameter id corresponding to the given
        RTC-Tools variable name.

        :param variable_name: The name of the RTC-Tools variable
        :return: tuple of (station id, parameter id)
        """
        try:
            return self.__data_config.pi_variable_ids(variable_name)[:2]
        except (KeyError, AttributeError):
            # BUG FIX: when no rtcDataConfig.xml was given, __data_config is None
            # and the lookup raised AttributeError instead of falling back to the
            # default naming scheme. Split the default variable name instead.
            return tuple(variable_name.split(self.location_parameter_delimiter))

    @property
    def equidistant(self):
        # True when every import time step has the same size (computed in read()).
        return self.__dt is not None
import os
from datetime import datetime, timedelta
from unittest import TestCase
from netCDF4 import Dataset
import numpy as np
import rtctools.data.netcdf as netcdf
from .data_path import data_path
class TestImportDataset(TestCase):
    """Tests for netcdf.ImportDataset against the timeseries_import.nc fixture."""

    def setUp(self):
        self.dataset = netcdf.ImportDataset(data_path(), 'timeseries_import')

    def test_init(self):
        # Time and station variables must be located via their FEWS attributes.
        time_var = self.dataset.time_variable
        self.assertEqual(time_var._name, 'time')
        self.assertEqual(time_var.standard_name, 'time')
        self.assertEqual(time_var.long_name, 'time')
        self.assertEqual(time_var.axis, 'T')
        self.assertEqual(time_var.units, 'minutes since 1970-01-01 00:00:00.0 +0000')

        station_var = self.dataset.station_variable
        self.assertEqual(station_var._name, 'station_id')
        self.assertEqual(station_var.long_name, 'station identification code')
        self.assertEqual(station_var.cf_role, 'timeseries_id')

    def test_read_times(self):
        # Fixture contains 25 hourly stamps starting at the forecast datetime.
        datetimes = self.dataset.read_import_times()
        forecast_datetime = datetime(2013, 1, 15)
        expected_datetimes = [forecast_datetime + timedelta(hours=i) for i in range(25)]
        self.assertTrue(np.array_equal(datetimes, expected_datetimes))

    def test_find_timeseries_variables(self):
        variables = self.dataset.find_timeseries_variables()
        self.assertEqual(variables, ['waterlevel'])

    def test_stations(self):
        stations = self.dataset.read_station_data()
        ids = stations.station_ids
        self.assertEqual(len(ids), 3)
        self.assertTrue('LocA' in ids)
        self.assertTrue('LocB' in ids)
        self.assertTrue('LocC' in ids)

        for station_id in ids:
            read_attributes = stations.attributes[station_id].keys()
            # BUG FIX: was assertTrue(len(read_attributes), 5), which always
            # passes because the 5 is interpreted as the failure message.
            self.assertEqual(len(read_attributes), 5)
            self.assertTrue('lat' in read_attributes)
            self.assertTrue('lon' in read_attributes)
            self.assertTrue('x' in read_attributes)
            self.assertTrue('y' in read_attributes)
            self.assertTrue('z' in read_attributes)

        self.assertEqual(stations.attributes['LocA']['lat'], 53.0)
class TestExportDataset(TestCase):
    """Tests for netcdf.ExportDataset writing timeseries_export.nc."""

    def get_exported_dataset(self):
        # Re-open the file that the ExportDataset under test has written.
        return Dataset(os.path.join(data_path(), 'timeseries_export.nc'))

    def setUp(self):
        self.dataset = netcdf.ExportDataset(data_path(), 'timeseries_export')

    def test_write_times(self):
        written_times = np.array([-120, -300, -60, 300, 360])
        self.dataset.write_times(written_times, -180.0, datetime(2018, 12, 21, 17, 30))
        self.dataset.close()

        exported = self.get_exported_dataset()
        self.assertTrue('time' in exported.variables)

        time_var = exported.variables['time']
        self.assertEqual(time_var.units, 'seconds since 2018-12-21 17:28:00')
        self.assertEqual(time_var.axis, 'T')
        self.assertEqual(time_var.standard_name, 'time')
        # Times are shifted so that they are relative to the written reference time.
        self.assertTrue(np.array_equal(time_var[:], written_times + 300))

    # todo create tests for write_station_data, create_variables and write_output_values
// Test model for the NetCDF mixin tests. Variable names encode a location id and
// a parameter id separated by '__' (e.g. 'loc_a__x' is parameter 'x' at station
// 'loc_a'), matching the default location_parameter_delimiter of the mixin.
model NetcdfModel
  // Differential states.
  Real loc_a__x(start=1.1);
  Real loc_a__w(start=0.0);
  // Alias of loc_a__x (exercises alias handling).
  Real alias;
  parameter Real k = 1.0;
  // Free input, determined by the optimization.
  input Real loc_b__u(fixed=false);
  output Real loc_c__y;
  output Real loc_a__z;
  input Real loc_a__x_delayed(fixed=false);
  output Real loc_c__switched;
  // Fixed input, read from the timeseries import file.
  input Real loc_a__constant_input(fixed=true);
  output Real loc_a__constant_output;
equation
  der(loc_a__x) = k * loc_a__x + loc_b__u;
  der(loc_a__w) = loc_a__x;
  alias = loc_a__x;
  loc_c__y + loc_a__x = 3.0;
  loc_a__z = alias^2 + sin(time);
  // Delayed copy of the state (exercises delay handling).
  loc_a__x_delayed = delay(loc_a__x, 0.1);
  // Conditional output (exercises if-equations).
  if loc_a__x > 0.5 then
    loc_c__switched = 1.0;
  else
    loc_c__switched = 2.0;
  end if;
  loc_a__constant_output = loc_a__constant_input;
end NetcdfModel;
import os
from unittest import TestCase
from netCDF4 import Dataset, chartostring
import numpy as np
import numpy.ma as ma
from rtctools.optimization.collocated_integrated_optimization_problem import CollocatedIntegratedOptimizationProblem
from rtctools.optimization.modelica_mixin import ModelicaMixin
from rtctools.optimization.netcdf_mixin import NetCDFMixin
from .data_path import data_path
class NetcdfModel(NetCDFMixin, ModelicaMixin, CollocatedIntegratedOptimizationProblem):
    """Small optimization problem used to exercise the NetCDF mixin end to end."""

    def __init__(self):
        folder = data_path()
        super().__init__(
            input_folder=folder,
            output_folder=folder,
            model_name="NetcdfModel",
            model_folder=folder,
        )

    def read(self):
        super().read()

        # just add the parameters ourselves for now (values taken from test_pi_mixin)
        parameter_values = {
            'k': 1.01, 'x': 1.02, 'SV_V_y': 22.02, 'j': 12.01,
            'b': 13.01, 'y': 12.02, 'SV_H_y': 22.02,
        }
        for name, value in parameter_values.items():
            self.io.set_parameter(name, value)

    def objective(self, ensemble_member):
        # Quadratic penalty on state 'x' at final time
        return self.state_at("loc_a__x", self.times()[-1]) ** 2

    def constraints(self, ensemble_member):
        # No additional constraints
        return []

    def compiler_options(self):
        options = super().compiler_options()
        options["cache"] = False
        return options
class TestNetCDFMixin(TestCase):
    """End-to-end tests of NetCDFMixin: reading the import file and writing results."""

    def setUp(self):
        self.problem = NetcdfModel()
        # Absolute tolerance for the floating point comparisons below.
        self.tolerance = 1e-5

    def test_read(self):
        # Reading must create one rtc-tools timeseries per (station, parameter)
        # combination present in the import file.
        self.problem.read()
        datastore = self.problem.io
        self.assertTrue(np.all(datastore.get_timeseries_values('loc_a__u_min') == -3.0))
        self.assertTrue(np.all(datastore.get_timeseries_values('loc_b__u_min') == -2.0))
        self.assertTrue(np.all(datastore.get_timeseries_values('loc_a__u_max') == 3.0))
        self.assertTrue(np.all(datastore.get_timeseries_values('loc_b__u_max') == 2.0))
        # 'x' has data only for loc_a; the loc_b series is entirely NaN.
        expected_values = np.zeros((22,), dtype=float)
        expected_values[0] = 1.02
        expected_values[2] = 0.03
        self.assertTrue(np.array_equal(datastore.get_timeseries_values('loc_a__x'), expected_values))
        self.assertTrue(np.all(np.isnan(datastore.get_timeseries_values('loc_b__x'))))
        # Same pattern for 'w'.
        expected_values = np.zeros((22,), dtype=float)
        expected_values[2] = 0.03
        self.assertTrue(np.array_equal(datastore.get_timeseries_values('loc_a__w'), expected_values))
        self.assertTrue(np.all(np.isnan(datastore.get_timeseries_values('loc_b__w'))))
        self.assertTrue(np.all(datastore.get_timeseries_values('loc_a__constant_input') == 1.0))
        self.assertTrue(np.all(datastore.get_timeseries_values('loc_b__constant_input') == 1.5))

    def test_write(self):
        # Optimizing should produce a timeseries_export.nc with the expected layout.
        self.problem.optimize()
        self.results = self.problem.extract_results()
        # open the exported file
        filename = os.path.join(
            data_path(),
            self.problem.timeseries_export_basename + ".nc"
        )
        dataset = Dataset(filename)
        # One variable per output parameter id plus time/station/coordinate variables.
        written_variables = dataset.variables.keys()
        self.assertEqual(len(written_variables), 10)
        self.assertTrue('time' in written_variables)
        self.assertTrue('station_id' in written_variables)
        self.assertTrue('lon' in written_variables)
        self.assertTrue('lat' in written_variables)
        self.assertTrue('y' in written_variables)
        self.assertTrue('constant_output' in written_variables)
        self.assertTrue('u' in written_variables)
        self.assertTrue('z' in written_variables)
        self.assertTrue('switched' in written_variables)
        self.assertTrue('x_delayed' in written_variables)
        # Station ids are stored as fixed-width character arrays (3 stations, width 5).
        ids_var = dataset.variables['station_id']
        self.assertEqual(ids_var.shape, (3, 5))
        self.assertEqual(ids_var.cf_role, 'timeseries_id')
        station_ids = []
        for i in range(3):
            station_ids.append(str(chartostring(ids_var[i])))
        self.assertTrue('loc_a'in station_ids)
        self.assertTrue('loc_b' in station_ids)
        self.assertTrue('loc_c' in station_ids)
        # order of location ids is random each time the test runs...
        loc_a_index = station_ids.index('loc_a')
        loc_b_index = station_ids.index('loc_b')
        loc_c_index = station_ids.index('loc_c')
        self.assertAlmostEqual(dataset.variables['lon'][loc_a_index], 4.3780269, delta=self.tolerance)
        # 'y' only has results for loc_c; other station columns are masked (NaN after fill).
        y = dataset.variables['y']
        self.assertEqual(y.shape, (22, 3))
        for i in range(3):
            data = ma.filled(y[:, i], np.nan)
            if i == loc_c_index:
                self.assertAlmostEqual(data[0], 1.98, delta=self.tolerance)
                for j in range(1, 22):
                    self.assertAlmostEqual(data[j], 3.0, delta=self.tolerance)
            else:
                self.assertTrue(np.all(np.isnan(data)))
        # 'u' only has results for loc_b.
        u = dataset.variables['u']
        self.assertEqual(u.shape, (22, 3))
        for i in range(3):
            data = ma.filled(u[:, i], np.nan)
            if i == loc_b_index:
                self.assertTrue(np.all(~np.isnan(data)))
            else:
                self.assertTrue(np.all(np.isnan(data)))
        # 'constant_output' only has results for loc_a.
        constant_output = dataset.variables['constant_output']
        self.assertEqual(constant_output.shape, (22, 3))
        for i in range(3):
            data = ma.filled(constant_output[:, i], np.nan)
            if i == loc_a_index:
                self.assertTrue(np.all(data == 1.0))
            else:
                self.assertTrue(np.all(np.isnan(data)))
        # Times are written in seconds relative to the forecast reference time.
        time = dataset.variables['time']
        self.assertEqual(time.units, 'seconds since 2013-05-09 22:00:00')
        self.assertEqual(time.standard_name, 'time')
        self.assertEqual(time.axis, 'T')
        self.assertTrue(np.allclose(time[:], np.arange(0, 22*3600, 3600, dtype=float)))
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment