String to integer conversion
Describe the bug
Some of the CMIP6 data has branch time metadata written in unusual ways, e.g. with a trailing full stop ('60265.') or a trailing letter ('149749.0D'). Converting such strings to an integer fails, e.g. in https://gitlab.com/netcdf-scm/netcdf-scm/-/blob/master/src/netcdf_scm/stitching.py#L598.
Failing Test
@pytest.mark.parametrize(
    "infile",
    (
        # requires some path mangling and crunching first
        "CMIP6/CMIP/EC-Earth-Consortium/EC-Earth3/historical/r1i1p1f1/Amon/tas/gr/v20200310/tas_Amon_EC-Earth3_historical_r1i1p1f1_gr_185001-185012.nc",  # branch_time_in_parent = "149749.0D"
        "CMIP6/ScenarioMIP/EC-Earth-Consortium/EC-Earth3/ssp370/r4i1p1f1/Amon/tas/gr/v20200425/tas_Amon_EC-Earth3_ssp370_r4i1p1f1_gr_201501-201512.nc",  # branch_time_in_parent = "60265."
        "CMIP6/CMIP/EC-Earth-Consortium/EC-Earth3/historical/r11i1p1f1/Amon/tas/gr/v20200201/tas_Amon_EC-Earth3_historical_r11i1p1f1_gr_184912-185012.nc",  # branch_time_in_parent = 0.
    ),
)
def test_stitching_dangerous_strings_issue_62(infile):
    """Stitch and normalise files whose branch time metadata is oddly written.

    Each ``infile`` carries a ``branch_time_in_parent`` value (noted next to
    the path) that is not a plain integer string. This is a smoke test: it
    passes when stitching and normalising complete without raising.
    """
    # NOTE(review): ``nyears`` was referenced but never defined in the
    # original snippet (NameError). 21 is an assumed running-mean window;
    # confirm the intended value.
    nyears = 21

    (
        scmrun,
        picontrol_branching_time,
        picontrol_file,
    ) = netcdf_scm.stitching.get_continuous_timeseries_with_meta(
        infile, drs="CMIP6Output"
    )
    picontrol_scmrun = netcdf_scm.io.load_scmrun(picontrol_file)

    normaliser = netcdf_scm.normalisation.NormaliserRunningMean(nyears=nyears)
    # The result is not inspected; only successful completion is checked.
    normaliser.normalise_against_picontrol(
        scmrun, picontrol_scmrun, picontrol_branching_time
    )
We'd probably also want something like
@pytest.mark.parametrize(
    "in_string,exp_int",
    (
        ("12", 12),
        ("12.0", 12),
        ("12.1", None),
        ("149749.0D", 149749),
        ("149749.1D", None),
        # trailing full stop, as seen in the ssp370 branch time metadata
        ("60265.", 60265),
        ("0", 0),
        ("0.0", 0),
        ("0.1", None),
    ),
)
def test_str_to_int(in_string, exp_int):
    """Check ``_str_to_int`` on plain and exotic integer-like strings.

    ``exp_int`` is the expected integer, or ``None`` when the string does not
    represent a whole number and the conversion must raise ``ValueError``.
    """
    if exp_int is None:
        # Escape the message because inputs contain regex metacharacters (".").
        error_msg = re.escape("Cannot convert str to int: {}".format(in_string))
        with pytest.raises(ValueError, match=error_msg):
            _str_to_int(in_string)
        return

    res = _str_to_int(in_string)
    assert isinstance(res, int)
    assert res == exp_int
Expected behaviour
We should be able to handle some of these more exotic ways of writing the branch times.