diff --git a/cfdm/read_write/netcdf/netcdfread.py b/cfdm/read_write/netcdf/netcdfread.py index fe43e867c..bb1c8524e 100644 --- a/cfdm/read_write/netcdf/netcdfread.py +++ b/cfdm/read_write/netcdf/netcdfread.py @@ -1020,26 +1020,41 @@ def read( # ------------------------------------------------------------ # Find the CF version for the file, and the CFA version. + # (See '2.6.1 Identification of Conventions' in the CF Conformance + # document for valid inputs for the 'Conventions' property.) # ------------------------------------------------------------ Conventions = g["global_attributes"].get("Conventions", "") - # If the string contains any commas, it is assumed to be a # comma-separated list. - all_conventions = re.split(",\s*", Conventions) + all_conventions = re.split(r",\s*", Conventions) if all_conventions[0] == Conventions: all_conventions = Conventions.split() file_version = None for c in all_conventions: - if c.startswith("CF-"): - file_version = c.replace("CF-", "", 1) - elif c.startswith("UGRID-"): + # Be particularly strict with the regex to account for ambiguous + # values e.g. CF-<badversionformat> or CF-1.X/CF-1.Y. Note that: + # * the '^' and '$' start and end of string tokens ensure that + # only zero or one match can be found per given string c; + # * the regex below ensures a valid input to + # Version(), allowing any level of versioning identifier + # detail e.g. 1.23.34.45.6 (for future-proofing). + # See https://packaging.python.org/en/latest/specifications/ + # version-specifiers/ for more on valid input to Version() + v_id = r"^{}-(\d+(\.\d+)*)$" + cf_v = re.search(v_id.format("CF"), c) + u_v = re.search(v_id.format("UGRID"), c) + cfa_v = re.search(v_id.format("CFA"), c) + + if cf_v: + file_version = cf_v.group(1) + elif u_v: # Allow UGRID if it has been specified in Conventions, # regardless of the version of CF. - g["UGRID_version"] = Version(c.replace("UGRID-", "", 1)) - elif c.startswith("CFA-"): + g["UGRID_version"] = Version(u_v.group(1)) + elif cfa_v: g["cfa"] = True - g["CFA_version"] = Version(c.replace("CFA-", "", 1)) + g["CFA_version"] = Version(cfa_v.group(1)) elif c == "CFA": g["cfa"] = True g["CFA_version"] = Version("0.4") @@ -1054,6 +1069,11 @@ def read( file_version = self.implementation.get_cf_version() g["file_version"] = Version(file_version) + if is_log_level_debug(logger): + logger.debug( + " Versioning:\n read_vars['file_version'] =" + f"{g['file_version']}" + ) # pragma: no cover # Set minimum/maximum versions for vn in ("1.6", "1.7", "1.8", "1.9", "1.10", "1.11"): diff --git a/cfdm/read_write/read.py b/cfdm/read_write/read.py index b96345878..2289be5eb 100644 --- a/cfdm/read_write/read.py +++ b/cfdm/read_write/read.py @@ -18,6 +18,7 @@ def read( mask=True, domain=False, _implementation=_implementation, + _scan_only=False, ): """Read field or domain constructs from a dataset. @@ -335,6 +336,7 @@ def read( mask=mask, domain=domain, extra_read_vars=None, + _scan_only=_scan_only, ) except MaskError: # Some data required for field interpretation is missing, diff --git a/cfdm/test/test_read_write.py b/cfdm/test/test_read_write.py index 5e9c0ab8a..dc70e0e2d 100644 --- a/cfdm/test/test_read_write.py +++ b/cfdm/test/test_read_write.py @@ -702,6 +702,96 @@ def test_read_write_string(self): ): self.assertTrue(i.equals(j, verbose=3)) + def test_read_write_Conventions_version(self): + """Test processing of `Conventions` attribute to field property.""" + from packaging.version import Version + + f = cfdm.read(self.filename)[0] + + # Construct single valid values for standards + valid_version_ends = ["1.11", "1", "2.30.4"] + cf_valid_conv = [f"CF-{v}" for v in valid_version_ends] + ugrid_valid_conv = [f"UGRID-{v}" for v in valid_version_ends] + cfa_valid_conv = [f"CFA-{v}" for v in valid_version_ends] + other_valid_conv = [f"somestandard-{v}" for v in valid_version_ends] + + # Construct some mixed compound valid values for standards. Reverse + # one list to make version IDs differ on at least one standard. + zip_valid = list(zip( + cf_valid_conv, reversed(ugrid_valid_conv), cfa_valid_conv, + other_valid_conv + )) + # Only space and comma delimiters are valid (see Conformance doc.) + combinations_comma_delim = [",".join(c) for c in zip_valid] + combinations_space_delim = [" ".join(c) for c in zip_valid] + + all_valid_conv = ( + cf_valid_conv + ugrid_valid_conv + cfa_valid_conv + + other_valid_conv + combinations_comma_delim + + combinations_space_delim + ) + + # Check that valid Conventions version specifications get set as the + # corresponding version on the Conventions property. + for set_conv_value in all_valid_conv: + cfdm.write(f, tmpfile) + + # Can't use cfdm to change Conventions property so must use netCDF4 + # Open with append mode, just want to update the global attribute + n = netCDF4.Dataset(tmpfile, "a") + n.Conventions = set_conv_value + n.close() + + g = cfdm.read(tmpfile)[0] + self.assertEqual(g.get_property("Conventions"), set_conv_value) + + invalid_version_ends = ["1.1/1.2", "bad", ".11", ""] + cf_invalid_conv = [f"CF-{v}" for v in invalid_version_ends] + ugrid_invalid_conv = [f"UGRID-{v}" for v in invalid_version_ends] + cfa_invalid_conv = [f"CFA-{v}" for v in invalid_version_ends] + other_invalid_conv = [f"somestandard-{v}" for v in invalid_version_ends] + zip_invalid = zip( + cf_invalid_conv, ugrid_invalid_conv, cfa_invalid_conv, + other_invalid_conv + ) + bad_combinations_good_delim = [",".join(c) for c in zip_invalid] + # Include valid values with bad (unsupported) delimiters + good_combinations_bad_delim = ["- ".join(c) for c in zip_valid] + + all_invalid_conv = ( + cf_invalid_conv + ugrid_invalid_conv + cfa_invalid_conv + + other_invalid_conv + bad_combinations_good_delim + + good_combinations_bad_delim + ) + + # Include a mixture of valid and invalid version specifiers + some_valid_some_invalid_conv = [ + " ".join(c) for c in zip( + cf_invalid_conv, reversed(ugrid_valid_conv), cfa_invalid_conv, + other_valid_conv) + ] + + # Check that invalid version specifications get ignored, so that the + # file is successfully read in, but is given default-logic version. + for set_conv_value in ( + all_invalid_conv + some_valid_some_invalid_conv): + get_conv_value = f.get_property("Conventions") + cfdm.write(f, tmpfile) + + # Can't use cfdm to change Conventions property so must use netCDF4 + n = netCDF4.Dataset(tmpfile, "a") + n.Conventions = set_conv_value + n.close() + + self.assertEqual( + cfdm.read(tmpfile, _scan_only=True)["file_version"], + Version(get_conv_value.lstrip("CF-")) + ) + g = cfdm.read(tmpfile)[0] + # TODO: do we want to re-set the Conventions property as well, + # given it is invalid? + self.assertEqual(g.get_property("Conventions"), set_conv_value) + def test_read_write_Conventions(self): """Test the `Conventions` keyword argument to `write`.""" f = cfdm.read(self.filename)[0]