From 2d204eaa990a376e5c1641903239a5afb5b3e5de Mon Sep 17 00:00:00 2001 From: "Sadie L. Bartholomew" Date: Thu, 16 May 2024 17:31:45 +0100 Subject: [PATCH 01/13] Account for Conventions property value edge case with 2x'CF-' --- cfdm/read_write/netcdf/netcdfread.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cfdm/read_write/netcdf/netcdfread.py b/cfdm/read_write/netcdf/netcdfread.py index fe43e867c..dff4934e3 100644 --- a/cfdm/read_write/netcdf/netcdfread.py +++ b/cfdm/read_write/netcdf/netcdfread.py @@ -1032,7 +1032,9 @@ def read( file_version = None for c in all_conventions: if c.startswith("CF-"): - file_version = c.replace("CF-", "", 1) + # Exclude ambiguous edge cases e.g. CF-1.X/CF-1.Y (seen IRL) + if c.count("CF-") == 1: + file_version = c.replace("CF-", "", 1) elif c.startswith("UGRID-"): # Allow UGRID if it has been specified in Conventions, # regardless of the version of CF. From eafd46400aba0d7caec37efaf6a0d99bd88eecd2 Mon Sep 17 00:00:00 2001 From: "Sadie L. Bartholomew" Date: Fri, 17 May 2024 17:51:00 +0100 Subject: [PATCH 02/13] netcdfread: more robust processing of Conventions prop. w/ regex --- cfdm/read_write/netcdf/netcdfread.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/cfdm/read_write/netcdf/netcdfread.py b/cfdm/read_write/netcdf/netcdfread.py index dff4934e3..f637d98b8 100644 --- a/cfdm/read_write/netcdf/netcdfread.py +++ b/cfdm/read_write/netcdf/netcdfread.py @@ -1030,18 +1030,23 @@ def read( all_conventions = Conventions.split() file_version = None + valid_version_pattern = re.compile("(\d+.\d+)") for c in all_conventions: - if c.startswith("CF-"): - # Exclude ambiguous edge cases e.g. CF-1.X/CF-1.Y (seen IRL) - if c.count("CF-") == 1: - file_version = c.replace("CF-", "", 1) - elif c.startswith("UGRID-"): + cf_v = re.findall(r"CF-(.*)", c) + u_v = re.findall(r"UGRID-(.*)", c) + cfa_v = re.findall(r"CFA-(.*)", c) + + # Else ambiguous e.g. CF- or CF-1.X/CF-1.Y + if len(cf_v) == 1 and re.findall(valid_version_pattern, cf_v[0]): + file_version = cf_v[0] + elif len(u_v) == 1 and re.findall(valid_version_pattern, u_v[0]): # Allow UGRID if it has been specified in Conventions, # regardless of the version of CF. - g["UGRID_version"] = Version(c.replace("UGRID-", "", 1)) - elif c.startswith("CFA-"): + g["UGRID_version"] = Version(u_v[0]) + elif len(cfa_v) == 1 and re.findall( + valid_version_pattern, cfa_v[0]): g["cfa"] = True - g["CFA_version"] = Version(c.replace("CFA-", "", 1)) + g["CFA_version"] = Version(cfa_v[0]) elif c == "CFA": g["cfa"] = True g["CFA_version"] = Version("0.4") From 408765b8ac42b99e31d851768c643bfed35969f6 Mon Sep 17 00:00:00 2001 From: "Sadie L. Bartholomew" Date: Fri, 17 May 2024 18:15:16 +0100 Subject: [PATCH 03/13] netcdfread: consolidate regex Conventions value processing --- cfdm/read_write/netcdf/netcdfread.py | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/cfdm/read_write/netcdf/netcdfread.py b/cfdm/read_write/netcdf/netcdfread.py index f637d98b8..e12f71427 100644 --- a/cfdm/read_write/netcdf/netcdfread.py +++ b/cfdm/read_write/netcdf/netcdfread.py @@ -1030,23 +1030,25 @@ def read( all_conventions = Conventions.split() file_version = None - valid_version_pattern = re.compile("(\d+.\d+)") for c in all_conventions: - cf_v = re.findall(r"CF-(.*)", c) - u_v = re.findall(r"UGRID-(.*)", c) - cfa_v = re.findall(r"CFA-(.*)", c) - - # Else ambiguous e.g. CF- or CF-1.X/CF-1.Y - if len(cf_v) == 1 and re.findall(valid_version_pattern, cf_v[0]): - file_version = cf_v[0] - elif len(u_v) == 1 and re.findall(valid_version_pattern, u_v[0]): + # Be particularly strict with the regex to account for ambiguous + # values e.g. CF- or CF-1.X/CF-1.Y. Note that + # the '^' and '$' start and end of string tokens ensure that + # only zero or one match can be found per given string c (hence + # taking group(1) when given conditional is True below is safe). + cf_v = re.search(r"^CF-(\d+.\d+)$", c) + u_v = re.search(r"^UGRID-(\d+.\d+)$", c) + cfa_v = re.search(r"^CFA-(\d+.\d+)$", c) + + if cf_v: + file_version = cf_v.group(1) + elif u_v: # Allow UGRID if it has been specified in Conventions, # regardless of the version of CF. - g["UGRID_version"] = Version(u_v[0]) - elif len(cfa_v) == 1 and re.findall( - valid_version_pattern, cfa_v[0]): + g["UGRID_version"] = Version(u_v.group(1)) + elif cfa_v: g["cfa"] = True - g["CFA_version"] = Version(cfa_v[0]) + g["CFA_version"] = Version(cfa_v.group(1)) elif c == "CFA": g["cfa"] = True g["CFA_version"] = Version("0.4") From 5575cb596a3fbba78d4c2bcb6d0bad484261b903 Mon Sep 17 00:00:00 2001 From: "Sadie L. Bartholomew" Date: Fri, 17 May 2024 18:25:33 +0100 Subject: [PATCH 04/13] netcdfread: prevent SyntaxWarning on invalid escape sequence --- cfdm/read_write/netcdf/netcdfread.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cfdm/read_write/netcdf/netcdfread.py b/cfdm/read_write/netcdf/netcdfread.py index e12f71427..2f7b8d698 100644 --- a/cfdm/read_write/netcdf/netcdfread.py +++ b/cfdm/read_write/netcdf/netcdfread.py @@ -1025,7 +1025,7 @@ def read( # If the string contains any commas, it is assumed to be a # comma-separated list. - all_conventions = re.split(",\s*", Conventions) + all_conventions = re.split(r",\s*", Conventions) if all_conventions[0] == Conventions: all_conventions = Conventions.split() From 9512e0ac9868395eb4ac0e93692cf5302a7b4642 Mon Sep 17 00:00:00 2001 From: "Sadie L. Bartholomew" Date: Tue, 21 May 2024 15:17:48 +0100 Subject: [PATCH 05/13] Allow more version ID'ing components in Conventions value regex --- cfdm/read_write/netcdf/netcdfread.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/cfdm/read_write/netcdf/netcdfread.py b/cfdm/read_write/netcdf/netcdfread.py index 2f7b8d698..bf18ccb5c 100644 --- a/cfdm/read_write/netcdf/netcdfread.py +++ b/cfdm/read_write/netcdf/netcdfread.py @@ -1020,9 +1020,10 @@ def read( # ------------------------------------------------------------ # Find the CF version for the file, and the CFA version. + # (See '2.6.1 Identification of Conventions' in the CF Conformance + # document for valid inputs for the 'Conventions' property.) # ------------------------------------------------------------ Conventions = g["global_attributes"].get("Conventions", "") - # If the string contains any commas, it is assumed to be a # comma-separated list. all_conventions = re.split(r",\s*", Conventions) @@ -1032,13 +1033,15 @@ def read( file_version = None for c in all_conventions: # Be particularly strict with the regex to account for ambiguous - # values e.g. CF- or CF-1.X/CF-1.Y. Note that - # the '^' and '$' start and end of string tokens ensure that - # only zero or one match can be found per given string c (hence - # taking group(1) when given conditional is True below is safe). - cf_v = re.search(r"^CF-(\d+.\d+)$", c) - u_v = re.search(r"^UGRID-(\d+.\d+)$", c) - cfa_v = re.search(r"^CFA-(\d+.\d+)$", c) + # values e.g. CF- or CF-1.X/CF-1.Y. Note that: + # * the '^' and '$' start and end of string tokens ensure that + # only zero or one match can be found per given string c; + # * the '(\d+(.\d+)*)' regex ensures a valid input to + # Version(), allowing any level of versioning identifier + # detail e.g. 1.23.34.45.6 (for future-proofing). + cf_v = re.search(r"^CF-(\d+(.\d+)*)$", c) + u_v = re.search(r"^UGRID-(\d+(.\d+)*)$", c) + cfa_v = re.search(r"^CFA-(\d+(.\d+)*)$", c) if cf_v: file_version = cf_v.group(1) From 1a5557ba9165565229a508c414601faefd695833 Mon Sep 17 00:00:00 2001 From: "Sadie L. Bartholomew" Date: Tue, 21 May 2024 22:57:54 +0100 Subject: [PATCH 06/13] Support reading of files w/ conformant Conventions='CF-X.Y.Z-draft' --- cfdm/read_write/netcdf/netcdfread.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/cfdm/read_write/netcdf/netcdfread.py b/cfdm/read_write/netcdf/netcdfread.py index bf18ccb5c..aa2351f09 100644 --- a/cfdm/read_write/netcdf/netcdfread.py +++ b/cfdm/read_write/netcdf/netcdfread.py @@ -1039,12 +1039,20 @@ def read( # * the '(\d+(.\d+)*)' regex ensures a valid input to # Version(), allowing any level of versioning identifier # detail e.g. 1.23.34.45.6 (for future-proofing). - cf_v = re.search(r"^CF-(\d+(.\d+)*)$", c) - u_v = re.search(r"^UGRID-(\d+(.\d+)*)$", c) - cfa_v = re.search(r"^CFA-(\d+(.\d+)*)$", c) + v_id = r"^{}-(\d+(.\d+)*)$" + cf_v = re.search(v_id.format("CF"), c) + u_v = re.search(v_id.format("UGRID"), c) + cfa_v = re.search(v_id.format("CFA"), c) + + # For the case of CF, also valid is 'CF-X-draft', where X + # is the present but unreleased version, e.g. "CF-1.12-draft". + v_id_draft = v_id[:-2] + "-draft)$" # == + "draft" + v_id[-2:] + cf_v_draft = re.search(v_id_draft.format("CF"), c) if cf_v: file_version = cf_v.group(1) + elif cf_v_draft: + file_version = cf_v_draft.group(1) elif u_v: # Allow UGRID if it has been specified in Conventions, # regardless of the version of CF. From f4918f9e24824236857ba6c40a630027193c6d86 Mon Sep 17 00:00:00 2001 From: "Sadie L. Bartholomew" Date: Tue, 21 May 2024 23:01:21 +0100 Subject: [PATCH 07/13] netcdfread: add test to cover conformant Conventions attr. values --- cfdm/test/test_read_write.py | 49 ++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/cfdm/test/test_read_write.py b/cfdm/test/test_read_write.py index 5e9c0ab8a..df70ec806 100644 --- a/cfdm/test/test_read_write.py +++ b/cfdm/test/test_read_write.py @@ -702,6 +702,55 @@ def test_read_write_string(self): ): self.assertTrue(i.equals(j, verbose=3)) + def test_read_write_Conventions_version_processing(self): + """TODO.""" + f = cfdm.read(self.filename)[0] + + valid_version_ends = ["1.11", "1", "2.30.4"] + invalid_version_ends = ["1.1/1.2", "bad", ".11", ""] + + # Construct single valid values for standards + cf_valid_conv = [f"CF-{v}" for v in valid_version_ends] + # Also valid - see + # http://cfconventions.org/cf-conventions/conformance.html, section + # 2.6.1 ('Identification of Conventions') + cf_valid_conv.append("CF-1.12-draft") + ugrid_valid_conv = [f"UGRID-{v}" for v in valid_version_ends] + cfa_valid_conv = [f"CFA-{v}" for v in valid_version_ends] + other_valid_conv = [f"somestandard-{v}" for v in valid_version_ends] + + # Construct some mixed compound valid values for standards. Reverse + # one list to make version IDs differ on at least one standard and take + # final items of cf_valid_conv to include '-draft' non-trivial value. + zip_valid = list(zip( + cf_valid_conv[1:], ugrid_valid_conv, reversed(cfa_valid_conv), + other_valid_conv + )) + # Only space and comma delimiters are valid (see Conformance doc.) + combinations_comma_delim = [",".join(c) for c in zip_valid] + combinations_space_delim = [" ".join(e) for e in zip_valid] + + all_valid_conv = ( + cf_valid_conv + ugrid_valid_conv + cfa_valid_conv + + other_valid_conv + combinations_comma_delim + + combinations_space_delim + ) + + for set_conv_value in all_valid_conv: + cfdm.write(f, tmpfile) + + # TODO: get the update to Conventions globla attr. working using + # cf instead of netCDF4, for some reason cf setting isn't working. + # + # Open with append mode, just want to update the global attribute + n = netCDF4.Dataset(tmpfile, "a") + n.Conventions = set_conv_value + n.close() + + g = cfdm.read(tmpfile)[0] + self.assertEqual(g.get_property("Conventions"), set_conv_value) + + def test_read_write_Conventions(self): """Test the `Conventions` keyword argument to `write`.""" f = cfdm.read(self.filename)[0] From 8cce328b36f7749dc60a21c00c089a02e1b9cc46 Mon Sep 17 00:00:00 2001 From: "Sadie L. Bartholomew" Date: Tue, 21 May 2024 23:17:05 +0100 Subject: [PATCH 08/13] netcdfread: fix Conventions attr. processing to allow 'X.Y-draft' --- cfdm/read_write/netcdf/netcdfread.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/cfdm/read_write/netcdf/netcdfread.py b/cfdm/read_write/netcdf/netcdfread.py index aa2351f09..f778d3e4f 100644 --- a/cfdm/read_write/netcdf/netcdfread.py +++ b/cfdm/read_write/netcdf/netcdfread.py @@ -1039,6 +1039,8 @@ def read( # * the '(\d+(.\d+)*)' regex ensures a valid input to # Version(), allowing any level of versioning identifier # detail e.g. 1.23.34.45.6 (for future-proofing). + # See https://packaging.python.org/en/latest/specifications/ + # version-specifiers/ for more on valid input to Version() v_id = r"^{}-(\d+(.\d+)*)$" cf_v = re.search(v_id.format("CF"), c) u_v = re.search(v_id.format("UGRID"), c) @@ -1046,13 +1048,17 @@ def read( # For the case of CF, also valid is 'CF-X-draft', where X # is the present but unreleased version, e.g. "CF-1.12-draft". - v_id_draft = v_id[:-2] + "-draft)$" # == + "draft" + v_id[-2:] + v_id_draft = v_id[:-2] + "-draft)$" # i.e. + "draft" + v_id[-2:] cf_v_draft = re.search(v_id_draft.format("CF"), c) if cf_v: file_version = cf_v.group(1) elif cf_v_draft: - file_version = cf_v_draft.group(1) + # TODO: what should we set when Conventions=X.Y-draft? + # Is it best to set to the X.Y i.e. upcoming version, though + # it only obeys a draft state of that which may be updated + # so that it becoes non-conformant? If so, set as follows: + file_version = cf_v_draft.group(1).rstrip("-draft") elif u_v: # Allow UGRID if it has been specified in Conventions, # regardless of the version of CF. From 8cfc2378435bf9cca4d0b841bae661bab1b31bd9 Mon Sep 17 00:00:00 2001 From: "Sadie L. Bartholomew" Date: Wed, 22 May 2024 11:35:33 +0100 Subject: [PATCH 09/13] netcdfread: escape period to fix Conventions processing regex --- cfdm/read_write/netcdf/netcdfread.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cfdm/read_write/netcdf/netcdfread.py b/cfdm/read_write/netcdf/netcdfread.py index f778d3e4f..0e0e69a49 100644 --- a/cfdm/read_write/netcdf/netcdfread.py +++ b/cfdm/read_write/netcdf/netcdfread.py @@ -1036,12 +1036,12 @@ def read( # values e.g. CF- or CF-1.X/CF-1.Y. Note that: # * the '^' and '$' start and end of string tokens ensure that # only zero or one match can be found per given string c; - # * the '(\d+(.\d+)*)' regex ensures a valid input to + # * the regex below ensures a valid input to # Version(), allowing any level of versioning identifier # detail e.g. 1.23.34.45.6 (for future-proofing). # See https://packaging.python.org/en/latest/specifications/ # version-specifiers/ for more on valid input to Version() - v_id = r"^{}-(\d+(.\d+)*)$" + v_id = r"^{}-(\d+(\.\d+)*)$" cf_v = re.search(v_id.format("CF"), c) u_v = re.search(v_id.format("UGRID"), c) cfa_v = re.search(v_id.format("CFA"), c) @@ -1051,6 +1051,7 @@ def read( v_id_draft = v_id[:-2] + "-draft)$" # i.e. + "draft" + v_id[-2:] cf_v_draft = re.search(v_id_draft.format("CF"), c) + print(c, "TESTING AGAINST", cf_v, u_v, cfa_v) if cf_v: file_version = cf_v.group(1) elif cf_v_draft: From 7ddadd0a1fed74da1f7cbacdbad46732994cfe76 Mon Sep 17 00:00:00 2001 From: "Sadie L. Bartholomew" Date: Thu, 30 May 2024 11:14:55 +0100 Subject: [PATCH 10/13] Update to account for cf-convention/discussions/321 findings --- cfdm/read_write/netcdf/netcdfread.py | 12 ------- cfdm/test/test_read_write.py | 51 +++++++++++++++++++++------- 2 files changed, 39 insertions(+), 24 deletions(-) diff --git a/cfdm/read_write/netcdf/netcdfread.py b/cfdm/read_write/netcdf/netcdfread.py index 0e0e69a49..60dd62f7c 100644 --- a/cfdm/read_write/netcdf/netcdfread.py +++ b/cfdm/read_write/netcdf/netcdfread.py @@ -1046,20 +1046,8 @@ def read( u_v = re.search(v_id.format("UGRID"), c) cfa_v = re.search(v_id.format("CFA"), c) - # For the case of CF, also valid is 'CF-X-draft', where X - # is the present but unreleased version, e.g. "CF-1.12-draft". - v_id_draft = v_id[:-2] + "-draft)$" # i.e. + "draft" + v_id[-2:] - cf_v_draft = re.search(v_id_draft.format("CF"), c) - - print(c, "TESTING AGAINST", cf_v, u_v, cfa_v) if cf_v: file_version = cf_v.group(1) - elif cf_v_draft: - # TODO: what should we set when Conventions=X.Y-draft? - # Is it best to set to the X.Y i.e. upcoming version, though - # it only obeys a draft state of that which may be updated - # so that it becoes non-conformant? If so, set as follows: - file_version = cf_v_draft.group(1).rstrip("-draft") elif u_v: # Allow UGRID if it has been specified in Conventions, # regardless of the version of CF. diff --git a/cfdm/test/test_read_write.py b/cfdm/test/test_read_write.py index df70ec806..1929d7600 100644 --- a/cfdm/test/test_read_write.py +++ b/cfdm/test/test_read_write.py @@ -702,28 +702,21 @@ def test_read_write_string(self): ): self.assertTrue(i.equals(j, verbose=3)) - def test_read_write_Conventions_version_processing(self): - """TODO.""" + def test_read_write_Conventions_version(self): + """TODO Test the `Conventions`.""" f = cfdm.read(self.filename)[0] - valid_version_ends = ["1.11", "1", "2.30.4"] - invalid_version_ends = ["1.1/1.2", "bad", ".11", ""] - # Construct single valid values for standards + valid_version_ends = ["1.11", "1", "2.30.4"] cf_valid_conv = [f"CF-{v}" for v in valid_version_ends] - # Also valid - see - # http://cfconventions.org/cf-conventions/conformance.html, section - # 2.6.1 ('Identification of Conventions') - cf_valid_conv.append("CF-1.12-draft") ugrid_valid_conv = [f"UGRID-{v}" for v in valid_version_ends] cfa_valid_conv = [f"CFA-{v}" for v in valid_version_ends] other_valid_conv = [f"somestandard-{v}" for v in valid_version_ends] # Construct some mixed compound valid values for standards. Reverse - # one list to make version IDs differ on at least one standard and take - # final items of cf_valid_conv to include '-draft' non-trivial value. + # one list to make version IDs differ on at least one standard. zip_valid = list(zip( - cf_valid_conv[1:], ugrid_valid_conv, reversed(cfa_valid_conv), + cf_valid_conv, reversed(ugrid_valid_conv), cfa_valid_conv, other_valid_conv )) # Only space and comma delimiters are valid (see Conformance doc.) @@ -736,6 +729,8 @@ def test_read_write_Conventions_version_processing(self): combinations_space_delim ) + # Check that valid Conventions version specifications get set as the + # corresponding version on the Conventions property. for set_conv_value in all_valid_conv: cfdm.write(f, tmpfile) @@ -750,6 +745,38 @@ def test_read_write_Conventions_version_processing(self): g = cfdm.read(tmpfile)[0] self.assertEqual(g.get_property("Conventions"), set_conv_value) + invalid_version_ends = ["1.1/1.2", "bad", ".11", ""] + cf_invalid_conv = [f"CF-{v}" for v in invalid_version_ends] + ugrid_invalid_conv = [f"UGRID-{v}" for v in invalid_version_ends] + cfa_invalid_conv = [f"CFA-{v}" for v in invalid_version_ends] + other_invalid_conv = [f"somestandard-{v}" for v in invalid_version_ends] + zip_invalid = zip( + cf_invalid_conv, ugrid_invalid_conv, cfa_invalid_conv, + other_invalid_conv + ) + combinations_comma_delim = [ + ",".join(c) for c in zip_invalid + ] + # TODO add combination with some valid and some invalid... + all_invalid_conv = ( + cf_invalid_conv + ugrid_invalid_conv + cfa_invalid_conv + + other_invalid_conv + combinations_comma_delim + ) + + # Check that invalid version specifications get ignored, so that the + # file is successfully read in, but is given default-logic version. + for set_conv_value in all_invalid_conv: + # cfdm.implementation().get_cf_version() + get_conv_value = f.get_property("Conventions") + cfdm.write(f, tmpfile) + + # TODO: as for equivalent logic in above block + n = netCDF4.Dataset(tmpfile, "a") + n.Conventions = set_conv_value + n.close() + + g = cfdm.read(tmpfile)[0] + self.assertEqual(g.get_property("Conventions"), get_conv_value) def test_read_write_Conventions(self): """Test the `Conventions` keyword argument to `write`.""" From 27a8193a6cee8c31545e0a7b132152a963553ec6 Mon Sep 17 00:00:00 2001 From: "Sadie L. Bartholomew" Date: Thu, 30 May 2024 16:36:06 +0100 Subject: [PATCH 11/13] netcdfread testing: tidy TODOs & include further test cases --- cfdm/test/test_read_write.py | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/cfdm/test/test_read_write.py b/cfdm/test/test_read_write.py index 1929d7600..d29b4bd7c 100644 --- a/cfdm/test/test_read_write.py +++ b/cfdm/test/test_read_write.py @@ -703,7 +703,7 @@ def test_read_write_string(self): self.assertTrue(i.equals(j, verbose=3)) def test_read_write_Conventions_version(self): - """TODO Test the `Conventions`.""" + """Test processing of `Conventions` attribute to field property.""" f = cfdm.read(self.filename)[0] # Construct single valid values for standards @@ -721,7 +721,7 @@ def test_read_write_Conventions_version(self): )) # Only space and comma delimiters are valid (see Conformance doc.) combinations_comma_delim = [",".join(c) for c in zip_valid] - combinations_space_delim = [" ".join(e) for e in zip_valid] + combinations_space_delim = [" ".join(c) for c in zip_valid] all_valid_conv = ( cf_valid_conv + ugrid_valid_conv + cfa_valid_conv + @@ -734,9 +734,7 @@ def test_read_write_Conventions_version(self): for set_conv_value in all_valid_conv: cfdm.write(f, tmpfile) - # TODO: get the update to Conventions globla attr. working using - # cf instead of netCDF4, for some reason cf setting isn't working. - # + # Can't use cfdm to change Conventions property so must use netCDF4 # Open with append mode, just want to update the global attribute n = netCDF4.Dataset(tmpfile, "a") n.Conventions = set_conv_value @@ -754,23 +752,31 @@ def test_read_write_Conventions_version(self): cf_invalid_conv, ugrid_invalid_conv, cfa_invalid_conv, other_invalid_conv ) - combinations_comma_delim = [ - ",".join(c) for c in zip_invalid - ] - # TODO add combination with some valid and some invalid... + bad_combinations_good_delim = [",".join(c) for c in zip_invalid] + # Include valid values with bad (unsupported) delimiters + good_combinations_bad_delim = ["- ".join(c) for c in zip_valid] + all_invalid_conv = ( cf_invalid_conv + ugrid_invalid_conv + cfa_invalid_conv + - other_invalid_conv + combinations_comma_delim + other_invalid_conv + bad_combinations_good_delim + + good_combinations_bad_delim ) + # Include a mixture of valid and invalid version specifiers + some_valid_some_invalid_conv = [ + " ".join(c) for c in zip( + cf_invalid_conv, reversed(ugrid_valid_conv), cfa_invalid_conv, + other_valid_conv) + ] + # Check that invalid version specifications get ignored, so that the # file is successfully read in, but is given default-logic version. - for set_conv_value in all_invalid_conv: - # cfdm.implementation().get_cf_version() + for set_conv_value in ( + all_invalid_conv + some_valid_some_invalid_conv): get_conv_value = f.get_property("Conventions") cfdm.write(f, tmpfile) - # TODO: as for equivalent logic in above block + # Can't use cfdm to change Conventions property so must use netCDF4 n = netCDF4.Dataset(tmpfile, "a") n.Conventions = set_conv_value n.close() From 3548381965ee371a4991c6bc7b70005cb3c527f4 Mon Sep 17 00:00:00 2001 From: "Sadie L. Bartholomew" Date: Tue, 4 Jun 2024 12:57:09 +0100 Subject: [PATCH 12/13] netcdfread: add file_version info. to debug logging --- cfdm/read_write/netcdf/netcdfread.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/cfdm/read_write/netcdf/netcdfread.py b/cfdm/read_write/netcdf/netcdfread.py index 60dd62f7c..bb1c8524e 100644 --- a/cfdm/read_write/netcdf/netcdfread.py +++ b/cfdm/read_write/netcdf/netcdfread.py @@ -1069,6 +1069,11 @@ def read( file_version = self.implementation.get_cf_version() g["file_version"] = Version(file_version) + if is_log_level_debug(logger): + logger.debug( + " Versioning:\n read_vars['file_version'] =" + f"{g['file_version']}" + ) # pragma: no cover # Set minimum/maximum versions for vn in ("1.6", "1.7", "1.8", "1.9", "1.10", "1.11"): From 95baf2af3c6054e331c598da5b056d75bbfdfe99 Mon Sep 17 00:00:00 2001 From: "Sadie L. Bartholomew" Date: Tue, 4 Jun 2024 14:22:02 +0100 Subject: [PATCH 13/13] Test read_vars file_version is set correctly w/ netCDF reading --- cfdm/read_write/read.py | 2 ++ cfdm/test/test_read_write.py | 10 +++++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/cfdm/read_write/read.py b/cfdm/read_write/read.py index b96345878..2289be5eb 100644 --- a/cfdm/read_write/read.py +++ b/cfdm/read_write/read.py @@ -18,6 +18,7 @@ def read( mask=True, domain=False, _implementation=_implementation, + _scan_only=False, ): """Read field or domain constructs from a dataset. @@ -335,6 +336,7 @@ def read( mask=mask, domain=domain, extra_read_vars=None, + _scan_only=_scan_only, ) except MaskError: # Some data required for field interpretation is missing, diff --git a/cfdm/test/test_read_write.py b/cfdm/test/test_read_write.py index d29b4bd7c..dc70e0e2d 100644 --- a/cfdm/test/test_read_write.py +++ b/cfdm/test/test_read_write.py @@ -704,6 +704,8 @@ def test_read_write_string(self): def test_read_write_Conventions_version(self): """Test processing of `Conventions` attribute to field property.""" + from packaging.version import Version + f = cfdm.read(self.filename)[0] # Construct single valid values for standards @@ -781,8 +783,14 @@ def test_read_write_Conventions_version(self): n.Conventions = set_conv_value n.close() + self.assertEqual( + cfdm.read(tmpfile, _scan_only=True)["file_version"], + Version(get_conv_value.lstrip("CF-")) + ) g = cfdm.read(tmpfile)[0] - self.assertEqual(g.get_property("Conventions"), get_conv_value) + # TODO: do we want to re-set the Conventions property as well, + # given it is invalid? + self.assertEqual(g.get_property("Conventions"), set_conv_value) def test_read_write_Conventions(self): """Test the `Conventions` keyword argument to `write`."""