-
Notifications
You must be signed in to change notification settings - Fork 286
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
POC: Handle std names and aliases (#5257) #5313
base: main
Are you sure you want to change the base?
Changes from all commits
d8a4325
3adf5df
e94d654
dc98905
7b5467c
1ab8fd9
235ac02
d2216e7
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For consistency with existing Iris code, we believe a new attribute should be introduced: This gives users the level of information and control that they are used to.
|
if standard_name is not None: | |
try: | |
cube.standard_name = _get_valid_standard_name(standard_name) | |
except ValueError: | |
if cube.long_name is not None: | |
cube.attributes["invalid_standard_name"] = standard_name | |
else: | |
cube.long_name = standard_name |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,118 @@ | ||
# Copyright Iris contributors | ||
# | ||
# This file is part of Iris and is released under the LGPL license. | ||
# See COPYING and COPYING.LESSER in the root of the repository for full | ||
# licensing details. | ||
""" | ||
Handling of standard names and standard name aliases. | ||
""" | ||
|
||
import warnings | ||
|
||
import iris.std_names | ||
|
||
|
||
def get_convention():
    """
    Return the 'Conventions' string of the CF standard name table.

    The value is read from ``iris.std_names.CONVENTIONS_STRING``.  ``None``
    is returned when that attribute is not available.
    """
    try:
        return iris.std_names.CONVENTIONS_STRING
    except AttributeError:
        # The standard name module carries no conventions string.
        return None
|
||
|
||
def set_alias_processing(mode):
    """
    Select how aliased standard names are treated.

    Arg:

    * mode `string` specifying handling:
        'default' - restore the mode stored in `iris.std_names._DEFAULT`,
        'accept' - aliases are handled as any other standard name,
        'warn' - as above, but a warning is issued,
        'replace' - aliased standard names are replaced with the current one.

    A ValueError is raised when the standard name table defines no aliases,
    or when 'mode' is none of the values listed above.
    """
    std_names = iris.std_names
    if not hasattr(std_names, "ALIASES"):
        raise ValueError("The standard name table has no aliases defined.")

    # Resolve the requested mode first, then store it in a single place.
    if mode == "default":
        selected = std_names._DEFAULT
    elif mode in std_names._ALTERNATIVE_MODES:
        selected = mode
    else:
        raise ValueError(
            "{!r} is not a valid alternative for processing "
            "of standard name aliases.".format(mode)
        )
    std_names._MODE = selected
|
||
|
||
def get_description(name):
    """
    Look up the description of a standard name and return it as a `string`.

    Arg:

    * name `string` containing the standard name.

    Returns None when the standard name table holds no descriptions.
    For an aliased name the description of the current standard name is
    returned; a warning is issued as well when the alias processing mode is
    'replace'.  A ValueError is raised when 'name' is neither a standard
    name nor an alias.
    """
    std_names = iris.std_names
    if not hasattr(std_names, "DESCRIPTIONS"):
        return None

    # Current standard names map directly onto their description.
    if name in std_names.STD_NAMES:
        return std_names.DESCRIPTIONS[name]

    if not hasattr(std_names, "ALIASES") or name not in std_names.ALIASES:
        raise ValueError("{!r} is not a valid standard name.".format(name))

    # Aliases are described by the entry of the name they point to.
    current_name = std_names.ALIASES[name]
    description = std_names.DESCRIPTIONS[current_name]
    if std_names._MODE == std_names._REPLACE:
        template = (
            "\nStandard name {!r} is aliased and is \nreplaced by {!r}.\n"
            "The description for the latter will be used."
        )
        warnings.warn(template.format(name, current_name))
    return description
|
||
|
||
def check_valid_std_name(name):
    """
    Check and return if argument is a valid standard name or alias.

    Arg:

    * name `string` containing the prospective standard name.

    Depending on the setting of the alias processing the following will
    happen if 'name' is an aliased standard name:
        "accept" - the aliased standard name is accepted as valid and returned,
        "warn" - a warning is issued, otherwise the same as "accept",
        "replace" - the valid standard name is returned without warning.

    When 'name' is neither a standard name nor an alias a ValueError is
    raised.
    """
    std_names = iris.std_names

    # A current standard name is always returned unchanged.
    if name in std_names.STD_NAMES:
        return name

    # Without an alias table, or without an entry in it, the name is invalid.
    if not hasattr(std_names, "ALIASES") or name not in std_names.ALIASES:
        raise ValueError("{!r} is not a valid standard_name.".format(name))

    current_name = std_names.ALIASES[name]
    mode = std_names._MODE
    if mode == std_names._REPLACE:
        return current_name
    if mode == std_names._WARN:
        msg = "\nThe standard name {!r} is aliased and should be \nreplaced by {!r}."
        warnings.warn(msg.format(name, current_name))
    return name
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,8 +7,9 @@ | |
A script to convert the standard names information from the provided XML | ||
file into a Python dictionary format. | ||
|
||
Takes two arguments: the first is the XML file to process and the second | ||
is the name of the file to write the Python dictionary file into. | ||
Takes two or three arguments: the first is the XML file to process and the second | ||
is the name of the file to write the Python dictionary file into. The optional | ||
third argument, '--descr', includes the standard name descriptions in the file. | ||
|
||
By default, Iris will use the source XML file: | ||
etc/cf-standard-name-table.xml | ||
|
@@ -20,23 +21,30 @@ | |
""" | ||
|
||
import argparse | ||
import pprint | ||
import xml.etree.ElementTree as ET | ||
|
||
|
||
STD_VALUES_FILE_TEMPLATE = ''' | ||
STD_NAME_TABLE_FILE_TEMPLATE = ''' | ||
# Copyright Iris contributors | ||
# | ||
# This file is part of Iris and is released under the LGPL license. | ||
# See COPYING and COPYING.LESSER in the root of the repository for full | ||
# licensing details. | ||
""" | ||
This file contains a dictionary of standard value names that are mapped | ||
to another dictionary of other standard name attributes. Currently only | ||
the `canonical_unit` exists in these attribute dictionaries. | ||
|
||
This file is automatically generated. Do not edit this file by hand. | ||
|
||
The file contains the following elements, formatted as python code: | ||
* A few variables used internally in the standard name processing. | ||
These begin with an underscore. | ||
* Information on the source standard name table version. | ||
* A dictionary of standard value names that are mapped | ||
to another dictionary of other standard name attributes. | ||
Currently only the `canonical_unit` exists in these attribute | ||
dictionaries. | ||
* A dictionary of aliased standard names that are mapped to the | ||
current standard name. | ||
* Optionally, a dictionary of standard names mapped to their descriptions. | ||
|
||
The file will be generated during a standard build/installation:: | ||
|
||
python setup.py build | ||
|
@@ -49,10 +57,17 @@ | |
Or for more control (e.g. to use an alternative XML file) via:: | ||
|
||
python tools/generate_std_names.py XML_FILE MODULE_FILE | ||
|
||
""" | ||
'''.lstrip() | ||
|
||
|
||
def found_or_none(elem):
    """Return the text of `elem`, or None when `elem` itself is None."""
    # Compare against None explicitly: a childless Element is falsy.
    if elem is None:
        return None
    return elem.text
|
||
|
||
STD_NAMES = '''.lstrip() | ||
# Normalise the quoting in standard name descriptions: every double quote
# is turned into a single quote.
def replace_quote(txt):
    """Return `txt` with double quotes replaced by single quotes (None stays None)."""
    if txt is None:
        return None
    return txt.replace('"', "'")
|
||
|
||
def process_name_table(tree, element_name, *child_elements): | ||
|
@@ -62,32 +77,72 @@ def process_name_table(tree, element_name, *child_elements): | |
""" | ||
for elem in tree.iterfind(element_name): | ||
sub_section = {} | ||
|
||
for child_elem in child_elements: | ||
found_elem = elem.find(child_elem) | ||
sub_section[child_elem] = found_elem.text if found_elem is not None else None | ||
|
||
sub_section[child_elem] = found_or_none(elem.find(child_elem)) | ||
yield {elem.get("id") : sub_section} | ||
|
||
|
||
def to_dict(infile, outfile): | ||
values = {} | ||
aliases = {} | ||
|
||
def prettydict(outfile, varname, data):
    """
    Write `data` to `outfile` as Python source assigning a dict to `varname`.

    Args:

    * outfile - object with a `write` method receiving the generated text.
    * varname `string` name of the variable the dictionary is assigned to.
    * data `dict` to write; one entry per line, sorted by key.

    Keys and values are written with `repr`, so that the generated source
    evaluates back to the original objects: nested dictionaries stay
    dictionaries, None stays None, and quotes, backslashes and newlines in
    strings are escaped.
    """
    outfile.write(f"{varname} = {{\n")
    for key, value in sorted(data.items()):
        outfile.write(f"    {key!r}: {value!r},\n")
    outfile.write("}\n\n")
|
||
|
||
def decode_version(outfile, tree):
    """
    Decode the version information in the xml header information.

    Args:

    * outfile - object with a `write` method receiving the generated text.
    * tree - parsed XML tree of the standard name table.

    The header elements are collected in a dictionary that is written as
    `VERSION`.  A missing `table_name` is replaced by "CF-StdNameTable" when
    institution and contact match the expected values, and by
    "USER-StdNameTable" otherwise.  `CONVENTIONS_STRING` is written as the
    table name and version number joined by a hyphen; a missing version
    number is left out instead of raising a TypeError.
    """
    header_elements = [
        "table_name", "version_number", "last_modified", "institution", "contact"
    ]
    version = {
        elem: found_or_none(tree.find(elem)) for elem in header_elements
    }

    if version["table_name"] is None:
        # NOTE(review): the contact address was restored from a redacted
        # "[email protected]" placeholder - confirm it against the
        # <contact> element of the official CF standard name table.
        is_cf_table = (
            version["institution"] == "Centre for Environmental Data Analysis"
            and version["contact"] == "support@ceda.ac.uk"
        )
        version["table_name"] = (
            "CF-StdNameTable" if is_cf_table else "USER-StdNameTable"
        )
    prettydict(outfile, "VERSION", version)

    # str.join only accepts strings, so header elements that were not found
    # (None) are skipped.
    parts = [
        version[key]
        for key in ("table_name", "version_number")
        if version[key] is not None
    ]
    version_string = "-".join(parts)
    outfile.write(f'CONVENTIONS_STRING = "{version_string}"\n\n')
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Note that this was sorted in #5423, albeit in a different way. Presumably no longer necessary in this PR? |
||
|
||
|
||
def write_useful_variables(outfile): | ||
outfile.write( | ||
'\n# The following variables are used for processing the standard names information below\n' | ||
'_ACCEPT = "accept"\n' | ||
'_WARN = "warn"\n' | ||
'_REPLACE ="replace"\n' | ||
'_ALTERNATIVE_MODES = [_ACCEPT, _WARN, _REPLACE]\n' | ||
'_DEFAULT = "warn"\n' | ||
'_MODE = _DEFAULT\n\n' | ||
Comment on lines
+112
to
+117
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We can see what you are trying to achieve here, but we would like to achieve it using Iris' existing structure instead:
|
||
) | ||
|
||
|
||
def decode_standard_name_table(infile, outfile, description=False): | ||
"""Process the different parts of the xml file.""" | ||
tree = ET.parse(infile) | ||
|
||
outfile.write(STD_NAME_TABLE_FILE_TEMPLATE) | ||
write_useful_variables(outfile) | ||
decode_version(outfile, tree) | ||
|
||
data = {} | ||
for section in process_name_table(tree, 'entry', 'canonical_units'): | ||
values.update(section) | ||
data.update(section) | ||
prettydict(outfile, "STD_NAMES", data) | ||
|
||
data = {} | ||
for section in process_name_table(tree, 'alias', 'entry_id'): | ||
aliases.update(section) | ||
|
||
for key, valued in aliases.items(): | ||
values.update({ | ||
key : {'canonical_units' : values.get(valued['entry_id']).get('canonical_units')} | ||
}) | ||
for k, v in section.items(): | ||
data.update({k: v["entry_id"]}) | ||
prettydict(outfile, "ALIASES", data) | ||
|
||
outfile.write(STD_VALUES_FILE_TEMPLATE + pprint.pformat(values)) | ||
if description: | ||
data = {} | ||
for section in process_name_table(tree, 'entry', 'description'): | ||
for k, v in section.items(): | ||
data.update({k: replace_quote(v["description"])}) | ||
prettydict(outfile, "DESCRIPTIONS", data) | ||
|
||
|
||
if __name__ == "__main__": | ||
|
@@ -97,10 +152,13 @@ def to_dict(infile, outfile): | |
help='Path to CF standard name XML') | ||
parser.add_argument('output', metavar='OUTPUT', | ||
help='Path to resulting Python code') | ||
parser.add_argument('-d', '--descr', action="store_true", | ||
help="Include standard name descriptions") | ||
args = parser.parse_args() | ||
|
||
encoding = {'encoding': 'utf-8'} | ||
|
||
with open(args.input, 'r', **encoding) as in_fh: | ||
with open(args.output, 'w', **encoding) as out_fh: | ||
to_dict(in_fh, out_fh) | ||
decode_standard_name_table(in_fh, out_fh, args.descr) | ||
pass |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We should consider whether the
rename()
method needs a switch to accept aliases, or some other equivalent granular user control. @pp-mo I can't recall why using a
FUTURE
flag would not be enough user control on its own?