SciTools · larsbarring · May 5, 2023 · May 10, 2023 · May 10, 2023 · May 9, 2023
diff --git a/etc/cf-standard-name-table.xml b/etc/cf-standard-name-table.xml
@@ -24594,8 +24594,7 @@
       <canonical_units>kg m-2 s-1</canonical_units>
       <grib></grib>
       <amip></amip>
-      <description>Methane emitted from the surface, generated by biomass burning (fires). Positive direction upwards.
-The surface called &quot;surface&quot; means the lower boundary of the atmosphere. &quot;Upward&quot; indicates a vector component which is positive when directed upward (negative downward). In accordance with common usage in geophysical disciplines, &quot;flux&quot; implies per unit area, called &quot;flux density&quot; in physics. The chemical formula for methane is CH4. The mass is the total mass of the molecules. The specification of a physical process by the phrase &quot;due_to_&quot; process means that the quantity named is a single term in a sum of terms which together compose the general quantity named by omitting the phrase. &quot;Emission&quot; means emission from a primary source located anywhere within the atmosphere, including at the lower boundary (i.e. the surface of the earth). &quot;Emission&quot; is a process entirely distinct from &quot;re-emission&quot; which is used in some standard names. The term &quot;fires&quot; means all biomass fires, whether naturally occurring or ignited by humans. The precise conditions under which fires produce and consume methane can vary between models.</description>
+      <description>Methane emitted from the surface, generated by biomass burning (fires). Positive direction upwards. The surface called &quot;surface&quot; means the lower boundary of the atmosphere. &quot;Upward&quot; indicates a vector component which is positive when directed upward (negative downward). In accordance with common usage in geophysical disciplines, &quot;flux&quot; implies per unit area, called &quot;flux density&quot; in physics. The chemical formula for methane is CH4. The mass is the total mass of the molecules. The specification of a physical process by the phrase &quot;due_to_&quot; process means that the quantity named is a single term in a sum of terms which together compose the general quantity named by omitting the phrase. &quot;Emission&quot; means emission from a primary source located anywhere within the atmosphere, including at the lower boundary (i.e. the surface of the earth). &quot;Emission&quot; is a process entirely distinct from &quot;re-emission&quot; which is used in some standard names. The term &quot;fires&quot; means all biomass fires, whether naturally occurring or ignited by humans. The precise conditions under which fires produce and consume methane can vary between models.</description>
    </entry>
 
    <entry id="surface_upward_mass_flux_of_methane_due_to_emission_from_herbivorous_mammals">

diff --git a/lib/iris/__init__.py b/lib/iris/__init__.py
@@ -100,6 +100,7 @@ def callback(cube, field, filename):
 import iris._constraints
 import iris.config
 import iris.io
+import iris.std_name_table
 
 from ._deprecation import IrisDeprecation, warn_deprecated
 

diff --git a/lib/iris/common/mixin.py b/lib/iris/common/mixin.py
@@ -13,7 +13,7 @@
 
 import cf_units
 
-import iris.std_names
+from iris.std_name_table import check_valid_std_name
 
 from .metadata import BaseMetadata
 
@@ -36,19 +36,22 @@ def _get_valid_standard_name(name):
         name_groups = name.split(maxsplit=1)
         if name_groups:
             std_name = name_groups[0]
-            name_is_valid = std_name in iris.std_names.STD_NAMES
+            try:
+                new_std_name = check_valid_std_name(name_groups[0])
+                name = name.replace(std_name, new_std_name)
+            except ValueError:
+                raise ValueError(
+                    "{!r} is not a valid standard_name".format(name)
+                )
             try:
                 std_name_modifier = name_groups[1]
             except IndexError:
                 pass  # No modifier
             else:
-                name_is_valid &= std_name_modifier in valid_std_name_modifiers
-
-            if not name_is_valid:
-                raise ValueError(
-                    "{!r} is not a valid standard_name".format(name)
-                )
-
+                if std_name_modifier not in valid_std_name_modifiers:
+                    raise ValueError(
+                        "{!r} is not a valid standard_name".format(name)
+                    )
     return name
 
 

diff --git a/lib/iris/std_name_table.py b/lib/iris/std_name_table.py
@@ -0,0 +1,118 @@
+# Copyright Iris contributors
+#
+# This file is part of Iris and is released under the LGPL license.
+# See COPYING and COPYING.LESSER in the root of the repository for full
+# licensing details.
+"""
+Handling of standard names and standard name aliases.
+"""
+
+import warnings
+
+import iris.std_names
+
+
+def get_convention():
+    """Return the 'Conventions' string of the CF standard name table."""
+    try:
+        convention = iris.std_names.CONVENTIONS_STRING
+    except AttributeError:
+        convention = None
+    return convention
+
+
+def set_alias_processing(mode):
+    """
+    Set how standard name aliases are handled.
+
+    Arg:
+
+    * mode `string` specifying handling, or 'default' for the default mode:
+            'accept' - aliases are handled as any other standard name,
+            'warn' - as above, but a warning is issued,
+            'replace' - aliased standard names are replaced with the current one.
+    """
+    if not hasattr(iris.std_names, "ALIASES"):
+        raise ValueError("The standard name table has no aliases defined.")
+    if mode == "default":
+        iris.std_names._MODE = iris.std_names._DEFAULT
+    elif mode in iris.std_names._ALTERNATIVE_MODES:
+        iris.std_names._MODE = mode
+    else:
+        raise ValueError(
+            "{!r} is not a valid alternative for processing "
+            "of standard name aliases.".format(mode)
+        )
+
+
+def get_description(name):
+    """
+    Return the standard name description as a `string`.
+
+    Arg:
+
+    * name `string` containing the standard name.
+    """
+    if not hasattr(iris.std_names, "DESCRIPTIONS"):
+        return None
+
+    error = False
+    if name in iris.std_names.STD_NAMES:
+        descr = iris.std_names.DESCRIPTIONS[name]
+    elif hasattr(iris.std_names, "ALIASES"):
+        if name in iris.std_names.ALIASES:
+            descr = iris.std_names.DESCRIPTIONS[iris.std_names.ALIASES[name]]
+            if iris.std_names._MODE == iris.std_names._REPLACE:
+                msg = (
+                    "\nStandard name {!r} is aliased and is \nreplaced by {!r}.\n"
+                    "The description for the latter will be used."
+                )
+                warnings.warn(msg.format(name, iris.std_names.ALIASES[name]))
+        else:
+            error = True
+    else:
+        error = True
+
+    if error:
+        raise ValueError("{!r} is not a valid standard name.".format(name))
+    return descr
+
+
+def check_valid_std_name(name):
+    """
+    Validate a standard name or alias and return the name to use.
+
+    Arg:
+
+    * name `string` containing the prospective standard name.
+
+    Depending on the setting of the alias processing the following will
+    happen if 'name' is an aliased standard name:
+    "accept" - the aliased standard name is accepted as valid and returned,
+    "warn" - a warning is issued, otherwise the same as "accept",
+    "replace" - the valid standard name is returned without warning.
+
+    When 'name' is neither a standard name nor an alias an error results.
+    """
+    error = False
+    if name in iris.std_names.STD_NAMES:
+        std_name = name
+    elif hasattr(iris.std_names, "ALIASES"):
+        if name in iris.std_names.ALIASES:
+            if iris.std_names._MODE == iris.std_names._REPLACE:
+                std_name = iris.std_names.ALIASES[name]
+            else:
+                std_name = name
+                if iris.std_names._MODE == iris.std_names._WARN:
+                    msg = "\nThe standard name {!r} is aliased should be \nreplaced by {!r}."
+                    warnings.warn(
+                        msg.format(name, iris.std_names.ALIASES[name])
+                    )
+        else:
+            error = True
+    else:
+        error = True
+
+    if error:
+        raise ValueError("{!r} is not a valid standard_name.".format(name))
+    return std_name
diff --git a/tools/generate_std_names.py b/tools/generate_std_names.py
@@ -7,8 +7,9 @@
 A script to convert the standard names information from the provided XML
 file into a Python dictionary format.
 
-Takes two arguments: the first is the XML file to process and the second
-is the name of the file to write the Python dictionary file into.
+Takes two or three arguments: the first is the XML file to process and the second
+is the name of the file to write the Python dictionary file into. The optional
+third argument, '--descr', includes the standard name descriptions in the file.
 
 By default, Iris will use the source XML file:
     etc/cf-standard-name-table.xml
@@ -20,23 +21,30 @@
 """
 
 import argparse
-import pprint
 import xml.etree.ElementTree as ET
 
 
-STD_VALUES_FILE_TEMPLATE = '''
+STD_NAME_TABLE_FILE_TEMPLATE = '''
 # Copyright Iris contributors
 #
 # This file is part of Iris and is released under the LGPL license.
 # See COPYING and COPYING.LESSER in the root of the repository for full
 # licensing details.
 """
-This file contains a dictionary of standard value names that are mapped
-to another dictionary of other standard name attributes. Currently only
-the `canonical_unit` exists in these attribute dictionaries.
-
 This file is automatically generated. Do not edit this file by hand.
 
+The file contains the following elements, formatted as python code:
+ * A few variables used internally in the standard name processing.
+   These begin with an underscore.
+ * Information on the source standard name table version.
+ * A dictionary of standard value names that are mapped
+   to another dictionary of other standard name attributes.
+   Currently only the `canonical_unit` exists in these attribute
+   dictionaries.
+ * A dictionary of aliased standard names that are mapped to the
+   current standard name.
+ * Optionally, a dictionary of standard names mapped to their descriptions.
+
 The file will be generated during a standard build/installation::
 
     python setup.py build
@@ -49,10 +57,17 @@
 Or for more control (e.g. to use an alternative XML file) via::
 
     python tools/generate_std_names.py XML_FILE MODULE_FILE
-
 """
+'''.lstrip()
+
+
+def found_or_none(elem):
+    return elem.text if elem is not None else None
+
 
-STD_NAMES = '''.lstrip()
+# Take care of inconsistent quotes in standard name descriptions.
+def replace_quote(txt):
+    return txt.replace('"', "'") if txt is not None else None
 
 
 def process_name_table(tree, element_name, *child_elements):
@@ -62,32 +77,72 @@ def process_name_table(tree, element_name, *child_elements):
     """
     for elem in tree.iterfind(element_name):
         sub_section = {}
-
         for child_elem in child_elements:
-            found_elem = elem.find(child_elem)
-            sub_section[child_elem] = found_elem.text if found_elem is not None else None
-
+            sub_section[child_elem] = found_or_none(elem.find(child_elem))
         yield {elem.get("id") : sub_section}
 
 
-def to_dict(infile, outfile):
-    values = {}
-    aliases = {}
-
+def prettydict(outfile, varname, data):
+    """Pretty formatted output of the data (dict) assigned to the variable 'varname'."""
+    outfile.write(f'{varname} = {{\n')
+    for k, v in dict(sorted(data.items())).items():
+        outfile.write(f'    "{k}": "{v}",\n')
+    outfile.write("}\n\n")
+
+
+def decode_version(outfile, tree):
+    """Decode the version information in the xml header information."""
+    version = {}
+    for elem in ["table_name", "version_number", "last_modified", "institution", "contact"]:
+        version[elem] = found_or_none(tree.find(elem))
+    if version["table_name"] is None:
+        if (version["institution"] == "Centre for Environmental Data Analysis"
+                and version["contact"] == "[email protected]"):
+            version["table_name"] = "CF-StdNameTable"
+        else:
+            version["table_name"] = "USER-StdNameTable"
+    prettydict(outfile, "VERSION", version)
+    version_string = "-".join(version[k] for k in ["table_name", "version_number"])
+    outfile.write(f'CONVENTIONS_STRING = "{version_string}"\n\n')
+
+
+def write_useful_variables(outfile):
+    outfile.write(
+        '\n# The following variables are used for processing the standard names information below\n'
+        '_ACCEPT = "accept"\n'
+        '_WARN = "warn"\n'
+        '_REPLACE ="replace"\n'
+        '_ALTERNATIVE_MODES = [_ACCEPT, _WARN, _REPLACE]\n'
+        '_DEFAULT = "warn"\n'
+        '_MODE = _DEFAULT\n\n'
+    )
+
+
+def decode_standard_name_table(infile, outfile, description=False):
+    """Process the different parts of the xml file."""
     tree = ET.parse(infile)
 
+    outfile.write(STD_NAME_TABLE_FILE_TEMPLATE)
+    write_useful_variables(outfile)
+    decode_version(outfile, tree)
+
+    data = {}
     for section in process_name_table(tree, 'entry', 'canonical_units'):
-        values.update(section)
+        data.update(section)
+    prettydict(outfile, "STD_NAMES", data)
 
+    data = {}
     for section in process_name_table(tree, 'alias', 'entry_id'):
-        aliases.update(section)
-
-    for key, valued in aliases.items():
-        values.update({
-                key : {'canonical_units' : values.get(valued['entry_id']).get('canonical_units')}
-            })
+        for k, v in section.items():
+            data.update({k: v["entry_id"]})
+    prettydict(outfile, "ALIASES", data)
 
-    outfile.write(STD_VALUES_FILE_TEMPLATE + pprint.pformat(values))
+    if description:
+        data = {}
+        for section in process_name_table(tree, 'entry', 'description'):
+            for k, v in section.items():
+                data.update({k: replace_quote(v["description"])})
+        prettydict(outfile, "DESCRIPTIONS", data)
 
 
 if __name__ == "__main__":
@@ -97,10 +152,13 @@ def to_dict(infile, outfile):
                         help='Path to CF standard name XML')
     parser.add_argument('output', metavar='OUTPUT',
                         help='Path to resulting Python code')
+    parser.add_argument('-d', '--descr', action="store_true",
+                        help="Include standard name descriptions")
     args = parser.parse_args()
 
     encoding = {'encoding': 'utf-8'}
 
     with open(args.input, 'r', **encoding) as in_fh:
         with open(args.output, 'w', **encoding) as out_fh:
-            to_dict(in_fh, out_fh)
+            decode_standard_name_table(in_fh, out_fh, args.descr)
+            pass