Skip to content

Commit 54df335

Browse files
authored
Merge pull request #183 from CitrineInformatics/maintain/update-unit-handling
Add additional tests for unexpected fields in units
2 parents ff25e73 + f2bb344 commit 54df335

File tree

5 files changed

+75
-30
lines changed

5 files changed

+75
-30
lines changed

.travis.yml

+1
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ python:
55
- '3.8'
66
- '3.9'
77
- '3.10'
8+
- '3.11'
89
env:
910
- PINT_VERSION=0.18
1011
- PINT_VERSION=0.20

gemd/units/__init__.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# flake8: noqa
22
from .impl import parse_units, convert_units, change_definitions_file, \
3-
UndefinedUnitError, IncompatibleUnitsError
3+
UndefinedUnitError, IncompatibleUnitsError, DefinitionSyntaxError
44

55
__all__ = [parse_units, convert_units, change_definitions_file,
6-
UndefinedUnitError, IncompatibleUnitsError]
6+
UndefinedUnitError, IncompatibleUnitsError, DefinitionSyntaxError]

gemd/units/impl.py

+43-24
Original file line numberDiff line numberDiff line change
@@ -1,51 +1,66 @@
11
"""Implementation of units."""
22
import re
33

4-
from pint import UnitRegistry, Unit, register_unit_format
4+
from pint import UnitRegistry, Unit, register_unit_format, Quantity
55
from pint.compat import tokenizer
66
from tokenize import NAME, NUMBER, OP
77
# alias the error that is thrown when units are incompatible
88
# this helps to isolate the dependence on pint
99
from pint.errors import DimensionalityError as IncompatibleUnitsError # noqa Import
10-
from pint.errors import UndefinedUnitError
10+
from pint.errors import UndefinedUnitError, DefinitionSyntaxError # noqa Import
1111

1212
import functools
1313
import pkg_resources
1414
from typing import Union
1515

1616
# use the default unit registry for now
1717
DEFAULT_FILE = pkg_resources.resource_filename("gemd.units", "citrine_en.txt")
18+
_ALLOWED_OPERATORS = {"+", "-", "*", "/", "//", "^", "**", "(", ")"}
1819

1920

2021
def _scaling_preprocessor(input_string: str) -> str:
2122
"""Preprocessor that turns scaling factors into non-dimensional units."""
2223
global _REGISTRY
23-
tokens = tokenizer(input_string)
24-
exponent = False
25-
division = False
26-
tight_division = False
24+
tokens = list(tokenizer(input_string))
2725
scales = []
2826

29-
for token in tokens:
30-
# Note that while this prevents adding a bunch of numbers to the registry,
31-
# no test would break if the `exponent` logic were removed
32-
if tight_division:
33-
# A unit for a scaling factor is in the denominator if the factor is
34-
scales[-1][-1] = token.type == NAME
35-
tight_division = False
36-
if not exponent and token.type == NUMBER:
37-
scales.append([token.string, False])
38-
tight_division = division
39-
exponent = token.type == OP and token.string in {"^", "**"}
40-
division = token.type == OP and token.string in {"/", "//"}
41-
42-
for scale, division in scales:
27+
unrecognized = [t for t in tokens if t.type == OP and t.string not in _ALLOWED_OPERATORS]
28+
if len(unrecognized) > 0:
29+
raise UndefinedUnitError(f"Unrecognized operator(s): {unrecognized}")
30+
31+
# Ignore leading numbers & operators, since Pint handles those itself
32+
start = next((i for i, token in enumerate(tokens) if token.type == NAME), len(tokens))
33+
34+
for i, token in enumerate(tokens[start:], start=start):
35+
if token.type != NUMBER:
36+
continue
37+
38+
# Note we can't run off the front because we started at a NAME
39+
first = i
40+
while tokens[first - 1].string in {'+', '-'}:
41+
first -= 1 # Include unary operations
42+
43+
if tokens[first - 1].string in {"^", "**"}:
44+
continue # Don't mangle exponents
45+
46+
# Names couple tightly to their preceding numbers, so is it a denominator?
47+
division = tokens[first - 1].string in {"/", "//"}
48+
tight = i < len(tokens) - 2 and tokens[i + 1].type == NAME
49+
50+
# Get the number
51+
substr = input_string[tokens[first].start[1]:token.end[1]]
52+
value = eval(substr)
53+
if value <= 0:
54+
raise DefinitionSyntaxError(f"Scaling factors must be positive: {substr}")
55+
scales.append([substr, value, division and tight])
56+
57+
for substr, value, division in scales:
4358
# There's probably something to be said for stashing these, but this sin
4459
# should be ameliorated by the LRU cache
45-
regex = rf"\b{re.escape(scale)}(?!=[0-9.])"
46-
valid = "_" + scale.replace(".", "_").replace("+", "").replace("-", "_")
60+
regex = rf"(?<!=[-+0-9.]){re.escape(substr)}(?!=[0-9.])"
61+
valid = "_" + substr.replace(".", "_").replace("+", "").replace("-", "_")
4762
trailing = "/" if division else ""
48-
_REGISTRY.define(f"{valid} = {scale} = {scale}")
63+
_REGISTRY.define(f"{valid} = {value} = {substr}")
4964
input_string = re.sub(regex, valid + trailing, input_string)
5065

5166
return input_string
@@ -103,7 +118,11 @@ def parse_units(units: Union[str, Unit, None]) -> Union[str, Unit, None]:
103118
elif units == '':
104119
return 'dimensionless'
105120
elif isinstance(units, str):
106-
return f"{_REGISTRY(units).u:clean}"
121+
# TODO: parse_units has a bug resolved in 0.19, but 3.7 only supports up to 0.18
122+
parsed = _REGISTRY(units)
123+
if not isinstance(parsed, Quantity) or parsed.magnitude != 1:
124+
raise ValueError(f"Unit expression cannot have a leading scaling factor. {units}")
125+
return f"{parsed.u:clean}"
107126
elif isinstance(units, Unit):
108127
return units
109128
else:

gemd/units/tests/test_parser.py

+27-3
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,18 @@
44
import pkg_resources
55
from contextlib import contextmanager
66
from pint import UnitRegistry
7-
from gemd.units import parse_units, convert_units, change_definitions_file, UndefinedUnitError
7+
from gemd.units import parse_units, convert_units, change_definitions_file, \
8+
UndefinedUnitError, DefinitionSyntaxError
89

910

1011
def test_parse_expected():
1112
"""Test that we can parse the units that we expect to be able to."""
1213
# use the default unit registry for now
1314
reg = UnitRegistry(filename=pkg_resources.resource_filename("gemd.units", "citrine_en.txt"))
1415

16+
# Pint's parse_units actually gets this wrong
17+
assert parse_units("m^-1 * newton / meter") == parse_units("N / m^2")
18+
1519
expected = [
1620
"degC", "degF", "K",
1721
"g", "kg", "mg", "ton",
@@ -24,6 +28,7 @@ def test_parse_expected():
2428
"Seconds", # Added support for some title-case units
2529
"delta_Celsius / hour", # Added to make sure pint version is right (>0.10)
2630
"g / 2.5 cm", # Scaling factors are acceptable
31+
"g / -+-25e-1 m" # Weird but fine
2732
]
2833
for unit in expected:
2934
parse_units(unit)
@@ -42,12 +47,31 @@ def test_parse_unexpected():
4247
5,
4348
"cp", # Removed because of risk of collision with cP
4449
"chain", # Survey units eliminated
45-
"SECONDS" # Not just case insensitivity
50+
"SECONDS", # Not just case insensitivity
51+
"lb : in^3", # : is not a valid operator
4652
]
4753
for unit in unexpected:
4854
with pytest.raises(UndefinedUnitError):
4955
parse_units(unit)
5056

57+
scaling = [
58+
"3 rpm", # No leading digits
59+
"16", # No values that are just integers
60+
"16.2" # No values that are just floats
61+
]
62+
for unit in scaling:
63+
with pytest.raises(ValueError, match="scaling"):
64+
parse_units(unit)
65+
66+
definition = [
67+
"/gram", # A leading operator makes no sense
68+
"g / 0 m", # Zero scaling factor
69+
"g / -2 m" # Negative scaling factor
70+
]
71+
for unit in definition:
72+
with pytest.raises(DefinitionSyntaxError):
73+
parse_units(unit)
74+
5175

5276
def test_parse_none():
5377
"""Test that None parses as None."""
@@ -59,7 +83,7 @@ def test_format():
5983
# use the default unit registry for now
6084
reg = UnitRegistry(filename=pkg_resources.resource_filename("gemd.units", "citrine_en.txt"))
6185

62-
result = parse_units("K^-2 m^-1 C^0 g^1 s^2")
86+
result = parse_units("K^-2.0 m^-1e0 C^0 g^1 s^2")
6387
assert "-" not in result
6488
assert "[time]" in reg(result).dimensionality
6589
assert "[current]" not in reg(result).dimensionality

setup.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33

44
setup(name='gemd',
5-
version='1.13.0',
5+
version='1.13.1',
66
url='http://github.com/CitrineInformatics/gemd-python',
77
description="Python binding for Citrine's GEMD data model",
88
author='Citrine Informatics',
@@ -39,5 +39,6 @@
3939
'Programming Language :: Python :: 3.8',
4040
'Programming Language :: Python :: 3.9',
4141
'Programming Language :: Python :: 3.10',
42+
'Programming Language :: Python :: 3.11',
4243
],
4344
)

0 commit comments

Comments
 (0)