Skip to content

Commit 0a806a5

Browse files
committed
Implement feedback
1 parent c304b62 commit 0a806a5

File tree

3 files changed

+66
-40
lines changed

3 files changed

+66
-40
lines changed

gemd/units/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# flake8: noqa
22
from .impl import parse_units, convert_units, change_definitions_file, \
3-
UndefinedUnitError, IncompatibleUnitsError
3+
UndefinedUnitError, IncompatibleUnitsError, DefinitionSyntaxError
44

55
__all__ = [parse_units, convert_units, change_definitions_file,
6-
UndefinedUnitError, IncompatibleUnitsError]
6+
UndefinedUnitError, IncompatibleUnitsError, DefinitionSyntaxError]

gemd/units/impl.py

Lines changed: 40 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1,59 +1,66 @@
11
"""Implementation of units."""
22
import re
33

4-
from pint import UnitRegistry, Unit, register_unit_format
4+
from pint import UnitRegistry, Unit, register_unit_format, Quantity
55
from pint.compat import tokenizer
66
from tokenize import NAME, NUMBER, OP
77
# alias the error that is thrown when units are incompatible
88
# this helps to isolate the dependence on pint
99
from pint.errors import DimensionalityError as IncompatibleUnitsError # noqa Import
10-
from pint.errors import UndefinedUnitError
10+
from pint.errors import UndefinedUnitError, DefinitionSyntaxError # noqa Import
1111

1212
import functools
1313
import pkg_resources
1414
from typing import Union
1515

1616
# use the default unit registry for now
1717
DEFAULT_FILE = pkg_resources.resource_filename("gemd.units", "citrine_en.txt")
18+
_ALLOWED_OPERATORS = {"+", "-", "*", "/", "//", "^", "**", "(", ")"}
1819

1920

2021
def _scaling_preprocessor(input_string: str) -> str:
2122
"""Preprocessor that turns scaling factors into non-dimensional units."""
2223
global _REGISTRY
23-
tokens = tokenizer(input_string)
24-
exponent = False
25-
division = False
26-
tight_division = False
24+
tokens = list(tokenizer(input_string))
2725
scales = []
2826

29-
if next(token for token in tokens).type == NUMBER:
30-
return input_string # The unit can't have a leading number; scaling factors are internal
31-
32-
for token in tokens:
33-
# Note that while this prevents adding a bunch of numbers to the registry,
34-
# no test would break if the `exponent` logic were removed
35-
if tight_division:
36-
# A unit for a scaling factor is in the denominator if the factor is
37-
scales[-1][-1] = token.type == NAME
38-
tight_division = False
39-
if not exponent and token.type == NUMBER:
40-
scales.append([token.string, False])
41-
tight_division = division
42-
if token.type == OP:
43-
if token.string not in {"+", "-", "*", "/", "//", "^", "**", "(", ")"}:
44-
raise UndefinedUnitError(f"Unrecognized operator: {token.string}")
45-
exponent = token.string in {"^", "**"}
46-
division = token.string in {"/", "//"}
47-
else:
48-
exponent, division = False, False
49-
50-
for scale, division in scales:
27+
unrecognized = [t for t in tokens if t.type == OP and t.string not in _ALLOWED_OPERATORS]
28+
if len(unrecognized) > 0:
29+
raise UndefinedUnitError(f"Unrecognized operator(s): {unrecognized}")
30+
31+
# Ignore leading numbers & operators, since Pint handles those itself
32+
start = next((i for i, token in enumerate(tokens) if token.type == NAME), len(tokens))
33+
34+
for i, token in enumerate(tokens[start:], start=start):
35+
if token.type != NUMBER:
36+
continue
37+
38+
# Note we can't run off the front because we started at a NAME
39+
first = i
40+
while tokens[first - 1].string in {'+', '-'}:
41+
first -= 1 # Include unary operations
42+
43+
if tokens[first - 1].string in {"^", "**"}:
44+
continue # Don't mangle exponents
45+
46+
# Names couple tightly to their preceding numbers, so is it a denominator?
47+
division = tokens[first - 1].string in {"/", "//"}
48+
tight = i < len(tokens) - 2 and tokens[i + 1].type == NAME
49+
50+
# Get the number
51+
substr = input_string[tokens[first].start[1]:token.end[1]]
52+
value = eval(substr)
53+
if value <= 0:
54+
raise DefinitionSyntaxError(f"Scaling factors must be positive: {substr}")
55+
scales.append([substr, value, division and tight])
56+
57+
for substr, value, division in scales:
5158
# There's probably something to be said for stashing these, but this sin
5259
# should be ameliorated by the LRU cache
53-
regex = rf"\b{re.escape(scale)}(?!=[0-9.])"
54-
valid = "_" + scale.replace(".", "_").replace("+", "").replace("-", "_")
60+
regex = rf"(?<!=[-+0-9.]){re.escape(substr)}(?!=[0-9.])"
61+
valid = "_" + substr.replace(".", "_").replace("+", "").replace("-", "_")
5562
trailing = "/" if division else ""
56-
_REGISTRY.define(f"{valid} = {scale} = {scale}")
63+
_REGISTRY.define(f"{valid} = {value} = {substr}")
5764
input_string = re.sub(regex, valid + trailing, input_string)
5865

5966
return input_string
@@ -112,8 +119,8 @@ def parse_units(units: Union[str, Unit, None]) -> Union[str, Unit, None]:
112119
return 'dimensionless'
113120
elif isinstance(units, str):
114121
parsed = _REGISTRY(units)
115-
if isinstance(parsed, int) or parsed.magnitude != 1:
116-
raise ValueError("Unit expression cannot have a scaling factor.")
122+
if not isinstance(parsed, Quantity) or parsed.magnitude != 1:
123+
raise ValueError(f"Units cannot start with (or just be) numbers: {units}")
117124
return f"{parsed.u:clean}"
118125
elif isinstance(units, Unit):
119126
return units

gemd/units/tests/test_parser.py

Lines changed: 24 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,18 @@
44
import pkg_resources
55
from contextlib import contextmanager
66
from pint import UnitRegistry
7-
from gemd.units import parse_units, convert_units, change_definitions_file, UndefinedUnitError
7+
from gemd.units import parse_units, convert_units, change_definitions_file, \
8+
UndefinedUnitError, DefinitionSyntaxError
89

910

1011
def test_parse_expected():
1112
"""Test that we can parse the units that we expect to be able to."""
1213
# use the default unit registry for now
1314
reg = UnitRegistry(filename=pkg_resources.resource_filename("gemd.units", "citrine_en.txt"))
1415

16+
# Pint's parse_units actually gets this wrong
17+
assert parse_units("m^-1 * newton / meter") == parse_units("N / m^2")
18+
1519
expected = [
1620
"degC", "degF", "K",
1721
"g", "kg", "mg", "ton",
@@ -24,6 +28,7 @@ def test_parse_expected():
2428
"Seconds", # Added support for some title-case units
2529
"delta_Celsius / hour", # Added to make sure pint version is right (>0.10)
2630
"g / 2.5 cm", # Scaling factors are acceptable
31+
"g / -+-25e-1 m" # Weird but fine
2732
]
2833
for unit in expected:
2934
parse_units(unit)
@@ -43,14 +48,28 @@ def test_parse_unexpected():
4348
"cp", # Removed because of risk of collision with cP
4449
"chain", # Survey units eliminated
4550
"SECONDS", # Not just case insensitivity
46-
"lb : in^3", # Not just case insensitivity
51+
"lb : in^3", # : is not a valid operator
4752
]
4853
for unit in unexpected:
4954
with pytest.raises(UndefinedUnitError):
5055
parse_units(unit)
5156

52-
for unit in ("3 rpm", "16"):
53-
with pytest.raises(ValueError, match="scaling"):
57+
scaling = [
58+
"3 rpm", # No leading digits
59+
"16", # No values that are just integers
60+
"16.2" # No values that are just floats
61+
]
62+
for unit in scaling:
63+
with pytest.raises(ValueError):
64+
parse_units(unit)
65+
66+
definition = [
67+
"/gram", # A leading operator makes no sense
68+
"g / 0 m", # Zero scaling factor
69+
"g / -2 m" # Negative scaling factor
70+
]
71+
for unit in definition:
72+
with pytest.raises(DefinitionSyntaxError):
5473
parse_units(unit)
5574

5675

@@ -64,7 +83,7 @@ def test_format():
6483
# use the default unit registry for now
6584
reg = UnitRegistry(filename=pkg_resources.resource_filename("gemd.units", "citrine_en.txt"))
6685

67-
result = parse_units("K^-2 m^-1 C^0 g^1 s^2")
86+
result = parse_units("K^-2.0 m^-1e0 C^0 g^1 s^2")
6887
assert "-" not in result
6988
assert "[time]" in reg(result).dimensionality
7089
assert "[current]" not in reg(result).dimensionality

0 commit comments

Comments
 (0)