|
1 | 1 | """Implementation of units.""" |
2 | 2 | import re |
3 | 3 |
|
4 | | -from pint import UnitRegistry, Unit, register_unit_format |
| 4 | +from pint import UnitRegistry, Unit, register_unit_format, Quantity |
5 | 5 | from pint.compat import tokenizer |
6 | 6 | from tokenize import NAME, NUMBER, OP |
7 | 7 | # alias the error that is thrown when units are incompatible |
8 | 8 | # this helps to isolate the dependence on pint |
9 | 9 | from pint.errors import DimensionalityError as IncompatibleUnitsError # noqa Import |
10 | | -from pint.errors import UndefinedUnitError |
| 10 | +from pint.errors import UndefinedUnitError, DefinitionSyntaxError # noqa Import |
11 | 11 |
|
12 | 12 | import functools |
13 | 13 | import pkg_resources |
14 | 14 | from typing import Union |
15 | 15 |
|
# use the default unit registry for now
DEFAULT_FILE = pkg_resources.resource_filename("gemd.units", "citrine_en.txt")
# Operator tokens that _scaling_preprocessor accepts in a unit expression;
# any other OP token makes it raise UndefinedUnitError.
_ALLOWED_OPERATORS = {"+", "-", "*", "/", "//", "^", "**", "(", ")"}
18 | 19 |
|
19 | 20 |
|
def _scaling_preprocessor(input_string: str) -> str:
    """
    Preprocessor that turns scaling factors into non-dimensional units.

    Every number that appears after the first unit name (and is not an exponent)
    is registered on the module-level registry as a dimensionless unit, and that
    number is replaced in the expression by the name of the registered unit.
    Leading numbers and operators are left untouched, since Pint handles those itself.

    Parameters
    ----------
    input_string: str
        The unit expression to preprocess; assumed to be a single line.

    Returns
    -------
    str
        The expression with each internal scaling factor replaced by a unit name.

    Raises
    ------
    UndefinedUnitError
        If the expression contains an operator outside of _ALLOWED_OPERATORS.
    DefinitionSyntaxError
        If a scaling factor is not a positive real number.

    """
    tokens = list(tokenizer(input_string))

    unrecognized = [t for t in tokens if t.type == OP and t.string not in _ALLOWED_OPERATORS]
    if unrecognized:
        raise UndefinedUnitError(f"Unrecognized operator(s): {unrecognized}")

    # Ignore leading numbers & operators, since Pint handles those itself
    start = next((i for i, token in enumerate(tokens) if token.type == NAME), len(tokens))

    # Validate every factor before touching the registry
    scales = []
    for i, token in enumerate(tokens[start:], start=start):
        if token.type != NUMBER:
            continue

        # Note we can't run off the front because we started at a NAME
        first = i
        while tokens[first - 1].string in {"+", "-"}:
            first -= 1  # Include unary operations

        if tokens[first - 1].string in {"^", "**"}:
            continue  # Don't mangle exponents

        # Names couple tightly to their preceding numbers, so is it a denominator?
        division = tokens[first - 1].string in {"/", "//"}
        tight = i < len(tokens) - 2 and tokens[i + 1].type == NAME

        # Get the number, remembering exactly which columns it occupies
        begin, end = tokens[first].start[1], token.end[1]
        substr = input_string[begin:end]
        # NOTE(security): eval is bounded here -- substr consists solely of a single
        # NUMBER token preceded by unary +/- operators, so it can only be a numeric literal.
        value = eval(substr)
        # Complex literals (e.g., 1j) cannot be ordered, so reject them explicitly
        if not isinstance(value, (int, float)) or value <= 0:
            raise DefinitionSyntaxError(f"Scaling factors must be positive: {substr}")
        scales.append((begin, end, substr, value, division and tight))

    # Splice the replacements in by position rather than by pattern, so that digits
    # inside names, exponents, other numbers and leading magnitudes are never rewritten.
    # Spans are already ordered and non-overlapping because tokens were scanned in order.
    pieces = []
    cursor = 0
    for begin, end, substr, value, division in scales:
        # There's probably something to be said for stashing these, but this sin
        # should be ameliorated by the LRU cache
        compact = "".join(substr.split())  # Drop whitespace between a sign and its number
        valid = "_" + compact.replace(".", "_").replace("+", "").replace("-", "_")
        trailing = "/" if division else ""
        _REGISTRY.define(f"{valid} = {value} = {compact}")
        pieces.append(input_string[cursor:begin])
        pieces.append(valid + trailing)
        cursor = end
    pieces.append(input_string[cursor:])

    return "".join(pieces)
@@ -112,8 +119,8 @@ def parse_units(units: Union[str, Unit, None]) -> Union[str, Unit, None]: |
112 | 119 | return 'dimensionless' |
113 | 120 | elif isinstance(units, str): |
114 | 121 | parsed = _REGISTRY(units) |
115 | | - if isinstance(parsed, int) or parsed.magnitude != 1: |
116 | | - raise ValueError("Unit expression cannot have a scaling factor.") |
| 122 | + if not isinstance(parsed, Quantity) or parsed.magnitude != 1: |
| 123 | + raise ValueError(f"Units cannot start with (or just be) numbers: {units}") |
117 | 124 | return f"{parsed.u:clean}" |
118 | 125 | elif isinstance(units, Unit): |
119 | 126 | return units |
|
0 commit comments