|
1 | 1 | """Implementation of units."""
|
2 | 2 | import re
|
3 | 3 |
|
4 |
| -from pint import UnitRegistry, Unit, register_unit_format |
| 4 | +from pint import UnitRegistry, Unit, register_unit_format, Quantity |
5 | 5 | from pint.compat import tokenizer
|
6 | 6 | from tokenize import NAME, NUMBER, OP
|
7 | 7 | # alias the error that is thrown when units are incompatible
|
8 | 8 | # this helps to isolate the dependence on pint
|
9 | 9 | from pint.errors import DimensionalityError as IncompatibleUnitsError # noqa Import
|
10 |
| -from pint.errors import UndefinedUnitError |
| 10 | +from pint.errors import UndefinedUnitError, DefinitionSyntaxError # noqa Import |
11 | 11 |
|
12 | 12 | import functools
|
13 | 13 | import pkg_resources
|
14 | 14 | from typing import Union
|
15 | 15 |
|
# Unit definitions file bundled in the gemd.units package; used as the default for now
DEFAULT_FILE = pkg_resources.resource_filename("gemd.units", "citrine_en.txt")
# Operator tokens accepted in a unit expression; any other operator is rejected
_ALLOWED_OPERATORS = {
    "+", "-", "*", "/", "//", "^", "**", "(", ")",
}
18 | 19 |
|
19 | 20 |
|
def _scaling_preprocessor(input_string: str) -> str:
    """
    Preprocessor that turns scaling factors into non-dimensional units.

    Every number that appears after the first unit name, and that is not an
    exponent, is registered in the module registry as its own unit and is
    replaced in the expression by the generated unit name.  Leading numbers
    and exponents are left untouched.

    Parameters
    ----------
    input_string: str
        The unit expression to preprocess.  Token column offsets are used to
        slice it, so it is assumed to be a single line.

    Returns
    -------
    str
        The expression with interior scaling factors replaced by unit names.

    Raises
    ------
    UndefinedUnitError
        If the expression contains an operator outside `_ALLOWED_OPERATORS`.
    DefinitionSyntaxError
        If a scaling factor is not strictly positive.

    """
    global _REGISTRY
    tokens = list(tokenizer(input_string))

    unrecognized = [t for t in tokens if t.type == OP and t.string not in _ALLOWED_OPERATORS]
    if unrecognized:
        raise UndefinedUnitError(f"Unrecognized operator(s): {unrecognized}")

    # Ignore leading numbers & operators, since Pint handles those itself
    start = next((i for i, token in enumerate(tokens) if token.type == NAME), len(tokens))

    # Each entry: (begin column, end column, source text, numeric value, is tight denominator)
    scales = []
    for i, token in enumerate(tokens[start:], start=start):
        if token.type != NUMBER:
            continue

        # Note we can't run off the front because we started at a NAME
        first = i
        while tokens[first - 1].string in {'+', '-'}:
            first -= 1  # Include unary operations

        if tokens[first - 1].string in {"^", "**"}:
            continue  # Don't mangle exponents

        # Names couple tightly to their preceding numbers, so is it a denominator?
        division = tokens[first - 1].string in {"/", "//"}
        # NOTE(review): the `- 2` presumably skips the tokenizer's end-of-input
        # tokens -- confirm against pint.compat.tokenizer
        tight = i < len(tokens) - 2 and tokens[i + 1].type == NAME

        # Get the number, including any unary operators in front of it
        begin, end = tokens[first].start[1], token.end[1]
        substr = input_string[begin:end]
        # SECURITY NOTE: eval runs on text taken from the unit expression.  It only
        # ever sees '+'/'-' operators followed by one NUMBER token, but it should be
        # reviewed if unit strings can come from untrusted sources.
        value = eval(substr)
        if value <= 0:
            raise DefinitionSyntaxError(f"Scaling factors must be positive: {substr}")
        scales.append((begin, end, substr, value, division and tight))

    # Splice the generated names in at the recorded token positions.  A global
    # text substitution would also rewrite exponents and leading numbers that
    # happen to share the same digits, and would substitute a repeated factor
    # more than once.
    pieces = []
    cursor = 0
    for begin, end, substr, value, division in scales:
        # There's probably something to be said for stashing these, but this sin
        # should be ameliorated by the LRU cache
        valid = "_" + substr.replace(".", "_").replace("+", "").replace("-", "_")
        trailing = "/" if division else ""
        _REGISTRY.define(f"{valid} = {value} = {substr}")
        pieces.append(input_string[cursor:begin])
        pieces.append(valid + trailing)
        cursor = end
    pieces.append(input_string[cursor:])

    return "".join(pieces)
|
@@ -112,8 +119,8 @@ def parse_units(units: Union[str, Unit, None]) -> Union[str, Unit, None]:
|
112 | 119 | return 'dimensionless'
|
113 | 120 | elif isinstance(units, str):
|
114 | 121 | parsed = _REGISTRY(units)
|
115 |
| - if isinstance(parsed, int) or parsed.magnitude != 1: |
116 |
| - raise ValueError("Unit expression cannot have a scaling factor.") |
| 122 | + if not isinstance(parsed, Quantity) or parsed.magnitude != 1: |
| 123 | + raise ValueError(f"Units cannot start with (or just be) numbers: {units}") |
117 | 124 | return f"{parsed.u:clean}"
|
118 | 125 | elif isinstance(units, Unit):
|
119 | 126 | return units
|
|
0 commit comments