|
1 | 1 | """Implementation of units."""
|
2 | 2 | import re
|
3 | 3 |
|
4 |
| -from pint import UnitRegistry, Unit, register_unit_format |
| 4 | +from pint import UnitRegistry, Unit, register_unit_format, Quantity |
5 | 5 | from pint.compat import tokenizer
|
6 | 6 | from tokenize import NAME, NUMBER, OP
|
7 | 7 | # alias the error that is thrown when units are incompatible
|
8 | 8 | # this helps to isolate the dependence on pint
|
9 | 9 | from pint.errors import DimensionalityError as IncompatibleUnitsError # noqa Import
|
10 |
| -from pint.errors import UndefinedUnitError |
| 10 | +from pint.errors import UndefinedUnitError, DefinitionSyntaxError # noqa Import |
11 | 11 |
|
12 | 12 | import functools
|
13 | 13 | import pkg_resources
|
14 | 14 | from typing import Union
|
15 | 15 |
|
# use the default unit registry for now
# DEFAULT_FILE is the location of the ``citrine_en.txt`` resource shipped in the
# ``gemd.units`` package, as resolved by pkg_resources.
DEFAULT_FILE = pkg_resources.resource_filename("gemd.units", "citrine_en.txt")
# Operator tokens accepted by _scaling_preprocessor; any other OP token in a
# unit expression makes it raise UndefinedUnitError.
_ALLOWED_OPERATORS = {"+", "-", "*", "/", "//", "^", "**", "(", ")"}
18 | 19 |
|
19 | 20 |
|
def _scaling_preprocessor(input_string: str) -> str:
    """
    Preprocessor that turns scaling factors into non-dimensional units.

    Every numeric literal that appears after the first unit name, and that is
    not an exponent, is registered with the unit registry as a dimensionless
    unit and replaced in the expression by that unit's placeholder name.
    Numbers before the first unit name are left alone so that Pint handles
    them itself.

    Parameters
    ----------
    input_string: str
        The unit expression to preprocess.  It is assumed to be a single line,
        since token column offsets are used to slice it.

    Returns
    -------
    str
        The expression with each scaling factor replaced by its placeholder.

    Raises
    ------
    UndefinedUnitError
        If the expression contains an operator outside ``_ALLOWED_OPERATORS``.
    DefinitionSyntaxError
        If a scaling factor is zero or negative.

    """
    tokens = list(tokenizer(input_string))

    unrecognized = [t for t in tokens if t.type == OP and t.string not in _ALLOWED_OPERATORS]
    if unrecognized:
        raise UndefinedUnitError(f"Unrecognized operator(s): {unrecognized}")

    # Ignore leading numbers & operators, since Pint handles those itself
    start = next((i for i, token in enumerate(tokens) if token.type == NAME), len(tokens))

    # Each entry: (begin column, end column, source text, value, is tight denominator)
    scales = []
    for i, token in enumerate(tokens[start:], start=start):
        if token.type != NUMBER:
            continue

        # Note we can't run off the front because we started at a NAME
        first = i
        while tokens[first - 1].string in {"+", "-"}:
            first -= 1  # Include unary operations

        if tokens[first - 1].string in {"^", "**"}:
            continue  # Don't mangle exponents

        # Names couple tightly to their preceding numbers, so is it a denominator?
        division = tokens[first - 1].string in {"/", "//"}
        tight = i < len(tokens) - 2 and tokens[i + 1].type == NAME

        # Get the number, including any sign operators that precede it
        begin, end = tokens[first].start[1], token.end[1]
        substr = input_string[begin:end]
        # NOTE(security): eval runs on caller-supplied text.  The slice is limited
        # to '+'/'-' operator tokens followed by one NUMBER token, which bounds what
        # can be evaluated, but treat any widening of this slice with care.
        value = eval(substr)
        if value <= 0:
            raise DefinitionSyntaxError(f"Scaling factors must be positive: {substr}")
        scales.append((begin, end, substr, value, division and tight))

    # Splice the placeholders in by position rather than by regex search, so that
    # only the tokens selected above are rewritten.  A textual search would also
    # hit exponents, leading magnitudes, and digits inside placeholders that were
    # already inserted (e.g. the "2" in "m^2 / 2 s").
    pieces = []
    cursor = 0
    defined = set()
    for begin, end, substr, value, division in scales:
        valid = "_" + substr.replace(".", "_").replace("+", "").replace("-", "_")
        if substr not in defined:
            # There's probably something to be said for stashing these, but this sin
            # should be ameliorated by the LRU cache
            _REGISTRY.define(f"{valid} = {value} = {substr}")
            defined.add(substr)
        trailing = "/" if division else ""
        pieces.append(input_string[cursor:begin])
        pieces.append(valid + trailing)
        cursor = end
    pieces.append(input_string[cursor:])

    return "".join(pieces)
|
@@ -103,7 +118,11 @@ def parse_units(units: Union[str, Unit, None]) -> Union[str, Unit, None]:
|
103 | 118 | elif units == '':
|
104 | 119 | return 'dimensionless'
|
105 | 120 | elif isinstance(units, str):
|
106 |
| - return f"{_REGISTRY(units).u:clean}" |
| 121 | + # TODO: parse_units has a bug resolved in 0.19, but 3.7 only supports up to 0.18 |
| 122 | + parsed = _REGISTRY(units) |
| 123 | + if not isinstance(parsed, Quantity) or parsed.magnitude != 1: |
| 124 | + raise ValueError(f"Unit expression cannot have a leading scaling factor. {units}") |
| 125 | + return f"{parsed.u:clean}" |
107 | 126 | elif isinstance(units, Unit):
|
108 | 127 | return units
|
109 | 128 | else:
|
|
0 commit comments