Skip to content

Commit abdbc7a

Browse files
committed
Incorporate PR feedback
1 parent faee3f0 commit abdbc7a

File tree

1 file changed

+64
-24
lines changed

1 file changed

+64
-24
lines changed

gemd/units/impl.py

+64-24
Original file line number | Diff line number | Diff line change
@@ -1,17 +1,17 @@
11
"""Implementation of units."""
22
import re
33

4-
from pint import UnitRegistry, Unit, register_unit_format, Quantity
4+
from pint import UnitRegistry, Unit, register_unit_format
55
from pint.compat import tokenizer
6-
from tokenize import NAME, NUMBER, OP, Token, ERRORTOKEN
6+
from tokenize import NAME, NUMBER, OP, ERRORTOKEN, TokenInfo
77
# alias the error that is thrown when units are incompatible
88
# this helps to isolate the dependence on pint
99
from pint.errors import DimensionalityError as IncompatibleUnitsError # noqa Import
1010
from pint.errors import UndefinedUnitError, DefinitionSyntaxError # noqa Import
1111

1212
import functools
1313
import pkg_resources
14-
from typing import Union, List, Tuple
14+
from typing import Union, List, Tuple, Generator, Any
1515

1616
# use the default unit registry for now
1717
DEFAULT_FILE = pkg_resources.resource_filename("gemd.units", "citrine_en.txt")
@@ -32,21 +32,27 @@ def _as_scientific(matchobj: re.Match) -> str:
3232
return re.sub(number, _as_scientific, input_string)
3333

3434

35-
def _scaling_preprocessor(input_string: str) -> str:
36-
"""Preprocessor that turns scaling factors into non-dimensional units."""
37-
blocks: List[List[Token]] = [[]]
35+
def _scaling_find_blocks(token_stream: Generator[TokenInfo, Any, None]) -> List[List[TokenInfo]]:
36+
"""
37+
Supporting routine for _scaling_preprocessor; tokenizer stream -> blocks.
38+
39+
Takes a stream of tokens, and breaks it into lists of tokens that represent
40+
multiplicative subunits of the original expression.
41+
42+
"""
43+
result = [[]]
3844
operator_stack = []
39-
for token in tokenizer(input_string):
45+
for token in token_stream:
4046
exponent_context = any(t.string in {"**", "^"} for t in operator_stack)
4147
if token.type == OP:
4248
if token.string not in _ALLOWED_OPERATORS:
4349
raise UndefinedUnitError(f"Unrecognized operator: {token.string}")
4450

4551
if exponent_context or token.string in {"**", "^", ".", "-", "+"}:
4652
# Exponents & unaries do not change context
47-
blocks[-1].append(token)
53+
result[-1].append(token)
4854
elif token.string not in {}:
49-
blocks.append([])
55+
result.append([])
5056

5157
if token.string == '(':
5258
operator_stack.append(token)
@@ -58,21 +64,35 @@ def _scaling_preprocessor(input_string: str) -> str:
5864
operator_stack.append(token)
5965
continue # Break flow since next token is in exponent context
6066
elif token.type == NAME:
61-
if exponent_context or len(blocks[-1]) == 0 or blocks[-1][-1].type != NAME:
62-
blocks[-1].append(token)
67+
if exponent_context or len(result[-1]) == 0 or result[-1][-1].type != NAME:
68+
result[-1].append(token)
6369
else: # Break blocks for two units in a row
64-
blocks.append([token])
70+
result.append([token])
6571
elif token.type == NUMBER:
66-
blocks[-1].append(token)
72+
result[-1].append(token)
6773
elif token.type == ERRORTOKEN: # Keep non-legal Python symbols like °
68-
blocks[-1].append(token)
74+
result[-1].append(token)
6975
# Drop other tokens, such as EOF
7076

7177
if len(operator_stack) > 0 and operator_stack[-1].string in {"**", "^"}:
7278
operator_stack.pop() # Exit context for this exponential
7379

80+
return result
81+
82+
83+
def _scaling_identify_factors(
84+
input_string: str,
85+
blocks: List[List[TokenInfo]]
86+
) -> List[Tuple[str, str, str]]:
87+
"""
88+
Supporting routine for _scaling_preprocessor; blocks -> scaling terms.
89+
90+
Takes the input_string and the blocks output by _scaling_find_blocks and
91+
returns a list of tuples, each holding a substring that contains a scaling factor, the scaling
92+
factor itself, and the unit string.
93+
94+
"""
7495
todo = []
75-
blocks.pop(0) # Leading term is not allowed to be a scaling factor
7696
for block in blocks:
7797
i_exp = next((i for i, t in enumerate(block) if t.string in {"**", "^"}), len(block))
7898
i_name = next((i for i, t in enumerate(block) if t.type == NAME), None)
@@ -98,13 +118,23 @@ def _scaling_preprocessor(input_string: str) -> str:
98118
f"Replicate scaling factor ({[n[1] for n in numbers]}) in {input_string}"
99119
)
100120

101-
global _REGISTRY
121+
return todo
122+
123+
124+
def _scaling_store_and_mangle(input_string: str, todo: List[Tuple[str, str, str]]) -> str:
125+
"""
126+
Supporting routine for _scaling_preprocessor; scaling terms -> updated input_string.
127+
128+
Takes the terms to be updated, and actually updates the input_string as well as
129+
creating an entry for each in the registry.
130+
131+
"""
102132
for scaled_term, number_string, unit_string in todo:
103133
regex = rf"(?<![-+0-9.]){re.escape(scaled_term)}(?![0-9.])"
104-
stripped = re.sub(r"--", "", re.sub(r"[+\s]+", "", scaled_term))
134+
stripped = re.sub(r"[+\s]+", "", scaled_term).replace("--", "")
105135

106136
if unit_string is not None:
107-
stripped_unit = re.sub(r"--", "", re.sub(r"[+\s]+", "", unit_string))
137+
stripped_unit = re.sub(r"[+\s]+", "", unit_string).replace("--", "")
108138
long_unit = f"{_REGISTRY(stripped_unit).u}"
109139
short_unit = f"{_REGISTRY(stripped_unit).u:~}"
110140
long = stripped.replace(stripped_unit, "_" + long_unit)
@@ -124,6 +154,16 @@ def _scaling_preprocessor(input_string: str) -> str:
124154
return input_string
125155

126156

157+
def _scaling_preprocessor(input_string: str) -> str:
158+
"""Preprocessor that turns scaling factors into non-dimensional units."""
159+
blocks = _scaling_find_blocks(tokenizer(input_string))
160+
161+
blocks.pop(0) # Leading term is not allowed to be a scaling factor
162+
todo = _scaling_identify_factors(input_string, blocks)
163+
164+
return _scaling_store_and_mangle(input_string, todo)
165+
166+
127167
_REGISTRY = UnitRegistry(filename=DEFAULT_FILE,
128168
preprocessors=[_space_after_minus_preprocessor,
129169
_scientific_notation_preprocessor,
@@ -176,12 +216,12 @@ def _parse_units(units: str) -> Unit:
176216
177217
"""
178218
# TODO: pint's parse_units has a bug resolved in pint 0.19, but Python 3.7 only supports up to pint 0.18
179-
parsed: Quantity = _REGISTRY(units)
180-
if isinstance(parsed, Quantity):
219+
parsed = _REGISTRY(units)
220+
try:
181221
magnitude = parsed.magnitude
182222
result = parsed.units
183-
else:
184-
magnitude = parsed # It was non-dimensional
223+
except AttributeError: # It was non-dimensional
224+
magnitude = parsed
185225
result = _REGISTRY("").u
186226
if magnitude == 0.0:
187227
raise ValueError(f"Unit expression had a zero scaling factor. {units}")
@@ -250,7 +290,7 @@ def convert_units(value: float, starting_unit: str, final_unit: str) -> float:
250290
if starting_unit == final_unit:
251291
return value # skip computation
252292
else:
253-
resolved_final_unit = _REGISTRY(final_unit) # `to` bypasses preparser
293+
resolved_final_unit = _REGISTRY(final_unit).u # `to` bypasses preparser
254294
return _REGISTRY.Quantity(value, starting_unit).to(resolved_final_unit).magnitude
255295

256296

@@ -271,7 +311,7 @@ def get_base_units(units: Union[str, Unit]) -> Tuple[Unit, float, float]:
271311
272312
"""
273313
if isinstance(units, str):
274-
units = _REGISTRY(units)
314+
units = _REGISTRY(units).u
275315
ratio, base_unit = _REGISTRY.get_base_units(units)
276316
offset = _REGISTRY.Quantity(0, units).to(_REGISTRY.Quantity(0, base_unit)).magnitude
277317
return base_unit, float(ratio), offset

0 commit comments

Comments
 (0)