1
1
"""Implementation of units."""
2
2
import re
3
3
4
- from pint import UnitRegistry , Unit , register_unit_format , Quantity
4
+ from pint import UnitRegistry , Unit , register_unit_format
5
5
from pint .compat import tokenizer
6
- from tokenize import NAME , NUMBER , OP , Token , ERRORTOKEN
6
+ from tokenize import NAME , NUMBER , OP , ERRORTOKEN , TokenInfo
7
7
# alias the error that is thrown when units are incompatible
8
8
# this helps to isolate the dependence on pint
9
9
from pint .errors import DimensionalityError as IncompatibleUnitsError # noqa Import
10
10
from pint .errors import UndefinedUnitError , DefinitionSyntaxError # noqa Import
11
11
12
12
import functools
13
13
import pkg_resources
14
- from typing import Union , List , Tuple
14
+ from typing import Union , List , Tuple , Generator , Any
15
15
16
16
# use the default unit registry for now
17
17
DEFAULT_FILE = pkg_resources .resource_filename ("gemd.units" , "citrine_en.txt" )
@@ -32,21 +32,27 @@ def _as_scientific(matchobj: re.Match) -> str:
32
32
return re .sub (number , _as_scientific , input_string )
33
33
34
34
35
- def _scaling_preprocessor (input_string : str ) -> str :
36
- """Preprocessor that turns scaling factors into non-dimensional units."""
37
- blocks : List [List [Token ]] = [[]]
35
+ def _scaling_find_blocks (token_stream : Generator [TokenInfo , Any , None ]) -> List [List [TokenInfo ]]:
36
+ """
37
+ Supporting routine for _scaling_preprocessor; tokenizer stream -> blocks.
38
+
39
+ Takes a stream of tokens, and breaks it into lists of tokens that represent
40
+ multiplicative subunits of the original expression.
41
+
42
+ """
43
+ result = [[]]
38
44
operator_stack = []
39
- for token in tokenizer ( input_string ) :
45
+ for token in token_stream :
40
46
exponent_context = any (t .string in {"**" , "^" } for t in operator_stack )
41
47
if token .type == OP :
42
48
if token .string not in _ALLOWED_OPERATORS :
43
49
raise UndefinedUnitError (f"Unrecognized operator: { token .string } " )
44
50
45
51
if exponent_context or token .string in {"**" , "^" , "." , "-" , "+" }:
46
52
# Exponents & unaries do not change context
47
- blocks [- 1 ].append (token )
53
+ result [- 1 ].append (token )
48
54
elif token .string not in {}:
49
- blocks .append ([])
55
+ result .append ([])
50
56
51
57
if token .string == '(' :
52
58
operator_stack .append (token )
@@ -58,21 +64,35 @@ def _scaling_preprocessor(input_string: str) -> str:
58
64
operator_stack .append (token )
59
65
continue # Break flow since next token is in exponent context
60
66
elif token .type == NAME :
61
- if exponent_context or len (blocks [- 1 ]) == 0 or blocks [- 1 ][- 1 ].type != NAME :
62
- blocks [- 1 ].append (token )
67
+ if exponent_context or len (result [- 1 ]) == 0 or result [- 1 ][- 1 ].type != NAME :
68
+ result [- 1 ].append (token )
63
69
else : # Break blocks for two units in a row
64
- blocks .append ([token ])
70
+ result .append ([token ])
65
71
elif token .type == NUMBER :
66
- blocks [- 1 ].append (token )
72
+ result [- 1 ].append (token )
67
73
elif token .type == ERRORTOKEN : # Keep non-legal Python symbols like °
68
- blocks [- 1 ].append (token )
74
+ result [- 1 ].append (token )
69
75
# Drop other tokens, such as EOF
70
76
71
77
if len (operator_stack ) > 0 and operator_stack [- 1 ].string in {"**" , "^" }:
72
78
operator_stack .pop () # Exit context for this exponential
73
79
80
+ return result
81
+
82
+
83
+ def _scaling_identify_factors (
84
+ input_string : str ,
85
+ blocks : List [List [TokenInfo ]]
86
+ ) -> List [Tuple [str , str , str ]]:
87
+ """
88
+ Supporting routine for _scaling_preprocessor; blocks -> scaling terms.
89
+
90
+ Takes the input_string and the blocks output by _scaling_find_blocks and
91
+ returns a tuple of the substrings that contain scaling factors, the scaling
92
+ factor itself, and the unit string.
93
+
94
+ """
74
95
todo = []
75
- blocks .pop (0 ) # Leading term is not allowed to be a scaling factor
76
96
for block in blocks :
77
97
i_exp = next ((i for i , t in enumerate (block ) if t .string in {"**" , "^" }), len (block ))
78
98
i_name = next ((i for i , t in enumerate (block ) if t .type == NAME ), None )
@@ -98,13 +118,23 @@ def _scaling_preprocessor(input_string: str) -> str:
98
118
f"Replicate scaling factor ({ [n [1 ] for n in numbers ]} ) in { input_string } "
99
119
)
100
120
101
- global _REGISTRY
121
+ return todo
122
+
123
+
124
+ def _scaling_store_and_mangle (input_string : str , todo : List [Tuple [str , str , str ]]) -> str :
125
+ """
126
+ Supporting routine for _scaling_preprocessor; scaling terms -> updated input_string.
127
+
128
+ Takes the terms to be updated, and actually updates the input_string as well as
129
+ creating an entry for each in the registry.
130
+
131
+ """
102
132
for scaled_term , number_string , unit_string in todo :
103
133
regex = rf"(?<![-+0-9.]){ re .escape (scaled_term )} (?![0-9.])"
104
- stripped = re .sub (r"--" , "" , re . sub ( r" [+\s]+" , "" , scaled_term ))
134
+ stripped = re .sub (r"[+\s]+" , "" , scaled_term ). replace ( "--" , "" )
105
135
106
136
if unit_string is not None :
107
- stripped_unit = re .sub (r"--" , "" , re . sub ( r" [+\s]+" , "" , unit_string ))
137
+ stripped_unit = re .sub (r"[+\s]+" , "" , unit_string ). replace ( "--" , "" )
108
138
long_unit = f"{ _REGISTRY (stripped_unit ).u } "
109
139
short_unit = f"{ _REGISTRY (stripped_unit ).u :~} "
110
140
long = stripped .replace (stripped_unit , "_" + long_unit )
@@ -124,6 +154,16 @@ def _scaling_preprocessor(input_string: str) -> str:
124
154
return input_string
125
155
126
156
157
def _scaling_preprocessor(input_string: str) -> str:
    """
    Preprocessor that turns scaling factors into non-dimensional units.

    Runs the three supporting stages in order: the tokenized expression is
    grouped into blocks by _scaling_find_blocks, the scaling terms among those
    blocks are picked out by _scaling_identify_factors, and the rewritten
    expression is produced by _scaling_store_and_mangle.

    Parameters
    ----------
    input_string: str
        The unit expression to preprocess.

    Returns
    -------
    str
        The expression returned by _scaling_store_and_mangle.

    """
    token_blocks = _scaling_find_blocks(tokenizer(input_string))

    # Leading term is not allowed to be a scaling factor, so it is discarded
    # before looking for scaling terms.
    del token_blocks[0]

    scaling_terms = _scaling_identify_factors(input_string, token_blocks)
    return _scaling_store_and_mangle(input_string, scaling_terms)
165
+
166
+
127
167
_REGISTRY = UnitRegistry (filename = DEFAULT_FILE ,
128
168
preprocessors = [_space_after_minus_preprocessor ,
129
169
_scientific_notation_preprocessor ,
@@ -176,12 +216,12 @@ def _parse_units(units: str) -> Unit:
176
216
177
217
"""
178
218
# TODO: parse_units has a bug resolved in 0.19, but 3.7 only supports up to 0.18
179
- parsed : Quantity = _REGISTRY (units )
180
- if isinstance ( parsed , Quantity ) :
219
+ parsed = _REGISTRY (units )
220
+ try :
181
221
magnitude = parsed .magnitude
182
222
result = parsed .units
183
- else :
184
- magnitude = parsed # It was non-dimensional
223
+ except AttributeError : # It was non-dimensional
224
+ magnitude = parsed
185
225
result = _REGISTRY ("" ).u
186
226
if magnitude == 0.0 :
187
227
raise ValueError (f"Unit expression had a zero scaling factor. { units } " )
@@ -250,7 +290,7 @@ def convert_units(value: float, starting_unit: str, final_unit: str) -> float:
250
290
if starting_unit == final_unit :
251
291
return value # skip computation
252
292
else :
253
- resolved_final_unit = _REGISTRY (final_unit ) # `to` bypasses preparser
293
+ resolved_final_unit = _REGISTRY (final_unit ). u # `to` bypasses preparser
254
294
return _REGISTRY .Quantity (value , starting_unit ).to (resolved_final_unit ).magnitude
255
295
256
296
@@ -271,7 +311,7 @@ def get_base_units(units: Union[str, Unit]) -> Tuple[Unit, float, float]:
271
311
272
312
"""
273
313
if isinstance (units , str ):
274
- units = _REGISTRY (units )
314
+ units = _REGISTRY (units ). u
275
315
ratio , base_unit = _REGISTRY .get_base_units (units )
276
316
offset = _REGISTRY .Quantity (0 , units ).to (_REGISTRY .Quantity (0 , base_unit )).magnitude
277
317
return base_unit , float (ratio ), offset
0 commit comments