3
3
4
4
from pint import UnitRegistry , Unit , register_unit_format , Quantity
5
5
from pint .compat import tokenizer
6
- from tokenize import NAME , NUMBER , OP
6
+ from tokenize import NAME , NUMBER , OP , Token , ERRORTOKEN
7
7
# alias the error that is thrown when units are incompatible
8
8
# this helps to isolate the dependence on pint
9
9
from pint .errors import DimensionalityError as IncompatibleUnitsError # noqa Import
10
10
from pint .errors import UndefinedUnitError , DefinitionSyntaxError # noqa Import
11
11
12
12
import functools
13
13
import pkg_resources
14
- from typing import Union
14
+ from typing import Union , List , Tuple
15
15
16
16
# use the default unit registry for now
17
17
# Unit-definition file shipped inside the gemd.units package
DEFAULT_FILE = pkg_resources.resource_filename("gemd.units", "citrine_en.txt")
# Operator tokens a unit expression may contain; any other operator is rejected
_ALLOWED_OPERATORS = {
    "(", ")",
    ".", "+", "-",
    "*", "/", "//",
    "^", "**",
}
19
19
20
20
21
+ def _scientific_notation_preprocessor (input_string : str ) -> str :
22
+ """Preprocessor that converts x * 10 ** y format to xEy."""
23
+ def _as_scientific (matchobj : re .Match ) -> str :
24
+ return f"{ matchobj .group (1 ) or '1' } e{ matchobj .group (2 )} "
25
+
26
+ number = r'\b(?:(\d+\.?\d*|\.\d+)\s*\*\s*)?10\s*(?:\*{2}|\^)\s*\+?(-?\d+\b)'
27
+ return re .sub (number , _as_scientific , input_string )
28
+
29
+
21
30
def _scaling_preprocessor(input_string: str) -> str:
    """
    Preprocessor that turns scaling factors into non-dimensional units.

    The expression is tokenized and split into blocks of tokens; a block that
    holds exactly one number outside of an exponent is treated as a scaled
    term.  Each scaled term whose factor is neither 0 nor 1 is registered as a
    unit in the module-level registry (if not already present) and its text in
    the expression is replaced by that unit's name.

    Parameters
    ----------
    input_string: str
        The unit expression to rewrite

    Returns
    -------
    str
        The expression with scaled terms replaced by registry unit names

    Raises
    ------
    UndefinedUnitError
        If the expression contains an operator outside `_ALLOWED_OPERATORS`
    ValueError
        If a scaling factor carries an exponent, follows its unit, or a block
        contains more than one scaling factor

    """
    exponent_ops = {"**", "^"}

    # --- Pass 1: split the token stream into blocks -------------------------
    blocks: List[list] = [[]]  # each block is a list of tokens from `tokenizer`
    operator_stack = []
    for token in tokenizer(input_string):
        exponent_context = any(t.string in exponent_ops for t in operator_stack)
        if token.type == OP:
            if token.string not in _ALLOWED_OPERATORS:
                raise UndefinedUnitError(f"Unrecognized operator: {token.string}")

            if exponent_context or token.string in {"**", "^", ".", "-", "+"}:
                # Exponents & unaries do not change context
                blocks[-1].append(token)
            else:
                # Any other operator starts a new block (the operator itself is not stored)
                blocks.append([])

            if token.string == '(':
                operator_stack.append(token)
            elif token.string == ')':
                while operator_stack:  # don't worry about enforcing balance
                    if operator_stack.pop().string == '(':
                        break  # We found token's friend
            elif token.string in exponent_ops:
                operator_stack.append(token)
                continue  # Break flow since next token is in exponent context
        elif token.type == NAME:
            if exponent_context or len(blocks[-1]) == 0 or blocks[-1][-1].type != NAME:
                blocks[-1].append(token)
            else:  # Break blocks for two units in a row
                blocks.append([token])
        elif token.type == NUMBER:
            blocks[-1].append(token)
        elif token.type == ERRORTOKEN:  # Keep non-legal Python symbols like °
            blocks[-1].append(token)
        # Drop other tokens, such as EOF

        if len(operator_stack) > 0 and operator_stack[-1].string in exponent_ops:
            operator_stack.pop()  # Exit context for this exponential

    # --- Pass 2: find the scaled terms --------------------------------------
    todo = []
    blocks.pop(0)  # Leading term is not allowed to be a scaling factor
    for block in blocks:
        i_exp = next((i for i, t in enumerate(block) if t.string in exponent_ops), len(block))
        i_name = next((i for i, t in enumerate(block) if t.type == NAME), None)
        # Numbers at or after the first exponent operator are exponents, not scaling factors
        numbers = [(i, t.string) for i, t in enumerate(block) if t.type == NUMBER and i < i_exp]

        if len(numbers) == 1:
            position, value = numbers[0]
            if i_exp != len(block):
                raise ValueError(
                    f"Scaling factors ({value}) with exponents are not supported ({input_string})"
                )
            if i_name is not None and i_name < position:
                raise ValueError(f"Scaling factor ({value}) follows unit in {input_string}")
            magnitude = float(value)
            if magnitude != 1.0 and magnitude != 0.0:  # Don't create definitions for 0 or 1
                block_string = input_string[block[0].start[1]:block[-1].end[1]]
                if i_name is None:
                    unit_string = None
                else:
                    unit_string = input_string[block[position + 1].start[1]:block[i_name].end[1]]
                todo.append((block_string, unit_string))
        elif len(numbers) > 1:
            raise ValueError(
                f"Replicate scaling factor ({[n[1] for n in numbers]}) in {input_string}"
            )

    # --- Pass 3: define a unit per scaled term and substitute it ------------
    for scaled_term, unit_string in todo:
        # Do not match inside a longer number or signed expression
        regex = rf"(?<![-+0-9.]){re.escape(scaled_term)}(?![0-9.])"
        stripped = re.sub(r"--", "", re.sub(r"[+\s]+", "", scaled_term))

        if unit_string is not None:
            stripped_unit = re.sub(r"--", "", re.sub(r"[+\s]+", "", unit_string))
            parsed_unit = _REGISTRY(stripped_unit).u  # parse once, format twice
            long_unit = f"{parsed_unit}"
            short_unit = f"{parsed_unit:~}"
            long_name = stripped.replace(stripped_unit, "_" + long_unit)
            short_name = stripped.replace(stripped_unit, " " + short_unit)
        else:
            long_name = stripped
            short_name = stripped

        underscored = re.sub(r"[-.]", "_", long_name)
        valid = f"_{underscored}"
        if valid not in _REGISTRY:
            # Parse subexpression to clean things up for define
            value = f"{_REGISTRY.parse_expression(scaled_term)}"
            _REGISTRY.define(f"{valid} = {value} = {short_name}")
        input_string = re.sub(regex, valid, input_string)

    return input_string
67
120
68
121
69
# Module-wide unit registry, built from the default definitions file.  The
# scientific-notation preprocessor is listed ahead of the scaling preprocessor.
_REGISTRY = UnitRegistry(
    filename=DEFAULT_FILE,
    preprocessors=[_scientific_notation_preprocessor, _scaling_preprocessor],
    autoconvert_offset_to_baseunit=True,
)
70
126
71
127
72
128
@register_unit_format ("clean" )
@@ -75,9 +131,9 @@ def _format_clean(unit, registry, **options):
75
131
numerator = []
76
132
denominator = []
77
133
for u , p in unit .items ():
78
- if re .match (r"_[\d_]+$ " , u ):
79
- # Munged scaling factor; drop leading underscore, restore . and -
80
- u = re . sub ( r"(?<=\d)_(?=\d)" , "." , u [ 1 :]). replace ( "_" , "-" )
134
+ if re .match (r"_[\d_]+" , u ):
135
+ # Munged scaling factor; grab symbol, which is the prettier
136
+ u = registry . get_symbol ( u )
81
137
82
138
if p == 1 :
83
139
numerator .append (u )
@@ -98,7 +154,7 @@ def _format_clean(unit, registry, **options):
98
154
99
155
100
156
@functools.lru_cache(maxsize=1024)
def _parse_units(units: str) -> Unit:
    """
    Parse a unit string into the registry's Unit object, caching the result.

    Parameters
    ----------
    units: str
        The unit expression to parse

    Returns
    -------
    Unit
        The units of the parsed expression; the registry's unit for the empty
        string if the registry returned a bare number instead of a Quantity

    Raises
    ------
    ValueError
        If the parsed magnitude is zero, or is anything other than 1

    """
    # TODO: parse_units has a bug resolved in 0.19, but 3.7 only supports up to 0.18
    outcome = _REGISTRY(units)
    if not isinstance(outcome, Quantity):
        # A bare number came back, i.e., the expression was non-dimensional
        scale, unit = outcome, _REGISTRY("").u
    else:
        scale, unit = outcome.magnitude, outcome.units

    if scale == 0.0:
        raise ValueError(f"Unit expression had a zero scaling factor. {units}")
    if scale != 1:
        raise ValueError(f"Unit expression cannot have a leading scaling factor. {units}")
    return unit
185
+
186
+
187
def parse_units(units: Union[str, Unit, None],
                *,
                return_unit: bool = False
                ) -> Union[str, Unit, None]:
    """
    Parse a string or Unit into a standard representation of the unit.

    Parameters
    ----------
    units: Union[str, Unit, None]
        The string or Unit representation of the object we wish to display
    return_unit: boolean
        Whether to return a Unit object, vs. whatever was initially passed

    Returns
    -------
    [Union[str, Unit, None]]
        For a string, the "clean"-formatted unit string, or the parsed Unit if
        `return_unit` is set.  For None, None, or the registry's unit for the
        empty string if `return_unit` is set.  A Unit is returned unchanged.

    Raises
    ------
    UndefinedUnitError
        If `units` is not None, a string, or a Unit

    """
    if units is None:
        return _REGISTRY("").u if return_unit else None

    if isinstance(units, str):
        unit_obj = _parse_units(units)
        return unit_obj if return_unit else f"{unit_obj:clean}"

    if isinstance(units, Unit):
        return units

    raise UndefinedUnitError("Units must be given as a recognized unit string or Units object")
130
222
131
223
132
- @functools .lru_cache (maxsize = None )
224
+ @functools .lru_cache (maxsize = 1024 * 1024 )
133
225
def convert_units (value : float , starting_unit : str , final_unit : str ) -> float :
134
226
"""
135
227
Convert the value from the starting_unit to the final_unit.
@@ -152,7 +244,31 @@ def convert_units(value: float, starting_unit: str, final_unit: str) -> float:
152
244
if starting_unit == final_unit :
153
245
return value # skip computation
154
246
else :
155
- return _REGISTRY .Quantity (value , starting_unit ).to (final_unit ).magnitude
247
+ resolved_final_unit = _REGISTRY (final_unit ) # `to` bypasses preparser
248
+ return _REGISTRY .Quantity (value , starting_unit ).to (resolved_final_unit ).magnitude
249
+
250
+
251
@functools.lru_cache(maxsize=1024)
def get_base_units(units: Union[str, Unit]) -> Tuple[Unit, float, float]:
    """
    Get the base units and conversion factors for the given unit.

    Parameters
    ----------
    units: Union[str, Unit]
        The string or Unit representation of the unit to resolve

    Returns
    -------
    Tuple[Unit, float, float]
        The base unit, the multiplicative factor the registry reports for
        expressing `units` in that base unit, and the offset, i.e., the value
        that zero `units` takes when converted to the base unit

    """
    if isinstance(units, str):
        units = _REGISTRY(units)
    ratio, base_unit = _REGISTRY.get_base_units(units)
    offset = _REGISTRY.Quantity(0, units).to(_REGISTRY.Quantity(0, base_unit)).magnitude
    # Coerce both factors so the declared return type holds even for integer magnitudes
    return base_unit, float(ratio), float(offset)
156
272
157
273
158
274
def change_definitions_file (filename : str = None ):
@@ -169,4 +285,9 @@ def change_definitions_file(filename: str = None):
169
285
convert_units .cache_clear () # Units will change
170
286
if filename is None :
171
287
filename = DEFAULT_FILE
172
- _REGISTRY = UnitRegistry (filename = filename , preprocessors = [_scaling_preprocessor ])
288
+ _REGISTRY = UnitRegistry (filename = filename ,
289
+ preprocessors = [
290
+ _scientific_notation_preprocessor ,
291
+ _scaling_preprocessor
292
+ ],
293
+ autoconvert_offset_to_baseunit = True )
0 commit comments