Skip to content

Commit e106808

Browse files
authored
cython: better exception handling (#606)
- use `except -1` instead of manual error handling
- use `PyUnicode_AsUTF8AndSize()`
- use `_pack()` and `_pack_inner()` instead of `while True:`
1 parent 3da5818 commit e106808

File tree

2 files changed

+131
-192
lines changed

2 files changed

+131
-192
lines changed

msgpack/_packer.pyx

Lines changed: 131 additions & 171 deletions
Original file line numberDiff line numberDiff line change
@@ -24,21 +24,20 @@ cdef extern from "pack.h":
2424
size_t buf_size
2525
bint use_bin_type
2626

27-
int msgpack_pack_nil(msgpack_packer* pk)
28-
int msgpack_pack_true(msgpack_packer* pk)
29-
int msgpack_pack_false(msgpack_packer* pk)
30-
int msgpack_pack_long_long(msgpack_packer* pk, long long d)
31-
int msgpack_pack_unsigned_long_long(msgpack_packer* pk, unsigned long long d)
32-
int msgpack_pack_float(msgpack_packer* pk, float d)
33-
int msgpack_pack_double(msgpack_packer* pk, double d)
34-
int msgpack_pack_array(msgpack_packer* pk, size_t l)
35-
int msgpack_pack_map(msgpack_packer* pk, size_t l)
36-
int msgpack_pack_raw(msgpack_packer* pk, size_t l)
37-
int msgpack_pack_bin(msgpack_packer* pk, size_t l)
38-
int msgpack_pack_raw_body(msgpack_packer* pk, char* body, size_t l)
39-
int msgpack_pack_ext(msgpack_packer* pk, char typecode, size_t l)
40-
int msgpack_pack_timestamp(msgpack_packer* x, long long seconds, unsigned long nanoseconds);
41-
int msgpack_pack_unicode(msgpack_packer* pk, object o, long long limit)
27+
int msgpack_pack_nil(msgpack_packer* pk) except -1
28+
int msgpack_pack_true(msgpack_packer* pk) except -1
29+
int msgpack_pack_false(msgpack_packer* pk) except -1
30+
int msgpack_pack_long_long(msgpack_packer* pk, long long d) except -1
31+
int msgpack_pack_unsigned_long_long(msgpack_packer* pk, unsigned long long d) except -1
32+
int msgpack_pack_float(msgpack_packer* pk, float d) except -1
33+
int msgpack_pack_double(msgpack_packer* pk, double d) except -1
34+
int msgpack_pack_array(msgpack_packer* pk, size_t l) except -1
35+
int msgpack_pack_map(msgpack_packer* pk, size_t l) except -1
36+
int msgpack_pack_raw(msgpack_packer* pk, size_t l) except -1
37+
int msgpack_pack_bin(msgpack_packer* pk, size_t l) except -1
38+
int msgpack_pack_raw_body(msgpack_packer* pk, char* body, size_t l) except -1
39+
int msgpack_pack_ext(msgpack_packer* pk, char typecode, size_t l) except -1
40+
int msgpack_pack_timestamp(msgpack_packer* x, long long seconds, unsigned long nanoseconds) except -1
4241

4342

4443
cdef int DEFAULT_RECURSE_LIMIT=511
@@ -119,6 +118,10 @@ cdef class Packer:
119118
self.pk.buf_size = buf_size
120119
self.pk.length = 0
121120

121+
def __dealloc__(self):
122+
PyMem_Free(self.pk.buf)
123+
self.pk.buf = NULL
124+
122125
def __init__(self, *, default=None,
123126
bint use_single_float=False, bint autoreset=True, bint use_bin_type=True,
124127
bint strict_types=False, bint datetime=False, unicode_errors=None,
@@ -139,155 +142,127 @@ cdef class Packer:
139142
else:
140143
self.unicode_errors = self._berrors
141144

142-
def __dealloc__(self):
143-
PyMem_Free(self.pk.buf)
144-
self.pk.buf = NULL
145-
146-
cdef int _pack(self, object o, int nest_limit=DEFAULT_RECURSE_LIMIT) except -1:
145+
# returns -2 when default(o) should be called
146+
cdef int _pack_inner(self, object o, bint will_default, int nest_limit) except -1:
147147
cdef long long llval
148148
cdef unsigned long long ullval
149149
cdef unsigned long ulval
150-
cdef long longval
151-
cdef float fval
152-
cdef double dval
153-
cdef char* rawval
154-
cdef int ret
155-
cdef dict d
150+
cdef const char* rawval
156151
cdef Py_ssize_t L
157-
cdef int default_used = 0
158-
cdef bint strict_types = self.strict_types
152+
cdef bool strict_types = self.strict_types
159153
cdef Py_buffer view
160154

161-
if nest_limit < 0:
162-
raise ValueError("recursion limit exceeded.")
163-
164-
while True:
165-
if o is None:
166-
ret = msgpack_pack_nil(&self.pk)
167-
elif o is True:
168-
ret = msgpack_pack_true(&self.pk)
169-
elif o is False:
170-
ret = msgpack_pack_false(&self.pk)
171-
elif PyLong_CheckExact(o) if strict_types else PyLong_Check(o):
172-
# PyInt_Check(long) is True for Python 3.
173-
# So we should test long before int.
174-
try:
175-
if o > 0:
176-
ullval = o
177-
ret = msgpack_pack_unsigned_long_long(&self.pk, ullval)
178-
else:
179-
llval = o
180-
ret = msgpack_pack_long_long(&self.pk, llval)
181-
except OverflowError as oe:
182-
if not default_used and self._default is not None:
183-
o = self._default(o)
184-
default_used = True
185-
continue
186-
else:
187-
raise OverflowError("Integer value out of range")
188-
elif PyFloat_CheckExact(o) if strict_types else PyFloat_Check(o):
189-
if self.use_float:
190-
fval = o
191-
ret = msgpack_pack_float(&self.pk, fval)
155+
if o is None:
156+
msgpack_pack_nil(&self.pk)
157+
elif o is True:
158+
msgpack_pack_true(&self.pk)
159+
elif o is False:
160+
msgpack_pack_false(&self.pk)
161+
elif PyLong_CheckExact(o) if strict_types else PyLong_Check(o):
162+
try:
163+
if o > 0:
164+
ullval = o
165+
msgpack_pack_unsigned_long_long(&self.pk, ullval)
192166
else:
193-
dval = o
194-
ret = msgpack_pack_double(&self.pk, dval)
195-
elif PyBytesLike_CheckExact(o) if strict_types else PyBytesLike_Check(o):
196-
L = Py_SIZE(o)
197-
if L > ITEM_LIMIT:
198-
PyErr_Format(ValueError, b"%.200s object is too large", Py_TYPE(o).tp_name)
199-
rawval = o
200-
ret = msgpack_pack_bin(&self.pk, L)
201-
if ret == 0:
202-
ret = msgpack_pack_raw_body(&self.pk, rawval, L)
203-
elif PyUnicode_CheckExact(o) if strict_types else PyUnicode_Check(o):
204-
if self.unicode_errors == NULL:
205-
ret = msgpack_pack_unicode(&self.pk, o, ITEM_LIMIT);
206-
if ret == -2:
207-
raise ValueError("unicode string is too large")
167+
llval = o
168+
msgpack_pack_long_long(&self.pk, llval)
169+
except OverflowError as oe:
170+
if will_default:
171+
return -2
208172
else:
209-
o = PyUnicode_AsEncodedString(o, NULL, self.unicode_errors)
210-
L = Py_SIZE(o)
211-
if L > ITEM_LIMIT:
212-
raise ValueError("unicode string is too large")
213-
ret = msgpack_pack_raw(&self.pk, L)
214-
if ret == 0:
215-
rawval = o
216-
ret = msgpack_pack_raw_body(&self.pk, rawval, L)
217-
elif PyDict_CheckExact(o):
218-
d = <dict>o
219-
L = len(d)
220-
if L > ITEM_LIMIT:
221-
raise ValueError("dict is too large")
222-
ret = msgpack_pack_map(&self.pk, L)
223-
if ret == 0:
224-
for k, v in d.items():
225-
ret = self._pack(k, nest_limit-1)
226-
if ret != 0: break
227-
ret = self._pack(v, nest_limit-1)
228-
if ret != 0: break
229-
elif not strict_types and PyDict_Check(o):
230-
L = len(o)
231-
if L > ITEM_LIMIT:
232-
raise ValueError("dict is too large")
233-
ret = msgpack_pack_map(&self.pk, L)
234-
if ret == 0:
235-
for k, v in o.items():
236-
ret = self._pack(k, nest_limit-1)
237-
if ret != 0: break
238-
ret = self._pack(v, nest_limit-1)
239-
if ret != 0: break
240-
elif type(o) is ExtType if strict_types else isinstance(o, ExtType):
241-
# This should be before Tuple because ExtType is namedtuple.
242-
longval = o.code
243-
rawval = o.data
244-
L = len(o.data)
245-
if L > ITEM_LIMIT:
246-
raise ValueError("EXT data is too large")
247-
ret = msgpack_pack_ext(&self.pk, longval, L)
248-
ret = msgpack_pack_raw_body(&self.pk, rawval, L)
249-
elif type(o) is Timestamp:
250-
llval = o.seconds
251-
ulval = o.nanoseconds
252-
ret = msgpack_pack_timestamp(&self.pk, llval, ulval)
253-
elif PyList_CheckExact(o) if strict_types else (PyTuple_Check(o) or PyList_Check(o)):
173+
raise OverflowError("Integer value out of range")
174+
elif PyFloat_CheckExact(o) if strict_types else PyFloat_Check(o):
175+
if self.use_float:
176+
msgpack_pack_float(&self.pk, <float>o)
177+
else:
178+
msgpack_pack_double(&self.pk, <double>o)
179+
elif PyBytesLike_CheckExact(o) if strict_types else PyBytesLike_Check(o):
180+
L = Py_SIZE(o)
181+
if L > ITEM_LIMIT:
182+
PyErr_Format(ValueError, b"%.200s object is too large", Py_TYPE(o).tp_name)
183+
rawval = o
184+
msgpack_pack_bin(&self.pk, L)
185+
msgpack_pack_raw_body(&self.pk, rawval, L)
186+
elif PyUnicode_CheckExact(o) if strict_types else PyUnicode_Check(o):
187+
if self.unicode_errors == NULL:
188+
rawval = PyUnicode_AsUTF8AndSize(o, &L)
189+
if L >ITEM_LIMIT:
190+
raise ValueError("unicode string is too large")
191+
else:
192+
o = PyUnicode_AsEncodedString(o, NULL, self.unicode_errors)
254193
L = Py_SIZE(o)
255194
if L > ITEM_LIMIT:
256-
raise ValueError("list is too large")
257-
ret = msgpack_pack_array(&self.pk, L)
258-
if ret == 0:
259-
for v in o:
260-
ret = self._pack(v, nest_limit-1)
261-
if ret != 0: break
262-
elif PyMemoryView_Check(o):
263-
if PyObject_GetBuffer(o, &view, PyBUF_SIMPLE) != 0:
264-
raise ValueError("could not get buffer for memoryview")
265-
L = view.len
266-
if L > ITEM_LIMIT:
267-
PyBuffer_Release(&view);
268-
raise ValueError("memoryview is too large")
269-
ret = msgpack_pack_bin(&self.pk, L)
270-
if ret == 0:
271-
ret = msgpack_pack_raw_body(&self.pk, <char*>view.buf, L)
195+
raise ValueError("unicode string is too large")
196+
rawval = o
197+
msgpack_pack_raw(&self.pk, L)
198+
msgpack_pack_raw_body(&self.pk, rawval, L)
199+
elif PyDict_CheckExact(o) if strict_types else PyDict_Check(o):
200+
L = len(o)
201+
if L > ITEM_LIMIT:
202+
raise ValueError("dict is too large")
203+
msgpack_pack_map(&self.pk, L)
204+
for k, v in o.items():
205+
self._pack(k, nest_limit)
206+
self._pack(v, nest_limit)
207+
elif type(o) is ExtType if strict_types else isinstance(o, ExtType):
208+
# This should be before Tuple because ExtType is namedtuple.
209+
rawval = o.data
210+
L = len(o.data)
211+
if L > ITEM_LIMIT:
212+
raise ValueError("EXT data is too large")
213+
msgpack_pack_ext(&self.pk, <long>o.code, L)
214+
msgpack_pack_raw_body(&self.pk, rawval, L)
215+
elif type(o) is Timestamp:
216+
llval = o.seconds
217+
ulval = o.nanoseconds
218+
msgpack_pack_timestamp(&self.pk, llval, ulval)
219+
elif PyList_CheckExact(o) if strict_types else (PyTuple_Check(o) or PyList_Check(o)):
220+
L = Py_SIZE(o)
221+
if L > ITEM_LIMIT:
222+
raise ValueError("list is too large")
223+
msgpack_pack_array(&self.pk, L)
224+
for v in o:
225+
self._pack(v, nest_limit)
226+
elif PyMemoryView_Check(o):
227+
PyObject_GetBuffer(o, &view, PyBUF_SIMPLE)
228+
L = view.len
229+
if L > ITEM_LIMIT:
272230
PyBuffer_Release(&view);
273-
elif self.datetime and PyDateTime_CheckExact(o) and datetime_tzinfo(o) is not None:
274-
delta = o - epoch
275-
if not PyDelta_CheckExact(delta):
276-
raise ValueError("failed to calculate delta")
277-
llval = timedelta_days(delta) * <long long>(24*60*60) + timedelta_seconds(delta)
278-
ulval = timedelta_microseconds(delta) * 1000
279-
ret = msgpack_pack_timestamp(&self.pk, llval, ulval)
280-
elif not default_used and self._default:
231+
raise ValueError("memoryview is too large")
232+
try:
233+
msgpack_pack_bin(&self.pk, L)
234+
msgpack_pack_raw_body(&self.pk, <char*>view.buf, L)
235+
finally:
236+
PyBuffer_Release(&view);
237+
elif self.datetime and PyDateTime_CheckExact(o) and datetime_tzinfo(o) is not None:
238+
delta = o - epoch
239+
if not PyDelta_CheckExact(delta):
240+
raise ValueError("failed to calculate delta")
241+
llval = timedelta_days(delta) * <long long>(24*60*60) + timedelta_seconds(delta)
242+
ulval = timedelta_microseconds(delta) * 1000
243+
msgpack_pack_timestamp(&self.pk, llval, ulval)
244+
elif will_default:
245+
return -2
246+
elif self.datetime and PyDateTime_CheckExact(o):
247+
# this should be later than will_default
248+
PyErr_Format(ValueError, b"can not serialize '%.200s' object where tzinfo=None", Py_TYPE(o).tp_name)
249+
else:
250+
PyErr_Format(TypeError, b"can not serialize '%.200s' object", Py_TYPE(o).tp_name)
251+
252+
cdef int _pack(self, object o, int nest_limit=DEFAULT_RECURSE_LIMIT) except -1:
253+
cdef int ret
254+
if nest_limit < 0:
255+
raise ValueError("recursion limit exceeded.")
256+
nest_limit -= 1
257+
if self._default is not None:
258+
ret = self._pack_inner(o, 1, nest_limit)
259+
if ret == -2:
281260
o = self._default(o)
282-
default_used = 1
283-
continue
284-
elif self.datetime and PyDateTime_CheckExact(o):
285-
PyErr_Format(ValueError, b"can not serialize '%.200s' object where tzinfo=None", Py_TYPE(o).tp_name)
286261
else:
287-
PyErr_Format(TypeError, b"can not serialize '%.200s' object", Py_TYPE(o).tp_name)
288-
return ret
262+
return ret
263+
return self._pack_inner(o, 0, nest_limit)
289264

290-
cpdef pack(self, object obj):
265+
def pack(self, object obj):
291266
cdef int ret
292267
try:
293268
ret = self._pack(obj, DEFAULT_RECURSE_LIMIT)
@@ -308,11 +283,7 @@ cdef class Packer:
308283
def pack_array_header(self, long long size):
309284
if size > ITEM_LIMIT:
310285
raise ValueError
311-
cdef int ret = msgpack_pack_array(&self.pk, size)
312-
if ret == -1:
313-
raise MemoryError
314-
elif ret: # should not happen
315-
raise TypeError
286+
msgpack_pack_array(&self.pk, size)
316287
if self.autoreset:
317288
buf = PyBytes_FromStringAndSize(self.pk.buf, self.pk.length)
318289
self.pk.length = 0
@@ -321,11 +292,7 @@ cdef class Packer:
321292
def pack_map_header(self, long long size):
322293
if size > ITEM_LIMIT:
323294
raise ValueError
324-
cdef int ret = msgpack_pack_map(&self.pk, size)
325-
if ret == -1:
326-
raise MemoryError
327-
elif ret: # should not happen
328-
raise TypeError
295+
msgpack_pack_map(&self.pk, size)
329296
if self.autoreset:
330297
buf = PyBytes_FromStringAndSize(self.pk.buf, self.pk.length)
331298
self.pk.length = 0
@@ -338,17 +305,10 @@ cdef class Packer:
338305
*pairs* should be a sequence of pairs.
339306
(`len(pairs)` and `for k, v in pairs:` should be supported.)
340307
"""
341-
cdef int ret = msgpack_pack_map(&self.pk, len(pairs))
342-
if ret == 0:
343-
for k, v in pairs:
344-
ret = self._pack(k)
345-
if ret != 0: break
346-
ret = self._pack(v)
347-
if ret != 0: break
348-
if ret == -1:
349-
raise MemoryError
350-
elif ret: # should not happen
351-
raise TypeError
308+
msgpack_pack_map(&self.pk, len(pairs))
309+
for k, v in pairs:
310+
self._pack(k)
311+
self._pack(v)
352312
if self.autoreset:
353313
buf = PyBytes_FromStringAndSize(self.pk.buf, self.pk.length)
354314
self.pk.length = 0

msgpack/pack.h

Lines changed: 0 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -64,27 +64,6 @@ static inline int msgpack_pack_write(msgpack_packer* pk, const char *data, size_
6464

6565
#include "pack_template.h"
6666

67-
// return -2 when o is too long
68-
static inline int
69-
msgpack_pack_unicode(msgpack_packer *pk, PyObject *o, long long limit)
70-
{
71-
assert(PyUnicode_Check(o));
72-
73-
Py_ssize_t len;
74-
const char* buf = PyUnicode_AsUTF8AndSize(o, &len);
75-
if (buf == NULL)
76-
return -1;
77-
78-
if (len > limit) {
79-
return -2;
80-
}
81-
82-
int ret = msgpack_pack_raw(pk, len);
83-
if (ret) return ret;
84-
85-
return msgpack_pack_raw_body(pk, buf, len);
86-
}
87-
8867
#ifdef __cplusplus
8968
}
9069
#endif

0 commit comments

Comments
 (0)