Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions quaddtype/numpy_quaddtype/src/ops.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -442,6 +442,8 @@ ld_isnan(const long double *op)

// Binary Quad operations
typedef Sleef_quad (*binary_op_quad_def)(const Sleef_quad *, const Sleef_quad *);
// Binary Quad operations with 2 outputs (for divmod, modf, frexp)
typedef void (*binary_op_2out_quad_def)(const Sleef_quad *, const Sleef_quad *, Sleef_quad *, Sleef_quad *);

static inline Sleef_quad
quad_add(const Sleef_quad *in1, const Sleef_quad *in2)
Expand Down Expand Up @@ -593,6 +595,14 @@ quad_fmod(const Sleef_quad *a, const Sleef_quad *b)
return result;
}

static inline void
quad_divmod(const Sleef_quad *a, const Sleef_quad *b,
Sleef_quad *out_quotient, Sleef_quad *out_remainder)
{
*out_quotient = quad_floor_divide(a, b);
*out_remainder = quad_mod(a, b);
}

static inline Sleef_quad
quad_minimum(const Sleef_quad *in1, const Sleef_quad *in2)
{
Expand Down Expand Up @@ -744,6 +754,8 @@ quad_logaddexp2(const Sleef_quad *x, const Sleef_quad *y)

// Binary long double operations
typedef long double (*binary_op_longdouble_def)(const long double *, const long double *);
// Binary long double operations with 2 outputs (for divmod, modf, frexp)
typedef void (*binary_op_2out_longdouble_def)(const long double *, const long double *, long double *, long double *);

static inline long double
ld_add(const long double *in1, const long double *in2)
Expand Down Expand Up @@ -871,6 +883,14 @@ ld_fmod(const long double *a, const long double *b)
return result;
}

static inline void
ld_divmod(const long double *a, const long double *b,
long double *out_quotient, long double *out_remainder)
{
*out_quotient = ld_floor_divide(a, b);
*out_remainder = ld_mod(a, b);
}

static inline long double
ld_minimum(const long double *in1, const long double *in2)
{
Expand Down
198 changes: 198 additions & 0 deletions quaddtype/numpy_quaddtype/src/umath/binary_ops.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,201 @@ quad_generic_binop_strided_loop_aligned(PyArrayMethod_Context *context, char *co
}



// Resolve descriptors for binary ops with 2 outputs (2 inputs, 2 outputs)
static NPY_CASTING
quad_binary_op_2out_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const dtypes[],
PyArray_Descr *const given_descrs[],
PyArray_Descr *loop_descrs[], npy_intp *NPY_UNUSED(view_offset))
{
QuadPrecDTypeObject *descr_in1 = (QuadPrecDTypeObject *)given_descrs[0];
QuadPrecDTypeObject *descr_in2 = (QuadPrecDTypeObject *)given_descrs[1];
QuadBackendType target_backend;

// Determine target backend and if casting is needed
NPY_CASTING casting = NPY_NO_CASTING;
if (descr_in1->backend != descr_in2->backend) {
target_backend = BACKEND_LONGDOUBLE;
casting = NPY_SAFE_CASTING;
}
else {
target_backend = descr_in1->backend;
}

// Set up input descriptors, casting if necessary
for (int i = 0; i < 2; i++) {
if (((QuadPrecDTypeObject *)given_descrs[i])->backend != target_backend) {
loop_descrs[i] = (PyArray_Descr *)new_quaddtype_instance(target_backend);
if (!loop_descrs[i]) {
return (NPY_CASTING)-1;
}
}
else {
Py_INCREF(given_descrs[i]);
loop_descrs[i] = given_descrs[i];
}
}

// Set up output descriptors (2 outputs for divmod)
for (int i = 2; i < 4; i++) {
if (given_descrs[i] == NULL) {
loop_descrs[i] = (PyArray_Descr *)new_quaddtype_instance(target_backend);
if (!loop_descrs[i]) {
return (NPY_CASTING)-1;
}
}
else {
QuadPrecDTypeObject *descr_out = (QuadPrecDTypeObject *)given_descrs[i];
if (descr_out->backend != target_backend) {
loop_descrs[i] = (PyArray_Descr *)new_quaddtype_instance(target_backend);
if (!loop_descrs[i]) {
return (NPY_CASTING)-1;
}
}
else {
Py_INCREF(given_descrs[i]);
loop_descrs[i] = given_descrs[i];
}
}
}
return casting;
}

// Strided loop for binary ops with 2 outputs (unaligned)
template <binary_op_2out_quad_def sleef_op, binary_op_2out_longdouble_def longdouble_op>
int
quad_generic_binop_2out_strided_loop_unaligned(PyArrayMethod_Context *context, char *const data[],
npy_intp const dimensions[], npy_intp const strides[],
NpyAuxData *auxdata)
{
npy_intp N = dimensions[0];
char *in1_ptr = data[0], *in2_ptr = data[1];
char *out1_ptr = data[2], *out2_ptr = data[3];
npy_intp in1_stride = strides[0];
npy_intp in2_stride = strides[1];
npy_intp out1_stride = strides[2];
npy_intp out2_stride = strides[3];

QuadPrecDTypeObject *descr = (QuadPrecDTypeObject *)context->descriptors[0];
QuadBackendType backend = descr->backend;
size_t elem_size = (backend == BACKEND_SLEEF) ? sizeof(Sleef_quad) : sizeof(long double);

quad_value in1, in2, out1, out2;
while (N--) {
memcpy(&in1, in1_ptr, elem_size);
memcpy(&in2, in2_ptr, elem_size);
if (backend == BACKEND_SLEEF) {
sleef_op(&in1.sleef_value, &in2.sleef_value, &out1.sleef_value, &out2.sleef_value);
}
else {
longdouble_op(&in1.longdouble_value, &in2.longdouble_value,
&out1.longdouble_value, &out2.longdouble_value);
}
memcpy(out1_ptr, &out1, elem_size);
memcpy(out2_ptr, &out2, elem_size);

in1_ptr += in1_stride;
in2_ptr += in2_stride;
out1_ptr += out1_stride;
out2_ptr += out2_stride;
}
return 0;
}

// Strided loop for binary ops with 2 outputs (aligned)
template <binary_op_2out_quad_def sleef_op, binary_op_2out_longdouble_def longdouble_op>
int
quad_generic_binop_2out_strided_loop_aligned(PyArrayMethod_Context *context, char *const data[],
npy_intp const dimensions[], npy_intp const strides[],
NpyAuxData *auxdata)
{
npy_intp N = dimensions[0];
char *in1_ptr = data[0], *in2_ptr = data[1];
char *out1_ptr = data[2], *out2_ptr = data[3];
npy_intp in1_stride = strides[0];
npy_intp in2_stride = strides[1];
npy_intp out1_stride = strides[2];
npy_intp out2_stride = strides[3];

QuadPrecDTypeObject *descr = (QuadPrecDTypeObject *)context->descriptors[0];
QuadBackendType backend = descr->backend;

while (N--) {
if (backend == BACKEND_SLEEF) {
sleef_op((Sleef_quad *)in1_ptr, (Sleef_quad *)in2_ptr,
(Sleef_quad *)out1_ptr, (Sleef_quad *)out2_ptr);
}
else {
longdouble_op((long double *)in1_ptr, (long double *)in2_ptr,
(long double *)out1_ptr, (long double *)out2_ptr);
}

in1_ptr += in1_stride;
in2_ptr += in2_stride;
out1_ptr += out1_stride;
out2_ptr += out2_stride;
}
return 0;
}

// Create binary ufunc with 2 outputs (generic for divmod, modf, frexp, etc.)
template <binary_op_2out_quad_def sleef_op, binary_op_2out_longdouble_def longdouble_op>
int
create_quad_binary_2out_ufunc(PyObject *numpy, const char *ufunc_name)
{
PyObject *ufunc = PyObject_GetAttrString(numpy, ufunc_name);
if (ufunc == NULL) {
return -1;
}

// 2 inputs, 2 outputs
PyArray_DTypeMeta *dtypes[4] = {&QuadPrecDType, &QuadPrecDType, &QuadPrecDType, &QuadPrecDType};

PyType_Slot slots[] = {
{NPY_METH_resolve_descriptors, (void *)&quad_binary_op_2out_resolve_descriptors},
{NPY_METH_strided_loop,
(void *)&quad_generic_binop_2out_strided_loop_aligned<sleef_op, longdouble_op>},
{NPY_METH_unaligned_strided_loop,
(void *)&quad_generic_binop_2out_strided_loop_unaligned<sleef_op, longdouble_op>},
{0, NULL}};

PyArrayMethod_Spec Spec = {
.name = "quad_binop_2out",
.nin = 2,
.nout = 2,
.casting = NPY_NO_CASTING,
.flags = (NPY_ARRAYMETHOD_FLAGS)(NPY_METH_SUPPORTS_UNALIGNED | NPY_METH_IS_REORDERABLE),
.dtypes = dtypes,
.slots = slots,
};

if (PyUFunc_AddLoopFromSpec(ufunc, &Spec) < 0) {
return -1;
}

PyObject *promoter_capsule =
PyCapsule_New((void *)&quad_ufunc_promoter, "numpy._ufunc_promoter", NULL);
if (promoter_capsule == NULL) {
return -1;
}

PyObject *DTypes = PyTuple_Pack(4, &PyArrayDescr_Type, &PyArrayDescr_Type,
&PyArrayDescr_Type, &PyArrayDescr_Type);
if (DTypes == 0) {
Py_DECREF(promoter_capsule);
return -1;
}

if (PyUFunc_AddPromoter(ufunc, DTypes, promoter_capsule) < 0) {
Py_DECREF(promoter_capsule);
Py_DECREF(DTypes);
return -1;
}
Py_DECREF(promoter_capsule);
Py_DECREF(DTypes);
return 0;
}

template <binary_op_quad_def sleef_op, binary_op_longdouble_def longdouble_op>
int
create_quad_binary_ufunc(PyObject *numpy, const char *ufunc_name)
Expand Down Expand Up @@ -259,5 +454,8 @@ init_quad_binary_ops(PyObject *numpy)
if (create_quad_binary_ufunc<quad_logaddexp2, ld_logaddexp2>(numpy, "logaddexp2") < 0) {
return -1;
}
if (create_quad_binary_2out_ufunc<quad_divmod, ld_divmod>(numpy, "divmod") < 0) {
return -1;
}
return 0;
}
4 changes: 2 additions & 2 deletions quaddtype/release_tracker.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Plan for `numpy-quaddtype` v1.0.0

- [ ] High-Endian System support
- [x] High-Endian System support
- [ ] Complete Documentation

| ufunc name | Added | Edge Cases Tested\* |
Expand All @@ -21,7 +21,7 @@
| remainder | ✅ | ✅ |
| mod | ✅ | ✅ |
| fmod | ✅ | ✅ |
| divmod | | |
| divmod | | ✅ |
| absolute | ✅ | ✅ |
| fabs | | |
| rint | ✅ | ✅ |
Expand Down
Loading
Loading