Skip to content

Commit 45998f6

Browse files
committed
Made type definitions in Cython match NumPy types explicitly
1 parent 3a95b2f commit 45998f6

File tree

3 files changed

+74
-68
lines changed

3 files changed

+74
-68
lines changed

setup.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,14 @@
66
from setuptools import setup, Extension
77
import setuptools_scm # noqa # pylint: disable=unused-import
88
from Cython.Build import build_ext
9+
import numpy
910

1011

1112
setup(
12-
ext_modules = [
13+
ext_modules=[
1314
Extension(
1415
"rsatoolbox.cengine.similarity",
15-
["src/rsatoolbox/cengine/similarity.pyx"])],
16+
["src/rsatoolbox/cengine/similarity.pyx"],
17+
include_dirs=[numpy.get_include()])],
1618
cmdclass={'build_ext': build_ext}
1719
)

src/rsatoolbox/cengine/similarity.pyx

+67-63
Original file line numberDiff line numberDiff line change
@@ -6,15 +6,20 @@ from cython.view cimport array as cvarray
66
from libc.math cimport log, sqrt, isnan, NAN
77
from cpython.mem cimport PyMem_Malloc, PyMem_Realloc, PyMem_Free
88
cimport scipy.linalg.cython_blas as blas
9+
cimport numpy as cnp
910

11+
cnp.import_array()
12+
13+
ctypedef cnp.int64_t int_t
14+
ctypedef cnp.float64_t float_t
1015

1116
@cython.boundscheck(False)
1217
@cython.cdivision(True)
13-
cpdef double [:] calc(
14-
double [:, :] data, long [:] desc,
15-
long [:] cv_desc, int n,
16-
int method_idx, double [:, :] noise=None,
17-
double prior_lambda=1, double prior_weight=0.1,
18+
cpdef float_t [:] calc(
19+
float_t [:, :] data, int_t [:] desc,
20+
int_t [:] cv_desc, int n,
21+
int method_idx, float_t [:, :] noise=None,
22+
float_t prior_lambda=1, float_t prior_weight=0.1,
1823
int weighting=1, int crossval=0):
1924
# calculates an RDM from a double array of data with integer descriptors
2025
# There are no checks or safeguards in this function!
@@ -37,17 +42,17 @@ cpdef double [:] calc(
3742
# 0: each row has equal weight
3843
# 1: rows weighted by number of valid measurements
3944
cdef:
40-
double [:] vec_i
41-
double [:] vec_j
42-
double weight, sim
43-
double [:] weights
44-
double [:] values
45+
float_t [:] vec_i
46+
float_t [:] vec_j
47+
float_t weight, sim
48+
float_t [:] weights
49+
float_t [:] values
4550
int i, j, idx
4651
int n_rdm = (n * (n-1)) / 2
4752
int n_dim = data.shape[1]
48-
double prior_lambda_l = prior_lambda * prior_weight
49-
double prior_weight_l = 1 + prior_weight
50-
double [:, :] log_data
53+
float_t prior_lambda_l = prior_lambda * prior_weight
54+
float_t prior_weight_l = 1 + prior_weight
55+
float_t [:, :] log_data
5156
if (method_idx > 4) or (method_idx < 1):
5257
raise ValueError('dissimilarity method not recognized!')
5358
# precompute stuff for poisson KL
@@ -58,8 +63,8 @@ cpdef double [:] calc(
5863
for j in range(n_dim):
5964
data[i, j] = (data[i, j] + prior_lambda_l) / prior_weight_l
6065
log_data[i, j] = log(data[i, j])
61-
weights = <double [:(n_rdm+n)]> PyMem_Malloc((n_rdm+n) * sizeof(double))
62-
values = <double [:(n_rdm+n)]> PyMem_Malloc((n_rdm+n) * sizeof(double))
66+
weights = <float_t [:(n_rdm+n)]> PyMem_Malloc((n_rdm+n) * sizeof(float_t))
67+
values = <float_t [:(n_rdm+n)]> PyMem_Malloc((n_rdm+n) * sizeof(float_t))
6368
for idx in range(n_rdm + n):
6469
weights[idx] = 0
6570
values[idx] = 0
@@ -74,7 +79,7 @@ cpdef double [:] calc(
7479
sim, weight = euclid(data[i], data[i], n_dim)
7580
else:
7681
sim = mahalanobis(data[i], data[i], n_dim, noise)
77-
weight = <double> n_dim
82+
weight = <float_t> n_dim
7883
elif method_idx == 4: # method in ['poisson', 'poisson_cv']:
7984
sim, weight = poisson_cv(data[i], data[i], log_data[i], log_data[i], n_dim)
8085
idx = desc[i]
@@ -97,7 +102,7 @@ cpdef double [:] calc(
97102
sim, weight = euclid(data[i], data[j], n_dim)
98103
else:
99104
sim = mahalanobis(data[i], data[j], n_dim, noise)
100-
weight = <double> n_dim
105+
weight = <float_t> n_dim
101106
elif method_idx == 4: # method in ['poisson', 'poisson_cv']:
102107
sim, weight = poisson_cv(data[i], data[j], log_data[i], log_data[j], n_dim)
103108
if weight > 0:
@@ -124,25 +129,25 @@ cpdef double [:] calc(
124129

125130
@cython.boundscheck(False)
126131
@cython.cdivision(True)
127-
cpdef (double, double) calc_one(
128-
double [:, :] data_i, double [:, :] data_j,
129-
long [:] cv_desc_i, long [:] cv_desc_j,
132+
cpdef (float_t, float_t) calc_one(
133+
float_t [:, :] data_i, float_t [:, :] data_j,
134+
int_t [:] cv_desc_i, int_t [:] cv_desc_j,
130135
int n_i, int n_j,
131-
int method_idx, double [:, :] noise=None,
132-
double prior_lambda=1, double prior_weight=0.1,
136+
int method_idx, float_t [:, :] noise=None,
137+
float_t prior_lambda=1, float_t prior_weight=0.1,
133138
int weighting=1):
134139
cdef:
135140
#double [:] values = np.zeros(n_i * n_j)
136141
#double [:] weights = np.zeros(n_i * n_j)
137-
double [:] vec_i
138-
double [:] vec_j
139-
double weight, sim, weight_sum, value
142+
float_t [:] vec_i
143+
float_t [:] vec_j
144+
float_t weight, sim, weight_sum, value
140145
int i, j
141146
int n_dim = data_i.shape[1]
142-
double prior_lambda_l = prior_lambda * prior_weight
143-
double prior_weight_l = 1 + prior_weight
144-
double [:, :] log_data_i
145-
double [:, :] log_data_j
147+
float_t prior_lambda_l = prior_lambda * prior_weight
148+
float_t prior_weight_l = 1 + prior_weight
149+
float_t [:, :] log_data_i
150+
float_t [:, :] log_data_j
146151
if (method_idx > 4) or (method_idx < 1):
147152
raise ValueError('dissimilarity method not recognized!')
148153
# precompute stuff for poisson KL
@@ -173,7 +178,7 @@ cpdef (double, double) calc_one(
173178
sim, weight = euclid(data_i[i], data_j[j], n_dim)
174179
else:
175180
sim = mahalanobis(data_i[i], data_j[j], n_dim, noise)
176-
weight = <double> n_dim
181+
weight = <float_t> n_dim
177182
elif method_idx == 4: # method in ['poisson', 'poisson_cv']:
178183
sim, weight = poisson_cv(data_i[i], data_j[j], log_data_i[i], log_data_j[j], n_dim)
179184
if weight > 0:
@@ -191,8 +196,8 @@ cpdef (double, double) calc_one(
191196

192197

193198
@cython.boundscheck(False)
194-
cpdef (double, double) similarity(double [:] vec_i, double [:] vec_j, int method_idx,
195-
int n_dim, double [:, :] noise):
199+
cpdef (float_t, float_t) similarity(float_t [:] vec_i, float_t [:] vec_j, int method_idx,
200+
int n_dim, float_t [:, :] noise):
196201
"""
197202
double similarity(double [:] vec_i, double [:] vec_j, int method_idx,
198203
int n_dim, double [:, :] noise=None)
@@ -203,8 +208,8 @@ cpdef (double, double) similarity(double [:] vec_i, double [:] vec_j, int method
203208
204209
Mahalanobis distances require full measurement vectors at the moment!
205210
"""
206-
cdef double sim
207-
cdef double weight
211+
cdef float_t sim
212+
cdef float_t weight
208213
if method_idx == 1: # method == 'euclidean':
209214
sim, weight = euclid(vec_i, vec_j, n_dim)
210215
elif method_idx == 2: # method == 'correlation':
@@ -214,15 +219,15 @@ cpdef (double, double) similarity(double [:] vec_i, double [:] vec_j, int method
214219
sim, weight = euclid(vec_i, vec_j, n_dim)
215220
else:
216221
sim = mahalanobis(vec_i, vec_j, n_dim, noise)
217-
weight = <double> n_dim
222+
weight = <float_t> n_dim
218223
return sim, weight
219224

220225

221226
@cython.boundscheck(False)
222-
cdef (double, double) euclid(double [:] vec_i, double [:] vec_j, int n_dim):
227+
cdef (float_t, float_t) euclid(float_t [:] vec_i, float_t [:] vec_j, int n_dim):
223228
cdef:
224-
double sim = 0
225-
double weight = 0
229+
float_t sim = 0
230+
float_t weight = 0
226231
int i
227232
for i in range(n_dim):
228233
if not isnan(vec_i[i]) and not isnan(vec_j[i]):
@@ -233,12 +238,12 @@ cdef (double, double) euclid(double [:] vec_i, double [:] vec_j, int n_dim):
233238

234239
@cython.boundscheck(False)
235240
@cython.cdivision(True)
236-
cdef (double, double) poisson_cv(double [:] vec_i, double [:] vec_j,
237-
double [:] log_vec_i, double [:] log_vec_j,
241+
cdef (float_t, float_t) poisson_cv(float_t [:] vec_i, float_t [:] vec_j,
242+
float_t [:] log_vec_i, float_t [:] log_vec_j,
238243
int n_dim):
239244
cdef:
240-
double sim = 0
241-
double weight = 0
245+
float_t sim = 0
246+
float_t weight = 0
242247
int i
243248
for i in range(n_dim):
244249
if not isnan(vec_i[i]) and not isnan(vec_j[i]):
@@ -249,20 +254,20 @@ cdef (double, double) poisson_cv(double [:] vec_i, double [:] vec_j,
249254

250255

251256
@cython.boundscheck(False)
252-
cdef double mahalanobis(double [:] vec_i, double [:] vec_j, int n_dim,
253-
double [:, :] noise):
257+
cdef float_t mahalanobis(float_t [:] vec_i, float_t [:] vec_j, int n_dim,
258+
float_t [:, :] noise):
254259
cdef:
255-
double *vec1
256-
double *vec2
260+
float_t *vec1
261+
float_t *vec2
257262
int *finite
258263
int zero = 0
259264
int one = 1
260-
double onef = 1.0
261-
double zerof = 0.0
265+
float_t onef = 1.0
266+
float_t zerof = 0.0
262267
char trans = b'n'
263-
double sim = 0.0
268+
float_t sim = 0.0
264269
int i, j, k, l, n_finite
265-
double [:, :] noise_small
270+
float_t [:, :] noise_small
266271
finite = <int*> PyMem_Malloc(n_dim * sizeof(int))
267272
# use finite as a bool to choose the non-nan values
268273
n_finite = 0
@@ -272,11 +277,10 @@ cdef double mahalanobis(double [:] vec_i, double [:] vec_j, int n_dim,
272277
n_finite += 1
273278
else:
274279
finite[i] = 0
275-
vec1 = <double*> PyMem_Malloc(n_finite * sizeof(double))
276-
vec2 = <double*> PyMem_Malloc(n_finite * sizeof(double))
277-
vec3 = <double*> PyMem_Malloc(n_finite * sizeof(double))
278-
#noise_small = <double [:n_finite, :n_finite]> PyMem_Malloc(n_finite * n_finite * sizeof(double))
279-
noise_small = cvarray(shape=(n_finite, n_finite), itemsize=sizeof(double), format="d")
280+
vec1 = <float_t*> PyMem_Malloc(n_finite * sizeof(float_t))
281+
vec2 = <float_t*> PyMem_Malloc(n_finite * sizeof(float_t))
282+
vec3 = <float_t*> PyMem_Malloc(n_finite * sizeof(float_t))
283+
noise_small = cvarray(shape=(n_finite, n_finite), itemsize=sizeof(float_t), format="d")
280284
k = 0
281285
for i in range(n_dim):
282286
if finite[i]:
@@ -300,15 +304,15 @@ cdef double mahalanobis(double [:] vec_i, double [:] vec_j, int n_dim,
300304

301305
@cython.boundscheck(False)
302306
@cython.cdivision(True)
303-
cdef (double, double) correlation(double [:] vec_i, double [:] vec_j, int n_dim):
307+
cdef (float_t, float_t) correlation(float_t [:] vec_i, float_t [:] vec_j, int n_dim):
304308
cdef:
305-
double si = 0.0
306-
double sj = 0.0
307-
double si2 = 0.0
308-
double sj2 = 0.0
309-
double sij = 0.0
310-
double sim
311-
double weight = 0
309+
float_t si = 0.0
310+
float_t sj = 0.0
311+
float_t si2 = 0.0
312+
float_t sj2 = 0.0
313+
float_t sij = 0.0
314+
float_t sim
315+
float_t weight = 0
312316
int i
313317
for i in range(n_dim):
314318
if not isnan(vec_i[i]) and not isnan(vec_j[i]):

src/rsatoolbox/rdm/calc_unbalanced.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -91,14 +91,14 @@ def calc_rdm_unbalanced(dataset: SingleOrMultiDataset, method='euclidean',
9191
dataset.obs_descriptors[descriptor])
9292
# unique_cond = set(dataset.obs_descriptors[descriptor])
9393
if cv_descriptor is None:
94-
cv_desc_int = np.arange(dataset.n_obs, dtype=int)
94+
cv_desc_int = np.arange(dataset.n_obs, dtype=np.int64)
9595
crossval = 0
9696
else:
9797
_, indices = np.unique(
9898
dataset.obs_descriptors[cv_descriptor],
9999
return_inverse=True
100100
)
101-
cv_desc_int = indices.astype(int)
101+
cv_desc_int = indices.astype(np.int64)
102102
crossval = 1
103103
if method == 'euclidean':
104104
method_idx = 1
@@ -114,7 +114,7 @@ def calc_rdm_unbalanced(dataset: SingleOrMultiDataset, method='euclidean',
114114
weight_idx = 0
115115
else:
116116
weight_idx = 1
117-
cond_indices_int = cond_indices.astype(int)
117+
cond_indices_int = cond_indices.astype(np.int64)
118118
rdm = calc(
119119
ensure_double(dataset.measurements),
120120
cond_indices_int,

0 commit comments

Comments
 (0)