Skip to content
Open
1 change: 1 addition & 0 deletions stumpy/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -2254,6 +2254,7 @@ def preprocess_diagonal(

M_T, Σ_T = compute_mean_std(T, m)
Σ_T[T_subseq_isconstant] = 1.0 # Avoid divide by zero in next inversion step
Σ_T[~T_subseq_isfinite] = np.nan
Σ_T_inverse = 1.0 / Σ_T
M_T_m_1, _ = compute_mean_std(T, m - 1)

Expand Down
3 changes: 2 additions & 1 deletion stumpy/gpu_ostinato.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,9 +113,10 @@ def gpu_ostinato(Ts, m, device_id=0, normalize=True, p=2.0, Ts_subseq_isconstant
M_Ts = [None] * len(Ts)
Σ_Ts = [None] * len(Ts)
for i, T in enumerate(Ts):
Ts_copy[i], M_Ts[i], Σ_Ts[i], Ts_subseq_isconstant[i] = core.preprocess(
_, M_Ts[i], Σ_Ts[i], Ts_subseq_isconstant[i] = core.preprocess(
T, m, copy=True, T_subseq_isconstant=Ts_subseq_isconstant[i]
)
Ts_copy[i] = T.copy()

bsf_radius, bsf_Ts_idx, bsf_subseq_idx = _ostinato(
Ts_copy,
Expand Down
18 changes: 9 additions & 9 deletions stumpy/ostinato.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,10 +58,9 @@ def _across_series_nearest_neighbors(
nns_subseq_idx = np.zeros(k, dtype=np.int64)

for i in range(k):
QT = core.sliding_dot_product(Ts[Ts_idx][subseq_idx : subseq_idx + m], Ts[i])
distance_profile = core._mass(
Q,
Ts[i],
QT = core.sliding_dot_product(Q, Ts[i])
Copy link
Collaborator Author

@NimaSarajpoor NimaSarajpoor Jul 26, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since Q and Ts[i] are UNPROCESSED, QT[idx] can get meaningless value for a non-finite subsequence that starts at idx. Better to just replace them with np.nan. So, for example, we can do something like this after computing QT (QT = core.sliding_dot_product(Q, Ts[i])):

# post-processing QT
if np.any(~np.isfinite(Q)):
     QT[:] = np.nan

QT[~np.isfinite(M_Ts[i])] = np.nan

Note that QT[idx] will be ignored though eventually once it comes to computing its corresponding value in the distance profile via core.calculate_distance_profile (see a few lines below). In other words, we do not need the "post-processing of QT" but I think it makes code clearer.

Any concerns?


[Update]
Note: Since we pass inputs with non-finite values to core.calculate_distance_profile (Not only QT but also M_T), maybe I should better stop here and try to address #1011 first. Then come back and resume the work on this PR.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I understand the need to replace the ugly Ts[Ts_idx][subseq_idx : subseq_idx + m] with Q. That's fine but

QT = core.sliding_dot_product(Q, Ts[i])

doesn't seem to require any post-processing when either Q or T contain np.nan.

Copy link
Collaborator Author

@NimaSarajpoor NimaSarajpoor Jul 27, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My bad! You are right. No need for post-processing QT

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@seanlaw

maybe I should better stop here and try to address #1011 first. Then come back and resume the work on this PR.

I did an experiment and shared the result in #1011. We can either work on that and then come back here. Or, on the second thought, we can still merge this (after I resolve the merge conflicts), and then work on #1011.

What do you think?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since we have a merge conflict here anyways, let's fix #1011 first and then come back here and strikethrough the comment:

Note: Since we pass inputs with non-finite values to core.calculate_distance_profile (Not only QT but also M_T), maybe I should better stop here and try to address #1011 first. Then come back and resume the work on this PR.

so that we are focused on this issue/PR instead of being distracted by jumping back and forth. I am getting confused by having so many things open and so it feels disorganized

distance_profile = core.calculate_distance_profile(
m,
QT,
M_Ts[Ts_idx][subseq_idx],
Σ_Ts[Ts_idx][subseq_idx],
Expand Down Expand Up @@ -256,9 +255,8 @@ def _ostinato(
(
radius,
np.min(
core._mass(
Ts[j][q : q + m],
Ts[i],
core.calculate_distance_profile(
m,
QT,
M_Ts[j][q],
Σ_Ts[j][q],
Expand Down Expand Up @@ -373,9 +371,10 @@ def ostinato(Ts, m, normalize=True, p=2.0, Ts_subseq_isconstant=None):
Ts_subseq_isconstant = [None] * len(Ts)

for i, T in enumerate(Ts):
Ts_copy[i], M_Ts[i], Σ_Ts[i], Ts_subseq_isconstant[i] = core.preprocess(
_, M_Ts[i], Σ_Ts[i], Ts_subseq_isconstant[i] = core.preprocess(
T, m, copy=True, T_subseq_isconstant=Ts_subseq_isconstant[i]
)
Ts_copy[i] = T.copy()

bsf_radius, bsf_Ts_idx, bsf_subseq_idx = _ostinato(
Ts_copy, m, M_Ts, Σ_Ts, Ts_subseq_isconstant
Expand Down Expand Up @@ -516,9 +515,10 @@ def ostinatoed(client, Ts, m, normalize=True, p=2.0, Ts_subseq_isconstant=None):
if Ts_subseq_isconstant is None:
Ts_subseq_isconstant = [None] * len(Ts)
for i, T in enumerate(Ts):
Ts_copy[i], M_Ts[i], Σ_Ts[i], Ts_subseq_isconstant[i] = core.preprocess(
_, M_Ts[i], Σ_Ts[i], Ts_subseq_isconstant[i] = core.preprocess(
T, m, copy=True, T_subseq_isconstant=Ts_subseq_isconstant[i]
)
Ts_copy[i] = T.copy()

bsf_radius, bsf_Ts_idx, bsf_subseq_idx = _ostinato(
Ts_copy,
Expand Down
12 changes: 12 additions & 0 deletions tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -880,9 +880,11 @@ def test_preprocess_non_normalized():
def test_preprocess_diagonal():
T = np.array([0, np.nan, 2, 3, 4, 5, 6, 7, np.inf, 9])
m = 3
T_subseq_isfinite = core.rolling_isfinite(T, m)

ref_T = np.array([0, 0, 2, 3, 4, 5, 6, 7, 0, 9], dtype=float)
ref_M, ref_Σ = naive.compute_mean_std(ref_T, m)
ref_Σ[~T_subseq_isfinite] = np.nan
ref_Σ_inverse = 1.0 / ref_Σ
ref_M_m_1, _ = naive.compute_mean_std(ref_T, m - 1)

Expand Down Expand Up @@ -1753,3 +1755,13 @@ def test_process_isconstant_2d():
T_subseq_isconstant_comp = core.process_isconstant(T, m, T_subseq_isconstant)

npt.assert_array_equal(T_subseq_isconstant_ref, T_subseq_isconstant_comp)


@pytest.mark.filterwarnings(
"error:divide by zero encountered in divide", category=RuntimeWarning
)
def test_preprocess_diagonal_divide_by_zero():
T = np.random.rand(64)
m = 3
T[:m] = np.nan
core.preprocess_diagonal(T, m)
21 changes: 21 additions & 0 deletions tests/test_ostinato.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,3 +249,24 @@ def test_extract_several_consensus_ostinatoed(dask_cluster):
Ts_ref[i][np.isfinite(Ts_ref[i])],
Ts_comp[i][np.isfinite(Ts_comp[i])],
)


@pytest.mark.filterwarnings(
"error:divide by zero encountered in divide", category=RuntimeWarning
)
def test_divide_by_zero_ostinato():
Ts = [np.random.rand(n) for n in [64, 128, 256]]
m = 5
Ts[0][:m] = np.nan
stumpy.ostinato(Ts, m)


@pytest.mark.filterwarnings(
"error:divide by zero encountered in divide", category=RuntimeWarning
)
def test_divide_by_zero_ostinatoed(dask_cluster):
Ts = [np.random.rand(n) for n in [64, 128, 256]]
m = 5
Ts[0][:m] = np.nan
with Client(dask_cluster) as dask_client:
stumpy.ostinatoed(dask_client, Ts, m)