Skip to content

Fixed #1006 RuntimeWarning divide by zero encountered in divide #1012

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 9 commits into
base: main
Choose a base branch
from
1 change: 1 addition & 0 deletions stumpy/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -2254,6 +2254,7 @@ def preprocess_diagonal(

M_T, Σ_T = compute_mean_std(T, m)
Σ_T[T_subseq_isconstant] = 1.0 # Avoid divide by zero in next inversion step
Σ_T[~T_subseq_isfinite] = np.nan
Σ_T_inverse = 1.0 / Σ_T
M_T_m_1, _ = compute_mean_std(T, m - 1)

Expand Down
3 changes: 2 additions & 1 deletion stumpy/gpu_ostinato.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,9 +113,10 @@ def gpu_ostinato(Ts, m, device_id=0, normalize=True, p=2.0, Ts_subseq_isconstant
M_Ts = [None] * len(Ts)
Σ_Ts = [None] * len(Ts)
for i, T in enumerate(Ts):
Ts_copy[i], M_Ts[i], Σ_Ts[i], Ts_subseq_isconstant[i] = core.preprocess(
_, M_Ts[i], Σ_Ts[i], Ts_subseq_isconstant[i] = core.preprocess(
T, m, copy=True, T_subseq_isconstant=Ts_subseq_isconstant[i]
)
Ts_copy[i] = T.copy()

bsf_radius, bsf_Ts_idx, bsf_subseq_idx = _ostinato(
Ts_copy,
Expand Down
18 changes: 9 additions & 9 deletions stumpy/ostinato.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,10 +58,9 @@ def _across_series_nearest_neighbors(
nns_subseq_idx = np.zeros(k, dtype=np.int64)

for i in range(k):
QT = core.sliding_dot_product(Ts[Ts_idx][subseq_idx : subseq_idx + m], Ts[i])
distance_profile = core._mass(
Q,
Ts[i],
QT = core.sliding_dot_product(Q, Ts[i])
Copy link
Collaborator Author

@NimaSarajpoor NimaSarajpoor Jul 26, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since Q and Ts[i] are UNPROCESSED, QT[idx] can get meaningless value for a non-finite subsequence that starts at idx. Better to just replace them with np.nan. So, for example, we can do something like this after computing QT (QT = core.sliding_dot_product(Q, Ts[i])):

# post-processing QT
if np.any(~np.isfinite(Q)):
     QT[:] = np.nan

QT[~np.isfinite(M_Ts[i])] = np.nan

Note that QT[idx] will be ignored though eventually once it comes to computing its corresponding value in the distance profile via core.calculate_distance_profile (see a few lines below). In other words, we do not need the "post-processing of QT" but I think it makes code clearer.

Any concerns?


[Update]
Note: Since we pass inputs with non-finite values to core.calculate_distance_profile (Not only QT but also M_T), maybe I should better stop here and try to address #1011 first. Then come back and resume the work on this PR.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I understand the need to replace the ugly Ts[Ts_idx][subseq_idx : subseq_idx + m] with Q. That's fine but

QT = core.sliding_dot_product(Q, Ts[i])

doesn't seem to require any post-processing when either Q or T contain np.nan.

Copy link
Collaborator Author

@NimaSarajpoor NimaSarajpoor Jul 27, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My bad! You are right. No need for post-processing QT

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@seanlaw

maybe I should better stop here and try to address #1011 first. Then come back and resume the work on this PR.

I did an experiment and shared the result in #1011. We can either work on that and then come back here. Or, on the second thought, we can still merge this (after I resolve the merge conflicts), and then work on #1011.

What do you think?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since we have a merge conflict here anyways, let's fix #1011 first and then come back here and strikethrough the comment:

Note: Since we pass inputs with non-finite values to core.calculate_distance_profile (Not only QT but also M_T), maybe I should better stop here and try to address #1011 first. Then come back and resume the work on this PR.

so that we are focused on this issue/PR instead of being distracted by jumping back and forth. I am getting confused by having so many things open and so it feels disorganized

distance_profile = core.calculate_distance_profile(
m,
QT,
M_Ts[Ts_idx][subseq_idx],
Σ_Ts[Ts_idx][subseq_idx],
Expand Down Expand Up @@ -256,9 +255,8 @@ def _ostinato(
(
radius,
np.min(
core._mass(
Ts[j][q : q + m],
Ts[i],
core.calculate_distance_profile(
m,
QT,
M_Ts[j][q],
Σ_Ts[j][q],
Expand Down Expand Up @@ -373,9 +371,10 @@ def ostinato(Ts, m, normalize=True, p=2.0, Ts_subseq_isconstant=None):
Ts_subseq_isconstant = [None] * len(Ts)

for i, T in enumerate(Ts):
Ts_copy[i], M_Ts[i], Σ_Ts[i], Ts_subseq_isconstant[i] = core.preprocess(
_, M_Ts[i], Σ_Ts[i], Ts_subseq_isconstant[i] = core.preprocess(
T, m, copy=True, T_subseq_isconstant=Ts_subseq_isconstant[i]
)
Ts_copy[i] = T.copy()

bsf_radius, bsf_Ts_idx, bsf_subseq_idx = _ostinato(
Ts_copy, m, M_Ts, Σ_Ts, Ts_subseq_isconstant
Expand Down Expand Up @@ -516,9 +515,10 @@ def ostinatoed(client, Ts, m, normalize=True, p=2.0, Ts_subseq_isconstant=None):
if Ts_subseq_isconstant is None:
Ts_subseq_isconstant = [None] * len(Ts)
for i, T in enumerate(Ts):
Ts_copy[i], M_Ts[i], Σ_Ts[i], Ts_subseq_isconstant[i] = core.preprocess(
_, M_Ts[i], Σ_Ts[i], Ts_subseq_isconstant[i] = core.preprocess(
T, m, copy=True, T_subseq_isconstant=Ts_subseq_isconstant[i]
)
Ts_copy[i] = T.copy()

bsf_radius, bsf_Ts_idx, bsf_subseq_idx = _ostinato(
Ts_copy,
Expand Down
12 changes: 12 additions & 0 deletions tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -880,9 +880,11 @@ def test_preprocess_non_normalized():
def test_preprocess_diagonal():
T = np.array([0, np.nan, 2, 3, 4, 5, 6, 7, np.inf, 9])
m = 3
T_subseq_isfinite = core.rolling_isfinite(T, m)

ref_T = np.array([0, 0, 2, 3, 4, 5, 6, 7, 0, 9], dtype=float)
ref_M, ref_Σ = naive.compute_mean_std(ref_T, m)
ref_Σ[~T_subseq_isfinite] = np.nan
ref_Σ_inverse = 1.0 / ref_Σ
ref_M_m_1, _ = naive.compute_mean_std(ref_T, m - 1)

Expand Down Expand Up @@ -1753,3 +1755,13 @@ def test_process_isconstant_2d():
T_subseq_isconstant_comp = core.process_isconstant(T, m, T_subseq_isconstant)

npt.assert_array_equal(T_subseq_isconstant_ref, T_subseq_isconstant_comp)


@pytest.mark.filterwarnings(
"error:divide by zero encountered in divide", category=RuntimeWarning
)
def test_preprocess_diagonal_divide_by_zero():
T = np.random.rand(64)
m = 3
T[:m] = np.nan
core.preprocess_diagonal(T, m)
21 changes: 21 additions & 0 deletions tests/test_ostinato.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,3 +249,24 @@ def test_extract_several_consensus_ostinatoed(dask_cluster):
Ts_ref[i][np.isfinite(Ts_ref[i])],
Ts_comp[i][np.isfinite(Ts_comp[i])],
)


@pytest.mark.filterwarnings(
"error:divide by zero encountered in divide", category=RuntimeWarning
)
def test_divide_by_zero_ostinato():
Ts = [np.random.rand(n) for n in [64, 128, 256]]
m = 5
Ts[0][:m] = np.nan
stumpy.ostinato(Ts, m)


@pytest.mark.filterwarnings(
"error:divide by zero encountered in divide", category=RuntimeWarning
)
def test_divide_by_zero_ostinatoed(dask_cluster):
Ts = [np.random.rand(n) for n in [64, 128, 256]]
m = 5
Ts[0][:m] = np.nan
with Client(dask_cluster) as dask_client:
stumpy.ostinatoed(dask_client, Ts, m)
Loading