Skip to content

Fixed #1061 failure in snippet unit tests due to the instability of np.sum for array with many small floating point numbers #1080

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions tests/naive.py
Original file line number Diff line number Diff line change
Expand Up @@ -854,7 +854,7 @@ def get_array_ranges(a, n_chunks, truncate):
sum = 0
for i in range(a.shape[0]):
sum += a[i]
if sum > a.sum() / n_chunks:
if sum > math.fsum(a) / n_chunks:
out[ranges_idx, 0] = range_start_idx
out[ranges_idx, 1] = min(i + 1, a.shape[0]) # Exclusive stop index
# Reset and Update
Expand Down Expand Up @@ -1614,15 +1614,15 @@ def mpdist_snippets(
for snippet_idx in range(k):
min_area = np.inf
for i in range(D.shape[0]):
profile_area = np.sum(np.minimum(D[i], Q))
profile_area = math.fsum(np.minimum(D[i], Q))
if min_area > profile_area:
min_area = profile_area
idx = i

snippets[snippet_idx] = T[indices[idx] : indices[idx] + m]
snippets_indices[snippet_idx] = indices[idx]
snippets_profiles[snippet_idx] = D[idx]
snippets_areas[snippet_idx] = np.sum(np.minimum(D[idx], Q))
snippets_areas[snippet_idx] = math.fsum(np.minimum(D[idx], Q))

Q = np.minimum(D[idx], Q)

Expand Down Expand Up @@ -1737,15 +1737,15 @@ def aampdist_snippets(
for snippet_idx in range(k):
min_area = np.inf
for i in range(D.shape[0]):
profile_area = np.sum(np.minimum(D[i], Q))
profile_area = math.fsum(np.minimum(D[i], Q))
if min_area > profile_area:
min_area = profile_area
idx = i

snippets[snippet_idx] = T[indices[idx] : indices[idx] + m]
snippets_indices[snippet_idx] = indices[idx]
snippets_profiles[snippet_idx] = D[idx]
snippets_areas[snippet_idx] = np.sum(np.minimum(D[idx], Q))
snippets_areas[snippet_idx] = math.fsum(np.minimum(D[idx], Q))

Q = np.minimum(D[idx], Q)

Expand Down
6 changes: 5 additions & 1 deletion tests/test_non_normalized_decorator.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import math

import naive
import numpy as np
import numpy.testing as npt
Expand Down Expand Up @@ -44,7 +46,9 @@ def test_mass():
Q = np.random.rand(10)
T = np.random.rand(20)
T, T_subseq_isfinite = stumpy.core.preprocess_non_normalized(T, 10)
T_squared = np.sum(stumpy.core.rolling_window(T * T, Q.shape[0]), axis=-1)

arr = stumpy.core.rolling_window(T * T, Q.shape[0])
T_squared = np.array([math.fsum(arr[i]) for i in range(arr.shape[0])])
ref = stumpy.core.mass_absolute(Q, T)
comp = stumpy.core.mass(Q, T, M_T=T_subseq_isfinite, normalize=False)
npt.assert_almost_equal(ref, comp)
Expand Down
222 changes: 190 additions & 32 deletions tests/test_precision.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,15 +115,54 @@ def test_calculate_squared_distance():
npt.assert_almost_equal(ref, comp, decimal=14)


def test_snippets():
@pytest.mark.filterwarnings("ignore", category=NumbaPerformanceWarning)
@patch("stumpy.config.STUMPY_THREADS_PER_BLOCK", TEST_THREADS_PER_BLOCK)
def test_distance_symmetry_property_in_gpu():
if not cuda.is_available(): # pragma: no cover
pytest.skip("Skipping Tests No GPUs Available")

# This test function raises an error if the distance between a subsequence
# and another one does not satisfy the symmetry property.
seed = 332
np.random.seed(seed)
T = np.random.uniform(-1000.0, 1000.0, [64])
m = 3

i, j = 2, 10
# M_T, Σ_T = core.compute_mean_std(T, m)
# Σ_T[i] is `650.912209452633`
# Σ_T[j] is `722.0717285148525`

# This test raises an error if arithmetic operation in ...
# ... `gpu_stump._compute_and_update_PI_kernel` does not
# generates the same result if values of variable for mean and std
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"generate"

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"same values AS"

# are swapped.

T_A = T[i : i + m]
T_B = T[j : j + m]

mp_AB = stumpy.gpu_stump(T_A, m, T_B)
mp_BA = stumpy.gpu_stump(T_B, m, T_A)

d_ij = mp_AB[0, 0]
d_ji = mp_BA[0, 0]

comp = d_ij - d_ji
ref = 0.0

npt.assert_almost_equal(comp, ref, decimal=15)


def test_snippets_rare_case_1():
# This test function raises an error if there is a considerable loss of precision
# that violates the symmetry property of a distance measure.
seed = 332
np.random.seed(seed)
T = np.random.uniform(-1000.0, 1000.0, 64)

m = 10
k = 3
s = 3
seed = 332
np.random.seed(seed)
T = np.random.uniform(-1000.0, 1000.0, [64])

isconstant_custom_func = functools.partial(
naive.isconstant_func_stddev_threshold, quantile_threshold=0.05
Expand Down Expand Up @@ -169,12 +208,91 @@ def test_snippets():
T, m, k, s=s, mpdist_T_subseq_isconstant=isconstant_custom_func
)

npt.assert_almost_equal(
ref_indices, cmp_indices, decimal=config.STUMPY_TEST_PRECISION
)
npt.assert_almost_equal(
ref_snippets, cmp_snippets, decimal=config.STUMPY_TEST_PRECISION
)
npt.assert_almost_equal(
ref_profiles, cmp_profiles, decimal=config.STUMPY_TEST_PRECISION
)
npt.assert_almost_equal(
ref_fractions, cmp_fractions, decimal=config.STUMPY_TEST_PRECISION
)
npt.assert_almost_equal(ref_areas, cmp_areas, decimal=config.STUMPY_TEST_PRECISION)
npt.assert_almost_equal(ref_regimes, cmp_regimes)

if not numba.config.DISABLE_JIT: # pragma: no cover
# Revert fastmath flag back to their default values
fastmath._reset("core", "_calculate_squared_distance")
cache._recompile()


def test_snippets_rare_case_2():
# This test fails when the naive implementation of snippet,
# i.e. `naive.mpdist_snippets`, uses `np.sum` instead of
# math.fsum when calculating the sum of many small
# floating point numbers. For more details, see issue #1061

seed = 1615
np.random.seed(seed)
T = np.random.uniform(-1000.0, 1000.0, 64)

m = 10
s = 3
k = 3

isconstant_custom_func = functools.partial(
naive.isconstant_func_stddev_threshold, quantile_threshold=0.05
)
(
ref_snippets,
ref_indices,
ref_profiles,
ref_fractions,
ref_areas,
ref_regimes,
) = naive.mpdist_snippets(
T, m, k, s=s, mpdist_T_subseq_isconstant=isconstant_custom_func
)

(
cmp_snippets,
cmp_indices,
cmp_profiles,
cmp_fractions,
cmp_areas,
cmp_regimes,
) = stumpy.snippets(T, m, k, s=s, mpdist_T_subseq_isconstant=isconstant_custom_func)

if (
not np.allclose(ref_snippets, cmp_snippets) and not numba.config.DISABLE_JIT
): # pragma: no cover
# Revise fastmath flags by removing reassoc (to improve precision),
# recompile njit functions, and re-compute snippets.
fastmath._set(
"core", "_calculate_squared_distance", {"nsz", "arcp", "contract", "afn"}
)
cache._recompile()

(
cmp_snippets,
cmp_indices,
cmp_profiles,
cmp_fractions,
cmp_areas,
cmp_regimes,
) = stumpy.snippets(
T, m, k, s=s, mpdist_T_subseq_isconstant=isconstant_custom_func
)

npt.assert_almost_equal(
ref_indices, cmp_indices, decimal=config.STUMPY_TEST_PRECISION
)
npt.assert_almost_equal(
ref_snippets, cmp_snippets, decimal=config.STUMPY_TEST_PRECISION
)
npt.assert_almost_equal(
ref_profiles, cmp_profiles, decimal=config.STUMPY_TEST_PRECISION
)
Expand All @@ -190,39 +308,79 @@ def test_snippets():
cache._recompile()


@pytest.mark.filterwarnings("ignore", category=NumbaPerformanceWarning)
@patch("stumpy.config.STUMPY_THREADS_PER_BLOCK", TEST_THREADS_PER_BLOCK)
def test_distance_symmetry_property_in_gpu():
if not cuda.is_available(): # pragma: no cover
pytest.skip("Skipping Tests No GPUs Available")
def test_snippets_rare_case_3():
# This test fails when the naive implementation of snippet,
# i.e. `naive.mpdist_snippets`, uses `np.sum` instead of
# math.fsum when calculating the sum of many small
# floating point numbers. For more details, see issue #1061

# This test function raises an error if the distance between a subsequence
# and another one does not satisfy the symmetry property.
seed = 332
seed = 2636
np.random.seed(seed)
T = np.random.uniform(-1000.0, 1000.0, [64])
m = 3

i, j = 2, 10
# M_T, Σ_T = core.compute_mean_std(T, m)
# Σ_T[i] is `650.912209452633`
# Σ_T[j] is `722.0717285148525`
T = np.random.uniform(-1000.0, 1000.0, 64)
m = 9
s = 3
k = 3

# This test raises an error if arithmetic operation in ...
# ... `gpu_stump._compute_and_update_PI_kernel` does not
# generates the same result if values of variable for mean and std
# are swapped.
isconstant_custom_func = functools.partial(
naive.isconstant_func_stddev_threshold, quantile_threshold=0.05
)
(
ref_snippets,
ref_indices,
ref_profiles,
ref_fractions,
ref_areas,
ref_regimes,
) = naive.mpdist_snippets(
T, m, k, s=s, mpdist_T_subseq_isconstant=isconstant_custom_func
)

T_A = T[i : i + m]
T_B = T[j : j + m]
(
cmp_snippets,
cmp_indices,
cmp_profiles,
cmp_fractions,
cmp_areas,
cmp_regimes,
) = stumpy.snippets(T, m, k, s=s, mpdist_T_subseq_isconstant=isconstant_custom_func)

mp_AB = stumpy.gpu_stump(T_A, m, T_B)
mp_BA = stumpy.gpu_stump(T_B, m, T_A)
if (
not np.allclose(ref_snippets, cmp_snippets) and not numba.config.DISABLE_JIT
): # pragma: no cover
# Revise fastmath flags by removing reassoc (to improve precision),
# recompile njit functions, and re-compute snippets.
fastmath._set(
"core", "_calculate_squared_distance", {"nsz", "arcp", "contract", "afn"}
)
cache._recompile()

d_ij = mp_AB[0, 0]
d_ji = mp_BA[0, 0]
(
cmp_snippets,
cmp_indices,
cmp_profiles,
cmp_fractions,
cmp_areas,
cmp_regimes,
) = stumpy.snippets(
T, m, k, s=s, mpdist_T_subseq_isconstant=isconstant_custom_func
)

comp = d_ij - d_ji
ref = 0.0
npt.assert_almost_equal(
ref_indices, cmp_indices, decimal=config.STUMPY_TEST_PRECISION
)
npt.assert_almost_equal(
ref_snippets, cmp_snippets, decimal=config.STUMPY_TEST_PRECISION
)
npt.assert_almost_equal(
ref_profiles, cmp_profiles, decimal=config.STUMPY_TEST_PRECISION
)
npt.assert_almost_equal(
ref_fractions, cmp_fractions, decimal=config.STUMPY_TEST_PRECISION
)
npt.assert_almost_equal(ref_areas, cmp_areas, decimal=config.STUMPY_TEST_PRECISION)
npt.assert_almost_equal(ref_regimes, cmp_regimes)

npt.assert_almost_equal(comp, ref, decimal=15)
if not numba.config.DISABLE_JIT: # pragma: no cover
# Revert fastmath flag back to their default values
fastmath._reset("core", "_calculate_squared_distance")
cache._recompile()
Loading