From dea0c575be0fd565a3bd002c256cfbc0528d860b Mon Sep 17 00:00:00 2001 From: Sebastian Berg Date: Wed, 3 Jul 2024 13:40:33 +0000 Subject: [PATCH 1/9] MAINT: Small NumPy 2 related fixes This applies some smaller NumPy 2 related fixes. With (in progress) cupy 13.2 fixups, the single gpu test suite seems to be doing fine (not quite finished, I may push more commits, but can also open a new PR). The one thing I noticed that is a bit annoying is that hdbscan is not yet released for NumPy 2; is that actually still required since I think sklearn has a version? (I don't expect this to be a problem for long, but there is at least one odd test failure trying to make hdbscan work in https://github.com/scikit-learn-contrib/hdbscan/pull/644) --- python/cuml/cuml/_thirdparty/sklearn/utils/sparsefuncs.py | 2 +- python/cuml/cuml/internals/array.py | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/python/cuml/cuml/_thirdparty/sklearn/utils/sparsefuncs.py b/python/cuml/cuml/_thirdparty/sklearn/utils/sparsefuncs.py index e0697b98ce..7bea44a366 100644 --- a/python/cuml/cuml/_thirdparty/sklearn/utils/sparsefuncs.py +++ b/python/cuml/cuml/_thirdparty/sklearn/utils/sparsefuncs.py @@ -214,7 +214,7 @@ def _sparse_min_or_max(X, axis, min_or_max): if np.isnan(m): if 'nan' in min_or_max: m = 0 - elif X.nnz != cpu_np.product(X.shape): + elif X.nnz != cpu_np.prod(X.shape): if 'min' in min_or_max: m = m if m <= 0 else 0 else: diff --git a/python/cuml/cuml/internals/array.py b/python/cuml/cuml/internals/array.py index 6b664506ae..6873265261 100644 --- a/python/cuml/cuml/internals/array.py +++ b/python/cuml/cuml/internals/array.py @@ -1168,6 +1168,9 @@ def from_input( ) make_copy = force_contiguous and not arr.is_contiguous + if not make_copy: + # NumPy now interprets False as never copy, so must use None + make_copy = None if ( not fail_on_order and order != arr.order and order != "K" From 3565feff0c8f8b7351884b2a0ddbaa4820787bc5 Mon Sep 17 00:00:00 2001 From: Sebastian Berg 
Date: Wed, 3 Jul 2024 14:04:19 +0000 Subject: [PATCH 2/9] TST: `asfarray` is removed, it is the same as `asarray` here --- python/cuml/cuml/tests/test_metrics.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/python/cuml/cuml/tests/test_metrics.py b/python/cuml/cuml/tests/test_metrics.py index 6e92535cf7..eaed36fb5b 100644 --- a/python/cuml/cuml/tests/test_metrics.py +++ b/python/cuml/cuml/tests/test_metrics.py @@ -1065,7 +1065,7 @@ def test_pairwise_distances(metric: str, matrix_size, is_col_major): cp.testing.assert_array_almost_equal(S, S2, decimal=compare_precision) # Change precision of one parameter - Y = np.asfarray(Y, dtype=np.float32) + Y = np.asarray(Y, dtype=np.float32) S = pairwise_distances(X, Y, metric=metric) S2 = ref_dense_pairwise_dist(X, Y, metric=metric) cp.testing.assert_array_almost_equal(S, S2, decimal=compare_precision) @@ -1074,8 +1074,8 @@ def test_pairwise_distances(metric: str, matrix_size, is_col_major): compare_precision = 2 # Change precision of both parameters to float - X = np.asfarray(X, dtype=np.float32) - Y = np.asfarray(Y, dtype=np.float32) + X = np.asarray(X, dtype=np.float32) + Y = np.asarray(Y, dtype=np.float32) S = pairwise_distances(X, Y, metric=metric) S2 = ref_dense_pairwise_dist(X, Y, metric=metric) cp.testing.assert_array_almost_equal(S, S2, decimal=compare_precision) @@ -1132,8 +1132,8 @@ def test_pairwise_distances_sklearn_comparison(metric: str, matrix_size): # For fp32, compare at 4 decimals, (3 places less than the ~7 max) compare_precision = 4 - X = np.asfarray(X, dtype=np.float32) - Y = np.asfarray(Y, dtype=np.float32) + X = np.asarray(X, dtype=np.float32) + Y = np.asarray(Y, dtype=np.float32) # Compare to sklearn, fp32 S = pairwise_distances(X, Y, metric=metric) @@ -1228,7 +1228,7 @@ def test_pairwise_distances_exceptions(): X_int = rng.randint(10, size=(5, 4)) X_double = rng.random_sample((5, 4)) - X_float = np.asfarray(X_double, dtype=np.float32) + X_float = np.asarray(X_double, 
dtype=np.float32) X_bool = rng.choice([True, False], size=(5, 4)) # Test int inputs (only float/double accepted at this time) From 449466cd1fd2e70dcc3a76ffc09553acf606caab Mon Sep 17 00:00:00 2001 From: Sebastian Berg Date: Wed, 3 Jul 2024 14:08:31 +0000 Subject: [PATCH 3/9] TST: Avoid behavior change in return_inverse of unique Even if NumPy reverts, this is not a problem. --- python/cuml/cuml/tests/test_make_classification.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/python/cuml/cuml/tests/test_make_classification.py b/python/cuml/cuml/tests/test_make_classification.py index d5c38dc651..a4196ec476 100644 --- a/python/cuml/cuml/tests/test_make_classification.py +++ b/python/cuml/cuml/tests/test_make_classification.py @@ -117,6 +117,9 @@ def test_make_classification_informative_features(): signs = np.sign(cp.asnumpy(X)) signs = signs.view(dtype="|S{0}".format(signs.strides[0])) unique_signs, cluster_index = np.unique(signs, return_inverse=True) + # NumPy 2 has a behavior change (maybe regression) for the inverse shape + # https://github.com/numpy/numpy/issues/26738. This always works: + cluster_index = cluster_index.reshape(-1) assert ( len(unique_signs) == n_clusters From bc7f7c1dbef5608ae37bfd02dc60cc76a99480b1 Mon Sep 17 00:00:00 2001 From: Sebastian Berg Date: Wed, 3 Jul 2024 15:18:27 +0000 Subject: [PATCH 4/9] TST: Use deepcopy for copying the random state I am not actually sure what changed here, but deepcopy seems sensible? 
--- python/cuml/cuml/tests/test_umap.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/python/cuml/cuml/tests/test_umap.py b/python/cuml/cuml/tests/test_umap.py index 6faa4ad8d3..6916ab2488 100644 --- a/python/cuml/cuml/tests/test_umap.py +++ b/python/cuml/cuml/tests/test_umap.py @@ -420,9 +420,9 @@ def get_embedding(n_components, random_state): ) return reducer.fit_transform(data, convert_dtype=True) - state = copy.copy(random_state) + state = copy.deepcopy(random_state) cuml_embedding1 = get_embedding(n_components, state) - state = copy.copy(random_state) + state = copy.deepcopy(random_state) cuml_embedding2 = get_embedding(n_components, state) assert not np.isnan(cuml_embedding1).any() @@ -475,9 +475,9 @@ def get_embedding(n_components, random_state): reducer.fit(fit_data, convert_dtype=True) return reducer.transform(transform_data, convert_dtype=True) - state = copy.copy(random_state) + state = copy.deepcopy(random_state) cuml_embedding1 = get_embedding(n_components, state) - state = copy.copy(random_state) + state = copy.deepcopy(random_state) cuml_embedding2 = get_embedding(n_components, state) assert not np.isnan(cuml_embedding1).any() From 3064e568a256185217e0ef11fb7afa3ee40e50c6 Mon Sep 17 00:00:00 2001 From: Sebastian Berg Date: Wed, 3 Jul 2024 15:32:14 +0000 Subject: [PATCH 5/9] STY: Fixup copyright/pre-commit --- python/cuml/cuml/internals/array.py | 4 ++-- python/cuml/cuml/tests/test_make_classification.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/python/cuml/cuml/internals/array.py b/python/cuml/cuml/internals/array.py index 6873265261..08f01b4336 100644 --- a/python/cuml/cuml/internals/array.py +++ b/python/cuml/cuml/internals/array.py @@ -1169,8 +1169,8 @@ def from_input( make_copy = force_contiguous and not arr.is_contiguous if not make_copy: - # NumPy now interprets False as never copy, so must use None - make_copy = None + # NumPy now interprets False as never copy, so must use None + 
make_copy = None if ( not fail_on_order and order != arr.order and order != "K" diff --git a/python/cuml/cuml/tests/test_make_classification.py b/python/cuml/cuml/tests/test_make_classification.py index a4196ec476..9daee2de25 100644 --- a/python/cuml/cuml/tests/test_make_classification.py +++ b/python/cuml/cuml/tests/test_make_classification.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2020-2023, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. From e81b2835b2e0b88d207719e5298b03f739841363 Mon Sep 17 00:00:00 2001 From: Sebastian Berg Date: Wed, 3 Jul 2024 16:48:50 +0000 Subject: [PATCH 6/9] Ignore python/_thirdparty for style pre-commit check --- .pre-commit-config.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 67ef2d6ad3..403bb1b247 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -73,7 +73,8 @@ repos: setup[.]cfg$ exclude: | (?x) - cpp/src/tsne/cannylab/bh[.]cu$ + cpp/src/tsne/cannylab/bh[.]cu$| + python/cuml/_thirdparty - id: verify-alpha-spec - repo: https://github.com/rapidsai/dependency-file-generator rev: v1.13.11 From f266d0b9bd7427d051aff727ec467b9ae6957e86 Mon Sep 17 00:00:00 2001 From: Sebastian Berg Date: Thu, 11 Jul 2024 22:25:45 +0200 Subject: [PATCH 7/9] Simplify return_inverse fixup --- python/cuml/cuml/tests/test_make_classification.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/python/cuml/cuml/tests/test_make_classification.py b/python/cuml/cuml/tests/test_make_classification.py index 9daee2de25..e8b0b7a088 100644 --- a/python/cuml/cuml/tests/test_make_classification.py +++ b/python/cuml/cuml/tests/test_make_classification.py @@ -115,11 +115,8 @@ def test_make_classification_informative_features(): # Cluster by sign, viewed as strings to allow uniquing signs = 
np.sign(cp.asnumpy(X)) - signs = signs.view(dtype="|S{0}".format(signs.strides[0])) + signs = signs.view(dtype="|S{0}".format(signs.strides[0])).ravel() unique_signs, cluster_index = np.unique(signs, return_inverse=True) - # NumPy 2 has a behavior change (maybe regression) for the inverse shape - # https://github.com/numpy/numpy/issues/26738. This always works: - cluster_index = cluster_index.reshape(-1) assert ( len(unique_signs) == n_clusters From 30c706c8765d1dbc1ba667ec8f756f9680665e8d Mon Sep 17 00:00:00 2001 From: Sebastian Berg Date: Tue, 16 Jul 2024 13:28:02 -0700 Subject: [PATCH 8/9] Can't use `copy=None` on some older NumPy versions... --- python/cuml/cuml/internals/array.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/python/cuml/cuml/internals/array.py b/python/cuml/cuml/internals/array.py index 08f01b4336..83becf3c14 100644 --- a/python/cuml/cuml/internals/array.py +++ b/python/cuml/cuml/internals/array.py @@ -1168,19 +1168,20 @@ def from_input( ) make_copy = force_contiguous and not arr.is_contiguous - if not make_copy: - # NumPy now interprets False as never copy, so must use None - make_copy = None if ( not fail_on_order and order != arr.order and order != "K" ) or make_copy: - arr = cls( - arr.mem_type.xpy.array( - arr.to_output("array"), order=order, copy=make_copy - ), - index=index, - ) + if make_copy: + data = arr.mem_type.xpy.array( + arr.to_output("array"), order=order + ) + else: + data = arr.mem_type.xpy.asarray( + arr.to_output("array"), order=order + ) + + arr = cls(data, index=index) n_rows = arr.shape[0] From f399da25f314c98f90b325f1834f9730094e5a34 Mon Sep 17 00:00:00 2001 From: jakirkham Date: Thu, 18 Jul 2024 14:03:44 -0700 Subject: [PATCH 9/9] Fix copyright exclusion on thirdparty directory --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 403bb1b247..1878f90747 100644 --- 
a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -74,7 +74,7 @@ repos: exclude: | (?x) cpp/src/tsne/cannylab/bh[.]cu$| - python/cuml/_thirdparty + python/cuml/cuml/_thirdparty - id: verify-alpha-spec - repo: https://github.com/rapidsai/dependency-file-generator rev: v1.13.11