From dea0c575be0fd565a3bd002c256cfbc0528d860b Mon Sep 17 00:00:00 2001
From: Sebastian Berg <sebastianb@nvidia.com>
Date: Wed, 3 Jul 2024 13:40:33 +0000
Subject: [PATCH 1/9] MAINT: Small NumPy 2 related fixes

This applies some smaller NumPy 2 related fixes.  With (in progress)
cupy 13.2 fixups, the single gpu test suite seems to be doing fine
(not quite finished, I may push more commits, but can also open a new PR).

The one thing I noticed that is a bit annoying is that hdbscan is not
yet released for NumPy 2.  Is it actually still required, since I think
sklearn has a version?
(I don't expect this to be a problem for long, but there is at least one odd test
failure trying to make hdbscan work in https://github.com/scikit-learn-contrib/hdbscan/pull/644)
---
 python/cuml/cuml/_thirdparty/sklearn/utils/sparsefuncs.py | 2 +-
 python/cuml/cuml/internals/array.py                       | 3 +++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/python/cuml/cuml/_thirdparty/sklearn/utils/sparsefuncs.py b/python/cuml/cuml/_thirdparty/sklearn/utils/sparsefuncs.py
index e0697b98ce..7bea44a366 100644
--- a/python/cuml/cuml/_thirdparty/sklearn/utils/sparsefuncs.py
+++ b/python/cuml/cuml/_thirdparty/sklearn/utils/sparsefuncs.py
@@ -214,7 +214,7 @@ def _sparse_min_or_max(X, axis, min_or_max):
         if np.isnan(m):
             if 'nan' in min_or_max:
                 m = 0
-        elif X.nnz != cpu_np.product(X.shape):
+        elif X.nnz != cpu_np.prod(X.shape):
             if 'min' in min_or_max:
                 m = m if m <= 0 else 0
             else:
diff --git a/python/cuml/cuml/internals/array.py b/python/cuml/cuml/internals/array.py
index 6b664506ae..6873265261 100644
--- a/python/cuml/cuml/internals/array.py
+++ b/python/cuml/cuml/internals/array.py
@@ -1168,6 +1168,9 @@ def from_input(
             )
 
         make_copy = force_contiguous and not arr.is_contiguous
+        if not make_copy:
+           # NumPy now interprets False as never copy, so must use None
+           make_copy = None
 
         if (
             not fail_on_order and order != arr.order and order != "K"

From 3565feff0c8f8b7351884b2a0ddbaa4820787bc5 Mon Sep 17 00:00:00 2001
From: Sebastian Berg <sebastianb@nvidia.com>
Date: Wed, 3 Jul 2024 14:04:19 +0000
Subject: [PATCH 2/9] TST: `asfarray` is removed, it is the same as `asarray`
 here

---
 python/cuml/cuml/tests/test_metrics.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/python/cuml/cuml/tests/test_metrics.py b/python/cuml/cuml/tests/test_metrics.py
index 6e92535cf7..eaed36fb5b 100644
--- a/python/cuml/cuml/tests/test_metrics.py
+++ b/python/cuml/cuml/tests/test_metrics.py
@@ -1065,7 +1065,7 @@ def test_pairwise_distances(metric: str, matrix_size, is_col_major):
     cp.testing.assert_array_almost_equal(S, S2, decimal=compare_precision)
 
     # Change precision of one parameter
-    Y = np.asfarray(Y, dtype=np.float32)
+    Y = np.asarray(Y, dtype=np.float32)
     S = pairwise_distances(X, Y, metric=metric)
     S2 = ref_dense_pairwise_dist(X, Y, metric=metric)
     cp.testing.assert_array_almost_equal(S, S2, decimal=compare_precision)
@@ -1074,8 +1074,8 @@ def test_pairwise_distances(metric: str, matrix_size, is_col_major):
     compare_precision = 2
 
     # Change precision of both parameters to float
-    X = np.asfarray(X, dtype=np.float32)
-    Y = np.asfarray(Y, dtype=np.float32)
+    X = np.asarray(X, dtype=np.float32)
+    Y = np.asarray(Y, dtype=np.float32)
     S = pairwise_distances(X, Y, metric=metric)
     S2 = ref_dense_pairwise_dist(X, Y, metric=metric)
     cp.testing.assert_array_almost_equal(S, S2, decimal=compare_precision)
@@ -1132,8 +1132,8 @@ def test_pairwise_distances_sklearn_comparison(metric: str, matrix_size):
     # For fp32, compare at 4 decimals, (3 places less than the ~7 max)
     compare_precision = 4
 
-    X = np.asfarray(X, dtype=np.float32)
-    Y = np.asfarray(Y, dtype=np.float32)
+    X = np.asarray(X, dtype=np.float32)
+    Y = np.asarray(Y, dtype=np.float32)
 
     # Compare to sklearn, fp32
     S = pairwise_distances(X, Y, metric=metric)
@@ -1228,7 +1228,7 @@ def test_pairwise_distances_exceptions():
 
     X_int = rng.randint(10, size=(5, 4))
     X_double = rng.random_sample((5, 4))
-    X_float = np.asfarray(X_double, dtype=np.float32)
+    X_float = np.asarray(X_double, dtype=np.float32)
     X_bool = rng.choice([True, False], size=(5, 4))
 
     # Test int inputs (only float/double accepted at this time)

From 449466cd1fd2e70dcc3a76ffc09553acf606caab Mon Sep 17 00:00:00 2001
From: Sebastian Berg <sebastianb@nvidia.com>
Date: Wed, 3 Jul 2024 14:08:31 +0000
Subject: [PATCH 3/9] TST: Avoid behavior change in return_inverse of unique

Even if NumPy reverts, this is not a problem.
---
 python/cuml/cuml/tests/test_make_classification.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/python/cuml/cuml/tests/test_make_classification.py b/python/cuml/cuml/tests/test_make_classification.py
index d5c38dc651..a4196ec476 100644
--- a/python/cuml/cuml/tests/test_make_classification.py
+++ b/python/cuml/cuml/tests/test_make_classification.py
@@ -117,6 +117,9 @@ def test_make_classification_informative_features():
             signs = np.sign(cp.asnumpy(X))
             signs = signs.view(dtype="|S{0}".format(signs.strides[0]))
             unique_signs, cluster_index = np.unique(signs, return_inverse=True)
+            # NumPy 2 has a behavior change (maybe regression) for the inverse shape
+            # https://github.com/numpy/numpy/issues/26738. This always works:
+            cluster_index = cluster_index.reshape(-1)
 
             assert (
                 len(unique_signs) == n_clusters

From bc7f7c1dbef5608ae37bfd02dc60cc76a99480b1 Mon Sep 17 00:00:00 2001
From: Sebastian Berg <sebastianb@nvidia.com>
Date: Wed, 3 Jul 2024 15:18:27 +0000
Subject: [PATCH 4/9] TST: Use deepcopy for copying the random state

I am not actually sure what changed here, but deepcopy seems sensible?
---
 python/cuml/cuml/tests/test_umap.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/python/cuml/cuml/tests/test_umap.py b/python/cuml/cuml/tests/test_umap.py
index 6faa4ad8d3..6916ab2488 100644
--- a/python/cuml/cuml/tests/test_umap.py
+++ b/python/cuml/cuml/tests/test_umap.py
@@ -420,9 +420,9 @@ def get_embedding(n_components, random_state):
         )
         return reducer.fit_transform(data, convert_dtype=True)
 
-    state = copy.copy(random_state)
+    state = copy.deepcopy(random_state)
     cuml_embedding1 = get_embedding(n_components, state)
-    state = copy.copy(random_state)
+    state = copy.deepcopy(random_state)
     cuml_embedding2 = get_embedding(n_components, state)
 
     assert not np.isnan(cuml_embedding1).any()
@@ -475,9 +475,9 @@ def get_embedding(n_components, random_state):
         reducer.fit(fit_data, convert_dtype=True)
         return reducer.transform(transform_data, convert_dtype=True)
 
-    state = copy.copy(random_state)
+    state = copy.deepcopy(random_state)
     cuml_embedding1 = get_embedding(n_components, state)
-    state = copy.copy(random_state)
+    state = copy.deepcopy(random_state)
     cuml_embedding2 = get_embedding(n_components, state)
 
     assert not np.isnan(cuml_embedding1).any()

From 3064e568a256185217e0ef11fb7afa3ee40e50c6 Mon Sep 17 00:00:00 2001
From: Sebastian Berg <sebastianb@nvidia.com>
Date: Wed, 3 Jul 2024 15:32:14 +0000
Subject: [PATCH 5/9] STY: Fixup copyright/pre-commit

---
 python/cuml/cuml/internals/array.py                | 4 ++--
 python/cuml/cuml/tests/test_make_classification.py | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/python/cuml/cuml/internals/array.py b/python/cuml/cuml/internals/array.py
index 6873265261..08f01b4336 100644
--- a/python/cuml/cuml/internals/array.py
+++ b/python/cuml/cuml/internals/array.py
@@ -1169,8 +1169,8 @@ def from_input(
 
         make_copy = force_contiguous and not arr.is_contiguous
         if not make_copy:
-           # NumPy now interprets False as never copy, so must use None
-           make_copy = None
+            # NumPy now interprets False as never copy, so must use None
+            make_copy = None
 
         if (
             not fail_on_order and order != arr.order and order != "K"
diff --git a/python/cuml/cuml/tests/test_make_classification.py b/python/cuml/cuml/tests/test_make_classification.py
index a4196ec476..9daee2de25 100644
--- a/python/cuml/cuml/tests/test_make_classification.py
+++ b/python/cuml/cuml/tests/test_make_classification.py
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2020-2023, NVIDIA CORPORATION.
+# Copyright (c) 2020-2024, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.

From e81b2835b2e0b88d207719e5298b03f739841363 Mon Sep 17 00:00:00 2001
From: Sebastian Berg <sebastianb@nvidia.com>
Date: Wed, 3 Jul 2024 16:48:50 +0000
Subject: [PATCH 6/9] Ignore python/_thirdparty for style pre-commit check

---
 .pre-commit-config.yaml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 67ef2d6ad3..403bb1b247 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -73,7 +73,8 @@ repos:
                 setup[.]cfg$
           exclude: |
             (?x)
-                cpp/src/tsne/cannylab/bh[.]cu$
+                cpp/src/tsne/cannylab/bh[.]cu$|
+                python/cuml/_thirdparty
         - id: verify-alpha-spec
     - repo: https://github.com/rapidsai/dependency-file-generator
       rev: v1.13.11

From f266d0b9bd7427d051aff727ec467b9ae6957e86 Mon Sep 17 00:00:00 2001
From: Sebastian Berg <sebastian@sipsolutions.net>
Date: Thu, 11 Jul 2024 22:25:45 +0200
Subject: [PATCH 7/9] Simplify return_inverse fixup

---
 python/cuml/cuml/tests/test_make_classification.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/python/cuml/cuml/tests/test_make_classification.py b/python/cuml/cuml/tests/test_make_classification.py
index 9daee2de25..e8b0b7a088 100644
--- a/python/cuml/cuml/tests/test_make_classification.py
+++ b/python/cuml/cuml/tests/test_make_classification.py
@@ -115,11 +115,8 @@ def test_make_classification_informative_features():
 
             # Cluster by sign, viewed as strings to allow uniquing
             signs = np.sign(cp.asnumpy(X))
-            signs = signs.view(dtype="|S{0}".format(signs.strides[0]))
+            signs = signs.view(dtype="|S{0}".format(signs.strides[0])).ravel()
             unique_signs, cluster_index = np.unique(signs, return_inverse=True)
-            # NumPy 2 has a behavior change (maybe regression) for the inverse shape
-            # https://github.com/numpy/numpy/issues/26738. This always works:
-            cluster_index = cluster_index.reshape(-1)
 
             assert (
                 len(unique_signs) == n_clusters

From 30c706c8765d1dbc1ba667ec8f756f9680665e8d Mon Sep 17 00:00:00 2001
From: Sebastian Berg <sebastianb@nvidia.com>
Date: Tue, 16 Jul 2024 13:28:02 -0700
Subject: [PATCH 8/9] Can't use `copy=None` on some older NumPy versions...

---
 python/cuml/cuml/internals/array.py | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/python/cuml/cuml/internals/array.py b/python/cuml/cuml/internals/array.py
index 08f01b4336..83becf3c14 100644
--- a/python/cuml/cuml/internals/array.py
+++ b/python/cuml/cuml/internals/array.py
@@ -1168,19 +1168,20 @@ def from_input(
             )
 
         make_copy = force_contiguous and not arr.is_contiguous
-        if not make_copy:
-            # NumPy now interprets False as never copy, so must use None
-            make_copy = None
 
         if (
             not fail_on_order and order != arr.order and order != "K"
         ) or make_copy:
-            arr = cls(
-                arr.mem_type.xpy.array(
-                    arr.to_output("array"), order=order, copy=make_copy
-                ),
-                index=index,
-            )
+            if make_copy:
+                data = arr.mem_type.xpy.array(
+                    arr.to_output("array"), order=order
+                )
+            else:
+                data = arr.mem_type.xpy.asarray(
+                    arr.to_output("array"), order=order
+                )
+
+            arr = cls(data, index=index)
 
         n_rows = arr.shape[0]
 

From f399da25f314c98f90b325f1834f9730094e5a34 Mon Sep 17 00:00:00 2001
From: jakirkham <jakirkham@gmail.com>
Date: Thu, 18 Jul 2024 14:03:44 -0700
Subject: [PATCH 9/9] Fix copyright exclusion on thirdparty directory

---
 .pre-commit-config.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 403bb1b247..1878f90747 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -74,7 +74,7 @@ repos:
           exclude: |
             (?x)
                 cpp/src/tsne/cannylab/bh[.]cu$|
-                python/cuml/_thirdparty
+                python/cuml/cuml/_thirdparty
         - id: verify-alpha-spec
     - repo: https://github.com/rapidsai/dependency-file-generator
       rev: v1.13.11