Merge pull request #79 from rapidsai/branch-25.02

Forward-merge branch-25.02 into branch-25.04
rapidsai · Feb 5, 2025 · a45711f · a45711f
2 parents 7089b9f + 5d865e9
commit a45711f
Show file tree

Hide file tree

Showing 2 changed files with 104 additions and 11 deletions.
diff --git a/nx_cugraph/algorithms/centrality/betweenness.py b/nx_cugraph/algorithms/centrality/betweenness.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2023-2024, NVIDIA CORPORATION.
+# Copyright (c) 2023-2025, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -10,17 +10,18 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import cupy as cp
 import pylibcugraph as plc
+from networkx.utils import create_py_random_state
 
 from nx_cugraph.convert import _to_graph
-from nx_cugraph.utils import _seed_to_int, networkx_algorithm
+from nx_cugraph.utils import index_dtype, networkx_algorithm
 
 __all__ = ["betweenness_centrality", "edge_betweenness_centrality"]
 
 
 @networkx_algorithm(
     is_incomplete=True,  # weight not supported
-    is_different=True,  # RNG with seed is different
     version_added="23.10",
     _plc="betweenness_centrality",
 )
@@ -32,13 +33,17 @@ def betweenness_centrality(
         raise NotImplementedError(
             "Weighted implementation of betweenness centrality not currently supported"
         )
-    seed = _seed_to_int(seed)
+    random_state = create_py_random_state(seed)
     G = _to_graph(G, weight)
+    if k is not None and k < G._N:
+        nodes = cp.array(random_state.sample(range(G._N), k), index_dtype)
+    else:
+        nodes = None
     node_ids, values = plc.betweenness_centrality(
         resource_handle=plc.ResourceHandle(),
         graph=G._get_plc_graph(),
-        k=k,
-        random_state=seed,
+        k=nodes,
+        random_state=None,
         normalized=normalized,
         include_endpoints=endpoints,
         do_expensive_check=False,
@@ -53,7 +58,6 @@ def _(G, k=None, normalized=True, weight=None, endpoints=False, seed=None):
 
 @networkx_algorithm(
     is_incomplete=True,  # weight not supported
-    is_different=True,  # RNG with seed is different
     version_added="23.10",
     _plc="edge_betweenness_centrality",
 )
@@ -63,21 +67,48 @@ def edge_betweenness_centrality(G, k=None, normalized=True, weight=None, seed=No
         raise NotImplementedError(
             "Weighted implementation of betweenness centrality not currently supported"
         )
-    seed = _seed_to_int(seed)
+    random_state = create_py_random_state(seed)
     G = _to_graph(G, weight)
+    if k is not None and k < G._N:
+        nodes = cp.array(random_state.sample(range(G._N), k), index_dtype)
+    else:
+        nodes = None
     src_ids, dst_ids, values, _edge_ids = plc.edge_betweenness_centrality(
         resource_handle=plc.ResourceHandle(),
         graph=G._get_plc_graph(),
-        k=k,
-        random_state=seed,
+        k=nodes,
+        random_state=None,
         normalized=normalized,
         do_expensive_check=False,
     )
     if not G.is_directed():
+        if nodes is not None:
+            # For undirected graphs, PLC only gives us data for one direction of the
+            # edge (such as (i, j), but not (j, i)), but we don't know which one.
+            # That is, only data from node i to j gets added when going from node i.
+            # So, the cupy gymnastics below add (i, j) and (j, i) edges together.
+            dst_src = cp.hstack(
+                (cp.vstack((dst_ids, src_ids)), cp.vstack((src_ids, dst_ids)))
+            )
+            indices = cp.lexsort(dst_src)
+            dst_src = dst_src[:, indices][:, ::2]
+            dst_ids = dst_src[0]
+            src_ids = dst_src[1]
+            values = values[indices % values.size].reshape(values.size, 2).sum(axis=-1)
         mask = src_ids <= dst_ids
         src_ids = src_ids[mask]
         dst_ids = dst_ids[mask]
-        values = 2 * values[mask]
+        if nodes is not None:
+            # NetworkX doesn't scale the same when using k. Which is more "correct"?
+            # No need to x2 like we do below when using the mask, b/c this already
+            # includes contributions from both edge directions.
+            values = (k / G._N) * values[mask]
+        else:
+            # We discarded half the values with the mask so x2 to compensate.
+            values = 2 * values[mask]
+    elif nodes is not None:
+        # NetworkX doesn't scale the same when using k. Which is more "correct"?
+        values *= k / G._N
     return G._edgearrays_to_dict(src_ids, dst_ids, values)
 
 

diff --git a/nx_cugraph/tests/test_bc.py b/nx_cugraph/tests/test_bc.py
@@ -0,0 +1,62 @@
+# Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import networkx as nx
+import pytest
+
+
+def test_bc():  # node betweenness without sampling (k not passed)
+    G = nx.karate_club_graph()
+    bc_nx = nx.betweenness_centrality(G)  # no explicit backend
+    bc_cg = nx.betweenness_centrality(G, backend="cugraph")
+    assert pytest.approx(bc_nx) == bc_cg  # results must agree within tolerance
+
+
+def test_bc_rng():  # node betweenness with k=4 and a fixed seed
+    G = nx.karate_club_graph()
+    bc_nx = nx.betweenness_centrality(G, k=4, seed=42)  # no explicit backend
+    bc_cg = nx.betweenness_centrality(G, k=4, seed=42, backend="cugraph")
+    assert pytest.approx(bc_nx) == bc_cg  # same k and seed must give same values
+
+
+def test_edge_bc():  # edge betweenness without sampling, on three graphs
+    G = nx.karate_club_graph()
+    bc_nx = nx.edge_betweenness_centrality(G)  # no explicit backend
+    bc_cg = nx.edge_betweenness_centrality(G, backend="cugraph")
+    assert pytest.approx(bc_nx) == bc_cg  # results must agree within tolerance
+
+    G = nx.path_graph(5, create_using=nx.Graph)  # undirected path, 5 nodes
+    bc_nx = nx.edge_betweenness_centrality(G)
+    bc_cg = nx.edge_betweenness_centrality(G, backend="cugraph")
+    assert pytest.approx(bc_nx) == bc_cg
+
+    G = nx.path_graph(10, create_using=nx.DiGraph)  # directed path, 10 nodes
+    bc_nx = nx.edge_betweenness_centrality(G)
+    bc_cg = nx.edge_betweenness_centrality(G, backend="cugraph")
+    assert pytest.approx(bc_nx) == bc_cg
+
+
+def test_edge_bc_rng():  # edge betweenness with k and a fixed seed, three graphs
+    G = nx.karate_club_graph()
+    bc_nx = nx.edge_betweenness_centrality(G, k=4, seed=7)  # no explicit backend
+    bc_cg = nx.edge_betweenness_centrality(G, k=4, seed=7, backend="cugraph")
+    assert pytest.approx(bc_nx) == bc_cg  # same k and seed must give same values
+
+    G = nx.path_graph(5, create_using=nx.Graph)  # undirected path, 5 nodes
+    bc_nx = nx.edge_betweenness_centrality(G, k=2, seed=7)
+    bc_cg = nx.edge_betweenness_centrality(G, k=2, seed=7, backend="cugraph")
+    assert pytest.approx(bc_nx) == bc_cg
+
+    G = nx.path_graph(10, create_using=nx.DiGraph)  # directed path, 10 nodes
+    bc_nx = nx.edge_betweenness_centrality(G, k=4, seed=8)
+    bc_cg = nx.edge_betweenness_centrality(G, k=4, seed=8, backend="cugraph")
+    assert pytest.approx(bc_nx) == bc_cg