diff --git a/README.md b/README.md index 2afd1e5b3..5ba41da36 100644 --- a/README.md +++ b/README.md @@ -156,6 +156,8 @@ Below is the list of algorithms that are currently supported in nx-cugraph. └─ pagerank link_prediction └─ jaccard_coefficient +lowest_common_ancestors + └─ lowest_common_ancestor operators └─ unary ├─ complement diff --git a/_nx_cugraph/__init__.py b/_nx_cugraph/__init__.py index a34dd0b4b..c83b489f2 100644 --- a/_nx_cugraph/__init__.py +++ b/_nx_cugraph/__init__.py @@ -120,6 +120,7 @@ "les_miserables_graph", "lollipop_graph", "louvain_communities", + "lowest_common_ancestor", "moebius_kantor_graph", "node_connected_component", "null_graph", @@ -186,6 +187,7 @@ "generic_bfs_edges": "`neighbors` parameter is not yet supported.", "katz_centrality": "`nstart` isn't used (but is checked), and `normalized=False` is not supported.", "louvain_communities": "`seed` parameter is currently ignored, and self-loops are not yet supported.", + "lowest_common_ancestor": "May not always raise NetworkXError for graphs that are not DAGs.", "pagerank": "`dangling` parameter is not supported, but it is checked for validity.", "shortest_path": "Negative weights are not yet supported.", "shortest_path_length": "Negative weights are not yet supported.", diff --git a/benchmarks/pytest-based/bench_algos.py b/benchmarks/pytest-based/bench_algos.py index 8f5f82938..9f8debd68 100644 --- a/benchmarks/pytest-based/bench_algos.py +++ b/benchmarks/pytest-based/bench_algos.py @@ -11,7 +11,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
def bench_lowest_common_ancestor(benchmark, graph_obj, backend_wrapper):
    """Benchmark ``nx.lowest_common_ancestor`` on one reproducible node pair.

    If ``graph_obj`` is not a directed acyclic graph, a ``nx.DiGraph`` is built
    from it that keeps every node (with data) and graph attribute but only the
    edges whose source compares less than their destination, and that graph is
    benchmarked instead. The node pair is drawn with a fixed seed so that
    repeated runs time the same query.
    """
    # Must be DAG
    if not nx.is_directed_acyclic_graph(graph_obj):
        new_graph_obj = nx.DiGraph()
        new_graph_obj.add_nodes_from(graph_obj.nodes(data=True))
        # Keeping only edges with `src < dst` drops self-loops and makes every
        # edge point "upwards" in node order, so no cycle can remain.
        new_graph_obj.add_edges_from(
            (src, dst, *rest)
            for src, dst, *rest in graph_obj.edges(data=True)
            if src < dst
        )
        new_graph_obj.graph.update(graph_obj.graph)
        # Both literals need the `f` prefix; without it on the second one the
        # edge count placeholder is printed verbatim instead of its value.
        print(
            f"WARNING: graph was changed and now had {new_graph_obj.number_of_nodes()} "
            f"nodes and {new_graph_obj.number_of_edges()} edges."
        )
        graph_obj = new_graph_obj

    G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper)
    # Seeded RNG over the sorted node list keeps the sampled pair stable.
    r = random.Random(42)
    node1, node2 = r.sample(sorted(G), 2)
    result = benchmark.pedantic(
        target=backend_wrapper(nx.lowest_common_ancestor),
        args=(G, node1, node2),
        rounds=rounds,
        iterations=iterations,
        warmup_rounds=warmup_rounds,
    )
    # `None` is the algorithm's default when the pair has no common ancestor.
    assert result is None or result in G
def _ancestor_indices(plc_graph, node_index):
    """Return the vertex indices that a BFS from ``node_index`` reaches."""
    # NOTE(review): the caller builds `plc_graph` with `switch_indices=True`,
    # presumably so that this BFS follows edges backwards and therefore visits
    # ancestors (the source vertex included) -- confirm against
    # `_get_plc_graph`.
    distances, _, node_ids = plc.bfs(
        handle=plc.ResourceHandle(),
        graph=plc_graph,
        sources=cp.array([node_index], index_dtype),
        direction_optimizing=False,  # True for undirected only
        depth_limit=-1,
        compute_predecessors=False,
        do_expensive_check=False,
    )
    # Drop vertices whose distance equals the dtype's maximum value
    # (presumably the "unreached" sentinel -- confirm against pylibcugraph).
    return node_ids[distances != np.iinfo(distances.dtype).max]


# Find a lowest common ancestor of `node1` and `node2` in the directed graph `G`.
#
# Returns `node1` when both arguments are the same node, `default` when the two
# nodes have no common ancestor, and otherwise a common ancestor that has no
# successor among the common ancestors.
#
# Raises:
#   nx.NetworkXPointlessConcept: if `G` has no nodes.
#   nx.NodeNotFound: if `node1` and/or `node2` is not in `G`.
#   nx.NetworkXError: if the final walk revisits a node (i.e. hits a cycle).
#
# NOTE(review): the one-line docstring below is identical to the text
# registered for this function in `_nx_cugraph/__init__.py`; it presumably has
# to stay in sync with that entry, so longer documentation lives in comments.
@not_implemented_for("undirected")
@networkx_algorithm(is_incomplete=True, version_added="24.12", _plc="bfs")
def lowest_common_ancestor(G, node1, node2, default=None):
    """May not always raise NetworkXError for graphs that are not DAGs."""
    G = _to_directed_graph(G)

    # TODO: raise nx.NetworkXError("LCA only defined on directed acyclic graphs.")
    # up front once an `is_directed_acyclic_graph` check is available here.
    # Until then a cycle is only reported if the walk at the end runs into it.

    if G._N == 0:
        raise nx.NetworkXPointlessConcept("LCA meaningless on null graphs.")
    # Report every missing node of the pair at once, as NetworkX does.
    missing = {node for node in (node1, node2) if node not in G}
    if missing:
        raise nx.NodeNotFound(
            f"Node(s) {missing} from pair {(node1, node2)} not in G."
        )

    node1_index = node1 if G.key_to_id is None else G.key_to_id[node1]
    node2_index = node2 if G.key_to_id is None else G.key_to_id[node2]
    if node1_index == node2_index:  # Handle trivial case
        return node1

    # Ancestor BFS from each node
    plc_graph = G._get_plc_graph(switch_indices=True)
    node_ids1 = _ancestor_indices(plc_graph, node1_index)
    node_ids2 = _ancestor_indices(plc_graph, node2_index)

    # Find all common ancestors
    common_ids = cp.intersect1d(node_ids1, node_ids2, assume_unique=True)
    if common_ids.size == 0:
        return default
    if common_ids.size == 1:
        # Only one; it must be the lowest common ancestor
        node_index = common_ids[0].tolist()
        return node_index if G.key_to_id is None else G.id_to_key[node_index]

    # Find nodes from `common_ids` that have no successors from `common_ids`.
    # Only edges that leave a common ancestor matter, and self-loops are
    # excluded so that a node is never treated as its own lower ancestor.
    # TODO: create utility functions for getting neighbors, predecessors,
    # and successors of nodes, which may simplify this code.
    mask = cp.isin(G.src_indices, common_ids) & (G.src_indices != G.dst_indices)
    # NOTE(review): `_groupby` is used here as a mapping from a source index
    # to the array of its destination indices -- confirm against the helper.
    groups = _groupby(G.src_indices[mask], G.dst_indices[mask])

    # Walk along successors until we reach a lowest common ancestor
    node_index = next(iter(groups))  # Choose arbitrary element
    seen = set()
    while True:
        if node_index in seen:
            # Coming back to a visited node means the walk is inside a cycle.
            raise nx.NetworkXError("LCA only defined on directed acyclic graphs.")
        lower_ancestors = cp.intersect1d(groups[node_index], common_ids)
        if lower_ancestors.size == 0:
            break
        seen.add(node_index)
        node_index = lower_ancestors[0].tolist()  # Arbitrary element
    return node_index if G.key_to_id is None else G.id_to_key[node_index]