Skip to content

Commit

Permalink
Add lowest_common_ancestor algorithm (#35)
Browse files Browse the repository at this point in the history
This is a medium-priority algorithm that I considered pairing with @nv-rliu on instead of #32, so I thought I would knock it out while it was still fresh in mind. It was pretty quick.

To reviewers: this is low priority and may slip to the next release. Please do more important things first. It is reasonable to request benchmarking, which I hope to add when I get a chance.

I created more tests in networkx to help with this PR: networkx/networkx#7726

I think the implementation is pretty clean and straightforward, so hopefully others think so too (although it probably requires understanding _what_ this algorithm does).

Authors:
  - Erik Welch (https://github.com/eriknw)

Approvers:
  - Ralph Liu (https://github.com/nv-rliu)
  - Rick Ratzel (https://github.com/rlratzel)

URL: #35
  • Loading branch information
eriknw authored Feb 5, 2025
1 parent 19d52c7 commit 6b8105b
Show file tree
Hide file tree
Showing 5 changed files with 141 additions and 1 deletion.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,8 @@ Below is the list of algorithms that are currently supported in nx-cugraph.
└─ <a href="https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.link_analysis.pagerank_alg.pagerank.html#networkx.algorithms.link_analysis.pagerank_alg.pagerank">pagerank</a>
<a href="https://networkx.org/documentation/stable/reference/algorithms/link_prediction.html#module-networkx.algorithms.link_prediction">link_prediction</a>
└─ <a href="https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.link_prediction.jaccard_coefficient.html#networkx.algorithms.link_prediction.jaccard_coefficient">jaccard_coefficient</a>
<a href="https://networkx.org/documentation/stable/reference/algorithms/lowest_common_ancestors.html#module-networkx.algorithms.lowest_common_ancestors">lowest_common_ancestors</a>
└─ <a href="https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.lowest_common_ancestors.lowest_common_ancestor.html#networkx.algorithms.lowest_common_ancestors.lowest_common_ancestor">lowest_common_ancestor</a>
<a href="https://networkx.org/documentation/stable/reference/algorithms/operators.html">operators</a>
└─ <a href="https://networkx.org/documentation/stable/reference/algorithms/operators.html#module-networkx.algorithms.operators.unary">unary</a>
├─ <a href="https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.operators.unary.complement.html#networkx.algorithms.operators.unary.complement">complement</a>
Expand Down
2 changes: 2 additions & 0 deletions _nx_cugraph/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@
"les_miserables_graph",
"lollipop_graph",
"louvain_communities",
"lowest_common_ancestor",
"moebius_kantor_graph",
"node_connected_component",
"null_graph",
Expand Down Expand Up @@ -186,6 +187,7 @@
"generic_bfs_edges": "`neighbors` parameter is not yet supported.",
"katz_centrality": "`nstart` isn't used (but is checked), and `normalized=False` is not supported.",
"louvain_communities": "`seed` parameter is currently ignored, and self-loops are not yet supported.",
"lowest_common_ancestor": "May not always raise NetworkXError for graphs that are not DAGs.",
"pagerank": "`dangling` parameter is not supported, but it is checked for validity.",
"shortest_path": "Negative weights are not yet supported.",
"shortest_path_length": "Negative weights are not yet supported.",
Expand Down
32 changes: 31 additions & 1 deletion benchmarks/pytest-based/bench_algos.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.


import random
from collections.abc import Mapping

import networkx as nx
Expand Down Expand Up @@ -902,6 +902,36 @@ def bench_ego_graph(benchmark, graph_obj, backend_wrapper):
assert type(result) is type(G)


def bench_lowest_common_ancestor(benchmark, graph_obj, backend_wrapper):
    """Benchmark ``nx.lowest_common_ancestor`` on one reproducible node pair.

    If ``graph_obj`` is not a directed acyclic graph, it is replaced by a new
    ``nx.DiGraph`` that keeps every node but only the edges with
    ``src < dst``, and a warning with the new graph size is printed.
    """
    # Must be DAG
    if not nx.is_directed_acyclic_graph(graph_obj):
        new_graph_obj = nx.DiGraph()
        new_graph_obj.add_nodes_from(graph_obj.nodes(data=True))
        # Keeping only edges that point from a smaller to a larger node means
        # every path is strictly increasing, so no cycle can remain.
        new_graph_obj.add_edges_from(
            (src, dst, *rest)
            for src, dst, *rest in graph_obj.edges(data=True)
            if src < dst
        )
        new_graph_obj.graph.update(graph_obj.graph)
        # Both literals need the `f` prefix; adjacent string literals are
        # concatenated, but each one is formatted (or not) on its own.
        print(
            f"WARNING: graph was changed and now has {new_graph_obj.number_of_nodes()} "
            f"nodes and {new_graph_obj.number_of_edges()} edges."
        )
        graph_obj = new_graph_obj

    G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper)
    # Fixed seed and sorted node order so every run benchmarks the same pair.
    r = random.Random(42)
    node1, node2 = r.sample(sorted(G), 2)
    result = benchmark.pedantic(
        target=backend_wrapper(nx.lowest_common_ancestor),
        args=(G, node1, node2),
        rounds=rounds,
        iterations=iterations,
        warmup_rounds=warmup_rounds,
    )
    # `None` is the default returned when the pair has no common ancestor.
    assert result is None or result in G


def bench_bipartite_BC_n1000_m3000_k100000(benchmark, backend_wrapper):
# Example how to run:
# $ pytest -sv -k "bench_bipartite_BC" \
Expand Down
2 changes: 2 additions & 0 deletions nx_cugraph/algorithms/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
components,
link_analysis,
link_prediction,
lowest_common_ancestors,
operators,
shortest_paths,
traversal,
Expand All @@ -32,6 +33,7 @@
from .isolate import *
from .link_analysis import *
from .link_prediction import *
from .lowest_common_ancestors import *
from .operators import *
from .reciprocity import *
from .shortest_paths import *
Expand Down
104 changes: 104 additions & 0 deletions nx_cugraph/algorithms/lowest_common_ancestors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
# Copyright (c) 2024-2025, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import cupy as cp
import networkx as nx
import numpy as np
import pylibcugraph as plc

from nx_cugraph.convert import _to_directed_graph
from nx_cugraph.utils import (
_groupby,
index_dtype,
networkx_algorithm,
not_implemented_for,
)

__all__ = ["lowest_common_ancestor"]


def _bfs_reached_ids(plc_graph, source_index):
    # Run a single-source BFS on `plc_graph` starting at `source_index` and
    # return the node ids whose reported distance is not the "unreached" value.
    distances, _, node_ids = plc.bfs(
        handle=plc.ResourceHandle(),
        graph=plc_graph,
        sources=cp.array([source_index], index_dtype),
        direction_optimizing=False,  # True for undirected only
        depth_limit=-1,
        compute_predecessors=False,
        do_expensive_check=False,
    )
    # Nodes whose distance equals the dtype's maximum are filtered out;
    # presumably that is the sentinel plc.bfs uses for unreached nodes.
    return node_ids[distances != np.iinfo(distances.dtype).max]


@not_implemented_for("undirected")
@networkx_algorithm(is_incomplete=True, version_added="24.12", _plc="bfs")
def lowest_common_ancestor(G, node1, node2, default=None):
    """May not always raise NetworkXError for graphs that are not DAGs."""
    # NOTE(review): the one-line docstring above is repeated verbatim in the
    # backend's extra-docstring registry, so it appears to be consumed as data.
    # Keep the two in sync; documentation for maintainers lives in comments.
    #
    # Returns a lowest common ancestor of `node1` and `node2`, or `default`
    # when the two nodes have no common ancestor.
    #
    # Raises:
    #   NetworkXPointlessConcept: the graph has no nodes.
    #   NodeNotFound: `node1` and/or `node2` is not in the graph.
    #   NetworkXError: a cycle is encountered during the final walk (cycles
    #       elsewhere in the graph are not detected).
    G = _to_directed_graph(G)

    # if not nxcg.is_directed_acyclic_graph(G):  # TODO
    #     raise nx.NetworkXError("LCA only defined on directed acyclic graphs.")

    if G._N == 0:
        raise nx.NetworkXPointlessConcept("LCA meaningless on null graphs.")
    # Report every missing node of the pair at once, as networkx does.
    missing = {node for node in (node1, node2) if node not in G}
    if missing:
        raise nx.NodeNotFound(
            f"Node(s) {missing} from pair {(node1, node2)} not in G."
        )

    node1_index = node1 if G.key_to_id is None else G.key_to_id[node1]
    node2_index = node2 if G.key_to_id is None else G.key_to_id[node2]
    if node1_index == node2_index:  # Handle trivial case
        return node1

    # Ancestor BFS from each node. `switch_indices=True` presumably swaps
    # src/dst so that BFS follows edges backwards -- TODO confirm.
    plc_graph = G._get_plc_graph(switch_indices=True)
    node_ids1 = _bfs_reached_ids(plc_graph, node1_index)
    node_ids2 = _bfs_reached_ids(plc_graph, node2_index)

    # Find all common ancestors
    common_ids = cp.intersect1d(node_ids1, node_ids2, assume_unique=True)
    if common_ids.size == 0:
        return default
    if common_ids.size == 1:
        # Only one; it must be the lowest common ancestor
        node_index = common_ids[0].tolist()
        return node_index if G.key_to_id is None else G.id_to_key[node_index]

    # Find a node from `common_ids` that has no successors in `common_ids`.
    # First collect, for every common ancestor, its successors (ignoring
    # self-loops, which would otherwise look like a cycle in the walk below).
    # TODO: create utility functions for getting neighbors, predecessors,
    # and successors of nodes, which may simplify this code.
    mask = cp.isin(G.src_indices, common_ids) & (G.src_indices != G.dst_indices)
    groups = _groupby(G.src_indices[mask], G.dst_indices[mask])
    # Walk along successors until we reach a lowest common ancestor
    node_index = next(iter(groups))  # Choose arbitrary element
    seen = set()
    while True:
        if node_index in seen:
            # Revisiting a node means the walk followed a cycle.
            raise nx.NetworkXError("LCA only defined on directed acyclic graphs.")
        lower_ancestors = cp.intersect1d(groups[node_index], common_ids)
        if lower_ancestors.size == 0:
            # No successor is itself a common ancestor: this one is lowest.
            break
        seen.add(node_index)
        node_index = lower_ancestors[0].tolist()  # Arbitrary element
    return node_index if G.key_to_id is None else G.id_to_key[node_index]

0 comments on commit 6b8105b

Please sign in to comment.