Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Forward-merge branch-25.02 into branch-25.04 #113

Merged
merged 4 commits into branch-25.04 from branch-25.02
Jan 31, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .devcontainer/cuda11.8-pip/devcontainer.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
"args": {
"CUDA": "11.8",
"PYTHON_PACKAGE_MANAGER": "pip",
"BASE": "rapidsai/devcontainers:25.02-cpp-cuda11.8-ucx1.17.0-openmpi-ubuntu22.04"
"BASE": "rapidsai/devcontainers:25.02-cpp-cuda11.8-ucx1.18.0-openmpi-ubuntu22.04"
}
},
"runArgs": [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,15 @@
"context": "${localWorkspaceFolder}/.devcontainer",
"dockerfile": "${localWorkspaceFolder}/.devcontainer/Dockerfile",
"args": {
"CUDA": "12.5",
"CUDA": "12.8",
"PYTHON_PACKAGE_MANAGER": "conda",
"BASE": "rapidsai/devcontainers:25.02-cpp-mambaforge-ubuntu22.04"
}
},
"runArgs": [
"--rm",
"--name",
"${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-25.02-cuda12.5-conda"
"${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-25.02-cuda12.8-conda"
],
"hostRequirements": {"gpu": "optional"},
"features": {
Expand All @@ -20,7 +20,7 @@
"overrideFeatureInstallOrder": [
"ghcr.io/rapidsai/devcontainers/features/rapids-build-utils"
],
"initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config,conda/pkgs,conda/${localWorkspaceFolderBasename}-cuda12.5-envs}"],
"initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config,conda/pkgs,conda/${localWorkspaceFolderBasename}-cuda12.8-envs}"],
"postAttachCommand": ["/bin/bash", "-c", "if [ ${CODESPACES:-false} = 'true' ]; then . devcontainer-utils-post-attach-command; . rapids-post-attach-command; fi"],
"workspaceFolder": "/home/coder",
"workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/cugraph-gnn,type=bind,consistency=consistent",
Expand All @@ -29,7 +29,7 @@
"source=${localWorkspaceFolder}/../.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
"source=${localWorkspaceFolder}/../.config,target=/home/coder/.config,type=bind,consistency=consistent",
"source=${localWorkspaceFolder}/../.conda/pkgs,target=/home/coder/.conda/pkgs,type=bind,consistency=consistent",
"source=${localWorkspaceFolder}/../.conda/${localWorkspaceFolderBasename}-cuda12.5-envs,target=/home/coder/.conda/envs,type=bind,consistency=consistent"
"source=${localWorkspaceFolder}/../.conda/${localWorkspaceFolderBasename}-cuda12.8-envs,target=/home/coder/.conda/envs,type=bind,consistency=consistent"
],
"customizations": {
"vscode": {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,20 +3,20 @@
"context": "${localWorkspaceFolder}/.devcontainer",
"dockerfile": "${localWorkspaceFolder}/.devcontainer/Dockerfile",
"args": {
"CUDA": "12.5",
"CUDA": "12.8",
"PYTHON_PACKAGE_MANAGER": "pip",
"BASE": "rapidsai/devcontainers:25.02-cpp-cuda12.5-ucx1.17.0-openmpi-ubuntu22.04"
"BASE": "rapidsai/devcontainers:25.02-cpp-cuda12.8-ucx1.18.0-openmpi-ubuntu22.04"
}
},
"runArgs": [
"--rm",
"--name",
"${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-25.02-cuda12.5-pip"
"${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-25.02-cuda12.8-pip"
],
"hostRequirements": {"gpu": "optional"},
"features": {
"ghcr.io/rapidsai/devcontainers/features/cuda:25.2": {
"version": "12.5",
"version": "12.8",
"installcuBLAS": true,
"installcuSOLVER": true,
"installcuRAND": true,
Expand All @@ -28,15 +28,15 @@
"ghcr.io/rapidsai/devcontainers/features/cuda",
"ghcr.io/rapidsai/devcontainers/features/rapids-build-utils"
],
"initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config/pip,local/share/${localWorkspaceFolderBasename}-cuda12.5-venvs}"],
"initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config/pip,local/share/${localWorkspaceFolderBasename}-cuda12.8-venvs}"],
"postAttachCommand": ["/bin/bash", "-c", "if [ ${CODESPACES:-false} = 'true' ]; then . devcontainer-utils-post-attach-command; . rapids-post-attach-command; fi"],
"workspaceFolder": "/home/coder",
"workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/cugraph-gnn,type=bind,consistency=consistent",
"mounts": [
"source=${localWorkspaceFolder}/../.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
"source=${localWorkspaceFolder}/../.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
"source=${localWorkspaceFolder}/../.config,target=/home/coder/.config,type=bind,consistency=consistent",
"source=${localWorkspaceFolder}/../.local/share/${localWorkspaceFolderBasename}-cuda12.5-venvs,target=/home/coder/.local/share/venvs,type=bind,consistency=consistent"
"source=${localWorkspaceFolder}/../.local/share/${localWorkspaceFolderBasename}-cuda12.8-venvs,target=/home/coder/.local/share/venvs,type=bind,consistency=consistent"
],
"customizations": {
"vscode": {
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/pr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ jobs:
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
with:
arch: '["amd64"]'
cuda: '["12.5"]'
cuda: '["12.8"]'
build_command: |
sccache -z;
build-all --verbose -j$(nproc --ignore=1);
Expand Down
2 changes: 1 addition & 1 deletion conda/environments/all_cuda-118_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ dependencies:
- pytest-forked
- pytest-xdist
- pytorch>=2.3
- pytorch_geometric>=2.5,<2.6
- pytorch_geometric>=2.5,<2.7
- rapids-build-backend>=0.3.0,<0.4.0.dev0
- rmm==25.2.*,>=0.0.0a0
- scikit-build-core>=0.10.0
Expand Down
2 changes: 1 addition & 1 deletion conda/environments/all_cuda-121_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ dependencies:
- pytest-forked
- pytest-xdist
- pytorch>=2.3
- pytorch_geometric>=2.5,<2.6
- pytorch_geometric>=2.5,<2.7
- rapids-build-backend>=0.3.0,<0.4.0.dev0
- rmm==25.2.*,>=0.0.0a0
- scikit-build-core>=0.10.0
Expand Down
2 changes: 1 addition & 1 deletion conda/environments/all_cuda-124_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ dependencies:
- pytest-forked
- pytest-xdist
- pytorch>=2.3
- pytorch_geometric>=2.5,<2.6
- pytorch_geometric>=2.5,<2.7
- rapids-build-backend>=0.3.0,<0.4.0.dev0
- rmm==25.2.*,>=0.0.0a0
- scikit-build-core>=0.10.0
Expand Down
2 changes: 1 addition & 1 deletion conda/recipes/cugraph-pyg/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ requirements:
- cupy >=12.0.0
- cugraph ={{ minor_version }}
- tensordict >=0.1.2
- pytorch_geometric >=2.5,<2.6
- pytorch_geometric >=2.5,<2.7

tests:
imports:
Expand Down
8 changes: 6 additions & 2 deletions dependencies.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,10 @@ dependencies:
cuda: "12.5"
packages:
- cuda-version=12.5
- matrix:
cuda: "12.8"
packages:
- cuda-version=12.8
cuda:
specific:
- output_types: [conda]
Expand Down Expand Up @@ -440,10 +444,10 @@ dependencies:
common:
- output_types: [conda]
packages:
- pytorch_geometric>=2.5,<2.6
- pytorch_geometric>=2.5,<2.7
- output_types: [pyproject, requirements]
packages:
- torch-geometric>=2.5,<2.6
- torch-geometric>=2.5,<2.7

depends_on_pylibwholegraph:
common:
Expand Down
2 changes: 1 addition & 1 deletion python/cugraph-pyg/conda/cugraph_pyg_dev_cuda-118.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ dependencies:
- pytest-cov
- pytest-xdist
- pytorch>=2.3
- pytorch_geometric>=2.5,<2.6
- pytorch_geometric>=2.5,<2.7
- tensordict>=0.1.2
- torchdata
name: cugraph_pyg_dev_cuda-118
4 changes: 2 additions & 2 deletions python/cugraph-pyg/cugraph_pyg/examples/gcn_dist_mnmg.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,10 +150,10 @@ def load_partitioned_data(
)

# Load features
feature_store["node", "x"] = torch.load(
feature_store["node", "x", None] = torch.load(
os.path.join(feature_path, f"rank={rank}_x.pt")
)
feature_store["node", "y"] = torch.load(
feature_store["node", "y", None] = torch.load(
os.path.join(feature_path, f"rank={rank}_y.pt")
)

Expand Down
4 changes: 2 additions & 2 deletions python/cugraph-pyg/cugraph_pyg/examples/gcn_dist_sg.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,8 +128,8 @@ def load_data(
] = data.edge_index

feature_store = cugraph_pyg.data.TensorDictFeatureStore()
feature_store["node", "x"] = data.x
feature_store["node", "y"] = data.y
feature_store["node", "x", None] = data.x
feature_store["node", "y", None] = data.y

return (
(feature_store, graph_store),
Expand Down
4 changes: 2 additions & 2 deletions python/cugraph-pyg/cugraph_pyg/examples/gcn_dist_snmg.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,8 +108,8 @@ def run_train(
] = ixr

feature_store = TensorDictFeatureStore()
feature_store["node", "x"] = data.x
feature_store["node", "y"] = data.y
feature_store["node", "x", None] = data.x
feature_store["node", "y", None] = data.y

dist.barrier()

Expand Down
24 changes: 15 additions & 9 deletions python/cugraph-pyg/cugraph_pyg/examples/rgcn_link_class_mnmg.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2024, NVIDIA CORPORATION.
# Copyright (c) 2024-2025, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
Expand Down Expand Up @@ -93,7 +93,11 @@ def train(epoch, model, optimizer, train_loader, edge_feature_store, num_steps=N
optimizer.zero_grad()

for i, batch in enumerate(train_loader):
r = edge_feature_store[("n", "e", "n"), "rel"][batch.e_id].flatten().cuda()
r = (
edge_feature_store[("n", "e", "n"), "rel", None][batch.e_id]
.flatten()
.cuda()
)
z = model.encode(batch.edge_index, r)

loss = model.recon_loss(z, batch.edge_index)
Expand Down Expand Up @@ -301,13 +305,18 @@ def load_partitioned_data(rank, edge_path, rel_path, pos_path, neg_path, meta_pa
feature_store = TensorDictFeatureStore()
edge_feature_store = WholeFeatureStore()

with open(meta_path, "r") as f:
meta = json.load(f)

print("num nodes:", meta["num_nodes"])

# Load edge index
graph_store[("n", "e", "n"), "coo"] = torch.load(
os.path.join(edge_path, f"rank={rank}.pt")
)
graph_store[
("n", "e", "n"), "coo", False, (meta["num_nodes"], meta["num_nodes"])
] = torch.load(os.path.join(edge_path, f"rank={rank}.pt"))

# Load edge rel type
edge_feature_store[("n", "e", "n"), "rel"] = torch.load(
edge_feature_store[("n", "e", "n"), "rel", None] = torch.load(
os.path.join(rel_path, f"rank={rank}.pt")
)

Expand All @@ -333,9 +342,6 @@ def load_partitioned_data(rank, edge_path, rel_path, pos_path, neg_path, meta_pa
splits[stage]["tail_neg"] = tail_neg
splits[stage]["relation"] = relation

with open(meta_path, "r") as f:
meta = json.load(f)

return (feature_store, graph_store), edge_feature_store, splits, meta


Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2024, NVIDIA CORPORATION.
# Copyright (c) 2024-2025, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
Expand Down Expand Up @@ -90,8 +90,10 @@ def load_data(
edge_feature_store = TensorDictFeatureStore()
meta = {}

graph_store[("n", "e", "n"), "coo"] = dataset.edge_index
edge_feature_store[("n", "e", "n"), "rel"] = dataset.edge_reltype.pin_memory()
graph_store[
("n", "e", "n"), "coo", False, (dataset.num_nodes, dataset.num_nodes)
] = dataset.edge_index
edge_feature_store[("n", "e", "n"), "rel", None] = dataset.edge_reltype.pin_memory()
meta["num_nodes"] = dataset.num_nodes
meta["num_rels"] = dataset.edge_reltype.max() + 1

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2024, NVIDIA CORPORATION.
# Copyright (c) 2024-2025, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
Expand Down Expand Up @@ -111,11 +111,13 @@ def load_data(
feature_store = TensorDictFeatureStore() # empty fs required by PyG
edge_feature_store = WholeFeatureStore()

graph_store[("n", "e", "n"), "coo"] = torch.tensor_split(
data.edge_index.cuda(), world_size, dim=1
)[rank]
print("num nodes:", data.num_nodes)

graph_store[
("n", "e", "n"), "coo", False, (data.num_nodes, data.num_nodes)
] = torch.tensor_split(data.edge_index.cuda(), world_size, dim=1)[rank]

edge_feature_store[("n", "e", "n"), "rel"] = torch.tensor_split(
edge_feature_store[("n", "e", "n"), "rel", None] = torch.tensor_split(
data.edge_reltype.cuda(),
world_size,
)[rank]
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2024, NVIDIA CORPORATION.
# Copyright (c) 2024-2025, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
Expand Down Expand Up @@ -30,15 +30,15 @@ def test_tensordict_feature_store_basic_api():

other_features = torch.randint(1024, (10, 5))

feature_store["node", "feat0"] = node_features_0
feature_store["node", "feat1"] = node_features_1
feature_store["other", "feat"] = other_features
feature_store["node", "feat0", None] = node_features_0
feature_store["node", "feat1", None] = node_features_1
feature_store["other", "feat", None] = other_features

assert (feature_store["node"]["feat0"][:] == node_features_0).all()
assert (feature_store["node"]["feat1"][:] == node_features_1).all()
assert (feature_store["other"]["feat"][:] == other_features).all()

assert len(feature_store.get_all_tensor_attrs()) == 3

del feature_store["node", "feat0"]
del feature_store["node", "feat0", None]
assert len(feature_store.get_all_tensor_attrs()) == 2
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2024, NVIDIA CORPORATION.
# Copyright (c) 2024-2025, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
Expand Down Expand Up @@ -46,22 +46,24 @@ def run_test_wholegraph_feature_store_basic_api(rank, world_size, dtype):
features = features.reshape((features.numel() // 100, 100)).to(torch_dtype)

tensordict_store = TensorDictFeatureStore()
tensordict_store["node", "fea"] = features
tensordict_store["node", "fea", None] = features

whole_store = WholeFeatureStore()
whole_store["node", "fea"] = torch.tensor_split(features, world_size)[rank]
whole_store["node", "fea", None] = torch.tensor_split(features, world_size)[rank]

ix = torch.arange(features.shape[0])
assert (
whole_store["node", "fea"][ix].cpu() == tensordict_store["node", "fea"][ix]
whole_store["node", "fea", None][ix].cpu()
== tensordict_store["node", "fea", None][ix]
).all()

label = torch.arange(0, features.shape[0]).reshape((features.shape[0], 1))
tensordict_store["node", "label"] = label
whole_store["node", "label"] = torch.tensor_split(label, world_size)[rank]
tensordict_store["node", "label", None] = label
whole_store["node", "label", None] = torch.tensor_split(label, world_size)[rank]

assert (
whole_store["node", "fea"][ix].cpu() == tensordict_store["node", "fea"][ix]
whole_store["node", "fea", None][ix].cpu()
== tensordict_store["node", "fea", None][ix]
).all()

pylibwholegraph.torch.initialize.finalize()
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2024, NVIDIA CORPORATION.
# Copyright (c) 2024-2025, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
Expand Down Expand Up @@ -30,8 +30,12 @@ def test_graph_store_basic_api():

ei = torch.stack([dst, src])

num_nodes = karate.number_of_nodes()

graph_store = GraphStore()
graph_store.put_edge_index(ei, ("person", "knows", "person"), "coo")
graph_store.put_edge_index(
ei, ("person", "knows", "person"), "coo", False, (num_nodes, num_nodes)
)

rei = graph_store.get_edge_index(("person", "knows", "person"), "coo")

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2024, NVIDIA CORPORATION.
# Copyright (c) 2024-2025, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
Expand Down Expand Up @@ -30,8 +30,12 @@ def test_graph_store_basic_api_mg():

ei = torch.stack([dst, src])

num_nodes = karate.number_of_nodes()

graph_store = GraphStore(is_multi_gpu=True)
graph_store.put_edge_index(ei, ("person", "knows", "person"), "coo")
graph_store.put_edge_index(
ei, ("person", "knows", "person"), "coo", False, (num_nodes, num_nodes)
)

rei = graph_store.get_edge_index(("person", "knows", "person"), "coo")

Expand Down
Loading
Loading