Commit
Add files via upload
yixinliu233 authored Nov 23, 2022
1 parent 63c864e commit b985a23
Showing 18 changed files with 692 additions and 0 deletions.
126 changes: 126 additions & 0 deletions data_loader.py
@@ -0,0 +1,126 @@
import warnings
import torch
import scipy.sparse as sp
import numpy as np
import os
import torch_geometric.transforms as T
from torch_geometric.datasets import Planetoid, WikipediaNetwork, Actor, WebKB, Amazon, Coauthor, WikiCS
from torch_geometric.utils import remove_self_loops

warnings.simplefilter("ignore")


def get_split(num_samples: int, train_ratio: float = 0.1, test_ratio: float = 0.8, num_splits: int = 10):
    """Generate num_splits random train/val/test boolean masks over num_samples nodes."""
    assert train_ratio + test_ratio < 1
    train_size = int(num_samples * train_ratio)
    test_size = int(num_samples * test_ratio)

    trains, vals, tests = [], [], []

    for _ in range(num_splits):
        indices = torch.randperm(num_samples)

        train_mask = torch.zeros(num_samples, dtype=torch.bool)
        train_mask[indices[:train_size]] = True

        test_mask = torch.zeros(num_samples, dtype=torch.bool)
        test_mask[indices[train_size: train_size + test_size]] = True

        val_mask = torch.zeros(num_samples, dtype=torch.bool)
        val_mask[indices[train_size + test_size:]] = True

        trains.append(train_mask.unsqueeze(1))
        vals.append(val_mask.unsqueeze(1))
        tests.append(test_mask.unsqueeze(1))

    # Each returned mask has shape (num_samples, num_splits), one split per column.
    train_mask_all = torch.cat(trains, 1)
    val_mask_all = torch.cat(vals, 1)
    test_mask_all = torch.cat(tests, 1)

    return train_mask_all, val_mask_all, test_mask_all
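
# For example, with the defaults, get_split(2708) returns three (2708, 10)
# boolean masks: ~10% train and 80% test per column, with the remaining ~10%
# of nodes used for validation.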


def get_structural_encoding(edges, nnodes, str_enc_dim=16):
    """Random-walk structural encoding: per-node return probabilities
    diag(M), diag(M^2), ..., diag(M^str_enc_dim)."""
    row = edges[0, :].numpy()
    col = edges[1, :].numpy()
    data = np.ones_like(row)

    A = sp.csr_matrix((data, (row, col)), shape=(nnodes, nnodes))
    # Inverse degrees; assumes no isolated nodes (every degree > 0).
    D = (np.array(A.sum(1)).squeeze()) ** -1.0

    Dinv = sp.diags(D)
    M = A * Dinv

    SE = [torch.from_numpy(M.diagonal()).float()]
    M_power = M
    for _ in range(str_enc_dim - 1):
        M_power = M_power * M  # sparse matrix product, i.e. M^k
        SE.append(torch.from_numpy(M_power.diagonal()).float())
    SE = torch.stack(SE, dim=-1)
    return SE
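
# Note: with an undirected (symmetric) A, diag((A @ Dinv)^k) coincides with
# diag((Dinv @ A)^k), the k-step return probabilities of the standard random
# walk, so SE has shape (nnodes, str_enc_dim).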


def load_data(dataset_name):

    path = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'data', dataset_name)

    if dataset_name in ['cora', 'citeseer', 'pubmed']:
        dataset = Planetoid(path, dataset_name)
    elif dataset_name in ['chameleon']:
        dataset = WikipediaNetwork(path, dataset_name)
    elif dataset_name in ['squirrel']:
        dataset = WikipediaNetwork(path, dataset_name, transform=T.NormalizeFeatures())
    elif dataset_name in ['actor']:
        dataset = Actor(path)
    elif dataset_name in ['cornell', 'texas', 'wisconsin']:
        dataset = WebKB(path, dataset_name)
    elif dataset_name in ['computers', 'photo']:
        dataset = Amazon(path, dataset_name, transform=T.NormalizeFeatures())
    elif dataset_name in ['cs', 'physics']:
        dataset = Coauthor(path, dataset_name, transform=T.NormalizeFeatures())
    elif dataset_name in ['wikics']:
        dataset = WikiCS(path)
    else:
        raise ValueError('Unknown dataset: {}'.format(dataset_name))

    data = dataset[0]

    edges = remove_self_loops(data.edge_index)[0]

    features = data.x
    [nnodes, nfeats] = features.shape
    nclasses = torch.max(data.y).item() + 1

    # Datasets that ship without public splits get random splits from get_split.
    if dataset_name in ['computers', 'photo', 'cs', 'physics', 'wikics']:
        train_mask, val_mask, test_mask = get_split(nnodes)
    else:
        train_mask, val_mask, test_mask = data.train_mask, data.val_mask, data.test_mask

    if len(train_mask.shape) < 2:
        train_mask = train_mask.unsqueeze(1)
        val_mask = val_mask.unsqueeze(1)
        test_mask = test_mask.unsqueeze(1)

    labels = data.y

    # Cache the structural encoding on disk so it is computed only once.
    path = '../data/se/{}'.format(dataset_name)
    if not os.path.exists(path):
        os.makedirs(path)
    file_name = path + '/{}_{}.pt'.format(dataset_name, 16)
    if os.path.exists(file_name):
        se = torch.load(file_name)
    else:
        print('Computing structural encoding...')
        se = get_structural_encoding(edges, nnodes)
        torch.save(se, file_name)
        print('Done. The structural encoding is saved as: {}.'.format(file_name))

    return features, edges, se, train_mask, val_mask, test_mask, labels, nnodes, nfeats
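
# Example (hypothetical invocation); all tensors are returned on CPU:
#   features, edges, se, train_mask, val_mask, test_mask, labels, nnodes, nfeats = load_data('cora')
#   features: (nnodes, nfeats) | edges: (2, nedges) | se: (nnodes, 16)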



161 changes: 161 additions & 0 deletions main.py
@@ -0,0 +1,161 @@
import argparse
import numpy as np
import torch
import torch.nn.functional as F
import dgl
import random

from data_loader import load_data
from model import *
from utils import *

EOS = 1e-10


def setup_seed(seed):
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    np.random.seed(seed)
    random.seed(seed)
    dgl.seed(seed)
    dgl.random.seed(seed)


def train_cl(cl_model, discriminator, optimizer_cl, features, str_encodings, edges, args):

    cl_model.train()
    discriminator.eval()

    # The (frozen) discriminator splits the graph into two views; the CL model
    # is trained on augmentations of those views.
    adj_1, adj_2, weights_lp, _ = discriminator(torch.cat((features, str_encodings), 1), edges)
    features_1, adj_1, features_2, adj_2 = augmentation(features, adj_1, features, adj_2, args, cl_model.training)
    cl_loss = cl_model(features_1, adj_1, features_2, adj_2)

    optimizer_cl.zero_grad()
    cl_loss.backward()
    optimizer_cl.step()

    return cl_loss.item()


def train_discriminator(cl_model, discriminator, optimizer_disc, features, str_encodings, edges, args):

    cl_model.eval()
    discriminator.train()

    adj_1, adj_2, weights_lp, weights_hp = discriminator(torch.cat((features, str_encodings), 1), edges)
    rand_np = generate_random_node_pairs(features.shape[0], edges.shape[1])
    psu_label = torch.ones(edges.shape[1]).cuda()  # pseudo-labels for the ranking loss

    embedding = cl_model.get_embedding(features, adj_1, adj_2)
    edge_emb_sim = F.cosine_similarity(embedding[edges[0]], embedding[edges[1]])

    # Low-pass (homophilic) view: real edges should be MORE similar than random pairs.
    rnp_emb_sim_lp = F.cosine_similarity(embedding[rand_np[0]], embedding[rand_np[1]])
    loss_lp = F.margin_ranking_loss(edge_emb_sim, rnp_emb_sim_lp, psu_label, margin=args.margin_hom, reduction='none')
    loss_lp *= torch.relu(weights_lp - 0.5)

    # High-pass (heterophilic) view: real edges should be LESS similar than random pairs.
    rnp_emb_sim_hp = F.cosine_similarity(embedding[rand_np[0]], embedding[rand_np[1]])
    loss_hp = F.margin_ranking_loss(rnp_emb_sim_hp, edge_emb_sim, psu_label, margin=args.margin_het, reduction='none')
    loss_hp *= torch.relu(weights_hp - 0.5)

    rank_loss = (loss_lp.mean() + loss_hp.mean()) / 2

    optimizer_disc.zero_grad()
    rank_loss.backward()
    optimizer_disc.step()

    return rank_loss.item()
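
# Only edges whose learned view weight exceeds 0.5 contribute to the ranking
# loss, via the torch.relu(weights - 0.5) gate above; the rest are masked out.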


def main(args):

    setup_seed(0)
    features, edges, str_encodings, train_mask, val_mask, test_mask, labels, nnodes, nfeats = load_data(args.dataset)
    results = []

    for trial in range(args.ntrials):

        setup_seed(trial)

        cl_model = GCL(nlayers=args.nlayers_enc, nlayers_proj=args.nlayers_proj, in_dim=nfeats, emb_dim=args.emb_dim,
                       proj_dim=args.proj_dim, dropout=args.dropout, sparse=args.sparse, batch_size=args.cl_batch_size).cuda()
        cl_model.set_mask_knn(features.cpu(), k=args.k, dataset=args.dataset)
        discriminator = Edge_Discriminator(nnodes, nfeats + str_encodings.shape[1], args.alpha, args.sparse).cuda()

        optimizer_cl = torch.optim.Adam(cl_model.parameters(), lr=args.lr_gcl, weight_decay=args.w_decay)
        optimizer_discriminator = torch.optim.Adam(discriminator.parameters(), lr=args.lr_disc, weight_decay=args.w_decay)

        features = features.cuda()
        str_encodings = str_encodings.cuda()
        edges = edges.cuda()

        best_acc_val = 0
        best_acc_test = 0

        for epoch in range(1, args.epochs + 1):

            # Alternate updates: several CL steps per discriminator step.
            for _ in range(args.cl_rounds):
                cl_loss = train_cl(cl_model, discriminator, optimizer_cl, features, str_encodings, edges, args)
            rank_loss = train_discriminator(cl_model, discriminator, optimizer_discriminator, features, str_encodings, edges, args)

            print("[TRAIN] Epoch:{:04d} | CL loss:{:.4f} | RANK loss:{:.4f}".format(epoch, cl_loss, rank_loss))

            if epoch % args.eval_freq == 0:
                cl_model.eval()
                discriminator.eval()
                adj_1, adj_2, _, _ = discriminator(torch.cat((features, str_encodings), 1), edges)
                embedding = cl_model.get_embedding(features, adj_1, adj_2)
                cur_split = 0 if (train_mask.shape[1] == 1) else (trial % train_mask.shape[1])
                acc_test, acc_val = eval_test_mode(embedding, labels, train_mask[:, cur_split],
                                                   val_mask[:, cur_split], test_mask[:, cur_split])
                print('[TEST] Epoch:{:04d} | CL loss:{:.4f} | RANK loss:{:.4f} | VAL ACC:{:.2f} | TEST ACC:{:.2f}'.format(
                    epoch, cl_loss, rank_loss, acc_val, acc_test))

                # Model selection: keep the test accuracy achieved at the best validation accuracy.
                if acc_val > best_acc_val:
                    best_acc_val = acc_val
                    best_acc_test = acc_test

        results.append(best_acc_test)

    print('\n[FINAL RESULT] Dataset:{} | Runs:{} | ACC:{:.2f}+-{:.2f}'.format(args.dataset, args.ntrials,
                                                                              np.mean(results), np.std(results)))

if __name__ == '__main__':
    parser = argparse.ArgumentParser()

    # ESSENTIAL
    parser.add_argument('-dataset', type=str, default='cornell',
                        choices=['cora', 'citeseer', 'pubmed', 'chameleon', 'squirrel', 'actor', 'cornell',
                                 'texas', 'wisconsin', 'computers', 'photo', 'cs', 'physics', 'wikics'])
    parser.add_argument('-ntrials', type=int, default=10)
    parser.add_argument('-sparse', type=int, default=0)
    parser.add_argument('-eval_freq', type=int, default=20)
    parser.add_argument('-epochs', type=int, default=400)
    parser.add_argument('-lr_gcl', type=float, default=0.001)
    parser.add_argument('-lr_disc', type=float, default=0.001)
    parser.add_argument('-cl_rounds', type=int, default=2)
    parser.add_argument('-w_decay', type=float, default=0.0)
    parser.add_argument('-dropout', type=float, default=0.5)

    # DISC Module - Hyper-param
    parser.add_argument('-alpha', type=float, default=0.1)
    parser.add_argument('-margin_hom', type=float, default=0.5)
    parser.add_argument('-margin_het', type=float, default=0.5)

    # GRL Module - Hyper-param
    parser.add_argument('-nlayers_enc', type=int, default=2)
    parser.add_argument('-nlayers_proj', type=int, default=1, choices=[1, 2])
    parser.add_argument('-emb_dim', type=int, default=128)
    parser.add_argument('-proj_dim', type=int, default=128)
    parser.add_argument('-cl_batch_size', type=int, default=0)
    parser.add_argument('-k', type=int, default=20)
    parser.add_argument('-maskfeat_rate_1', type=float, default=0.1)
    parser.add_argument('-maskfeat_rate_2', type=float, default=0.5)
    parser.add_argument('-dropedge_rate_1', type=float, default=0.5)
    parser.add_argument('-dropedge_rate_2', type=float, default=0.1)

    args = parser.parse_args()

    print(args)
    main(args)
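
# Example invocation (hypothetical; a CUDA device is required, since the
# models and tensors are moved to GPU with .cuda()):
#   python main.py -dataset cora -ntrials 10 -eval_freq 20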