
Commit b6493c0: Add files via upload (initial commit, 0 parents)

File tree: 4 files changed (+306, −0 lines)


benchmark.py (+97 lines)

```python
import argparse
import os
import random
import time
import warnings

import numpy as np
import pandas
import torch

from utils import *

warnings.filterwarnings("ignore")

# One candidate seed per trial.
seed_list = list(range(3407, 10000, 10))


def set_seed(seed=3407):
    # Make a trial reproducible across Python, NumPy, and PyTorch.
    os.environ['PYTHONHASHSEED'] = str(seed)
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
        torch.backends.cudnn.deterministic = True


parser = argparse.ArgumentParser()
parser.add_argument('--trials', type=int, default=1)
parser.add_argument('--semi_supervised', type=int, default=0)
parser.add_argument('--inductive', type=int, default=0)
parser.add_argument('--models', type=str, default=None)
parser.add_argument('--datasets', type=str, default=None)
args = parser.parse_args()

columns = ['name']
datasets = ['weibo', 'toloker']
models = list(model_detector_dict.keys())

# '--datasets 0-1' selects a contiguous index range; '--datasets 0,1' selects
# individual indices into the datasets list above.
if args.datasets is not None:
    if '-' in args.datasets:
        st, ed = args.datasets.split('-')
        datasets = datasets[int(st):int(ed) + 1]
    else:
        datasets = [datasets[int(t)] for t in args.datasets.split(',')]
    print('Evaluated Datasets: ', datasets)

# '--models A-B' restricts evaluation to the named detectors.
if args.models is not None:
    models = args.models.split('-')
    print('Evaluated Baselines: ', models)

# One results column per dataset/metric pair.
for dataset in datasets:
    for metric in ['AUROC mean', 'AUROC std', 'AUPRC mean', 'AUPRC std',
                   'RecK mean', 'RecK std', 'Time']:
        columns.append(dataset + '-' + metric)

results = pandas.DataFrame(columns=columns)
file_id = None
for model in models:
    model_result = {'name': model}
    for dataset_name in datasets:
        time_cost = 0
        train_config = {
            'device': 'cuda:0',
            'epochs': 200,
            'patience': 50,
            'metric': 'AUPRC',
            'inductive': bool(args.inductive)
        }
        data = Dataset(dataset_name)
        model_config = {'model': model, 'lr': 0.01, 'drop_rate': 0}

        auc_list, pre_list, rec_list = [], [], []
        for t in range(args.trials):
            torch.cuda.empty_cache()
            print("Dataset {}, Model {}, Trial {}".format(dataset_name, model, t))
            # Note: the data split is fixed to trial_id=1; only the random
            # seed varies across trials.
            data.split(trial_id=1)
            seed = seed_list[t]
            set_seed(seed)
            train_config['seed'] = seed
            detector = model_detector_dict[model](train_config, model_config, data)
            st = time.time()
            test_score = detector.train()
            auc_list.append(test_score['AUROC'])
            pre_list.append(test_score['AUPRC'])
            rec_list.append(test_score['RecK'])
            ed = time.time()
            time_cost += ed - st
        del detector, data

        model_result[dataset_name + '-AUROC mean'] = np.mean(auc_list)
        model_result[dataset_name + '-AUROC std'] = np.std(auc_list)
        model_result[dataset_name + '-AUPRC mean'] = np.mean(pre_list)
        model_result[dataset_name + '-AUPRC std'] = np.std(pre_list)
        model_result[dataset_name + '-RecK mean'] = np.mean(rec_list)
        model_result[dataset_name + '-RecK std'] = np.std(rec_list)
        model_result[dataset_name + '-Time'] = time_cost / args.trials

    model_result = pandas.DataFrame(model_result, index=[0])
    results = pandas.concat([results, model_result])
    file_id = save_results(results, file_id)
    print(results)
```
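
save_results (defined in utils.py) writes the results table to results/<file_id>.xlsx in transposed form, so a saved sheet can be reloaded for inspection like this (a sketch; file ID 0 is just an example of whatever ID was printed during the run):

```python
import pandas

# save_results stores the transposed results table, one column per model
df = pandas.read_excel('results/0.xlsx', index_col=0)
print(df)
```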

readme.md (+24 lines)

## MGADN

This project implements the paper "Heterophily Learning and Global-local Dependencies Enhanced Multi-view Representation Learning for Graph Anomaly Detection", submitted to Knowledge-Based Systems.

## Model Usage

### Dependencies

This project was tested with CUDA 11.6 and the key dependencies listed below:

```
pytorch=1.11.0
torch-geometric=2.0.4
```
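
For reference, one possible way to set up these dependencies (a sketch assuming a conda environment; the exact channels and package builds are assumptions, and note that utils.py additionally imports dgl, which the list above omits):

```
conda install pytorch=1.11.0 -c pytorch
pip install torch-geometric==2.0.4
pip install dgl
```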

### Dataset

The public graph anomaly detection datasets weibo and toloker are included for evaluation.

### Usage

```
python benchmark.py --datasets 0
```

`--datasets` takes dataset indices: `0` selects weibo and `1` selects toloker.
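
The other benchmark.py flags can be combined with this; for example (a sketch based on the argparse options in benchmark.py, not a command documented by the authors):

```
# evaluate MGADN on both weibo and toloker, averaging over 5 trials
python benchmark.py --models MGADN --datasets 0,1 --trials 5
```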

transformer.py (+145 lines)
1+
import torch
2+
from torch import nn
3+
4+
5+
# class Decoder(nn.Module):
6+
# def __init__(self, in_feats,h_feats,n_head,dropout_rate,n_layers):
7+
# super().__init__()
8+
9+
# self.layers = nn.ModuleList([DecoderLayer(in_feats=in_feats,h_feats=h_feats,n_head=n_head,
10+
# dropout_rate=0.)for _ in range(n_layers)])
11+
12+
# self.act_fn = nn.ReLU()
13+
# self.lin = nn.Linear(h_feats,in_feats)
14+
# self.mlp = nn.Sequential(nn.Linear(in_feats,h_feats) )
15+
16+
# def forward(self, x, edge_index):
17+
# _x = x
18+
# for layer in self.layers:
19+
# x = layer(x, edge_index)
20+
# x = x + _x
21+
# # x = self.lin(x)
22+
# output = self.mlp(x)
23+
# return output
24+
25+
class Decoder(nn.Module):
26+
def __init__(self, in_feats,h_feats,n_head,dropout_rate,n_layers):
27+
super().__init__()
28+
29+
self.layers = nn.ModuleList([DecoderLayer(in_feats=in_feats,h_feats=h_feats,n_head=n_head,
30+
dropout_rate=0.)for _ in range(n_layers)])
31+
self.act_fn = nn.ReLU()
32+
self.mlp = nn.Sequential(nn.Linear(in_feats,h_feats) )
33+
34+
def forward(self, x, edge_index):
35+
_x = x
36+
for layer in self.layers:
37+
x = layer(x, edge_index)
38+
x = x + _x
39+
40+
output = self.mlp(x)
41+
return output
42+
43+
class DecoderLayer(nn.Module):
44+
45+
def __init__(self, in_feats, h_feats, n_head, dropout_rate):
46+
super(DecoderLayer, self).__init__()
47+
self.self_attention = MultiHeadAttention(in_channels=in_feats, hid_channels=h_feats, n_head=n_head)
48+
self.linear = nn.Linear(in_feats, h_feats)
49+
self.norm1 = LayerNorm(hid_channels= h_feats)
50+
self.dropout1 = nn.Dropout(p=dropout_rate)
51+
self.norm3 = LayerNorm(hid_channels= h_feats)
52+
self.linear1 = nn.Linear(h_feats, in_feats)
53+
54+
def forward(self, x, edge_index):
55+
56+
_x = x
57+
x = self.self_attention(q=x, k=x, v=x)
58+
x = self.dropout1(x)
59+
_x = self.linear(_x)
60+
61+
x = self.norm1(x + _x)
62+
63+
x = self.linear1(x)
64+
return x
65+
66+
class LayerNorm(nn.Module):
67+
def __init__(self, hid_channels, eps=1e-12):
68+
super(LayerNorm, self).__init__()
69+
self.gamma = nn.Parameter(torch.ones(hid_channels))
70+
self.beta = nn.Parameter(torch.zeros(hid_channels))
71+
self.eps = eps
72+
73+
def forward(self, x):
74+
mean = x.mean(-1, keepdim=True)
75+
var = x.var(-1, unbiased=False, keepdim=True)
76+
77+
78+
out = (x - mean) / torch.sqrt(var + self.eps)
79+
out = self.gamma * out + self.beta
80+
return out
81+
82+
class MultiHeadAttention(nn.Module):
83+
84+
def __init__(self, in_channels, hid_channels,n_head):
85+
super(MultiHeadAttention, self).__init__()
86+
self.n_head = n_head
87+
self.attention = ScaleDotProductAttention()
88+
self.w_q = nn.Linear(in_channels, hid_channels)
89+
self.w_k = nn.Linear(in_channels, hid_channels)
90+
self.w_v = nn.Linear(in_channels, hid_channels)
91+
self.w_concat = nn.Linear(hid_channels, hid_channels)
92+
93+
def forward(self, q, k, v, mask=None):
94+
q, k, v = self.w_q(q), self.w_k(k), self.w_v(v)
95+
q, k, v = self.split(q), self.split(k), self.split(v)
96+
out, attention = self.attention(q, k, v, mask=mask)
97+
out = self.concat(out)
98+
out = self.w_concat(out)
99+
return out
100+
101+
def split(self, tensor):
102+
length, d_model = tensor.size()
103+
d_tensor = d_model // self.n_head
104+
tensor = tensor.view(length, self.n_head, d_tensor).transpose(1, 2)
105+
return tensor
106+
107+
def concat(self, tensor):
108+
length , head, d_tensor = tensor.size()
109+
d_model = head * d_tensor
110+
tensor = tensor.transpose(1, 2).contiguous().view(length, d_model)
111+
return tensor
112+
113+
class PositionwiseFeedForward(nn.Module):
114+
115+
def __init__(self, in_channels, hid_channels, drop_prob=0.1):
116+
super(PositionwiseFeedForward, self).__init__()
117+
self.linear1 = nn.Linear(hid_channels, hid_channels)
118+
self.linear2 = nn.Linear(hid_channels, in_channels)
119+
self.relu = nn.ReLU()
120+
self.dropout = nn.Dropout(p=drop_prob)
121+
122+
def forward(self, x):
123+
x = self.linear1(x)
124+
x = self.relu(x)
125+
x = self.dropout(x)
126+
x = self.linear2(x)
127+
return x
128+
129+
130+
import math
131+
from torch import nn
132+
133+
class ScaleDotProductAttention(nn.Module):
134+
def __init__(self):
135+
super(ScaleDotProductAttention, self).__init__()
136+
self.softmax = nn.Softmax(dim=-1)
137+
138+
def forward(self, q, k, v, mask=None, e=1e-12):
139+
head, length, d_tensor = k.size()
140+
k_t = k.transpose(1, 2)
141+
score = (q @ k_t) / math.sqrt(d_tensor)
142+
score = self.softmax(score)
143+
v = score @ v
144+
145+
return v, score
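
As a quick sanity check, the decoder stack can be exercised on random node features (a hypothetical smoke test, not part of the upload; all shapes are arbitrary):

```python
import torch
from transformer import Decoder

x = torch.randn(10, 64)  # 10 nodes with 64-dimensional features
decoder = Decoder(in_feats=64, h_feats=64, n_head=4, dropout_rate=0.0, n_layers=2)
out = decoder(x, edge_index=None)  # edge_index is threaded through but unused
print(out.shape)  # torch.Size([10, 64])
```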

utils.py (+40 lines)

```python
import os
import random

from dgl.data.utils import load_graphs

from models.detector import *


class Dataset:
    def __init__(self, name='reddit', prefix='/home/hdou/model/GADBench-master/datasets/'):
        # Each dataset is a single DGL graph with pre-generated per-trial
        # train/val/test masks stored in the node data.
        graph = load_graphs(prefix + name)[0][0]
        self.name = name
        self.graph = graph

    def split(self, trial_id=0):
        # Select the masks belonging to one pre-generated split.
        self.graph.ndata['train_mask'] = self.graph.ndata['train_masks'][:, trial_id]
        self.graph.ndata['val_mask'] = self.graph.ndata['val_masks'][:, trial_id]
        self.graph.ndata['test_mask'] = self.graph.ndata['test_masks'][:, trial_id]


model_detector_dict = {
    'MGADN': BaseGNNDetector,
    # 'GCN': BaseGNNDetector,
    # 'GraphSAGE': BaseGNNDetector,
    # 'GAT': BaseGNNDetector,
    # 'GAS': GASDetector,
    # 'PCGNN': PCGNNDetector,
    # 'AMNet': BaseGNNDetector,
    # 'BWGNN': BaseGNNDetector,
    # 'GHRN': GHRNDetector,
}


def save_results(results, file_id):
    # Write the transposed results table to results/<file_id>.xlsx, picking
    # the first unused numeric ID on the first call.
    if not os.path.exists('results/'):
        os.mkdir('results/')
    if file_id is None:
        file_id = 0
        while os.path.exists('results/{}.xlsx'.format(file_id)):
            file_id += 1
    results.transpose().to_excel('results/{}.xlsx'.format(file_id))
    print('save to file ID: {}'.format(file_id))
    return file_id
```
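
The Dataset wrapper can also be used on its own (a hypothetical standalone example; the default prefix path is machine-specific and would need adjusting):

```python
from utils import Dataset

data = Dataset('weibo')  # loads the weibo graph via dgl's load_graphs
data.split(trial_id=0)   # installs train/val/test masks for pre-generated split 0
print(data.graph.ndata['train_mask'].sum())  # number of training nodes
```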
