import math

import torch
from torch import nn


class Decoder(nn.Module):
    def __init__(self, in_feats, h_feats, n_head, dropout_rate, n_layers):
        super().__init__()

        # Stack of self-attention decoder layers.
        self.layers = nn.ModuleList([DecoderLayer(in_feats=in_feats, h_feats=h_feats, n_head=n_head,
                                                  dropout_rate=dropout_rate)
                                     for _ in range(n_layers)])
        self.act_fn = nn.ReLU()
        # Final projection from the input dimension to the hidden dimension.
        self.mlp = nn.Sequential(nn.Linear(in_feats, h_feats))

    def forward(self, x, edge_index):
        # Residual connection around the whole layer stack.
        _x = x
        for layer in self.layers:
            x = layer(x, edge_index)
        x = x + _x

        output = self.mlp(x)
        return output


class DecoderLayer(nn.Module):

    def __init__(self, in_feats, h_feats, n_head, dropout_rate):
        super(DecoderLayer, self).__init__()
        self.self_attention = MultiHeadAttention(in_channels=in_feats, hid_channels=h_feats, n_head=n_head)
        self.linear = nn.Linear(in_feats, h_feats)
        self.norm1 = LayerNorm(hid_channels=h_feats)
        self.dropout1 = nn.Dropout(p=dropout_rate)
        self.norm3 = LayerNorm(hid_channels=h_feats)
        self.linear1 = nn.Linear(h_feats, in_feats)

    def forward(self, x, edge_index):
        # edge_index is accepted for interface compatibility but is not used here:
        # the layer applies dense self-attention over all node features.
        _x = x
        x = self.self_attention(q=x, k=x, v=x)
        x = self.dropout1(x)
        # Project the residual branch to the hidden dimension before adding.
        _x = self.linear(_x)

        x = self.norm1(x + _x)

        # Map back to the input feature dimension.
        x = self.linear1(x)
        return x


class LayerNorm(nn.Module):
    def __init__(self, hid_channels, eps=1e-12):
        super(LayerNorm, self).__init__()
        self.gamma = nn.Parameter(torch.ones(hid_channels))
        self.beta = nn.Parameter(torch.zeros(hid_channels))
        self.eps = eps

    def forward(self, x):
        # Normalize over the last (feature) dimension, then apply a learnable
        # affine transform.
        mean = x.mean(-1, keepdim=True)
        var = x.var(-1, unbiased=False, keepdim=True)

        out = (x - mean) / torch.sqrt(var + self.eps)
        out = self.gamma * out + self.beta
        return out


class MultiHeadAttention(nn.Module):

    def __init__(self, in_channels, hid_channels, n_head):
        super(MultiHeadAttention, self).__init__()
        self.n_head = n_head
        self.attention = ScaleDotProductAttention()
        self.w_q = nn.Linear(in_channels, hid_channels)
        self.w_k = nn.Linear(in_channels, hid_channels)
        self.w_v = nn.Linear(in_channels, hid_channels)
        self.w_concat = nn.Linear(hid_channels, hid_channels)

    def forward(self, q, k, v, mask=None):
        # Project inputs, split into heads, attend, then merge the heads back.
        q, k, v = self.w_q(q), self.w_k(k), self.w_v(v)
        q, k, v = self.split(q), self.split(k), self.split(v)
        out, attention = self.attention(q, k, v, mask=mask)
        out = self.concat(out)
        out = self.w_concat(out)
        return out

    def split(self, tensor):
        # (length, d_model) -> (n_head, length, d_tensor)
        length, d_model = tensor.size()
        d_tensor = d_model // self.n_head
        tensor = tensor.view(length, self.n_head, d_tensor).transpose(0, 1)
        return tensor

    def concat(self, tensor):
        # (n_head, length, d_tensor) -> (length, n_head * d_tensor)
        head, length, d_tensor = tensor.size()
        d_model = head * d_tensor
        tensor = tensor.transpose(0, 1).contiguous().view(length, d_model)
        return tensor


class PositionwiseFeedForward(nn.Module):

    def __init__(self, in_channels, hid_channels, drop_prob=0.1):
        super(PositionwiseFeedForward, self).__init__()
        self.linear1 = nn.Linear(hid_channels, hid_channels)
        self.linear2 = nn.Linear(hid_channels, in_channels)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=drop_prob)

    def forward(self, x):
        # Two-layer feed-forward block with ReLU and dropout.
        x = self.linear1(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.linear2(x)
        return x


class ScaleDotProductAttention(nn.Module):
    def __init__(self):
        super(ScaleDotProductAttention, self).__init__()
        self.softmax = nn.Softmax(dim=-1)

    def forward(self, q, k, v, mask=None, e=1e-12):
        # q, k, v: (n_head, length, d_tensor)
        head, length, d_tensor = k.size()
        k_t = k.transpose(1, 2)
        score = (q @ k_t) / math.sqrt(d_tensor)
        # Mask out disallowed positions before the softmax, if a mask is given.
        if mask is not None:
            score = score.masked_fill(mask == 0, -10000)
        score = self.softmax(score)
        v = score @ v

        return v, score
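

if __name__ == "__main__":
    # Minimal usage sketch: the node count, feature sizes, head count, and
    # random inputs below are illustrative assumptions, not values taken from
    # the surrounding code.
    num_nodes, in_feats, h_feats = 8, 16, 32
    decoder = Decoder(in_feats=in_feats, h_feats=h_feats, n_head=4,
                      dropout_rate=0.1, n_layers=2)
    x = torch.randn(num_nodes, in_feats)
    edge_index = torch.zeros(2, 0, dtype=torch.long)  # placeholder; the decoder ignores it
    out = decoder(x, edge_index)
    print(out.shape)  # expected: torch.Size([8, 32])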