-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcomparison.py
92 lines (82 loc) · 3.36 KB
/
comparison.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
import glob
import pandas as pd
import numpy as np
import argparse
parser = argparse.ArgumentParser()
parser.add_argument('--classifier', default="svm", type=str, nargs='?', help='classifier')
parser.add_argument('--optimize', default="acc", type=str, nargs='?', help='measure to optimize')
args = parser.parse_args()
classifier = args.classifier
measure_opt = args.optimize
if measure_opt == "acc":
measure_name = "Accuracy"
if measure_opt == "auc":
measure_name = "AUC"
datasets = ['german', 'bank', 'twins', 'compas', 'adult']
methods = ['Standard', 'Counterfactual', 'CCRAL']
NUM_FOLD = 5
if "Standard" in methods:
# join standard classifier results
all_files = []
for dataset in datasets:
files = glob.glob('result/std_{}_{}*.csv'.format(classifier, dataset))
all_files = np.append(all_files, files)
# remove "1.csv" in file names
groups = set([e[:-5] for e in all_files])
for group in groups:
subs = [e for e in all_files if e.startswith(group)]
subs.sort()
ls = [pd.read_csv(e) for e in subs]
if len(ls) == NUM_FOLD:
df = pd.concat(ls)
df.to_csv(group[:-4] + '_ALL.csv', index=None)
if "Counterfactual" in methods:
# join counterfactual results
all_files = []
for dataset in datasets:
files = glob.glob('result/cf_{}_{}*.csv'.format(classifier, dataset))
all_files = np.append(all_files, files)
# remove "1.csv" in file names
groups = set([e[:-5] for e in all_files])
for group in groups:
subs = [e for e in all_files if e.startswith(group)]
subs.sort()
ls = [pd.read_csv(e) for e in subs]
if len(ls) == NUM_FOLD:
df = pd.concat(ls)
df.to_csv(group[:-4] + '_ALL.csv', index=None)
if "CCRAL" in methods:
# join ccral results
all_files = []
for dataset in datasets:
files = glob.glob('result/ccral_{}_{}*.csv'.format(classifier, dataset))
all_files = np.append(all_files, files)
# remove "1.csv" in file names
groups = set([e[:-5] for e in all_files])
for group in groups:
subs = [e for e in all_files if e.startswith(group)]
subs.sort()
ls = [pd.read_csv(e) for e in subs]
if len(ls) == NUM_FOLD:
df = pd.concat(ls)
df.to_csv(group[:-4] + '_ALL.csv', index=None)
# summarize performance of each method
with open('result/_result_{}_{}.csv'.format(classifier, measure_opt), 'w') as f:
f.write('Dataset,Method,{}\n'.format(measure_name))
for dataset in datasets:
for method in methods:
if method == "Standard":
result = pd.read_csv('result/std_{}_{}_ALL.csv'.format(classifier, dataset))
result = result.mean()*100
if method == "Counterfactual":
result = pd.read_csv('result/cf_{}_{}_opt_{}_ALL.csv'.format(classifier, dataset, measure_opt))
result = result.mean() * 100
if method == "CCRAL":
result = pd.read_csv('result/ccral_{}_{}_opt_{}_ALL.csv'.format(classifier, dataset, measure_opt))
result = result.mean()*100
performance = round(result[measure_name], 2)
ls = [dataset, method, performance]
st = ','.join(map(str, ls)) + '\n'
f.write(st)
df = pd.read_csv('result/_result_{}_{}.csv'.format(classifier, measure_opt))
print(df)