-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathdeep_rule_evaluation.py
248 lines (215 loc) · 9.89 KB
/
deep_rule_evaluation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
import copy
import os
import json
from json import JSONDecodeError
import pytz
from os import path
from datetime import datetime
from networkx.readwrite import json_graph
from graph.deep_rule import DeepRule
from graph.groot_algo import deep_groot
from graph.rca_algo import deep_rca_rank
from util import log as logging
import conf.constants as const
logger = logging.getLogger(const.ROOT_API)
class DeepRuleEvaluation:
def __init__(self, catalog, dirname, weekINC='INC0181417'):
self.dirname = dirname
self.catalog = catalog
# all cases later than INC0181417 that have timestamps
# Only Print out the cases after weekINC
self.weekINC = weekINC
self.dataset_path = path.join(self.dirname, self.catalog, 'dataset')
self.report_folder = path.join(self.dirname, self.catalog, 'report')
self.not_found_list = ['INC0419163.json',
'INC0529818.json',
'INC0579093.json',
'INC0542291.json',
'INC0539966.json',
'INC0498854.json',
'INC0379871.json',
'INC0448093.json',
'INC0545653.json',
'INC0498021.json']
self.training_split = 0.5
self.rca_parameters = {}
self.dp = DeepRule(self.catalog, self.rca_parameters)
def train(self):
logger.info("Start to write report...")
"""
calculate response & write accuracy report
"""
# Training
training_data = []
for dir, _, files in os.walk(self.dataset_path):
sorted_files = sorted(files)
print(sorted_files)
for file in sorted_files[:int(0.1 * len(sorted_files))]:
if file == '.DS_Store':
continue
if file in self.not_found_list:
continue
filePath = path.join(dir, file)
print(file)
with open(filePath) as json_file:
current_case = json.load(json_file)
for key, data in current_case.items():
print(key)
if not (data['events'] and data['events']['poolEvents']):
continue
data = self.purify(data)
training_data.append(data)
model_stored_path = {
}
if self.catalog in model_stored_path:
self.dp.train(training_data, load_model_path=model_stored_path[self.catalog])
else:
self.dp.train(training_data, load_model_path=None)
def test(self, debug_files=None):
scores, rca, report_obj = {}, {}, {}
# Testing
for dir, _, files in os.walk(self.dataset_path):
if debug_files is not None:
testing_files = debug_files
else:
testing_files = files[int(self.training_split * len(files)):]
for file in testing_files:
if file == '.DS_Store':
continue
if file in self.not_found_list:
continue
filePath = path.join(dir, file)
# print(file)
with open(filePath) as json_file:
current_case = json.load(json_file)
for key, data in current_case.items():
# print(key)
report_obj[key], grades, length, preLength = {}, {}, 0, 0
if not (data['events'] and data['events']['poolEvents']):
continue
res, _ = self.get_res(data, file)
untied_res = {}
for n in res:
untied_res[n] = [res[n], 1]
res = untied_res
# sort res based on scores from high to low
res = {k: v for k, v in sorted(res.items(), key=lambda item: item[1], reverse=True)}
for n in res:
res[n] = tuple(res[n])
print(res)
report_obj[key]['fullResponse'] = res
# default value, which means rca result failed to hit any rank in groot result
report_obj[key]['rank'] = len(res)
rca[key] = [data['RCAEvent'] + ',' + data['RCAPool']]
print(rca[key])
# find the rank for rca result in groot result
for k, v in res.items():
if not v in grades:
length += list(res.values()).count(v)
# grade is a set, to explain tied rank
grades[v] = length
if k in rca[key]:
report_obj[key]['rank'], report_obj[key]['length'] = grades[v], length
print(file, report_obj[key]['rank'], report_obj[key]['length'])
break
# raise RuntimeError()
# report file name & wirte report
date_format = "%y-%m-%d_%H-%M-%S"
timeString = datetime.now().astimezone(pytz.timezone('US/Pacific')).strftime(date_format)
report_file = 'report_' + timeString + '_.json'
report_path = path.join(self.report_folder, report_file)
with open(report_path, 'w') as out_file:
json.dump(report_obj, out_file, indent=4)
rank1_list = []
rank23_list = []
failure_list = []
missing_list = []
for key, data in report_obj.items():
if 'length' in data:
if data['rank'] == 1 and data['length'] <= 5:
rank1_list.append(key)
elif data['rank'] <= 3 and data['length'] <= 5:
rank23_list.append(key)
else:
failure_list.append(key)
else:
missing_list.append(key)
print("RCA of {} is missing".format(key))
week_rank1 = [el for el in rank1_list if el >= self.weekINC]
week_rank23 = [el for el in rank23_list if el >= self.weekINC]
week_fail = [el for el in failure_list if el >= self.weekINC]
week_report = {k: v for k, v in report_obj.items() if k >= self.weekINC}
print('Report:')
print(json.dumps(week_report, indent=4))
print(f'For This Week {self.catalog}:')
print(f'Total: {len(week_rank1) + len(week_rank23) + len(week_fail) + len(missing_list)}')
print(f'Top1({len(week_rank1)}): {week_rank1}')
print(f'Rank2 - 3({len(week_rank23)}): {week_rank23}')
print(f'Rank > 3({len(week_fail)}): {week_fail}')
print(f'Missing cases({len(missing_list)}): {missing_list}')
def get_ranks(self):
report_path, reports_path = '', path.join(self.dirname, self.catalog, 'report')
for dir, _, reports in os.walk(reports_path):
report_path = path.join(dir, max(reports))
report = {}
with open(report_path) as json_obj:
try:
report_obj = json.load(json_obj)
for incident_num, contents in report_obj.items():
if incident_num.startswith('__'):
continue
report[incident_num] = (contents['rank'], contents['length'] if 'length' in contents else None)
return report, report_obj
except JSONDecodeError:
logger.info("Can't find accuracy report")
def purify(self, data):
for p in data['events']['poolEvents']:
for e in list(data['events']['poolEvents'][p].keys()):
if e == 'Badbox':
del_flag = True
for details in data['events']['poolEvents'][p][e]:
for d in data['events']['poolEvents'][p][e][details]:
if d['severity'] > 10:
del_flag = False
if del_flag:
del data['events']['poolEvents'][p][e]
for rc_event in ['adyen', 'paypal', 'afterpay']:
upper_rc_event = str.upper(rc_event[0]) + rc_event[1:]
# Merge ThirdParty into Issue
if data['RCAPool'] == rc_event:
# data['RCAEvent'] = 'Issue'
data['RCAEvent'] = f'Third Party {upper_rc_event} Error'
# Merge ThirdParty into Issue
if rc_event in data['events']['poolEvents']:
tmp_dict = {}
for k, v in data['events']['poolEvents'][rc_event].items():
tmp_dict.update(copy.deepcopy(v))
data['events']['poolEvents'][rc_event] = {}
data['events']['poolEvents'][rc_event][data['RCAEvent']] = tmp_dict
return data
# INC0568174
def get_res(self, data, data_name):
data = self.purify(data)
pool_events = data['events']['poolEvents']
domain_events = data['events']['domainEvents']
pool_list, subG_obj = data['subGraphPools'], data['subGraph']
for node in subG_obj['nodes']:
node['events'] = set(node['events'])
subG = json_graph.node_link_graph(subG_obj, True, True)
domain = None
if self.catalog == const.DOMAIN:
domain = data['domain']
else: # cs_md Case
pool_list = data[const.KEY_GENERAL].split(",")
def rca_algo(EG, **kwargs):
return deep_rca_rank(EG, self.dp, **self.rca_parameters,
data_name=data_name, **kwargs)
res, _, _ = deep_groot(pool_events, domain_events, subG, pool_list, domain, self.catalog, rca_algo)
return res, 0
if __name__ == "__main__":
# general / domain
dirname = os.path.abspath(__file__)
for var in [const.DOMAIN, const.GENERAL]:
f = DeepRuleEvaluation(var, dirname)
f.train()
f.test()