Skip to content

Commit

Permalink
KPIRoot
Browse files Browse the repository at this point in the history
  • Loading branch information
WenweiGu authored Oct 30, 2024
0 parents commit e255581
Show file tree
Hide file tree
Showing 2 changed files with 169 additions and 0 deletions.
96 changes: 96 additions & 0 deletions KPIRoot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
import pandas as pd
import os
import statsmodels.tools.sm_exceptions
from preprocess import Align
from sklearn.preprocessing import MinMaxScaler
from features import RFeatures, SAXFeatures
from sklearn.metrics import jaccard_score
from statsmodels.tsa.stattools import grangercausalitytests
from sklearn.metrics import f1_score
import time

# Show complete frames when debugging results interactively.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

dataset_num = '1'
data_dir = f'./dataset{dataset_num}/'

# Each first-level sub-directory of the dataset is one instance. Only the
# first os.walk() entry is consumed: deeper directories would not map to a
# valid './dataset{N}/{instance}' path later in the script. A missing dataset
# directory yields an empty list, exactly as a bare os.walk() loop would.
_, _top_level_dirs, _ = next(os.walk(data_dir), (data_dir, [], []))
instance_names = list(_top_level_dirs)

# Evaluate every instance: score each candidate KPI against the alarm KPI,
# rank the candidates, and report F1 and top-k hit rate against the labels.
for instance_name in instance_names:
    # 'label' holds the ground-truth CSVs, not an instance. Skip it without
    # abandoning the instances that follow it in directory-listing order.
    if instance_name == 'label':
        continue
    start = time.time()

    instance_dir = f'./dataset{dataset_num}/{instance_name}'
    label_path = f'./dataset{dataset_num}/label/{instance_name}_label.csv'
    label = pd.read_csv(label_path)
    correlated = list(label['names'])
    k = 10  # size of the top-k list used for the hit rate
    n = int((label['labels'] == 1).sum())  # number of ground-truth positives
    correlation_scores = dict()

    # The alarm KPI is the same for every candidate; read it once and hand a
    # copy to the aligner so each candidate still starts from pristine data.
    alarm_frame = pd.read_csv(f'{instance_dir}/origin_data.csv')
    sax_bin = 26
    sax = SAXFeatures(sax_bin)

    for kpi in correlated:
        correlation_frame = pd.read_csv(f'{instance_dir}/{kpi}')
        aligned = Align().realign_online(alarm_frame.copy(), correlation_frame)
        R_interval = RFeatures(5, 1.5, 0).get_rSegments(aligned.iloc[:, 0])

        similarity_score = 0
        causality_score = 0

        for begin, finish in R_interval:
            # Restrict both series to the detected interval.
            alarm_segment = aligned.iloc[:, 0][begin:finish]
            correlation_segment = aligned.iloc[:, 1][begin:finish]

            sax_alarm = sax.sax_transform(alarm_segment).flatten()
            sax_kpi = sax.sax_transform(correlation_segment).flatten()

            # Encode SAX letters as integers ('a' -> 0, 'b' -> 1, ...).
            sax_alarm = [ord(ele) - ord('a') for ele in sax_alarm]
            sax_kpi = [ord(ele) - ord('a') for ele in sax_kpi]

            similarity_score += jaccard_score(sax_alarm, sax_kpi, average='weighted')

            # NOTE(review): the Granger test has always been run on the raw
            # (un-normalised) segment; a min-max scaled copy used to be
            # computed here but was never consumed, so it has been dropped.
            granger_data = pd.concat([alarm_segment, correlation_segment], axis=1)
            try:
                granger = grangercausalitytests(granger_data, maxlag=1, verbose=False)
            except statsmodels.tools.sm_exceptions.InfeasibleTestError:
                # This interval contributes nothing to the causality score.
                continue
            # statsmodels reports 'lrtest' as (chi2 statistic, p-value, df):
            # index 0 is the test statistic (larger = stronger evidence),
            # not the p-value.
            lr_statistic = granger[1][0]['lrtest'][0]
            causality_score += lr_statistic

        # Average over intervals (dataset 3 keeps the raw sums). With no
        # detected interval both scores stay 0 instead of dividing by zero.
        if dataset_num != '3' and len(R_interval) > 0:
            similarity_score /= len(R_interval)
            causality_score /= len(R_interval)

        correlation_score = 1 * similarity_score + 0.1 * causality_score
        correlation_scores[kpi] = correlation_score

    end = time.time()
    print(end - start)

    ranked_scores = sorted(correlation_scores.values())
    if not ranked_scores:
        print(f'{instance_name}: no candidate KPIs to rank, skipping')
        continue

    # Score of the k-th (resp. n-th) best candidate, clamped to the number of
    # candidates actually available.
    threshold = ranked_scores[-min(k, len(ranked_scores))]
    top_n = min(n, len(ranked_scores))
    # With no positive label nothing may be predicted positive; indexing with
    # [-0] would select the smallest score and flag every candidate.
    threshold_f1 = ranked_scores[-top_n] if top_n > 0 else float('inf')

    scores = pd.Series(list(correlation_scores.values()), dtype=float)
    predict = pd.DataFrame({
        'names': list(correlation_scores.keys()),
        'predicts': (scores >= threshold).astype('int'),
        'predicts_f1': (scores >= threshold_f1).astype('int'),
    })

    result = pd.merge(label, predict, on='names')
    # sklearn's signature is f1_score(y_true, y_pred).
    f1 = f1_score(result['labels'], result['predicts_f1'])
    # print(instance_name)
    # print(result)

    # Hit rate is computed directly: share of true root causes in the top-k.
    hits = result[(result['predicts'] == 1) & (result['labels'] == 1)].shape[0]
    hit = hits / n if n else 0.0
    print(f1, hit)
73 changes: 73 additions & 0 deletions features.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
import numpy as np
import math
from pyts.approximation import SymbolicAggregateApproximation
from scipy.stats import zscore


class RFeatures(object):
    """Locate rising segments of a series from ratios of adjacent window means.

    For each position the mean of the next ``rspace_win`` samples is divided
    by the mean of the previous ``rspace_win`` samples. A ratio above
    ``rspace_upper_bound`` marks a rise (1), a ratio at or below
    ``rspace_lower_bound`` marks a drop (-1), anything else is neutral (0).
    """

    # Means closer to zero than this are treated as zero when forming ratios.
    _ZERO_TOLERANCE = 1e-12

    def __init__(self, rspace_win, rspace_upper_bound, rspace_lower_bound, rspace_max_thresh=100):
        """Store the window length, the two ratio bounds and the zero-divisor fallback."""
        self.rspace_win = rspace_win
        self.rspace_max_thresh = rspace_max_thresh
        self.rspace_upper_bound = rspace_upper_bound
        self.rspace_lower_bound = rspace_lower_bound

    def r_detect(self, data: list) -> list:
        """Return the window-mean ratio for every position that has two full windows.

        Entry ``j`` of the result belongs to position ``j + rspace_win`` of
        ``data``. When the preceding window's mean is (numerically) zero,
        ``rspace_max_thresh`` is emitted instead of dividing.
        """
        # Work on a plain array so that slicing and indexing are both
        # positional, whatever container (list, Series, ...) was passed in.
        values = np.asarray(data)
        win = self.rspace_win
        r_data = []
        for i in range(win, len(values) - win):
            current_window_mean = np.mean(values[i: i + win])
            # Mean over the whole preceding window, not just its first sample.
            before_window_mean = np.mean(values[i - win: i])
            # abs_tol is required: isclose() against 0.0 with only the default
            # relative tolerance degenerates into an exact equality test.
            if math.isclose(before_window_mean, 0.0, abs_tol=self._ZERO_TOLERANCE):
                r_data.append(self.rspace_max_thresh)
            else:
                r_data.append(current_window_mean / before_window_mean)
        return r_data

    def judge_change_by_r(self, number: float) -> int:
        """Map a ratio to 1 (above upper bound), -1 (at/below lower bound) or 0."""
        if number > self.rspace_upper_bound:
            return 1
        elif number <= self.rspace_lower_bound:
            return -1
        return 0

    def get_rTrans_result(self, data: list) -> list:
        """Return the 1 / 0 / -1 classification of every ratio from ``r_detect``."""
        return [self.judge_change_by_r(elem) for elem in self.r_detect(data)]

    def get_rSegments(self, data: list) -> np.ndarray:
        """Return an ``(n_segments, 2)`` integer array of ``[start, end]`` positions.

        A segment opens at the first rising position seen while no segment is
        open and spans two further windows (clipped to the last index of
        ``data``). The segment is considered closed, allowing a new one to
        open, once the series falls below the value it had at the start.
        """
        values = np.asarray(data)
        binary_result = self.get_rTrans_result(values)
        win = self.rspace_win
        last_index = len(values) - 1

        in_segment = False
        threshold = 0
        segments = []
        for i, flag in enumerate(binary_result):
            position = i + win  # index in `values` that ratio `i` refers to
            if flag == 1 and not in_segment:
                in_segment = True
                threshold = values[position]
                segments.append((position, min(i + 3 * win, last_index)))
            if in_segment and values[position] < threshold:
                in_segment = False

        # reshape keeps the (0, 2) shape when no segment was found.
        return np.array(segments, dtype=int).reshape(-1, 2)


class SAXFeatures(object):
    """Symbolic Aggregate approXimation (SAX) encoder with a fixed alphabet size."""

    def __init__(self, bins: int):
        """Remember the number of SAX bins (symbols) to use."""
        self.bins = bins

    def sax_transform(self, data: list):
        """Z-normalise ``data`` and return the output of the pyts SAX transformer.

        The input is reshaped into a single column before normalisation, and
        the transformer is fitted on that column with ``strategy='normal'``.
        """
        column = np.array(data).reshape(-1, 1)
        # Z-score normalisation has to happen before the SAX encoding.
        normalized = zscore(column)
        # NaNs produced by the normalisation are replaced with 0.01.
        normalized = np.where(np.isnan(normalized), 0.01, normalized)
        encoder = SymbolicAggregateApproximation(n_bins=self.bins, strategy='normal')
        return encoder.fit_transform(normalized)

0 comments on commit e255581

Please sign in to comment.