From 99d3af8bcc0939c57bafe103e802e86169bc7dc4 Mon Sep 17 00:00:00 2001 From: nvulym <79202444+nvulym@users.noreply.github.com> Date: Tue, 27 Apr 2021 17:16:21 +0300 Subject: [PATCH 1/2] ... --- Cluster_Y_max_Y_var.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/Cluster_Y_max_Y_var.py b/Cluster_Y_max_Y_var.py index 53af1db..ea01caf 100644 --- a/Cluster_Y_max_Y_var.py +++ b/Cluster_Y_max_Y_var.py @@ -114,7 +114,6 @@ def show_plot(data1, data2, data3, data, name_of_plot): Y_test = [j for i, j in enumerate(kmeans.labels_) if i not in random_numbers] model = RandomForestClassifier(n_estimators=500, max_features='log2', random_state=1, n_jobs=-1).fit( X_train, Y_train) - model.score(X_test, Y_test) # 0.9093959731543624 @@ -152,17 +151,14 @@ def show_plot(data1, data2, data3, data, name_of_plot): df_Y_var.columns = ['x', 'y'] df_Y_var['method'] = 'Y_VAR' - # Concat dataframes - df = pd.concat([df_Y_max, df_Y_var]) # Show plot fig, ax = plt.subplots(figsize= (15, 10)) markers = {'Y_VAR': 's', "Y_MAX": 'X'} sns.scatterplot(data=df_1, x="x", y="y",color='green', s = 50, ax=ax) sns.scatterplot(data=df_2, x="x", y="y", color='yellow', s = 50, ax=ax) -sns.regplot(data=df_coord, x="x", y="y", order=4.9, truncate=True, ci=None, scatter=False, ax=ax) -# order = 1 (прямая) => order - ... +sns.regplot(data=df_coord, x="x", y="y", order=4.9, truncate=True, ci=None, scatter=False, ax=ax) sns.scatterplot(data=centers, x="x", y="y", color='red', s = 90, marker = 'v', ax=ax) sns.scatterplot(data=df, x="x", y="y", palette = ['blue', 'orange'], hue = 'method', markers = markers,style='method', s = 50, alpha =0.8, ax=ax) From c5dfe884842a8afd34ef8413d0387030bbfb06b8 Mon Sep 17 00:00:00 2001 From: nvulym <79202444+nvulym@users.noreply.github.com> Date: Tue, 27 Apr 2021 18:04:19 +0300 Subject: [PATCH 2/2] update def show_plot() --- Cluster_Y_max_Y_var.py | 87 ++++++++++++++++++++---------------------- 1 file changed, 41 insertions(+), 46 deletions(-) diff --git a/Cluster_Y_max_Y_var.py b/Cluster_Y_max_Y_var.py index ea01caf..5debaf1 100644 --- a/Cluster_Y_max_Y_var.py +++ b/Cluster_Y_max_Y_var.py @@ -26,59 +26,65 @@ def make_curve(data): """ Функция рассчитывает координаты точек (x,y) кривой через минимальное среднее евликодово расстояние между граничными точками двух кластеров. Возвращает датафреймы: координаты точек 1го кластера, координаты точек 2го кластера, координаты точек полученной кривой """ + def dist(x, y): - """Функция рассчета евклидова расстояния""" - d=0 + """Функция рассчета евклидова расстояния""" + d = 0 for i in range(len(x)): - d+=(x[i]-y[i])**2 - return sqrt(d) - + d += (x[i] - y[i]) ** 2 + return sqrt(d) + cluster_1, cluster_2, coord_x, coord_y = [], [], [], [] - + for ind, row in data.iterrows(): if row[2] == 0: cluster_1.append((row[0], row[1])) else: cluster_2.append((row[0], row[1])) - - for i,j in cluster_1: + + for i, j in cluster_1: m = 0.9 for n, l in cluster_2: - d = dist((i,j), (n,l)) + d = dist((i, j), (n, l)) if d <= m: - x = (i+n)/2 - y = (j+l)/2 + x = (i + n) / 2 + y = (j + l) / 2 coord_x.append(x) coord_y.append(y) - - df_1 = pd.DataFrame(cluster_1) - df_2 = pd.DataFrame(cluster_2) - df_1.columns = ['x', 'y'] - df_2.columns = ['x', 'y'] + df_x = pd.DataFrame(coord_x) df_y = pd.DataFrame(coord_y) df_coord = pd.concat([df_x, df_y], axis=1) df_coord.columns = ['x', 'y'] - return df_1, df_2, df_coord - -df_1, df_2, df_coord = make_curve(dataset) - - -def show_plot(data1, data2, data3, data, name_of_plot): - if name_of_plot == 'scatter': - plt.scatter(data1['x'], data1['y'], c='red') - plt.scatter(data2['x'], data2['y'], c='yellow') - plt.scatter(data3['x'], data3['y'], c='blue') - if name_of_plot == 'scatter with reg': - fig, ax = plt.subplots(figsize= (15, 10)) - sns.scatterplot(data=data1, x="x", y="y",color='red', s=50, ax=ax) - sns.scatterplot(data=data2, x="x", y="y", color='yellow', s=50, ax=ax) - sns.regplot(data=data3, x="x", y="y", order=4.9, truncate=True, ci=None, scatter=False, ax=ax) - # order = 1 (прямая) => order - ... - sns.scatterplot(data=data, x="x", y="y", color='green', marker = 'X', s = 70, ax=ax) + return df_coord + +df_coord = make_curve(dataset) + + +def show_plot(data1, data2, data3, name_of_plot, data4=None): + if data4 is not None: + markers = {'Y_VAR': 's', "Y_MAX": 'X'} + fig, ax = plt.subplots(figsize=(15, 10)) + sns.scatterplot(data=data1, x="x", y="y", palette=['green', 'yellow'], hue='cluster', s=50, ax=ax) + sns.regplot(data=data2, x="x", y="y", order=4.9, truncate=True, ci=None, scatter=False, ax=ax) + sns.scatterplot(data=data4, x="x", y="y", palette=['blue', 'orange'], hue='method', markers=markers, + style='method', s=50, alpha=0.8, ax=ax) + else: + if name_of_plot == 'scatter': + plt.scatter(data1['x'], data1['y'], c=kmeans.labels_) + plt.scatter(data2['x'], data2['y'], c='blue') + if name_of_plot == 'scatter with reg': + fig, ax = plt.subplots(figsize=(15, 10)) + sns.scatterplot(data=data1, x="x", y="y", palette=['green', 'yellow'], hue='cluster', s=50, ax=ax) + sns.regplot(data=df_coord, x="x", y="y", order=4.9, truncate=True, ci=None, scatter=False, ax=ax) + + sns.scatterplot(data=data3, x="x", y="y", color='red', s=90, marker='v', ax=ax) + plt.setp(ax.get_legend().get_texts(), fontsize='10') # for legend text + plt.setp(ax.get_legend().get_title(), fontsize='10') # for legend title + plt.grid(True) plt.show() -show_plot(df_1, df_2, df_coord, centers, 'scatter with reg') +show_plot(dataset, df_coord, centers, 'scatter with reg') # ## Y_max, Y_Var, Random @@ -154,18 +160,7 @@ def show_plot(data1, data2, data3, data, name_of_plot): # Concat dataframes df = pd.concat([df_Y_max, df_Y_var]) # Show plot -fig, ax = plt.subplots(figsize= (15, 10)) -markers = {'Y_VAR': 's', "Y_MAX": 'X'} -sns.scatterplot(data=df_1, x="x", y="y",color='green', s = 50, ax=ax) -sns.scatterplot(data=df_2, x="x", y="y", color='yellow', s = 50, ax=ax) -sns.regplot(data=df_coord, x="x", y="y", order=4.9, truncate=True, ci=None, scatter=False, ax=ax) -sns.scatterplot(data=centers, x="x", y="y", color='red', s = 90, marker = 'v', ax=ax) -sns.scatterplot(data=df, x="x", y="y", palette = ['blue', 'orange'], hue = 'method', markers = markers,style='method', s = 50, - alpha =0.8, ax=ax) -plt.setp(ax.get_legend().get_texts(), fontsize='10') # for legend text -plt.setp(ax.get_legend().get_title(), fontsize='10') # for legend title -plt.grid(True) -plt.show() +show_plot(dataset, df_coord, centers, 'scatter with reg', df)