Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
91 changes: 41 additions & 50 deletions Cluster_Y_max_Y_var.py
Original file line number Diff line number Diff line change
def make_curve(data, max_dist=0.9):
    """Compute the points of the boundary curve between two clusters.

    Every point of the first cluster is paired with every point of the
    second cluster. Whenever the Euclidean distance between the two points
    of a pair does not exceed ``max_dist``, the midpoint of the pair is
    recorded as a point of the curve.

    Args:
        data: DataFrame whose first two columns hold the x and y coordinates
            and whose third column holds the cluster label. Rows labelled
            ``0`` form the first cluster, every other row the second one.
        max_dist: Largest distance between two points of different clusters
            for their midpoint to be kept (defaults to the original 0.9).

    Returns:
        A tuple ``(df_1, df_2, df_coord)`` of DataFrames with columns
        ``x`` and ``y``: the points of the first cluster, the points of the
        second cluster and the points of the resulting curve.
    """

    def dist(x, y):
        """Return the Euclidean distance between the points x and y."""
        return sqrt(sum((a - b) ** 2 for a, b in zip(x, y)))

    cluster_1, cluster_2 = [], []

    # itertuples gives plain positional access; indexing an iterrows()
    # Series with integers relies on a deprecated positional fallback.
    for row in data.itertuples(index=False):
        point = (row[0], row[1])
        if row[2] == 0:
            cluster_1.append(point)
        else:
            cluster_2.append(point)

    midpoints = []
    for i, j in cluster_1:
        for n, l in cluster_2:
            if dist((i, j), (n, l)) <= max_dist:
                midpoints.append(((i + n) / 2, (j + l) / 2))

    # Passing ``columns`` up front keeps the x/y schema even when a list is
    # empty; assigning ``.columns`` afterwards fails on a 0-column frame.
    df_1 = pd.DataFrame(cluster_1, columns=['x', 'y'])
    df_2 = pd.DataFrame(cluster_2, columns=['x', 'y'])
    df_coord = pd.DataFrame(midpoints, columns=['x', 'y'])
    return df_1, df_2, df_coord

df_1, df_2, df_coord = make_curve(dataset)


def show_plot(data1, data2, data3, data, name_of_plot):
if name_of_plot == 'scatter':
plt.scatter(data1['x'], data1['y'], c='red')
plt.scatter(data2['x'], data2['y'], c='yellow')
plt.scatter(data3['x'], data3['y'], c='blue')
if name_of_plot == 'scatter with reg':
fig, ax = plt.subplots(figsize= (15, 10))
sns.scatterplot(data=data1, x="x", y="y",color='red', s=50, ax=ax)
sns.scatterplot(data=data2, x="x", y="y", color='yellow', s=50, ax=ax)
sns.regplot(data=data3, x="x", y="y", order=4.9, truncate=True, ci=None, scatter=False, ax=ax)
# order = 1 (прямая) => order - ...
sns.scatterplot(data=data, x="x", y="y", color='green', marker = 'X', s = 70, ax=ax)
return df_coord

df_coord = make_curve(dataset)


def show_plot(data1, data2, data3, name_of_plot, data4=None):
    """Plot the clustered points, the boundary curve and the marker points.

    Args:
        data1: DataFrame with ``x`` and ``y`` columns; the seaborn variants
            additionally colour the points by its ``cluster`` column.
        data2: DataFrame with ``x`` and ``y`` columns. It is drawn as blue
            points for ``'scatter'`` and as a polynomial regression line
            otherwise.
        data3: DataFrame with ``x`` and ``y`` columns drawn as red triangles
            (the callers in this file pass ``centers``).
        name_of_plot: ``'scatter'`` or ``'scatter with reg'``. Ignored when
            ``data4`` is given.
        data4: Optional DataFrame with ``x``, ``y`` and ``method`` columns.
            When given, the regression figure is drawn and these points are
            overlaid, styled per method.

    Raises:
        ValueError: If ``data4`` is None and ``name_of_plot`` is not one of
            the two supported names.
    """
    if data4 is not None or name_of_plot == 'scatter with reg':
        _fig, ax = plt.subplots(figsize=(15, 10))
        sns.scatterplot(data=data1, x="x", y="y", palette=['green', 'yellow'], hue='cluster', s=50, ax=ax)
        # Use the data2 argument in both variants instead of the module-level
        # df_coord, so the function plots what the caller passed in.
        # NOTE(review): order=4.9 is kept as written; confirm whether an
        # integer polynomial order was intended.
        sns.regplot(data=data2, x="x", y="y", order=4.9, truncate=True, ci=None, scatter=False, ax=ax)
        if data4 is not None:
            markers = {'Y_VAR': 's', "Y_MAX": 'X'}
            sns.scatterplot(data=data4, x="x", y="y", palette=['blue', 'orange'], hue='method', markers=markers,
                            style='method', s=50, alpha=0.8, ax=ax)
    elif name_of_plot == 'scatter':
        # Colours come from the module-level kmeans model, as before.
        plt.scatter(data1['x'], data1['y'], c=kmeans.labels_)
        plt.scatter(data2['x'], data2['y'], c='blue')
        # Bind the axes that plt.scatter drew on so the shared tail below works.
        ax = plt.gca()
    else:
        raise ValueError("unknown name_of_plot: {!r}".format(name_of_plot))

    sns.scatterplot(data=data3, x="x", y="y", color='red', s=90, marker='v', ax=ax)
    # A legend only exists when something labelled was drawn.
    legend = ax.get_legend()
    if legend is not None:
        plt.setp(legend.get_texts(), fontsize='10')  # for legend text
        plt.setp(legend.get_title(), fontsize='10')  # for legend title
    plt.grid(True)
    plt.show()

show_plot(df_1, df_2, df_coord, centers, 'scatter with reg')
show_plot(dataset, df_coord, centers, 'scatter with reg')


# ## Y_max, Y_Var, Random
Expand Down Expand Up @@ -114,7 +120,6 @@ def show_plot(data1, data2, data3, data, name_of_plot):
# Keep the k-means labels of every sample whose index is NOT in random_numbers.
Y_test = [j for i, j in enumerate(kmeans.labels_) if i not in random_numbers]
# Fit a 500-tree random forest on X_train / Y_train; random_state fixes the seed
# and n_jobs=-1 asks for all available cores.
model = RandomForestClassifier(n_estimators=500, max_features='log2', random_state=1, n_jobs=-1).fit(
X_train, Y_train)

# Score the fitted model on the test samples; the trailing number is presumably
# the value observed in the author's run.
model.score(X_test, Y_test) # 0.9093959731543624


Expand Down Expand Up @@ -152,24 +157,10 @@ def show_plot(data1, data2, data3, data, name_of_plot):
# Name the coordinate columns and tag every row with the method label 'Y_VAR'.
df_Y_var.columns = ['x', 'y']
df_Y_var['method'] = 'Y_VAR'


# Concat dataframes
# Stack both frames; df_Y_max presumably carries the 'Y_MAX' label (built above, outside this view).

df = pd.concat([df_Y_max, df_Y_var])
# Show plot
fig, ax = plt.subplots(figsize= (15, 10))
# Marker shape per value of the 'method' column.
markers = {'Y_VAR': 's', "Y_MAX": 'X'}
sns.scatterplot(data=df_1, x="x", y="y",color='green', s = 50, ax=ax)
sns.scatterplot(data=df_2, x="x", y="y", color='yellow', s = 50, ax=ax)
sns.regplot(data=df_coord, x="x", y="y", order=4.9, truncate=True, ci=None, scatter=False, ax=ax)
# order = 1 (straight line) => order - ...
sns.scatterplot(data=centers, x="x", y="y", color='red', s = 90, marker = 'v', ax=ax)
sns.scatterplot(data=df, x="x", y="y", palette = ['blue', 'orange'], hue = 'method', markers = markers,style='method', s = 50,
alpha =0.8, ax=ax)
plt.setp(ax.get_legend().get_texts(), fontsize='10')  # for legend text
plt.setp(ax.get_legend().get_title(), fontsize='10')  # for legend title
plt.grid(True)
plt.show()
# Draw the figure through show_plot, passing the combined frame as the fifth argument.
show_plot(dataset, df_coord, centers, 'scatter with reg', df)



Expand Down