-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathfault_tolerant_kmeans.py
107 lines (81 loc) · 4.09 KB
/
fault_tolerant_kmeans.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
# © 2021, Universität Bern, Chair of Quantitative Methods, Philipp Baumann
from sklearn.neighbors import KDTree
import numpy as np
import time
def update_centers(client_locations, centers, n_facilities, assignments):
for i in range(n_facilities):
centers[i] = client_locations[np.where(assignments == i)[0], :].mean(axis=0)
return centers
def assign_objects(client_locations, centers, n_assignments):
kd_tree = KDTree(centers)
_, labels = kd_tree.query(client_locations, k=n_assignments)
return labels
def get_total_distance(client_locations, centers, labels):
kd_tree = KDTree(centers)
dist, _ = kd_tree.query(client_locations, k=labels.shape[1])
return dist.sum()
def get_locations_assignments_and_total_assignment_costs(client_locations, potential_locations, centers, n_facilities,
n_assignments_per_client):
location_ids = []
kd_tree = KDTree(potential_locations)
_, closest_potential_location = kd_tree.query(centers, k=n_facilities)
for i in range(n_facilities):
j = 0
while j < n_facilities:
if closest_potential_location[i, j] not in location_ids:
location_ids.append(closest_potential_location[i, j])
break
else:
j += 1
location_ids = np.array(location_ids)
new_centers = potential_locations[location_ids, :]
kd_tree = KDTree(new_centers)
distances, assignments = kd_tree.query(client_locations, k=n_assignments_per_client)
return new_centers, assignments, distances.sum()
def fault_tolerant_kmeans(client_locations, potential_locations=None, n_facilities=3, n_assignments_per_client=2,
random_state=None, time_limit=1e7, max_iter=100):
# Start stopwatch
tic = time.perf_counter()
# Choose initial cluster centers randomly
np.random.seed(random_state)
center_ids = np.random.choice(np.arange(client_locations.shape[0]), size=n_facilities, replace=False)
centers = client_locations[center_ids, :]
# Assign clients
assignments = assign_objects(client_locations, centers, n_assignments_per_client)
# Initialize best assignments
best_assignments = assignments
# Update centers
centers = update_centers(client_locations, centers, n_facilities, assignments)
best_centers = centers.copy()
# Compute total assignment costs
best_total_assignment_costs = get_total_distance(client_locations, centers, assignments)
n_iter = 0
elapsed_time = time.perf_counter() - tic
while (n_iter < max_iter) and (elapsed_time < time_limit):
# Assign clients
assignments = assign_objects(client_locations, centers, n_assignments_per_client)
# Update centers
centers = update_centers(client_locations, centers, n_facilities, assignments)
# Compute total assignment costs
total_assignment_costs = get_total_distance(client_locations, centers, assignments)
# Check stopping criterion
if total_assignment_costs >= best_total_assignment_costs:
break
else:
# Update the best assignments and the best total assignment costs
best_assignments = assignments
best_total_assignment_costs = total_assignment_costs
best_centers = centers.copy()
# Increase iteration counter
n_iter += 1
# Update elapsed time
elapsed_time = time.perf_counter() - tic
# Select final locations among potential locations
if potential_locations is not None:
best_centers, best_assignments, best_total_assignment_costs = \
get_locations_assignments_and_total_assignment_costs(client_locations,
potential_locations,
best_centers,
n_facilities,
n_assignments_per_client)
return best_centers, best_assignments, best_total_assignment_costs