-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathinexact_flow_solver.py
194 lines (159 loc) · 8.97 KB
/
inexact_flow_solver.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
######################
# This is a minimal example of how to use the abstract class AbstractPathModelDAG to
# decompose an inexact flow (given as intervals [lb,ub] for each edge) into
# a given number of weighted paths.
#
# This model was introduced by
# Williams, Reynolds and Mumey, "RNA Transcript Assembly Using Inexact Flows", 2019 IEEE International Conference on Bioinformatics and Biomedicine (BIBM)
# https://doi.org/10.1109/BIBM47256.2019.8983180
#
# In addition, to illustrate how to add an objective fuction to the model, we minimize the sum of the path weights.
#
# NOTE: See kflowdecomp.py or kminpatherror.py for best practices on how to implement a new decomposition model.
######################
import flowpaths as fp
import networkx as nx
class kInexactFlowDecomposition(fp.AbstractPathModelDAG):
def __init__(self, G: nx.DiGraph, lb:str, ub:str, num_paths:int, threads:int=4):
self.G = fp.stDiGraph(G)
self.lb = lb # We assume all lowerbounds are >= 0
self.ub = ub # We assume all upperbounds are >= 0
# self.k = num_paths will be available from the superclass AbstractPathModelDAG,
# after calling super().__init__(...), which happens below.
# We declare the __solution attribute, to be able to cache it.
# Note that we make it private to this class, so that we can access it only with get_solution()
self.__solution = None
# To be able to apply the safety optimizations, we get the edges that
# must appear in some solution path. For this problem, these are the edges
# that have a non-zero flow lowerbound, since they appear in at least one source-to-sink path.
trusted_edges_for_safety = self.G.get_non_zero_flow_edges(flow_attr=self.lb)
# We initialize the super class with the graph, the number of paths, and the trusted edges.
super().__init__(
self.G,
num_paths,
optimization_options={"trusted_edges_for_safety": trusted_edges_for_safety},
solver_options={"threads": threads}
)
# This method is called from the super class AbstractPathModelDAG
self.create_solver_and_paths()
# This method is called from the current class
self.__encode_inexact_flow_decomposition()
# We encode the objective, from the current class
self.__encode_objective()
def __encode_inexact_flow_decomposition(self):
# Get the maximum data in an edge indexed under self.lb.
# This will be used to set the upper bound of the path weights, since a path weight larger than this
# would not "fit" inside the flow interval of an edge.
maximum_allowed_path_weight = max(data.get(self.ub,0) for u, v, data in self.G.edges(data=True))
# From the super class, we already have the edge_vars, such that
# edge_vars[(u,v,i)] = 1 if path i goes through edge (u,v), 0 otherwise
# We now declare the path_weights_vars, such that
# path_weights_vars[(i)] = the weight of path i
# Note that the weights are non-negative, and we set the upper bound to the maximum allowed path weight
self.path_weights_vars = self.solver.add_variables(
self.path_indexes,
name_prefix="w",
lb=0,
ub=maximum_allowed_path_weight,
)
# The pi_vars will be used to encode the product of edge_vars and path_weights_vars
# Specifically, pi_vars[(u,v,i)] = edge_vars[(u,v,i)] * path_weights_vars[(i)]
# This means pi_vars[(u,v,i)] equals path_weights_vars[(i)] if path i goes through edge (u,v), otherwise it is 0
self.pi_vars = self.solver.add_variables(
self.edge_indexes,
name_prefix="pi",
lb=0,
ub=maximum_allowed_path_weight,
)
# We encode that for each edge (u,v), the sum of the weights of the paths
# going through the edge is in the flow interval of the edge.
for u, v, data in self.G.edges(data=True):
# We ignore edges incident to the artificial global source and sink
if (u, v) in self.G.source_sink_edges:
continue
# We encode that edge_vars[(u,v,i)] * path_weights_vars[(i)] = pi_vars[(u,v,i)]
# Since this is a non-linear term, we will use the add_product_constraint method that
# will introduce additional constraints to linearize it for us, assuming that
# 0 <= path_weights_vars[(i)] <= maximum_allowed_path_weight, which is the case
for i in range(self.k):
self.solver.add_binary_continuous_product_constraint(
binary_var=self.edge_vars[(u, v, i)],
continuous_var=self.path_weights_vars[(i)],
product_var=self.pi_vars[(u, v, i)],
lb=0,
ub=maximum_allowed_path_weight,
name=f"product_u={u}_v={v}_i={i}",
)
# We next encode that the sum of the weights of the paths going through the edge
# is at least the lowerbound, and at most the upper bound of the edge.
# That is, the sum of the pi_vars for edge (u,v) is at least the lowerbound of the edge,
self.solver.add_constraint(
self.solver.quicksum(self.pi_vars[(u, v, i)] for i in range(self.k)) >= data[self.lb],
name=f"lowerbound_u={u}_v={v}",
)
# and at most the upperbound of the edge.
self.solver.add_constraint(
self.solver.quicksum(self.pi_vars[(u, v, i)] for i in range(self.k)) <= data[self.ub],
name=f"upperbound_u={u}_v={v}",
)
def __encode_objective(self):
# We set the objective to minimize the sum of the path weights
self.solver.set_objective(
self.solver.quicksum(self.path_weights_vars[(i)] for i in range(self.k)),
sense="minimize",
)
def get_solution(self):
if self.__solution is not None:
return self.__solution
solution_weights_dict = self.solver.get_variable_values("w", [int])
self.__solution = {
"paths": self.get_solution_paths(),
"weights": [solution_weights_dict[i] for i in range(self.k)],
}
return self.__solution
def is_valid_solution(self):
# AbstractPathModelDAG requires implementing a basic check that the solution is valid,
# to make sure there were no errors in the encoding.
# This could be done by checking that, for each edge, the sum of the path weights going through it
# is within the flow interval of the edge. self.solver.get_objective_value() could also be checked,
# to make sure the sum of the path weights indeed equals the objective value returned by the solver.
return True
def get_objective_value(self):
# AbstractPathModelDAG requires implementing a method to get the objective value.
# This could be done by returning the sum of the path weights, as returned by the solver.
# This is useful if we want to compute the safe paths of any solution to our kInexactFlowDecomposition,
# namely those paths that are guaranteed to appear as subpath in some path of any optimal solution.
return self.solver.get_objective_value()
def get_lowerbound_k(self):
# AbstractPathModelDAG requires implementing a method to get the lowerbound for the number of paths.
# A possible implementation is as follows.
# We know that each edge with lb > 0 must be covered by at least one path.
# Therefore, a lowerbound is the minimum number of paths needed to cover all the edges with lb > 0.
weight_function = {(u,v): 1 for u, v, data in self.G.edges(data=True) if data.get(self.lb, 0 ) > 0}
return self.G.compute_max_edge_antichain(weight_function=weight_function)
if __name__ == "__main__":
# Create a simple graph
graph = nx.DiGraph()
graph.graph["id"] = "simple_graph"
graph.add_edge("0", "a", lb=2, ub=6)
graph.add_edge("0", "b", lb=7, ub=7)
graph.add_edge("a", "b", lb=1, ub=2)
graph.add_edge("a", "c", lb=3, ub=5)
graph.add_edge("b", "c", lb=5, ub=9)
graph.add_edge("c", "d", lb=3, ub=6)
graph.add_edge("c", "1", lb=4, ub=7)
graph.add_edge("d", "1", lb=2, ub=6)
# We create a kInexactFlowDecomposition model
# with the flow lower bounds in the attribute `lb` of the edges,
# the flow upper bounds in the attribute `ub` of the edges,
# and the number of paths to consider set to 3
kifd_model = kInexactFlowDecomposition(graph, lb="lb", ub="ub", num_paths=3)
# We solve it
kifd_model.solve()
# We process its solution
if kifd_model.is_solved():
print(kifd_model.get_solution())
print(kifd_model.solve_statistics)
print("model.is_valid_solution()", kifd_model.is_valid_solution())
else:
print("Model could not be solved.")