@@ -21,9 +21,9 @@ import (
 	"math/rand"
 	"time"
 
-	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend/metrics"
 	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/config"
 	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/plugins"
+	pluginutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/plugins/util"
 	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/types"
 	logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging"
 )
@@ -139,19 +139,21 @@ func leastQueuingFilterFunc(ctx *types.SchedulingContext, pods []types.Pod) []types.Pod {
 	filtered := []types.Pod{}
 
 	for _, pod := range pods {
-		podMetrics := pod.GetData()[metrics.MetricsDataKey].(*metrics.Metrics)
-		if podMetrics.WaitingQueueSize <= min {
-			min = podMetrics.WaitingQueueSize
-		}
-		if podMetrics.WaitingQueueSize >= max {
-			max = podMetrics.WaitingQueueSize
+		if podMetrics := pluginutil.GetMetricsFromPodInfo(pod); podMetrics != nil {
+			if podMetrics.WaitingQueueSize <= min {
+				min = podMetrics.WaitingQueueSize
+			}
+			if podMetrics.WaitingQueueSize >= max {
+				max = podMetrics.WaitingQueueSize
+			}
 		}
 	}
 
 	for _, pod := range pods {
-		podMetrics := pod.GetData()[metrics.MetricsDataKey].(*metrics.Metrics)
-		if podMetrics.WaitingQueueSize >= min && podMetrics.WaitingQueueSize <= min+(max-min)/len(pods) {
-			filtered = append(filtered, pod)
+		if podMetrics := pluginutil.GetMetricsFromPodInfo(pod); podMetrics != nil {
+			if podMetrics.WaitingQueueSize >= min && podMetrics.WaitingQueueSize <= min+(max-min)/len(pods) {
+				filtered = append(filtered, pod)
+			}
 		}
 	}
 	return filtered
@@ -179,19 +181,21 @@ func leastKVCacheFilterFunc(ctx *types.SchedulingContext, pods []types.Pod) []types.Pod {
 	filtered := []types.Pod{}
 
 	for _, pod := range pods {
-		podMetrics := pod.GetData()[metrics.MetricsDataKey].(*metrics.Metrics)
-		if podMetrics.KVCacheUsagePercent <= min {
-			min = podMetrics.KVCacheUsagePercent
-		}
-		if podMetrics.KVCacheUsagePercent >= max {
-			max = podMetrics.KVCacheUsagePercent
+		if podMetrics := pluginutil.GetMetricsFromPodInfo(pod); podMetrics != nil {
+			if podMetrics.KVCacheUsagePercent <= min {
+				min = podMetrics.KVCacheUsagePercent
+			}
+			if podMetrics.KVCacheUsagePercent >= max {
+				max = podMetrics.KVCacheUsagePercent
+			}
 		}
 	}
 
	for _, pod := range pods {
-		podMetrics := pod.GetData()[metrics.MetricsDataKey].(*metrics.Metrics)
-		if podMetrics.KVCacheUsagePercent >= min && podMetrics.KVCacheUsagePercent <= min+(max-min)/float64(len(pods)) {
-			filtered = append(filtered, pod)
+		if podMetrics := pluginutil.GetMetricsFromPodInfo(pod); podMetrics != nil {
+			if podMetrics.KVCacheUsagePercent >= min && podMetrics.KVCacheUsagePercent <= min+(max-min)/float64(len(pods)) {
+				filtered = append(filtered, pod)
+			}
 		}
 	}
 	return filtered
@@ -226,14 +230,15 @@ func loRASoftAffinityFilterFunc(ctx *types.SchedulingContext, pods []types.Pod) []types.Pod {
 
 	// Categorize pods based on affinity and availability
 	for _, pod := range pods {
-		podMetrics := pod.GetData()[metrics.MetricsDataKey].(*metrics.Metrics)
-		_, active := podMetrics.ActiveModels[ctx.Req.ResolvedTargetModel]
-		_, waiting := podMetrics.WaitingModels[ctx.Req.ResolvedTargetModel]
-
-		if active || waiting {
-			filtered_affinity = append(filtered_affinity, pod)
-		} else if len(podMetrics.ActiveModels)+len(podMetrics.WaitingModels) < podMetrics.MaxActiveModels {
-			filtered_available = append(filtered_available, pod)
+		if podMetrics := pluginutil.GetMetricsFromPodInfo(pod); podMetrics != nil {
+			_, active := podMetrics.ActiveModels[ctx.Req.ResolvedTargetModel]
+			_, waiting := podMetrics.WaitingModels[ctx.Req.ResolvedTargetModel]
+
+			if active || waiting {
+				filtered_affinity = append(filtered_affinity, pod)
+			} else if len(podMetrics.ActiveModels)+len(podMetrics.WaitingModels) < podMetrics.MaxActiveModels {
+				filtered_available = append(filtered_available, pod)
+			}
 		}
 	}
 
@@ -267,15 +272,19 @@ type podPredicate func(req *types.LLMRequest, pod types.Pod) bool
 
 func queueThresholdPredicate(queueThreshold int) podPredicate {
 	return func(req *types.LLMRequest, pod types.Pod) bool {
-		podMetrics := pod.GetData()[metrics.MetricsDataKey].(*metrics.Metrics)
-		return podMetrics.WaitingQueueSize <= queueThreshold
+		if podMetrics := pluginutil.GetMetricsFromPodInfo(pod); podMetrics != nil {
+			return podMetrics.WaitingQueueSize <= queueThreshold
+		}
+		return false
 	}
 }
 
 func kvCacheThresholdPredicate(kvCacheThreshold float64) podPredicate {
 	return func(req *types.LLMRequest, pod types.Pod) bool {
-		podMetrics := pod.GetData()[metrics.MetricsDataKey].(*metrics.Metrics)
-		return podMetrics.KVCacheUsagePercent <= kvCacheThreshold
+		if podMetrics := pluginutil.GetMetricsFromPodInfo(pod); podMetrics != nil {
+			return podMetrics.KVCacheUsagePercent <= kvCacheThreshold
+		}
+		return false
 	}
 }
 
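Note: the diff switches every call site from an unchecked type assertion to pluginutil.GetMetricsFromPodInfo but does not show that helper itself. A minimal sketch of what it might look like, assuming it simply wraps the same map lookup and type assertion the old code performed and returns nil when the metrics entry is missing or of an unexpected type (the real implementation in pkg/epp/scheduling/plugins/util may differ):

// Hypothetical sketch only; not part of this diff.
package util

import (
	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend/metrics"
	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/types"
)

// GetMetricsFromPodInfo returns the pod's scraped metrics, or nil when the
// metrics entry is absent or not a *metrics.Metrics, so callers can skip the
// pod instead of panicking on a failed type assertion.
func GetMetricsFromPodInfo(pod types.Pod) *metrics.Metrics {
	data, ok := pod.GetData()[metrics.MetricsDataKey]
	if !ok || data == nil {
		return nil
	}
	podMetrics, ok := data.(*metrics.Metrics)
	if !ok {
		return nil
	}
	return podMetrics
}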