@@ -21,9 +21,9 @@ import (
2121 "math/rand"
2222 "time"
2323
24- "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend/metrics"
2524 "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/config"
2625 "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/plugins"
26+ pluginutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/plugins/util"
2727 "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/types"
2828 logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging"
2929)
@@ -139,19 +139,21 @@ func leastQueuingFilterFunc(ctx *types.SchedulingContext, pods []types.Pod) []ty
139139 filtered := []types.Pod {}
140140
141141 for _ , pod := range pods {
142- podMetrics := pod .GetData ()[metrics .MetricsDataKey ].(* metrics.Metrics )
143- if podMetrics .WaitingQueueSize <= min {
144- min = podMetrics .WaitingQueueSize
145- }
146- if podMetrics .WaitingQueueSize >= max {
147- max = podMetrics .WaitingQueueSize
142+ if podMetrics := pluginutil .GetMetricsFromPodInfo (pod ); podMetrics != nil {
143+ if podMetrics .WaitingQueueSize <= min {
144+ min = podMetrics .WaitingQueueSize
145+ }
146+ if podMetrics .WaitingQueueSize >= max {
147+ max = podMetrics .WaitingQueueSize
148+ }
148149 }
149150 }
150151
151152 for _ , pod := range pods {
152- podMetrics := pod .GetData ()[metrics .MetricsDataKey ].(* metrics.Metrics )
153- if podMetrics .WaitingQueueSize >= min && podMetrics .WaitingQueueSize <= min + (max - min )/ len (pods ) {
154- filtered = append (filtered , pod )
153+ if podMetrics := pluginutil .GetMetricsFromPodInfo (pod ); podMetrics != nil {
154+ if podMetrics .WaitingQueueSize >= min && podMetrics .WaitingQueueSize <= min + (max - min )/ len (pods ) {
155+ filtered = append (filtered , pod )
156+ }
155157 }
156158 }
157159 return filtered
@@ -179,19 +181,21 @@ func leastKVCacheFilterFunc(ctx *types.SchedulingContext, pods []types.Pod) []ty
179181 filtered := []types.Pod {}
180182
181183 for _ , pod := range pods {
182- podMetrics := pod .GetData ()[metrics .MetricsDataKey ].(* metrics.Metrics )
183- if podMetrics .KVCacheUsagePercent <= min {
184- min = podMetrics .KVCacheUsagePercent
185- }
186- if podMetrics .KVCacheUsagePercent >= max {
187- max = podMetrics .KVCacheUsagePercent
184+ if podMetrics := pluginutil .GetMetricsFromPodInfo (pod ); podMetrics != nil {
185+ if podMetrics .KVCacheUsagePercent <= min {
186+ min = podMetrics .KVCacheUsagePercent
187+ }
188+ if podMetrics .KVCacheUsagePercent >= max {
189+ max = podMetrics .KVCacheUsagePercent
190+ }
188191 }
189192 }
190193
191194 for _ , pod := range pods {
192- podMetrics := pod .GetData ()[metrics .MetricsDataKey ].(* metrics.Metrics )
193- if podMetrics .KVCacheUsagePercent >= min && podMetrics .KVCacheUsagePercent <= min + (max - min )/ float64 (len (pods )) {
194- filtered = append (filtered , pod )
195+ if podMetrics := pluginutil .GetMetricsFromPodInfo (pod ); podMetrics != nil {
196+ if podMetrics .KVCacheUsagePercent >= min && podMetrics .KVCacheUsagePercent <= min + (max - min )/ float64 (len (pods )) {
197+ filtered = append (filtered , pod )
198+ }
195199 }
196200 }
197201 return filtered
@@ -226,14 +230,15 @@ func loRASoftAffinityFilterFunc(ctx *types.SchedulingContext, pods []types.Pod)
226230
227231 // Categorize pods based on affinity and availability
228232 for _ , pod := range pods {
229- podMetrics := pod .GetData ()[metrics .MetricsDataKey ].(* metrics.Metrics )
230- _ , active := podMetrics .ActiveModels [ctx .Req .ResolvedTargetModel ]
231- _ , waiting := podMetrics .WaitingModels [ctx .Req .ResolvedTargetModel ]
232-
233- if active || waiting {
234- filtered_affinity = append (filtered_affinity , pod )
235- } else if len (podMetrics .ActiveModels )+ len (podMetrics .WaitingModels ) < podMetrics .MaxActiveModels {
236- filtered_available = append (filtered_available , pod )
233+ if podMetrics := pluginutil .GetMetricsFromPodInfo (pod ); podMetrics != nil {
234+ _ , active := podMetrics .ActiveModels [ctx .Req .ResolvedTargetModel ]
235+ _ , waiting := podMetrics .WaitingModels [ctx .Req .ResolvedTargetModel ]
236+
237+ if active || waiting {
238+ filtered_affinity = append (filtered_affinity , pod )
239+ } else if len (podMetrics .ActiveModels )+ len (podMetrics .WaitingModels ) < podMetrics .MaxActiveModels {
240+ filtered_available = append (filtered_available , pod )
241+ }
237242 }
238243 }
239244
@@ -267,15 +272,19 @@ type podPredicate func(req *types.LLMRequest, pod types.Pod) bool
267272
268273func queueThresholdPredicate (queueThreshold int ) podPredicate {
269274 return func (req * types.LLMRequest , pod types.Pod ) bool {
270- podMetrics := pod .GetData ()[metrics .MetricsDataKey ].(* metrics.Metrics )
271- return podMetrics .WaitingQueueSize <= queueThreshold
275+ if podMetrics := pluginutil .GetMetricsFromPodInfo (pod ); podMetrics != nil {
276+ return podMetrics .WaitingQueueSize <= queueThreshold
277+ }
278+ return false
272279 }
273280}
274281
275282func kvCacheThresholdPredicate (kvCacheThreshold float64 ) podPredicate {
276283 return func (req * types.LLMRequest , pod types.Pod ) bool {
277- podMetrics := pod .GetData ()[metrics .MetricsDataKey ].(* metrics.Metrics )
278- return podMetrics .KVCacheUsagePercent <= kvCacheThreshold
284+ if podMetrics := pluginutil .GetMetricsFromPodInfo (pod ); podMetrics != nil {
285+ return podMetrics .KVCacheUsagePercent <= kvCacheThreshold
286+ }
287+ return false
279288 }
280289}
281290
0 commit comments