@@ -21,9 +21,9 @@ import (
 	"math/rand"
 	"time"
 
-	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend/metrics"
 	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/config"
 	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/plugins"
+	pluginutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/plugins/util"
 	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/types"
 	logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging"
 )
@@ -139,19 +139,21 @@ func leastQueuingFilterFunc(ctx *types.SchedulingContext, pods []types.Pod) []types.Pod {
 	filtered := []types.Pod{}
 
 	for _, pod := range pods {
-		podMetrics := pod.GetData()[metrics.MetricsDataKey].(*metrics.Metrics)
-		if podMetrics.WaitingQueueSize <= min {
-			min = podMetrics.WaitingQueueSize
-		}
-		if podMetrics.WaitingQueueSize >= max {
-			max = podMetrics.WaitingQueueSize
+		if podMetrics := pluginutil.GetMetricsFromPodInfo(pod); podMetrics != nil {
+			if podMetrics.WaitingQueueSize <= min {
+				min = podMetrics.WaitingQueueSize
+			}
+			if podMetrics.WaitingQueueSize >= max {
+				max = podMetrics.WaitingQueueSize
+			}
 		}
 	}
 
 	for _, pod := range pods {
-		podMetrics := pod.GetData()[metrics.MetricsDataKey].(*metrics.Metrics)
-		if podMetrics.WaitingQueueSize >= min && podMetrics.WaitingQueueSize <= min+(max-min)/len(pods) {
-			filtered = append(filtered, pod)
+		if podMetrics := pluginutil.GetMetricsFromPodInfo(pod); podMetrics != nil {
+			if podMetrics.WaitingQueueSize >= min && podMetrics.WaitingQueueSize <= min+(max-min)/len(pods) {
+				filtered = append(filtered, pod)
+			}
 		}
 	}
 	return filtered
@@ -179,19 +181,21 @@ func leastKVCacheFilterFunc(ctx *types.SchedulingContext, pods []types.Pod) []types.Pod {
 	filtered := []types.Pod{}
 
 	for _, pod := range pods {
-		podMetrics := pod.GetData()[metrics.MetricsDataKey].(*metrics.Metrics)
-		if podMetrics.KVCacheUsagePercent <= min {
-			min = podMetrics.KVCacheUsagePercent
-		}
-		if podMetrics.KVCacheUsagePercent >= max {
-			max = podMetrics.KVCacheUsagePercent
+		if podMetrics := pluginutil.GetMetricsFromPodInfo(pod); podMetrics != nil {
+			if podMetrics.KVCacheUsagePercent <= min {
+				min = podMetrics.KVCacheUsagePercent
+			}
+			if podMetrics.KVCacheUsagePercent >= max {
+				max = podMetrics.KVCacheUsagePercent
+			}
 		}
 	}
 
	for _, pod := range pods {
-		podMetrics := pod.GetData()[metrics.MetricsDataKey].(*metrics.Metrics)
-		if podMetrics.KVCacheUsagePercent >= min && podMetrics.KVCacheUsagePercent <= min+(max-min)/float64(len(pods)) {
-			filtered = append(filtered, pod)
+		if podMetrics := pluginutil.GetMetricsFromPodInfo(pod); podMetrics != nil {
+			if podMetrics.KVCacheUsagePercent >= min && podMetrics.KVCacheUsagePercent <= min+(max-min)/float64(len(pods)) {
+				filtered = append(filtered, pod)
+			}
 		}
 	}
 	return filtered
@@ -226,14 +230,15 @@ func loRASoftAffinityFilterFunc(ctx *types.SchedulingContext, pods []types.Pod) []types.Pod {
 
 	// Categorize pods based on affinity and availability
 	for _, pod := range pods {
-		podMetrics := pod.GetData()[metrics.MetricsDataKey].(*metrics.Metrics)
-		_, active := podMetrics.ActiveModels[ctx.Req.ResolvedTargetModel]
-		_, waiting := podMetrics.WaitingModels[ctx.Req.ResolvedTargetModel]
-
-		if active || waiting {
-			filtered_affinity = append(filtered_affinity, pod)
-		} else if len(podMetrics.ActiveModels)+len(podMetrics.WaitingModels) < podMetrics.MaxActiveModels {
-			filtered_available = append(filtered_available, pod)
+		if podMetrics := pluginutil.GetMetricsFromPodInfo(pod); podMetrics != nil {
+			_, active := podMetrics.ActiveModels[ctx.Req.ResolvedTargetModel]
+			_, waiting := podMetrics.WaitingModels[ctx.Req.ResolvedTargetModel]
+
+			if active || waiting {
+				filtered_affinity = append(filtered_affinity, pod)
+			} else if len(podMetrics.ActiveModels)+len(podMetrics.WaitingModels) < podMetrics.MaxActiveModels {
+				filtered_available = append(filtered_available, pod)
+			}
 		}
 	}
 
@@ -267,15 +272,19 @@ type podPredicate func(req *types.LLMRequest, pod types.Pod) bool
 
 func queueThresholdPredicate(queueThreshold int) podPredicate {
 	return func(req *types.LLMRequest, pod types.Pod) bool {
-		podMetrics := pod.GetData()[metrics.MetricsDataKey].(*metrics.Metrics)
-		return podMetrics.WaitingQueueSize <= queueThreshold
+		if podMetrics := pluginutil.GetMetricsFromPodInfo(pod); podMetrics != nil {
+			return podMetrics.WaitingQueueSize <= queueThreshold
+		}
+		return false
 	}
 }
 
 func kvCacheThresholdPredicate(kvCacheThreshold float64) podPredicate {
 	return func(req *types.LLMRequest, pod types.Pod) bool {
-		podMetrics := pod.GetData()[metrics.MetricsDataKey].(*metrics.Metrics)
-		return podMetrics.KVCacheUsagePercent <= kvCacheThreshold
+		if podMetrics := pluginutil.GetMetricsFromPodInfo(pod); podMetrics != nil {
+			return podMetrics.KVCacheUsagePercent <= kvCacheThreshold
+		}
+		return false
 	}
 }
 
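Note: the diff switches every call site from an unchecked type assertion to pluginutil.GetMetricsFromPodInfo but does not show that helper itself. A minimal sketch of what it might look like, assuming it simply wraps the same map lookup and type assertion the old code performed and returns nil when the metrics entry is missing or of an unexpected type (the real implementation in pkg/epp/scheduling/plugins/util may differ):

// Hypothetical sketch only; not part of this diff.
package util

import (
	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend/metrics"
	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/types"
)

// GetMetricsFromPodInfo returns the pod's scraped metrics, or nil when the
// metrics entry is absent or not a *metrics.Metrics, so callers can skip the
// pod instead of panicking on a failed type assertion.
func GetMetricsFromPodInfo(pod types.Pod) *metrics.Metrics {
	data, ok := pod.GetData()[metrics.MetricsDataKey]
	if !ok || data == nil {
		return nil
	}
	podMetrics, ok := data.(*metrics.Metrics)
	if !ok {
		return nil
	}
	return podMetrics
}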