
Commit 651f62e

fix: Add RemovePod to prefix indexer
Signed-off-by: Kfir Toledo <[email protected]>
1 parent a399f6d commit 651f62e

3 files changed: +130, -2 lines changed

pkg/epp/scheduling/framework/plugins/multi/prefix/indexer.go

Lines changed: 20 additions & 0 deletions
@@ -149,3 +149,23 @@ func (i *indexer) ReportLRUSize(interval time.Duration) {
 		i.mu.RUnlock()
 	}
 }
+
+// RemovePod removes a pod and its associated entries from the indexer.
+func (i *indexer) RemovePod(pod ServerID) {
+	i.mu.RLock()
+	lruCache, exists := i.podToLRU[pod]
+	i.mu.RUnlock()
+
+	if !exists {
+		return
+	}
+
+	// Remove all hashes associated with the pod from hashToPods (triggers eviction callbacks).
+	for _, hash := range lruCache.Keys() {
+		lruCache.Remove(hash)
+	}
+
+	i.mu.Lock()
+	delete(i.podToLRU, pod)
+	i.mu.Unlock()
+}
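
The comment "(triggers eviction callbacks)" is the key to why RemovePod only touches the per-pod LRU: the cleanup of hashToPods is assumed to happen in an eviction callback registered when that LRU is created. Below is a minimal, self-contained sketch of that wiring, not the repository's actual implementation; it assumes hashicorp/golang-lru/v2 and uses simplified stand-ins for BlockHash, ServerID, newIndexer, Add, and the indexer fields, so the real locking and field layout may differ.

// Sketch only: illustrates the eviction-callback wiring that RemovePod relies on.
// All types, fields, and helpers here are simplified stand-ins, not the plugin's real code.
package main

import (
	"fmt"
	"sync"

	lru "github.com/hashicorp/golang-lru/v2"
)

type BlockHash uint64

type ServerID struct{ Namespace, Name string }

type indexer struct {
	mu         sync.RWMutex
	hashToPods map[BlockHash]map[ServerID]struct{}
	podToLRU   map[ServerID]*lru.Cache[BlockHash, struct{}]
	size       int
}

func newIndexer(size int) *indexer {
	return &indexer{
		hashToPods: make(map[BlockHash]map[ServerID]struct{}),
		podToLRU:   make(map[ServerID]*lru.Cache[BlockHash, struct{}]),
		size:       size,
	}
}

// Add records hashes for a pod. The per-pod LRU is created with an eviction
// callback that also drops the (hash, pod) pair from hashToPods; that is why
// RemovePod can clean everything up just by calling Remove on each key.
func (i *indexer) Add(hashes []BlockHash, pod ServerID) {
	i.mu.Lock()
	cache, ok := i.podToLRU[pod]
	if !ok {
		cache, _ = lru.NewWithEvict[BlockHash, struct{}](i.size, func(h BlockHash, _ struct{}) {
			// Runs synchronously inside cache.Add/Remove, so callers must not hold
			// i.mu at that point (RemovePod releases its read lock first).
			i.mu.Lock()
			defer i.mu.Unlock()
			if pods, exists := i.hashToPods[h]; exists {
				delete(pods, pod)
				if len(pods) == 0 {
					delete(i.hashToPods, h)
				}
			}
		})
		i.podToLRU[pod] = cache
	}
	for _, h := range hashes {
		if i.hashToPods[h] == nil {
			i.hashToPods[h] = make(map[ServerID]struct{})
		}
		i.hashToPods[h][pod] = struct{}{}
	}
	i.mu.Unlock()

	// Insert into the LRU outside the lock so the eviction callback can acquire it.
	for _, h := range hashes {
		cache.Add(h, struct{}{})
	}
}

func main() {
	idx := newIndexer(2)
	pod := ServerID{Namespace: "default", Name: "server1"}
	idx.Add([]BlockHash{1, 2, 3}, pod) // capacity 2: BlockHash(1) gets evicted
	fmt.Println(idx.hashToPods)        // only hashes 2 and 3 remain; the callback removed hash 1
}

Note that in this sketch the callback acquires i.mu itself, which is consistent with RemovePod releasing its read lock before calling lruCache.Remove.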

pkg/epp/scheduling/framework/plugins/multi/prefix/indexer_test.go

Lines changed: 60 additions & 0 deletions
@@ -45,3 +45,63 @@ func TestIndexer_AddAndGet(t *testing.T) {
 	servers = i.Get(BlockHash(4))
 	assert.Empty(t, servers, "Cache should not contain non-existent hash")
 }
+
+func TestIndexer_RemovePodAndEviction(t *testing.T) {
+	const indexerSize = 10
+
+	i := newIndexer(indexerSize)
+
+	server1 := ServerID{Namespace: "default", Name: "server1"}
+	server2 := ServerID{Namespace: "default", Name: "server2"}
+
+	// Add indexerSize hashes to both servers
+	var hashes []BlockHash
+	for j := 0; j < indexerSize; j++ {
+		h := BlockHash(j)
+		hashes = append(hashes, h)
+		i.Add([]BlockHash{h}, server1)
+		i.Add([]BlockHash{h}, server2)
+	}
+
+	// Ensure all entries are added
+	assert.Equal(t, indexerSize, i.podToLRU[server1].Len(), "server1 should have 10 entries")
+	assert.Equal(t, indexerSize, i.podToLRU[server2].Len(), "server2 should have 10 entries")
+
+	// Ensure each hash in hashToPods maps to both server1 and server2
+	for _, h := range hashes {
+		pods := i.hashToPods[h]
+		assert.Len(t, pods, 2, "Each hash should be associated with exactly 2 pods")
+		assert.Contains(t, pods, server1, "hash should be associated with server1")
+		assert.Contains(t, pods, server2, "hash should be associated with server2")
+	}
+
+	// Add one more hash (BlockHash(indexerSize)) to server1 → should evict BlockHash(0)
+	evictedHash := BlockHash(0)
+	newHash := BlockHash(indexerSize)
+	i.Add([]BlockHash{newHash}, server1)
+
+	// server1 LRU should still be at max capacity
+	assert.Equal(t, indexerSize, i.podToLRU[server1].Len(), "server1 LRU should maintain max size")
+
+	// BlockHash(0) should no longer have server1 in hashToPods
+	pods := i.Get(evictedHash)
+	assert.NotContains(t, pods, server1, "server1 should be evicted from hashToPods for hash 0")
+	assert.Contains(t, pods, server2, "server2 should still have hash 0")
+
+	// Remove server2
+	i.RemovePod(server2)
+
+	// hashToPods for hash 0 should now be empty
+	pods = i.Get(evictedHash)
+	assert.NotContains(t, pods, server2, "server2 should be removed from hash 0")
+	assert.Empty(t, pods, "hash 0 should have no pods after both eviction and removal")
+
+	// All remaining hashes should map only to server1
+	for hash, pods := range i.hashToPods {
+		assert.Len(t, pods, 1, "hash %v should have only 1 pod after server2 removal", hash)
+		assert.Contains(t, pods, server1, "hash %v should only contain server1", hash)
+	}
+
+	// Ensure hashToPods contains exactly indexerSize hashes (post-eviction and server2 removal)
+	assert.Len(t, i.hashToPods, indexerSize, "hashToPods should contain %d hashes after cleanup", indexerSize)
+}

pkg/epp/scheduling/framework/plugins/multi/prefix/plugin.go

Lines changed: 50 additions & 2 deletions
@@ -21,6 +21,7 @@ import (
 	"encoding/binary"
 	"encoding/json"
 	"fmt"
+	"time"
 
 	"github.com/cespare/xxhash/v2"
 	k8stypes "k8s.io/apimachinery/pkg/types"

@@ -55,6 +56,11 @@ const (
 	PrefixCachePluginType = "prefix-cache-scorer"
 )
 
+const (
+	PodActiveCheckInterval = 1 * time.Minute
+	PodInactivityTimeout   = 5 * time.Minute
+)
+
 var DefaultConfig = Config{
 	HashBlockSize:          DefaultHashBlockSize,
 	MaxPrefixBlocksToMatch: DefaultMaxPrefixBlocks,

@@ -84,6 +90,7 @@ type podSet map[ServerID]struct{}
 type Indexer interface {
 	Get(hash BlockHash) podSet
 	Add(hashes []BlockHash, server ServerID)
+	RemovePod(server ServerID)
 }
 
 // BlockHash is a hash of the block of request body.

@@ -125,7 +132,7 @@ var _ framework.Scorer = &Plugin{}
 var _ framework.PostCycle = &Plugin{}
 
 // PrefixCachePluginFactory defines the factory function for Prefix plugin.
-func PrefixCachePluginFactory(name string, rawParameters json.RawMessage, _ plugins.Handle) (plugins.Plugin, error) {
+func PrefixCachePluginFactory(name string, rawParameters json.RawMessage, handle plugins.Handle) (plugins.Plugin, error) {
 	parameters := Config{
 		HashBlockSize:          DefaultHashBlockSize,
 		MaxPrefixBlocksToMatch: DefaultMaxPrefixBlocks,

@@ -138,7 +145,9 @@ func PrefixCachePluginFactory(name string, rawParameters json.RawMessage, _ plug
 		}
 	}
 
-	return New(parameters).WithName(name), nil
+	p := New(parameters).WithName(name)
+	go p.StartPodActiveWatcher(handle.Context(), handle)
+	return p, nil
 }
 
 // New initializes a new prefix Plugin and returns its pointer.

@@ -239,6 +248,45 @@ func (m *Plugin) matchLongestPrefix(ctx context.Context, hashes []BlockHash) map
 	return res
 }
 
+// StartPodActiveWatcher starts a goroutine that watches for active pods.
+func (m *Plugin) StartPodActiveWatcher(ctx context.Context, handle plugins.Handle) {
+	logger := log.FromContext(ctx).V(logutil.VERBOSE)
+
+	ticker := time.NewTicker(PodActiveCheckInterval)
+	defer ticker.Stop()
+
+	podLastSeen := make(map[ServerID]time.Time)
+
+	for {
+		select {
+		case <-ctx.Done():
+			return
+		case <-ticker.C:
+			now := time.Now()
+			activePods := handle.GetActivePods()
+
+			// Track active pods
+			activeSet := make(map[ServerID]struct{}, len(activePods))
+			for _, np := range activePods {
+				id := ServerID(np)
+				activeSet[id] = struct{}{}
+				podLastSeen[id] = now
+			}
+
+			// Remove stale pods
+			for pod, lastSeen := range podLastSeen {
+				if _, stillActive := activeSet[pod]; !stillActive {
+					if now.Sub(lastSeen) > PodInactivityTimeout {
+						m.indexer.RemovePod(pod)
+						delete(podLastSeen, pod)
+						logger.Info("Removed inactive pod from prefix cache", "pod", pod)
+					}
+				}
+			}
+		}
+	}
+}
+
 // hashPrompt divides the prompt into blocks and calculate the prefix cache for each block.
 // hash(0) is the hash of the model name, since different models generally don't share prefix cache.
 // For block i, hash(i) = hash(block i content, hash(i-1)).
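
The ticker loop in StartPodActiveWatcher encodes the staleness rule: a pod is removed only after it has been absent from GetActivePods for longer than PodInactivityTimeout, i.e. several consecutive one-minute ticks. As a hypothetical illustration (not part of the commit), here is the same bookkeeping factored into a pure function; stalePods and the lowercase podInactivityTimeout are made-up names, and ServerID is a simplified stand-in for the plugin's type.

// Hypothetical sketch: the stale-pod rule from StartPodActiveWatcher, factored into a
// pure function so the timeout behaviour is easy to see in isolation.
package main

import (
	"fmt"
	"time"
)

type ServerID struct{ Namespace, Name string }

const podInactivityTimeout = 5 * time.Minute

// stalePods marks every currently active pod as seen "now", then returns (and forgets)
// the pods that have been absent for longer than the timeout -- the same bookkeeping
// the ticker case performs before calling indexer.RemovePod.
func stalePods(lastSeen map[ServerID]time.Time, active []ServerID, now time.Time, timeout time.Duration) []ServerID {
	activeSet := make(map[ServerID]struct{}, len(active))
	for _, p := range active {
		activeSet[p] = struct{}{}
		lastSeen[p] = now
	}

	var stale []ServerID
	for pod, seen := range lastSeen {
		if _, stillActive := activeSet[pod]; stillActive {
			continue
		}
		if now.Sub(seen) > timeout {
			stale = append(stale, pod)
			delete(lastSeen, pod)
		}
	}
	return stale
}

func main() {
	p1 := ServerID{Namespace: "default", Name: "server1"}
	p2 := ServerID{Namespace: "default", Name: "server2"}

	lastSeen := map[ServerID]time.Time{}
	t0 := time.Now()

	// Tick 1: both pods reported active.
	stalePods(lastSeen, []ServerID{p1, p2}, t0, podInactivityTimeout)

	// 4 minutes later server2 has disappeared, but it is not past the timeout yet.
	fmt.Println(stalePods(lastSeen, []ServerID{p1}, t0.Add(4*time.Minute), podInactivityTimeout)) // []

	// 6 minutes later it has been gone for more than 5 minutes, so it is reported stale.
	fmt.Println(stalePods(lastSeen, []ServerID{p1}, t0.Add(6*time.Minute), podInactivityTimeout)) // [{default server2}]
}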
