1111 KVCacheEvent ,
1212)
1313from vllm .logger import init_logger
14+ from vllm .v1 .core .eviction_policies import FrequencyCostEvictionPolicy
1415from vllm .v1 .core .kv_cache_utils import (
1516 BlockHash ,
1617 BlockHashWithGroupId ,
@@ -166,6 +167,25 @@ def __init__(
166167 self .enable_kv_cache_events = enable_kv_cache_events
167168 self .kv_event_queue : list [KVCacheEvent ] = []
168169
170+ # Optional frequency-cost policy (set via configure_eviction_policy)
171+ self ._policy : FrequencyCostEvictionPolicy | None = None
172+
def configure_eviction_policy(
    self,
    policy: str,
    *,
    block_size: int,
    alpha: float = 2.0,
    time_decay: float = 0.0,
) -> None:
    """Install or clear the optional eviction policy.

    Passing ``"frequency_cost"`` stores a new
    ``FrequencyCostEvictionPolicy`` on ``self._policy``. Any other name
    resets ``self._policy`` to ``None``, i.e. no policy is configured and
    the pool stays on its default (LRU) behavior.

    Args:
        policy: Name of the policy to use.
        block_size: Forwarded to the policy as ``block_size``.
        alpha: Forwarded to the policy as ``alpha``.
        time_decay: Forwarded to the policy as ``time_decay_factor``.
    """
    if policy == "frequency_cost":
        self._policy = FrequencyCostEvictionPolicy(
            block_size=block_size,
            alpha=alpha,
            time_decay_factor=time_decay,
        )
        return

    # Any other name keeps the historical behavior of clearing the policy,
    # but a misspelled name should not disable the policy silently.
    # NOTE(review): "lru" is assumed to be the name callers use for the
    # default; confirm and extend this check if other names are in use.
    if policy != "lru":
        import warnings

        warnings.warn(
            f"Unknown eviction policy {policy!r}; falling back to LRU.",
            stacklevel=2,
        )
    self._policy = None
188+
169189 def get_cached_block (
170190 self , block_hash : BlockHash , kv_cache_group_ids : list [int ]
171191 ) -> list [KVCacheBlock ] | None :
@@ -278,19 +298,65 @@ def get_new_blocks(self, num_blocks: int) -> list[KVCacheBlock]:
278298 if num_blocks > self .get_num_free_blocks ():
279299 raise ValueError (f"Cannot get { num_blocks } free blocks from the pool" )
280300
281- ret : list [KVCacheBlock ] = self .free_block_queue .popleft_n (num_blocks )
282-
283- # In order to only iterate the list once, we duplicated code a bit
301+ # Fast path: no policy configured -> original LRU behavior
302+ if self ._policy is None :
303+ ret : list [KVCacheBlock ] = self .free_block_queue .popleft_n (num_blocks )
304+ if self .enable_caching :
305+ for block in ret :
306+ self ._maybe_evict_cached_block (block )
307+ assert block .ref_cnt == 0
308+ block .ref_cnt += 1
309+ else :
310+ for block in ret :
311+ assert block .ref_cnt == 0
312+ block .ref_cnt += 1
313+ return ret
314+
315+ # Policy path: prefer non-cached free blocks from LRU head, then
316+ # choose cached-free blocks via policy ranking.
317+ selected : list [KVCacheBlock ] = []
318+ deferred_cached : list [KVCacheBlock ] = []
319+
320+ while len (selected ) < num_blocks :
321+ # Exhausted free blocks -> impossible due to initial check
322+ blk = self .free_block_queue .popleft ()
323+ if blk .block_hash is None :
324+ selected .append (blk )
325+ else :
326+ # remove from policy to avoid selecting it immediately
327+ if self ._policy is not None :
328+ self ._policy .remove_block (blk )
329+ deferred_cached .append (blk )
330+ if self .get_num_free_blocks () == 0 and len (selected ) < num_blocks :
331+ break
332+
333+ if len (selected ) < num_blocks :
334+ need = num_blocks - len (selected )
335+ # Ask policy for global cached-free candidates by block_id
336+ ids = self ._policy .get_eviction_candidates (need )
337+ for block_id in ids :
338+ blk = self .blocks [block_id ]
339+ # Remove from free list if still present
340+ if blk .prev_free_block is not None and blk .next_free_block is not None :
341+ self .free_block_queue .remove (blk )
342+ # Evict hash later below
343+ selected .append (blk )
344+
345+ # Return deferred cached blocks to the free list tail to keep queue sound
346+ for blk in deferred_cached :
347+ self .free_block_queue .append (blk )
348+
349+ # Finalize selection: evict hashes for cached blocks; inc ref_cnt
284350 if self .enable_caching :
285- for block in ret :
286- self ._maybe_evict_cached_block (block )
287- assert block .ref_cnt == 0
288- block .ref_cnt += 1
351+ for blk in selected :
352+ self ._maybe_evict_cached_block (blk )
353+ assert blk .ref_cnt == 0
354+ blk .ref_cnt += 1
289355 else :
290- for block in ret :
291- assert block .ref_cnt == 0
292- block .ref_cnt += 1
293- return ret
356+ for blk in selected :
357+ assert blk .ref_cnt == 0
358+ blk .ref_cnt += 1
359+ return selected
294360
295361 def _maybe_evict_cached_block (self , block : KVCacheBlock ) -> bool :
296362 """
@@ -342,7 +408,11 @@ def touch(self, blocks: tuple[Sequence[KVCacheBlock], ...]) -> None:
342408 # candidate), so remove it.
343409 if block .ref_cnt == 0 and not block .is_null :
344410 self .free_block_queue .remove (block )
411+ if self ._policy is not None :
412+ self ._policy .remove_block (block )
345413 block .ref_cnt += 1
414+ if self ._policy is not None :
415+ self ._policy .on_block_access (block )
346416
347417 def free_blocks (self , ordered_blocks : Iterable [KVCacheBlock ]) -> None :
348418 """Free a list of blocks. The blocks should be ordered by their
@@ -356,9 +426,15 @@ def free_blocks(self, ordered_blocks: Iterable[KVCacheBlock]) -> None:
356426 blocks_list = list (ordered_blocks )
357427 for block in blocks_list :
358428 block .ref_cnt -= 1
359- self .free_block_queue .append_n (
360- [block for block in blocks_list if block .ref_cnt == 0 and not block .is_null ]
361- )
429+ freed = [
430+ block for block in blocks_list if block .ref_cnt == 0 and not block .is_null
431+ ]
432+ self .free_block_queue .append_n (freed )
433+ if self ._policy is not None :
434+ for block in freed :
435+ # Track only cached-free blocks
436+ if block .block_hash is not None :
437+ self ._policy .on_block_release (block )
362438
363439 def reset_prefix_cache (self ) -> bool :
364440 """Reset prefix cache. This function may be used in RLHF
@@ -390,6 +466,9 @@ def reset_prefix_cache(self) -> bool:
390466 if self .enable_kv_cache_events :
391467 self .kv_event_queue .append (AllBlocksCleared ())
392468
469+ if self ._policy is not None :
470+ self ._policy .reset ()
471+
393472 return True
394473
395474 def get_num_free_blocks (self ) -> int :
0 commit comments