Skip to content

Commit ecf8230

Browse files
authored
[Metrics] Log number of preempted requests (#28522)
Add tracking and periodic logging of the number of preempted requests in the metrics logger. This helps monitor system behavior under load.

Signed-off-by: Yining Liu <[email protected]>
1 parent 8cfbe89 commit ecf8230

File tree

1 file changed

+19
-4
lines changed

1 file changed

+19
-4
lines changed

vllm/v1/metrics/loggers.py

Lines changed: 19 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -118,12 +118,14 @@ def _reset(self, now):
118118
self.num_prompt_tokens: int = 0
119119
self.num_generation_tokens: int = 0
120120
self.num_corrupted_reqs: int = 0
121+
self.num_preemptions: int = 0
121122

122123
def _track_iteration_stats(self, iteration_stats: IterationStats):
123124
# Save tracked stats for token counters.
124125
self.num_prompt_tokens += iteration_stats.num_prompt_tokens
125126
self.num_generation_tokens += iteration_stats.num_generation_tokens
126127
self.num_corrupted_reqs += iteration_stats.num_corrupted_reqs
128+
self.num_preemptions += iteration_stats.num_preempted_reqs
127129

128130
def _get_throughput(self, tracked_stats: int, now: float) -> float:
129131
# Compute summary metrics for tracked stats
@@ -196,18 +198,31 @@ def log(self):
196198
"Avg generation throughput: %.1f tokens/s",
197199
"Running: %d reqs",
198200
"Waiting: %d reqs",
199-
"GPU KV cache usage: %.1f%%",
200-
"Prefix cache hit rate: %.1f%%",
201201
]
202202
log_args = [
203203
self.last_prompt_throughput,
204204
self.last_generation_throughput,
205205
self.last_scheduler_stats.num_running_reqs,
206206
self.last_scheduler_stats.num_waiting_reqs,
207-
self.last_scheduler_stats.kv_cache_usage * 100,
208-
self.prefix_caching_metrics.hit_rate * 100,
209207
]
210208

209+
if self.num_preemptions > 0:
210+
log_parts.append("Preemptions: %d")
211+
log_args.append(self.num_preemptions)
212+
213+
log_parts.extend(
214+
[
215+
"GPU KV cache usage: %.1f%%",
216+
"Prefix cache hit rate: %.1f%%",
217+
]
218+
)
219+
log_args.extend(
220+
[
221+
self.last_scheduler_stats.kv_cache_usage * 100,
222+
self.prefix_caching_metrics.hit_rate * 100,
223+
]
224+
)
225+
211226
if envs.VLLM_COMPUTE_NANS_IN_LOGITS:
212227
log_parts.append("Corrupted: %d reqs")
213228
log_args.append(self.num_corrupted_reqs)

0 commit comments

Comments
 (0)