diff --git a/lib/llm/src/protocols/openai.rs b/lib/llm/src/protocols/openai.rs index 4d022ac01c83..ac179e33d071 100644 --- a/lib/llm/src/protocols/openai.rs +++ b/lib/llm/src/protocols/openai.rs @@ -63,6 +63,10 @@ pub(crate) trait OpenAIStopConditionsProvider { fn get_stop(&self) -> Option>; + fn get_stop_token_ids(&self) -> Result>> { + Ok(None) + } + fn nvext(&self) -> Option<&nvext::NvExt>; /// Get ignore_eos from CommonExt if the type supports it. @@ -180,6 +184,7 @@ impl StopConditionsProvider for T { let max_tokens = self.get_max_tokens(); let min_tokens = self.get_min_tokens(); let stop = self.get_stop(); + let stop_token_ids_hidden = self.get_stop_token_ids()?; let max_thinking_tokens = self.get_max_thinking_tokens(); if let Some(stop) = &stop @@ -195,7 +200,7 @@ impl StopConditionsProvider for T { max_tokens, min_tokens, stop, - stop_token_ids_hidden: None, + stop_token_ids_hidden, ignore_eos, max_thinking_tokens, }) diff --git a/lib/llm/src/protocols/openai/chat_completions.rs b/lib/llm/src/protocols/openai/chat_completions.rs index 6e200e53bd67..e296d73303ca 100644 --- a/lib/llm/src/protocols/openai/chat_completions.rs +++ b/lib/llm/src/protocols/openai/chat_completions.rs @@ -8,6 +8,7 @@ use validator::Validate; use crate::engines::ValidateRequest; use crate::preprocessor::media::MediaDecoder; +use crate::types::TokenIdType; use super::{ OpenAIOutputOptionsProvider, OpenAISamplingOptionsProvider, OpenAIStopConditionsProvider, @@ -307,6 +308,20 @@ impl OpenAIStopConditionsProvider for NvCreateChatCompletionRequest { }) } + fn get_stop_token_ids(&self) -> anyhow::Result>> { + let Some(value) = self.unsupported_fields.get("stop_token_ids") else { + return Ok(None); + }; + if value.is_null() { + return Ok(None); + } + serde_json::from_value(value.clone()) + .map(Some) + .map_err(|err| { + anyhow::anyhow!("stop_token_ids must be an array of unsigned token IDs: {err}") + }) + } + /// Returns a reference to the optional `NvExt` extension, if available. fn nvext(&self) -> Option<&NvExt> { self.nvext.as_ref() diff --git a/lib/llm/src/protocols/openai/validate.rs b/lib/llm/src/protocols/openai/validate.rs index e581d04303fc..5b8e25d1a66d 100644 --- a/lib/llm/src/protocols/openai/validate.rs +++ b/lib/llm/src/protocols/openai/validate.rs @@ -137,6 +137,9 @@ const PASSTHROUGH_EXTRA_FIELDS: &[&str] = &[ // Opt-in for `nvext.prompt_logprobs` on the response. Aliased through // to vLLM's `sampling_params.prompt_logprobs` in a follow-up. "return_prompt_logprobs", + // Renderer-style token stops. The OpenAI schema has string stops only, + // but Prime-RL/verifiers renderers already produce token IDs. + "stop_token_ids", ]; /// Validates that no unsupported fields are present in the request diff --git a/lib/llm/tests/test_common_ext.rs b/lib/llm/tests/test_common_ext.rs index 8e49c7377b09..dc07897b3a4c 100644 --- a/lib/llm/tests/test_common_ext.rs +++ b/lib/llm/tests/test_common_ext.rs @@ -1,13 +1,16 @@ // SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. // SPDX-License-Identifier: Apache-2.0 -use dynamo_llm::protocols::{ - common::StopConditionsProvider, - openai::{ - chat_completions::NvCreateChatCompletionRequest, - common_ext::{CommonExt, CommonExtProvider}, - completions::NvCreateCompletionRequest, - nvext::NvExt, +use dynamo_llm::{ + engines::ValidateRequest, + protocols::{ + common::StopConditionsProvider, + openai::{ + chat_completions::NvCreateChatCompletionRequest, + common_ext::{CommonExt, CommonExtProvider}, + completions::NvCreateCompletionRequest, + nvext::NvExt, + }, }, }; @@ -213,6 +216,27 @@ fn test_max_thinking_tokens_extraction() { assert_eq!(stop_conditions_none.max_thinking_tokens, None); } +#[test] +fn test_chat_completions_stop_token_ids_extraction() { + let json_str = r#"{ + "model": "test-model", + "messages": [{"role": "user", "content": "(token-in mode)"}], + "nvext": { + "token_data": [1, 2, 3] + }, + "stop_token_ids": [151645, 151643] + }"#; + + let request: NvCreateChatCompletionRequest = serde_json::from_str(json_str).unwrap(); + + request.validate().unwrap(); + let stop_conditions = request.extract_stop_conditions().unwrap(); + assert_eq!( + stop_conditions.stop_token_ids_hidden, + Some(vec![151645, 151643]) + ); +} + #[test] fn test_chat_completions_no_common_values() { // Test that when no common values are set, we get None