Skip to content

Commit a47979b

Browse files
committed
[Feature] Add support for optional BOS token handling in input preprocessing
Signed-off-by: 0xrushi <[email protected]>
1 parent e39dc46 commit a47979b

File tree

1 file changed

+8
-0
lines changed

1 file changed

+8
-0
lines changed

vllm/inputs/preprocess.py

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -221,6 +221,14 @@ def _tokenize_prompt(
221221
tokenizer = self.get_tokenizer()
222222
tokenization_kwargs = self._get_tokenization_kw(tokenization_kwargs)
223223

224+
225+
bos_token_text = getattr(tokenizer, "bos_token", None)
226+
if bos_token_text and isinstance(bos_token_text, str):
227+
if prompt.lstrip().startswith(bos_token_text):
228+
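# The prompt already starts with the BOS token text, so default
# add_special_tokens to False (presumably so the tokenizer does not add a
# second BOS) -- but only when the caller has not set it explicitly.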
229+
if "add_special_tokens" not in tokenization_kwargs:
230+
tokenization_kwargs["add_special_tokens"] = False
231+
224232
encoder_config = self.model_config.encoder_config
225233

226234
if encoder_config and encoder_config.get("do_lower_case", False):

0 commit comments

Comments
 (0)