We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent e39dc46, commit a47979b. Copy full SHA for a47979b
vllm/inputs/preprocess.py
@@ -221,6 +221,14 @@ def _tokenize_prompt(
221
tokenizer = self.get_tokenizer()
222
tokenization_kwargs = self._get_tokenization_kw(tokenization_kwargs)
223
224
+
225
+ bos_token_text = getattr(tokenizer, "bos_token", None)
226
+ if bos_token_text and isinstance(bos_token_text, str):
227
+ if prompt.lstrip().startswith(bos_token_text):
228
+ # The prompt already starts with the BOS token: default add_special_tokens to False, but only when the caller has not set it explicitly.
229
+ if "add_special_tokens" not in tokenization_kwargs:
230
+ tokenization_kwargs["add_special_tokens"] = False
231
232
encoder_config = self.model_config.encoder_config
233
234
if encoder_config and encoder_config.get("do_lower_case", False):
0 commit comments