@@ -149,7 +149,8 @@ def _create_completion_logprobs(tokenizer: Tokenizer,
                                 skip_special_tokens: bool = True,
                                 offset: int = 0,
                                 all_token_ids: List[int] = None,
-                                state: DetokenizeState = None):
+                                state: DetokenizeState = None,
+                                spaces_between_special_tokens: bool = True):
     """create openai LogProbs for completion.

     Args:
@@ -162,6 +163,9 @@ def _create_completion_logprobs(tokenizer: Tokenizer,
         offset (int): text offset.
         all_token_ids (int): the history output token ids.
         state (DetokenizeState): tokenizer decode state.
+        spaces_between_special_tokens (bool): Whether or not to add spaces
+            around special tokens during detokenization. Fast tokenizers
+            default this to False; slow tokenizers default it to True.
     """
     if logprobs is None or len(logprobs) == 0:
         return None, None, None, None
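For reference, this is the tokenizer-level behavior the new flag exposes. A minimal sketch, assuming a HuggingFace slow tokenizer (the model name is a placeholder): slow Python tokenizers accept `spaces_between_special_tokens` in `decode()`, while fast tokenizers generally ignore it.

```python
from transformers import AutoTokenizer

# Placeholder checkpoint; any model with a slow (Python) tokenizer works.
tok = AutoTokenizer.from_pretrained('internlm/internlm2-chat-7b',
                                    trust_remote_code=True,
                                    use_fast=False)
ids = tok.encode('hello')  # includes special tokens such as BOS

# Slow-tokenizer default: special tokens are joined to the text with spaces.
print(tok.decode(ids, spaces_between_special_tokens=True))
# With the flag off, special tokens are concatenated directly to the text,
# which matches what fast tokenizers produce.
print(tok.decode(ids, spaces_between_special_tokens=False))
```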
@@ -183,7 +187,8 @@ def _create_completion_logprobs(tokenizer: Tokenizer,
             response, _state = tokenizer.detokenize_incrementally(
                 all_token_ids + [top_id],
                 copy.deepcopy(state),
-                skip_special_tokens=skip_special_tokens)
+                skip_special_tokens=skip_special_tokens,
+                spaces_between_special_tokens=spaces_between_special_tokens)
             res[response] = prob
             if top_id == token_id:
                 out_state = _state
@@ -323,6 +328,9 @@ async def chat_completions_v1(request: ChatCompletionRequest,
     - ignore_eos (bool): indicator for ignoring eos
     - skip_special_tokens (bool): Whether or not to remove special tokens
       in the decoding. Default to be True.
+    - spaces_between_special_tokens (bool): Whether or not to add spaces
+      around special tokens during detokenization. Fast tokenizers default
+      this to False; slow tokenizers default it to True.
     - min_new_tokens (int): To generate at least numbers of tokens.
     - min_p (float): Minimum token probability, which will be scaled by the
       probability of the most likely token. It must be a value between
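A sketch of how a client would set the new field on `/v1/chat/completions`; since the field is not part of the standard OpenAI schema, it rides along in `extra_body`. The server URL and model name are assumptions.

```python
from openai import OpenAI

client = OpenAI(base_url='http://0.0.0.0:23333/v1', api_key='none')
resp = client.chat.completions.create(
    model='internlm2',
    messages=[{'role': 'user', 'content': 'Hi there'}],
    extra_body={
        'skip_special_tokens': False,
        # keep special tokens glued to the text, as fast tokenizers do
        'spaces_between_special_tokens': False,
    })
print(resp.choices[0].message.content)
```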
@@ -393,7 +401,8 @@ async def chat_completions_v1(request: ChatCompletionRequest,
         logits_processors=logits_processors,
         min_new_tokens=request.min_new_tokens,
         min_p=request.min_p,
-        random_seed=random_seed)
+        random_seed=random_seed,
+        spaces_between_special_tokens=request.spaces_between_special_tokens)

     tools = None
     if request.tools and request.tool_choice != 'none':
@@ -581,6 +590,9 @@ async def completions_v1(request: CompletionRequest,
     - ignore_eos (bool): indicator for ignoring eos
     - skip_special_tokens (bool): Whether or not to remove special tokens
       in the decoding. Default to be True.
+    - spaces_between_special_tokens (bool): Whether or not to add spaces
+      around special tokens during detokenization. Fast tokenizers default
+      this to False; slow tokenizers default it to True.
     - top_k (int): The number of the highest probability vocabulary
       tokens to keep for top-k-filtering

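The same field on the completions route, shown as a raw request so the JSON shape is explicit; host, port, and model name are placeholders.

```python
import requests

payload = {
    'model': 'internlm2',
    'prompt': 'The capital of France is',
    'skip_special_tokens': False,
    'spaces_between_special_tokens': False,  # the field added in this PR
}
r = requests.post('http://0.0.0.0:23333/v1/completions', json=payload)
print(r.json()['choices'][0]['text'])
```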
@@ -623,7 +635,8 @@ async def completions_v1(request: CompletionRequest,
         ignore_eos=request.ignore_eos,
         stop_words=request.stop,
         skip_special_tokens=request.skip_special_tokens,
-        random_seed=random_seed)
+        random_seed=random_seed,
+        spaces_between_special_tokens=request.spaces_between_special_tokens)
     generators = []
     for i in range(len(request.prompt)):
         result_generator = VariableInterface.async_engine.generate(
@@ -672,7 +685,7 @@ async def completion_stream_generator() -> AsyncGenerator[str, None]:
                     VariableInterface.async_engine.tokenizer,
                     res.token_ids, res.logprobs,
                     gen_config.skip_special_tokens, offset, all_token_ids,
-                    state)
+                    state, gen_config.spaces_between_special_tokens)
                 if request.stream_options and request.stream_options.include_usage:  # noqa E501
                     final_res = res
                 total_tokens = sum([
@@ -724,8 +737,12 @@ async def _inner_call(i, generator):
         logprobs = None
         if request.logprobs and len(final_logprobs):
             logprobs, _, _, _ = _create_completion_logprobs(
-                VariableInterface.async_engine.tokenizer, final_token_ids,
-                final_logprobs, gen_config.skip_special_tokens)
+                VariableInterface.async_engine.tokenizer,
+                final_token_ids,
+                final_logprobs,
+                gen_config.skip_special_tokens,
+                spaces_between_special_tokens=gen_config.
+                spaces_between_special_tokens)

         assert final_res is not None
         choice_data = CompletionResponseChoice(