
Commit 086481e

Expose spaces_between_special_tokens (#2991)
1 parent 551e6d5 commit 086481e

File tree

5 files changed (+36, -9 lines)


lmdeploy/messages.py (+4)
@@ -52,6 +52,9 @@ class GenerationConfig:
             ignoring the number of tokens in the prompt.
         skip_special_tokens (bool): Whether or not to remove special tokens
             in the decoding. Default to be True.
+        spaces_between_special_tokens (bool): Whether or not to add spaces
+            around special tokens. The behavior of Fast tokenizers is to have
+            this to False. This is setup to True in slow tokenizers.
         logprobs (int): Number of log probabilities to return per output token.
         response_format (Dict): Only pytorch backend support formatting
             response. Examples:
@@ -94,6 +97,7 @@ class GenerationConfig:
     bad_token_ids: List[int] = None
     min_new_tokens: int = None
     skip_special_tokens: bool = True
+    spaces_between_special_tokens: bool = True
     logprobs: int = None
     response_format: Optional[Dict] = None
     logits_processors: Optional[List[LogitsProcessor]] = None
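
For reference, a minimal sketch of how a caller could set the new field through the Python API. The pipeline entry point and the model name below are illustrative assumptions, not part of this diff:

from lmdeploy import pipeline, GenerationConfig

# Placeholder model; substitute any checkpoint lmdeploy can serve.
pipe = pipeline('internlm/internlm2-chat-7b')
gen_config = GenerationConfig(
    skip_special_tokens=False,            # keep special tokens in the decoded text
    spaces_between_special_tokens=False)  # do not insert spaces around them (fast-tokenizer style)
print(pipe(['Hi, please introduce yourself'], gen_config=gen_config))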

lmdeploy/serve/async_engine.py (+3, -1)
@@ -781,7 +781,9 @@ def is_error(status):
                 response, state = self.tokenizer.detokenize_incrementally(
                     token_ids,
                     state,
-                    skip_special_tokens=gen_config.skip_special_tokens)
+                    skip_special_tokens=gen_config.skip_special_tokens,
+                    spaces_between_special_tokens=gen_config.
+                    spaces_between_special_tokens)
                 res = token_ids[ids_offset:]

                 out = GenOut(response, history_len, input_len, gen_len,
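
The flag is simply forwarded into the incremental detokenization; the behavior it controls is easiest to see with a plain HuggingFace decode call. A minimal sketch, assuming a slow (Python) tokenizer from transformers and a placeholder checkpoint:

from transformers import AutoTokenizer

# Placeholder checkpoint; any repo shipping a slow tokenizer illustrates the same point.
tok = AutoTokenizer.from_pretrained('huggyllama/llama-7b', use_fast=False)
ids = tok.encode('hello', add_special_tokens=True)  # e.g. [bos_id, ...]

# Slow tokenizers join special tokens to the text with spaces by default ...
print(tok.decode(ids, skip_special_tokens=False, spaces_between_special_tokens=True))
# ... and concatenate them directly when the flag is False (the fast-tokenizer behavior).
print(tok.decode(ids, skip_special_tokens=False, spaces_between_special_tokens=False))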

lmdeploy/serve/gradio/vl.py (+3, -1)
@@ -151,7 +151,9 @@ def chat(chatbot, session, max_new_tokens, top_p, top_k, temperature):
         response, state = engine.tokenizer.detokenize_incrementally(
             res,
             state,
-            skip_special_tokens=gen_config.skip_special_tokens)
+            skip_special_tokens=gen_config.skip_special_tokens,
+            spaces_between_special_tokens=gen_config.
+            spaces_between_special_tokens)  # noqa
         if chatbot[-1][1] is None:
             chatbot[-1][1] = ''
             history[-1][1] = ''

lmdeploy/serve/openai/api_server.py (+24, -7)
@@ -149,7 +149,8 @@ def _create_completion_logprobs(tokenizer: Tokenizer,
                                 skip_special_tokens: bool = True,
                                 offset: int = 0,
                                 all_token_ids: List[int] = None,
-                                state: DetokenizeState = None):
+                                state: DetokenizeState = None,
+                                spaces_between_special_tokens: bool = True):
     """create openai LogProbs for completion.

     Args:
@@ -162,6 +163,9 @@ def _create_completion_logprobs(tokenizer: Tokenizer,
         offset (int): text offset.
         all_token_ids (int): the history output token ids.
         state (DetokenizeState): tokenizer decode state.
+        spaces_between_special_tokens (bool): Whether or not to add spaces
+            around special tokens. The behavior of Fast tokenizers is to have
+            this to False. This is setup to True in slow tokenizers.
     """
     if logprobs is None or len(logprobs) == 0:
         return None, None, None, None
@@ -183,7 +187,8 @@ def _create_completion_logprobs(tokenizer: Tokenizer,
             response, _state = tokenizer.detokenize_incrementally(
                 all_token_ids + [top_id],
                 copy.deepcopy(state),
-                skip_special_tokens=skip_special_tokens)
+                skip_special_tokens=skip_special_tokens,
+                spaces_between_special_tokens=spaces_between_special_tokens)
             res[response] = prob
             if top_id == token_id:
                 out_state = _state
@@ -323,6 +328,9 @@ async def chat_completions_v1(request: ChatCompletionRequest,
     - ignore_eos (bool): indicator for ignoring eos
     - skip_special_tokens (bool): Whether or not to remove special tokens
         in the decoding. Default to be True.
+    - spaces_between_special_tokens (bool): Whether or not to add spaces
+        around special tokens. The behavior of Fast tokenizers is to have
+        this to False. This is setup to True in slow tokenizers.
     - min_new_tokens (int): To generate at least numbers of tokens.
     - min_p (float): Minimum token probability, which will be scaled by the
         probability of the most likely token. It must be a value between
@@ -393,7 +401,8 @@ async def chat_completions_v1(request: ChatCompletionRequest,
         logits_processors=logits_processors,
         min_new_tokens=request.min_new_tokens,
         min_p=request.min_p,
-        random_seed=random_seed)
+        random_seed=random_seed,
+        spaces_between_special_tokens=request.spaces_between_special_tokens)

     tools = None
     if request.tools and request.tool_choice != 'none':
@@ -581,6 +590,9 @@ async def completions_v1(request: CompletionRequest,
     - ignore_eos (bool): indicator for ignoring eos
     - skip_special_tokens (bool): Whether or not to remove special tokens
         in the decoding. Default to be True.
+    - spaces_between_special_tokens (bool): Whether or not to add spaces
+        around special tokens. The behavior of Fast tokenizers is to have
+        this to False. This is setup to True in slow tokenizers.
     - top_k (int): The number of the highest probability vocabulary
         tokens to keep for top-k-filtering
@@ -623,7 +635,8 @@ async def completions_v1(request: CompletionRequest,
         ignore_eos=request.ignore_eos,
         stop_words=request.stop,
         skip_special_tokens=request.skip_special_tokens,
-        random_seed=random_seed)
+        random_seed=random_seed,
+        spaces_between_special_tokens=request.spaces_between_special_tokens)
     generators = []
     for i in range(len(request.prompt)):
         result_generator = VariableInterface.async_engine.generate(
@@ -672,7 +685,7 @@ async def completion_stream_generator() -> AsyncGenerator[str, None]:
                     VariableInterface.async_engine.tokenizer,
                     res.token_ids, res.logprobs,
                     gen_config.skip_special_tokens, offset, all_token_ids,
-                    state)
+                    state, gen_config.spaces_between_special_tokens)
                 if request.stream_options and request.stream_options.include_usage:  # noqa E501
                     final_res = res
                     total_tokens = sum([
@@ -724,8 +737,12 @@ async def _inner_call(i, generator):
             logprobs = None
             if request.logprobs and len(final_logprobs):
                 logprobs, _, _, _ = _create_completion_logprobs(
-                    VariableInterface.async_engine.tokenizer, final_token_ids,
-                    final_logprobs, gen_config.skip_special_tokens)
+                    VariableInterface.async_engine.tokenizer,
+                    final_token_ids,
+                    final_logprobs,
+                    gen_config.skip_special_tokens,
+                    spaces_between_special_tokens=gen_config.
+                    spaces_between_special_tokens)

             assert final_res is not None
             choice_data = CompletionResponseChoice(
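
With both request models carrying the field (see protocol.py below), a client of the OpenAI-compatible server can override it per request. A minimal sketch using the requests library; host, port, and model name are placeholders:

import requests

payload = {
    'model': 'internlm2',  # placeholder; use the model name the server reports
    'messages': [{'role': 'user', 'content': 'Hi there'}],
    'skip_special_tokens': False,
    'spaces_between_special_tokens': False,  # field exposed by this commit
}
resp = requests.post('http://0.0.0.0:23333/v1/chat/completions', json=payload)
print(resp.json()['choices'][0]['message']['content'])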

lmdeploy/serve/openai/protocol.py (+2)
@@ -135,6 +135,7 @@ class ChatCompletionRequest(BaseModel):
     session_id: Optional[int] = -1
     ignore_eos: Optional[bool] = False
     skip_special_tokens: Optional[bool] = True
+    spaces_between_special_tokens: Optional[bool] = True
     top_k: Optional[int] = 40
     seed: Optional[int] = None
     min_new_tokens: Optional[int] = Field(default=None, examples=[None])
@@ -251,6 +252,7 @@ class CompletionRequest(BaseModel):
     session_id: Optional[int] = -1
     ignore_eos: Optional[bool] = False
     skip_special_tokens: Optional[bool] = True
+    spaces_between_special_tokens: Optional[bool] = True
     top_k: Optional[int] = 40  # for opencompass
     seed: Optional[int] = None
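
A quick way to confirm the default, assuming the request model accepts an OpenAI-style messages list (a sketch, not part of this diff):

from lmdeploy.serve.openai.protocol import ChatCompletionRequest

req = ChatCompletionRequest(model='internlm2',
                            messages=[{'role': 'user', 'content': 'hi'}])
print(req.spaces_between_special_tokens)  # True whenever the client omits the field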
